OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /zipcpu/trunk/rtl/core
    from Rev 49 to Rev 56
    Reverse comparison

Rev 49 → Rev 56

/pipemem.v
77,18 → 77,23
if ((i_rst)||(i_wb_err))
wraddr <= 0;
else if (i_pipe_stb)
wraddr <= wraddr + 1;
wraddr <= wraddr + 4'h1;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
rdaddr <= 0;
else if ((i_wb_ack)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl)))
rdaddr <= rdaddr + 1;
assign nxt_rdaddr = rdaddr + 1;
else if ((i_wb_ack)&&(cyc))
rdaddr <= rdaddr + 4'h1;
assign nxt_rdaddr = rdaddr + 4'h1;
 
reg cyc;
wire gbl_stb, lcl_stb;
assign lcl_stb = (i_addr[31:8]==24'hc00000)&&(i_addr[7:5]==3'h0);
assign gbl_stb = ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0));
assign gbl_stb = (~lcl_stb);
//= ((i_addr[31:8]!=24'hc00000)||(i_addr[7:5]!=3'h0));
 
initial cyc = 0;
initial o_wb_cyc_lcl = 0;
initial o_wb_cyc_gbl = 0;
always @(posedge i_clk)
if (i_rst)
begin
96,7 → 101,8
o_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
end else if ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))
cyc <= 1'b0;
end else if (cyc)
begin
if ((~i_wb_stall)&&(~i_pipe_stb))
begin
104,8 → 110,8
o_wb_stb_lcl <= 1'b0;
end else if ((i_pipe_stb)&&(~i_wb_stall))
begin
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
// o_wb_addr <= i_addr[(AW-1):0];
// o_wb_data <= i_data;
end
 
if (((i_wb_ack)&&(nxt_rdaddr == wraddr))||(i_wb_err))
112,6 → 118,7
begin
o_wb_cyc_gbl <= 1'b0;
o_wb_cyc_lcl <= 1'b0;
cyc <= 1'b0;
end
end else if (i_pipe_stb) // New memory operation
begin // Grab the wishbone
119,23 → 126,33
o_wb_cyc_gbl <= gbl_stb;
o_wb_stb_lcl <= lcl_stb;
o_wb_stb_gbl <= gbl_stb;
cyc <= 1'b1;
// o_wb_addr <= i_addr[(AW-1):0];
// o_wb_data <= i_data;
// o_wb_we <= i_op
end
always @(posedge i_clk)
if ((cyc)&&(i_pipe_stb)&&(~i_wb_stall))
begin
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
// o_wb_we <= i_op
end else if ((~cyc)&&(i_pipe_stb))
begin
o_wb_addr <= i_addr[(AW-1):0];
o_wb_data <= i_data;
end
always @(posedge i_clk)
if ((i_pipe_stb)
&&((~i_wb_stall)
||((~o_wb_cyc_gbl)&&(~o_wb_cyc_lcl))))
if ((i_pipe_stb)&&(~cyc))
o_wb_we <= i_op;
 
initial o_valid = 1'b0;
always @(posedge i_clk)
o_valid <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
o_valid <= (cyc)&&(i_wb_ack)&&(~o_wb_we);
initial o_err = 1'b0;
always @(posedge i_clk)
o_err <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
o_err <= (cyc)&&(i_wb_err);
assign o_busy = cyc;
 
always @(posedge i_clk)
o_wreg <= fifo_oreg[rdaddr];
143,6 → 160,6
if (i_wb_ack)
o_result <= i_wb_data;
 
assign o_pipe_stalled = ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))
assign o_pipe_stalled = (cyc)
&&((i_wb_stall)||((~o_wb_stb_lcl)&&(~o_wb_stb_gbl)));
endmodule
/pipefetch.v
87,7 → 87,8
reg [(LGCACHELEN):0] r_nvalid, r_acks_waiting;
reg [(BUSW-1):0] cache[0:(CACHELEN-1)];
 
reg [(LGCACHELEN-1):0] r_cache_offset;
wire [(LGCACHELEN-1):0] w_cache_offset;
reg [1:0] r_cache_offset;
 
reg r_addr_set;
reg [(AW-1):0] r_addr;
108,8 → 109,11
||(r_addr >= r_cache_base + bus_nvalid+5)));
wire w_running_out_of_cache;
assign w_running_out_of_cache = (r_addr_set)
&&(r_addr >= r_cache_base + (1<<(LGCACHELEN-2))
+ (1<<(LGCACHELEN-1)))
&&(r_addr >= r_cache_base +
// {{(AW-LGCACHELEN-1),{1'b0}},2'b11,
// {(LGCACHELEN-1){1'b0}}})
// (1<<(LGCACHELEN-2)) + (1<<(LGCACHELEN-1)))
+(3<<(LGCACHELEN-2)))
&&(|r_nvalid[(LGCACHELEN):(LGCACHELEN-1)]);
 
initial r_cache_base = RESET_ADDRESS;
151,7 → 155,7
// o_wb_addr <= (i_new_pc) ? i_pc : r_addr;
// r_nvalid <= 0;
// r_cache_base <= (i_new_pc) ? i_pc : r_addr;
// r_cache_offset <= 0;
// w_cache_offset <= 0;
end else if ((~o_wb_cyc)&&(w_running_out_of_cache))
begin
// If we're using the last quarter of the cache, then
161,7 → 165,7
// o_wb_addr <= r_cache_base + (1<<(LGCACHELEN));
// r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2));
// r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2));
// r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2));
// w_cache_offset <= w_cache_offset + (1<<(LGCACHELEN-2));
end else if (o_wb_cyc)
begin
// This handles everything ... but the case where
192,9 → 196,11
(w_pc_out_of_bounds)||(w_ran_off_end_of_cache)))
r_nvalid <= 0;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_nvalid <= r_nvalid - (1<<(LGCACHELEN-2));
r_nvalid[LGCACHELEN:(LGCACHELEN-2)]
<= r_nvalid[LGCACHELEN:(LGCACHELEN-2)] +3'b111;
// i.e. - (1<<(LGCACHELEN-2));
else if ((o_wb_cyc)&&(i_wb_ack))
r_nvalid <= r_nvalid+1;
r_nvalid <= r_nvalid + {{(LGCACHELEN){1'b0}},1'b1}; // +1;
 
always @(posedge i_clk)
if (i_clear_cache)
204,7 → 210,10
||(w_ran_off_end_of_cache)))
r_cache_base <= (i_new_pc) ? i_pc : r_addr;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_cache_base <= r_cache_base + (1<<(LGCACHELEN-2));
r_cache_base[(AW-1):(LGCACHELEN-2)]
<= r_cache_base[(AW-1):(LGCACHELEN-2)]
+ {{(AW-LGCACHELEN+1){1'b0}},1'b1};
// i.e. + (1<<(LGCACHELEN-2));
 
always @(posedge i_clk)
if (i_clear_cache)
214,7 → 223,8
||(w_ran_off_end_of_cache)))
r_cache_offset <= 0;
else if ((~o_wb_cyc)&&(w_running_out_of_cache))
r_cache_offset <= r_cache_offset + (1<<(LGCACHELEN-2));
r_cache_offset[1:0] <= r_cache_offset[1:0] + 2'b01;
assign w_cache_offset = { r_cache_offset, {(LGCACHELEN-2){1'b0}} };
 
always @(posedge i_clk)
if (i_clear_cache)
236,13 → 246,13
if (~o_wb_cyc)
r_acks_waiting <= 0;
else if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall)&&(~i_wb_ack))
r_acks_waiting <= r_acks_waiting + 1;
r_acks_waiting <= r_acks_waiting + {{(LGCACHELEN){1'b0}},1'b1};
else if ((o_wb_cyc)&&(i_wb_ack)&&((~o_wb_stb)||(i_wb_stall)))
r_acks_waiting <= r_acks_waiting - 1;
r_acks_waiting <= r_acks_waiting + {(LGCACHELEN+1){1'b1}}; // - 1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache[r_nvalid[(LGCACHELEN-1):0]+r_cache_offset]
cache[r_nvalid[(LGCACHELEN-1):0]+w_cache_offset]
<= i_wb_data;
 
initial r_addr_set = 1'b0;
265,11 → 275,11
if (i_new_pc)
r_addr <= i_pc;
else if ( ((i_stall_n)&&(w_cv)) || ((~i_stall_n)&&(w_cv)&&(r_addr == o_pc)) )
r_addr <= r_addr + 1;
r_addr <= r_addr + {{(AW-1){1'b0}},1'b1};
 
wire [(LGCACHELEN-1):0] c_rdaddr, c_cache_base;
assign c_cache_base = r_cache_base[(LGCACHELEN-1):0];
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+r_cache_offset;
assign c_rdaddr = r_addr[(LGCACHELEN-1):0]-c_cache_base+w_cache_offset;
always @(posedge i_clk)
if ((~o_v)||((i_stall_n)&&(o_v)))
o_i <= cache[c_rdaddr];
/cpuops.v
29,7 → 29,9
//
///////////////////////////////////////////////////////////////////////////
//
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid);
module cpuops(i_clk, i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
o_illegal);
parameter IMPLEMENT_MPY = 1;
input i_clk, i_rst, i_ce;
input [3:0] i_op;
input [31:0] i_a, i_b;
37,11 → 39,13
output reg [31:0] o_c;
output wire [3:0] o_f;
output reg o_valid;
output wire o_illegal;
 
wire [63:0] w_rol_tmp;
assign w_rol_tmp = { i_a, i_a } << i_b[4:0];
wire [31:0] w_rol_result;
assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags
`ifndef NEW_NOT_OLD_CODE
wire [33:0] w_lsr_result, w_asr_result;
wire signed [33:0] w_ia_input;
assign w_ia_input = { i_a[31], i_a, 1'b0 };
49,14 → 53,15
: ( w_ia_input >>> (i_b[4:0]) );// ASR
assign w_lsr_result = (|i_b[31:5])? 34'h00
: { 1'b0, i_a, 1'b0 } >> (i_b[4:0]);// LSR
`else
wire [32:0] w_lsr_result, w_asr_result;
assign w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
: ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR
assign w_lsr_result = (|i_b[31:5])? 33'h00
: ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR
`endif
 
 
wire signed [16:0] w_mpy_a_input, w_mpy_b_input;
wire signed [33:0] w_mpy_result;
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] };
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] };
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;
 
wire z, n, v;
reg c, pre_sign, set_ovfl;
always @(posedge i_clk)
66,7 → 71,11
||((i_op==4'ha)&&(i_a[31] == i_b[31])) // ADD
||(i_op == 4'hd) // LSL
||(i_op == 4'hf)); // LSR
always @(posedge i_clk)
 
generate
if (IMPLEMENT_MPY == 0)
begin
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
74,6 → 83,33
casez(i_op)
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB
4'b?001: o_c <= i_a & i_b; // BTST/And
4'h5: o_c <= w_rol_result; // ROL
4'h6: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO
4'h7: o_c <= { i_b[15:0], i_a[15:0] }; // LODIHI
4'ha: { c, o_c } <= i_a + i_b; // Add
4'hb: o_c <= i_a | i_b; // Or
4'hc: o_c <= i_a ^ i_b; // Xor
4'hd: { c, o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL
4'he: { o_c, c } <= w_asr_result[32:0];// ASR
4'hf: { o_c, c } <= w_lsr_result[32:0];// LSR
default: o_c <= i_b; // MOV, LDI
endcase
end
end else begin
wire signed [16:0] w_mpy_a_input, w_mpy_b_input;
wire signed [33:0] w_mpy_result;
assign w_mpy_a_input = { ((i_a[15])&&(i_op[2])), i_a[15:0] };
assign w_mpy_b_input = { ((i_b[15])&&(i_op[2])), i_b[15:0] };
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;
 
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
casez(i_op)
4'b?000:{c,o_c } <= {(i_b>i_a),i_a - i_b};// CMP/SUB
4'b?001: o_c <= i_a & i_b; // BTST/And
4'h3: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S
4'h4: { c, o_c } <= {1'b0,w_mpy_result[31:0]}; // MPYU/S
4'h5: o_c <= w_rol_result; // ROL
88,7 → 124,19
default: o_c <= i_b; // MOV, LDI
endcase
end
end endgenerate
 
// Illegal-instruction reporting for the ALU.
// When IMPLEMENT_MPY == 0 the multiply opcodes (i_op == 4'h3 or 4'h4) are
// flagged. The flag is registered, so o_illegal changes on the clock edge
// after the opcode is presented on i_op.
generate
if (IMPLEMENT_MPY == 0)
begin
reg r_illegal;
// NOTE(review): this register is not qualified by i_ce or i_valid, so it
// tracks i_op on every clock -- confirm the consumer expects that.
always @(posedge i_clk)
r_illegal <= (i_op == 4'h3)||(i_op == 4'h4);
assign o_illegal = r_illegal;
end else
// Multiply is implemented: this module never reports an illegal opcode.
assign o_illegal = 1'b0;
endgenerate
 
assign z = (o_c == 32'h0000);
assign n = (o_c[31]);
assign v = (set_ovfl)&&(pre_sign != o_c[31]);
99,8 → 147,6
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (i_ce)
o_valid <= i_valid;
else if (~i_ce)
o_valid <= 1'b0;
else
o_valid <= (i_ce)&&(i_valid);
endmodule
/zipcpu.v
6,20 → 6,17
//
// Purpose: This is the top level module holding the core of the Zip CPU
// together. The Zip CPU is designed to be as simple as possible.
// The instruction set is about as RISC as you can get, there are
// only 16 instruction types supported (of which one isn't yet
// supported ...) Please see the accompanying iset.html file
// for a description of these instructions.
// (actual implementation aside ...) The instruction set is about as
// RISC as you can get, there are only 16 instruction types supported.
// Please see the accompanying spec.pdf file for a description of these
// instructions.
//
// All instructions are 32-bits wide. All bus accesses, both
// address and data, are 32-bits over a wishbone bus.
// All instructions are 32-bits wide. All bus accesses, both address and
// data, are 32-bits over a wishbone bus.
//
// The Zip CPU is fully pipelined with the following pipeline stages:
//
// 1. Prefetch, returns the instruction from memory. On the
// Basys board that I'm working on, one instruction may be
// issued every 20 clocks or so, unless and until I implement a
// cache or local memory.
// 1. Prefetch, returns the instruction from memory.
//
// 2. Instruction Decode
//
29,60 → 26,12
//
// 4. Write-back Results
//
// A lot of difficult work has been placed into the pipeline stall
// handling. My original proposal was not to allow pipeline stalls at all.
// The idea would be that the CPU would just run every clock and whatever
// stalled answer took place would just get fixed a clock or two later,
// meaning that the compiler could just schedule everything out.
// This idea died at the memory interface, which can take a variable
// amount of time to read or write any value, thus the whole CPU needed
// to stall on a stalled memory access.
// Further information about the inner workings of this CPU may be
// found in the spec.pdf file. (The documentation within this file
// had become out of date and out of sync with the spec.pdf, so look
// to the spec.pdf for accurate and up to date information.)
//
// My next idea was to just let things complete. I.e., once an instruction
// starts, it continues to completion no matter what and we go on. This
// failed at writing the PC. If the PC gets written in something such as
// a MOV PC,PC+5 instruction, 3 (or however long the pipeline is) clocks
// later, if whether or not something happens in those clocks depends
// upon the instruction fetch filling the pipeline, then the CPU has a
// non-deterministic behavior.
//
// This leads to two possibilities: either *everything* stalls upon a
// stall condition, or partial results need to be destroyed before
// they are written. This is made more difficult by the fact that
// once a command is written to the memory unit, whether it be a
// read or a write, there is no undoing it--since peripherals on the
// bus may act upon the answer with whatever side effects they might
// have. (For example, writing a '1' to the interrupt register will
// clear certain interrupts ...) Further, since the memory ops depend
// upon conditions, we'll need to wait for the condition codes to
// be available before executing a memory op. Thus, memory ops can
// proceed without stalling whenever either the previous instruction
// doesn't write the flags register, or when the memory instruction doesn't
// depend upon the flags register.
//
// The other possibility is that we leave independent instruction
// execution behind, so that the pipeline is always full and stalls,
// or moves forward, together on every clock.
//
// For now, we pick the first approach: independent instruction execution.
// Thus, if stage 2 stalls, stages 3-5 may still complete the instructions
// in their pipeline. This leaves another problem: what happens on a
// MOV -1+PC,PC instruction? There will be four instructions behind this
// one (or is it five?) that will need to be 'cancelled'. So here's
// the plan: Anything can be cancelled before the ALU/MEM stage,
// since memory ops cannot be canceled after being issued. Thus, the
// ALU/MEM stage must stall if any prior instruction is going to write
// the PC register (i.e. JMP).
//
// Further, let's define a "STALL" as a reason to not execute a stage
// due to some condition at or beyond the stage, and let's define
// a VALID flag to mean that this stage has completed. Thus, the clock
// enable for a stage is (STG[n-1]VALID)&&((~STG[n]VALID)||(~STG[n]STALL)).
// The ALU/MEM stages will also depend upon a master clock enable
// (~SLEEP) condition as well.
//
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
128,18 → 77,9
`define CPU_GIE_BIT 5
`define CPU_SLEEP_BIT 4
// Compile time defines
// (Currently unused)
// `define OPT_SINGLE_FETCH
// (Best path--define these!)
`define OPT_CONDITIONAL_FLAGS
`define OPT_ILLEGAL_INSTRUCTION
`ifndef OPT_SINGLE_FETCH
// The following are pipeline optimization options.
// They make no sense in a single instruction fetch mode.
`define OPT_PRECLEAR_BUS
`define OPT_EARLY_BRANCHING
`define OPT_PIPELINED_BUS_ACCESS
`endif
//
`include "cpudefs.v"
//
module zipcpu(i_clk, i_rst, i_interrupt,
// Debug interface
i_halt, i_clear_pf_cache, i_dbg_reg, i_dbg_we, i_dbg_data,
152,9 → 92,16
i_wb_ack, i_wb_stall, i_wb_data,
i_wb_err,
// Accounting/CPU usage interface
o_op_stall, o_pf_stall, o_i_count);
o_op_stall, o_pf_stall, o_i_count,
//
o_debug);
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24,
LGICACHE=6, AW=ADDRESS_WIDTH;
// Default for IMPLEMENT_MPY follows the compile-time OPT_MULTIPLY define:
// 1 when it is defined, 0 otherwise. Being a parameter, it can still be
// overridden where this module is instantiated.
// NOTE(review): OPT_MULTIPLY is presumably set in the included cpudefs.v --
// confirm, since it is not defined anywhere in this view.
`ifdef OPT_MULTIPLY
parameter IMPLEMENT_MPY = 1;
`else
parameter IMPLEMENT_MPY = 0;
`endif
input i_clk, i_rst, i_interrupt;
// Debug interface -- inputs
input i_halt, i_clear_pf_cache;
164,7 → 111,7
// Debug interface -- outputs
output reg o_dbg_stall;
output reg [31:0] o_dbg_reg;
output reg [1:0] o_dbg_cc;
output reg [3:0] o_dbg_cc;
output wire o_break;
// Wishbone interface -- outputs
output wire o_wb_gbl_cyc, o_wb_gbl_stb;
179,13 → 126,25
output wire o_op_stall;
output wire o_pf_stall;
output wire o_i_count;
//
output reg [31:0] o_debug;
 
 
// Registers
//
// The distributed RAM style comment is necessary on the
// SPARTAN6 with XST to prevent XST from oversimplifying the register
// set and in the process ruining everything else. It basically
// optimizes logic away, to where it no longer works. The logic
// as described herein will work, this just makes sure XST implements
// that logic.
//
(* ram_style = "distributed" *)
reg [31:0] regset [0:31];
 
// Condition codes
reg [3:0] flags, iflags; // (TRAP,FPEN,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
// (BUS, TRAP,ILL,BREAKEN,STEP,GIE,SLEEP ), V, N, C, Z
reg [3:0] flags, iflags;
wire [10:0] w_uflags, w_iflags;
reg trap, break_en, step, gie, sleep;
`ifdef OPT_ILLEGAL_INSTRUCTION
232,7 → 191,9
dcdM, dcdF_wr, dcd_gie, dcd_break;
reg [(AW-1):0] dcd_pc;
reg [23:0] r_dcdI;
`ifdef OPT_SINGLE_CYCLE
reg dcd_zI; // true if dcdI == 0
`endif
wire dcdA_stall, dcdB_stall, dcdF_stall;
 
`ifdef OPT_PRECLEAR_BUS
265,12 → 226,17
reg [(AW-1):0] op_pc;
wire [31:0] w_opA, w_opB;
wire [31:0] opA_nowait, opB_nowait, opA, opB;
reg opR_wr, opR_cc, opF_wr, op_gie,
opA_rd, opB_rd;
reg opR_wr, opR_cc, opF_wr, op_gie;
wire [10:0] opFl;
reg [6:0] r_opF;
wire [8:0] opF;
reg [5:0] r_opF;
wire [7:0] opF;
reg [2:0] opF_cp;
wire op_ce;
// Some pipeline control wires
`ifdef OPT_SINGLE_CYCLE
reg opA_alu, opA_mem;
reg opB_alu, opB_mem;
`endif
`ifdef OPT_PRECLEAR_BUS
reg op_clear_bus;
`endif
293,11 → 259,8
wire alu_valid;
wire set_cond;
reg alu_wr, alF_wr, alu_gie;
`ifdef OPT_ILLEGAL_INSTRUCTION
reg alu_illegal;
`else
wire alu_illegal_op;
wire alu_illegal;
`endif
 
 
 
351,8 → 314,22
//
// PIPELINE STAGE #3 :: Read Operands
// Calculate stall conditions
assign op_stall = ((mem_stalled)&&(opvalid_mem))
||((alu_stall)&&(opvalid_alu));
assign op_stall = ((opvalid)&&(~master_ce))||(
// Stall if going into the ALU and the ALU is stalled
// i.e. if the memory is busy, or we are single
// stepping
((opvalid_alu)&&(alu_stall))
//
// ||((opvalid_alu)&&(mem_rdbusy)) // part of alu_stall
// Stall if we are going into memory with an operation
// that cannot be pipelined, and the memory is
// already busy
||((opvalid_mem)&&(~op_pipe)&&(mem_busy))
//
// Stall if we are going into memory with a pipeable
// operation, but the memory unit declares it is
// not going to accept any more pipeline operations
||((opvalid_mem)&&( op_pipe)&&(mem_pipe_stalled)));
assign op_ce = (dcdvalid)&&((~opvalid)||(~op_stall));
 
//
363,15 → 340,16
// busy.
// 2. Also stall if the prior stage is valid and the master clock enable
// is de-selected
// 3. Next case: Stall if we want to start a memory operation and the
// prior operation will write either the PC or CC registers.
// 3. Stall if someone on the other end is writing the CC register,
// since we don't know if it'll put us to sleep or not.
// 4. Last case: Stall if we would otherwise move a break instruction
// through the ALU. Break instructions are not allowed through
// the ALU.
assign alu_stall = (((~master_ce)||(mem_rdbusy))&&(opvalid_alu)) //Case 1&2
||((opvalid_mem)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie)
&&((wr_write_pc)||(wr_write_cc))) // Case 3
||((opvalid)&&(op_break)); // Case 4
// Old case #3--this isn't an ALU stall though ...
||((opvalid_alu)&&(wr_reg_ce)&&(wr_reg_id[4] == op_gie)
&&(wr_write_cc)) // Case 3
||((opvalid_alu)&&(op_break)); // Case 3
assign alu_ce = (master_ce)&&(~mem_rdbusy)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline);
//
`ifdef OPT_PIPELINED_BUS_ACCESS
443,7 → 421,7
 
`ifdef OPT_EARLY_BRANCHING
always @(posedge i_clk)
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(~sleep))
if ((dcd_ce)&&(instruction[27:24]==`CPU_PC_REG)&&(master_ce))
begin
dcd_early_branch <= 1'b0;
// First case, a move to PC instruction
476,16 → 454,40
if (dcd_ce) dcd_early_branch <= 1'b0;
dcd_early_branch_stb <= 1'b0;
end
always @(posedge i_clk)
generate
if (AW == 24)
begin
always @(posedge i_clk)
if (dcd_ce)
begin
if (instruction[31]) // Add
dcd_branch_pc <= instruction_pc+{ {(AW-20){instruction[19]}}, instruction[19:0] } + {{(AW-1){1'b0}},1'b1};
else if (~instruction[28]) // 4'h2 = MOV
begin
dcd_branch_pc <= instruction_pc
+ { {(AW-20){instruction[19]}}, instruction[19:0] }
+ {{(AW-1){1'b0}},1'b1};
end else if (~instruction[28]) // 4'h2 = MOV
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1};
else // if (instruction[28]) // 4'h3 = LDI
dcd_branch_pc <= instruction_pc+{ instruction[23:0] } + {{(AW-1){1'b0}},1'b1};
end
end else begin
always @(posedge i_clk)
if (dcd_ce)
begin
if (instruction[31]) // Add
begin
dcd_branch_pc <= instruction_pc
+ { {(AW-20){instruction[19]}}, instruction[19:0] }
+ {{(AW-1){1'b0}},1'b1};
end else if (~instruction[28]) // 4'h2 = MOV
begin
dcd_branch_pc <= instruction_pc+{ {(AW-15){instruction[14]}}, instruction[14:0] } + {{(AW-1){1'b0}},1'b1};
end else // if (instruction[28]) // 4'h3 = LDI
begin
dcd_branch_pc <= instruction_pc+{ {(AW-24){instruction[23]}}, instruction[23:0] } + {{(AW-1){1'b0}},1'b1};
end
end
end endgenerate
`else // OPT_EARLY_BRANCHING
assign dcd_early_branch_stb = 1'b0;
assign dcd_early_branch = 1'b0;
495,7 → 497,8
always @(posedge i_clk)
if (dcd_ce)
begin
dcd_pc <= instruction_pc+1;
dcd_pc <= instruction_pc
+{{(AW-1){1'b0}},1'b1}; // i.e. dcd_pc+1
 
// Record what operation we are doing
dcdOp <= instruction[31:28];
535,7 → 538,9
dcdA_rd <= 1'b0;
dcdB_rd <= 1'b1;
r_dcdI <= { {(9){instruction[14]}}, instruction[14:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[14:0] == 0);
`endif
dcdF_wr <= 1'b0; // Don't write flags
end
4'h3: begin // Load immediate
543,7 → 548,9
dcdA_rd <= 1'b0;
dcdB_rd <= 1'b0;
r_dcdI <= { instruction[23:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[23:0] == 0);
`endif
dcdF_wr <= 1'b0; // Don't write flags
dcdF <= 4'h8; // This is unconditional
dcdOp <= 4'h2;
559,7 → 566,9
dcdF_wr <= (instruction[27:25] != 3'h7);
`endif
r_dcdI <= { 8'h00, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
if (instruction[27:24] == 4'he)
begin
// NOOP instruction
584,12 → 593,14
end else begin
// Actual multiply instruction
r_dcdI <= { 8'h00, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
dcdA_rd <= 1'b1;
dcdB_rd <= (instruction[19:16] != 4'hf);
dcdOp[3:0] <= (instruction[20])? 4'h4:4'h3;
end end
4'b011?: begin // Load/Store
4'b011?: begin // LOD/STO or Load/Store
dcdF_wr <= 1'b0; // Don't write flags
dcdA_wr <= (~instruction[28]); // Write on loads
dcdA_rd <= (instruction[28]); // Read on stores
597,10 → 608,14
if (instruction[20])
begin
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
end else begin
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[19:0] == 0);
`endif
end
dcdM <= 1'b1; // Memory operation
`ifdef OPT_PRECLEAR_BUS
614,10 → 629,14
if (instruction[20])
begin
r_dcdI <= { {(8){instruction[15]}}, instruction[15:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[15:0] == 0);
`endif
end else begin
r_dcdI <= { {(4){instruction[19]}}, instruction[19:0] };
`ifdef OPT_SINGLE_CYCLE
dcd_zI <= (instruction[19:0] == 0);
`endif
end end
endcase
 
647,6 → 666,7
op_pipe <= (dcdvalid)&&(opvalid_mem)&&(dcdM) // Both mem
&&(dcdOp[0]==opn[0]) // Both Rd, or both Wr
&&(dcdB == op_B) // Same address register
&&(dcdF[2:0] == opF_cp) // Same condition
&&((r_dcdI == r_opI)||(r_dcdI==r_opI+24'h1));
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
663,39 → 683,59
//
assign w_opA = regset[dcdA];
assign w_opB = regset[dcdB];
 
// 32-bit program-counter value for operand A: dcd_pc when dcdA[4] matches
// dcd_gie, otherwise upc.
wire [31:0] w_pcA_v;
generate
if (AW < 32)
// Pad the upper (32-AW) bits with zeros to reach the 32-bit operand width
assign w_pcA_v = {{(32-AW){1'b0}}, (dcdA[4] == dcd_gie)?dcd_pc:upc };
else
// No padding in this branch; presumably this avoids a zero-count
// replication when AW == 32 -- TODO confirm AW > 32 is never used.
assign w_pcA_v = (dcdA[4] == dcd_gie)?dcd_pc:upc;
endgenerate
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
begin
if ((wr_reg_ce)&&(wr_reg_id == dcdA))
r_opA <= wr_reg_vl;
else if ((dcdA_pc)&&(dcdA[4] == dcd_gie))
r_opA <= { {(32-AW){1'b0}}, dcd_pc };
else if (dcdA_pc)
r_opA <= { {(32-AW){1'b0}}, upc };
r_opA <= w_pcA_v;
else if (dcdA_cc)
r_opA <= { w_opA[31:11], (dcd_gie)?w_uflags:w_iflags };
else
r_opA <= w_opA;
`ifdef OPT_SINGLE_CYCLE
end else if (opvalid)
begin // We were going to pick these up when they became valid,
// but for some reason we're stuck here as they became
// valid. Pick them up now anyway
if ((opA_alu)||((opA_mem)&&(mem_valid)))
if (((opA_alu)&&(alu_valid)&&(alu_wr))||((opA_mem)&&(mem_valid)))
r_opA <= wr_reg_vl;
`endif
end
wire [31:0] dcdI, w_opBnI;
 
wire [31:0] dcdI, w_opBnI, w_pcB_v;
assign dcdI = { {(8){r_dcdI[23]}}, r_dcdI };
// 32-bit program-counter value for operand B: dcd_pc when dcdB[4] matches
// dcd_gie, otherwise upc.
generate
if (AW < 32)
// Pad the upper (32-AW) bits with zeros to reach the 32-bit operand width
assign w_pcB_v = {{(32-AW){1'b0}}, (dcdB[4] == dcd_gie)?dcd_pc:upc };
else
// No padding in this branch; presumably this avoids a zero-count
// replication when AW == 32 -- TODO confirm AW > 32 is never used.
assign w_pcB_v = (dcdB[4] == dcd_gie)?dcd_pc:upc;
endgenerate
 
assign w_opBnI = (~dcdB_rd) ? 32'h00
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl
: (((dcdB_pc)&&(dcdB[4] == dcd_gie)) ? {{(32-AW){1'b0}},dcd_pc }
: ((dcdB_pc) ? {{(32-AW){1'b0}},upc}
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags}
: regset[dcdB]))));
: (((wr_reg_ce)&&(wr_reg_id == dcdB)) ? wr_reg_vl
: ((dcdB_pc) ? w_pcB_v
: ((dcdB_cc) ? { w_opB[31:11], (dcd_gie)?w_uflags:w_iflags}
: w_opB)));
 
always @(posedge i_clk)
if (op_ce) // &&(dcdvalid))
r_opB <= w_opBnI + dcdI;
else if ((opvalid)&&((opB_alu)||((opB_mem)&&(mem_valid))))
`ifdef OPT_SINGLE_CYCLE
else if ((opvalid)&&(
((opB_alu)&&(alu_valid)&&(alu_wr))
||((opB_mem)&&(mem_valid))))
r_opB <= wr_reg_vl;
`endif
 
// The logic here has become more complex than it should be, no thanks
// to Xilinx's Vivado trying to help. The conditions are supposed to
710,17 → 750,20
if (op_ce)
begin // Set the flag condition codes, bit order is [3:0]=VNCZ
case(dcdF[2:0])
3'h0: r_opF <= 7'h80; // Always
3'h1: r_opF <= 7'h11; // Z
3'h2: r_opF <= 7'h10; // NE
3'h3: r_opF <= 7'h20; // GE (!N)
3'h4: r_opF <= 7'h30; // GT (!N&!Z)
3'h5: r_opF <= 7'h24; // LT
3'h6: r_opF <= 7'h02; // C
3'h7: r_opF <= 7'h08; // V
3'h0: r_opF <= 6'h00; // Always
3'h1: r_opF <= 6'h11; // Z
3'h2: r_opF <= 6'h10; // NE
3'h3: r_opF <= 6'h20; // GE (!N)
3'h4: r_opF <= 6'h30; // GT (!N&!Z)
3'h5: r_opF <= 6'h24; // LT
3'h6: r_opF <= 6'h02; // C
3'h7: r_opF <= 6'h08; // V
endcase
end // Bit order is { (flags_not_used), VNCZ mask, VNCZ value }
assign opF = { r_opF[6], r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
assign opF = { r_opF[3], r_opF[5], r_opF[1], r_opF[4:0] };
// Keep a copy of the raw 3-bit condition field from the decode stage,
// captured whenever op_ce is high. The op_pipe logic compares dcdF[2:0]
// against this copy to require the same condition on both instructions.
always @(posedge i_clk)
if (op_ce)
opF_cp[2:0] <= dcdF[2:0];
 
initial opvalid = 1'b0;
initial opvalid_alu = 1'b0;
798,13 → 841,6
// User level (1), vs supervisor (0)/interrupts disabled
op_gie <= dcd_gie;
 
// We're not done with these yet--we still need them
// for the unclocked assign. We need the unclocked
// assign so that there's no wait state between an
// ALU or memory result and the next register that may
// use that value.
opA_rd <= dcdA_rd;
opB_rd <= dcdB_rd;
//
`ifdef OPT_EARLY_BRANCHING
op_wr_pc <= ((dcdA_wr)&&(dcdA_pc)&&(dcdA[4] == dcd_gie))&&(~dcd_early_branch);
832,37 → 868,48
// We'll create a flag here to start our coordination. Once we
// define this flag to something other than just plain zero, then
// the stalls will already be in place.
reg opA_alu, opA_mem;
`ifdef OPT_SINGLE_CYCLE
initial opA_alu = 1'b0;
always @(posedge i_clk)
if (op_ce)
opA_alu <= (opvalid_alu)&&(opR == dcdA)&&(opR_wr)&&(dcdA_rd);
else if ((opvalid)&&(opA_alu)&&(alu_valid))
opA_alu <= 1'b0;
initial opA_mem = 1'b0;
always @(posedge i_clk)
if (op_ce)
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd))
opA_mem <= ((opvalid_mem)&&(opR == dcdA)&&(dcdA_rd)&&(~opn[0]))
||((~opvalid)&&(mem_busy)&&(~mem_we)
&&(mem_last_reg == dcdA)&&(dcdA_rd));
else if ((opvalid)&&(opA_mem)&&(mem_valid))
opA_mem <= 1'b0;
`endif
 
always @(posedge i_clk)
if (mem_ce)
mem_last_reg <= opR;
assign opA = (opA_alu) ? alu_result
`ifdef OPT_SINGLE_CYCLE
assign opA = ((opA_alu)&&(alu_valid)&&(alu_wr)) ? alu_result
: ( ((opA_mem)&&(mem_valid))?mem_result
: r_opA );
`else
assign opA = r_opA;
`endif
 
assign dcdA_stall = (dcdvalid)&&(dcdA_rd)&&(
`ifdef OPT_SINGLE_CYCLE
// Skip the requirement on writing back opA
// Stall on memory, since we'll always need to stall for a
// memory access anyway
// ((opvalid_mem)&&(opR_wr)&&(opR == dcdA))
((opvalid_alu)&&(opF_wr)&&(dcdA_cc)));
// Place stalls for this latter case into the ops stage
// ||((mem_busy)&&(~mem_we));
`else
((opvalid)&&(opR_wr)&&(opR == dcdA))
||((opvalid_alu)&&(opF_wr)&&(dcdA_cc))
||((mem_rdbusy)&&(mem_last_reg == dcdA))
);
`endif
 
reg opB_alu, opB_mem;
`ifdef OPT_SINGLE_CYCLE
always @(posedge i_clk)
if (op_ce)
opB_alu <= (opvalid_alu)&&(opR == dcdB)&&(opR_wr)&&(dcdB_rd)&&(dcd_zI);
869,15 → 916,20
always @(posedge i_clk)
if (op_ce)
opB_mem <= (dcd_zI)&&(dcdB_rd)&&(
((opvalid_mem)&&(opR == dcdB))
((opvalid_mem)&&(opR == dcdB)&&(~opn[0]))
||((~opvalid)&&(mem_busy)&&(~mem_we)
&&(mem_last_reg == dcdB)));
else if ((opvalid)&&(opB_mem)&&(mem_valid))
opB_mem <= 1'b0;
assign opB = (opB_alu) ? alu_result
assign opB = ((opB_alu)&&(alu_valid)&&(alu_wr)) ? alu_result
: ( ((opB_mem)&&(mem_valid))?mem_result
: r_opB );
`else
assign opB = r_opB;
`endif
 
assign dcdB_stall = (dcdvalid)&&(dcdB_rd)&&(
`ifdef OPT_SINGLE_CYCLE
// Stall on memory ops writing to my register
// (i.e. loads), or on any write to my
// register if I have an immediate offset
897,6 → 949,12
// Stall on any ongoing memory operation that
// will write to opB
||((mem_busy)&&(~mem_we)&&(mem_last_reg==dcdB)));
`else
((opvalid)&&(opR_wr)&&(opR == dcdB))
||((opvalid_alu)&&(opF_wr)&&(dcdB_cc))
||((mem_rdbusy)&&(mem_last_reg == dcdB))
);
`endif
assign dcdF_stall = (dcdvalid)&&((~dcdF[3])||(dcdA_cc)||(dcdB_cc))
&&(opvalid)&&(opR_cc);
//
904,9 → 962,9
// PIPELINE STAGE #4 :: Apply Instruction
//
//
cpuops doalu(i_clk, i_rst, alu_ce,
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce,
(opvalid_alu), opn, opA, opB,
alu_result, alu_flags, alu_valid);
alu_result, alu_flags, alu_valid, alu_illegal_op);
 
assign set_cond = ((opF[7:4]&opFl[3:0])==opF[3:0]);
initial alF_wr = 1'b0;
934,9 → 992,12
if ((alu_ce)||(mem_ce))
alu_pc <= op_pc;
`ifdef OPT_ILLEGAL_INSTRUCTION
reg r_alu_illegal;
initial r_alu_illegal = 0;
always @(posedge i_clk)
if ((alu_ce)||(mem_ce))
alu_illegal <= op_illegal;
r_alu_illegal <= op_illegal;
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal);
`endif
 
initial alu_pc_valid = 1'b0;
1025,8 → 1086,8
// includes the set condition ...
assign wr_flags_ce = (alF_wr)&&(alu_valid)&&(~clear_pipeline)&&(~alu_illegal);
`ifdef OPT_ILLEGAL_INSTRUCTION
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err,break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
`else
assign w_uflags = { bus_err_flag, trap, ill_err, 1'b0, step, 1'b1, sleep, ((wr_flags_ce)&&(alu_gie))?alu_flags:flags };
assign w_iflags = { bus_err_flag, trap, ill_err, break_en, 1'b0, 1'b0, sleep, ((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
1237,7 → 1298,7
&&(i_dbg_reg[4:0] == { gie, `CPU_PC_REG}))
pf_pc <= i_dbg_data[(AW-1):0];
else if (dcd_ce)
pf_pc <= pf_pc + 1;
pf_pc <= pf_pc + {{(AW-1){1'b0}},1'b1};
 
initial new_pc = 1'b1;
always @(posedge i_clk)
1257,16 → 1318,36
 
//
// The debug interface
always @(posedge i_clk)
generate
if (AW<32)
begin
always @(posedge i_clk)
begin
o_dbg_reg <= regset[i_dbg_reg];
if (i_dbg_reg[3:0] == `CPU_PC_REG)
o_dbg_reg <= {{(32-AW){1'b0}},(i_dbg_reg[4])?upc:ipc};
else if (i_dbg_reg[3:0] == `CPU_CC_REG)
begin
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
o_dbg_reg[`CPU_GIE_BIT] <= gie;
end
end
end else begin
always @(posedge i_clk)
begin
o_dbg_reg <= regset[i_dbg_reg];
if (i_dbg_reg[3:0] == `CPU_PC_REG)
o_dbg_reg <= (i_dbg_reg[4])?upc:ipc;
else if (i_dbg_reg[3:0] == `CPU_CC_REG)
begin
o_dbg_reg[10:0] <= (i_dbg_reg[4])?w_uflags:w_iflags;
o_dbg_reg[`CPU_GIE_BIT] <= gie;
end
end
end endgenerate
 
always @(posedge i_clk)
o_dbg_cc <= { gie, sleep };
o_dbg_cc <= { o_break, bus_err, gie, sleep };
 
always @(posedge i_clk)
o_dbg_stall <= (i_halt)&&(
1283,4 → 1364,14
assign o_op_stall = (master_ce)&&((~opvalid)||(op_stall));
assign o_pf_stall = (master_ce)&&(~pf_valid);
assign o_i_count = (alu_pc_valid)&&(~clear_pipeline);
 
// Registered 32-bit debug snapshot, updated every clock.
// Layout, MSB first (assuming each valid/ce flag is one bit wide):
//   [31:24] pf_pc[7:0]
//   [23:19] pf_valid, dcdvalid, opvalid, alu_valid, mem_valid
//   [18:16] op_ce, alu_ce, mem_ce
//   [15: 8] opA[23:20], opA[3:0]
//   [ 7: 0] wr_reg_vl[7:0]
always @(posedge i_clk)
o_debug <= {
pf_pc[7:0],
pf_valid, dcdvalid, opvalid, alu_valid, mem_valid,
op_ce, alu_ce, mem_ce,
opA[23:20], opA[3:0],
wr_reg_vl[7:0]
};
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.