OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /zipcpu
    from Rev 204 to Rev 205
    Reverse comparison

Rev 204 → Rev 205

/trunk/rtl/core/cpuops.v
128,8 → 128,8
assign mpy_result = r_mpy_a_input * r_mpy_b_input;
assign mpybusy = 1'b0;
 
reg mpypipe;
initial mpypipe = 1'b0;
reg mpypipe;
always @(posedge i_clk)
if (i_rst)
mpypipe <= 1'b0;
205,6 → 205,7
reg [2:0] mpypipe;
 
// First clock, latch in the inputs
initial mpypipe = 3'b0;
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
/trunk/rtl/core/dblfetch.v
0,0 → 1,232
////////////////////////////////////////////////////////////////////////////////
//
// Filename: dblfetch.v
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: This is one step beyond the simplest instruction fetch,
// prefetch.v. dblfetch.v uses memory pipelining to fetch two
// instruction words in one cycle, figuring that the unpipelined CPU can't
// go through both at once, but yet recycles itself fast enough for the
// next instruction that would follow. It is designed to be a touch
// faster than the single instruction prefetch, although not as fast as
// the prefetch and cache found elsewhere.
//
// There are some gotcha's in this logic, however. For example, it's
// illegal to switch devices mid-transaction, since the second device
// might have different timing. I.e. the first device might take 8
// clocks to create an ACK, and the second device might take 2 clocks, the
// acks might therefore come on top of each other, or even out of order.
// But ... in order to keep logic down, we keep track of the PC in the
// o_wb_addr register. Hence, this register gets changed on any i_new_pc.
// The i_pc value associated with i_new_pc will only be valid for one
// clock, hence we can't wait to change. To keep from violating the WB
// rule, therefore, we *must* immediately stop requesting any transaction,
// and then terminate the bus request as soon as possible.
//
// This has consequences in terms of logic used, leaving this routine
// anything but simple--even though the number of wires affected by
// this is small (o_wb_cyc, o_wb_stb, and last_ack).
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
module dblfetch(i_clk, i_rst, i_new_pc, i_clear_cache,
i_stall_n, i_pc, o_i, o_pc, o_v,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
o_illegal);
parameter ADDRESS_WIDTH=32, AUX_WIDTH = 1;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc, i_clear_cache,
i_stall_n;
input [(AW-1):0] i_pc;
output reg [31:0] o_i;
output reg [(AW-1):0] o_pc;
output wire o_v;
// Wishbone outputs
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
output reg [(AW-1):0] o_wb_addr;
output wire [31:0] o_wb_data;
// And return inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
// And ... the result if we got an error
output reg o_illegal;
 
assign o_wb_we = 1'b0;
assign o_wb_data = 32'h0000;
 
reg last_ack, last_stb, invalid_bus_cycle;
 
reg [31:0] cache [0:1];
reg cache_read_addr, cache_write_addr;
reg [1:0] cache_valid;
 
initial o_wb_cyc = 1'b0;
initial o_wb_stb = 1'b0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end else if (o_wb_cyc)
begin
if ((o_wb_stb)&&(!i_wb_stall))
begin
// last_stb <= 1'b1;
o_wb_stb <= !last_stb;
end
// if (i_wb_ack)
// last_ack <= 1'b1;
if ((i_new_pc)||(invalid_bus_cycle))
o_wb_stb <= 1'b0;
 
if ((i_wb_ack)&&(
// Relase the bus on the second ack
(last_ack)
// Or on the first ACK, if we've been told
// we have an invalid bus cycle
||((o_wb_stb)&&(i_wb_stall)&&(last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
 
if ((!last_stb)&&(i_wb_stall)&&((i_new_pc)||(invalid_bus_cycle)))
// Also release the bus with no acks, if we
// haven't made any requests
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
end else if ((invalid_bus_cycle)
||((o_v)&&(i_stall_n)&&(cache_read_addr))) // Initiate a bus cycle
begin
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end
 
initial last_stb = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall))
last_stb <= 1'b1;
else if (!o_wb_cyc)
last_stb <= 1'b0;
 
initial last_ack = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(i_wb_stall)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall)&&(!last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if (!o_wb_cyc)
last_ack <= 1'b0;
 
initial invalid_bus_cycle = 1'b0;
always @(posedge i_clk)
if (i_rst)
invalid_bus_cycle <= 1'b0;
else if ((i_new_pc)||(i_clear_cache))
invalid_bus_cycle <= 1'b1;
else if (!o_wb_cyc)
invalid_bus_cycle <= 1'b0;
 
initial o_wb_addr = {(AW){1'b1}};
always @(posedge i_clk)
if (i_new_pc)
o_wb_addr <= i_pc;
else if ((o_wb_stb)&&(!i_wb_stall)&&(!invalid_bus_cycle))
o_wb_addr <= o_wb_addr + 1'b1;
 
initial cache_write_addr = 1'b0;
always @(posedge i_clk)
if (!o_wb_cyc)
cache_write_addr <= 1'b0;
else if ((o_wb_cyc)&&(i_wb_ack))
cache_write_addr <= cache_write_addr + 1'b1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache[cache_write_addr] <= i_wb_data;
 
initial cache_read_addr = 1'b0;
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle)
||((o_v)&&(cache_read_addr)&&(i_stall_n)))
cache_read_addr <= 1'b0;
else if ((o_v)&&(i_stall_n))
cache_read_addr <= 1'b1;
 
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle))
cache_valid <= 2'b00;
else begin
if ((o_v)&&(i_stall_n))
cache_valid[cache_read_addr] <= 1'b0;
if ((o_wb_cyc)&&(i_wb_ack))
cache_valid[cache_write_addr] <= 1'b1;
end
 
initial o_i = {(32){1'b1}};
always @(posedge i_clk)
if ((i_stall_n)&&(o_wb_cyc)&&(i_wb_ack))
o_i <= i_wb_data;
else
o_i <= cache[cache_read_addr];
 
initial o_pc = 0;
always @(posedge i_clk)
if (i_new_pc)
o_pc <= i_pc;
else if ((o_v)&&(i_stall_n))
o_pc <= o_pc + 1'b1;
 
assign o_v = cache_valid[cache_read_addr];
 
initial o_illegal = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_err))
o_illegal <= 1'b1;
else if ((!o_wb_cyc)&&((i_new_pc)||(invalid_bus_cycle)))
o_illegal <= 1'b0;
 
endmodule
/trunk/rtl/core/div.v
34,7 → 34,7
//
//
// At this point, the divide is has started. The divide works by walking
// through every shift of the
// through every shift of the
//
// DIVIDEND over the
// DIVISOR
50,7 → 50,7
// DIVIDEND
// DIVISOR
//
// At this point, if the DIVISOR is less than the dividend, the
// At this point, if the DIVISOR is less than the dividend, the
// divisor is subtracted from the dividend, and the DIVISOR is again
// shifted to the right. Further, a '1' bit gets set in the output
// quotient.
62,7 → 62,7
//
// On the clock when we are done, o_busy is set to false, and o_valid set
// to true. (It is a violation of the ZipCPU internal protocol for both
// busy and valid to ever be true on the same clock. It is also a
// busy and valid to ever be true on the same clock. It is also a
// violation for busy to be false with valid true thereafter.)
//
//
121,12 → 121,13
 
reg r_sign, pre_sign, r_z, r_c, last_bit;
reg [(LGBW-1):0] r_bit;
 
reg zero_divisor;
initial zero_divisor = 1'b0;
always @(posedge i_clk)
zero_divisor <= (r_divisor == 0)&&(r_busy);
 
// The Divide logic begins with r_busy. We use r_busy to determine
// whether or not the divide is in progress, vs being complete.
// Here, we clear r_busy on any reset and set it on i_wr (the request
// do to a divide). The divide ends when we are on the last bit,
// or equivalently when we discover we are dividing by zero.
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
136,6 → 137,12
else if ((last_bit)||(zero_divisor))
r_busy <= 1'b0;
 
// o_busy is very similar to r_busy, save for some key differences.
// Primary among them is that o_busy needs to (possibly) be true
// for an extra clock after r_busy clears. This would be that extra
// clock where we negate the result (assuming a signed divide, and that
// the result is supposed to be negative.) Otherwise, the two are
// identical.
initial o_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
147,19 → 154,50
else if (~r_busy)
o_busy <= 1'b0;
 
// If we are asked to divide by zero, we need to halt. The sooner
// we halt and report the error, the better. Hence, here we look
// for a zero divisor while being busy. The always above us will then
// look at this and halt a divide in the middle if we are trying to
// divide by zero.
//
// Note that this works off of the 2BW-1 length vector. If we can
// simplify that, it should simplify our logic as well.
initial zero_divisor = 1'b0;
always @(posedge i_clk)
if ((i_rst)||(i_wr))
// zero_divisor <= (r_divisor == 0)&&(r_busy);
if (i_rst)
zero_divisor <= 1'b0;
else if (i_wr)
zero_divisor <= (i_denominator == 0);
else if (!r_busy)
zero_divisor <= 1'b0;
 
// o_valid is part of the ZipCPU protocol. It will be set to true
// anytime our answer is valid and may be used by the calling module.
// Indeed, the ZipCPU will halt (and ignore us) once the i_wr has been
// set until o_valid gets set.
//
// Here, we clear o_valid on a reset, and any time we are on the last
// bit while busy (provided the sign is zero, or we are dividing by
// zero). Since o_valid is self-clearing, we don't need to clear
// it on an i_wr signal.
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (r_busy)
begin
if ((last_bit)||(zero_divisor))
o_valid <= (zero_divisor)||(~r_sign);
o_valid <= (zero_divisor)||(!r_sign);
end else if (r_sign)
begin
o_valid <= (~zero_divisor); // 1'b1;
o_valid <= (!zero_divisor); // 1'b1;
end else
o_valid <= 1'b0;
 
// Division by zero error reporting. Anytime we detect a zero divisor,
// we set our output error, and then hold it until we are valid and
// everything clears.
initial o_err = 1'b0;
always @(posedge i_clk)
if((i_rst)||(o_valid))
169,91 → 207,145
else
o_err <= 1'b0;
 
// r_bit
//
// Keep track of which "bit" of our divide we are on. This number
// ranges from 31 down to zero. On any write, we set ourselves to
// 5'h1f. Otherwise, while we are busy (but not within the pre-sign
// adjustment stage), we subtract one from our value on every clock.
always @(posedge i_clk)
if ((r_busy)&&(!pre_sign))
r_bit <= r_bit + {(LGBW){1'b1}};
else
r_bit <= {(LGBW){1'b1}};
 
// last_bit
//
// This logic replaces a lot of logic that was inside our giant state
// machine with ... something simpler. In particular, we'll use this
// logic to determine we are processing our last bit. The only trick
// is, this bit needs to be set whenever (r_busy) and (r_bit == 0),
// hence we need to set on (r_busy) and (r_bit == 1) so as to be set
// when (r_bit == 0).
initial last_bit = 1'b0;
always @(posedge i_clk)
if ((i_wr)||(pre_sign)||(i_rst))
if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
else
last_bit <= 1'b0;
else if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
 
// pre_sign
//
// This is part of the state machine. pre_sign indicates that we need
// a extra clock to take the absolute value of our inputs. It need only
// be true for the one clock, and then it must clear itself.
initial pre_sign = 1'b0;
always @(posedge i_clk)
// if (i_rst) r_busy <= 1'b0;
// else
if (i_wr)
begin
//
// Set our values upon an initial command. Here's
// where we come in and start.
//
// r_busy <= 1'b1;
//
o_quotient <= 0;
r_bit <= {(LGBW){1'b1}};
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
r_dividend <= i_numerator;
r_sign <= 1'b0;
pre_sign <= i_signed;
else
pre_sign <= 1'b0;
 
// As a result of our operation, we need to set the flags. The most
// difficult of these is the "Z" flag indicating that the result is
// zero. Here, we'll use the same logic that sets the low-order
// bit to clear our zero flag, and leave the zero flag set in all
// other cases. Well ... not quite. If we need to flip the sign of
// our value, then we can't quite clear the zero flag ... yet.
always @(posedge i_clk)
if((r_busy)&&(r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
// If we are busy, the upper bits of our divisor are
// zero (i.e., we got the shift right), and the top
// (carry) bit of the difference is zero (no overflow),
// then we could subtract our divisor from our dividend
// and hence we add a '1' to the quotient, while setting
// the zero flag to false.
r_z <= 1'b0;
else if ((!r_busy)&&(!r_sign))
r_z <= 1'b1;
end else if (pre_sign)
 
// r_dividend
// This is initially the numerator. On a signed divide, it then becomes
// the absolute value of the numerator. We'll subtract from this value
// the divisor shifted as appropriate for every output bit we are
// looking for--just as with traditional long division.
always @(posedge i_clk)
if (pre_sign)
begin
//
// Note that we only come in here, for one clock, if
// our initial value may have been signed. If we are
// doing an unsigned divide, we then skip this step.
//
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
// Negate our dividend if necessary so that it becomes
// a magnitude only value
// If we are doing a signed divide, then take the
// absolute value of the dividend
if (r_dividend[BW-1])
r_dividend <= -r_dividend;
// Do the same with the divisor--rendering it into
// a magnitude only.
// The begin/end block is important so we don't lose
// the fact that on an else we don't do anything.
end else if((r_busy)&&(r_divisor[(2*BW-2):(BW)]==0)&&(!diff[BW]))
// This is the condition whereby we set a '1' in our
// output quotient, and we subtract the (current)
// divisor from our dividend. (The difference is
// already kept in the diff vector above.)
r_dividend <= diff[(BW-1):0];
else if (!r_busy)
// Once we are done, and r_busy is no longer high, we'll
// always accept new values into our dividend. This
// guarantees that, when i_wr is set, the new value
// is already set as desired.
r_dividend <= i_numerator;
 
initial r_divisor = 0;
always @(posedge i_clk)
if (pre_sign)
begin
if (r_divisor[(2*BW-2)])
r_divisor[(2*BW-2):(BW-1)] <= -r_divisor[(2*BW-2):(BW-1)];
//
// We only do this stage for a single clock, so go on
// with the rest of the divide otherwise.
pre_sign <= 1'b0;
r_divisor[(2*BW-2):(BW-1)]
<= -r_divisor[(2*BW-2):(BW-1)];
end else if (r_busy)
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
else
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
 
// r_sign
// is a flag for our state machine control(s). r_sign will be set to
// true any time we are doing a signed divide and the result must be
// negative. In that case, we take a final logic stage at the end of
// the divide to negate the output. This flag is what tells us we need
// to do that. r_busy will be true during the divide, then when r_busy
// goes low, r_sign will be checked, then the idle/reset stage will have
// been reached. For this reason, we cannot set r_sign unless we are
// up to something.
initial r_sign = 1'b0;
always @(posedge i_clk)
if (pre_sign)
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
else if (r_busy)
r_sign <= (r_sign)&&(!zero_divisor);
else
r_sign <= 1'b0;
 
always @(posedge i_clk)
if (r_busy)
begin
// While the divide is taking place, we examine each bit
// in turn here.
//
r_bit <= r_bit + {(LGBW){1'b1}}; // r_bit = r_bit - 1;
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
if (|r_divisor[(2*BW-2):(BW)])
o_quotient <= { o_quotient[(BW-2):0], 1'b0 };
if ((r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
begin
end else if (diff[BW])
begin
//
// diff = r_dividend - r_divisor[(BW-1):0];
//
// If this value was negative, there wasn't
// enough value in the dividend to support
// pulling off a bit. We'll move down a bit
// therefore and try again.
//
end else begin
//
// Put a '1' into our output accumulator.
// Subtract the divisor from the dividend,
// and then move on to the next bit
//
r_dividend <= diff[(BW-1):0];
o_quotient[r_bit[(LGBW-1):0]] <= 1'b1;
r_z <= 1'b0;
o_quotient[0] <= 1'b1;
end
r_sign <= (r_sign)&&(~zero_divisor);
end else if (r_sign)
begin
r_sign <= 1'b0;
o_quotient <= -o_quotient;
end
else
o_quotient <= 0;
 
// Set Carry on an exact divide
wire w_n;
// Perhaps nothing uses this, but ... well, I suppose we could remove
// this logic eventually, just ... not yet.
always @(posedge i_clk)
r_c <= (r_busy)&&((diff == 0)||(r_dividend == 0));
 
// The last flag: Negative. This flag is set assuming that the result
// of the divide was negative (i.e., the high order bit is set). This
// will also be true of an unsigned divide--if the high order bit is
// ever set upon completion. Indeed, you might argue that there's no
// logic involved.
wire w_n;
assign w_n = o_quotient[(BW-1)];
 
assign o_flags = { 1'b0, w_n, r_c, r_z };
/trunk/rtl/core/idecode.v
55,6 → 55,7
module idecode(i_clk, i_rst, i_ce, i_stalled,
i_instruction, i_gie, i_pc, i_pf_valid,
i_illegal,
o_valid,
o_phase, o_illegal,
o_pc, o_gie,
o_dcdR, o_dcdA, o_dcdB, o_I, o_zI,
72,7 → 73,7
input i_gie;
input [(AW-1):0] i_pc;
input i_pf_valid, i_illegal;
output wire o_phase;
output wire o_valid, o_phase;
output reg o_illegal;
output reg [AW:0] o_pc;
output reg o_gie;
105,13 → 106,14
 
 
wire [4:0] w_op;
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev, w_noop;
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev,
w_noop, w_lock;
wire [4:0] w_dcdR, w_dcdB, w_dcdA;
wire w_dcdR_pc, w_dcdR_cc;
wire w_dcdA_pc, w_dcdA_cc;
wire w_dcdB_pc, w_dcdB_cc;
wire [3:0] w_cond;
wire w_wF, w_mem, w_sto, w_lod, w_div, w_fpu;
wire w_wF, w_mem, w_sto, w_div, w_fpu;
wire w_wR, w_rA, w_rB, w_wR_n;
wire w_ljmp, w_ljmp_dly, w_cis_ljmp;
wire [31:0] iword;
217,6 → 219,9
// If the result register is either CC or PC, and this would otherwise
// be a floating point instruction with floating point opcode of 0,
// then this is a NOOP.
assign w_lock = (!iword[31])&&(w_op[4:0]==5'h1d)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
assign w_noop = (!iword[31])&&(w_op[4:0] == 5'h1f)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1] == 3'h7))
||(IMPLEMENT_FPU==0));
252,7 → 257,6
// 1 LUT
assign w_mem = (w_cis_op[4:3] == 2'b10)&&(w_cis_op[2:1] !=2'b00);
assign w_sto = (w_mem)&&( w_cis_op[0]);
assign w_lod = (w_mem)&&(!w_cis_op[0]);
// 1 LUT
assign w_div = (!iword[31])&&(w_op[4:1] == 4'h7);
// 2 LUTs
403,12 → 407,16
if (!o_phase)
o_gie<= i_gie;
 
if ((iword[31])&&(!o_phase))
o_pc <= { i_pc, 1'b1 };
else if ((iword[31])&&(i_pf_valid))
o_pc <= { i_pc, 1'b0 };
else
if (iword[31])
begin
if (o_phase)
o_pc <= o_pc + 1'b1;
else if (i_pf_valid)
o_pc <= { i_pc, 1'b1 };
end else begin
// The normal, non-CIS case
o_pc <= { i_pc + 1'b1, 1'b0 };
end
`else
o_gie<= i_gie;
o_pc <= { i_pc + 1'b1, 1'b0 };
460,9 → 468,7
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
`ifdef OPT_PIPELINED
r_lock <= (!iword[31])&&(w_op[4:0]==5'h1d)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
r_lock <= w_lock;
`endif
`ifdef OPT_CIS
r_nxt_half <= { iword[31], iword[14:0] };
588,14 → 594,15
always @(posedge i_clk)
if (i_rst)
r_valid <= 1'b0;
else if ((i_ce)&&(o_ljmp))
else if (i_ce)
r_valid <= ((i_pf_valid)||(o_phase)||(i_illegal))
&&(!o_ljmp)&&(!o_early_branch);
else if (!i_stalled)
r_valid <= 1'b0;
else if ((i_ce)&&(i_pf_valid))
r_valid <= 1'b1;
else if (~i_stalled)
r_valid <= 1'b0;
 
assign o_valid = r_valid;
 
 
assign o_I = { {(32-22){r_I[22]}}, r_I[21:0] };
 
endmodule
/trunk/rtl/core/memops.v
163,10 → 163,10
 
initial o_valid = 1'b0;
always @(posedge i_clk)
o_valid <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
o_valid <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
initial o_err = 1'b0;
always @(posedge i_clk)
o_err <= ((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
o_err <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
 
always @(posedge i_clk)
/trunk/rtl/core/prefetch.v
53,18 → 53,18
// mode which this prefetch does not support. In non--pipelined mode, the
// flash will require (16+6+6)*2 = 56 clocks plus 16 clocks per word read,
// or 72 clocks to fetch one instruction.
module prefetch(i_clk, i_rst, i_ce, i_stalled_n, i_pc, i_aux,
o_i, o_pc, o_aux, o_valid, o_illegal,
module prefetch(i_clk, i_rst, i_new_pc, i_clear_cache, i_stalled_n, i_pc,
o_i, o_pc, o_valid, o_illegal,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
parameter ADDRESS_WIDTH=32, AUX_WIDTH = 1, AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_ce, i_stalled_n;
parameter ADDRESS_WIDTH=32;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc, i_clear_cache,
i_stalled_n;
input [(AW-1):0] i_pc;
input [(AUX_WIDTH-1):0] i_aux;
output reg [31:0] o_i;
output reg [(AW-1):0] o_pc;
output reg [(AUX_WIDTH-1):0] o_aux;
output reg o_valid, o_illegal;
output wire [(AW-1):0] o_pc;
output reg o_valid;
// Wishbone outputs
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
71,8 → 71,9
output reg [(AW-1):0] o_wb_addr;
output wire [31:0] o_wb_data;
// And return inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
output reg o_illegal;
 
assign o_wb_we = 1'b0;
assign o_wb_data = 32'h0000;
84,47 → 85,54
initial o_wb_stb = 1'b0;
initial o_wb_addr= 0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_ack))
if ((i_rst)||(i_wb_ack)||(i_wb_err))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end else if ((i_ce)&&(~o_wb_cyc)) // Initiate a bus cycle
begin
end else if ((!o_wb_cyc)&&((i_stalled_n)||(!o_valid)))
begin // Initiate a bus cycle
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
end else if (o_wb_cyc) // Independent of ce
begin
if ((o_wb_cyc)&&(o_wb_stb)&&(~i_wb_stall))
if (~i_wb_stall)
o_wb_stb <= 1'b0;
if (i_wb_ack)
o_wb_cyc <= 1'b0;
end
 
reg invalid;
initial invalid = 1'b0;
always @(posedge i_clk)
if (i_rst) // Set the address to guarantee the result is invalid
o_wb_addr <= {(AW){1'b1}};
else if ((i_ce)&&(~o_wb_cyc))
if (!o_wb_cyc)
invalid <= 1'b0;
else if ((i_new_pc)||(i_clear_cache))
invalid <= (!o_wb_stb);
 
always @(posedge i_clk)
if (i_new_pc)
o_wb_addr <= i_pc;
else if ((!o_wb_cyc)&&(i_stalled_n)&&(!invalid))
o_wb_addr <= o_wb_addr + 1'b1;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
o_aux <= i_aux;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
o_i <= i_wb_data;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
o_pc <= o_wb_addr;
 
initial o_valid = 1'b0;
initial o_illegal = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
if (i_rst)
begin
o_valid <= (i_pc == o_wb_addr)&&(~i_wb_err);
o_illegal <= (i_wb_err)&&(i_pc == o_wb_addr);
end else if (i_stalled_n)
o_valid <= 1'b0;
o_illegal <= 1'b0;
end else if ((o_wb_cyc)&&(i_wb_ack))
begin
o_valid <= (!i_wb_err)&&(!invalid);
o_illegal <= ( i_wb_err)&&(!invalid);
end else if ((i_stalled_n)||(i_clear_cache))
begin
o_valid <= 1'b0;
o_illegal <= 1'b0;
end
 
assign o_pc = o_wb_addr;
endmodule
/trunk/rtl/core/zipcpu.v
45,7 → 45,7
// as:
//
//
// assign (n)_ce = (n-1)_valid && (~(n)_stall)
// assign (n)_ce = (n-1)_valid && (!(n)_stall)
//
//
// always @(posedge i_clk)
261,7 → 261,6
dcd_ALU, dcd_M, dcd_DIV, dcd_FP,
dcd_wF, dcd_gie, dcd_break, dcd_lock,
dcd_pipe, dcd_ljmp;
reg r_dcd_valid;
wire dcd_valid;
wire [AW:0] dcd_pc /* verilator public_flat */;
wire [31:0] dcd_I;
298,10 → 297,6
wire [7:0] op_F;
wire op_ce, op_phase, op_pipe, op_change_data_ce;
// Some pipeline control wires
`ifdef OPT_PIPELINED
reg op_A_alu, op_A_mem;
reg op_B_alu, op_B_mem;
`endif
reg op_illegal;
wire op_break;
wire op_lock;
347,8 → 342,8
wire [31:0] div_result;
wire [3:0] div_flags;
 
assign div_ce = (master_ce)&&(~clear_pipeline)&&(op_valid_div)
&&(~mem_rdbusy)&&(~div_busy)&&(~fpu_busy)
assign div_ce = (master_ce)&&(!clear_pipeline)&&(op_valid_div)
&&(!mem_rdbusy)&&(!div_busy)&&(!fpu_busy)
&&(set_cond);
 
wire fpu_ce, fpu_error, fpu_busy, fpu_valid;
355,8 → 350,8
wire [31:0] fpu_result;
wire [3:0] fpu_flags;
 
assign fpu_ce = (master_ce)&&(~clear_pipeline)&&(op_valid_fpu)
&&(~mem_rdbusy)&&(~div_busy)&&(~fpu_busy)
assign fpu_ce = (master_ce)&&(!clear_pipeline)&&(op_valid_fpu)
&&(!mem_rdbusy)&&(!div_busy)&&(!fpu_busy)
&&(set_cond);
 
wire adf_ce_unconditional;
379,7 → 374,7
//
// MASTER: clock enable.
//
assign master_ce = ((~i_halt)||(alu_phase))&&(~o_break)&&(~sleep);
assign master_ce = ((!i_halt)||(alu_phase))&&(!o_break)&&(!sleep);
 
 
//
392,15 → 387,11
//
// PIPELINE STAGE #2 :: Instruction Decode
// Calculate stall conditions
assign dcd_ce = ((~dcd_valid)||(~dcd_stalled))&&(~clear_pipeline);
 
`ifdef OPT_PIPELINED
assign dcd_stalled = (dcd_valid)&&(op_stall);
`else
// If not pipelined, there will be no op_valid_ anything, and the
// op_stall will be false, dcd_X_stall will be false, thus we can simply
// do a ...
assign dcd_stalled = 1'b0;
`else // Not pipelined -- either double or single fetch
assign dcd_stalled = (dcd_valid)&&(op_stall);
`endif
//
// PIPELINE STAGE #3 :: Read Operands
418,7 → 409,7
assign op_stall = (op_valid)&&( // Only stall if we're loaded w/validins
// Stall if we're stopped, and not allowed to execute
// an instruction
// (~master_ce) // Already captured in alu_stall
// (!master_ce) // Already captured in alu_stall
//
// Stall if going into the ALU and the ALU is stalled
// i.e. if the memory is busy, or we are single
452,6 → 443,11
);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(!op_stall);
 
`else
assign op_stall = (alu_busy)||(div_busy)||(fpu_busy)||(wr_reg_ce)
||(mem_busy)||(op_valid)||(!master_ce)||(wr_flags_ce);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(!op_stall);
`endif
 
// BUT ... op_ce is too complex for many of the data operations. So
// let's make their circuit enable code simpler. In particular, if
458,12 → 454,7
// op_ doesn't need to be preserved, we can change it all we want
// ... right? The clear_pipeline code, for example, really only needs
// to determine whether op_valid is true.
assign op_change_data_ce = (~op_stall);
`else
assign op_stall = (op_valid)&&(~master_ce);
assign op_ce = ((dcd_valid)||(dcd_illegal)||(dcd_early_branch))&&(~clear_pipeline);
assign op_change_data_ce = 1'b1;
`endif
assign op_change_data_ce = (!op_stall);
 
//
// PIPELINE STAGE #4 :: ALU / Memory
479,16 → 470,16
// through the ALU. Break instructions are not allowed through
// the ALU.
`ifdef OPT_PIPELINED
assign alu_stall = (((~master_ce)||(mem_rdbusy)||(alu_busy))&&(op_valid_alu)) //Case 1&2
assign alu_stall = (((!master_ce)||(mem_rdbusy)||(alu_busy))&&(op_valid_alu)) //Case 1&2
||(prelock_stall)
||((op_valid)&&(op_break))
||(wr_reg_ce)&&(wr_write_cc)
||(div_busy)||(fpu_busy);
assign alu_ce = (master_ce)&&(op_valid_alu)&&(~alu_stall)
&&(~clear_pipeline);
assign alu_ce = (master_ce)&&(op_valid_alu)&&(!alu_stall)
&&(!clear_pipeline);
`else
assign alu_stall = (op_valid_alu)&&((~master_ce)||(op_break));
assign alu_ce = (master_ce)&&(op_valid_alu)&&(~alu_stall)&&(~clear_pipeline);
assign alu_stall = (op_valid_alu)&&((!master_ce)||(op_break));
assign alu_ce = (master_ce)&&(op_valid_alu)&&(!alu_stall)&&(!clear_pipeline);
`endif
//
 
496,26 → 487,14
// Note: if you change the conditions for mem_ce, you must also change
// alu_pc_valid.
//
`ifdef OPT_PIPELINED
assign mem_ce = (master_ce)&&(op_valid_mem)&&(~mem_stalled)
&&(~clear_pipeline);
`else
// If we aren't pipelined, then no one will be changing what's in the
// pipeline (i.e. clear_pipeline), while our only instruction goes
// through the ... pipeline.
//
// However, in hind sight this logic didn't work. What happens when
// something gets in the pipeline and then (due to interrupt or some
// such) needs to be voided? Thus we avoid simplification and keep
// what worked here.
assign mem_ce = (master_ce)&&(op_valid_mem)&&(~mem_stalled)
&&(~clear_pipeline);
`endif
assign mem_ce = (master_ce)&&(op_valid_mem)&&(!mem_stalled)
&&(!clear_pipeline);
 
`ifdef OPT_PIPELINED_BUS_ACCESS
assign mem_stalled = (~master_ce)||(alu_busy)||((op_valid_mem)&&(
assign mem_stalled = (!master_ce)||(alu_busy)||((op_valid_mem)&&(
(mem_pipe_stalled)
||(prelock_stall)
||((~op_pipe)&&(mem_busy))
||((!op_pipe)&&(mem_busy))
||(div_busy)
||(fpu_busy)
// Stall waiting for flags to be valid
527,7 → 506,7
`else
`ifdef OPT_PIPELINED
assign mem_stalled = (mem_busy)||((op_valid_mem)&&(
(~master_ce)
(!master_ce)
// Stall waiting for flags to be valid
// Or waiting for a write to the PC register
// Or CC register, since that can change the
534,15 → 513,15
// PC as well
||((wr_reg_ce)&&(wr_reg_id[4] == op_gie)&&((wr_write_pc)||(wr_write_cc)))));
`else
assign mem_stalled = (op_valid_mem)&&(~master_ce);
assign mem_stalled = (op_valid_mem)&&(!master_ce);
`endif
`endif
 
// ALU, DIV, or FPU CE ... equivalent to the OR of all three of these
assign adf_ce_unconditional = (master_ce)&&(~clear_pipeline)&&(op_valid)
&&(~op_valid_mem)&&(~mem_rdbusy)
&&((~op_valid_alu)||(~alu_stall))&&(~op_break)
&&(~div_busy)&&(~fpu_busy)&&(~clear_pipeline);
assign adf_ce_unconditional = (master_ce)&&(!clear_pipeline)&&(op_valid)
&&(!op_valid_mem)&&(!mem_rdbusy)
&&((!op_valid_alu)||(!alu_stall))&&(!op_break)
&&(!div_busy)&&(!fpu_busy)&&(!clear_pipeline);
 
//
//
549,38 → 528,46
// PIPELINE STAGE #1 :: Prefetch
//
//
wire pf_stalled;
assign pf_stalled = (dcd_stalled)||(dcd_phase);
 
wire pf_new_pc;
assign pf_new_pc = (new_pc)||((dcd_early_branch)&&(!clear_pipeline));
 
wire [(AW-1):0] pf_request_address;
assign pf_request_address = ((dcd_early_branch)&&(!clear_pipeline))
? dcd_branch_pc:pf_pc[(AW+1):2];
assign pf_gie = gie;
`ifdef OPT_SINGLE_FETCH
wire pf_ce;
 
assign pf_ce = (~pf_valid)&&(~dcd_valid)&&(~op_valid)&&(~alu_busy)&&(~mem_busy)&&(~alu_pc_valid)&&(~mem_pc_valid);
prefetch #(ADDRESS_WIDTH)
pf(i_clk, (i_rst), (pf_ce), (~dcd_stalled), pf_pc[(AW+1):2], gie,
pf_instruction, pf_instruction_pc, pf_gie,
pf(i_clk, (i_rst), pf_new_pc, w_clear_icache,
(!pf_stalled),
pf_request_address,
pf_instruction, pf_instruction_pc,
pf_valid, pf_illegal,
pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
pf_ack, pf_stall, pf_err, i_wb_data);
 
initial r_dcd_valid = 1'b0;
always @(posedge i_clk)
if (clear_pipeline)
r_dcd_valid <= 1'b0;
else if (dcd_ce)
r_dcd_valid <= (pf_valid)||(pf_illegal);
else if (op_ce)
r_dcd_valid <= 1'b0;
assign dcd_valid = r_dcd_valid;
`else
`ifdef OPT_DOUBLE_FETCH
 
`else // Pipe fetch
wire [1:0] pf_dbg;
dblfetch #(ADDRESS_WIDTH)
pf(i_clk, i_rst, pf_new_pc,
w_clear_icache,
(!pf_stalled),
pf_request_address,
pf_instruction, pf_instruction_pc,
pf_valid,
pf_cyc, pf_stb, pf_we, pf_addr, pf_data,
pf_ack, pf_stall, pf_err, i_wb_data,
pf_illegal);
 
wire pf_stalled;
assign pf_stalled = (dcd_stalled)||(dcd_phase);
`else // Not single fetch and not double fetch
 
`ifdef OPT_TRADITIONAL_PFCACHE
wire [(AW-1):0] pf_request_address;
assign pf_request_address = ((dcd_early_branch)&&(!clear_pipeline))
? dcd_branch_pc:pf_pc[(AW+1):2];
pfcache #(LGICACHE, ADDRESS_WIDTH)
pf(i_clk, i_rst, (new_pc)||((dcd_early_branch)&&(~clear_pipeline)),
w_clear_icache,
pf(i_clk, i_rst, pf_new_pc, w_clear_icache,
// dcd_pc,
(!pf_stalled),
pf_request_address,
590,7 → 577,7
pf_illegal);
`else
pipefetch #(RESET_BUS_ADDRESS, LGICACHE, ADDRESS_WIDTH)
pf(i_clk, i_rst, (new_pc)||(dcd_early_branch),
pf(i_clk, i_rst, pf_new_pc,
w_clear_icache, (!pf_stalled),
(new_pc)?pf_pc[(AW+1):2]:dcd_branch_pc,
pf_instruction, pf_instruction_pc, pf_valid,
598,31 → 585,19
pf_ack, pf_stall, pf_err, i_wb_data,
(mem_cyc_lcl)||(mem_cyc_gbl),
pf_illegal);
`endif
`ifdef OPT_NO_USERMODE
assign pf_gie = 1'b0;
`else
assign pf_gie = gie;
`endif
`endif // OPT_TRADITIONAL_CACHE
`endif // OPT_DOUBLE_FETCH
`endif // OPT_SINGLE_FETCH
 
initial r_dcd_valid = 1'b0;
always @(posedge i_clk)
if ((clear_pipeline)||(w_clear_icache))
r_dcd_valid <= 1'b0;
else if (dcd_ce)
r_dcd_valid <= ((dcd_phase)||(pf_valid))
&&(~dcd_ljmp)&&(~dcd_early_branch);
else if (op_ce)
r_dcd_valid <= 1'b0;
assign dcd_valid = r_dcd_valid;
`endif
 
// If not pipelined, there will be no op_valid_ anything, and the
assign dcd_ce = (!dcd_valid)||(!dcd_stalled);
idecode #(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE,
IMPLEMENT_FPU)
instruction_decoder(i_clk, (clear_pipeline),
(~dcd_valid)||(~op_stall), dcd_stalled, pf_instruction, pf_gie,
pf_instruction_pc, pf_valid, pf_illegal, dcd_phase,
instruction_decoder(i_clk,
(clear_pipeline)||(w_clear_icache),
dcd_ce,
dcd_stalled, pf_instruction, pf_gie,
pf_instruction_pc, pf_valid, pf_illegal,
dcd_valid, dcd_phase,
dcd_illegal, dcd_pc, dcd_gie,
{ dcd_Rcc, dcd_Rpc, dcd_R },
{ dcd_Acc, dcd_Apc, dcd_A },
729,9 → 704,7
`endif
 
always @(posedge i_clk)
`ifdef OPT_PIPELINED
if (op_ce)
`endif
begin
`ifdef OPT_PIPELINED
if ((wr_reg_ce)&&(wr_reg_id == dcd_A))
746,11 → 719,7
r_op_Av <= w_op_Av;
`ifdef OPT_PIPELINED
end else
begin // We were going to pick these up when they became valid,
// but for some reason we're stuck here as they became
// valid. Pick them up now anyway
// if (((op_A_alu)&&(alu_wR))||((op_A_mem)&&(mem_valid)))
// r_op_Av <= wr_gpreg_vl;
begin
if ((wr_reg_ce)&&(wr_reg_id == op_Aid)&&(op_rA))
r_op_Av <= wr_gpreg_vl;
`endif
798,11 → 767,9
// below, arriving at what we finally want in the (now wire net)
// op_F.
always @(posedge i_clk)
`ifdef OPT_PIPELINED
if (op_ce) // Cannot do op_change_data_ce here since op_F depends
// upon being either correct for a valid op, or correct
// for the last valid op
`endif
begin // Set the flag condition codes, bit order is [3:0]=VNCZ
case(dcd_F[2:0])
3'h0: r_op_F <= 7'h00; // Always
818,7 → 785,7
assign op_F = { r_op_F[3], r_op_F[6:0] };
 
wire w_op_valid;
assign w_op_valid = (~clear_pipeline)&&(dcd_valid)&&(~dcd_ljmp)&&(!dcd_early_branch);
assign w_op_valid = (!clear_pipeline)&&(dcd_valid)&&(!dcd_ljmp)&&(!dcd_early_branch);
initial op_valid = 1'b0;
initial op_valid_alu = 1'b0;
initial op_valid_mem = 1'b0;
845,9 → 812,9
op_valid<= (w_op_valid)||(dcd_illegal)&&(dcd_valid)||(dcd_early_branch);
op_valid_alu <= (w_op_valid)&&((dcd_ALU)||(dcd_illegal)
||(dcd_early_branch));
op_valid_mem <= (dcd_M)&&(~dcd_illegal)&&(w_op_valid);
op_valid_div <= (dcd_DIV)&&(~dcd_illegal)&&(w_op_valid);
op_valid_fpu <= (dcd_FP)&&(~dcd_illegal)&&(w_op_valid);
op_valid_mem <= (dcd_M)&&(!dcd_illegal)&&(w_op_valid);
op_valid_div <= (dcd_DIV)&&(!dcd_illegal)&&(w_op_valid);
op_valid_fpu <= (dcd_FP)&&(!dcd_illegal)&&(w_op_valid);
end else if ((adf_ce_unconditional)||(mem_ce))
begin
op_valid <= 1'b0;
866,7 → 833,6
// to be, step through it, and then replace it back. In this fashion,
// a debugger can step through code.
// assign w_op_break = (dcd_break)&&(r_dcd_I[15:0] == 16'h0001);
`ifdef OPT_PIPELINED
reg r_op_break;
 
initial r_op_break = 1'b0;
877,9 → 843,6
else if (!op_valid)
r_op_break <= 1'b0;
assign op_break = r_op_break;
`else
assign op_break = dcd_break;
`endif
 
`ifdef OPT_PIPELINED
generate
892,7 → 855,7
if (clear_pipeline)
r_op_lock <= 1'b0;
else if (op_ce)
r_op_lock <= (dcd_valid)&&(dcd_lock)&&(~clear_pipeline);
r_op_lock <= (dcd_valid)&&(dcd_lock)&&(!clear_pipeline);
assign op_lock = r_op_lock;
 
end else begin
925,21 → 888,27
always @(posedge i_clk)
if (op_ce)
begin
op_wF <= (dcd_wF)&&((~dcd_Rcc)||(~dcd_wR))
&&(~dcd_early_branch)&&(~dcd_illegal);
op_wR <= (dcd_wR)&&(~dcd_early_branch)&&(~dcd_illegal);
op_wF <= (dcd_wF)&&((!dcd_Rcc)||(!dcd_wR))
&&(!dcd_early_branch)&&(!dcd_illegal);
op_wR <= (dcd_wR)&&(!dcd_early_branch)&&(!dcd_illegal);
end
`else
always @(posedge i_clk)
begin
op_wF <= (dcd_wF)&&((~dcd_Rcc)||(~dcd_wR))
&&(~dcd_early_branch)&&(~dcd_illegal);
op_wR <= (dcd_wR)&&(~dcd_early_branch)&&(~dcd_illegal);
op_wF <= (dcd_wF)&&((!dcd_Rcc)||(!dcd_wR))
&&(!dcd_early_branch)&&(!dcd_illegal);
op_wR <= (dcd_wR)&&(!dcd_early_branch)&&(!dcd_illegal);
end
`endif
 
`ifdef VERILATOR
`ifdef OPT_PIPELINED
`ifdef SINGLE_FETCH
always @(*)
begin
op_sim = dcd_sim;
op_sim_immv = dcd_sim_immv;
end
`else
always @(posedge i_clk)
if (op_change_data_ce)
begin
946,20 → 915,15
op_sim <= dcd_sim;
op_sim_immv <= dcd_sim_immv;
end
`else
always @(*)
begin
op_sim = dcd_sim;
op_sim_immv = dcd_sim_immv;
end
`endif
`endif
 
`ifdef OPT_PIPELINED
reg [3:0] r_op_opn;
reg [4:0] r_op_R;
reg r_op_Rcc;
reg r_op_gie;
 
initial r_op_gie = 1'b0;
always @(posedge i_clk)
if (op_change_data_ce)
begin
978,24 → 942,9
end
assign op_opn = r_op_opn;
assign op_R = r_op_R;
`ifdef OPT_NO_USERMODE
assign op_gie = 1'b0;
`else
assign op_gie = r_op_gie;
`endif
assign op_Rcc = r_op_Rcc;
`else
assign op_opn = dcd_opn;
assign op_R = dcd_R;
`ifdef OPT_NO_USERMODE
assign op_gie = 1'b0;
`else
assign op_gie = dcd_gie;
`endif
// With no pipelining, there is no early branching. We keep it
always @(posedge i_clk)
op_pc <= (dcd_early_branch)?dcd_branch_pc:dcd_pc[AW:1];
`endif
 
assign op_Fl = (op_gie)?(w_uflags):(w_iflags);
 
`ifdef OPT_CIS
1087,9 → 1036,9
// decode circuit has told us that the hazard
// is clear, so we're okay then.
//
((~dcd_zI)&&(
((!dcd_zI)&&(
((op_R == dcd_B)&&(op_wR))
||((mem_rdbusy)&&(~dcd_pipe))
||((mem_rdbusy)&&(!dcd_pipe))
))
// Stall following any instruction that will
// set the flags, if we're going to need the
1097,10 → 1046,10
||(((op_wF)||(cc_invalid_for_dcd))&&(dcd_Bcc))
// Stall on any ongoing memory operation that
// will write to op_B -- captured above
// ||((mem_busy)&&(~mem_we)&&(mem_last_reg==dcd_B)&&(~dcd_zI))
// ||((mem_busy)&&(!mem_we)&&(mem_last_reg==dcd_B)&&(!dcd_zI))
)
||((dcd_rB)&&(dcd_Bcc)&&(cc_invalid_for_dcd));
assign dcd_F_stall = ((~dcd_F[3])
assign dcd_F_stall = ((!dcd_F[3])
||((dcd_rA)&&(dcd_Acc))
||((dcd_rB)&&(dcd_Bcc)))
&&(op_valid)&&(op_Rcc);
1169,7 → 1118,7
// alu_reg <= op_R;
alu_wR <= (op_wR)&&(set_cond);
alu_wF <= (op_wF)&&(set_cond);
end else if (~alu_busy) begin
end else if (!alu_busy) begin
// These are strobe signals, so clear them if not
// set for any particular clock
alu_wR <= (i_halt)&&(i_dbg_we);
1209,7 → 1158,7
reg dbgv;
initial dbgv = 1'b0;
always @(posedge i_clk)
dbgv <= (~i_rst)&&(i_halt)&&(i_dbg_we)&&(r_halted);
dbgv <= (!i_rst)&&(i_halt)&&(i_dbg_we)&&(r_halted);
reg [31:0] dbg_val;
always @(posedge i_clk)
dbg_val <= i_dbg_data;
1232,8 → 1181,8
reg [(AW-1):0] r_alu_pc;
always @(posedge i_clk)
if ((adf_ce_unconditional)
||((master_ce)&&(op_valid_mem)&&(~clear_pipeline)
&&(~mem_stalled)))
||((master_ce)&&(op_valid_mem)&&(!clear_pipeline)
&&(!mem_stalled)))
r_alu_pc <= op_pc;
assign alu_pc = r_alu_pc;
`else
1256,11 → 1205,11
always @(posedge i_clk)
if (clear_pipeline)
r_alu_pc_valid <= 1'b0;
else if ((adf_ce_unconditional)&&(!op_phase)) //Includes&&(~alu_clear_pipeline)
else if ((adf_ce_unconditional)&&(!op_phase))
r_alu_pc_valid <= 1'b1;
else if (((~alu_busy)&&(~div_busy)&&(~fpu_busy))||(clear_pipeline))
else if (((!alu_busy)&&(!div_busy)&&(!fpu_busy))||(clear_pipeline))
r_alu_pc_valid <= 1'b0;
assign alu_pc_valid = (r_alu_pc_valid)&&((~alu_busy)&&(~div_busy)&&(~fpu_busy));
assign alu_pc_valid = (r_alu_pc_valid)&&((!alu_busy)&&(!div_busy)&&(!fpu_busy));
always @(posedge i_clk)
if (i_rst)
mem_pc_valid <= 1'b0;
1326,7 → 1275,8
mem_ack, mem_stall, mem_err, i_wb_data);
 
`else // PIPELINED_BUS_ACCESS
memops #(AW,IMPLEMENT_LOCK,WITH_LOCAL_BUS) domem(i_clk, i_rst,(mem_ce)&&(set_cond), bus_lock,
memops #(AW,IMPLEMENT_LOCK,WITH_LOCAL_BUS) domem(i_clk, i_rst,
(mem_ce)&&(set_cond), bus_lock,
(op_opn[2:0]), op_Bv, op_Av, op_R,
mem_busy,
mem_valid, bus_err, mem_wreg, mem_result,
1336,7 → 1286,7
mem_ack, mem_stall, mem_err, i_wb_data);
assign mem_pipe_stalled = 1'b0;
`endif // PIPELINED_BUS_ACCESS
assign mem_rdbusy = ((mem_busy)&&(~mem_we));
assign mem_rdbusy = ((mem_busy)&&(!mem_we));
 
// Either the prefetch or the instruction gets the memory bus, but
// never both.
1389,7 → 1339,7
// Further, alu_wR includes (set_cond), so we don't need to
// check for that here either.
assign wr_reg_ce = (dbgv)||(mem_valid)
||((~clear_pipeline)&&(~alu_illegal)
||((!clear_pipeline)&&(!alu_illegal)
&&(((alu_wR)&&(alu_valid))
||(div_valid)||(fpu_valid)));
// Which register shall be written?
1431,7 → 1381,7
// Write back to the condition codes/flags register ...
// When shall we write to our flags register? alu_wF already
// includes the set condition ...
assign wr_flags_ce = ((alu_wF)||(div_valid)||(fpu_valid))&&(~clear_pipeline)&&(~alu_illegal);
assign wr_flags_ce = ((alu_wF)||(div_valid)||(fpu_valid))&&(!clear_pipeline)&&(!alu_illegal);
assign w_uflags = { 1'b0, uhalt_phase, ufpu_err_flag,
udiv_err_flag, ubus_err_flag, trap, ill_err_u,
ubreak, step, 1'b1, sleep,
1439,7 → 1389,7
assign w_iflags = { 1'b0, ihalt_phase, ifpu_err_flag,
idiv_err_flag, ibus_err_flag, trap, ill_err_i,
break_en, 1'b0, 1'b0, sleep,
((wr_flags_ce)&&(~alu_gie))?alu_flags:iflags };
((wr_flags_ce)&&(!alu_gie))?alu_flags:iflags };
 
 
// What value to write?
1455,7 → 1405,7
always @(posedge i_clk)
if ((wr_reg_ce)&&(wr_write_scc))
iflags <= wr_gpreg_vl[3:0];
else if ((wr_flags_ce)&&(~alu_gie))
else if ((wr_flags_ce)&&(!alu_gie))
iflags <= (div_valid)?div_flags:((fpu_valid)?fpu_flags
: alu_flags);
 
1486,10 → 1436,10
 
initial r_break_pending = 1'b0;
always @(posedge i_clk)
if ((clear_pipeline)||(~op_valid))
if ((clear_pipeline)||(!op_valid))
r_break_pending <= 1'b0;
else if (op_break)
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy)&&(!wr_reg_ce);
r_break_pending <= (!alu_busy)&&(!div_busy)&&(!fpu_busy)&&(!mem_busy)&&(!wr_reg_ce);
else
r_break_pending <= 1'b0;
assign break_pending = r_break_pending;
1498,12 → 1448,12
`endif
 
 
assign o_break = ((break_en)||(~op_gie))&&(break_pending)
&&(~clear_pipeline)
||((~alu_gie)&&(bus_err))
||((~alu_gie)&&(div_error))
||((~alu_gie)&&(fpu_error))
||((~alu_gie)&&(alu_illegal)&&(!clear_pipeline));
assign o_break = ((break_en)||(!op_gie))&&(break_pending)
&&(!clear_pipeline)
||((!alu_gie)&&(bus_err))
||((!alu_gie)&&(div_error))
||((!alu_gie)&&(fpu_error))
||((!alu_gie)&&(alu_illegal)&&(!clear_pipeline));
 
// The sleep register. Setting the sleep register causes the CPU to
// sleep until the next interrupt. Setting the sleep register within
1520,7 → 1470,7
r_sleep_is_halt <= 1'b0;
else if ((wr_reg_ce)&&(wr_write_cc)
&&(wr_spreg_vl[`CPU_SLEEP_BIT])
&&(~wr_spreg_vl[`CPU_GIE_BIT]))
&&(!wr_spreg_vl[`CPU_GIE_BIT]))
r_sleep_is_halt <= 1'b1;
 
// Trying to switch to user mode, either via a WAIT or an RTU
1536,7 → 1486,7
always @(posedge i_clk)
if ((i_rst)||(w_switch_to_interrupt))
sleep <= 1'b0;
else if ((wr_reg_ce)&&(wr_write_cc)&&(~alu_gie))
else if ((wr_reg_ce)&&(wr_write_cc)&&(!alu_gie))
// In supervisor mode, we have no protections. The
// supervisor can set the sleep bit however he wants.
// Well ... not quite. Switching to user mode and
1546,7 → 1496,7
// don't set the sleep bit
// otherwise however it would o.w. be set
sleep <= (wr_spreg_vl[`CPU_SLEEP_BIT])
&&((~i_interrupt)||(~wr_spreg_vl[`CPU_GIE_BIT]));
&&((!i_interrupt)||(!wr_spreg_vl[`CPU_GIE_BIT]));
else if ((wr_reg_ce)&&(wr_write_cc)&&(wr_spreg_vl[`CPU_GIE_BIT]))
// In user mode, however, you can only set the sleep
// mode while remaining in user mode. You can't switch
1558,7 → 1508,7
always @(posedge i_clk)
if (i_rst)
step <= 1'b0;
else if ((wr_reg_ce)&&(~alu_gie)&&(wr_write_ucc))
else if ((wr_reg_ce)&&(!alu_gie)&&(wr_write_ucc))
step <= wr_spreg_vl[`CPU_STEP_BIT];
 
// The GIE register. Only interrupts can disable the interrupt register
1568,12 → 1518,12
`else
assign w_switch_to_interrupt = (gie)&&(
// On interrupt (obviously)
((i_interrupt)&&(~alu_phase)&&(~bus_lock))
((i_interrupt)&&(!alu_phase)&&(!bus_lock))
// If we are stepping the CPU
||(((alu_pc_valid)||(mem_pc_valid))&&(step)&&(~alu_phase)&&(~bus_lock))
||(((alu_pc_valid)||(mem_pc_valid))&&(step)&&(!alu_phase)&&(!bus_lock))
// If we encounter a break instruction, if the break
// enable isn't set.
||((master_ce)&&(break_pending)&&(~break_en))
||((master_ce)&&(break_pending)&&(!break_en))
// On an illegal instruction
||((alu_illegal)&&(!clear_pipeline))
// On division by zero. If the divide isn't
1587,10 → 1537,10
//
||(bus_err)
// If we write to the CC register
||((wr_reg_ce)&&(~wr_spreg_vl[`CPU_GIE_BIT])
||((wr_reg_ce)&&(!wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_reg_id[4])&&(wr_write_cc))
);
assign w_release_from_interrupt = (~gie)&&(~i_interrupt)
assign w_release_from_interrupt = (!gie)&&(!i_interrupt)
// Then if we write the sCC register
&&(((wr_reg_ce)&&(wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_write_scc))
1623,10 → 1573,10
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_trap <= 1'b0;
else if ((alu_gie)&&(wr_reg_ce)&&(~wr_spreg_vl[`CPU_GIE_BIT])
else if ((alu_gie)&&(wr_reg_ce)&&(!wr_spreg_vl[`CPU_GIE_BIT])
&&(wr_write_ucc)) // &&(wr_reg_id[4]) implied
r_trap <= 1'b1;
else if ((wr_reg_ce)&&(wr_write_ucc)&&(~alu_gie))
else if ((wr_reg_ce)&&(wr_write_ucc)&&(!alu_gie))
r_trap <= (r_trap)&&(wr_spreg_vl[`CPU_TRAP_BIT]);
 
reg r_ubreak;
1637,7 → 1587,7
r_ubreak <= 1'b0;
else if ((op_gie)&&(break_pending)&&(w_switch_to_interrupt))
r_ubreak <= 1'b1;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ubreak <= (ubreak)&&(wr_spreg_vl[`CPU_BREAK_BIT]);
 
assign trap = r_trap;
1653,7 → 1603,7
// Only the debug interface can clear this bit
else if ((dbgv)&&(wr_write_scc))
ill_err_i <= (ill_err_i)&&(wr_spreg_vl[`CPU_ILL_BIT]);
else if ((alu_illegal)&&(~alu_gie)&&(!clear_pipeline))
else if ((alu_illegal)&&(!alu_gie)&&(!clear_pipeline))
ill_err_i <= 1'b1;
 
`ifdef OPT_NO_USERMODE
1669,7 → 1619,7
r_ill_err_u <= 1'b0;
// If the supervisor (or debugger) writes to this register,
// clearing the bit, then clear it
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ill_err_u <=((ill_err_u)&&(wr_spreg_vl[`CPU_ILL_BIT]));
else if ((alu_illegal)&&(alu_gie)&&(!clear_pipeline))
r_ill_err_u <= 1'b1;
1688,7 → 1638,7
ibus_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
ibus_err_flag <= (ibus_err_flag)&&(wr_spreg_vl[`CPU_BUSERR_BIT]);
else if ((bus_err)&&(~alu_gie))
else if ((bus_err)&&(!alu_gie))
ibus_err_flag <= 1'b1;
// User bus error flag -- if ever set, it will cause an interrupt to
// supervisor mode.
1701,7 → 1651,7
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_ubus_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
r_ubus_err_flag <= (ubus_err_flag)&&(wr_spreg_vl[`CPU_BUSERR_BIT]);
else if ((bus_err)&&(alu_gie))
r_ubus_err_flag <= 1'b1;
1723,7 → 1673,7
r_idiv_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
r_idiv_err_flag <= (r_idiv_err_flag)&&(wr_spreg_vl[`CPU_DIVERR_BIT]);
else if ((div_error)&&(~alu_gie))
else if ((div_error)&&(!alu_gie))
r_idiv_err_flag <= 1'b1;
 
assign idiv_err_flag = r_idiv_err_flag;
1736,7 → 1686,7
always @(posedge i_clk)
if ((i_rst)||(w_release_from_interrupt))
r_udiv_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)
&&(wr_write_ucc))
r_udiv_err_flag <= (r_udiv_err_flag)&&(wr_spreg_vl[`CPU_DIVERR_BIT]);
else if ((div_error)&&(alu_gie))
1761,7 → 1711,7
r_ifpu_err_flag <= 1'b0;
else if ((dbgv)&&(wr_write_scc))
r_ifpu_err_flag <= (r_ifpu_err_flag)&&(wr_spreg_vl[`CPU_FPUERR_BIT]);
else if ((fpu_error)&&(fpu_valid)&&(~alu_gie))
else if ((fpu_error)&&(fpu_valid)&&(!alu_gie))
r_ifpu_err_flag <= 1'b1;
// User floating point error flag -- if ever set, it will cause
// a sudden switch interrupt to supervisor mode.
1769,7 → 1719,7
always @(posedge i_clk)
if ((i_rst)&&(w_release_from_interrupt))
r_ufpu_err_flag <= 1'b0;
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)
else if (((!alu_gie)||(dbgv))&&(wr_reg_ce)
&&(wr_write_ucc))
r_ufpu_err_flag <= (r_ufpu_err_flag)&&(wr_spreg_vl[`CPU_FPUERR_BIT]);
else if ((fpu_error)&&(alu_gie)&&(fpu_valid))
1789,7 → 1739,7
always @(posedge i_clk)
if (i_rst)
r_ihalt_phase <= 1'b0;
else if ((~alu_gie)&&(alu_pc_valid)&&(~clear_pipeline))
else if ((!alu_gie)&&(alu_pc_valid)&&(!clear_pipeline))
r_ihalt_phase <= alu_phase;
 
assign ihalt_phase = r_ihalt_phase;
1805,7 → 1755,7
r_uhalt_phase <= 1'b0;
else if ((alu_gie)&&(alu_pc_valid))
r_uhalt_phase <= alu_phase;
else if ((~alu_gie)&&(wr_reg_ce)&&(wr_write_ucc))
else if ((!alu_gie)&&(wr_reg_ce)&&(wr_write_ucc))
r_uhalt_phase <= wr_spreg_vl[`CPU_PHASE_BIT];
 
assign uhalt_phase = r_uhalt_phase;
1822,7 → 1772,7
// the instruction writes the PC, we write whichever PC is appropriate.
//
// Do we need to all our partial results from the pipeline?
// What happens when the pipeline has gie and ~gie instructions within
// What happens when the pipeline has gie and !gie instructions within
// it? Do we clear both? What if a gie instruction tries to clear
// a non-gie instruction?
`ifdef OPT_NO_USERMODE
1834,7 → 1784,7
if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc))
r_upc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
else if ((alu_gie)&&
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
(((alu_pc_valid)&&(!clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
r_upc <= { alu_pc, 2'b00 };
assign upc = r_upc;
1843,10 → 1793,10
always @(posedge i_clk)
if (i_rst)
ipc <= { RESET_BUS_ADDRESS, 2'b00 };
else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc))
else if ((wr_reg_ce)&&(!wr_reg_id[4])&&(wr_write_pc))
ipc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
else if ((!alu_gie)&&(!alu_phase)&&
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
(((alu_pc_valid)&&(!clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
ipc <= { alu_pc, 2'b00 };
 
1853,25 → 1803,20
always @(posedge i_clk)
if (i_rst)
pf_pc <= { RESET_BUS_ADDRESS, 2'b00 };
else if ((w_switch_to_interrupt)||((~gie)&&(w_clear_icache)))
else if ((w_switch_to_interrupt)||((!gie)&&(w_clear_icache)))
pf_pc <= { ipc[(AW+1):2], 2'b00 };
else if ((w_release_from_interrupt)||((gie)&&(w_clear_icache)))
pf_pc <= { upc[(AW+1):2], 2'b00 };
else if ((wr_reg_ce)&&(wr_reg_id[4] == gie)&&(wr_write_pc))
pf_pc <= { wr_spreg_vl[(AW+1):2], 2'b00 };
`ifdef OPT_PIPELINED
else if ((dcd_early_branch)&&(~clear_pipeline))
else if ((dcd_early_branch)&&(!clear_pipeline))
pf_pc <= { dcd_branch_pc + 1'b1, 2'b00 };
else if ((new_pc)||((!pf_stalled)&&(pf_valid)))
pf_pc <= { pf_pc[(AW+1):2] + {{(AW-1){1'b0}},1'b1}, 2'b00 };
`else
else if ((alu_gie==gie)&&(
((alu_pc_valid)&&(~clear_pipeline))
||(mem_pc_valid)))
pf_pc <= { alu_pc[(AW-1):0], 2'b00 };
`endif
 
`ifdef OPT_PIPELINED
// If we aren't pipelined, or equivalently if we have no cache, these
// instructions will get quietly (or not so quietly) ignored by the
// optimizer.
reg r_clear_icache;
initial r_clear_icache = 1'b1;
always @(posedge i_clk)
1882,9 → 1827,6
else
r_clear_icache <= 1'b0;
assign w_clear_icache = r_clear_icache;
`else
assign w_clear_icache = i_clear_pf_cache;
`endif
 
initial new_pc = 1'b1;
always @(posedge i_clk)
1949,8 → 1891,8
r_halted <= (i_halt)&&(
// To be halted, any long lasting instruction must
// be completed.
(~pf_cyc)&&(~mem_busy)&&(~alu_busy)
&&(~div_busy)&&(~fpu_busy)
(!pf_cyc)&&(!mem_busy)&&(!alu_busy)
&&(!div_busy)&&(!fpu_busy)
// Operations must either be valid, or illegal
&&((op_valid)||(i_rst)||(dcd_illegal))
// Decode stage must be either valid, in reset, or ill
1959,7 → 1901,7
always @(posedge i_clk)
r_halted <= (i_halt)&&((op_valid)||(i_rst));
`endif
assign o_dbg_stall = ~r_halted;
assign o_dbg_stall = !r_halted;
 
//
//
1968,8 → 1910,8
//
//
assign o_op_stall = (master_ce)&&(op_stall);
assign o_pf_stall = (master_ce)&&(~pf_valid);
assign o_i_count = (alu_pc_valid)&&(~clear_pipeline);
assign o_pf_stall = (master_ce)&&(!pf_valid);
assign o_i_count = (alu_pc_valid)&&(!clear_pipeline);
 
`ifdef DEBUG_SCOPE
always @(posedge i_clk)
/trunk/rtl/cpudefs.v
150,7 → 150,59
// `define OPT_SINGLE_FETCH
//
//
// OPT_DOUBLE_FETCH is an alternative to OPT_SINGLE_FETCH. It is designed to
// increase performance primarily when using an instruction memory which has
// one cost for a random access, and a second (lower) cost for sequential
// access. The driving example behind this implementation was flash memory
// with 34 clocks for an initial access and 16 clocks for any subsequent access,
// but SDRAM memory with 27 clocks for an initial access and 1 clock for a
// subsequent access is also a good example. Even block RAM might be a good
// example, if there were any bus delays in getting to the RAM device. Using
// OPT_DOUBLE_FETCH also increases the pipeline speed, as it allows CIS
// instructions and therefore partial pipelining. (No work is done to resolve
// pipeline conflicts past the decode stage, as is the case with full pipeline
// mode.
//
// Do not define OPT_DOUBLE_FETCH if you wish to fully pipeline the CPU. Do
// not define both OPT_DOUBLE_FETCH and OPT_SINGLE_FETCH (the ifndef below
// should prevent that).
//
//
`ifndef OPT_SINGLE_FETCH
// `define OPT_DOUBLE_FETCH
`endif
//
//
//
// The ZipCPU ISA defines an optional compressed instruction set (CIS)
// complement. This compressed instruction format allows two instructions to
// be packed into the same instruction word. Some instructions can be so
// compressed, although not all. Compressed instructions take the same time to
// complete--they are just compressed within memory to spare troubles with the
// prefetch. Set OPT_CIS to include these compressed instructions as part of
// the instruction set.
//
`define OPT_CIS // COST: about 87 LUTs
//
//
//
//
// OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
// as possible, to avoid as many pipeline stalls on a branch as possible.
// With the OPT_TRADITIONAL_PFCACHE, BRA instructions cost only a single
// extra stall cycle, while LJMP instructions cost two (assuming the target is
// in the cache). Indeed, the result is that a BRA instruction can be used as
// the compiler's branch prediction optimizer: BRA's barely stall, while
// conditional branches will always suffer about 4 stall cycles or so.
//
// I recommend setting this flag, so as to turn early branching on---if you
// have the LUTs available to afford it.
//
`define OPT_EARLY_BRANCHING
//
//
//
//
// The next several options are pipeline optimization options. They make no
// sense in a single instruction fetch mode, hence we #ifndef them so they
// are only defined if we are in a full pipelined mode (i.e. OPT_SINGLE_FETCH
157,6 → 209,7
// is not defined).
//
`ifndef OPT_SINGLE_FETCH
`ifndef OPT_DOUBLE_FETCH
//
//
//
185,21 → 238,7
//
//
//
// OPT_EARLY_BRANCHING is an attempt to execute a BRA statement as early
// as possible, to avoid as many pipeline stalls on a branch as possible.
// It's not tremendously successful yet--BRA's still suffer stalls,
// but I intend to keep working on this approach until the number of stalls
// gets down to one or (ideally) zero. (With the OPT_TRADITIONAL_PFCACHE, this
// gets down to a single stall cycle ...) That way a "BRA" can be used as the
// compiler's branch prediction optimizer: BRA's barely stall, while branches
// on conditions will always suffer about 4 stall cycles or so.
//
// I recommend setting this flag, so as to turn early branching on.
//
`define OPT_EARLY_BRANCHING
//
//
//
// OPT_PIPELINED_BUS_ACCESS controls whether or not LOD/STO instructions
// can take advantaged of pipelined bus instructions. To be eligible, the
// operations must be identical (cannot pipeline loads and stores, just loads
218,28 → 257,7
//
//
//
// The instruction set defines an optional compressed instruction set (CIS)
// complement. These were at one time erroneously called Very Long Instruction
// Words. They are more appropriately referred to as compressed instructions.
// The compressed instruction format allows two instructions to be packed into
// the same instruction word. Some instructions can be compressed, not all.
// Compressed instructions take the same time to complete. Set OPT_CIS to
// include these double instructions as part of the instruction set. These
// instructions are designed to get more code density from the instruction set,
// and to hopefully take some pain off of the performance of the pre-fetch and
// instruction cache.
//
// These new instructions, however, also necessitate a change in the Zip
// CPU--the Zip CPU can no longer execute instructions atomically. It must
// now execute non-CIS instructions, or CIS instruction pairs, atomically.
// This logic has been added into the ZipCPU, but it has not (yet) been
// tested thoroughly.
//
//
`define OPT_CIS
//
//
//
`endif // OPT_DOUBLE_FETCH
`endif // OPT_SINGLE_FETCH
//
//

powered by: WebSVN 2.1.0

© copyright 1999-2022 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.