OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /zipcpu/trunk/rtl/core
    from Rev 205 to Rev 209
    Reverse comparison

Rev 205 → Rev 209

/README.md
0,0 → 1,49
## The Core of the ZipCPU
 
Here are the core files to the ZipCPU. In here, you'll find not only the
[main ZipCPU core](./zipcpu.v), but also:
 
- Several prefetch routines
 
o [prefetch.v](./prefetch.v) an older prefetch module that only fetched
one instruction at a time, and so prevented pipelining
 
o [pipefetch.v](./pipefetch.v), my first attempt at building a prefetch with
cache. It took a rather unique approach to the cache, implementing it as
a rolling window in memory. This file really sticks around for historical
reasons, but not much more.
 
o [dblfetch.v](./dblfetch.v), fetches two instructions at once (on subsequent
clocks). This is designed to increase the speed of the CPU when it isn't
pipelined, by exploiting the fact that many memory accesses go faster for
the second access.
 
o [pfcache.v](./pfcache.v), this is the current/best instruction cache
for the CPU.
 
 
- [idecode.v](./idecode.v), an instruction decoder
 
- Several memory access routines
 
o [memops.v](./memops.v), a typical/traditional one memory operation at a
time means of accessing memory. This was my first approach to memory,
and the appropriate approach still when the CPU is not running in its
pipelined mode.
 
o [pipemem.v](./pipemem.v), a faster memory access method that groups
consecutive memory accesses together into a pipelined bus access.
This routine has so far compensated for the fact that the ZipCPU does not
(yet) have an integrated data cache.
 
o [dcache.v](./dcache.v), is my attempt at building a data cache. This
has never been integrated with the CPU, and may not be integrated until
the MMU is also integrated.
 
- [div.v](./div.v), the divide unit
 
- [cpuops.v](./cpuops.v), the ALU unit
 
The defines within [cpudefs.v](../cpudefs.v) will determine which of these
modules gets linked into your CPU.
 
/cpuops.v
4,9 → 4,8
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: This supports the instruction set reordering of operations
// created by the second generation instruction set, as well as
// the new operations of POPC (population count) and BREV (bit reversal).
// Purpose: This is the ZipCPU ALU function. It handles all of the
// instruction opcodes 0-13. (14-15 are divide opcodes).
//
//
// Creator: Dan Gisselquist, Ph.D.
14,7 → 13,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
37,37 → 36,53
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
//
`include "cpudefs.v"
//
module cpuops(i_clk,i_rst, i_ce, i_op, i_a, i_b, o_c, o_f, o_valid,
module cpuops(i_clk,i_reset, i_stb, i_op, i_a, i_b, o_c, o_f, o_valid,
o_busy);
parameter IMPLEMENT_MPY = `OPT_MULTIPLY;
input i_clk, i_rst, i_ce;
input [3:0] i_op;
input [31:0] i_a, i_b;
parameter IMPLEMENT_MPY = `OPT_MULTIPLY;
parameter [0:0] OPT_SHIFTS = 1'b1;
input wire i_clk, i_reset, i_stb;
input wire [3:0] i_op;
input wire [31:0] i_a, i_b;
output reg [31:0] o_c;
output wire [3:0] o_f;
output reg o_valid;
output wire o_busy;
 
genvar k;
 
// Shift register pre-logic
wire [32:0] w_lsr_result, w_asr_result, w_lsl_result;
wire signed [32:0] w_pre_asr_input, w_pre_asr_shifted;
assign w_pre_asr_input = { i_a, 1'b0 };
assign w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
assign w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
: w_pre_asr_shifted;// ASR
assign w_lsr_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
:((i_b[5])?{32'h0,i_a[31]}
: ( { i_a, 1'b0 } >> (i_b[4:0]) ));// LSR
assign w_lsl_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
:((i_b[5])?{i_a[0], 32'h0}
: ({1'b0, i_a } << i_b[4:0])); // LSL
generate if (OPT_SHIFTS)
begin : IMPLEMENT_SHIFTS
wire signed [32:0] w_pre_asr_input, w_pre_asr_shifted;
assign w_pre_asr_input = { i_a, 1'b0 };
assign w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
assign w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
: w_pre_asr_shifted;// ASR
assign w_lsr_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
:((i_b[5])?{32'h0,i_a[31]}
 
: ( { i_a, 1'b0 } >> (i_b[4:0]) ));// LSR
assign w_lsl_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
:((i_b[5])?{i_a[0], 32'h0}
: ({1'b0, i_a } << i_b[4:0])); // LSL
end else begin : NO_SHIFTS
 
assign w_asr_result = { i_a[31], i_a[31:0] };
assign w_lsr_result = { 1'b0, i_a[31:0] };
assign w_lsl_result = { i_a[31:0], 1'b0 };
 
end endgenerate
 
//
// Bit reversal pre-logic
wire [31:0] w_brev_result;
genvar k;
generate
for(k=0; k<32; k=k+1)
begin : bit_reversal_cpuop
78,241 → 93,48
wire z, n, v;
reg c, pre_sign, set_ovfl, keep_sgn_on_ovfl;
always @(posedge i_clk)
if (i_ce) // 1 LUT
set_ovfl<=(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
||(i_op == 4'h6) // LSL
||(i_op == 4'h5)); // LSR
if (i_stb) // 1 LUT
set_ovfl<=(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
||(i_op == 4'h6) // LSL
||(i_op == 4'h5)); // LSR
 
always @(posedge i_clk)
if (i_ce) // 1 LUT
keep_sgn_on_ovfl<=
(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
||((i_op==4'h2)&&(i_a[31] == i_b[31]))); // ADD
if (i_stb) // 1 LUT
keep_sgn_on_ovfl<=
(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
||((i_op==4'h2)&&(i_a[31] == i_b[31]))); // ADD
 
wire [63:0] mpy_result; // Where we dump the multiply result
reg mpyhi; // Return the high half of the multiply
wire mpyhi; // Return the high half of the multiply
wire mpybusy; // The multiply is busy if true
wire mpydone; // True if we'll be valid on the next clock;
 
// A 4-way multiplexer can be done in one 6-LUT.
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with
// the Xilinx multiplexer fabric that follows.
// the Xilinx multiplexer fabric that follows.
// Given that we wish to apply this multiplexer approach to 33-bits,
// this will cost a minimum of 132 6-LUTs.
 
wire this_is_a_multiply_op;
assign this_is_a_multiply_op = (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'hc));
assign this_is_a_multiply_op = (i_stb)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'hc));
 
generate
if (IMPLEMENT_MPY == 0)
begin // No multiply support.
assign mpy_result = 63'h00;
end else if (IMPLEMENT_MPY == 1)
begin // Our single clock option (no extra clocks)
wire signed [63:0] w_mpy_a_input, w_mpy_b_input;
assign w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
assign w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
assign mpy_result = w_mpy_a_input * w_mpy_b_input;
assign mpybusy = 1'b0;
assign mpydone = 1'b0;
always @(*) mpyhi = 1'b0; // Not needed
end else if (IMPLEMENT_MPY == 2)
begin // Our two clock option (ALU must pause for 1 clock)
reg signed [63:0] r_mpy_a_input, r_mpy_b_input;
always @(posedge i_clk)
begin
r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
end
 
assign mpy_result = r_mpy_a_input * r_mpy_b_input;
assign mpybusy = 1'b0;
 
reg mpypipe;
initial mpypipe = 1'b0;
always @(posedge i_clk)
if (i_rst)
mpypipe <= 1'b0;
else
mpypipe <= (this_is_a_multiply_op);
 
assign mpydone = mpypipe; // this_is_a_multiply_op;
always @(posedge i_clk)
if (this_is_a_multiply_op)
mpyhi = i_op[1];
end else if (IMPLEMENT_MPY == 3)
begin // Our three clock option (ALU pauses for 2 clocks)
reg signed [63:0] r_smpy_result;
reg [63:0] r_umpy_result;
reg signed [31:0] r_mpy_a_input, r_mpy_b_input;
reg [1:0] mpypipe;
reg [1:0] r_sgn;
 
initial mpypipe = 2'b0;
always @(posedge i_clk)
if (i_rst)
mpypipe <= 2'b0;
else
mpypipe <= { mpypipe[0], this_is_a_multiply_op };
 
// First clock
always @(posedge i_clk)
begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
r_sgn <= { r_sgn[0], i_op[0] };
end
 
// Second clock
`ifdef VERILATOR
wire signed [63:0] s_mpy_a_input, s_mpy_b_input;
wire [63:0] u_mpy_a_input, u_mpy_b_input;
 
assign s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input};
assign s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input};
assign u_mpy_a_input = {32'h00,r_mpy_a_input};
assign u_mpy_b_input = {32'h00,r_mpy_b_input};
always @(posedge i_clk)
r_smpy_result = s_mpy_a_input * s_mpy_b_input;
always @(posedge i_clk)
r_umpy_result = u_mpy_a_input * u_mpy_b_input;
//
// Pull in the multiply logic from elsewhere
//
`ifdef FORMAL
`define MPYOP abs_mpy
`else
 
wire [31:0] u_mpy_a_input, u_mpy_b_input;
 
assign u_mpy_a_input = r_mpy_a_input;
assign u_mpy_b_input = r_mpy_b_input;
 
always @(posedge i_clk)
r_smpy_result = r_mpy_a_input * r_mpy_b_input;
always @(posedge i_clk)
r_umpy_result = u_mpy_a_input * u_mpy_b_input;
`define MPYOP mpyop
`endif
`MPYOP #(.IMPLEMENT_MPY(IMPLEMENT_MPY)) thempy(i_clk, i_reset, this_is_a_multiply_op, i_op[1:0],
i_a, i_b, mpydone, mpybusy, mpy_result, mpyhi);
 
always @(posedge i_clk)
if (this_is_a_multiply_op)
mpyhi = i_op[1];
assign mpybusy = mpypipe[0];
assign mpy_result = (r_sgn[1])?r_smpy_result:r_umpy_result;
assign mpydone = mpypipe[1];
 
// Results are then set on the third clock
end else // if (IMPLEMENT_MPY <= 4)
begin // The four clock option
reg [63:0] r_mpy_result;
reg [31:0] r_mpy_a_input, r_mpy_b_input;
reg r_mpy_signed;
reg [2:0] mpypipe;
 
// First clock, latch in the inputs
initial mpypipe = 3'b0;
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
// pipeline. In this case, the multiply
// pipeline is a two stage pipeline, so we need
// two bits in the pipe.
if (i_rst)
mpypipe <= 3'h0;
else begin
mpypipe[0] <= this_is_a_multiply_op;
mpypipe[1] <= mpypipe[0];
mpypipe[2] <= mpypipe[1];
end
 
if (i_op[0]) // i.e. if signed multiply
begin
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
end else begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
end
// The signed bit really only matters in the
// case of 64 bit multiply. We'll keep track
// of it, though, and pretend in all other
// cases.
r_mpy_signed <= i_op[0];
 
if (this_is_a_multiply_op)
mpyhi = i_op[1];
end
 
assign mpybusy = |mpypipe[1:0];
assign mpydone = mpypipe[2];
 
// Second clock, do the multiplies, get the "partial
// products". Here, we break our input up into two
// halves,
//
// A = (2^16 ah + al)
// B = (2^16 bh + bl)
//
// and use these to compute partial products.
//
// AB = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl)
//
// Since we're following the FOIL algorithm to get here,
// we'll name these partial products according to FOIL.
//
// The trick is what happens if A or B is signed. In
// those cases, the real value of A will not be given by
// A = (2^16 ah + al)
// but rather
// A = (2^16 ah[31^] + al) - 2^31
// (where we have flipped the sign bit of A)
// and so ...
//
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al)
// - 2^62
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31)
//
reg [31:0] pp_f, pp_l; // F and L from FOIL
reg [32:0] pp_oi; // The O and I from FOIL
reg [32:0] pp_s;
always @(posedge i_clk)
begin
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
// And a special one for the sign
if (r_mpy_signed)
pp_s <= 32'h8000_0000-(
r_mpy_a_input[31:0]
+ r_mpy_b_input[31:0]);
else
pp_s <= 33'h0;
end
 
// Third clock, add the results and produce a product
always @(posedge i_clk)
begin
r_mpy_result[15:0] <= pp_l[15:0];
r_mpy_result[63:16] <=
{ 32'h00, pp_l[31:16] }
+ { 15'h00, pp_oi }
+ { pp_s, 15'h00 }
+ { pp_f, 16'h00 };
end
 
assign mpy_result = r_mpy_result;
// Fourth clock -- results are clocked into writeback
end
endgenerate // All possible multiply results have been determined
 
//
// The master ALU case statement
//
always @(posedge i_clk)
if (i_ce)
if (i_stb)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
333,16 → 155,19
default: o_c <= i_b; // MOV, LDI
endcase
end else // if (mpydone)
// set the output based upon the multiply result
o_c <= (mpyhi)?mpy_result[63:32]:mpy_result[31:0];
 
reg r_busy;
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
r_busy <= 1'b0;
else
r_busy <= ((IMPLEMENT_MPY > 1)
&&(this_is_a_multiply_op))||mpybusy;
if (i_reset)
r_busy <= 1'b0;
else if (IMPLEMENT_MPY > 1)
r_busy <= ((i_stb)&&(this_is_a_multiply_op))||mpybusy;
else
r_busy <= 1'b0;
 
assign o_busy = (r_busy); // ||((IMPLEMENT_MPY>1)&&(this_is_a_multiply_op));
 
 
355,11 → 180,108
 
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (IMPLEMENT_MPY <= 1)
o_valid <= (i_ce);
else
o_valid <=((i_ce)&&(!this_is_a_multiply_op))||(mpydone);
if (i_reset)
o_valid <= 1'b0;
else if (IMPLEMENT_MPY <= 1)
o_valid <= (i_stb);
else
o_valid <=((i_stb)&&(!this_is_a_multiply_op))||(mpydone);
 
`ifdef FORMAL
initial assume(i_reset);
reg f_past_valid;
 
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid = 1'b1;
 
`define ASSERT assert
`ifdef CPUOPS
`define ASSUME assume
`else
`define ASSUME assert
`endif
 
// No request should be given us if/while we are busy
always @(posedge i_clk)
if (o_busy)
`ASSUME(!i_stb);
 
// Following any request other than a multiply request, we should
// respond in the next cycle
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(o_busy))&&(!$past(this_is_a_multiply_op)))
`ASSERT(!o_busy);
 
// Valid and busy can never both be asserted
always @(posedge i_clk)
`ASSERT((!o_valid)||(!r_busy));
 
// Following any busy, we should always become valid
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_busy))&&(!o_busy))
`ASSERT($past(i_reset) || o_valid);
 
// Check the shift values
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_stb)))
begin
if (($past(|i_b[31:6]))||($past(i_b[5:0])>6'd32))
begin
assert(($past(i_op)!=4'h5)
||({o_c,c}=={(33){1'b0}}));
assert(($past(i_op)!=4'h6)
||({c,o_c}=={(33){1'b0}}));
assert(($past(i_op)!=4'h7)
||({o_c,c}=={(33){$past(i_a[31])}}));
end else if ($past(i_b[5:0]==6'd32))
begin
assert(($past(i_op)!=4'h5)
||(o_c=={(32){1'b0}}));
assert(($past(i_op)!=4'h6)
||(o_c=={(32){1'b0}}));
assert(($past(i_op)!=4'h7)
||(o_c=={(32){$past(i_a[31])}}));
end if ($past(i_b)==0)
begin
assert(($past(i_op)!=4'h5)
||({o_c,c}=={$past(i_a), 1'b0}));
assert(($past(i_op)!=4'h6)
||({c,o_c}=={1'b0, $past(i_a)}));
assert(($past(i_op)!=4'h7)
||({o_c,c}=={$past(i_a), 1'b0}));
end if ($past(i_b)==1)
begin
assert(($past(i_op)!=4'h5)
||({o_c,c}=={1'b0, $past(i_a)}));
assert(($past(i_op)!=4'h6)
||({c,o_c}=={$past(i_a),1'b0}));
assert(($past(i_op)!=4'h7)
||({o_c,c}=={$past(i_a[31]),$past(i_a)}));
end if ($past(i_b)==2)
begin
assert(($past(i_op)!=4'h5)
||({o_c,c}=={2'b0, $past(i_a[31:1])}));
assert(($past(i_op)!=4'h6)
||({c,o_c}=={$past(i_a[30:0]),2'b0}));
assert(($past(i_op)!=4'h7)
||({o_c,c}=={{(2){$past(i_a[31])}},$past(i_a[31:1])}));
end if ($past(i_b)==31)
begin
assert(($past(i_op)!=4'h5)
||({o_c,c}=={31'b0, $past(i_a[31:30])}));
assert(($past(i_op)!=4'h6)
||({c,o_c}=={$past(i_a[1:0]),31'b0}));
assert(($past(i_op)!=4'h7)
||({o_c,c}=={{(31){$past(i_a[31])}},$past(i_a[31:30])}));
end
end
`endif
endmodule
//
// iCE40 NoMPY,w/Shift NoMPY,w/o Shift
// SB_CARRY 64 64
// SB_DFFE 3 3
// SB_DFFESR 1 1
// SB_DFFSR 33 33
// SB_LUT4 748 323
/dblfetch.v
6,35 → 6,24
//
// Purpose: This is one step beyond the simplest instruction fetch,
// prefetch.v. dblfetch.v uses memory pipelining to fetch two
// instruction words in one cycle, figuring that the unpipelined CPU can't
// go through both at once, but yet recycles itself fast enough for the
// next instruction that would follow. It is designed to be a touch
// faster than the single instruction prefetch, although not as fast as
// the prefetch and cache found elsewhere.
// (or more) instruction words in one bus cycle. If the CPU consumes
// either of these before the bus cycle completes, a new request will be
// made of the bus. In this way, we can keep the CPU filled in spite
// of a (potentially) slow memory operation. The bus request will end
// when both requests have been sent and both result locations are empty.
//
// There are some gotcha's in this logic, however. For example, it's
// illegal to switch devices mid-transaction, since the second device
// might have different timing. I.e. the first device might take 8
// clocks to create an ACK, and the second device might take 2 clocks, the
// acks might therefore come on top of each other, or even out of order.
// But ... in order to keep logic down, we keep track of the PC in the
// o_wb_addr register. Hence, this register gets changed on any i_new_pc.
// The i_pc value associated with i_new_pc will only be valid for one
// clock, hence we can't wait to change. To keep from violating the WB
// rule, therefore, we *must* immediately stop requesting any transaction,
// and then terminate the bus request as soon as possible.
// This routine is designed to be a touch faster than the single
// instruction prefetch (prefetch.v), although not as fast as the
// prefetch and cache approach found elsewhere (pfcache.v).
//
// This has consequences in terms of logic used, leaving this routine
// anything but simple--even though the number of wires affected by
// this is small (o_wb_cyc, o_wb_stb, and last_ack).
// 20180222: Completely rebuilt.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2017, Gisselquist Technology, LLC
// Copyright (C) 2017-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
58,27 → 47,29
////////////////////////////////////////////////////////////////////////////////
//
//
module dblfetch(i_clk, i_rst, i_new_pc, i_clear_cache,
i_stall_n, i_pc, o_i, o_pc, o_v,
`default_nettype none
//
module dblfetch(i_clk, i_reset, i_new_pc, i_clear_cache,
i_stall_n, i_pc, o_insn, o_pc, o_valid,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
o_illegal);
parameter ADDRESS_WIDTH=32, AUX_WIDTH = 1;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc, i_clear_cache,
parameter ADDRESS_WIDTH=30, AUX_WIDTH = 1;
localparam AW=ADDRESS_WIDTH, DW = 32;
input wire i_clk, i_reset, i_new_pc, i_clear_cache,
i_stall_n;
input [(AW-1):0] i_pc;
output reg [31:0] o_i;
output reg [(AW-1):0] o_pc;
output wire o_v;
input wire [(AW+1):0] i_pc;
output reg [(DW-1):0] o_insn;
output reg [(AW+1):0] o_pc;
output reg o_valid;
// Wishbone outputs
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
output reg [(AW-1):0] o_wb_addr;
output wire [31:0] o_wb_data;
output wire [(DW-1):0] o_wb_data;
// And return inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [(DW-1):0] i_wb_data;
// And ... the result if we got an error
output reg o_illegal;
 
85,87 → 76,67
assign o_wb_we = 1'b0;
assign o_wb_data = 32'h0000;
 
reg last_ack, last_stb, invalid_bus_cycle;
reg last_stb, invalid_bus_cycle;
 
reg [31:0] cache [0:1];
reg cache_read_addr, cache_write_addr;
reg [1:0] cache_valid;
reg [(DW-1):0] cache_word;
reg cache_valid;
reg [1:0] inflight;
reg cache_illegal;
 
initial o_wb_cyc = 1'b0;
initial o_wb_stb = 1'b0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
if ((i_reset)||((o_wb_cyc)&&(i_wb_err)))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end else if (o_wb_cyc)
begin
if ((o_wb_stb)&&(!i_wb_stall))
begin
// last_stb <= 1'b1;
o_wb_stb <= !last_stb;
end
// if (i_wb_ack)
// last_ack <= 1'b1;
if ((i_new_pc)||(invalid_bus_cycle))
o_wb_stb <= 1'b0;
if ((!o_wb_stb)||(!i_wb_stall))
o_wb_stb <= (!last_stb);
 
if ((i_wb_ack)&&(
// Release the bus on the second ack
(last_ack)
// Or on the first ACK, if we've been told
// we have an invalid bus cycle
||((o_wb_stb)&&(i_wb_stall)&&(last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
))
// Release the bus on the second ack
if (((i_wb_ack)&&(!o_wb_stb)&&(inflight<=1))
||((!o_wb_stb)&&(inflight == 0))
// Or any new transaction request
||((i_new_pc)||(i_clear_cache)))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
 
if ((!last_stb)&&(i_wb_stall)&&((i_new_pc)||(invalid_bus_cycle)))
// Also release the bus with no acks, if we
// haven't made any requests
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end
end else if ((invalid_bus_cycle)
||((o_v)&&(i_stall_n)&&(cache_read_addr))) // Initiate a bus cycle
end else if ((i_new_pc)||(invalid_bus_cycle)
||((o_valid)&&(i_stall_n)&&(!o_illegal)))
begin
// Initiate a bus cycle if ... the last bus cycle was
// aborted (bus error or new_pc), we've been given a
// new PC to go get, or we just exhausted our one
// instruction cache
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
// last_stb <= 1'b0;
// last_ack <= 1'b0;
end
 
initial last_stb = 1'b0;
initial inflight = 2'b00;
always @(posedge i_clk)
if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall))
last_stb <= 1'b1;
else if (!o_wb_cyc)
last_stb <= 1'b0;
if (!o_wb_cyc)
inflight <= 2'b00;
else begin
case({ ((o_wb_stb)&&(!i_wb_stall)), i_wb_ack })
2'b01: inflight <= inflight - 1'b1;
2'b10: inflight <= inflight + 1'b1;
// If neither ack nor request, then no change. Likewise
// if we have both an ack and a request, there's no change
// in the number of requests in flight.
default: begin end
endcase
end
 
initial last_ack = 1'b0;
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(i_wb_stall)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if ((o_wb_cyc)&&(o_wb_stb)&&(!i_wb_stall)&&(!last_stb)&&(
(i_new_pc)||(invalid_bus_cycle)))
last_ack <= 1'b1;
else if (!o_wb_cyc)
last_ack <= 1'b0;
always @(*)
last_stb = (inflight != 2'b00)||((o_valid)&&(!i_stall_n));
 
initial invalid_bus_cycle = 1'b0;
always @(posedge i_clk)
if (i_rst)
invalid_bus_cycle <= 1'b0;
else if ((i_new_pc)||(i_clear_cache))
if ((o_wb_cyc)&&(i_new_pc))
invalid_bus_cycle <= 1'b1;
else if (!o_wb_cyc)
invalid_bus_cycle <= 1'b0;
173,60 → 144,573
initial o_wb_addr = {(AW){1'b1}};
always @(posedge i_clk)
if (i_new_pc)
o_wb_addr <= i_pc;
else if ((o_wb_stb)&&(!i_wb_stall)&&(!invalid_bus_cycle))
o_wb_addr <= i_pc[AW+1:2];
else if ((o_wb_stb)&&(!i_wb_stall))
o_wb_addr <= o_wb_addr + 1'b1;
 
initial cache_write_addr = 1'b0;
//////////////////
//
// Now for the immediate output word to the CPU
//
//////////////////
 
initial o_valid = 1'b0;
always @(posedge i_clk)
if (!o_wb_cyc)
cache_write_addr <= 1'b0;
else if ((o_wb_cyc)&&(i_wb_ack))
cache_write_addr <= cache_write_addr + 1'b1;
if ((i_reset)||(i_new_pc)||(i_clear_cache))
o_valid <= 1'b0;
else if ((o_wb_cyc)&&((i_wb_ack)||(i_wb_err)))
o_valid <= 1'b1;
else if (i_stall_n)
o_valid <= cache_valid;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache[cache_write_addr] <= i_wb_data;
if ((!o_valid)||(i_stall_n))
begin
if (cache_valid)
o_insn <= cache_word;
else
o_insn <= i_wb_data;
end
 
initial cache_read_addr = 1'b0;
initial o_pc[1:0] = 2'b00;
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle)
||((o_v)&&(cache_read_addr)&&(i_stall_n)))
cache_read_addr <= 1'b0;
else if ((o_v)&&(i_stall_n))
cache_read_addr <= 1'b1;
if (i_new_pc)
o_pc <= i_pc;
else if ((o_valid)&&(i_stall_n))
o_pc[AW+1:2] <= o_pc[AW+1:2] + 1'b1;
 
initial o_illegal = 1'b0;
always @(posedge i_clk)
if ((i_new_pc)||(invalid_bus_cycle))
cache_valid <= 2'b00;
if ((i_reset)||(i_new_pc)||(i_clear_cache))
o_illegal <= 1'b0;
else if ((!o_valid)||(i_stall_n))
begin
if (cache_valid)
o_illegal <= (o_illegal)||(cache_illegal);
else if ((o_wb_cyc)&&(i_wb_err))
o_illegal <= 1'b1;
end
 
 
//////////////////
//
// Now for the output/cached word
//
//////////////////
 
initial cache_valid = 1'b0;
always @(posedge i_clk)
if ((i_reset)||(i_new_pc)||(i_clear_cache))
cache_valid <= 1'b0;
else begin
if ((o_v)&&(i_stall_n))
cache_valid[cache_read_addr] <= 1'b0;
if ((o_wb_cyc)&&(i_wb_ack))
cache_valid[cache_write_addr] <= 1'b1;
if ((o_valid)&&(o_wb_cyc)&&((i_wb_ack)||(i_wb_err)))
cache_valid <= (!i_stall_n)||(cache_valid);
else if (i_stall_n)
cache_valid <= 1'b0;
end
 
initial o_i = {(32){1'b1}};
always @(posedge i_clk)
if ((i_stall_n)&&(o_wb_cyc)&&(i_wb_ack))
o_i <= i_wb_data;
if ((o_wb_cyc)&&(i_wb_ack))
cache_word <= i_wb_data;
 
initial cache_illegal = 1'b0;
always @(posedge i_clk)
if ((i_reset)||(i_clear_cache)||(i_new_pc))
cache_illegal <= 1'b0;
else if ((o_wb_cyc)&&(i_wb_err)&&(o_valid)&&(!i_stall_n))
cache_illegal <= 1'b1;
//
// Some of these properties can be done in yosys-smtbmc, *or* Verilator
//
// Verilator is different from yosys, however, in that Verilator doesn't support
// the $past() directive. Further, any `assume`'s turn into `assert()`s
// within Verilator. We can use this to help prove that the properties
// of interest truly hold, and that any contracts we create or assumptions we
// make truly hold in practice (i.e. in simulation).
//
`ifdef FORMAL
`define VERILATOR_FORMAL
`else
`ifdef VERILATOR
//
// Define VERILATOR_FORMAL here to have Verilator check your formal properties
// during simulation. assert() and assume() statements will both have the
// same effect within VERILATOR of causing your simulation to suddenly end.
//
// I have this property commented because it only works on the newest versions
// of Verilator (3.9 something and later), and I tend to still use Verilator
// 3.874.
//
// `define VERILATOR_FORMAL
`endif
`endif
 
`ifdef VERILATOR_FORMAL
// Keep track of a flag telling us whether or not $past()
// will return valid results
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid = 1'b1;
 
// Keep track of some alternatives to $past that can still be used
// in a VERILATOR environment
reg f_past_reset, f_past_clear_cache, f_past_o_valid,
f_past_stall_n;
 
initial f_past_reset = 1'b1;
initial f_past_clear_cache = 1'b0;
initial f_past_o_valid = 1'b0;
initial f_past_stall_n = 1'b1;
always @(posedge i_clk)
begin
f_past_reset <= i_reset;
f_past_clear_cache <= i_clear_cache;
f_past_o_valid <= o_valid;
f_past_stall_n <= i_stall_n;
end
`endif
 
`ifdef FORMAL
//
//
// Generic setup
//
//
`ifdef DBLFETCH
`define ASSUME assume
`else
`define ASSUME assert
`endif
 
/////////////////////////////////////////////////
//
//
// Assumptions about our inputs
//
//
/////////////////////////////////////////////////
 
always @(*)
if (!f_past_valid)
`ASSUME(i_reset);
 
//
// Assume that resets, new-pc commands, and clear-cache commands
// are never more than pulses--one clock wide at most.
//
// It may be that the CPU treats us differently. We'll only restrict
// our solver to this here.
/*
always @(posedge i_clk)
if (f_past_valid)
begin
if (f_past_reset)
restrict(!i_reset);
if ($past(i_new_pc))
restrict(!i_new_pc);
end
*/
 
//
// Assume we start from a reset condition
initial assume(i_reset);
 
/////////////////////////////////////////////////
//
//
// Wishbone bus properties
//
//
/////////////////////////////////////////////////
 
localparam F_LGDEPTH=2;
wire [(F_LGDEPTH-1):0] f_nreqs, f_nacks, f_outstanding;
 
//
// Add a bunch of wishbone-based asserts
fwb_master #(.AW(AW), .DW(DW), .F_LGDEPTH(F_LGDEPTH),
.F_MAX_STALL(2),
.F_MAX_REQUESTS(0), .F_OPT_SOURCE(1),
.F_OPT_RMW_BUS_OPTION(1),
.F_OPT_DISCONTINUOUS(0))
f_wbm(i_clk, i_reset,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data, 4'h0,
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err,
f_nreqs, f_nacks, f_outstanding);
 
`endif
 
//
// Now, apply the following to Verilator *or* yosys-smtbmc
//
`ifdef VERILATOR_FORMAL
/////////////////////////////////////////////////
//
//
// Assumptions about our interaction with the CPU
//
//
/////////////////////////////////////////////////
 
// Assume that any reset is either accompanied by a new address,
// or a new address immediately follows it.
always @(posedge i_clk)
if ((f_past_valid)&&(f_past_reset))
assume(i_new_pc);
 
always @(posedge i_clk)
if (f_past_clear_cache)
assume(!i_clear_cache);
 
//
//
// The bottom two bits of the PC address register are always zero.
// They are there to make examining traces easier, but I expect
// the synthesis tool to remove them.
//
always @(*)
assume(i_pc[1:0] == 2'b00);
 
// Some things to know from the CPU ... there will always be a
// i_new_pc request following any reset
always @(posedge i_clk)
if ((f_past_valid)&&(f_past_reset))
assume(i_new_pc);
 
// There will also be a i_new_pc request following any request to clear
// the cache.
always @(posedge i_clk)
if ((f_past_valid)&&(f_past_clear_cache))
assume(i_new_pc);
 
always @(posedge i_clk)
if (f_past_clear_cache)
assume(!i_clear_cache);
 
always @(*)
assume(i_pc[1:0] == 2'b00);
`endif
 
`ifdef FORMAL
//
// Let's make some assumptions about how long it takes our phantom
// (i.e. assumed) CPU to respond.
//
// This delay needs to be long enough to flush out any potential
// errors, yet still short enough that the formal method doesn't
// take forever to solve.
//
`ifdef DBLFETCH
localparam F_CPU_DELAY = 4;
reg [4:0] f_cpu_delay;
 
// Now, let's look at the delay the CPU takes to accept an instruction.
always @(posedge i_clk)
// If no instruction is ready, then keep our counter at zero
if ((!o_valid)||(i_stall_n))
f_cpu_delay <= 0;
else
o_i <= cache[cache_read_addr];
// Otherwise, count the clocks the CPU takes to respond
f_cpu_delay <= f_cpu_delay + 1'b1;
 
initial o_pc = 0;
always @(posedge i_clk)
if (i_new_pc)
o_pc <= i_pc;
else if ((o_v)&&(i_stall_n))
o_pc <= o_pc + 1'b1;
assume(f_cpu_delay < F_CPU_DELAY);
`endif
 
assign o_v = cache_valid[cache_read_addr];
 
initial o_illegal = 1'b0;
 
/////////////////////////////////////////////////
//
//
// Assertions about our outputs
//
//
/////////////////////////////////////////////////
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_err))
o_illegal <= 1'b1;
else if ((!o_wb_cyc)&&((i_new_pc)||(invalid_bus_cycle)))
o_illegal <= 1'b0;
if ((f_past_valid)&&($past(o_wb_stb))&&(!$past(i_wb_stall))
&&(!$past(i_new_pc)))
assert(o_wb_addr <= $past(o_wb_addr)+1'b1);
 
//
// Assertions about our return responses to the CPU
//
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&(!$past(i_new_pc))&&(!$past(i_clear_cache))
&&($past(o_valid))&&(!$past(i_stall_n)))
begin
assert($stable(o_pc));
assert($stable(o_insn));
assert($stable(o_valid));
assert($stable(o_illegal));
end
 
// The same is true of the cache as well.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&(!$past(i_new_pc))&&(!$past(i_clear_cache))
&&($past(o_valid))&&(!$past(i_stall_n))
&&($past(cache_valid)))
begin
assert($stable(cache_valid));
assert($stable(cache_word));
assert($stable(cache_illegal));
end
 
// Consider it invalid to present the CPU with the same instruction
// twice in a row. Any effort to present the CPU with the same
// instruction twice in a row must go through i_new_pc, and thus a
// new bus cycle--hence the assertion below makes sense.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_new_pc))
&&($past(o_valid))&&($past(i_stall_n)))
assert(o_pc[AW+1:2] == $past(o_pc[AW+1:2])+1'b1);
 
 
//
// As with i_pc[1:0], the bottom two bits of the address are unused.
// Let's assert here that they remain zero.
always @(*)
assert(o_pc[1:0] == 2'b00);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&(!$past(i_new_pc))
&&(!$past(i_clear_cache))
&&($past(o_wb_cyc))&&($past(i_wb_err)))
assert( ((o_valid)&&(o_illegal))
||((cache_valid)&&(cache_illegal)) );
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(o_illegal))&&(o_illegal))
assert(o_valid);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(cache_illegal))&&(!cache_valid))
assert(!cache_illegal);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_new_pc)))
assert(!o_valid);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&(!$past(i_clear_cache))
&&($past(o_valid))&&(!o_valid)&&(!o_illegal))
assert((o_wb_cyc)||(invalid_bus_cycle));
 
/////////////////////////////////////////////////
//
//
// Our "contract" with the CPU
//
//
/////////////////////////////////////////////////
//
// For any particular address, that address is associated with an
// instruction and a flag regarding whether or not it is illegal.
//
// Any attempt to return to the CPU a value from this address,
// must return the value and the illegal flag.
//
(* anyconst *) reg [AW-1:0] f_const_addr;
(* anyconst *) reg [DW-1:0] f_const_insn;
(* anyconst *) reg f_const_illegal;
 
//
// While these wires may seem like overkill, and while they make the
// following logic perhaps a bit more obscure, these predicates make
// it easier to follow the complex logic on a scope. They don't
// affect anything synthesized.
//
wire f_this_addr, f_this_pc, f_this_req, f_this_data,
f_this_insn;
 
assign f_this_addr = (o_wb_addr == f_const_addr);
assign f_this_pc = (o_pc == { f_const_addr, 2'b00 });
assign f_this_req = (i_pc == { f_const_addr, 2'b00 });
assign f_this_data = (i_wb_data == f_const_insn);
assign f_this_insn = (o_insn == f_const_insn);
 
 
//
//
// Here's our contract:
//
// Any time we return a value for the address above, it *must* be
// the "right" value.
//
// Contract, part one: if the chosen address is illegal, any word returned
// for it must be flagged illegal.
always @(*)
if ((o_valid)&&(f_this_pc)&&(f_const_illegal))
	assert(o_illegal);

// Contract, part two: any word returned for the chosen address that is
// not flagged illegal must carry the chosen instruction.
always @(*)
if ((o_valid)&&(f_this_pc)&&(!o_illegal))
	assert(f_this_insn);
 
//
// The contract will only work if we assume the return from the
// bus at this address will be the right return.
// f_this_return is true when (o_wb_addr - f_outstanding) equals the
// arbitrary contract address.
// NOTE(review): presumably this difference is the address the next bus
// response belongs to -- confirm against how f_outstanding is counted.
wire f_this_return;
assign f_this_return = (o_wb_addr - f_outstanding == f_const_addr);
always @(*)
if ((o_wb_cyc)&&(f_this_return))
begin
// Any acknowledgment here must carry the contract's instruction word
if (i_wb_ack)
assume(i_wb_data == f_const_insn);

// An "illegal" contract address is never acknowledged, while a legal
// one never returns a bus error.
if (f_const_illegal)
assume(!i_wb_ack);
else
assume(!i_wb_err);
end
 
//
// Here is a corollary to our contract. Anything in the one-word
// cache must also match the contract as well.
//
// The cached word belongs to the address following o_pc.  When that
// address is the contract address, a cached word not marked illegal must
// be the contract instruction ...
always @(*)
if ((cache_valid)&&(o_pc[AW+1:2] + 1'b1 == f_const_addr)
		&&(!cache_illegal))
	assert(cache_word == f_const_insn);

// ... and an illegal contract address must be marked illegal in the cache.
always @(*)
if ((cache_valid)&&(o_pc[AW+1:2] + 1'b1 == f_const_addr)
		&&(f_const_illegal))
	assert(cache_illegal);
 
// cache_illegal may not become set while the cache is not valid.
// NOTE(review): this property is a verbatim repeat of one already
// asserted earlier in this formal section; one copy could be dropped.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(cache_illegal))&&(!cache_valid))
assert(!cache_illegal);
 
////////////////////////////////////////////////////////
//
//
// Additional assertions necessary to pass induction
//
//
////////////////////////////////////////////////////////
//
// We have only a one word cache. Hence, we shouldn't be asking
// for more data any time we have nowhere to put it.
// A request strobe is only allowed if the cache slot is empty or
// i_stall_n is high.
always @(*)
if (o_wb_stb)
assert((!cache_valid)||(i_stall_n));

// With both the output word and the cache word valid, nothing may be
// outstanding on the bus and no new request may be issued.
always @(*)
if ((o_valid)&&(cache_valid))
assert((f_outstanding == 0)&&(!o_wb_stb));

// With a valid output word and i_stall_n low, at most one request may
// be outstanding.
always @(*)
if ((o_valid)&&(!i_stall_n))
assert(f_outstanding < 2);

// In every other case, at most two requests may be outstanding.
always @(*)
if ((!o_valid)||(i_stall_n))
assert(f_outstanding <= 2);
 
// A bus cycle that had stopped issuing requests on the prior clock, yet
// is still open now, must still have something in flight.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc))&&(!$past(o_wb_stb))
&&(o_wb_cyc))
assert(inflight != 0);

// An acknowledgment may only arrive while the cache slot is empty.
always @(*)
if ((o_wb_cyc)&&(i_wb_ack))
assert(!cache_valid);

// While a bus cycle is open, the design's own inflight counter must agree
// with the formal f_outstanding count.
always @(posedge i_clk)
if (o_wb_cyc)
assert(inflight == f_outstanding);
 
// this_return_address: the request address less the number of requests
//	still outstanding
// next_pc_address: the word address immediately following o_pc
wire [AW-1:0] this_return_address,
next_pc_address;
assign this_return_address = o_wb_addr - f_outstanding;
assign next_pc_address = o_pc[AW+1:2] + 1'b1;

// If a bus response (ack or error) arrived on the prior clock, during an
// undisturbed bus cycle, while the output word was either not valid or
// i_stall_n was high, and the cache was empty, then o_pc must now name
// the address that response was computed for.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc))
&&(!$past(i_reset))
&&(!$past(i_new_pc))
&&(!$past(i_clear_cache))
&&(!$past(invalid_bus_cycle))
&&(($past(i_wb_ack))||($past(i_wb_err)))
&&((!$past(o_valid))||($past(i_stall_n)))
&&(!$past(cache_valid)))
assert(o_pc[AW+1:2] == $past(this_return_address));

// While a bus cycle continues and nothing valid is on offer, o_pc must
// equal the current return address.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc))&&(!o_valid)&&(!$past(i_new_pc))
&&(o_wb_cyc))
assert(o_pc[AW+1:2] == this_return_address);

// When the cache slot has just become valid, the address following o_pc
// must equal the prior clock's return address.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc))
&&(!$past(cache_valid))&&(cache_valid))
assert(next_pc_address == $past(this_return_address));
 
 
 
// Relates the bus addresses to o_pc, case by case
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc))&&(o_wb_cyc))
begin
// A bus cycle that was open on the prior clock and remains open
if ((o_valid)&&(!cache_valid))
// Output word valid, cache empty: the return address is the one after o_pc
assert(this_return_address == next_pc_address);
else if (!o_valid)
// Nothing valid on offer: the return address is o_pc itself
assert(this_return_address == o_pc[AW+1:2]);
end else if ((f_past_valid)&&(!invalid_bus_cycle)
&&(!o_wb_cyc)&&(o_valid)&&(!o_illegal)
&&(!cache_valid))
// Bus closed, a legal word on offer, cache empty: the request address
// must already point at the word after o_pc
assert(o_wb_addr == next_pc_address);
 
 
// invalid_bus_cycle and an open bus cycle are mutually exclusive
always @(*)
if (invalid_bus_cycle)
assert(!o_wb_cyc);
// The cache slot can only be valid while the output word is valid too
always @(*)
if (cache_valid)
assert(o_valid);
 
/////////////////////////////////////////////////////
//
//
// Cover statements
//
//
/////////////////////////////////////////////////////
 
// Cover: reach a clock following one where f_nacks was three and a
// further acknowledgment arrived within an open bus cycle.
always @(posedge i_clk)
cover((f_past_valid)&&($past(f_nacks)==3)
&&($past(i_wb_ack))&&($past(o_wb_cyc)));
 
 
/////////////////////////////////////////////////////
//
//
// Temporary simplifications
//
//
/////////////////////////////////////////////////////
 
// always @(*)
// assume((!i_wb_err)&&(!f_const_illegal));
 
 
`endif // FORMAL
endmodule
//
// Usage: (this) (prior) (old) (S6)
// Cells 374 387 585 459
// FDRE 135 108 203 171
// LUT1 2 3 2
// LUT2 9 3 4 5
// LUT3 98 76 104 71
// LUT4 2 0 2 2
// LUT5 3 35 35 3
// LUT6 6 5 10 43
// MUXCY 58 62 93 62
// MUXF7 1 0 2 3
// MUXF8 0 1 1
// RAM64X1D 0 32 32 32
// XORCY 60 64 96 64
//
/dcache.v
40,15 → 40,15
// virtual page size--lest in the middle of reading a page a TLB miss
// take place referencing only a part of the cacheable page.
//
//
//
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2016, Gisselquist Technology, LLC
// Copyright (C) 2016-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
67,88 → 67,146
////////////////////////////////////////////////////////////////////////////////
//
//
module dcache(i_clk, i_rst, i_pipe_stb, i_lock,
`default_nettype none
//
//
`ifdef FORMAL
`define ASSERT assert
 
`ifdef DCACHE
`define ASSUME assume
`else
`define ASSUME assert
`endif
`endif
 
module dcache(i_clk, i_reset, i_pipe_stb, i_lock,
i_op, i_addr, i_data, i_oreg,
o_busy, o_pipe_stalled, o_valid, o_err, o_wreg,o_data,
o_wb_cyc_gbl, o_wb_cyc_lcl, o_wb_stb_gbl, o_wb_stb_lcl,
o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
o_wb_we, o_wb_addr, o_wb_data, o_wb_sel,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data
`ifdef FORMAL
, f_nreqs, f_nacks, f_outstanding, f_pc
`endif
);
parameter LGCACHELEN = 8,
ADDRESS_WIDTH=32,
LGNLINES=5, // Log of the number of separate cache lines
IMPLEMENT_LOCK=0,
ADDRESS_WIDTH=30,
LGNLINES=(LGCACHELEN-3), // Log of the number of separate cache lines
NAUX=5; // # of aux d-wires to keep aligned w/memops
localparam SDRAM_BIT = 26;
localparam FLASH_BIT = 22;
localparam BLKRAM_BIT= 15;
parameter [0:0] OPT_LOCAL_BUS=1'b1;
parameter [0:0] OPT_PIPE=1'b1;
parameter [0:0] OPT_LOCK=1'b1;
parameter [0:0] OPT_DUAL_READ_PORT=1'b1;
parameter OPT_FIFO_DEPTH = 4;
localparam AW = ADDRESS_WIDTH; // Just for ease of notation below
localparam CS = LGCACHELEN; // Number of bits in a cache address
localparam LS = CS-LGNLINES; // Bits to spec position w/in cline
localparam LGAUX = 3; // log_2 of the maximum number of piped data
input i_clk, i_rst;
parameter F_LGDEPTH=1 + (((!OPT_PIPE)||(LS > OPT_FIFO_DEPTH))
? LS : OPT_FIFO_DEPTH);
localparam LGAUX = 3; // log_2 of the maximum number of piped data
localparam DW = 32; // Bus data width
localparam DP = OPT_FIFO_DEPTH;
//
localparam [1:0] DC_IDLE = 2'b00; // Bus is idle
localparam [1:0] DC_WRITE = 2'b01; // Write
localparam [1:0] DC_READS = 2'b10; // Read a single value(!cachd)
localparam [1:0] DC_READC = 2'b11; // Read a whole cache line
//
input wire i_clk, i_reset;
// Interface from the CPU
input i_pipe_stb, i_lock;
input i_op;
input [31:0] i_addr;
input [31:0] i_data;
input [(NAUX-1):0] i_oreg; // Aux data, such as reg to write to
input wire i_pipe_stb, i_lock;
input wire [2:0] i_op;
input wire [(DW-1):0] i_addr;
input wire [(DW-1):0] i_data;
input wire [(NAUX-1):0] i_oreg; // Aux data, such as reg to write to
// Outputs, going back to the CPU
output wire o_busy, o_pipe_stalled, o_valid, o_err;
output reg o_busy;
output reg o_pipe_stalled;
output reg o_valid, o_err;
output reg [(NAUX-1):0] o_wreg;
output reg [31:0] o_data;
output reg [(DW-1):0] o_data;
// Wishbone bus master outputs
output wire o_wb_cyc_gbl, o_wb_cyc_lcl;
output reg o_wb_stb_gbl, o_wb_stb_lcl;
output reg o_wb_we;
output reg [(AW-1):0] o_wb_addr;
output reg [31:0] o_wb_data;
output reg [(AW-1):0] o_wb_addr;
output reg [(DW-1):0] o_wb_data;
output wire [(DW/8-1):0] o_wb_sel;
// Wishbone bus slave response inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [(DW-1):0] i_wb_data;
`ifdef FORMAL
output wire [(F_LGDEPTH-1):0] f_nreqs, f_nacks, f_outstanding;
output wire f_pc;
 
reg f_past_valid;
`endif
//
// output reg [31:0] o_debug;
 
 
reg cyc, stb, last_ack, end_of_line, last_line_stb;
reg r_wb_cyc_gbl, r_wb_cyc_lcl;
// npending is the number of pending non-cached operations, counted
// from the i_pipe_stb to the i_wb_ack
reg [DP:0] npending;
 
 
reg [((1<<LGNLINES)-1):0] c_v; // One bit per cache line, is it valid?
reg [(AW-LS-1):0] c_vtags [0:((1<<LGNLINES)-1)];
reg [31:0] c_mem [0:((1<<CS)-1)];
// reg [((1<<LGNLINES)-1):0] c_wr; // Is the cache line writable?
// reg c_wdata;
// reg c_waddr;
reg [(DW-1):0] c_mem [0:((1<<CS)-1)];
reg set_vflag;
reg [1:0] state;
reg [(CS-1):0] wr_addr;
reg [(DW-1):0] cached_idata, cached_rdata;
reg [DW-1:0] pre_data;
reg lock_gbl, lock_lcl;
 
 
// To simplify writing to the cache, and the job of the synthesizer to
// recognize that a cache write needs to take place, we'll take an extra
// clock to get there, and use these c_w... registers to capture the
// data in the meantime.
reg c_wr;
reg [31:0] c_wdata;
reg [(DW-1):0] c_wdata;
reg [(DW/8-1):0] c_wsel;
reg [(CS-1):0] c_waddr;
 
reg [(AW-LS-1):0] last_tag;
reg last_tag_valid;
 
 
wire [(LGNLINES-1):0] i_cline;
wire [(CS-1):0] i_caddr;
wire [(AW-LS-1):0] i_ctag;
 
assign i_cline = i_addr[(CS-1):LS];
assign i_caddr = i_addr[(CS-1):0];
assign i_ctag = i_addr[(AW-1):LS];
`ifdef FORMAL
reg [F_LGDEPTH-1:0] f_fill;
reg [AW:0] f_return_address;
reg [AW:0] f_pending_addr;
`endif
 
assign i_cline = i_addr[(CS+1):LS+2];
assign i_caddr = i_addr[(CS+1):2];
 
wire cache_miss_inow, w_cachable;
assign cache_miss_inow = (last_tag != i_addr[31:LS])||(!c_v[i_cline]);
assign w_cachable = (i_addr[31:30]!=2'b11)&&(!i_lock)&&(
((SDRAM_BIT>0)&&(i_addr[SDRAM_BIT]))
||((FLASH_BIT>0)&&(i_addr[FLASH_BIT]))
||((BLKRAM_BIT>0)&&(i_addr[BLKRAM_BIT])));
assign cache_miss_inow = (!last_tag_valid)
||(last_tag != i_addr[(AW+1):LS+2])
||(!c_v[i_cline]);
 
reg r_cachable, r_svalid, r_dvalid, r_rd, r_cache_miss, r_rvalid;
reg [(AW-1):0] r_addr;
reg [31:0] r_idata, r_ddata, r_rdata;
wire raw_cachable_address;
 
iscachable chkaddress(i_addr[AW+1:2], raw_cachable_address);
 
assign w_cachable = ((!OPT_LOCAL_BUS)||(i_addr[(DW-1):(DW-8)]!=8'hff))
&&((!i_lock)||(!OPT_LOCK))&&(raw_cachable_address);
 
reg r_cachable, r_svalid, r_dvalid, r_rd, r_cache_miss,
r_rd_pending;
reg [(AW-1):0] r_addr;
wire [(LGNLINES-1):0] r_cline;
wire [(CS-1):0] r_caddr;
wire [(AW-LS-1):0] r_ctag;
wire [(CS-1):0] r_caddr;
wire [(AW-LS-1):0] r_ctag;
 
assign r_cline = r_addr[(CS-1):LS];
assign r_caddr = r_addr[(CS-1):0];
155,9 → 213,14
assign r_ctag = r_addr[(AW-1):LS];
 
 
reg wr_cstb, r_iv, pipeable_op, non_pipeable_op, in_cache;
reg wr_cstb, r_iv, in_cache;
reg [(AW-LS-1):0] r_itag;
reg [DW/8-1:0] r_sel;
reg [(NAUX+4-1):0] req_data;
reg gie;
 
 
 
//
// The one-clock delayed read values from the cache.
//
165,15 → 228,28
initial r_cachable = 1'b0;
initial r_svalid = 1'b0;
initial r_dvalid = 1'b0;
initial r_cache_miss = 1'b0;
initial r_addr = 0;
initial last_tag_valid = 0;
initial r_rd_pending = 0;
always @(posedge i_clk)
if (i_reset)
begin
r_rd <= 1'b0;
r_cachable <= 1'b0;
r_svalid <= 1'b0;
r_dvalid <= 1'b0;
r_cache_miss <= 1'b0;
r_addr <= 0;
r_rd_pending <= 0;
last_tag_valid <= 0;
end else begin
// The single clock path
r_idata <= c_mem[i_addr[(CS-1):0]];
// The valid for the single clock path
// Only ... we need to wait if we are currently writing
// to our cache.
r_svalid<= (!i_op)&&(!cache_miss_inow)&&(w_cachable)
&&(i_pipe_stb)&&(!c_wr)&&(!wr_cstb);
r_svalid<= (i_pipe_stb)&&(!i_op[0])&&(w_cachable)
&&(!cache_miss_inow)&&(!c_wr)&&(!wr_cstb);
 
//
// The two clock in-cache path
180,25 → 256,37
//
// Some preliminaries that needed to be calculated on the first
// clock
if (!o_busy)
if ((!o_pipe_stalled)&&(!r_rd_pending))
r_addr <= i_addr[(AW+1):2];
if ((!o_pipe_stalled)&&(!r_rd_pending))
begin
r_iv <= c_v[i_cline];
r_itag <= c_vtags[i_cline];
r_addr <= i_addr;
r_cachable <= (!i_op)&&(w_cachable)&&(i_pipe_stb);
r_cachable <= (!i_op[0])&&(w_cachable)&&(i_pipe_stb);
r_rd_pending <= (i_pipe_stb)&&(!i_op[0])&&(w_cachable)
&&((cache_miss_inow)||(c_wr)||(wr_cstb));
// &&((!c_wr)||(!wr_cstb));
end else begin
r_iv <= c_v[r_cline];
r_itag <= c_vtags[r_cline];
r_rd_pending <= (r_rd_pending)
&&((!cyc)||(!i_wb_err))
&&((r_itag != r_ctag)||(!r_iv));
end
// r_idata still contains the right answer
r_rd <= (i_pipe_stb)&&(!i_op);
r_ddata <= r_idata;
r_rd <= (i_pipe_stb)&&(!i_op[0]);
// r_itag contains the tag we didn't have available to us on the
// last clock, r_ctag is a bit select from r_addr containing a
// one clock delayed address.
r_dvalid <= (r_itag == r_ctag)&&(r_iv)&&(r_cachable);
if ((r_itag == r_ctag)&&(r_iv)&&(r_cachable))
r_dvalid <= (!r_svalid)&&(!r_dvalid)&&(r_itag == r_ctag)&&(r_iv)
&&(r_cachable)&&(r_rd_pending);
if ((r_itag == r_ctag)&&(r_iv)&&(r_cachable)&&(r_rd_pending))
begin
last_tag_valid <= 1'b1;
last_tag <= r_ctag;
end else if ((state == DC_READC)
&&(last_tag[CS-LS-1:0]==o_wb_addr[CS-1:LS])
&&((i_wb_ack)||(i_wb_err)))
last_tag_valid <= 1'b0;
 
// r_cache_miss takes a clock cycle. It is only ever true for
// something that should be cachable, but isn't in the cache.
214,63 → 302,453
// Two clock path -- misses as well
&&(r_rd)&&(!r_svalid)
&&((r_itag != r_ctag)||(!r_iv));
end
 
r_rdata <= c_mem[r_addr[(CS-1):0]];
r_rvalid<= ((i_wb_ack)&&(last_ack));
// r_sel: the byte-select mask for the request being accepted.  It is
// captured from i_op[2:1] and i_addr[1:0] on any clock the pipe is not
// stalled, and reset to all lanes.
//
//	i_op[2:1] = 2'b0?	all four lanes
//	i_op[2:1] = 2'b10	two lanes, picked by i_addr[1]
//				(4'b1100 when i_addr[1] is clear)
//	i_op[2:1] = 2'b11	one lane, picked by i_addr[1:0]
//				(4'b1000 for 2'b00 down to 4'b0001 for 2'b11)
//
// The sixteen patterns below are exhaustive, so no default is needed.
initial r_sel = 4'hf;
always @(posedge i_clk)
if (i_reset)
r_sel <= 4'hf;
else if (!o_pipe_stalled)
begin
casez({i_op[2:1], i_addr[1:0]})
4'b0???: r_sel <= 4'b1111;
4'b100?: r_sel <= 4'b1100;
4'b101?: r_sel <= 4'b0011;
4'b1100: r_sel <= 4'b1000;
4'b1101: r_sel <= 4'b0100;
4'b1110: r_sel <= 4'b0010;
4'b1111: r_sel <= 4'b0001;
endcase
end
 
`define DC_IDLE 2'b00
`define DC_WRITE 2'b01
`define DC_READS 2'b10
`define DC_READC 2'b11
reg [1:0] state;
assign o_wb_sel = (state == DC_READC) ? 4'hf : r_sel;
 
reg [(AW-LS-1):0] wr_wtag, wr_vtag;
reg [31:0] wr_data;
reg [(CS-1):0] wr_addr;
// o_wb_data: the data word driven onto the bus.  It is loaded whenever we
// are not busy, or whenever the current request is being accepted
// (stb with no stall).  For i_op[2:1] = 2'b10 the low 16 bits of i_data
// are copied into both halves of the word; for 2'b11 the low 8 bits are
// copied into all four bytes; otherwise i_data is passed unchanged.
initial o_wb_data = 0;
always @(posedge i_clk)
if (i_reset)
o_wb_data <= 0;
else if ((!o_busy)||((stb)&&(!i_wb_stall)))
begin
casez(i_op[2:1])
2'b0?: o_wb_data <= i_data;
2'b10: o_wb_data <= { (2){i_data[15:0]} };
2'b11: o_wb_data <= { (4){i_data[ 7:0]} };
endcase
end
 
generate if (OPT_PIPE)
begin : OPT_PIPE_FIFO
reg [NAUX+4-2:0] fifo_data [0:((1<<OPT_FIFO_DEPTH)-1)];
 
reg [DP:0] wraddr, rdaddr;
 
always @(posedge i_clk)
if (i_pipe_stb)
fifo_data[wraddr[DP-1:0]]
<= { i_oreg[NAUX-2:0], i_op[2:1], i_addr[1:0] };
 
always @(posedge i_clk)
if (i_pipe_stb)
gie <= i_oreg[NAUX-1];
 
`ifdef NO_BKRAM
reg [NAUX+4-2:0] r_req_data, r_last_data;
reg single_write;
 
always @(posedge i_clk)
r_req_data <= fifo_data[rdaddr[DP-1:0]];
 
always @(posedge i_clk)
single_write <= (rdaddr == wraddr)&&(i_pipe_stb);
 
always @(posedge i_clk)
if (i_pipe_stb)
r_last_data <= { i_oreg[NAUX-2:0],
i_op[2:1], i_addr[1:0] };
 
always @(*)
begin
req_data[NAUX+4-1] = gie;
// if ((r_svalid)||(state == DC_READ))
if (single_write)
req_data[NAUX+4-2:0] = r_last_data;
else
req_data[NAUX+4-2:0] = r_req_data;
end
 
always @(*)
`ASSERT(req_data == fifo_data[rdaddr[DP-1:0]]);
`else
always @(*)
req_data[NAUX+4-2:0] = fifo_data[rdaddr[DP-1:0]];
always @(*)
req_data[NAUX+4-1] = gie;
`endif
 
// wraddr: FIFO write pointer.  Cleared on reset or on a bus error during
// an active cycle, and advanced by one for every accepted i_pipe_stb.
// It is one bit wider than the FIFO's address, so that
// (wraddr - rdaddr) can also express a completely full FIFO.
initial wraddr = 0;
always @(posedge i_clk)
if ((i_reset)||((cyc)&&(i_wb_err)))
wraddr <= 0;
else if (i_pipe_stb)
wraddr <= wraddr + 1'b1;
 
// rdaddr: FIFO read pointer.  Cleared together with wraddr (reset, or a
// bus error during an active cycle).  Otherwise it steps forward once
// for each request that completes: either a read satisfied from the
// cache (r_svalid or r_dvalid), or a bus acknowledgment while in the
// DC_WRITE or DC_READS states.
initial	rdaddr = 0;
always @(posedge i_clk)
if ((i_reset)||((cyc)&&(i_wb_err)))
	rdaddr <= 0;
else if ((r_dvalid)||(r_svalid)
		||((i_wb_ack)&&((state == DC_WRITE)||(state == DC_READS))))
	rdaddr <= rdaddr + 1'b1;
 
`ifdef FORMAL
reg [AW-1:0] f_fifo_addr [0:((1<<OPT_FIFO_DEPTH)-1)];
reg [F_LGDEPTH-1:0] f_last_wraddr;
reg f_pc_pending;
 
always @(*)
begin
f_fill = 0;
f_fill[DP:0] = wraddr - rdaddr;
end
 
always @(*)
`ASSERT(f_fill <= { 1'b1, {(DP){1'b0}} });
 
// While a cached read is completing (r_svalid or r_dvalid) or waiting
// (r_rd_pending), exactly one request occupies the FIFO.  At all other
// times the FIFO's fill must match the npending counter.
//
// (The prior form of this property nested a second r_svalid/r_dvalid
// test inside the first, leaving a final "f_fill == 0" branch that could
// never be reached.  The checks made are unchanged.)
always @(*)
if ((r_dvalid)||(r_svalid)||(r_rd_pending))
	`ASSERT(f_fill == 1);
else
	`ASSERT(f_fill == npending);
 
 
initial f_pc_pending = 0;
always @(posedge i_clk)
if (i_reset)
f_pc_pending <= 1'b0;
else if (i_pipe_stb)
f_pc_pending <= (!i_op[0])&&(i_oreg[3:1] == 3'h7);
else if (f_fill == 0)
f_pc_pending <= 1'b0;
//else if ((o_valid)&&(o_wreg[3:1] == 3'h7)&&(f_fill == 0))
// f_pc_pending <= 1'b0;
 
always @(posedge i_clk)
if (f_pc_pending)
`ASSUME(!i_pipe_stb);
 
always @(posedge i_clk)
if (state == DC_WRITE)
`ASSERT(!f_pc_pending);
 
always @(*)
begin
f_last_wraddr = 0;
f_last_wraddr[DP:0] = wraddr - 1'b1;
end
 
// While a cached read is pending, f_pc_pending must agree with the
// register field stored in the most recently written FIFO entry.
//
// fifo_data only has (1<<DP) entries, whereas f_last_wraddr carries the
// pointer's wrap bit(s) as well.  Index with the low DP bits only, as
// every other access to fifo_data does, so as not to read out of range.
always @(posedge i_clk)
if (r_rd_pending)
	`ASSERT(f_pc_pending == (fifo_data[f_last_wraddr[DP-1:0]][7:5] == 3'h7));
 
`define INSPECT_FIFO
reg [((1<<(DP+1))-1):0] f_valid_fifo_entry;
 
genvar k;
for(k=0; k<(1<<(DP+1)); k=k+1)
begin
 
always @(*)
begin
f_valid_fifo_entry[k] = 1'b0;
/*
if ((rdaddr[DP] != wraddr[DP])
&&(rdaddr[DP-1:0] == wraddr[DP-1:0]))
f_valid_fifo_entry[k] = 1'b1;
else */
if ((rdaddr < wraddr)&&(k < wraddr)
&&(k >= rdaddr))
f_valid_fifo_entry[k] = 1'b1;
else if ((rdaddr > wraddr)&&(k >= rdaddr))
f_valid_fifo_entry[k] = 1'b1;
else if ((rdaddr > wraddr)&&(k < wraddr))
f_valid_fifo_entry[k] = 1'b1;
end
 
`ifdef INSPECT_FIFO
wire [NAUX+4-2:0] fifo_data_k;
 
assign fifo_data_k = fifo_data[k[DP-1:0]];
always @(*)
if (f_valid_fifo_entry[k])
begin
if (!f_pc_pending)
`ASSERT((o_wb_we)||(fifo_data_k[7:5] != 3'h7));
else if (k != f_last_wraddr)
`ASSERT(fifo_data_k[7:5] != 3'h7);
end
`endif // INSPECT_FIFO
 
end
 
`ifndef	INSPECT_FIFO
// Fallback for when the per-entry FIFO inspection is disabled: assume
// that the entry at the read pointer doesn't target the PC unless it is
// also the last entry written.
//
// Two fixes from the prior form: the read pointer is now compared
// against the whole of the last-write pointer, rather than the single
// bit f_last_wraddr[DP-1], and fifo_data is indexed with the low DP bits
// of the pointer so the read stays within the array.
always @(posedge i_clk)
if ((r_rd_pending)&&(rdaddr[DP:0] != f_last_wraddr[DP:0]))
	assume(fifo_data[rdaddr[DP-1:0]][7:5] != 3'h7);
`endif // INSPECT_FIFO
 
assign f_pc = f_pc_pending;
 
//
//
//
// The address of the request at the head of the FIFO.  f_fifo_addr has
// only (1<<DP) entries, so drop the read pointer's wrap bit before
// indexing--just as the write side does when storing the address.
always @(*)
	f_pending_addr = f_fifo_addr[rdaddr[DP-1:0]];
 
//
//
//
always @(posedge i_clk)
if (i_pipe_stb)
f_fifo_addr[wraddr[DP-1:0]] <= i_addr[AW+1:2];
 
// f_return_address: the top bit records whether the local bus is in use;
// the remainder is the address recorded for the request at the head of
// the FIFO.  While filling a cache line (DC_READC), the low LS bits are
// instead the request address less the number of requests outstanding.
//
// f_fifo_addr has only (1<<DP) entries, so the read pointer's wrap bit
// is dropped before indexing, keeping the read within the array.
always @(*)
begin
	f_return_address[AW] = (o_wb_cyc_lcl);
	f_return_address[AW-1:0] = f_fifo_addr[rdaddr[DP-1:0]];
	if (state == DC_READC)
		f_return_address[LS-1:0]
			= (o_wb_addr[LS-1:0] - f_outstanding[LS-1:0]);
end
 
`define TWIN_WRITE_TEST
`ifdef TWIN_WRITE_TEST
(* anyconst *) reg [DP:0] f_twin_base;
reg [DP:0] f_twin_next;
(* anyconst *) reg [AW+NAUX+4-2-1:0] f_twin_first,
f_twin_second;
// reg [AW-1:0] f_fifo_addr [0:((1<<OPT_FIFO_DEPTH)-1)];
// reg [NAUX+4-2:0] fifo_data [0:((1<<OPT_FIFO_DEPTH)-1)];
 
always @(*) f_twin_next = f_twin_base+1;
 
reg f_twin_none, f_twin_single, f_twin_double, f_twin_last;
reg f_twin_valid_one, f_twin_valid_two;
// An entry of the twin-write test counts as "valid" when its FIFO slot
// is occupied and the slot's recorded address and data both match the
// arbitrary constant chosen for it.
//
// f_twin_base and f_twin_next are full (DP+1)-bit pointers, which is
// what f_valid_fifo_entry expects.  The two memories, however, have only
// (1<<DP) entries apiece, so they are indexed by the low DP bits alone.
always @(*)
begin
	f_twin_valid_one = ((f_valid_fifo_entry[f_twin_base])
		&&(f_twin_first == { f_fifo_addr[f_twin_base[DP-1:0]],
				fifo_data[f_twin_base[DP-1:0]] }));
	f_twin_valid_two = ((f_valid_fifo_entry[f_twin_next])
		&&(f_twin_second == { f_fifo_addr[f_twin_next[DP-1:0]],
				fifo_data[f_twin_next[DP-1:0]] }));
end
 
always @(*)
begin
f_twin_none =(!f_twin_valid_one)&&(!f_twin_valid_two);
f_twin_single =( f_twin_valid_one)&&(!f_twin_valid_two);
f_twin_double =( f_twin_valid_one)&&( f_twin_valid_two);
f_twin_last =(!f_twin_valid_one)&&( f_twin_valid_two);
end
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset))||($past(cyc && i_wb_err)))
`ASSERT(f_twin_none);
else if ($past(f_twin_none))
`ASSERT(f_twin_none || f_twin_single || f_twin_last);
else if ($past(f_twin_single))
`ASSERT(f_twin_none || f_twin_single || f_twin_double || f_twin_last);
else if ($past(f_twin_double))
`ASSERT(f_twin_double || f_twin_last);
else if ($past(f_twin_last))
`ASSERT(f_twin_none || f_twin_single || f_twin_last);
 
`endif // TWIN_WRITE_TEST
 
always @(*)
`ASSERT(req_data == { gie, fifo_data[rdaddr[DP-1:0]] });
 
always @(posedge i_clk)
if (r_svalid||r_dvalid || r_rd_pending)
`ASSERT(f_fill == 1);
else if (f_fill > 0)
`ASSERT(cyc);
 
always @(posedge i_clk)
if (state != 0)
`ASSERT(f_fill > 0);
else if (!r_svalid && !r_dvalid && !r_rd_pending)
`ASSERT(f_fill == 0);
 
`endif // FORMAL
 
always @(posedge i_clk)
o_wreg <= req_data[(NAUX+4-1):4];
 
/*
reg fifo_err;
always @(posedge i_clk)
begin
fifo_err <= 1'b0;
if ((!o_busy)&&(rdaddr != wraddr))
fifo_err <= 1'b1;
if ((!r_dvalid)&&(!r_svalid)&&(!r_rd_pending))
fifo_err <= (npending != (wraddr-rdaddr));
end
 
always @(*)
o_debug = { i_pipe_stb, state, cyc, stb, // 5b
fifo_err, i_oreg[3:0], o_wreg, // 10b
rdaddr, wraddr, // 10b
i_wb_ack, i_wb_err, o_pipe_stalled, o_busy,//4b
r_svalid, r_dvalid, r_rd_pending };
*/
end else begin : NO_FIFO
 
always @(posedge i_clk)
if (i_pipe_stb)
req_data <= { i_oreg, i_op[2:1], i_addr[1:0] };
 
always @(*)
o_wreg = req_data[(NAUX+4-1):4];
 
always @(*)
gie = i_oreg[NAUX-1];
 
`ifdef FORMAL
assign f_pc = ((r_rd_pending)||(o_valid))&&(o_wreg[3:1] == 3'h7);
 
//
//
//
initial f_pending_addr = 0;
always @(posedge i_clk)
if (i_reset)
f_pending_addr <= 0;
else if (i_pipe_stb)
begin
f_pending_addr <= { (OPT_LOCAL_BUS)&&(&i_addr[DW-1:DW-8]),
i_addr[(AW+1):2] };
end
 
//
//
always @(*)
begin
f_return_address[AW] = o_wb_cyc_lcl;
f_return_address[AW-1:LS] = o_wb_addr[AW-1:LS];
end
always @(*)
if (state == DC_READS)
f_return_address[LS-1:0] = o_wb_addr[LS-1:0];
else
f_return_address[LS-1:0]
= (o_wb_addr[LS-1:0] - f_outstanding[LS-1:0]);
 
`endif
/*
always @(*)
o_debug = { i_pipe_stb, state, cyc, stb, // 5b
i_oreg, o_wreg, // 10b
10'hb, // 10b
i_wb_ack, i_wb_err, o_pipe_stalled, o_busy,//4b
r_svalid, r_dvalid, r_rd_pending };
*/
 
// verilator lint_off UNUSED
wire unused_no_fifo;
assign unused_no_fifo = gie;
// verilator lint_on UNUSED
end endgenerate
 
 
initial r_wb_cyc_gbl = 0;
initial r_wb_cyc_lcl = 0;
initial o_wb_stb_gbl = 0;
initial o_wb_stb_lcl = 0;
initial c_v = 0;
initial cyc = 0;
initial stb = 0;
initial c_wr = 0;
initial wr_cstb = 0;
initial state = DC_IDLE;
initial set_vflag = 1'b0;
always @(posedge i_clk)
if (i_reset)
begin
c_v <= 0;
c_wr <= 1'b0;
c_wsel <= 4'hf;
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 0;
o_wb_stb_lcl <= 0;
wr_cstb <= 1'b0;
last_line_stb <= 1'b0;
end_of_line <= 1'b0;
state <= DC_IDLE;
cyc <= 1'b0;
stb <= 1'b0;
state <= DC_IDLE;
set_vflag <= 1'b0;
end else begin
// By default, update the cache from the write 1-clock ago
c_wr <= (wr_cstb)&&(wr_wtag == wr_vtag);
c_wdata <= wr_data;
c_waddr <= wr_addr[(CS-1):0];
// c_wr <= (wr_cstb)&&(wr_wtag == wr_vtag);
// c_waddr <= wr_addr[(CS-1):0];
c_wr <= 0;
 
set_vflag <= 1'b0;
if ((!cyc)&&(set_vflag))
c_v[c_waddr[(CS-1):LS]] <= 1'b1;
 
wr_cstb <= 1'b0;
wr_vtag <= c_vtags[o_wb_addr[(CS-LS-1):0]];
wr_wtag <= o_wb_addr[(AW-LS-1):0];
wr_data <= o_wb_data;
wr_addr <= o_wb_addr[(CS-1):0];
 
if (!cyc)
wr_addr <= r_addr[(CS-1):0];
else if (i_wb_ack)
wr_addr <= wr_addr + 1'b1;
else
wr_addr <= wr_addr;
 
if (LS <= 1)
if (LS <= 0)
end_of_line <= 1'b1;
else
end_of_line<=(cyc)&&((c_waddr[(LS-1):1]=={(LS-1){1'b1}})
||((i_wb_ack)
&&(c_waddr[(LS-1):0]=={{(LS-2){1'b1}},2'b01})));
else if (!cyc)
end_of_line <= 1'b0;
else if (!end_of_line)
begin
if (i_wb_ack)
end_of_line
<= (c_waddr[(LS-1):0] == {{(LS-2){1'b1}},2'b01});
else
end_of_line
<= (c_waddr[(LS-1):0]=={{(LS-1){1'b1}}, 1'b0});
end
 
if (LS <= 1)
if (!cyc)
last_line_stb <= (LS <= 0);
else if ((stb)&&(!i_wb_stall)&&(LS <= 1))
last_line_stb <= 1'b1;
else
last_line_stb <= (stb)&&
((o_wb_addr[(LS-1):1]=={(LS-1){1'b1}})
||((!i_wb_stall)
&&(o_wb_addr[(LS-1):0]
=={{(LS-2){1'b1}},2'b01})));
else if ((stb)&&(!i_wb_stall))
last_line_stb <= (o_wb_addr[(LS-1):1]=={(LS-1){1'b1}});
else if (stb)
last_line_stb <= (o_wb_addr[(LS-1):0]=={(LS){1'b1}});
 
//
if (state == `DC_IDLE)
pipeable_op <= 1'b0;
if (state == `DC_IDLE)
non_pipeable_op <= 1'b0;
 
 
if (state == `DC_IDLE)
//
if (state == DC_IDLE)
begin
o_wb_we <= 1'b0;
o_wb_data <= i_data;
pipeable_op <= 1'b0;
non_pipeable_op <= 1'b1;
 
cyc <= 1'b0;
stb <= 1'b0;
280,47 → 758,58
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
 
in_cache <= (i_op)&&(w_cachable);
if ((i_pipe_stb)&&(i_op))
in_cache <= (i_op[0])&&(w_cachable);
if ((i_pipe_stb)&&(i_op[0]))
begin // Write operation
state <= `DC_WRITE;
o_wb_addr <= i_addr;
state <= DC_WRITE;
o_wb_addr <= i_addr[(AW+1):2];
o_wb_we <= 1'b1;
pipeable_op <= 1'b1;
 
cyc <= 1'b1;
stb <= 1'b1;
 
r_wb_cyc_gbl <= (i_addr[31:30]!=2'b11);
r_wb_cyc_lcl <= (i_addr[31:30]==2'b11);
o_wb_stb_gbl <= (i_addr[31:30]!=2'b11);
o_wb_stb_lcl <= (i_addr[31:30]==2'b11);
if (OPT_LOCAL_BUS)
begin
r_wb_cyc_gbl <= (i_addr[DW-1:DW-8]!=8'hff);
r_wb_cyc_lcl <= (i_addr[DW-1:DW-8]==8'hff);
o_wb_stb_gbl <= (i_addr[DW-1:DW-8]!=8'hff);
o_wb_stb_lcl <= (i_addr[DW-1:DW-8]==8'hff);
end else begin
r_wb_cyc_gbl <= 1'b1;
o_wb_stb_gbl <= 1'b1;
end
 
end else if (r_cache_miss)
begin
state <= `DC_READC;
o_wb_addr <= { i_ctag, {(LS){1'b0}} };
non_pipeable_op <= 1'b1;
state <= DC_READC;
o_wb_addr <= { r_ctag, {(LS){1'b0}} };
 
c_waddr <= { r_ctag[CS-LS-1:0], {(LS){1'b0}} }-1'b1;
cyc <= 1'b1;
stb <= 1'b1;
r_wb_cyc_gbl <= 1'b1;
o_wb_stb_gbl <= 1'b1;
wr_addr[LS-1:0] <= 0;
end else if ((i_pipe_stb)&&(!w_cachable))
begin // Read non-cachable memory area
state <= `DC_READS;
o_wb_addr <= i_addr;
pipeable_op <= 1'b1;
state <= DC_READS;
o_wb_addr <= i_addr[(AW+1):2];
 
cyc <= 1'b1;
stb <= 1'b1;
r_wb_cyc_gbl <= (i_addr[31:30]!=2'b11);
r_wb_cyc_lcl <= (i_addr[31:30]==2'b11);
o_wb_stb_gbl <= (i_addr[31:30]!=2'b11);
o_wb_stb_lcl <= (i_addr[31:30]==2'b11);
if (OPT_LOCAL_BUS)
begin
r_wb_cyc_gbl <= (i_addr[DW-1:DW-8]!=8'hff);
r_wb_cyc_lcl <= (i_addr[DW-1:DW-8]==8'hff);
o_wb_stb_gbl <= (i_addr[DW-1:DW-8]!=8'hff);
o_wb_stb_lcl <= (i_addr[DW-1:DW-8]==8'hff);
end else begin
r_wb_cyc_gbl <= 1'b1;
o_wb_stb_gbl <= 1'b1;
end
end // else we stay idle
 
end else if (state == `DC_READC)
end else if (state == DC_READC)
begin
// We enter here once we have committed to reading
// data into a cache line.
331,26 → 820,31
o_wb_addr[(LS-1):0] <= o_wb_addr[(LS-1):0]+1'b1;
end
 
if(stb)
c_v[o_wb_addr[(CS-LS-1):0]] <= 1'b0;
if ((i_wb_ack)&&(!end_of_line))
c_v[o_wb_addr[(CS-1):LS]] <= 1'b0;
 
c_wr <= (i_wb_ack);
c_wdata <= o_wb_data;
c_waddr <= ((c_wr)?(c_waddr+1'b1):c_waddr);
c_wr <= (i_wb_ack);
c_wdata <= i_wb_data;
c_waddr <= ((i_wb_ack)?(c_waddr+1'b1):c_waddr);
c_wsel <= 4'hf;
 
c_vtags[o_wb_addr[(CS-LS-1):0]]<= o_wb_addr[(AW-LS-1):0];
set_vflag <= !i_wb_err;
if (i_wb_ack)
c_vtags[r_addr[(CS-1):LS]]
<= r_addr[(AW-1):LS];
 
if (((i_wb_ack)&&(end_of_line))||(i_wb_err))
begin
state <= `DC_IDLE;
non_pipeable_op <= 1'b0;
state <= DC_IDLE;
cyc <= 1'b0;
stb <= 1'b0;
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
//
c_v[o_wb_addr[(CS-LS-1):0]] <= i_wb_ack;
end
end else if (state == `DC_READS)
end else if (state == DC_READS)
begin
// We enter here once we have committed to reading
// data that cannot go into a cache line
359,27 → 853,31
stb <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
pipeable_op <= 1'b0;
end
 
if ((!i_wb_stall)&&(i_pipe_stb))
o_wb_addr <= i_data;
o_wb_addr <= i_addr[(AW+1):2];
 
c_wr <= 1'b0;
 
if (((i_wb_ack)&&(last_ack))||(i_wb_err))
begin
state <= `DC_IDLE;
state <= DC_IDLE;
cyc <= 1'b0;
stb <= 1'b0;
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
end
end else if (state == `DC_WRITE)
end else if (state == DC_WRITE)
begin
// c_wr <= (c_v[])&&(c_tag[])&&(in_cache)&&(stb);
c_wr <= (stb)&&(c_v[o_wb_addr[CS-1:LS]])
&&(c_vtags[o_wb_addr[CS-1:LS]]==o_wb_addr[AW-1:LS])
&&(stb);
c_wdata <= o_wb_data;
c_waddr <= (state == `DC_IDLE)?i_caddr
: ((c_wr)?(c_waddr+1'b1):c_waddr);
c_waddr <= r_addr[CS-1:0];
c_wsel <= o_wb_sel;
 
if ((!i_wb_stall)&&(!i_pipe_stb))
begin
386,27 → 884,79
stb <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
pipeable_op <= 1'b0;
end
 
wr_cstb <= (stb)&&(!i_wb_stall)&&(in_cache);
 
if ((stb)&&(!i_wb_stall)&&(i_pipe_stb))
o_wb_addr <= i_addr;
if ((stb)&&(!i_wb_stall)&&(i_pipe_stb))
o_wb_data <= i_data;
if (((i_wb_ack)&&(last_ack))||(i_wb_err))
if ((stb)&&(!i_wb_stall))
o_wb_addr <= i_addr[(AW+1):2];
 
if (((i_wb_ack)&&(last_ack)
&&((!OPT_PIPE)||(!i_pipe_stb)))
||(i_wb_err))
begin
state <= `DC_IDLE;
state <= DC_IDLE;
cyc <= 1'b0;
stb <= 1'b0;
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
o_wb_stb_gbl <= 1'b0;
o_wb_stb_lcl <= 1'b0;
end
end
end
 
//
// npending is the number of outstanding (non-cached) read or write
// requests
// The counter is forced to zero on reset, when pipelining is disabled,
// on a bus error, when idle with no new request, and throughout a cache
// line fill (DC_READC).  Following a single-clock cache hit (r_svalid),
// it restarts at one if a new request is arriving, or zero if not.
// Otherwise it counts up on a new request, down on an acknowledgment,
// and holds if both or neither take place on the same clock.
initial npending = 0;
always @(posedge i_clk)
if ((i_reset)||(!OPT_PIPE)
||((cyc)&&(i_wb_err))
||((!cyc)&&(!i_pipe_stb))
||(state == DC_READC))
npending <= 0;
else if (r_svalid)
npending <= (i_pipe_stb) ? 1:0;
else case({ (i_pipe_stb), (cyc)&&(i_wb_ack) })
2'b01: npending <= npending - 1'b1;
2'b10: npending <= npending + 1'b1;
default: begin end
endcase
 
// last_ack: a registered flag which, together with i_wb_ack, is what ends
// a DC_READS or DC_WRITE bus cycle.
// NOTE(review): presumably "the next acknowledgment will be the final
// one outstanding" -- confirm against the npending thresholds below.
initial last_ack = 1'b0;
always @(posedge i_clk)
if (i_reset)
last_ack <= 1'b0;
else if (state == DC_IDLE)
begin
// Idle: set if a write or a non-cachable read is starting.  When
// starting a cache line fill, set only if LS is zero.
last_ack <= 1'b0;
if ((i_pipe_stb)&&(i_op[0]))
last_ack <= 1'b1;
else if (r_cache_miss)
last_ack <= (LS == 0);
else if ((i_pipe_stb)&&(!w_cachable))
last_ack <= 1'b1;
end else if (state == DC_READC)
begin
// Line fill: once set, stays set.  Becomes set when the low bits of
// wr_addr are all ones--ignoring bit zero on a clock with an ack.
if (i_wb_ack)
last_ack <= last_ack || (&wr_addr[LS-1:1]);
else
last_ack <= last_ack || (&wr_addr[LS-1:0]);
end else case({ (i_pipe_stb), (i_wb_ack) })
// DC_READS/DC_WRITE: re-evaluate from npending whenever exactly one of
// a new request or an acknowledgment takes place; hold otherwise.
2'b01: last_ack <= (npending <= 2);
2'b10: last_ack <= (!cyc)||(npending == 0);
default: begin end
endcase
 
`ifdef FORMAL
always @(*)
`ASSERT(npending <= { 1'b1, {(DP){1'b0}} });
 
`endif
 
 
//
// Writes to the cache
//
// These have been made as simple as possible. Note that the c_wr
413,10 → 963,19
// line has already been determined, as have the write value and address
// on the last clock. Further, this structure is defined to match the
// block RAM design of as many architectures as possible.
//
//
always @(posedge i_clk)
if (c_wr)
c_mem[c_waddr] <= c_wdata;
if (c_wr)
begin
if (c_wsel[0])
c_mem[c_waddr][7:0] <= c_wdata[7:0];
if (c_wsel[1])
c_mem[c_waddr][15:8] <= c_wdata[15:8];
if (c_wsel[2])
c_mem[c_waddr][23:16] <= c_wdata[23:16];
if (c_wsel[3])
c_mem[c_waddr][31:24] <= c_wdata[31:24];
end
 
//
// Reads from the cache
426,82 → 985,1101
// going to be our output will need to be determined with combinatorial
// logic on the output.
//
reg [31:0] cached_idata, cached_rdata;
always @(posedge i_clk)
cached_idata <= c_mem[i_caddr];
generate if (OPT_DUAL_READ_PORT)
begin
 
always @(posedge i_clk)
cached_rdata <= c_mem[r_caddr];
always @(posedge i_clk)
cached_idata <= c_mem[i_caddr];
 
always @(posedge i_clk)
cached_rdata <= c_mem[r_caddr];
 
end else begin
 
always @(posedge i_clk)
cached_rdata <= c_mem[(o_busy) ? r_caddr : i_caddr];
 
always @(*)
cached_idata = cached_rdata;
 
end endgenerate
 
// o_data can come from one of four places:
// 1. The cache, assuming the data was in the last cache line
// 2. The cache, second clock, assuming the data was in the cache at all
// 3. The cache, after filling the cache
// 4. The wishbone state machine, upon reading the value desired.
always @(posedge i_clk)
always @(*)
if (r_svalid)
o_data <= cached_idata;
else if ((i_wb_ack)&&(pipeable_op))
o_data <= i_wb_data;
pre_data = cached_idata;
else if (state == DC_READS)
pre_data = i_wb_data;
else
o_data <= cached_rdata;
pre_data = cached_rdata;
 
always @(posedge i_clk)
o_valid <= (r_svalid)||((i_wb_ack)&&(pipeable_op))
||(r_dvalid)||(r_rvalid);
casez(req_data[3:0])
4'b100?: o_data <= { 16'h0, pre_data[31:16] };
4'b101?: o_data <= { 16'h0, pre_data[15: 0] };
4'b1100: o_data <= { 24'h0, pre_data[31:24] };
4'b1101: o_data <= { 24'h0, pre_data[23:16] };
4'b1110: o_data <= { 24'h0, pre_data[15: 8] };
4'b1111: o_data <= { 24'h0, pre_data[ 7: 0] };
default o_data <= pre_data;
endcase
 
// o_valid
//
// Registered "result is ready" strobe.  It is cleared on reset.  While the
// state machine is in DC_READS, o_valid simply follows the bus
// acknowledgement (i_wb_ack).  In every other state it is set whenever
// either r_svalid or r_dvalid is set.
// NOTE(review): r_svalid/r_dvalid are presumably the "hit in the last cache
// line" and "hit on the second clock" flags described for o_data -- confirm
// against their definitions earlier in the file.
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_reset)
o_valid <= 1'b0;
else if (state == DC_READS)
o_valid <= i_wb_ack;
else
o_valid <= (r_svalid)||(r_dvalid);
 
// o_err
//
// Registered bus-error indication.  Cleared on reset; otherwise set for the
// clock following any cycle in which a bus cycle was active (cyc) and the
// bus reported an error (i_wb_err).  Since it is re-evaluated every clock,
// it is only high for as long as that condition was true one clock earlier.
initial o_err = 1'b0;
always @(posedge i_clk)
if (i_reset)
o_err <= 1'b0;
else
o_err <= (cyc)&&(i_wb_err);
 
assign o_busy = (state != `DC_IDLE);
// o_busy
//
// Registered busy flag.  The branches below form a priority chain, so their
// order matters:
//   1. Reset, or a bus error during an active cycle, clears busy.
//   2. Any new request (i_pipe_stb) sets busy.
//   3. An acknowledgement while in DC_READS clears busy.
//   4. A read that is pending but has not yet produced r_dvalid keeps us
//      busy.
//   5. In DC_WRITE, the acknowledgement flagged as the last one (last_ack)
//      clears busy.  The (!i_pipe_stb) term is already guaranteed by
//      branch 2 having been skipped.
//   6. Any other active bus cycle keeps us busy.
//   7. Otherwise we are idle.
initial o_busy = 0;
always @(posedge i_clk)
if ((i_reset)||((cyc)&&(i_wb_err)))
o_busy <= 1'b0;
else if (i_pipe_stb)
o_busy <= 1'b1;
else if ((state == DC_READS)&&(i_wb_ack))
o_busy <= 1'b0;
else if ((r_rd_pending)&&(!r_dvalid))
o_busy <= 1'b1;
else if ((state == DC_WRITE)
&&(i_wb_ack)&&(last_ack)&&(!i_pipe_stb))
o_busy <= 1'b0;
else if (cyc)
o_busy <= 1'b1;
else // if ((r_dvalid)||(r_svalid))
o_busy <= 1'b0;
 
//
// We can use our FIFO addresses to pre-calculate when an ACK is going
// to be the last_noncachable_ack.
 
 
// o_pipe_stalled
//
// Combinatorial stall indication back to the requester.
//
// When OPT_PIPE is set, we stall if any of the following hold:
//   - a bus cycle is active (cyc) and it is either a read cycle
//     (!o_wb_we), the bus itself is stalled (i_wb_stall), or our strobe
//     line (stb) is low,
//   - a read is pending (r_rd_pending), or
//   - bit DP of the npending counter is set.
//     NOTE(review): bit DP is the top bit of npending if npending is
//     [DP:0], i.e. "no room for another outstanding request" -- confirm
//     against the declaration.
//
// When OPT_PIPE is clear, the stall line is simply o_busy.
always @(*)
if (OPT_PIPE)
o_pipe_stalled = (cyc)&&((!o_wb_we)||(i_wb_stall)||(!stb))
||(r_rd_pending)||(npending[DP]);
else
o_pipe_stalled = o_busy;
 
// lock_gbl, lock_lcl
//
// Bus-lock registers, one per bus (global and local).  Both are cleared on
// reset.  Otherwise a lock can only be set or held while OPT_LOCK is
// enabled and i_lock is asserted.  It becomes set once the corresponding
// bus cycle line (r_wb_cyc_gbl / r_wb_cyc_lcl) is active, and then holds
// itself until i_lock is released.
initial lock_gbl = 0;
initial lock_lcl = 0;
always @(posedge i_clk)
if (i_reset)
begin
lock_gbl <= 1'b0;
lock_lcl<= 1'b0;
end else begin
lock_gbl <= (OPT_LOCK)&&(i_lock)&&((r_wb_cyc_gbl)||(lock_gbl));
lock_lcl <= (OPT_LOCK)&&(i_lock)&&((r_wb_cyc_lcl)||(lock_lcl));
end
 
assign o_wb_cyc_gbl = (r_wb_cyc_gbl)||(lock_gbl);
assign o_wb_cyc_lcl = (r_wb_cyc_lcl)||(lock_lcl);
 
generate if (AW+2 < DW)
begin : UNUSED_BITS
 
// Verilator lint_off UNUSED
wire [DW-AW-2:0] unused;
assign unused = i_addr[DW-1:AW+1];
// Verilator lint_on UNUSED
end endgenerate
 
`ifdef FORMAL
 
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
////////////////////////////////////////////////
//
// Handle our auxilliary data lines.
// Reset properties
//
// These just go into a FIFO upon request, and then get fed back out
// upon completion of an OP.
////////////////////////////////////////////////
//
// These are currently designed for handling bursts of writes or
// non-cachable reads.
//
// A very similar structure will be used once we switch to using an
// MMU, in order to make certain memory operations are synchronous
// enough to deal with bus errors.
always @(*)
if(!f_past_valid)
`ASSUME(i_reset);
 
// Reset properties
//
// On the very first clock of the proof (!f_past_valid), and on the clock
// following any reset, every register listed below must hold its reset
// value.  This ties the "initial" statements and the synchronous reset
// logic together, so that neither can drift from the other unnoticed.
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
// Insist on initial statements matching reset values
`ASSERT(r_rd == 1'b0);
`ASSERT(r_cachable == 1'b0);
`ASSERT(r_svalid == 1'b0);
`ASSERT(r_dvalid == 1'b0);
`ASSERT(r_cache_miss == 1'b0);
`ASSERT(r_addr == 0);
//
// No cache write may be in progress, and no cache line may be valid
`ASSERT(c_wr == 0);
`ASSERT(c_v == 0);
//
// assert(aux_head == 0);
// assert(aux_tail == 0);
//
// Neither bus may be locked
`ASSERT(lock_gbl == 0);
`ASSERT(lock_lcl == 0);
end
 
////////////////////////////////////////////////
//
reg [(LGAUX-1):0] aux_head, aux_tail;
reg [(NAUX-1):0] aux_fifo [0:((1<<LGAUX)-1)];
initial aux_head = 0;
initial aux_tail = 0;
// Assumptions about our inputs
//
////////////////////////////////////////////////
//
//
always @(*)
if (o_pipe_stalled)
`ASSUME(!i_pipe_stb);
 
always @(*)
if (!f_past_valid)
`ASSUME(!i_pipe_stb);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&($past(i_pipe_stb))&&($past(o_pipe_stalled)))
begin
if ((i_rst)||(i_wb_err))
aux_head <= 0;
else if ((i_pipe_stb)&&(!o_busy))
aux_head <= aux_head + 1'b1;
aux_fifo[aux_head] <= i_oreg;
`ASSUME($stable(i_pipe_stb));
`ASSUME($stable(i_op[0]));
`ASSUME($stable(i_addr));
if (i_op[0])
`ASSUME($stable(i_data));
end
 
always @(posedge i_clk)
if (o_err)
`ASSUME(!i_pipe_stb);
 
////////////////////////////////////////////////
//
// Wishbone properties
//
////////////////////////////////////////////////
//
//
wire f_cyc, f_stb;
 
assign f_cyc = (o_wb_cyc_gbl)|(o_wb_cyc_lcl);
assign f_stb = (o_wb_stb_gbl)|(o_wb_stb_lcl);
 
always @(*)
begin
if ((i_rst)||(i_wb_err))
aux_tail <= 0;
else if (o_valid) // ||(aux_tail[WBIT])&&(no-mmu-error)
aux_tail <= aux_tail + 1'b1;
o_wreg <= aux_fifo[aux_tail];
// Only one interface can be active at once
`ASSERT((!o_wb_cyc_gbl)||(!o_wb_cyc_lcl));
// Strobe may only be active on the active interface
`ASSERT((r_wb_cyc_gbl)||(!o_wb_stb_gbl));
`ASSERT((r_wb_cyc_lcl)||(!o_wb_stb_lcl));
if (o_wb_stb_lcl)
begin
if (o_wb_we)
assert(state == DC_WRITE);
else
assert(state == DC_READS);
end
 
if (cyc)
assert(o_wb_we == (state == DC_WRITE));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(cyc)&&($past(cyc)))
begin
`ASSERT($stable(r_wb_cyc_gbl));
`ASSERT($stable(r_wb_cyc_lcl));
end
 
 
`ifdef DCACHE
`define FWB_MASTER fwb_master
`else
`define FWB_MASTER fwb_counter
`endif
 
`FWB_MASTER #(
.AW(AW), .DW(DW),
.F_MAX_STALL(2),
.F_MAX_ACK_DELAY(3),
// If you need the proof to run faster, use these
// lines instead of the two that follow
// .F_MAX_STALL(1),
// .F_MAX_ACK_DELAY(1),
.F_LGDEPTH(F_LGDEPTH),
.F_MAX_REQUESTS((OPT_PIPE) ? 0 : (1<<LS)),
`ifdef DCACHE
.F_OPT_SOURCE(1'b1),
`endif
.F_OPT_DISCONTINUOUS(0)
) fwb(i_clk, i_reset,
cyc, f_stb, o_wb_we, o_wb_addr, o_wb_data, o_wb_sel,
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err,
f_nreqs, f_nacks, f_outstanding);
 
`ifdef DCACHE // Arbitrary access is specific to local dcache implementation
////////////////////////////////////////////////
//
// We can use our FIFO addresses to pre-calculate when an ACK is going
// to be the last_noncachable_ack.
// Arbitrary address properties
//
////////////////////////////////////////////////
//
//
(* anyconst *) reg [AW:0] f_const_addr;
(* anyconst *) reg f_const_buserr;
wire [AW-LS-1:0] f_const_tag, f_ctag_here, f_wb_tag;
wire [CS-LS-1:0] f_const_tag_addr;
reg [DW-1:0] f_const_data;
wire [DW-1:0] f_cmem_here;
reg f_pending_rd;
wire f_cval_in_cache;
 
assign f_const_tag = f_const_addr[AW-1:LS];
assign f_const_tag_addr = f_const_addr[CS-1:LS];
assign f_cmem_here = c_mem[f_const_addr[CS-1:0]];
assign f_ctag_here = c_vtags[f_const_addr[CS-1:LS]];
assign f_wb_tag = o_wb_addr[AW-1:LS];
 
assign o_pipe_stalled=((pipeable_op)&&(i_wb_stall))||(non_pipeable_op);
// pipeable_op must become zero when stb goes low
assign f_cval_in_cache= (c_v[f_const_addr[CS-1:LS]])
&&(f_ctag_here == f_const_tag);
 
generate if ((AW > DW - 8)&&(OPT_LOCAL_BUS))
begin : UPPER_CONST_ADDR_BITS
 
always @(*)
if (f_const_addr[AW])
assume(&f_const_addr[(AW-1):(DW-8)]);
else
assume(!(&f_const_addr[(AW-1):(DW-8)]));
end endgenerate
 
wire [AW-1:0] wb_start;
assign wb_start = (f_stb) ? (o_wb_addr - f_nreqs) : o_wb_addr;
 
// Data changes upon request
always @(posedge i_clk)
begin
lock_gbl <= (i_lock)&&((r_wb_cyc_gbl)||(lock_gbl));
lock_lcl <= (i_lock)&&((r_wb_cyc_lcl)||(lock_lcl));
if ((i_pipe_stb)&&(i_addr[(AW+1):2] == f_const_addr[AW-1:0])
&&(f_const_addr[AW] == ((OPT_LOCAL_BUS)
&&(&i_addr[(DW-1):(DW-8)])))
&&(i_op[0]))
begin
casez({ i_op[2:1], i_addr[1:0] })
4'b0???: f_const_data <= i_data;
4'b100?: f_const_data[31:16] <= i_data[15:0];
4'b101?: f_const_data[15: 0] <= i_data[15:0];
4'b1100: f_const_data[31:24] <= i_data[ 7:0];
4'b1101: f_const_data[23:16] <= i_data[ 7:0];
4'b1110: f_const_data[15: 8] <= i_data[ 7:0];
4'b1111: f_const_data[ 7: 0] <= i_data[ 7:0];
endcase
end
 
if (f_cval_in_cache)
assume((!i_wb_err)
||(!i_pipe_stb)
||(f_const_addr[AW-1:0] != i_addr[AW+1:2]));
end
 
assign o_wb_cyc_gbl = (r_wb_cyc_gbl)||(lock_gbl);
assign o_wb_cyc_lcl = (r_wb_cyc_lcl)||(lock_lcl);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!i_reset)&&(!f_const_buserr))
begin
if ((cyc)&&(o_wb_we)&&(f_stb)
&&(o_wb_addr[AW-1:0] == f_const_addr[AW-1:0])
&&( o_wb_stb_lcl == f_const_addr[AW]))
begin
 
//
// Changing our data
//
if (o_wb_sel[0])
`ASSERT(o_wb_data[ 7: 0]==f_const_data[ 7: 0]);
if (o_wb_sel[1])
`ASSERT(o_wb_data[15: 8]==f_const_data[15: 8]);
if (o_wb_sel[2])
`ASSERT(o_wb_data[23:16]==f_const_data[23:16]);
if (o_wb_sel[3])
`ASSERT(o_wb_data[31:24]==f_const_data[31:24]);
 
// Check the data in the cache
if ((!f_const_addr[AW])&&(c_v[f_const_tag_addr])
&&(f_ctag_here == o_wb_addr[AW-1:LS]))
begin
if ((!c_wsel[0])&&(!o_wb_sel[0]))
`ASSERT(f_cmem_here[ 7: 0]==f_const_data[ 7: 0]);
if ((!c_wsel[1])&&(!o_wb_sel[1]))
`ASSERT(f_cmem_here[15: 8]==f_const_data[15: 8]);
if ((!c_wsel[2])&&(!o_wb_sel[2]))
`ASSERT(f_cmem_here[23:16]==f_const_data[23:16]);
if ((!c_wsel[3])&&(!o_wb_sel[3]))
`ASSERT(f_cmem_here[31:24]==f_const_data[31:24]);
 
end
end else if ((!f_const_addr[AW])&&(c_v[f_const_tag_addr])
&&(f_ctag_here ==f_const_addr[AW-1:LS]))
begin
// If ...
// 1. Our magic address is cachable
// 2. Our magic address is associated with a valid
// cache line
// 3. The cache tag matches our magic address
 
 
// if ($past(cyc && i_wb_err))
// begin
// Ignore what happens on an error, the result
// becomes undefined anyway
// end else
if ((c_wr)
&&(c_waddr[CS-1:0] == f_const_addr[CS-1:0]))
begin
//
// If we are writing to this valid cache line
//
if (c_wsel[3])
`ASSERT(c_wdata[31:24]
== f_const_data[31:24]);
else
`ASSERT(f_cmem_here[31:24]
== f_const_data[31:24]);
if (c_wsel[2])
`ASSERT(c_wdata[23:16]
== f_const_data[23:16]);
else
`ASSERT(f_cmem_here[23:16] == f_const_data[23:16]);
if (c_wsel[1])
`ASSERT(c_wdata[15:8]
== f_const_data[15:8]);
else
`ASSERT(f_cmem_here[15:8] == f_const_data[15:8]);
if (c_wsel[0])
`ASSERT(c_wdata[7:0]
== f_const_data[7:0]);
else
`ASSERT(f_cmem_here[7:0] == f_const_data[7:0]);
end else
`ASSERT(f_cmem_here == f_const_data);
end
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(state == DC_READC))
begin
`ASSERT(f_wb_tag == r_ctag);
if ((wb_start[AW-1:LS] == f_const_tag)
&&(!c_v[f_const_tag_addr])
&&(f_const_addr[AW] == r_wb_cyc_lcl)
&&(f_nacks > f_const_addr[LS-1:0]))
begin
// We are reading the cache line containing our
// constant address f_const_addr. Make sure the data
// is correct.
if ((c_wr)&&(c_waddr[CS-1:0] == f_const_addr[CS-1:0]))
`ASSERT(c_wdata == f_const_data);
else
`ASSERT(f_cmem_here == f_const_data);
end
 
if (f_nacks > 0)
`ASSERT(!c_v[wb_start[CS-1:LS]]);
end
 
always @(posedge i_clk)
if ((state == DC_READC)&&(f_nacks > 0))
begin
`ASSERT(c_vtags[wb_start[(CS-1):LS]] <= wb_start[(AW-1):LS]);
`ASSERT(c_vtags[wb_start[(CS-1):LS]] <= r_addr[AW-1:LS]);
end
 
reg [AW-1:0] f_cache_waddr;
wire f_this_cache_waddr;
 
always @(*)
begin
// f_cache_waddr[AW-1:LS] = c_vtags[c_waddr[CS-1:CS-LS]];
f_cache_waddr[AW-1:LS] = wb_start[AW-1:LS];
f_cache_waddr[CS-1: 0] = c_waddr[CS-1:0];
end
 
assign f_this_cache_waddr = (!f_const_addr[AW])
&&(f_cache_waddr == f_const_addr[AW-1:0]);
always @(posedge i_clk)
if ((f_past_valid)&&(state == DC_READC))
begin
if ((c_wr)&&(c_waddr[LS-1:0] != 0)&&(f_this_cache_waddr))
`ASSERT(c_wdata == f_const_data);
end
 
always @(posedge i_clk)
if ((OPT_PIPE)&&(o_busy)&&(i_pipe_stb))
begin
`ASSUME(i_op[0] == o_wb_we);
if (o_wb_cyc_lcl)
assume(&i_addr[DW-1:DW-8]);
else
assume(!(&i_addr[DW-1:DW-8]));
end
 
initial f_pending_rd = 0;
always @(posedge i_clk)
if (i_reset)
f_pending_rd <= 0;
else if (i_pipe_stb)
f_pending_rd <= (!i_op[0]);
else if ((o_valid)&&((!OPT_PIPE)
||((state != DC_READS)&&(!r_svalid)&&(!$past(i_pipe_stb)))))
f_pending_rd <= 1'b0;
 
always @(*)
if ((state == DC_READC)&&(!f_stb))
`ASSERT(f_nreqs == (1<<LS));
 
always @(*)
if ((state == DC_READC)&&(f_stb))
`ASSERT(f_nreqs == { 1'b0, o_wb_addr[LS-1:0] });
 
always @(posedge i_clk)
if (state == DC_READC)
begin
if (($past(i_wb_ack))&&(!$past(f_stb)))
`ASSERT(f_nacks-1 == { 1'b0, c_waddr[LS-1:0] });
else if (f_nacks > 0)
begin
`ASSERT(f_nacks-1 == { 1'b0, c_waddr[LS-1:0] });
`ASSERT(c_waddr[CS-1:LS] == o_wb_addr[CS-1:LS]);
end else begin
`ASSERT(c_waddr[CS-1:LS] == o_wb_addr[CS-1:LS]-1'b1);
`ASSERT(&c_waddr[LS-1:0]);
end
end
 
always @(*)
if (r_rd_pending)
`ASSERT(r_addr == f_pending_addr[AW-1:0]);
 
always @(*)
if (f_pending_addr[AW])
begin
`ASSERT(state != DC_READC);
`ASSERT((!o_wb_we)||(!o_wb_cyc_gbl));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(o_valid)&&($past(f_pending_addr) == f_const_addr))
begin
if (f_const_buserr)
`ASSERT(o_err);
else if (f_pending_rd)
begin
casez($past(req_data[3:0]))
4'b0???: `ASSERT(o_data ==f_const_data);
4'b101?: `ASSERT(o_data =={16'h00,f_const_data[15: 0]});
4'b100?: `ASSERT(o_data =={16'h00,f_const_data[31:16]});
4'b1100: `ASSERT(o_data =={24'h00,f_const_data[31:24]});
4'b1101: `ASSERT(o_data =={24'h00,f_const_data[23:16]});
4'b1110: `ASSERT(o_data =={24'h00,f_const_data[15: 8]});
4'b1111: `ASSERT(o_data =={24'h00,f_const_data[ 7: 0]});
endcase
end
end
 
// f_this_return
//
// True whenever the address of the word currently being returned by the
// bus (f_return_address) is our arbitrary address, f_const_addr.
wire	f_this_return;
assign	f_this_return = (f_return_address == f_const_addr);

// Constrain the bus response for the arbitrary address.
//
// These assumptions apply while a bus cycle is active (f_cyc) and either
//   (a) the word being returned is the arbitrary address itself, or
//   (b) we are filling a cache line (DC_READC) and the word being
//       returned lies within the same cache line as the arbitrary address.
//
// If the arbitrary address has been chosen to be a bus-error address
// (f_const_buserr), the bus is never allowed to acknowledge.  Otherwise the
// bus may not return an error, and any data it returns must be
// f_const_data.
always @(*)
if (f_cyc)
begin
	if ((f_this_return)
		||((state == DC_READC)
			&&(f_return_address[AW-1:LS] == f_const_addr[AW-1:LS])))
	begin
		if (!f_const_buserr)
		begin
			assume(!i_wb_err);
			assume(i_wb_data == f_const_data);
		end else
			assume(!i_wb_ack);
	end
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(last_tag == f_const_tag)&&(f_const_buserr)
&&(!f_const_addr[AW]))
`ASSERT(!last_tag_valid);
 
// Bus-error properties for the arbitrary address
//
// When the arbitrary address (f_const_addr) has been chosen to be one that
// produces a bus error:
//
//  1. It may never be found valid within the cache: either its cache line
//     is invalid, the address is on the local bus (f_const_addr[AW]), or the
//     tag stored for that line belongs to some other address.
//
//  2. While filling the cache line containing it (DC_READC, with the line
//     starting at wb_start), the number of acknowledgements received can
//     never exceed the word offset of the arbitrary address within that
//     line, and the word at that offset may not be acknowledged.
//
// The word offset within the line is f_const_addr[LS-1:0].  (f_const_tag
// holds f_const_addr[AW-1:LS], so indexing f_const_tag[LS-1:0] would select
// the low bits of the tag rather than the offset within the line.)
always @(*)
if (f_const_buserr)
begin
	`ASSERT((!c_v[f_const_tag_addr])||(f_const_addr[AW])
		||(f_ctag_here != f_const_tag));

	if ((state == DC_READC)&&(wb_start[AW-1:LS] == f_const_tag))
	begin
		`ASSERT(f_nacks <= f_const_addr[LS-1:0]);
		if (f_nacks == f_const_addr[LS-1:0])
			assume(!i_wb_ack);
	end
end
 
`endif // DCACHE
 
////////////////////////////////////////////////
//
// Checking the lock
//
////////////////////////////////////////////////
//
//
 
always @(*)
`ASSERT((!lock_gbl)||(!lock_lcl));
always @(*)
if (!OPT_LOCK)
`ASSERT((!lock_gbl)&&(!lock_lcl));
 
////////////////////////////////////////////////
//
// State based properties
//
////////////////////////////////////////////////
//
//
reg [F_LGDEPTH-1:0] f_rdpending;
 
// f_rdpending
//
// Formal-only counter of read requests that have been accepted but not yet
// answered.  It is cleared on reset or whenever an error is reported
// (o_err).  Otherwise it counts up on every read request (i_pipe_stb with
// i_op[0] clear) and down on every o_valid.  If both happen on the same
// clock, or neither happens, the count is left unchanged.
initial f_rdpending = 0;
always @(posedge i_clk)
if ((i_reset)||(o_err))
f_rdpending <= 0;
else case({ (i_pipe_stb)&&(!i_op[0]), o_valid })
2'b01: f_rdpending <= f_rdpending - 1'b1;
2'b10: f_rdpending <= f_rdpending + 1'b1;
default: begin end
endcase
 
wire f_wb_cachable;
iscachable #(.ADDRESS_WIDTH(AW))
f_chkwb_addr(o_wb_addr, f_wb_cachable);
 
 
always @(*)
if (state == DC_IDLE)
begin
`ASSERT(!r_wb_cyc_gbl);
`ASSERT(!r_wb_cyc_lcl);
 
`ASSERT(!cyc);
 
if ((r_rd_pending)||(r_dvalid)||(r_svalid))
`ASSERT(o_busy);
 
if (!OPT_PIPE)
begin
if (r_rd_pending)
`ASSERT(o_busy);
else if (r_svalid)
`ASSERT(o_busy);
else if (o_valid)
`ASSERT(!o_busy);
else if (o_err)
`ASSERT(!o_busy);
end
end else begin
`ASSERT(o_busy);
`ASSERT(cyc);
end
 
 
 
always @(posedge i_clk)
if (state == DC_IDLE)
begin
if (r_svalid)
begin
`ASSERT(!r_dvalid);
`ASSERT(!r_rd_pending);
if (!OPT_PIPE)
`ASSERT(!o_valid);
else if (o_valid)
`ASSERT(f_rdpending == 2);
end
 
if (r_dvalid)
begin
`ASSERT(!r_rd_pending);
`ASSERT(npending == 0);
`ASSERT(f_rdpending == 1);
end
 
if (r_rd_pending)
begin
if ((OPT_PIPE)&&(o_valid))
`ASSERT(f_rdpending <= 2);
else
`ASSERT(f_rdpending == 1);
 
end else if ((OPT_PIPE)&&(o_valid)&&($past(r_dvalid|r_svalid)))
`ASSERT(f_rdpending <= 2);
else
`ASSERT(f_rdpending <= 1);
end
 
always @(posedge i_clk)
if (state == DC_READC)
begin
`ASSERT( o_wb_cyc_gbl);
`ASSERT(!o_wb_cyc_lcl);
`ASSERT(!o_wb_we);
`ASSERT(f_wb_cachable);
 
`ASSERT(r_rd_pending);
`ASSERT(r_cachable);
if (($past(cyc))&&(!$past(o_wb_stb_gbl)))
`ASSERT(!o_wb_stb_gbl);
 
if ((OPT_PIPE)&&(o_valid))
`ASSERT(f_rdpending == 2);
else
`ASSERT(f_rdpending == 1);
end
 
always @(*)
if (state == DC_READS)
begin
`ASSERT(!o_wb_we);
 
if (OPT_PIPE)
begin
if (o_valid)
`ASSERT((f_rdpending == npending + 1)
||(f_rdpending == npending));
else
`ASSERT(f_rdpending == npending);
end
end else if (state == DC_WRITE)
`ASSERT(o_wb_we);
 
always @(posedge i_clk)
if ((state == DC_READS)||(state == DC_WRITE))
begin
`ASSERT(o_wb_we == (state == DC_WRITE));
`ASSERT(!r_rd_pending);
if (o_wb_we)
`ASSERT(f_rdpending == 0);
 
if (OPT_PIPE)
begin
casez({ $past(i_pipe_stb), f_stb })
2'b00: `ASSERT(npending == f_outstanding);
2'b1?: `ASSERT(npending == f_outstanding + 1);
2'b01: `ASSERT(npending == f_outstanding + 1);
endcase
 
if (state == DC_WRITE)
`ASSERT(!o_valid);
end else
`ASSERT(f_outstanding <= 1);
end
 
always @(*)
if (OPT_PIPE)
`ASSERT(f_rdpending <= 2);
else
`ASSERT(f_rdpending <= 1);
 
always @(posedge i_clk)
if ((!OPT_PIPE)&&(o_valid))
`ASSERT(f_rdpending == 1);
else if (o_valid)
`ASSERT(f_rdpending >= 1);
 
 
always @(*)
if ((!o_busy)&&(!o_err)&&(!o_valid))
`ASSERT(f_rdpending == 0);
 
always @(*)
`ASSERT(cyc == ((r_wb_cyc_gbl)||(r_wb_cyc_lcl)));
 
always @(*)
if ((!i_reset)&&(f_nreqs == f_nacks)&&(!f_stb))
`ASSERT(!cyc);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_err)))
`ASSUME(!i_lock);
else if ((f_past_valid)&&(OPT_LOCK)&&($past(i_lock))
&&((!$past(o_valid)) || ($past(i_pipe_stb))))
`ASSUME($stable(i_lock));
 
 
////////////////////////////////////////////////
//
// Ad-hoc properties
//
////////////////////////////////////////////////
//
//
always @(*)
if ((OPT_PIPE)&&(state == DC_WRITE)&&(!i_wb_stall)&&(stb)
&&(!npending[DP]))
`ASSERT(!o_pipe_stalled);
 
always @(posedge i_clk)
if (state == DC_WRITE)
`ASSERT(o_wb_we);
else if ((state == DC_READS)||(state == DC_READC))
`ASSERT(!o_wb_we);
 
always @(*)
if (cyc)
`ASSERT(f_cyc);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(cyc))&&(!c_wr)&&(last_tag_valid)
&&(!r_rd_pending))
`ASSERT((c_v[last_tag[(CS-LS-1):0]])
&&(c_vtags[last_tag[(CS-LS-1):0]] == last_tag));
 
always @(*)
if (!OPT_LOCAL_BUS)
begin
`ASSERT(r_wb_cyc_lcl == 1'b0);
`ASSERT(o_wb_stb_lcl == 1'b0);
`ASSERT(lock_lcl == 1'b0);
end
 
always @(posedge i_clk)
if ((state == DC_READC)&&(!stb))
begin
`ASSERT(o_wb_addr[LS-1:0] == 0);
`ASSERT(o_wb_addr[AW-1:CS] == r_addr[AW-1:CS]);
end else if ((state == DC_READC)&&(stb))
begin
`ASSERT(o_wb_addr[AW-1:CS] == r_addr[AW-1:CS]);
`ASSERT(o_wb_addr[LS-1:0] == f_nreqs[LS-1:0]);
end
 
wire [CS-1:0] f_expected_caddr;
assign f_expected_caddr = { r_ctag[CS-LS-1:0], {(LS){1'b0}} }-1
+ f_nacks;
always @(posedge i_clk)
if (state == DC_READC)
begin
if (LS == 0)
`ASSERT(end_of_line);
else if (f_nacks < (1<<LS)-1)
`ASSERT(!end_of_line);
else if (f_nacks == (1<<LS)-1)
`ASSERT(end_of_line);
`ASSERT(f_nacks <= (1<<LS));
`ASSERT(f_nreqs <= (1<<LS));
if (f_nreqs < (1<<LS))
begin
`ASSERT(o_wb_stb_gbl);
`ASSERT(o_wb_addr[(LS-1):0] == f_nreqs[LS-1:0]);
end else
`ASSERT(!f_stb);
`ASSERT((f_nreqs == 0)||(f_nacks <= f_nreqs));
`ASSERT(c_waddr == f_expected_caddr);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(r_rd)&&(!$past(i_reset)))
begin
`ASSERT((o_busy)||(r_svalid));
end
 
always @(posedge i_clk)
if (!$past(o_busy))
`ASSERT(!r_dvalid);
 
always @(posedge i_clk)
if ((state == DC_READC)&&(c_wr))
`ASSERT(c_wsel == 4'hf);
 
always @(*)
if (c_wr)
`ASSERT((c_wsel == 4'hf)
||(c_wsel == 4'hc)
||(c_wsel == 4'h3)
||(c_wsel == 4'h8)
||(c_wsel == 4'h4)
||(c_wsel == 4'h2)
||(c_wsel == 4'h1));
 
always @(*)
if (!OPT_PIPE)
`ASSERT(o_pipe_stalled == o_busy);
else if (o_pipe_stalled)
`ASSERT(o_busy);
 
//
// Only ever abort on reset
//
// If a bus cycle was active on the last clock, with neither a reset nor a
// bus error, then the cycle must still be active now whenever there is
// work left to do.  That is the case if, on the last clock:
//   - a new request was accepted (i_pipe_stb without o_pipe_stalled),
//   - more than one request was outstanding,
//   - exactly one request was outstanding and either it was not
//     acknowledged, or another request was issued (f_stb without
//     i_wb_stall), or
//   - nothing was outstanding, but a request was being made (f_stb) that
//     was not acknowledged on that same clock.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(cyc))&&(!$past(i_wb_err)))
begin
if (($past(i_pipe_stb))&&(!$past(o_pipe_stalled)))
`ASSERT(cyc);
else if ($past(f_outstanding > 1))
`ASSERT(cyc);
else if (($past(f_outstanding == 1))
&&((!$past(i_wb_ack))
||(($past(f_stb))
&&(!$past(i_wb_stall)))))
`ASSERT(cyc);
else if (($past(f_outstanding == 0))
&&($past(f_stb)&&(!$past(i_wb_ack))))
`ASSERT(cyc);
end
 
always @(posedge i_clk)
if ((OPT_PIPE)&&(f_past_valid)&&(!$past(i_reset))&&(state != DC_READC))
begin
if ($past(cyc && i_wb_err))
begin
`ASSERT(npending == 0);
end else if (($past(i_pipe_stb))||($past(i_wb_stall && stb)))
`ASSERT((npending == f_outstanding+1)
||(npending == f_outstanding+2));
else
`ASSERT(npending == f_outstanding);
end
 
always @(posedge i_clk)
if ((OPT_PIPE)&&(state != DC_READC)&&(state != DC_IDLE))
`ASSERT(last_ack == (npending <= 1));
 
always @(*)
`ASSERT(stb == f_stb);
 
always @(*)
if (r_rd_pending)
`ASSERT(!r_svalid);
 
always @(*)
if (o_err)
`ASSUME(!i_pipe_stb);
 
always @(*)
if (last_tag_valid)
`ASSERT(|c_v);
 
always @(posedge i_clk)
if ((cyc)&&(state == DC_READC)&&($past(f_nacks > 0)))
`ASSERT(!c_v[o_wb_addr[CS-1:LS]]);
 
always @(*)
if (last_tag_valid)
begin
`ASSERT((!cyc)||(o_wb_we)||(state == DC_READS)
||(o_wb_addr[AW-1:LS] != last_tag));
end
 
wire f_cachable_last_tag, f_cachable_r_addr;
 
iscachable #(.ADDRESS_WIDTH(AW))
fccheck_last_tag({last_tag, {(LS){1'b0}}}, f_cachable_last_tag);
 
iscachable #(.ADDRESS_WIDTH(AW))
fccheck_r_cachable(r_addr, f_cachable_r_addr);
 
always @(*)
if ((r_cachable)&&(r_rd_pending))
begin
`ASSERT(state != DC_WRITE);
// `ASSERT(state != DC_READS);
`ASSERT(f_cachable_r_addr);
if (cyc)
`ASSERT(o_wb_addr[AW-1:LS] == r_addr[AW-1:LS]);
end
 
always @(*)
if (last_tag_valid)
begin
`ASSERT(f_cachable_last_tag);
`ASSERT(c_v[last_tag[CS-LS-1:0]]);
`ASSERT(c_vtags[last_tag[CS-LS-1:0]]==last_tag);
`ASSERT((state != DC_READC)||(last_tag != o_wb_addr[AW-1:LS]));
end
 
 
////////////////////////////////////////////////
//
// Cover statements
//
////////////////////////////////////////////////
//
//
 
always @(posedge i_clk)
cover(o_valid);
 
always @(posedge i_clk)
if (f_past_valid)
cover($past(r_svalid));
 
generate if (OPT_PIPE)
begin : PIPE_COVER
 
wire recent_reset;
reg [2:0] recent_reset_sreg;
initial recent_reset_sreg = -1;
always @(posedge i_clk)
if (i_reset)
recent_reset_sreg <= -1;
else
recent_reset_sreg <= { recent_reset_sreg[1:0], 1'b0 };
 
assign recent_reset = (i_reset)||(|recent_reset_sreg);
 
//
//
wire f_cvr_cread = (!recent_reset)&&(i_pipe_stb)&&(!i_op[0])
&&(w_cachable);
 
wire f_cvr_cwrite = (!recent_reset)&&(i_pipe_stb)&&(i_op[0])
&&(!cache_miss_inow);
 
wire f_cvr_writes = (!recent_reset)&&(i_pipe_stb)&&(i_op[0])
&&(!w_cachable);
wire f_cvr_reads = (!recent_reset)&&(i_pipe_stb)&&(!i_op[0])
&&(!w_cachable);
wire f_cvr_test = (!recent_reset)&&(cyc);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid)))
cover(o_valid); // !
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_pipe_stb)))
cover(i_pipe_stb);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid))&&($past(o_valid,2)))
cover(o_valid);
 
always @(posedge i_clk)
cover(($past(f_cvr_cread))&&(f_cvr_cread));
 
always @(posedge i_clk)
cover(($past(f_cvr_cwrite))&&(f_cvr_cwrite));
 
always @(posedge i_clk)
cover(($past(f_cvr_writes))&&(f_cvr_writes));
 
/*
* This cover statement will never pass. Why not? Because
* cache reads must be separated from non-cache reads. Hence,
* we can only allow a single non-cache read at a time, otherwise
* we'd bypass the cache read logic.
*
always @(posedge i_clk)
cover(($past(f_cvr_reads))&&(f_cvr_reads));
*/
 
//
// This is unrealistic, as it depends upon the Wishbone
// acknowledging the request on the same cycle
always @(posedge i_clk)
cover(($past(f_cvr_reads,2))&&(f_cvr_reads));
 
always @(posedge i_clk)
cover(($past(r_dvalid))&&(r_svalid));
 
//
// A minimum of one clock must separate two dvalid's.
// This option is rather difficult to cover, since it means
// we must first load two separate cache lines before
// this can even be tried.
always @(posedge i_clk)
cover(($past(r_dvalid,2))&&(r_dvalid));
 
//
// This is the optimal configuration we want:
// i_pipe_stb
// ##1 i_pipe_stb && r_svalid
// ##1 r_svalid && o_valid
// ##1 o_valid
// It proves that we can handle a 2 clock delay, but that
// we can also pipeline these cache accesses, so this
// 2-clock delay becomes a 1-clock delay between pipelined
// memory reads.
//
always @(posedge i_clk)
cover(($past(r_svalid))&&(r_svalid));
 
//
// While we'd never do this (it breaks the ZipCPU's pipeline
// rules), it's nice to know we could.
// i_pipe_stb && (!i_op[0]) // a read
// ##1 i_pipe_stb && (i_op[0]) && r_svalid // a write
// ##1 o_valid
always @(posedge i_clk)
cover(($past(r_svalid))&&(f_cvr_writes));
 
/* Unreachable
*
always @(posedge i_clk)
cover(($past(f_cvr_writes))&&(o_valid));
 
always @(posedge i_clk)
cover(($past(f_cvr_writes,2))&&(o_valid));
 
always @(posedge i_clk)
cover(($past(f_cvr_writes,3))&&(o_valid));
 
always @(posedge i_clk)
cover(($past(r_dvalid,3))&&(r_dvalid));
 
*/
 
always @(posedge i_clk)
cover(($past(f_cvr_writes,4))&&(o_valid));
 
end endgenerate
 
////////////////////////////////////////////////
//
// Careless assumption section
//
////////////////////////////////////////////////
//
//
 
//
// Can't jump from local to global mid lock
always @(*)
if((OPT_LOCK)&&(OPT_LOCAL_BUS))
begin
if ((i_lock)&&(o_wb_cyc_gbl)&&(i_pipe_stb))
assume(!(&i_addr[(DW-1):(DW-8)]));
else if ((i_lock)&&(o_wb_cyc_lcl)&&(i_pipe_stb))
assume(&i_addr[(DW-1):(DW-8)]);
end
 
always @(*)
if ((OPT_PIPE)&&(o_busy || i_lock)&&(!o_pipe_stalled))
begin
if (i_pipe_stb)
assume((!OPT_LOCAL_BUS)
||(f_pending_addr[AW]==(&i_addr[DW-1:DW-8])));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(cyc))&&(!cyc))
assume((!i_wb_err)&&(!i_wb_ack));
 
`endif
endmodule
/div.v
8,10 → 8,10
// for both signed and unsigned divide.
//
// Steps:
// i_rst The DIVide unit starts in idle. It can also be placed into an
// i_reset The DIVide unit starts in idle. It can also be placed into an
// idle by asserting the reset input.
//
// i_wr When i_rst is asserted, a divide begins. On the next clock:
// i_wr When i_wr is asserted, a divide begins. On the next clock:
//
// o_busy is set high so everyone else knows we are at work and they can
// wait for us to complete.
71,7 → 71,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
95,15 → 95,17
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
// `include "cpudefs.v"
//
module div(i_clk, i_rst, i_wr, i_signed, i_numerator, i_denominator,
module div(i_clk, i_reset, i_wr, i_signed, i_numerator, i_denominator,
o_busy, o_valid, o_err, o_quotient, o_flags);
parameter BW=32, LGBW = 5;
input i_clk, i_rst;
input wire i_clk, i_reset;
// Input parameters
input i_wr, i_signed;
input [(BW-1):0] i_numerator, i_denominator;
input wire i_wr, i_signed;
input wire [(BW-1):0] i_numerator, i_denominator;
// Output parameters
output reg o_busy, o_valid, o_err;
output reg [(BW-1):0] o_quotient;
113,11 → 115,10
// before we are valid, so it can't be o_busy ...
//
reg r_busy;
reg [(2*BW-2):0] r_divisor;
reg [(BW-1):0] r_dividend;
reg [BW-1:0] r_divisor;
reg [(2*BW-2):0] r_dividend;
wire [(BW):0] diff; // , xdiff[(BW-1):0];
assign diff = r_dividend - r_divisor[(BW-1):0];
// assign xdiff= r_dividend - { 1'b0, r_divisor[(BW-1):1] };
assign diff = r_dividend[2*BW-2:BW-1] - r_divisor;
 
reg r_sign, pre_sign, r_z, r_c, last_bit;
reg [(LGBW-1):0] r_bit;
130,12 → 131,12
// or equivalently when we discover we are dividing by zero.
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
r_busy <= 1'b0;
else if (i_wr)
r_busy <= 1'b1;
else if ((last_bit)||(zero_divisor))
r_busy <= 1'b0;
if (i_reset)
r_busy <= 1'b0;
else if (i_wr)
r_busy <= 1'b1;
else if ((last_bit)||(zero_divisor))
r_busy <= 1'b0;
 
// o_busy is very similar to r_busy, save for some key differences.
// Primary among them is that o_busy needs to (possibly) be true
145,32 → 146,18
// identical.
initial o_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_busy <= 1'b0;
else if (i_wr)
o_busy <= 1'b1;
else if (((last_bit)&&(~r_sign))||(zero_divisor))
o_busy <= 1'b0;
else if (~r_busy)
o_busy <= 1'b0;
if (i_reset)
o_busy <= 1'b0;
else if (i_wr)
o_busy <= 1'b1;
else if (((last_bit)&&(!r_sign))||(zero_divisor))
o_busy <= 1'b0;
else if (!r_busy)
o_busy <= 1'b0;
 
// If we are asked to divide by zero, we need to halt. The sooner
// we halt and report the error, the better. Hence, here we look
// for a zero divisor while being busy. The always above us will then
// look at this and halt a divide in the middle if we are trying to
// divide by zero.
//
// Note that this works off of the 2BW-1 length vector. If we can
// simplify that, it should simplify our logic as well.
initial zero_divisor = 1'b0;
always @(posedge i_clk)
// zero_divisor <= (r_divisor == 0)&&(r_busy);
if (i_rst)
zero_divisor <= 1'b0;
else if (i_wr)
zero_divisor <= (i_denominator == 0);
else if (!r_busy)
zero_divisor <= 1'b0;
if (i_wr)
zero_divisor <= (i_denominator == 0);
 
// o_valid is part of the ZipCPU protocol. It will be set to true
// anytime our answer is valid and may be used by the calling module.
183,17 → 170,19
// it on an i_wr signal.
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (r_busy)
begin
if ((last_bit)||(zero_divisor))
o_valid <= (zero_divisor)||(!r_sign);
end else if (r_sign)
begin
o_valid <= (!zero_divisor); // 1'b1;
end else
o_valid <= 1'b0;
if ((i_reset)||(o_valid))
o_valid <= 1'b0;
else if ((r_busy)&&(zero_divisor))
o_valid <= 1'b1;
else if (r_busy)
begin
if (last_bit)
o_valid <= (!r_sign);
end else if (r_sign)
begin
o_valid <= 1'b1;
end else
o_valid <= 1'b0;
 
// Division by zero error reporting. Anytime we detect a zero divisor,
// we set our output error, and then hold it until we are valid and
200,12 → 189,12
// everything clears.
initial o_err = 1'b0;
always @(posedge i_clk)
if((i_rst)||(o_valid))
o_err <= 1'b0;
else if (((r_busy)||(r_sign))&&(zero_divisor))
o_err <= 1'b1;
else
o_err <= 1'b0;
if (i_reset)
o_err <= 1'b0;
else if ((r_busy)&&(zero_divisor))
o_err <= 1'b1;
else
o_err <= 1'b0;
 
// r_bit
//
213,26 → 202,31
// ranges from 31 down to zero. On any write, we set ourselves to
// 5'h1f. Otherwise, while we are busy (but not within the pre-sign
// adjustment stage), we subtract one from our value on every clock.
initial r_bit = 0;
always @(posedge i_clk)
if ((r_busy)&&(!pre_sign))
r_bit <= r_bit + {(LGBW){1'b1}};
else
r_bit <= {(LGBW){1'b1}};
if (i_reset)
r_bit <= 0;
else if ((r_busy)&&(!pre_sign))
r_bit <= r_bit + 1'b1;
else
r_bit <= 0;
 
// last_bit
//
// This logic replaces a lot of logic that was inside our giant state
// machine with ... something simpler. In particular, we'll use this
// logic to determine we are processing our last bit. The only trick
// is, this bit needs to be set whenever (r_busy) and (r_bit == 0),
// hence we need to set on (r_busy) and (r_bit == 1) so as to be set
// logic to determine if we are processing our last bit. The only trick
// is, this bit needs to be set whenever (r_busy) and (r_bit == -1),
// hence we need to set on (r_busy) and (r_bit == -2) so as to be set
// when (r_bit == -1).
initial last_bit = 1'b0;
always @(posedge i_clk)
if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
else
last_bit <= 1'b0;
if (i_reset)
last_bit <= 1'b0;
else if (r_busy)
last_bit <= (r_bit == {(LGBW){1'b1}}-1'b1);
else
last_bit <= 1'b0;
 
// pre_sign
//
241,67 → 235,59
// be true for the one clock, and then it must clear itself.
initial pre_sign = 1'b0;
always @(posedge i_clk)
if (i_wr)
pre_sign <= i_signed;
else
pre_sign <= 1'b0;
if (i_reset)
pre_sign <= 1'b0;
else
pre_sign <= (i_wr)&&(i_signed)&&((i_numerator[BW-1])||(i_denominator[BW-1]));
 
// As a result of our operation, we need to set the flags. The most
// difficult of these is the "Z" flag indicating that the result is
// zero. Here, we'll use the same logic that sets the low-order
// bit to clear our zero flag, and leave the zero flag set in all
// other cases. Well ... not quite. If we need to flip the sign of
// our value, then we can't quite clear the zero flag ... yet.
// other cases.
always @(posedge i_clk)
if((r_busy)&&(r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
// If we are busy, the upper bits of our divisor are
// zero (i.e., we got the shift right), and the top
// (carry) bit of the difference is zero (no overflow),
// then we could subtract our divisor from our dividend
// and hence we add a '1' to the quotient, while setting
// the zero flag to false.
r_z <= 1'b0;
else if ((!r_busy)&&(!r_sign))
r_z <= 1'b1;
if (i_wr)
r_z <= 1'b1;
else if ((r_busy)&&(!pre_sign)&&(!diff[BW]))
r_z <= 1'b0;
 
// r_dividend
// This is initially the numerator. On a signed divide, it then becomes
// the absolute value of the numerator. We'll subtract from this value
// the divisor shifted as appropriate for every output bit we are
// looking for--just as with traditional long division.
// the divisor for every output bit we are looking for--just as with
// traditional long division.
always @(posedge i_clk)
if (pre_sign)
if (pre_sign)
begin
// If we are doing a signed divide, then take the
// absolute value of the dividend
if (r_dividend[BW-1])
begin
// If we are doing a signed divide, then take the
// absolute value of the dividend
if (r_dividend[BW-1])
r_dividend <= -r_dividend;
// The begin/end block is important so we don't lose
// the fact that on an else we don't do anything.
end else if((r_busy)&&(r_divisor[(2*BW-2):(BW)]==0)&&(!diff[BW]))
// This is the condition whereby we set a '1' in our
// output quotient, and we subtract the (current)
// divisor from our dividend. (The difference is
// already kept in the diff vector above.)
r_dividend <= diff[(BW-1):0];
else if (!r_busy)
// Once we are done, and r_busy is no longer high, we'll
// always accept new values into our dividend. This
// guarantees that, when i_wr is set, the new value
// is already set as desired.
r_dividend <= i_numerator;
r_dividend[2*BW-2:0] <= {(2*BW-1){1'b1}};
r_dividend[BW-1:0] <= -r_dividend[BW-1:0];
end
end else if (r_busy)
begin
r_dividend <= { r_dividend[2*BW-3:0], 1'b0 };
if (!diff[BW])
r_dividend[2*BW-2:BW] <= diff[(BW-2):0];
end else if (!r_busy)
// Once we are done, and r_busy is no longer high, we'll
// always accept new values into our dividend. This
// guarantees that, when i_wr is set, the new value
// is already set as desired.
r_dividend <= { 31'h0, i_numerator };
 
initial r_divisor = 0;
always @(posedge i_clk)
if (pre_sign)
begin
if (r_divisor[(2*BW-2)])
r_divisor[(2*BW-2):(BW-1)]
<= -r_divisor[(2*BW-2):(BW-1)];
end else if (r_busy)
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
else
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
if (i_reset)
r_divisor <= 0;
else if ((pre_sign)&&(r_busy))
begin
if (r_divisor[BW-1])
r_divisor <= -r_divisor;
end else if (!r_busy)
r_divisor <= i_denominator;
 
// r_sign
// is a flag for our state machine control(s). r_sign will be set to
314,31 → 300,38
// up to something.
initial r_sign = 1'b0;
always @(posedge i_clk)
if (pre_sign)
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
else if (r_busy)
r_sign <= (r_sign)&&(!zero_divisor);
else
r_sign <= 1'b0;
if (i_reset)
r_sign <= 1'b0;
else if (pre_sign)
r_sign <= ((r_divisor[(BW-1)])^(r_dividend[(BW-1)]));
else if (r_busy)
r_sign <= (r_sign)&&(!zero_divisor);
else
r_sign <= 1'b0;
 
initial o_quotient = 0;
always @(posedge i_clk)
if (r_busy)
begin
o_quotient <= { o_quotient[(BW-2):0], 1'b0 };
if ((r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
begin
o_quotient[0] <= 1'b1;
end
end else if (r_sign)
o_quotient <= -o_quotient;
else
o_quotient <= 0;
if (i_reset)
o_quotient <= 0;
else if (r_busy)
begin
o_quotient <= { o_quotient[(BW-2):0], 1'b0 };
if (!diff[BW])
o_quotient[0] <= 1'b1;
end else if (r_sign)
o_quotient <= -o_quotient;
else
o_quotient <= 0;
 
// Set Carry on an exact divide
// Perhaps nothing uses this, but ... well, I suppose we could remove
// this logic eventually, just ... not yet.
initial r_c = 1'b0;
always @(posedge i_clk)
r_c <= (r_busy)&&((diff == 0)||(r_dividend == 0));
if (i_reset)
r_c <= 1'b0;
else
r_c <= (r_busy)&&(diff == 0);
 
// The last flag: Negative. This flag is set assuming that the result
// of the divide was negative (i.e., the high order bit is set). This
349,4 → 342,215
assign w_n = o_quotient[(BW-1)];
 
// o_flags is packed, from the most significant bit down, as: a constant
// zero, w_n, r_c, and finally r_z in the least significant bit.
assign o_flags = { 1'b0, w_n, r_c, r_z };
 
`ifdef FORMAL
// f_past_valid
//
// Starts out at zero and is set on the first rising edge of i_clk; nothing
// here ever clears it again.  The clocked properties below check this flag
// before relying on any $past() value.
reg f_past_valid;
initial f_past_valid = 0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
// `ASSUME wraps the properties placed on this module's inputs (i_reset and
// i_wr) below.  With DIV defined those properties become assumptions;
// without it, the very same properties are asserted instead.
// NOTE(review): presumably DIV is defined when this module is itself the
// top level of the proof -- confirm against the formal scripts.
`ifdef DIV
`define ASSUME assume
`else
`define ASSUME assert
`endif
 
// The design is required to start out in reset: i_reset must be set in the
// initial state, and for as long as f_past_valid is still clear.
initial `ASSUME(i_reset);
always @(*)
if (!f_past_valid)
`ASSUME(i_reset);
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
assert(!o_busy);
assert(!o_valid);
assert(!o_err);
//
assert(!r_busy);
// assert(!zero_divisor);
assert(r_bit==0);
assert(!last_bit);
assert(!pre_sign);
// assert(!r_z);
// assert(r_dividend==0);
assert(o_quotient==0);
assert(!r_c);
assert(r_divisor==0);
 
`ASSUME(!i_wr);
end
 
// Input property: no new divide may be requested (i_wr) while o_busy
// is high.
always @(*)
if (o_busy)
`ASSUME(!i_wr);
 
// If o_busy was set on the last clock (with no reset on that clock) and
// has now dropped, then o_valid must be set on this clock.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(o_busy))&&(!o_busy))
begin
assert(o_valid);
end
 
// A formal methods section
//
// This section isn't yet complete. For now, it is just
// a description of things I think should be in here ... not
// yet a description of what it would take to prove
// this divide.
// o_err may only ever be set together with o_valid.
always @(*)
if (o_err)
assert(o_valid);
 
// pre_sign must be clear unless i_wr was set on the previous clock ...
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_wr)))
assert(!pre_sign);
// ... and it must be set on the clock following a signed request (i_wr
// with i_signed, and no reset on that clock) in which the top bit of the
// numerator, the denominator, or both was set.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_wr))&&($past(i_signed))
&&(|$past({i_numerator[BW-1],i_denominator[BW-1]})))
assert(pre_sign);
 
// always @(posedge i_clk)
// if ((f_past_valid)&&(!$past(pre_sign)))
// assert(!r_sign);
reg [BW:0] f_bits_set;
 
// A request (i_wr) made on a clock without reset must leave o_busy set
// on the following clock.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_wr)))
assert(o_busy);
 
// o_valid may never be set on two consecutive clocks.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid)))
assert(!o_valid);
 
// The zero flag must track the quotient.  Once a result is valid (and not
// in error), r_z must be set exactly when o_quotient is zero.  While the
// divide is still busy, r_z must be set exactly when those quotient bits
// selected by f_bits_set[BW-1:0] are all zero.
always @(*)
if ((o_valid)&&(!o_err))
assert(r_z == ((o_quotient == 0)? 1'b1:1'b0));
else if (o_busy)
assert(r_z == (((o_quotient&f_bits_set[BW-1:0]) == 0)? 1'b1: 1'b0));
 
// On a valid, error-free result, w_n must match the top bit of
// o_quotient.
always @(*)
if ((o_valid)&&(!o_err))
assert(w_n == o_quotient[BW-1]);
 
// o_busy cannot be set unless either r_busy or i_wr was set on the
// previous clock.
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(r_busy))&&(!$past(i_wr)))
assert(!o_busy);
// o_busy and o_valid are never both set on the same clock.
always @(posedge i_clk)
assert((!o_busy)||(!o_valid));
 
// Any time r_busy is set, o_busy must be set as well.
always @(*)
if(r_busy)
assert(o_busy);
 
// f_bits_set
//
// A formal-only shift register.  It is cleared on reset and on every new
// request (i_wr).  After that, a one is shifted in at the bottom on every
// clock where r_busy is set and pre_sign is clear.
always @(posedge i_clk)
if (i_reset)
f_bits_set <= 0;
else if (i_wr)
f_bits_set <= 0;
else if ((r_busy)&&(!pre_sign))
f_bits_set <= { f_bits_set[BW-1:0], 1'b1 };
 
// While busy, f_bits_set must equal (2^r_bit)-1; that is, its low r_bit
// bits are ones and everything above them is zero.
always @(posedge i_clk)
if (r_busy)
assert(((1<<r_bit)-1) == f_bits_set);
 
// When an error-free result is valid, the low BW bits of f_bits_set must
// all be ones, and bit BW must still be zero.
always @(*)
if ((o_valid)&&(!o_err))
assert((!f_bits_set[BW])&&(&f_bits_set[BW-1:0]));
 
 
/*
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(r_busy))
&&($past(r_divisor[2*BW-2:BW])==0))
begin
if ($past(r_divisor) == 0)
assert(o_err);
else if ($past(pre_sign))
begin
if ($past(r_dividend[BW-1]))
assert(r_dividend == -$past(r_dividend));
if ($past(r_divisor[(2*BW-2)]))
begin
assert(r_divisor[(2*BW-2):(BW-1)]
== -$past(r_divisor[(2*BW-2):(BW-1)]));
assert(r_divisor[BW-2:0] == 0);
end
end else begin
if (o_quotient[0])
assert(r_dividend == $past(diff));
else
assert(r_dividend == $past(r_dividend));
 
// r_divisor should shift down on every step
assert(r_divisor[2*BW-2]==0);
assert(r_divisor[2*BW-3:0]==$past(r_divisor[2*BW-2:1]));
end
if ($past(r_dividend) >= $past(r_divisor[BW-1:0]))
assert(o_quotient[0]);
else
assert(!o_quotient[0]);
end
*/
 
// While busy, every bit of r_dividend[BW-1:0] in a position marked by
// f_bits_set must be zero.
always @(*)
if (r_busy)
assert((f_bits_set & r_dividend[BW-1:0])==0);
 
// While busy, zero_divisor must be set if and only if r_divisor is zero.
always @(*)
if (r_busy)
assert((r_divisor == 0) == zero_divisor);
 
`ifdef VERIFIC
// Verify unsigned division
//
// Following an unsigned request (i_wr, !i_signed) with a non-zero
// denominator, and starting on the next clock, r_bit must first be zero,
// then count up by one per clock while remaining below BW-1, and then
// equal BW-1.  Across that whole run there must be no error and no valid,
// o_busy must be set, and both r_sign and pre_sign must be clear.  One
// clock later o_valid must be set without o_err.  A reset abandons the
// check.
assert property (@(posedge i_clk)
disable iff (i_reset)
(i_wr)&&(i_denominator != 0)&&(!i_signed)
|=> ((!o_err)&&(!o_valid)&&(o_busy)&&(!r_sign)&&(!pre_sign)
throughout (r_bit == 0)
##1 ((r_bit == $past(r_bit)+1)&&({1'b0,r_bit}< BW-1))
[*0:$]
##1 ({ 1'b0, r_bit } == BW-1))
##1 (!o_err)&&(o_valid));
 
// Verify division by zero
//
// Following any request with a zero denominator, and starting on the
// next clock, zero_divisor must hold throughout this sequence: at most
// one clock with pre_sign set (and neither o_err nor o_valid), then a
// clock with r_busy set (still neither o_err nor o_valid), and then a
// clock with both o_err and o_valid set.  A reset abandons the check.
assert property (@(posedge i_clk)
disable iff (i_reset)
(i_wr)&&(i_denominator == 0)
|=> (zero_divisor throughout
(!o_err)&&(!o_valid)&&(pre_sign) [*0:1]
##1 ((r_busy)&&(!o_err)&&(!o_valid))
##1 ((o_err)&&(o_valid))));
 
 
`endif // VERIFIC
`endif
endmodule
//
// How much logic will this divide use, now that it's been updated to
// a different (long division) algorithm?
//
// iCE40 stats (Updated) (Original)
// Number of cells: 700 820
// SB_CARRY 125 125
// SB_DFF 1
// SB_DFFE 33 1
// SB_DFFESR 37
// SB_DFFESS 31
// SB_DFFSR 40 40
// SB_LUT4 433 553
//
// Xilinx stats (Updated) (Original)
// Number of cells: 758 831
// FDRE 142 142
// LUT1 97 97
// LUT2 69 174
// LUT3 6 5
// LUT4 1 6
// LUT5 68 35
// LUT6 94 98
// MUXCY 129 129
// MUXF7 12 8
// MUXF8 6 3
// XORCY 134 134
 
/idecode.v
19,7 → 19,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
43,41 → 43,60
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
`define CPU_SP_REG 4'hd
`define CPU_CC_REG 4'he
`define CPU_PC_REG 4'hf
//
`include "cpudefs.v"
`define CISBIT 31
`define CISIMMSEL 23
`define IMMSEL 18
//
//
//
module idecode(i_clk, i_rst, i_ce, i_stalled,
module idecode(i_clk, i_reset, i_ce, i_stalled,
i_instruction, i_gie, i_pc, i_pf_valid,
i_illegal,
o_valid,
o_phase, o_illegal,
o_pc, o_gie,
o_dcdR, o_dcdA, o_dcdB, o_I, o_zI,
o_pc,
o_dcdR, o_dcdA, o_dcdB,
o_preA, o_preB,
o_I, o_zI,
o_cond, o_wF,
o_op, o_ALU, o_M, o_DV, o_FP, o_break, o_lock,
o_wR, o_rA, o_rB,
o_early_branch, o_branch_pc, o_ljmp,
o_early_branch, o_early_branch_stb, o_branch_pc, o_ljmp,
o_pipe,
o_sim, o_sim_immv
`ifdef FORMAL
, f_insn_word, f_insn_gie
`endif
);
parameter ADDRESS_WIDTH=24, IMPLEMENT_MPY=1, EARLY_BRANCHING=1,
IMPLEMENT_DIVIDE=1, IMPLEMENT_FPU=0, AW = ADDRESS_WIDTH;
input i_clk, i_rst, i_ce, i_stalled;
input [31:0] i_instruction;
input i_gie;
input [(AW-1):0] i_pc;
input i_pf_valid, i_illegal;
parameter ADDRESS_WIDTH=24;
parameter [0:0] OPT_MPY = 1'b1;
parameter [0:0] OPT_EARLY_BRANCHING = 1'b1;
parameter [0:0] OPT_PIPELINED = 1'b1;
parameter [0:0] OPT_DIVIDE = (OPT_PIPELINED);
parameter [0:0] OPT_FPU = 1'b0;
parameter [0:0] OPT_CIS = 1'b1;
parameter [0:0] OPT_LOCK = (OPT_PIPELINED);
parameter [0:0] OPT_OPIPE = (OPT_PIPELINED);
parameter [0:0] OPT_SIM = 1'b0;
parameter [0:0] OPT_NO_USERMODE = 1'b0;
localparam AW = ADDRESS_WIDTH;
//
input wire i_clk, i_reset, i_ce, i_stalled;
input wire [31:0] i_instruction;
input wire i_gie;
input wire [(AW+1):0] i_pc;
input wire i_pf_valid, i_illegal;
output wire o_valid, o_phase;
output reg o_illegal;
output reg [AW:0] o_pc;
output reg o_gie;
output reg [(AW+1):0] o_pc;
output reg [6:0] o_dcdR, o_dcdA, o_dcdB;
output wire [4:0] o_preA, o_preB;
output wire [31:0] o_I;
output reg o_zI;
output reg [3:0] o_cond;
84,30 → 103,24
output reg o_wF;
output reg [3:0] o_op;
output reg o_ALU, o_M, o_DV, o_FP, o_break;
output wire o_lock;
output reg o_lock;
output reg o_wR, o_rA, o_rB;
output wire o_early_branch;
output wire [(AW-1):0] o_branch_pc;
output wire o_early_branch, o_early_branch_stb;
output wire [(AW+1):0] o_branch_pc;
output wire o_ljmp;
output wire o_pipe;
output reg o_sim /* verilator public_flat */;
output reg [22:0] o_sim_immv /* verilator public_flat */;
 
wire dcdA_stall, dcdB_stall, dcdF_stall;
wire o_dcd_early_branch;
wire [(AW-1):0] o_dcd_branch_pc;
reg o_dcdI, o_dcdIz;
`ifdef OPT_PIPELINED
reg r_lock;
`ifdef FORMAL
output reg [31:0] f_insn_word;
output reg f_insn_gie;
`endif
`ifdef OPT_PIPELINED_BUS_ACCESS
reg r_pipe;
`endif
 
 
wire [4:0] w_op;
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev,
w_noop, w_lock;
w_noop, w_lock, w_sim, w_break, w_special, w_add,
w_mpy;
wire [4:0] w_dcdR, w_dcdB, w_dcdA;
wire w_dcdR_pc, w_dcdR_cc;
wire w_dcdA_pc, w_dcdA_cc;
117,91 → 130,103
wire w_wR, w_rA, w_rB, w_wR_n;
wire w_ljmp, w_ljmp_dly, w_cis_ljmp;
wire [31:0] iword;
wire pf_valid;
 
assign pf_valid = (i_pf_valid)&&(!o_early_branch_stb);
 
`ifdef OPT_CIS
reg [15:0] r_nxt_half;
assign iword = (o_phase)
 
reg [14:0] r_nxt_half;
 
generate if (OPT_CIS)
begin : SET_IWORD
 
assign iword = (o_phase)
// set second half as a NOOP ... but really
// shouldn't matter
? { r_nxt_half[15:0], i_instruction[15:0] }
? { 1'b1, r_nxt_half[14:0], i_instruction[15:0] }
: i_instruction;
`else
assign iword = { 1'b0, i_instruction[30:0] };
`endif
end else begin : CLR_IWORD
assign iword = { 1'b0, i_instruction[30:0] };
 
// verilator lint_off UNUSED
wire [14:0] unused_nxt_half;
assign unused_nxt_half = r_nxt_half;
// verilator lint_on UNUSED
end endgenerate
 
generate
if (EARLY_BRANCHING != 0)
if (OPT_EARLY_BRANCHING)
begin
`ifdef OPT_CIS
reg r_pre_ljmp;
always @(posedge i_clk)
if ((i_rst)||(o_early_branch))
r_pre_ljmp <= 1'b0;
else if ((i_ce)&&(i_pf_valid))
r_pre_ljmp <= (!o_phase)&&(i_instruction[31])
&&(i_instruction[14:0] == 15'h7cf8);
else if (i_ce)
r_pre_ljmp <= 1'b0;
if (OPT_CIS)
begin : CIS_EARLY_BRANCHING
 
assign w_cis_ljmp = r_pre_ljmp;
`else
assign w_cis_ljmp = 1'b0;
`endif
// 0.1111.10010.000.1.1111.000000000...
// 0111.1100.1000.0111.11000....
assign w_cis_ljmp = (o_phase)&&(iword[31:16] == 16'hfcf8);
 
end else begin : NOCIS_EARLY_BRANCH
 
assign w_cis_ljmp = 1'b0;
 
end
 
assign w_ljmp = (iword == 32'h7c87c000);
end else begin
 
end else begin : NO_EARLY_BRANCHING
 
assign w_cis_ljmp = 1'b0;
assign w_ljmp = 1'b0;
end
endgenerate
end endgenerate
 
`ifdef OPT_CIS
`ifdef VERILATOR
wire [4:0] w_cis_op;
always @(iword)
if (!iword[31])
w_cis_op = w_op;
else case(iword[26:24])
3'h0: w_cis_op = 5'h00;
3'h1: w_cis_op = 5'h01;
3'h2: w_cis_op = 5'h02;
3'h3: w_cis_op = 5'h10;
3'h4: w_cis_op = 5'h12;
3'h5: w_cis_op = 5'h13;
3'h6: w_cis_op = 5'h18;
3'h7: w_cis_op = 5'h0d;
endcase
`else
reg [4:0] w_cis_op;
always @(iword,w_op)
if (!iword[31])
w_cis_op <= w_op;
 
generate if (OPT_CIS)
begin : GEN_CIS_OP
 
always @(*)
if (!iword[`CISBIT])
w_cis_op = iword[26:22];
else case(iword[26:24])
3'h0: w_cis_op <= 5'h00;
3'h1: w_cis_op <= 5'h01;
3'h2: w_cis_op <= 5'h02;
3'h3: w_cis_op <= 5'h10;
3'h4: w_cis_op <= 5'h12;
3'h5: w_cis_op <= 5'h13;
3'h6: w_cis_op <= 5'h18;
3'h7: w_cis_op <= 5'h0d;
3'h0: w_cis_op = 5'h00; // ADD
3'h1: w_cis_op = 5'h01; // AND
3'h2: w_cis_op = 5'h02; // SUB
3'h3: w_cis_op = 5'h10; // BREV
3'h4: w_cis_op = 5'h12; // LW
3'h5: w_cis_op = 5'h13; // SW
3'h6: w_cis_op = 5'h18; // LDI
3'h7: w_cis_op = 5'h0d; // MOV
endcase
`endif
`else
wire [4:0] w_cis_op;
assign w_cis_op = w_op;
`endif
 
end else begin : GEN_NOCIS_OP
 
always @(*)
w_cis_op = w_op;
 
end endgenerate
 
// Decode instructions
assign w_op= iword[26:22];
assign w_mov = (w_cis_op == 5'h0d);
assign w_ldi = (w_cis_op[4:1] == 4'hc);
assign w_brev = (w_cis_op == 5'h8);
assign w_brev = (w_cis_op == 5'h08);
assign w_mpy = (w_cis_op[4:1] == 4'h5)||(w_cis_op[4:0]==5'h0c);
assign w_cmptst = (w_cis_op[4:1] == 4'h8);
assign w_ldilo = (w_cis_op[4:0] == 5'h9);
assign w_ldilo = (w_cis_op[4:0] == 5'h09);
assign w_ALU = (!w_cis_op[4]) // anything with [4]==0, but ...
&&(w_cis_op[3:1] != 3'h7); // not the divide
assign w_add = (w_cis_op[4:0] == 5'h02);
assign w_mem = (w_cis_op[4:3] == 2'b10)&&(w_cis_op[2:1] !=2'b00);
assign w_sto = (w_mem)&&( w_cis_op[0]);
assign w_div = (!iword[`CISBIT])&&(w_op[4:1] == 4'h7);
assign w_fpu = (!iword[`CISBIT])&&(w_op[4:3] == 2'b11)
&&(w_dcdR[3:1] != 3'h7)&&(w_op[2:1] != 2'b00);
// If the result register is either CC or PC, and this would otherwise
// be a floating point instruction with floating point opcode of 0,
// then this is a NOOP.
assign w_special= (!iword[`CISBIT])&&((!OPT_FPU)||(w_dcdR[3:1]==3'h7))
&&(w_op[4:2] == 3'b111);
assign w_break = (w_special)&&(w_op[4:0]==5'h1c);
assign w_lock = (w_special)&&(w_op[4:0]==5'h1d);
assign w_sim = (w_special)&&(w_op[4:0]==5'h1e);
assign w_noop = (w_special)&&(w_op[4:0]==5'h1f);
 
 
// w_dcdR (4 LUTs)
212,25 → 237,14
// moves in iword[18] but only for the supervisor, and the other
// four bits encoded in the instruction.
//
assign w_dcdR = { ((!iword[31])&&(w_mov)&&(~i_gie))?iword[18]:i_gie,
assign w_dcdR = { ((!iword[`CISBIT])&&(!OPT_NO_USERMODE)&&(w_mov)&&(!i_gie))?iword[`IMMSEL]:i_gie,
iword[30:27] };
// 2 LUTs
//
// If the result register is either CC or PC, and this would otherwise
// be a floating point instruction with floating point opcode of 0,
// then this is a NOOP.
assign w_lock = (!iword[31])&&(w_op[4:0]==5'h1d)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
assign w_noop = (!iword[31])&&(w_op[4:0] == 5'h1f)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1] == 3'h7))
||(IMPLEMENT_FPU==0));
 
// dcdB - What register is used in the opB?
//
assign w_dcdB[4] = ((!iword[31])&&(w_mov)&&(~i_gie))?iword[13]:i_gie;
assign w_dcdB[3:0]= (iword[31])
? (((!iword[23])&&(iword[26:25]==2'b10))
assign w_dcdB[4] = ((!iword[`CISBIT])&&(w_mov)&&(!OPT_NO_USERMODE)&&(!i_gie))?iword[13]:i_gie;
assign w_dcdB[3:0]= (iword[`CISBIT])
? (((!iword[`CISIMMSEL])&&(iword[26:25]==2'b10))
? `CPU_SP_REG : iword[22:19])
: iword[17:14];
 
246,26 → 260,18
assign w_dcdB_pc = (w_rB)&&(w_dcdB[3:0] == `CPU_PC_REG);
assign w_dcdB_cc = (w_rB)&&(w_dcdB[3:0] == `CPU_CC_REG);
 
// Under what condition will we execute this
// instruction? Only the load immediate instruction
// is completely unconditional.
//
// 3+4 LUTs
assign w_cond = ((w_ldi)||(iword[31])) ? 4'h8 :
// Under what condition will we execute this instruction? Only the
// load immediate instruction and the CIS instructions are completely
// unconditional. Well ... not quite. The BREAK, LOCK, and SIM/NOOP
// instructions are also unconditional.
//
assign w_cond = ((w_ldi)||(w_special)||(iword[`CISBIT])) ? 4'h8 :
{ (iword[21:19]==3'h0), iword[21:19] };
 
// 1 LUT
assign w_mem = (w_cis_op[4:3] == 2'b10)&&(w_cis_op[2:1] !=2'b00);
assign w_sto = (w_mem)&&( w_cis_op[0]);
// 1 LUT
assign w_div = (!iword[31])&&(w_op[4:1] == 4'h7);
// 2 LUTs
assign w_fpu = (!iword[31])&&(w_op[4:3] == 2'b11)
&&(w_dcdR[3:1] != 3'h7)&&(w_op[2:1] != 2'b00);
//
// rA - do we need to read register A?
assign w_rA = // Floating point reads reg A
((w_fpu)&&(w_cis_op[4:1] != 4'hf))
((w_fpu)&&(OPT_FPU))
// Divide's read A
||(w_div)
// ALU ops read A,
278,25 → 284,24
// rB -- do we read a register for operand B? Specifically, do we
// add the registers value to the immediate to create opB?
assign w_rB = (w_mov)
||((!iword[31])&&(iword[18])&&(!w_ldi))
||(( iword[31])&&(iword[23])&&(!w_ldi))
||((!iword[`CISBIT])&&(iword[`IMMSEL])&&(!w_ldi)&&(!w_special))
||(( iword[`CISBIT])&&(iword[`CISIMMSEL])&&(!w_ldi))
// If using compressed instruction sets,
// we *always* read on memory operands.
||(( iword[31])&&(w_mem));
||(( iword[`CISBIT])&&(w_mem));
 
// wR -- will we be writing our result back?
// wR_n = !wR
// 1 LUT: All but STO, NOOP/BREAK/LOCK, and CMP/TST write back to w_dcdR
// All but STO, NOOP/BREAK/LOCK, and CMP/TST write back to w_dcdR
assign w_wR_n = (w_sto)
||((!iword[31])&&(w_cis_op[4:3]==2'b11)
&&(w_cis_op[2:1]!=2'b00)
&&(w_dcdR[3:1]==3'h7))
||(w_special)
||(w_cmptst);
assign w_wR = ~w_wR_n;
assign w_wR = !w_wR_n;
//
// wF -- do we write flags when we are done?
//
assign w_wF = (w_cmptst)
||((w_cond[3])&&((w_fpu)||(w_div)
||((w_cond[3])&&(((w_fpu)&&(OPT_FPU))||(w_div)
||((w_ALU)&&(!w_mov)&&(!w_ldilo)&&(!w_brev)
&&(w_dcdR[3:1] != 3'h7))));
 
313,27 → 318,33
// MOVE immediates have one less bit
:((w_mov) ?{ {(23-13){iword[12]}}, iword[12:0] }
// Normal Op-B immediate ... 18 or 14 bits
:((~iword[18]) ? { {(23-18){iword[17]}}, iword[17:0] }
:((!iword[`IMMSEL]) ? { {(23-18){iword[17]}}, iword[17:0] }
: { {(23-14){iword[13]}}, iword[13:0] }
));
 
`ifdef OPT_CIS
wire [7:0] w_halfbits;
assign w_halfbits = iword[23:16];
generate if (OPT_CIS)
begin : GEN_CIS_IMMEDIATE
wire [7:0] w_halfbits;
assign w_halfbits = iword[`CISIMMSEL:16];
 
wire [7:0] w_halfI;
assign w_halfI = (iword[26:24]==3'h6) ? w_halfbits[7:0]
:(w_halfbits[7])?
{ {(6){w_halfbits[2]}}, w_halfbits[1:0]}
:{ w_halfbits[6], w_halfbits[6:0] };
assign w_I = (iword[31])?{{(23-8){w_halfI[7]}}, w_halfI }:w_fullI;
`else
assign w_I = w_fullI;
`endif
wire [7:0] w_halfI;
assign w_halfI = (iword[26:24]==3'h6) ? w_halfbits[7:0] // 8'b for LDI
:(w_halfbits[7])?
{ {(6){w_halfbits[2]}}, w_halfbits[1:0]}
:{ w_halfbits[6], w_halfbits[6:0] };
assign w_I = (iword[`CISBIT])
? {{(23-8){w_halfI[7]}}, w_halfI }
: w_fullI;
 
end else begin : GEN_NOCIS_IMMEDIATE
 
assign w_I = w_fullI;
 
end endgenerate
 
assign w_Iz = (w_I == 0);
 
 
`ifdef OPT_CIS
//
// The o_phase parameter is special. It needs to let the software
// following know that it cannot break/interrupt on an o_phase asserted
341,213 → 352,276
// half of a CIS instruction. To do this, o_phase must be asserted
// when the first instruction half is valid, but not asserted on either
// a 32-bit instruction or the second half of a 2x16-bit instruction.
reg r_phase;
initial r_phase = 1'b0;
always @(posedge i_clk)
if ((i_rst) // When no instruction is in the pipe, phase is zero
||(o_early_branch)||(w_ljmp_dly))
generate if (OPT_CIS)
begin : GEN_CIS_PHASE
reg r_phase;
 
// Phase is '1' on the first instruction of a two-part set
// But, due to the delay in processing, it's '1' when our
// output is valid for that first part, but that'll be the
// same time we are processing the second part ... so it may
// look to us like a '1' on the second half of processing.
 
// When no instruction is in the pipe, phase is zero
initial r_phase = 1'b0;
always @(posedge i_clk)
if ((i_reset)||(w_ljmp_dly))
r_phase <= 1'b0;
else if ((i_ce)&&(i_pf_valid))
r_phase <= (o_phase)? 1'b0
: ((i_instruction[31])&&(i_pf_valid));
else if (i_ce)
else if ((i_ce)&&(pf_valid))
begin
if (o_phase)
// CIS instructions only have two parts. On
// the second part (o_phase is true), return
// back to the first
r_phase <= 0;
else
r_phase <= (i_instruction[`CISBIT])&&(!i_illegal);
end else if (i_ce)
r_phase <= 1'b0;
// Phase is '1' on the first instruction of a two-part set
// But, due to the delay in processing, it's '1' when our output is
// valid for that first part, but that'll be the same time we
// are processing the second part ... so it may look to us like a '1'
// on the second half of processing.
 
assign o_phase = r_phase;
`else
assign o_phase = 1'b0;
`endif
assign o_phase = r_phase;
end else begin
assign o_phase = 1'b0;
end endgenerate
 
 
initial o_illegal = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_illegal <= 1'b0;
else if (i_ce)
begin
`ifdef OPT_CIS
o_illegal <= (i_illegal);
`else
o_illegal <= ((i_illegal) || (i_instruction[31]));
`endif
if ((IMPLEMENT_MPY==0)&&((w_cis_op[4:1]==4'h5)||(w_cis_op[4:0]==5'h0c)))
o_illegal <= 1'b1;
if (i_ce)
begin
if (OPT_PIPELINED)
o_illegal <= ((i_illegal)
&&((!o_phase)||(!o_valid)))
||((o_illegal)&&(o_phase)&&(o_valid));
else
o_illegal <= (i_illegal)&&(!o_phase);
if ((!OPT_CIS)&&(i_instruction[`CISBIT]))
o_illegal <= 1'b1;
if ((!OPT_MPY)&&(w_mpy))
o_illegal <= 1'b1;
 
if ((IMPLEMENT_DIVIDE==0)&&(w_div))
o_illegal <= 1'b1;
else if ((IMPLEMENT_DIVIDE!=0)&&(w_div)&&(w_dcdR[3:1]==3'h7))
o_illegal <= 1'b1;
if ((!OPT_DIVIDE)&&(w_div))
o_illegal <= 1'b1;
else if ((OPT_DIVIDE)&&(w_div)&&(w_dcdR[3:1]==3'h7))
o_illegal <= 1'b1;
 
 
if ((IMPLEMENT_FPU==0)&&(w_fpu))
o_illegal <= 1'b1;
if ((!OPT_FPU)&&(w_fpu))
o_illegal <= 1'b1;
 
if ((w_cis_op[4:3]==2'b11)&&(w_cis_op[2:1]!=2'b00)
&&(w_dcdR[3:1]==3'h7)
&&(
(w_cis_op[2:0] != 3'h4) // BREAK
`ifdef OPT_PIPELINED
&&(w_cis_op[2:0] != 3'h5) // LOCK
`endif
// SIM instructions are always illegal
&&(w_cis_op[2:0] != 3'h7))) // NOOP
o_illegal <= 1'b1;
end
if ((!OPT_SIM)&&(w_sim))
// Simulation instructions on real hardware should
// always cause an illegal instruction error
o_illegal <= 1'b1;
 
// There are two (missing) special instructions
// These should cause an illegal instruction error
if ((w_dcdR[3:1]==3'h7)&&(w_cis_op[4:1]==4'b1101))
o_illegal <= 1'b1;
 
// If the lock function isn't implemented, this should
// also cause an illegal instruction error
if ((!OPT_LOCK)&&(w_lock))
o_illegal <= 1'b1;
end
 
initial o_pc = 0;
always @(posedge i_clk)
if (i_ce)
if ((i_ce)&&((o_phase)||(i_pf_valid)))
begin
o_pc[0] <= 1'b0;
 
if (OPT_CIS)
begin
`ifdef OPT_CIS
if (!o_phase)
o_gie<= i_gie;
 
if (iword[31])
if (iword[`CISBIT])
begin
if (o_phase)
o_pc <= o_pc + 1'b1;
else if (i_pf_valid)
o_pc <= { i_pc, 1'b1 };
o_pc[AW+1:1] <= o_pc[AW+1:1] + 1'b1;
else
o_pc <= { i_pc[AW+1:2], 1'b1, 1'b0 };
end else begin
// The normal, non-CIS case
o_pc <= { i_pc + 1'b1, 1'b0 };
o_pc <= { i_pc[AW+1:2] + 1'b1, 2'b00 };
end
`else
o_gie<= i_gie;
o_pc <= { i_pc + 1'b1, 1'b0 };
`endif
end else begin
// The normal, non-CIS case
o_pc <= { i_pc[AW+1:2] + 1'b1, 2'b00 };
end
end
 
// Under what condition will we execute this
// instruction? Only the load immediate instruction
// is completely unconditional.
o_cond <= w_cond;
// Don't change the flags on conditional instructions,
// UNLESS: the conditional instruction was a CMP
// or TST instruction.
o_wF <= w_wF;
initial o_dcdR = 0;
initial o_dcdA = 0;
initial o_dcdB = 0;
initial o_DV = 0;
initial o_FP = 0;
initial o_lock = 0;
always @(posedge i_clk)
if (i_ce)
begin
// Under what condition will we execute this
// instruction? Only the load immediate instruction
// is completely unconditional.
o_cond <= w_cond;
// Don't change the flags on conditional instructions,
// UNLESS: the conditional instruction was a CMP
// or TST instruction.
o_wF <= w_wF;
 
// Record what operation/op-code (4-bits) we are doing
// Note that LDI magically becomes a MOV
// instruction here. That way it's a pass through
// the ALU. Likewise, the two compare instructions
// CMP and TST becomes SUB and AND here as well.
// We keep only the bottom four bits, since we've
// already done the rest of the decode necessary to
// settle between the other instructions. For example,
// o_FP plus these four bits uniquely defines the FP
// instruction, o_DV plus the bottom of these defines
// the divide, etc.
o_op <= ((w_ldi)||(w_noop))? 4'hd : w_cis_op[3:0];
// Record what operation/op-code (4-bits) we are doing
// Note that LDI magically becomes a MOV
// instruction here. That way it's a pass through
// the ALU. Likewise, the two compare instructions
// CMP and TST becomes SUB and AND here as well.
// We keep only the bottom four bits, since we've
// already done the rest of the decode necessary to
// settle between the other instructions. For example,
// o_FP plus these four bits uniquely defines the FP
// instruction, o_DV plus the bottom of these defines
// the divide, etc.
o_op <= w_cis_op[3:0];
if ((w_ldi)||(w_noop)||(w_lock))
o_op <= 4'hd;
 
// Default values
o_dcdR <= { w_dcdR_cc, w_dcdR_pc, w_dcdR};
o_dcdA <= { w_dcdA_cc, w_dcdA_pc, w_dcdA};
o_dcdB <= { w_dcdB_cc, w_dcdB_pc, w_dcdB};
o_wR <= w_wR;
o_rA <= w_rA;
o_rB <= w_rB;
r_I <= w_I;
o_zI <= w_Iz;
// Default values
o_dcdR <= { w_dcdR_cc, w_dcdR_pc, w_dcdR};
o_dcdA <= { w_dcdA_cc, w_dcdA_pc, w_dcdA};
o_dcdB <= { w_dcdB_cc, w_dcdB_pc, w_dcdB};
o_wR <= w_wR;
o_rA <= w_rA;
o_rB <= w_rB;
r_I <= w_I;
o_zI <= w_Iz;
 
// Turn a NOOP into an ALU operation--subtract in
// particular, although it doesn't really matter as long
// as it doesn't take longer than one clock. Note
// also that this depends upon not setting any registers
// or flags, which should already be true.
o_ALU <= (w_ALU)||(w_ldi)||(w_cmptst)||(w_noop);
o_M <= w_mem;
o_DV <= w_div;
o_FP <= w_fpu;
// Turn a NOOP into an ALU operation--subtract in
// particular, although it doesn't really matter as long
// as it doesn't take longer than one clock. Note
// also that this depends upon not setting any registers
// or flags, which should already be true.
o_ALU <= (w_ALU)||(w_ldi)||(w_cmptst)||(w_noop)
||((!OPT_LOCK)&&(w_lock));
o_M <= w_mem;
o_DV <= (OPT_DIVIDE)&&(w_div);
o_FP <= (OPT_FPU)&&(w_fpu);
 
o_break <= (!iword[31])&&(w_op[4:0]==5'h1c)&&(
((IMPLEMENT_FPU>0)&&(w_dcdR[3:1]==3'h7))
||(IMPLEMENT_FPU==0));
`ifdef OPT_PIPELINED
r_lock <= w_lock;
`endif
`ifdef OPT_CIS
r_nxt_half <= { iword[31], iword[14:0] };
`endif
o_break <= w_break;
o_lock <= (OPT_LOCK)&&(w_lock);
 
`ifdef VERILATOR
if (OPT_CIS)
r_nxt_half <= { iword[14:0] };
else
r_nxt_half <= 0;
 
if (OPT_SIM)
begin
// Support the SIM instruction(s)
o_sim <= (!iword[31])&&(w_op[4:1] == 4'hf)
&&(w_dcdR[3:1] == 3'h7);
`else
o_sim <= (w_sim)||(w_noop);
o_sim_immv <= iword[22:0];
end else begin
o_sim <= 1'b0;
`endif
o_sim_immv <= iword[22:0];
o_sim_immv <= 0;
end
end
 
`ifdef OPT_PIPELINED
assign o_lock = r_lock;
`else
assign o_lock = 1'b0;
`endif
assign o_preA = w_dcdA;
assign o_preB = w_dcdB;
 
generate
if (EARLY_BRANCHING!=0)
begin
reg r_early_branch, r_ljmp;
reg [(AW-1):0] r_branch_pc;
generate if (OPT_EARLY_BRANCHING)
begin : GEN_EARLY_BRANCH_LOGIC
reg r_early_branch,
r_early_branch_stb,
r_ljmp;
reg [(AW+1):0] r_branch_pc;
 
initial r_ljmp = 1'b0;
always @(posedge i_clk)
if (i_rst)
if (i_reset)
r_ljmp <= 1'b0;
else if (i_ce)
begin
if ((r_ljmp)&&(pf_valid))
r_ljmp <= 1'b0;
`ifdef OPT_CIS
else if ((i_ce)&&(o_phase))
else if (o_early_branch_stb)
r_ljmp <= 1'b0;
else if (pf_valid)
begin
if ((OPT_CIS)&&(iword[`CISBIT]))
r_ljmp <= w_cis_ljmp;
else
r_ljmp <= (w_ljmp);
end else if ((OPT_CIS)&&(o_phase)&&(iword[`CISBIT]))
r_ljmp <= w_cis_ljmp;
`endif
else if ((i_ce)&&(i_pf_valid))
r_ljmp <= (w_ljmp);
end
assign o_ljmp = r_ljmp;
 
initial r_early_branch = 1'b0;
initial r_early_branch_stb = 1'b0;
always @(posedge i_clk)
if (i_rst)
r_early_branch <= 1'b0;
else if ((i_ce)&&(i_pf_valid))
if (i_reset)
begin
r_early_branch <= 1'b0;
r_early_branch_stb <= 1'b0;
end else if ((i_ce)&&(pf_valid))
begin
if (r_ljmp)
// LOD (PC),PC
r_early_branch <= 1'b1;
else if ((!iword[31])&&(iword[30:27]==`CPU_PC_REG)
begin
// LW (PC),PC
r_early_branch <= 1'b1;
r_early_branch_stb <= 1'b1;
end else if ((!iword[`CISBIT])&&(iword[30:27]==`CPU_PC_REG)
&&(w_cond[3]))
begin
if ((w_op[4:0]==5'h02)&&(!iword[18]))
if ((w_add)&&(!iword[`IMMSEL]))
begin
// Add x,PC
r_early_branch <= 1'b1;
else
r_early_branch_stb <= 1'b1;
end else begin
r_early_branch <= 1'b0;
end else
r_early_branch_stb <= 1'b0;
end
// LDI #x,PC is no longer supported
end else begin
r_early_branch <= 1'b0;
r_early_branch_stb <= 1'b0;
end
end else if (i_ce)
r_early_branch <= 1'b0;
begin
r_early_branch <= 1'b0;
r_early_branch_stb <= 1'b0;
end else
r_early_branch_stb <= 1'b0;
 
initial r_branch_pc = 0;
always @(posedge i_clk)
if (i_ce)
begin
if (r_ljmp)
r_branch_pc <= iword[(AW+1):2];
else // Add x,PC
r_branch_pc <= i_pc
+ {{(AW-15){iword[17]}},iword[16:2]}
+ {{(AW-1){1'b0}},1'b1};
if (i_ce)
begin
if (r_ljmp)
r_branch_pc <= { iword[(AW+1):2],
2'b00 };
else begin
// Add x,PC
r_branch_pc[AW+1:2] <= i_pc[AW+1:2]
+ {{(AW-15){iword[17]}},iword[16:2]}
+ {{(AW-1){1'b0}},1'b1};
r_branch_pc[1:0] <= 2'b00;
end
end
 
assign w_ljmp_dly = r_ljmp;
assign o_early_branch = r_early_branch;
assign o_early_branch_stb = r_early_branch_stb;
assign o_branch_pc = r_branch_pc;
end else begin
assign w_ljmp_dly = 1'b0;
assign o_early_branch = 1'b0;
assign o_branch_pc = {(AW){1'b0}};
assign o_early_branch = 1'b0;
assign o_early_branch_stb = 1'b0;
assign o_branch_pc = {(AW+2){1'b0}};
assign o_ljmp = 1'b0;
 
// verilator lint_off UNUSED
wire early_branch_unused;
assign early_branch_unused = w_add;
// verilator lint_on UNUSED
end endgenerate
 
 
561,14 → 635,64
// Note that we're not using iword here ... there's a lot of logic
// taking place, and it's only valid if the new word is not compressed.
//
reg r_valid;
`ifdef OPT_PIPELINED_BUS_ACCESS
initial r_pipe = 1'b0;
always @(posedge i_clk)
if (i_ce)
r_pipe <= (r_valid)&&((i_pf_valid)||(o_phase))
reg r_valid, r_insn_is_pipeable;
generate if (OPT_OPIPE)
begin : GEN_OPIPE
reg r_pipe;
 
wire [13:0] pipe_addr_diff;
assign pipe_addr_diff = w_I[13:0] - r_I[13:0];
 
// Pipeline logic is too extreme for a single clock.
// Let's break it into two clocks, using r_insn_is_pipeable.
// If this flag is true, then the instruction associated
// with the current output *may* have a pipeable instruction
// following it.
//
initial r_insn_is_pipeable = 1'b0;
always @(posedge i_clk)
if (i_reset)
r_insn_is_pipeable <= 1'b0;
else if ((i_ce)&&((!pf_valid)||(i_illegal))&&(!o_phase))
// Pipeline bubble, can't pipe through it
r_insn_is_pipeable <= 1'b0;
else if (o_ljmp)
r_insn_is_pipeable <= 1'b0;
else if ((i_ce)&&((!OPT_CIS)&&(i_instruction[`CISBIT])))
r_insn_is_pipeable <= 1'b0;
else if (i_ce)
begin // This is a valid instruction
r_insn_is_pipeable <= (w_mem)&&(w_rB)
// PC (and CC) registers can change
// underneath us. Therefore they cannot
// be used as a base register for piped
// memory ops
&&(w_dcdB[3:1] != 3'h7)
// Writes to PC or CC will destroy any
// possibility of piping--since they
// could create a jump
&&(w_dcdR[3:1] != 3'h7)
//
// Loads landing in the current address
// pointer register are not allowed,
// as they could then be used to violate
// our rule(s)
&&((w_cis_op[0])||(w_dcdB != w_dcdA));
end // else
// The pipeline is stalled
 
 
initial r_pipe = 1'b0;
always @(posedge i_clk)
if (i_reset)
r_pipe <= 1'b0;
else if (i_ce)
r_pipe <= ((pf_valid)||(o_phase))
// The last operation must be capable of
// being followed by a pipeable memory op
&&(r_insn_is_pipeable)
// Both must be memory operations
&&(w_mem)&&(o_M)
&&(w_mem)
// Both must be loads, or both stores
&&(o_op[0] == w_cis_op[0])
// Both must be register ops
575,34 → 699,1161
&&(w_rB)
// Both must use the same register for B
&&(w_dcdB[3:0] == o_dcdB[3:0])
// CC or PC registers are not valid addresses
// Captured above
// But ... the result can never be B
&&((o_op[0])
||(w_dcdB[3:0] != o_dcdA[3:0]))
// Both need to be in the same mode, supervisor or user
&&(i_gie == o_gie)
// Captured above
//
// Reads to CC or PC not allowed
// &&((o_op[0])||(w_dcdR[3:1] != 3'h7))
// Prior-reads to CC or PC not allowed
// Captured above
// Same condition, or no condition before
&&((i_instruction[21:19]==o_cond[2:0])
&&((w_cond[2:0]==o_cond[2:0])
||(o_cond[2:0] == 3'h0))
// Same immediate
&&((w_I[13:2]==r_I[13:2])
||({1'b0, w_I[13:2]}==(r_I[13:2]+12'h1)));
assign o_pipe = r_pipe;
`else
assign o_pipe = 1'b0;
`endif
// Same or incrementing immediate
&&(w_I[13]==r_I[13])
&&(pipe_addr_diff <= 14'h4);
assign o_pipe = r_pipe;
end else begin
assign o_pipe = 1'b0;
always @(*)
r_insn_is_pipeable = 1'b0;
 
always @(posedge i_clk)
if (i_rst)
// verilator lint_off UNUSED
wire unused_pipable;
assign unused_pipable = r_insn_is_pipeable;
// verilator lint_on UNUSED
end endgenerate
 
initial r_valid = 1'b0;
generate if (OPT_PIPELINED)
begin : GEN_DCD_VALID
 
always @(posedge i_clk)
if (i_reset)
r_valid <= 1'b0;
else if (i_ce)
r_valid <= ((pf_valid)||(o_phase))&&(!o_ljmp);
else if (!i_stalled)
r_valid <= 1'b0;
 
end else begin : GEN_DCD_VALID
 
always @(posedge i_clk)
if (i_reset)
r_valid <= 1'b0;
else if (i_ce)
r_valid <= ((i_pf_valid)||(o_phase)||(i_illegal))
&&(!o_ljmp)&&(!o_early_branch);
else if (!i_stalled)
r_valid <= ((pf_valid)||(o_phase))&&(!o_ljmp);
else
r_valid <= 1'b0;
 
end endgenerate
 
assign o_valid = r_valid;
 
 
assign o_I = { {(32-22){r_I[22]}}, r_I[21:0] };
 
// Make Verilator happy across all our various options
// verilator lint_off UNUSED
wire [5:0] possibly_unused;
assign possibly_unused = { w_lock, w_ljmp, w_ljmp_dly, w_cis_ljmp, i_pc[1:0] };
// verilator lint_on UNUSED
`ifdef FORMAL
reg f_past_valid;
 
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
`define ASSERT assert
`ifdef IDECODE
`define ASSUME assume
`else
`define ASSUME assert
`endif
always @(posedge i_clk)
if ((i_ce)&&(i_pf_valid)&&(!o_phase))
f_insn_word <= i_instruction;
// Capture the i_gie value that accompanies each instruction word accepted
// from the prefetch (i_ce and i_pf_valid high, o_phase low).  This is the
// same condition under which f_insn_word is captured, so the two registers
// always describe the same instruction.
//
// A non-blocking assignment is required here: this is a clocked register
// that is read by other processes, and a blocking assignment would create a
// simulation race between this block and its readers.
always @(posedge i_clk)
if ((i_ce)&&(i_pf_valid)&&(!o_phase))
	f_insn_gie <= i_gie;
always @(*)
if (o_phase)
assert(r_nxt_half == f_insn_word[14:0]);
 
////////////////////////////
//
//
// Assumptions about our inputs
//
//
///////////////////////////
// Constrain the clock-enable input, i_ce.
//   - With OPT_PIPELINED set, i_ce must be high exactly when the decoder's
//     output is either not valid or not stalled.
//   - Otherwise, i_ce must simply be the inverse of i_stalled.
// `ASSUME expands to assume() only when IDECODE is defined; in any other
// build it expands to assert(), turning this constraint into a check.
always @(*)
if (OPT_PIPELINED)
`ASSUME(i_ce == ((!o_valid)||(!i_stalled)));
else
`ASSUME(i_ce == !i_stalled);
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
`ASSERT(!o_valid);
// `ASSERT(!o_illegal);
`ASSERT(!o_phase);
`ASSERT(!o_ljmp);
`ASSERT(!o_pipe);
 
`ASSUME(!i_pf_valid);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(!i_reset))
`ASSUME(i_gie == $past(i_gie));
 
`ifdef IDECODE
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&(!$past(i_ce))
&&($past(f_past_valid))&&(!$past(i_reset,2))&&(!$past(i_ce,2)))
assume(i_ce);
`endif
 
reg f_new_insn, f_last_insn;
 
// f_new_insn: high on the clock following one in which pf_valid was set
// while i_stalled was clear.  Cleared by i_reset, and initially zero.
initial f_new_insn = 1'b0;
always @(posedge i_clk)
if (i_reset)
f_new_insn <= 1'b0;
else
f_new_insn <= ((pf_valid)&&(!i_stalled));
 
// f_last_insn: high on the clock following one in which the decoder held a
// valid output (o_valid) that was stalled (i_stalled).  The properties that
// follow use it to require that inputs and outputs hold steady across such
// a stall.  Cleared by i_reset, and initially zero.
initial f_last_insn = 1'b0;
always @(posedge i_clk)
if (i_reset)
f_last_insn <= 1'b0;
else
f_last_insn <= (o_valid)&&(i_stalled);
 
always @(posedge i_clk)
if ((f_past_valid)&&(f_last_insn)&&(!i_reset))
begin
if (($past(pf_valid))&&(pf_valid))
begin
`ASSUME(i_instruction == $past(i_instruction));
`ASSUME(i_gie == $past(i_gie));
`ASSUME(i_pc == $past(i_pc));
`ASSUME(i_illegal == $past(i_illegal));
end
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(o_early_branch_stb))
`ASSUME(!pf_valid);
 
always @(*)
`ASSUME(i_pc[1:0] == 2'b00);
always @(*)
if ((o_valid)&&(!o_early_branch))
`ASSERT((o_illegal)||(o_pc[1] == o_phase));
 
// f_result packs the decoder's outputs (together with i_gie) into a single
// vector, so that the property following it can check all of them for
// stability across a stall with one comparison.
//
// NOTE(review): the declared width sums to 104+AW bits, which looks
// narrower than the concatenation assigned to it.  o_pc is indexed as
// o_pc[AW+1:2] elsewhere in this file, and o_branch_pc is tied off to
// {(AW+2){1'b0}}, which suggests those two fields alone need 2*(AW+2) bits.
// If so, the most significant fields (o_valid, o_phase, o_illegal, ...) are
// truncated and never compared.  TODO confirm against the port declarations
// and widen if necessary.
wire [4+21+32+1+4+1+4+11+AW+3+23-1:0] f_result;
assign f_result = { o_valid, o_phase, o_illegal,
i_gie, o_dcdR, o_dcdA, o_dcdB, o_I, o_zI, o_cond,
o_wF, o_op, o_ALU, o_M, o_DV, o_FP, o_break, o_lock,
o_wR, o_rA, o_rB, o_early_branch, o_branch_pc, o_ljmp,
o_pipe, o_sim, o_sim_immv, o_pc };
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&(f_last_insn))
`ASSERT(f_result == $past(f_result));
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(pf_valid))
&&(!$past(o_ljmp)))
`ASSERT((!OPT_PIPELINED)||(o_valid));
 
always @(posedge i_clk)
if ((f_past_valid)&&(f_new_insn)
&&($past(pf_valid))&&($past(i_illegal))&&(!$past(o_phase)))
`ASSERT(o_illegal);
 
`ifdef IDECODE
// Let's walk through some basic instructions
// First 8-instructions, SUB - ASR
// Decode checks for the first group of opcodes:
//   - full-width words (CISBIT clear) with iword[26:25]==2'b00, and
//   - compressed words (CISBIT set) with iword[26:24] < 3'b011.
// Either form must decode as a plain ALU operation that both reads and
// writes register A, and as nothing else.  Any other instruction word must
// not raise w_add.
always @(*)
if ((!iword[`CISBIT])&&(iword[26:25]==2'b00))
begin
	// None of the other instruction classes may be flagged
	`ASSERT(!w_cmptst);
	`ASSERT(!w_div);
	`ASSERT(!w_mem);
	`ASSERT(!w_sto);
	`ASSERT(!w_ldi);
	`ASSERT(!w_mov);
	`ASSERT(!w_brev);
	`ASSERT(!w_ldilo);
	`ASSERT(!w_special);
	`ASSERT(!w_fpu);
	`ASSERT(!w_mpy);
	// Register usage and register selection
	`ASSERT((w_rA)&&(w_wR)&&(w_ALU));
	`ASSERT(w_rB == iword[`IMMSEL]);
	`ASSERT(w_dcdA[4] == i_gie);
	`ASSERT(w_dcdB[4] == i_gie);
	`ASSERT(w_dcdA[3:0] == iword[30:27]);
	`ASSERT(w_dcdB[3:0] == iword[17:14]);

	`ASSERT(w_cis_op == w_op);

	// Condition codes: w_cond[3] is set for the all-zeros condition field
	`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
	`ASSERT(w_cond[2:0] == iword[21:19]);
	`ASSERT((w_wF == w_cond[3])||(w_dcdA[3:1]==3'b111));
end else if ((iword[`CISBIT])&&(iword[26:24]<3'b011))
begin
	// None of the other instruction classes may be flagged
	`ASSERT(!w_cmptst);
	`ASSERT(!w_div);
	`ASSERT(!w_mem);
	`ASSERT(!w_sto);
	`ASSERT(!w_ldi);
	`ASSERT(!w_mov);
	`ASSERT(!w_brev);
	`ASSERT(!w_ldilo);
	`ASSERT(!w_special);
	`ASSERT(!w_fpu);
	`ASSERT(!w_mpy);
	// Register usage and register selection
	`ASSERT((w_rA)&&(w_wR)&&(w_ALU));
	`ASSERT(w_rB == iword[`CISIMMSEL]);
	`ASSERT(w_dcdA[4] == i_gie);
	`ASSERT(w_dcdB[4] == i_gie);
	`ASSERT(w_dcdA[3:0] == iword[30:27]);
	`ASSERT(w_dcdB[3:0] == iword[22:19]);

	// The three-bit compressed opcode maps onto the five-bit opcode.
	// (The field is three bits wide, so compare against three-bit
	// literals throughout.)
	if (iword[26:24] == 3'b000)
		`ASSERT(w_cis_op == 5'h0);
	else if (iword[26:24] == 3'b001)
		`ASSERT(w_cis_op == 5'h01);
	else // if (iword[26:24] == 3'b010)
		`ASSERT(w_cis_op == 5'h02);

	// Compressed instructions carry no condition field
	`ASSERT(w_cond == 4'h8);

	// Sign-extended immediate: 3 bits if a B register is named,
	// 7 bits otherwise
	if (iword[`CISIMMSEL])
		`ASSERT(w_I == { {(23-3){iword[18]}}, iword[18:16] });
	else
		`ASSERT(w_I == { {(23-7){iword[22]}}, iword[22:16] });
end else
	`ASSERT(!w_add);
 
// BREV and LDILO
always @(*)
if ((!iword[`CISBIT])&&((w_cis_op == 5'h8)
||(w_cis_op == 5'h09)))
begin
`ASSERT(!w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
if (w_cis_op == 5'h8)
begin
`ASSERT(w_brev);
`ASSERT(!w_ldilo);
`ASSERT((!w_rA)&&(w_wR)&&(w_ALU));
end else begin// if (w_cis_op == 5'h9)
`ASSERT(w_ldilo);
`ASSERT(!w_brev);
`ASSERT((w_rA)&&(w_wR)&&(w_ALU));
end
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT(!w_wF);
end else begin
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
end
 
//
// Multiply instructions
always @(*)
if ((!iword[`CISBIT])&&((w_cis_op == 5'ha)
||(w_cis_op == 5'h0b)
||(w_cis_op == 5'h0c)))
begin
`ASSERT(w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT((w_rA)&&(w_wR)&&(w_ALU));
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT((w_wF == w_cond[3])||(w_dcdA[3:1]==3'b111));
end else
`ASSERT(!w_mpy);
 
//
// Move instruction
always @(*)
if ((!iword[`CISBIT])&&((w_cis_op == 5'hd)))
begin
`ASSERT(w_mov);
`ASSERT(!w_div);
`ASSERT(!w_mpy);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT((!w_rA)&&(w_wR)&&(w_ALU));
`ASSERT(w_rB);
`ASSERT(w_dcdA[4] == ((i_gie)||(iword[`IMMSEL])));
`ASSERT(w_dcdB[4] == ((i_gie)||(iword[13])));
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT(!w_wF);
end else if ((iword[`CISBIT])&&(iword[26:24]==3'b111))
begin
`ASSERT(w_mov);
`ASSERT(!w_div);
`ASSERT(!w_mpy);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT((!w_rA)&&(w_wR)&&(w_ALU));
`ASSERT(w_rB);
`ASSERT(w_dcdA[4] == (i_gie));
`ASSERT(w_dcdB[4] == (i_gie));
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[22:19]);
 
`ASSERT(w_cis_op == 5'h0d);
 
`ASSERT(w_cond == 4'h8);
`ASSERT(!w_wF);
end else
`ASSERT(!w_mov);
 
//
// Divide instruction
always @(*)
if ((!iword[`CISBIT])&&(iword[26:23]==4'b0111))
begin
`ASSERT(w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(!w_mpy);
`ASSERT((w_rA)&&(w_wR));
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT((w_wF == w_cond[3])||(w_dcdA[3:1]==3'b111));
end else
`ASSERT(!w_div);
 
//
// Comparison instructions
always @(*)
if ((!iword[`CISBIT])&&(iword[26:23]==4'b1000))
begin
`ASSERT(w_cmptst);
`ASSERT(!w_div);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(!w_mpy);
`ASSERT((w_rA)&&(!w_wR)&&(!w_ALU));
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT(w_wF);
end else if ((iword[`CISBIT])&&(iword[26:24]==3'b011))
begin
`ASSERT(w_cmptst);
`ASSERT(!w_div);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(!w_mpy);
`ASSERT((w_rA)&&(!w_wR)&&(!w_ALU));
`ASSERT(w_rB == iword[`CISIMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[22:19]);
 
`ASSERT(w_cis_op == 5'h10);
 
`ASSERT(w_cond == 4'h8);
if (iword[`CISIMMSEL])
`ASSERT(w_I == { {(23-3){iword[18]}}, iword[18:16] });
else
`ASSERT(w_I == { {(23-7){iword[22]}}, iword[22:16] });
`ASSERT(w_wF);
end else
`ASSERT(!w_cmptst);
 
always @(posedge i_clk)
if ((f_new_insn)&&($past(w_cmptst)))
`ASSERT(o_ALU);
 
//
// Memory instructions
always @(*)
if ((!iword[`CISBIT])&&(
(iword[26:23]==4'b1001) // Word
||(iword[26:23]==4'b1010) // Half-word, or short
||(iword[26:23]==4'b1011))) // Byte ops
begin
`ASSERT(w_mem);
`ASSERT(w_sto == iword[22]);
`ASSERT(!w_cmptst);
`ASSERT(!w_div);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(!w_mpy);
if (w_sto)
`ASSERT((w_rA)&&(!w_wR));
else
`ASSERT((!w_rA)&&(w_wR));
`ASSERT(!w_ALU);
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT(!w_wF);
end else if ((iword[`CISBIT])&&(iword[26:25]==2'b10))
begin
`ASSERT(w_mem);
`ASSERT(w_sto == iword[24]);
`ASSERT(!w_cmptst);
`ASSERT(!w_div);
`ASSERT(!w_ldi);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(!w_mpy);
if (w_sto)
`ASSERT((w_rA)&&(!w_wR));
else
`ASSERT((!w_rA)&&(w_wR));
`ASSERT(!w_ALU);
`ASSERT(w_rB);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
if (iword[`CISIMMSEL])
`ASSERT(w_dcdB[3:0] == iword[22:19]);
else
`ASSERT(w_dcdB[3:0] == `CPU_SP_REG);
 
if (w_sto)
`ASSERT(w_cis_op == 5'h13);
else
`ASSERT(w_cis_op == 5'h12);
 
`ASSERT(w_cond == 4'h8);
`ASSERT(!w_wF);
end else begin
`ASSERT(!w_sto);
`ASSERT(!w_mem);
end
 
always @(*)
if (w_sto)
`ASSERT(w_mem);
 
//
// LDI -- Load immediate
always @(*)
if ((!iword[`CISBIT])&&(w_op[4:1] == 4'hc))
begin
`ASSERT(w_ldi);
`ASSERT(!w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT((!w_rA)&&(w_wR)&&(!w_ALU));
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(w_rB == 1'b0);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond == 4'h8);
`ASSERT(!w_wF);
 
`ASSERT(w_Iz == (iword[22:0] == 0));
`ASSERT(w_I[22:0] == iword[22:0]);
end else if ((iword[`CISBIT])&&(iword[26:24] == 3'b110))
begin
`ASSERT(w_ldi);
`ASSERT(!w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT((!w_rA)&&(w_wR)&&(!w_ALU));
`ASSERT(!w_special);
`ASSERT(!w_fpu);
`ASSERT(w_rB == 1'b0);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
 
`ASSERT(w_cis_op[4:1] == 4'hc);
 
`ASSERT(w_cond == 4'h8);
`ASSERT(!w_wF);
 
`ASSERT(w_Iz == (iword[23:16] == 0));
`ASSERT(w_I[22:0] == { {(23-8){iword[23]}}, iword[23:16] });
end else
`ASSERT(!w_ldi);
`endif // IDECODE
 
always @(posedge i_clk)
if ((f_new_insn)&&($past(w_ldi)))
`ASSERT(o_ALU);
 
`ifdef IDECODE
always @(*)
if ((w_break)||(w_lock)||(w_sim)||(w_noop))
`ASSERT(w_special);
 
 
//
// FPU -- Floating point instructions
always @(*)
if ((!iword[`CISBIT])&&(OPT_FPU)&&(
(w_cis_op[4:1] == 4'hd)
||(w_cis_op[4:1] == 4'he)
||(w_cis_op[4:1] == 4'hf))
&&(iword[30:28] != 3'h7))
begin
`ASSERT(w_fpu);
`ASSERT(!w_ldi);
`ASSERT(!w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
`ASSERT((w_wR)&&(!w_ALU));
if ((w_cis_op == 5'he)||(w_cis_op == 5'hf))
`ASSERT(!w_rA);
else
`ASSERT(w_rA);
`ASSERT(!w_special);
`ASSERT(w_rB == iword[`IMMSEL]);
`ASSERT(w_dcdA[4] == i_gie);
`ASSERT(w_dcdB[4] == i_gie);
`ASSERT(w_dcdA[3:0] == iword[30:27]);
`ASSERT(w_dcdB[3:0] == iword[17:14]);
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond[3] == (iword[21:19] == 3'b000));
`ASSERT(w_cond[2:0] == iword[21:19]);
`ASSERT((w_wF == w_cond[3])||(w_dcdA[3:1]==3'b111));
end else
`ASSERT((!w_fpu)||(!OPT_FPU));
 
//
// Special instructions
always @(*)
if ((!iword[`CISBIT])&&(
(w_cis_op == 5'h1c)
||(w_cis_op == 5'h1d)
||(w_cis_op == 5'h1e)
||(w_cis_op == 5'h1f))
&&((iword[30:28] == 3'h7)||(!OPT_FPU)))
begin
`ASSERT(w_special);
if (w_cis_op == 5'h1c)
begin
`ASSERT(w_break);
`ASSERT(!w_lock);
`ASSERT(!w_sim);
`ASSERT(!w_noop);
end else if (w_cis_op == 5'h1d)
begin
`ASSERT(!w_break);
`ASSERT( w_lock);
`ASSERT(!w_sim);
`ASSERT(!w_noop);
end else if (w_cis_op == 5'h1e)
begin
`ASSERT(!w_break);
`ASSERT(!w_lock);
`ASSERT( w_sim);
`ASSERT(!w_noop);
end else begin
`ASSERT(!w_break);
`ASSERT(!w_lock);
`ASSERT(!w_sim);
`ASSERT( w_noop);
end
`ASSERT((!w_fpu)||(!OPT_FPU));
`ASSERT(!w_ldi);
`ASSERT(!w_mpy);
`ASSERT(!w_div);
`ASSERT(!w_cmptst);
`ASSERT(!w_mem);
`ASSERT(!w_sto);
`ASSERT(!w_mov);
`ASSERT(!w_brev);
`ASSERT(!w_ldilo);
 
`ASSERT((!w_rA)&&(!w_rB)&&(!w_wR)&&(!w_ALU));
 
`ASSERT(w_cis_op == w_op);
 
`ASSERT(w_cond == 4'h8);
`ASSERT(!w_wF);
end else begin
`ASSERT(!w_special);
`ASSERT(!w_break);
`ASSERT(!w_lock);
`ASSERT(!w_sim);
`ASSERT(!w_noop);
end
`endif
 
generate if (OPT_EARLY_BRANCHING)
begin
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce))&&(!$past(i_reset))&&(!i_reset))
begin
if ($past(pf_valid))
begin
if ($past(o_ljmp))
begin
// 2nd half of LW (PC),PC
`ASSERT(o_early_branch);
`ASSERT(o_early_branch_stb);
end else if ((!$past(iword[`CISBIT]))&&($past(w_add))
&&(!$past(w_rB))
&&($past(w_cond[3]))
&&(o_dcdR[4:0]=={ i_gie, 4'hf }))
begin
// ADD #x,PC
`ASSERT(o_early_branch);
`ASSERT(o_early_branch_stb);
end else if ((!$past(iword[`CISBIT]))
&&($past(w_cis_op == 5'h12))
&&($past(w_rB))
&&($past(w_cond[3]))
&&(o_zI)
&&(o_dcdB[4:0]=={ i_gie, 4'hf })
&&(o_dcdR[4:0]=={ i_gie, 4'hf }))
begin
// LW (PC),PC
`ASSERT(!o_early_branch);
`ASSERT(!o_early_branch_stb);
end else if ((OPT_CIS)&&($past(o_phase))
&&($past(w_cis_op == 5'h12))
&&($past(w_rB))
&&($past(w_cond[3]))
&&($past(w_Iz))
&&($past(w_dcdB_pc))
&&($past(w_dcdR_pc))
&&(o_dcdR[4:0]=={ i_gie, 4'hf }))
begin
// (CIS) LW (PC),PC
`ASSERT(!o_early_branch);
`ASSERT(!o_early_branch_stb);
end else begin
`ASSERT(!o_early_branch);
end
end else if ((OPT_CIS)&&($past(o_phase)))
begin
if (($past(w_cis_op == 5'h12))
&&($past(w_rB))
&&($past(w_cond[3]))
&&($past(w_Iz))
&&($past(w_dcdB_pc))
&&($past(w_dcdR_pc)))
begin
// (CIS) LW (PC),PC
`ASSERT(!o_early_branch);
`ASSERT(!o_early_branch_stb);
end else begin
`ASSERT(!o_early_branch);
`ASSERT(!o_early_branch_stb);
end
end
end else if (!i_reset)
`ASSERT(!o_early_branch_stb);
 
// // CIS instruction 16'hfcf8 decodes into:
// // 1.1111.100.1.1111.0000
// // = LW (PC),PC
// always @(*)
// assume(i_instruction[31:16] != 16'hfcf8);
 
end else begin
always @(*)
`ASSERT(!o_early_branch_stb);
always @(*)
`ASSERT(!o_early_branch);
end endgenerate
 
always @(*)
if (o_early_branch_stb)
`ASSERT(o_early_branch);
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_early_branch_stb))&&(!$past(pf_valid)))
`ASSERT(!o_early_branch_stb);
 
always @(*)
if (!OPT_LOCK)
`ASSERT(!o_lock);
 
generate if (OPT_CIS)
begin : F_OPT_CIS
always @(*)
if ((OPT_PIPELINED)&&(!o_valid))
`ASSERT(!o_phase);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset)))
begin
if ((o_phase)&&($past(i_ce)))
`ASSERT((iword[30:16] == $past(i_instruction[14:0]))
&&(iword[`CISBIT]));
else if (!o_phase)
`ASSERT(iword == i_instruction);
 
if ((!$past(o_phase))&&($past(i_ce))
&&($past(pf_valid))
&&(!$past(i_illegal))
&&(!$past(w_ljmp_dly))
&&($past(i_instruction[`CISBIT]))
&&((!$past(w_dcdR_pc))
||(!$past(w_wR))))
`ASSERT(o_phase);
else if (($past(o_phase))&&($past(i_ce)))
`ASSERT(!o_phase);
if (($past(i_ce))&&(!$past(o_phase))
&&($past(i_illegal))&&($past(i_pf_valid)))
`ASSERT((o_illegal)&&(!o_phase));
 
`ASSERT((!o_phase)||(!o_ljmp));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_stalled))&&($past(pf_valid))
&&($past(i_ce)))
begin
`ASSERT(o_pc[0] == 1'b0);
if (!$past(iword[`CISBIT]))
begin
`ASSERT(o_pc[1:0]==2'b00);
`ASSERT(o_pc[AW+1:2] == $past(i_pc[AW+1:2])+1'b1);
end else if ($past(iword[`CISBIT])&&($past(o_phase)))
`ASSERT(o_pc[(AW+1):1] == $past(o_pc[(AW+1):1]) + 1'b1);
else if ($past(iword[`CISBIT]))
begin
`ASSERT(o_pc[(AW+1):1] == { $past(i_pc[(AW+1):2]), 1'b1});
if (o_valid)
begin
`ASSERT(o_pc[1]);
`ASSERT((o_illegal)||(o_phase));
end
end
end
 
 
always @(*)
if (iword[`CISBIT])
begin
`ASSERT((!w_ldi)||(w_I == { {(23-8){iword[23]}}, iword[23:16] }));
`ASSERT((w_ldi)||(iword[`CISIMMSEL])
||(w_I == { {(23-7){iword[22]}}, iword[22:16] }));
`ASSERT((w_ldi)||(!iword[`CISIMMSEL])
||(w_I == { {(23-3){iword[18]}}, iword[18:16] }));
end else begin
`ASSERT((!w_ldi)||(w_I == iword[22:0]));
`ASSERT((!w_mov)||(w_I == { {(23-13){iword[12]}}, iword[12:0] }));
`ASSERT((w_ldi)||(w_mov)||(iword[`IMMSEL])
||(w_I == { {(23-18){iword[17]}}, iword[17:0] }));
`ASSERT((w_ldi)||(w_mov)||(!iword[`IMMSEL])
||(w_I == { {(23-14){iword[13]}}, iword[13:0] }));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(o_phase)&&($past(i_ce)))
`ASSERT(($past(i_instruction[`CISBIT]))
&&(r_nxt_half[14:0]==$past(i_instruction[14:0])));
end else begin
 
always @(*)
begin
`ASSERT((o_phase)||(iword[30:0] == i_instruction[30:0]));
`ASSERT(o_phase == 1'b0);
`ASSERT(o_pc[0] == 1'b0);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce))&&($past(i_pf_valid)))
`ASSERT(o_pc[AW+1:2] == $past(i_pc[AW+1:2]) + 1'b1);
else if (f_past_valid)
`ASSERT(o_pc == $past(o_pc));
 
always @(*)
`ASSERT(o_pc[1:0] == 2'b00);
 
always @(*)
`ASSERT((!w_ldi)||(w_I == iword[22:0]));
always @(*)
`ASSERT((!w_mov)||(w_I == { {(23-13){iword[12]}}, iword[12:0] }));
always @(*)
`ASSERT((w_ldi)||(w_mov)||(iword[`IMMSEL])
||(w_I == { {(23-18){iword[17]}}, iword[17:0] }));
always @(*)
`ASSERT((w_ldi)||(w_mov)||(!iword[`IMMSEL])
||(w_I == { {(23-14){iword[13]}}, iword[13:0] }));
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce))&&(!$past(i_reset)))
`ASSERT((!$past(i_instruction[`CISBIT]))
||(!$past(pf_valid))||(o_illegal));
end endgenerate
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_ce))&&($past(w_fpu)))
begin
if (OPT_FPU)
`ASSERT(o_FP);
else if (!$past(w_special))
`ASSERT(o_illegal);
end
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_ce))&&($past(w_lock)))
begin
if (OPT_LOCK)
`ASSERT(o_lock);
else
`ASSERT(o_illegal);
end
 
wire [20:0] f_next_pipe_I, f_this_pipe_I;
assign f_this_pipe_I = r_I[22:2];
assign f_next_pipe_I = r_I[22:2]+1'b1;
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset)))
begin
if (OPT_OPIPE)
begin
if (($past(i_ce))
&&(($past(pf_valid))||($past(o_phase))))
begin
if ((!$past(o_M))||(!o_M))
`ASSERT(!o_pipe);
else if ($past(o_op[0])!=o_op[0])
`ASSERT(!o_pipe);
else if ($past(o_rB)!=o_rB)
`ASSERT(!o_pipe);
else if ((o_rB)&&($past(o_dcdB) != o_dcdB))
`ASSERT(!o_pipe);
else if (($past(o_wR))
&&($past(o_dcdR[3:1]) == 3'h7))
`ASSERT(!o_pipe);
// else if ((o_wR)&&(o_dcdR[3:1] == 3'h7))
// `ASSERT(!o_pipe);
else if (o_wR != $past(o_wR))
`ASSERT(!o_pipe);
else if ((o_wR)&&($past(o_dcdR) == o_dcdB))
`ASSERT(!o_pipe);
else if ((o_wR)&&(o_dcdB[3:1] == 3'h7))
`ASSERT(!o_pipe);
else if (($past(o_cond) != 4'h8)
&&($past(o_cond) != o_cond))
`ASSERT(!o_pipe);
else if ($past(r_I[22])!=r_I[22])
`ASSERT(!o_pipe);
else if (r_I[22:0] - $past(r_I[22:0])>23'h4)
`ASSERT(!o_pipe);
else if (!$past(o_valid))
`ASSERT(!o_pipe);
// else
// assert(o_pipe);
end else if ($past(i_stalled))
`ASSERT(o_pipe == $past(o_pipe));
end
end
 
always @(*)
`ASSERT((OPT_OPIPE)||(!o_pipe));
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_ce))
&&($past(i_pf_valid))&&($past(w_mpy)))
`ASSERT((OPT_MPY)||(o_illegal));
 
always @(*)
if (o_valid)
`ASSERT((!o_phase)||(!o_early_branch));
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid))&&($past(o_ljmp))&&($past(!i_stalled)))
`ASSERT(!o_valid);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_early_branch_stb)))
begin
`ASSERT(!o_phase);
if (!$past(i_stalled))
`ASSERT(!o_valid);
`ASSERT(!o_ljmp);
end
 
// Unless another valid instruction comes along, once o_ljmp is asserted
// it should stay asserted until either a reset or an early branch
// strobe.
always @(posedge i_clk)
if ((OPT_EARLY_BRANCHING)&&(f_past_valid)
&&($past(o_ljmp))&&(!$past(pf_valid))
&&(!$past(i_reset))&&(!$past(o_early_branch_stb)))
`ASSERT(o_ljmp);
 
// o_ljmp should only ever be asserted following a valid prefetch
// input. Hence, if the prefetch input isn't valid, then o_ljmp
// should be left low
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(o_ljmp))
&&( (!$past(pf_valid)) || (!$past(i_ce)) )
&&( !$past(o_phase) )
&&(!$past(i_reset))&&(!$past(o_early_branch_stb)))
`ASSERT(!o_ljmp);
 
always @(posedge i_clk)
if ((OPT_EARLY_BRANCHING)&&(f_past_valid)&&($past(o_ljmp))&&(!o_ljmp)
&&(!$past(i_reset)))
`ASSERT((o_early_branch_stb)&&(!o_valid));
 
always @(posedge i_clk)
`ASSERT((!o_early_branch_stb)||(!o_ljmp));
 
always @(posedge i_clk)
`ASSERT((!o_valid)||(!o_ljmp)||(o_phase == o_pc[1]));
 
always @(posedge i_clk)
if (!OPT_CIS)
`ASSERT(!o_phase);
else if (!f_insn_word[31])
`ASSERT(!o_phase);
else if (o_phase)
`ASSERT(o_pc[1]);
 
always @(*)
if ((o_early_branch)&&(!o_early_branch_stb))
`ASSERT(!o_pipe);
 
always @(*)
if (o_ljmp)
`ASSERT(!o_pipe);
 
always @(*)
`ASSERT(o_dcdR == o_dcdA);
 
always @(*)
if ((o_valid)&&(o_phase))
begin
`ASSERT(!o_illegal);
`ASSERT(o_pc[1]);
`ASSERT(f_insn_word[31]);
end
 
always @(*)
`ASSERT(o_branch_pc[1:0] == 2'b00);
always @(*)
`ASSERT(o_pc[0] == 1'b0);
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_pf_valid))&&(i_pf_valid))
`ASSUME((i_reset)||($stable(i_gie)));
 
wire fc_illegal, fc_wF, fc_ALU, fc_M, fc_DV, fc_FP, fc_break,
fc_lock, fc_wR, fc_rA, fc_rB, fc_prepipe, fc_sim;
wire [6:0] fc_dcdR, fc_dcdA, fc_dcdB;
wire [31:0] fc_I;
wire [3:0] fc_cond;
wire [3:0] fc_op;
wire [22:0] fc_sim_immv;
f_idecode #(
.ADDRESS_WIDTH(AW),
.OPT_MPY(OPT_MPY),
.OPT_EARLY_BRANCHING(OPT_EARLY_BRANCHING),
.OPT_DIVIDE(OPT_DIVIDE),
.OPT_FPU(OPT_FPU),
.OPT_CIS(OPT_CIS),
.OPT_LOCK(OPT_LOCK),
.OPT_OPIPE(OPT_OPIPE),
.OPT_SIM(OPT_SIM)
) formal_decoder(
f_insn_word, o_phase, f_insn_gie,
fc_illegal,
fc_dcdR, fc_dcdA,fc_dcdB, fc_I, fc_cond, fc_wF, fc_op,
fc_ALU, fc_M, fc_DV, fc_FP, fc_break, fc_lock,
fc_wR, fc_rA, fc_rB, fc_prepipe, fc_sim, fc_sim_immv);
 
always @(posedge i_clk)
if ((o_valid)&&(fc_illegal))
assert(o_illegal);
 
always @(posedge i_clk)
if ((o_valid)&&(!o_illegal))
begin
`ASSERT(fc_dcdR== o_dcdR); //
`ASSERT(fc_dcdA== o_dcdA); //
`ASSERT(fc_dcdB== o_dcdB); //
`ASSERT(fc_I == o_I);
`ASSERT(o_zI == (fc_I == 0));
`ASSERT(fc_cond== o_cond);
`ASSERT(fc_wF == o_wF);
`ASSERT(fc_op == o_op);
`ASSERT(fc_ALU == o_ALU);
`ASSERT(fc_M == o_M);
`ASSERT(fc_DV == o_DV);
`ASSERT(fc_FP == o_FP);
`ASSERT(fc_break== o_break);
`ASSERT(fc_lock == o_lock);
`ASSERT(fc_wR == o_wR);
`ASSERT(fc_rA == o_rA);
`ASSERT(fc_rB == o_rB);
`ASSERT(fc_sim == o_sim);
`ASSERT(fc_sim_immv == o_sim_immv);
`ASSERT(fc_prepipe == r_insn_is_pipeable);
end else
`ASSERT(!r_insn_is_pipeable);
 
always @(*)
if (o_phase)
`ASSERT(r_nxt_half[14:0] == f_insn_word[14:0]);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce))&&(o_valid)&&(!$past(i_reset)))
begin
`ASSERT(((fc_illegal)
||$past((i_illegal)&&(!o_phase))
||$past((o_illegal)&&( o_phase)))== o_illegal);
end
 
always @(posedge i_clk)
if ((!o_valid)||(o_illegal))
`ASSERT(!r_insn_is_pipeable);
 
generate if ((OPT_CIS)&&(OPT_EARLY_BRANCHING))
begin
 
always @(*)
if ((o_valid)
// LW
&&(o_M)&&(o_op[2:0]==3'b010)
// Zero immediate
&&(o_zI)
// Unconditional
&&(o_cond[3])
// From PC to PC
&&(o_dcdR[5])&&(o_dcdB[5]))
`ASSERT((o_ljmp)
||((f_insn_word[31])&&(o_phase || o_illegal)));
else if (o_valid)
`ASSERT(!o_ljmp);
 
end endgenerate
 
`endif // FORMAL
endmodule
/iscachable.v
0,0 → 1,60
////////////////////////////////////////////////////////////////////////////////
//
// Filename: iscachable.v
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: A helper function to both dcache and its formal properties,
// used to determine when a particular address is cachable. This
// module must be built of entirely combinatorial logic and nothing more.
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2018-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
module iscachable(i_addr, o_cachable);
	// Width of the address being tested
	parameter	ADDRESS_WIDTH=30;
	localparam	AW = ADDRESS_WIDTH; // Just for ease of notation below
	//
	// Each candidate region is described by an (ADDR, MASK) pair.  An
	// address falls within the region when (i_addr & MASK) == ADDR.
	// A region whose ADDR parameter is zero is treated as disabled.
	parameter [AW-1:0]	SDRAM_ADDR = 0, SDRAM_MASK = 0;
	parameter [AW-1:0]	BKRAM_ADDR = 30'h4000000,
				BKRAM_MASK = 30'h4000000;
	parameter [AW-1:0]	FLASH_ADDR = 0, FLASH_MASK = 0;

	input	wire	[AW-1:0]	i_addr;
	output	reg			o_cachable;

	// One match flag per region, each gated by its enable test
	wire	in_sdram, in_flash, in_bkram;

	assign	in_sdram = (SDRAM_ADDR != 0)
				&&((i_addr & SDRAM_MASK) == SDRAM_ADDR);
	assign	in_flash = (FLASH_ADDR != 0)
				&&((i_addr & FLASH_MASK) == FLASH_ADDR);
	assign	in_bkram = (BKRAM_ADDR != 0)
				&&((i_addr & BKRAM_MASK) == BKRAM_ADDR);

	// Purely combinatorial: cachable if the address lands in any
	// enabled region, not cachable otherwise
	always @(*)
	if ((in_sdram)||(in_flash)||(in_bkram))
		o_cachable = 1'b1;
	else
		o_cachable = 1'b0;

endmodule
/memops.v
6,7 → 6,7
//
// Purpose: A memory unit to support a CPU.
//
// In the interests of code simplicity, this memory operator is
// In the interests of code simplicity, this memory operator is
// susceptible to unknown results should a new command be sent to it
// before it completes the last one. Unpredictable results might then
// occur.
19,7 → 19,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015,2017, Gisselquist Technology, LLC
// Copyright (C) 2015,2017-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
43,22 → 43,32
////////////////////////////////////////////////////////////////////////////////
//
//
module memops(i_clk, i_rst, i_stb, i_lock,
`default_nettype none
//
module memops(i_clk, i_reset, i_stb, i_lock,
i_op, i_addr, i_data, i_oreg,
o_busy, o_valid, o_err, o_wreg, o_result,
o_wb_cyc_gbl, o_wb_cyc_lcl,
o_wb_stb_gbl, o_wb_stb_lcl,
o_wb_we, o_wb_addr, o_wb_data, o_wb_sel,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
parameter ADDRESS_WIDTH=30, IMPLEMENT_LOCK=0, WITH_LOCAL_BUS=0;
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data
`ifdef FORMAL
, f_nreqs, f_nacks, f_outstanding
`endif
);
parameter ADDRESS_WIDTH=30;
parameter [0:0] IMPLEMENT_LOCK=1'b1,
WITH_LOCAL_BUS=1'b1,
OPT_ALIGNMENT_ERR=1'b1,
OPT_ZERO_ON_IDLE=1'b0;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst;
input i_stb, i_lock;
input wire i_clk, i_reset;
input wire i_stb, i_lock;
// CPU interface
input [2:0] i_op;
input [31:0] i_addr;
input [31:0] i_data;
input [4:0] i_oreg;
input wire [2:0] i_op;
input wire [31:0] i_addr;
input wire [31:0] i_data;
input wire [4:0] i_oreg;
// CPU outputs
output wire o_busy;
output reg o_valid;
75,51 → 85,91
output reg [31:0] o_wb_data;
output reg [3:0] o_wb_sel;
// Wishbone inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [31:0] i_wb_data;
// Formal
parameter F_LGDEPTH = 2;
`ifdef FORMAL
output wire [(F_LGDEPTH-1):0] f_nreqs, f_nacks, f_outstanding;
`endif
 
reg misaligned;
 
// Misalignment detection.
//
// When OPT_ALIGNMENT_ERR is set, "misaligned" follows i_stb whenever the
// requested operation size (i_op[2:1]) is inconsistent with the low two
// address bits (i_addr[1:0]).  The strobes lcl_stb and gbl_stb are both
// qualified by !misaligned, so a misaligned request never starts a bus
// cycle.  When OPT_ALIGNMENT_ERR is clear, misaligned is tied low and no
// alignment checking takes place.
generate if (OPT_ALIGNMENT_ERR)
begin : GENERATE_ALIGNMENT_ERR
always @(*)
casez({ i_op[2:1], i_addr[1:0] })
4'b01?1: misaligned = i_stb; // Word access at an odd byte address
4'b0110: misaligned = i_stb; // Word access on a halfword boundary only
4'b10?1: misaligned = i_stb; // Halfword access at an odd byte address
// 4'b11??: misaligned <= 1'b0; Byte accesses are never misaligned
default: misaligned = 1'b0;
endcase
end else
// Alignment checking disabled: nothing is ever flagged
always @(*) misaligned = 1'b0;
endgenerate
 
reg r_wb_cyc_gbl, r_wb_cyc_lcl;
wire gbl_stb, lcl_stb;
assign lcl_stb = (i_stb)&&(WITH_LOCAL_BUS!=0)&&(i_addr[31:24]==8'hff);
assign gbl_stb = (i_stb)&&((WITH_LOCAL_BUS==0)||(i_addr[31:24]!=8'hff));
assign lcl_stb = (i_stb)&&(WITH_LOCAL_BUS!=0)&&(i_addr[31:24]==8'hff)
&&(!misaligned);
assign gbl_stb = (i_stb)&&((WITH_LOCAL_BUS==0)||(i_addr[31:24]!=8'hff))
&&(!misaligned);
 
initial r_wb_cyc_gbl = 1'b0;
initial r_wb_cyc_lcl = 1'b0;
always @(posedge i_clk)
if (i_rst)
if (i_reset)
begin
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
end else if ((r_wb_cyc_gbl)||(r_wb_cyc_lcl))
begin
if ((i_wb_ack)||(i_wb_err))
begin
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
end else if ((r_wb_cyc_gbl)||(r_wb_cyc_lcl))
begin
if ((i_wb_ack)||(i_wb_err))
begin
r_wb_cyc_gbl <= 1'b0;
r_wb_cyc_lcl <= 1'b0;
end
end else if (i_stb) // New memory operation
begin // Grab the wishbone
r_wb_cyc_lcl <= lcl_stb;
r_wb_cyc_gbl <= gbl_stb;
end
end else begin // New memory operation
// Grab the wishbone
r_wb_cyc_lcl <= (lcl_stb);
r_wb_cyc_gbl <= (gbl_stb);
end
initial o_wb_stb_gbl = 1'b0;
always @(posedge i_clk)
if (o_wb_cyc_gbl)
o_wb_stb_gbl <= (o_wb_stb_gbl)&&(i_wb_stall);
else
o_wb_stb_gbl <= gbl_stb; // Grab wishbone on new operation
if (i_reset)
o_wb_stb_gbl <= 1'b0;
else if ((i_wb_err)&&(r_wb_cyc_gbl))
o_wb_stb_gbl <= 1'b0;
else if (o_wb_cyc_gbl)
o_wb_stb_gbl <= (o_wb_stb_gbl)&&(i_wb_stall);
else
// Grab wishbone on any new transaction to the gbl bus
o_wb_stb_gbl <= (gbl_stb);
 
initial o_wb_stb_lcl = 1'b0;
always @(posedge i_clk)
if (o_wb_cyc_lcl)
o_wb_stb_lcl <= (o_wb_stb_lcl)&&(i_wb_stall);
else
o_wb_stb_lcl <= lcl_stb; // Grab wishbone on new operation
if (i_reset)
o_wb_stb_lcl <= 1'b0;
else if ((i_wb_err)&&(r_wb_cyc_lcl))
o_wb_stb_lcl <= 1'b0;
else if (o_wb_cyc_lcl)
o_wb_stb_lcl <= (o_wb_stb_lcl)&&(i_wb_stall);
else
// Grab wishbone on any new transaction to the lcl bus
o_wb_stb_lcl <= (lcl_stb);
 
reg [3:0] r_op;
initial o_wb_we = 1'b0;
initial o_wb_data = 0;
initial o_wb_sel = 0;
always @(posedge i_clk)
if (i_stb)
if (i_stb)
begin
o_wb_we <= i_op[0];
if (OPT_ZERO_ON_IDLE)
begin
o_wb_we <= i_op[0];
casez({ i_op[2:1], i_addr[1:0] })
`ifdef ZERO_ON_IDLE
4'b100?: o_wb_data <= { i_data[15:0], 16'h00 };
4'b101?: o_wb_data <= { 16'h00, i_data[15:0] };
4'b1100: o_wb_data <= { i_data[7:0], 24'h00 };
126,58 → 176,62
4'b1101: o_wb_data <= { 8'h00, i_data[7:0], 16'h00 };
4'b1110: o_wb_data <= { 16'h00, i_data[7:0], 8'h00 };
4'b1111: o_wb_data <= { 24'h00, i_data[7:0] };
`else
default: o_wb_data <= i_data;
endcase
end else
casez({ i_op[2:1], i_addr[1:0] })
4'b10??: o_wb_data <= { (2){ i_data[15:0] } };
4'b11??: o_wb_data <= { (4){ i_data[7:0] } };
`endif
default: o_wb_data <= i_data;
endcase
 
o_wb_addr <= i_addr[(AW+1):2];
`ifdef SET_SEL_ON_READ
if (i_op[0] == 1'b0)
o_wb_sel <= 4'hf;
else
`endif
casez({ i_op[2:1], i_addr[1:0] })
4'b01??: o_wb_sel <= 4'b1111;
4'b100?: o_wb_sel <= 4'b1100;
4'b101?: o_wb_sel <= 4'b0011;
4'b1100: o_wb_sel <= 4'b1000;
4'b1101: o_wb_sel <= 4'b0100;
4'b1110: o_wb_sel <= 4'b0010;
4'b1111: o_wb_sel <= 4'b0001;
default: o_wb_sel <= 4'b1111;
endcase
r_op <= { i_op[2:1] , i_addr[1:0] };
end
`ifdef ZERO_ON_IDLE
else if ((!o_wb_cyc_gbl)&&(!o_wb_cyc_lcl))
begin
o_wb_we <= 1'b0;
o_wb_addr <= 0;
o_wb_data <= 32'h0;
o_wb_sel <= 4'h0;
end
`endif
o_wb_addr <= i_addr[(AW+1):2];
casez({ i_op[2:1], i_addr[1:0] })
4'b01??: o_wb_sel <= 4'b1111;
4'b100?: o_wb_sel <= 4'b1100;
4'b101?: o_wb_sel <= 4'b0011;
4'b1100: o_wb_sel <= 4'b1000;
4'b1101: o_wb_sel <= 4'b0100;
4'b1110: o_wb_sel <= 4'b0010;
4'b1111: o_wb_sel <= 4'b0001;
default: o_wb_sel <= 4'b1111;
endcase
r_op <= { i_op[2:1] , i_addr[1:0] };
end else if ((OPT_ZERO_ON_IDLE)&&(!o_wb_cyc_gbl)&&(!o_wb_cyc_lcl))
begin
o_wb_we <= 1'b0;
o_wb_addr <= 0;
o_wb_data <= 32'h0;
o_wb_sel <= 4'h0;
end
 
initial o_valid = 1'b0;
always @(posedge i_clk)
o_valid <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_ack)&&(~o_wb_we);
if (i_reset)
o_valid <= 1'b0;
else
o_valid <= (((o_wb_cyc_gbl)||(o_wb_cyc_lcl))
&&(i_wb_ack)&&(!o_wb_we));
initial o_err = 1'b0;
always @(posedge i_clk)
o_err <= (!i_rst)&&((o_wb_cyc_gbl)||(o_wb_cyc_lcl))&&(i_wb_err);
assign o_busy = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
if (i_reset)
o_err <= 1'b0;
else if ((r_wb_cyc_gbl)||(r_wb_cyc_lcl))
o_err <= i_wb_err;
else if ((i_stb)&&(!o_busy))
o_err <= misaligned;
else
o_err <= 1'b0;
 
assign o_busy = (r_wb_cyc_gbl)||(r_wb_cyc_lcl);
 
always @(posedge i_clk)
if (i_stb)
o_wreg <= i_oreg;
if (i_stb)
o_wreg <= i_oreg;
always @(posedge i_clk)
`ifdef ZERO_ON_IDLE
if (!i_wb_ack)
o_result <= 32'h0;
else
`endif
if ((OPT_ZERO_ON_IDLE)&&(!i_wb_ack))
o_result <= 32'h0;
else begin
casez(r_op)
4'b01??: o_result <= i_wb_data;
4'b100?: o_result <= { 16'h00, i_wb_data[31:16] };
188,25 → 242,438
4'b1111: o_result <= { 24'h00, i_wb_data[ 7: 0] };
default: o_result <= i_wb_data;
endcase
end
 
reg lock_gbl, lock_lcl;
 
generate
if (IMPLEMENT_LOCK != 0)
begin
reg lock_gbl, lock_lcl;
 
initial lock_gbl = 1'b0;
initial lock_lcl = 1'b0;
 
always @(posedge i_clk)
if (i_reset)
begin
lock_gbl <= (i_lock)&&((r_wb_cyc_gbl)||(lock_gbl));
lock_lcl <= (i_lock)&&((r_wb_cyc_lcl)||(lock_lcl));
lock_gbl <= 1'b0;
lock_lcl <= 1'b0;
end else if (((i_wb_err)&&((r_wb_cyc_gbl)||(r_wb_cyc_lcl)))
||(misaligned))
begin
// Kill the lock if
// there's a bus error, or
// User requests a misaligned memory op
lock_gbl <= 1'b0;
lock_lcl <= 1'b0;
end else begin
// Kill the lock if
// i_lock goes down
// User starts on the global bus, then switches
// to local or vice versa
lock_gbl <= (i_lock)&&((r_wb_cyc_gbl)||(lock_gbl))
&&(!lcl_stb);
lock_lcl <= (i_lock)&&((r_wb_cyc_lcl)||(lock_lcl))
&&(!gbl_stb);
end
 
assign o_wb_cyc_gbl = (r_wb_cyc_gbl)||(lock_gbl);
assign o_wb_cyc_lcl = (r_wb_cyc_lcl)||(lock_lcl);
end else begin
 
assign o_wb_cyc_gbl = (r_wb_cyc_gbl);
assign o_wb_cyc_lcl = (r_wb_cyc_lcl);
 
always @(*)
{ lock_gbl, lock_lcl } = 2'b00;
 
// Make verilator happy
// verilator lint_off UNUSED
wire [2:0] lock_unused;
assign lock_unused = { i_lock, lock_gbl, lock_lcl };
// verilator lint_on UNUSED
 
end endgenerate
 
`ifdef VERILATOR
always @(posedge i_clk)
if ((r_wb_cyc_gbl)||(r_wb_cyc_lcl))
assert(!i_stb);
`endif
 
 
// Make verilator happy
// verilator lint_off UNUSED
generate if (AW < 22)
begin : TOO_MANY_ADDRESS_BITS
 
wire [(21-AW):0] unused_addr;
assign unused_addr = i_addr[23:(AW+2)];
 
end endgenerate
// verilator lint_on UNUSED
 
`ifdef FORMAL
`define ASSERT assert
`ifdef MEMOPS
`define ASSUME assume
`else
`define ASSUME assert
`endif
 
// f_past_valid is low only in the initial time step, before the first
// clock edge. Properties that use $past() gate on it so that they never
// examine values from before time began.
//
// A non-blocking assignment is used so that every other clocked property
// block sampling f_past_valid on the same edge sees the pre-edge value,
// independent of simulator process ordering.
reg f_past_valid;
initial f_past_valid = 0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
always @(*)
if (!f_past_valid)
`ASSUME(i_reset);
initial `ASSUME(!i_stb);
 
wire f_cyc, f_stb;
assign f_cyc = (o_wb_cyc_gbl)||(o_wb_cyc_lcl);
assign f_stb = (o_wb_stb_gbl)||(o_wb_stb_lcl);
 
`ifdef MEMOPS
`define MASTER fwb_master
`else
`define MASTER fwb_counter
`endif
 
fwb_master #(.AW(AW), .F_LGDEPTH(F_LGDEPTH),
.F_OPT_RMW_BUS_OPTION(IMPLEMENT_LOCK),
.F_OPT_DISCONTINUOUS(IMPLEMENT_LOCK))
f_wb(i_clk, i_reset,
f_cyc, f_stb, o_wb_we, o_wb_addr, o_wb_data, o_wb_sel,
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err,
f_nreqs, f_nacks, f_outstanding);
 
 
// Rule: Only one of the two CYC's may be valid, never both
always @(posedge i_clk)
`ASSERT((!o_wb_cyc_gbl)||(!o_wb_cyc_lcl));
 
// Rule: Only one of the two STB's may be valid, never both
always @(posedge i_clk)
`ASSERT((!o_wb_stb_gbl)||(!o_wb_stb_lcl));
 
// Rule: if WITH_LOCAL_BUS is ever false, neither the local STB nor CYC
// may be valid
always @(*)
if (!WITH_LOCAL_BUS)
begin
`ASSERT(!o_wb_cyc_lcl);
`ASSERT(!o_wb_stb_lcl);
end
 
// Rule: If the global CYC is ever true, the LCL one cannot be true
// on the next clock without an intervening idle of both
always @(posedge i_clk)
if ((f_past_valid)&&($past(r_wb_cyc_gbl)))
`ASSERT(!r_wb_cyc_lcl);
 
// Same for if the LCL CYC is true
always @(posedge i_clk)
if ((f_past_valid)&&($past(r_wb_cyc_lcl)))
`ASSERT(!r_wb_cyc_gbl);
 
// STB can never be true unless CYC is also true
always @(posedge i_clk)
if (o_wb_stb_gbl)
`ASSERT(r_wb_cyc_gbl);
always @(posedge i_clk)
if (o_wb_stb_lcl)
`ASSERT(r_wb_cyc_lcl);
 
// This core only ever has zero or one outstanding transaction(s)
always @(posedge i_clk)
if ((o_wb_stb_gbl)||(o_wb_stb_lcl))
`ASSERT(f_outstanding == 0);
else
`ASSERT((f_outstanding == 0)||(f_outstanding == 1));
 
// The LOCK function only allows up to two transactions (at most)
// before CYC must be dropped.
always @(posedge i_clk)
if ((o_wb_stb_gbl)||(o_wb_stb_lcl))
begin
if (IMPLEMENT_LOCK)
`ASSERT((f_outstanding == 0)||(f_outstanding == 1));
else
`ASSERT(f_nreqs <= 1);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(o_busy))
begin
 
// If i_stb doesn't change, then neither do any of the other
// inputs
if (($past(i_stb))&&(i_stb))
begin
`ASSUME($stable(i_op));
`ASSUME($stable(i_addr));
`ASSUME($stable(i_data));
`ASSUME($stable(i_oreg));
`ASSUME($stable(i_lock));
end
 
 
// No strobes are allowed if a request is outstanding, either
// having been accepted by the bus or waiting to be accepted
// by the bus.
if ((f_outstanding != 0)||(f_stb))
`ASSUME(!i_stb);
/*
if (o_busy)
assert( (!i_stb)
||((!o_wb_stb_gbl)&&(!o_wb_stb_lcl)&&(i_lock)));
 
if ((f_cyc)&&($past(f_cyc)))
assert($stable(r_op));
*/
end
 
always @(*)
if (!IMPLEMENT_LOCK)
`ASSUME(!i_lock);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(f_cyc))&&($past(!i_lock)))
`ASSUME(!i_lock);
 
// Following any i_stb request, assuming we are idle, immediately
// begin a bus transaction
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_stb))
&&(!$past(f_cyc))&&(!$past(i_reset)))
begin
if ($past(misaligned))
begin
`ASSERT(!f_cyc);
`ASSERT(!o_busy);
`ASSERT(o_err);
`ASSERT(!o_valid);
end else begin
`ASSERT(f_cyc);
`ASSERT(o_busy);
end
end
 
always @(posedge i_clk)
if (o_busy)
`ASSUME(!i_stb);
 
always @(posedge i_clk)
if (o_wb_cyc_gbl)
`ASSERT((o_busy)||(lock_gbl));
 
always @(posedge i_clk)
if (o_wb_cyc_lcl)
`ASSERT((o_busy)||(lock_lcl));
 
always @(posedge i_clk)
if (f_outstanding > 0)
`ASSERT(o_busy);
 
// If a transaction ends in an error, send o_err on the output port.
always @(posedge i_clk)
if (f_past_valid)
begin
if ($past(i_reset))
`ASSERT(!o_err);
else if (($past(f_cyc))&&($past(i_wb_err)))
`ASSERT(o_err);
else if ($past(misaligned))
`ASSERT(o_err);
end
 
// Always following a successful ACK, return an O_VALID value.
always @(posedge i_clk)
if (f_past_valid)
begin
if ($past(i_reset))
`ASSERT(!o_valid);
else if(($past(f_cyc))&&($past(i_wb_ack))
&&(!$past(o_wb_we)))
`ASSERT(o_valid);
else if ($past(misaligned))
`ASSERT((!o_valid)&&(o_err));
else
`ASSERT(!o_valid);
end
 
//always @(posedge i_clk)
// if ((f_past_valid)&&($past(f_cyc))&&(!$past(o_wb_we))&&($past(i_wb_ack)))
 
/*
input wire [2:0] i_op;
input wire [31:0] i_addr;
input wire [31:0] i_data;
input wire [4:0] i_oreg;
// CPU outputs
output wire o_busy;
output reg o_valid;
output reg o_err;
output reg [4:0] o_wreg;
output reg [31:0] o_result;
*/
 
initial o_wb_we = 1'b0;
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(i_stb)))
begin
// On a write, assert o_wb_we should be true
assert( $past(i_op[0]) == o_wb_we);
 
// Word write
if ($past(i_op[2:1]) == 2'b01)
begin
`ASSERT(o_wb_sel == 4'hf);
`ASSERT(o_wb_data == $past(i_data));
end
 
// Halfword (short) write
if ($past(i_op[2:1]) == 2'b10)
begin
if (!$past(i_addr[1]))
begin
`ASSERT(o_wb_sel == 4'hc);
`ASSERT(o_wb_data[31:16] == $past(i_data[15:0]));
end else begin
`ASSERT(o_wb_sel == 4'h3);
`ASSERT(o_wb_data[15:0] == $past(i_data[15:0]));
end
end
 
if ($past(i_op[2:1]) == 2'b11)
begin
if ($past(i_addr[1:0])==2'b00)
begin
`ASSERT(o_wb_sel == 4'h8);
`ASSERT(o_wb_data[31:24] == $past(i_data[7:0]));
end
 
if ($past(i_addr[1:0])==2'b01)
begin
`ASSERT(o_wb_sel == 4'h4);
`ASSERT(o_wb_data[23:16] == $past(i_data[7:0]));
end
if ($past(i_addr[1:0])==2'b10)
begin
`ASSERT(o_wb_sel == 4'h2);
`ASSERT(o_wb_data[15:8] == $past(i_data[7:0]));
end
if ($past(i_addr[1:0])==2'b11)
begin
`ASSERT(o_wb_sel == 4'h1);
`ASSERT(o_wb_data[7:0] == $past(i_data[7:0]));
end
end
 
`ASSUME($past(i_op[2:1] != 2'b00));
end
 
// This logic is fixed in the definitions of the lock(s) above
// i.e., the user can be stupid and this will still work
/*
always @(posedge i_clk)
if ((i_lock)&&(i_stb)&&(WITH_LOCAL_BUS))
begin
restrict((lock_gbl)||(i_addr[31:24] ==8'hff));
restrict((lock_lcl)||(i_addr[31:24]!==8'hff));
end
*/
 
always @(posedge i_clk)
if (o_wb_stb_lcl)
`ASSERT(o_wb_addr[29:22] == 8'hff);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&($past(misaligned)))
begin
`ASSERT(!o_wb_cyc_gbl);
`ASSERT(!o_wb_cyc_lcl);
`ASSERT(!o_wb_stb_gbl);
`ASSERT(!o_wb_stb_lcl);
`ASSERT(o_err);
//OPT_ALIGNMENT_ERR=1'b0,
//OPT_ZERO_ON_IDLE=1'b0;
end
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
`ASSUME(!i_stb);
always @(*)
if (o_busy)
`ASSUME(!i_stb);
 
always @(posedge i_clk)
if ((f_past_valid)&&(IMPLEMENT_LOCK)
&&(!$past(i_reset))&&(!$past(i_wb_err))
&&(!$past(misaligned))
&&(!$past(lcl_stb))
&&($past(i_lock))&&($past(lock_gbl)))
assert(lock_gbl);
 
always @(posedge i_clk)
if ((f_past_valid)&&(IMPLEMENT_LOCK)
&&(!$past(i_reset))&&(!$past(i_wb_err))
&&(!$past(misaligned))
&&(!$past(lcl_stb))
&&($past(o_wb_cyc_gbl))&&($past(i_lock))
&&($past(lock_gbl)))
assert(o_wb_cyc_gbl);
 
always @(posedge i_clk)
if ((f_past_valid)&&(IMPLEMENT_LOCK)
&&(!$past(i_reset))&&(!$past(i_wb_err))
&&(!$past(misaligned))
&&(!$past(gbl_stb))
&&($past(o_wb_cyc_lcl))&&($past(i_lock))
&&($past(lock_lcl)))
assert(o_wb_cyc_lcl);
 
//
// Cover properties
//
always @(posedge i_clk)
cover(i_wb_ack);
 
// Cover a response on the same clock it is made
always @(posedge i_clk)
cover((o_wb_stb_gbl)&&(i_wb_ack));
 
// Cover a response a clock (or more) after the request was accepted:
// the global cycle is still held open, but the strobe has already been
// dropped by the time the ACK arrives. (This mirrors the equivalent
// local-bus cover statement in the generate block below; previously this
// statement merely repeated the same-clock cover above.)
always @(posedge i_clk)
cover((o_wb_cyc_gbl)&&(!o_wb_stb_gbl)&&(i_wb_ack));
 
 
generate if (WITH_LOCAL_BUS)
begin
 
// Same things on the local bus
always @(posedge i_clk)
cover((o_wb_cyc_lcl)&&(!o_wb_stb_lcl)&&(i_wb_ack));
always @(posedge i_clk)
cover((o_wb_stb_lcl)&&(i_wb_ack));
 
end endgenerate
 
`endif
endmodule
//
//
// Usage (from yosys):
// (BFOR) (!ZOI,ALIGN) (ZOI,ALIGN) (!ZOI,!ALIGN)
// Cells 230 226 281 225
// FDRE 114 116 116 116
// LUT2 17 23 76 19
// LUT3 9 23 17 20
// LUT4 15 4 11 14
// LUT5 18 18 7 15
// LUT6 33 18 54 38
// MUX7 16 12 2
// MUX8 8 1 1
//
//
/mpyop.v
0,0 → 1,319
////////////////////////////////////////////////////////////////////////////////
//
// Filename: mpyop.v
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: This code has been pulled from the cpuops.v file so as to
// encapsulate the multiply component--the one component that
// (can't be) formally verified well, and so must be abstracted away.
// This separation was done to support potential future abstraction.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
// mpyop -- the multiply unit.
//
// Multiplies the two 32-bit operands i_a and i_b into the 64-bit o_result.
// i_op[0] selects a signed multiply (the operands are sign extended, or
// sign-adjusted, only when it is set); i_op[1] is reported back on o_hi to
// say that the high half of the product is the one wanted.
//
// IMPLEMENT_MPY selects one of several implementations, each generated by
// its own branch of the generate chain below:
//   0  -- no multiply: o_result is zero, o_valid simply follows i_stb
//   1  -- purely combinational product
//   2  -- operands registered, product combinational from those registers
//   3  -- operands and product both registered
//   4  -- 16x16 partial products, summed over an additional clock
//   other -- the external slowmpy module is instantiated
module mpyop(i_clk,i_reset, i_stb, i_op, i_a, i_b, o_valid, o_busy, o_result, o_hi);
// The following parameter selects which multiply algorithm we use.
// Timing performance is strictly dependent upon it.
parameter IMPLEMENT_MPY = 1;
input wire i_clk, i_reset, i_stb;
input wire [1:0] i_op; // 2'b00=MPY, 2'b10=MPYUHI, 2'b11=MPYSHI
input wire [31:0] i_a, i_b;
output wire o_valid; // True if we'll be valid on the next clock;
output wire o_busy; // The multiply is busy if true
output wire [63:0] o_result; // Where we dump the multiply result
output reg o_hi; // Return the high half of the multiply


// A 4-way multiplexer can be done in one 6-LUT.
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with
// the Xilinx multiplexer fabric that follows.
// Given that we wish to apply this multiplexer approach to 33-bits,
// this will cost a minimum of 132 6-LUTs.

// i_stb instead of this_is_a_multiply_op
// o_result
// o_busy
// o_done
generate
if (IMPLEMENT_MPY == 0)
begin : MPYNONE // No multiply support.

// With no multiplier the result is a constant zero, the unit is never
// busy, and o_valid just mirrors the request strobe.
assign o_result = 64'h00;
assign o_busy = 1'b0;
assign o_valid = i_stb;
always @(*) o_hi = 1'b0; // Not needed

`ifdef VERILATOR
// Collect the otherwise unreferenced inputs so Verilator does not warn
// verilator lint_off UNUSED
wire [32+32+5-1:0] mpy_unused;
assign mpy_unused = { i_clk, i_reset, i_stb, i_op, i_a, i_b };
// verilator lint_on UNUSED
`endif
end else begin : IMPY
if (IMPLEMENT_MPY == 1)
begin : MPY1CK // Our single clock option (no extra clocks)

wire signed [63:0] w_mpy_a_input, w_mpy_b_input;

// Extend each operand to 64 bits: the upper half is a copy of the sign
// bit when i_op[0] (signed) is set, and zero otherwise.
assign w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
assign w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};

assign o_result = w_mpy_a_input * w_mpy_b_input;

// NOTE(review): o_valid is tied low here even though the product is
// available combinationally; presumably the instantiating ALU consumes
// o_result directly for this option -- confirm against the caller.
assign o_busy = 1'b0;
assign o_valid = 1'b0;
always @(*) o_hi = i_op[1];

`ifdef VERILATOR
// verilator lint_off UNUSED
wire [3:0] mpy_unused;
assign mpy_unused = { i_clk, i_reset, i_stb, i_op[1] };
// verilator lint_on UNUSED
`endif

end else begin: MPN1
if (IMPLEMENT_MPY == 2)
begin : MPY2CK // Our two clock option (ALU must pause for 1 clock)

// First clock: register the (sign or zero extended) operands. These
// registers are loaded on every clock, whether or not i_stb is set.
reg signed [63:0] r_mpy_a_input, r_mpy_b_input;
always @(posedge i_clk)
begin
r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
end

// The product itself is combinational from the registered operands
assign o_result = r_mpy_a_input * r_mpy_b_input;
assign o_busy = 1'b0;

// mpypipe is i_stb delayed by one clock, cleared on reset
reg mpypipe;
initial mpypipe = 1'b0;
always @(posedge i_clk)
if (i_reset)
mpypipe <= 1'b0;
else
mpypipe <= (i_stb);

assign o_valid = mpypipe; // this_is_a_multiply_op;
// Remember, for the duration of the multiply, whether the high half
// of the result was requested
always @(posedge i_clk)
if (i_stb)
o_hi <= i_op[1];

end else begin : MPN2
if (IMPLEMENT_MPY == 3)
begin : MPY3CK // Our three clock option (ALU pauses for 2 clocks)
reg signed [63:0] r_smpy_result;
reg [63:0] r_umpy_result;
reg signed [31:0] r_mpy_a_input, r_mpy_b_input;
reg [1:0] mpypipe;
reg [1:0] r_sgn;

// mpypipe is a two bit shift register tracking i_stb through the two
// registered stages below
initial mpypipe = 2'b0;
always @(posedge i_clk)
if (i_reset)
mpypipe <= 2'b0;
else
mpypipe <= { mpypipe[0], i_stb };

// First clock
// Capture the operands, and shift the signed flag (i_op[0]) along with
// them so that r_sgn[1] lines up with the registered products.
always @(posedge i_clk)
begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
r_sgn <= { r_sgn[0], i_op[0] };
end

// Second clock
// Both a signed and an unsigned product are calculated on every clock;
// r_sgn[1] picks between them afterwards.
`ifdef VERILATOR
// Under Verilator, the operands are explicitly widened to 64 bits (sign
// extended for the signed product, zero extended for the unsigned one)
// before being multiplied.
wire signed [63:0] s_mpy_a_input, s_mpy_b_input;
wire [63:0] u_mpy_a_input, u_mpy_b_input;

assign s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input};
assign s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input};
assign u_mpy_a_input = {32'h00,r_mpy_a_input};
assign u_mpy_b_input = {32'h00,r_mpy_b_input};
always @(posedge i_clk)
r_smpy_result <= s_mpy_a_input * s_mpy_b_input;
always @(posedge i_clk)
r_umpy_result <= u_mpy_a_input * u_mpy_b_input;
`else

// Otherwise the 32-bit operands are multiplied directly: once as the
// signed registers, and once through unsigned wire copies of them.
wire [31:0] u_mpy_a_input, u_mpy_b_input;

assign u_mpy_a_input = r_mpy_a_input;
assign u_mpy_b_input = r_mpy_b_input;

always @(posedge i_clk)
r_smpy_result <= r_mpy_a_input * r_mpy_b_input;
always @(posedge i_clk)
r_umpy_result <= u_mpy_a_input * u_mpy_b_input;
`endif

always @(posedge i_clk)
if (i_stb)
o_hi <= i_op[1];
// Busy during the first stage, valid once the second stage has been
// clocked
assign o_busy = mpypipe[0];
assign o_result = (r_sgn[1])?r_smpy_result:r_umpy_result;
assign o_valid = mpypipe[1];

// Results are then set on the third clock
end else begin : MPN3
if (IMPLEMENT_MPY == 4)
begin : MPY4CK // The four clock option
reg [63:0] r_mpy_result;
reg [31:0] r_mpy_a_input, r_mpy_b_input;
reg r_mpy_signed;
reg [2:0] mpypipe;

// First clock, latch in the inputs
initial mpypipe = 3'b0;
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
// pipeline. In this case, the multiply
// pipeline is a three stage pipeline, so we need
// three bits in the pipe.
if (i_reset)
mpypipe <= 3'h0;
else begin
mpypipe[0] <= i_stb;
mpypipe[1] <= mpypipe[0];
mpypipe[2] <= mpypipe[1];
end

// For a signed multiply, the sign bit of each operand is
// flipped here. That offsets each operand by 2^31; the
// offset is removed again via pp_s below.
if (i_op[0]) // i.e. if signed multiply
begin
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
end else begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
end
// The signed bit really only matters in the
// case of 64 bit multiply. We'll keep track
// of it, though, and pretend in all other
// cases.
r_mpy_signed <= i_op[0];

if (i_stb)
o_hi <= i_op[1];
end

// Busy while the request is in either of the first two stages, valid
// once it has reached the third
assign o_busy = |mpypipe[1:0];
assign o_valid = mpypipe[2];

// Second clock, do the multiplies, get the "partial
// products". Here, we break our input up into two
// halves,
//
// A = (2^16 ah + al)
// B = (2^16 bh + bl)
//
// and use these to compute partial products.
//
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl)
//
// Since we're following the FOIL algorithm to get here,
// we'll name these partial products according to FOIL.
//
// The trick is what happens if A or B is signed. In
// those cases, the real value of A will not be given by
// A = (2^16 ah + al)
// but rather
// A = (2^16 ah[31^] + al) - 2^31
// (where we have flipped the sign bit of A)
// and so ...
//
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al)
// + 2^62
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// + 2^31 (2^31 - (2^16 bh+bl + 2^16 ah+al))
//
// That last term is pp_s below: 2^31 minus the sum of the two
// (sign-flipped) operands, later shifted up by 31 bits. pp_s only
// needs to be kept modulo 2^33, since 33+31 bits fills the 64-bit
// result.
//
reg [31:0] pp_f, pp_l; // F and L from FOIL
reg [32:0] pp_oi; // The O and I from FOIL
reg [32:0] pp_s;
always @(posedge i_clk)
begin
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
// And a special one for the sign
if (r_mpy_signed)
pp_s <= 32'h8000_0000-(
r_mpy_a_input[31:0]
+ r_mpy_b_input[31:0]);
else
pp_s <= 33'h0;
end

// Third clock, add the results and produce a product
// The low 16 bits come straight from pp_l. The upper 48 bits are the
// sum of the remaining terms, each aligned relative to bit 16:
// pp_oi at 2^16, pp_s at 2^31, and pp_f at 2^32.
always @(posedge i_clk)
begin
r_mpy_result[15:0] <= pp_l[15:0];
r_mpy_result[63:16] <=
{ 32'h00, pp_l[31:16] }
+ { 15'h00, pp_oi }
+ { pp_s, 15'h00 }
+ { pp_f, 16'h00 };
end

assign o_result = r_mpy_result;
// Fourth clock -- results are clocked into writeback
end else begin : MPYSLOW

// Any other IMPLEMENT_MPY value falls back to the slowmpy module, fed
// with 33-bit operands: bit 32 is the sign bit when i_op[0] (signed) is
// set, and zero otherwise. Only the low 64 bits of its result are used.
// NOTE(review): slowmpy's port order and the meaning of the 1'b0
// argument are defined outside this file -- confirm against slowmpy.v.

// verilator lint_off UNUSED
wire unused_aux;
wire [65:0] full_result;
// verilator lint_on UNUSED

slowmpy #(.LGNA(6), .NA(33)) slowmpyi(i_clk, i_reset, i_stb,
{ (i_op[0])&(i_a[31]), i_a },
{ (i_op[0])&(i_b[31]), i_b }, 1'b0, o_busy,
o_valid, full_result, unused_aux);

assign o_result = full_result[63:0];

always @(posedge i_clk)
if (i_stb)
o_hi <= i_op[1];

end end end end end
endgenerate // All possible multiply results have been determined

endmodule
/pfcache.v
5,15 → 5,37
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: Keeping our CPU fed with instructions, at one per clock and
// with no stalls. An unusual feature of this cache is the
// requirement that the entire cache may be cleared (if necessary).
// with only a minimum number of stalls. The entire cache may also
// be cleared (if necessary).
//
// This logic is driven by a couple realities:
// 1. It takes a clock to read from a block RAM address, and hence a clock
// to read from the cache.
// 2. It takes another clock to check that the tag matches
//
// Our goal will be to avoid this second check if at all possible.
// Hence, we'll test on the clock of any given request whether
// or not the request matches the last tag value, and on the next
// clock whether it matches the new tag value (if it has changed). Hence,
// for anything found within the cache, there will be a one
// cycle delay on any branch.
//
//
// Address Words are separated into three components:
// [ Tag bits ] [ Cache line number ] [ Cache position w/in the line ]
//
// On any read from the cache, only the second two components are required.
// On any read from memory, the first two components will be fixed across
// the bus, and the third component will be adjusted from zero to its
// maximum value.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
37,36 → 59,62
////////////////////////////////////////////////////////////////////////////////
//
//
module pfcache(i_clk, i_rst, i_new_pc, i_clear_cache,
`default_nettype none
//
module pfcache(i_clk, i_reset, i_new_pc, i_clear_cache,
// i_early_branch, i_from_addr,
i_stall_n, i_pc, o_i, o_pc, o_v,
i_stall_n, i_pc, o_insn, o_pc, o_valid,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
o_illegal);
parameter LGCACHELEN = 8, ADDRESS_WIDTH=24,
LGLINES=5; // Log of the number of separate cache lines
localparam CACHELEN=(1<<LGCACHELEN); // Size of our cache memory
o_illegal
`ifdef NOT_YET_READY
, i_mmu_ack, i_mmu_we, i_mmu_paddr
`endif
`ifdef FORMAL
, f_pc_wb
`endif
);
`ifdef FORMAL
parameter LGCACHELEN = 4, ADDRESS_WIDTH=30,
LGLINES=2; // Log of the number of separate cache lines
`else
parameter LGCACHELEN = 12, ADDRESS_WIDTH=30,
LGLINES=6; // Log of the number of separate cache lines
`endif
localparam CACHELEN=(1<<LGCACHELEN); //Wrd Size of our cache memory
localparam CW=LGCACHELEN; // Short hand for LGCACHELEN
localparam PW=LGCACHELEN-LGLINES; // Size of a cache line
localparam LS=LGCACHELEN-LGLINES; // Size of a cache line
localparam BUSW = 32; // Number of data lines on the bus
localparam AW=ADDRESS_WIDTH; // Shorthand for ADDRESS_WIDTH
input i_clk, i_rst, i_new_pc;
input i_clear_cache;
input i_stall_n;
input [(AW-1):0] i_pc;
output wire [(BUSW-1):0] o_i;
output wire [(AW-1):0] o_pc;
output wire o_v;
//
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
input wire i_clk, i_reset;
//
// The interface with the rest of the CPU
input wire i_new_pc;
input wire i_clear_cache;
input wire i_stall_n;
input wire [(AW+1):0] i_pc;
output wire [(BUSW-1):0] o_insn;
output wire [(AW+1):0] o_pc;
output wire o_valid;
//
// The wishbone bus interface
output reg o_wb_cyc, o_wb_stb;
output wire o_wb_we;
output reg [(AW-1):0] o_wb_addr;
output wire [(BUSW-1):0] o_wb_data;
//
input i_wb_ack, i_wb_stall, i_wb_err;
input [(BUSW-1):0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [(BUSW-1):0] i_wb_data;
//
// o_illegal will be true if this instruction was the result of a
// bus error (This is also part of the CPU interface)
output reg o_illegal;
//
`ifdef NOT_YET_READY
input wire i_mmu_ack, i_mmu_we;
input wire [(PAW-1):0] i_mmu_paddr;
`endif
 
// Fixed bus outputs: we read from the bus only, never write.
// Thus the output data is ... irrelevant and don't care. We set it
74,64 → 122,132
assign o_wb_we = 1'b0;
assign o_wb_data = 0;
 
`ifdef NOT_YET_READY
// These wires will be used below as part of the cache invalidation
// routine, should the MMU be used. This allows us to snoop on the
// physical side of the MMU bus, and invalidate any results should
// we need to do so.
wire mmu_inval;
wire [(PAW-CW-1):0] mmu_mskaddr;
`endif
`ifdef FORMAL
output wire [AW-1:0] f_pc_wb;
assign f_pc_wb = i_pc[AW+1:2];
`endif
 
 
wire r_v;
reg [(BUSW-1):0] cache [0:((1<<CW)-1)];
reg [(AW-CW-1):0] tags [0:((1<<(LGLINES))-1)];
reg [((1<<(LGLINES))-1):0] vmask;
reg [(AW-CW-1):0] cache_tags [0:((1<<(LGLINES))-1)];
reg [((1<<(LGLINES))-1):0] valid_mask;
 
reg [(AW-1):0] lastpc;
reg [(CW-1):0] rdaddr;
reg r_v_from_pc, r_v_from_last, r_new_request;
reg rvsrc;
wire w_v_from_pc, w_v_from_last;
reg [(AW+1):0] lastpc;
reg [(CW-1):0] wraddr;
reg [(AW-1):CW] tagvalipc, tagvallst;
wire [(AW-1):CW] tagval;
wire [(AW-1):PW] lasttag;
wire [(AW-1):LS] lasttag;
reg illegal_valid;
reg [(AW-1):PW] illegal_cache;
reg [(AW-1):LS] illegal_cache;
 
// initial o_i = 32'h76_00_00_00; // A NOOP instruction
// initial o_pc = 0;
reg [(BUSW-1):0] r_pc_cache, r_last_cache;
reg [(AW-1):0] r_pc, r_lastpc;
reg isrc;
reg [(AW+1):0] r_pc, r_lastpc;
reg isrc;
reg [1:0] delay;
reg svmask, last_ack, needload, last_addr,
bus_abort;
reg [(LGLINES-1):0] saddr;
 
wire w_advance;
assign w_advance = (i_new_pc)||((r_v)&&(i_stall_n));
 
/////////////////////////////////////////////////
//
// Read the instruction from the cache
//
/////////////////////////////////////////////////
//
//
// We'll read two values from the cache, the first is the value if
// i_pc contains the address we want, the second is the value we'd read
// if lastpc (i.e. $past(i_pc)) was the address we wanted.
initial r_pc = 0;
initial r_lastpc = 0;
always @(posedge i_clk)
begin
// We don't have the logic to select what to read, we must
// read both the value at i_pc and lastpc. cache[i_pc] is
// the value we return if the cache is good, cacne[lastpc] is
// the value we return if we've been stalled, weren't valid,
// or had to wait a clock or two. (Remember i_pc can't stop
// changing for a clock, so we need to keep track of the last
// one from before it stopped.)
// the value we return if the last cache request was in the
// cache on the last clock, cache[lastpc] is the value we
// return if we've been stalled, weren't valid, or had to wait
// a clock or two.
//
// Here we keep track of which answer we want/need
isrc <= ((r_v)&&(i_stall_n))||(i_new_pc);
// Part of the issue here is that i_pc is going to increment
// on this clock before we know whether or not the cache entry
// we've just read is valid. We can't stop this. Hence, we
// need to read from the lastpc entry.
//
//
// Here we keep track of which answer we want/need.
// If we reported a valid value to the CPU on the last clock,
// and the CPU wasn't stalled, then we want to use i_pc.
// Likewise if the CPU gave us an i_new_pc request, then we'll
// want to return the value associated with reading the cache
// at i_pc.
isrc <= w_advance;
 
// Here we read both, and select which was write using isrc
// on the next clock.
r_pc_cache <= cache[i_pc[(CW-1):0]];
r_last_cache <= cache[lastpc[(CW-1):0]];
// Here we read both cache entries, at i_pc and lastpc.
// We'll select from among these cache possibilities on the
// next clock
r_pc_cache <= cache[i_pc[(CW+1):2]];
r_last_cache <= cache[lastpc[(CW+1):2]];
//
// Let's also register(delay) the r_pc and r_lastpc values
// for the next clock, so we can accurately report the address
// of the cache value we just looked up.
r_pc <= i_pc;
r_lastpc <= lastpc;
end
assign o_pc = (isrc) ? r_pc : r_lastpc;
assign o_i = (isrc) ? r_pc_cache : r_last_cache;
 
reg tagsrc;
always @(posedge i_clk)
// It may be possible to recover a clock once the cache line
// has been filled, but our prior attempt to do so has lead
// to a race condition, so we keep this logic simple.
if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
tagsrc <= 1'b1;
else
tagsrc <= 1'b0;
// On our next clock, our result will either be the registered i_pc
// value from the last clock (if isrc), otherwise r_lastpc
assign o_pc = (isrc) ? r_pc : r_lastpc;
// The same applies for determining what the next output instruction
// will be. We just read it in the last clock, now we just need to
// select between the two possibilities we just read.
assign o_insn= (isrc) ? r_pc_cache : r_last_cache;
 
 
/////////////////////////////////////////////////
//
// Read the tag value associated with this cache line
//
/////////////////////////////////////////////////
//
//
 
//
// Read the tag value associated with this i_pc value
initial tagvalipc = 0;
always @(posedge i_clk)
tagvalipc <= tags[i_pc[(CW-1):PW]];
tagvalipc <= cache_tags[i_pc[(CW+1):LS+2]];
 
 
//
// Read the tag value associated with the lastpc value, from what
// i_pc was when we could not tell if this value was in our cache or
// not, or perhaps from when we determined that it was not in the cache.
initial tagvallst = 0;
always @(posedge i_clk)
tagvallst <= tags[lastpc[(CW-1):PW]];
assign tagval = (tagsrc)?tagvalipc : tagvallst;
tagvallst <= cache_tags[lastpc[(CW+1):LS+2]];
 
// Select from between these two values on the next clock
assign tagval = (isrc)?tagvalipc : tagvallst;
 
// i_pc will only increment when everything else isn't stalled, thus
// we can set it without worrying about that. Doing this enables
// us to work in spite of stalls. For example, if the next address
139,170 → 255,724
// anyway.
initial lastpc = 0;
always @(posedge i_clk)
if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
lastpc <= i_pc;
if (w_advance)
lastpc <= i_pc;
 
assign lasttag = lastpc[(AW-1):PW];
assign lasttag = lastpc[(AW+1):LS+2];
 
wire w_v_from_pc, w_v_from_last;
assign w_v_from_pc = ((i_pc[(AW-1):PW] == lasttag)
&&(tagvalipc == i_pc[(AW-1):CW])
&&(vmask[i_pc[(CW-1):PW]]));
assign w_v_from_last = (
//(lastpc[(AW-1):PW] == lasttag)&&
(tagval == lastpc[(AW-1):CW])
&&(vmask[lastpc[(CW-1):PW]]));
/////////////////////////////////////////////////
//
// Use the tag value to determine if our output instruction will be
// valid.
//
/////////////////////////////////////////////////
//
//
assign w_v_from_pc = ((i_pc[(AW+1):LS+2] == lasttag)
&&(tagval == i_pc[(AW+1):CW+2])
&&(valid_mask[i_pc[(CW+1):LS+2]]));
assign w_v_from_last = ((tagval == lastpc[(AW+1):CW+2])
&&(valid_mask[lastpc[(CW+1):LS+2]]));
 
reg [1:0] delay;
 
initial delay = 2'h3;
reg rvsrc;
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache)||(i_new_pc)||((r_v)&&(i_stall_n)))
begin
// r_v <= r_v_from_pc;
rvsrc <= 1'b1;
if ((i_reset)||(i_clear_cache)||(w_advance))
begin
// Source our valid signal from i_pc
rvsrc <= 1'b1;
// Delay at least two clocks before declaring that
// we have an invalid result. This will give us time
// to check the tag value of what's in the cache.
delay <= 2'h2;
end else if ((!r_v)&&(!o_illegal)) begin
// If we aren't sourcing our valid signal from the
// i_pc clock, then we are sourcing it from the
// lastpc clock (one clock later). If r_v still
// isn't valid, we may need to make a bus request.
// Apply our timer and timeout.
rvsrc <= 1'b0;
 
// Delay is two once the bus starts, in case the
// bus transaction needs to be restarted upon completion.
// This might happen if, after we start loading the
// cache, we discover a branch. The cache load will
// still complete, but the branch's address needs to be
// the one we jump to. This may mean we need to load
// the cache twice.
if (o_wb_cyc)
delay <= 2'h2;
end else if (~r_v) begin // Otherwise, r_v was true and we were
// stalled, hence only if ~r_v
rvsrc <= 1'b0;
if (o_wb_cyc)
delay <= 2'h2;
else if (delay != 0)
delay <= delay + 2'b11; // i.e. delay -= 1;
end
reg r_v_from_pc, r_v_from_last;
else if (delay != 0)
delay <= delay + 2'b11; // i.e. delay -= 1;
end else begin
// After sourcing our output from i_pc, if it wasn't
// accepted, source the instruction from the lastpc valid
// determination instead
rvsrc <= 1'b0;
if (o_illegal)
delay <= 2'h2;
end
 
wire w_invalidate_result;
assign w_invalidate_result = (i_reset)||(i_clear_cache);
 
reg r_prior_illegal;
initial r_prior_illegal = 0;
initial r_new_request = 0;
initial r_v_from_pc = 0;
initial r_v_from_last = 0;
always @(posedge i_clk)
r_v_from_pc <= w_v_from_pc;
begin
r_new_request <= w_invalidate_result;
r_v_from_pc <= (w_v_from_pc)&&(!w_invalidate_result)
&&(!o_illegal);
r_v_from_last <= (w_v_from_last)&&(!w_invalidate_result);
 
r_prior_illegal <= (o_wb_cyc)&&(i_wb_err);
end
 
// Now use rvsrc to determine which of the two valid flags we'll be
// using: r_v_from_pc (the i_pc address), or r_v_from_last (the lastpc
// address)
assign r_v = ((rvsrc)?(r_v_from_pc):(r_v_from_last))&&(!r_new_request);
assign o_valid = (((rvsrc)?(r_v_from_pc):(r_v_from_last))
||(o_illegal))
&&(!i_new_pc)&&(!r_prior_illegal);
 
/////////////////////////////////////////////////
//
// If the instruction isn't in our cache, then we need to load
// a new cache line from memory.
//
/////////////////////////////////////////////////
//
//
initial needload = 1'b0;
always @(posedge i_clk)
r_v_from_last <= w_v_from_last;
if ((i_clear_cache)||(o_wb_cyc))
needload <= 1'b0;
else if ((w_advance)&&(!o_illegal))
needload <= 1'b0;
else
needload <= (delay==0)&&(!w_v_from_last)
// Prevent us from reloading an illegal address
// (i.e. one that produced a bus error) over and over
// and over again
&&((!illegal_valid)
||(lastpc[(AW+1):LS+2] != illegal_cache));
 
assign r_v = ((rvsrc)?(r_v_from_pc):(r_v_from_last));
assign o_v = (((rvsrc)?(r_v_from_pc):(r_v_from_last))
||((o_illegal)&&(~o_wb_cyc)))
&&(~i_new_pc)&&(~i_rst);
//
// Working from the rule that you want to keep complex logic out of
// a state machine if possible, we calculate a "last_stb" value one
// clock ahead of time. Hence, any time a request is accepted, if
// last_stb is also true we'll know we need to drop the strobe line,
// having finished requesting a complete cache line.
initial last_addr = 1'b0;
always @(posedge i_clk)
if (!o_wb_cyc)
last_addr <= 1'b0;
else if ((o_wb_addr[(LS-1):1] == {(LS-1){1'b1}})
&&((!i_wb_stall)|(o_wb_addr[0])))
last_addr <= 1'b1;
 
reg last_ack;
//
// "last_ack" is almost identical to last_addr, save that this
// will be true on the same clock as the last acknowledgment from the
// bus. The state machine logic will use this to determine when to
// get off the bus and end the wishbone bus cycle.
initial last_ack = 1'b0;
always @(posedge i_clk)
last_ack <= (o_wb_cyc)&&(
(rdaddr[(PW-1):1]=={(PW-1){1'b1}})
&&((rdaddr[0])||(i_wb_ack)));
(wraddr[(LS-1):1]=={(LS-1){1'b1}})
&&((wraddr[0])||(i_wb_ack)));
 
reg needload;
initial needload = 1'b0;
initial bus_abort = 1'b0;
always @(posedge i_clk)
needload <= ((~r_v)&&(delay==0)
&&((tagvallst != lastpc[(AW-1):CW])
||(~vmask[lastpc[(CW-1):PW]]))
&&((~illegal_valid)
||(lastpc[(AW-1):PW] != illegal_cache)));
if (!o_wb_cyc)
bus_abort <= 1'b0;
else if ((i_clear_cache)||(i_new_pc))
bus_abort <= 1'b1;
 
reg last_addr;
initial last_addr = 1'b0;
always @(posedge i_clk)
last_addr <= (o_wb_cyc)&&(o_wb_addr[(PW-1):1] == {(PW-1){1'b1}})
&&((~i_wb_stall)|(o_wb_addr[0]));
 
//
// Here's the difficult piece of state machine logic--the part that
// determines o_wb_cyc and o_wb_stb. We've already moved most of the
// complicated logic off of this statemachine, calculating it one cycle
// early. As a result, this is a fairly easy piece of logic.
initial o_wb_cyc = 1'b0;
initial o_wb_stb = 1'b0;
initial o_wb_addr = {(AW){1'b0}};
initial rdaddr = 0;
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache))
begin
o_wb_cyc <= 1'b0;
if ((i_reset)||(i_clear_cache))
begin
o_wb_cyc <= 1'b0;
o_wb_stb <= 1'b0;
end else if (o_wb_cyc)
begin
if (i_wb_err)
o_wb_stb <= 1'b0;
end else if (o_wb_cyc)
begin
if (i_wb_err)
o_wb_stb <= 1'b0;
else if ((o_wb_stb)&&(~i_wb_stall)&&(last_addr))
o_wb_stb <= 1'b0;
else if ((o_wb_stb)&&(!i_wb_stall)&&(last_addr))
o_wb_stb <= 1'b0;
 
if (((i_wb_ack)&&(last_ack))||(i_wb_err))
o_wb_cyc <= 1'b0;
if (((i_wb_ack)&&(last_ack))||(i_wb_err))
o_wb_cyc <= 1'b0;
 
// else if (rdaddr[(PW-1):1] == {(PW-1){1'b1}})
// tags[lastpc[(CW-1):PW]] <= lastpc[(AW-1):CW];
end else if ((needload)&&(!i_new_pc))
begin
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
end
 
end else if (needload)
begin
o_wb_cyc <= 1'b1;
o_wb_stb <= 1'b1;
end
// If we are reading from this cache line, then once we get the first
// acknowledgement, this cache line has the new tag value
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
cache_tags[o_wb_addr[(CW-1):LS]] <= o_wb_addr[(AW-1):CW];
 
 
// On each acknowledgment, increment the address we use to write into
// our cache. Hence, this is the write address into our cache block
// RAM.
initial wraddr = 0;
always @(posedge i_clk)
if (o_wb_cyc) // &&(i_wb_ack)
tags[o_wb_addr[(CW-1):PW]] <= o_wb_addr[(AW-1):CW];
if ((o_wb_cyc)&&(i_wb_ack)&&(!last_ack))
wraddr <= wraddr + 1'b1;
else if (!o_wb_cyc)
wraddr <= { lastpc[(CW+1):LS+2], {(LS){1'b0}} };
 
//
// The wishbone request address. This has meaning anytime o_wb_stb
// is active, and needs to be incremented any time an address is
// accepted--WITH THE EXCEPTION OF THE LAST ADDRESS. We need to keep
// this steady for that last address, unless the last address returns
// a bus error. In that case, the whole cache line will be marked as
// invalid--but we'll need the value of this register to know how
// to do that properly.
initial o_wb_addr = {(AW){1'b0}};
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
rdaddr <= rdaddr + 1;
else if (~o_wb_cyc)
rdaddr <= { lastpc[(CW-1):PW], {(PW){1'b0}} };
always @(posedge i_clk)
if ((o_wb_stb)&&(~i_wb_stall)&&(~last_addr))
o_wb_addr[(PW-1):0] <= o_wb_addr[(PW-1):0]+1;
else if (~o_wb_cyc)
o_wb_addr <= { lastpc[(AW-1):PW], {(PW){1'b0}} };
if ((o_wb_stb)&&(!i_wb_stall)&&(!last_addr))
o_wb_addr[(LS-1):0] <= o_wb_addr[(LS-1):0]+1'b1;
else if (!o_wb_cyc)
o_wb_addr <= { lastpc[(AW+1):LS+2], {(LS){1'b0}} };
 
// Can't initialize an array, so leave cache uninitialized
// We'll also never get an ack without sys being active, so skip
// that check. Or rather, let's just use o_wb_cyc instead. This
// will work because multiple writes to the same address, ending with
// a valid write, aren't a problem.
// Since it is impossible to initialize an array, our cache will start
// up uninitialized. We'll also never get a valid ack without
// cyc being active, although we might get one on the clock after
// cyc was active--so we need to test and gate on whether o_wb_cyc
// is true.
//
// wraddr will advance forward on every clock cycle where ack is true,
// hence we don't need to check i_wb_ack here. This will work because
// multiple writes to the same address, ending with a valid write,
// will always yield the valid write's value only after our bus cycle
// is over.
always @(posedge i_clk)
if (o_wb_cyc) // &&(i_wb_ack)
cache[rdaddr] <= i_wb_data;
if (o_wb_cyc)
cache[wraddr] <= i_wb_data;
 
// VMask ... is a section loaded?
// Note "svmask". It's purpose is to delay the vmask setting by one
// clock, so that we can insure the right value of the cache is loaded
// before declaring that the cache line is valid. Without this, the
// cache line would get read, and the instruction would read from the
// last cache line.
reg svmask;
initial vmask = 0;
// Note "svmask". Its purpose is to delay the valid_mask setting by
// one clock, so that we can ensure the right value of the cache is
// loaded before declaring that the cache line is valid. Without
// this, the cache line would get read, and the instruction would
// read from the last cache line.
initial valid_mask = 0;
initial svmask = 1'b0;
reg [(LGLINES-1):0] saddr;
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache))
if ((i_reset)||(i_clear_cache))
begin
valid_mask <= 0;
svmask<= 1'b0;
end else begin
svmask <= ((o_wb_cyc)&&(i_wb_ack)&&(last_ack)&&(!bus_abort));
 
if (svmask)
valid_mask[saddr] <= (!bus_abort);
if ((!o_wb_cyc)&&(needload))
valid_mask[lastpc[(CW+1):LS+2]] <= 1'b0;
`ifdef NOT_YET_READY
//
// MMU code
//
if (mmu_inval)
valid_mask[mmu_mskadr] <= 1'b0;
`endif
end
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
saddr <= wraddr[(CW-1):LS];
// MMU code
//
//
`ifdef NOT_YET_READY
parameter [0:0] USE_MMU = 1'b1;
generate if (USE_MMU)
begin
reg [(PAW-CW-1):0] ptag [0:((1<<(LGLINES))-1)];
reg mmu_pre_inval, r_mmu_inval;
reg [(PAW-CW-1):0] mmu_pre_tag, mmu_pre_padr;
reg [(CW-LS-1):0] r_mmu_mskadr;
 
initial mmu_pre_inval = 0;
initial mmu_pre_tag = 0;
initial mmu_pre_padr = 0;
initial mmu_pre2_inval = 0;
initial mmu_pre2_mskadr = 0;
 
always @(posedge i_clk)
if ((o_wb_cyc)&&(!last_addr)&&(i_mmu_ack))
ptag[i_mmu_paddr[(CW-1):LS]] <= i_mmu_paddr[(PAW-1):CW];
 
always @(posedge i_clk)
if (i_reset)
begin
vmask <= 0;
svmask<= 1'b0;
mmu_pre_inval <= 0;
r_mmu_inval <= 0;
end else begin
mmu_pre_inval <= (i_mmu_ack)&&(i_mmu_we);
r_mmu_inval <= (mmu_pre_inval)&&(mmu_pre_inval)
&&(mmu_pre_tag == mmu_pre_paddr);
end
else begin
svmask <= ((o_wb_cyc)&&(i_wb_ack)&&(last_ack));
if (svmask)
vmask[saddr] <= 1'b1;
if ((~o_wb_cyc)&&(needload))
vmask[lastpc[(CW-1):PW]] <= 1'b0;
 
always @(posedge i_clk)
mmu_pre_tag <= ptag[i_mmu_paddr[(CW-1):LS]];
 
always @(posedge i_clk)
begin
mmu_pre_padr <= i_mmu_paddr[(PAW-1):CW];
r_mmu_mskadr <= mmu_pre_padr[(PAW-LS-1):(CW-LS)];
end
always @(posedge i_clk)
if ((o_wb_cyc)&&(i_wb_ack))
saddr <= rdaddr[(CW-1):PW];
 
assign mmu_inval = r_mmu_inval;
assign mmu_mskadr = r_mmu_mskadr;
end else begin
assign mmu_inval = 0;
assign mmu_mskadr = 0;
end endgenerate
`endif
 
/////////////////////////////////////////////////
//
// Handle bus errors here. If a bus read request
// returns an error, then we'll mark the entire
// line as having a (valid) illegal value.
//
/////////////////////////////////////////////////
//
//
//
//
initial illegal_cache = 0;
initial illegal_valid = 0;
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache))
if ((i_reset)||(i_clear_cache))
begin
illegal_cache <= 0;
illegal_valid <= 0;
end else if ((o_wb_cyc)&&(i_wb_err))
begin
illegal_cache <= o_wb_addr[(AW-1):LS];
illegal_valid <= 1'b1;
end
 
// o_illegal: registered flag telling the CPU that the instruction line
// addressed by lastpc is the one that previously returned a bus error.
//
// The flag is forced low on any reset, cache clear or new-PC request, and
// also on the clock after it was set or after a valid output was accepted
// (o_valid with i_stall_n high).  Otherwise it is set whenever an illegal
// line has been recorded (illegal_valid) and that recorded line address
// matches the line address of lastpc.
initial	o_illegal = 1'b0;
always @(posedge i_clk)
begin
	if ((i_reset)||(i_clear_cache)||(i_new_pc)
			||(o_illegal)||((o_valid)&&(i_stall_n)))
		o_illegal <= 1'b0;
	else
		o_illegal <= (illegal_valid)
			&&(illegal_cache == lastpc[(AW+1):LS+2]);
end
 
`ifdef FORMAL
//
//
// Generic setup
//
//
`ifdef PFCACHE
`define ASSUME assume
`else
`define ASSUME assert
`define STEP_CLOCK
`endif
 
// Keep track of a flag telling us whether or not $past()
// will return valid results.  The flag starts low and goes (and stays)
// high after the first rising clock edge, i.e. once there is a previous
// sample for $past() to refer to.
reg	f_past_valid;
initial	f_past_valid = 1'b0;
always @(posedge i_clk)
	// Non-blocking assignment: f_past_valid is read by other clocked
	// blocks, so it must update like any other register to avoid a
	// simulation race between always blocks on the same clock edge.
	f_past_valid <= 1'b1;
always @(*)
if (!f_past_valid)
`ASSUME(i_reset);
 
/////////////////////////////////////////////////
//
//
// Assumptions about our inputs
//
//
/////////////////////////////////////////////////
 
 
`ifdef PFCACHE
//
// Assume that resets, new-pc commands, and clear-cache commands
// are never more than pulses--one clock wide at most.
//
// It may be that the CPU treats us differently. We only impose
// this assumption on the solver here.
// Constrain i_reset, i_new_pc and i_clear_cache to be at most one clock
// wide: if one of them was high on the previous clock, assume it is low
// now.
//
// The guard must be f_past_valid (not its negation): $past() only has a
// meaningful value once a prior clock edge exists.  Guarding on
// !f_past_valid would apply these assumptions solely on the first clock,
// where $past() is undefined, and never afterwards.
always @(posedge i_clk)
if (f_past_valid)
begin
	if ($past(i_reset))
		assume(!i_reset);
	if ($past(i_new_pc))
		assume(!i_new_pc);
	if ($past(i_clear_cache))
		assume(!i_clear_cache);
end
`endif
 
//
// Assume we start from a reset condition
initial `ASSUME(i_reset);
 
// Assume that any reset is either accompanied by a new address,
// or a new address immediately follows it.
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_reset)))
`ASSUME(i_new_pc);
//
// Let's make some assumptions about how long it takes our
// phantom bus and phantom CPU to respond.
//
// These delays need to be long enough to flush out any potential
// errors, yet still short enough that the formal method doesn't
// take forever to solve.
//
localparam F_CPU_DELAY = 4;
reg [4:0] f_cpu_delay;
 
// Now, let's repeat this bit but now looking at the delay the CPU
// takes to accept an instruction.
always @(posedge i_clk)
// If no instruction is ready, then keep our counter at zero
if ((!o_valid)||(i_stall_n))
f_cpu_delay <= 0;
else
// Otherwise, count the clocks the CPU takes to respond
f_cpu_delay <= f_cpu_delay + 1'b1;
 
`ifdef PFCACHE
always @(posedge i_clk)
assume(f_cpu_delay < F_CPU_DELAY);
`endif
 
always @(posedge i_clk)
if ($past(i_reset || i_clear_cache))
assume(i_stall_n);
else if ($past(i_stall_n && !o_valid))
assume(i_stall_n);
else if (i_new_pc)
assume(i_stall_n);
 
/////////////////////////////////////////////////
//
//
// Assertions about our outputs
//
//
/////////////////////////////////////////////////
 
localparam F_LGDEPTH=LS+1;
wire [(F_LGDEPTH-1):0] f_nreqs, f_nacks, f_outstanding;
 
fwb_master #(.AW(AW), .DW(BUSW), .F_LGDEPTH(F_LGDEPTH),
.F_MAX_STALL(2), .F_MAX_ACK_DELAY(3),
.F_MAX_REQUESTS(1<<LS), .F_OPT_SOURCE(1),
.F_OPT_RMW_BUS_OPTION(0),
.F_OPT_DISCONTINUOUS(0))
f_wbm(i_clk, i_reset,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data, 4'h0,
i_wb_ack, i_wb_stall, i_wb_data, i_wb_err,
f_nreqs, f_nacks, f_outstanding);
 
// writes are also illegal for a prefetch.
always @(posedge i_clk)
if (o_wb_stb)
assert(!o_wb_we);
 
always @(posedge i_clk)
begin
assert(f_nreqs <= (1<<LS));
if ((o_wb_cyc)&&(o_wb_stb))
assert(f_nreqs == o_wb_addr[(LS-1):0]);
if ((f_past_valid)&&($past(o_wb_cyc))
&&(!o_wb_stb)&&(!$past(i_wb_err || i_reset || i_clear_cache)))
assert(f_nreqs == (1<<LS));
end
 
always @(posedge i_clk)
if (f_past_valid)
begin
if ((!o_wb_cyc)&&($past(o_wb_cyc))&&(!$past(i_reset))
&&(!$past(i_clear_cache)) &&(!$past(i_wb_err)))
assert(f_nacks == (1<<LS));
else if (o_wb_cyc)
assert(f_nacks[(LS-1):0] == wraddr[(LS-1):0]);
end
 
// The last-ack line
always @(posedge i_clk)
if (o_wb_cyc)
assert(last_ack == (f_nacks == ((1<<LS)-1)));
 
// The valid line for what's being read
always @(posedge i_clk)
if (o_wb_cyc)
assert(!valid_mask[o_wb_addr[CW-1:LS]]);
 
always @(posedge i_clk)
if ((illegal_valid)&&(o_wb_cyc))
assert(o_wb_addr[AW-1:LS] != illegal_cache);
 
// One-clock-delayed copy of valid_mask (one bit per cache line), kept
// for use by the formal properties.
reg	[((1<<(LGLINES))-1):0]	f_past_valid_mask;
initial	f_past_valid_mask = 0;
always @(posedge i_clk)
	// Non-blocking assignment, as for every other register clocked on
	// i_clk, so readers in other always blocks see the previous value.
	f_past_valid_mask <= valid_mask;
 
always @(posedge i_clk)
if ((o_valid)&&($past(!o_valid || !o_illegal)))
assert((!o_wb_cyc)
||(o_wb_addr[AW-1:LS] != o_pc[AW+1:LS+2]));
always @(posedge i_clk)
if (illegal_valid)
begin
assert((!o_wb_cyc)
||(o_wb_addr[AW-1:LS] != illegal_cache));
 
// The illegal cache line should never be valid within our
// cache
assert((!valid_mask[illegal_cache[CW-1:LS]])
||(cache_tags[illegal_cache[CW-1:LS]]
!= illegal_cache[AW-1:CW]));
end
 
/////////////////////////////////////////////////////
//
//
// Assertions about our return responses to the CPU
//
//
/////////////////////////////////////////////////////
 
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_wb_cyc)))
assert(o_wb_addr[(AW-1):LS] == $past(o_wb_addr[(AW-1):LS]));
 
// Consider it invalid to present the CPU with the same instruction
// twice in a row.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid))&&($past(i_stall_n))&&(o_valid))
assert(o_pc != $past(o_pc));
 
always @(posedge i_clk)
if (o_valid)
begin
if (!o_illegal)
begin
illegal_cache <= 0;
illegal_valid <= 0;
end else if ((o_wb_cyc)&&(i_wb_err))
begin
illegal_cache <= o_wb_addr[(AW-1):PW];
illegal_valid <= 1'b1;
assert(cache_tags[o_pc[(CW+1):LS+2]] == o_pc[(AW+1):CW+2]);
assert(valid_mask[o_pc[(CW+1):LS+2]] || (o_illegal));
assert(o_insn == cache[o_pc[(CW+1):2]]);
assert((!illegal_valid)
||((illegal_cache != o_pc[(AW+1):LS+2])));
end
 
initial o_illegal = 1'b0;
assert(o_illegal == ($past(illegal_valid)
&&($past(illegal_cache)== o_pc[(AW+1):LS+2])));
end
 
always @(*)
begin
`ASSUME(i_pc[1:0] == 2'b00);
assert(o_pc[1:0] == 2'b00);
assert(r_pc[1:0] == 2'b00);
assert(r_lastpc[1:0] == 2'b00);
end
 
reg [(AW+1):0] f_next_pc;
 
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache)||(o_wb_cyc))
o_illegal <= 1'b0;
if ((f_past_valid)&&(!$past(i_reset)))
begin
if (isrc)
assert(lastpc == r_pc);
else
o_illegal <= (illegal_valid)
&&(illegal_cache == i_pc[(AW-1):PW]);
assert(lastpc + 4== r_pc);
end
 
always @(posedge i_clk)
if (i_new_pc)
f_next_pc <= { i_pc[AW+1:2] + 1'b1, 2'b00 };
else if ((i_stall_n)&&(r_v))
f_next_pc <= { i_pc[AW+1:2] + 1'b1, 2'b00 };
always @(*)
if (!i_new_pc)
`ASSUME(i_pc == f_next_pc);
 
always @(posedge i_clk)
if ((f_past_valid)&&(o_valid)&&($past(o_valid))
&&(!$past(i_reset))
&&(!$past(i_new_pc))
&&(!$past(i_stall_n))
&&(!o_illegal))
begin
assert(cache_tags[o_pc[(CW+1):LS+2]] == o_pc[(AW+1):CW+2]);
end
 
//
// If an instruction is accepted, we should *always* move on to another
// instruction. The only exception is following an i_new_pc (or
// other invalidator), at which point the next instruction should
// be invalid.
always @(posedge i_clk)
if ((f_past_valid)&&($past(o_valid))&&($past(i_stall_n)))
begin
// Should always advance the instruction
assert((!o_valid)||(o_pc != $past(o_pc)));
end
 
//
// Once an instruction becomes valid, it should never become invalid
// unless there's been a request for a new instruction.
always @(posedge i_clk)
if ((f_past_valid)&&($past(!i_reset && !i_clear_cache && !i_new_pc))
&&($past(o_valid && !i_stall_n))
&&(!i_new_pc))
begin
if ((!$past(o_illegal))&&(!$past(o_wb_cyc && i_wb_err)))
begin
assert(o_valid);
assert($stable(o_illegal));
assert($stable(o_insn));
end else
assert((o_illegal)||(!o_valid));
end
`ifdef PFCACHE
/////////////////////////////////////////////////////
//
//
// Assertions associated with a response to a known
// address request
//
//
/////////////////////////////////////////////////////
 
 
(* anyconst *) reg [AW:0] f_const_addr;
(* anyconst *) reg [BUSW-1:0] f_const_insn;
 
wire f_this_pc, f_this_insn, f_this_data, f_this_line,
f_this_ack, f_this_tag; // f_this_addr;
assign f_this_pc = (o_pc == { f_const_addr[AW-1:0], 2'b00 });
// assign f_this_addr = (o_wb_addr == f_const_addr[AW-1:0] );
assign f_this_insn = (o_insn == f_const_insn);
assign f_this_data = (i_wb_data == f_const_insn);
assign f_this_line = (o_wb_addr[AW-1:LS] == f_const_addr[AW-1:LS]);
assign f_this_ack = (f_this_line)&&(f_nacks == f_const_addr[LS-1:0]);
assign f_this_tag = (tagval == f_const_addr[AW-1:CW]);
 
always @(posedge i_clk)
if ((o_valid)&&(f_this_pc)&&(!$past(o_illegal)))
begin
assert(o_illegal == f_const_addr[AW]);
if (!o_illegal)
begin
assert(f_this_insn);
assert(f_this_tag);
end
end
 
always @(*)
if ((valid_mask[f_const_addr[CW-1:LS]])
&&(cache_tags[f_const_addr[(CW-1):LS]]==f_const_addr[AW-1:CW]))
assert(f_const_insn == cache[f_const_addr[CW-1:0]]);
else if ((o_wb_cyc)&&(o_wb_addr[AW-1:LS] == f_const_addr[AW-1:LS])
&&(f_nacks > f_const_addr[LS-1:0]))
begin
assert(f_const_insn == cache[f_const_addr[CW-1:0]]);
end
 
always @(*)
if (o_wb_cyc)
assert(wraddr[CW-1:LS] == o_wb_addr[CW-1:LS]);
 
always @(*)
if (!f_const_addr[AW])
assert((!illegal_valid)
||(illegal_cache != f_const_addr[AW-1:LS]));
else
assert((cache_tags[f_const_addr[CW-1:LS]]!=f_const_addr[AW-1:CW])
||(!valid_mask[f_const_addr[CW-1:LS]]));
 
always @(*)
if ((f_this_line)&&(o_wb_cyc))
begin
if (f_const_addr[AW])
assume(!i_wb_ack);
else
assume(!i_wb_err);
 
if ((f_this_ack)&&(i_wb_ack))
assume(f_this_data);
end
 
always @(*)
if ((f_this_line)&&(!f_const_addr[AW]))
assume(!i_wb_err);
 
always @(*)
if (!f_const_addr[AW])
assume((!valid_mask[f_const_addr[CW-1:LS]])
||(cache_tags[f_const_addr[CW-1:LS]] != f_const_addr[AW-1:CW]));
`endif
 
//
//
// Cover properties
//
//
reg f_valid_legal;
always @(*)
f_valid_legal = o_valid && (!o_illegal);
always @(posedge i_clk) // Trace 0
cover((o_valid)&&( o_illegal));
always @(posedge i_clk) // Trace 1
cover(f_valid_legal);
always @(posedge i_clk) // Trace 2
cover((f_valid_legal)
&&($past(!o_valid && !i_new_pc))
&&($past(i_new_pc,2)));
always @(posedge i_clk) // Trace 3
cover((f_valid_legal)&&($past(i_stall_n))&&($past(i_new_pc)));
always @(posedge i_clk) // Trace 4
cover((f_valid_legal)&&($past(f_valid_legal && i_stall_n)));
always @(posedge i_clk) // Trace 5
cover((f_valid_legal)
&&($past(f_valid_legal && i_stall_n))
&&($past(f_valid_legal && i_stall_n,2))
&&($past(f_valid_legal && i_stall_n,3)));
always @(posedge i_clk) // Trace 6
cover((f_valid_legal)
&&($past(f_valid_legal && i_stall_n))
&&($past(f_valid_legal && i_stall_n,2))
&&($past(!o_illegal && i_stall_n && i_new_pc,3))
&&($past(f_valid_legal && i_stall_n,4))
&&($past(f_valid_legal && i_stall_n,5))
&&($past(f_valid_legal && i_stall_n,6)));
 
`endif // FORMAL
endmodule
/pipefetch.v
30,7 → 30,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015,2017, Gisselquist Technology, LLC
// Copyright (C) 2015,2017,2019 Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
54,6 → 54,8
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
module pipefetch(i_clk, i_rst, i_new_pc, i_clear_cache, i_stall_n, i_pc,
o_i, o_pc, o_v,
o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
62,9 → 64,9
parameter RESET_ADDRESS=32'h0010_0000,
LGCACHELEN = 6, ADDRESS_WIDTH=24,
CACHELEN=(1<<LGCACHELEN), BUSW=32, AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_new_pc,
input wire i_clk, i_rst, i_new_pc,
i_clear_cache, i_stall_n;
input [(AW-1):0] i_pc;
input wire [(AW-1):0] i_pc;
output reg [(BUSW-1):0] o_i;
output reg [(AW-1):0] o_pc;
output wire o_v;
74,11 → 76,11
output reg [(AW-1):0] o_wb_addr;
output wire [(BUSW-1):0] o_wb_data;
//
input i_wb_ack, i_wb_stall, i_wb_err;
input [(BUSW-1):0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [(BUSW-1):0] i_wb_data;
//
// Is the (data) memory unit also requesting access to the bus?
input i_wb_request;
input wire i_wb_request;
output wire o_illegal;
 
// Fixed bus outputs: we read from the bus only, never write.
120,7 → 122,7
+(3<<(LGCACHELEN-2)))
&&(|r_nvalid[(LGCACHELEN):(LGCACHELEN-1)]);
 
initial r_cache_base = RESET_ADDRESS;
initial r_cache_base = RESET_ADDRESS[(AW+1):2];
always @(posedge i_clk)
begin
if ((i_rst)||(i_clear_cache)||((o_wb_cyc)&&(i_wb_err)))
301,5 → 303,4
 
assign o_illegal = (o_pc == ill_address)&&(~i_rst)&&(~i_new_pc)&&(~i_clear_cache);
 
 
endmodule
/pipemem.v
17,7 → 17,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
41,22 → 41,35
////////////////////////////////////////////////////////////////////////////////
//
//
module pipemem(i_clk, i_rst, i_pipe_stb, i_lock,
`default_nettype none
//
module pipemem(i_clk, i_reset, i_pipe_stb, i_lock,
i_op, i_addr, i_data, i_oreg,
o_busy, o_pipe_stalled, o_valid, o_err, o_wreg, o_result,
o_wb_cyc_gbl, o_wb_cyc_lcl,
o_wb_stb_gbl, o_wb_stb_lcl,
o_wb_we, o_wb_addr, o_wb_data, o_wb_sel,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
parameter ADDRESS_WIDTH=30, IMPLEMENT_LOCK=0;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst;
input i_pipe_stb, i_lock;
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data
`ifdef FORMAL
, f_nreqs, f_nacks, f_outstanding, f_pc
`endif
);
parameter ADDRESS_WIDTH=30;
parameter [0:0] IMPLEMENT_LOCK=1'b1,
WITH_LOCAL_BUS=1'b1,
OPT_ZERO_ON_IDLE=1'b0,
// OPT_ALIGNMENT_ERR
OPT_ALIGNMENT_ERR=1'b0;
localparam AW=ADDRESS_WIDTH,
FLN=4;
parameter [(FLN-1):0] OPT_MAXDEPTH=4'hd;
input wire i_clk, i_reset;
input wire i_pipe_stb, i_lock;
// CPU interface
input [2:0] i_op;
input [31:0] i_addr;
input [31:0] i_data;
input [4:0] i_oreg;
input wire [2:0] i_op;
input wire [31:0] i_addr;
input wire [31:0] i_data;
input wire [4:0] i_oreg;
// CPU outputs
output wire o_busy;
output wire o_pipe_stalled;
73,134 → 86,201
output reg [31:0] o_wb_data;
output reg [3:0] o_wb_sel;
// Wishbone inputs
input i_wb_ack, i_wb_stall, i_wb_err;
input [31:0] i_wb_data;
input wire i_wb_ack, i_wb_stall, i_wb_err;
input wire [31:0] i_wb_data;
// Formal
parameter F_LGDEPTH=5;
`ifdef FORMAL
output wire [(F_LGDEPTH-1):0] f_nreqs, f_nacks, f_outstanding;
output reg f_pc;
`endif
 
reg cyc;
reg r_wb_cyc_gbl, r_wb_cyc_lcl;
reg [3:0] rdaddr, wraddr;
wire [3:0] nxt_rdaddr;
reg [(4+5-1):0] fifo_oreg [0:15];
 
reg cyc;
reg r_wb_cyc_gbl, r_wb_cyc_lcl, fifo_full;
reg [(FLN-1):0] rdaddr, wraddr;
wire [(FLN-1):0] nxt_rdaddr, fifo_fill;
reg [(3+5-1):0] fifo_oreg [0:15];
reg fifo_gie;
initial rdaddr = 0;
initial wraddr = 0;
 
reg misaligned;
 
// misaligned: combinational flag raised when an incoming request
// (i_pipe_stb) targets an address that is not aligned for its access
// size.  Only evaluated when OPT_ALIGNMENT_ERR is set; otherwise it is
// tied low.
//
// The case selector is { i_op[2:1], i_addr[1:0] }.
// NOTE(review): presumably i_op[2:1]==2'b01 selects a 32-bit word access
// and 2'b10 a 16-bit halfword access -- confirm against the ZipCPU
// memory opcode encoding.
always @(*)
if (OPT_ALIGNMENT_ERR)
begin
	casez({ i_op[2:1], i_addr[1:0] })
	4'b01?1: misaligned = i_pipe_stb;	// op 01, odd address
	4'b0110: misaligned = i_pipe_stb;	// op 01, address[1:0]==2'b10
	4'b10?1: misaligned = i_pipe_stb;	// op 10, odd address
	// Every other combination is an aligned access.  The default must
	// be zero: assigning i_pipe_stb here (as all the other arms do)
	// would flag *every* request as misaligned.
	default: misaligned = 1'b0;
	endcase
end else
	misaligned = 1'b0;
 
always @(posedge i_clk)
fifo_oreg[wraddr] <= { i_oreg, i_op[2:1], i_addr[1:0] };
fifo_oreg[wraddr] <= { i_oreg[3:0], i_op[2:1], i_addr[1:0] };
 
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
if (i_pipe_stb)
fifo_gie <= i_oreg[4];
 
initial wraddr = 0;
always @(posedge i_clk)
if (i_reset)
wraddr <= 0;
else if (((i_wb_err)&&(cyc))||((i_pipe_stb)&&(misaligned)))
wraddr <= 0;
else if (i_pipe_stb)
wraddr <= wraddr + 1'b1;
else if (i_pipe_stb)
wraddr <= wraddr + 1'b1;
 
initial rdaddr = 0;
always @(posedge i_clk)
if ((i_rst)||(i_wb_err))
rdaddr <= 0;
else if ((i_wb_ack)&&(cyc))
rdaddr <= rdaddr + 1'b1;
if (i_reset)
rdaddr <= 0;
else if (((i_wb_err)&&(cyc))||((i_pipe_stb)&&(misaligned)))
rdaddr <= 0;
else if ((i_wb_ack)&&(cyc))
rdaddr <= rdaddr + 1'b1;
 
assign fifo_fill = wraddr - rdaddr;
 
initial fifo_full = 0;
always @(posedge i_clk)
if (i_reset)
fifo_full <= 0;
else if (((i_wb_err)&&(cyc))||((i_pipe_stb)&&(misaligned)))
fifo_full <= 0;
else if (i_pipe_stb)
fifo_full <= (fifo_fill >= OPT_MAXDEPTH-1);
else
fifo_full <= (fifo_fill >= OPT_MAXDEPTH);
 
assign nxt_rdaddr = rdaddr + 1'b1;
 
wire gbl_stb, lcl_stb;