OpenCores
URL https://opencores.org/ocsvn/xulalx25soc/xulalx25soc/trunk

Subversion Repositories xulalx25soc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /xulalx25soc/trunk/rtl
    from Rev 117 to Rev 118
    Reverse comparison

Rev 117 → Rev 118

/cpu/busdelay.v
1,4 → 1,4
///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
// Filename: busdelay.v
//
16,12 → 16,26
// *must* know on the first clock whether or not the bus will stall.
//
//
// After a period of time, I started a new design where the timing
// associated with this original bus clock just wasn't ... fast enough.
// I needed to delay the stall line as well. A new busdelay was then
// written and debugged whcih delays the stall line. (I know, you aren't
// supposed to delay the stall line--but what if you *have* to in order
// to meet timing?) This new logic has been merged in with the old,
// and the DELAY_STALL line can be set to non-zero to use it instead
// of the original logic. Don't use it if you don't need it: it will
// consume resources and slow your bus down more, but if you do need
// it--don't be afraid to use it.
//
// Both versions of the bus delay will maintain a single access per
// clock when pipelined, they only delay the time between the strobe
// going high and the actual command being accomplished.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
39,7 → 53,7
// http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
module busdelay(i_clk,
// The input bus
48,7 → 62,7
// The delayed bus
o_dly_cyc, o_dly_stb, o_dly_we, o_dly_addr, o_dly_data,
i_dly_ack, i_dly_stall, i_dly_data, i_dly_err);
parameter AW=32, DW=32;
parameter AW=32, DW=32, DELAY_STALL = 0;
input i_clk;
// Input/master bus
input i_wb_cyc, i_wb_stb, i_wb_we;
55,12 → 69,11
input [(AW-1):0] i_wb_addr;
input [(DW-1):0] i_wb_data;
output reg o_wb_ack;
output reg o_wb_stall;
output wire o_wb_stall;
output reg [(DW-1):0] o_wb_data;
output reg o_wb_err;
output wire o_wb_err;
// Delayed bus
output reg o_dly_cyc, o_dly_we;
output wire o_dly_stb;
output reg o_dly_cyc, o_dly_stb, o_dly_we;
output reg [(AW-1):0] o_dly_addr;
output reg [(DW-1):0] o_dly_data;
input i_dly_ack;
68,36 → 81,100
input [(DW-1):0] i_dly_data;
input i_dly_err;
 
reg loaded;
initial o_dly_cyc = 1'b0;
initial loaded = 1'b0;
generate
if (DELAY_STALL != 0)
begin
reg r_stb, r_we, r_rtn_stall, r_rtn_err;
reg [(DW-1):0] r_data;
reg [(AW-1):0] r_addr;
 
always @(posedge i_clk)
o_wb_stall <= (loaded)&&(i_dly_stall);
initial o_dly_cyc = 1'b0;
initial r_rtn_stall= 1'b0;
initial r_stb = 1'b0;
always @(posedge i_clk)
begin
o_dly_cyc <= (i_wb_cyc);
if (!i_dly_stall)
begin
r_we <= i_wb_we;
r_addr <= i_wb_addr;
r_data <= i_wb_data;
 
initial o_dly_cyc = 1'b0;
always @(posedge i_clk)
o_dly_cyc <= (i_wb_cyc);
// Add the i_wb_cyc criteria here, so we can simplify the o_wb_stall
// criteria below, which would otherwise *and* these two.
always @(posedge i_clk)
loaded <= (i_wb_stb)||((loaded)&&(i_dly_stall)&&(~i_dly_err)&&(i_wb_cyc));
assign o_dly_stb = loaded;
always @(posedge i_clk)
if (~i_dly_stall)
o_dly_we <= i_wb_we;
always @(posedge i_clk)
if (~i_dly_stall)
o_dly_addr<= i_wb_addr;
always @(posedge i_clk)
if (~i_dly_stall)
o_dly_data <= i_wb_data;
always @(posedge i_clk)
o_wb_ack <= (i_dly_ack)&&(o_dly_cyc)&&(i_wb_cyc);
always @(posedge i_clk)
o_wb_data <= i_dly_data;
if (r_stb)
begin
o_dly_we <= r_we;
o_dly_addr <= r_addr;
o_dly_data <= r_data;
o_dly_stb <= 1'b1;
r_rtn_stall <= 1'b0;
r_stb <= 1'b0;
end else begin
o_dly_we <= i_wb_we;
o_dly_addr <= i_wb_addr;
o_dly_data <= i_wb_data;
o_dly_stb <= i_wb_stb;
r_stb <= 1'b0;
r_rtn_stall <= 1'b0;
end
end else if ((!r_stb)&&(!o_wb_stall))
begin
r_we <= i_wb_we;
r_addr <= i_wb_addr;
r_data <= i_wb_data;
r_stb <= i_wb_stb;
 
always @(posedge i_clk)
o_wb_err <= (i_dly_err)&&(o_dly_cyc)&&(i_wb_cyc);
r_rtn_stall <= i_wb_stb;
end
 
if (!i_wb_cyc)
begin
o_dly_stb <= 1'b0;
r_stb <= 1'b0;
r_rtn_stall <= 1'b0;
end
 
o_wb_ack <= (i_dly_ack)&&(i_wb_cyc)&&(o_dly_cyc);
o_wb_data <= i_dly_data;
r_rtn_err <= (i_dly_err)&&(i_wb_cyc)&&(o_dly_cyc);
end
 
assign o_wb_stall = r_rtn_stall;
assign o_wb_err = r_rtn_err;
 
end else begin
 
initial o_dly_cyc = 1'b0;
initial o_dly_stb = 1'b0;
 
always @(posedge i_clk)
o_dly_cyc <= i_wb_cyc;
// Add the i_wb_cyc criteria here, so we can simplify the
// o_wb_stall criteria below, which would otherwise *and*
// these two.
always @(posedge i_clk)
if (~o_wb_stall)
o_dly_stb <= ((i_wb_cyc)&&(i_wb_stb));
always @(posedge i_clk)
if (~o_wb_stall)
o_dly_we <= i_wb_we;
always @(posedge i_clk)
if (~o_wb_stall)
o_dly_addr<= i_wb_addr;
always @(posedge i_clk)
if (~o_wb_stall)
o_dly_data <= i_wb_data;
always @(posedge i_clk)
o_wb_ack <= (i_dly_ack)&&(o_dly_cyc)&&(i_wb_cyc);
always @(posedge i_clk)
o_wb_data <= i_dly_data;
 
// Our only non-delayed line, yet still really delayed. Perhaps
// there's a way to register this?
// o_wb_stall <= (i_wb_cyc)&&(i_wb_stb) ... or some such?
// assign o_wb_stall=((i_wb_cyc)&&(i_dly_stall)&&(o_dly_stb));//&&o_cyc
assign o_wb_stall = ((i_dly_stall)&&(o_dly_stb));//&&o_cyc
assign o_wb_err = i_dly_err;
end endgenerate
 
endmodule
/cpu/cpudefs.v
29,7 → 29,7
//
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
77,8 → 77,13
// OPT_ILLEGAL_INSTRUCTION is set, then the multiply will create an illegal
// instruction that will then trip the illegal instruction trap.
//
// Either not defining this value, or defining it to zero will disable the
// hardware multiply. A value of '1' will cause the multiply to occurr in one
// clock cycle only--often at the expense of the rest of the CPUs speed.
// A value of 2 will cause the multiply to have a single delay cycle, 3 will
// have two delay cycles, and 4 (or more) will have 3 delay cycles.
//
`define OPT_MULTIPLY 2
`define OPT_MULTIPLY 4
//
//
//
111,27 → 116,7
//
//
//
// OPT_NEW_INSTRUCTION_SET controls whether or not the new instruction set
// is in use. The new instruction set contains space for floating point
// operations, signed and unsigned divide instructions, as well as bit reversal
// and ... at least two other operations yet to be defined. The decoder alone
// uses about 70 fewer LUTs, although in practice this works out to 12 fewer
// when all works out in the wash. Further, floating point and divide
// instructions will cause an illegal instruction exception if they are not
// implemented--so software capability can be built to use these instructions
// immediately, even if the hardware is not yet ready.
//
// This option is likely to go away in the future, obsoleting the previous
// instruction set, so I recommend setting this option and switching to the
// new instruction set as soon as possible.
//
`define OPT_NEW_INSTRUCTION_SET
//
//
//
//
//
//
// OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and
// whether or not it can issue one instruction per clock. When set, the
// prefetch has no cache, and only one instruction is fetched at a time.
226,10 → 211,8
//
//
//
`ifdef OPT_NEW_INSTRUCTION_SET
//
//
//
// The new instruction set also defines a set of very long instruction words.
// Well, calling them "very long" instruction words is probably a misnomer,
// although we're going to do it. They're really 2x16-bit instructions---
253,9 → 236,7
`define OPT_VLIW
//
//
`endif // OPT_NEW_INSTRUCTION_SET
//
//
`endif // OPT_SINGLE_FETCH
//
//
/cpu/cpuops.v
14,7 → 14,7
//
///////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
32,18 → 32,17
//
///////////////////////////////////////////////////////////////////////////
//
`define LONG_MPY
module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
o_illegal, o_busy);
parameter IMPLEMENT_MPY = 1;
`include "cpudefs.v"
//
module cpuops(i_clk,i_rst, i_ce, i_op, i_a, i_b, o_c, o_f, o_valid,
o_busy);
parameter IMPLEMENT_MPY = `OPT_MULTIPLY;
input i_clk, i_rst, i_ce;
input [3:0] i_op;
input [31:0] i_a, i_b;
input i_valid;
output reg [31:0] o_c;
output wire [3:0] o_f;
output reg o_valid;
output wire o_illegal;
output wire o_busy;
 
// Rotate-left pre-logic
99,10 → 98,10
||(i_op == 4'h6) // LSL
||(i_op == 4'h5)); // LSR
 
`ifdef LONG_MPY
reg mpyhi;
wire mpybusy;
`endif
wire [63:0] mpy_result; // Where we dump the multiply result
reg mpyhi; // Return the high half of the multiply
wire mpybusy; // The multiply is busy if true
wire mpydone; // True if we'll be valid on the next clock;
 
// A 4-way multiplexer can be done in one 6-LUT.
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with
109,244 → 108,254
// the Xilinx multiplexer fabric that follows.
// Given that we wish to apply this multiplexer approach to 33-bits,
// this will cost a minimum of 132 6-LUTs.
 
wire this_is_a_multiply_op;
assign this_is_a_multiply_op = (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));
 
generate
if (IMPLEMENT_MPY == 0)
begin
begin // No multiply support.
assign mpy_result = 63'h00;
end else if (IMPLEMENT_MPY == 1)
begin // Our single clock option (no extra clocks)
wire signed [63:0] w_mpy_a_input, w_mpy_b_input;
assign w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
assign w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
assign mpy_result = w_mpy_a_input * w_mpy_b_input;
assign mpybusy = 1'b0;
assign mpydone = 1'b0;
always @(*) mpyhi = 1'b0; // Not needed
end else if (IMPLEMENT_MPY == 2)
begin // Our two clock option (ALU must pause for 1 clock)
reg signed [63:0] r_mpy_a_input, r_mpy_b_input;
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
casez(i_op)
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
4'b0001: o_c <= i_a & i_b; // BTST/And
4'b0010:{c,o_c } <= i_a + i_b; // Add
4'b0011: o_c <= i_a | i_b; // Or
4'b0100: o_c <= i_a ^ i_b; // Xor
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR
`ifndef LONG_MPY
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO
// 4'h1010: The unimplemented MPYU,
// 4'h1011: and here for the unimplemented MPYS
4'b1100: o_c <= w_brev_result; // BREV
4'b1101: o_c <= w_popc_result; // POPC
4'b1110: o_c <= w_rol_result; // ROL
default: o_c <= i_b; // MOV, LDI
endcase
r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
end
 
assign o_busy = 1'b0;
assign mpy_result = r_mpy_a_input * r_mpy_b_input;
assign mpybusy = 1'b0;
 
reg r_illegal;
initial mpypipe = 1'b0;
reg mpypipe;
always @(posedge i_clk)
r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb)
`ifdef LONG_MPY
||(i_op == 4'h8)
if (i_rst)
mpypipe <= 1'b0;
else
mpypipe <= (this_is_a_multiply_op);
 
assign mpydone = mpypipe; // this_is_a_multiply_op;
always @(posedge i_clk)
if (this_is_a_multiply_op)
mpyhi = i_op[1];
end else if (IMPLEMENT_MPY == 3)
begin // Our three clock option (ALU pauses for 2 clocks)
reg signed [63:0] r_smpy_result;
reg [63:0] r_umpy_result;
reg signed [31:0] r_mpy_a_input, r_mpy_b_input;
reg [1:0] mpypipe;
reg [1:0] r_sgn;
 
initial mpypipe = 2'b0;
always @(posedge i_clk)
if (i_rst)
mpypipe <= 2'b0;
else
mpypipe <= { mpypipe[0], this_is_a_multiply_op };
 
// First clock
always @(posedge i_clk)
begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
r_sgn <= { r_sgn[0], i_op[0] };
end
 
// Second clock
`ifdef VERILATOR
wire signed [63:0] s_mpy_a_input, s_mpy_b_input;
wire [63:0] u_mpy_a_input, u_mpy_b_input;
 
assign s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input};
assign s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input};
assign u_mpy_a_input = {32'h00,r_mpy_a_input};
assign u_mpy_b_input = {32'h00,r_mpy_b_input};
always @(posedge i_clk)
r_smpy_result = s_mpy_a_input * s_mpy_b_input;
always @(posedge i_clk)
r_umpy_result = u_mpy_a_input * u_mpy_b_input;
`else
 
wire [31:0] u_mpy_a_input, u_mpy_b_input;
 
assign u_mpy_a_input = r_mpy_a_input;
assign u_mpy_b_input = r_mpy_b_input;
 
always @(posedge i_clk)
r_smpy_result = r_mpy_a_input * r_mpy_b_input;
always @(posedge i_clk)
r_umpy_result = u_mpy_a_input * u_mpy_b_input;
`endif
);
assign o_illegal = r_illegal;
end else begin
//
// Multiply pre-logic
//
`ifdef LONG_MPY
 
always @(posedge i_clk)
if (this_is_a_multiply_op)
mpyhi = i_op[1];
assign mpybusy = mpypipe[0];
assign mpy_result = (r_sgn[1])?r_smpy_result:r_umpy_result;
assign mpydone = mpypipe[1];
 
// Results are then set on the third clock
end else // if (IMPLEMENT_MPY <= 4)
begin // The three clock option
reg [63:0] r_mpy_result;
if (IMPLEMENT_MPY == 1)
begin // Our two clock option (one clock extra)
reg signed [64:0] r_mpy_a_input, r_mpy_b_input;
reg mpypipe, x;
initial mpypipe = 1'b0;
always @(posedge i_clk)
mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));
always @(posedge i_clk)
if (i_ce)
begin
r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},
i_a[31:0]};
r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},
i_b[31:0]};
end
always @(posedge i_clk)
if (mpypipe)
{x, r_mpy_result} = r_mpy_a_input
* r_mpy_b_input;
always @(posedge i_clk)
if (i_ce)
mpyhi = i_op[1];
assign mpybusy = mpypipe;
end else if (IMPLEMENT_MPY == 2)
begin // The three clock option
reg [31:0] r_mpy_a_input, r_mpy_b_input;
reg r_mpy_signed;
reg [1:0] mpypipe;
reg [31:0] r_mpy_a_input, r_mpy_b_input;
reg r_mpy_signed;
reg [2:0] mpypipe;
 
// First clock, latch in the inputs
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
// pipeline. In this case, the multiply
// pipeline is a two stage pipeline, so we need
// two bits in the pipe.
mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)
||(i_op[3:0]==4'h8));
// First clock, latch in the inputs
always @(posedge i_clk)
begin
// mpypipe indicates we have a multiply in the
// pipeline. In this case, the multiply
// pipeline is a two stage pipeline, so we need
// two bits in the pipe.
if (i_rst)
mpypipe <= 3'h0;
else begin
mpypipe[0] <= this_is_a_multiply_op;
mpypipe[1] <= mpypipe[0];
if (i_op[0]) // i.e. if signed multiply
begin
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
end else begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
end
// The signed bit really only matters in the
// case of 64 bit multiply. We'll keep track
// of it, though, and pretend in all other
// cases.
r_mpy_signed <= i_op[0];
 
if (i_ce)
mpyhi = i_op[1];
mpypipe[2] <= mpypipe[1];
end
 
assign mpybusy = |mpypipe;
 
// Second clock, do the multiplies, get the "partial
// products". Here, we break our input up into two
// halves,
//
// A = (2^16 ah + al)
// B = (2^16 bh + bl)
//
// and use these to compute partial products.
//
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl)
//
// Since we're following the FOIL algorithm to get here,
// we'll name these partial products according to FOIL.
//
// The trick is what happens if A or B is signed. In
// those cases, the real value of A will not be given by
// A = (2^16 ah + al)
// but rather
// A = (2^16 ah[31^] + al) - 2^31
// (where we have flipped the sign bit of A)
// and so ...
//
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al)
// - 2^62
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31)
//
reg [31:0] pp_f, pp_l; // F and L from FOIL
reg [32:0] pp_oi; // The O and I from FOIL
reg [32:0] pp_s;
always @(posedge i_clk)
if (i_op[0]) // i.e. if signed multiply
begin
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
// And a special one for the sign
if (r_mpy_signed)
pp_s <= 32'h8000_0000-(
r_mpy_a_input[31:0]
+ r_mpy_b_input[31:0]);
else
pp_s <= 33'h0;
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
end else begin
r_mpy_a_input <= i_a[31:0];
r_mpy_b_input <= i_b[31:0];
end
// The signed bit really only matters in the
// case of 64 bit multiply. We'll keep track
// of it, though, and pretend in all other
// cases.
r_mpy_signed <= i_op[0];
 
// Third clock, add the results and produce a product
always @(posedge i_clk)
begin
r_mpy_result[15:0] <= pp_l[15:0];
r_mpy_result[63:16] <=
{ 32'h00, pp_l[31:16] }
+ { 15'h00, pp_oi }
+ { pp_s, 15'h00 }
+ { pp_f, 16'h00 };
end
end // Fourth clock -- results are available for writeback.
`else
wire signed [16:0] w_mpy_a_input, w_mpy_b_input;
wire [33:0] w_mpy_result;
reg [31:0] r_mpy_result;
assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };
assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;
always @(posedge i_clk)
if (i_ce)
r_mpy_result = w_mpy_result[31:0];
`endif
if (this_is_a_multiply_op)
mpyhi = i_op[1];
end
 
assign mpybusy = |mpypipe[1:0];
assign mpydone = mpypipe[2];
 
// Second clock, do the multiplies, get the "partial
// products". Here, we break our input up into two
// halves,
//
// The master ALU case statement
// A = (2^16 ah + al)
// B = (2^16 bh + bl)
//
// and use these to compute partial products.
//
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl)
//
// Since we're following the FOIL algorithm to get here,
// we'll name these partial products according to FOIL.
//
// The trick is what happens if A or B is signed. In
// those cases, the real value of A will not be given by
// A = (2^16 ah + al)
// but rather
// A = (2^16 ah[31^] + al) - 2^31
// (where we have flipped the sign bit of A)
// and so ...
//
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al)
// - 2^62
// = 2^32(ah*bh)
// +2^16 (ah*bl+al*bh)
// +(al*bl)
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31)
//
reg [31:0] pp_f, pp_l; // F and L from FOIL
reg [32:0] pp_oi; // The O and I from FOIL
reg [32:0] pp_s;
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
casez(i_op)
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
4'b0001: o_c <= i_a & i_b; // BTST/And
4'b0010:{c,o_c } <= i_a + i_b; // Add
4'b0011: o_c <= i_a | i_b; // Or
4'b0100: o_c <= i_a ^ i_b; // Xor
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR
`ifdef LONG_MPY
4'b1000: o_c <= r_mpy_result[31:0]; // MPY
`else
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO
`ifdef LONG_MPY
4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU
4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS
`else
4'b1010: o_c <= r_mpy_result; // MPYU
4'b1011: o_c <= r_mpy_result; // MPYS
`endif
4'b1100: o_c <= w_brev_result; // BREV
4'b1101: o_c <= w_popc_result; // POPC
4'b1110: o_c <= w_rol_result; // ROL
default: o_c <= i_b; // MOV, LDI
endcase
end else if (r_busy)
`ifdef LONG_MPY
o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];
`else
o_c <= r_mpy_result;
`endif
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
// And a special one for the sign
if (r_mpy_signed)
pp_s <= 32'h8000_0000-(
r_mpy_a_input[31:0]
+ r_mpy_b_input[31:0]);
else
pp_s <= 33'h0;
end
 
reg r_busy;
initial r_busy = 1'b0;
// Third clock, add the results and produce a product
always @(posedge i_clk)
r_busy <= (~i_rst)&&(i_ce)&&(i_valid)
`ifdef LONG_MPY
&&((i_op[3:1] == 3'h5)
||(i_op[3:0] == 4'h8))||mpybusy;
`else
&&(i_op[3:1] == 3'h5);
`endif
begin
r_mpy_result[15:0] <= pp_l[15:0];
r_mpy_result[63:16] <=
{ 32'h00, pp_l[31:16] }
+ { 15'h00, pp_oi }
+ { pp_s, 15'h00 }
+ { pp_f, 16'h00 };
end
 
assign o_busy = r_busy;
assign mpy_result = r_mpy_result;
// Fourth clock -- results are clocked into writeback
end
endgenerate // All possible multiply results have been determined
 
assign o_illegal = 1'b0;
end endgenerate
//
// The master ALU case statement
//
always @(posedge i_clk)
if (i_ce)
begin
pre_sign <= (i_a[31]);
c <= 1'b0;
casez(i_op)
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
4'b0001: o_c <= i_a & i_b; // BTST/And
4'b0010:{c,o_c } <= i_a + i_b; // Add
4'b0011: o_c <= i_a | i_b; // Or
4'b0100: o_c <= i_a ^ i_b; // Xor
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR
4'b1000: o_c <= mpy_result[31:0]; // MPY
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO
4'b1010: o_c <= mpy_result[63:32]; // MPYHU
4'b1011: o_c <= mpy_result[63:32]; // MPYHS
4'b1100: o_c <= w_brev_result; // BREV
4'b1101: o_c <= w_popc_result; // POPC
4'b1110: o_c <= w_rol_result; // ROL
default: o_c <= i_b; // MOV, LDI
endcase
end else // if (mpydone)
o_c <= (mpyhi)?mpy_result[63:32]:mpy_result[31:0];
 
reg r_busy;
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
r_busy <= 1'b0;
else
r_busy <= ((IMPLEMENT_MPY > 1)
&&(this_is_a_multiply_op))||mpybusy;
assign o_busy = (r_busy); // ||((IMPLEMENT_MPY>1)&&(this_is_a_multiply_op));
 
 
assign z = (o_c == 32'h0000);
assign n = (o_c[31]);
assign v = (set_ovfl)&&(pre_sign != o_c[31]);
357,12 → 366,9
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (IMPLEMENT_MPY <= 1)
o_valid <= (i_ce);
else
o_valid <= (i_ce)&&(i_valid)
`ifdef LONG_MPY
&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)
||(o_busy)&&(~mpybusy);
`else
&&(i_op[3:1] != 3'h5)||(o_busy);
`endif
o_valid <=((i_ce)&&(!this_is_a_multiply_op))||(mpydone);
 
endmodule
/cpu/div.v
34,8 → 34,8
//
module div(i_clk, i_rst, i_wr, i_signed, i_numerator, i_denominator,
o_busy, o_valid, o_err, o_quotient, o_flags);
parameter BW=32, LGBW = 5;
input i_clk, i_rst;
parameter BW=32, LGBW = 5;
input i_clk, i_rst;
// Input parameters
input i_wr, i_signed;
input [(BW-1):0] i_numerator, i_denominator;
/cpu/idecode.v
5,15 → 5,9
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: This RTL file specifies how instructions are to be decoded
// into their underlying meanings. This is specifically a version
// designed to support a "Next Generation", or "Version 2" instruction
// set as (currently) activated by the OPT_NEW_INSTRUCTION_SET option
// in cpudefs.v.
// into their underlying meanings.
//
// I expect to (eventually) retire the old instruction set, at which point
// this will become the default instruction set decoder.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
/cpu/pfcache.v
13,7 → 13,7
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
// Copyright (C) 2015-2016, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
38,8 → 38,12
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
o_illegal);
parameter LGCACHELEN = 8, ADDRESS_WIDTH=24,
CACHELEN=(1<<LGCACHELEN), BUSW=32, AW=ADDRESS_WIDTH,
CW=LGCACHELEN, PW=LGCACHELEN-5;
LGLINES=5; // Log of the number of separate cache lines
localparam CACHELEN=(1<<LGCACHELEN); // Size of our cache memory
localparam CW=LGCACHELEN; // Short hand for LGCACHELEN
localparam PW=LGCACHELEN-LGLINES; // Size of a cache line
localparam BUSW = 32; // Number of data lines on the bus
localparam AW=ADDRESS_WIDTH; // Shorthand for ADDRESS_WIDTH
input i_clk, i_rst, i_new_pc;
input i_clear_cache;
input i_stall_n;
66,8 → 70,8
 
wire r_v;
reg [(BUSW-1):0] cache [0:((1<<CW)-1)];
reg [(AW-CW-1):0] tags [0:((1<<(CW-PW))-1)];
reg [((1<<(CW-PW))-1):0] vmask;
reg [(AW-CW-1):0] tags [0:((1<<(LGLINES))-1)];
reg [((1<<(LGLINES))-1):0] vmask;
 
reg [(AW-1):0] lastpc;
reg [(CW-1):0] rdaddr;
255,7 → 259,7
reg svmask;
initial vmask = 0;
initial svmask = 1'b0;
reg [(CW-PW-1):0] saddr;
reg [(LGLINES-1):0] saddr;
always @(posedge i_clk)
if ((i_rst)||(i_clear_cache))
begin
/cpu/pipemem.v
42,7 → 42,8
o_wb_stb_gbl, o_wb_stb_lcl,
o_wb_we, o_wb_addr, o_wb_data,
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
parameter ADDRESS_WIDTH=24, IMPLEMENT_LOCK=0, AW=ADDRESS_WIDTH;
parameter ADDRESS_WIDTH=32, IMPLEMENT_LOCK=0;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst;
input i_pipe_stb, i_lock;
// CPU interface
/cpu/wbdmac.v
168,7 → 168,7
 
reg last_read_request, last_read_ack,
last_write_request, last_write_ack;
reg trigger, abort;
reg trigger, abort, user_halt;
 
initial dma_state = `DMA_IDLE;
initial o_interrupt = 1'b0;
193,7 → 193,7
begin
case(i_swb_addr)
2'b00: begin
if ((i_swb_data[27:16] == 12'hfed)
if ((i_swb_data[31:16] == 16'h0fed)
&&(cfg_len_nonzero))
dma_state <= `DMA_WAIT;
cfg_blocklen_sub_one
221,6 → 221,8
nread <= 0;
if (abort)
dma_state <= `DMA_IDLE;
else if (user_halt)
dma_state <= `DMA_IDLE;
else if (trigger)
dma_state <= `DMA_READ_REQ;
end
240,11 → 242,14
+ {{(AW-1){1'b0}},1'b1};
end
 
if (user_halt)
dma_state <= `DMA_READ_ACK;
if (i_mwb_err)
begin
cfg_len <= 0;
dma_state <= `DMA_IDLE;
end
 
if (abort)
dma_state <= `DMA_IDLE;
if (i_mwb_ack)
266,6 → 271,8
nread <= nread+1;
if (last_read_ack) // (nread+1 == nracks)
dma_state <= `DMA_PRE_WRITE;
if (user_halt)
dma_state <= `DMA_IDLE;
if (cfg_incs)
cfg_raddr <= cfg_raddr
+ {{(AW-1){1'b0}},1'b1};
303,6 → 310,8
nwacks <= nwacks+1;
cfg_len <= cfg_len +{(AW){1'b1}}; // -1
end
if (user_halt)
dma_state <= `DMA_WRITE_ACK;
if (abort)
dma_state <= `DMA_IDLE;
end
466,5 → 475,12
&&(i_swb_addr == 2'b00)
&&(i_swb_data == 32'hffed0000));
 
initial user_halt = 1'b0;
always @(posedge i_clk)
user_halt <= ((user_halt)&&(dma_state != `DMA_IDLE))
||((i_swb_stb)&&(i_swb_we)&&(dma_state != `DMA_IDLE)
&&(i_swb_addr == 2'b00)
&&(i_swb_data == 32'hafed0000));
 
endmodule
 
/cpu/zipbones.v
45,11 → 45,11
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data,
o_dbg_ack, o_dbg_stall, o_dbg_data
`ifdef DEBUG_SCOPE
, o_cpu_debug
, o_zip_debug
`endif
);
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32,
LGICACHE=6, START_HALTED=0,
LGICACHE=8, START_HALTED=0,
AW=ADDRESS_WIDTH;
input i_clk, i_rst;
// Wishbone master
71,7 → 71,7
output wire [31:0] o_dbg_data;
//
`ifdef DEBUG_SCOPE
output wire [31:0] o_cpu_debug;
output wire [31:0] o_zip_debug;
`endif
 
//
119,8 → 119,12
 
initial cmd_clear_pf_cache = 1'b0;
always @(posedge i_clk)
cmd_clear_pf_cache = (~i_rst)&&(dbg_cmd_write)
&&((i_dbg_data[11])||(i_dbg_data[6]));
if (i_rst)
cmd_clear_pf_cache <= 1'b0;
else if (dbg_cmd_write)
cmd_clear_pf_cache <= i_dbg_data[11];
else
cmd_clear_pf_cache <= 1'b0;
//
initial cmd_step = 1'b0;
always @(posedge i_clk)
175,7 → 179,7
(i_wb_err)||(cpu_lcl_cyc),
cpu_op_stall, cpu_pf_stall, cpu_i_count
`ifdef DEBUG_SCOPE
, o_cpu_debug
, o_zip_debug
`endif
);
 
184,7 → 188,7
initial o_dbg_ack = 1'b0;
always @(posedge i_clk)
o_dbg_ack <= (i_dbg_cyc)&&((~i_dbg_addr)||(~o_dbg_stall));
assign o_dbg_stall= 1'b0; //(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr);
assign o_dbg_stall=(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr);
 
assign o_ext_int = (cmd_halt) && (~i_wb_stall);
 
/cpu/zipcpu.v
106,8 → 106,8
//
`define CPU_CC_REG 4'he
`define CPU_PC_REG 4'hf
`define CPU_CLRCACHE_BIT 14 // Floating point error flag, set on error
`define CPU_PHASE_BIT 13 // Floating point error flag, set on error
`define CPU_CLRCACHE_BIT 14 // Set to clear the I-cache, automatically clears
`define CPU_PHASE_BIT 13 // Set if we are executing the latter half of a VLIW
`define CPU_FPUERR_BIT 12 // Floating point error flag, set on error
`define CPU_DIVERR_BIT 11 // Divide error flag, set on divide by zero
`define CPU_BUSERR_BIT 10 // Bus error flag, set on error
139,7 → 139,7
, o_debug
`endif
);
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24,
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32,
LGICACHE=8;
`ifdef OPT_MULTIPLY
parameter IMPLEMENT_MPY = `OPT_MULTIPLY;
162,7 → 162,7
`else
parameter EARLY_BRANCHING = 0;
`endif
parameter AW=ADDRESS_WIDTH;
localparam AW=ADDRESS_WIDTH;
input i_clk, i_rst, i_interrupt;
// Debug interface -- inputs
input i_halt, i_clear_pf_cache;
325,7 → 325,7
wire alu_valid, alu_busy;
wire set_cond;
reg alu_wr, alF_wr;
wire alu_gie, alu_illegal_op, alu_illegal;
wire alu_gie, alu_illegal;
 
 
 
476,7 → 476,7
`ifdef OPT_PIPELINED
assign alu_stall = (((~master_ce)||(mem_rdbusy)||(alu_busy))&&(opvalid_alu)) //Case 1&2
||((opvalid)&&(op_lock)&&(op_lock_stall))
||((opvalid)&&(op_break))
||((opvalid)&&(op_break)) // || op_illegal
||(wr_reg_ce)&&(wr_write_cc)
||(div_busy)||(fpu_busy);
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall)
483,7 → 483,7
&&(~clear_pipeline);
`else
assign alu_stall = (opvalid_alu)&&((~master_ce)||(op_break));
assign alu_ce = (master_ce)&&((opvalid_alu)||(op_illegal))&&(~alu_stall)&&(~clear_pipeline);
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline);
`endif
//
 
607,8 → 607,6
assign dcdvalid = r_dcdvalid;
`endif
 
`ifdef OPT_NEW_INSTRUCTION_SET
 
// If not pipelined, there will be no opvalid_ anything, and the
idecode #(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE,
IMPLEMENT_FPU)
625,25 → 623,6
dcd_early_branch,
dcd_branch_pc, dcd_ljmp,
dcd_pipe);
`else
idecode_deprecated
#(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE,
IMPLEMENT_FPU)
instruction_decoder(i_clk, (i_rst)||(clear_pipeline),
dcd_ce, dcd_stalled, instruction, instruction_gie,
instruction_pc, pf_valid, pf_illegal, dcd_phase,
dcd_illegal, dcd_pc, dcd_gie,
{ dcdR_cc, dcdR_pc, dcdR },
{ dcdA_cc, dcdA_pc, dcdA },
{ dcdB_cc, dcdB_pc, dcdB },
dcdI, dcd_zI, dcdF, dcdF_wr, dcdOp,
dcdALU, dcdM, dcdDV, dcdFP, dcd_break, dcd_lock,
dcdR_wr,dcdA_rd, dcdB_rd,
dcd_early_branch,
dcd_branch_pc,
dcd_pipe);
assign dcd_ljmp = 1'b0;
`endif
 
`ifdef OPT_PIPELINED_BUS_ACCESS
reg r_op_pipe;
819,7 → 798,6
begin // Set the flag condition codes, bit order is [3:0]=VNCZ
case(dcdF[2:0])
3'h0: r_opF <= 6'h00; // Always
`ifdef OPT_NEW_INSTRUCTION_SET
// These were remapped as part of the new instruction
// set in order to make certain that the low order
// two bits contained the most commonly used
829,13 → 807,6
3'h3: r_opF <= 6'h10; // NE
3'h4: r_opF <= 6'h30; // GT (!N&!Z)
3'h5: r_opF <= 6'h20; // GE (!N)
`else
3'h1: r_opF <= 6'h11; // Z
3'h2: r_opF <= 6'h10; // NE
3'h3: r_opF <= 6'h20; // GE (!N)
3'h4: r_opF <= 6'h30; // GT (!N&!Z)
3'h5: r_opF <= 6'h24; // LT
`endif
3'h6: r_opF <= 6'h02; // C
3'h7: r_opF <= 6'h08; // V
endcase
850,7 → 821,7
initial opvalid_div = 1'b0;
initial opvalid_fpu = 1'b0;
always @(posedge i_clk)
if (i_rst)
if ((i_rst)||(clear_pipeline))
begin
opvalid <= 1'b0;
opvalid_alu <= 1'b0;
879,7 → 850,7
opvalid_div <= (dcdDV)&&(w_opvalid);
opvalid_fpu <= (dcdFP)&&(w_opvalid);
`endif
end else if ((clear_pipeline)||(adf_ce_unconditional)||(mem_ce))
end else if ((adf_ce_unconditional)||(mem_ce))
begin
opvalid <= 1'b0;
opvalid_alu <= 1'b0;
903,7 → 874,7
initial r_op_break = 1'b0;
always @(posedge i_clk)
if (i_rst) r_op_break <= 1'b0;
else if (op_ce) r_op_break <= (dcd_break); // &&(dcdvalid)
else if (op_ce) r_op_break <= (dcd_break);
else if ((clear_pipeline)||(~opvalid))
r_op_break <= 1'b0;
assign op_break = r_op_break;
1132,17 → 1103,9
// PIPELINE STAGE #4 :: Apply Instruction
//
//
`ifdef OPT_NEW_INSTRUCTION_SET
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce,
(opvalid_alu), opn, opA, opB,
alu_result, alu_flags, alu_valid, alu_illegal_op,
alu_busy);
`else
cpuops_deprecated #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce,
(opvalid_alu), opn, opA, opB,
alu_result, alu_flags, alu_valid, alu_illegal_op);
assign alu_busy = 1'b0;
`endif
cpuops #(IMPLEMENT_MPY) doalu(i_clk, (i_rst)||(clear_pipeline),
alu_ce, opn, opA, opB,
alu_result, alu_flags, alu_valid, alu_busy);
 
generate
if (IMPLEMENT_DIVIDE != 0)
1267,7 → 1230,7
r_alu_illegal <= op_illegal;
else
r_alu_illegal <= 1'b0;
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal);
assign alu_illegal = (r_alu_illegal);
`else
assign alu_illegal = 1'b0;
`endif
1469,7 → 1432,7
if ((i_rst)||(clear_pipeline)||(~opvalid))
r_break_pending <= 1'b0;
else if (op_break)
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy);
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy)&&(!wr_reg_ce);
else
r_break_pending <= 1'b0;
assign break_pending = r_break_pending;
1483,7 → 1446,7
||((~alu_gie)&&(bus_err))
||((~alu_gie)&&(div_error))
||((~alu_gie)&&(fpu_error))
||((~alu_gie)&&(alu_illegal));
||((~alu_gie)&&(alu_illegal)&&(!clear_pipeline));
 
// The sleep register. Setting the sleep register causes the CPU to
// sleep until the next interrupt. Setting the sleep register within
1513,12 → 1476,10
sleep <= wr_spreg_vl[`CPU_SLEEP_BIT];
 
always @(posedge i_clk)
if ((i_rst)||(w_switch_to_interrupt))
if (i_rst)
step <= 1'b0;
else if ((wr_reg_ce)&&(~alu_gie)&&(wr_write_ucc))
step <= wr_spreg_vl[`CPU_STEP_BIT];
else if (((alu_pc_valid)||(mem_pc_valid))&&(step)&&(gie))
step <= 1'b0;
 
// The GIE register. Only interrupts can disable the interrupt register
assign w_switch_to_interrupt = (gie)&&(
1531,7 → 1492,7
||((master_ce)&&(break_pending)&&(~break_en))
`ifdef OPT_ILLEGAL_INSTRUCTION
// On an illegal instruction
||(alu_illegal)
||((alu_illegal)&&(!clear_pipeline))
`endif
// On division by zero. If the divide isn't
// implemented, div_valid and div_error will be short
1588,7 → 1549,7
// Only the debug interface can clear this bit
else if ((dbgv)&&(wr_write_scc))
ill_err_i <= (ill_err_i)&&(wr_spreg_vl[`CPU_ILL_BIT]);
else if ((alu_illegal)&&(~alu_gie))
else if ((alu_illegal)&&(~alu_gie)&&(!clear_pipeline))
ill_err_i <= 1'b1;
initial ill_err_u = 1'b0;
always @(posedge i_clk)
1600,7 → 1561,7
// clearing the bit, then clear it
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc))
ill_err_u <=((ill_err_u)&&(wr_spreg_vl[`CPU_ILL_BIT]));
else if ((alu_illegal)&&(alu_gie))
else if ((alu_illegal)&&(alu_gie)&&(!clear_pipeline))
ill_err_u <= 1'b1;
`else
assign ill_err_u = 1'b0;
1734,7 → 1695,7
if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc))
upc <= wr_spreg_vl[(AW-1):0];
else if ((alu_gie)&&
(((alu_pc_valid)&&(~clear_pipeline))
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
upc <= alu_pc;
 
1744,7 → 1705,7
else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc))
ipc <= wr_spreg_vl[(AW-1):0];
else if ((~alu_gie)&&
(((alu_pc_valid)&&(~clear_pipeline))
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal))
||(mem_pc_valid)))
ipc <= alu_pc;
 
/cpu/zipsystem.v
170,7 → 170,7
, o_cpu_debug
`endif
);
parameter RESET_ADDRESS=24'h0100000, ADDRESS_WIDTH=24,
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32,
LGICACHE=10, START_HALTED=1, EXTERNAL_INTERRUPTS=1,
`ifdef OPT_MULTIPLY
IMPLEMENT_MPY = `OPT_MULTIPLY,
187,8 → 187,8
`else
IMPLEMENT_FPU=0,
`endif
IMPLEMENT_LOCK=1,
// Derived parameters
IMPLEMENT_LOCK=1;
localparam // Derived parameters
AW=ADDRESS_WIDTH;
input i_clk, i_rst;
// Wishbone master
523,6 → 523,9
wire [31:0] dc_data;
wire [(AW-1):0] dc_addr;
wire cpu_gbl_cyc;
wire [31:0] dmac_int_vec;
assign dmac_int_vec = { 1'b0, alt_int_vector, 1'b0,
main_int_vector[14:1], 1'b0 };
assign dmac_stb = (sys_stb)&&(sys_addr[4]);
`ifdef INCLUDE_DMA_CONTROLLER
wbdmac #(AW) dma_controller(i_clk, cpu_reset,
533,8 → 536,7
dc_cyc, dc_stb, dc_we, dc_addr, dc_data,
dc_ack, dc_stall, ext_idata, dc_err,
// External device interrupts
{ 1'b0, alt_int_vector, 1'b0,
main_int_vector[14:1], 1'b0 },
dmac_int_vec,
// DMAC interrupt, for upon completion
dmac_int);
`else
782,7 → 784,7
 
assign sys_stall = (tma_stall | tmb_stall | tmc_stall | jif_stall
| wdt_stall | ctri_stall | actr_stall
| pic_stall | dmac_stall);
| pic_stall | dmac_stall); // Always 1'b0!
assign cpu_stall = (sys_stall)|(cpu_ext_stall);
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|wdbus_ack);
assign cpu_ack = (sys_ack)||(cpu_ext_ack);

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.