URL
https://opencores.org/ocsvn/xulalx25soc/xulalx25soc/trunk
Subversion Repositories xulalx25soc
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 117 to Rev 118
- ↔ Reverse comparison
Rev 117 → Rev 118
/xulalx25soc/trunk/bench/cpp/busmaster_tb.cpp
140,7 → 140,7
m_core->v__DOT__serialport__DOT__r_setup); |
PIPECMDR::tick(); |
|
#define DEBUGGING_OUTPUT |
// #define DEBUGGING_OUTPUT |
#ifdef DEBUGGING_OUTPUT |
bool writeout = false; |
/* |
374,7 → 374,7
printf("|%s%s%s%s%s", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__alu_ce)?"a":"-", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__alu_stall)?"s":"-", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__doalu__DOT__genblk2__DOT__r_busy)?"B":"-", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__doalu__DOT__r_busy)?"B":"-", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__r_alu_gie)?"G":"-", |
(m_core->v__DOT__zippy__DOT__thecpu__DOT__r_alu_illegal)?"i":"-"); |
printf("|%s%s%s%2x %s%s%s %2d %2d", |
/xulalx25soc/trunk/rtl/cpu/busdelay.v
1,4 → 1,4
/////////////////////////////////////////////////////////////////////////// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Filename: busdelay.v |
// |
16,12 → 16,26
// *must* know on the first clock whether or not the bus will stall. |
// |
// |
// After a period of time, I started a new design where the timing |
// associated with this original bus clock just wasn't ... fast enough. |
// I needed to delay the stall line as well. A new busdelay was then |
// written and debugged whcih delays the stall line. (I know, you aren't |
// supposed to delay the stall line--but what if you *have* to in order |
// to meet timing?) This new logic has been merged in with the old, |
// and the DELAY_STALL line can be set to non-zero to use it instead |
// of the original logic. Don't use it if you don't need it: it will |
// consume resources and slow your bus down more, but if you do need |
// it--don't be afraid to use it. |
// |
// Both versions of the bus delay will maintain a single access per |
// clock when pipelined, they only delay the time between the strobe |
// going high and the actual command being accomplished. |
// |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
/////////////////////////////////////////////////////////////////////////// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
39,7 → 53,7
// http://www.gnu.org/licenses/gpl.html |
// |
// |
/////////////////////////////////////////////////////////////////////////// |
//////////////////////////////////////////////////////////////////////////////// |
// |
module busdelay(i_clk, |
// The input bus |
48,7 → 62,7
// The delayed bus |
o_dly_cyc, o_dly_stb, o_dly_we, o_dly_addr, o_dly_data, |
i_dly_ack, i_dly_stall, i_dly_data, i_dly_err); |
parameter AW=32, DW=32; |
parameter AW=32, DW=32, DELAY_STALL = 0; |
input i_clk; |
// Input/master bus |
input i_wb_cyc, i_wb_stb, i_wb_we; |
55,12 → 69,11
input [(AW-1):0] i_wb_addr; |
input [(DW-1):0] i_wb_data; |
output reg o_wb_ack; |
output reg o_wb_stall; |
output wire o_wb_stall; |
output reg [(DW-1):0] o_wb_data; |
output reg o_wb_err; |
output wire o_wb_err; |
// Delayed bus |
output reg o_dly_cyc, o_dly_we; |
output wire o_dly_stb; |
output reg o_dly_cyc, o_dly_stb, o_dly_we; |
output reg [(AW-1):0] o_dly_addr; |
output reg [(DW-1):0] o_dly_data; |
input i_dly_ack; |
68,36 → 81,100
input [(DW-1):0] i_dly_data; |
input i_dly_err; |
|
reg loaded; |
initial o_dly_cyc = 1'b0; |
initial loaded = 1'b0; |
generate |
if (DELAY_STALL != 0) |
begin |
reg r_stb, r_we, r_rtn_stall, r_rtn_err; |
reg [(DW-1):0] r_data; |
reg [(AW-1):0] r_addr; |
|
always @(posedge i_clk) |
o_wb_stall <= (loaded)&&(i_dly_stall); |
initial o_dly_cyc = 1'b0; |
initial r_rtn_stall= 1'b0; |
initial r_stb = 1'b0; |
always @(posedge i_clk) |
begin |
o_dly_cyc <= (i_wb_cyc); |
|
if (!i_dly_stall) |
begin |
r_we <= i_wb_we; |
r_addr <= i_wb_addr; |
r_data <= i_wb_data; |
|
initial o_dly_cyc = 1'b0; |
always @(posedge i_clk) |
o_dly_cyc <= (i_wb_cyc); |
// Add the i_wb_cyc criteria here, so we can simplify the o_wb_stall |
// criteria below, which would otherwise *and* these two. |
always @(posedge i_clk) |
loaded <= (i_wb_stb)||((loaded)&&(i_dly_stall)&&(~i_dly_err)&&(i_wb_cyc)); |
assign o_dly_stb = loaded; |
always @(posedge i_clk) |
if (~i_dly_stall) |
o_dly_we <= i_wb_we; |
always @(posedge i_clk) |
if (~i_dly_stall) |
o_dly_addr<= i_wb_addr; |
always @(posedge i_clk) |
if (~i_dly_stall) |
o_dly_data <= i_wb_data; |
always @(posedge i_clk) |
o_wb_ack <= (i_dly_ack)&&(o_dly_cyc)&&(i_wb_cyc); |
always @(posedge i_clk) |
o_wb_data <= i_dly_data; |
if (r_stb) |
begin |
o_dly_we <= r_we; |
o_dly_addr <= r_addr; |
o_dly_data <= r_data; |
o_dly_stb <= 1'b1; |
r_rtn_stall <= 1'b0; |
r_stb <= 1'b0; |
end else begin |
o_dly_we <= i_wb_we; |
o_dly_addr <= i_wb_addr; |
o_dly_data <= i_wb_data; |
o_dly_stb <= i_wb_stb; |
r_stb <= 1'b0; |
r_rtn_stall <= 1'b0; |
end |
end else if ((!r_stb)&&(!o_wb_stall)) |
begin |
r_we <= i_wb_we; |
r_addr <= i_wb_addr; |
r_data <= i_wb_data; |
r_stb <= i_wb_stb; |
|
always @(posedge i_clk) |
o_wb_err <= (i_dly_err)&&(o_dly_cyc)&&(i_wb_cyc); |
r_rtn_stall <= i_wb_stb; |
end |
|
if (!i_wb_cyc) |
begin |
o_dly_stb <= 1'b0; |
r_stb <= 1'b0; |
r_rtn_stall <= 1'b0; |
end |
|
o_wb_ack <= (i_dly_ack)&&(i_wb_cyc)&&(o_dly_cyc); |
o_wb_data <= i_dly_data; |
r_rtn_err <= (i_dly_err)&&(i_wb_cyc)&&(o_dly_cyc); |
end |
|
assign o_wb_stall = r_rtn_stall; |
assign o_wb_err = r_rtn_err; |
|
end else begin |
|
initial o_dly_cyc = 1'b0; |
initial o_dly_stb = 1'b0; |
|
always @(posedge i_clk) |
o_dly_cyc <= i_wb_cyc; |
// Add the i_wb_cyc criteria here, so we can simplify the |
// o_wb_stall criteria below, which would otherwise *and* |
// these two. |
always @(posedge i_clk) |
if (~o_wb_stall) |
o_dly_stb <= ((i_wb_cyc)&&(i_wb_stb)); |
always @(posedge i_clk) |
if (~o_wb_stall) |
o_dly_we <= i_wb_we; |
always @(posedge i_clk) |
if (~o_wb_stall) |
o_dly_addr<= i_wb_addr; |
always @(posedge i_clk) |
if (~o_wb_stall) |
o_dly_data <= i_wb_data; |
always @(posedge i_clk) |
o_wb_ack <= (i_dly_ack)&&(o_dly_cyc)&&(i_wb_cyc); |
always @(posedge i_clk) |
o_wb_data <= i_dly_data; |
|
// Our only non-delayed line, yet still really delayed. Perhaps |
// there's a way to register this? |
// o_wb_stall <= (i_wb_cyc)&&(i_wb_stb) ... or some such? |
// assign o_wb_stall=((i_wb_cyc)&&(i_dly_stall)&&(o_dly_stb));//&&o_cyc |
assign o_wb_stall = ((i_dly_stall)&&(o_dly_stb));//&&o_cyc |
assign o_wb_err = i_dly_err; |
end endgenerate |
|
endmodule |
/xulalx25soc/trunk/rtl/cpu/cpudefs.v
29,7 → 29,7
// |
/////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
77,8 → 77,13
// OPT_ILLEGAL_INSTRUCTION is set, then the multiply will create an illegal |
// instruction that will then trip the illegal instruction trap. |
// |
// Either not defining this value, or defining it to zero will disable the |
// hardware multiply. A value of '1' will cause the multiply to occurr in one |
// clock cycle only--often at the expense of the rest of the CPUs speed. |
// A value of 2 will cause the multiply to have a single delay cycle, 3 will |
// have two delay cycles, and 4 (or more) will have 3 delay cycles. |
// |
`define OPT_MULTIPLY 2 |
`define OPT_MULTIPLY 4 |
// |
// |
// |
111,27 → 116,7
// |
// |
// |
// OPT_NEW_INSTRUCTION_SET controls whether or not the new instruction set |
// is in use. The new instruction set contains space for floating point |
// operations, signed and unsigned divide instructions, as well as bit reversal |
// and ... at least two other operations yet to be defined. The decoder alone |
// uses about 70 fewer LUTs, although in practice this works out to 12 fewer |
// when all works out in the wash. Further, floating point and divide |
// instructions will cause an illegal instruction exception if they are not |
// implemented--so software capability can be built to use these instructions |
// immediately, even if the hardware is not yet ready. |
// |
// This option is likely to go away in the future, obsoleting the previous |
// instruction set, so I recommend setting this option and switching to the |
// new instruction set as soon as possible. |
// |
`define OPT_NEW_INSTRUCTION_SET |
// |
// |
// |
// |
// |
// |
// OPT_SINGLE_FETCH controls whether or not the prefetch has a cache, and |
// whether or not it can issue one instruction per clock. When set, the |
// prefetch has no cache, and only one instruction is fetched at a time. |
226,10 → 211,8
// |
// |
// |
`ifdef OPT_NEW_INSTRUCTION_SET |
// |
// |
// |
// The new instruction set also defines a set of very long instruction words. |
// Well, calling them "very long" instruction words is probably a misnomer, |
// although we're going to do it. They're really 2x16-bit instructions--- |
253,9 → 236,7
`define OPT_VLIW |
// |
// |
`endif // OPT_NEW_INSTRUCTION_SET |
// |
// |
`endif // OPT_SINGLE_FETCH |
// |
// |
/xulalx25soc/trunk/rtl/cpu/cpuops.v
14,7 → 14,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
32,18 → 32,17
// |
/////////////////////////////////////////////////////////////////////////// |
// |
`define LONG_MPY |
module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_illegal, o_busy); |
parameter IMPLEMENT_MPY = 1; |
`include "cpudefs.v" |
// |
module cpuops(i_clk,i_rst, i_ce, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_busy); |
parameter IMPLEMENT_MPY = `OPT_MULTIPLY; |
input i_clk, i_rst, i_ce; |
input [3:0] i_op; |
input [31:0] i_a, i_b; |
input i_valid; |
output reg [31:0] o_c; |
output wire [3:0] o_f; |
output reg o_valid; |
output wire o_illegal; |
output wire o_busy; |
|
// Rotate-left pre-logic |
99,10 → 98,10
||(i_op == 4'h6) // LSL |
||(i_op == 4'h5)); // LSR |
|
`ifdef LONG_MPY |
reg mpyhi; |
wire mpybusy; |
`endif |
wire [63:0] mpy_result; // Where we dump the multiply result |
reg mpyhi; // Return the high half of the multiply |
wire mpybusy; // The multiply is busy if true |
wire mpydone; // True if we'll be valid on the next clock; |
|
// A 4-way multiplexer can be done in one 6-LUT. |
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with |
109,244 → 108,254
// the Xilinx multiplexer fabric that follows. |
// Given that we wish to apply this multiplexer approach to 33-bits, |
// this will cost a minimum of 132 6-LUTs. |
|
wire this_is_a_multiply_op; |
assign this_is_a_multiply_op = (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
|
generate |
if (IMPLEMENT_MPY == 0) |
begin |
begin // No multiply support. |
assign mpy_result = 63'h00; |
end else if (IMPLEMENT_MPY == 1) |
begin // Our single clock option (no extra clocks) |
wire signed [63:0] w_mpy_a_input, w_mpy_b_input; |
assign w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]}; |
assign w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]}; |
assign mpy_result = w_mpy_a_input * w_mpy_b_input; |
assign mpybusy = 1'b0; |
assign mpydone = 1'b0; |
always @(*) mpyhi = 1'b0; // Not needed |
end else if (IMPLEMENT_MPY == 2) |
begin // Our two clock option (ALU must pause for 1 clock) |
reg signed [63:0] r_mpy_a_input, r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifndef LONG_MPY |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
// 4'h1010: The unimplemented MPYU, |
// 4'h1011: and here for the unimplemented MPYS |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]}; |
r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]}; |
end |
|
assign o_busy = 1'b0; |
assign mpy_result = r_mpy_a_input * r_mpy_b_input; |
assign mpybusy = 1'b0; |
|
reg r_illegal; |
initial mpypipe = 1'b0; |
reg mpypipe; |
always @(posedge i_clk) |
r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb) |
`ifdef LONG_MPY |
||(i_op == 4'h8) |
if (i_rst) |
mpypipe <= 1'b0; |
else |
mpypipe <= (this_is_a_multiply_op); |
|
assign mpydone = mpypipe; // this_is_a_multiply_op; |
always @(posedge i_clk) |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
end else if (IMPLEMENT_MPY == 3) |
begin // Our three clock option (ALU pauses for 2 clocks) |
reg signed [63:0] r_smpy_result; |
reg [63:0] r_umpy_result; |
reg signed [31:0] r_mpy_a_input, r_mpy_b_input; |
reg [1:0] mpypipe; |
reg [1:0] r_sgn; |
|
initial mpypipe = 2'b0; |
always @(posedge i_clk) |
if (i_rst) |
mpypipe <= 2'b0; |
else |
mpypipe <= { mpypipe[0], this_is_a_multiply_op }; |
|
// First clock |
always @(posedge i_clk) |
begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
r_sgn <= { r_sgn[0], i_op[0] }; |
end |
|
// Second clock |
`ifdef VERILATOR |
wire signed [63:0] s_mpy_a_input, s_mpy_b_input; |
wire [63:0] u_mpy_a_input, u_mpy_b_input; |
|
assign s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input}; |
assign s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input}; |
assign u_mpy_a_input = {32'h00,r_mpy_a_input}; |
assign u_mpy_b_input = {32'h00,r_mpy_b_input}; |
always @(posedge i_clk) |
r_smpy_result = s_mpy_a_input * s_mpy_b_input; |
always @(posedge i_clk) |
r_umpy_result = u_mpy_a_input * u_mpy_b_input; |
`else |
|
wire [31:0] u_mpy_a_input, u_mpy_b_input; |
|
assign u_mpy_a_input = r_mpy_a_input; |
assign u_mpy_b_input = r_mpy_b_input; |
|
always @(posedge i_clk) |
r_smpy_result = r_mpy_a_input * r_mpy_b_input; |
always @(posedge i_clk) |
r_umpy_result = u_mpy_a_input * u_mpy_b_input; |
`endif |
); |
assign o_illegal = r_illegal; |
end else begin |
// |
// Multiply pre-logic |
// |
`ifdef LONG_MPY |
|
always @(posedge i_clk) |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe[0]; |
assign mpy_result = (r_sgn[1])?r_smpy_result:r_umpy_result; |
assign mpydone = mpypipe[1]; |
|
// Results are then set on the third clock |
end else // if (IMPLEMENT_MPY <= 4) |
begin // The three clock option |
reg [63:0] r_mpy_result; |
if (IMPLEMENT_MPY == 1) |
begin // Our two clock option (one clock extra) |
reg signed [64:0] r_mpy_a_input, r_mpy_b_input; |
reg mpypipe, x; |
initial mpypipe = 1'b0; |
always @(posedge i_clk) |
mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
always @(posedge i_clk) |
if (i_ce) |
begin |
r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}}, |
i_a[31:0]}; |
r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}}, |
i_b[31:0]}; |
end |
always @(posedge i_clk) |
if (mpypipe) |
{x, r_mpy_result} = r_mpy_a_input |
* r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe; |
end else if (IMPLEMENT_MPY == 2) |
begin // The three clock option |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [1:0] mpypipe; |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [2:0] mpypipe; |
|
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a two stage pipeline, so we need |
// two bits in the pipe. |
mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5) |
||(i_op[3:0]==4'h8)); |
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a two stage pipeline, so we need |
// two bits in the pipe. |
if (i_rst) |
mpypipe <= 3'h0; |
else begin |
mpypipe[0] <= this_is_a_multiply_op; |
mpypipe[1] <= mpypipe[0]; |
|
if (i_op[0]) // i.e. if signed multiply |
begin |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
if (i_ce) |
mpyhi = i_op[1]; |
mpypipe[2] <= mpypipe[1]; |
end |
|
assign mpybusy = |mpypipe; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// - 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31) |
// |
reg [31:0] pp_f, pp_l; // F and L from FOIL |
reg [32:0] pp_oi; // The O and I from FOIL |
reg [32:0] pp_s; |
always @(posedge i_clk) |
if (i_op[0]) // i.e. if signed multiply |
begin |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0] |
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 15'h00, pp_oi } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
end // Fourth clock -- results are available for writeback. |
`else |
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire [33:0] w_mpy_result; |
reg [31:0] r_mpy_result; |
assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] }; |
assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] }; |
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
r_mpy_result = w_mpy_result[31:0]; |
`endif |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
end |
|
assign mpybusy = |mpypipe[1:0]; |
assign mpydone = mpypipe[2]; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// The master ALU case statement |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// - 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31) |
// |
reg [31:0] pp_f, pp_l; // F and L from FOIL |
reg [32:0] pp_oi; // The O and I from FOIL |
reg [32:0] pp_s; |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifdef LONG_MPY |
4'b1000: o_c <= r_mpy_result[31:0]; // MPY |
`else |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
`ifdef LONG_MPY |
4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS |
`else |
4'b1010: o_c <= r_mpy_result; // MPYU |
4'b1011: o_c <= r_mpy_result; // MPYS |
`endif |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
end else if (r_busy) |
`ifdef LONG_MPY |
o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0]; |
`else |
o_c <= r_mpy_result; |
`endif |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0] |
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
end |
|
reg r_busy; |
initial r_busy = 1'b0; |
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
r_busy <= (~i_rst)&&(i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&((i_op[3:1] == 3'h5) |
||(i_op[3:0] == 4'h8))||mpybusy; |
`else |
&&(i_op[3:1] == 3'h5); |
`endif |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 15'h00, pp_oi } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
|
assign o_busy = r_busy; |
assign mpy_result = r_mpy_result; |
// Fourth clock -- results are clocked into writeback |
end |
endgenerate // All possible multiply results have been determined |
|
assign o_illegal = 1'b0; |
end endgenerate |
// |
// The master ALU case statement |
// |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
4'b1000: o_c <= mpy_result[31:0]; // MPY |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
4'b1010: o_c <= mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= mpy_result[63:32]; // MPYHS |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
end else // if (mpydone) |
o_c <= (mpyhi)?mpy_result[63:32]:mpy_result[31:0]; |
|
reg r_busy; |
initial r_busy = 1'b0; |
always @(posedge i_clk) |
if (i_rst) |
r_busy <= 1'b0; |
else |
r_busy <= ((IMPLEMENT_MPY > 1) |
&&(this_is_a_multiply_op))||mpybusy; |
assign o_busy = (r_busy); // ||((IMPLEMENT_MPY>1)&&(this_is_a_multiply_op)); |
|
|
assign z = (o_c == 32'h0000); |
assign n = (o_c[31]); |
assign v = (set_ovfl)&&(pre_sign != o_c[31]); |
357,12 → 366,9
always @(posedge i_clk) |
if (i_rst) |
o_valid <= 1'b0; |
else if (IMPLEMENT_MPY <= 1) |
o_valid <= (i_ce); |
else |
o_valid <= (i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8) |
||(o_busy)&&(~mpybusy); |
`else |
&&(i_op[3:1] != 3'h5)||(o_busy); |
`endif |
o_valid <=((i_ce)&&(!this_is_a_multiply_op))||(mpydone); |
|
endmodule |
/xulalx25soc/trunk/rtl/cpu/div.v
34,8 → 34,8
// |
module div(i_clk, i_rst, i_wr, i_signed, i_numerator, i_denominator, |
o_busy, o_valid, o_err, o_quotient, o_flags); |
parameter BW=32, LGBW = 5; |
input i_clk, i_rst; |
parameter BW=32, LGBW = 5; |
input i_clk, i_rst; |
// Input parameters |
input i_wr, i_signed; |
input [(BW-1):0] i_numerator, i_denominator; |
/xulalx25soc/trunk/rtl/cpu/idecode.v
5,15 → 5,9
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core |
// |
// Purpose: This RTL file specifies how instructions are to be decoded |
// into their underlying meanings. This is specifically a version |
// designed to support a "Next Generation", or "Version 2" instruction |
// set as (currently) activated by the OPT_NEW_INSTRUCTION_SET option |
// in cpudefs.v. |
// into their underlying meanings. |
// |
// I expect to (eventually) retire the old instruction set, at which point |
// this will become the default instruction set decoder. |
// |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
/xulalx25soc/trunk/rtl/cpu/pfcache.v
13,7 → 13,7
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
38,8 → 38,12
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data, |
o_illegal); |
parameter LGCACHELEN = 8, ADDRESS_WIDTH=24, |
CACHELEN=(1<<LGCACHELEN), BUSW=32, AW=ADDRESS_WIDTH, |
CW=LGCACHELEN, PW=LGCACHELEN-5; |
LGLINES=5; // Log of the number of separate cache lines |
localparam CACHELEN=(1<<LGCACHELEN); // Size of our cache memory |
localparam CW=LGCACHELEN; // Short hand for LGCACHELEN |
localparam PW=LGCACHELEN-LGLINES; // Size of a cache line |
localparam BUSW = 32; // Number of data lines on the bus |
localparam AW=ADDRESS_WIDTH; // Shorthand for ADDRESS_WIDTH |
input i_clk, i_rst, i_new_pc; |
input i_clear_cache; |
input i_stall_n; |
66,8 → 70,8
|
wire r_v; |
reg [(BUSW-1):0] cache [0:((1<<CW)-1)]; |
reg [(AW-CW-1):0] tags [0:((1<<(CW-PW))-1)]; |
reg [((1<<(CW-PW))-1):0] vmask; |
reg [(AW-CW-1):0] tags [0:((1<<(LGLINES))-1)]; |
reg [((1<<(LGLINES))-1):0] vmask; |
|
reg [(AW-1):0] lastpc; |
reg [(CW-1):0] rdaddr; |
255,7 → 259,7
reg svmask; |
initial vmask = 0; |
initial svmask = 1'b0; |
reg [(CW-PW-1):0] saddr; |
reg [(LGLINES-1):0] saddr; |
always @(posedge i_clk) |
if ((i_rst)||(i_clear_cache)) |
begin |
/xulalx25soc/trunk/rtl/cpu/pipemem.v
42,7 → 42,8
o_wb_stb_gbl, o_wb_stb_lcl, |
o_wb_we, o_wb_addr, o_wb_data, |
i_wb_ack, i_wb_stall, i_wb_err, i_wb_data); |
parameter ADDRESS_WIDTH=24, IMPLEMENT_LOCK=0, AW=ADDRESS_WIDTH; |
parameter ADDRESS_WIDTH=32, IMPLEMENT_LOCK=0; |
localparam AW=ADDRESS_WIDTH; |
input i_clk, i_rst; |
input i_pipe_stb, i_lock; |
// CPU interface |
/xulalx25soc/trunk/rtl/cpu/wbdmac.v
168,7 → 168,7
|
reg last_read_request, last_read_ack, |
last_write_request, last_write_ack; |
reg trigger, abort; |
reg trigger, abort, user_halt; |
|
initial dma_state = `DMA_IDLE; |
initial o_interrupt = 1'b0; |
193,7 → 193,7
begin |
case(i_swb_addr) |
2'b00: begin |
if ((i_swb_data[27:16] == 12'hfed) |
if ((i_swb_data[31:16] == 16'h0fed) |
&&(cfg_len_nonzero)) |
dma_state <= `DMA_WAIT; |
cfg_blocklen_sub_one |
221,6 → 221,8
nread <= 0; |
if (abort) |
dma_state <= `DMA_IDLE; |
else if (user_halt) |
dma_state <= `DMA_IDLE; |
else if (trigger) |
dma_state <= `DMA_READ_REQ; |
end |
240,11 → 242,14
+ {{(AW-1){1'b0}},1'b1}; |
end |
|
if (user_halt) |
dma_state <= `DMA_READ_ACK; |
if (i_mwb_err) |
begin |
cfg_len <= 0; |
dma_state <= `DMA_IDLE; |
end |
|
if (abort) |
dma_state <= `DMA_IDLE; |
if (i_mwb_ack) |
266,6 → 271,8
nread <= nread+1; |
if (last_read_ack) // (nread+1 == nracks) |
dma_state <= `DMA_PRE_WRITE; |
if (user_halt) |
dma_state <= `DMA_IDLE; |
if (cfg_incs) |
cfg_raddr <= cfg_raddr |
+ {{(AW-1){1'b0}},1'b1}; |
303,6 → 310,8
nwacks <= nwacks+1; |
cfg_len <= cfg_len +{(AW){1'b1}}; // -1 |
end |
if (user_halt) |
dma_state <= `DMA_WRITE_ACK; |
if (abort) |
dma_state <= `DMA_IDLE; |
end |
466,5 → 475,12
&&(i_swb_addr == 2'b00) |
&&(i_swb_data == 32'hffed0000)); |
|
initial user_halt = 1'b0; |
always @(posedge i_clk) |
user_halt <= ((user_halt)&&(dma_state != `DMA_IDLE)) |
||((i_swb_stb)&&(i_swb_we)&&(dma_state != `DMA_IDLE) |
&&(i_swb_addr == 2'b00) |
&&(i_swb_data == 32'hafed0000)); |
|
endmodule |
|
/xulalx25soc/trunk/rtl/cpu/zipbones.v
45,11 → 45,11
i_dbg_cyc, i_dbg_stb, i_dbg_we, i_dbg_addr, i_dbg_data, |
o_dbg_ack, o_dbg_stall, o_dbg_data |
`ifdef DEBUG_SCOPE |
, o_cpu_debug |
, o_zip_debug |
`endif |
); |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32, |
LGICACHE=6, START_HALTED=0, |
LGICACHE=8, START_HALTED=0, |
AW=ADDRESS_WIDTH; |
input i_clk, i_rst; |
// Wishbone master |
71,7 → 71,7
output wire [31:0] o_dbg_data; |
// |
`ifdef DEBUG_SCOPE |
output wire [31:0] o_cpu_debug; |
output wire [31:0] o_zip_debug; |
`endif |
|
// |
119,8 → 119,12
|
initial cmd_clear_pf_cache = 1'b0; |
always @(posedge i_clk) |
cmd_clear_pf_cache = (~i_rst)&&(dbg_cmd_write) |
&&((i_dbg_data[11])||(i_dbg_data[6])); |
if (i_rst) |
cmd_clear_pf_cache <= 1'b0; |
else if (dbg_cmd_write) |
cmd_clear_pf_cache <= i_dbg_data[11]; |
else |
cmd_clear_pf_cache <= 1'b0; |
// |
initial cmd_step = 1'b0; |
always @(posedge i_clk) |
175,7 → 179,7
(i_wb_err)||(cpu_lcl_cyc), |
cpu_op_stall, cpu_pf_stall, cpu_i_count |
`ifdef DEBUG_SCOPE |
, o_cpu_debug |
, o_zip_debug |
`endif |
); |
|
184,7 → 188,7
initial o_dbg_ack = 1'b0; |
always @(posedge i_clk) |
o_dbg_ack <= (i_dbg_cyc)&&((~i_dbg_addr)||(~o_dbg_stall)); |
assign o_dbg_stall= 1'b0; //(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr); |
assign o_dbg_stall=(i_dbg_cyc)&&(cpu_dbg_stall)&&(i_dbg_addr); |
|
assign o_ext_int = (cmd_halt) && (~i_wb_stall); |
|
/xulalx25soc/trunk/rtl/cpu/zipcpu.v
106,8 → 106,8
// |
`define CPU_CC_REG 4'he |
`define CPU_PC_REG 4'hf |
`define CPU_CLRCACHE_BIT 14 // Floating point error flag, set on error |
`define CPU_PHASE_BIT 13 // Floating point error flag, set on error |
`define CPU_CLRCACHE_BIT 14 // Set to clear the I-cache, automatically clears |
`define CPU_PHASE_BIT 13 // Set if we are executing the latter half of a VLIW |
`define CPU_FPUERR_BIT 12 // Floating point error flag, set on error |
`define CPU_DIVERR_BIT 11 // Divide error flag, set on divide by zero |
`define CPU_BUSERR_BIT 10 // Bus error flag, set on error |
139,7 → 139,7
, o_debug |
`endif |
); |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24, |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32, |
LGICACHE=8; |
`ifdef OPT_MULTIPLY |
parameter IMPLEMENT_MPY = `OPT_MULTIPLY; |
162,7 → 162,7
`else |
parameter EARLY_BRANCHING = 0; |
`endif |
parameter AW=ADDRESS_WIDTH; |
localparam AW=ADDRESS_WIDTH; |
input i_clk, i_rst, i_interrupt; |
// Debug interface -- inputs |
input i_halt, i_clear_pf_cache; |
325,7 → 325,7
wire alu_valid, alu_busy; |
wire set_cond; |
reg alu_wr, alF_wr; |
wire alu_gie, alu_illegal_op, alu_illegal; |
wire alu_gie, alu_illegal; |
|
|
|
476,7 → 476,7
`ifdef OPT_PIPELINED |
assign alu_stall = (((~master_ce)||(mem_rdbusy)||(alu_busy))&&(opvalid_alu)) //Case 1&2 |
||((opvalid)&&(op_lock)&&(op_lock_stall)) |
||((opvalid)&&(op_break)) |
||((opvalid)&&(op_break)) // || op_illegal |
||(wr_reg_ce)&&(wr_write_cc) |
||(div_busy)||(fpu_busy); |
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall) |
483,7 → 483,7
&&(~clear_pipeline); |
`else |
assign alu_stall = (opvalid_alu)&&((~master_ce)||(op_break)); |
assign alu_ce = (master_ce)&&((opvalid_alu)||(op_illegal))&&(~alu_stall)&&(~clear_pipeline); |
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline); |
`endif |
// |
|
607,8 → 607,6
assign dcdvalid = r_dcdvalid; |
`endif |
|
`ifdef OPT_NEW_INSTRUCTION_SET |
|
// If not pipelined, there will be no opvalid_ anything, and the |
idecode #(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE, |
IMPLEMENT_FPU) |
625,25 → 623,6
dcd_early_branch, |
dcd_branch_pc, dcd_ljmp, |
dcd_pipe); |
`else |
idecode_deprecated |
#(AW, IMPLEMENT_MPY, EARLY_BRANCHING, IMPLEMENT_DIVIDE, |
IMPLEMENT_FPU) |
instruction_decoder(i_clk, (i_rst)||(clear_pipeline), |
dcd_ce, dcd_stalled, instruction, instruction_gie, |
instruction_pc, pf_valid, pf_illegal, dcd_phase, |
dcd_illegal, dcd_pc, dcd_gie, |
{ dcdR_cc, dcdR_pc, dcdR }, |
{ dcdA_cc, dcdA_pc, dcdA }, |
{ dcdB_cc, dcdB_pc, dcdB }, |
dcdI, dcd_zI, dcdF, dcdF_wr, dcdOp, |
dcdALU, dcdM, dcdDV, dcdFP, dcd_break, dcd_lock, |
dcdR_wr,dcdA_rd, dcdB_rd, |
dcd_early_branch, |
dcd_branch_pc, |
dcd_pipe); |
assign dcd_ljmp = 1'b0; |
`endif |
|
`ifdef OPT_PIPELINED_BUS_ACCESS |
reg r_op_pipe; |
819,7 → 798,6
begin // Set the flag condition codes, bit order is [3:0]=VNCZ |
case(dcdF[2:0]) |
3'h0: r_opF <= 6'h00; // Always |
`ifdef OPT_NEW_INSTRUCTION_SET |
// These were remapped as part of the new instruction |
// set in order to make certain that the low order |
// two bits contained the most commonly used |
829,13 → 807,6
3'h3: r_opF <= 6'h10; // NE |
3'h4: r_opF <= 6'h30; // GT (!N&!Z) |
3'h5: r_opF <= 6'h20; // GE (!N) |
`else |
3'h1: r_opF <= 6'h11; // Z |
3'h2: r_opF <= 6'h10; // NE |
3'h3: r_opF <= 6'h20; // GE (!N) |
3'h4: r_opF <= 6'h30; // GT (!N&!Z) |
3'h5: r_opF <= 6'h24; // LT |
`endif |
3'h6: r_opF <= 6'h02; // C |
3'h7: r_opF <= 6'h08; // V |
endcase |
850,7 → 821,7
initial opvalid_div = 1'b0; |
initial opvalid_fpu = 1'b0; |
always @(posedge i_clk) |
if (i_rst) |
if ((i_rst)||(clear_pipeline)) |
begin |
opvalid <= 1'b0; |
opvalid_alu <= 1'b0; |
879,7 → 850,7
opvalid_div <= (dcdDV)&&(w_opvalid); |
opvalid_fpu <= (dcdFP)&&(w_opvalid); |
`endif |
end else if ((clear_pipeline)||(adf_ce_unconditional)||(mem_ce)) |
end else if ((adf_ce_unconditional)||(mem_ce)) |
begin |
opvalid <= 1'b0; |
opvalid_alu <= 1'b0; |
903,7 → 874,7
initial r_op_break = 1'b0; |
always @(posedge i_clk) |
if (i_rst) r_op_break <= 1'b0; |
else if (op_ce) r_op_break <= (dcd_break); // &&(dcdvalid) |
else if (op_ce) r_op_break <= (dcd_break); |
else if ((clear_pipeline)||(~opvalid)) |
r_op_break <= 1'b0; |
assign op_break = r_op_break; |
1132,17 → 1103,9
// PIPELINE STAGE #4 :: Apply Instruction |
// |
// |
`ifdef OPT_NEW_INSTRUCTION_SET |
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce, |
(opvalid_alu), opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_illegal_op, |
alu_busy); |
`else |
cpuops_deprecated #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce, |
(opvalid_alu), opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_illegal_op); |
assign alu_busy = 1'b0; |
`endif |
cpuops #(IMPLEMENT_MPY) doalu(i_clk, (i_rst)||(clear_pipeline), |
alu_ce, opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_busy); |
|
generate |
if (IMPLEMENT_DIVIDE != 0) |
1267,7 → 1230,7
r_alu_illegal <= op_illegal; |
else |
r_alu_illegal <= 1'b0; |
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal); |
assign alu_illegal = (r_alu_illegal); |
`else |
assign alu_illegal = 1'b0; |
`endif |
1469,7 → 1432,7
if ((i_rst)||(clear_pipeline)||(~opvalid)) |
r_break_pending <= 1'b0; |
else if (op_break) |
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy); |
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy)&&(!wr_reg_ce); |
else |
r_break_pending <= 1'b0; |
assign break_pending = r_break_pending; |
1483,7 → 1446,7
||((~alu_gie)&&(bus_err)) |
||((~alu_gie)&&(div_error)) |
||((~alu_gie)&&(fpu_error)) |
||((~alu_gie)&&(alu_illegal)); |
||((~alu_gie)&&(alu_illegal)&&(!clear_pipeline)); |
|
// The sleep register. Setting the sleep register causes the CPU to |
// sleep until the next interrupt. Setting the sleep register within |
1513,12 → 1476,10
sleep <= wr_spreg_vl[`CPU_SLEEP_BIT]; |
|
always @(posedge i_clk) |
if ((i_rst)||(w_switch_to_interrupt)) |
if (i_rst) |
step <= 1'b0; |
else if ((wr_reg_ce)&&(~alu_gie)&&(wr_write_ucc)) |
step <= wr_spreg_vl[`CPU_STEP_BIT]; |
else if (((alu_pc_valid)||(mem_pc_valid))&&(step)&&(gie)) |
step <= 1'b0; |
|
// The GIE register. Only interrupts can disable the interrupt register |
assign w_switch_to_interrupt = (gie)&&( |
1531,7 → 1492,7
||((master_ce)&&(break_pending)&&(~break_en)) |
`ifdef OPT_ILLEGAL_INSTRUCTION |
// On an illegal instruction |
||(alu_illegal) |
||((alu_illegal)&&(!clear_pipeline)) |
`endif |
// On division by zero. If the divide isn't |
// implemented, div_valid and div_error will be short |
1588,7 → 1549,7
// Only the debug interface can clear this bit |
else if ((dbgv)&&(wr_write_scc)) |
ill_err_i <= (ill_err_i)&&(wr_spreg_vl[`CPU_ILL_BIT]); |
else if ((alu_illegal)&&(~alu_gie)) |
else if ((alu_illegal)&&(~alu_gie)&&(!clear_pipeline)) |
ill_err_i <= 1'b1; |
initial ill_err_u = 1'b0; |
always @(posedge i_clk) |
1600,7 → 1561,7
// clearing the bit, then clear it |
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc)) |
ill_err_u <=((ill_err_u)&&(wr_spreg_vl[`CPU_ILL_BIT])); |
else if ((alu_illegal)&&(alu_gie)) |
else if ((alu_illegal)&&(alu_gie)&&(!clear_pipeline)) |
ill_err_u <= 1'b1; |
`else |
assign ill_err_u = 1'b0; |
1734,7 → 1695,7
if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc)) |
upc <= wr_spreg_vl[(AW-1):0]; |
else if ((alu_gie)&& |
(((alu_pc_valid)&&(~clear_pipeline)) |
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal)) |
||(mem_pc_valid))) |
upc <= alu_pc; |
|
1744,7 → 1705,7
else if ((wr_reg_ce)&&(~wr_reg_id[4])&&(wr_write_pc)) |
ipc <= wr_spreg_vl[(AW-1):0]; |
else if ((~alu_gie)&& |
(((alu_pc_valid)&&(~clear_pipeline)) |
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal)) |
||(mem_pc_valid))) |
ipc <= alu_pc; |
|
/xulalx25soc/trunk/rtl/cpu/zipsystem.v
170,7 → 170,7
, o_cpu_debug |
`endif |
); |
parameter RESET_ADDRESS=24'h0100000, ADDRESS_WIDTH=24, |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32, |
LGICACHE=10, START_HALTED=1, EXTERNAL_INTERRUPTS=1, |
`ifdef OPT_MULTIPLY |
IMPLEMENT_MPY = `OPT_MULTIPLY, |
187,8 → 187,8
`else |
IMPLEMENT_FPU=0, |
`endif |
IMPLEMENT_LOCK=1, |
// Derived parameters |
IMPLEMENT_LOCK=1; |
localparam // Derived parameters |
AW=ADDRESS_WIDTH; |
input i_clk, i_rst; |
// Wishbone master |
523,6 → 523,9
wire [31:0] dc_data; |
wire [(AW-1):0] dc_addr; |
wire cpu_gbl_cyc; |
wire [31:0] dmac_int_vec; |
assign dmac_int_vec = { 1'b0, alt_int_vector, 1'b0, |
main_int_vector[14:1], 1'b0 }; |
assign dmac_stb = (sys_stb)&&(sys_addr[4]); |
`ifdef INCLUDE_DMA_CONTROLLER |
wbdmac #(AW) dma_controller(i_clk, cpu_reset, |
533,8 → 536,7
dc_cyc, dc_stb, dc_we, dc_addr, dc_data, |
dc_ack, dc_stall, ext_idata, dc_err, |
// External device interrupts |
{ 1'b0, alt_int_vector, 1'b0, |
main_int_vector[14:1], 1'b0 }, |
dmac_int_vec, |
// DMAC interrupt, for upon completion |
dmac_int); |
`else |
782,7 → 784,7
|
assign sys_stall = (tma_stall | tmb_stall | tmc_stall | jif_stall |
| wdt_stall | ctri_stall | actr_stall |
| pic_stall | dmac_stall); |
| pic_stall | dmac_stall); // Always 1'b0! |
assign cpu_stall = (sys_stall)|(cpu_ext_stall); |
assign sys_ack = (tmr_ack|wdt_ack|ctri_ack|actr_ack|pic_ack|dmac_ack|wdbus_ack); |
assign cpu_ack = (sys_ack)||(cpu_ext_ack); |
/xulalx25soc/trunk/sw/zipstate.cpp
15,7 → 15,7
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
27,6 → 27,11
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// |
67,7 → 72,7
printf("ERR: errcount(%d) >= MAXERR on cmd_read(a=%02x)\n", |
errcount, r); |
printf("ZIPCTRL = 0x%08x", s); |
if ((s & 0x0200)==0) printf(" STALL"); |
if ((s & 0x0200)==0) printf(" BUSY"); |
if (s & 0x0400) printf(" HALTED"); |
if ((s & 0x03000)==0x01000) |
printf(" SW-HALT"); |
116,7 → 121,7
printf("0x%08x: ", v); |
if (v & 0x0080) printf("PINT "); |
// if (v & 0x0100) printf("STEP "); // self resetting |
if((v & 0x00200)==0) printf("STALL "); |
if((v & 0x00200)==0) printf("BUSY "); |
if (v & 0x00400) printf("HALTED "); |
if((v & 0x03000)==0x01000) { |
printf("SW-HALT"); |
/xulalx25soc/trunk/xilinx/toplevel.bit
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/xulalx25soc/trunk/xula.ucf
140,7 → 140,21
############################## |
NET "i_clk_12mhz" TNM_NET = "i_clk_12mhz"; |
NET "i_ram_feedback_clk" TNM_NET = "i_ram_feedback_clk"; |
TIMESPEC "TSi_clk_12mhz" = PERIOD "i_clk_12mhz" 83.0 ns HIGH 50%; |
|
# |
# TimeSpec. The source clock to the XuLA2-LX25 and LX9 boards is a 12MHz |
# crystal oscillator. 12MHz corresponds to an 83.3333ns clocks---which would |
# be the line below: |
# |
# (Please leave this commented ... I'll explain ...) |
# TIMESPEC "TSi_clk_12mhz" = PERIOD "i_clk_12mhz" 83.333333 ns HIGH 50%; |
# TIMESPEC "TSi_ram_feedback_clk" = PERIOD "i_ram_feedback_clk" 10.0 ns HIGH 50%; |
# |
# However, ISE struggles to meet timing with this design. By slightly |
# adjusting the input clock speed faster in the hundreds of picoseconds range, |
# I can create timing closure. At one time, someone explained to me that I was |
# really just forcing the XISE to use a different random seed for starting. |
# This may well be the case, but ... it's worked for me so far. |
TIMESPEC "TSi_clk_12mhz" = PERIOD "i_clk_12mhz" 83.1 ns HIGH 50%; |
# |
# The following line is included for completeness. It is not used. |
TIMESPEC "TSi_ram_feedback_clk" = PERIOD "i_ram_feedback_clk" 11.3 ns HIGH 50%; |