URL
https://opencores.org/ocsvn/xulalx25soc/xulalx25soc/trunk
Subversion Repositories xulalx25soc
Compare Revisions
- This comparison shows the changes necessary to convert path
/xulalx25soc
- from Rev 51 to Rev 52
- ↔ Reverse comparison
Rev 51 → Rev 52
/trunk/rtl/cpu/cpudefs.v
1,4 → 1,4
`define XULA25 |
//`define XULA25 |
/////////////////////////////////////////////////////////////////////////////// |
// |
// Filename: cpudefs.v |
78,7 → 78,7
// instruction that will then trip the illegal instruction trap. |
// |
// |
`define OPT_MULTIPLY |
`define OPT_MULTIPLY 2 |
// |
// |
// |
/trunk/rtl/cpu/idecode.v
75,7 → 75,8
output reg [3:0] o_cond; |
output reg o_wF; |
output reg [3:0] o_op; |
output reg o_ALU, o_M, o_DV, o_FP, o_break, o_lock; |
output reg o_ALU, o_M, o_DV, o_FP, o_break; |
output wire o_lock; |
output reg o_wR, o_rA, o_rB; |
output wire o_early_branch; |
output wire [(AW-1):0] o_branch_pc; |
86,10 → 87,13
wire o_dcd_early_branch; |
wire [(AW-1):0] o_dcd_branch_pc; |
reg o_dcdI, o_dcdIz; |
`ifdef OPT_PIPELINED |
reg r_lock; |
`endif |
|
|
wire [4:0] w_op; |
wire w_ldi, w_mov, w_cmptst, w_ldixx, w_ALU, w_brev; |
wire w_ldi, w_mov, w_cmptst, w_ldilo, w_ALU, w_brev; |
wire [4:0] w_dcdR, w_dcdB, w_dcdA; |
wire w_dcdR_pc, w_dcdR_cc; |
wire w_dcdA_pc, w_dcdA_cc; |
125,7 → 129,7
assign w_ldi = (w_op[4:1] == 4'hb); |
assign w_brev = (w_op == 5'hc); |
assign w_cmptst = (w_op[4:1] == 4'h8); |
assign w_ldixx = (w_op[4:1] == 4'h4); |
assign w_ldilo = (w_op[4:0] == 5'h9); |
assign w_ALU = (~w_op[4]); |
|
// 4 LUTs |
177,7 → 181,7
||(w_op[4:1]== 4'h8); |
// 1 LUT -- do we read a register for operand B? Specifically, do |
// we need to stall if the register is not (yet) ready? |
assign w_rB = (w_mov)||((iword[18])&&((~w_ldi)&&(~w_ldixx))); |
assign w_rB = (w_mov)||((iword[18])&&(~w_ldi)); |
// 1 LUT: All but STO, NOOP/BREAK/LOCK, and CMP/TST write back to w_dcdR |
assign w_wR_n = ((w_dcdM)&&(w_op[0])) |
||((w_op[4:3]==2'b11)&&(w_dcdR[3:1]==3'h7)) |
190,7 → 194,7
// and writes to the PC/CC register(s). |
assign w_wF = (w_cmptst) |
||((w_cond[3])&&((w_dcdFP)||(w_dcdDV) |
||((w_ALU)&&(~w_mov)&&(~w_ldixx)&&(~w_brev) |
||((w_ALU)&&(~w_mov)&&(~w_ldilo)&&(~w_brev) |
&&(iword[30:28] != 3'h7)))); |
|
// Bottom 13 bits: no LUT's |
273,8 → 277,10
|
if ((w_op[4:3]==2'b11)&&(w_dcdR[3:1]==3'h7) |
&&( |
(w_op[2:0] != 3'h2) // LOCK |
&&(w_op[2:0] != 3'h1) // BREAK |
(w_op[2:0] != 3'h1) // BREAK |
`ifdef OPT_PIPELINED |
&&(w_op[2:0] != 3'h2) // LOCK |
`endif |
&&(w_op[2:0] != 3'h0))) // NOOP |
o_illegal <= 1'b1; |
end |
333,7 → 339,9
o_FP <= w_dcdFP; |
|
o_break <= (w_op[4:3]==2'b11)&&(w_dcdR[3:1]==3'h7)&&(w_op[2:0]==3'b001); |
o_lock <= (w_op[4:3]==2'b11)&&(w_dcdR[3:1]==3'h7)&&(w_op[2:0]==3'b010); |
`ifdef OPT_PIPELINED |
r_lock <= (w_op[4:3]==2'b11)&&(w_dcdR[3:1]==3'h7)&&(w_op[2:0]==3'b010); |
`endif |
`ifdef OPT_VLIW |
r_nxt_half <= { iword[31], iword[13:5], |
((iword[21])? iword[20:19] : 2'h0), |
341,6 → 349,12
`endif |
end |
|
`ifdef OPT_PIPELINED |
assign o_lock = r_lock; |
`else |
assign o_lock = 1'b0; |
`endif |
|
generate |
if (EARLY_BRANCHING!=0) |
begin |
/trunk/rtl/cpu/pipemem.v
68,6 → 68,7
input i_wb_ack, i_wb_stall, i_wb_err; |
input [31:0] i_wb_data; |
|
reg cyc; |
reg r_wb_cyc_gbl, r_wb_cyc_lcl; |
reg [3:0] rdaddr, wraddr; |
wire [3:0] nxt_rdaddr; |
88,7 → 89,6
rdaddr <= rdaddr + 4'h1; |
assign nxt_rdaddr = rdaddr + 4'h1; |
|
reg cyc; |
wire gbl_stb, lcl_stb; |
assign lcl_stb = (i_addr[31:8]==24'hc00000)&&(i_addr[7:5]==3'h0); |
assign gbl_stb = (~lcl_stb); |
/trunk/rtl/cpu/wbdmac.v
193,7 → 193,7
end else if (i_mwb_ack) |
begin |
nacks <= nacks+1; |
cfg_len <= cfg_len - {{(AW-1){1'b0}},1'b1}; |
cfg_len <= cfg_len - 1; |
if ((nacks+1 == nwritten)&&(~o_mwb_stb)) |
begin |
o_mwb_cyc <= 1'b0; |
/trunk/rtl/cpu/zipsystem.v
364,19 → 364,7
wire cpu_gie; |
assign cpu_gie = cpu_dbg_cc[1]; |
|
`ifdef USE_TRAP |
// |
// The TRAP peripheral |
// |
wire trap_ack, trap_stall, trap_int; |
wire [31:0] trap_data; |
ziptrap trapp(i_clk, |
sys_cyc, (sys_stb)&&(sys_addr == `TRAP_ADDR), sys_we, |
sys_data, |
trap_ack, trap_stall, trap_data, trap_int); |
`endif |
|
// |
// The WATCHDOG Timer |
// |
wire wdt_ack, wdt_stall, wdt_reset; |
/trunk/rtl/cpu/cpuops.v
32,6 → 32,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
// `define LONG_MPY |
module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_illegal, o_busy); |
parameter IMPLEMENT_MPY = 1; |
90,6 → 91,10
||(i_op == 4'h6) // LSL |
||(i_op == 4'h5)); // LSR |
|
`ifdef LONG_MPY |
reg mpyhi; |
wire mpybusy; |
`endif |
|
// A 4-way multiplexer can be done in one 6-LUT. |
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with |
96,7 → 101,6
// the Xilinx multiplexer fabric that follows. |
// Given that we wish to apply this multiplexer approach to 33-bits, |
// this will cost a minimum of 132 6-LUTs. |
wire w_illegal; |
generate |
if (IMPLEMENT_MPY == 0) |
begin |
114,7 → 118,9
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifndef LONG_MPY |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
// 4'b1010: The unimplemented MPYU, |
// 4'b1011: and here for the unimplemented MPYS |
127,15 → 133,143
|
assign o_busy = 1'b0; |
|
assign w_illegal = (i_ce)&&((i_op == 4'h3)||(i_op == 4'h4)); |
reg r_illegal; |
always @(posedge i_clk) |
r_illegal <= w_illegal; |
r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb) |
`ifdef LONG_MPY |
||(i_op == 4'h8) |
`endif |
); |
assign o_illegal = r_illegal; |
end else begin |
// |
// Multiply pre-logic |
// |
`ifdef LONG_MPY |
reg [63:0] r_mpy_result; |
if (IMPLEMENT_MPY == 1) |
begin // Our two clock option (one clock extra) |
reg signed [64:0] r_mpy_a_input, r_mpy_b_input; |
reg mpypipe, x; |
initial mpypipe = 1'b0; |
always @(posedge i_clk) |
mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
always @(posedge i_clk) |
if (i_ce) |
begin |
r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}}, |
i_a[31:0]}; |
r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}}, |
i_b[31:0]}; |
end |
always @(posedge i_clk) |
if (mpypipe) |
{x, r_mpy_result} = r_mpy_a_input |
* r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe; |
end else if (IMPLEMENT_MPY == 2) |
begin // The three clock option |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [1:0] mpypipe; |
|
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a two stage pipeline, so we need |
// two bits in the pipe. |
mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5) |
||(i_op[3:0]==4'h8)); |
mpypipe[1] <= mpypipe[0]; |
|
if (i_op[0]) // i.e. if signed multiply |
begin |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
if (i_ce) |
mpyhi = i_op[1]; |
end |
|
assign mpybusy = |mpypipe; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// + 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al - 2^31) |
// |
reg [31:0] pp_f, pp_o, pp_i, pp_l; |
reg [32:0] pp_s; |
always @(posedge i_clk) |
begin |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_o<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]; |
pp_i<=r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
end |
|
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 16'h00, pp_o } |
+ { 16'h00, pp_i } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
end // Fourth clock -- results are available for writeback. |
`else |
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire [33:0] w_mpy_result; |
reg [31:0] r_mpy_result; |
145,6 → 279,7
always @(posedge i_clk) |
if (i_ce) |
r_mpy_result = w_mpy_result[31:0]; |
`endif |
|
// |
// The master ALU case statement |
163,10 → 298,19
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifdef LONG_MPY |
4'b1000: o_c <= r_mpy_result[31:0]; // MPY |
`else |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
`ifdef LONG_MPY |
4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS |
`else |
4'b1010: o_c <= r_mpy_result; // MPYU |
4'b1011: o_c <= r_mpy_result; // MPYS |
`endif |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
173,17 → 317,25
default: o_c <= i_b; // MOV, LDI |
endcase |
end else if (r_busy) |
`ifdef LONG_MPY |
o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0]; |
`else |
o_c <= r_mpy_result; |
`endif |
|
reg r_busy; |
initial r_busy = 1'b0; |
always @(posedge i_clk) |
r_busy <= (~i_rst)&&(i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&((i_op[3:1] == 3'h5) |
||(i_op[3:0] == 4'h8))||mpybusy; |
`else |
&&(i_op[3:1] == 3'h5); |
`endif |
|
assign o_busy = r_busy; |
|
assign w_illegal = 1'b0; |
assign o_illegal = 1'b0; |
end endgenerate |
|
198,6 → 350,11
if (i_rst) |
o_valid <= 1'b0; |
else |
o_valid <= (i_ce)&&(i_valid)&&(i_op[3:1] != 3'h5)&&(~w_illegal) |
||(o_busy); |
o_valid <= (i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8) |
||(o_busy)&&(~mpybusy); |
`else |
&&(i_op[3:1] != 3'h5)||(o_busy); |
`endif |
endmodule |
/trunk/rtl/cpu/zipcpu.v
137,7 → 137,7
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24, |
LGICACHE=6; |
`ifdef OPT_MULTIPLY |
parameter IMPLEMENT_MPY = 1; |
parameter IMPLEMENT_MPY = `OPT_MULTIPLY; |
`else |
parameter IMPLEMENT_MPY = 0; |
`endif |
869,10 → 869,8
always @(posedge i_clk) |
if (i_rst) |
r_op_lock <= 1'b0; |
else if ((op_ce)&&(dcd_lock)) |
r_op_lock <= 1'b1; |
else if ((op_ce)||(clear_pipeline)) |
r_op_lock <= 1'b0; |
else if (op_ce) |
r_op_lock <= (dcd_lock)&&(~clear_pipeline); |
assign op_lock = r_op_lock; |
|
end else begin |
1180,16 → 1178,16
generate |
if (IMPLEMENT_LOCK != 0) |
begin |
reg r_bus_lock; |
initial r_bus_lock = 1'b0; |
reg [1:0] r_bus_lock; |
initial r_bus_lock = 2'b00; |
always @(posedge i_clk) |
if (i_rst) |
r_bus_lock <= 1'b0; |
r_bus_lock <= 2'b00; |
else if ((op_ce)&&(op_lock)) |
r_bus_lock <= 1'b1; |
else if (~opvalid_mem) |
r_bus_lock <= 1'b0; |
assign bus_lock = r_bus_lock; |
r_bus_lock <= 2'b11; |
else if ((|r_bus_lock)&&((~opvalid_mem)||(~op_ce))) |
r_bus_lock <= r_bus_lock + 2'b11; |
assign bus_lock = |r_bus_lock; |
end else begin |
assign bus_lock = 1'b0; |
end endgenerate |
1449,7 → 1447,7
always @(posedge i_clk) |
if (i_rst) |
ill_err_i <= 1'b0; |
// The debug interface can clear this bit |
// Only the debug interface can clear this bit |
else if ((dbgv)&&(wr_reg_id == {1'b0, `CPU_CC_REG}) |
&&(~wr_reg_vl[`CPU_ILL_BIT])) |
ill_err_i <= 1'b0; |
1709,8 → 1707,7
`ifdef DEBUG_SCOPE |
always @(posedge i_clk) |
o_debug <= { |
/* |
i_wb_err, pf_pc[2:0], |
o_break, i_wb_err, pf_pc[1:0], |
flags, |
pf_valid, dcdvalid, opvalid, alu_valid, mem_valid, |
op_ce, alu_ce, mem_ce, |
1724,7 → 1721,6
// ||((opvalid_mem)&&( op_pipe)&&(mem_pipe_stalled))); |
// opA[23:20], opA[3:0], |
gie, sleep, wr_reg_ce, wr_reg_vl[4:0] |
*/ |
/* |
i_rst, master_ce, (new_pc), |
((dcd_early_branch)&&(dcdvalid)), |
1743,6 → 1739,7
pf_valid, (pf_valid) ? alu_pc[14:0] |
:{ pf_cyc, pf_stb, pf_pc[12:0] } |
*/ |
/* |
i_wb_err, gie, new_pc, dcd_early_branch, // 4 |
pf_valid, pf_cyc, pf_stb, instruction_pc[0], // 4 |
instruction[30:27], // 4 |
1750,6 → 1747,7
dcdvalid, |
((dcd_early_branch)&&(~clear_pipeline)) // 15 |
? dcd_branch_pc[14:0]:pf_pc[14:0] |
*/ |
}; |
`endif |
|