URL
https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/zipcpu/trunk/rtl
- from Rev 184 to Rev 193
- ↔ Reverse comparison
Rev 184 → Rev 193
/Makefile
32,7 → 32,7
################################################################################ |
# |
.PHONY: all |
all: zipsystem zipbones cpudefs.h div |
all: zipsystem zipbones cpudefs.h div zipmmu cpuops |
|
CORED:= core |
PRPHD:= peripherals |
40,7 → 40,7
VSRC := zipsystem.v cpudefs.v \ |
$(PRPHD)/wbdmac.v $(PRPHD)/icontrol.v \ |
$(PRPHD)/zipcounter.v $(PRPHD)/zipjiffies.v \ |
$(PRPHD)/ziptimer.v $(PRPHD)/ziptrap.v \ |
$(PRPHD)/ziptimer.v \ |
$(CORED)/zipcpu.v $(CORED)/cpuops.v $(CORED)/idecode.v \ |
$(CORED)/pipefetch.v $(CORED)/prefetch.v \ |
$(CORED)/pfcache.v \ |
69,9 → 69,17
$(VOBJ)/Vzipbones.h: $(VOBJ)/Vzipbones.cpp |
|
$(VOBJ)/Vdiv.cpp: $(CORED)/div.v |
verilator -cc -y $(CORED) -y $(PRPHD) -y $(AUXD) $(CORED)/div.v |
verilator -cc -y $(CORED) $(CORED)/div.v |
$(VOBJ)/Vdiv.h: $(VOBJ)/Vdiv.cpp |
|
$(VOBJ)/Vcpuops.cpp: $(CORED)/cpuops.v cpudefs.v |
verilator -cc -y $(CORED) $(CORED)/cpuops.v |
$(VOBJ)/Vcpuops.h: $(VOBJ)/Vcpuops.cpp |
|
$(VOBJ)/Vzipmmu.cpp: $(PRPHD)/zipmmu.v |
verilator -cc -y $(PRPHD) $(PRPHD)/zipmmu.v |
$(VOBJ)/Vzipmmu.h: $(VOBJ)/Vzipmmu.cpp |
|
$(VOBJ)/Vzipsystem__ALL.a: $(VOBJ)/Vzipsystem.cpp $(VOBJ)/Vzipsystem.h |
cd $(VOBJ); make --no-print-directory -f Vzipsystem.mk |
|
81,6 → 89,15
$(VOBJ)/Vdiv__ALL.a: $(VOBJ)/Vdiv.cpp $(VOBJ)/Vdiv.h |
cd $(VOBJ); make --no-print-directory -f Vdiv.mk |
|
$(VOBJ)/Vcpuops__ALL.a: $(VOBJ)/Vcpuops.cpp $(VOBJ)/Vcpuops.h |
cd $(VOBJ); make --no-print-directory -f Vcpuops.mk |
|
$(VOBJ)/Vzipmmu__ALL.a: $(VOBJ)/Vzipmmu.cpp $(VOBJ)/Vzipmmu.h |
cd $(VOBJ); make --no-print-directory -f Vzipmmu.mk |
|
# $(VOBJ)/V%__ALL.a: $(VOBJ)/V%.cpp $(VOBJ)/V%.h |
# cd $(VOBJ); make --no-print-directory -f V%.mk |
|
cpudefs.h: cpudefs.v |
@echo "Building cpudefs.h" |
@echo "// " > $@ |
90,14 → 107,20
@grep "^\`" $^ | sed -e '{ s/^`/#/ }' >> $@ |
|
.PHONY: zipsystem |
zipsystem: $(VOBJ)/Vzipsystem__ALL.a |
zipsystem: $(VOBJ)/Vzipsystem__ALL.a cpudefs.h |
|
.PHONY: zipbones |
zipbones: $(VOBJ)/Vzipbones__ALL.a |
zipbones: $(VOBJ)/Vzipbones__ALL.a cpudefs.h |
|
.PHONY: div |
div: $(VOBJ)/Vdiv__ALL.a |
|
.PHONY: cpuops |
cpuops: $(VOBJ)/Vcpuops__ALL.a cpudefs.h |
|
.PHONY: zipmmu |
zipmmu: $(VOBJ)/Vzipmmu__ALL.a |
|
.PHONY: clean |
clean: |
rm -rf $(VOBJ) cpudefs.h |
/core/cpuops.v
14,7 → 14,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
32,18 → 32,17
// |
/////////////////////////////////////////////////////////////////////////// |
// |
`define LONG_MPY |
module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_illegal, o_busy); |
parameter IMPLEMENT_MPY = 1; |
`include "cpudefs.v" |
// |
module cpuops(i_clk,i_rst, i_ce, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_busy); |
parameter IMPLEMENT_MPY = `OPT_MULTIPLY; |
input i_clk, i_rst, i_ce; |
input [3:0] i_op; |
input [31:0] i_a, i_b; |
input i_valid; |
output reg [31:0] o_c; |
output wire [3:0] o_f; |
output reg o_valid; |
output wire o_illegal; |
output wire o_busy; |
|
// Rotate-left pre-logic |
99,10 → 98,10
||(i_op == 4'h6) // LSL |
||(i_op == 4'h5)); // LSR |
|
`ifdef LONG_MPY |
reg mpyhi; |
wire mpybusy; |
`endif |
wire [63:0] mpy_result; // Where we dump the multiply result |
reg mpyhi; // Return the high half of the multiply |
wire mpybusy; // The multiply is busy if true |
wire mpydone; // True if we'll be valid on the next clock; |
|
// A 4-way multiplexer can be done in one 6-LUT. |
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with |
109,244 → 108,254
// the Xilinx multiplexer fabric that follows. |
// Given that we wish to apply this multiplexer approach to 33-bits, |
// this will cost a minimum of 132 6-LUTs. |
|
wire this_is_a_multiply_op; |
assign this_is_a_multiply_op = (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
|
generate |
if (IMPLEMENT_MPY == 0) |
begin |
begin // No multiply support. |
assign mpy_result = 63'h00; |
end else if (IMPLEMENT_MPY == 1) |
begin // Our single clock option (no extra clocks) |
wire signed [63:0] w_mpy_a_input, w_mpy_b_input; |
assign w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]}; |
assign w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]}; |
assign mpy_result = w_mpy_a_input * w_mpy_b_input; |
assign mpybusy = 1'b0; |
assign mpydone = 1'b0; |
always @(*) mpyhi = 1'b0; // Not needed |
end else if (IMPLEMENT_MPY == 2) |
begin // Our two clock option (ALU must pause for 1 clock) |
reg signed [63:0] r_mpy_a_input, r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifndef LONG_MPY |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
// 4'h1010: The unimplemented MPYU, |
// 4'h1011: and here for the unimplemented MPYS |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]}; |
r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]}; |
end |
|
assign o_busy = 1'b0; |
assign mpy_result = r_mpy_a_input * r_mpy_b_input; |
assign mpybusy = 1'b0; |
|
reg r_illegal; |
initial mpypipe = 1'b0; |
reg mpypipe; |
always @(posedge i_clk) |
r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb) |
`ifdef LONG_MPY |
||(i_op == 4'h8) |
if (i_rst) |
mpypipe <= 1'b0; |
else |
mpypipe <= (this_is_a_multiply_op); |
|
assign mpydone = mpypipe; // this_is_a_multiply_op; |
always @(posedge i_clk) |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
end else if (IMPLEMENT_MPY == 3) |
begin // Our three clock option (ALU pauses for 2 clocks) |
reg signed [63:0] r_smpy_result; |
reg [63:0] r_umpy_result; |
reg signed [31:0] r_mpy_a_input, r_mpy_b_input; |
reg [1:0] mpypipe; |
reg [1:0] r_sgn; |
|
initial mpypipe = 2'b0; |
always @(posedge i_clk) |
if (i_rst) |
mpypipe <= 2'b0; |
else |
mpypipe <= { mpypipe[0], this_is_a_multiply_op }; |
|
// First clock |
always @(posedge i_clk) |
begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
r_sgn <= { r_sgn[0], i_op[0] }; |
end |
|
// Second clock |
`ifdef VERILATOR |
wire signed [63:0] s_mpy_a_input, s_mpy_b_input; |
wire [63:0] u_mpy_a_input, u_mpy_b_input; |
|
assign s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input}; |
assign s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input}; |
assign u_mpy_a_input = {32'h00,r_mpy_a_input}; |
assign u_mpy_b_input = {32'h00,r_mpy_b_input}; |
always @(posedge i_clk) |
r_smpy_result = s_mpy_a_input * s_mpy_b_input; |
always @(posedge i_clk) |
r_umpy_result = u_mpy_a_input * u_mpy_b_input; |
`else |
|
wire [31:0] u_mpy_a_input, u_mpy_b_input; |
|
assign u_mpy_a_input = r_mpy_a_input; |
assign u_mpy_b_input = r_mpy_b_input; |
|
always @(posedge i_clk) |
r_smpy_result = r_mpy_a_input * r_mpy_b_input; |
always @(posedge i_clk) |
r_umpy_result = u_mpy_a_input * u_mpy_b_input; |
`endif |
); |
assign o_illegal = r_illegal; |
end else begin |
// |
// Multiply pre-logic |
// |
`ifdef LONG_MPY |
|
always @(posedge i_clk) |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe[0]; |
assign mpy_result = (r_sgn[1])?r_smpy_result:r_umpy_result; |
assign mpydone = mpypipe[1]; |
|
// Results are then set on the third clock |
end else // if (IMPLEMENT_MPY <= 4) |
begin // The three clock option |
reg [63:0] r_mpy_result; |
if (IMPLEMENT_MPY == 1) |
begin // Our two clock option (one clock extra) |
reg signed [64:0] r_mpy_a_input, r_mpy_b_input; |
reg mpypipe, x; |
initial mpypipe = 1'b0; |
always @(posedge i_clk) |
mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
always @(posedge i_clk) |
if (i_ce) |
begin |
r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}}, |
i_a[31:0]}; |
r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}}, |
i_b[31:0]}; |
end |
always @(posedge i_clk) |
if (mpypipe) |
{x, r_mpy_result} = r_mpy_a_input |
* r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe; |
end else if (IMPLEMENT_MPY == 2) |
begin // The three clock option |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [1:0] mpypipe; |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [2:0] mpypipe; |
|
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a two stage pipeline, so we need |
// two bits in the pipe. |
mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5) |
||(i_op[3:0]==4'h8)); |
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a three stage pipeline, so we need |
// three bits in the pipe. |
if (i_rst) |
mpypipe <= 3'h0; |
else begin |
mpypipe[0] <= this_is_a_multiply_op; |
mpypipe[1] <= mpypipe[0]; |
|
if (i_op[0]) // i.e. if signed multiply |
begin |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
if (i_ce) |
mpyhi = i_op[1]; |
mpypipe[2] <= mpypipe[1]; |
end |
|
assign mpybusy = |mpypipe; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = 2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// - 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31) |
// |
reg [31:0] pp_f, pp_l; // F and L from FOIL |
reg [32:0] pp_oi; // The O and I from FOIL |
reg [32:0] pp_s; |
always @(posedge i_clk) |
if (i_op[0]) // i.e. if signed multiply |
begin |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0] |
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 15'h00, pp_oi } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
end // Fourth clock -- results are available for writeback. |
`else |
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire [33:0] w_mpy_result; |
reg [31:0] r_mpy_result; |
assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] }; |
assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] }; |
assign w_mpy_result = w_mpy_a_input * w_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
r_mpy_result = w_mpy_result[31:0]; |
`endif |
if (this_is_a_multiply_op) |
mpyhi = i_op[1]; |
end |
|
assign mpybusy = |mpypipe[1:0]; |
assign mpydone = mpypipe[2]; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// The master ALU case statement |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = 2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// - 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31) |
// |
reg [31:0] pp_f, pp_l; // F and L from FOIL |
reg [32:0] pp_oi; // The O and I from FOIL |
reg [32:0] pp_s; |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifdef LONG_MPY |
4'b1000: o_c <= r_mpy_result[31:0]; // MPY |
`else |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
`ifdef LONG_MPY |
4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS |
`else |
4'b1010: o_c <= r_mpy_result; // MPYU |
4'b1011: o_c <= r_mpy_result; // MPYS |
`endif |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
end else if (r_busy) |
`ifdef LONG_MPY |
o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0]; |
`else |
o_c <= r_mpy_result; |
`endif |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0] |
+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
end |
|
reg r_busy; |
initial r_busy = 1'b0; |
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
r_busy <= (~i_rst)&&(i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&((i_op[3:1] == 3'h5) |
||(i_op[3:0] == 4'h8))||mpybusy; |
`else |
&&(i_op[3:1] == 3'h5); |
`endif |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 15'h00, pp_oi } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
|
assign o_busy = r_busy; |
assign mpy_result = r_mpy_result; |
// Fourth clock -- results are clocked into writeback |
end |
endgenerate // All possible multiply results have been determined |
|
assign o_illegal = 1'b0; |
end endgenerate |
// |
// The master ALU case statement |
// |
always @(posedge i_clk) |
if (i_ce) |
begin |
pre_sign <= (i_a[31]); |
c <= 1'b0; |
casez(i_op) |
4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB |
4'b0001: o_c <= i_a & i_b; // BTST/And |
4'b0010:{c,o_c } <= i_a + i_b; // Add |
4'b0011: o_c <= i_a | i_b; // Or |
4'b0100: o_c <= i_a ^ i_b; // Xor |
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
4'b1000: o_c <= mpy_result[31:0]; // MPY |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
4'b1010: o_c <= mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= mpy_result[63:32]; // MPYHS |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
default: o_c <= i_b; // MOV, LDI |
endcase |
end else // if (mpydone) |
o_c <= (mpyhi)?mpy_result[63:32]:mpy_result[31:0]; |
|
reg r_busy; |
initial r_busy = 1'b0; |
always @(posedge i_clk) |
if (i_rst) |
r_busy <= 1'b0; |
else |
r_busy <= ((IMPLEMENT_MPY > 1) |
&&(this_is_a_multiply_op))||mpybusy; |
assign o_busy = (r_busy); // ||((IMPLEMENT_MPY>1)&&(this_is_a_multiply_op)); |
|
|
assign z = (o_c == 32'h0000); |
assign n = (o_c[31]); |
assign v = (set_ovfl)&&(pre_sign != o_c[31]); |
357,12 → 366,9
always @(posedge i_clk) |
if (i_rst) |
o_valid <= 1'b0; |
else if (IMPLEMENT_MPY <= 1) |
o_valid <= (i_ce); |
else |
o_valid <= (i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8) |
||(o_busy)&&(~mpybusy); |
`else |
&&(i_op[3:1] != 3'h5)||(o_busy); |
`endif |
o_valid <=((i_ce)&&(!this_is_a_multiply_op))||(mpydone); |
|
endmodule |
/core/zipcpu.v
106,8 → 106,8
// |
`define CPU_CC_REG 4'he |
`define CPU_PC_REG 4'hf |
`define CPU_CLRCACHE_BIT 14 // Floating point error flag, set on error |
`define CPU_PHASE_BIT 13 // Floating point error flag, set on error |
`define CPU_CLRCACHE_BIT 14 // Set to clear the I-cache, automatically clears |
`define CPU_PHASE_BIT 13 // Set if we are executing the latter half of a VLIW |
`define CPU_FPUERR_BIT 12 // Floating point error flag, set on error |
`define CPU_DIVERR_BIT 11 // Divide error flag, set on divide by zero |
`define CPU_BUSERR_BIT 10 // Bus error flag, set on error |
139,7 → 139,7
, o_debug |
`endif |
); |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=24, |
parameter RESET_ADDRESS=32'h0100000, ADDRESS_WIDTH=32, |
LGICACHE=8; |
`ifdef OPT_MULTIPLY |
parameter IMPLEMENT_MPY = `OPT_MULTIPLY; |
162,7 → 162,7
`else |
parameter EARLY_BRANCHING = 0; |
`endif |
parameter AW=ADDRESS_WIDTH; |
localparam AW=ADDRESS_WIDTH; |
input i_clk, i_rst, i_interrupt; |
// Debug interface -- inputs |
input i_halt, i_clear_pf_cache; |
325,7 → 325,7
wire alu_valid, alu_busy; |
wire set_cond; |
reg alu_wr, alF_wr; |
wire alu_gie, alu_illegal_op, alu_illegal; |
wire alu_gie, alu_illegal; |
|
|
|
476,7 → 476,7
`ifdef OPT_PIPELINED |
assign alu_stall = (((~master_ce)||(mem_rdbusy)||(alu_busy))&&(opvalid_alu)) //Case 1&2 |
||((opvalid)&&(op_lock)&&(op_lock_stall)) |
||((opvalid)&&(op_break)) |
||((opvalid)&&(op_break)) // || op_illegal |
||(wr_reg_ce)&&(wr_write_cc) |
||(div_busy)||(fpu_busy); |
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall) |
483,7 → 483,7
&&(~clear_pipeline); |
`else |
assign alu_stall = (opvalid_alu)&&((~master_ce)||(op_break)); |
assign alu_ce = (master_ce)&&((opvalid_alu)||(op_illegal))&&(~alu_stall)&&(~clear_pipeline); |
assign alu_ce = (master_ce)&&(opvalid_alu)&&(~alu_stall)&&(~clear_pipeline); |
`endif |
// |
|
850,7 → 850,7
initial opvalid_div = 1'b0; |
initial opvalid_fpu = 1'b0; |
always @(posedge i_clk) |
if (i_rst) |
if ((i_rst)||(clear_pipeline)) |
begin |
opvalid <= 1'b0; |
opvalid_alu <= 1'b0; |
879,7 → 879,7
opvalid_div <= (dcdDV)&&(w_opvalid); |
opvalid_fpu <= (dcdFP)&&(w_opvalid); |
`endif |
end else if ((clear_pipeline)||(adf_ce_unconditional)||(mem_ce)) |
end else if ((adf_ce_unconditional)||(mem_ce)) |
begin |
opvalid <= 1'b0; |
opvalid_alu <= 1'b0; |
903,7 → 903,7
initial r_op_break = 1'b0; |
always @(posedge i_clk) |
if (i_rst) r_op_break <= 1'b0; |
else if (op_ce) r_op_break <= (dcd_break); // &&(dcdvalid) |
else if (op_ce) r_op_break <= (dcd_break); //||dcd_illegal &&(dcdvalid) |
else if ((clear_pipeline)||(~opvalid)) |
r_op_break <= 1'b0; |
assign op_break = r_op_break; |
1132,17 → 1132,9
// PIPELINE STAGE #4 :: Apply Instruction |
// |
// |
`ifdef OPT_NEW_INSTRUCTION_SET |
cpuops #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce, |
(opvalid_alu), opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_illegal_op, |
alu_busy); |
`else |
cpuops_deprecated #(IMPLEMENT_MPY) doalu(i_clk, i_rst, alu_ce, |
(opvalid_alu), opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_illegal_op); |
assign alu_busy = 1'b0; |
`endif |
cpuops #(IMPLEMENT_MPY) doalu(i_clk, (i_rst)||(clear_pipeline), |
alu_ce, opn, opA, opB, |
alu_result, alu_flags, alu_valid, alu_busy); |
|
generate |
if (IMPLEMENT_DIVIDE != 0) |
1267,7 → 1259,7
r_alu_illegal <= op_illegal; |
else |
r_alu_illegal <= 1'b0; |
assign alu_illegal = (alu_illegal_op)||(r_alu_illegal); |
assign alu_illegal = (r_alu_illegal); |
`else |
assign alu_illegal = 1'b0; |
`endif |
1469,7 → 1461,7
if ((i_rst)||(clear_pipeline)||(~opvalid)) |
r_break_pending <= 1'b0; |
else if (op_break) |
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy); |
r_break_pending <= (~alu_busy)&&(~div_busy)&&(~fpu_busy)&&(~mem_busy)&&(!wr_reg_ce); |
else |
r_break_pending <= 1'b0; |
assign break_pending = r_break_pending; |
1483,7 → 1475,7
||((~alu_gie)&&(bus_err)) |
||((~alu_gie)&&(div_error)) |
||((~alu_gie)&&(fpu_error)) |
||((~alu_gie)&&(alu_illegal)); |
||((~alu_gie)&&(alu_illegal)&&(!clear_pipeline)); |
|
// The sleep register. Setting the sleep register causes the CPU to |
// sleep until the next interrupt. Setting the sleep register within |
1513,12 → 1505,10
sleep <= wr_spreg_vl[`CPU_SLEEP_BIT]; |
|
always @(posedge i_clk) |
if ((i_rst)||(w_switch_to_interrupt)) |
if (i_rst) |
step <= 1'b0; |
else if ((wr_reg_ce)&&(~alu_gie)&&(wr_write_ucc)) |
step <= wr_spreg_vl[`CPU_STEP_BIT]; |
else if (((alu_pc_valid)||(mem_pc_valid))&&(step)&&(gie)) |
step <= 1'b0; |
|
// The GIE register. Only interrupts can disable the interrupt register |
assign w_switch_to_interrupt = (gie)&&( |
1531,7 → 1521,7
||((master_ce)&&(break_pending)&&(~break_en)) |
`ifdef OPT_ILLEGAL_INSTRUCTION |
// On an illegal instruction |
||(alu_illegal) |
||((alu_illegal)&&(!clear_pipeline)) |
`endif |
// On division by zero. If the divide isn't |
// implemented, div_valid and div_error will be short |
1588,7 → 1578,7
// Only the debug interface can clear this bit |
else if ((dbgv)&&(wr_write_scc)) |
ill_err_i <= (ill_err_i)&&(wr_spreg_vl[`CPU_ILL_BIT]); |
else if ((alu_illegal)&&(~alu_gie)) |
else if ((alu_illegal)&&(~alu_gie)&&(!clear_pipeline)) |
ill_err_i <= 1'b1; |
initial ill_err_u = 1'b0; |
always @(posedge i_clk) |
1600,7 → 1590,7
// clearing the bit, then clear it |
else if (((~alu_gie)||(dbgv))&&(wr_reg_ce)&&(wr_write_ucc)) |
ill_err_u <=((ill_err_u)&&(wr_spreg_vl[`CPU_ILL_BIT])); |
else if ((alu_illegal)&&(alu_gie)) |
else if ((alu_illegal)&&(alu_gie)&&(!clear_pipeline)) |
ill_err_u <= 1'b1; |
`else |
assign ill_err_u = 1'b0; |
1734,7 → 1724,7
if ((wr_reg_ce)&&(wr_reg_id[4])&&(wr_write_pc)) |
upc <= wr_spreg_vl[(AW-1):0]; |
else if ((alu_gie)&& |
(((alu_pc_valid)&&(~clear_pipeline)) |
(((alu_pc_valid)&&(~clear_pipeline)&&(!alu_illegal)) |
||(mem_pc_valid))) |
upc <= alu_pc; |
|
/cpudefs.v
28,7 → 28,7
// |
/////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015, Gisselquist Technology, LLC |
// Copyright (C) 2015-2016, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
76,11 → 76,17
// OPT_ILLEGAL_INSTRUCTION is set, then the multiply will create an illegal |
// instruction that will then trip the illegal instruction trap. |
// |
// Either not defining this value, or defining it to zero will disable the |
// hardware multiply. A value of '1' will cause the multiply to occur in one |
// clock cycle only--often at the expense of the rest of the CPU's speed. |
// A value of 2 will cause the multiply to have a single delay cycle, 3 will |
// have two delay cycles, and 4 (or more) will have 3 delay cycles. |
// |
`define OPT_MULTIPLY 1 |
// |
`define OPT_MULTIPLY 3 |
// |
// |
// |
// OPT_DIVIDE controls whether or not the divide instruction is built and |
// included into the ZipCPU by default. Set this option and a parameter will |
// be set that causes the divide unit to be included. (This parameter may |