URL
https://opencores.org/ocsvn/zipcpu/zipcpu/trunk
Subversion Repositories zipcpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/zipcpu/trunk/rtl
- from Rev 132 to Rev 133
- ↔ Reverse comparison
Rev 132 → Rev 133
/core/cpuops.v
32,6 → 32,7
// |
/////////////////////////////////////////////////////////////////////////// |
// |
// `define LONG_MPY |
module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid, |
o_illegal, o_busy); |
parameter IMPLEMENT_MPY = 1; |
90,6 → 91,10
||(i_op == 4'h6) // LSL |
||(i_op == 4'h5)); // LSR |
|
`ifdef LONG_MPY |
reg mpyhi; |
wire mpybusy; |
`endif |
|
// A 4-way multiplexer can be done in one 6-LUT. |
// A 16-way multiplexer can therefore be done in 4x 6-LUT's with |
113,7 → 118,9
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifndef LONG_MPY |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
// 4'h1010: The unimplemented MPYU, |
// 4'h1011: and here for the unimplemented MPYS |
128,12 → 135,141
|
reg r_illegal; |
always @(posedge i_clk) |
r_illegal <= (i_ce)&&((i_op == 4'h3)||(i_op == 4'h4)); |
r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb) |
`ifdef LONG_MPY |
||(i_op == 4'h8) |
`endif |
); |
assign o_illegal = r_illegal; |
end else begin |
// |
// Multiply pre-logic |
// |
`ifdef LONG_MPY |
reg [63:0] r_mpy_result; |
if (IMPLEMENT_MPY == 1) |
begin // Our two clock option (one clock extra) |
reg signed [64:0] r_mpy_a_input, r_mpy_b_input; |
reg mpypipe, x; |
initial mpypipe = 1'b0; |
always @(posedge i_clk) |
mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8)); |
always @(posedge i_clk) |
if (i_ce) |
begin |
r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}}, |
i_a[31:0]}; |
r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}}, |
i_b[31:0]}; |
end |
always @(posedge i_clk) |
if (mpypipe) |
{x, r_mpy_result} = r_mpy_a_input |
* r_mpy_b_input; |
always @(posedge i_clk) |
if (i_ce) |
mpyhi = i_op[1]; |
assign mpybusy = mpypipe; |
end else if (IMPLEMENT_MPY == 2) |
begin // The three clock option |
reg [31:0] r_mpy_a_input, r_mpy_b_input; |
reg r_mpy_signed; |
reg [1:0] mpypipe; |
|
// First clock, latch in the inputs |
always @(posedge i_clk) |
begin |
// mpypipe indicates we have a multiply in the |
// pipeline. In this case, the multiply |
// pipeline is a two stage pipeline, so we need |
// two bits in the pipe. |
mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5) |
||(i_op[3:0]==4'h8)); |
mpypipe[1] <= mpypipe[0]; |
|
if (i_op[0]) // i.e. if signed multiply |
begin |
r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; |
r_mpy_b_input <= {(~i_b[31]),i_b[30:0]}; |
end else begin |
r_mpy_a_input <= i_a[31:0]; |
r_mpy_b_input <= i_b[31:0]; |
end |
// The signed bit really only matters in the |
// case of 64 bit multiply. We'll keep track |
// of it, though, and pretend in all other |
// cases. |
r_mpy_signed <= i_op[0]; |
|
if (i_ce) |
mpyhi = i_op[1]; |
end |
|
assign mpybusy = |mpypipe; |
|
// Second clock, do the multiplies, get the "partial |
// products". Here, we break our input up into two |
// halves, |
// |
// A = (2^16 ah + al) |
// B = (2^16 bh + bl) |
// |
// and use these to compute partial products. |
// |
// AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl) |
// |
// Since we're following the FOIL algorithm to get here, |
// we'll name these partial products according to FOIL. |
// |
// The trick is what happens if A or B is signed. In |
// those cases, the real value of A will not be given by |
// A = (2^16 ah + al) |
// but rather |
// A = (2^16 ah[31^] + al) - 2^31 |
// (where we have flipped the sign bit of A) |
// and so ... |
// |
// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31) |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al) |
// - 2^62 |
// = 2^32(ah*bh) |
// +2^16 (ah*bl+al*bh) |
// +(al*bl) |
// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31) |
// |
reg [31:0] pp_f, pp_o, pp_i, pp_l; |
reg [32:0] pp_s; |
always @(posedge i_clk) |
begin |
pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16]; |
pp_o<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]; |
pp_i<=r_mpy_a_input[15: 0]*r_mpy_b_input[31:16]; |
pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0]; |
// And a special one for the sign |
if (r_mpy_signed) |
pp_s <= 32'h8000_0000-( |
r_mpy_a_input[31:0] |
+ r_mpy_b_input[31:0]); |
else |
pp_s <= 33'h0; |
end |
|
// Third clock, add the results and produce a product |
always @(posedge i_clk) |
begin |
r_mpy_result[15:0] <= pp_l[15:0]; |
r_mpy_result[63:16] <= |
{ 32'h00, pp_l[31:16] } |
+ { 16'h00, pp_o } |
+ { 16'h00, pp_i } |
+ { pp_s, 15'h00 } |
+ { pp_f, 16'h00 }; |
end |
end // Fourth clock -- results are available for writeback. |
`else |
wire signed [16:0] w_mpy_a_input, w_mpy_b_input; |
wire [33:0] w_mpy_result; |
reg [31:0] r_mpy_result; |
143,6 → 279,7
always @(posedge i_clk) |
if (i_ce) |
r_mpy_result = w_mpy_result[31:0]; |
`endif |
|
// |
// The master ALU case statement |
161,10 → 298,19
4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR |
4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL |
4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR |
`ifdef LONG_MPY |
4'b1000: o_c <= r_mpy_result[31:0]; // MPY |
`else |
4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI |
`endif |
4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO |
`ifdef LONG_MPY |
4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU |
4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS |
`else |
4'b1010: o_c <= r_mpy_result; // MPYU |
4'b1011: o_c <= r_mpy_result; // MPYS |
`endif |
4'b1100: o_c <= w_brev_result; // BREV |
4'b1101: o_c <= w_popc_result; // POPC |
4'b1110: o_c <= w_rol_result; // ROL |
171,13 → 317,22
default: o_c <= i_b; // MOV, LDI |
endcase |
end else if (r_busy) |
`ifdef LONG_MPY |
o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0]; |
`else |
o_c <= r_mpy_result; |
`endif |
|
reg r_busy; |
initial r_busy = 1'b0; |
always @(posedge i_clk) |
r_busy <= (~i_rst)&&(i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&((i_op[3:1] == 3'h5) |
||(i_op[3:0] == 4'h8))||mpybusy; |
`else |
&&(i_op[3:1] == 3'h5); |
`endif |
|
assign o_busy = r_busy; |
|
195,6 → 350,11
if (i_rst) |
o_valid <= 1'b0; |
else |
o_valid <= (i_ce)&&(i_valid)&&(i_op[3:1] != 3'h5) |
||(o_busy); |
o_valid <= (i_ce)&&(i_valid) |
`ifdef LONG_MPY |
&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8) |
||(o_busy)&&(~mpybusy); |
`else |
&&(i_op[3:1] != 3'h5)||(o_busy); |
`endif |
endmodule |