OpenCores

Rev 133	Rev 138
`///////////////////////////////////////////////////////////////////////////`	`///////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Filename: cpuops.v`	`// Filename: cpuops.v`
`//`	`//`
`// Project: Zip CPU -- a small, lightweight, RISC CPU soft core`	`// Project: Zip CPU -- a small, lightweight, RISC CPU soft core`
`//`	`//`
`// Purpose: This supports the instruction set reordering of operations`	`// Purpose: This supports the instruction set reordering of operations`
`// created by the second generation instruction set, as well as`	`// created by the second generation instruction set, as well as`
`// the new operations of POPC (population count) and BREV (bit reversal).`	`// the new operations of POPC (population count) and BREV (bit reversal).`
`//`	`//`
`//`	`//`
`// Creator: Dan Gisselquist, Ph.D.`	`// Creator: Dan Gisselquist, Ph.D.`
`// Gisselquist Technology, LLC`	`// Gisselquist Technology, LLC`
`//`	`//`
`///////////////////////////////////////////////////////////////////////////`	`///////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Copyright (C) 2015, Gisselquist Technology, LLC`	`// Copyright (C) 2015, Gisselquist Technology, LLC`
`//`	`//`
`// This program is free software (firmware): you can redistribute it and/or`	`// This program is free software (firmware): you can redistribute it and/or`
`// modify it under the terms of the GNU General Public License as published`	`// modify it under the terms of the GNU General Public License as published`
`// by the Free Software Foundation, either version 3 of the License, or (at`	`// by the Free Software Foundation, either version 3 of the License, or (at`
`// your option) any later version.`	`// your option) any later version.`
`//`	`//`
`// This program is distributed in the hope that it will be useful, but WITHOUT`	`// This program is distributed in the hope that it will be useful, but WITHOUT`
`// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`	`// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
`// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`	`// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
`// for more details.`	`// for more details.`
`//`	`//`
`// License: GPL, v3, as defined and found on www.gnu.org,`	`// License: GPL, v3, as defined and found on www.gnu.org,`
`// http://www.gnu.org/licenses/gpl.html`	`// http://www.gnu.org/licenses/gpl.html`
`//`	`//`
`//`	`//`
`///////////////////////////////////////////////////////////////////////////`	`///////////////////////////////////////////////////////////////////////////`
`//`	`//`
// `define LONG_MPY	`define LONG_MPY
`module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,`	`module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,`
`o_illegal, o_busy);`	`o_illegal, o_busy);`
`parameter IMPLEMENT_MPY = 1;`	`parameter IMPLEMENT_MPY = 1;`
`input i_clk, i_rst, i_ce;`	`input i_clk, i_rst, i_ce;`
`input [3:0] i_op;`	`input [3:0] i_op;`
`input [31:0] i_a, i_b;`	`input [31:0] i_a, i_b;`
`input i_valid;`	`input i_valid;`
`output reg [31:0] o_c;`	`output reg [31:0] o_c;`
`output wire [3:0] o_f;`	`output wire [3:0] o_f;`
`output reg o_valid;`	`output reg o_valid;`
`output wire o_illegal;`	`output wire o_illegal;`
`output wire o_busy;`	`output wire o_busy;`

`// Rotate-left pre-logic`	`// Rotate-left pre-logic`
`wire [63:0] w_rol_tmp;`	`wire [63:0] w_rol_tmp;`
`assign w_rol_tmp = { i_a, i_a } << i_b[4:0];`	`assign w_rol_tmp = { i_a, i_a } << i_b[4:0];`
`wire [31:0] w_rol_result;`	`wire [31:0] w_rol_result;`
`assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags`	`assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags`

`// Shift register pre-logic`	`// Shift register pre-logic`
`wire [32:0] w_lsr_result, w_asr_result;`	`wire [32:0] w_lsr_result, w_asr_result;`
`assign w_asr_result = (\|i_b[31:5])? {(33){i_a[31]}}`	`assign w_asr_result = (\|i_b[31:5])? {(33){i_a[31]}}`
`: ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR`	`: ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR`
`assign w_lsr_result = (\|i_b[31:5])? 33'h00`	`assign w_lsr_result = (\|i_b[31:5])? 33'h00`
`: ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR`	`: ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR`

`// Bit reversal pre-logic`	`// Bit reversal pre-logic`
`wire [31:0] w_brev_result;`	`wire [31:0] w_brev_result;`
`genvar k;`	`genvar k;`
`generate`	`generate`
`for(k=0; k<32; k=k+1)`	`for(k=0; k<32; k=k+1)`
`begin : bit_reversal_cpuop`	`begin : bit_reversal_cpuop`
`assign w_brev_result[k] = i_b[31-k];`	`assign w_brev_result[k] = i_b[31-k];`
`end endgenerate`	`end endgenerate`

`// Popcount pre-logic`	`// Popcount pre-logic`
`wire [31:0] w_popc_result;`	`wire [31:0] w_popc_result;`
`assign w_popc_result[5:0]=`	`assign w_popc_result[5:0]=`
`({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})`	`({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})`
`+({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})`	`+({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})`
`+({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})`	`+({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})`
`+({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})`	`+({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})`
`+({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})`	`+({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})`
`+({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})`	`+({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})`
`+({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})`	`+({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})`
`+({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});`	`+({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});`
`assign w_popc_result[31:6] = 26'h00;`	`assign w_popc_result[31:6] = 26'h00;`

`// Prelogic for our flags registers`	`// Prelogic for our flags registers`
`wire z, n, v;`	`wire z, n, v;`
`reg c, pre_sign, set_ovfl;`	`reg c, pre_sign, set_ovfl;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce) // 1 LUT`	`if (i_ce) // 1 LUT`
`set_ovfl =(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP`	`set_ovfl =(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP`
`\|\|((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD`	`\|\|((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD`
`\|\|(i_op == 4'h6) // LSL`	`\|\|(i_op == 4'h6) // LSL`
`\|\|(i_op == 4'h5)); // LSR`	`\|\|(i_op == 4'h5)); // LSR`

`ifdef LONG_MPY	`ifdef LONG_MPY
`reg mpyhi;`	`reg mpyhi;`
`wire mpybusy;`	`wire mpybusy;`
`endif	`endif

`// A 4-way multiplexer can be done in one 6-LUT.`	`// A 4-way multiplexer can be done in one 6-LUT.`
`// A 16-way multiplexer can therefore be done in 4x 6-LUT's with`	`// A 16-way multiplexer can therefore be done in 4x 6-LUT's with`
`// the Xilinx multiplexer fabric that follows.`	`// the Xilinx multiplexer fabric that follows.`
`// Given that we wish to apply this multiplexer approach to 33-bits,`	`// Given that we wish to apply this multiplexer approach to 33-bits,`
`// this will cost a minimum of 132 6-LUTs.`	`// this will cost a minimum of 132 6-LUTs.`
`generate`	`generate`
`if (IMPLEMENT_MPY == 0)`	`if (IMPLEMENT_MPY == 0)`
`begin`	`begin`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce)`	`if (i_ce)`
`begin`	`begin`
`pre_sign <= (i_a[31]);`	`pre_sign <= (i_a[31]);`
`c <= 1'b0;`	`c <= 1'b0;`
`casez(i_op)`	`casez(i_op)`
`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`	`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`
`4'b0001: o_c <= i_a & i_b; // BTST/And`	`4'b0001: o_c <= i_a & i_b; // BTST/And`
`4'b0010:{c,o_c } <= i_a + i_b; // Add`	`4'b0010:{c,o_c } <= i_a + i_b; // Add`
`4'b0011: o_c <= i_a \| i_b; // Or`	`4'b0011: o_c <= i_a \| i_b; // Or`
`4'b0100: o_c <= i_a ^ i_b; // Xor`	`4'b0100: o_c <= i_a ^ i_b; // Xor`
`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`	`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`
`4'b0110:{c,o_c } <= (\|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL`	`4'b0110:{c,o_c } <= (\|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL`
`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`	`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`
`ifndef LONG_MPY	`ifndef LONG_MPY
`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`	`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`
`endif	`endif
`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`	`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`
`// 4'b1010: The unimplemented MPYU,`	`// 4'b1010: The unimplemented MPYU,`
`// 4'b1011: and here for the unimplemented MPYS`	`// 4'b1011: and here for the unimplemented MPYS`
`4'b1100: o_c <= w_brev_result; // BREV`	`4'b1100: o_c <= w_brev_result; // BREV`
`4'b1101: o_c <= w_popc_result; // POPC`	`4'b1101: o_c <= w_popc_result; // POPC`
`4'b1110: o_c <= w_rol_result; // ROL`	`4'b1110: o_c <= w_rol_result; // ROL`
`default: o_c <= i_b; // MOV, LDI`	`default: o_c <= i_b; // MOV, LDI`
`endcase`	`endcase`
`end`	`end`

`assign o_busy = 1'b0;`	`assign o_busy = 1'b0;`

`reg r_illegal;`	`reg r_illegal;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`r_illegal <= (i_ce)&&((i_op == 4'ha)\|\|(i_op == 4'hb)`	`r_illegal <= (i_ce)&&((i_op == 4'ha)\|\|(i_op == 4'hb)`
`ifdef LONG_MPY	`ifdef LONG_MPY
`\|\|(i_op == 4'h8)`	`\|\|(i_op == 4'h8)`
`endif	`endif
`);`	`);`
`assign o_illegal = r_illegal;`	`assign o_illegal = r_illegal;`
`end else begin`	`end else begin`
`//`	`//`
`// Multiply pre-logic`	`// Multiply pre-logic`
`//`	`//`
`ifdef LONG_MPY	`ifdef LONG_MPY
`reg [63:0] r_mpy_result;`	`reg [63:0] r_mpy_result;`
`if (IMPLEMENT_MPY == 1)`	`if (IMPLEMENT_MPY == 1)`
`begin // Our two clock option (one clock extra)`	`begin // Our two clock option (one clock extra)`
`reg signed [64:0] r_mpy_a_input, r_mpy_b_input;`	`reg signed [64:0] r_mpy_a_input, r_mpy_b_input;`
`reg mpypipe, x;`	`reg mpypipe, x;`
`initial mpypipe = 1'b0;`	`initial mpypipe = 1'b0;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)\|\|(i_op[3:0]==4'h8));`	`mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)\|\|(i_op[3:0]==4'h8));`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce)`	`if (i_ce)`
`begin`	`begin`
`r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},`	`r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},`
`i_a[31:0]};`	`i_a[31:0]};`
`r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},`	`r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},`
`i_b[31:0]};`	`i_b[31:0]};`
`end`	`end`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (mpypipe)`	`if (mpypipe)`
`{x, r_mpy_result} = r_mpy_a_input`	`{x, r_mpy_result} = r_mpy_a_input`
`* r_mpy_b_input;`	`* r_mpy_b_input;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce)`	`if (i_ce)`
`mpyhi = i_op[1];`	`mpyhi = i_op[1];`
`assign mpybusy = mpypipe;`	`assign mpybusy = mpypipe;`
`end else if (IMPLEMENT_MPY == 2)`	`end else if (IMPLEMENT_MPY == 2)`
`begin // The three clock option`	`begin // The three clock option`
`reg [31:0] r_mpy_a_input, r_mpy_b_input;`	`reg [31:0] r_mpy_a_input, r_mpy_b_input;`
`reg r_mpy_signed;`	`reg r_mpy_signed;`
`reg [1:0] mpypipe;`	`reg [1:0] mpypipe;`

`// First clock, latch in the inputs`	`// First clock, latch in the inputs`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`begin`	`begin`
`// mpypipe indicates we have a multiply in the`	`// mpypipe indicates we have a multiply in the`
`// pipeline. In this case, the multiply`	`// pipeline. In this case, the multiply`
`// pipeline is a two stage pipeline, so we need`	`// pipeline is a two stage pipeline, so we need`
`// two bits in the pipe.`	`// two bits in the pipe.`
`mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)`	`mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)`
`\|\|(i_op[3:0]==4'h8));`	`\|\|(i_op[3:0]==4'h8));`
`mpypipe[1] <= mpypipe[0];`	`mpypipe[1] <= mpypipe[0];`

`if (i_op[0]) // i.e. if signed multiply`	`if (i_op[0]) // i.e. if signed multiply`
`begin`	`begin`
`r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};`	`r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};`
`r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};`	`r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};`
`end else begin`	`end else begin`
`r_mpy_a_input <= i_a[31:0];`	`r_mpy_a_input <= i_a[31:0];`
`r_mpy_b_input <= i_b[31:0];`	`r_mpy_b_input <= i_b[31:0];`
`end`	`end`
`// The signed bit really only matters in the`	`// The signed bit really only matters in the`
`// case of 64 bit multiply. We'll keep track`	`// case of 64 bit multiply. We'll keep track`
`// of it, though, and pretend in all other`	`// of it, though, and pretend in all other`
`// cases.`	`// cases.`
`r_mpy_signed <= i_op[0];`	`r_mpy_signed <= i_op[0];`

`if (i_ce)`	`if (i_ce)`
`mpyhi = i_op[1];`	`mpyhi = i_op[1];`
`end`	`end`

`assign mpybusy = \|mpypipe;`	`assign mpybusy = \|mpypipe;`

`// Second clock, do the multiplies, get the "partial`	`// Second clock, do the multiplies, get the "partial`
`// products". Here, we break our input up into two`	`// products". Here, we break our input up into two`
`// halves,`	`// halves,`
`//`	`//`
`// A = (2^16 ah + al)`	`// A = (2^16 ah + al)`
`// B = (2^16 bh + bl)`	`// B = (2^16 bh + bl)`
`//`	`//`
`// and use these to compute partial products.`	`// and use these to compute partial products.`
`//`	`//`
`// AB = (2^32 ahbh) + 2^16 (ahbl + albh) + (albl)`	`// AB = (2^32 ahbh) + 2^16 (ahbl + albh) + (albl)`
`//`	`//`
`// Since we're following the FOIL algorithm to get here,`	`// Since we're following the FOIL algorithm to get here,`
`// we'll name these partial products according to FOIL.`	`// we'll name these partial products according to FOIL.`
`//`	`//`
`// The trick is what happens if A or B is signed. In`	`// The trick is what happens if A or B is signed. In`
`// those cases, the real value of A will not be given by`	`// those cases, the real value of A will not be given by`
`// A = (2^16 ah + al)`	`// A = (2^16 ah + al)`
`// but rather`	`// but rather`
`// A = (2^16 ah[31^] + al) - 2^31`	`// A = (2^16 ah[31^] + al) - 2^31`
`// (where we have flipped the sign bit of A)`	`// (where we have flipped the sign bit of A)`
`// and so ...`	`// and so ...`
`//`	`//`
`// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)`	`// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)`
`// = 2^32(ah*bh)`	`// = 2^32(ah*bh)`
`// +2^16 (ahbl+albh)`	`// +2^16 (ahbl+albh)`
`// +(al*bl)`	`// +(al*bl)`
`// - 2^31 (2^16 bh+bl + 2^16 ah+al)`	`// - 2^31 (2^16 bh+bl + 2^16 ah+al)`
`// + 2^62`	`// + 2^62`
`// = 2^32(ah*bh)`	`// = 2^32(ah*bh)`
`// +2^16 (ahbl+albh)`	`// +2^16 (ahbl+albh)`
`// +(al*bl)`	`// +(al*bl)`
`// - 2^31 (2^16 bh+bl + 2^16 ah+al - 2^31)`	`// - 2^31 (2^16 bh+bl + 2^16 ah+al - 2^31)`
`//`	`//`
`reg [31:0] pp_f, pp_o, pp_i, pp_l;`	`reg [31:0] pp_f, pp_l; // F and L from FOIL`
	`reg [32:0] pp_oi; // The O and I from FOIL`
`reg [32:0] pp_s;`	`reg [32:0] pp_s;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`begin`	`begin`
`pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];`	`pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];`
`pp_o<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0];`	`pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]`
`pp_i<=r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];`	`+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];`
`pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];`	`pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];`
`// And a special one for the sign`	`// And a special one for the sign`
`if (r_mpy_signed)`	`if (r_mpy_signed)`
`pp_s <= 32'h8000_0000-(`	`pp_s <= 32'h8000_0000-(`
`r_mpy_a_input[31:0]`	`r_mpy_a_input[31:0]`
`+ r_mpy_b_input[31:0]);`	`+ r_mpy_b_input[31:0]);`
`else`	`else`
`pp_s <= 33'h0;`	`pp_s <= 33'h0;`
`end`	`end`

`// Third clock, add the results and produce a product`	`// Third clock, add the results and produce a product`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`begin`	`begin`
`r_mpy_result[15:0] <= pp_l[15:0];`	`r_mpy_result[15:0] <= pp_l[15:0];`
`r_mpy_result[63:16] <=`	`r_mpy_result[63:16] <=`
`{ 32'h00, pp_l[31:16] }`	`{ 32'h00, pp_l[31:16] }`
`+ { 16'h00, pp_o }`	`+ { 15'h00, pp_oi }`
`+ { 16'h00, pp_i }`
`+ { pp_s, 15'h00 }`	`+ { pp_s, 15'h00 }`
`+ { pp_f, 16'h00 };`	`+ { pp_f, 16'h00 };`
`end`	`end`
`end // Fourth clock -- results are available for writeback.`	`end // Fourth clock -- results are available for writeback.`
`else	`else
`wire signed [16:0] w_mpy_a_input, w_mpy_b_input;`	`wire signed [16:0] w_mpy_a_input, w_mpy_b_input;`
`wire [33:0] w_mpy_result;`	`wire [33:0] w_mpy_result;`
`reg [31:0] r_mpy_result;`	`reg [31:0] r_mpy_result;`
`assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };`	`assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };`
`assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };`	`assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };`
`assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;`	`assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce)`	`if (i_ce)`
`r_mpy_result = w_mpy_result[31:0];`	`r_mpy_result = w_mpy_result[31:0];`
`endif	`endif

`//`	`//`
`// The master ALU case statement`	`// The master ALU case statement`
`//`	`//`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_ce)`	`if (i_ce)`
`begin`	`begin`
`pre_sign <= (i_a[31]);`	`pre_sign <= (i_a[31]);`
`c <= 1'b0;`	`c <= 1'b0;`
`casez(i_op)`	`casez(i_op)`
`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`	`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`
`4'b0001: o_c <= i_a & i_b; // BTST/And`	`4'b0001: o_c <= i_a & i_b; // BTST/And`
`4'b0010:{c,o_c } <= i_a + i_b; // Add`	`4'b0010:{c,o_c } <= i_a + i_b; // Add`
`4'b0011: o_c <= i_a \| i_b; // Or`	`4'b0011: o_c <= i_a \| i_b; // Or`
`4'b0100: o_c <= i_a ^ i_b; // Xor`	`4'b0100: o_c <= i_a ^ i_b; // Xor`
`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`	`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`
`4'b0110:{c,o_c } <= (\|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL`	`4'b0110:{c,o_c } <= (\|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0]; // LSL`
`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`	`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`
`ifdef LONG_MPY	`ifdef LONG_MPY
`4'b1000: o_c <= r_mpy_result[31:0]; // MPY`	`4'b1000: o_c <= r_mpy_result[31:0]; // MPY`
`else	`else
`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`	`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`
`endif	`endif
`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`	`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`
`ifdef LONG_MPY	`ifdef LONG_MPY
`4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU`	`4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU`
`4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS`	`4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS`
`else	`else
`4'b1010: o_c <= r_mpy_result; // MPYU`	`4'b1010: o_c <= r_mpy_result; // MPYU`
`4'b1011: o_c <= r_mpy_result; // MPYS`	`4'b1011: o_c <= r_mpy_result; // MPYS`
`endif	`endif
`4'b1100: o_c <= w_brev_result; // BREV`	`4'b1100: o_c <= w_brev_result; // BREV`
`4'b1101: o_c <= w_popc_result; // POPC`	`4'b1101: o_c <= w_popc_result; // POPC`
`4'b1110: o_c <= w_rol_result; // ROL`	`4'b1110: o_c <= w_rol_result; // ROL`
`default: o_c <= i_b; // MOV, LDI`	`default: o_c <= i_b; // MOV, LDI`
`endcase`	`endcase`
`end else if (r_busy)`	`end else if (r_busy)`
`ifdef LONG_MPY	`ifdef LONG_MPY
`o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];`	`o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];`
`else	`else
`o_c <= r_mpy_result;`	`o_c <= r_mpy_result;`
`endif	`endif

`reg r_busy;`	`reg r_busy;`
`initial r_busy = 1'b0;`	`initial r_busy = 1'b0;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`r_busy <= (~i_rst)&&(i_ce)&&(i_valid)`	`r_busy <= (~i_rst)&&(i_ce)&&(i_valid)`
`ifdef LONG_MPY	`ifdef LONG_MPY
`&&((i_op[3:1] == 3'h5)`	`&&((i_op[3:1] == 3'h5)`
`\|\|(i_op[3:0] == 4'h8))\|\|mpybusy;`	`\|\|(i_op[3:0] == 4'h8))\|\|mpybusy;`
`else	`else
`&&(i_op[3:1] == 3'h5);`	`&&(i_op[3:1] == 3'h5);`
`endif	`endif

`assign o_busy = r_busy;`	`assign o_busy = r_busy;`

`assign o_illegal = 1'b0;`	`assign o_illegal = 1'b0;`
`end endgenerate`	`end endgenerate`

`assign z = (o_c == 32'h0000);`	`assign z = (o_c == 32'h0000);`
`assign n = (o_c[31]);`	`assign n = (o_c[31]);`
`assign v = (set_ovfl)&&(pre_sign != o_c[31]);`	`assign v = (set_ovfl)&&(pre_sign != o_c[31]);`

`assign o_f = { v, n, c, z };`	`assign o_f = { v, n, c, z };`

`initial o_valid = 1'b0;`	`initial o_valid = 1'b0;`
`always @(posedge i_clk)`	`always @(posedge i_clk)`
`if (i_rst)`	`if (i_rst)`
`o_valid <= 1'b0;`	`o_valid <= 1'b0;`
`else`	`else`
`o_valid <= (i_ce)&&(i_valid)`	`o_valid <= (i_ce)&&(i_valid)`
`ifdef LONG_MPY	`ifdef LONG_MPY
`&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)`	`&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)`
`\|\|(o_busy)&&(~mpybusy);`	`\|\|(o_busy)&&(~mpybusy);`
`else	`else
`&&(i_op[3:1] != 3'h5)\|\|(o_busy);`	`&&(i_op[3:1] != 3'h5)\|\|(o_busy);`
`endif	`endif
`endmodule`	`endmodule`

///////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////

//

//

// Filename:    cpuops.v

// Filename:    cpuops.v

//

//

// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core

// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core

//

//

// Purpose:     This supports the instruction set reordering of operations

// Purpose:     This supports the instruction set reordering of operations

//              created by the second generation instruction set, as well as

//              created by the second generation instruction set, as well as

//      the new operations of POPC (population count) and BREV (bit reversal).

//      the new operations of POPC (population count) and BREV (bit reversal).

//

//

//

//

// Creator:     Dan Gisselquist, Ph.D.

// Creator:     Dan Gisselquist, Ph.D.

//              Gisselquist Technology, LLC

//              Gisselquist Technology, LLC

//

//

///////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////

//

//

// Copyright (C) 2015, Gisselquist Technology, LLC

// Copyright (C) 2015, Gisselquist Technology, LLC

//

//

// This program is free software (firmware): you can redistribute it and/or

// This program is free software (firmware): you can redistribute it and/or

// modify it under the terms of  the GNU General Public License as published

// modify it under the terms of  the GNU General Public License as published

// by the Free Software Foundation, either version 3 of the License, or (at

// by the Free Software Foundation, either version 3 of the License, or (at

// your option) any later version.

// your option) any later version.

//

//

// This program is distributed in the hope that it will be useful, but WITHOUT

// This program is distributed in the hope that it will be useful, but WITHOUT

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

// for more details.

// for more details.

//

//

// License:     GPL, v3, as defined and found on www.gnu.org,

// License:     GPL, v3, as defined and found on www.gnu.org,

//              http://www.gnu.org/licenses/gpl.html

//              http://www.gnu.org/licenses/gpl.html

//

//

//

//

///////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////

//

//

// `define      LONG_MPY

`define LONG_MPY

module  cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,

module  cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,

                        o_illegal, o_busy);

                        o_illegal, o_busy);

        parameter       IMPLEMENT_MPY = 1;

        parameter       IMPLEMENT_MPY = 1;

        input           i_clk, i_rst, i_ce;

        input           i_clk, i_rst, i_ce;

        input           [3:0]    i_op;

        input           [3:0]    i_op;

        input           [31:0]   i_a, i_b;

        input           [31:0]   i_a, i_b;

        input                   i_valid;

        input                   i_valid;

        output  reg     [31:0]   o_c;

        output  reg     [31:0]   o_c;

        output  wire    [3:0]    o_f;

        output  wire    [3:0]    o_f;

        output  reg             o_valid;

        output  reg             o_valid;

        output  wire            o_illegal;

        output  wire            o_illegal;

        output  wire            o_busy;

        output  wire            o_busy;

        // Rotate-left pre-logic

        // Rotate-left pre-logic

        wire    [63:0]   w_rol_tmp;

        wire    [63:0]   w_rol_tmp;

        assign  w_rol_tmp = { i_a, i_a } << i_b[4:0];

        assign  w_rol_tmp = { i_a, i_a } << i_b[4:0];

        wire    [31:0]   w_rol_result;

        wire    [31:0]   w_rol_result;

        assign  w_rol_result = w_rol_tmp[63:32]; // Won't set flags

        assign  w_rol_result = w_rol_tmp[63:32]; // Won't set flags

        // Shift register pre-logic

        // Shift register pre-logic

        wire    [32:0]           w_lsr_result, w_asr_result;

        wire    [32:0]           w_lsr_result, w_asr_result;

        assign  w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}

        assign  w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}

                                : ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR

                                : ( {i_a, 1'b0 } >>> (i_b[4:0]) );// ASR

        assign  w_lsr_result = (|i_b[31:5])? 33'h00

        assign  w_lsr_result = (|i_b[31:5])? 33'h00

                                : ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR

                                : ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR

        // Bit reversal pre-logic

        // Bit reversal pre-logic

        wire    [31:0]   w_brev_result;

        wire    [31:0]   w_brev_result;

        genvar  k;

        genvar  k;

        generate

        generate

        for(k=0; k<32; k=k+1)

        for(k=0; k<32; k=k+1)

        begin : bit_reversal_cpuop

        begin : bit_reversal_cpuop

                assign w_brev_result[k] = i_b[31-k];

                assign w_brev_result[k] = i_b[31-k];

        end endgenerate

        end endgenerate

        // Popcount pre-logic

        // Popcount pre-logic

        wire    [31:0]   w_popc_result;

        wire    [31:0]   w_popc_result;

        assign  w_popc_result[5:0]=

        assign  w_popc_result[5:0]=

                 ({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})

                 ({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})

                +({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})

                +({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})

                +({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})

                +({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})

                +({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})

                +({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})

                +({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})

                +({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})

                +({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})

                +({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})

                +({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})

                +({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})

                +({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});

                +({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});

        assign  w_popc_result[31:6] = 26'h00;

        assign  w_popc_result[31:6] = 26'h00;

        // Prelogic for our flags registers

        // Prelogic for our flags registers

        wire    z, n, v;

        wire    z, n, v;

        reg     c, pre_sign, set_ovfl;

        reg     c, pre_sign, set_ovfl;

        always @(posedge i_clk)

        always @(posedge i_clk)

                if (i_ce) // 1 LUT

                if (i_ce) // 1 LUT

                        set_ovfl =(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP

                        set_ovfl =(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP

                                ||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD

                                ||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD

                                ||(i_op == 4'h6) // LSL

                                ||(i_op == 4'h6) // LSL

                                ||(i_op == 4'h5)); // LSR

                                ||(i_op == 4'h5)); // LSR

`ifdef  LONG_MPY

`ifdef  LONG_MPY

        reg     mpyhi;

        reg     mpyhi;

        wire    mpybusy;

        wire    mpybusy;

`endif

`endif

        // A 4-way multiplexer can be done in one 6-LUT.

        // A 4-way multiplexer can be done in one 6-LUT.

        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with

        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with

        //      the Xilinx multiplexer fabric that follows.

        //      the Xilinx multiplexer fabric that follows.

        // Given that we wish to apply this multiplexer approach to 33-bits,

        // Given that we wish to apply this multiplexer approach to 33-bits,

        // this will cost a minimum of 132 6-LUTs.

        // this will cost a minimum of 132 6-LUTs.

        generate

        generate

        if (IMPLEMENT_MPY == 0)

        if (IMPLEMENT_MPY == 0)

        begin

        begin

                always @(posedge i_clk)

                always @(posedge i_clk)

                if (i_ce)

                if (i_ce)

                begin

                begin

                        pre_sign <= (i_a[31]);

                        pre_sign <= (i_a[31]);

                        c <= 1'b0;

                        c <= 1'b0;

                        casez(i_op)

                        casez(i_op)

                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB

                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB

                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And

                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And

                        4'b0010:{c,o_c } <= i_a + i_b;          // Add

                        4'b0010:{c,o_c } <= i_a + i_b;          // Add

                        4'b0011:   o_c   <= i_a | i_b;          // Or

                        4'b0011:   o_c   <= i_a | i_b;          // Or

                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor

                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor

                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR

                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR

                        4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0];     // LSL

                        4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0];     // LSL

                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR

                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR

`ifndef LONG_MPY

`ifndef LONG_MPY

                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI

                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI

`endif

`endif

                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO

                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO

                        // 4'b1010: The unimplemented MPYU,

                        // 4'b1010: The unimplemented MPYU,

                        // 4'b1011: and here for the unimplemented MPYS

                        // 4'b1011: and here for the unimplemented MPYS

                        4'b1100:   o_c   <= w_brev_result;      // BREV

                        4'b1100:   o_c   <= w_brev_result;      // BREV

                        4'b1101:   o_c   <= w_popc_result;      // POPC

                        4'b1101:   o_c   <= w_popc_result;      // POPC

                        4'b1110:   o_c   <= w_rol_result;       // ROL

                        4'b1110:   o_c   <= w_rol_result;       // ROL

                        default:   o_c   <= i_b;                // MOV, LDI

                        default:   o_c   <= i_b;                // MOV, LDI

                        endcase

                        endcase

end

end

                assign o_busy = 1'b0;

                assign o_busy = 1'b0;

                reg     r_illegal;

                reg     r_illegal;

                always @(posedge i_clk)

                always @(posedge i_clk)

                        r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb)

                        r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb)

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                                ||(i_op == 4'h8)

                                ||(i_op == 4'h8)

`endif

`endif

);

);

                assign o_illegal = r_illegal;

                assign o_illegal = r_illegal;

        end else begin

        end else begin

//

//

                // Multiply pre-logic

                // Multiply pre-logic

//

//

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                reg     [63:0]   r_mpy_result;

                reg     [63:0]   r_mpy_result;

                if (IMPLEMENT_MPY == 1)

                if (IMPLEMENT_MPY == 1)

                begin // Our two clock option (one clock extra)

                begin // Our two clock option (one clock extra)

                        reg     signed  [64:0]   r_mpy_a_input, r_mpy_b_input;

                        reg     signed  [64:0]   r_mpy_a_input, r_mpy_b_input;

                        reg                     mpypipe, x;

                        reg                     mpypipe, x;

                        initial mpypipe = 1'b0;

                        initial mpypipe = 1'b0;

                        always @(posedge i_clk)

                        always @(posedge i_clk)

                                mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));

                                mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));

                        always @(posedge i_clk)

                        always @(posedge i_clk)

                        if (i_ce)

                        if (i_ce)

                        begin

                        begin

                                r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},

                                r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},

                                                        i_a[31:0]};

                                                        i_a[31:0]};

                                r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},

                                r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},

                                                        i_b[31:0]};

                                                        i_b[31:0]};

end

end

                        always @(posedge i_clk) // product register, loaded only while mpypipe is set

                        always @(posedge i_clk) // product register, loaded only while mpypipe is set

                                if (mpypipe)

                                if (mpypipe)

                                        {x, r_mpy_result} <= r_mpy_a_input // non-blocking: r_mpy_result is read by other clocked blocks; x takes the 65th product bit

                                        {x, r_mpy_result} <= r_mpy_a_input // non-blocking: r_mpy_result is read by other clocked blocks; x takes the 65th product bit

                                                        * r_mpy_b_input;

                                                        * r_mpy_b_input;

                        always @(posedge i_clk) // remember which half of the 64-bit product was requested

                        always @(posedge i_clk) // remember which half of the 64-bit product was requested

                                if (i_ce)

                                if (i_ce)

                                        mpyhi  <= i_op[1]; // non-blocking: mpyhi is sampled by the clocked ALU output select

                                        mpyhi  <= i_op[1]; // non-blocking: mpyhi is sampled by the clocked ALU output select

                        assign  mpybusy = mpypipe;

                        assign  mpybusy = mpypipe;

                end else if (IMPLEMENT_MPY == 2)

                end else if (IMPLEMENT_MPY == 2)

                begin // The three clock option

                begin // The three clock option

                        reg     [31:0]   r_mpy_a_input, r_mpy_b_input;

                        reg     [31:0]   r_mpy_a_input, r_mpy_b_input;

                        reg             r_mpy_signed;

                        reg             r_mpy_signed;

                        reg     [1:0]    mpypipe;

                        reg     [1:0]    mpypipe;

                        // First clock, latch in the inputs

                        // First clock, latch in the inputs

                        always @(posedge i_clk)

                        always @(posedge i_clk)

                        begin

                        begin

                                // mpypipe indicates we have a multiply in the

                                // mpypipe indicates we have a multiply in the

                                // pipeline.  In this case, the multiply

                                // pipeline.  In this case, the multiply

                                // pipeline is a two stage pipeline, so we need

                                // pipeline is a two stage pipeline, so we need

                                // two bits in the pipe.

                                // two bits in the pipe.

                                mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)

                                mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)

                                                        ||(i_op[3:0]==4'h8));

                                                        ||(i_op[3:0]==4'h8));

                                mpypipe[1] <= mpypipe[0];

                                mpypipe[1] <= mpypipe[0];

                                if (i_op[0]) // i.e. if signed multiply

                                if (i_op[0]) // i.e. if signed multiply

                                begin

                                begin

                                        r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; // sign bit flipped; corrected later through pp_s

                                        r_mpy_a_input <= {(~i_a[31]),i_a[30:0]}; // sign bit flipped; corrected later through pp_s

                                        r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};

                                        r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};

                                end else begin

                                end else begin

                                        r_mpy_a_input <= i_a[31:0];

                                        r_mpy_a_input <= i_a[31:0];

                                        r_mpy_b_input <= i_b[31:0];

                                        r_mpy_b_input <= i_b[31:0];

end

end

                                // The signed bit really only matters in the

                                // The signed bit really only matters in the

                                // case of 64 bit multiply.  We'll keep track

                                // case of 64 bit multiply.  We'll keep track

                                // of it, though, and pretend in all other

                                // of it, though, and pretend in all other

                                // cases.

                                // cases.

                                r_mpy_signed  <= i_op[0];

                                r_mpy_signed  <= i_op[0];

                                if (i_ce)

                                if (i_ce)

                                        mpyhi  <= i_op[1]; // non-blocking, like every other assignment in this block; mpyhi is read by the clocked ALU output select

                                        mpyhi  <= i_op[1]; // non-blocking, like every other assignment in this block; mpyhi is read by the clocked ALU output select

end

end

                        assign  mpybusy = |mpypipe;

                        assign  mpybusy = |mpypipe;

                        // Second clock, do the multiplies, get the "partial

                        // Second clock, do the multiplies, get the "partial

                        // products".  Here, we break our input up into two

                        // products".  Here, we break our input up into two

                        // halves,

                        // halves,

//

//

                        //   A  = (2^16 ah + al)

                        //   A  = (2^16 ah + al)

                        //   B  = (2^16 bh + bl)

                        //   B  = (2^16 bh + bl)

//

//

                        // and use these to compute partial products.

                        // and use these to compute partial products.

//

//

                        //   AB = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl)

                        //   AB = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl)

//

//

                        // Since we're following the FOIL algorithm to get here,

                        // Since we're following the FOIL algorithm to get here,

                        // we'll name these partial products according to FOIL.

                        // we'll name these partial products according to FOIL.

//

//

                        // The trick is what happens if A or B is signed.  In

                        // The trick is what happens if A or B is signed.  In

                        // those cases, the real value of A will not be given by

                        // those cases, the real value of A will not be given by

                        //      A = (2^16 ah + al)

                        //      A = (2^16 ah + al)

                        // but rather

                        // but rather

                        //      A = (2^16 ah[31^] + al) - 2^31

                        //      A = (2^16 ah[31^] + al) - 2^31

                        //  (where we have flipped the sign bit of A)

                        //  (where we have flipped the sign bit of A)

                        // and so ...

                        // and so ...

//

//

                        // AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)

                        // AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)

                        //      = 2^32(ah*bh)

                        //      = 2^32(ah*bh)

                        //              +2^16 (ah*bl+al*bh)

                        //              +2^16 (ah*bl+al*bh)

                        //              +(al*bl)

                        //              +(al*bl)

                        //              - 2^31 (2^16 bh+bl + 2^16 ah+al)

                        //              - 2^31 (2^16 bh+bl + 2^16 ah+al)

                        //              + 2^62

                        //              + 2^62

                        //      = 2^32(ah*bh)

                        //      = 2^32(ah*bh)

                        //              +2^16 (ah*bl+al*bh)

                        //              +2^16 (ah*bl+al*bh)

                        //              +(al*bl)

                        //              +(al*bl)

                        //              + 2^31 (2^31 - (2^16 bh+bl + 2^16 ah+al))

                        //              + 2^31 (2^31 - (2^16 bh+bl + 2^16 ah+al))

//

//

                        reg     [31:0]   pp_f, pp_o, pp_i, pp_l;

                        reg     [31:0]   pp_f, pp_l; // F and L from FOIL

                        reg     [32:0]   pp_oi; // The O and I from FOIL

                        reg     [32:0]   pp_s;

                        reg     [32:0]   pp_s;

                        always @(posedge i_clk)

                        always @(posedge i_clk)

                        begin

                        begin

                                pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];

                                pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];

                                pp_o<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0];

                                pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]

                                pp_i<=r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];

                                        + r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];

                                pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];

                                pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];

                                // And a special one for the sign

                                // And a special one for the sign

                                if (r_mpy_signed)

                                if (r_mpy_signed)

                                        pp_s <= 32'h8000_0000-(

                                        pp_s <= 32'h8000_0000-(

                                                r_mpy_a_input[31:0]

                                                r_mpy_a_input[31:0]

                                                + r_mpy_b_input[31:0]);

                                                + r_mpy_b_input[31:0]);

                                else

                                else

                                        pp_s <= 33'h0;

                                        pp_s <= 33'h0;

end

end

                        // Third clock, add the results and produce a product

                        // Third clock, add the results and produce a product

                        always @(posedge i_clk)

                        always @(posedge i_clk)

                        begin

                        begin

                                r_mpy_result[15:0] <= pp_l[15:0];

                                r_mpy_result[15:0] <= pp_l[15:0];

                                r_mpy_result[63:16] <=

                                r_mpy_result[63:16] <=

                                        { 32'h00, pp_l[31:16] }

                                        { 32'h00, pp_l[31:16] }

                                        + { 16'h00, pp_o }

                                        + { 15'h00, pp_oi }

                                        + { 16'h00, pp_i }

                                        + { pp_s, 15'h00 }

                                        + { pp_s, 15'h00 }

                                        + { pp_f, 16'h00 };

                                        + { pp_f, 16'h00 };

end

end

                end // Fourth clock -- results are available for writeback.

                end // Fourth clock -- results are available for writeback.

`else

`else

                wire    signed  [16:0]   w_mpy_a_input, w_mpy_b_input;

                wire    signed  [16:0]   w_mpy_a_input, w_mpy_b_input;

                wire            [33:0]   w_mpy_result;

                wire            [33:0]   w_mpy_result;

                reg             [31:0]   r_mpy_result;

                reg             [31:0]   r_mpy_result;

                assign  w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };

                assign  w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };

                assign  w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };

                assign  w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };

                assign  w_mpy_result   = w_mpy_a_input * w_mpy_b_input;

                assign  w_mpy_result   = w_mpy_a_input * w_mpy_b_input;

                always @(posedge i_clk) // registers the low 32 bits of the 17x17 signed product

                always @(posedge i_clk) // registers the low 32 bits of the 17x17 signed product

                        if (i_ce)

                        if (i_ce)

                                r_mpy_result  <= w_mpy_result[31:0]; // non-blocking: r_mpy_result is read by the clocked ALU block

                                r_mpy_result  <= w_mpy_result[31:0]; // non-blocking: r_mpy_result is read by the clocked ALU block

`endif

`endif

//

//

                // The master ALU case statement

                // The master ALU case statement

//

//

                always @(posedge i_clk)

                always @(posedge i_clk)

                if (i_ce)

                if (i_ce)

                begin

                begin

                        pre_sign <= (i_a[31]);

                        pre_sign <= (i_a[31]);

                        c <= 1'b0;

                        c <= 1'b0;

                        casez(i_op)

                        casez(i_op)

                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB

                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB

                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And

                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And

                        4'b0010:{c,o_c } <= i_a + i_b;          // Add

                        4'b0010:{c,o_c } <= i_a + i_b;          // Add

                        4'b0011:   o_c   <= i_a | i_b;          // Or

                        4'b0011:   o_c   <= i_a | i_b;          // Or

                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor

                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor

                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR

                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR

                        4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0];     // LSL

                        4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0];     // LSL

                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR

                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                        4'b1000:   o_c   <= r_mpy_result[31:0]; // MPY

                        4'b1000:   o_c   <= r_mpy_result[31:0]; // MPY

`else

`else

                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI

                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI

`endif

`endif

                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO

                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                        4'b1010:   o_c   <= r_mpy_result[63:32]; // MPYHU

                        4'b1010:   o_c   <= r_mpy_result[63:32]; // MPYHU

                        4'b1011:   o_c   <= r_mpy_result[63:32]; // MPYHS

                        4'b1011:   o_c   <= r_mpy_result[63:32]; // MPYHS

`else

`else

                        4'b1010:   o_c   <= r_mpy_result; // MPYU

                        4'b1010:   o_c   <= r_mpy_result; // MPYU

                        4'b1011:   o_c   <= r_mpy_result; // MPYS

                        4'b1011:   o_c   <= r_mpy_result; // MPYS

`endif

`endif

                        4'b1100:   o_c   <= w_brev_result;      // BREV

                        4'b1100:   o_c   <= w_brev_result;      // BREV

                        4'b1101:   o_c   <= w_popc_result;      // POPC

                        4'b1101:   o_c   <= w_popc_result;      // POPC

                        4'b1110:   o_c   <= w_rol_result;       // ROL

                        4'b1110:   o_c   <= w_rol_result;       // ROL

                        default:   o_c   <= i_b;                // MOV, LDI

                        default:   o_c   <= i_b;                // MOV, LDI

                        endcase

                        endcase

                end else if (r_busy)

                end else if (r_busy)

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                        o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];

                        o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];

`else

`else

                        o_c <= r_mpy_result;

                        o_c <= r_mpy_result;

`endif

`endif

                reg     r_busy; // registered busy flag; drives o_busy

                reg     r_busy; // registered busy flag; drives o_busy

                initial r_busy = 1'b0;

                initial r_busy = 1'b0;

                always @(posedge i_clk)

                always @(posedge i_clk)

                        r_busy <= (~i_rst)&&(i_ce)&&(i_valid) // set when a valid multiply opcode is accepted outside of reset

                        r_busy <= (~i_rst)&&(i_ce)&&(i_valid) // set when a valid multiply opcode is accepted outside of reset

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                                        &&((i_op[3:1] == 3'h5) // opcodes 4'ha and 4'hb

                                        &&((i_op[3:1] == 3'h5) // opcodes 4'ha and 4'hb

                                                ||(i_op[3:0] == 4'h8))||mpybusy; // && binds tighter than ||, so mpybusy holds r_busy high even while i_rst is set -- NOTE(review): confirm this is intended

                                                ||(i_op[3:0] == 4'h8))||mpybusy; // && binds tighter than ||, so mpybusy holds r_busy high even while i_rst is set -- NOTE(review): confirm this is intended

`else

`else

                                        &&(i_op[3:1] == 3'h5); // opcodes 4'ha and 4'hb only

                                        &&(i_op[3:1] == 3'h5); // opcodes 4'ha and 4'hb only

`endif

`endif

                assign o_busy = r_busy;

                assign o_busy = r_busy;

                assign o_illegal = 1'b0;

                assign o_illegal = 1'b0;

        end endgenerate

        end endgenerate

        assign  z = (o_c == 32'h0000);

        assign  z = (o_c == 32'h0000);

        assign  n = (o_c[31]);

        assign  n = (o_c[31]);

        assign  v = (set_ovfl)&&(pre_sign != o_c[31]);

        assign  v = (set_ovfl)&&(pre_sign != o_c[31]);

        assign  o_f = { v, n, c, z };

        assign  o_f = { v, n, c, z };

        initial o_valid = 1'b0;

        initial o_valid = 1'b0;

        always @(posedge i_clk)

        always @(posedge i_clk)

                if (i_rst)

                if (i_rst)

                        o_valid <= 1'b0;

                        o_valid <= 1'b0;

                else

                else

                        o_valid <= (i_ce)&&(i_valid)

                        o_valid <= (i_ce)&&(i_valid)

`ifdef  LONG_MPY

`ifdef  LONG_MPY

                                &&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)

                                &&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)

                                ||(o_busy)&&(~mpybusy);

                                ||(o_busy)&&(~mpybusy);

`else

`else

                                &&(i_op[3:1] != 3'h5)||(o_busy);

                                &&(i_op[3:1] != 3'h5)||(o_busy);

`endif

`endif

endmodule

endmodule

Browse

Tools

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [cpuops.v] - Diff between revs 133 and 138