URL https://opencores.org/ocsvn/openarty/openarty/trunk

Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [cpuops.v] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log


///////////////////////////////////////////////////////////////////////////
//
// Filename:    cpuops.v
//
// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:     This supports the instruction set reordering of operations
//              created by the second generation instruction set, as well as
//      the new operations of POPC (population count) and BREV (bit reversal).
//
//
// Creator:     Dan Gisselquist, Ph.D.
//              Gisselquist Technology, LLC
//
///////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:     GPL, v3, as defined and found on www.gnu.org,
//              http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////
//
`define LONG_MPY
//
// cpuops -- the ALU of the Zip CPU.
//
// A new operation is latched on every clock where i_ce is high.  The
// operation is selected by i_op, as decoded by the case statements below:
//
//      4'h0  SUB/CMP           4'h8  MPY (low word) with LONG_MPY,
//      4'h1  AND/BTST                LODIHI without it
//      4'h2  ADD               4'h9  LODILO
//      4'h3  OR                4'ha  MPYHU with LONG_MPY, else MPYU
//      4'h4  XOR               4'hb  MPYHS with LONG_MPY, else MPYS
//      4'h5  LSR               4'hc  BREV  (bit reversal of i_b)
//      4'h6  LSL               4'hd  POPC  (population count of i_b)
//      4'h7  ASR               4'he  ROL   (rotate i_a left by i_b[4:0])
//                              4'hf  MOV/LDI (o_c <= i_b)
//
// Parameters:
//      IMPLEMENT_MPY   0: no multiplier; multiply opcodes raise o_illegal.
//                      1: (LONG_MPY) multiply taking one extra clock.
//                      2: (LONG_MPY) pipelined multiply built from 16x16
//                         partial products.
//                      Without LONG_MPY any non-zero value selects the
//                      single 16x16 multiply.
//
// Ports:
//      i_clk, i_rst    Clock and synchronous reset.  i_rst clears o_valid
//                      and keeps a new multiply from raising o_busy.
//      i_ce            Accept the operation on i_op/i_a/i_b this clock.
//      i_valid         Qualifies o_valid and o_busy for the accepted op.
//      i_op, i_a, i_b  Opcode and the two 32-bit operands.
//      o_c             Registered 32-bit result.
//      o_f             Flags, { v, n, c, z }, derived from o_c and the
//                      registered carry/overflow helpers.
//      o_valid         o_c holds the result of a valid operation.
//      o_illegal       Registered; set only when IMPLEMENT_MPY == 0 and a
//                      multiply opcode is presented with i_ce.
//      o_busy          A multiply is still working its way to o_c.
//
// All registers written from clocked always blocks use non-blocking
// assignments, so that registers shared between always blocks (e.g.
// r_mpy_result, mpyhi) cannot race in simulation.
//
module  cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
                        o_illegal, o_busy);
        parameter       IMPLEMENT_MPY = 1;
        input           i_clk, i_rst, i_ce;
        input           [3:0]    i_op;
        input           [31:0]   i_a, i_b;
        input                   i_valid;
        output  reg     [31:0]   o_c;
        output  wire    [3:0]    o_f;
        output  reg             o_valid;
        output  wire            o_illegal;
        output  wire            o_busy;

        // Rotate-left pre-logic: shift a doubled copy of i_a and keep the
        // upper word, so the bits shifted out of the top re-enter at the
        // bottom.
        wire    [63:0]   w_rol_tmp;
        assign  w_rol_tmp = { i_a, i_a } << i_b[4:0];
        wire    [31:0]   w_rol_result;
        assign  w_rol_result = w_rol_tmp[63:32]; // Won't set flags

        // Shift register pre-logic.  Each result is 33 bits wide: the 32-bit
        // value plus the last bit shifted out, which becomes the carry.
        // For the right shifts the carry is bit 0, for LSL it is bit 32.
        wire    [32:0]           w_lsr_result, w_asr_result, w_lsl_result;
        wire    signed  [32:0]   w_pre_asr_input, w_pre_asr_shifted;
        assign  w_pre_asr_input = { i_a, 1'b0 };
        assign  w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
        // ASR by 32 or more: every bit, carry included, is the sign bit
        assign  w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
                                : w_pre_asr_shifted;// ASR
        // LSR/LSL by more than 32 yields zero; by exactly 32 the result is
        // zero with the last bit out (i_a[31] or i_a[0]) left in the carry.
        assign  w_lsr_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
                                :((i_b[5])?{32'h0,i_a[31]}
                                : ( { i_a, 1'b0 } >> (i_b[4:0]) ));// LSR
        assign  w_lsl_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
                                :((i_b[5])?{i_a[0], 32'h0}
                                : ({1'b0, i_a } << i_b[4:0]));   // LSL

        // Bit reversal pre-logic
        wire    [31:0]   w_brev_result;
        genvar  k;
        generate
        for(k=0; k<32; k=k+1)
        begin : bit_reversal_cpuop
                assign w_brev_result[k] = i_b[31-k];
        end endgenerate

        // Popcount pre-logic: sum the 32 bits of i_b, each zero-extended to
        // six bits so the total (0..32) cannot overflow.
        wire    [31:0]   w_popc_result;
        assign  w_popc_result[5:0]=
                 ({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})
                +({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})
                +({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})
                +({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})
                +({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})
                +({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})
                +({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})
                +({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});
        assign  w_popc_result[31:6] = 26'h00;

        // Prelogic for our flags registers
        wire    z, n, v;
        reg     c, pre_sign, set_ovfl;
        // set_ovfl records whether the accepted operation is one that can
        // overflow at all; the overflow flag itself is then a sign change
        // between i_a (pre_sign) and the result.
        always @(posedge i_clk)
                if (i_ce) // 1 LUT
                        set_ovfl <=(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
                                ||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
                                ||(i_op == 4'h6) // LSL
                                ||(i_op == 4'h5)); // LSR

`ifdef  LONG_MPY
        reg     mpyhi;          // Selects the upper word of the product
        wire    mpybusy;        // The multiplier pipeline is not yet empty
`endif

        // A 4-way multiplexer can be done in one 6-LUT.
        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with
        //      the Xilinx multiplexer fabric that follows.
        // Given that we wish to apply this multiplexer approach to 33-bits,
        // this will cost a minimum of 132 6-LUTs.
        generate
        if (IMPLEMENT_MPY == 0)
        begin
                //
                // No multiplier: multiply opcodes fall through to the default
                // arm below and are reported through o_illegal.
                //
                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And
                        4'b0010:{c,o_c } <= i_a + i_b;          // Add
                        4'b0011:   o_c   <= i_a | i_b;          // Or
                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor
                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR
                        4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR
`ifndef LONG_MPY
                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
                        // 4'b1010: The unimplemented MPYU,
                        // 4'b1011: and here for the unimplemented MPYS
                        4'b1100:   o_c   <= w_brev_result;      // BREV
                        4'b1101:   o_c   <= w_popc_result;      // POPC
                        4'b1110:   o_c   <= w_rol_result;       // ROL
                        default:   o_c   <= i_b;                // MOV, LDI
                        endcase
                end

                assign o_busy = 1'b0;
`ifdef  LONG_MPY
                // Nothing is ever in flight without a multiplier.  Drive the
                // wire so the o_valid logic below never reads a floating net.
                assign mpybusy = 1'b0;
`endif

                reg     r_illegal;
                always @(posedge i_clk)
                        r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb)
`ifdef  LONG_MPY
                                ||(i_op == 4'h8)
`endif
                        );
                assign o_illegal = r_illegal;
        end else begin
                //
                // Multiply pre-logic
                //
`ifdef  LONG_MPY
                reg     [63:0]   r_mpy_result;
                if (IMPLEMENT_MPY == 1)
                begin // Our two clock option (one clock extra)
                        // Operands are extended to 65 bits: sign extended for
                        // the signed opcodes (i_op[0] set), zero extended
                        // otherwise, so one signed multiply serves both.
                        reg     signed  [64:0]   r_mpy_a_input, r_mpy_b_input;
                        reg                     mpypipe, x;
                        initial mpypipe = 1'b0;
                        always @(posedge i_clk)
                                mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));
                        always @(posedge i_clk)
                        if (i_ce)
                        begin
                                r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},
                                                        i_a[31:0]};
                                r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},
                                                        i_b[31:0]};
                        end
                        // x only absorbs the unused 65th bit of the product
                        always @(posedge i_clk)
                                if (mpypipe)
                                        {x, r_mpy_result} <= r_mpy_a_input
                                                        * r_mpy_b_input;
                        always @(posedge i_clk)
                                if (i_ce)
                                        mpyhi  <= i_op[1];
                        assign  mpybusy = mpypipe;
                end else if (IMPLEMENT_MPY == 2)
                begin // The three clock option
                        reg     [31:0]   r_mpy_a_input, r_mpy_b_input;
                        reg             r_mpy_signed;
                        reg     [1:0]    mpypipe;
                        initial mpypipe = 2'b00;

                        // First clock, latch in the inputs
                        always @(posedge i_clk)
                        begin
                                // mpypipe indicates we have a multiply in the
                                // pipeline.  In this case, the multiply
                                // pipeline is a two stage pipeline, so we need
                                // two bits in the pipe.
                                mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)
                                                        ||(i_op[3:0]==4'h8));
                                mpypipe[1] <= mpypipe[0];

                                if (i_op[0]) // i.e. if signed multiply
                                begin
                                        r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
                                        r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
                                end else begin
                                        r_mpy_a_input <= i_a[31:0];
                                        r_mpy_b_input <= i_b[31:0];
                                end
                                // The signed bit really only matters in the
                                // case of 64 bit multiply.  We'll keep track
                                // of it, though, and pretend in all other
                                // cases.
                                r_mpy_signed  <= i_op[0];

                                if (i_ce)
                                        mpyhi  <= i_op[1];
                        end

                        assign  mpybusy = |mpypipe;

                        // Second clock, do the multiplies, get the "partial
                        // products".  Here, we break our input up into two
                        // halves,
                        //
                        //   A  = (2^16 ah + al)
                        //   B  = (2^16 bh + bl)
                        //
                        // and use these to compute partial products.
                        //
                        //   AB = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl)
                        //
                        // Since we're following the FOIL algorithm to get here,
                        // we'll name these partial products according to FOIL.
                        //
                        // The trick is what happens if A or B is signed.  In
                        // those cases, the real value of A will not be given by
                        //      A = (2^16 ah + al)
                        // but rather
                        //      A = (2^16 ah + al) - 2^31
                        //  (where we have flipped the sign bit of A before
                        //   splitting it into ah and al)
                        // and so ...
                        //
                        // AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
                        //      = 2^32(ah*bh)
                        //              +2^16 (ah*bl+al*bh)
                        //              +(al*bl)
                        //              - 2^31 (2^16 bh+bl + 2^16 ah+al)
                        //              + 2^62
                        //      = 2^32(ah*bh)
                        //              +2^16 (ah*bl+al*bh)
                        //              +(al*bl)
                        //              + 2^31 (2^31 - (2^16 bh+bl + 2^16 ah+al))
                        //
                        // The last term is pp_s below, evaluated modulo 2^33
                        // and then weighted by 2^31 in the final sum.
                        //
                        reg     [31:0]   pp_f, pp_l; // F and L from FOIL
                        reg     [32:0]   pp_oi; // The O and I from FOIL
                        reg     [32:0]   pp_s;
                        always @(posedge i_clk)
                        begin
                                pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
                                pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
                                        + r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
                                pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
                                // And a special one for the sign
                                if (r_mpy_signed)
                                        pp_s <= 32'h8000_0000-(
                                                r_mpy_a_input[31:0]
                                                + r_mpy_b_input[31:0]);
                                else
                                        pp_s <= 33'h0;
                        end

                        // Third clock, add the results and produce a product.
                        // The low 16 bits come straight from pp_l; everything
                        // else is summed in units of 2^16.
                        always @(posedge i_clk)
                        begin
                                r_mpy_result[15:0] <= pp_l[15:0];
                                r_mpy_result[63:16] <=
                                        { 32'h00, pp_l[31:16] }
                                        + { 15'h00, pp_oi }
                                        + { pp_s, 15'h00 }
                                        + { pp_f, 16'h00 };
                        end
                end // Fourth clock -- results are available for writeback.
`else
                // The original 16x16 multiply.  A 17th bit carries the sign
                // for the signed opcode (i_op[0]) and is zero otherwise.
                wire    signed  [16:0]   w_mpy_a_input, w_mpy_b_input;
                wire            [33:0]   w_mpy_result;
                reg             [31:0]   r_mpy_result;
                assign  w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };
                assign  w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };
                assign  w_mpy_result   = w_mpy_a_input * w_mpy_b_input;
                always @(posedge i_clk)
                        if (i_ce)
                                r_mpy_result  <= w_mpy_result[31:0];
`endif

                // r_busy is declared ahead of the ALU case statement, which
                // is its first user.
                reg     r_busy;
                initial r_busy = 1'b0;

                //
                // The master ALU case statement
                //
                // For the multiply opcodes the value written here on i_ce is
                // whatever the multiplier held beforehand; the real product
                // is copied into o_c by the r_busy branch on the clocks that
                // follow.
                //
                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And
                        4'b0010:{c,o_c } <= i_a + i_b;          // Add
                        4'b0011:   o_c   <= i_a | i_b;          // Or
                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor
                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR
                        4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR
`ifdef  LONG_MPY
                        4'b1000:   o_c   <= r_mpy_result[31:0]; // MPY
`else
                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
`ifdef  LONG_MPY
                        4'b1010:   o_c   <= r_mpy_result[63:32]; // MPYHU
                        4'b1011:   o_c   <= r_mpy_result[63:32]; // MPYHS
`else
                        4'b1010:   o_c   <= r_mpy_result; // MPYU
                        4'b1011:   o_c   <= r_mpy_result; // MPYS
`endif
                        4'b1100:   o_c   <= w_brev_result;      // BREV
                        4'b1101:   o_c   <= w_popc_result;      // POPC
                        4'b1110:   o_c   <= w_rol_result;       // ROL
                        default:   o_c   <= i_b;                // MOV, LDI
                        endcase
                end else if (r_busy)
`ifdef  LONG_MPY
                        o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];
`else
                        o_c <= r_mpy_result;
`endif

                // Busy from the clock after a valid multiply is accepted and,
                // with LONG_MPY, for one clock beyond the multiplier pipeline
                // draining.  Note that i_rst only blocks a new multiply from
                // starting; it does not flush one already in the pipeline.
                always @(posedge i_clk)
`ifdef  LONG_MPY
                        r_busy <= ((~i_rst)&&(i_ce)&&(i_valid)
                                        &&((i_op[3:1] == 3'h5)
                                                ||(i_op[3:0] == 4'h8)))
                                ||(mpybusy);
`else
                        r_busy <= (~i_rst)&&(i_ce)&&(i_valid)
                                        &&(i_op[3:1] == 3'h5);
`endif

                assign o_busy = r_busy;

                assign o_illegal = 1'b0;
        end endgenerate

        // Flags, all taken from the registered result
        assign  z = (o_c == 32'h0000);
        assign  n = (o_c[31]);
        assign  v = (set_ovfl)&&(pre_sign != o_c[31]);

        assign  o_f = { v, n, c, z };

        // o_valid follows a valid non-multiply operation by one clock.  For
        // a multiply it is held off until the unit is busy but the
        // multiplier pipeline itself has emptied, i.e. until the clock on
        // which the product is copied into o_c.
        initial o_valid = 1'b0;
        always @(posedge i_clk)
                if (i_rst)
                        o_valid <= 1'b0;
                else
`ifdef  LONG_MPY
                        o_valid <= ((i_ce)&&(i_valid)
                                        &&(i_op[3:1] != 3'h5)
                                        &&(i_op[3:0] != 4'h8))
                                ||((o_busy)&&(~mpybusy));
`else
                        o_valid <= ((i_ce)&&(i_valid)&&(i_op[3:1] != 3'h5))
                                ||(o_busy);
`endif
endmodule

Browse

Tools

Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [cpuops.v] - Blame information for rev 20

Line No.	Rev	Author	Line
1	3	dgisselq	`///////////////////////////////////////////////////////////////////////////`
2			`//`
3			`// Filename: cpuops.v`
4			`//`
5			`// Project: Zip CPU -- a small, lightweight, RISC CPU soft core`
6			`//`
7			`// Purpose: This supports the instruction set reordering of operations`
8			`// created by the second generation instruction set, as well as`
9			`// the new operations of POPC (population count) and BREV (bit reversal).`
10			`//`
11			`//`
12			`// Creator: Dan Gisselquist, Ph.D.`
13			`// Gisselquist Technology, LLC`
14			`//`
15			`///////////////////////////////////////////////////////////////////////////`
16			`//`
17			`// Copyright (C) 2015, Gisselquist Technology, LLC`
18			`//`
19			`// This program is free software (firmware): you can redistribute it and/or`
20			`// modify it under the terms of the GNU General Public License as published`
21			`// by the Free Software Foundation, either version 3 of the License, or (at`
22			`// your option) any later version.`
23			`//`
24			`// This program is distributed in the hope that it will be useful, but WITHOUT`
25			`// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or`
26			`// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
27			`// for more details.`
28			`//`
29			`// License: GPL, v3, as defined and found on www.gnu.org,`
30			`// http://www.gnu.org/licenses/gpl.html`
31			`//`
32			`//`
33			`///////////////////////////////////////////////////////////////////////////`
34			`//`
35			`define LONG_MPY
36			`module cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,`
37			`o_illegal, o_busy);`
38			`parameter IMPLEMENT_MPY = 1;`
39			`input i_clk, i_rst, i_ce;`
40			`input [3:0] i_op;`
41			`input [31:0] i_a, i_b;`
42			`input i_valid;`
43			`output reg [31:0] o_c;`
44			`output wire [3:0] o_f;`
45			`output reg o_valid;`
46			`output wire o_illegal;`
47			`output wire o_busy;`
48
49			`// Rotate-left pre-logic`
50			`wire [63:0] w_rol_tmp;`
51			`assign w_rol_tmp = { i_a, i_a } << i_b[4:0];`
52			`wire [31:0] w_rol_result;`
53			`assign w_rol_result = w_rol_tmp[63:32]; // Won't set flags`
54
55			`// Shift register pre-logic`
56			`wire [32:0] w_lsr_result, w_asr_result, w_lsl_result;`
57			`wire signed [32:0] w_pre_asr_input, w_pre_asr_shifted;`
58			`assign w_pre_asr_input = { i_a, 1'b0 };`
59			`assign w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];`
60			`assign w_asr_result = (\|i_b[31:5])? {(33){i_a[31]}}`
61			`: w_pre_asr_shifted;// ASR`
62			`assign w_lsr_result = ((\|i_b[31:6])\|\|(i_b[5]&&(i_b[4:0]!=0)))? 33'h00`
63			`:((i_b[5])?{32'h0,i_a[31]}`
64
65			`: ( { i_a, 1'b0 } >> (i_b[4:0]) ));// LSR`
66			`assign w_lsl_result = ((\|i_b[31:6])\|\|(i_b[5]&&(i_b[4:0]!=0)))? 33'h00`
67			`:((i_b[5])?{i_a[0], 32'h0}`
68			`: ({1'b0, i_a } << i_b[4:0])); // LSL`
69
70			`// Bit reversal pre-logic`
71			`wire [31:0] w_brev_result;`
72			`genvar k;`
73			`generate`
74			`for(k=0; k<32; k=k+1)`
75			`begin : bit_reversal_cpuop`
76			`assign w_brev_result[k] = i_b[31-k];`
77			`end endgenerate`
78
79			`// Popcount pre-logic`
80			`wire [31:0] w_popc_result;`
81			`assign w_popc_result[5:0]=`
82			`({5'h0,i_b[ 0]}+{5'h0,i_b[ 1]}+{5'h0,i_b[ 2]}+{5'h0,i_b[ 3]})`
83			`+({5'h0,i_b[ 4]}+{5'h0,i_b[ 5]}+{5'h0,i_b[ 6]}+{5'h0,i_b[ 7]})`
84			`+({5'h0,i_b[ 8]}+{5'h0,i_b[ 9]}+{5'h0,i_b[10]}+{5'h0,i_b[11]})`
85			`+({5'h0,i_b[12]}+{5'h0,i_b[13]}+{5'h0,i_b[14]}+{5'h0,i_b[15]})`
86			`+({5'h0,i_b[16]}+{5'h0,i_b[17]}+{5'h0,i_b[18]}+{5'h0,i_b[19]})`
87			`+({5'h0,i_b[20]}+{5'h0,i_b[21]}+{5'h0,i_b[22]}+{5'h0,i_b[23]})`
88			`+({5'h0,i_b[24]}+{5'h0,i_b[25]}+{5'h0,i_b[26]}+{5'h0,i_b[27]})`
89			`+({5'h0,i_b[28]}+{5'h0,i_b[29]}+{5'h0,i_b[30]}+{5'h0,i_b[31]});`
90			`assign w_popc_result[31:6] = 26'h00;`
91
92			`// Prelogic for our flags registers`
93			`wire z, n, v;`
94			`reg c, pre_sign, set_ovfl;`
95			`always @(posedge i_clk)`
96			`if (i_ce) // 1 LUT`
97			`set_ovfl =(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP`
98			`\|\|((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD`
99			`\|\|(i_op == 4'h6) // LSL`
100			`\|\|(i_op == 4'h5)); // LSR`
101
102			`ifdef LONG_MPY
103			`reg mpyhi;`
104			`wire mpybusy;`
105			`endif
106
107			`// A 4-way multiplexer can be done in one 6-LUT.`
108			`// A 16-way multiplexer can therefore be done in 4x 6-LUT's with`
109			`// the Xilinx multiplexer fabric that follows.`
110			`// Given that we wish to apply this multiplexer approach to 33-bits,`
111			`// this will cost a minimum of 132 6-LUTs.`
112			`generate`
113			`if (IMPLEMENT_MPY == 0)`
114			`begin`
115			`always @(posedge i_clk)`
116			`if (i_ce)`
117			`begin`
118			`pre_sign <= (i_a[31]);`
119			`c <= 1'b0;`
120			`casez(i_op)`
121			`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`
122			`4'b0001: o_c <= i_a & i_b; // BTST/And`
123			`4'b0010:{c,o_c } <= i_a + i_b; // Add`
124			`4'b0011: o_c <= i_a \| i_b; // Or`
125			`4'b0100: o_c <= i_a ^ i_b; // Xor`
126			`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`
127			`4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL`
128			`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`
129			`ifndef LONG_MPY
130			`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`
131			`endif
132			`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`
133			`// 4'h1010: The unimplemented MPYU,`
134			`// 4'h1011: and here for the unimplemented MPYS`
135			`4'b1100: o_c <= w_brev_result; // BREV`
136			`4'b1101: o_c <= w_popc_result; // POPC`
137			`4'b1110: o_c <= w_rol_result; // ROL`
138			`default: o_c <= i_b; // MOV, LDI`
139			`endcase`
140			`end`
141
142			`assign o_busy = 1'b0;`
143
144			`reg r_illegal;`
145			`always @(posedge i_clk)`
146			`r_illegal <= (i_ce)&&((i_op == 4'ha)\|\|(i_op == 4'hb)`
147			`ifdef LONG_MPY
148			`\|\|(i_op == 4'h8)`
149			`endif
150			`);`
151			`assign o_illegal = r_illegal;`
152			`end else begin`
153			`//`
154			`// Multiply pre-logic`
155			`//`
156			`ifdef LONG_MPY
157			`reg [63:0] r_mpy_result;`
158			`if (IMPLEMENT_MPY == 1)`
159			`begin // Our two clock option (one clock extra)`
160			`reg signed [64:0] r_mpy_a_input, r_mpy_b_input;`
161			`reg mpypipe, x;`
162			`initial mpypipe = 1'b0;`
163			`always @(posedge i_clk)`
164			`mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)\|\|(i_op[3:0]==4'h8));`
165			`always @(posedge i_clk)`
166			`if (i_ce)`
167			`begin`
168			`r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},`
169			`i_a[31:0]};`
170			`r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},`
171			`i_b[31:0]};`
172			`end`
173			`always @(posedge i_clk)`
174			`if (mpypipe)`
175			`{x, r_mpy_result} = r_mpy_a_input`
176			`* r_mpy_b_input;`
177			`always @(posedge i_clk)`
178			`if (i_ce)`
179			`mpyhi = i_op[1];`
180			`assign mpybusy = mpypipe;`
181			`end else if (IMPLEMENT_MPY == 2)`
182			`begin // The three clock option`
183			`reg [31:0] r_mpy_a_input, r_mpy_b_input;`
184			`reg r_mpy_signed;`
185			`reg [1:0] mpypipe;`
186
187			`// First clock, latch in the inputs`
188			`always @(posedge i_clk)`
189			`begin`
190			`// mpypipe indicates we have a multiply in the`
191			`// pipeline. In this case, the multiply`
192			`// pipeline is a two stage pipeline, so we need`
193			`// two bits in the pipe.`
194			`mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)`
195			`\|\|(i_op[3:0]==4'h8));`
196			`mpypipe[1] <= mpypipe[0];`
197
198			`if (i_op[0]) // i.e. if signed multiply`
199			`begin`
200			`r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};`
201			`r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};`
202			`end else begin`
203			`r_mpy_a_input <= i_a[31:0];`
204			`r_mpy_b_input <= i_b[31:0];`
205			`end`
206			`// The signed bit really only matters in the`
207			`// case of 64 bit multiply. We'll keep track`
208			`// of it, though, and pretend in all other`
209			`// cases.`
210			`r_mpy_signed <= i_op[0];`
211
212			`if (i_ce)`
213			`mpyhi = i_op[1];`
214			`end`
215
216			`assign mpybusy = \|mpypipe;`
217
218			`// Second clock, do the multiplies, get the "partial`
219			`// products". Here, we break our input up into two`
220			`// halves,`
221			`//`
222			`// A = (2^16 ah + al)`
223			`// B = (2^16 bh + bl)`
224			`//`
225			`// and use these to compute partial products.`
226			`//`
227			`// AB = (2^32 ahbh + 2^16 (ahbl + albh) + (albl)`
228			`//`
229			`// Since we're following the FOIL algorithm to get here,`
230			`// we'll name these partial products according to FOIL.`
231			`//`
232			`// The trick is what happens if A or B is signed. In`
233			`// those cases, the real value of A will not be given by`
234			`// A = (2^16 ah + al)`
235			`// but rather`
236			`// A = (2^16 ah[31^] + al) - 2^31`
237			`// (where we have flipped the sign bit of A)`
238			`// and so ...`
239			`//`
240			`// AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)`
241			`// = 2^32(ah*bh)`
242			`// +2^16 (ahbl+albh)`
243			`// +(al*bl)`
244			`// - 2^31 (2^16 bh+bl + 2^16 ah+al)`
245			`// - 2^62`
246			`// = 2^32(ah*bh)`
247			`// +2^16 (ahbl+albh)`
248			`// +(al*bl)`
249			`// - 2^31 (2^16 bh+bl + 2^16 ah+al + 2^31)`
250			`//`
251			`reg [31:0] pp_f, pp_l; // F and L from FOIL`
252			`reg [32:0] pp_oi; // The O and I from FOIL`
253			`reg [32:0] pp_s;`
254			`always @(posedge i_clk)`
255			`begin`
256			`pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];`
257			`pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]`
258			`+ r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];`
259			`pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];`
260			`// And a special one for the sign`
261			`if (r_mpy_signed)`
262			`pp_s <= 32'h8000_0000-(`
263			`r_mpy_a_input[31:0]`
264			`+ r_mpy_b_input[31:0]);`
265			`else`
266			`pp_s <= 33'h0;`
267			`end`
268
269			`// Third clock, add the results and produce a product`
270			`always @(posedge i_clk)`
271			`begin`
272			`r_mpy_result[15:0] <= pp_l[15:0];`
273			`r_mpy_result[63:16] <=`
274			`{ 32'h00, pp_l[31:16] }`
275			`+ { 15'h00, pp_oi }`
276			`+ { pp_s, 15'h00 }`
277			`+ { pp_f, 16'h00 };`
278			`end`
279			`end // Fourth clock -- results are available for writeback.`
280			`else
281			`wire signed [16:0] w_mpy_a_input, w_mpy_b_input;`
282			`wire [33:0] w_mpy_result;`
283			`reg [31:0] r_mpy_result;`
284			`assign w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };`
285			`assign w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };`
286			`assign w_mpy_result = w_mpy_a_input * w_mpy_b_input;`
287			`always @(posedge i_clk)`
288			`if (i_ce)`
289			`r_mpy_result = w_mpy_result[31:0];`
290			`endif
291
292			`//`
293			`// The master ALU case statement`
294			`//`
295			`always @(posedge i_clk)`
296			`if (i_ce)`
297			`begin`
298			`pre_sign <= (i_a[31]);`
299			`c <= 1'b0;`
300			`casez(i_op)`
301			`4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB`
302			`4'b0001: o_c <= i_a & i_b; // BTST/And`
303			`4'b0010:{c,o_c } <= i_a + i_b; // Add`
304			`4'b0011: o_c <= i_a \| i_b; // Or`
305			`4'b0100: o_c <= i_a ^ i_b; // Xor`
306			`4'b0101:{o_c,c } <= w_lsr_result[32:0]; // LSR`
307			`4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL`
308			`4'b0111:{o_c,c } <= w_asr_result[32:0]; // ASR`
309			`ifdef LONG_MPY
310			`4'b1000: o_c <= r_mpy_result[31:0]; // MPY`
311			`else
312			`4'b1000: o_c <= { i_b[15: 0], i_a[15:0] }; // LODIHI`
313			`endif
314			`4'b1001: o_c <= { i_a[31:16], i_b[15:0] }; // LODILO`
315			`ifdef LONG_MPY
316			`4'b1010: o_c <= r_mpy_result[63:32]; // MPYHU`
317			`4'b1011: o_c <= r_mpy_result[63:32]; // MPYHS`
318			`else
319			`4'b1010: o_c <= r_mpy_result; // MPYU`
320			`4'b1011: o_c <= r_mpy_result; // MPYS`
321			`endif
322			`4'b1100: o_c <= w_brev_result; // BREV`
323			`4'b1101: o_c <= w_popc_result; // POPC`
324			`4'b1110: o_c <= w_rol_result; // ROL`
325			`default: o_c <= i_b; // MOV, LDI`
326			`endcase`
327			`end else if (r_busy)`
328			`ifdef LONG_MPY
329			`o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];`
330			`else
331			`o_c <= r_mpy_result;`
332			`endif
333
334			`reg r_busy;`
335			`initial r_busy = 1'b0;`
336			`always @(posedge i_clk)`
337			`r_busy <= (~i_rst)&&(i_ce)&&(i_valid)`
338			`ifdef LONG_MPY
339			`&&((i_op[3:1] == 3'h5)`
340			`\|\|(i_op[3:0] == 4'h8))\|\|mpybusy;`
341			`else
342			`&&(i_op[3:1] == 3'h5);`
343			`endif
344
345			`assign o_busy = r_busy;`
346
347			`assign o_illegal = 1'b0;`
348			`end endgenerate`
349
350			`assign z = (o_c == 32'h0000);`
351			`assign n = (o_c[31]);`
352			`assign v = (set_ovfl)&&(pre_sign != o_c[31]);`
353
354			`assign o_f = { v, n, c, z };`
355
356			`initial o_valid = 1'b0;`
357			`always @(posedge i_clk)`
358			`if (i_rst)`
359			`o_valid <= 1'b0;`
360			`else`
361			`o_valid <= (i_ce)&&(i_valid)`
362			`ifdef LONG_MPY
363			`&&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8)`
364			`\|\|(o_busy)&&(~mpybusy);`
365			`else
366			`&&(i_op[3:1] != 3'h5)\|\|(o_busy);`
367			`endif
368			`endmodule`