-- URL
-- https://opencores.org/ocsvn/rv01_riscv_core/rv01_riscv_core/trunk
-- Subversion Repositories rv01_riscv_core
-- [/] [rv01_riscv_core/] [trunk/] [VHDL/] [RV01_mulu.vhd] - Rev 2
-- Compare with Previous | Blame | View Log
----------------------------------------------------------------- -- -- ----------------------------------------------------------------- -- -- -- Copyright (C) 2015 Stefano Tonello -- -- -- -- This source file may be used and distributed without -- -- restriction provided that this copyright statement is not -- -- removed from the file and that any derivative work contains -- -- the original copyright notice and the associated disclaimer.-- -- -- -- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY -- -- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -- -- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -- -- FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR -- -- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -- -- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -- -- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -- -- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -- -- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -- -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -- -- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -- -- POSSIBILITY OF SUCH DAMAGE. 
-- -- -- -----------------------------------------------------------------

---------------------------------------------------------------
-- RV01 4:2 compressor
---------------------------------------------------------------
-- Reduces four equally-weighted input bits plus a carry-in to
-- one sum bit and two carry bits of the next higher weight:
--
--   A_i(0)+A_i(1)+A_i(2)+A_i(3)+CI_i = S_o + 2*(C_o + CO_o)
--
-- CO_o does not depend on CI_i, so chaining CO_o of one cell to
-- CI_i of the next does not create a ripple path through the
-- whole row.
---------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;

entity RV01_COMP42 is
  port(
    -- the four bits to be compressed
    A_i : in std_logic_vector(4-1 downto 0);
    -- carry-in (CO_o of the previous cell)
    CI_i : in std_logic;
    -- sum bit (same weight as the inputs)
    S_o : out std_logic;
    -- carry bit (next higher weight)
    C_o : out std_logic;
    -- carry-out towards the next cell (next higher weight)
    CO_o : out std_logic
  );
end RV01_COMP42;

architecture ARC of RV01_COMP42 is

  signal X01,X23,X03 : std_logic;

begin

  -- pair-wise parities of the inputs
  X01 <= A_i(0) xor A_i(1);
  X23 <= A_i(2) xor A_i(3);

  -- parity of all four inputs
  X03 <= X01 xor X23;

  -- sum is the parity of all five incoming bits
  S_o <= X03 xor CI_i;

  -- carry: CI_i when the four inputs have odd parity, A_i(3)
  -- otherwise
  C_o <= CI_i when (X03 = '1') else A_i(3);

  -- carry-out: majority of A_i(0), A_i(1), A_i(2)
  CO_o <= A_i(2) when (X01 = '1') else A_i(0);

end ARC;

---------------------------------------------------------------
-- RV01 multiplier (signed/unsigned/mix-signed)
---------------------------------------------------------------
-- LEN x LEN -> 2*LEN multiplier with one pipeline stage.
-- Both operands are treated as unsigned and split into halves;
-- the four half-width products and a sign correction factor
-- are registered, then summed by a row of 4:2 compressors
-- followed by a carry-propagate adder.
--
-- OP_i encoding (see the correction factor process):
--   "00"   : signed   MD_i x signed   MR_i
--   "01"   : signed   MD_i x unsigned MR_i
--   others : unsigned MD_i x unsigned MR_i
---------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;

library WORK;
use WORK.RV01_CONSTS_PKG.all;
use WORK.RV01_TYPES_PKG.all;
use WORK.RV01_FUNCS_PKG.all;
use WORK.RV01_ARITH_PKG.all;

entity RV01_MULTIPLIER is
  generic(
    LEN : natural := 32
  );
  port(
    CLK_i : in std_logic;
    -- multiplicand
    MD_i : in signed(LEN-1 downto 0);
    -- multiplier
    MR_i : in signed(LEN-1 downto 0);
    -- operation selector (see header)
    OP_i : in std_logic_vector(2-1 downto 0);
    -- product, valid one clock cycle after the operands
    PROD_o : out signed(LEN*2-1 downto 0)
  );
end RV01_MULTIPLIER;

architecture ARC of RV01_MULTIPLIER is

  -- width of the part of the product that needs summation
  -- (the upper 3/2*LEN bits)
  constant LEN32 : natural := (LEN/2)*3;

  subtype ROW_T is std_logic_vector(LEN32-1 downto 0);
  subtype COL_T is std_logic_vector(4-1 downto 0);

  type ROW_MTRX_T is array(4-1 downto 0) of ROW_T;
  type COL_MTRX_T is array(LEN32-1 downto 0) of COL_T;

  component RV01_COMP42 is
    port(
      A_i : in std_logic_vector(4-1 downto 0);
      CI_i : in std_logic;
      S_o : out std_logic;
      C_o : out std_logic;
      CO_o : out std_logic
    );
  end component;

  component RV01_ADDER_F is
    generic(
      LEN1 : integer := 16;
      LEN2 : integer := 16
    );
    port(
      OPA_i : in signed(LEN1+LEN2-1 downto 0);
      OPB_i : in signed(LEN1+LEN2-1 downto 0);
      CI_i : in std_logic;
      SUM_o : out signed(LEN1+LEN2-1 downto 0)
    );
  end component;

  signal MDU,MRU,CF,CF_q : unsigned(LEN-1 downto 0);
  signal PRODU0,PRODU0_q : unsigned(LEN-1 downto 0);
  signal PRODU1,PRODU1_q : unsigned(LEN-1 downto 0);
  signal PRODU2,PRODU2_q : unsigned(LEN-1 downto 0);
  signal PRODU3,PRODU3_q : unsigned(LEN-1 downto 0);
  signal CM : COL_MTRX_T;
  signal C,S,CY : std_logic_vector(LEN32 downto 0);
  signal PROD : signed(LEN*2-1 downto 0);
  -- simulation-only reference result (see end of architecture)
  signal PROD_CHK : unsigned(LEN*2-1 downto 0);
  -- never driven: constant '0' carry-in for the final adder
  signal ZERO : std_logic := '0';

  -- Prevent merging of pipeline registers into
  -- DSP block
  attribute KEEP : string;
  attribute KEEP of PRODU0_q : signal is "true";
  attribute KEEP of PRODU1_q : signal is "true";
  attribute KEEP of PRODU2_q : signal is "true";
  attribute KEEP of PRODU3_q : signal is "true";

begin

  -- convert multiplicand to unsigned
  MDU <= to_unsigned(MD_i);

  -- convert multiplier to unsigned
  MRU <= to_unsigned(MR_i);

  -- calculate partial products (each one is LEN bits long)
  PRODU0 <= MDU(LEN/2-1 downto 0) * MRU(LEN/2-1 downto 0);
  PRODU1 <= MDU(LEN-1 downto LEN/2) * MRU(LEN/2-1 downto 0);
  PRODU2 <= MDU(LEN/2-1 downto 0) * MRU(LEN-1 downto LEN/2);
  PRODU3 <= MDU(LEN-1 downto LEN/2) * MRU(LEN-1 downto LEN/2);

  -- pipeline registers
  process(CLK_i)
  begin
    if(rising_edge(CLK_i)) then
      PRODU0_q <= PRODU0;
      PRODU1_q <= PRODU1;
      PRODU2_q <= PRODU2;
      PRODU3_q <= PRODU3;
    end if;
  end process;

  -- Partial products and correction factor must be summed up
  -- in the following fashion (each digit is a byte):
  --
  --       0000 +
  --     1111   +
  --     2222   +
  --   3333     +
  --   FFFF
  --   --------
  --   PPPPPPPP

  -- The LS LEN/2 bits of the result are available without any
  -- summation (they're provided by PRODU0 LS LEN/2 bits).
  -- The remaining LEN*3/2 bits of the results require the
  -- summation of up to 5 terms, which can be reduced to 4 by
  -- merging PRODU3 and PRODU0, which don't have overlapping
  -- bits.

  -- Summation of the 4 terms of above is performed using a
  -- 4:2 compressor array followed by a carry propagate adder.

  process(PRODU0_q,PRODU1_q,PRODU2_q,PRODU3_q,CF_q)
    variable RM : ROW_MTRX_T;
    -- half-width zero padding (named ZERO_H so that it does not
    -- hide the architecture-level signal ZERO)
    constant ZERO_H : std_logic_vector(LEN/2-1 downto 0) := (others => '0');
    variable TMP_R : ROW_T;
    variable TMP_C : COL_T;
  begin

    -- build the four rows; bit 0 of each row has weight
    -- 2^(LEN/2) in the final product
    RM(0) := ZERO_H & to_std_logic_vector(PRODU1_q);
    RM(1) := ZERO_H & to_std_logic_vector(PRODU2_q);
    RM(2) := to_std_logic_vector(PRODU3_q) &
      to_std_logic_vector(PRODU0_q(LEN-1 downto LEN/2));
    RM(3) := to_std_logic_vector(CF_q) & ZERO_H;

    -- transpose RM to generate CM
    for k in 0 to LEN32-1 loop
      TMP_R := RM(0);
      TMP_C(0) := TMP_R(k);
      TMP_R := RM(1);
      TMP_C(1) := TMP_R(k);
      TMP_R := RM(2);
      TMP_C(2) := TMP_R(k);
      TMP_R := RM(3);
      TMP_C(3) := TMP_R(k);
      CM(k) <= TMP_C;
    end loop;

  end process;

  -- tree-adder (composed of 4:2 compressors)

  S(LEN32) <= '0';
  C(0) <= '0';
  CY(0) <= '0';

  -- both carry outputs of cell k have the weight of column k+1
  G0: for k in 0 to LEN32-1 generate
    UCOMP: RV01_COMP42
      port map(
        A_i => CM(k),
        CI_i => CY(k),
        S_o => S(k),
        C_o => C(k+1),
        CO_o => CY(k+1)
      );
  end generate;

  -- LS LEN/2 bits come straight from PRODU0
  PROD(LEN/2-1 downto 0) <= to_signed(PRODU0_q(LEN/2-1 downto 0));

  -- carry-propagate adder (carries out of the MS column are
  -- dropped, i.e. the sum is taken modulo 2^(2*LEN))
  U_ADDF : RV01_ADDER_F
    generic map(
      LEN1 => LEN32/2,
      LEN2 => LEN32/2
    )
    port map(
      OPA_i => to_signed(S(LEN32-1 downto 0)),
      OPB_i => to_signed(C(LEN32-1 downto 0)),
      CI_i => ZERO,
      SUM_o => PROD(LEN*2-1 downto LEN/2)
    );

  -- select correcting factor for signed and mixed-sign
  -- multiplication

  process(OP_i,MDU,MRU)
    variable SD,SR : std_logic;
    variable NMDU,NMRU : unsigned(LEN-1 downto 0);
  begin

    -- multiplicand sign
    SD := MDU(LEN-1);

    -- multiplier sign
    SR := MRU(LEN-1);

    -- get MDU 2's complement
    NMDU := not(MDU)+1;

    -- get MRU 2's complement
    NMRU := not(MRU)+1;

    if(OP_i = "00") then
      -- signed x signed
      if(SD = '1' and SR = '1') then
        CF <= NMDU + NMRU;
      elsif(SD = '1') then
        CF <= NMRU;
      elsif(SR = '1') then
        CF <= NMDU;
      else
        CF <= to_unsigned(0,LEN);
      end if;
    elsif(OP_i = "01") then
      -- signed x unsigned
      if(SD = '1') then
        CF <= NMRU;
      else
        CF <= to_unsigned(0,LEN);
      end if;
    else
      -- unsigned x unsigned
      CF <= to_unsigned(0,LEN);
    end if;

  end process;

  -- pipeline registers
  process(CLK_i)
  begin
    if(rising_edge(CLK_i)) then
      CF_q <= CF;
    end if;
  end process;

  -- output
  PROD_o <= PROD;

  -- simulation-only reference: plain sum of the aligned terms

  -- synthesis translate_off
  PROD_CHK <=
    (to_unsigned(0,LEN) & PRODU0_q) +
    (to_unsigned(0,LEN/2) & PRODU1_q & to_unsigned(0,LEN/2)) +
    (to_unsigned(0,LEN/2) & PRODU2_q & to_unsigned(0,LEN/2)) +
    (PRODU3_q & to_unsigned(0,LEN)) +
    (CF_q & to_unsigned(0,LEN));
  -- synthesis translate_on

end ARC;

---------------------------------------------------------------
-- RV01 Two-cycle multiply unit
---------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;

library WORK;
use WORK.RV01_CONSTS_PKG.all;
use WORK.RV01_TYPES_PKG.all;
use WORK.RV01_ARITH_PKG.all;
use WORK.RV01_OP_PKG.all;

entity RV01_MULU is
  port(
    CLK_i : in std_logic;
    -- multiply operation selector
    CTRL_i : in MUL_CTRL;
    -- operand A (multiplicand)
    OPA_i : in SDWORD_T;
    -- operand B (multiplier)
    OPB_i : in SDWORD_T;
    -- result: low or high half of the product, selected by the
    -- registered copy of CTRL_i
    RES_o : out SDWORD_T
  );
end RV01_MULU;

architecture ARC of RV01_MULU is

  component RV01_MULTIPLIER is
    generic(
      LEN : natural := 32
    );
    port(
      CLK_i : in std_logic;
      MD_i : in signed(LEN-1 downto 0);
      MR_i : in signed(LEN-1 downto 0);
      OP_i : in std_logic_vector(2-1 downto 0);
      PROD_o : out signed(LEN*2-1 downto 0)
    );
  end component;

  signal CTRL_q : MUL_CTRL;
  signal PROD : signed(LDLEN-1 downto 0);
  signal OP : std_logic_vector(2-1 downto 0);
  signal RES : signed(SDLEN-1 downto 0);

begin

  ------------------------------------
  -- Notes
  ------------------------------------

  -- This unit performs three types of multiplication:
  -- 1) MUL/MULH signed(OPA_i) * signed(OPB_i)
  -- 2) MULHU unsigned(OPA_i) * unsigned(OPB_i)
  -- 3) MULHSU signed(OPA_i) * unsigned(OPB_i)

  -- -n is represented in 2's complement as 2^32-n
  -- So, -n*m => (2^32-n)*m = 2^32*m - n*m => CF = -2^32*m
  -- So, -n*-m => (2^32-n)*(2^32-m) = 2^64 - 2^32*(m+n) + n*m => CF = 2^32*(m+n)

  -- Map the operation to the multiplier's OP_i encoding:
  -- "00" signed x signed, "01" signed x unsigned,
  -- anything else unsigned x unsigned.
  process(CTRL_i)
  begin
    case CTRL_i is
      when MC_MULH =>
        OP <= "00";
      when MC_MULHSU =>
        OP <= "01";
      when others =>
        OP <= "10";
    end case;
  end process;

  U_MUL : RV01_MULTIPLIER
    generic map(
      LEN => SDLEN
    )
    port map(
      CLK_i => CLK_i,
      MD_i => OPA_i,
      MR_i => OPB_i,
      OP_i => OP,
      PROD_o => PROD
    );

  -- pipeline registers (keep the operation aligned with the
  -- registered multiplier)
  process(CLK_i)
  begin
    if(rising_edge(CLK_i)) then
      CTRL_q <= CTRL_i;
    end if;
  end process;

  -- MUL returns the low half of the product, every other
  -- operation the high half
  RES <= PROD(SDLEN-1 downto 0) when (CTRL_q = MC_MUL) else
    PROD(LDLEN-1 downto SDLEN);

  RES_o <= RES;

end ARC;