URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_muldiv.vhd] - Blame information for rev 69

Go to most recent revision | Details | Compare with Previous | View Log


-- #################################################################################################
-- # << NEORV32 - CPU Co-Processor: Integer Multiplier/Divider Unit (RISC-V "M" Extension) >>      #
-- # ********************************************************************************************* #
-- # Multiplier and Divider unit. Implements the RISC-V M CPU extension.                           #
-- #                                                                                               #
-- # Multiplier core (signed/unsigned) uses classical serial algorithm. Unit latency: 31+3 cycles  #
-- # Divider core (unsigned) uses classical serial algorithm. Unit latency: 32+4 cycles            #
-- #                                                                                               #
-- # Multiplications can be mapped to DSP blocks (faster!) when FAST_MUL_EN = true.                #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License                                                                          #
-- #                                                                                               #
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
-- #                                                                                               #
-- # Redistribution and use in source and binary forms, with or without modification, are          #
-- # permitted provided that the following conditions are met:                                     #
-- #                                                                                               #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
-- #    conditions and the following disclaimer.                                                   #
-- #                                                                                               #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
-- #    provided with the distribution.                                                            #
-- #                                                                                               #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
-- #    endorse or promote products derived from this software without specific prior written      #
-- #    permission.                                                                                #
-- #                                                                                               #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
-- # ********************************************************************************************* #
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
-- #################################################################################################
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
library neorv32;
use neorv32.neorv32_package.all;
 
-- Interface of the multiplier/divider co-processor.
--
-- Handshake as implemented by the architecture below:
--   * start_i is only evaluated while the controller is in its IDLE state.
--   * valid_o is a single-cycle pulse that is raised while the controller is in
--     its FINALIZE state, i.e. one clock cycle BEFORE the result appears on res_o.
--   * res_o carries the result for exactly one clock cycle and is all-zero otherwise.
--   * The operation is selected by the funct3 field taken from ctrl_i.
entity neorv32_cpu_cp_muldiv is
  generic (
    FAST_MUL_EN : boolean; -- true: single parallel multiplier (intended for DSP blocks); false: bit-serial multiplier
    DIVISION_EN : boolean  -- true: implement the serial divider; false: divider outputs are tied to zero
  );
  port (
    -- global control --
    clk_i   : in  std_ulogic; -- global clock, rising edge
    rstn_i  : in  std_ulogic; -- global reset, low-active, async
    ctrl_i  : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus (only the funct3 bits are used here)
    start_i : in  std_ulogic; -- trigger operation (sampled in IDLE state only)
    -- data input --
    rs1_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1 (multiplicand / dividend)
    rs2_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2 (multiplier / divisor)
    -- result and status --
    res_o   : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result (zero when no result is presented)
    valid_o : out std_ulogic -- data output valid (one-cycle pulse, one cycle ahead of res_o)
  );
end neorv32_cpu_cp_muldiv;
 
architecture neorv32_cpu_cp_muldiv_rtl of neorv32_cpu_cp_muldiv is
 
  -- operations --
  -- 3-bit operation codes; they are compared against the funct3 slice of ctrl_i (cp_op).
  -- Bit 2 distinguishes the multiply group ('0') from the divide/remainder group ('1').
  constant cp_op_mul_c    : std_ulogic_vector(2 downto 0) := "000"; -- mul    (low word of product)
  constant cp_op_mulh_c   : std_ulogic_vector(2 downto 0) := "001"; -- mulh   (high word, rs1 signed, rs2 signed)
  constant cp_op_mulhsu_c : std_ulogic_vector(2 downto 0) := "010"; -- mulhsu (high word, rs1 signed, rs2 unsigned)
  constant cp_op_mulhu_c  : std_ulogic_vector(2 downto 0) := "011"; -- mulhu  (high word, both unsigned)
  constant cp_op_div_c    : std_ulogic_vector(2 downto 0) := "100"; -- div    (signed quotient)
  constant cp_op_divu_c   : std_ulogic_vector(2 downto 0) := "101"; -- divu   (unsigned quotient)
  constant cp_op_rem_c    : std_ulogic_vector(2 downto 0) := "110"; -- rem    (signed remainder)
  constant cp_op_remu_c   : std_ulogic_vector(2 downto 0) := "111"; -- remu   (unsigned remainder)
 
  -- controller --
  type state_t is (IDLE, DIV_PREPROCESS, PROCESSING, FINALIZE);
  signal state         : state_t; -- current controller state
  signal cnt           : std_ulogic_vector(4 downto 0); -- down-counter for the PROCESSING state
  signal cp_op         : std_ulogic_vector(2 downto 0); -- operation to execute
  signal cp_op_ff      : std_ulogic_vector(2 downto 0); -- operation that was executed
  signal start_div     : std_ulogic; -- registered one-cycle pulse: load divider operands
  signal start_mul     : std_ulogic; -- combinational: load multiplier operands
  signal operation     : std_ulogic; -- 0 = multiplication, 1 = division/remainder
  signal div_opy       : std_ulogic_vector(data_width_c-1 downto 0); -- registered divisor (negated if negative and signed)
  signal rs1_is_signed : std_ulogic; -- set if the selected operation treats rs1 as signed
  signal rs2_is_signed : std_ulogic; -- set if the selected operation treats rs2 as signed
  signal opy_is_zero   : std_ulogic; -- registered flag: divisor (rs2) is zero
  signal div_res_corr  : std_ulogic; -- registered flag: divider result has to be negated
  signal out_en        : std_ulogic; -- registered one-cycle pulse: gate the result onto res_o
 
  -- divider core --
  signal remainder        : std_ulogic_vector(data_width_c-1 downto 0); -- partial/final remainder register
  signal quotient         : std_ulogic_vector(data_width_c-1 downto 0); -- holds the dividend at start, the quotient at the end
  signal div_sub          : std_ulogic_vector(data_width_c   downto 0); -- trial subtraction result; MSB set = borrow
  signal div_sign_comp_in : std_ulogic_vector(data_width_c-1 downto 0); -- value selected for sign compensation
  signal div_sign_comp    : std_ulogic_vector(data_width_c-1 downto 0); -- negated (two's complement) version of div_sign_comp_in
  signal div_res          : std_ulogic_vector(data_width_c-1 downto 0); -- sign-corrected divider result
 
  -- multiplier core --
  signal mul_product    : std_ulogic_vector(63 downto 0); -- product register (serial mode: also shifts the multiplier out)
  signal mul_do_add     : std_ulogic_vector(data_width_c downto 0); -- adder/subtractor output of the serial multiplier (one extra bit)
  signal mul_sign_cycle : std_ulogic; -- set during the last serial step of a signed-rs2 multiplication
  signal mul_p_sext     : std_ulogic; -- extension bit for the upper product half
  signal mul_op_x       : signed(32 downto 0); -- for using DSPs
  signal mul_op_y       : signed(32 downto 0); -- for using DSPs
 
begin
 
  -- Co-Processor Controller ----------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Control FSM of the co-processor.
  --
  -- State sequence:
  --   multiplication, FAST_MUL_EN = true : IDLE -> FINALIZE -> IDLE
  --   multiplication, FAST_MUL_EN = false: IDLE -> PROCESSING (31 cycles) -> FINALIZE -> IDLE
  --   division (DIVISION_EN = true)      : IDLE -> DIV_PREPROCESS -> PROCESSING (31 cycles) -> FINALIZE -> IDLE
  --
  -- The serial datapath cores advance in PROCESSING as well as in FINALIZE, so a
  -- serial operation performs 31 + 1 = 32 iteration steps in total.
  --
  -- start_div, out_en and valid_o are cleared by default on every clock edge and are
  -- therefore single-cycle pulses.
  coprocessor_ctrl: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then -- asynchronous, low-active reset
      state        <= IDLE;
      div_opy      <= (others => def_rst_val_c);
      cnt          <= (others => def_rst_val_c);
      cp_op_ff     <= (others => def_rst_val_c);
      start_div    <= '0';
      out_en       <= '0';
      valid_o      <= '0';
      div_res_corr <= def_rst_val_c;
      opy_is_zero  <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- defaults --
      -- (overridden further below in the states that need to raise a pulse)
      start_div <= '0';
      out_en    <= '0';
      valid_o   <= '0';

      -- FSM --
      case state is

        when IDLE =>
          -- continuously capture the requested operation and re-arm the iteration counter
          cp_op_ff <= cp_op;
          cnt      <= "11110"; -- 30: PROCESSING counts 30 down to 0 = 31 cycles
          if (start_i = '1') then
            if (operation = '1') and (DIVISION_EN = true) then -- division
              start_div <= '1'; -- divider loads its operands during DIV_PREPROCESS
              state     <= DIV_PREPROCESS;
            else -- multiplication
              -- NOTE: a divide-group request with DIVISION_EN = false also ends up in this branch
              if (FAST_MUL_EN = true) then
                valid_o <= '1'; -- parallel multiplier: no iteration phase required
                state   <= FINALIZE;
              else
                state <= PROCESSING;
              end if;
            end if;
          end if;

        when DIV_PREPROCESS =>
          -- check relevant input signs --
          if (cp_op = cp_op_div_c) then -- result sign compensation for div?
            div_res_corr <= rs1_i(rs1_i'left) xor rs2_i(rs2_i'left); -- quotient is negative if operand signs differ
          elsif (cp_op = cp_op_rem_c) then -- result sign compensation for rem?
            div_res_corr <= rs1_i(rs1_i'left); -- remainder takes the sign of the dividend
          else
            div_res_corr <= '0'; -- unsigned operations: never negate
          end if;
          -- divide by zero? --
          -- or_reduce_f is not defined in this file (presumably an OR-reduction from neorv32_package)
          opy_is_zero <= not or_reduce_f(rs2_i); -- set if rs2 = 0
          -- abs(rs2) --
          if ((rs2_i(rs2_i'left) and rs2_is_signed) = '1') then -- signed division?
            div_opy <= std_ulogic_vector(0 - unsigned(rs2_i)); -- make positive
          else
            div_opy <= rs2_i;
          end if;
          --
          state <= PROCESSING;

        when PROCESSING =>
          -- count the iteration steps; leave once the counter has reached zero
          cnt <= std_ulogic_vector(unsigned(cnt) - 1);
          if (cnt = "00000") then
            valid_o <= '1'; -- announce completion (high while in FINALIZE)
            state   <= FINALIZE;
          end if;

        when FINALIZE =>
          -- last datapath step happens in this state; present the result in the next cycle
          out_en <= '1';
          state  <= IDLE;

        when others =>
          state <= IDLE;
      end case;
    end if;
  end process coprocessor_ctrl;
 
  -- requested operation: funct3 field of the instruction on the control bus --
  cp_op <= ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c);

  -- operation group: funct3(2) = '1' selects division/remainder, anything else multiplication --
  with cp_op(2) select
    operation <= '1' when '1',
                 '0' when others;

  -- rs1 is treated as signed for MULH, MULHSU, DIV and REM --
  with cp_op select
    rs1_is_signed <= '1' when cp_op_mulh_c | cp_op_mulhsu_c | cp_op_div_c | cp_op_rem_c,
                     '0' when others;

  -- rs2 is treated as signed for MULH, DIV and REM --
  with cp_op select
    rs2_is_signed <= '1' when cp_op_mulh_c | cp_op_div_c | cp_op_rem_c,
                     '0' when others;

  -- multiplier operand load: combinational, so the operands are captured on the
  -- very clock edge that accepts the start request (idle + start + multiply group) --
  start_mul <= '0' when (operation /= '0') or (start_i /= '1') or (state /= IDLE) else '1';
 
 
  -- Multiplier Core (signed/unsigned) ------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Bit-serial shift-and-add multiplier (only generated when FAST_MUL_EN = false) --
  -- mul_product initially holds the multiplier (rs2) in its lower half. In every step the
  -- adder stage output (mul_do_add, 33 bit) becomes the new upper part while the whole
  -- register moves one position to the right, dropping the multiplier bit just evaluated.
  multiplier_core_serial:
  if (FAST_MUL_EN = false) generate
    multiplier_core: process(rstn_i, clk_i)
    begin
      if (rstn_i = '0') then -- asynchronous reset
        mul_product <= (others => def_rst_val_c);
      elsif rising_edge(clk_i) then
        if (start_mul = '1') then
          -- load: zero the upper half, place rs2 in the lower half
          mul_product <= std_ulogic_vector(resize(unsigned(rs2_i), mul_product'length));
        elsif (state = PROCESSING) or (state = FINALIZE) then
          -- iteration step (FINALIZE contributes the last, sign-aware step)
          mul_product <= mul_do_add(32 downto 0) & mul_product(31 downto 1);
        end if;
      end if;
    end process multiplier_core;
  end generate;
 
  -- parallel multiplication (using DSP blocks) --
  -- Only generated when FAST_MUL_EN = true. Two register stages:
  --   1) operand registers mul_op_x / mul_op_y, loaded when start_mul is set
  --   2) product register mul_product, updated on EVERY clock edge from the operand registers
  -- Both operands are widened to 33 bits: the extra MSB replicates the sign bit for a signed
  -- operand and is '0' for an unsigned one, so a single signed multiplication covers all
  -- signed/unsigned combinations.
  -- The process has no reset branch.
  -- NOTE(review): presumably omitted on purpose to ease mapping onto DSP primitives - confirm.
  multiplier_core_dsp:
  if (FAST_MUL_EN = true) generate
    multiplier_core: process(clk_i)
      variable tmp_v : signed(65 downto 0); -- full 33x33-bit product
    begin
      if rising_edge(clk_i) then
        if (start_mul = '1') then
          -- capture sign-/zero-extended operands
          mul_op_x <= signed((rs1_i(rs1_i'left) and rs1_is_signed) & rs1_i);
          mul_op_y <= signed((rs2_i(rs2_i'left) and rs2_is_signed) & rs2_i);
        end if;
        tmp_v := mul_op_x * mul_op_y;
        mul_product <= std_ulogic_vector(tmp_v(63 downto 0)); -- keep the lower 64 bits only
        -- disabled alternative using an additional buffer register (mul_buf_ff is not declared in this file):
        --mul_buf_ff  <= mul_op_x * mul_op_y;
        --mul_product <= std_ulogic_vector(mul_buf_ff(63 downto 0)); -- let the register balancing do the magic here
      end if;
    end process multiplier_core;
  end generate;
 
  -- adder/subtractor stage of the bit-serial multiplier (combinational) --
  -- Evaluates the current multiplier bit (mul_product(0)):
  --   bit clear              -> pass the extended upper product half through
  --   bit set, sign cycle    -> subtract the extended rs1 (MSB of a signed rs2 has negative weight)
  --   bit set, no sign cycle -> add the extended rs1
  mul_update: process(mul_product, mul_sign_cycle, mul_p_sext, rs1_is_signed, rs1_i)
    variable acc_v : unsigned(data_width_c downto 0); -- upper product half plus extension bit
    variable opx_v : unsigned(data_width_c downto 0); -- rs1 plus extension bit
  begin
    acc_v := unsigned(mul_p_sext & mul_product(63 downto 32));
    opx_v := unsigned((rs1_i(rs1_i'left) and rs1_is_signed) & rs1_i);

    if (mul_product(0) /= '1') then -- multiply with 0
      mul_do_add <= std_ulogic_vector(acc_v);
    elsif (mul_sign_cycle = '1') then -- multiply with -1
      mul_do_add <= std_ulogic_vector(acc_v - opx_v);
    else -- multiply with +1
      mul_do_add <= std_ulogic_vector(acc_v + opx_v);
    end if;
  end process mul_update;

  -- extension bit of the upper product half: its MSB for a signed rs1, '0' otherwise --
  mul_p_sext     <= rs1_is_signed and mul_product(mul_product'left);
  -- the subtracting step is only taken in the FINALIZE state and only for a signed rs2 --
  mul_sign_cycle <= '0' when (state /= FINALIZE) else rs2_is_signed;
 
 
  -- Divider Core (unsigned) ----------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Serial divider working on magnitudes (only generated when DIVISION_EN = true) --
  -- "quotient" starts out holding the (made-positive) dividend. In every step its MSB is
  -- shifted into the partial remainder and a trial subtraction of the divisor (div_opy) is
  -- made: without borrow the difference is kept and a '1' enters the quotient, with borrow
  -- the shifted remainder is kept and a '0' enters the quotient.
  divider_core_serial:
  if (DIVISION_EN = true) generate
    divider_core: process(rstn_i, clk_i)
    begin
      if (rstn_i = '0') then -- asynchronous reset
        remainder <= (others => def_rst_val_c);
        quotient  <= (others => def_rst_val_c);
      elsif rising_edge(clk_i) then
        if (start_div = '1') then
          -- operand load: clear the remainder, take rs1 as dividend ...
          remainder <= (others => '0');
          quotient  <= rs1_i;
          -- ... or its two's complement if it is a negative signed value
          if ((rs1_i(rs1_i'left) and rs1_is_signed) = '1') then
            quotient <= std_ulogic_vector(0 - unsigned(rs1_i));
          end if;
        elsif (state = PROCESSING) or (state = FINALIZE) then
          -- iteration step: the new quotient bit is the inverted borrow of the trial subtraction
          quotient <= quotient(30 downto 0) & (not div_sub(32));
          if (div_sub(32) = '0') then -- no borrow: divisor fits, keep the difference
            remainder <= div_sub(31 downto 0);
          else -- borrow: divisor does not fit, just shift
            remainder <= remainder(30 downto 0) & quotient(31);
          end if;
        end if;
      end if;
    end process divider_core;

    -- trial subtraction: (remainder shifted left, next dividend bit appended) - divisor,
    -- computed with one extra bit so that the MSB indicates a borrow --
    div_sub <= std_ulogic_vector(resize(unsigned(remainder(30 downto 0) & quotient(31)), div_sub'length) - resize(unsigned(div_opy), div_sub'length));

    -- sign compensation of the result --
    -- DIV corrects the quotient, every other operation routes the remainder through here
    div_sign_comp_in <= remainder when (cp_op /= cp_op_div_c) else quotient;
    div_sign_comp    <= std_ulogic_vector(0 - unsigned(div_sign_comp_in));
    -- negate only if a correction was flagged and the divisor was not zero
    div_res          <= div_sign_comp_in when (div_res_corr /= '1') or (opy_is_zero /= '0') else div_sign_comp;
  end generate;
 
  -- divider not implemented: tie the divider outputs used by the result stage to zero --
  divider_core_serial_none:
  if (DIVISION_EN = false) generate
    div_res   <= (div_res'range   => '0');
    quotient  <= (quotient'range  => '0');
    remainder <= (remainder'range => '0');
  end generate;
 
 
  -- Data Output ----------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- result multiplexer (combinational) --
  -- res_o is all-zero unless out_en is set; in that case the registered operation code
  -- (cp_op_ff) selects which datapath value is presented.
  operation_result: process(out_en, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1_i, remainder)
  begin
    res_o <= (others => '0'); -- default: no result presented
    if (out_en = '1') then
      case cp_op_ff is
        when cp_op_mulh_c | cp_op_mulhsu_c | cp_op_mulhu_c => -- upper product word
          res_o <= mul_product(63 downto 32);
        when cp_op_mul_c => -- lower product word
          res_o <= mul_product(31 downto 00);
        when cp_op_divu_c => -- unsigned quotient, no sign correction
          res_o <= quotient;
        when cp_op_div_c => -- sign-corrected quotient
          res_o <= div_res;
        when cp_op_rem_c => -- sign-corrected remainder ...
          res_o <= div_res;
          if (opy_is_zero /= '0') then -- ... but division by zero returns the dividend itself
            res_o <= rs1_i;
          end if;
        when others => -- cp_op_remu_c
          res_o <= remainder;
      end case;
    end if;
  end process operation_result;
 
 
end neorv32_cpu_cp_muldiv_rtl;

Line No.	Rev	Author	Line
1	2	zero_gravi	`-- #################################################################################################`
2	61	zero_gravi	`-- # << NEORV32 - CPU Co-Processor: Integer Multiplier/Divider Unit (RISC-V "M" Extension) >> #`
3	2	zero_gravi	`-- # ********************************************************************************************* #`
4	56	zero_gravi	`-- # Multiplier and Divider unit. Implements the RISC-V M CPU extension. #`
5			`-- # #`
6	61	zero_gravi	`-- # Multiplier core (signed/unsigned) uses classical serial algorithm. Unit latency: 31+3 cycles #`
7	56	zero_gravi	`-- # Divider core (unsigned) uses classical serial algorithm. Unit latency: 32+4 cycles #`
8			`-- # #`
9			`-- # Multiplications can be mapped to DSP blocks (faster!) when FAST_MUL_EN = true. #`
10	2	zero_gravi	`-- # ********************************************************************************************* #`
11			`-- # BSD 3-Clause License #`
12			`-- # #`
13	44	zero_gravi	`-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. #`
14	2	zero_gravi	`-- # #`
15			`-- # Redistribution and use in source and binary forms, with or without modification, are #`
16			`-- # permitted provided that the following conditions are met: #`
17			`-- # #`
18			`-- # 1. Redistributions of source code must retain the above copyright notice, this list of #`
19			`-- # conditions and the following disclaimer. #`
20			`-- # #`
21			`-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #`
22			`-- # conditions and the following disclaimer in the documentation and/or other materials #`
23			`-- # provided with the distribution. #`
24			`-- # #`
25			`-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #`
26			`-- # endorse or promote products derived from this software without specific prior written #`
27			`-- # permission. #`
28			`-- # #`
29			`-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #`
30			`-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #`
31			`-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #`
32			`-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #`
33			`-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #`
34			`-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #`
35			`-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #`
36			`-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #`
37			`-- # OF THE POSSIBILITY OF SUCH DAMAGE. #`
38			`-- # ********************************************************************************************* #`
39			`-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #`
40			`-- #################################################################################################`
41
42			`library ieee;`
43			`use ieee.std_logic_1164.all;`
44			`use ieee.numeric_std.all;`
45
46			`library neorv32;`
47			`use neorv32.neorv32_package.all;`
48
49			`entity neorv32_cpu_cp_muldiv is`
50	19	zero_gravi	`generic (`
51	62	zero_gravi	`FAST_MUL_EN : boolean; -- use DSPs for faster multiplication`
52			`DIVISION_EN : boolean -- implement divider hardware`
53	19	zero_gravi	`);`
54	2	zero_gravi	`port (`
55			`-- global control --`
56			`clk_i : in std_ulogic; -- global clock, rising edge`
57			`rstn_i : in std_ulogic; -- global reset, low-active, async`
58			`ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus`
59	36	zero_gravi	`start_i : in std_ulogic; -- trigger operation`
60	2	zero_gravi	`-- data input --`
61			`rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1`
62			`rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2`
63			`-- result and status --`
64			`res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result`
65			`valid_o : out std_ulogic -- data output valid`
66			`);`
67			`end neorv32_cpu_cp_muldiv;`
68
69			`architecture neorv32_cpu_cp_muldiv_rtl of neorv32_cpu_cp_muldiv is`
70
71	44	zero_gravi	`-- operations --`
72			`constant cp_op_mul_c : std_ulogic_vector(2 downto 0) := "000"; -- mul`
73			`constant cp_op_mulh_c : std_ulogic_vector(2 downto 0) := "001"; -- mulh`
74			`constant cp_op_mulhsu_c : std_ulogic_vector(2 downto 0) := "010"; -- mulhsu`
75			`constant cp_op_mulhu_c : std_ulogic_vector(2 downto 0) := "011"; -- mulhu`
76			`constant cp_op_div_c : std_ulogic_vector(2 downto 0) := "100"; -- div`
77			`constant cp_op_divu_c : std_ulogic_vector(2 downto 0) := "101"; -- divu`
78			`constant cp_op_rem_c : std_ulogic_vector(2 downto 0) := "110"; -- rem`
79			`constant cp_op_remu_c : std_ulogic_vector(2 downto 0) := "111"; -- remu`
80
81	2	zero_gravi	`-- controller --`
82	69	zero_gravi	`type state_t is (IDLE, DIV_PREPROCESS, PROCESSING, FINALIZE);`
83	2	zero_gravi	`signal state : state_t;`
84			`signal cnt : std_ulogic_vector(4 downto 0);`
85			`signal cp_op : std_ulogic_vector(2 downto 0); -- operation to execute`
86	39	zero_gravi	`signal cp_op_ff : std_ulogic_vector(2 downto 0); -- operation that was executed`
87	56	zero_gravi	`signal start_div : std_ulogic;`
88			`signal start_mul : std_ulogic;`
89	2	zero_gravi	`signal operation : std_ulogic;`
90	56	zero_gravi	`signal div_opy : std_ulogic_vector(data_width_c-1 downto 0);`
91			`signal rs1_is_signed : std_ulogic;`
92			`signal rs2_is_signed : std_ulogic;`
93	6	zero_gravi	`signal opy_is_zero : std_ulogic;`
94	2	zero_gravi	`signal div_res_corr : std_ulogic;`
95	69	zero_gravi	`signal out_en : std_ulogic;`
96	2	zero_gravi
97			`-- divider core --`
98			`signal remainder : std_ulogic_vector(data_width_c-1 downto 0);`
99			`signal quotient : std_ulogic_vector(data_width_c-1 downto 0);`
100			`signal div_sub : std_ulogic_vector(data_width_c downto 0);`
101			`signal div_sign_comp_in : std_ulogic_vector(data_width_c-1 downto 0);`
102			`signal div_sign_comp : std_ulogic_vector(data_width_c-1 downto 0);`
103			`signal div_res : std_ulogic_vector(data_width_c-1 downto 0);`
104
105			`-- multiplier core --`
106			`signal mul_product : std_ulogic_vector(63 downto 0);`
107	12	zero_gravi	`signal mul_do_add : std_ulogic_vector(data_width_c downto 0);`
108	2	zero_gravi	`signal mul_sign_cycle : std_ulogic;`
109			`signal mul_p_sext : std_ulogic;`
110	19	zero_gravi	`signal mul_op_x : signed(32 downto 0); -- for using DSPs`
111			`signal mul_op_y : signed(32 downto 0); -- for using DSPs`
112	2	zero_gravi
113			`begin`
114
115			`-- Co-Processor Controller ----------------------------------------------------------------`
116			`-- -------------------------------------------------------------------------------------------`
117			`coprocessor_ctrl: process(rstn_i, clk_i)`
118			`begin`
119			`if (rstn_i = '0') then`
120			`state <= IDLE;`
121	56	zero_gravi	`div_opy <= (others => def_rst_val_c);`
122			`cnt <= (others => def_rst_val_c);`
123			`cp_op_ff <= (others => def_rst_val_c);`
124			`start_div <= '0';`
125	69	zero_gravi	`out_en <= '0';`
126			`valid_o <= '0';`
127	56	zero_gravi	`div_res_corr <= def_rst_val_c;`
128			`opy_is_zero <= def_rst_val_c;`
129	2	zero_gravi	`elsif rising_edge(clk_i) then`
130			`-- defaults --`
131	56	zero_gravi	`start_div <= '0';`
132	69	zero_gravi	`out_en <= '0';`
133			`valid_o <= '0';`
134	2	zero_gravi
135			`-- FSM --`
136			`case state is`
137	69	zero_gravi
138	2	zero_gravi	`when IDLE =>`
139	56	zero_gravi	`cp_op_ff <= cp_op;`
140	69	zero_gravi	`cnt <= "11110";`
141	19	zero_gravi	`if (start_i = '1') then`
142	61	zero_gravi	`if (operation = '1') and (DIVISION_EN = true) then -- division`
143	69	zero_gravi	`start_div <= '1';`
144			`state <= DIV_PREPROCESS;`
145			`else -- multiplication`
146	56	zero_gravi	`if (FAST_MUL_EN = true) then`
147	69	zero_gravi	`valid_o <= '1';`
148			`state <= FINALIZE;`
149	56	zero_gravi	`else`
150			`state <= PROCESSING;`
151			`end if;`
152			`end if;`
153	2	zero_gravi	`end if;`
154
155	56	zero_gravi	`when DIV_PREPROCESS =>`
156	69	zero_gravi	`-- check relevant input signs --`
157			`if (cp_op = cp_op_div_c) then -- result sign compensation for div?`
158			`div_res_corr <= rs1_i(rs1_i'left) xor rs2_i(rs2_i'left);`
159			`elsif (cp_op = cp_op_rem_c) then -- result sign compensation for rem?`
160			`div_res_corr <= rs1_i(rs1_i'left);`
161	2	zero_gravi	`else`
162	69	zero_gravi	`div_res_corr <= '0';`
163	2	zero_gravi	`end if;`
164	69	zero_gravi	`-- divide by zero? --`
165			`opy_is_zero <= not or_reduce_f(rs2_i); -- set if rs2 = 0`
166			`-- abs(rs2) --`
167			`if ((rs2_i(rs2_i'left) and rs2_is_signed) = '1') then -- signed division?`
168			`div_opy <= std_ulogic_vector(0 - unsigned(rs2_i)); -- make positive`
169			`else`
170			`div_opy <= rs2_i;`
171			`end if;`
172			`--`
173			`state <= PROCESSING;`
174	2	zero_gravi
175			`when PROCESSING =>`
176			`cnt <= std_ulogic_vector(unsigned(cnt) - 1);`
177			`if (cnt = "00000") then`
178	69	zero_gravi	`valid_o <= '1';`
179			`state <= FINALIZE;`
180	2	zero_gravi	`end if;`
181
182			`when FINALIZE =>`
183	69	zero_gravi	`out_en <= '1';`
184			`state <= IDLE;`
185	2	zero_gravi
186	69	zero_gravi	`when others =>`
187	39	zero_gravi	`state <= IDLE;`
188	2	zero_gravi	`end case;`
189			`end if;`
190			`end process coprocessor_ctrl;`
191
192	36	zero_gravi	`-- co-processor command --`
193			`cp_op <= ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c);`
194
195	56	zero_gravi	`-- operation: 0=mul, 1=div --`
196			`operation <= '1' when (cp_op(2) = '1') else '0';`
197	2	zero_gravi
198			`-- opx (rs1) signed? --`
199	56	zero_gravi	`rs1_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_mulhsu_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';`
200	2	zero_gravi
201			`-- opy (rs2) signed? --`
202	56	zero_gravi	`rs2_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';`
203	2	zero_gravi
204	56	zero_gravi	`-- start MUL operation (do it fast!) --`
205			`start_mul <= '1' when (state = IDLE) and (start_i = '1') and (operation = '0') else '0';`
206	2	zero_gravi
207	56	zero_gravi
208	36	zero_gravi	`-- Multiplier Core (signed/unsigned) ------------------------------------------------------`
209	2	zero_gravi	`-- -------------------------------------------------------------------------------------------`
210	61	zero_gravi	`-- iterative multiplication (bit-serial) --`
211			`multiplier_core_serial:`
212			`if (FAST_MUL_EN = false) generate`
213			`multiplier_core: process(rstn_i, clk_i)`
214			`begin`
215			`if (rstn_i = '0') then`
216			`mul_product <= (others => def_rst_val_c);`
217			`elsif rising_edge(clk_i) then`
218	56	zero_gravi	`if (start_mul = '1') then -- start new multiplication`
219	61	zero_gravi	`mul_product(63 downto 32) <= (others => '0');`
220			`mul_product(31 downto 00) <= rs2_i;`
221	56	zero_gravi	`elsif (state = PROCESSING) or (state = FINALIZE) then -- processing step or sign-finalization step`
222	61	zero_gravi	`mul_product(63 downto 31) <= mul_do_add(32 downto 0);`
223			`mul_product(30 downto 00) <= mul_product(31 downto 1);`
224	12	zero_gravi	`end if;`
225	56	zero_gravi	`end if;`
226	61	zero_gravi	`end process multiplier_core;`
227			`end generate;`
228	56	zero_gravi
229	69	zero_gravi	`-- parallel multiplication (using DSP blocks) --`
230	61	zero_gravi	`multiplier_core_dsp:`
231			`if (FAST_MUL_EN = true) generate`
232			`multiplier_core: process(clk_i)`
233	69	zero_gravi	`variable tmp_v : signed(65 downto 0);`
234	61	zero_gravi	`begin`
235			`if rising_edge(clk_i) then`
236	56	zero_gravi	`if (start_mul = '1') then`
237			`mul_op_x <= signed((rs1_i(rs1_i'left) and rs1_is_signed) & rs1_i);`
238			`mul_op_y <= signed((rs2_i(rs2_i'left) and rs2_is_signed) & rs2_i);`
239	12	zero_gravi	`end if;`
240	69	zero_gravi	`tmp_v := mul_op_x * mul_op_y;`
241			`mul_product <= std_ulogic_vector(tmp_v(63 downto 0));`
242			`--mul_buf_ff <= mul_op_x * mul_op_y;`
243			`--mul_product <= std_ulogic_vector(mul_buf_ff(63 downto 0)); -- let the register balancing do the magic here`
244	2	zero_gravi	`end if;`
245	61	zero_gravi	`end process multiplier_core;`
246			`end generate;`
247	2	zero_gravi
248	61	zero_gravi	`-- do another addition (bit-serial) --`
249	56	zero_gravi	`mul_update: process(mul_product, mul_sign_cycle, mul_p_sext, rs1_is_signed, rs1_i)`
250	2	zero_gravi	`begin`
251	56	zero_gravi	`-- current bit of rs2_i to take care of --`
252	12	zero_gravi	`if (mul_product(0) = '1') then -- multiply with 1`
253			`if (mul_sign_cycle = '1') then -- for signed operations only: take care of negative weighted MSB -> multiply with -1`
254	56	zero_gravi	`mul_do_add <= std_ulogic_vector(unsigned(mul_p_sext & mul_product(63 downto 32)) - unsigned((rs1_i(rs1_i'left) and rs1_is_signed) & rs1_i));`
255	12	zero_gravi	`else -- multiply with +1`
256	56	zero_gravi	`mul_do_add <= std_ulogic_vector(unsigned(mul_p_sext & mul_product(63 downto 32)) + unsigned((rs1_i(rs1_i'left) and rs1_is_signed) & rs1_i));`
257	2	zero_gravi	`end if;`
258	12	zero_gravi	`else -- multiply with 0`
259	2	zero_gravi	`mul_do_add <= mul_p_sext & mul_product(63 downto 32);`
260			`end if;`
261			`end process mul_update;`
262
263			`-- sign control --`
264	56	zero_gravi	`mul_sign_cycle <= rs2_is_signed when (state = FINALIZE) else '0';`
265			`mul_p_sext <= mul_product(mul_product'left) and rs1_is_signed;`
266	2	zero_gravi
267
268	12	zero_gravi	`-- Divider Core (unsigned) ----------------------------------------------------------------`
269	2	zero_gravi	`-- -------------------------------------------------------------------------------------------`
270	61	zero_gravi	`divider_core_serial:`
271			`if (DIVISION_EN = true) generate`
272			`divider_core: process(rstn_i, clk_i)`
273			`begin`
274			`if (rstn_i = '0') then`
275			`quotient <= (others => def_rst_val_c);`
276			`remainder <= (others => def_rst_val_c);`
277			`elsif rising_edge(clk_i) then`
278			`if (start_div = '1') then -- start new division`
279	69	zero_gravi	`if ((rs1_i(rs1_i'left) and rs1_is_signed) = '1') then -- signed division?`
280			`quotient <= std_ulogic_vector(0 - unsigned(rs1_i)); -- make positive`
281			`else`
282			`quotient <= rs1_i;`
283			`end if;`
284	61	zero_gravi	`remainder <= (others => '0');`
285			`elsif (state = PROCESSING) or (state = FINALIZE) then -- running?`
286			`quotient <= quotient(30 downto 0) & (not div_sub(32));`
287			`if (div_sub(32) = '0') then -- still overflowing`
288			`remainder <= div_sub(31 downto 0);`
289			`else -- underflow`
290			`remainder <= remainder(30 downto 0) & quotient(31);`
291			`end if;`
292	2	zero_gravi	`end if;`
293			`end if;`
294	61	zero_gravi	`end process divider_core;`
295	2	zero_gravi
296	61	zero_gravi	`-- try another subtraction --`
297			`div_sub <= std_ulogic_vector(unsigned('0' & remainder(30 downto 0) & quotient(31)) - unsigned('0' & div_opy));`
298	2	zero_gravi
299	61	zero_gravi	`-- result sign compensation --`
300			`div_sign_comp_in <= quotient when (cp_op = cp_op_div_c) else remainder;`
301			`div_sign_comp <= std_ulogic_vector(0 - unsigned(div_sign_comp_in));`
302			`div_res <= div_sign_comp when (div_res_corr = '1') and (opy_is_zero = '0') else div_sign_comp_in;`
303			`end generate;`
304	2	zero_gravi
305	61	zero_gravi	`-- no divider --`
306			`divider_core_serial_none:`
307			`if (DIVISION_EN = false) generate`
308	69	zero_gravi	`remainder <= (others => '0');`
309			`quotient <= (others => '0');`
310			`div_res <= (others => '0');`
311	61	zero_gravi	`end generate;`
312	2	zero_gravi
313	61	zero_gravi
314	2	zero_gravi	`-- Data Output ----------------------------------------------------------------------------`
315			`-- -------------------------------------------------------------------------------------------`
316	69	zero_gravi	`operation_result: process(out_en, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1_i, remainder)`
317	2	zero_gravi	`begin`
318	69	zero_gravi	`if (out_en = '1') then`
319			`case cp_op_ff is`
320			`when cp_op_mul_c =>`
321			`res_o <= mul_product(31 downto 00);`
322			`when cp_op_mulh_c \| cp_op_mulhsu_c \| cp_op_mulhu_c =>`
323			`res_o <= mul_product(63 downto 32);`
324			`when cp_op_div_c =>`
325			`res_o <= div_res;`
326			`when cp_op_divu_c =>`
327			`res_o <= quotient;`
328			`when cp_op_rem_c =>`
329			`if (opy_is_zero = '0') then`
330			`res_o <= div_res;`
331			`else`
332			`res_o <= rs1_i;`
333			`end if;`
334			`when others => -- cp_op_remu_c`
335			`res_o <= remainder;`
336			`end case;`
337			`else`
338	47	zero_gravi	`res_o <= (others => '0');`
339	2	zero_gravi	`end if;`
340			`end process operation_result;`
341
342
343			`end neorv32_cpu_cp_muldiv_rtl;`

Browse

Tools

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_muldiv.vhd] - Blame information for rev 69