Line 42... |
Line 42... |
|
|
library neorv32;
|
library neorv32;
|
use neorv32.neorv32_package.all;
|
use neorv32.neorv32_package.all;
|
|
|
entity neorv32_cpu_cp_muldiv is
|
entity neorv32_cpu_cp_muldiv is
|
|
generic (
|
|
FAST_MUL_EN : boolean := false -- use DSPs for faster multiplication
|
|
);
|
port (
|
port (
|
-- global control --
|
-- global control --
|
clk_i : in std_ulogic; -- global clock, rising edge
|
clk_i : in std_ulogic; -- global clock, rising edge
|
rstn_i : in std_ulogic; -- global reset, low-active, async
|
rstn_i : in std_ulogic; -- global reset, low-active, async
|
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
|
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
|
-- data input --
|
-- data input --
|
|
start_i : in std_ulogic; -- trigger operation
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
-- result and status --
|
-- result and status --
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
valid_o : out std_ulogic -- data output valid
|
valid_o : out std_ulogic -- data output valid
|
);
|
);
|
end neorv32_cpu_cp_muldiv;
|
end neorv32_cpu_cp_muldiv;
|
|
|
architecture neorv32_cpu_cp_muldiv_rtl of neorv32_cpu_cp_muldiv is
|
architecture neorv32_cpu_cp_muldiv_rtl of neorv32_cpu_cp_muldiv is
|
|
|
-- configuration - still experimental --
|
-- advanced configuration --
|
constant FAST_MUL_EN : boolean := false; -- use DSPs for faster multiplication
|
constant dsp_add_reg_stage_c : boolean := false; -- add another register stage to DSP-based multiplication for timing-closure
|
|
|
-- controller --
|
-- controller --
|
type state_t is (IDLE, DECODE, INIT_OPX, INIT_OPY, PROCESSING, FINALIZE, COMPLETED);
|
type state_t is (IDLE, DECODE, INIT_OPX, INIT_OPY, PROCESSING, FINALIZE, COMPLETED);
|
signal state : state_t;
|
signal state : state_t;
|
signal cnt : std_ulogic_vector(4 downto 0);
|
signal cnt : std_ulogic_vector(4 downto 0);
|
Line 87... |
Line 91... |
-- multiplier core --
|
-- multiplier core --
|
signal mul_product : std_ulogic_vector(63 downto 0);
|
signal mul_product : std_ulogic_vector(63 downto 0);
|
signal mul_do_add : std_ulogic_vector(data_width_c downto 0);
|
signal mul_do_add : std_ulogic_vector(data_width_c downto 0);
|
signal mul_sign_cycle : std_ulogic;
|
signal mul_sign_cycle : std_ulogic;
|
signal mul_p_sext : std_ulogic;
|
signal mul_p_sext : std_ulogic;
|
signal mul_op_x : std_ulogic_vector(32 downto 0);
|
signal mul_op_x : signed(32 downto 0); -- for using DSPs
|
signal mul_op_y : std_ulogic_vector(32 downto 0);
|
signal mul_op_y : signed(32 downto 0); -- for using DSPs
|
signal mul_buf_ff0 : std_ulogic_vector(65 downto 0);
|
signal mul_buf_ff : signed(65 downto 0); -- for using DSPs
|
signal mul_buf_ff1 : std_ulogic_vector(65 downto 0);
|
signal mul_buf2_ff : signed(65 downto 0); -- for using DSPs
|
|
|
begin
|
begin
|
|
|
-- Co-Processor Controller ----------------------------------------------------------------
|
-- Co-Processor Controller ----------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
Line 118... |
Line 122... |
-- FSM --
|
-- FSM --
|
case state is
|
case state is
|
when IDLE =>
|
when IDLE =>
|
opx <= rs1_i;
|
opx <= rs1_i;
|
opy <= rs2_i;
|
opy <= rs2_i;
|
|
if (start_i = '1') then
|
cp_op <= ctrl_i(ctrl_cp_cmd2_c downto ctrl_cp_cmd0_c);
|
cp_op <= ctrl_i(ctrl_cp_cmd2_c downto ctrl_cp_cmd0_c);
|
if (ctrl_i(ctrl_cp_use_c) = '1') and (ctrl_i(ctrl_cp_id_msb_c downto ctrl_cp_id_lsb_c) = cp_sel_muldiv_c) then
|
|
state <= DECODE;
|
state <= DECODE;
|
end if;
|
end if;
|
|
|
when DECODE =>
|
when DECODE =>
|
--
|
--
|
Line 146... |
Line 150... |
state <= INIT_OPX;
|
state <= INIT_OPX;
|
else -- multiplication
|
else -- multiplication
|
if (FAST_MUL_EN = false) then
|
if (FAST_MUL_EN = false) then
|
cnt <= "11111";
|
cnt <= "11111";
|
else
|
else
|
cnt <= "00101"; -- FIXME
|
cnt <= "00001";
|
end if;
|
end if;
|
start <= '1';
|
start <= '1';
|
state <= PROCESSING;
|
state <= PROCESSING;
|
end if;
|
end if;
|
|
|
Line 206... |
Line 210... |
mul_product(63 downto 31) <= mul_do_add(32 downto 0);
|
mul_product(63 downto 31) <= mul_do_add(32 downto 0);
|
mul_product(30 downto 00) <= mul_product(31 downto 1);
|
mul_product(30 downto 00) <= mul_product(31 downto 1);
|
end if;
|
end if;
|
else -- use direct approach using (several!) DSP blocks
|
else -- use direct approach using (several!) DSP blocks
|
if (start = '1') then
|
if (start = '1') then
|
mul_op_x <= (opx(opx'left) and opx_is_signed) & opx;
|
mul_op_x <= signed((opx(opx'left) and opx_is_signed) & opx);
|
mul_op_y <= (opy(opy'left) and opy_is_signed) & opy;
|
mul_op_y <= signed((opy(opy'left) and opy_is_signed) & opy);
|
|
end if;
|
|
mul_buf_ff <= mul_op_x * mul_op_y;
|
|
if (dsp_add_reg_stage_c = true) then -- add another reg stage?
|
|
mul_buf2_ff <= mul_buf_ff;
|
|
mul_product <= std_ulogic_vector(mul_buf2_ff(63 downto 0)); -- let the register balancing do the magic here
|
|
else
|
|
mul_product <= std_ulogic_vector(mul_buf_ff(63 downto 0)); -- let the register balancing do the magic here
|
end if;
|
end if;
|
mul_buf_ff0 <= std_ulogic_vector(signed(mul_op_x) * signed(mul_op_y));
|
|
mul_buf_ff1 <= mul_buf_ff0;
|
|
mul_product <= mul_buf_ff1(63 downto 0); -- let the register balancing do the magic here
|
|
end if;
|
end if;
|
end if;
|
end if;
|
end process multiplier_core;
|
end process multiplier_core;
|
|
|
-- MUL: do another addition --
|
-- MUL: do another addition --
|