Line 2... |
Line 2... |
-- # << NEORV32 - CPU Co-Processor: MULDIV unit >> #
|
-- # << NEORV32 - CPU Co-Processor: MULDIV unit >> #
|
-- # ********************************************************************************************* #
|
-- # ********************************************************************************************* #
|
-- # Multiplier and Divider unit. Implements the RISC-V RV32-M CPU extension. #
|
-- # Multiplier and Divider unit. Implements the RISC-V RV32-M CPU extension. #
|
-- # Multiplier core (signed/unsigned) uses serial algorithm. -> 32+4 cycles latency #
|
-- # Multiplier core (signed/unsigned) uses serial algorithm. -> 32+4 cycles latency #
|
-- # Divider core (unsigned) uses serial algorithm. -> 32+6 cycles latency #
|
-- # Divider core (unsigned) uses serial algorithm. -> 32+6 cycles latency #
|
|
-- # Multiplications can be mapped to DSP blocks when FAST_MUL_EN = true. #
|
-- # ********************************************************************************************* #
|
-- # ********************************************************************************************* #
|
-- # BSD 3-Clause License #
|
-- # BSD 3-Clause License #
|
-- # #
|
-- # #
|
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. #
|
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. #
|
-- # #
|
-- # #
|
Line 50... |
Line 51... |
port (
|
port (
|
-- global control --
|
-- global control --
|
clk_i : in std_ulogic; -- global clock, rising edge
|
clk_i : in std_ulogic; -- global clock, rising edge
|
rstn_i : in std_ulogic; -- global reset, low-active, async
|
rstn_i : in std_ulogic; -- global reset, low-active, async
|
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
|
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
|
-- data input --
|
|
start_i : in std_ulogic; -- trigger operation
|
start_i : in std_ulogic; -- trigger operation
|
|
-- data input --
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
-- result and status --
|
-- result and status --
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
valid_o : out std_ulogic -- data output valid
|
valid_o : out std_ulogic -- data output valid
|
Line 104... |
Line 105... |
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
coprocessor_ctrl: process(rstn_i, clk_i)
|
coprocessor_ctrl: process(rstn_i, clk_i)
|
begin
|
begin
|
if (rstn_i = '0') then
|
if (rstn_i = '0') then
|
state <= IDLE;
|
state <= IDLE;
|
cp_op <= (others => '0');
|
|
opx <= (others => '0');
|
opx <= (others => '0');
|
opy <= (others => '0');
|
opy <= (others => '0');
|
cnt <= (others => '0');
|
cnt <= (others => '0');
|
start <= '0';
|
start <= '0';
|
valid_o <= '0';
|
valid_o <= '0';
|
Line 123... |
Line 123... |
case state is
|
case state is
|
when IDLE =>
|
when IDLE =>
|
opx <= rs1_i;
|
opx <= rs1_i;
|
opy <= rs2_i;
|
opy <= rs2_i;
|
if (start_i = '1') then
|
if (start_i = '1') then
|
cp_op <= ctrl_i(ctrl_cp_cmd2_c downto ctrl_cp_cmd0_c);
|
|
state <= DECODE;
|
state <= DECODE;
|
end if;
|
end if;
|
|
|
when DECODE =>
|
when DECODE =>
|
--
|
--
|
Line 187... |
Line 186... |
state <= IDLE;
|
state <= IDLE;
|
end case;
|
end case;
|
end if;
|
end if;
|
end process coprocessor_ctrl;
|
end process coprocessor_ctrl;
|
|
|
|
-- co-processor command: cp_op is driven combinationally from a 3-bit slice of the main control bus (ctrl_i) --
-- NOTE(review): the slice bounds are named ctrl_ir_funct3_*; presumably this is the instruction's funct3 field - confirm in the package --
|
|
cp_op <= ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c);
|
|
|
-- operation: '1' when cp_op selects a divide/remainder command (div, divu, rem, remu), '0' for every other command --
|
-- operation: the iterative multiplier only updates mul_product while operation = '0' --
|
operation <= '1' when (cp_op = cp_op_div_c) or (cp_op = cp_op_divu_c) or (cp_op = cp_op_rem_c) or (cp_op = cp_op_remu_c) else '0';
|
operation <= '1' when (cp_op = cp_op_div_c) or (cp_op = cp_op_divu_c) or (cp_op = cp_op_rem_c) or (cp_op = cp_op_remu_c) else '0';
|
|
|
-- opx (rs1) signed? '1' for the mulh, mulhsu, div and rem commands, '0' otherwise --
|
-- opx (rs1) signed? gates the extra sign-extension bit prepended to opx for the DSP multiplier operand (mul_op_x) --
|
opx_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_mulhsu_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';
|
opx_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_mulhsu_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';
|
|
|
-- opy (rs2) signed? '1' for the mulh, div and rem commands, '0' otherwise (note: mulhsu is NOT included here, unlike opx_is_signed) --
|
-- opy (rs2) signed? gates the extra sign-extension bit prepended to opy for the DSP multiplier operand (mul_op_y) --
|
opy_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';
|
opy_is_signed <= '1' when (cp_op = cp_op_mulh_c) or (cp_op = cp_op_div_c) or (cp_op = cp_op_rem_c) else '0';
|
|
|
|
|
-- Multiplier Core (signed) ---------------------------------------------------------------
|
-- Multiplier Core (signed/unsigned) ------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
multiplier_core: process(clk_i)
|
multiplier_core: process(clk_i)
|
begin
|
begin
|
if rising_edge(clk_i) then
|
if rising_edge(clk_i) then
|
|
-- ---------------------------------------------------------
|
if (FAST_MUL_EN = false) then -- use small iterative computation
|
if (FAST_MUL_EN = false) then -- use small iterative computation
|
if (start = '1') then -- start new multiplication
|
if (start = '1') then -- start new multiplication
|
mul_product(63 downto 32) <= (others => '0');
|
mul_product(63 downto 32) <= (others => '0');
|
mul_product(31 downto 00) <= opy;
|
mul_product(31 downto 00) <= opy;
|
elsif ((state = PROCESSING) or (state = FINALIZE)) and (operation = '0') then
|
elsif ((state = PROCESSING) or (state = FINALIZE)) and (operation = '0') then
|
mul_product(63 downto 31) <= mul_do_add(32 downto 0);
|
mul_product(63 downto 31) <= mul_do_add(32 downto 0);
|
mul_product(30 downto 00) <= mul_product(31 downto 1);
|
mul_product(30 downto 00) <= mul_product(31 downto 1);
|
end if;
|
end if;
|
|
-- ---------------------------------------------------------
|
else -- use direct approach using (several!) DSP blocks
|
else -- use direct approach using (several!) DSP blocks
|
if (start = '1') then
|
if (start = '1') then
|
mul_op_x <= signed((opx(opx'left) and opx_is_signed) & opx);
|
mul_op_x <= signed((opx(opx'left) and opx_is_signed) & opx);
|
mul_op_y <= signed((opy(opy'left) and opy_is_signed) & opy);
|
mul_op_y <= signed((opy(opy'left) and opy_is_signed) & opy);
|
end if;
|
end if;
|