Line 5... |
Line 5... |
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related) #
|
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related) #
|
-- # operations. Use the FAST_SHIFT_EN option to reduce shift-related instruction's latency to a #
|
-- # operations. Use the FAST_SHIFT_EN option to reduce shift-related instruction's latency to a #
|
-- # fixed value of 3 cycles latency (using barrel shifters). #
|
-- # fixed value of 3 cycles latency (using barrel shifters). #
|
-- # #
|
-- # #
|
-- # Supported sub-extensions (Zb*): #
|
-- # Supported sub-extensions (Zb*): #
|
|
-- # - Zba: Address generation instructions #
|
-- # - Zbb: Basic bit-manipulation instructions #
|
-- # - Zbb: Basic bit-manipulation instructions #
|
-- # ********************************************************************************************* #
|
-- # ********************************************************************************************* #
|
-- # BSD 3-Clause License #
|
-- # BSD 3-Clause License #
|
-- # #
|
-- # #
|
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. #
|
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. #
|
Line 59... |
Line 60... |
start_i : in std_ulogic; -- trigger operation
|
start_i : in std_ulogic; -- trigger operation
|
-- data input --
|
-- data input --
|
cmp_i : in std_ulogic_vector(1 downto 0); -- comparator status
|
cmp_i : in std_ulogic_vector(1 downto 0); -- comparator status
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs1_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
rs2_i : in std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
|
|
shamt_i : in std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); -- shift amount
|
-- result and status --
|
-- result and status --
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
res_o : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
|
valid_o : out std_ulogic -- data output valid
|
valid_o : out std_ulogic -- data output valid
|
);
|
);
|
end neorv32_cpu_cp_bitmanip;
|
end neorv32_cpu_cp_bitmanip;
|
|
|
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
|
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
|
|
|
-- commands: logic with negate --
|
-- Sub-extension configuration --
|
|
constant zbb_en_c : boolean := true;
|
|
constant zba_en_c : boolean := true;
|
|
-- --------------------------- --
|
|
|
|
-- commands: Zbb - logic with negate --
|
constant op_andn_c : natural := 0;
|
constant op_andn_c : natural := 0;
|
constant op_orn_c : natural := 1;
|
constant op_orn_c : natural := 1;
|
constant op_xnor_c : natural := 2;
|
constant op_xnor_c : natural := 2;
|
-- commands: count leading/trailing zero bits --
|
-- commands: Zbb - count leading/trailing zero bits --
|
constant op_clz_c : natural := 3;
|
constant op_clz_c : natural := 3;
|
constant op_ctz_c : natural := 4;
|
constant op_ctz_c : natural := 4;
|
-- commands: count population --
|
-- commands: Zbb - count population --
|
constant op_cpop_c : natural := 5;
|
constant op_cpop_c : natural := 5;
|
-- commands: integer minimum/maximum --
|
-- commands: Zbb - integer minimum/maximum --
|
constant op_max_c : natural := 6; -- signed/unsigned
|
constant op_max_c : natural := 6; -- signed/unsigned
|
constant op_min_c : natural := 7; -- signed/unsigned
|
constant op_min_c : natural := 7; -- signed/unsigned
|
-- commands: sign- and zero-extension --
|
-- commands: Zbb - sign- and zero-extension --
|
constant op_sextb_c : natural := 8;
|
constant op_sextb_c : natural := 8;
|
constant op_sexth_c : natural := 9;
|
constant op_sexth_c : natural := 9;
|
constant op_zexth_c : natural := 10;
|
constant op_zexth_c : natural := 10;
|
-- commands: bitwise rotation --
|
-- commands: Zbb - bitwise rotation --
|
constant op_rol_c : natural := 11;
|
constant op_rol_c : natural := 11;
|
constant op_ror_c : natural := 12; -- rori
|
constant op_ror_c : natural := 12; -- rori
|
-- commands: or-combine --
|
-- commands: Zbb - or-combine --
|
constant op_orcb_c : natural := 13;
|
constant op_orcb_c : natural := 13;
|
-- commands: byte-reverse --
|
-- commands: Zbb - byte-reverse --
|
constant op_rev8_c : natural := 14;
|
constant op_rev8_c : natural := 14;
|
|
-- commands: Zba - shifted add --
|
|
constant op_sh1add_c : natural := 15;
|
|
constant op_sh2add_c : natural := 16;
|
|
constant op_sh3add_c : natural := 17;
|
--
|
--
|
constant op_width_c : natural := 15;
|
constant op_width_c : natural := 18;
|
|
|
-- controller --
|
-- controller --
|
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT);
|
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT);
|
signal ctrl_state : ctrl_state_t;
|
signal ctrl_state : ctrl_state_t;
|
signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
|
signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
|
signal valid : std_ulogic;
|
signal valid : std_ulogic;
|
|
|
-- operand buffers --
|
-- operand buffers --
|
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
|
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
|
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
|
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
|
|
signal sha_reg : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0);
|
signal less_ff : std_ulogic;
|
signal less_ff : std_ulogic;
|
|
|
-- shift amount (immediate or register) --
|
|
signal shamt : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0);
|
|
|
|
-- serial shifter --
|
-- serial shifter --
|
type shifter_t is record
|
type shifter_t is record
|
start : std_ulogic;
|
start : std_ulogic;
|
run : std_ulogic;
|
run : std_ulogic;
|
bcnt : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- bit counter
|
bcnt : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- bit counter
|
Line 126... |
Line 135... |
|
|
-- operation results --
|
-- operation results --
|
type res_t is array (0 to op_width_c-1) of std_ulogic_vector(data_width_c-1 downto 0);
|
type res_t is array (0 to op_width_c-1) of std_ulogic_vector(data_width_c-1 downto 0);
|
signal res_int, res_out : res_t;
|
signal res_int, res_out : res_t;
|
|
|
|
-- shifted-add unit --
|
|
signal adder_core : std_ulogic_vector(data_width_c-1 downto 0);
|
|
|
begin
|
begin
|
|
|
|
-- Sub-Extension Configuration ------------------------------------------------------------
|
|
-- -------------------------------------------------------------------------------------------
|
|
-- Elaboration-time note listing the enabled B sub-extensions.
-- Each enabled name carries a trailing blank so that several enabled sub-extensions are
-- printed as separate words ("Zbb Zba ") instead of being fused together ("ZbbZba").
-- A disabled sub-extension contributes the empty string.
assert false report
|
|
"Implementing bit-manipulation (B) sub-extensions: " &
|
|
cond_sel_string_f(zbb_en_c, "Zbb ", "") &
|
|
cond_sel_string_f(zba_en_c, "Zba ", "") &
|
|
""
|
|
severity note;
|
|
|
|
|
-- Instruction Decoding (One-Hot) ---------------------------------------------------------
|
-- Instruction Decoding (One-Hot) ---------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- a minimal decoding logic is used here -> just to distinguish between B.Zbb instructions
|
-- a minimal decoding logic is used here -> just to distinguish between the supported B (Zbb and Zba) instructions
|
-- a more specific decoding and instruction check is done by the CPU control unit
|
-- a more specific decoding and instruction check is done by the CPU control unit
|
|
|
-- Zbb - Basic bit-manipulation instructions --
|
-- Zbb - Basic bit-manipulation instructions --
|
cmd(op_andn_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
|
cmd(op_andn_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
|
cmd(op_orn_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
|
cmd(op_orn_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
|
cmd(op_xnor_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
|
cmd(op_xnor_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
|
--
|
--
|
cmd(op_max_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '1') else '0';
|
cmd(op_max_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '1') else '0';
|
cmd(op_min_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '0') else '0';
|
cmd(op_min_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '0') else '0';
|
cmd(op_zexth_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
|
cmd(op_zexth_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
|
--
|
--
|
cmd(op_orcb_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") else '0';
|
cmd(op_orcb_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
|
--
|
--
|
cmd(op_clz_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
|
cmd(op_clz_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
|
cmd(op_ctz_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
|
cmd(op_ctz_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
|
cmd(op_cpop_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") else '0';
|
cmd(op_cpop_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") else '0';
|
cmd(op_sextb_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") else '0';
|
cmd(op_sextb_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") else '0';
|
cmd(op_sexth_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") else '0';
|
cmd(op_sexth_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") else '0';
|
cmd(op_rol_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
|
cmd(op_rol_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
|
cmd(op_ror_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
|
cmd(op_ror_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
|
cmd(op_rev8_c) <= '1' when (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
|
cmd(op_rev8_c) <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
|
|
|
|
-- Zba - Address generation instructions --
-- One-hot decode of the three shifted-add commands. All three require zba_en_c = true,
-- the funct12_10..funct12_9 field = "01" and funct12_7 = '0'; the funct3_2..funct3_1
-- field then selects the variant: "01" -> sh1add, "10" -> sh2add, "11" -> sh3add.
-- With zba_en_c = false every condition is constant false and the command bits stay '0'.
|
|
cmd(op_sh1add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
|
|
cmd(op_sh2add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
|
|
cmd(op_sh3add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
|
|
|
|
|
-- Co-Processor Controller ----------------------------------------------------------------
|
-- Co-Processor Controller ----------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
coprocessor_ctrl: process(rstn_i, clk_i)
|
coprocessor_ctrl: process(rstn_i, clk_i)
|
Line 163... |
Line 190... |
if (rstn_i = '0') then
|
if (rstn_i = '0') then
|
ctrl_state <= S_IDLE;
|
ctrl_state <= S_IDLE;
|
cmd_buf <= (others => def_rst_val_c);
|
cmd_buf <= (others => def_rst_val_c);
|
rs1_reg <= (others => def_rst_val_c);
|
rs1_reg <= (others => def_rst_val_c);
|
rs2_reg <= (others => def_rst_val_c);
|
rs2_reg <= (others => def_rst_val_c);
|
|
sha_reg <= (others => def_rst_val_c);
|
less_ff <= def_rst_val_c;
|
less_ff <= def_rst_val_c;
|
shifter.start <= '0';
|
shifter.start <= '0';
|
valid <= '0';
|
valid <= '0';
|
elsif rising_edge(clk_i) then
|
elsif rising_edge(clk_i) then
|
-- defaults --
|
-- defaults --
|
Line 181... |
Line 209... |
if (start_i = '1') then
|
if (start_i = '1') then
|
less_ff <= cmp_i(cmp_less_c);
|
less_ff <= cmp_i(cmp_less_c);
|
cmd_buf <= cmd;
|
cmd_buf <= cmd;
|
rs1_reg <= rs1_i;
|
rs1_reg <= rs1_i;
|
rs2_reg <= rs2_i;
|
rs2_reg <= rs2_i;
|
|
sha_reg <= shamt_i;
|
if ((cmd(op_clz_c) or cmd(op_ctz_c) or cmd(op_cpop_c) or cmd(op_ror_c) or cmd(op_rol_c)) = '1') then -- multi-cycle shift operation
|
if ((cmd(op_clz_c) or cmd(op_ctz_c) or cmd(op_cpop_c) or cmd(op_ror_c) or cmd(op_rol_c)) = '1') then -- multi-cycle shift operation
|
if (FAST_SHIFT_EN = false) then -- default: iterative computation
|
if (FAST_SHIFT_EN = false) then -- default: iterative computation
|
shifter.start <= '1';
|
shifter.start <= '1';
|
ctrl_state <= S_START_SHIFT;
|
ctrl_state <= S_START_SHIFT;
|
else -- full-parallel computation
|
else -- full-parallel computation
|
Line 214... |
Line 243... |
end case;
|
end case;
|
end if;
|
end if;
|
end process coprocessor_ctrl;
|
end process coprocessor_ctrl;
|
|
|
|
|
-- Shift Amount ---------------------------------------------------------------------------
|
|
-- -------------------------------------------------------------------------------------------
|
|
-- we could also use ALU's internal operand B - but we are having a local version here in order to allow
|
|
-- better logic combination inside the ALU (since that is the critical path of the CPU)
|
|
shamt <= ctrl_i(ctrl_ir_funct12_0_c+shamt'left downto ctrl_ir_funct12_0_c) when (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else rs2_reg(shamt'left downto 0);
|
|
|
|
|
|
-- Shifter Function Core (iterative: small but slow) --------------------------------------
|
-- Shifter Function Core (iterative: small but slow) --------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
serial_shifter:
|
serial_shifter:
|
if (FAST_SHIFT_EN = false) generate
|
if (FAST_SHIFT_EN = false) generate
|
shifter_unit: process(rstn_i, clk_i)
|
shifter_unit: process(rstn_i, clk_i)
|
Line 247... |
Line 269... |
-- max shift amount --
|
-- max shift amount --
|
if (cmd_buf(op_cpop_c) = '1') then -- population count
|
if (cmd_buf(op_cpop_c) = '1') then -- population count
|
shifter.cnt_max <= (others => '0');
|
shifter.cnt_max <= (others => '0');
|
shifter.cnt_max(shifter.cnt_max'left) <= '1';
|
shifter.cnt_max(shifter.cnt_max'left) <= '1';
|
else
|
else
|
shifter.cnt_max <= '0' & shamt;
|
shifter.cnt_max <= '0' & sha_reg;
|
end if;
|
end if;
|
shifter.bcnt <= (others => '0');
|
shifter.bcnt <= (others => '0');
|
elsif (shifter.run = '1') then -- right shifts only
|
elsif (shifter.run = '1') then -- right shifts only
|
new_bit_v := ((cmd_buf(op_ror_c) or cmd_buf(op_rol_c)) and shifter.sreg(0)) or (cmd_buf(op_clz_c) or cmd_buf(op_ctz_c));
|
new_bit_v := ((cmd_buf(op_ror_c) or cmd_buf(op_rol_c)) and shifter.sreg(0)) or (cmd_buf(op_clz_c) or cmd_buf(op_ctz_c));
|
shifter.sreg <= new_bit_v & shifter.sreg(shifter.sreg'left downto 1); -- ro[r/l]/lsr(for counting)
|
shifter.sreg <= new_bit_v & shifter.sreg(shifter.sreg'left downto 1); -- ro[r/l]/lsr(for counting)
|
Line 311... |
Line 333... |
end generate;
|
end generate;
|
|
|
-- barrel shifter array --
|
-- barrel shifter array --
|
barrel_shifter_async:
|
barrel_shifter_async:
|
if (FAST_SHIFT_EN = true) generate
|
if (FAST_SHIFT_EN = true) generate
|
shifter_unit_async: process(rs1_reg, shamt, cmd_buf, bs_level)
|
shifter_unit_async: process(rs1_reg, sha_reg, cmd_buf, bs_level)
|
begin
|
begin
|
-- input level: convert left shifts to right shifts --
|
-- input level: convert left shifts to right shifts --
|
if (cmd_buf(op_rol_c) = '1') then -- is left shift?
|
if (cmd_buf(op_rol_c) = '1') then -- is left shift?
|
bs_level(index_size_f(data_width_c)) <= bit_rev_f(rs1_reg); -- reverse bit order of input operand
|
bs_level(index_size_f(data_width_c)) <= bit_rev_f(rs1_reg); -- reverse bit order of input operand
|
else
|
else
|
bs_level(index_size_f(data_width_c)) <= rs1_reg;
|
bs_level(index_size_f(data_width_c)) <= rs1_reg;
|
end if;
|
end if;
|
|
|
-- shifter array --
|
-- shifter array --
|
for i in index_size_f(data_width_c)-1 downto 0 loop
|
for i in index_size_f(data_width_c)-1 downto 0 loop
|
if (shamt(i) = '1') then
|
if (sha_reg(i) = '1') then
|
bs_level(i)(data_width_c-1 downto data_width_c-(2**i)) <= bs_level(i+1)((2**i)-1 downto 0);
|
bs_level(i)(data_width_c-1 downto data_width_c-(2**i)) <= bs_level(i+1)((2**i)-1 downto 0);
|
bs_level(i)((data_width_c-(2**i))-1 downto 0) <= bs_level(i+1)(data_width_c-1 downto 2**i);
|
bs_level(i)((data_width_c-(2**i))-1 downto 0) <= bs_level(i+1)(data_width_c-1 downto 2**i);
|
else
|
else
|
bs_level(i) <= bs_level(i+1);
|
bs_level(i) <= bs_level(i+1);
|
end if;
|
end if;
|
end loop;
|
end loop;
|
end process shifter_unit_async;
|
end process shifter_unit_async;
|
end generate;
|
end generate;
|
|
|
|
|
|
-- Shifted-Add Core -----------------------------------------------------------------------
|
|
-- -------------------------------------------------------------------------------------------
|
|
-- Combinational shifted-add core shared by sh1add/sh2add/sh3add:
--   adder_core = rs2_reg + (rs1_reg << n), with n = 1, 2 or 3.
-- The shift distance is selected by the funct3_2..funct3_1 field of ctrl_i. The sum has the
-- same width as the operands (data_width_c bits), so any carry out of the MSB is dropped.
-- NOTE(review): the selector is read from the live ctrl_i bus, not from the buffered command
-- (cmd_buf); this presumes ctrl_i still carries the instruction when the result is used - confirm.
shift_adder: process(rs1_reg, rs2_reg, ctrl_i)
|
|
variable opb_v : std_ulogic_vector(data_width_c-1 downto 0);
|
|
begin
|
|
case ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) is
|
|
when "01" => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0'; -- rs1 << 1 (sh1add)
|
|
when "10" => opb_v := rs1_reg(rs1_reg'left-2 downto 0) & "00"; -- rs1 << 2 (sh2add)
|
|
when "11" => opb_v := rs1_reg(rs1_reg'left-3 downto 0) & "000"; -- rs1 << 3 (sh3add)
|
|
when others => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0'; -- not a shifted-add selector: fall back to << 1
|
|
end case;
|
|
adder_core <= std_ulogic_vector(unsigned(rs2_reg) + unsigned(opb_v)); -- rs2 + shifted rs1
|
|
end process shift_adder;
|
|
|
|
|
-- Operation Results ----------------------------------------------------------------------
|
-- Operation Results ----------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- logic with negate --
|
-- logic with negate --
|
res_int(op_andn_c) <= rs1_reg and (not rs2_reg); -- logical and-not
|
res_int(op_andn_c) <= rs1_reg and (not rs2_reg); -- logical and-not
|
res_int(op_orn_c) <= rs1_reg or (not rs2_reg); -- logical or-not
|
res_int(op_orn_c) <= rs1_reg or (not rs2_reg); -- logical or-not
|
Line 374... |
Line 411... |
end generate; -- i
|
end generate; -- i
|
|
|
-- reversal.8 (byte swap) --
|
-- reversal.8 (byte swap) --
|
res_int(op_rev8_c) <= bswap32_f(rs1_reg);
|
res_int(op_rev8_c) <= bswap32_f(rs1_reg);
|
|
|
|
-- address generation instructions --
-- All three shifted-add commands share the single adder_core result, which is routed through
-- the op_sh1add_c slot only; the op_sh2add_c and op_sh3add_c slots are tied to zero.
|
|
res_int(op_sh1add_c) <= adder_core;
|
|
res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant (result is taken from the op_sh1add_c slot)
|
|
res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant (result is taken from the op_sh1add_c slot)
|
|
|
|
|
-- Output Selector ------------------------------------------------------------------------
|
-- Output Selector ------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
res_out(op_andn_c) <= res_int(op_andn_c) when (cmd_buf(op_andn_c) = '1') else (others => '0');
|
res_out(op_andn_c) <= res_int(op_andn_c) when (cmd_buf(op_andn_c) = '1') else (others => '0');
|
res_out(op_orn_c) <= res_int(op_orn_c) when (cmd_buf(op_orn_c) = '1') else (others => '0');
|
res_out(op_orn_c) <= res_int(op_orn_c) when (cmd_buf(op_orn_c) = '1') else (others => '0');
|
Line 392... |
Line 434... |
res_out(op_zexth_c) <= res_int(op_zexth_c) when (cmd_buf(op_zexth_c) = '1') else (others => '0');
|
res_out(op_zexth_c) <= res_int(op_zexth_c) when (cmd_buf(op_zexth_c) = '1') else (others => '0');
|
res_out(op_ror_c) <= res_int(op_ror_c) when (cmd_buf(op_ror_c) = '1') else (others => '0');
|
res_out(op_ror_c) <= res_int(op_ror_c) when (cmd_buf(op_ror_c) = '1') else (others => '0');
|
res_out(op_rol_c) <= res_int(op_rol_c) when (cmd_buf(op_rol_c) = '1') else (others => '0');
|
res_out(op_rol_c) <= res_int(op_rol_c) when (cmd_buf(op_rol_c) = '1') else (others => '0');
|
res_out(op_orcb_c) <= res_int(op_orcb_c) when (cmd_buf(op_orcb_c) = '1') else (others => '0');
|
res_out(op_orcb_c) <= res_int(op_orcb_c) when (cmd_buf(op_orcb_c) = '1') else (others => '0');
|
res_out(op_rev8_c) <= res_int(op_rev8_c) when (cmd_buf(op_rev8_c) = '1') else (others => '0');
|
res_out(op_rev8_c) <= res_int(op_rev8_c) when (cmd_buf(op_rev8_c) = '1') else (others => '0');
|
|
-- shifted-add result: the op_sh1add_c slot is enabled when any of the buffered
-- sh1add/sh2add/sh3add command bits is set; the other two slots are constant zero
|
|
res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c)) = '1') else (others => '0');
|
|
res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
|
|
res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
|
|
|
|
|
-- Output Gate ----------------------------------------------------------------------------
|
-- Output Gate ----------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
-- -------------------------------------------------------------------------------------------
|
output_gate: process(rstn_i, clk_i)
|
output_gate: process(rstn_i, clk_i)
|
Line 408... |
Line 454... |
res_o <= res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or
|
res_o <= res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or
|
res_out(op_clz_c) or res_out(op_cpop_c) or -- res_out(op_ctz_c) is unused here
|
res_out(op_clz_c) or res_out(op_cpop_c) or -- res_out(op_ctz_c) is unused here
|
res_out(op_min_c) or -- res_out(op_max_c) is unused here
|
res_out(op_min_c) or -- res_out(op_max_c) is unused here
|
res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
|
res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
|
res_out(op_ror_c) or res_out(op_rol_c) or
|
res_out(op_ror_c) or res_out(op_rol_c) or
|
res_out(op_orcb_c) or res_out(op_rev8_c);
|
res_out(op_orcb_c) or res_out(op_rev8_c) or
|
|
res_out(op_sh1add_c); -- res_out(op_sh2add_c) and res_out(op_sh3add_c) are unused here
|
end if;
|
end if;
|
end if;
|
end if;
|
end process output_gate;
|
end process output_gate;
|
|
|
-- valid output --
|
-- valid output --
|