OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_bitmanip.vhd] - Diff between revs 66 and 71

Go to most recent revision | Show entire file | Details | Blame | View Log

Rev 66 Rev 71
Line 1... Line 1...
-- #################################################################################################
-- #################################################################################################
-- # << NEORV32 - CPU Co-Processor: Bit-Manipulation Co-Processor Unit (RISC-V "B" Extension) >>   #
-- # << NEORV32 - CPU Co-Processor: Bit-Manipulation Co-Processor Unit (RISC-V "B" Extension) >>   #
-- # ********************************************************************************************* #
-- # ********************************************************************************************* #
-- # The bit manipulation unit is implemented as co-processor that has a processing latency of 1   #
-- # Supported B sub-extensions (Zb*):                                                             #
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related)  #
-- # - Zba: Address-generation instructions                                                        #
-- # operations. Use the FAST_SHIFT_EN option to reduce shift-related instruction's latency to a   #
 
-- # fixed value of 3 cycles latency (using barrel shifters).                                      #
 
-- #                                                                                               #
 
-- # Supported sub-extensions (Zb*):                                                               #
 
-- # - Zba: Address generation instructions                                                        #
 
-- # - Zbb: Basic bit-manipulation instructions                                                    #
-- # - Zbb: Basic bit-manipulation instructions                                                    #
 
-- # - Zbs: Single-bit instructions                                                                #
 
-- # - Zbc: Carry-less multiplication instructions                                                 #
 
-- #                                                                                               #
 
-- # NOTE: This is a first implementation of the bit-manipulation co-processor that supports all   #
 
-- #       sub-sets of the B extension. Hence, it is not yet optimized for area, latency or speed. #
-- # ********************************************************************************************* #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License                                                                          #
-- # BSD 3-Clause License                                                                          #
-- #                                                                                               #
-- #                                                                                               #
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved.                                     #
-- #                                                                                               #
-- #                                                                                               #
-- # Redistribution and use in source and binary forms, with or without modification, are          #
-- # Redistribution and use in source and binary forms, with or without modification, are          #
-- # permitted provided that the following conditions are met:                                     #
-- # permitted provided that the following conditions are met:                                     #
-- #                                                                                               #
-- #                                                                                               #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
Line 69... Line 69...
  );
  );
end neorv32_cpu_cp_bitmanip;
end neorv32_cpu_cp_bitmanip;
 
 
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
 
 
  -- Sub-extension configuration --
  -- Sub-extension configuration ----------------------------
 
  -- Note that this configurations does NOT effect the CPU's (illegal) instruction decoding logic!
  constant zbb_en_c : boolean := true;
  constant zbb_en_c : boolean := true;
  constant zba_en_c : boolean := true;
  constant zba_en_c : boolean := true;
  -- --------------------------- --
  constant zbc_en_c : boolean := true;
 
  constant zbs_en_c : boolean := true;
 
  -- --------------------------------------------------------
 
 
  -- commands: Zbb - logic with negate --
  -- Zbb - logic with negate --
  constant op_andn_c    : natural := 0;
  constant op_andn_c    : natural := 0;
  constant op_orn_c     : natural := 1;
  constant op_orn_c     : natural := 1;
  constant op_xnor_c    : natural := 2;
  constant op_xnor_c    : natural := 2;
  -- commands: Zbb - count leading/trailing zero bits --
  -- Zbb - count leading/trailing zero bits --
  constant op_clz_c     : natural := 3;
  constant op_clz_c     : natural := 3;
  constant op_ctz_c     : natural := 4;
  constant op_ctz_c     : natural := 4;
  -- commands: Zbb - count population --
  -- Zbb - count population --
  constant op_cpop_c    : natural := 5;
  constant op_cpop_c    : natural := 5;
  -- commands: Zbb - integer minimum/maximum --
  -- Zbb - integer minimum/maximum --
  constant op_max_c     : natural := 6; -- signed/unsigned
  constant op_max_c     : natural := 6; -- signed/unsigned
  constant op_min_c     : natural := 7; -- signed/unsigned
  constant op_min_c     : natural := 7; -- signed/unsigned
  -- commands: Zbb - sign- and zero-extension --
  -- Zbb - sign- and zero-extension --
  constant op_sextb_c   : natural := 8;
  constant op_sextb_c   : natural := 8;
  constant op_sexth_c   : natural := 9;
  constant op_sexth_c   : natural := 9;
  constant op_zexth_c   : natural := 10;
  constant op_zexth_c   : natural := 10;
  -- commands: Zbb - bitwise rotation --
  -- Zbb - bitwise rotation --
  constant op_rol_c     : natural := 11;
  constant op_rol_c     : natural := 11;
  constant op_ror_c     : natural := 12; -- rori
  constant op_ror_c     : natural := 12; -- also rori
  -- commands: Zbb - or-combine --
  -- Zbb - or-combine --
  constant op_orcb_c    : natural := 13;
  constant op_orcb_c    : natural := 13;
  -- commands: Zbb - byte-reverse --
  -- Zbb - byte-reverse --
  constant op_rev8_c    : natural := 14;
  constant op_rev8_c    : natural := 14;
  -- commands: Zba - shifted add --
  -- Zba - shifted-add --
  constant op_sh1add_c  : natural := 15;
  constant op_sh1add_c  : natural := 15;
  constant op_sh2add_c  : natural := 16;
  constant op_sh2add_c  : natural := 16;
  constant op_sh3add_c  : natural := 17;
  constant op_sh3add_c  : natural := 17;
 
  -- Zbs - single-bit operations --
 
  constant op_bclr_c    : natural := 18;
 
  constant op_bext_c    : natural := 19;
 
  constant op_binv_c    : natural := 20;
 
  constant op_bset_c    : natural := 21;
 
  -- Zbc - carry-less multiplication --
 
  constant op_clmul_c   : natural := 22;
 
  constant op_clmulh_c  : natural := 23;
 
  constant op_clmulr_c  : natural := 24;
  --
  --
  constant op_width_c   : natural := 18;
  constant op_width_c   : natural := 25;
 
 
  -- controller --
  -- controller --
  type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT);
  type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_START_CLMUL, S_BUSY_CLMUL);
  signal ctrl_state   : ctrl_state_t;
  signal ctrl_state   : ctrl_state_t;
  signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
  signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
  signal valid        : std_ulogic;
  signal valid        : std_ulogic;
 
 
  -- operand buffers --
  -- operand buffers --
Line 138... Line 150...
  signal res_int, res_out : res_t;
  signal res_int, res_out : res_t;
 
 
  -- shifted-add unit --
  -- shifted-add unit --
  signal adder_core : std_ulogic_vector(data_width_c-1 downto 0);
  signal adder_core : std_ulogic_vector(data_width_c-1 downto 0);
 
 
 
  -- one-hot shifter --
 
  signal one_hot_core : std_ulogic_vector(data_width_c-1 downto 0);
 
 
 
  -- carry-less multiplier --
 
  type clmultiplier_t is record
 
    start : std_ulogic;
 
    busy  : std_ulogic;
 
    rs2   : std_ulogic_vector(data_width_c-1 downto 0);
 
    cnt   : std_ulogic_vector(index_size_f(data_width_c) downto 0);
 
    prod  : std_ulogic_vector(2*data_width_c-1 downto 0);
 
  end record;
 
  signal clmul : clmultiplier_t;
 
 
begin
begin
 
 
  -- Sub-Extension Configuration ------------------------------------------------------------
  -- Sub-Extension Configuration ------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  assert false report
  assert false report
  "Implementing bit-manipulation (B) sub-extensions: " &
  "NEORV32 CPU: Implementing bit-manipulation (B) sub-extensions " &
  cond_sel_string_f(zbb_en_c, "Zbb", "") &
 
  cond_sel_string_f(zba_en_c, "Zba", "") &
  cond_sel_string_f(zba_en_c, "Zba", "") &
 
  cond_sel_string_f(zbb_en_c, "Zbb ", "") &
 
  cond_sel_string_f(zbc_en_c, "Zbc ", "") &
 
  cond_sel_string_f(zbs_en_c, "Zbs ", "") &
  ""
  ""
  severity note;
  severity note;
 
 
 
 
  -- Instruction Decoding (One-Hot) ---------------------------------------------------------
  -- Instruction Decoding (One-Hot) ---------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- a minimal decoding logic is used here -> just to distinguish between B.Zbb instructions
  -- a minimal decoding logic is used here just to distinguish between the different B instruction
  -- a more specific decoding and instruction check is done by the CPU control unit
  -- a more precise decoding and valid-instruction check is done by the CPU control unit
 
 
  -- Zbb - Basic bit-manipulation instructions --
  -- Zbb - Basic bit-manipulation instructions --
  cmd(op_andn_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
  cmd(op_andn_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
  cmd(op_orn_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
  cmd(op_orn_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
  cmd(op_xnor_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
  cmd(op_xnor_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
  --
  --
  cmd(op_max_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '1') else '0';
  cmd(op_max_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
  cmd(op_min_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '0') else '0';
  cmd(op_min_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
  cmd(op_zexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
  cmd(op_zexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
  --
  --
  cmd(op_orcb_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
  cmd(op_orcb_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
  --
  --
  cmd(op_clz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
  cmd(op_clz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
  cmd(op_ctz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
  cmd(op_ctz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
  cmd(op_cpop_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") else '0';
  cmd(op_cpop_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
  cmd(op_sextb_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") else '0';
  cmd(op_sextb_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") else '0';
  cmd(op_sexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") else '0';
  cmd(op_sexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") else '0';
  cmd(op_rol_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
  cmd(op_rol_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
  cmd(op_ror_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
  cmd(op_ror_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
  cmd(op_rev8_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
  cmd(op_rev8_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
 
 
  -- Zba - Address generation instructions --
  -- Zba - Address generation instructions --
  cmd(op_sh1add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
  cmd(op_sh1add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
  cmd(op_sh2add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
  cmd(op_sh2add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
  cmd(op_sh3add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
  cmd(op_sh3add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
 
 
 
  -- Zbs - Single-bit instructions --
 
  cmd(op_bclr_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
 
  cmd(op_bext_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '1') else '0';
 
  cmd(op_binv_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
 
  cmd(op_bset_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
 
 
 
  -- Zbc - Carry-less multiplication instructions --
 
  cmd(op_clmul_c)  <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") else '0';
 
  cmd(op_clmulh_c) <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "011") else '0';
 
  cmd(op_clmulr_c) <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "010") else '0';
 
 
 
 
  -- Co-Processor Controller ----------------------------------------------------------------
  -- Co-Processor Controller ----------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  coprocessor_ctrl: process(rstn_i, clk_i)
  coprocessor_ctrl: process(rstn_i, clk_i)
  begin
  begin
Line 192... Line 230...
      cmd_buf       <= (others => def_rst_val_c);
      cmd_buf       <= (others => def_rst_val_c);
      rs1_reg       <= (others => def_rst_val_c);
      rs1_reg       <= (others => def_rst_val_c);
      rs2_reg       <= (others => def_rst_val_c);
      rs2_reg       <= (others => def_rst_val_c);
      sha_reg       <= (others => def_rst_val_c);
      sha_reg       <= (others => def_rst_val_c);
      less_ff       <= def_rst_val_c;
      less_ff       <= def_rst_val_c;
 
      clmul.start   <= '0';
      shifter.start <= '0';
      shifter.start <= '0';
      valid         <= '0';
      valid         <= '0';
    elsif rising_edge(clk_i) then
    elsif rising_edge(clk_i) then
      -- defaults --
      -- defaults --
      shifter.start <= '0';
      shifter.start <= '0';
 
      clmul.start   <= '0';
      valid         <= '0';
      valid         <= '0';
 
 
      -- fsm --
      -- fsm --
      case ctrl_state is
      case ctrl_state is
 
 
Line 217... Line 257...
                shifter.start <= '1';
                shifter.start <= '1';
                ctrl_state <= S_START_SHIFT;
                ctrl_state <= S_START_SHIFT;
              else -- full-parallel computation
              else -- full-parallel computation
                ctrl_state <= S_BUSY_SHIFT;
                ctrl_state <= S_BUSY_SHIFT;
              end if;
              end if;
 
            elsif (zbc_en_c = true) and ((cmd(op_clmul_c) or cmd(op_clmulh_c) or cmd(op_clmulr_c)) = '1') then -- multi-cycle clmul operation
 
              clmul.start <= '1';
 
              ctrl_state  <= S_START_CLMUL;
            else
            else
              valid      <= '1';
              valid      <= '1';
              ctrl_state <= S_IDLE;
              ctrl_state <= S_IDLE;
            end if;
            end if;
          end if;
          end if;
Line 229... Line 272...
        -- ------------------------------------------------------------
        -- ------------------------------------------------------------
          ctrl_state <= S_BUSY_SHIFT;
          ctrl_state <= S_BUSY_SHIFT;
 
 
        when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish
        when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish
        -- ------------------------------------------------------------
        -- ------------------------------------------------------------
          if (shifter.run = '0') then
          if (shifter.run = '0') or (ctrl_i(ctrl_trap_c) = '1') then -- abort on trap
 
            valid      <= '1';
 
            ctrl_state <= S_IDLE;
 
          end if;
 
 
 
        when S_START_CLMUL => -- one cycle delay to start clmul operation
 
        -- ------------------------------------------------------------
 
          ctrl_state <= S_BUSY_CLMUL;
 
 
 
        when S_BUSY_CLMUL => -- wait for multi-cycle clmul operation to finish
 
        -- ------------------------------------------------------------
 
          if (clmul.busy = '0') or (ctrl_i(ctrl_trap_c) = '1') then -- abort on trap
            valid      <= '1';
            valid      <= '1';
            ctrl_state <= S_IDLE;
            ctrl_state <= S_IDLE;
          end if;
          end if;
 
 
        when others => -- undefined
        when others => -- undefined
Line 364... Line 418...
  begin
  begin
    case ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) is
    case ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) is
      when "01"   => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0';   -- << 1
      when "01"   => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0';   -- << 1
      when "10"   => opb_v := rs1_reg(rs1_reg'left-2 downto 0) & "00";  -- << 2
      when "10"   => opb_v := rs1_reg(rs1_reg'left-2 downto 0) & "00";  -- << 2
      when "11"   => opb_v := rs1_reg(rs1_reg'left-3 downto 0) & "000"; -- << 3
      when "11"   => opb_v := rs1_reg(rs1_reg'left-3 downto 0) & "000"; -- << 3
      when others => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0';   -- undefined
      when others => opb_v := (others => '-'); -- undefined
    end case;
    end case;
    adder_core <= std_ulogic_vector(unsigned(rs2_reg) + unsigned(opb_v));
    adder_core <= std_ulogic_vector(unsigned(rs2_reg) + unsigned(opb_v));
  end process shift_adder;
  end process shift_adder;
 
 
 
 
 
  -- One-Hot Generator Core -----------------------------------------------------------------
 
  -- -------------------------------------------------------------------------------------------
 
  shift_one_hot: process(sha_reg)
 
  begin
 
    one_hot_core <= (others => '0');
 
    if (zbs_en_c = true) then
 
      one_hot_core(to_integer(unsigned(sha_reg))) <= '1';
 
    end if;
 
  end process shift_one_hot;
 
 
 
 
 
  -- Carry-Less Multiplication Core ---------------------------------------------------------
 
  -- -------------------------------------------------------------------------------------------
 
  clmul_core: process(rstn_i, clk_i)
 
  begin
 
    if (rstn_i = '0') then
 
      clmul.cnt  <= (others => def_rst_val_c);
 
      clmul.prod <= (others => def_rst_val_c);
 
    elsif rising_edge(clk_i) then
 
      if (clmul.start = '1') then -- start new multiplication
 
        clmul.cnt                 <= (others => '0');
 
        clmul.cnt(clmul.cnt'left) <= '1';
 
        clmul.prod(63 downto 32)  <= (others => '0');
 
        if (cmd_buf(op_clmulr_c) = '1') then -- reverse input operands?
 
          clmul.prod(31 downto 00) <= bit_rev_f(rs1_reg);
 
        else
 
          clmul.prod(31 downto 00) <= rs1_reg;
 
        end if;
 
      elsif (clmul.busy = '1') then -- processing
 
        clmul.cnt <= std_ulogic_vector(unsigned(clmul.cnt) - 1);
 
        if (clmul.prod(0) = '1') then
 
          clmul.prod(62 downto 31) <= clmul.prod(63 downto 32) xor clmul.rs2;
 
        else
 
          clmul.prod(62 downto 31) <= clmul.prod(63 downto 32);
 
        end if;
 
        clmul.prod(30 downto 00) <= clmul.prod(31 downto 1);
 
      end if;
 
    end if;
 
  end process clmul_core;
 
 
 
  -- reverse input operands? --
 
  clmul.rs2 <= bit_rev_f(rs2_reg) when (cmd_buf(op_clmulr_c) = '1') else rs2_reg;
 
 
 
  -- multiplier busy? --
 
  clmul.busy <= or_reduce_f(clmul.cnt);
 
 
 
 
  -- Operation Results ----------------------------------------------------------------------
  -- Operation Results ----------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- logic with negate --
  -- logic with negate --
  res_int(op_andn_c) <= rs1_reg and (not rs2_reg); -- logical and-not
  res_int(op_andn_c) <= rs1_reg and (not rs2_reg);
  res_int(op_orn_c)  <= rs1_reg or  (not rs2_reg); -- logical or-not
  res_int(op_orn_c)  <= rs1_reg or  (not rs2_reg);
  res_int(op_xnor_c) <= rs1_reg xor (not rs2_reg); -- logical xor-not
  res_int(op_xnor_c) <= rs1_reg xor (not rs2_reg);
 
 
  -- count leading/trailing zeros --
  -- count leading/trailing zeros --
  res_int(op_clz_c)(data_width_c-1 downto shifter.cnt'left+1) <= (others => '0');
  res_int(op_clz_c)(data_width_c-1 downto shifter.cnt'left+1) <= (others => '0');
  res_int(op_clz_c)(shifter.cnt'left downto 0) <= shifter.cnt;
  res_int(op_clz_c)(shifter.cnt'left downto 0) <= shifter.cnt;
  res_int(op_ctz_c) <= (others => '0'); -- unused/redundant
  res_int(op_ctz_c) <= (others => '0'); -- unused/redundant
Line 416... Line 517...
  -- address generation instructions --
  -- address generation instructions --
  res_int(op_sh1add_c) <= adder_core;
  res_int(op_sh1add_c) <= adder_core;
  res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant
  res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant
  res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant
  res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant
 
 
 
  -- single-bit instructions --
 
  res_int(op_bclr_c) <= rs1_reg and (not one_hot_core);
 
  res_int(op_bext_c)(data_width_c-1 downto 1) <= (others => '0');
 
  res_int(op_bext_c)(0) <= or_reduce_f(rs1_reg and one_hot_core);
 
  res_int(op_binv_c) <= rs1_reg xor one_hot_core;
 
  res_int(op_bset_c) <= rs1_reg or one_hot_core;
 
 
 
  -- carry-less multiplication instructions --
 
  res_int(op_clmul_c)  <= clmul.prod(31 downto 00);
 
  res_int(op_clmulh_c) <= clmul.prod(63 downto 32);
 
  res_int(op_clmulr_c) <= bit_rev_f(clmul.prod(31 downto 00));
 
 
 
 
  -- Output Selector ------------------------------------------------------------------------
  -- Output Selector ------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  res_out(op_andn_c)  <= res_int(op_andn_c)  when (cmd_buf(op_andn_c)  = '1') else (others => '0');
  res_out(op_andn_c)  <= res_int(op_andn_c)  when (cmd_buf(op_andn_c)  = '1') else (others => '0');
  res_out(op_orn_c)   <= res_int(op_orn_c)   when (cmd_buf(op_orn_c)   = '1') else (others => '0');
  res_out(op_orn_c)   <= res_int(op_orn_c)   when (cmd_buf(op_orn_c)   = '1') else (others => '0');
Line 438... Line 551...
  res_out(op_rev8_c)  <= res_int(op_rev8_c)  when (cmd_buf(op_rev8_c)  = '1') else (others => '0');
  res_out(op_rev8_c)  <= res_int(op_rev8_c)  when (cmd_buf(op_rev8_c)  = '1') else (others => '0');
  --
  --
  res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c))  = '1') else (others => '0');
  res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c))  = '1') else (others => '0');
  res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
  res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
  res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
  res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
 
  --
 
  res_out(op_bclr_c) <= res_int(op_bclr_c) when (cmd_buf(op_bclr_c) = '1') else (others => '0');
 
  res_out(op_bext_c) <= res_int(op_bext_c) when (cmd_buf(op_bext_c) = '1') else (others => '0');
 
  res_out(op_binv_c) <= res_int(op_binv_c) when (cmd_buf(op_binv_c) = '1') else (others => '0');
 
  res_out(op_bset_c) <= res_int(op_bset_c) when (cmd_buf(op_bset_c) = '1') else (others => '0');
 
  --
 
  res_out(op_clmul_c)  <= res_int(op_clmul_c)  when (cmd_buf(op_clmul_c) = '1')  else (others => '0');
 
  res_out(op_clmulh_c) <= res_int(op_clmulh_c) when (cmd_buf(op_clmulh_c) = '1') else (others => '0');
 
  res_out(op_clmulr_c) <= res_int(op_clmulr_c) when (cmd_buf(op_clmulr_c) = '1') else (others => '0');
 
 
 
 
  -- Output Gate ----------------------------------------------------------------------------
  -- Output Gate ----------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  output_gate: process(rstn_i, clk_i)
  output_gate: process(rstn_i, clk_i)
Line 455... Line 577...
                 res_out(op_clz_c)   or res_out(op_cpop_c)  or -- res_out(op_ctz_c) is unused here
                 res_out(op_clz_c)   or res_out(op_cpop_c)  or -- res_out(op_ctz_c) is unused here
                 res_out(op_min_c)   or -- res_out(op_max_c) is unused here
                 res_out(op_min_c)   or -- res_out(op_max_c) is unused here
                 res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
                 res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
                 res_out(op_ror_c)   or res_out(op_rol_c)   or
                 res_out(op_ror_c)   or res_out(op_rol_c)   or
                 res_out(op_orcb_c)  or res_out(op_rev8_c)  or
                 res_out(op_orcb_c)  or res_out(op_rev8_c)  or
                 res_out(op_sh1add_c); -- res_out(op_sh2add_c) and res_out(op_sh3add_c) are unused here
                 res_out(op_sh1add_c) or -- res_out(op_sh2add_c) and res_out(op_sh3add_c) are unused here
 
                 res_out(op_bclr_c)   or res_out(op_bext_c)   or res_out(op_binv_c)  or res_out(op_bset_c) or
 
                 res_out(op_clmul_c)  or res_out(op_clmulh_c) or res_out(op_clmulr_c);
      end if;
      end if;
    end if;
    end if;
  end process output_gate;
  end process output_gate;
 
 
  -- valid output --
  -- valid output --

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.