OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_bitmanip.vhd] - Blame information for rev 74

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 63 zero_gravi
-- #################################################################################################
2
-- # << NEORV32 - CPU Co-Processor: Bit-Manipulation Co-Processor Unit (RISC-V "B" Extension) >>   #
3
-- # ********************************************************************************************* #
4 71 zero_gravi
-- # Supported B sub-extensions (Zb*):                                                             #
5
-- # - Zba: Address-generation instructions                                                        #
6
-- # - Zbb: Basic bit-manipulation instructions                                                    #
7
-- # - Zbs: Single-bit instructions                                                                #
8
-- # - Zbc: Carry-less multiplication instructions                                                 #
9 63 zero_gravi
-- #                                                                                               #
10 71 zero_gravi
-- # NOTE: This is a first implementation of the bit-manipulation co-processor that supports all   #
11
-- #       sub-sets of the B extension. Hence, it is not yet optimized for area, latency or speed. #
12 63 zero_gravi
-- # ********************************************************************************************* #
13
-- # BSD 3-Clause License                                                                          #
14
-- #                                                                                               #
15 71 zero_gravi
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved.                                     #
16 63 zero_gravi
-- #                                                                                               #
17
-- # Redistribution and use in source and binary forms, with or without modification, are          #
18
-- # permitted provided that the following conditions are met:                                     #
19
-- #                                                                                               #
20
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
21
-- #    conditions and the following disclaimer.                                                   #
22
-- #                                                                                               #
23
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
24
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
25
-- #    provided with the distribution.                                                            #
26
-- #                                                                                               #
27
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
28
-- #    endorse or promote products derived from this software without specific prior written      #
29
-- #    permission.                                                                                #
30
-- #                                                                                               #
31
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
32
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
33
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
34
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
35
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
36
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
37
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
38
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
39
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
40
-- # ********************************************************************************************* #
41
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
42
-- #################################################################################################
43
 
44
library ieee;
45
use ieee.std_logic_1164.all;
46
use ieee.numeric_std.all;
47
 
48
library neorv32;
49
use neorv32.neorv32_package.all;
50
 
51
entity neorv32_cpu_cp_bitmanip is
52
  generic (
53
    FAST_SHIFT_EN : boolean -- use barrel shifter for shift operations
54
  );
55
  port (
56
    -- global control --
57
    clk_i   : in  std_ulogic; -- global clock, rising edge
58
    rstn_i  : in  std_ulogic; -- global reset, low-active, async
59
    ctrl_i  : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
60
    start_i : in  std_ulogic; -- trigger operation
61
    -- data input --
62
    cmp_i   : in  std_ulogic_vector(1 downto 0); -- comparator status
63
    rs1_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
64
    rs2_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
65 66 zero_gravi
    shamt_i : in  std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); -- shift amount
66 63 zero_gravi
    -- result and status --
67
    res_o   : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
68
    valid_o : out std_ulogic -- data output valid
69
  );
70
end neorv32_cpu_cp_bitmanip;
71
 
72
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
73
 
74 71 zero_gravi
  -- Sub-extension configuration ----------------------------
75
  -- Note that this configuration does NOT affect the CPU's (illegal) instruction decoding logic!
76 66 zero_gravi
  constant zbb_en_c : boolean := true;
77
  constant zba_en_c : boolean := true;
78 71 zero_gravi
  constant zbc_en_c : boolean := true;
79
  constant zbs_en_c : boolean := true;
80
  -- --------------------------------------------------------
81 66 zero_gravi
 
82 71 zero_gravi
  -- Zbb - logic with negate --
83 66 zero_gravi
  constant op_andn_c    : natural := 0;
84
  constant op_orn_c     : natural := 1;
85
  constant op_xnor_c    : natural := 2;
86 71 zero_gravi
  -- Zbb - count leading/trailing zero bits --
87 66 zero_gravi
  constant op_clz_c     : natural := 3;
88
  constant op_ctz_c     : natural := 4;
89 71 zero_gravi
  -- Zbb - count population --
90 66 zero_gravi
  constant op_cpop_c    : natural := 5;
91 71 zero_gravi
  -- Zbb - integer minimum/maximum --
92 66 zero_gravi
  constant op_max_c     : natural := 6; -- signed/unsigned
93
  constant op_min_c     : natural := 7; -- signed/unsigned
94 71 zero_gravi
  -- Zbb - sign- and zero-extension --
95 66 zero_gravi
  constant op_sextb_c   : natural := 8;
96
  constant op_sexth_c   : natural := 9;
97
  constant op_zexth_c   : natural := 10;
98 71 zero_gravi
  -- Zbb - bitwise rotation --
99 66 zero_gravi
  constant op_rol_c     : natural := 11;
100 71 zero_gravi
  constant op_ror_c     : natural := 12; -- also rori
101
  -- Zbb - or-combine --
102 66 zero_gravi
  constant op_orcb_c    : natural := 13;
103 71 zero_gravi
  -- Zbb - byte-reverse --
104 66 zero_gravi
  constant op_rev8_c    : natural := 14;
105 71 zero_gravi
  -- Zba - shifted-add --
106 66 zero_gravi
  constant op_sh1add_c  : natural := 15;
107
  constant op_sh2add_c  : natural := 16;
108
  constant op_sh3add_c  : natural := 17;
109 71 zero_gravi
  -- Zbs - single-bit operations --
110
  constant op_bclr_c    : natural := 18;
111
  constant op_bext_c    : natural := 19;
112
  constant op_binv_c    : natural := 20;
113
  constant op_bset_c    : natural := 21;
114
  -- Zbc - carry-less multiplication --
115
  constant op_clmul_c   : natural := 22;
116
  constant op_clmulh_c  : natural := 23;
117
  constant op_clmulr_c  : natural := 24;
118 63 zero_gravi
  --
119 71 zero_gravi
  constant op_width_c   : natural := 25;
120 63 zero_gravi
 
121
  -- controller --
122 71 zero_gravi
  type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_START_CLMUL, S_BUSY_CLMUL);
123 63 zero_gravi
  signal ctrl_state   : ctrl_state_t;
124
  signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
125
  signal valid        : std_ulogic;
126
 
127
  -- operand buffers --
128
  signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
129
  signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
130 66 zero_gravi
  signal sha_reg : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0);
131 63 zero_gravi
  signal less_ff : std_ulogic;
132
 
133
  -- serial shifter --
134
  type shifter_t is record
135
    start   : std_ulogic;
136
    run     : std_ulogic;
137
    bcnt    : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- bit counter
138
    cnt     : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration counter
139
    cnt_max : std_ulogic_vector(index_size_f(data_width_c) downto 0);
140
    sreg    : std_ulogic_vector(data_width_c-1 downto 0);
141
  end record;
142
  signal shifter : shifter_t;
143
 
144
  -- barrel shifter --
145
  type bs_level_t is array (index_size_f(data_width_c) downto 0) of std_ulogic_vector(data_width_c-1 downto 0);
146
  signal bs_level : bs_level_t;
147
 
148
  -- operation results --
149
  type res_t is array (0 to op_width_c-1) of std_ulogic_vector(data_width_c-1 downto 0);
150
  signal res_int, res_out : res_t;
151
 
152 66 zero_gravi
  -- shifted-add unit --
153
  signal adder_core : std_ulogic_vector(data_width_c-1 downto 0);
154
 
155 71 zero_gravi
  -- one-hot shifter --
156
  signal one_hot_core : std_ulogic_vector(data_width_c-1 downto 0);
157
 
158
  -- carry-less multiplier --
159
  type clmultiplier_t is record
160
    start : std_ulogic;
161
    busy  : std_ulogic;
162
    rs2   : std_ulogic_vector(data_width_c-1 downto 0);
163
    cnt   : std_ulogic_vector(index_size_f(data_width_c) downto 0);
164
    prod  : std_ulogic_vector(2*data_width_c-1 downto 0);
165
  end record;
166
  signal clmul : clmultiplier_t;
167
 
168 63 zero_gravi
begin
169
 
170 66 zero_gravi
  -- Sub-Extension Configuration ------------------------------------------------------------
171
  -- -------------------------------------------------------------------------------------------
172
  assert false report
173 71 zero_gravi
  "NEORV32 CPU: Implementing bit-manipulation (B) sub-extensions " &
174
  cond_sel_string_f(zba_en_c, "Zba ", "") &
175
  cond_sel_string_f(zbb_en_c, "Zbb ", "") &
176
  cond_sel_string_f(zbc_en_c, "Zbc ", "") &
177
  cond_sel_string_f(zbs_en_c, "Zbs ", "") &
178 66 zero_gravi
  ""
179
  severity note;
180
 
181
 
182 63 zero_gravi
  -- Instruction Decoding (One-Hot) ---------------------------------------------------------
183
  -- -------------------------------------------------------------------------------------------
184 71 zero_gravi
  -- a minimal decoding logic is used here just to distinguish between the different B instructions;
185
  -- a more precise decoding and valid-instruction check is done by the CPU control unit
186 63 zero_gravi
 
187
  -- Zbb - Basic bit-manipulation instructions --
188 71 zero_gravi
  cmd(op_andn_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
189
  cmd(op_orn_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
190
  cmd(op_xnor_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
191 63 zero_gravi
  --
192 71 zero_gravi
  cmd(op_max_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
193
  cmd(op_min_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
194 66 zero_gravi
  cmd(op_zexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
195 63 zero_gravi
  --
196 71 zero_gravi
  cmd(op_orcb_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
197 63 zero_gravi
  --
198 66 zero_gravi
  cmd(op_clz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
199
  cmd(op_ctz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
200 71 zero_gravi
  cmd(op_cpop_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
201 74 zero_gravi
  cmd(op_sextb_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") and (ctrl_i(ctrl_ir_funct3_2_c) = '0')  and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
202
  cmd(op_sexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
203 66 zero_gravi
  cmd(op_rol_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
204 74 zero_gravi
  cmd(op_ror_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") and (ctrl_i(ctrl_ir_funct3_2_c) = '1') else '0';
205 71 zero_gravi
  cmd(op_rev8_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
206 63 zero_gravi
 
207 66 zero_gravi
  -- Zba - Address generation instructions --
208
  cmd(op_sh1add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
209
  cmd(op_sh2add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
210
  cmd(op_sh3add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
211 63 zero_gravi
 
212 71 zero_gravi
  -- Zbs - Single-bit instructions --
213
  cmd(op_bclr_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
214
  cmd(op_bext_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '1') else '0';
215
  cmd(op_binv_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
216
  cmd(op_bset_c)   <= '1' when (zbs_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
217 66 zero_gravi
 
218 71 zero_gravi
  -- Zbc - Carry-less multiplication instructions --
219
  cmd(op_clmul_c)  <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") else '0';
220
  cmd(op_clmulh_c) <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "011") else '0';
221
  cmd(op_clmulr_c) <= '1' when (zbc_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "010") else '0';
222
 
223
 
224 63 zero_gravi
  -- Co-Processor Controller ----------------------------------------------------------------
225
  -- -------------------------------------------------------------------------------------------
226
  coprocessor_ctrl: process(rstn_i, clk_i)
    variable use_shifter_v : std_ulogic; -- decoded operation is handled by the shifter unit
    variable use_clmul_v   : std_ulogic; -- decoded operation is handled by the carry-less multiplier
  begin
    if (rstn_i = '0') then
      -- state machine and handshake strobes --
      ctrl_state    <= S_IDLE;
      valid         <= '0';
      shifter.start <= '0';
      clmul.start   <= '0';
      -- command / operand buffers --
      cmd_buf       <= (others => def_rst_val_c);
      rs1_reg       <= (others => def_rst_val_c);
      rs2_reg       <= (others => def_rst_val_c);
      sha_reg       <= (others => def_rst_val_c);
      less_ff       <= def_rst_val_c;
    elsif rising_edge(clk_i) then
      -- classify the currently decoded (not yet buffered) command --
      use_shifter_v := cmd(op_clz_c) or cmd(op_ctz_c) or cmd(op_cpop_c) or cmd(op_ror_c) or cmd(op_rol_c);
      use_clmul_v   := cmd(op_clmul_c) or cmd(op_clmulh_c) or cmd(op_clmulr_c);

      -- strobes are single-cycle: cleared here unless set again below --
      valid         <= '0';
      shifter.start <= '0';
      clmul.start   <= '0';

      case ctrl_state is

        -- wait for operation trigger
        when S_IDLE =>
          if (start_i = '1') then
            -- latch command, operands and comparator status for the whole operation --
            cmd_buf <= cmd;
            rs1_reg <= rs1_i;
            rs2_reg <= rs2_i;
            sha_reg <= shamt_i;
            less_ff <= cmp_i(cmp_less_c);
            -- dispatch --
            if (use_shifter_v = '1') then -- multi-cycle shift operation
              if (FAST_SHIFT_EN = true) then -- full-parallel computation
                ctrl_state <= S_BUSY_SHIFT;
              else -- default: iterative computation
                shifter.start <= '1';
                ctrl_state    <= S_START_SHIFT;
              end if;
            elsif (zbc_en_c = true) and (use_clmul_v = '1') then -- multi-cycle clmul operation
              clmul.start <= '1';
              ctrl_state  <= S_START_CLMUL;
            else -- everything else is done right away
              valid      <= '1';
              ctrl_state <= S_IDLE;
            end if;
          end if;

        -- one cycle delay to start shift operation
        when S_START_SHIFT =>
          ctrl_state <= S_BUSY_SHIFT;

        -- wait for multi-cycle shift operation to finish (or abort on trap)
        when S_BUSY_SHIFT =>
          if (shifter.run = '0') or (ctrl_i(ctrl_trap_c) = '1') then
            valid      <= '1';
            ctrl_state <= S_IDLE;
          end if;

        -- one cycle delay to start clmul operation
        when S_START_CLMUL =>
          ctrl_state <= S_BUSY_CLMUL;

        -- wait for multi-cycle clmul operation to finish (or abort on trap)
        when S_BUSY_CLMUL =>
          if (clmul.busy = '0') or (ctrl_i(ctrl_trap_c) = '1') then
            valid      <= '1';
            ctrl_state <= S_IDLE;
          end if;

        -- undefined
        when others =>
          ctrl_state <= S_IDLE;

      end case;
    end if;
  end process coprocessor_ctrl;
300
 
301
 
302
  -- Shifter Function Core (iterative: small but slow) --------------------------------------
303
  -- -------------------------------------------------------------------------------------------
304
  serial_shifter:
  if (FAST_SHIFT_EN = false) generate
    -- Bit-serial shift register shared by clz/ctz, cpop and rol/ror.
    -- Only right shifts are implemented; "left" operations load the bit-reversed operand instead.
    shifter_unit: process(rstn_i, clk_i)
      variable load_reversed_v : boolean;    -- operand has to be bit-reversed on load
      variable is_rotate_v     : std_ulogic; -- rol / ror
      variable is_zero_count_v : std_ulogic; -- clz / ctz
      variable shift_in_v      : std_ulogic; -- bit entering the register at the MSB
      variable limit_v         : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration limit
    begin
      if (rstn_i = '0') then
        shifter.sreg    <= (others => def_rst_val_c);
        shifter.cnt     <= (others => def_rst_val_c);
        shifter.bcnt    <= (others => def_rst_val_c);
        shifter.cnt_max <= (others => def_rst_val_c);
      elsif rising_edge(clk_i) then
        if (shifter.start = '1') then -- trigger new shift
          -- clear both counters --
          shifter.cnt  <= (others => '0');
          shifter.bcnt <= (others => '0');

          -- shift operand: count LEADING zeros / rotate LEFT work on the reversed word --
          load_reversed_v := (cmd_buf(op_clz_c) = '1') or (cmd_buf(op_rol_c) = '1');
          if load_reversed_v then
            shifter.sreg <= bit_rev_f(rs1_reg);
          else -- ctz, cpop, ror
            shifter.sreg <= rs1_reg;
          end if;

          -- max shift amount --
          if (cmd_buf(op_cpop_c) = '1') then -- population count: only the MSB of the limit is set
            limit_v := (others => '0');
            limit_v(limit_v'left) := '1';
          else -- shift amount, zero-extended by one bit
            limit_v := '0' & sha_reg;
          end if;
          shifter.cnt_max <= limit_v;

        elsif (shifter.run = '1') then -- right shifts only
          is_rotate_v     := cmd_buf(op_ror_c) or cmd_buf(op_rol_c);
          is_zero_count_v := cmd_buf(op_clz_c) or cmd_buf(op_ctz_c);
          -- rotate: feed the LSB back in; zero count: shift in ones; otherwise shift in zeros --
          shift_in_v      := (is_rotate_v and shifter.sreg(0)) or is_zero_count_v;

          shifter.sreg <= shift_in_v & shifter.sreg(shifter.sreg'left downto 1);
          shifter.cnt  <= std_ulogic_vector(unsigned(shifter.cnt) + 1); -- iteration counter
          if (shifter.sreg(0) = '1') then -- bit counter: count ones leaving at the LSB
            shifter.bcnt <= std_ulogic_vector(unsigned(shifter.bcnt) + 1);
          end if;
        end if;
      end if;
    end process shifter_unit;
  end generate;
342
 
343
  -- run control --
344
  serial_shifter_ctrl:
  if (FAST_SHIFT_EN = false) generate
    -- Combinational "keep shifting" flag for the serial shifter.
    shifter_unit_ctrl: process(cmd_buf, shifter)
    begin
      if (cmd_buf(op_clz_c) = '1') or (cmd_buf(op_ctz_c) = '1') then
        -- count leading/trailing zeros: run until a one shows up at the LSB
        shifter.run <= not shifter.sreg(0);
      elsif (shifter.cnt = shifter.cnt_max) then
        -- population count / rotate: iteration limit reached
        shifter.run <= '0';
      else
        -- population count / rotate: still iterating
        shifter.run <= '1';
      end if;
    end process shifter_unit_ctrl;
  end generate;
360
 
361
 
362
  -- Shifter Function Core (parallel: fast but large) ---------------------------------------
363
  -- -------------------------------------------------------------------------------------------
364
  barrel_shifter_async_sync:
  if (FAST_SHIFT_EN = true) generate
    -- Registered result stage of the parallel ("fast") shifter configuration.
    -- All three results (popcount, zero count, rotation) are computed from the buffered
    -- operand and captured on every rising clock edge; the output selector picks one.
    shifter_unit_fast: process(rstn_i, clk_i)
    begin
      if (rstn_i = '0') then
        shifter.cnt     <= (others => def_rst_val_c);
        shifter.sreg    <= (others => def_rst_val_c);
        shifter.bcnt    <= (others => def_rst_val_c);
      elsif rising_edge(clk_i) then
        -- population count --
        shifter.bcnt <= std_ulogic_vector(to_unsigned(popcount_f(rs1_reg), shifter.bcnt'length));
        -- count leading/trailing zeros --
        if cmd_buf(op_clz_c) = '1' then -- leading
          shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(rs1_reg), shifter.cnt'length));
        else -- trailing: counted as leading zeros of the bit-reversed operand
          shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(bit_rev_f(rs1_reg)), shifter.cnt'length));
        end if;
        -- barrel shifter: last level of the combinational shifter array --
        shifter.sreg <= bs_level(0); -- rol/ror[i]
      end if;
    end process shifter_unit_fast;

    -- no iteration in this configuration, so the controller never has to wait on the shifter
    shifter.run <= '0'; -- we are done already!
  end generate;
388
 
389
  -- barrel shifter array --
390
  barrel_shifter_async:
  if (FAST_SHIFT_EN = true) generate
    -- Combinational rotate-right array: level i rotates by 2**i when bit i of the shift amount is set.
    shifter_unit_async: process(rs1_reg, sha_reg, cmd_buf, bs_level)
      constant top_level_c : natural := index_size_f(data_width_c); -- index of the input level
    begin
      -- input level: convert left shifts to right shifts --
      if (cmd_buf(op_rol_c) = '1') then -- is left shift? -> reverse bit order of input operand
        bs_level(top_level_c) <= bit_rev_f(rs1_reg);
      else
        bs_level(top_level_c) <= rs1_reg;
      end if;

      -- shifter array --
      for lvl in top_level_c-1 downto 0 loop
        if (sha_reg(lvl) = '1') then
          -- rotate right by 2**lvl: the low 2**lvl bits wrap around to the top
          bs_level(lvl) <= bs_level(lvl+1)((2**lvl)-1 downto 0) & bs_level(lvl+1)(data_width_c-1 downto 2**lvl);
        else
          -- pass through unchanged
          bs_level(lvl) <= bs_level(lvl+1);
        end if;
      end loop;
    end process shifter_unit_async;
  end generate;
412
 
413
 
414 66 zero_gravi
  -- Shifted-Add Core -----------------------------------------------------------------------
415
  -- -------------------------------------------------------------------------------------------
416
  -- Computes rs2 + (rs1 << n) with n selected by the two upper funct3 bits of the control bus.
  shift_adder: process(rs1_reg, rs2_reg, ctrl_i)
    variable shifted_v : unsigned(data_width_c-1 downto 0); -- left-shifted rs1 operand
  begin
    case ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) is
      when "01" => -- << 1
        shifted_v := shift_left(unsigned(rs1_reg), 1);
      when "10" => -- << 2
        shifted_v := shift_left(unsigned(rs1_reg), 2);
      when "11" => -- << 3
        shifted_v := shift_left(unsigned(rs1_reg), 3);
      when others => -- undefined: don't care
        shifted_v := (others => '-');
    end case;
    adder_core <= std_ulogic_vector(shifted_v + unsigned(rs2_reg));
  end process shift_adder;
427
 
428
 
429 71 zero_gravi
  -- One-Hot Generator Core -----------------------------------------------------------------
430
  -- -------------------------------------------------------------------------------------------
431
  -- Builds a mask with exactly one bit set, at the position given by the buffered shift amount.
  -- The mask stays all-zero when the Zbs sub-extension is disabled.
  shift_one_hot: process(sha_reg)
    variable mask_v : std_ulogic_vector(data_width_c-1 downto 0);
  begin
    mask_v := (others => '0');
    if (zbs_en_c = true) then
      mask_v(to_integer(unsigned(sha_reg))) := '1';
    end if;
    one_hot_core <= mask_v;
  end process shift_one_hot;
438
 
439
 
440
  -- Carry-Less Multiplication Core ---------------------------------------------------------
441
  -- -------------------------------------------------------------------------------------------
442
  -- Bit-serial carry-less multiplier (shift-and-XOR), one multiplier bit per clock cycle.
  --  * clmul.start loads rs1 (bit-reversed for clmulr) into the lower product half,
  --    clears the upper half and sets only the MSB of the iteration counter.
  --  * while clmul.busy is set, the product register is shifted right by one; if the bit
  --    leaving at position 0 is set, clmul.rs2 is XOR-ed into the upper half first.
  -- All slice bounds are derived from data_width_c so they always agree with the declared
  -- width of clmul.prod (2*data_width_c) and of the operand registers (data_width_c).
  clmul_core: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      clmul.cnt  <= (others => def_rst_val_c);
      clmul.prod <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      if (clmul.start = '1') then -- start new multiplication
        clmul.cnt                 <= (others => '0');
        clmul.cnt(clmul.cnt'left) <= '1'; -- presumably equals data_width_c iterations (depends on index_size_f)
        clmul.prod(2*data_width_c-1 downto data_width_c) <= (others => '0');
        if (cmd_buf(op_clmulr_c) = '1') then -- reverse input operands?
          clmul.prod(data_width_c-1 downto 0) <= bit_rev_f(rs1_reg);
        else
          clmul.prod(data_width_c-1 downto 0) <= rs1_reg;
        end if;
      elsif (clmul.busy = '1') then -- processing
        clmul.cnt <= std_ulogic_vector(unsigned(clmul.cnt) - 1);
        -- upper half: conditional XOR with the multiplicand, then shift right by one
        -- (the top-most product bit is not written here and keeps the zero loaded at start)
        if (clmul.prod(0) = '1') then
          clmul.prod(2*data_width_c-2 downto data_width_c-1) <= clmul.prod(2*data_width_c-1 downto data_width_c) xor clmul.rs2;
        else
          clmul.prod(2*data_width_c-2 downto data_width_c-1) <= clmul.prod(2*data_width_c-1 downto data_width_c);
        end if;
        -- lower half: plain shift right by one
        clmul.prod(data_width_c-2 downto 0) <= clmul.prod(data_width_c-1 downto 1);
      end if;
    end if;
  end process clmul_core;
468
 
469
  -- reverse input operands? --
470
  clmul.rs2 <= bit_rev_f(rs2_reg) when (cmd_buf(op_clmulr_c) = '1') else rs2_reg;
471
 
472
  -- multiplier busy? --
473 74 zero_gravi
  clmul.busy <= '1' when (or_reduce_f(clmul.cnt) = '1') else '0';
474 71 zero_gravi
 
475
 
476 63 zero_gravi
  -- Operation Results ----------------------------------------------------------------------
477
  -- -------------------------------------------------------------------------------------------
478
  -- logic with negate --
479 71 zero_gravi
  res_int(op_andn_c) <= rs1_reg and (not rs2_reg);
480
  res_int(op_orn_c)  <= rs1_reg or  (not rs2_reg);
481
  res_int(op_xnor_c) <= rs1_reg xor (not rs2_reg);
482 63 zero_gravi
 
483
  -- count leading/trailing zeros --
484
  res_int(op_clz_c)(data_width_c-1 downto shifter.cnt'left+1) <= (others => '0');
485
  res_int(op_clz_c)(shifter.cnt'left downto 0) <= shifter.cnt;
486
  res_int(op_ctz_c) <= (others => '0'); -- unused/redundant
487
 
488
  -- count set bits --
489
  res_int(op_cpop_c)(data_width_c-1 downto shifter.bcnt'left+1) <= (others => '0');
490
  res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt;
491
 
492
  -- min/max select --
493
  res_int(op_min_c) <= rs1_reg when ((less_ff xor cmd_buf(op_max_c)) = '1') else rs2_reg;
494
  res_int(op_max_c) <= (others => '0'); -- unused/redundant
495
 
496
  -- sign-extension --
497
  res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7));
498
  res_int(op_sextb_c)(7 downto 0) <= rs1_reg(7 downto 0); -- sign-extend byte
499
  res_int(op_sexth_c)(data_width_c-1 downto 16) <= (others => rs1_reg(15));
500
  res_int(op_sexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- sign-extend half-word
501
  res_int(op_zexth_c)(data_width_c-1 downto 16) <= (others => '0');
502
  res_int(op_zexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- zero-extend half-word
503
 
504
  -- rotate right/left --
505
  res_int(op_ror_c) <= shifter.sreg;
506
  res_int(op_rol_c) <= bit_rev_f(shifter.sreg); -- reverse to compensate internal right-only shifts
507
 
508
  -- or-combine.byte --
  -- One generate iteration per byte of the data word: all eight bits of result byte i are
  -- set to the value returned by or_reduce_f for the same byte of rs1_reg.
  -- NOTE(review): or_reduce_f is defined elsewhere; presumably it ORs all bits of its argument - confirm.
509
  or_combine_gen:
510
  for i in 0 to (data_width_c/8)-1 generate -- sub-byte loop
511
    res_int(op_orcb_c)(i*8+7 downto i*8) <= (others => or_reduce_f(rs1_reg(i*8+7 downto i*8)));
512
  end generate; -- i
513
 
514
  -- reversal.8 (byte swap) --
  -- Delegates to bswap32_f (defined elsewhere) applied to rs1_reg.
515
  res_int(op_rev8_c) <= bswap32_f(rs1_reg);
516
 
517 66 zero_gravi
  -- address generation instructions --
  -- All three shift-and-add variants share the single adder_core result: the output selector
  -- forwards res_int(op_sh1add_c) for sh1add, sh2add and sh3add, so the other two slots are
  -- tied to zero.
518
  res_int(op_sh1add_c) <= adder_core;
519
  res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant
520
  res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant
521 63 zero_gravi
 
522 71 zero_gravi
  -- single-bit instructions --
  -- All four operations combine rs1_reg with one_hot_core.
  -- NOTE(review): one_hot_core is presumably a one-hot mask marking the addressed bit - confirm.
  -- bclr clears the masked bit(s), binv toggles them, bset sets them; bext returns in bit 0
  -- whether any masked bit of rs1_reg is set (upper bits are zero).
523
  res_int(op_bclr_c) <= rs1_reg and (not one_hot_core);
524
  res_int(op_bext_c)(data_width_c-1 downto 1) <= (others => '0');
525 74 zero_gravi
  res_int(op_bext_c)(0) <= '1' when (or_reduce_f(rs1_reg and one_hot_core) = '1') else '0';
526 71 zero_gravi
  res_int(op_binv_c) <= rs1_reg xor one_hot_core;
527
  res_int(op_bset_c) <= rs1_reg or one_hot_core;
528 66 zero_gravi
 
529 71 zero_gravi
  -- carry-less multiplication instructions --
  -- clmul uses the low word of clmul.prod, clmulh the high word,
  -- and clmulr the bit-reversed low word.
530
  res_int(op_clmul_c)  <= clmul.prod(31 downto 00);
531
  res_int(op_clmulh_c) <= clmul.prod(63 downto 32);
532
  res_int(op_clmulr_c) <= bit_rev_f(clmul.prod(31 downto 00));
533
 
534
 
535 63 zero_gravi
  -- Output Selector ------------------------------------------------------------------------
536
  -- -------------------------------------------------------------------------------------------
  -- Each res_out(<op>) slot passes on its res_int(<op>) value only while the matching command
  -- bit in cmd_buf is set; otherwise it is forced to all-zero. This allows the output gate to
  -- merge all slots with a plain OR.
  -- Operations that share one result path (clz/ctz, min/max, sh1add/sh2add/sh3add) are enabled
  -- by the OR of their command bits on a single slot; the remaining slots of each group are
  -- constant zero.
537
  res_out(op_andn_c)  <= res_int(op_andn_c)  when (cmd_buf(op_andn_c)  = '1') else (others => '0');
538
  res_out(op_orn_c)   <= res_int(op_orn_c)   when (cmd_buf(op_orn_c)   = '1') else (others => '0');
539
  res_out(op_xnor_c)  <= res_int(op_xnor_c)  when (cmd_buf(op_xnor_c)  = '1') else (others => '0');
540
  res_out(op_clz_c)   <= res_int(op_clz_c)   when ((cmd_buf(op_clz_c) or cmd_buf(op_ctz_c)) = '1') else (others => '0');
541
  res_out(op_ctz_c)   <= (others => '0'); -- unused/redundant
542
  res_out(op_cpop_c)  <= res_int(op_cpop_c)  when (cmd_buf(op_cpop_c)  = '1') else (others => '0');
543
  res_out(op_min_c)   <= res_int(op_min_c)   when ((cmd_buf(op_min_c) or cmd_buf(op_max_c)) = '1') else (others => '0');
544
  res_out(op_max_c)   <= (others => '0'); -- unused/redundant
545
  res_out(op_sextb_c) <= res_int(op_sextb_c) when (cmd_buf(op_sextb_c) = '1') else (others => '0');
546
  res_out(op_sexth_c) <= res_int(op_sexth_c) when (cmd_buf(op_sexth_c) = '1') else (others => '0');
547
  res_out(op_zexth_c) <= res_int(op_zexth_c) when (cmd_buf(op_zexth_c) = '1') else (others => '0');
548
  res_out(op_ror_c)   <= res_int(op_ror_c)   when (cmd_buf(op_ror_c)   = '1') else (others => '0');
549
  res_out(op_rol_c)   <= res_int(op_rol_c)   when (cmd_buf(op_rol_c)   = '1') else (others => '0');
550
  res_out(op_orcb_c)  <= res_int(op_orcb_c)  when (cmd_buf(op_orcb_c)  = '1') else (others => '0');
551
  res_out(op_rev8_c)  <= res_int(op_rev8_c)  when (cmd_buf(op_rev8_c)  = '1') else (others => '0');
552 66 zero_gravi
  -- address generation: one shared slot for sh1add/sh2add/sh3add --
553
  res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c))  = '1') else (others => '0');
554
  res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
555
  res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
556 71 zero_gravi
  -- single-bit operations --
557
  res_out(op_bclr_c) <= res_int(op_bclr_c) when (cmd_buf(op_bclr_c) = '1') else (others => '0');
558
  res_out(op_bext_c) <= res_int(op_bext_c) when (cmd_buf(op_bext_c) = '1') else (others => '0');
559
  res_out(op_binv_c) <= res_int(op_binv_c) when (cmd_buf(op_binv_c) = '1') else (others => '0');
560
  res_out(op_bset_c) <= res_int(op_bset_c) when (cmd_buf(op_bset_c) = '1') else (others => '0');
561
  -- carry-less multiplication --
562
  res_out(op_clmul_c)  <= res_int(op_clmul_c)  when (cmd_buf(op_clmul_c) = '1')  else (others => '0');
563
  res_out(op_clmulh_c) <= res_int(op_clmulh_c) when (cmd_buf(op_clmulh_c) = '1') else (others => '0');
564
  res_out(op_clmulr_c) <= res_int(op_clmulr_c) when (cmd_buf(op_clmulr_c) = '1') else (others => '0');
565 63 zero_gravi
 
566
 
567
  -- Output Gate ----------------------------------------------------------------------------
568
  -- -------------------------------------------------------------------------------------------
  -- Registered result output.
  -- * While rstn_i is '0' the result register is set to def_rst_val_c (the reset is checked
  --   before the clock edge, i.e. independently of clk_i).
  -- * On every rising clock edge res_o defaults to all-zero; only when valid = '1' it is
  --   loaded with the OR of all res_out slots. Since the output selector zeroes every slot
  --   whose command bit is not set, the OR passes on the result of the active command(s).
  -- * The constant-zero slots (ctz, max, sh2add, sh3add) are left out of the OR.
569
  output_gate: process(rstn_i, clk_i)
570
  begin
571
    if (rstn_i = '0') then
572
      res_o <= (others => def_rst_val_c);
573
    elsif rising_edge(clk_i) then
574
      res_o <= (others => '0'); -- default: output is zero unless a valid result is present
575
      if (valid = '1') then
576 71 zero_gravi
        res_o <= res_out(op_andn_c)   or res_out(op_orn_c)    or res_out(op_xnor_c)  or
577
                 res_out(op_clz_c)    or res_out(op_cpop_c)   or -- res_out(op_ctz_c) is unused here
578
                 res_out(op_min_c)    or -- res_out(op_max_c) is unused here
579
                 res_out(op_sextb_c)  or res_out(op_sexth_c)  or res_out(op_zexth_c) or
580
                 res_out(op_ror_c)    or res_out(op_rol_c)    or
581
                 res_out(op_orcb_c)   or res_out(op_rev8_c)   or
582
                 res_out(op_sh1add_c) or -- res_out(op_sh2add_c) and res_out(op_sh3add_c) are unused here
583
                 res_out(op_bclr_c)   or res_out(op_bext_c)   or res_out(op_binv_c)  or res_out(op_bset_c) or
584
                 res_out(op_clmul_c)  or res_out(op_clmulh_c) or res_out(op_clmulr_c);
585 63 zero_gravi
      end if;
586
    end if;
587
  end process output_gate;
588
 
589
  -- valid output --
  -- The internal valid flag is passed on unregistered. The output gate loads res_o on the
  -- clock edge at which valid is '1', so the result appears on res_o one cycle after valid_o.
  -- NOTE(review): confirm this one-cycle offset matches what the CPU expects from valid_o/res_o.
590
  valid_o <= valid;
591
 
592
 
593
end neorv32_cpu_cp_bitmanip_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.