OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [rtl/] [core/] [neorv32_cpu_cp_bitmanip.vhd] - Blame information for rev 66

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 63 zero_gravi
-- #################################################################################################
2
-- # << NEORV32 - CPU Co-Processor: Bit-Manipulation Co-Processor Unit (RISC-V "B" Extension) >>   #
3
-- # ********************************************************************************************* #
4
-- # The bit manipulation unit is implemented as a co-processor that has a processing latency of 1 #
5
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related)  #
6
-- # operations. Use the FAST_SHIFT_EN option to reduce the latency of shift-related instructions  #
7
-- # to a fixed value of 3 cycles (using barrel shifters).                                         #
8
-- #                                                                                               #
9
-- # Supported sub-extensions (Zb*):                                                               #
10 66 zero_gravi
-- # - Zba: Address generation instructions                                                        #
11 63 zero_gravi
-- # - Zbb: Basic bit-manipulation instructions                                                    #
12
-- # ********************************************************************************************* #
13
-- # BSD 3-Clause License                                                                          #
14
-- #                                                                                               #
15
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
16
-- #                                                                                               #
17
-- # Redistribution and use in source and binary forms, with or without modification, are          #
18
-- # permitted provided that the following conditions are met:                                     #
19
-- #                                                                                               #
20
-- # 1. Redistributions of source code must retain the above copyright notice, this list of        #
21
-- #    conditions and the following disclaimer.                                                   #
22
-- #                                                                                               #
23
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
24
-- #    conditions and the following disclaimer in the documentation and/or other materials        #
25
-- #    provided with the distribution.                                                            #
26
-- #                                                                                               #
27
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
28
-- #    endorse or promote products derived from this software without specific prior written      #
29
-- #    permission.                                                                                #
30
-- #                                                                                               #
31
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
32
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
33
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
34
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
35
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
36
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
37
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
38
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
39
-- # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
40
-- # ********************************************************************************************* #
41
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
42
-- #################################################################################################
43
 
44
library ieee;
45
use ieee.std_logic_1164.all;
46
use ieee.numeric_std.all;
47
 
48
library neorv32;
49
use neorv32.neorv32_package.all;
50
 
51
entity neorv32_cpu_cp_bitmanip is
52
  generic (
53
    FAST_SHIFT_EN : boolean -- true = barrel shifter (fast but large), false = iterative serial shifter (small but slow)
54
  );
55
  port (
56
    -- global control --
57
    clk_i   : in  std_ulogic; -- global clock, rising edge
58
    rstn_i  : in  std_ulogic; -- global reset, low-active, async
59
    ctrl_i  : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus (instruction bits used for decoding)
60
    start_i : in  std_ulogic; -- trigger operation (evaluated while the controller is in S_IDLE)
61
    -- data input --
62
    cmp_i   : in  std_ulogic_vector(1 downto 0); -- comparator status (only the cmp_less_c bit is evaluated here)
63
    rs1_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
64
    rs2_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
65 66 zero_gravi
    shamt_i : in  std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); -- shift amount
66 63 zero_gravi
    -- result and status --
67
    res_o   : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result (registered; captured on the clock edge where valid_o is high, cleared to zero otherwise)
68
    valid_o : out std_ulogic -- data output valid (single-cycle strobe)
69
  );
70
end neorv32_cpu_cp_bitmanip;
71
 
72
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
73
 
74 66 zero_gravi
  -- Sub-extension configuration (false = the according cmd bits are tied to '0') --
75
  constant zbb_en_c : boolean := true;
76
  constant zba_en_c : boolean := true;
77
  -- --------------------------- --
78
 
79
  -- commands: Zbb - logic with negate --
80
  constant op_andn_c    : natural := 0;
81
  constant op_orn_c     : natural := 1;
82
  constant op_xnor_c    : natural := 2;
83
  -- commands: Zbb - count leading/trailing zero bits --
84
  constant op_clz_c     : natural := 3;
85
  constant op_ctz_c     : natural := 4;
86
  -- commands: Zbb - count population --
87
  constant op_cpop_c    : natural := 5;
88
  -- commands: Zbb - integer minimum/maximum --
89
  constant op_max_c     : natural := 6; -- signed/unsigned
90
  constant op_min_c     : natural := 7; -- signed/unsigned
91
  -- commands: Zbb - sign- and zero-extension --
92
  constant op_sextb_c   : natural := 8;
93
  constant op_sexth_c   : natural := 9;
94
  constant op_zexth_c   : natural := 10;
95
  -- commands: Zbb - bitwise rotation --
96
  constant op_rol_c     : natural := 11;
97
  constant op_ror_c     : natural := 12; -- rori
98
  -- commands: Zbb - or-combine --
99
  constant op_orcb_c    : natural := 13;
100
  -- commands: Zbb - byte-reverse --
101
  constant op_rev8_c    : natural := 14;
102
  -- commands: Zba - shifted add --
103
  constant op_sh1add_c  : natural := 15;
104
  constant op_sh2add_c  : natural := 16;
105
  constant op_sh3add_c  : natural := 17;
106 63 zero_gravi
  -- total number of commands = width of the one-hot command vectors (cmd, cmd_buf) and of the result arrays --
107 66 zero_gravi
  constant op_width_c   : natural := 18;
108 63 zero_gravi
 
109
  -- controller --
110
  type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT);
111
  signal ctrl_state   : ctrl_state_t;
112
  signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0); -- cmd: decoded from ctrl_i; cmd_buf: latched on start_i
113
  signal valid        : std_ulogic; -- result-valid strobe (drives valid_o)
114
 
115
  -- operand buffers (latched on start_i) --
116
  signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
117
  signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
118 66 zero_gravi
  signal sha_reg : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); -- buffered shift amount
119 63 zero_gravi
  signal less_ff : std_ulogic; -- buffered cmp_i(cmp_less_c), used for min/max selection
120
 
121
  -- serial shifter (record is also used for the results of the barrel-shifter variant) --
122
  type shifter_t is record
123
    start   : std_ulogic;
124
    run     : std_ulogic;
125
    bcnt    : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- bit counter
126
    cnt     : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration counter
127
    cnt_max : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration limit (serial shifter only)
128
    sreg    : std_ulogic_vector(data_width_c-1 downto 0); -- shift register / rotation result
129
  end record;
130
  signal shifter : shifter_t;
131
 
132
  -- barrel shifter (one vector per stage; highest index = input level, index 0 = output level) --
133
  type bs_level_t is array (index_size_f(data_width_c) downto 0) of std_ulogic_vector(data_width_c-1 downto 0);
134
  signal bs_level : bs_level_t;
135
 
136
  -- operation results (res_int: raw per-operation result, res_out: result gated by cmd_buf) --
137
  type res_t is array (0 to op_width_c-1) of std_ulogic_vector(data_width_c-1 downto 0);
138
  signal res_int, res_out : res_t;
139
 
140 66 zero_gravi
  -- shifted-add unit --
141
  signal adder_core : std_ulogic_vector(data_width_c-1 downto 0); -- rs2_reg + (rs1_reg << 1/2/3)
142
 
143 63 zero_gravi
begin
144
 
145 66 zero_gravi
  -- Sub-Extension Configuration ------------------------------------------------------------
146
  -- -------------------------------------------------------------------------------------------
147
  assert false report -- condition is constant false: the message below is always issued (severity note = information only)
148
  "Implementing bit-manipulation (B) sub-extensions: " &
149
  cond_sel_string_f(zbb_en_c, "Zbb", "") &
150
  cond_sel_string_f(zba_en_c, "Zba", "") &
151
  ""
152
  severity note;
153
 
154
 
155 63 zero_gravi
  -- Instruction Decoding (One-Hot) ---------------------------------------------------------
156
  -- -------------------------------------------------------------------------------------------
157
  -- a minimal decoding logic is used here -> just to distinguish between the supported B (Zbb/Zba) instructions
158
  -- a more specific decoding and instruction check is done by the CPU control unit
159
 
160
  -- Zbb - Basic bit-manipulation instructions --
161 66 zero_gravi
  cmd(op_andn_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
162
  cmd(op_orn_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
163
  cmd(op_xnor_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10") and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
164 63 zero_gravi
  --
165 66 zero_gravi
  cmd(op_max_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '1') else '0';
166
  cmd(op_min_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '1') and (ctrl_i(ctrl_ir_funct3_1_c) = '0') else '0';
167
  cmd(op_zexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00") and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
168 63 zero_gravi
  --
169 66 zero_gravi
  cmd(op_orcb_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
170 63 zero_gravi
  -- clz/ctz/cpop/sext.b/sext.h (OP-IMM, funct3 = 001) share funct7 with rol/ror/rori: funct12(2:0) then holds rs2/shamt bits, so opcode bit 5 = '0' and funct3 bit 2 = '0' are checked as well to prevent aliasing
171 66 zero_gravi
  cmd(op_clz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000") else '0';
172
  cmd(op_ctz_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001") else '0';
173
  cmd(op_cpop_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010") else '0';
174
  cmd(op_sextb_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100") else '0';
175
  cmd(op_sexth_c)  <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c) = '0') and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101") else '0';
176
  cmd(op_rol_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
177
  cmd(op_ror_c)    <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
178
  cmd(op_rev8_c)   <= '1' when (zbb_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11") and (ctrl_i(ctrl_ir_funct12_7_c) = '1') else '0';
179 63 zero_gravi
 
180 66 zero_gravi
  -- Zba - Address generation instructions --
181
  cmd(op_sh1add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
182
  cmd(op_sh2add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
183
  cmd(op_sh3add_c) <= '1' when (zba_en_c = true) and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01") and (ctrl_i(ctrl_ir_funct12_7_c) = '0') and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
184 63 zero_gravi
 
185 66 zero_gravi
 
186 63 zero_gravi
  -- Co-Processor Controller ----------------------------------------------------------------
187
  -- -------------------------------------------------------------------------------------------
188
  coprocessor_ctrl: process(rstn_i, clk_i) -- FSM: buffers command/operands on start_i and generates the 'valid' strobe
189
  begin
190
    if (rstn_i = '0') then
191
      ctrl_state    <= S_IDLE;
192
      cmd_buf       <= (others => def_rst_val_c);
193
      rs1_reg       <= (others => def_rst_val_c);
194
      rs2_reg       <= (others => def_rst_val_c);
195 66 zero_gravi
      sha_reg       <= (others => def_rst_val_c);
196 63 zero_gravi
      less_ff       <= def_rst_val_c;
197
      shifter.start <= '0';
198
      valid         <= '0';
199
    elsif rising_edge(clk_i) then
200
      -- defaults (both are single-cycle strobes unless overridden below) --
201
      shifter.start <= '0';
202
      valid         <= '0';
203
 
204
      -- fsm --
205
      case ctrl_state is
206
 
207
        when S_IDLE => -- wait for operation trigger
208
        -- ------------------------------------------------------------
209
          if (start_i = '1') then
210
            less_ff <= cmp_i(cmp_less_c); -- buffer comparator "less" flag for the min/max select
211
            cmd_buf <= cmd; -- buffer the decoded one-hot command
212
            rs1_reg <= rs1_i;
213
            rs2_reg <= rs2_i;
214 66 zero_gravi
            sha_reg <= shamt_i;
215 63 zero_gravi
            if ((cmd(op_clz_c) or cmd(op_ctz_c) or cmd(op_cpop_c) or cmd(op_ror_c) or cmd(op_rol_c)) = '1') then -- multi-cycle shift operation
216
              if (FAST_SHIFT_EN = false) then -- default: iterative computation
217
                shifter.start <= '1'; -- trigger the serial shifter (it loads its operand from the buffers in the next cycle)
218
                ctrl_state <= S_START_SHIFT;
219
              else -- full-parallel computation
220
                ctrl_state <= S_BUSY_SHIFT; -- no start strobe needed; shifter.run is constant '0' in this configuration
221
              end if;
222
            else
223
              valid      <= '1'; -- all other operations: signal completion right away
224
              ctrl_state <= S_IDLE;
225
            end if;
226
          end if;
227
 
228
        when S_START_SHIFT => -- one cycle delay to start shift operation
229
        -- ------------------------------------------------------------
230
          ctrl_state <= S_BUSY_SHIFT;
231
 
232
        when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish
233
        -- ------------------------------------------------------------
234
          if (shifter.run = '0') then -- shifter has finished
235
            valid      <= '1';
236
            ctrl_state <= S_IDLE;
237
          end if;
238
 
239
        when others => -- undefined
240
        -- ------------------------------------------------------------
241
          ctrl_state <= S_IDLE;
242
 
243
      end case;
244
    end if;
245
  end process coprocessor_ctrl;
246
 
247
 
248
  -- Shifter Function Core (iterative: small but slow) --------------------------------------
249
  -- -------------------------------------------------------------------------------------------
250
  serial_shifter: -- iterative 1-bit-per-cycle right shifter used for clz, ctz, cpop, rol and ror
251
  if (FAST_SHIFT_EN = false) generate
252
    shifter_unit: process(rstn_i, clk_i)
253
      variable new_bit_v : std_ulogic; -- bit that is shifted in at the MSB
254
    begin
255
      if (rstn_i = '0') then
256
        shifter.cnt     <= (others => def_rst_val_c);
257
        shifter.sreg    <= (others => def_rst_val_c);
258
        shifter.cnt_max <= (others => def_rst_val_c);
259
        shifter.bcnt    <= (others => def_rst_val_c);
260
      elsif rising_edge(clk_i) then
261
        if (shifter.start = '1') then -- trigger new shift
262
          shifter.cnt <= (others => '0');
263
          -- shift operand --
264
          if (cmd_buf(op_clz_c) = '1') or (cmd_buf(op_rol_c) = '1') then -- count LEADING zeros / rotate LEFT
265
            shifter.sreg <= bit_rev_f(rs1_reg); -- reverse - we can only do right shifts here
266
          else -- ctz, cpop, ror
267
            shifter.sreg <= rs1_reg;
268
          end if;
269
          -- max shift amount --
270
          if (cmd_buf(op_cpop_c) = '1') then -- population count
271
            shifter.cnt_max <= (others => '0');
272
            shifter.cnt_max(shifter.cnt_max'left) <= '1'; -- only the MSB set (presumably equals data_width_c iterations - depends on index_size_f, TODO confirm)
273
          else
274 66 zero_gravi
            shifter.cnt_max <= '0' & sha_reg; -- rotate: iterate "shift amount" times
275 63 zero_gravi
          end if;
276
          shifter.bcnt <= (others => '0');
277
        elsif (shifter.run = '1') then -- right shifts only
278
          new_bit_v := ((cmd_buf(op_ror_c) or cmd_buf(op_rol_c)) and shifter.sreg(0)) or (cmd_buf(op_clz_c) or cmd_buf(op_ctz_c)); -- rotate: feed LSB back in; clz/ctz: shift in '1' (so sreg(0) eventually becomes '1'); cpop: shift in '0'
279
          shifter.sreg <= new_bit_v & shifter.sreg(shifter.sreg'left downto 1); -- ro[r/l]/lsr(for counting)
280
          shifter.cnt  <= std_ulogic_vector(unsigned(shifter.cnt) + 1); -- iteration counter
281
          if (shifter.sreg(0) = '1') then -- count every '1' that leaves via the LSB
282
            shifter.bcnt <= std_ulogic_vector(unsigned(shifter.bcnt) + 1); -- bit counter
283
          end if;
284
        end if;
285
      end if;
286
    end process shifter_unit;
287
  end generate;
288
 
289
  -- run control --
290
  serial_shifter_ctrl: -- combinational "keep running" flag for the serial shifter
291
  if (FAST_SHIFT_EN = false) generate
292
    shifter_unit_ctrl: process(cmd_buf, shifter)
293
    begin
294
      -- keep shifting until ... --
295
      if (cmd_buf(op_clz_c) = '1') or (cmd_buf(op_ctz_c) = '1') then -- count leading/trailing zeros
296
        shifter.run <= not shifter.sreg(0); -- ... the first '1' has reached the LSB
297
      else -- population count / rotate
298
        if (shifter.cnt = shifter.cnt_max) then -- ... the iteration limit is reached
299
          shifter.run <= '0';
300
        else
301
          shifter.run <= '1';
302
        end if;
303
      end if;
304
    end process shifter_unit_ctrl;
305
  end generate;
306
 
307
 
308
  -- Shifter Function Core (parallel: fast but large) ---------------------------------------
309
  -- -------------------------------------------------------------------------------------------
310
  barrel_shifter_async_sync: -- registers the results of the parallel (FAST_SHIFT_EN = true) computation
311
  if (FAST_SHIFT_EN = true) generate
312
    shifter_unit_fast: process(rstn_i, clk_i)
314
    begin
315
      if (rstn_i = '0') then
316
        shifter.cnt     <= (others => def_rst_val_c);
317
        shifter.sreg    <= (others => def_rst_val_c);
318
        shifter.bcnt    <= (others => def_rst_val_c);
319
      elsif rising_edge(clk_i) then
320
        -- population count (computed every cycle from the buffered operand) --
321
        shifter.bcnt <= std_ulogic_vector(to_unsigned(popcount_f(rs1_reg), shifter.bcnt'length));
322
        -- count leading/trailing zeros --
323
        if cmd_buf(op_clz_c) = '1' then -- leading
324
          shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(rs1_reg), shifter.cnt'length));
325
        else -- trailing (= leading zeros of the bit-reversed operand)
326
          shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(bit_rev_f(rs1_reg)), shifter.cnt'length));
327
        end if;
328
        -- barrel shifter (register the output level of the shifter array) --
329
        shifter.sreg <= bs_level(0); -- rol/ror[i]
330
      end if;
331
    end process shifter_unit_fast;
332
    shifter.run <= '0'; -- we are done already!
333
  end generate;
334
 
335
  -- barrel shifter array --
336
  barrel_shifter_async: -- combinational rotate-right array, one stage per bit of the shift amount
337
  if (FAST_SHIFT_EN = true) generate
338 66 zero_gravi
    shifter_unit_async: process(rs1_reg, sha_reg, cmd_buf, bs_level)
339 63 zero_gravi
    begin
340
      -- input level: convert left shifts to right shifts --
341
      if (cmd_buf(op_rol_c) = '1') then -- is left shift?
342
        bs_level(index_size_f(data_width_c)) <= bit_rev_f(rs1_reg); -- reverse bit order of input operand
343
      else
344
        bs_level(index_size_f(data_width_c)) <= rs1_reg;
345
      end if;
346
 
347
      -- shifter array --
348
      for i in index_size_f(data_width_c)-1 downto 0 loop -- stage i rotates right by 2**i if sha_reg(i) is set
349 66 zero_gravi
        if (sha_reg(i) = '1') then
350 63 zero_gravi
          bs_level(i)(data_width_c-1 downto data_width_c-(2**i)) <= bs_level(i+1)((2**i)-1 downto 0); -- wrap-around: lowest 2**i bits move to the top
351
          bs_level(i)((data_width_c-(2**i))-1 downto 0) <= bs_level(i+1)(data_width_c-1 downto 2**i); -- remaining bits move down by 2**i
352
        else
353
          bs_level(i) <= bs_level(i+1); -- pass through unchanged
354
        end if;
355
      end loop;
356
    end process shifter_unit_async;
357
  end generate;
358
 
359
 
360 66 zero_gravi
  -- Shifted-Add Core -----------------------------------------------------------------------
361
  -- -------------------------------------------------------------------------------------------
362
  shift_adder: process(rs1_reg, rs2_reg, ctrl_i) -- combinational: adder_core = rs2_reg + (rs1_reg << 1/2/3)
363
    variable opb_v : std_ulogic_vector(data_width_c-1 downto 0); -- left-shifted rs1 operand
364
  begin
365
    case ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) is -- NOTE(review): shift select uses the live ctrl_i bits, not the buffered cmd_buf - assumes ctrl_i is stable until the result is captured; confirm
366
      when "01"   => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0';   -- << 1
367
      when "10"   => opb_v := rs1_reg(rs1_reg'left-2 downto 0) & "00";  -- << 2
368
      when "11"   => opb_v := rs1_reg(rs1_reg'left-3 downto 0) & "000"; -- << 3
369
      when others => opb_v := rs1_reg(rs1_reg'left-1 downto 0) & '0';   -- undefined
370
    end case;
371
    adder_core <= std_ulogic_vector(unsigned(rs2_reg) + unsigned(opb_v)); -- result wraps around at data_width_c bits
372
  end process shift_adder;
373
 
374
 
375 63 zero_gravi
  -- Operation Results ----------------------------------------------------------------------
376
  -- -------------------------------------------------------------------------------------------
377
  -- logic with negate --
378
  res_int(op_andn_c) <= rs1_reg and (not rs2_reg); -- logical and-not
379
  res_int(op_orn_c)  <= rs1_reg or  (not rs2_reg); -- logical or-not
380
  res_int(op_xnor_c) <= rs1_reg xor (not rs2_reg); -- logical xor-not
381
 
382
  -- count leading/trailing zeros (clz and ctz share the op_clz_c result slot) --
383
  res_int(op_clz_c)(data_width_c-1 downto shifter.cnt'left+1) <= (others => '0');
384
  res_int(op_clz_c)(shifter.cnt'left downto 0) <= shifter.cnt;
385
  res_int(op_ctz_c) <= (others => '0'); -- unused/redundant
386
 
387
  -- count set bits --
388
  res_int(op_cpop_c)(data_width_c-1 downto shifter.bcnt'left+1) <= (others => '0');
389
  res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt;
390
 
391
  -- min/max select (min and max share the op_min_c result slot; the max command inverts the select) --
392
  res_int(op_min_c) <= rs1_reg when ((less_ff xor cmd_buf(op_max_c)) = '1') else rs2_reg;
393
  res_int(op_max_c) <= (others => '0'); -- unused/redundant
394
 
395
  -- sign-extension --
396
  res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7));
397
  res_int(op_sextb_c)(7 downto 0) <= rs1_reg(7 downto 0); -- sign-extend byte
398
  res_int(op_sexth_c)(data_width_c-1 downto 16) <= (others => rs1_reg(15));
399
  res_int(op_sexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- sign-extend half-word
400
  res_int(op_zexth_c)(data_width_c-1 downto 16) <= (others => '0');
401
  res_int(op_zexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- zero-extend half-word
402
 
403
  -- rotate right/left --
404
  res_int(op_ror_c) <= shifter.sreg;
405
  res_int(op_rol_c) <= bit_rev_f(shifter.sreg); -- reverse to compensate internal right-only shifts
406
 
407
  -- or-combine.byte (each result byte is all-ones or all-zeros, depending on the OR-reduction of the source byte) --
408
  or_combine_gen:
409
  for i in 0 to (data_width_c/8)-1 generate -- sub-byte loop
410
    res_int(op_orcb_c)(i*8+7 downto i*8) <= (others => or_reduce_f(rs1_reg(i*8+7 downto i*8)));
411
  end generate; -- i
412
 
413
  -- reversal.8 (byte swap) --
414
  res_int(op_rev8_c) <= bswap32_f(rs1_reg);
415
 
416 66 zero_gravi
  -- address generation instructions (sh1add/sh2add/sh3add share the op_sh1add_c result slot) --
417
  res_int(op_sh1add_c) <= adder_core;
418
  res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant
419
  res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant
420 63 zero_gravi
 
421 66 zero_gravi
 
422 63 zero_gravi
  -- Output Selector ------------------------------------------------------------------------
423
  -- -------------------------------------------------------------------------------------------
424
  res_out(op_andn_c)  <= res_int(op_andn_c)  when (cmd_buf(op_andn_c)  = '1') else (others => '0'); -- each result is forced to zero unless its buffered command bit is set
425
  res_out(op_orn_c)   <= res_int(op_orn_c)   when (cmd_buf(op_orn_c)   = '1') else (others => '0');
426
  res_out(op_xnor_c)  <= res_int(op_xnor_c)  when (cmd_buf(op_xnor_c)  = '1') else (others => '0');
427
  res_out(op_clz_c)   <= res_int(op_clz_c)   when ((cmd_buf(op_clz_c) or cmd_buf(op_ctz_c)) = '1') else (others => '0'); -- shared by clz and ctz
428
  res_out(op_ctz_c)   <= (others => '0'); -- unused/redundant
429
  res_out(op_cpop_c)  <= res_int(op_cpop_c)  when (cmd_buf(op_cpop_c)  = '1') else (others => '0');
430
  res_out(op_min_c)   <= res_int(op_min_c)   when ((cmd_buf(op_min_c) or cmd_buf(op_max_c)) = '1') else (others => '0'); -- shared by min and max
431
  res_out(op_max_c)   <= (others => '0'); -- unused/redundant
432
  res_out(op_sextb_c) <= res_int(op_sextb_c) when (cmd_buf(op_sextb_c) = '1') else (others => '0');
433
  res_out(op_sexth_c) <= res_int(op_sexth_c) when (cmd_buf(op_sexth_c) = '1') else (others => '0');
434
  res_out(op_zexth_c) <= res_int(op_zexth_c) when (cmd_buf(op_zexth_c) = '1') else (others => '0');
435
  res_out(op_ror_c)   <= res_int(op_ror_c)   when (cmd_buf(op_ror_c)   = '1') else (others => '0');
436
  res_out(op_rol_c)   <= res_int(op_rol_c)   when (cmd_buf(op_rol_c)   = '1') else (others => '0');
437
  res_out(op_orcb_c)  <= res_int(op_orcb_c)  when (cmd_buf(op_orcb_c)  = '1') else (others => '0');
438
  res_out(op_rev8_c)  <= res_int(op_rev8_c)  when (cmd_buf(op_rev8_c)  = '1') else (others => '0');
439 66 zero_gravi
  -- shared by sh1add, sh2add and sh3add
440
  res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c))  = '1') else (others => '0');
441
  res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
442
  res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
443 63 zero_gravi
 
444
 
445
  -- Output Gate ----------------------------------------------------------------------------
446
  -- -------------------------------------------------------------------------------------------
447
  output_gate: process(rstn_i, clk_i) -- registers the selected result; res_o is zero whenever no result is valid
448
  begin
449
    if (rstn_i = '0') then
450
      res_o <= (others => def_rst_val_c);
451
    elsif rising_edge(clk_i) then
452
      res_o <= (others => '0'); -- default: drive zero
453
      if (valid = '1') then -- OR-combine the gated results (only the slots selected by cmd_buf are non-zero)
454
        res_o <= res_out(op_andn_c)  or res_out(op_orn_c)   or res_out(op_xnor_c) or
455
                 res_out(op_clz_c)   or res_out(op_cpop_c)  or -- res_out(op_ctz_c) is unused here
456
                 res_out(op_min_c)   or -- res_out(op_max_c) is unused here
457
                 res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
458
                 res_out(op_ror_c)   or res_out(op_rol_c)   or
459 66 zero_gravi
                 res_out(op_orcb_c)  or res_out(op_rev8_c)  or
460
                 res_out(op_sh1add_c); -- res_out(op_sh2add_c) and res_out(op_sh3add_c) are unused here
461 63 zero_gravi
      end if;
462
    end if;
463
  end process output_gate;
464
 
465
  -- valid output (res_o is updated on the clock edge at which this strobe is high) --
466
  valid_o <= valid;
467
 
468
 
469
end neorv32_cpu_cp_bitmanip_rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.