1 |
63 |
zero_gravi |
-- #################################################################################################
|
2 |
|
|
-- # << NEORV32 - CPU Co-Processor: Bit-Manipulation Co-Processor Unit (RISC-V "B" Extension) >> #
|
3 |
|
|
-- # ********************************************************************************************* #
|
4 |
71 |
zero_gravi |
-- # Supported B sub-extensions (Zb*): #
|
5 |
|
|
-- # - Zba: Address-generation instructions #
|
6 |
|
|
-- # - Zbb: Basic bit-manipulation instructions #
|
7 |
|
|
-- # - Zbs: Single-bit instructions #
|
8 |
|
|
-- # - Zbc: Carry-less multiplication instructions #
|
9 |
63 |
zero_gravi |
-- # #
|
10 |
71 |
zero_gravi |
-- # NOTE: This is a first implementation of the bit-manipulation co-processor that supports all #
|
11 |
|
|
-- # sub-sets of the B extension. Hence, it is not yet optimized for area, latency or speed. #
|
12 |
63 |
zero_gravi |
-- # ********************************************************************************************* #
|
13 |
|
|
-- # BSD 3-Clause License #
|
14 |
|
|
-- # #
|
15 |
71 |
zero_gravi |
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved. #
|
16 |
63 |
zero_gravi |
-- # #
|
17 |
|
|
-- # Redistribution and use in source and binary forms, with or without modification, are #
|
18 |
|
|
-- # permitted provided that the following conditions are met: #
|
19 |
|
|
-- # #
|
20 |
|
|
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
|
21 |
|
|
-- # conditions and the following disclaimer. #
|
22 |
|
|
-- # #
|
23 |
|
|
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
|
24 |
|
|
-- # conditions and the following disclaimer in the documentation and/or other materials #
|
25 |
|
|
-- # provided with the distribution. #
|
26 |
|
|
-- # #
|
27 |
|
|
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
|
28 |
|
|
-- # endorse or promote products derived from this software without specific prior written #
|
29 |
|
|
-- # permission. #
|
30 |
|
|
-- # #
|
31 |
|
|
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
|
32 |
|
|
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
|
33 |
|
|
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
|
34 |
|
|
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
|
35 |
|
|
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
|
36 |
|
|
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
|
37 |
|
|
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
|
38 |
|
|
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
|
39 |
|
|
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
|
40 |
|
|
-- # ********************************************************************************************* #
|
41 |
|
|
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
|
42 |
|
|
-- #################################################################################################
|
43 |
|
|
|
44 |
|
|
library ieee;
|
45 |
|
|
use ieee.std_logic_1164.all;
|
46 |
|
|
use ieee.numeric_std.all;
|
47 |
|
|
|
48 |
|
|
library neorv32;
|
49 |
|
|
use neorv32.neorv32_package.all;
|
50 |
|
|
|
51 |
|
|
-- Interface of the bit-manipulation co-processor: one generic selecting the shifter
-- implementation, the control/operand inputs and the result/valid outputs.
entity neorv32_cpu_cp_bitmanip is
  generic (
    FAST_SHIFT_EN : boolean -- use barrel shifter for shift operations
  );
  port (
    -- global control --
    clk_i   : in  std_ulogic; -- global clock, rising edge
    rstn_i  : in  std_ulogic; -- global reset, low-active, async
    ctrl_i  : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
    start_i : in  std_ulogic; -- trigger operation
    -- data input --
    cmp_i   : in  std_ulogic_vector(1 downto 0); -- comparator status
    rs1_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 1
    rs2_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- rf source 2
    shamt_i : in  std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); -- shift amount
    -- result and status --
    res_o   : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
    valid_o : out std_ulogic -- data output valid
  );
end entity neorv32_cpu_cp_bitmanip;
|
71 |
|
|
|
72 |
|
|
architecture neorv32_cpu_cp_bitmanip_rtl of neorv32_cpu_cp_bitmanip is
|
73 |
|
|
|
74 |
71 |
zero_gravi |
-- Sub-extension configuration ----------------------------
-- Note that this configuration does NOT affect the CPU's (illegal) instruction decoding logic!
constant zba_en_c : boolean := true; -- Zba: address-generation instructions
constant zbb_en_c : boolean := true; -- Zbb: basic bit-manipulation instructions
constant zbc_en_c : boolean := true; -- Zbc: carry-less multiplication instructions
constant zbs_en_c : boolean := true; -- Zbs: single-bit instructions
-- --------------------------------------------------------

-- Bit positions inside the one-hot command vectors (cmd / cmd_buf); the same
-- numbers index the per-operation result arrays (res_int / res_out).

-- Zbb - logic with negate --
constant op_andn_c   : natural := 0;
constant op_orn_c    : natural := 1;
constant op_xnor_c   : natural := 2;
-- Zbb - count leading/trailing zero bits --
constant op_clz_c    : natural := 3;
constant op_ctz_c    : natural := 4;
-- Zbb - count population --
constant op_cpop_c   : natural := 5;
-- Zbb - integer minimum/maximum --
constant op_max_c    : natural := 6; -- signed/unsigned
constant op_min_c    : natural := 7; -- signed/unsigned
-- Zbb - sign- and zero-extension --
constant op_sextb_c  : natural := 8;
constant op_sexth_c  : natural := 9;
constant op_zexth_c  : natural := 10;
-- Zbb - bitwise rotation --
constant op_rol_c    : natural := 11;
constant op_ror_c    : natural := 12; -- also rori
-- Zbb - or-combine --
constant op_orcb_c   : natural := 13;
-- Zbb - byte-reverse --
constant op_rev8_c   : natural := 14;
-- Zba - shifted-add --
constant op_sh1add_c : natural := 15;
constant op_sh2add_c : natural := 16;
constant op_sh3add_c : natural := 17;
-- Zbs - single-bit operations --
constant op_bclr_c   : natural := 18;
constant op_bext_c   : natural := 19;
constant op_binv_c   : natural := 20;
constant op_bset_c   : natural := 21;
-- Zbc - carry-less multiplication --
constant op_clmul_c  : natural := 22;
constant op_clmulh_c : natural := 23;
constant op_clmulr_c : natural := 24;
-- total number of operation select bits --
constant op_width_c  : natural := 25;

-- controller --
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_START_CLMUL, S_BUSY_CLMUL);
signal ctrl_state : ctrl_state_t;
signal cmd        : std_ulogic_vector(op_width_c-1 downto 0); -- decoded from ctrl_i
signal cmd_buf    : std_ulogic_vector(op_width_c-1 downto 0); -- cmd latched on operation start
signal valid      : std_ulogic;

-- operand buffers --
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
signal sha_reg : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0);
signal less_ff : std_ulogic;

-- serial shifter --
type shifter_t is record
  start   : std_ulogic;
  run     : std_ulogic;
  bcnt    : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- bit counter
  cnt     : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration counter
  cnt_max : std_ulogic_vector(index_size_f(data_width_c) downto 0); -- iteration limit
  sreg    : std_ulogic_vector(data_width_c-1 downto 0); -- shift register
end record;
signal shifter : shifter_t;

-- barrel shifter --
type bs_level_t is array (index_size_f(data_width_c) downto 0) of std_ulogic_vector(data_width_c-1 downto 0);
signal bs_level : bs_level_t;

-- operation results --
type res_t is array (0 to op_width_c-1) of std_ulogic_vector(data_width_c-1 downto 0);
signal res_int : res_t;
signal res_out : res_t;

-- shifted-add unit --
signal adder_core : std_ulogic_vector(data_width_c-1 downto 0);

-- one-hot shifter --
signal one_hot_core : std_ulogic_vector(data_width_c-1 downto 0);

-- carry-less multiplier --
type clmultiplier_t is record
  start : std_ulogic;
  busy  : std_ulogic;
  rs2   : std_ulogic_vector(data_width_c-1 downto 0);
  cnt   : std_ulogic_vector(index_size_f(data_width_c) downto 0);
  prod  : std_ulogic_vector(2*data_width_c-1 downto 0);
end record;
signal clmul : clmultiplier_t;
|
167 |
|
|
|
168 |
63 |
zero_gravi |
begin
|
169 |
|
|
|
170 |
66 |
zero_gravi |
-- Sub-Extension Configuration ------------------------------------------------------------
|
171 |
|
|
-- -------------------------------------------------------------------------------------------
|
172 |
|
|
-- Unconditional elaboration/simulation note listing the enabled Zb* sub-extensions.
assert false
  report "NEORV32 CPU: Implementing bit-manipulation (B) sub-extensions " &
         cond_sel_string_f(zba_en_c, "Zba ", "") &
         cond_sel_string_f(zbb_en_c, "Zbb ", "") &
         cond_sel_string_f(zbc_en_c, "Zbc ", "") &
         cond_sel_string_f(zbs_en_c, "Zbs ", "")
  severity note;
|
180 |
|
|
|
181 |
|
|
|
182 |
63 |
zero_gravi |
-- Instruction Decoding (One-Hot) ---------------------------------------------------------
|
183 |
|
|
-- -------------------------------------------------------------------------------------------
|
184 |
71 |
zero_gravi |
-- a minimal decoding logic is used here just to distinguish between the different B instructions;
|
185 |
|
|
-- a more precise decoding and valid-instruction check is done by the CPU control unit
|
186 |
63 |
zero_gravi |
|
187 |
|
|
-- One-hot instruction decode: each cmd bit is asserted when the instruction word fields
-- carried on ctrl_i match the listed pattern and the owning sub-extension is enabled.
-- Only the bits needed to tell the B instructions apart are compared.

-- Zbb - Basic bit-manipulation instructions --
cmd(op_andn_c) <= '1' when (zbb_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                       and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "11") else '0';
cmd(op_orn_c)  <= '1' when (zbb_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                       and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "10") else '0';
cmd(op_xnor_c) <= '1' when (zbb_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                       and (ctrl_i(ctrl_ir_funct3_1_c downto ctrl_ir_funct3_0_c) = "00") else '0';
--
cmd(op_max_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                        and (ctrl_i(ctrl_ir_funct12_5_c) = '1')
                        and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';
cmd(op_min_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                        and (ctrl_i(ctrl_ir_funct12_5_c) = '1')
                        and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
cmd(op_zexth_c) <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                        and (ctrl_i(ctrl_ir_funct12_5_c) = '0') else '0';
--
cmd(op_orcb_c) <= '1' when (zbb_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                       and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
--
-- clz/ctz/cpop/sext.b/sext.h share their upper funct12 bits with rol/ror/rori and are
-- told apart by funct12[2:0]. For the rotates those bits hold the rs2/shamt field, so
-- the unary operations must additionally be qualified with funct3[2] = '0' (excludes
-- ror/rori) and opcode[5] = '0' (excludes register-register rol); otherwise e.g. a
-- rotate whose rs2/shamt field has low bits "000" would also raise the clz select bit.
cmd(op_clz_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "000")
                        and (ctrl_i(ctrl_ir_funct3_2_c) = '0')
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
cmd(op_ctz_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "001")
                        and (ctrl_i(ctrl_ir_funct3_2_c) = '0')
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
cmd(op_cpop_c)  <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "010")
                        and (ctrl_i(ctrl_ir_funct3_2_c) = '0')
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
cmd(op_sextb_c) <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "100")
                        and (ctrl_i(ctrl_ir_funct3_2_c) = '0')
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
cmd(op_sexth_c) <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct12_2_c downto ctrl_ir_funct12_0_c) = "101")
                        and (ctrl_i(ctrl_ir_funct3_2_c) = '0')
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '0') else '0';
cmd(op_rol_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001")
                        and (ctrl_i(ctrl_ir_opcode7_5_c) = '1') else '0';
-- ror and rori: funct3 = "101" already implies funct3[2] = '1', no extra check needed
cmd(op_ror_c)   <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                        and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';
cmd(op_rev8_c)  <= '1' when (zbb_en_c = true)
                        and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                        and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                        and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "101") else '0';

-- Zba - Address generation instructions --
cmd(op_sh1add_c) <= '1' when (zba_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01")
                         and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") else '0';
cmd(op_sh2add_c) <= '1' when (zba_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01")
                         and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") else '0';
cmd(op_sh3add_c) <= '1' when (zba_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01")
                         and (ctrl_i(ctrl_ir_funct12_7_c) = '0')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") else '0';

-- Zbs - Single-bit instructions --
cmd(op_bclr_c) <= '1' when (zbs_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                       and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
cmd(op_bext_c) <= '1' when (zbs_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "10")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                       and (ctrl_i(ctrl_ir_funct3_2_c) = '1') else '0';
cmd(op_binv_c) <= '1' when (zbs_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "11")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                       and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';
cmd(op_bset_c) <= '1' when (zbs_en_c = true)
                       and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "01")
                       and (ctrl_i(ctrl_ir_funct12_7_c) = '1')
                       and (ctrl_i(ctrl_ir_funct3_2_c) = '0') else '0';

-- Zbc - Carry-less multiplication instructions --
cmd(op_clmul_c)  <= '1' when (zbc_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                         and (ctrl_i(ctrl_ir_funct12_5_c) = '1')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "001") else '0';
cmd(op_clmulh_c) <= '1' when (zbc_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                         and (ctrl_i(ctrl_ir_funct12_5_c) = '1')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "011") else '0';
cmd(op_clmulr_c) <= '1' when (zbc_en_c = true)
                         and (ctrl_i(ctrl_ir_funct12_10_c downto ctrl_ir_funct12_9_c) = "00")
                         and (ctrl_i(ctrl_ir_funct12_5_c) = '1')
                         and (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c) = "010") else '0';
222 |
|
|
|
223 |
|
|
|
224 |
63 |
zero_gravi |
-- Co-Processor Controller ----------------------------------------------------------------
|
225 |
|
|
-- -------------------------------------------------------------------------------------------
|
226 |
|
|
-- Controller FSM: latches the operands and the decoded command when start_i is seen in
-- S_IDLE, launches the iterative shifter or the carry-less multiplier for multi-cycle
-- operations, and raises `valid` for one cycle when the operation is done.
coprocessor_ctrl: process(rstn_i, clk_i)
  variable shift_op_v : std_ulogic; -- any operation that uses the shifter unit
  variable clmul_op_v : std_ulogic; -- any carry-less multiplication
begin
  if (rstn_i = '0') then
    ctrl_state    <= S_IDLE;
    valid         <= '0';
    shifter.start <= '0';
    clmul.start   <= '0';
    cmd_buf       <= (others => def_rst_val_c);
    rs1_reg       <= (others => def_rst_val_c);
    rs2_reg       <= (others => def_rst_val_c);
    sha_reg       <= (others => def_rst_val_c);
    less_ff       <= def_rst_val_c;
  elsif rising_edge(clk_i) then
    -- strobes are low unless explicitly set below --
    valid         <= '0';
    shifter.start <= '0';
    clmul.start   <= '0';

    -- classify the currently decoded command --
    shift_op_v := cmd(op_clz_c) or cmd(op_ctz_c) or cmd(op_cpop_c) or cmd(op_ror_c) or cmd(op_rol_c);
    clmul_op_v := cmd(op_clmul_c) or cmd(op_clmulh_c) or cmd(op_clmulr_c);

    case ctrl_state is

      when S_IDLE => -- wait for operation trigger
        if (start_i = '1') then
          -- capture everything the operation needs --
          cmd_buf <= cmd;
          rs1_reg <= rs1_i;
          rs2_reg <= rs2_i;
          sha_reg <= shamt_i;
          less_ff <= cmp_i(cmp_less_c);
          -- dispatch --
          if (shift_op_v = '1') then -- multi-cycle shift operation
            if (FAST_SHIFT_EN = true) then -- full-parallel computation: no start pulse needed
              ctrl_state <= S_BUSY_SHIFT;
            else -- default: iterative computation
              shifter.start <= '1';
              ctrl_state    <= S_START_SHIFT;
            end if;
          elsif (zbc_en_c = true) and (clmul_op_v = '1') then -- multi-cycle clmul operation
            clmul.start <= '1';
            ctrl_state  <= S_START_CLMUL;
          else -- everything else is done in the next cycle
            valid      <= '1';
            ctrl_state <= S_IDLE;
          end if;
        end if;

      when S_START_SHIFT => -- one cycle delay to start shift operation
        ctrl_state <= S_BUSY_SHIFT;

      when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish
        if (shifter.run = '0') or (ctrl_i(ctrl_trap_c) = '1') then -- abort on trap
          valid      <= '1';
          ctrl_state <= S_IDLE;
        end if;

      when S_START_CLMUL => -- one cycle delay to start clmul operation
        ctrl_state <= S_BUSY_CLMUL;

      when S_BUSY_CLMUL => -- wait for multi-cycle clmul operation to finish
        if (clmul.busy = '0') or (ctrl_i(ctrl_trap_c) = '1') then -- abort on trap
          valid      <= '1';
          ctrl_state <= S_IDLE;
        end if;

      when others => -- undefined
        ctrl_state <= S_IDLE;

    end case;
  end if;
end process coprocessor_ctrl;
|
300 |
|
|
|
301 |
|
|
|
302 |
|
|
-- Shifter Function Core (iterative: small but slow) --------------------------------------
|
303 |
|
|
-- -------------------------------------------------------------------------------------------
|
304 |
|
|
-- Iterative shifter (only generated when FAST_SHIFT_EN is false): shifts right by one bit
-- per clock while shifter.run is set, counting iterations (cnt) and shifted-out '1' bits (bcnt).
serial_shifter:
if (FAST_SHIFT_EN = false) generate
  shifter_unit: process(rstn_i, clk_i)
    variable rotate_v  : std_ulogic; -- rol/ror: the shifted-out bit re-enters at the top
    variable count_v   : std_ulogic; -- clz/ctz: ones are shifted in at the top
    variable new_bit_v : std_ulogic; -- bit inserted at the MSB position
  begin
    if (rstn_i = '0') then
      shifter.sreg    <= (others => def_rst_val_c);
      shifter.cnt     <= (others => def_rst_val_c);
      shifter.cnt_max <= (others => def_rst_val_c);
      shifter.bcnt    <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      if (shifter.start = '1') then -- trigger new shift
        -- restart both counters --
        shifter.cnt  <= (others => '0');
        shifter.bcnt <= (others => '0');
        -- shift operand: left-oriented operations work on the bit-reversed operand
        -- because only right shifts are implemented --
        if (cmd_buf(op_clz_c) = '1') or (cmd_buf(op_rol_c) = '1') then
          shifter.sreg <= bit_rev_f(rs1_reg);
        else -- ctz, cpop, ror
          shifter.sreg <= rs1_reg;
        end if;
        -- iteration limit --
        if (cmd_buf(op_cpop_c) = '1') then -- population count: only the top counter bit set
          shifter.cnt_max                       <= (others => '0');
          shifter.cnt_max(shifter.cnt_max'left) <= '1';
        else -- rotate by the buffered shift amount
          shifter.cnt_max <= '0' & sha_reg;
        end if;
      elsif (shifter.run = '1') then -- right shifts only
        rotate_v  := cmd_buf(op_ror_c) or cmd_buf(op_rol_c);
        count_v   := cmd_buf(op_clz_c) or cmd_buf(op_ctz_c);
        new_bit_v := (rotate_v and shifter.sreg(0)) or count_v;
        shifter.sreg <= new_bit_v & shifter.sreg(shifter.sreg'left downto 1); -- ro[r/l]/lsr(for counting)
        shifter.cnt  <= std_ulogic_vector(unsigned(shifter.cnt) + 1); -- iteration counter
        if (shifter.sreg(0) = '1') then
          shifter.bcnt <= std_ulogic_vector(unsigned(shifter.bcnt) + 1); -- bit counter
        end if;
      end if;
    end if;
  end process shifter_unit;
end generate;
|
342 |
|
|
|
343 |
|
|
-- run control --
|
344 |
|
|
-- Run control of the iterative shifter (combinational):
--  * clz/ctz: keep shifting until a '1' reaches bit 0 of the shift register
--  * cpop/rotate: keep shifting until the iteration counter equals its limit
serial_shifter_ctrl:
if (FAST_SHIFT_EN = false) generate
  shifter.run <= (not shifter.sreg(0)) when ((cmd_buf(op_clz_c) = '1') or (cmd_buf(op_ctz_c) = '1')) else
                 '0'                   when (shifter.cnt = shifter.cnt_max) else
                 '1';
end generate;
|
360 |
|
|
|
361 |
|
|
|
362 |
|
|
-- Shifter Function Core (parallel: fast but large) ---------------------------------------
|
363 |
|
|
-- -------------------------------------------------------------------------------------------
|
364 |
|
|
-- Parallel shifter results (only generated when FAST_SHIFT_EN is true): every clock the
-- population count, the leading/trailing zero count and the barrel shifter output for the
-- buffered operand are registered. shifter.run is tied low, so the controller never waits.
barrel_shifter_async_sync:
if (FAST_SHIFT_EN = true) generate
  shifter_unit_fast: process(rstn_i, clk_i)
  begin
    if (rstn_i = '0') then
      shifter.cnt  <= (others => def_rst_val_c);
      shifter.sreg <= (others => def_rst_val_c);
      shifter.bcnt <= (others => def_rst_val_c);
    elsif rising_edge(clk_i) then
      -- population count --
      shifter.bcnt <= std_ulogic_vector(to_unsigned(popcount_f(rs1_reg), shifter.bcnt'length));
      -- count leading/trailing zeros --
      if cmd_buf(op_clz_c) = '1' then -- leading
        shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(rs1_reg), shifter.cnt'length));
      else -- trailing: leading zeros of the bit-reversed operand
        shifter.cnt <= std_ulogic_vector(to_unsigned(leading_zeros_f(bit_rev_f(rs1_reg)), shifter.cnt'length));
      end if;
      -- barrel shifter --
      shifter.sreg <= bs_level(0); -- rol/ror[i]
    end if;
  end process shifter_unit_fast;
  shifter.run <= '0'; -- we are done already!
end generate;
|
388 |
|
|
|
389 |
|
|
-- barrel shifter array --
|
390 |
|
|
-- Combinational barrel rotator (only generated when FAST_SHIFT_EN is true).
-- bs_level(top) holds the (optionally bit-reversed) operand; each lower level i rotates
-- the level above right by 2**i positions when bit i of the shift amount is set.
barrel_shifter_async:
if (FAST_SHIFT_EN = true) generate
  shifter_unit_async: process(rs1_reg, sha_reg, cmd_buf, bs_level)
    constant top_c : natural := index_size_f(data_width_c); -- index of the input level
  begin
    -- input level: convert left shifts to right shifts --
    if (cmd_buf(op_rol_c) = '1') then -- is left shift?
      bs_level(top_c) <= bit_rev_f(rs1_reg); -- reverse bit order of input operand
    else
      bs_level(top_c) <= rs1_reg;
    end if;

    -- rotator array --
    for i in top_c-1 downto 0 loop
      if (sha_reg(i) = '1') then
        -- the low 2**i bits wrap around to the top, the rest moves down
        bs_level(i) <= bs_level(i+1)((2**i)-1 downto 0) & bs_level(i+1)(data_width_c-1 downto 2**i);
      else
        bs_level(i) <= bs_level(i+1);
      end if;
    end loop;
  end process shifter_unit_async;
end generate;
|
412 |
|
|
|
413 |
|
|
|
414 |
66 |
zero_gravi |
-- Shifted-Add Core -----------------------------------------------------------------------
|
415 |
|
|
-- -------------------------------------------------------------------------------------------
|
416 |
|
|
-- Shifted-add core: adds rs2_reg to rs1_reg shifted left by 1, 2 or 3 bit positions.
-- The shift distance is taken directly from the funct3 bits on ctrl_i (not from cmd_buf).
shift_adder: process(rs1_reg, rs2_reg, ctrl_i)
  variable opb_v : std_ulogic_vector(data_width_c-1 downto 0); -- shifted rs1 operand
begin
  if (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "01") then
    opb_v := std_ulogic_vector(shift_left(unsigned(rs1_reg), 1)); -- << 1
  elsif (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "10") then
    opb_v := std_ulogic_vector(shift_left(unsigned(rs1_reg), 2)); -- << 2
  elsif (ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_1_c) = "11") then
    opb_v := std_ulogic_vector(shift_left(unsigned(rs1_reg), 3)); -- << 3
  else
    opb_v := (others => '-'); -- undefined
  end if;
  adder_core <= std_ulogic_vector(unsigned(rs2_reg) + unsigned(opb_v));
end process shift_adder;
|
427 |
|
|
|
428 |
|
|
|
429 |
71 |
zero_gravi |
-- One-Hot Generator Core -----------------------------------------------------------------
|
430 |
|
|
-- -------------------------------------------------------------------------------------------
|
431 |
|
|
-- One-hot generator: sets exactly the bit of one_hot_core selected by the buffered shift
-- amount; stays all-zero when the Zbs sub-extension is disabled.
shift_one_hot: process(sha_reg)
  variable mask_v : std_ulogic_vector(data_width_c-1 downto 0);
begin
  mask_v := (others => '0');
  if (zbs_en_c = true) then
    mask_v(to_integer(unsigned(sha_reg))) := '1';
  end if;
  one_hot_core <= mask_v;
end process shift_one_hot;
|
438 |
|
|
|
439 |
|
|
|
440 |
|
|
-- Carry-Less Multiplication Core ---------------------------------------------------------
|
441 |
|
|
-- -------------------------------------------------------------------------------------------
|
442 |
|
|
-- Carry-less multiplier (bit-serial shift-and-xor).
-- On clmul.start the low half of `prod` is loaded with rs1 (bit-reversed for clmulr), the
-- high half is cleared and only the top bit of the down-counter is set. While busy, each
-- clock xors rs2 into the high half if prod(0) is set and shifts the whole product right
-- by one; the top product bit is never written after start and therefore stays '0'.
-- All slice bounds are expressed via data_width_c to match the declaration of clmul.prod
-- (2*data_width_c bits) instead of hard-coding a 32-bit data path.
clmul_core: process(rstn_i, clk_i)
begin
  if (rstn_i = '0') then
    clmul.cnt  <= (others => def_rst_val_c);
    clmul.prod <= (others => def_rst_val_c);
  elsif rising_edge(clk_i) then
    if (clmul.start = '1') then -- start new multiplication
      clmul.cnt                 <= (others => '0');
      clmul.cnt(clmul.cnt'left) <= '1';
      clmul.prod(2*data_width_c-1 downto data_width_c) <= (others => '0');
      if (cmd_buf(op_clmulr_c) = '1') then -- reverse input operands?
        clmul.prod(data_width_c-1 downto 0) <= bit_rev_f(rs1_reg);
      else
        clmul.prod(data_width_c-1 downto 0) <= rs1_reg;
      end if;
    elsif (clmul.busy = '1') then -- processing
      clmul.cnt <= std_ulogic_vector(unsigned(clmul.cnt) - 1);
      -- conditional xor into the high half, combined with the one-bit right shift
      if (clmul.prod(0) = '1') then
        clmul.prod(2*data_width_c-2 downto data_width_c-1) <= clmul.prod(2*data_width_c-1 downto data_width_c) xor clmul.rs2;
      else
        clmul.prod(2*data_width_c-2 downto data_width_c-1) <= clmul.prod(2*data_width_c-1 downto data_width_c);
      end if;
      clmul.prod(data_width_c-2 downto 0) <= clmul.prod(data_width_c-1 downto 1);
    end if;
  end if;
end process clmul_core;

-- reverse input operands? --
clmul.rs2 <= bit_rev_f(rs2_reg) when (cmd_buf(op_clmulr_c) = '1') else rs2_reg;

-- multiplier busy? (busy as long as the down-counter is non-zero) --
clmul.busy <= '1' when (or_reduce_f(clmul.cnt) = '1') else '0';
|
474 |
71 |
zero_gravi |
|
475 |
|
|
|
476 |
63 |
zero_gravi |
-- Operation Results ----------------------------------------------------------------------
|
477 |
|
|
-- -------------------------------------------------------------------------------------------
|
478 |
|
|
-- logic with negate --
|
479 |
71 |
zero_gravi |
res_int(op_andn_c) <= rs1_reg and (not rs2_reg);
|
480 |
|
|
res_int(op_orn_c) <= rs1_reg or (not rs2_reg);
|
481 |
|
|
res_int(op_xnor_c) <= rs1_reg xor (not rs2_reg);
|
482 |
63 |
zero_gravi |
|
483 |
|
|
-- count leading/trailing zeros --
|
484 |
|
|
res_int(op_clz_c)(data_width_c-1 downto shifter.cnt'left+1) <= (others => '0');
|
485 |
|
|
res_int(op_clz_c)(shifter.cnt'left downto 0) <= shifter.cnt;
|
486 |
|
|
res_int(op_ctz_c) <= (others => '0'); -- unused/redundant
|
487 |
|
|
|
488 |
|
|
-- count set bits --
|
489 |
|
|
res_int(op_cpop_c)(data_width_c-1 downto shifter.bcnt'left+1) <= (others => '0');
|
490 |
|
|
res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt;
|
491 |
|
|
|
492 |
|
|
-- min/max select --
|
493 |
|
|
res_int(op_min_c) <= rs1_reg when ((less_ff xor cmd_buf(op_max_c)) = '1') else rs2_reg;
|
494 |
|
|
res_int(op_max_c) <= (others => '0'); -- unused/redundant
|
495 |
|
|
|
496 |
|
|
-- sign-extension --
|
497 |
|
|
res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7));
|
498 |
|
|
res_int(op_sextb_c)(7 downto 0) <= rs1_reg(7 downto 0); -- sign-extend byte
|
499 |
|
|
res_int(op_sexth_c)(data_width_c-1 downto 16) <= (others => rs1_reg(15));
|
500 |
|
|
res_int(op_sexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- sign-extend half-word
|
501 |
|
|
res_int(op_zexth_c)(data_width_c-1 downto 16) <= (others => '0');
|
502 |
|
|
res_int(op_zexth_c)(15 downto 0) <= rs1_reg(15 downto 0); -- zero-extend half-word
|
503 |
|
|
|
504 |
|
|
-- rotate right/left --
|
505 |
|
|
res_int(op_ror_c) <= shifter.sreg;
|
506 |
|
|
res_int(op_rol_c) <= bit_rev_f(shifter.sreg); -- reverse to compensate internal right-only shifts
|
507 |
|
|
|
508 |
|
|
-- or-combine byte (orc.b) --
-- For every byte of rs1: all eight result bits of that byte are set to the OR-reduction
-- of the corresponding source byte.
|
509 |
|
|
or_combine_gen:
|
510 |
|
|
for i in 0 to (data_width_c/8)-1 generate -- one iteration per byte of the data word
|
511 |
|
|
res_int(op_orcb_c)(i*8+7 downto i*8) <= (others => or_reduce_f(rs1_reg(i*8+7 downto i*8)));
|
512 |
|
|
end generate; -- i
|
513 |
|
|
|
514 |
|
|
-- rev8: byte-order reversal (byte swap) of rs1 --
|
515 |
|
|
res_int(op_rev8_c) <= bswap32_f(rs1_reg); -- NOTE(review): helper name suggests a fixed 32-bit swap - confirm if data_width_c can differ
|
516 |
|
|
|
517 |
66 |
zero_gravi |
-- address generation instructions --
-- A single adder result (adder_core) is shared by sh1add, sh2add and sh3add; only the
-- sh1add slot carries it and the output selector enables that slot for all three commands.
-- NOTE(review): presumably the shift amount is applied where adder_core is computed - confirm.
|
518 |
|
|
res_int(op_sh1add_c) <= adder_core;
|
519 |
|
|
res_int(op_sh2add_c) <= (others => '0'); -- unused/redundant: result is delivered via the sh1add slot
|
520 |
|
|
res_int(op_sh3add_c) <= (others => '0'); -- unused/redundant: result is delivered via the sh1add slot
|
521 |
63 |
zero_gravi |
|
522 |
71 |
zero_gravi |
-- single-bit instructions --
-- All four operations combine rs1 with the mask one_hot_core.
-- NOTE(review): presumably one_hot_core has exactly one bit set at the selected bit
-- index - confirm where one_hot_core is generated.
|
523 |
|
|
res_int(op_bclr_c) <= rs1_reg and (not one_hot_core); -- bclr: clear the masked bit
|
524 |
|
|
res_int(op_bext_c)(data_width_c-1 downto 1) <= (others => '0'); -- bext: upper result bits are always zero
|
525 |
74 |
zero_gravi |
res_int(op_bext_c)(0) <= '1' when (or_reduce_f(rs1_reg and one_hot_core) = '1') else '0'; -- bext: bit 0 = '1' if any masked bit of rs1 is set
|
526 |
71 |
zero_gravi |
res_int(op_binv_c) <= rs1_reg xor one_hot_core; -- binv: invert the masked bit
|
527 |
|
|
res_int(op_bset_c) <= rs1_reg or one_hot_core; -- bset: set the masked bit
|
528 |
66 |
zero_gravi |
|
529 |
71 |
zero_gravi |
-- carry-less multiplication instructions --
-- All three results are slices of the 64-bit product register clmul.prod.
|
530 |
|
|
res_int(op_clmul_c) <= clmul.prod(31 downto 00); -- clmul: low word of the product
|
531 |
|
|
res_int(op_clmulh_c) <= clmul.prod(63 downto 32); -- clmulh: high word of the product
|
532 |
|
|
res_int(op_clmulr_c) <= bit_rev_f(clmul.prod(31 downto 00)); -- clmulr: bit-reversed low word; NOTE(review): presumably the operands are bit-reversed upstream - confirm
|
533 |
|
|
|
534 |
|
|
|
535 |
63 |
zero_gravi |
-- Output Selector ------------------------------------------------------------------------
|
536 |
|
|
-- -------------------------------------------------------------------------------------------
-- Each res_out slot passes its res_int value only while the matching cmd_buf bit is set
-- and is all-zero otherwise, so the slots can be merged by a plain OR in the output gate.
-- Slots whose operation shares another slot's data path (ctz, max, sh2add, sh3add) are
-- tied to zero; their command bit enables the shared slot instead.
|
537 |
|
|
res_out(op_andn_c) <= res_int(op_andn_c) when (cmd_buf(op_andn_c) = '1') else (others => '0');
|
538 |
|
|
res_out(op_orn_c) <= res_int(op_orn_c) when (cmd_buf(op_orn_c) = '1') else (others => '0');
|
539 |
|
|
res_out(op_xnor_c) <= res_int(op_xnor_c) when (cmd_buf(op_xnor_c) = '1') else (others => '0');
|
540 |
|
|
res_out(op_clz_c) <= res_int(op_clz_c) when ((cmd_buf(op_clz_c) or cmd_buf(op_ctz_c)) = '1') else (others => '0'); -- shared by clz and ctz
|
541 |
|
|
res_out(op_ctz_c) <= (others => '0'); -- unused/redundant
|
542 |
|
|
res_out(op_cpop_c) <= res_int(op_cpop_c) when (cmd_buf(op_cpop_c) = '1') else (others => '0');
|
543 |
|
|
res_out(op_min_c) <= res_int(op_min_c) when ((cmd_buf(op_min_c) or cmd_buf(op_max_c)) = '1') else (others => '0'); -- shared by min and max
|
544 |
|
|
res_out(op_max_c) <= (others => '0'); -- unused/redundant
|
545 |
|
|
res_out(op_sextb_c) <= res_int(op_sextb_c) when (cmd_buf(op_sextb_c) = '1') else (others => '0');
|
546 |
|
|
res_out(op_sexth_c) <= res_int(op_sexth_c) when (cmd_buf(op_sexth_c) = '1') else (others => '0');
|
547 |
|
|
res_out(op_zexth_c) <= res_int(op_zexth_c) when (cmd_buf(op_zexth_c) = '1') else (others => '0');
|
548 |
|
|
res_out(op_ror_c) <= res_int(op_ror_c) when (cmd_buf(op_ror_c) = '1') else (others => '0');
|
549 |
|
|
res_out(op_rol_c) <= res_int(op_rol_c) when (cmd_buf(op_rol_c) = '1') else (others => '0');
|
550 |
|
|
res_out(op_orcb_c) <= res_int(op_orcb_c) when (cmd_buf(op_orcb_c) = '1') else (others => '0');
|
551 |
|
|
res_out(op_rev8_c) <= res_int(op_rev8_c) when (cmd_buf(op_rev8_c) = '1') else (others => '0');
|
552 |
66 |
zero_gravi |
-- address generation instructions (Zba) --
|
553 |
|
|
res_out(op_sh1add_c) <= res_int(op_sh1add_c) when ((cmd_buf(op_sh1add_c) or cmd_buf(op_sh2add_c) or cmd_buf(op_sh3add_c)) = '1') else (others => '0'); -- shared by sh1add, sh2add and sh3add
|
554 |
|
|
res_out(op_sh2add_c) <= (others => '0'); -- unused/redundant
|
555 |
|
|
res_out(op_sh3add_c) <= (others => '0'); -- unused/redundant
|
556 |
71 |
zero_gravi |
-- single-bit instructions (Zbs) --
|
557 |
|
|
res_out(op_bclr_c) <= res_int(op_bclr_c) when (cmd_buf(op_bclr_c) = '1') else (others => '0');
|
558 |
|
|
res_out(op_bext_c) <= res_int(op_bext_c) when (cmd_buf(op_bext_c) = '1') else (others => '0');
|
559 |
|
|
res_out(op_binv_c) <= res_int(op_binv_c) when (cmd_buf(op_binv_c) = '1') else (others => '0');
|
560 |
|
|
res_out(op_bset_c) <= res_int(op_bset_c) when (cmd_buf(op_bset_c) = '1') else (others => '0');
|
561 |
|
|
-- carry-less multiplication instructions (Zbc) --
|
562 |
|
|
res_out(op_clmul_c) <= res_int(op_clmul_c) when (cmd_buf(op_clmul_c) = '1') else (others => '0');
|
563 |
|
|
res_out(op_clmulh_c) <= res_int(op_clmulh_c) when (cmd_buf(op_clmulh_c) = '1') else (others => '0');
|
564 |
|
|
res_out(op_clmulr_c) <= res_int(op_clmulr_c) when (cmd_buf(op_clmulr_c) = '1') else (others => '0');
|
565 |
63 |
zero_gravi |
|
566 |
|
|
|
567 |
|
|
-- Output Gate ----------------------------------------------------------------------------
|
568 |
|
|
-- -------------------------------------------------------------------------------------------
-- Registered result output. While rstn_i is low, res_o is held at def_rst_val_c
-- (asynchronous, active-low reset). On each rising clock edge res_o is cleared to zero
-- unless valid = '1', in which case it takes the OR of all gated res_out slots.
-- NOTE(review): the OR acts as a multiplexer only if at most one cmd_buf bit is set at a
-- time - presumably guaranteed by the instruction decoder; confirm.
|
569 |
|
|
output_gate: process(rstn_i, clk_i)
|
570 |
|
|
begin
|
571 |
|
|
if (rstn_i = '0') then
|
572 |
|
|
res_o <= (others => def_rst_val_c);
|
573 |
|
|
elsif rising_edge(clk_i) then
|
574 |
|
|
res_o <= (others => '0'); -- default: output zero; overridden below when a result is valid
|
575 |
|
|
if (valid = '1') then
|
576 |
71 |
zero_gravi |
res_o <= res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or
|
577 |
|
|
res_out(op_clz_c) or res_out(op_cpop_c) or -- res_out(op_ctz_c) omitted: constant zero, ctz uses the clz slot
|
578 |
|
|
res_out(op_min_c) or -- res_out(op_max_c) omitted: constant zero, max uses the min slot
|
579 |
|
|
res_out(op_sextb_c) or res_out(op_sexth_c) or res_out(op_zexth_c) or
|
580 |
|
|
res_out(op_ror_c) or res_out(op_rol_c) or
|
581 |
|
|
res_out(op_orcb_c) or res_out(op_rev8_c) or
|
582 |
|
|
res_out(op_sh1add_c) or -- res_out(op_sh2add_c) and res_out(op_sh3add_c) omitted: constant zero, both use the sh1add slot
|
583 |
|
|
res_out(op_bclr_c) or res_out(op_bext_c) or res_out(op_binv_c) or res_out(op_bset_c) or
|
584 |
|
|
res_out(op_clmul_c) or res_out(op_clmulh_c) or res_out(op_clmulr_c);
|
585 |
63 |
zero_gravi |
end if;
|
586 |
|
|
end if;
|
587 |
|
|
end process output_gate;
|
588 |
|
|
|
589 |
|
|
-- valid output: the internal valid flag is forwarded combinationally (not registered here) --
-- NOTE(review): res_o is registered on the clock edge where valid = '1', so the result
-- appears one cycle after valid_o is asserted - confirm this matches the co-processor
-- interface timing expected by the CPU.
|
590 |
|
|
valid_o <= valid;
|
591 |
|
|
|
592 |
|
|
|
593 |
|
|
end neorv32_cpu_cp_bitmanip_rtl;
|