1 |
72 |
zero_gravi |
-- #################################################################################################
|
2 |
|
|
-- # << NEORV32 - CPU Co-Processor: Custom (Instructions) Functions Unit >> #
|
3 |
|
|
-- # ********************************************************************************************* #
|
4 |
|
|
-- # Intended for user-defined custom RISC-V instructions (R2-type format only). See the CPU's #
|
5 |
|
|
-- # documentation for more information. #
|
6 |
|
|
-- # #
|
7 |
|
|
-- # NOTE: Take a look at the "software-counterpart" of this CFU example in 'sw/example/demo_cfu'. #
|
8 |
|
|
-- # #
|
9 |
|
|
-- # TODO: Maybe turn this into a wrapper for CFU-playground templates. #
|
10 |
|
|
-- # -> https://github.com/google/CFU-Playground #
|
11 |
|
|
-- # ********************************************************************************************* #
|
12 |
|
|
-- # BSD 3-Clause License #
|
13 |
|
|
-- # #
|
14 |
|
|
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved. #
|
15 |
|
|
-- # #
|
16 |
|
|
-- # Redistribution and use in source and binary forms, with or without modification, are #
|
17 |
|
|
-- # permitted provided that the following conditions are met: #
|
18 |
|
|
-- # #
|
19 |
|
|
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
|
20 |
|
|
-- # conditions and the following disclaimer. #
|
21 |
|
|
-- # #
|
22 |
|
|
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
|
23 |
|
|
-- # conditions and the following disclaimer in the documentation and/or other materials #
|
24 |
|
|
-- # provided with the distribution. #
|
25 |
|
|
-- # #
|
26 |
|
|
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
|
27 |
|
|
-- # endorse or promote products derived from this software without specific prior written #
|
28 |
|
|
-- # permission. #
|
29 |
|
|
-- # #
|
30 |
|
|
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
|
31 |
|
|
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
|
32 |
|
|
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
|
33 |
|
|
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
|
34 |
|
|
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
|
35 |
|
|
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
|
36 |
|
|
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
|
37 |
|
|
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
|
38 |
|
|
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
|
39 |
|
|
-- # ********************************************************************************************* #
|
40 |
|
|
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
|
41 |
|
|
-- #################################################################################################
|
42 |
|
|
|
43 |
|
|
library ieee;
|
44 |
|
|
use ieee.std_logic_1164.all;
|
45 |
|
|
use ieee.numeric_std.all;
|
46 |
|
|
|
47 |
|
|
library neorv32;
|
48 |
|
|
use neorv32.neorv32_package.all;
|
49 |
|
|
|
50 |
|
|
entity neorv32_cpu_cp_cfu is
  port (
    -- clock, reset and CPU control --
    clk_i   : in  std_ulogic; -- global clock, rising edge
    rstn_i  : in  std_ulogic; -- global reset, low-active, asynchronous
    ctrl_i  : in  std_ulogic_vector(ctrl_width_c-1 downto 0); -- main CPU control bus
    start_i : in  std_ulogic; -- triggers a new CFU operation
    -- source operands --
    rs1_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- register file source operand 1
    rs2_i   : in  std_ulogic_vector(data_width_c-1 downto 0); -- register file source operand 2
    -- result and handshake --
    res_o   : out std_ulogic_vector(data_width_c-1 downto 0); -- operation result
    valid_o : out std_ulogic -- result/data output is valid
  );
end entity neorv32_cpu_cp_cfu;
|
65 |
|
|
|
66 |
|
|
architecture neorv32_cpu_cp_cfu_rtl of neorv32_cpu_cp_cfu is
|
67 |
|
|
|
68 |
|
|
-- CFU controller state and user-logic interface - do not modify --
type control_t is record
  busy   : std_ulogic; -- '1' while a CFU operation is in progress
  done   : std_ulogic; -- driven to '1' by the user logic when processing has completed
  result : std_ulogic_vector(data_width_c-1 downto 0); -- user logic result (written back to the register file)
  funct3 : std_ulogic_vector(2 downto 0); -- "funct3" bit-field of the custom instruction
  funct7 : std_ulogic_vector(6 downto 0); -- "funct7" bit-field of the custom instruction
end record control_t;
signal control : control_t;
|
77 |
|
|
|
78 |
|
|
begin
|
79 |
|
|
|
80 |
73 |
zero_gravi |
-- ****************************************************************************************************************************
|
81 |
|
|
-- This controller is required to handle the CPU/pipeline interface. Do not modify!
|
82 |
|
|
-- ****************************************************************************************************************************
|
83 |
|
|
|
84 |
72 |
zero_gravi |
-- CFU Controller -------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Tracks whether a CFU operation is in progress and forwards the user logic's result to the
-- CPU. res_o is cleared by default and carries control.result for one clock cycle only.
cfu_control: process(rstn_i, clk_i)
begin
  if (rstn_i = '0') then -- asynchronous, low-active reset
    control.busy <= '0';
    res_o        <= (others => '0');
  elsif rising_edge(clk_i) then
    res_o <= (others => '0'); -- default: keep the result output cleared
    if (control.busy /= '0') then -- operation in progress
      -- finish as soon as the user logic reports completion; abort if the CPU signals a trap
      if (control.done = '1') or (ctrl_i(ctrl_trap_c) = '1') then
        res_o        <= control.result; -- overrides the default above, so valid for one cycle only
        control.busy <= '0';
      end if;
    elsif (start_i = '1') then -- idle: wait for the operation trigger
      control.busy <= '1';
    end if;
  end if;
end process cfu_control;
|
105 |
|
|
|
106 |
|
|
-- CPU feedback: asserted while the CFU is busy and the user logic reports "done" --
valid_o <= control.done and control.busy; -- set one cycle before the result data appears on res_o

-- extract the user-defined instruction function bit-fields from the CPU control bus --
control.funct3 <= ctrl_i(ctrl_ir_funct3_2_c downto ctrl_ir_funct3_0_c);
control.funct7 <= ctrl_i(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_5_c);
|
112 |
|
|
|
113 |
|
|
|
114 |
|
|
-- ****************************************************************************************************************************
|
115 |
73 |
zero_gravi |
-- Actual CFU user logic - ADD YOUR CUSTOM LOGIC BELOW
|
116 |
72 |
zero_gravi |
-- ****************************************************************************************************************************
|
117 |
|
|
|
118 |
73 |
zero_gravi |
-- ----------------------------------------------------------------------------------------
|
119 |
|
|
-- CFU Instruction Format
|
120 |
|
|
-- ----------------------------------------------------------------------------------------
|
121 |
72 |
zero_gravi |
-- The CFU only supports the R2-type RISC-V instruction format. This format consists of two source registers (rs1 and rs2),
|
122 |
|
|
-- a destination register (rd) and two "immediate" bit-fields (funct7 and funct3). It is up to the user to decide which
|
123 |
|
|
-- of these fields are actually used by the CFU logic.
|
124 |
73 |
zero_gravi |
|
125 |
|
|
|
126 |
|
|
-- ----------------------------------------------------------------------------------------
|
127 |
72 |
zero_gravi |
-- Input Operands
|
128 |
73 |
zero_gravi |
-- ----------------------------------------------------------------------------------------
|
129 |
72 |
zero_gravi |
-- > rs1_i (input, 32-bit): source register 1
|
130 |
|
|
-- > rs2_i (input, 32-bit): source register 2
|
131 |
|
|
-- > control.funct3 (input, 3-bit): 3-bit function select / immediate, driven by instruction word's funct3 bit field
|
132 |
|
|
-- > control.funct7 (input, 7-bit): 7-bit function select / immediate, driven by instruction word's funct7 bit field
|
133 |
|
|
--
|
134 |
|
|
-- The two signals rs1_i and rs2_i provide the data read from the CPU's register file, which is addressed by the
|
135 |
|
|
-- instruction word's rs1 and rs2 bit-fields.
|
136 |
|
|
--
|
137 |
|
|
-- The actual CFU operation can be defined by using the funct3 and funct7 signals. Both signals are directly driven by
|
138 |
|
|
-- the corresponding bit-fields of the custom instruction. Note that these signals represent "immediates" that have to be
|
139 |
|
|
-- static already at compile time. These immediates can be used to select the actual function to be executed or they
|
140 |
|
|
-- can be used as immediates for certain operations (like shift amounts, addresses or offsets).
|
141 |
|
|
--
|
142 |
73 |
zero_gravi |
-- [NOTE] rs1_i and rs2_i are directly driven by the register file (block RAM). For complex CFU designs it is recommended
|
143 |
|
|
-- to buffer these signals using CFU-internal registers before using them for computations as the rs1 and rs2 nets
|
144 |
|
|
-- need to drive a lot of logic in the CPU. Obviously, this will increase the CFU latency by one cycle.
|
145 |
72 |
zero_gravi |
--
|
146 |
73 |
zero_gravi |
-- [NOTE] It is not possible for the CFU and its corresponding instruction words to cause any kind of exception. The CPU
|
147 |
|
|
-- control logic only verifies the custom instruction's opcode and checks if the CFU is implemented at all. No
|
148 |
|
|
-- combinations of funct7 and funct3 will cause an exception.
|
149 |
|
|
|
150 |
|
|
|
151 |
|
|
-- ----------------------------------------------------------------------------------------
|
152 |
|
|
-- Result Output
|
153 |
|
|
-- ----------------------------------------------------------------------------------------
|
154 |
72 |
zero_gravi |
-- > control.result (output, 32-bit): processing result
|
155 |
|
|
--
|
156 |
|
|
-- When the CFU has finished computation, the data in the control.result signal will be written to the CPU's register
|
157 |
|
|
-- file. The destination register is addressed by the rd bit-field in the instruction. The CFU result output is
|
158 |
|
|
-- registered in the CFU controller (see above) so do not worry too much about increasing the CPU's critical path. ;)
|
159 |
73 |
zero_gravi |
|
160 |
|
|
|
161 |
|
|
-- ----------------------------------------------------------------------------------------
|
162 |
72 |
zero_gravi |
-- Control
|
163 |
73 |
zero_gravi |
-- ----------------------------------------------------------------------------------------
|
164 |
72 |
zero_gravi |
-- > rstn_i (input, 1-bit): asynchronous reset, low-active
|
165 |
|
|
-- > clk_i (input, 1-bit): main clock, triggering on rising edge
|
166 |
|
|
-- > start_i (input, 1-bit): operation trigger (start processing, high for one cycle)
|
167 |
|
|
-- > control.done (output, 1-bit): set high when processing is done
|
168 |
|
|
--
|
169 |
|
|
-- For pure-combinatorial instructions (without internal state) a subset of those signals is sufficient; see the minimal
|
170 |
|
|
-- example below. If the CFU shall also include states (like memories, registers or "buffers") the start_i signal can be
|
171 |
|
|
-- used to trigger a new CFU operation. As soon as all internal computations have completed, the control.done signal has
|
172 |
73 |
zero_gravi |
-- to be set to indicate completion. This will finish CFU operation and will write the processing result (control.result)
|
173 |
|
|
-- to the CPU register file.
|
174 |
72 |
zero_gravi |
--
|
175 |
73 |
zero_gravi |
-- [NOTE] The control.done signal **has to be set at some point**, otherwise the CPU will get stalled forever.
|
176 |
72 |
zero_gravi |
|
177 |
|
|
|
178 |
|
|
-- User Logic Example ---------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
user_logic_function_select: process(control, rs1_i, rs2_i)
begin
  -- Minimal example ALU providing four pure-combinatorial operations.
  -- The operation is chosen by the "funct3" bit-field of the custom instruction.
  if (control.funct3 = "000") then
    control.result <= bin_to_gray_f(rs1_i); -- funct3 = "000": binary-to-gray conversion of rs1
  elsif (control.funct3 = "001") then
    control.result <= gray_to_bin_f(rs1_i); -- funct3 = "001": gray-to-binary conversion of rs1
  elsif (control.funct3 = "010") then
    control.result <= bit_rev_f(rs1_i); -- funct3 = "010": bit-reversal of rs1
  elsif (control.funct3 = "011") then
    control.result <= rs1_i xnor rs2_i; -- funct3 = "011": bit-wise XNOR of rs1 and rs2
  else
    control.result <= (others => '0'); -- any other funct3: not implemented, return zero
  end if;
end process user_logic_function_select;

-- processing done? --
control.done <= '1'; -- pure-combinatorial processing only, so the result is available "immediately"
|
195 |
|
|
|
196 |
|
|
|
197 |
|
|
end neorv32_cpu_cp_cfu_rtl;
|