OpenCores
URL https://opencores.org/ocsvn/artificial_neural_network/artificial_neural_network/trunk

Subversion Repositories artificial_neural_network

[/] [artificial_neural_network/] [trunk/] [ANN_kernel/] [RTL_VHDL_files/] [layerSP_top.vhd] - Blame information for rev 3

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 ojosynariz
----------------------------------------------------------------------------------
2
-- Company: CEI
3
-- Engineer: David Aledo
4
--
5
-- Create Date:    12:41:19 06/10/2013
6
-- Design Name:    Configurable ANN
7
-- Module Name:    layerSP_top - Behavioral
8
-- Project Name:
9
-- Target Devices:
10
-- Tool versions:
11
-- Description: neuron layer top for artificial neural networks. Serial input and
12
--             parallel output.
13
--
14
-- Dependencies:
15
--
16
-- Revision:
17
-- Revision 0.01 - File Created
18
-- Additional Comments:
19
--
20
----------------------------------------------------------------------------------
21
library IEEE;
22
use IEEE.STD_LOGIC_1164.ALL;
23
use ieee.numeric_std.all;
24
 
25
-- Deprecated XPS library:
26
--library proc_common_v3_00_a;
27
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() )
28
 
29
entity layerSP_top is

   -- Top level of one neuron layer: input samples arrive serially (one per
   -- clock while run_in is high) and all neuron outputs are presented in
   -- parallel. Weights and biases are accessible through a small memory
   -- interface (m_en / b_sel / m_we / addr / wdata / rdata).

   generic
   (
      -- Number of neurons in the layer
      NumN    : natural := 8;
      -- Number of inputs of each neuron
      NumIn   : natural := 64;
      -- Bit width of the input data
      NbitIn  : natural := 8;
      -- Bit width of weights and biases
      NbitW   : natural := 8;
      -- Bit width of the output data
      NbitOut : natural := 12;
      -- Layer RAM address length; should equal log2(NumN)+log2(NumIn)
      lra_l   : natural := 10;
      -- Weight RAM address length; should equal log2(NumIn)
      wra_l   : natural := 6;
      -- Bias RAM address length; should equal log2(NumN)
      bra_l   : natural := 3;
      -- Least significant bit of the outputs
      LSbit   : natural := 4
   );

   port
   (
      ---------------------------- Inputs ----------------------------
      reset   : in  std_logic;
      clk     : in  std_logic;
      -- Start and input data validation
      run_in  : in  std_logic;
      -- Memory enable (external interface)
      m_en    : in  std_logic;
      -- Bias memory select
      b_sel   : in  std_logic;
      -- Memory write enable (external interface), one bit per byte of wdata
      m_we    : in  std_logic_vector(((NbitW+7)/8)-1 downto 0);
      -- Input data (serial)
      inputs  : in  std_logic_vector(NbitIn-1 downto 0);
      -- Write data for the weight and bias memories
      wdata   : in  std_logic_vector(NbitW-1 downto 0);
      -- Address into the weight and bias memories
      addr    : in  std_logic_vector(lra_l-1 downto 0);

      ---------------------------- Outputs ---------------------------
      -- Output data validation; serves as run_in of the next layer
      run_out : out std_logic;
      -- Read data from the weight and bias memories
      rdata   : out std_logic_vector(NbitW-1 downto 0);
      -- Output data (parallel): NumN fields of NbitOut bits each
      outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0)
   );

end layerSP_top;
64
 
65
architecture Behavioral of layerSP_top is
66
 
67
   ---------------------------------------------------------------------------
   -- Storage types
   ---------------------------------------------------------------------------
   -- Disabled alternative that pads the depths to a power of two (needs
   -- pad_power2() from the deprecated proc_common package):
   --    type ramd_type is array (pad_power2(NumIn)-1 downto 0) of std_logic_vector(NbitW-1 downto 0);
   --    type layer_ram is array (pad_power2(NumN)-1 downto 0) of ramd_type;

   -- Weight memory of one neuron: NumIn words of NbitW bits
   -- (author's note: 32 or 64 entries is the optimal depth)
   type ramd_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0);
   -- One weight memory per neuron
   type layer_ram is array (NumN-1 downto 0) of ramd_type;
   -- One NbitW-wide word per neuron
   type outm_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0);

   ---------------------------------------------------------------------------
   -- Memories and data path
   ---------------------------------------------------------------------------
   -- Layer RAM: one RAM per neuron, holding its weights
   signal lram  : layer_ram;
   -- Bias registers; not a RAM because all of them are read simultaneously
   signal breg  : outm_type;
   -- Per-neuron RAM read data, multiplexed into rdata
   signal outm  : outm_type;
   -- RAM select, one bit per neuron RAM
   signal m_sel : std_logic_vector(NumN-1 downto 0);
   -- Weight bus: the current weight of every neuron, concatenated
   signal Wyb   : std_logic_vector((NbitW*NumN)-1 downto 0);
   -- Bias bus: the bias of every neuron, concatenated
   signal bias  : std_logic_vector((NbitW*NumN)-1 downto 0);
   -- Outputs coming from the neurons
   signal Nouts : std_logic_vector((NbitOut*NumN)-1 downto 0);
   -- addr reinterpreted as unsigned
   signal uaddr : unsigned(lra_l-1 downto 0);

   -- Input data register; it delays the inputs one cycle, matching the
   -- one-cycle delay of en1 that makes room for the Wyb register
   signal inreg : std_logic_vector(NbitIn-1 downto 0);

   ---------------------------------------------------------------------------
   -- Control
   ---------------------------------------------------------------------------
   -- Input counter
   signal cont     : integer range 0 to NumIn-1;
   -- First stage enable (multiplication of the MAC)
   signal en1      : std_logic;
   -- Second stage enable (accumulation of the MAC)
   signal en2      : std_logic;
   -- Shift register enable
   signal en3      : std_logic;
   -- Loads the accumulators with the multiplication result
   signal a0       : std_logic;
   -- Delay stages used to postpone en3 (aux2_en3 -> aux_en3 -> en3)
   signal aux2_en3 : std_logic;
   signal aux_en3  : std_logic;
   -- Delay stage in front of a0
   signal aux_a0   : std_logic;
93
 
94
begin
95
 
96
-- Neuron array of the layer; this top level only adds the weight/bias
-- storage and the control sequencing around it.
layerSP_inst: entity work.layerSP
   generic map (
      NumN    => NumN,
      NumIn   => NumIn,
      NbitIn  => NbitIn,
      NbitW   => NbitW,
      NbitOut => NbitOut,
      LSbit   => LSbit
   )
   port map (
      -- Clocking
      clk     => clk,
      reset   => reset,
      -- Stage enables generated by the control process
      en      => en1,   -- first stage enable
      en2     => en2,   -- second stage enable
      en_r    => en3,   -- shift register enable
      a0      => a0,    -- accumulator load
      -- Data
      inputs  => inreg, -- registered serial input
      Wyb     => Wyb,   -- registered weights, all neurons
      bias    => bias,  -- registered biases, all neurons
      outputs => Nouts
   );
122
 
123
   -- Unsigned view of the external address, used for all index computations
   uaddr <= unsigned(addr);

-- One-hot decode of the upper address bits into the per-neuron RAM select.
-- No RAM is selected while the bias registers are addressed (b_sel /= '0').
ram_selector:
   process (uaddr(lra_l-1 downto wra_l), b_sel)
   begin
      for n in m_sel'range loop
         if ( (to_integer(uaddr(lra_l-1 downto wra_l)) = n) and (b_sel = '0') ) then
            m_sel(n) <= '1'; -- This RAM is the addressed one
         else
            m_sel(n) <= '0';
         end if;
      end loop;
   end process;
136
 
137
rams: -- One weight memory per neuron of the layer
   for i in (NumN-1) downto 0 generate

      -- Write port (external interface).
      -- The word is rebuilt lane by lane: lanes whose m_we bit is set take
      -- wdata, the others keep the stored value, and the merged word is
      -- written back at the address given by the lower wra_l address bits.
      -- NOTE(review): the address is not checked against NumIn-1; when NumIn
      -- is not a power of two an out-of-range address fails in simulation.
      process (clk)
         variable d : std_logic_vector(NbitW-1 downto 0);
      begin
         if (clk'event and clk = '1') then
            if (m_en = '1' and m_sel(i) = '1') then
               -- Complete byte lanes
               for j in (NbitW/8)-1 downto 0 loop
                  if (m_we(j) = '1') then
                     d((8*(j+1))-1 downto 8*j) := wdata((8*(j+1))-1 downto 8*j);
                  else
                     d((8*(j+1))-1 downto 8*j) := lram(i)(to_integer(uaddr(wra_l-1 downto 0)))((8*(j+1))-1 downto 8*j);
                  end if;
               end loop;
               -- Leftover high bits when NbitW is not a multiple of 8. They are
               -- controlled by the top m_we bit. Slicing a whole byte here would
               -- index past NbitW-1. The range is null (no iterations) when
               -- NbitW is a multiple of 8.
               for b in NbitW-1 downto 8*(NbitW/8) loop
                  if (m_we(b/8) = '1') then
                     d(b) := wdata(b);
                  else
                     d(b) := lram(i)(to_integer(uaddr(wra_l-1 downto 0)))(b);
                  end if;
               end loop;
               -- Bottom part of layer memory address selects the weight inside the selected RAM
               lram(i)(to_integer(uaddr(wra_l-1 downto 0))) <= d;
            end if;
         end if;
      end process;

      -- Weights of all neurons are read in parallel, forming the Wyb bus.
      -- Disabled alternative (asynchronous read, forces distributed RAM):
      --Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
      process (clk) -- Synchronous read, addressed by the input counter
      begin
         if clk'event and clk = '1' then
            if reset = '1' then
               -- The clear is disabled, so the register just holds its value
               -- while reset is asserted:
               --Wyb((NbitW*(i+1))-1 downto NbitW*i) <= (others => '0');
               null;
            else
               Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
            end if;
         end if;
      end process;

      -- Asynchronous read at the external address; multiplexed into rdata elsewhere
      outm(i) <= lram(i)(to_integer(uaddr(wra_l-1 downto 0)));
   end generate;
171
 
172
   -- Registered read-back for the external memory interface. rdata is
   -- updated only while m_en is high and is taken either from the bias
   -- registers or from the addressed weight RAM.
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (m_en = '1') then
            case b_sel is
               when '1' =>
                  -- Bias registers: indexed by the lower bra_l address bits
                  rdata <= breg(to_integer(uaddr(bra_l-1 downto 0)));
               when others =>
                  -- Weight RAMs: the upper address bits pick which RAM output
                  -- is forwarded.
                  -- It may be safer to avoid accesses whose upper address is
                  -- greater than NumN-1.
                  rdata <= outm(to_integer(uaddr(lra_l-1 downto wra_l)));
            end case;
         end if;
      end if;
   end process;
186
 
187
-- Write port of the bias registers (external interface).
-- Active when m_en and b_sel are both high. The addressed bias is rebuilt
-- lane by lane: lanes whose m_we bit is set take wdata, the others keep the
-- stored value.
bias_reg:
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0);
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (b_sel = '1') ) then
            -- Complete byte lanes
            for i in (NbitW/8)-1 downto 0 loop
               if (m_we(i) = '1') then
                  d((8*(i+1))-1 downto 8*i) := wdata((8*(i+1))-1 downto 8*i);
               else
                  d((8*(i+1))-1 downto 8*i) := breg(to_integer(uaddr(bra_l-1 downto 0)))((8*(i+1))-1 downto 8*i);
               end if;
            end loop;
            -- Leftover high bits when NbitW is not a multiple of 8. They are
            -- controlled by the top m_we bit. Slicing a whole byte here would
            -- index past NbitW-1. The range is null (no iterations) when
            -- NbitW is a multiple of 8.
            for b in NbitW-1 downto 8*(NbitW/8) loop
               if (m_we(b/8) = '1') then
                  d(b) := wdata(b);
               else
                  d(b) := breg(to_integer(uaddr(bra_l-1 downto 0)))(b);
               end if;
            end loop;
            -- The bottom part (reduced to bra_l bits) of the layer RAM address selects the bias
            breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;
205
-- Registered parallel read of every bias register onto the bias bus.
bias_read:
   for n in 0 to NumN-1 generate
      -- Disabled alternative (asynchronous read of all biases in parallel):
      --bias((NbitW*(n+1))-1 downto NbitW*n) <= breg(n);
      process (clk)
      begin
         if clk'event and clk = '1' then
            -- The field is not cleared on reset (the clear is disabled);
            -- it holds its value while reset is '1'.
            if not (reset = '1') then
               bias(NbitW*n + NbitW - 1 downto NbitW*n) <= breg(n);
            end if;
         end if;
      end process;
   end generate;
219
 
220
   -- Neuron outputs go straight to the layer output port
   outputs <= Nouts;

-- Sequencer of the layer. It counts the serial inputs and derives the
-- enables for the neuron array, each one delayed by the number of register
-- stages in front of the point it controls.
control:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            -- Synchronous reset of the counter, the input register and every strobe
            cont     <= 0;
            inreg    <= (others => '0');
            en1      <= '0';
            en2      <= '0';
            en3      <= '0';
            aux_a0   <= '0';
            a0       <= '0';
            aux2_en3 <= '0';
            aux_en3  <= '0';
            run_out  <= '0';
         else
            -- Plain delay registers ------------------------------------------
            inreg   <= inputs;
            en1     <= run_in;   -- one cycle late, leaving room for the Wyb register
            en2     <= en1;
            a0      <= aux_a0;
            aux_en3 <= aux2_en3;
            en3     <= aux_en3;
            run_out <= en3;      -- one-cycle pulse right after the layer outputs have been updated

            -- Strobes derived from the input counter (defaults first) --------
            aux_a0   <= '0';
            aux2_en3 <= '0';
            if (run_in = '1') then
               if (cont = 0) then
                  aux_a0 <= '1';    -- first input of a new count
               end if;
               if (cont = NumIn-1) then
                  aux2_en3 <= '1';  -- last input of the count
                  cont <= 0;        -- restart the input counter
               else
                  cont <= cont + 1;
               end if;
            end if;
         end if;
      end if;
   end process;
263
 
264
end Behavioral;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.