OpenCores
URL https://opencores.org/ocsvn/artificial_neural_network/artificial_neural_network/trunk

Subversion Repositories artificial_neural_network

[/] [artificial_neural_network/] [trunk/] [ANN_kernel/] [RTL_VHDL_files/] [layerSP.vhd] - Rev 3

Compare with Previous | Blame | View Log

----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date:    11:24:24 05/28/2013
-- Design Name:    Configurable ANN
-- Module Name:    layerSP - arq
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: basic and parametrizable neuron layer for hardware artificial
--             neural networks. Serial input and parallel output.
--             Implemented by MAC.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
-- NOTE: To optimize the MAC, the inputs should be registered, and it should be checked that this register is implemented as a DSP input register.
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
use work.layers_pkg.all;
 
 
-- layerSP: layer of NumN neurons with a serial data input and a parallel output.
-- The single `inputs` sample is fed to every neuron's MAC; each neuron uses its
-- own NbitW-wide slice of `Wyb` and `bias`. The NumN clipped results are
-- presented together on `outputs`, NbitOut bits per neuron.
entity layerSP is
 
   generic
   (
      NumN    : natural := 8;  -- Number of neurons of the layer
      NumIn   : natural := 64; -- Number of inputs of each neuron (data count before the accumulator is restarted); its log2 sets the extra accumulator bits
      NbitIn  : natural := 8;  -- Bit width of the input data
      NbitW   : natural := 8;  -- Bit width of weights and biases
      NbitOut : natural := 12; -- Bit width of the output data
      LSbit   : natural := 4   -- Least significant bit of the MAC result that is mapped to the outputs
   );
 
   port
   (
      -- Input ports
      reset    : in  std_logic; -- Synchronous reset, active high (output register); also wired to the RST port of every MAC
      clk      : in  std_logic; -- Clock; the output register updates on its rising edge
      en       : in  std_logic; -- First step enable (multiplication of MAC); wired to CE1 of every MAC
      en2      : in  std_logic; -- Second stage enable (accumulation of MAC); wired to CE2 of every MAC
      en_r     : in  std_logic; -- Output register enable: loads the clipped results of all neurons in parallel
      a0       : in  std_logic; -- Signal to load accumulators with the multiplication result; wired to LOAD of every MAC
      inputs   : in  std_logic_vector(NbitIn-1 downto 0);       -- Input data (serial), shared by all neurons
      Wyb      : in  std_logic_vector((NbitW*NumN)-1 downto 0); -- Weight vectors; neuron i uses bits NbitW*(i+1)-1 downto NbitW*i
      bias     : in  std_logic_vector((NbitW*NumN)-1 downto 0); -- Bias vector; neuron i uses bits NbitW*(i+1)-1 downto NbitW*i
 
      -- Output ports
      outputs  : out std_logic_vector((NbitOut*NumN)-1 downto 0) -- Output data (parallel); neuron i drives bits NbitOut*(i+1)-1 downto NbitOut*i
   );
end layerSP;
 
 
 
architecture arq of layerSP is

   -- Extra accumulator bits, derived from the number of inputs, to avoid overflow
   constant NbOvrf : natural := log2(NumIn);
   -- Index of the most significant bit of each MAC result
   constant AccMsb : integer := NbitIn+NbitW+NbOvrf;
   -- Index of the most significant bit of the result window copied to the output
   constant WinMsb : integer := LSbit+NbitOut-1;

   -- Largest and smallest accumulator values that still fit in the output window.
   -- Built by concatenation because the split point depends on generics.
   constant sat_max : signed(AccMsb downto 0) := (AccMsb downto WinMsb => '0') & (WinMsb-1 downto 0 => '1'); -- E.g. "0001111"
   constant sat_min : signed(AccMsb downto 0) := (AccMsb downto WinMsb => '1') & (WinMsb-1 downto 0 => '0'); -- E.g. "1110000"

   -- One full-width MAC result per neuron
   type mac_results_t is array(NumN-1 downto 0) of std_logic_vector(AccMsb downto 0);
   -- One clipped, NbitOut-wide output word per neuron
   type out_words_t is array(NumN-1 downto 0) of std_logic_vector(NbitOut-1 downto 0);

   signal mac_out : mac_results_t;                                -- MAC results
   signal out_reg : out_words_t := (others => (others => '0'));   -- Output register

begin

macs: -- One MAC instance per neuron; all share the same input sample and control signals
   for n in 0 to NumN-1 generate
      mac_i: entity work.mac
         generic map
         (
            dirload => FALSE,
            NbOvrf  => NbOvrf,
            NbitIn  => NbitIn,
            NbitC   => NbitW
         )
         port map
         (
            CLK  => clk,
            RST  => reset,
            CE1  => en,
            CE2  => en2,
            LOAD => a0,
            A    => inputs,
            B    => Wyb((NbitW*(n+1))-1 downto NbitW*n),
            C    => bias((NbitW*(n+1))-1 downto NbitW*n),
            P    => mac_out(n)
         );
   end generate;

   -- Output register with clipping: on en_r, every neuron's MAC result is
   -- saturated to the output window and stored, all neurons in parallel.
clip_regs:
   process(clk)
      variable acc : signed(AccMsb downto 0); -- Signed view of the current MAC result
   begin
      if rising_edge(clk) then
         if reset = '1' then -- Synchronous reset, active high
            out_reg <= (others => (others => '0'));
         elsif en_r = '1' then -- Output register enable
            for n in out_reg'range loop
               acc := signed(mac_out(n));
               if acc > sat_max then
                  -- Above the window: store the maximum positive value
                  out_reg(n) <= '0' & (NbitOut-2 downto 0 => '1');
               elsif acc < sat_min then
                  -- Below the window: store the minimum negative value
                  out_reg(n) <= '1' & (NbitOut-2 downto 0 => '0');
               else
                  -- In range: copy the configured window of result bits
                  out_reg(n) <= mac_out(n)(WinMsb downto LSbit);
               end if;
            end loop;
         end if;
      end if;
   end process;

   -- Packs the per-neuron output registers into the flat output data port
pack_outputs:
   for n in 0 to NumN-1 generate
      outputs((NbitOut*(n+1))-1 downto NbitOut*n) <= out_reg(n);
   end generate;

end arq;
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.