OpenCores
URL https://opencores.org/ocsvn/artificial_neural_network/artificial_neural_network/trunk

Subversion Repositories artificial_neural_network

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /artificial_neural_network/trunk
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/RTL_VHDL_files/layerPS.vhd
0,0 → 1,150
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date: 11:24:24 05/28/2013
-- Design Name: Configurable ANN
-- Module Name: layerPS - arq
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: basic and parametrizable neuron layer for hardware artificial
-- neural networks. Parallel input and serial output.
-- It implements a single neuron, which is reused to calculate all of them.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
-- Deprecated XPS library: -- Needed functions have been implemented in layers_pkg
--library proc_common_v3_00_a;
--use proc_common_v3_00_a.proc_common_pkg.all;
 
use work.layers_pkg.all;
 
 
-- Neuron layer with parallel input and serial output.
-- All NumIn input samples and their NumIn weights are presented at the same
-- time; the entity delivers a single NbitOut-bit result on `outputs`.
entity layerPS is

generic
(
NumN : natural := 64; -- Number of neurons of the layer
NumIn : natural := 8; -- Number of inputs of each neuron
NbitIn : natural := 12; -- Bit width of the input data
NbitW : natural := 8; -- Bit width of weights and biases
NbitOut : natural := 8; -- Bit width of the output data
LSbit : natural := 4 -- Least significant bit of the sum that is copied to the outputs
);

port
(
-- Input ports
reset : in std_logic; -- Synchronous reset, active high
clk : in std_logic;
en : in std_logic; -- First stage enable (multipliers)
en2 : in std_logic; -- Second stage enable (adder tree)
en_r : in std_logic; -- Output register enable
inputs : in std_logic_vector((NbitIn*NumIn)-1 downto 0); -- Input data (parallel), one NbitIn-bit field per input
Wyb : in std_logic_vector((NbitW*NumIn)-1 downto 0); -- Weights, one NbitW-bit field per input
bias : in std_logic_vector(NbitW-1 downto 0); -- Bias

-- Output ports
en_out : out std_logic; -- Output data validation
outputs : out std_logic_vector(NbitOut-1 downto 0) -- Output data (serial)
);
end layerPS;
 
 
 
architecture arq of layerPS is

   -- Guard bits that keep the adder tree from overflowing:
   constant NbOvrf : natural := log2(NumIn);
   -- Width of every adder-tree operand and of the adder-tree result:
   constant SumW   : natural := NbitIn+NbitW+NbOvrf+1;

   -- Largest and smallest sums that still fit in the output window
   -- (bits LSbit+NbitOut-1 downto LSbit of the sum):
   constant sat_max : signed(SumW-1 downto 0) := (SumW-1 downto LSbit+NbitOut-1 => '0') & (LSbit+NbitOut-2 downto 0 => '1'); -- E.g. "0001111"
   constant sat_min : signed(SumW-1 downto 0) := (SumW-1 downto LSbit+NbitOut-1 => '1') & (LSbit+NbitOut-2 downto 0 => '0'); -- E.g. "1110000"

   -- One registered product per input:
   type v_res is array(NumIn-1 downto 0) of signed((NbitIn+NbitW)-1 downto 0);

   signal res     : v_res := (others => (others => '0'));                  -- Registered multiplier results
   signal sum     : std_logic_vector(SumW-1 downto 0) := (others => '0');  -- Adder-tree result
   signal reg     : std_logic_vector(NbitOut-1 downto 0) := (others => '0'); -- Output register
   signal sum_aux : std_logic_vector((SumW*(NumIn+1))-1 downto 0);         -- Adder-tree operands (NumIn products + bias)

begin

   -- One registered multiplier per input.
   muls:
   for k in (NumIn-1) downto 0 generate
      process (clk)
      begin
         if (clk'event and clk = '1') then
            if (reset = '1') then
               res(k) <= (others => '0');
            elsif (en = '1') then
               -- Input k times weight k:
               res(k) <= signed(inputs((NbitIn*(k+1))-1 downto NbitIn*k)) * signed(Wyb((NbitW*(k+1))-1 downto NbitW*k));
            end if;
         end if;
      end process;
   end generate;

   -- Sign-extend every product to SumW bits and pack it into the operand bus.
   asign_adder_tree_inputs:
   for k in NumIn-1 downto 0 generate
      sum_aux((SumW*(k+1))-1 downto SumW*k) <= std_logic_vector(resize(res(k),SumW));
   end generate;

   -- The sign-extended bias occupies the last (topmost) operand slot.
   sum_aux((SumW*(NumIn+1))-1 downto SumW*NumIn) <= std_logic_vector(resize(signed(bias),SumW));

   -- Adds the NumIn products and the bias.
   recursive_adder_tree: entity work.adder_tree
      generic map
      (
         NumIn => NumIn+1, -- +bias
         Nbit  => SumW
      )
      port map
      (
         clk    => clk,
         reset  => reset,
         en     => en2,
         inputs => sum_aux,
         en_out => en_out,
         output => sum
      );

   -- Output register with saturation (clipping) to the configured bit window.
   process(clk)
   begin
      if(rising_edge(clk)) then
         if(reset = '1') then -- Synchronous reset, active high
            reg <= (others => '0');
         elsif en_r = '1' then
            if signed(sum) > sat_max then
               -- Above the window: clamp to the most positive output code
               reg <= '0' & (NbitOut-2 downto 0 => '1');
            elsif signed(sum) < sat_min then
               -- Below the window: clamp to the most negative output code
               reg <= '1' & (NbitOut-2 downto 0 => '0');
            else
               -- In range: copy the configured window of the sum
               reg <= sum(LSbit+NbitOut-1 downto LSbit);
            end if;
         end if;
      end if;
   end process;

   -- The output port mirrors the output register:
   outputs <= reg;

end arq;
/RTL_VHDL_files/layerSP.vhd
0,0 → 1,140
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date: 11:24:24 05/28/2013
-- Design Name: Configurable ANN
-- Module Name: layerSP - arq
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: basic and parametrizable neuron layer for hardware artificial
-- neural networks. Serial input and parallel output.
-- Implemented by MAC.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
-- NOTE: To optimize MAC, inputs should be registered, and should be checked that this register is implemented as DSP input register
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
use work.layers_pkg.all;
 
 
-- Neuron layer with serial input and parallel output.
-- One input sample is presented at a time together with one weight per neuron;
-- the NumN results are delivered side by side on `outputs`.
entity layerSP is

generic
(
NumN : natural := 8; -- Number of neurons of the layer
NumIn : natural := 64; -- Number of inputs of each neuron (number of samples accumulated before the accumulator is restarted)
NbitIn : natural := 8; -- Bit width of the input data
NbitW : natural := 8; -- Bit width of weights and biases
NbitOut : natural := 12; -- Bit width of the output data
LSbit : natural := 4 -- Least significant bit of the MAC result that is copied to the outputs
);

port
(
-- Input ports
reset : in std_logic; -- Synchronous reset, active high
clk : in std_logic;
en : in std_logic; -- First stage enable (multiplication of MAC)
en2 : in std_logic; -- Second stage enable (accumulation of MAC)
en_r : in std_logic; -- Output register enable
a0 : in std_logic; -- Signal to load accumulators with the multiplication result
inputs : in std_logic_vector(NbitIn-1 downto 0); -- Input data (serial)
Wyb : in std_logic_vector((NbitW*NumN)-1 downto 0); -- Weights, one NbitW-bit field per neuron
bias : in std_logic_vector((NbitW*NumN)-1 downto 0); -- Biases, one NbitW-bit field per neuron

-- Output ports
outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0) -- Output data (parallel), one NbitOut-bit field per neuron
);
end layerSP;
 
 
 
architecture arq of layerSP is

   -- Guard bits in the accumulators to avoid overflow:
   constant NbOvrf : natural := log2(NumIn);
   -- Width of every MAC result:
   constant AccW   : natural := NbitIn+NbitW+NbOvrf+1;

   -- Largest and smallest MAC results that still fit in the output window
   -- (bits LSbit+NbitOut-1 downto LSbit of the result):
   constant sat_max : signed(AccW-1 downto 0) := (AccW-1 downto LSbit+NbitOut-1 => '0') & (LSbit+NbitOut-2 downto 0 => '1'); -- E.g. "0001111"
   constant sat_min : signed(AccW-1 downto 0) := (AccW-1 downto LSbit+NbitOut-1 => '1') & (LSbit+NbitOut-2 downto 0 => '0'); -- E.g. "1110000"

   type v_res is array(NumN-1 downto 0) of std_logic_vector(AccW-1 downto 0);    -- One MAC result per neuron
   type v_reg is array(NumN-1 downto 0) of std_logic_vector(NbitOut-1 downto 0); -- One output word per neuron

   signal res : v_res;                                 -- MAC results
   signal reg : v_reg := (others => (others => '0')); -- Output register

begin

   -- One MAC per neuron. Every MAC sees the same serial input sample and
   -- its own weight and bias fields.
   macs:
   for k in (NumN-1) downto 0 generate
      mac_i: entity work.mac
         generic map
         (
            dirload => FALSE,
            NbOvrf  => NbOvrf,
            NbitIn  => NbitIn,
            NbitC   => NbitW
         )
         port map
         (
            CLK  => clk,
            RST  => reset,
            A    => inputs,
            B    => Wyb((NbitW*(k+1))-1 downto NbitW*k),
            C    => bias((NbitW*(k+1))-1 downto NbitW*k),
            P    => res(k),
            CE1  => en,
            CE2  => en2,
            LOAD => a0
         );
   end generate;

   -- Output register: captures all NumN MAC results at once, saturating
   -- each of them to the configured bit window.
   process(clk)
   begin
      if rising_edge(clk) then
         if reset = '1' then -- Synchronous reset, active high
            reg <= (others => (others => '0'));
         elsif en_r = '1' then
            for k in 0 to NumN-1 loop
               if signed(res(k)) > sat_max then
                  -- Above the window: clamp to the most positive output code
                  reg(k) <= '0' & (NbitOut-2 downto 0 => '1');
               elsif signed(res(k)) < sat_min then
                  -- Below the window: clamp to the most negative output code
                  reg(k) <= '1' & (NbitOut-2 downto 0 => '0');
               else
                  -- In range: copy the configured window of the result
                  reg(k) <= res(k)(LSbit+NbitOut-1 downto LSbit);
               end if;
            end loop;
         end if;
      end if;
   end process;

   -- Flattens the output register array onto the output port,
   -- neuron k in bits NbitOut*(k+1)-1 downto NbitOut*k:
   process (reg)
   begin
      for k in 0 to NumN-1 loop
         outputs((NbitOut*(k+1))-1 downto NbitOut*k) <= reg(k);
      end loop;
   end process;

end arq;
/RTL_VHDL_files/shiftreg_pu.vhd
0,0 → 1,132
----------------------------------------------------------------------------------
-- Company:
-- Engineer:
--
-- Create Date: 18:03:58 05/14/2014
-- Design Name: Configurable ANN
-- Module Name: shiftreg_pu - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Shift register with parallel unload.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
 
 
-- Shift register with parallel unload: collects Nreg serial words of Nbit bits
-- and presents them together on `outputs`.
entity shiftreg_pu is
generic
(
Nreg : natural := 64; -- Number of elements
Nbit : natural := 8 -- Bit width of each element
);

port
(
-- Input ports
reset : in std_logic; -- Synchronous reset, active high
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
inputs : in std_logic_vector(Nbit-1 downto 0); -- Input data (serial)
-- Output ports
run_out : out std_logic; -- Output data validation, run_in for the next layer
outputs : out std_logic_vector((Nbit*Nreg)-1 downto 0) -- Output data (parallel), element i in bits Nbit*(i+1)-1 downto Nbit*i
);
end shiftreg_pu;
 
architecture Behavioral of shiftreg_pu is

   type dreg_type is array (Nreg-1 downto 0) of std_logic_vector(Nbit-1 downto 0); -- Shift register type
   type reg_st_type is (idle, counting); -- Control state type

   signal count  : integer range 0 to Nreg-1; -- Position of the current word inside the frame
   signal en_r   : std_logic;   -- Shift register enable (high while counting)
   signal unload : std_logic;   -- One-cycle pulse: copy the shift register onto the output register
   signal dreg   : dreg_type;   -- Shift register
   signal reg_st : reg_st_type; -- Control state

begin

   -- Shift register. While enabled, a new word enters at position Nreg-1 on
   -- every clock. On every enabled cycle except the first one of a frame
   -- (count = 0) the stored words also move one position towards index 0.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            dreg <= (others=> (others => '0'));
         else
            if en_r = '1' then
               dreg(Nreg-1) <= inputs;
               if count /= 0 then
                  shift:
                  for i in 1 to Nreg-1 loop
                     dreg(i-1) <= dreg(i);
                  end loop;
               end if;
            end if;
         end if;
      end if;
   end process;

   -- Output register: holds the last complete frame so that the parallel
   -- output stays constant while the next frame is being shifted in.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            outputs <= (others=> '0');
         else
            if unload = '1' then -- Parallel unload
               for i in 0 to Nreg-1 loop
                  outputs((Nbit*(i+1))-1 downto Nbit*i) <= dreg(i);
               end loop;
            end if;
         end if;
      end if;
   end process;

   -- Shift register control.
   -- idle     : waits for run_in.
   -- counting : counts Nreg enabled cycles, then requests the unload.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            count   <= 0;
            reg_st  <= idle;
            run_out <= '0';
            unload  <= '0';
         else
            run_out <= unload; -- Data is valid one cycle after the unload pulse
            -- Default: no unload. Without this default `unload` stayed at '1'
            -- forever after the first frame, which kept run_out asserted and
            -- made the output register track the shift register every cycle.
            unload  <= '0';
            case reg_st is
               when idle =>
                  if run_in = '1' then
                     reg_st <= counting;
                  else
                     reg_st <= idle;
                  end if;
               when counting =>
                  if count = (Nreg-1) then
                     -- Last word of the frame is being loaded now
                     reg_st <= idle;
                     count  <= 0;
                     unload <= '1'; -- Overrides the default for exactly one cycle
                  else
                     reg_st <= counting;
                     count  <= count +1;
                  end if;
            end case;
         end if;
      end if;
   end process;

   -- The shift register is enabled for as long as the control is counting:
   process (reg_st)
   begin
      if reg_st = counting then
         en_r <= '1';
      else
         en_r <= '0';
      end if;
   end process;

end Behavioral;
 
/RTL_VHDL_files/layers_pkg.vhd
0,0 → 1,301
----------------------------------------------------------------------------------
-- Company: CEI - UPM
-- Engineer: David Aledo
--
-- Create Date: 01.10.2015
-- Design Name: Configurable ANN
-- Package Name: layers_pkg
-- Project Name:
-- Target Devices:
-- Tool Versions:
-- Description: define array types for generics, functions to give them values from
-- string generics, and other help functions
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
library IEEE;
use IEEE.STD_LOGIC_1164.all;
 
--library proc_common_v3_00_a; -- Deprecated library from XPS tool
--use proc_common_v3_00_a.proc_common_pkg.all;
 
-- Public interface of layers_pkg: array types used for generics and the helper
-- functions that build and inspect them.
package layers_pkg is

-- Array types for generics:
type int_vector is array (natural range <>) of integer; -- Generic integer vector
type ltype_vector is array (integer range <>) of string(1 to 2); -- Layer type vector
type ftype_vector is array (integer range <>) of string(1 to 6); -- Activation function type vector
-- Note: these strings cannot be unconstrained

-- Functions to assign values to vector types from string generics:
-- Arguments:
-- str_v : string to be converted (elements separated by a single space)
-- n : number of elements of the vector
-- Return: assigned vector
function assign_ints(str_v : string; n : integer) return int_vector;
function assign_ltype(str_v : string; n : integer) return ltype_vector;
function assign_ftype(str_v : string; n : integer) return ftype_vector;

-- Other functions:

-- Argument: c : character to be checked
-- Return: TRUE if c is 0, 1, 2, 3, 4, 5, 6, 7, 8 or 9
function is_digit(c : character) return boolean;

-- Base two logarithm for int_vector:
-- Arguments:
-- v : integer vector
-- n : number of elements of the vector
-- Return : integer vector of the base two logarithms of each element of v
function log2(v : int_vector; n : integer) return int_vector;

-- Calculate the total weight and bias memory address length:
-- Arguments:
-- NumIn : number of inputs of the network
-- NumN : number of neurons of each layer
-- n : number of layers (number of elements of NumN)
-- Return: total weight and bias memory address length (integer)
function calculate_addr_l(NumIn : integer; NumN : int_vector; n : integer) return integer;

-- Assign the weight and bias memory address length of each layer:
-- Arguments:
-- NumIn : number of inputs of the network
-- NumN : number of neurons of each layer
-- n : number of layers (number of elements of NumN and the return integer vector)
-- Return: weight and bias memory address length of each layer (integer vector)
function assign_addrl(NumIn : integer; NumN : int_vector; n : integer) return int_vector;

-- Calculate the maximum of the multiplications of two vectors element by element
-- Arguments:
-- v1 : input vector 1
-- v2 : input vector 2
-- Return: maximum of the multiplications of two vectors element by element
function calculate_max_mul(v1 : int_vector; v2 : int_vector) return integer;

-- Returns the max value of the input integer vector:
function calculate_max(v : int_vector) return integer;

-- Adding needed functions from the deprecated library proc_common_v3_00_a:
-- max2 : returns the larger of num1 and num2
-- log2 : returns the number of bits needed to encode x choices
function max2 (num1, num2 : integer) return integer;
function log2(x : natural) return integer;

end layers_pkg;
 
package body layers_pkg is
 
-- Returns the larger of the two arguments (num1 when both are equal).
function max2 (num1, num2 : integer) return integer is
begin
   if num2 > num1 then
      return num2;
   end if;
   return num1;
end function max2;
 
-- Function log2 -- returns the number of bits needed to encode x choices,
-- i.e. the smallest i such that 2**i >= x:
-- x = 0 returns 0
-- x = 1 returns 0
-- x = 2 returns 1
-- x = 3 returns 2
-- x = 4 returns 2, etc.
function log2(x : natural) return integer is
   variable nbits : integer := 0; -- Result under construction
   variable pow2  : integer := 1; -- Always equal to 2**nbits
begin
   if x = 0 then
      return 0;
   end if;

   -- Fixed iteration count (for loop for XST); iterations after
   -- 2**nbits has reached x do nothing.
   for step in 0 to 29 loop
      if pow2 < x then
         nbits := nbits+1;
         pow2  := pow2*2;
      end if;
   end loop;

   -- Fix per CR520627 XST was ignoring this anyway and printing a
   -- Warning in SRP file. This will get rid of the warning and not
   -- impact simulation.
   -- synthesis translate_off
   assert pow2 >= x
      report "Function log2 received argument larger" &
             " than its capability of 2^30. "
      severity failure;
   -- synthesis translate_on
   return nbits;
end function log2;
 
 
-- Returns TRUE when c is one of the decimal digits '0' to '9'.
function is_digit(c : character) return boolean is
begin
   -- The ten digits are consecutive values of type character, so a range
   -- comparison selects exactly them.
   return (c >= '0') and (c <= '9');
end is_digit;
 
-- Builds an integer vector from a string of decimal numbers separated by
-- single spaces:
-- Arguments:
-- str_v : string to be converted
-- n : number of elements of the vector
-- Return: assigned integer vector; the first number of the string is
-- stored in element 0 and the last one in element n-1
function assign_ints(str_v : string; n : integer) return int_vector is
   variable elem   : integer := n-1; -- Element being filled (the string is read backwards)
   variable weight : integer := 1;   -- Decimal weight of the next digit
   variable result : int_vector(n-1 downto 0) := (others => 0);
begin
   -- The string is scanned from its last character to its first, so every
   -- number is read from its least significant digit upwards.
   for pos in str_v'length downto 1 loop
      if str_v(pos) /= ' ' then
         assert is_digit(str_v(pos))
            report "Error in assign_ints: character " & str_v(pos) & " is not a digit."
            severity error;
         -- Add ("<digit>" - "0") * weight to the current element:
         result(elem) := result(elem) + (character'pos(str_v(pos))-character'pos('0'))*weight;
         weight := weight*10;
      else
         -- A space closes the current number and opens the previous element
         assert elem > 0
            report "Error in assign_ints: number of elements in string is greater than n."
            severity error;
         elem   := elem -1;
         weight := 1;
      end if;
   end loop;
   assert elem = 0
      report "Error in assign_ints: number of elements in string is less than n."
      severity error;
   return result;
end assign_ints;
 
-- Builds an activation function type vector from a string of names
-- separated by single spaces:
-- Arguments:
-- str_v : string to be converted
-- n : number of elements of the vector
-- Return: assigned activation function type vector. Every element starts
-- as "linear" and is overwritten character by character.
function assign_ftype(str_v : string; n : integer) return ftype_vector is
   variable elem   : integer := 0; -- Element being filled
   variable pos    : integer := 1; -- Next character position inside the element
   variable result : ftype_vector(n-1 downto 0) := (others => "linear");
begin
   for c in 1 to str_v'length loop
      if str_v(c) /= ' ' then
         result(elem)(pos) := str_v(c);
         pos := pos +1;
      else
         -- A space starts the next element
         elem := elem +1;
         pos  := 1;
      end if;
   end loop;
   assert elem = n-1
      report "Error in assign_ftype: number of elements in string is less than n."
      severity error;
   return result;
end assign_ftype;
 
-- Builds a layer type vector from a string of two-letter layer types
-- separated by single spaces:
-- Arguments:
-- str_v : string to be converted
-- n : number of elements of the vector
-- Return: assigned layer type vector. Every element starts as "SP" and is
-- overwritten character by character.
function assign_ltype(str_v : string; n : integer) return ltype_vector is
   variable elem   : integer := 0; -- Element being filled
   variable pos    : integer := 1; -- Next character position inside the element
   variable result : ltype_vector(n-1 downto 0) := (others => "SP");
begin
   for c in 1 to str_v'length loop
      if str_v(c) /= ' ' then
         assert str_v(c) = 'P' or str_v(c) = 'S'
            report "Error in assign_ltype: character " & str_v(c) & " is not 'P' (parallel) or 'S' (serial)."
            severity error;
         result(elem)(pos) := str_v(c);
         pos := pos +1;
      else
         -- A space starts the next element
         elem := elem +1;
         pos  := 1;
      end if;
   end loop;
   assert elem = n-1
      report "Error in assign_ltype: number of elements do not coincide with number of introduced elements."
      severity error;
   return result;
end assign_ltype;
 
-- Calculate the total weight and bias memory address length:
-- Arguments:
-- NumIn : number of inputs of the network
-- NumN : number of neurons of each layer
-- n : number of layers (number of elements of NumN)
-- Return: total weight and bias memory address length (integer), i.e. the
-- largest per-layer weight memory address length plus one bias select bit.
-- The per-layer length is log2(inputs of the layer) + log2(neurons of the
-- layer), the same value assign_addrl returns for that layer.
function calculate_addr_l(NumIn : integer; NumN : int_vector; n : integer) return integer is -- matrix + b_sel
   variable addr_l : integer := log2(NumIn)+log2(NumN(0)); -- return value. Initialized with the weight memory length of the first layer
begin
   -- Calculate the maximum of the weight memory length. The inputs of layer i
   -- are the NumN(i-1) outputs of the previous layer.
   for i in 1 to n-1 loop
      -- Fixed: the closing parenthesis of the first log2 was misplaced
      -- (log2(NumN(i-1)+log2(NumN(i)))), which under-sized the address.
      addr_l := max2( addr_l, log2(NumN(i-1))+log2(NumN(i)) );
   end loop;
   addr_l := addr_l +1; -- add bias select bit
   return addr_l;
end calculate_addr_l;
 
-- Element-wise base two logarithm of an integer vector:
-- Arguments:
-- v : integer vector (elements 0 to n-1 are read)
-- n : number of elements of the vector
-- Return : integer vector (n-1 downto 0) whose element k is log2(v(k))
function log2(v : int_vector; n : integer) return int_vector is
   variable logs : int_vector(n-1 downto 0);
begin
   for k in 0 to n-1 loop
      logs(k) := log2(v(k)); -- Scalar log2 applied to each element
   end loop;
   return logs;
end log2;
 
-- Weight and bias memory address length of each layer:
-- Arguments:
-- NumIn : number of inputs of the network
-- NumN : number of neurons of each layer
-- n : number of layers (number of elements of NumN and of the returned vector)
-- Return: integer vector whose element k is
-- log2(inputs of layer k) + log2(neurons of layer k)
function assign_addrl(NumIn : integer; NumN : int_vector; n : integer) return int_vector is
   variable lengths : int_vector(n-1 downto 0);
begin
   -- The first layer is fed by the network inputs:
   lengths(0) := log2(NumIn)+log2(NumN(0));
   -- Every later layer is fed by the neurons of the previous layer:
   for layer in 1 to n-1 loop
      lengths(layer) := log2(NumN(layer-1))+log2(NumN(layer));
   end loop;
   return lengths;
end assign_addrl;
 
-- Returns the largest element of v, or 0 when no element is greater than 0
-- (the running maximum starts at 0). Elements are read at indices
-- 0 to v'length-1.
function calculate_max(v : int_vector) return integer is
   variable best : integer := 0;
begin
   for k in 0 to v'length-1 loop
      if v(k) > best then
         best := v(k);
      end if;
   end loop;
   return best;
end calculate_max;
 
-- Maximum of the element-by-element products of two vectors:
-- Arguments:
-- v1 : input vector 1
-- v2 : input vector 2
-- Return: the largest v1(k)*v2(k), or 0 when no product is greater than 0
-- (the running maximum starts at 0). Elements are read at indices
-- 0 to v1'length-1.
function calculate_max_mul(v1 : int_vector; v2 : int_vector) return integer is
   variable best    : integer := 0;
   variable product : integer;
begin
   assert v1'length = v2'length
      report "Error in calculate_max_mul: vector's length do not coincide."
      severity error;
   for k in 0 to v1'length-1 loop
      product := v1(k)*v2(k);
      if product > best then
         best := product;
      end if;
   end loop;
   return best;
end calculate_max_mul;
 
end layers_pkg;
/RTL_VHDL_files/mac.vhd
0,0 → 1,85
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date:
-- Design Name: Configurable ANN
-- Module Name: mac - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Multiplier and accumulator (MAC).
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
 
 
-- Multiplier and accumulator (MAC): registers the product A*B and accumulates
-- it, with an optional load of the bias C.
entity mac is
generic
(
dirload : boolean := FALSE; -- Direct load. Load accumulator with port C value (TRUE) or A*B + C (FALSE)
NbOvrf : natural := 3; -- Extra bits in the accumulator to avoid overflow
NbitIn : natural := 16; -- Bit width of the input data
NbitC : natural := 18 -- Bit width of weight and bias
);
port
(
CLK : in std_logic;
RST : in std_logic; -- Synchronous reset, active high
A : in STD_LOGIC_VECTOR (NbitIn-1 DOWNTO 0); -- Input data
B : in STD_LOGIC_VECTOR (NbitC-1 DOWNTO 0); -- Weights
C : in std_logic_vector (NbitC-1 downto 0); -- Bias
P : out std_logic_vector (NbitIn+NbitC+NbOvrf DOWNTO 0); -- Output data (accumulator value)
CE1 : in std_logic; -- Multiplier enable
CE2 : in std_logic; -- Accumulator enable
LOAD : in std_logic -- Load signal. Resets the accumulator with value determined by dirload parameter
);
end mac;
 
architecture Behavioral of mac is

   signal Mreg : signed (NbitIn+NbitC-1 DOWNTO 0) := (others => '0');      -- Registered product A*B
   signal acc  : signed (NbitIn+NbitC+NbOvrf DOWNTO 0) := (others => '0'); -- Accumulator register

begin

   process (CLK)
   begin
      if CLK'event and CLK = '1' then
         if RST = '1' then
            Mreg <= (others => '0');
            acc  <= (others => '0');
         else
            -- Multiplier stage:
            if CE1 = '1' then
               Mreg <= signed(A)*signed(B);
            end if;

            -- Accumulator stage:
            if CE2 = '1' then
               if LOAD /= '1' then
                  -- Normal operation: accumulate the registered product
                  acc <= acc + Mreg;
               elsif dirload then
                  -- Restart the accumulator with the sign-extended bias only
                  acc <= resize(signed(C),acc'length);
               else
                  -- Restart the accumulator with bias + registered product
                  acc <= resize(signed(C),acc'length) + Mreg;
               end if;
            end if;
         end if;
      end if;
   end process;

   -- The output port mirrors the accumulator:
   P <= std_logic_vector(acc);

end Behavioral;
 
/RTL_VHDL_files/af_sigmoid.vhd
0,0 → 1,99
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: Enrique Herrero
--
-- Create Date:
-- Design Name: Configurable ANN
-- Module Name: af_sigmoid - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Sigmoid activation function implemented as a Look-Up-Table (LUT).
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Revision 1 - David Aledo
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.ALL;
use ieee.math_real.all;
 
 
-- Sigmoid activation function implemented as a look-up table with a
-- registered output.
entity af_sigmoid is
generic
(
Nbit : natural := 8 -- Bit width of the input and output data
);
port
(
reset : in std_logic; -- Synchronous reset, active high
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
inputs : in std_logic_vector(Nbit-1 downto 0); -- Input data
run_out : out std_logic; -- Output data validation, run_in for the next layer
outputs : out std_logic_vector(Nbit-1 downto 0) -- Output data
);
end af_sigmoid;
 
 
architecture Behavioral of af_sigmoid is

   -- Definition of internal modules, constants, signals, etc...

   -- Sigmoid parameters:
   constant f0 : real := 2.0; -- Slope at the origin
   constant fr : real := 2.0; -- fr = fmax - fmin

   -- LUT index: the input bit pattern read as an unsigned number.
   signal dataIn: integer range (2**Nbit-1) downto 0;
   type table_t is array(0 to (2**Nbit)-1) of std_logic_vector(Nbit-1 downto 0); -- LUT type

   -- Function Sigmoidal: generates the Look-Up-Table for the sigmoid activation function:
   -- margin: maximum absolute value of x; the table covers x from -margin
   --         up to (but not including) +margin in 2**Nbit equal steps.
   -- Nbit:   bit width of the table entries.
   -- Return: table in which the entry for a signed input value idx is stored
   --         at the index given by idx's two's complement bit pattern read as
   --         unsigned (idx >= 0 at index idx, idx < 0 at index idx + 2**Nbit).
   function Sigmoidal(margin:real;Nbit:natural) return table_t is
      variable scale,x,y,w,t: real;
      variable u: integer;
      variable fbits: std_logic_vector(Nbit-1 downto 0);
      variable table: table_t;
   begin
      scale := (2.0*margin)/(2.0**Nbit); -- Calculates the gap between two points
      x := -margin;
      for idx in -(2**(Nbit-1)) to (2**(Nbit-1))-1 loop
         y := (fr/(1.0+exp(((-4.0*f0)/fr)*x)))-(fr/2.0);
         w := y*(2.0**(Nbit-1)); -- Shifts bits to the left
         t := round(w);
         u := integer(t);
         fbits := std_logic_vector(to_signed(u,Nbit));
         -- Explicit wrap-around into 0 .. 2**Nbit-1. This is the same index
         -- the original obtained through to_unsigned truncation, without
         -- the "vector truncated" warning for idx >= 0.
         table((idx+(2**Nbit)) mod (2**Nbit)) := fbits;
         x := x+scale;
      end loop;
      return table;
   end Sigmoidal;

   signal Table: table_t := Sigmoidal(1.0,Nbit); -- Generation of the LUT (at synthesis time)

begin

   -- Description of the activation function

   -- Fixed: the index used to be to_integer(signed(inputs)), which is negative
   -- for every input with the MSB set and therefore outside both the range of
   -- dataIn and the bounds of the table. The table is addressed by the raw
   -- bit pattern, so the input must be read as unsigned.
   dataIn <= to_integer(unsigned(inputs));

   -- Registered LUT read. Reset is synchronous, so the clock is the only
   -- signal the process needs to be sensitive to.
   Activation: process(clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            run_out <= '0';
            outputs <= (others => '0');
         else
            if run_in = '1' then
               run_out <='1';
               outputs <=Table(dataIn); -- Assigns output value from the LUT
            else
               run_out <='0'; -- outputs keeps its previous value
            end if;
         end if;
      end if;
   end process;
end Behavioral;
/RTL_VHDL_files/af_template.vhd
0,0 → 1,49
----------------------------------------------------------------------------------
-- Company:
-- Engineer: User
--
-- Create Date:
-- Design Name: Configurable ANN
-- Module Name: af_template - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: User activation function template.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.ALL;
use ieee.math_real.all;
 
-- Only entity name must be changed, please do not modify the template entity:
-- Template entity for a user-defined activation function. It has the same
-- generic and ports as af_sigmoid.
entity af_template is
generic
(
Nbit : natural := 8 -- Bit width of the input and output data
);
port
(
reset : in std_logic;
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
inputs : in std_logic_vector(Nbit-1 downto 0); -- Input data
run_out : out std_logic; -- Output data validation, run_in for the next layer
outputs : out std_logic_vector(Nbit-1 downto 0) -- Output data
);
end af_template;
 
 
-- Empty architecture to be filled in by the user. As written it contains no
-- statements, so run_out and outputs are not driven.
architecture Behavioral of af_template is
-- Add here user constants, internal signals, and other user definitions:

begin
-- Add here user logic to describe the user activation function
-- (it must drive both run_out and outputs):

end Behavioral;
/RTL_VHDL_files/layerPS_top.vhd
0,0 → 1,259
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date: 12:41:19 06/10/2013
-- Design Name: Configurable ANN
-- Module Name: layerPS_top - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: neuron layer top for artificial neural networks. Parallel input and
-- serial output.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
 
-- Deprecated XPS library:
--library proc_common_v3_00_a;
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() )
 
-- Top level of a neuron layer with parallel input and serial output,
-- including the external interface to its weight and bias memories.
entity layerPS_top is

generic
(
NumN : natural := 64; -- Number of neurons of the layer
NumIn : natural := 8; -- Number of inputs of each neuron
NbitIn : natural := 12; -- Bit width of the input data
NbitW : natural := 8; -- Bit width of weights and biases
NbitOut : natural := 8; -- Bit width of the output data
lra_l : natural := 10; -- Layer RAM address length. It should be equal to log2(NumN)+log2(NumIn)
wra_l : natural := 3; -- Weight RAM address length. It should be equal to log2(NumIn)
bra_l : natural := 6; -- Bias RAM address length. It should be equal to log2(NumN)
LSbit : natural := 4 -- Least significant bit of the outputs
);

port
(
-- Input ports
reset : in std_logic;
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
m_en : in std_logic; -- Memory enable (external interface)
b_sel : in std_logic; -- Bias memory select
m_we : in std_logic_vector(((NbitW+7)/8)-1 downto 0); -- Memory write enable (external interface); one bit per started byte of NbitW
inputs : in std_logic_vector((NbitIn*NumIn)-1 downto 0); -- Input data (parallel)
wdata : in std_logic_vector(NbitW-1 downto 0); -- Write data of weight and bias memories
addr : in std_logic_vector(lra_l-1 downto 0); -- Address of weight and bias memories

-- Output ports
run_out : out std_logic; -- Output data validation, run_in for the next layer
rdata : out std_logic_vector(NbitW-1 downto 0); -- Read data of weight and bias memories
outputs : out std_logic_vector(NbitOut-1 downto 0) -- Output data (serial)
);

end layerPS_top;
 
architecture Behavioral of layerPS_top is

   -- Storage types. (A power-of-two padded variant based on pad_power2() from the
   -- deprecated proc_common package was only ever used for simulation.)
   type ramd_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0);  -- One word per neuron. Optimal: 32 or 64 spaces
   type layer_ram is array (NumIn-1 downto 0) of ramd_type;                           -- One RAM per input
   type outm_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0);

   -- Returns the index of the most significant bit of byte lane `lane`, clamped to
   -- NbitW-1. The top lane is narrower than 8 bits when NbitW is not a multiple of
   -- 8; without the clamp the byte-wise write slices would exceed the word range.
   function lane_msb (lane : natural) return natural is
   begin
      if (8*lane)+7 > NbitW-1 then
         return NbitW-1;
      end if;
      return (8*lane)+7;
   end function lane_msb;

   signal lram  : layer_ram; -- Layer RAM. One RAM per input. It stores the weights
   signal breg  : ramd_type; -- Bias RAM. It can be a RAM because biases are not accessed simultaneously
   signal outm  : outm_type; -- RAM outputs to be multiplexed into rdata
   signal m_sel : std_logic_vector(NumIn-1 downto 0);          -- RAM select (one-hot)
   signal Wyb   : std_logic_vector((NbitW*NumIn)-1 downto 0);  -- Weight vectors
   signal bias  : std_logic_vector(NbitW-1 downto 0);          -- Bias
   signal Nouts : std_logic_vector(NbitOut-1 downto 0);        -- Outputs from neurons
   signal uaddr : unsigned(lra_l-1 downto 0);                  -- Unsigned address of weight and bias memories

   signal cont   : integer range 0 to NumN-1; -- Neuron counter
   signal cntb   : integer range 0 to NumN-1; -- Delayed counter for biases
   signal st     : bit;                       -- State: '0' idle, '1' counting neurons
   signal en1    : std_logic;                 -- First step enable
   signal en2    : std_logic;                 -- Second stage enable
   signal en3    : std_logic;                 -- Shift register enable
   signal en_out : std_logic;

begin

   -- Layer core: one neuron reused to compute all NumN outputs serially
   layerPS_inst: entity work.layerPS
      generic map
      (
         NumN    => NumN,
         NumIn   => NumIn,
         NbitIn  => NbitIn,
         NbitW   => NbitW,
         NbitOut => NbitOut,
         LSbit   => LSbit
      )
      port map
      (
         -- Input ports
         reset   => reset,
         clk     => clk,
         en      => en1,
         en2     => en2,
         en_r    => en3,
         inputs  => inputs,
         Wyb     => Wyb,
         bias    => bias,

         -- Output ports
         en_out  => en_out,
         outputs => Nouts
      );

   uaddr <= unsigned(addr(lra_l-1 downto 0));

   -- One-hot decode of the bottom address bits into a weight RAM select.
   -- No weight RAM is selected while the bias memory is addressed (b_sel = '1').
   ram_selector:
   process (uaddr(wra_l-1 downto 0), b_sel) -- Bottom part of memory address and b_sel
   begin
      m_sel <= (others => '0'); -- Default
      for i in (NumIn-1) downto 0 loop
         -- The bottom part of memory address selects which RAM
         if ( (to_integer(uaddr(wra_l-1 downto 0)) = i) and (b_sel = '0') ) then
            m_sel(i) <= '1'; -- Enables the selected RAM
         end if;
      end loop;
   end process;

   rams: -- Instantiates as many weight memories as inputs there are in the layer
   for i in (NumIn-1) downto 0 generate

      -- External write port with byte enables (read-modify-write of the addressed word)
      process (clk)
         variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written back
      begin
         if (clk'event and clk = '1') then
            if (m_en = '1' and m_sel(i) = '1') then
               for j in ((NbitW+7)/8)-1 downto 0 loop -- Byte by byte; the top lane is clamped to NbitW-1
                  if (m_we(j) = '1') then
                     d(lane_msb(j) downto 8*j) := wdata(lane_msb(j) downto 8*j);
                  else
                     d(lane_msb(j) downto 8*j) := lram(i)(to_integer(uaddr(lra_l-1 downto wra_l)))(lane_msb(j) downto 8*j);
                  end if;
               end loop;
               -- Top part of the memory address selects the weight inside the selected RAM
               lram(i)(to_integer(uaddr(lra_l-1 downto wra_l))) <= d; -- Write
            end if;
         end if;
      end process;

      -- Weights are read in parallel (one per input), resulting in a bus of weights.
      -- The read is synchronous; an asynchronous read would force distributed RAM.
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset = '1' then
               null; -- Wyb is intentionally not cleared: it just holds its value during reset
            else
               Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
            end if;
         end if;
      end process;

      outm(i) <= lram(i)(to_integer(uaddr(lra_l-1 downto wra_l))); -- Read all RAMs at the external address

   end generate;

   -- Registered external read port, including the bias memory
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (m_en = '1') then
            if (b_sel = '1') then
               rdata <= breg(to_integer(uaddr(bra_l-1 downto 0))); -- Bias RAM selected
            else -- Weight RAM selected:
               rdata <= outm(to_integer(uaddr(wra_l-1 downto 0))); -- Multiplexes RAM outputs
               -- It may be safer to avoid accesses whose bottom address is greater than NumIn-1
            end if;
         end if;
      end if;
   end process;

   -- External write port of the bias memory, with byte enables
   bias_ram:
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written back
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (b_sel = '1') ) then
            for i in ((NbitW+7)/8)-1 downto 0 loop -- Byte by byte; the top lane is clamped to NbitW-1
               if (m_we(i) = '1') then
                  d(lane_msb(i) downto 8*i) := wdata(lane_msb(i) downto 8*i);
               else
                  d(lane_msb(i) downto 8*i) := breg(to_integer(uaddr(bra_l-1 downto 0)))(lane_msb(i) downto 8*i);
               end if;
            end loop;
            -- The bottom bra_l bits of the memory address select the bias
            breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;

   -- Bias read. Only one bias is needed at a time, so the storage can be a RAM.
   process (clk) -- Synchronous read
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            null; -- bias is intentionally not cleared: it just holds its value during reset
         else
            bias <= breg(cntb);
         end if;
      end if;
   end process;

   outputs <= Nouts;

   control: -- Neuron counter and shifted control signals
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            cont    <= 0;
            cntb    <= 0;
            st      <= '0';
            en1     <= '0';
            en2     <= '0';
            run_out <= '0';
         else
            cntb <= cont; -- Bias counter is delayed to keep the pipeline data aligned
            case st is
               when '0' => -- Idle: wait for run_in
                  en1 <= '0'; -- en1 is delayed 1 cycle in order to insert a register for Wyb
                  if run_in = '1' then
                     st <= '1';
                  else
                     st <= '0';
                  end if;
               when '1' => -- Counting: one neuron per cycle
                  en1 <= '1'; -- en1 is delayed 1 cycle in order to insert a register for Wyb
                  -- An if is used instead of a case on cont because NumN-1 depends on a
                  -- generic and therefore is not a locally static case choice.
                  if cont = NumN-1 then
                     cont <= 0;
                     st   <= '0';
                  else
                     cont <= cont + 1;
                  end if;
            end case;

            en2 <= en1;

            run_out <= en3; -- Follows the layer's output enable with one cycle of delay
         end if;
      end if;
   end process;

   en3 <= en_out;

end Behavioral;
/RTL_VHDL_files/layerSP_top.vhd
0,0 → 1,264
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: David Aledo
--
-- Create Date: 12:41:19 06/10/2013
-- Design Name: Configurable ANN
-- Module Name: layerSP_top - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: neuron layer top for artificial neural networks. Serial input and
-- parallel output.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
 
-- Deprecated XPS library:
--library proc_common_v3_00_a;
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() )
 
-- Top level of a serial-input / parallel-output neuron layer. The architecture
-- wraps the layerSP core with the weight memories, the bias registers, their
-- external read/write port and the run/enable control.
entity layerSP_top is
 
generic
(
NumN : natural := 8; -- Number of neurons of the layer
NumIn : natural := 64; -- Number of inputs of each neuron
NbitIn : natural := 8; -- Bit width of the input data
NbitW : natural := 8; -- Bit width of weights and biases
NbitOut : natural := 12; -- Bit width of the output data
lra_l : natural := 10; -- Layer RAM address length. Should equal log2(NumN)+log2(NumIn). NOTE(review): the defaults give 3+6 = 9, not 10 -- confirm
wra_l : natural := 6; -- Weight RAM address length. Should equal log2(NumIn)
bra_l : natural := 3; -- Bias RAM address length. Should equal log2(NumN)
LSbit : natural := 4 -- Least significant bit of the outputs (forwarded unchanged to layerSP)
);
 
port
(
-- Input ports
reset : in std_logic; -- Synchronous reset of the control logic
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
m_en : in std_logic; -- Memory enable (external interface)
b_sel : in std_logic; -- Bias memory select: '1' addresses the bias registers, '0' the weight memories
m_we : in std_logic_vector(((NbitW+7)/8)-1 downto 0); -- Memory write enable, one bit per byte of wdata (external interface)
inputs : in std_logic_vector(NbitIn-1 downto 0); -- Input data (serial)
wdata : in std_logic_vector(NbitW-1 downto 0); -- Write data of weight and bias memories
addr : in std_logic_vector(lra_l-1 downto 0); -- Address of weight and bias memories: the bits above wra_l select the weight RAM (neuron), the low wra_l bits the word inside it; the low bra_l bits select the bias
 
-- Output ports
run_out : out std_logic; -- Output data validation, run_in for the next layer
rdata : out std_logic_vector(NbitW-1 downto 0); -- Read data of weight and bias memories (registered, updated while m_en = '1')
outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0) -- Output data (parallel)
);
 
end layerSP_top;
 
architecture Behavioral of layerSP_top is

   -- Storage types. (A power-of-two padded variant based on pad_power2() from the
   -- deprecated proc_common package was only ever used for simulation.)
   type ramd_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0); -- One word per input. Optimal: 32 or 64 spaces
   type layer_ram is array (NumN-1 downto 0) of ramd_type;                            -- One RAM per neuron
   type outm_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0);

   -- Returns the index of the most significant bit of byte lane `lane`, clamped to
   -- NbitW-1. The top lane is narrower than 8 bits when NbitW is not a multiple of
   -- 8; without the clamp the byte-wise write slices would exceed the word range.
   function lane_msb (lane : natural) return natural is
   begin
      if (8*lane)+7 > NbitW-1 then
         return NbitW-1;
      end if;
      return (8*lane)+7;
   end function lane_msb;

   signal lram  : layer_ram; -- Layer RAM. One RAM per neuron. It stores the weights
   signal breg  : outm_type; -- Bias registers. They cannot be a RAM because all are read simultaneously
   signal outm  : outm_type; -- RAM outputs to be multiplexed into rdata
   signal m_sel : std_logic_vector(NumN-1 downto 0);            -- RAM select (one-hot)
   signal Wyb   : std_logic_vector((NbitW*NumN)-1 downto 0);    -- Weight vectors
   signal bias  : std_logic_vector((NbitW*NumN)-1 downto 0);    -- Bias vector
   signal Nouts : std_logic_vector((NbitOut*NumN)-1 downto 0);  -- Outputs from neurons
   signal uaddr : unsigned(lra_l-1 downto 0);                   -- Unsigned address of weight and bias memories

   signal inreg : std_logic_vector(NbitIn-1 downto 0); -- Input data register; aligns the input with the registered weights (Wyb)

   -- Control signals
   signal cont     : integer range 0 to NumIn-1; -- Input counter
   signal en1      : std_logic; -- First step enable (multiplication of MAC)
   signal en2      : std_logic; -- Second stage enable (accumulation of MAC)
   signal en3      : std_logic; -- Shift register enable
   signal a0       : std_logic; -- Signal to load accumulators with the multiplication result
   signal aux_en3  : std_logic; -- Auxiliary signal to delay en3 two cycles
   signal aux_a0   : std_logic; -- One cycle early version of a0
   signal aux2_en3 : std_logic; -- Pulse raised when the last input has been counted

begin

   -- Layer core: NumN neurons working in parallel on the serial input stream
   layerSP_inst: entity work.layerSP
      generic map
      (
         NumN    => NumN,
         NumIn   => NumIn,
         NbitIn  => NbitIn,
         NbitW   => NbitW,
         NbitOut => NbitOut,
         LSbit   => LSbit
      )
      port map
      (
         -- Input ports
         reset   => reset,
         clk     => clk,
         en      => en1,
         en2     => en2,
         en_r    => en3,
         a0      => a0,
         inputs  => inreg,
         Wyb     => Wyb,
         bias    => bias,

         -- Output ports
         outputs => Nouts
      );

   uaddr <= unsigned(addr);

   -- One-hot decode of the top address bits into a weight RAM select.
   -- No weight RAM is selected while the bias registers are addressed (b_sel = '1').
   ram_selector:
   process (uaddr(lra_l-1 downto wra_l), b_sel) -- Top part of memory address and b_sel
   begin
      m_sel <= (others => '0'); -- Default
      for i in (NumN-1) downto 0 loop
         -- The top part of memory address selects which RAM
         if ( (to_integer(uaddr(lra_l-1 downto wra_l)) = i) and (b_sel = '0') ) then
            m_sel(i) <= '1'; -- Enables the selected RAM
         end if;
      end loop;
   end process;

   rams: -- Instantiates as many weight memories as neurons there are in the layer
   for i in (NumN-1) downto 0 generate

      -- External write port with byte enables (read-modify-write of the addressed word)
      process (clk)
         variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written back
      begin
         if (clk'event and clk = '1') then
            if (m_en = '1' and m_sel(i) = '1') then
               for j in ((NbitW+7)/8)-1 downto 0 loop -- Byte by byte; the top lane is clamped to NbitW-1
                  if (m_we(j) = '1') then
                     d(lane_msb(j) downto 8*j) := wdata(lane_msb(j) downto 8*j);
                  else
                     d(lane_msb(j) downto 8*j) := lram(i)(to_integer(uaddr(wra_l-1 downto 0)))(lane_msb(j) downto 8*j);
                  end if;
               end loop;
               -- Bottom part of the memory address selects the weight inside the selected RAM
               lram(i)(to_integer(uaddr(wra_l-1 downto 0))) <= d;
            end if;
         end if;
      end process;

      -- Weights are read in parallel (one per neuron), resulting in a bus of weights.
      -- The read is synchronous; an asynchronous read would force distributed RAM.
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset = '1' then
               null; -- Wyb is intentionally not cleared: it just holds its value during reset
            else
               Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
            end if;
         end if;
      end process;

      outm(i) <= lram(i)(to_integer(uaddr(wra_l-1 downto 0))); -- Read all RAMs at the external address

   end generate;

   -- Registered external read port, including the bias registers
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (m_en = '1') then
            if (b_sel = '1') then
               rdata <= breg(to_integer(uaddr(bra_l-1 downto 0))); -- Bias registers selected
            else -- Weight RAM selected:
               rdata <= outm(to_integer(uaddr(lra_l-1 downto wra_l))); -- Multiplexes RAM outputs
               -- It may be safer to avoid accesses whose top address is greater than NumN-1
            end if;
         end if;
      end if;
   end process;

   -- External write port of the bias registers, with byte enables
   bias_reg:
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written back
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (b_sel = '1') ) then
            for i in ((NbitW+7)/8)-1 downto 0 loop -- Byte by byte; the top lane is clamped to NbitW-1
               if (m_we(i) = '1') then
                  d(lane_msb(i) downto 8*i) := wdata(lane_msb(i) downto 8*i);
               else
                  d(lane_msb(i) downto 8*i) := breg(to_integer(uaddr(bra_l-1 downto 0)))(lane_msb(i) downto 8*i);
               end if;
            end loop;
            -- The bottom bra_l bits of the memory address select the bias
            breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;

   -- Registered read of all biases in parallel
   bias_read:
   for i in (NumN-1) downto 0 generate
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset = '1' then
               null; -- bias is intentionally not cleared: it just holds its value during reset
            else
               bias((NbitW*(i+1))-1 downto NbitW*i) <= breg(i);
            end if;
         end if;
      end process;
   end generate;

   outputs <= Nouts;

   -- Input counter and shifted enable signals
   control:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            cont     <= 0;
            en1      <= '0';
            en2      <= '0';
            en3      <= '0';
            a0       <= '0';
            run_out  <= '0';
            aux_en3  <= '0';
            aux2_en3 <= '0';
            aux_a0   <= '0';
            inreg    <= (others => '0');
         else
            en1   <= run_in; -- en1 is delayed 1 cycle in order to insert a register for Wyb
            inreg <= inputs;
            -- Default:
            aux2_en3 <= '0';
            if (run_in = '1') then
               if (cont = NumIn-1) then
                  cont     <= 0; -- Restarts input counter
                  aux2_en3 <= '1';
               else
                  cont <= cont + 1;
               end if;
            end if;
            en2 <= en1;
            if (cont = 0 and run_in = '1') then
               aux_a0 <= '1'; -- At the count beginning
            else
               aux_a0 <= '0';
            end if;
            a0      <= aux_a0;
            aux_en3 <= aux2_en3;
            en3     <= aux_en3;
            run_out <= en3; -- One-cycle pulse following the layer's output enable
         end if;
      end if;
   end process;

end Behavioral;
/RTL_VHDL_files/af_sigmoid2.vhd
0,0 → 1,100
----------------------------------------------------------------------------------
-- Company: CEI
-- Engineer: Enrique Herrero
--
-- Create Date:
-- Design Name: Configurable ANN
-- Module Name: af_sigmoid2 - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Sigmoid activation function implemented as a Look-Up-Table (LUT).
-- Alternative set of parameters.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Revision 1 - David Aledo
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.ALL;
use ieee.math_real.all;
 
 
-- Sigmoid activation function implemented as a registered Look-Up-Table (LUT).
-- The input word is used as the LUT address; the output is valid (run_out = '1')
-- one clock cycle after run_in = '1'.
entity af_sigmoid2 is
   generic
   (
      Nbit : natural := 8 -- Bit width of input and output data
   );
   port
   (
      reset   : in  std_logic; -- Synchronous reset
      clk     : in  std_logic;
      run_in  : in  std_logic; -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0); -- Input data (two's complement)
      run_out : out std_logic; -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0) -- Output data (two's complement)
   );
end af_sigmoid2;


architecture Behavioral of af_sigmoid2 is

   -- Sigmoid parameters:
   constant f0 : real := 0.5; -- Slope at the origin
   constant fr : real := 2.0; -- fr = fmax - fmin

   -- LUT address: the input bits interpreted as an unsigned number
   signal dataIn : integer range 0 to (2**Nbit)-1;
   type table_t is array(0 to (2**Nbit)-1) of std_logic_vector(Nbit-1 downto 0); -- LUT type

   -- Function Sigmoidal: generates the Look-Up-Table for the sigmoid activation function.
   --   margin: maximum absolute value of x; the table covers [-margin, margin).
   --   Nbit  : bit width of the table entries.
   -- The entry computed for the signed sample idx is stored at address
   -- idx mod 2**Nbit, i.e. at the unsigned value of its two's complement code.
   function Sigmoidal(margin:real;Nbit:natural) return table_t is
      variable scale,x,y,w,t: real;
      variable u: integer;
      variable fbits: std_logic_vector(Nbit-1 downto 0);
      variable lut: table_t;
   begin
      scale := (2.0*margin)/(2.0**Nbit); -- Gap between two consecutive points
      x := -margin;
      for idx in -(2**(Nbit-1)) to (2**(Nbit-1))-1 loop
         y := ( fr / (1.0+exp(((-4.0*f0)/fr)*x)) ) - (fr/2.0);
         w := y*(2.0**(Nbit-1)); -- Scales to fixed point (shifts bits to the left)
         t := round(w);
         u := integer(t);
         fbits := std_logic_vector(to_signed(u,Nbit));
         -- mod yields a non-negative address and avoids to_unsigned truncation warnings
         lut(idx mod (2**Nbit)) := fbits;
         x := x+scale;
      end loop;
      return lut;
   end Sigmoidal;

   constant Table : table_t := Sigmoidal(1.0,Nbit); -- LUT contents, computed at elaboration/synthesis time

begin

   -- The table is addressed by the unsigned value of the two's complement input,
   -- matching the way Sigmoidal fills it. (A signed conversion would give negative,
   -- out-of-range addresses for negative inputs.)
   dataIn <= to_integer(unsigned(inputs));

   Activacion: process(clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            run_out <= '0';
            outputs <= (others => '0');
         else
            if run_in = '1' then
               run_out <= '1';
               outputs <= Table(dataIn); -- Assigns output value from the LUT
            else
               run_out <= '0'; -- outputs keeps its last value
            end if;
         end if;
      end if;
   end process;

end Behavioral;
/RTL_VHDL_files/activation_function.vhd
0,0 → 1,112
----------------------------------------------------------------------------------
-- Company:
-- Engineer:
--
-- Create Date: 16:16:02 05/14/2014
-- Design Name: Configurable ANN
-- Module Name: activation_function - Structural
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Activation function selector. It instantiates the activation
-- function type selected with the f_type generic.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
 
 
-- Activation function selector: the architecture instantiates the implementation
-- whose tag matches f_type ("linear", "siglut" or "siglu2").
entity activation_function is
generic
(
f_type : string := "linear"; -- Activation function type (compared against 6 character tags)
Nbit : natural := 8 -- Bit width of input and output data
);
port
(
reset : in std_logic;
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
inputs : in std_logic_vector(Nbit-1 downto 0); -- Input data
run_out : out std_logic; -- Output data validation, run_in for the next layer
outputs : out std_logic_vector(Nbit-1 downto 0) -- Output data
);
end activation_function;
 
architecture Structural of activation_function is
begin

   -- "linear": identity function, data and validation are wired straight through.
   linear_f:
   if f_type = "linear" generate
      run_out <= run_in;
      outputs <= inputs;
   end generate;

   -- "siglut" (example 1): sigmoid implemented as a Look-Up-Table (LUT).
   Sigmoid_f:
   if f_type = "siglut" generate
      siglut_inst: entity work.af_sigmoid
         generic map
         (
            Nbit => Nbit
         )
         port map
         (
            clk     => clk,
            reset   => reset,
            run_in  => run_in,
            inputs  => inputs,
            outputs => outputs,
            run_out => run_out
         );
   end generate;

   -- "siglu2" (example 2): LUT sigmoid with a second, different set of parameters.
   Sigmoid2_f:
   if f_type = "siglu2" generate
      siglut_inst: entity work.af_sigmoid2
         generic map
         (
            Nbit => Nbit
         )
         port map
         (
            clk     => clk,
            reset   => reset,
            run_in  => run_in,
            inputs  => inputs,
            outputs => outputs,
            run_out => run_out
         );
   end generate;

   -- Template to instantiate a user activation function type ("userAF"):
   --userAF_f:
   --if f_type = "userAF" generate
   --   yourAF_inst: entity work.--place here user module name--
   --      generic map
   --      (
   --         Nbit => Nbit
   --      )
   --      port map
   --      (
   --         clk     => clk,
   --         reset   => reset,
   --         run_in  => run_in,
   --         inputs  => inputs,
   --         outputs => outputs,
   --         run_out => run_out
   --      );
   --end generate;
   -- As many activation function types as needed can be added this way; each one
   -- must be tagged with a 6 character string.

end Structural;
 
/RTL_VHDL_files/shiftreg_pl.vhd
0,0 → 1,119
----------------------------------------------------------------------------------
-- Company: CEI - UPM
-- Engineer: David Aledo
--
-- Create Date: 11:31:38 05/14/2014
-- Design Name: Configurable ANN
-- Module Name: shiftreg_pl - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Shift register with parallel load.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
 
 
-- Shift register with parallel load: converts a parallel bus of Nreg elements
-- into a serial stream, one element per clock cycle, lowest element first.
entity shiftreg_pl is
generic
(
Nreg : natural := 64; -- Number of elements
Nbit : natural := 8 -- Bit width of each element
);
 
port
(
-- Input ports
reset : in std_logic; -- Synchronous reset
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
inputs : in std_logic_vector((Nbit*Nreg)-1 downto 0); -- Input data (parallel); element i occupies bits Nbit*(i+1)-1 downto Nbit*i
-- Output ports
run_out : out std_logic; -- Output data validation, run_in for the next layer
outputs : out std_logic_vector(Nbit-1 downto 0) -- Output data (serial)
);
end shiftreg_pl;
 
architecture Behavioral of shiftreg_pl is

   type reg_st_type is (idle, counting); -- Control state type
   type dreg_type is array (Nreg-1 downto 0) of std_logic_vector(Nbit-1 downto 0); -- Shift register type

   signal reg_st : reg_st_type;               -- Control state
   signal dreg   : dreg_type;                 -- Shift register; element 0 drives the output
   signal count  : integer range 0 to Nreg-1; -- Number of elements already shifted out
   signal en_r   : std_logic;                 -- Shift register enable

begin

   -- The register is enabled for as long as the controller is counting
   en_r <= '1' when reg_st = counting else '0';

   outputs <= dreg(0);

   -- Data path: parallel load on the first enabled cycle, shift towards element 0 afterwards
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            dreg <= (others => (others => '0'));
         elsif en_r = '1' then
            if count = 0 then
               -- Parallel load of every element from its slice of the input bus
               for k in dreg'range loop
                  dreg(k) <= inputs((Nbit*(k+1))-1 downto Nbit*k);
               end loop;
            else
               -- Shift: each element takes the value of its upper neighbour
               for k in 0 to Nreg-2 loop
                  dreg(k) <= dreg(k+1);
               end loop;
               dreg(Nreg-1) <= (others => '-'); -- The vacated top element is a don't care
            end if;
         end if;
      end if;
   end process;

   -- Control: waits for run_in, then counts Nreg cycles and returns to idle
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            reg_st  <= idle;
            count   <= 0;
            run_out <= '0';
         else
            run_out <= en_r; -- Output validation is the enable delayed by one cycle
            if reg_st = idle then
               if run_in = '1' then
                  reg_st <= counting;
               end if;
            elsif count = (Nreg-1) then
               reg_st <= idle;
               count  <= 0;
            else
               count <= count + 1;
            end if;
         end if;
      end if;
   end process;

end Behavioral;
 
/RTL_VHDL_files/ann.vhd
0,0 → 1,451
----------------------------------------------------------------------------------
-- Company: CEI - UPM
-- Engineer: David Aledo
--
-- Create Date: 01.10.2015 15:15:28
-- Design Name: Configurable ANN
-- Module Name: ann - config_structural
-- Project Name:
-- Target Devices:
-- Tool Versions:
-- Description: generates the structure of an ANN with the given parameters.
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
 
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
 
use work.layers_pkg.all;
 
-- Configurable artificial neural network. Per-layer parameters are given as
-- vectors/strings with one entry per layer; a single external port accesses the
-- weight and bias memories of all layers.
entity ann is
generic
(
Nlayer : integer := 2; -- Number of layers
NbitW : natural := 16; -- Bit width of weights and biases
NumIn : natural := 64; -- Number of inputs to the network
NbitIn : natural := 8; -- Bit width of the inputs
NumN : int_vector; -- Number of neurons in each layer
l_type : string; -- Layer type of each layer (decoded by assign_ltype from layers_pkg)
f_type : string; -- Activation function type of each layer (decoded by assign_ftype from layers_pkg)
LSbit : int_vector; -- LSB of the output of each layer
NbitO : int_vector; -- Bit width of the outputs of each layer
NbitOut : natural := 8 -- Bit width of the network output
);
 
port
(
-- Input ports
reset : in std_logic;
clk : in std_logic;
run_in : in std_logic; -- Start and input data validation
m_en : in std_logic; -- Weight and bias memory enable (external interface)
m_we : in std_logic_vector(((NbitW+7)/8)-1 downto 0); -- Weight and bias memory write enable, one bit per byte of wdata (external interface)
inputs : in std_logic_vector(NbitIn-1 downto 0); -- Input data
wdata : in std_logic_vector(NbitW-1 downto 0); -- Weight and bias memory write data
addr : in std_logic_vector((calculate_lra_l(NumIn, NumN, Nlayer)+log2(Nlayer))-1 downto 0); -- Weight and bias memory address; the top log2(Nlayer) bits select the layer
 
-- Output ports
run_out : out std_logic; -- Output data validation
rdata : out std_logic_vector(NbitW-1 downto 0); -- Weight and bias memory read data
outputs : out std_logic_vector(NbitOut-1 downto 0) -- Output data
);
end ann;
 
architecture config_structural of ann is
 
-- Arrays of configuration constants, generated from string generics.
-- Every array has one element per layer (index 0 is the first layer).
-- ltype_v(i) is a two-character layer type compared below against "SP", "PS" and "PP":
-- character 1 is the input mode and character 2 the output mode ('S' serial, 'P' parallel).
constant ltype_v : ltype_vector(Nlayer-1 downto 0) := assign_ltype(l_type,Nlayer);
-- ftype_v(i) is handed to the activation_function instance that follows layer i.
constant ftype_v : ftype_vector(Nlayer-1 downto 0) := assign_ftype(f_type,Nlayer);
constant lra_l : int_vector(Nlayer-1 downto 0) := assign_addrl(NumIn,NumN,Nlayer); -- Layer RAM address length of each layer
-- Number of inputs of each layer: NumIn for layer 0, NumN(i-1) for layer i > 0.
constant NumIn_v : int_vector(Nlayer-1 downto 0) := NumN(Nlayer-2 downto 0) & NumIn;
constant wra_l : int_vector(Nlayer-1 downto 0) := log2(NumIn_v, Nlayer); -- Weight RAM address length of each layer
constant bra_l : int_vector(Nlayer-1 downto 0) := log2(NumN, Nlayer); -- Bias RAM address length of each layer

-- Internal signals:
signal lm_en : std_logic_vector(Nlayer-1 downto 0); -- Weight and bias memory enable of each layer
type lrd_type is array (Nlayer-1 downto 0) of std_logic_vector(NbitW-1 downto 0);
signal lrdata : lrd_type; -- Weight and bias memory read data of each layer

-- Data buses share one width for all layers; each layer uses only the low bits it needs.
-- NOTE(review): calculate_max_mul / calculate_max are defined outside this file; presumably they
-- return the largest NbitO(i)*NumN(i) and the largest NbitO(i) respectively -- confirm in layers_pkg.
type lodata_t is array (Nlayer-1 downto 0) of std_logic_vector(calculate_max_mul(NbitO,NumN)-1 downto 0); -- Parallel or serial data
type ladata_t is array (Nlayer-1 downto 0) of std_logic_vector(calculate_max(NbitO)-1 downto 0); -- Always serial data
-- Index convention: runO(i)/lodata(i) are produced by layer i; runI(i-1)/lidata(i-1) are consumed
-- by layer i; runA(i)/ladata(i) link the shift register and the activation function of layer i.
signal runO : std_logic_vector(Nlayer-1 downto 0); -- Output data validation of each layer (before activation function)
signal runI : std_logic_vector(Nlayer-1 downto 0); -- Input data validation of each layer
signal runA : std_logic_vector(Nlayer-1 downto 0); -- Auxiliary serial data validation of each layer
signal lodata : lodata_t; -- Output data of each layer (before activation function)
signal lidata : lodata_t; -- Input data of each layer
signal ladata : ladata_t; -- Auxiliary serial data of each layer
 
begin
 
-- Weight and bias memory layer selection (combinational mux).
-- The upper log2(Nlayer) bits of addr select one layer: that layer receives m_en
-- and its read data is routed to rdata; every other layer has its memory disabled.
process (addr(addr'length-1 downto addr'length-log2(Nlayer)), m_en, lrdata)
begin
   -- Default assignment: when the select bits address a non-existent layer
   -- (possible when Nlayer is not a power of two) no branch below drives rdata.
   -- Without this default the process would infer a latch on rdata.
   rdata <= (others => '0');

   for i in 0 to Nlayer-1 loop
      if to_integer(unsigned(addr(addr'length-1 downto addr'length-log2(Nlayer)))) = i then
         lm_en(i) <= m_en;     -- Selected layer: forward the external memory enable
         rdata <= lrdata(i);   -- ... and return its read data
      else
         lm_en(i) <= '0';      -- Unselected layers are never enabled
      end if;
   end loop;
   -- Note: addresses whose layer field is >= Nlayer enable no memory and read back zeros.
end process;
 
-- Guard for an unsupported configuration: the network inputs arrive serially, so a first
-- layer with parallel input ('P') would need an input shift register that does not exist yet.
-- Until it does, such a configuration is rejected in simulation.
parallelize_inputs:
if ltype_v(0)(1) = 'P' generate
   -- TODO: instantiate shift register with parallel output.
   -- synthesis translate_off
   -- This generate body only elaborates in the unsupported case, so fail unconditionally.
   assert false
      report "Current version does not accept parallel inputs."
      severity failure;
   -- synthesis translate_on
   -- TODO: delete above lines when instantiate shift register with parallel output.
end generate;
 
-- First layer, serial-input / parallel-output flavour.
-- It is fed directly by the network input ports and publishes its results on lodata(0).
first_layer_SP:
if ltype_v(0) = "SP" generate

   first_layerSP_top_inst: entity work.layerSP_top
      generic map
      (
         -- Layer geometry
         NumN    => NumN(0),   -- Neurons in the first layer
         NumIn   => NumIn,     -- Inputs of the first layer (network inputs)
         -- Data widths
         NbitIn  => NbitIn,    -- Input data width
         NbitW   => NbitW,     -- Weight and bias width
         NbitOut => NbitO(0),  -- Output width of the first layer
         LSbit   => LSbit(0),  -- Less significant bit of the first layer outputs
         -- Memory address lengths of the first layer
         lra_l   => lra_l(0),  -- Layer RAM
         wra_l   => wra_l(0),  -- Weight RAM
         bra_l   => bra_l(0)   -- Bias RAM
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path
         run_in  => run_in,    -- Network input validation starts the first layer
         inputs  => inputs,    -- Serial network input data
         run_out => runO(0),   -- Output validation of the first layer
         outputs => lodata(0)((NumN(0)*NbitO(0))-1 downto 0), -- Parallel outputs of the first layer

         -- Weight and bias memory interface
         m_en    => lm_en(0),  -- Enable decoded for layer 0
         m_we    => m_we,      -- Write enable
         b_sel   => addr((addr'length-log2(Nlayer))-1), -- Bit just below the layer-select field: layer or bias memory
         addr    => addr(lra_l(0)-1 downto 0),          -- Address inside the first layer
         wdata   => wdata,     -- Write data
         rdata   => lrdata(0)  -- Read data of the first layer
      );

end generate;
 
 
layers_insts:
for i in 1 to Nlayer-1 generate
 
-- Parallel-output layer (i-1) feeding a serial-input layer (i):
-- the parallel results are serialized first and then pass through ONE activation
-- function, which avoids instantiating one activation function per neuron.
serializer:
if (ltype_v(i-1)(2) = 'P') and (ltype_v(i)(1) = 'S') generate

   -- Step 1: shift register with parallel load, one register per neuron of layer (i-1).
   shiftreg_parallel_load: entity work.shiftreg_pl
      generic map
      (
         Nreg => NumN(i-1),   -- One register per neuron of layer (i-1)
         Nbit => NbitO(i-1)   -- Register width = output width of layer (i-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),                                      -- Validation from layer (i-1)
         inputs  => lodata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0), -- Parallel results of layer (i-1)
         run_out => runA(i-1),                                      -- Validation towards the activation function
         outputs => ladata(i-1)(NbitO(i-1)-1 downto 0)              -- Serialized results
      );

   -- Step 2: single activation function of layer (i-1), working on the serial stream.
   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),  -- Activation function type of layer (i-1)
         Nbit   => NbitO(i-1)     -- Data width of layer (i-1) outputs
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runA(i-1),                          -- Validation from the shift register
         inputs  => ladata(i-1)(NbitO(i-1)-1 downto 0), -- Serial data from the shift register
         run_out => runI(i-1),                          -- Input validation of layer (i)
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)  -- Serial input data of layer (i)
      );

end generate; -- serializer
 
-- Serial-output layer (i-1) feeding a serial-input layer (i):
-- no width conversion is needed, so one activation function sits directly between them.
single_activation_function:
if (ltype_v(i-1)(2) = 'S') and (ltype_v(i)(1) = 'S') generate

   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),  -- Activation function type of layer (i-1)
         Nbit   => NbitO(i-1)     -- Data width of layer (i-1) outputs
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),                          -- Validation from layer (i-1)
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0), -- Serial results of layer (i-1)
         run_out => runI(i-1),                          -- Input validation of layer (i)
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)  -- Serial input data of layer (i)
      );

end generate; -- single_activation_function
 
-- Parallel-output layer (i-1) feeding a parallel-input layer (i):
-- one activation function per neuron of layer (i-1), all started by the same validation.
multiple_activation_functions:
if (ltype_v(i-1)(2) = 'P') and (ltype_v(i)(1) = 'P') generate

   -- Neuron 0: the only instance whose run_out is used; it validates the inputs of layer (i).
   act_function_inst_0: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),  -- Activation function type of layer (i-1)
         Nbit   => NbitO(i-1)     -- Data width of layer (i-1) outputs
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),                          -- Validation from layer (i-1)
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0), -- Result of neuron 0 of layer (i-1)
         run_out => runI(i-1),                          -- Input validation of layer (i)
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)  -- Input 0 of layer (i)
      );

   -- Neurons 1 .. NumN(i-1)-1: same function applied to the remaining NbitO(i-1)-wide slices.
   multiple_activation_function_insts:
   for k in 1 to NumN(i-1)-1 generate
      activation_function_inst: entity work.activation_function
         generic map
         (
            f_type => ftype_v(i-1),
            Nbit   => NbitO(i-1)
         )
         port map
         (
            clk     => clk,
            reset   => reset,
            run_in  => runO(i-1),                                               -- Validation from layer (i-1)
            inputs  => lodata(i-1)((NbitO(i-1)*(k+1))-1 downto NbitO(i-1)*k),   -- Result of neuron k of layer (i-1)
            run_out => open,                                                    -- Redundant: instance 0 already provides the validation
            outputs => lidata(i-1)((NbitO(i-1)*(k+1))-1 downto NbitO(i-1)*k)    -- Input k of layer (i)
         );
   end generate;

end generate; -- multiple_activation_functions
 
-- If the previous layer (i-1) has serial outputs and actual layer (i) has parallel inputs, a parallelizer
-- is inserted after the activation function (i-1):
--   layer (i-1) --runO/lodata--> activation function --runA/ladata--> shift register --runI/lidata--> layer (i)
parallelizer:
if (ltype_v(i-1)(2) = 'S') and (ltype_v(i)(1) = 'P') generate

   -- Instantiate single activation function of the previous layer (i-1):
   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1), -- Activation function type of the previous layer (i-1)
         Nbit   => NbitO(i-1)    -- Bit width of the outputs of the previous layer (i-1)
      )
      port map
      (
         reset   => reset,
         clk     => clk,
         run_in  => runO(i-1),                          -- Input data validation comes from the previous layer (i-1)
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0), -- Serial input data come from the previous layer (i-1)
         run_out => runA(i-1),                          -- Output data validation goes to the shift-register
         outputs => ladata(i-1)(NbitO(i-1)-1 downto 0)  -- Serial output data go to the shift-register
      );

   -- Instantiate shift-register with parallel unload.
   -- Its outputs must drive runI/lidata, which are the signals layer (i) reads.
   -- Driving runO(i-1)/lodata(i-1) here would collide with the outputs of layer (i-1)
   -- (multiple drivers) and leave the inputs of layer (i) unconnected.
   shiftreg_parallel_unload: entity work.shiftreg_pu
      generic map
      (
         Nreg => NumN(i-1), -- Number of registers corresponds with the number of neurons in the previous layer (i-1)
         Nbit => NbitO(i-1) -- Bit width of the registers corresponds with the bit width of the outputs of the previous layer (i-1)
      )
      port map
      (
         reset   => reset,
         clk     => clk,
         run_in  => runA(i-1),                          -- Input data validation comes from the activation function of the previous layer (i-1)
         inputs  => ladata(i-1)(NbitO(i-1)-1 downto 0), -- Serial input data
         run_out => runI(i-1),                          -- Output data validation goes to the input data validation of this layer
         outputs => lidata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0) -- Parallel output data go to the inputs of this layer
      );

end generate; -- parallelizer
 
-- Instance the layer (i), cases SP, PS or PP:
 
-- Layer (i) of type "SP": reads its inputs serially from lidata(i-1) and
-- presents the results of all its neurons in parallel on lodata(i).
SP_case:
if ltype_v(i) = "SP" generate

   layerSP_top_inst: entity work.layerSP_top
      generic map
      (
         -- Layer geometry
         NumN    => NumN(i),    -- Neurons in layer (i)
         NumIn   => NumN(i-1),  -- Inputs of layer (i) = neurons of layer (i-1)
         -- Data widths
         NbitIn  => NbitO(i-1), -- Input width = output width of layer (i-1)
         NbitW   => NbitW,      -- Weight and bias width
         NbitOut => NbitO(i),   -- Output width of layer (i)
         LSbit   => LSbit(i),   -- Less significant bit of layer (i) outputs
         -- Memory address lengths of layer (i)
         lra_l   => lra_l(i),   -- Layer RAM
         wra_l   => wra_l(i),   -- Weight RAM
         bra_l   => bra_l(i)    -- Bias RAM
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path
         run_in  => runI(i-1),                          -- Input validation of this layer
         inputs  => lidata(i-1)(NbitO(i-1)-1 downto 0), -- Serial input data of this layer
         run_out => runO(i),                            -- Output validation of this layer
         outputs => lodata(i)((NumN(i)*NbitO(i))-1 downto 0), -- Parallel outputs of this layer

         -- Weight and bias memory interface
         m_en    => lm_en(i),   -- Enable decoded for layer (i)
         m_we    => m_we,       -- Write enable
         b_sel   => addr((addr'length-log2(Nlayer))-1), -- Bit just below the layer-select field: layer or bias memory
         addr    => addr(lra_l(i)-1 downto 0),          -- Address inside layer (i)
         wdata   => wdata,      -- Write data
         rdata   => lrdata(i)   -- Read data of layer (i)
      );

end generate;
 
-- Layer (i) of type "PS": reads all its inputs in parallel from lidata(i-1) and
-- delivers its results serially on the low NbitO(i) bits of lodata(i).
PS_case:
if ltype_v(i) = "PS" generate

   layerPS_top_inst: entity work.layerPS_top
      generic map
      (
         -- Layer geometry
         NumN    => NumN(i),    -- Neurons in layer (i)
         NumIn   => NumN(i-1),  -- Inputs of layer (i) = neurons of layer (i-1)
         -- Data widths
         NbitIn  => NbitO(i-1), -- Input width = output width of layer (i-1)
         NbitW   => NbitW,      -- Weight and bias width
         NbitOut => NbitO(i),   -- Output width of layer (i)
         LSbit   => LSbit(i),   -- Less significant bit of layer (i) outputs
         -- Memory address lengths of layer (i)
         lra_l   => lra_l(i),   -- Layer RAM
         wra_l   => wra_l(i),   -- Weight RAM
         bra_l   => bra_l(i)    -- Bias RAM
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path
         run_in  => runI(i-1),                                      -- Input validation of this layer
         inputs  => lidata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0), -- Parallel input data of this layer
         run_out => runO(i),                                        -- Output validation of this layer
         outputs => lodata(i)(NbitO(i)-1 downto 0),                 -- Serial outputs of this layer

         -- Weight and bias memory interface
         m_en    => lm_en(i),   -- Enable decoded for layer (i)
         m_we    => m_we,       -- Write enable
         b_sel   => addr((addr'length-log2(Nlayer))-1), -- Bit just below the layer-select field: layer or bias memory
         addr    => addr(lra_l(i)-1 downto 0),          -- Address inside layer (i)
         wdata   => wdata,      -- Write data
         rdata   => lrdata(i)   -- Read data of layer (i)
      );

end generate;
 
-- Parallel-input parallel-output layer (not implemented):
-- a "PP" layer instantiates nothing; simulation is stopped with a failure instead.
PP_case:
if ltype_v(i) = "PP" generate
   -- TODO: instance a full parallel layer. At current version this layer type has not been developed.
   -- synthesis translate_off
   -- Test the decoded per-layer type ltype_v(i), the same value the generate condition uses.
   -- The raw generic l_type must not be indexed with the layer number here.
   assert ltype_v(i) /= "PP"
      report "Current version does not accept parallel-input parallel-output (PP) layer type."
      severity failure;
   -- synthesis translate_on
   -- TODO: delete above lines when instantiate the parallel-input parallel-output layer.
end generate;
 
end generate; -- layers_insts
 
-- Last layer with parallel outputs: its results are serialized and then passed through
-- the final activation function, which drives the network outputs.
last_serializer:
if (ltype_v(Nlayer-1)(2) = 'P') generate

   -- Step 1: shift register with parallel load, one register per neuron of the last layer.
   last_shiftreg_parallel_load: entity work.shiftreg_pl
      generic map
      (
         Nreg => NumN(Nlayer-1),   -- One register per neuron of the last layer
         Nbit => NbitO(Nlayer-1)   -- Register width = output width of the last layer
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(Nlayer-1),                                                -- Validation from the last layer
         inputs  => lodata(Nlayer-1)((NumN(Nlayer-1)*NbitO(Nlayer-1))-1 downto 0), -- Parallel results of the last layer
         run_out => runA(Nlayer-1),                                                -- Validation towards the last activation function
         outputs => ladata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0)                   -- Serialized results
      );

   -- Step 2: activation function of the last layer, producing the network outputs.
   last_activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(Nlayer-1),  -- Activation function type of the last layer
         Nbit   => NbitO(Nlayer-1)     -- Data width of the last layer outputs
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runA(Nlayer-1),                               -- Validation from the shift register
         inputs  => ladata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0), -- Serial data from the shift register
         run_out => run_out,                                      -- Network output validation
         outputs => outputs                                       -- Network outputs (serial data)
      );

end generate; -- last_serializer
 
-- Last layer with serial outputs: the final activation function is connected
-- directly to the last layer and drives the network outputs.
last_simple_activation_function:
if (ltype_v(Nlayer-1)(2) = 'S') generate

   last_activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(Nlayer-1),  -- Activation function type of the last layer
         Nbit   => NbitO(Nlayer-1)     -- Data width of the last layer outputs
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(Nlayer-1),                               -- Validation from the last layer
         inputs  => lodata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0), -- Serial results of the last layer
         run_out => run_out,                                      -- Network output validation
         outputs => outputs                                       -- Network outputs (serial data)
      );

end generate;
 
end config_structural;
/RTL_VHDL_files/adder_tree.vhd
0,0 → 1,134
----------------------------------------------------------------------------------
-- Company:
-- Engineer:
--
-- Create Date: 15:27:42 06/20/2013
-- Design Name: Configurable ANN
-- Module Name: adder_tree - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description: Recursive adder tree
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
 
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
 
 
-- Pipelined, recursively built adder tree.
-- Adds NumIn operands of Nbit bits each. The operands are added as signed numbers and
-- every partial sum keeps the Nbit width (no bit growth), so the result is Nbit wide.
entity adder_tree is
generic
(
NumIn : integer := 9; -- Number of inputs (operands to add)
Nbit : integer := 12 -- Bit width of the input data and of the result
);

port
(
-- Input ports
reset : in std_logic; -- Synchronous reset, active high
clk : in std_logic; -- Clock, rising edge
en : in std_logic; -- Enable: the inputs are sampled while it is high
inputs : in std_logic_vector((Nbit*NumIn)-1 downto 0); -- Input data: operand k occupies bits Nbit*(k+1)-1 downto Nbit*k

-- Output ports
en_out : out std_logic; -- Output enable (output data validation): en delayed one clock cycle per tree level
output : out std_logic_vector(Nbit-1 downto 0) -- Output of the tree adder
);
end adder_tree;
 
-- One level of the adder tree.
-- Each level registers the sums of consecutive input pairs (2*i, 2*i+1) and, when NumIn is
-- odd, registers the unpaired last input unchanged. The NumIn/2 + (NumIn mod 2) partial
-- results feed a recursively instantiated adder_tree until a single value is left.
-- Every level adds one clock cycle of latency; en is delayed accordingly into en_out.
architecture Behavioral of adder_tree is

   constant NumIn2 : integer := NumIn/2; -- Number of pair additions of this level

   signal next_en : std_logic := '0'; -- Enable of the next adder tree level (en delayed one cycle)
   signal res : std_logic_vector((Nbit*((NumIn2)+(NumIn mod 2)))-1 downto 0); -- Partial results of this level

   signal resL_reg : std_logic_vector((Nbit*NumIn2)-1 downto 0); -- Registered pair sums (null vector when NumIn = 1)
   signal resH_reg : std_logic_vector(Nbit-1 downto 0);          -- Registered unpaired input (used only when NumIn is odd)
begin

   -- A tree without operands has no defined result: reject it explicitly instead of
   -- silently leaving the outputs undriven.
   assert NumIn >= 1
      report "adder_tree: NumIn must be at least 1."
      severity failure;

   -- Additions:
   add_proc:
   process (clk) -- Synchronous to allow pipeline
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            resL_reg <= (others => '0');
         else
            if (en = '1') then
               -- Addition of inputs (2*i and 2*i+1), resulting in NumIn/2 outputs of this level.
               -- The signed sum is truncated to Nbit bits.
               for i in ((NumIn2)-1) downto 0 loop
                  resL_reg((Nbit*(i+1))-1 downto Nbit*i) <= std_logic_vector( signed(inputs((Nbit*((2*i)+1))-1 downto Nbit*2*i)) + signed(inputs((Nbit*((2*i)+2))-1 downto Nbit*((2*i)+1))) );
               end loop;
            end if;
         end if;
      end if;
   end process;

   res((Nbit*NumIn2)-1 downto 0) <= resL_reg;

   -- Register the uneven input (if needed) so it stays aligned with the pair sums:
   uneven_register:
   if (NumIn mod 2 = 1) generate
      process (clk)
      begin
         if (clk'event and clk = '1') then
            if (reset = '1') then
               resH_reg <= (others => '0');
            else
               if (en = '1') then
                  resH_reg <= inputs((Nbit*NumIn)-1 downto Nbit*(NumIn-1));
               end if;
            end if;
         end if;
      end process;
      res((Nbit*((NumIn2)+1))-1 downto Nbit*(NumIn2)) <= resH_reg;
   end generate;

   -- Enable pipeline register:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if reset = '1' then
            next_en <= '0';
         else
            next_en <= en; -- Enable is delayed 1 cycle for the next level of the adder tree
         end if;
      end if;
   end process;

   -- More than one partial result left: reduce them with a smaller adder tree.
   recursion:
   if (NumIn > 2) generate

      sub_adder_tree: entity work.adder_tree
         generic map
         (
            NumIn => (NumIn2)+(NumIn mod 2),
            Nbit  => Nbit
         )
         port map
         (
            clk    => clk,
            reset  => reset,
            en     => next_en,
            inputs => res,
            en_out => en_out,
            output => output -- Solution is passed from the sub-adder trees to the top adder tree
         );
   end generate;

   -- Exactly one partial result left: it is the final sum.
   -- NumIn = 2: res holds the registered sum of both inputs.
   -- NumIn = 1: res holds the registered single input (previously this case left
   --            output and en_out undriven because no generate branch matched).
   trivial_solution:
   if (NumIn = 1) or (NumIn = 2) generate
      en_out <= next_en;
      output <= res; -- Assign the final result to the adder tree output
   end generate;

end Behavioral;
 
/API_header_files/ann.h
0,0 → 1,108
/*
* ann.h
*
* Description: This header file helps programmers correctly access the weight and bias memories of the ANN IP core.
* The user must edit ANN_BASEADDRESS, NLAYER, and the definitions of the inputs and neurons of each layer.
* The MAX_MUL macro can be calculated manually, or the automated calculation can be relied on if NLAYER<=4.
* A Wyb(x) macro must be declared in the code for each layer of the ANN IP core.
* These macros declare the 2D weight arrays and 1D bias arrays needed to access the ANN IP core memories.
*
* Created on: 17/05/2016
* Author: David A
*/
 
#ifndef ANN_H
#define ANN_H
 
/* Base address of weight and bias memories of the ANN IP core */
// Example for Xilinx's SDK using the example wrapper for Vivado. The user must define the correct base address here:
#define ANN_BASEADDRESS XPAR_ANN_0_WYB_S_AXI_BASEADDR

/* Number of layers */
// The automated MAX_MUL calculation further below only supports NLAYER <= 4.
#define NLAYER 4

/* Number of inputs and neurons of each layer */
// Add or remove as many layers as needed.
// In this example the inputs of layer x (NumInx) are the neurons of layer x-1 (NumN(x-1)):
#define NumIn0 16
#define NumN0 13
#define NumIn1 NumN0
#define NumN1 6
#define NumIn2 NumN1
#define NumN2 13
#define NumIn3 NumN2
#define NumN3 16

/* (optional) Redefine number of neurons in the last layer as number of outputs */
#define NumOut NumN3

/* Next-power-of-two of inputs and neurons of each layer */
// Define one next-power-of-two macro for every NumInx / NumNx parameter listed above.
// NOTE: the next_2power(x) macro function calculates the next-power-of-two of x for x<=256. If x>256 it still returns 256.
// NOTE: next_2power is defined later in this header; that is fine because macros are expanded at the point of use.
#define NumN0_b2 next_2power(NumN0)
#define NumIn0_b2 next_2power(NumIn0)
#define NumN1_b2 next_2power(NumN1)
#define NumIn1_b2 next_2power(NumIn1)
#define NumN2_b2 next_2power(NumN2)
#define NumIn2_b2 next_2power(NumIn2)
#define NumN3_b2 next_2power(NumN3)
#define NumIn3_b2 next_2power(NumIn3)
 
/* Maximum multiplication of the next-power-of-two of inputs by the next-power-of-two of neurons */
// MAX_MUL macro can be defined manually, or automatically if NLAYER<=4.
// To define it manually user must determine which layer has the maximum of these products, and edit MAX_MUL definition:
// In the example is layer 0 (or layer 3 with same MAX_MUL), 256 > 128
// NumIn0 = 16 ==> NumIn0_b2 = 16
// NumN0 = 13 ==> NumN0_b2 = 16
// NumN0_b2*NumIn0_b2=16*16=256
// NumIn1 = 13 ==> NumIn1_b2 = 16
// NumN1 = 6 ==> NumN1_b2 = 8
// NumN1_b2*NumIn1_b2=8*16=128
// NumIn2 = 6 ==> NumIn2_b2 = 8
// NumN2 = 13 ==> NumN2_b2 = 16
// NumN2_b2*NumIn2_b2=16*8=128
// NumIn3 = 13 ==> NumIn3_b2 = 16
// NumN3 = 16 ==> NumN3_b2 = 16
// NumN3_b2*NumIn3_b2=16*16=256
 
//#define MAX_MUL (NumN0_b2*NumIn0_b2) //Uncomment and edit this manual definition of MAX_MUL for manual definition of MAX_MUL
 
// Automated calculation of MAX_MUL for NLAYER<=4.
// Skipped entirely when the user has already defined MAX_MUL manually.
// MAX_MUL becomes the largest NumNx_b2*NumInx_b2 product over layers 0..NLAYER-1,
// built as a chain MAX_0 -> MAX_1 -> MAX_2 -> MAX_MUL of pairwise maxima.
#ifndef MAX_MUL
 #if NLAYER > 4
 #error MAX_MUL cannot be automatically calculated if NLAYER>4. Define MAX_MUL manually or complete the automaed calculation of MAX_MUL preprocessor code.
 #endif
 // Maximum of two values. Arguments and result operands are fully parenthesized so that
 // any expression can be passed safely. Note that each argument may be evaluated twice.
 #define max2(x,y) ( ((x) < (y)) ? (y) : (x) )
 #define MAX_0 (NumN0_b2*NumIn0_b2)
 #if NLAYER > 1
  #define MAX_1 max2((NumN1_b2*NumIn1_b2),MAX_0)
  #if NLAYER > 2
   #define MAX_2 max2((NumN2_b2*NumIn2_b2),MAX_1)
   #if NLAYER == 4
    #define MAX_MUL max2((NumN3_b2*NumIn3_b2),MAX_2)
   #elif NLAYER == 3
    #define MAX_MUL MAX_2
   #endif //NLAYER == 4
  #elif NLAYER == 2
   #define MAX_MUL MAX_1
  #endif //NLAYER > 2
 #else //NLAYER == 1
  #define MAX_MUL MAX_0
 #endif //NLAYER > 1
#endif
 
/* Macro function next_2power(x) */
// Smallest power of two that is >= x, limited to the range 1..256:
// any x <= 1 yields 1 and any x > 128 (including x > 256) yields 256.
// The argument is evaluated several times, so pass only side-effect-free expressions.
#define next_2power(x) ( ((x) <= 1) ? 1 : ((x) <= 2) ? 2 : ((x) <= 4) ? 4 : ((x) <= 8) ? 8 : ((x) <= 16) ? 16 : ((x) <= 32) ? 32 : ((x) <= 64) ? 64 : ((x) <= 128) ? 128 : 256 )
 
/* When this macro is expanded for a particular layer x, it declares pointers to the weight 2D array, bias 1D array, and unused spaces; and initializes them with a proper address */
// Declare a Wyb(x) macro per layer of the user's ANN, each time with a different layer number x, from 0 to NLAYER-1.
// x must be a literal layer number, because it is pasted into the identifiers. The macro already ends with ';'.
// Example: For a two layer ANN (NLAYER 2)
// Wyb(0) // declares and initializes int (*W0)[NumN0][NumIn0_b2], (*b0)[NumN0];
// Wyb(1) // declares and initializes int (*W1)[NumN1][NumIn1_b2], (*b1)[NumN1];
// Memory map of layer x, in units of sizeof(int), relative to ANN_BASEADDRESS:
//   MAX_MUL*2*x       : weights Wx, NumNx rows of NumInx_b2 words
//   MAX_MUL*(2*x+1)   : biases bx, NumNx words
// The unused spaces (*NOT_EXISTx0) and (*NOT_EXISTx1) are declared to discourage the use of these address ranges for other purposes, although this does not guarantee they will not be used.
// Byte offsets are computed on a char pointer: arithmetic on void pointers is only a compiler extension.
// NOTE(review): NOT_EXISTx0 has zero elements when NumNx*NumInx_b2 equals MAX_MUL (layer 3 of the example); zero-length arrays are a compiler extension too -- confirm the target compiler accepts them.
#define Wyb(x) volatile int (*W##x)[NumN##x][NumIn##x##_b2] = (void *) ((char *) (ANN_BASEADDRESS) + (MAX_MUL)*2*x*sizeof(int)), \
 (*NOT_EXIST##x##0)[(MAX_MUL)-NumN##x*NumIn##x##_b2] = (void *) ((char *) (ANN_BASEADDRESS) + (NumN##x*NumIn##x##_b2 + (MAX_MUL)*2*x)*sizeof(int)), \
 (*b##x)[NumN##x] = (void *) ((char *) (ANN_BASEADDRESS) + (MAX_MUL)*(2*x+1)*sizeof(int)), \
 (*NOT_EXIST##x##1)[(MAX_MUL)-NumN##x] = (void *) ((char *) (ANN_BASEADDRESS) + (NumN##x + (MAX_MUL)*(2*x+1))*sizeof(int));
 
#endif // ANN_H

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.