URL
https://opencores.org/ocsvn/artificial_neural_network/artificial_neural_network/trunk
Subversion Repositories artificial_neural_network
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 1 to Rev 2
- ↔ Reverse comparison
Rev 1 → Rev 2
/artificial_neural_network/trunk/RTL_VHDL_files/layerPS.vhd
0,0 → 1,150
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: David Aledo |
-- |
-- Create Date: 11:24:24 05/28/2013 |
-- Design Name: Configurable ANN |
-- Module Name: layerPS - arq |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: basic and parametrizable neuron layer for hardware artificial |
-- neural networks. Parallel input and serial output. |
-- It implements one neuron that is reused to calculate all of them. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
-- Deprecated XPS library: -- Needed functions have been implemented in layers_pkg |
--library proc_common_v3_00_a; |
--use proc_common_v3_00_a.proc_common_pkg.all; |
|
use work.layers_pkg.all; |
|
|
-- Interface of a neuron layer that takes all its inputs in parallel and
-- delivers one output sample at a time.
entity layerPS is

   generic
   (
      NumN    : natural := 64; -- Neurons in the layer
      NumIn   : natural := 8;  -- Inputs per neuron
      NbitIn  : natural := 12; -- Width, in bits, of one input sample
      NbitW   : natural := 8;  -- Width, in bits, of one weight and of the bias
      NbitOut : natural := 8;  -- Width, in bits, of the output sample
      LSbit   : natural := 4   -- Index of the lowest result bit copied to the output
   );

   port
   (
      -- Inputs
      reset   : in  std_logic;
      clk     : in  std_logic;
      en      : in  std_logic; -- First stage enable
      en2     : in  std_logic; -- Second stage enable
      en_r    : in  std_logic; -- Output register enable
      inputs  : in  std_logic_vector((NbitIn*NumIn)-1 downto 0); -- NumIn input samples packed side by side
      Wyb     : in  std_logic_vector((NbitW*NumIn)-1 downto 0);  -- NumIn weights packed side by side
      bias    : in  std_logic_vector(NbitW-1 downto 0);          -- Bias

      -- Outputs
      en_out  : out std_logic; -- Output data validation
      outputs : out std_logic_vector(NbitOut-1 downto 0) -- Output sample (serial)
   );

end layerPS;
|
|
|
-- One neuron datapath: NumIn registered multipliers feed an adder tree
-- (together with the bias); the sum is clipped to the configured output
-- window and stored in the output register.
architecture arq of layerPS is

   constant NbOvrf : natural := log2(NumIn);           -- Guard bits so the additions cannot overflow
   constant MSB    : natural := NbitIn+NbitW+NbOvrf;   -- Top bit index of every adder tree operand and of the sum
   constant SumW   : natural := MSB+1;                 -- Width of one adder tree operand
   constant WinMSB : integer := LSbit+NbitOut-1;       -- Top bit index of the output window inside the sum

   -- Largest / smallest sum that still fits in the output window:
   constant sat_max : signed(MSB downto 0) := (MSB downto WinMSB => '0') & (WinMSB-1 downto 0 => '1'); -- E.g. "0001111"
   constant sat_min : signed(MSB downto 0) := (MSB downto WinMSB => '1') & (WinMSB-1 downto 0 => '0'); -- E.g. "1110000"

   type v_res is array(NumIn-1 downto 0) of signed((NbitIn+NbitW)-1 downto 0); -- One product per input

   signal res     : v_res := (others => (others => '0'));                       -- Registered products
   signal sum     : std_logic_vector(MSB downto 0) := (others => '0');          -- Adder tree result
   signal reg     : std_logic_vector(NbitOut-1 downto 0) := (others => '0');    -- Output register
   signal sum_aux : std_logic_vector((SumW*(NumIn+1))-1 downto 0);              -- Packed adder tree operands (products + bias)

begin

   muls: -- As many registered multipliers as NumIn
   for i in (NumIn-1) downto 0 generate
      process (clk)
      begin
         if (clk'event and clk = '1') then
            if reset = '1' then
               res(i) <= (others => '0');
            elsif en = '1' then
               -- Input i times weight i:
               res(i) <= signed(inputs(NbitIn*i+NbitIn-1 downto NbitIn*i)) * signed(Wyb(NbitW*i+NbitW-1 downto NbitW*i));
            end if;
         end if;
      end process;
   end generate;

   asign_adder_tree_inputs: -- Sign-extend each product into its slot of the operand bus
   for i in NumIn-1 downto 0 generate
      sum_aux(SumW*i+MSB downto SumW*i) <= std_logic_vector(resize(res(i), SumW));
   end generate;
   -- The sign-extended bias occupies the last (highest) slot:
   sum_aux(SumW*NumIn+MSB downto SumW*NumIn) <= std_logic_vector(resize(signed(bias), SumW));

   recursive_adder_tree: entity work.adder_tree
      generic map
      (
         NumIn => NumIn+1, -- Products plus bias
         Nbit  => SumW
      )
      port map
      (
         clk    => clk,
         reset  => reset,
         en     => en2,
         inputs => sum_aux,
         en_out => en_out,
         output => sum
      );

   -- Output register with clipping (synchronous, active-high reset):
   process (clk)
   begin
      if rising_edge(clk) then
         if reset = '1' then
            reg <= (others => '0');
         elsif en_r = '1' then
            if signed(sum) > sat_max then
               -- Above the window: clip to the most positive output value
               reg <= '0' & (NbitOut-2 downto 0 => '1');
            elsif signed(sum) < sat_min then
               -- Below the window: clip to the most negative output value
               reg <= '1' & (NbitOut-2 downto 0 => '0');
            else
               -- In range: copy the configured window of result bits
               reg <= sum(WinMSB downto LSbit);
            end if;
         end if;
      end if;
   end process;

   -- The output port mirrors the output register:
   outputs <= reg;

end arq;
/artificial_neural_network/trunk/RTL_VHDL_files/layerSP.vhd
0,0 → 1,140
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: David Aledo |
-- |
-- Create Date: 11:24:24 05/28/2013 |
-- Design Name: Configurable ANN |
-- Module Name: layerSP - arq |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: basic and parametrizable neuron layer for hardware artificial |
-- neural networks. Serial input and parallel output. |
-- Implemented by MAC. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
-- NOTE: To optimize MAC, inputs should be registered, and should be checked that this register is implemented as DSP input register |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
use work.layers_pkg.all; |
|
|
-- Interface of a neuron layer that receives one input sample at a time and
-- delivers the outputs of all its neurons in parallel.
entity layerSP is

   generic
   (
      NumN    : natural := 8;  -- Neurons in the layer
      NumIn   : natural := 64; -- Inputs per neuron (samples accumulated before the accumulator restarts)
      NbitIn  : natural := 8;  -- Width, in bits, of one input sample
      NbitW   : natural := 8;  -- Width, in bits, of one weight and of one bias
      NbitOut : natural := 12; -- Width, in bits, of one output sample
      LSbit   : natural := 4   -- Index of the lowest result bit copied to the output
   );

   port
   (
      -- Inputs
      reset   : in  std_logic;
      clk     : in  std_logic;
      en      : in  std_logic; -- First stage enable (multiplication of the MAC)
      en2     : in  std_logic; -- Second stage enable (accumulation of the MAC)
      en_r    : in  std_logic; -- Output register enable
      a0      : in  std_logic; -- Loads the accumulators with the multiplication result
      inputs  : in  std_logic_vector(NbitIn-1 downto 0);          -- Input sample (serial)
      Wyb     : in  std_logic_vector((NbitW*NumN)-1 downto 0);    -- One weight per neuron, packed side by side
      bias    : in  std_logic_vector((NbitW*NumN)-1 downto 0);    -- One bias per neuron, packed side by side

      -- Outputs
      outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0)   -- One output sample per neuron (parallel)
   );

end layerSP;
|
|
|
-- One MAC per neuron processes the serial input stream; when en_r is
-- asserted every MAC result is clipped to the configured output window and
-- stored, and all stored values are presented in parallel.
architecture arq of layerSP is

   constant NbOvrf : natural := log2(NumIn);          -- Guard bits so the accumulators cannot overflow
   constant MSB    : natural := NbitIn+NbitW+NbOvrf;  -- Top bit index of a MAC result
   constant WinMSB : integer := LSbit+NbitOut-1;      -- Top bit index of the output window inside a MAC result

   -- Largest / smallest MAC result that still fits in the output window:
   constant sat_max : signed(MSB downto 0) := (MSB downto WinMSB => '0') & (WinMSB-1 downto 0 => '1'); -- E.g. "0001111"
   constant sat_min : signed(MSB downto 0) := (MSB downto WinMSB => '1') & (WinMSB-1 downto 0 => '0'); -- E.g. "1110000"

   type v_res is array(NumN-1 downto 0) of std_logic_vector(MSB downto 0);        -- One MAC result per neuron
   type v_reg is array(NumN-1 downto 0) of std_logic_vector(NbitOut-1 downto 0);  -- One output word per neuron

   signal res : v_res;                                   -- MAC results
   signal reg : v_reg := (others => (others => '0'));    -- Output registers

begin

   macs: -- As many MAC instances as NumN
   for i in (NumN-1) downto 0 generate
      mac_i: entity work.mac
         generic map
         (
            dirload => FALSE,
            NbOvrf  => NbOvrf,
            NbitIn  => NbitIn,
            NbitC   => NbitW
         )
         port map
         (
            CLK  => clk,
            RST  => reset,
            A    => inputs,                                -- Same input sample for every neuron
            B    => Wyb(NbitW*i+NbitW-1 downto NbitW*i),   -- Weight of neuron i
            C    => bias(NbitW*i+NbitW-1 downto NbitW*i),  -- Bias of neuron i
            P    => res(i),
            CE1  => en,
            CE2  => en2,
            LOAD => a0
         );
   end generate;

   -- Output registers with clipping (synchronous, active-high reset):
   process (clk)
   begin
      if rising_edge(clk) then
         if reset = '1' then
            reg <= (others => (others => '0'));
         elsif en_r = '1' then
            -- All NumN results are captured in the same cycle:
            for i in 0 to NumN-1 loop
               if signed(res(i)) > sat_max then
                  -- Above the window: clip to the most positive output value
                  reg(i) <= '0' & (NbitOut-2 downto 0 => '1');
               elsif signed(res(i)) < sat_min then
                  -- Below the window: clip to the most negative output value
                  reg(i) <= '1' & (NbitOut-2 downto 0 => '0');
               else
                  -- In range: copy the configured window of result bits
                  reg(i) <= res(i)(WinMSB downto LSbit);
               end if;
            end loop;
         end if;
      end if;
   end process;

   -- Pack the output registers side by side onto the output port:
   pack_outputs:
   for i in 0 to NumN-1 generate
      outputs(NbitOut*i+NbitOut-1 downto NbitOut*i) <= reg(i);
   end generate;

end arq;
/artificial_neural_network/trunk/RTL_VHDL_files/shiftreg_pu.vhd
0,0 → 1,132
---------------------------------------------------------------------------------- |
-- Company: |
-- Engineer: |
-- |
-- Create Date: 18:03:58 05/14/2014 |
-- Design Name: Configurable ANN |
-- Module Name: shiftreg_pu - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Shift register with parallel unload. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
|
|
-- Interface of a shift register with serial load and parallel unload.
entity shiftreg_pu is

   generic
   (
      Nreg : natural := 64; -- Number of stored elements
      Nbit : natural := 8   -- Width, in bits, of one element
   );

   port
   (
      -- Inputs
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic; -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0); -- Input element (serial)

      -- Outputs
      run_out : out std_logic; -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector((Nbit*Nreg)-1 downto 0) -- All elements packed side by side (parallel)
   );

end shiftreg_pu;
|
-- Serial-in / parallel-out buffer.
--
-- After run_in is seen in the idle state, Nreg input words are shifted into
-- dreg (one per clock). One cycle after the last word has been stored, unload
-- is pulsed and the whole register is copied to the output register; run_out
-- is unload delayed by one clock, so it rises together with the new outputs.
--
-- Fix with respect to the original: unload was set at terminal count but never
-- cleared, so after the first frame run_out stayed high forever and the output
-- register was reloaded from dreg on every clock (losing the "constant output"
-- property). unload is now a single-cycle pulse.
architecture Behavioral of shiftreg_pu is

   signal count  : integer range 0 to Nreg-1; -- Number of words stored so far in the current frame
   signal en_r   : std_logic;                 -- Shift register enable
   signal unload : std_logic;                 -- One-cycle pulse: copy the shift register onto the output register
   type dreg_type is array (Nreg-1 downto 0) of std_logic_vector(Nbit-1 downto 0); -- Shift register type
   signal dreg   : dreg_type;                 -- Shift register
   type reg_st_type is (idle, counting);      -- Control state type
   signal reg_st : reg_st_type;               -- Control state

begin

   -- Shift register: new data enters at position Nreg-1 and moves towards 0.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            dreg <= (others => (others => '0'));
         elsif en_r = '1' then
            dreg(Nreg-1) <= inputs; -- A new input word is stored every enabled cycle
            -- The first word of a frame (count = 0) is stored without shifting;
            -- every following word pushes the previous ones one position down.
            if count /= 0 then
               shift:
               for i in 1 to Nreg-1 loop
                  dreg(i-1) <= dreg(i);
               end loop;
            end if;
         end if;
      end if;
   end process;

   -- Output register: keeps the previous frame stable while the next one is
   -- being shifted in.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            outputs <= (others => '0');
         elsif unload = '1' then -- Parallel unload
            for i in 0 to Nreg-1 loop
               outputs((Nbit*(i+1))-1 downto Nbit*i) <= dreg(i);
            end loop;
         end if;
      end if;
   end process;

   -- Shift register control
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            count   <= 0;
            reg_st  <= idle;
            run_out <= '0';
            unload  <= '0';
         else
            run_out <= unload;
            unload  <= '0'; -- Default: unload is a one-cycle pulse (overridden below at terminal count)
            case reg_st is
               when idle =>
                  if run_in = '1' then
                     reg_st <= counting;
                  else
                     reg_st <= idle;
                  end if;
               when counting =>
                  if count = (Nreg-1) then
                     -- Last word of the frame is being stored now
                     reg_st <= idle;
                     count  <= 0;
                     unload <= '1';
                  else
                     reg_st <= counting;
                     count  <= count + 1;
                  end if;
            end case;
         end if;
      end if;
   end process;

   -- The shift register is enabled for as long as a frame is being counted:
   en_r <= '1' when reg_st = counting else '0';

end Behavioral;
|
/artificial_neural_network/trunk/RTL_VHDL_files/layers_pkg.vhd
0,0 → 1,301
---------------------------------------------------------------------------------- |
-- Company: CEI - UPM |
-- Engineer: David Aledo |
-- |
-- Create Date: 01.10.2015 |
-- Design Name: Configurable ANN |
-- Package Name: layers_pkg |
-- Project Name: |
-- Target Devices: |
-- Tool Versions: |
-- Description: define array types for generics, functions to give them values from |
-- string generics, and other help functions |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
library IEEE; |
use IEEE.STD_LOGIC_1164.all; |
|
--library proc_common_v3_00_a; -- Deprecated library from XPS tool |
--use proc_common_v3_00_a.proc_common_pkg.all; |
|
-- Array types used as generics, plus helper functions to build them from
-- string generics and to size the weight/bias memories.
package layers_pkg is

   ---------------------------------------------------------------------------
   -- Array types for generics
   ---------------------------------------------------------------------------
   type int_vector   is array (natural range <>) of integer;        -- Generic integer vector
   type ltype_vector is array (integer range <>) of string(1 to 2); -- Layer type vector
   type ftype_vector is array (integer range <>) of string(1 to 6); -- Activation function type vector
   -- Note: the element strings above cannot be unconstrained

   ---------------------------------------------------------------------------
   -- String generic to vector conversion
   --    str_v : string to be converted (elements separated by one space)
   --    n     : number of elements of the vector
   --    return: the filled vector
   ---------------------------------------------------------------------------
   function assign_ints(str_v : string; n : integer) return int_vector;
   function assign_ltype(str_v : string; n : integer) return ltype_vector;
   function assign_ftype(str_v : string; n : integer) return ftype_vector;

   ---------------------------------------------------------------------------
   -- Other helpers
   ---------------------------------------------------------------------------

   -- TRUE when c is one of the characters '0' .. '9'.
   function is_digit(c : character) return boolean;

   -- Element-wise base two logarithm:
   --    v      : integer vector
   --    n      : number of elements of the vector
   --    return : vector holding the base two logarithm of every element of v
   function log2(v : int_vector; n : integer) return int_vector;

   -- Total weight and bias memory address length:
   --    NumIn  : number of inputs of the network
   --    NumN   : number of neurons of each layer
   --    n      : number of layers (number of elements of NumN)
   --    return : total weight and bias memory address length
   function calculate_addr_l(NumIn : integer; NumN : int_vector; n : integer) return integer;

   -- Weight and bias memory address length of each layer:
   --    NumIn  : number of inputs of the network
   --    NumN   : number of neurons of each layer
   --    n      : number of layers (number of elements of NumN and of the result)
   --    return : weight and bias memory address length of each layer
   function assign_addrl(NumIn : integer; NumN : int_vector; n : integer) return int_vector;

   -- Maximum of the element-by-element products of two vectors:
   --    v1, v2 : input vectors
   --    return : maximum of v1(i)*v2(i)
   function calculate_max_mul(v1 : int_vector; v2 : int_vector) return integer;

   -- Maximum value found in the input integer vector.
   function calculate_max(v : int_vector) return integer;

   -- Replacements for the functions formerly taken from the deprecated
   -- library proc_common_v3_00_a:
   function max2 (num1, num2 : integer) return integer;
   function log2(x : natural) return integer;

end layers_pkg;
|
package body layers_pkg is |
|
function max2 (num1, num2 : integer) return integer is |
begin |
if num1 >= num2 then |
return num1; |
else |
return num2; |
end if; |
end function max2; |
|
-- Function log2 -- returns number of bits needed to encode x choices |
-- x = 0 returns 0 |
-- x = 1 returns 0 |
-- x = 2 returns 1 |
-- x = 4 returns 2, etc. |
function log2(x : natural) return integer is |
variable i : integer := 0; |
variable val: integer := 1; |
begin |
if x = 0 then |
return 0; |
else |
for j in 0 to 29 loop -- for loop for XST |
if val >= x then null; |
else |
i := i+1; |
val := val*2; |
end if; |
end loop; |
-- Fix per CR520627 XST was ignoring this anyway and printing a |
-- Warning in SRP file. This will get rid of the warning and not |
-- impact simulation. |
-- synthesis translate_off |
assert val >= x |
report "Function log2 received argument larger" & |
" than its capability of 2^30. " |
severity failure; |
-- synthesis translate_on |
return i; |
end if; |
end function log2; |
|
|
function is_digit(c : character) return boolean is |
begin |
case c is |
when '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => return true; |
when others => return false; |
end case; |
end is_digit; |
|
-- Assign values to a integer vector from a string: |
-- Arguments: |
-- str_v : string to be converted |
-- n : number of elements of the vector |
-- Return: assigned integer vector |
function assign_ints(str_v : string; n : integer) return int_vector is |
variable i : integer := n-1; ---- element counter |
variable d_power : integer := 1; -- decimal power |
variable ret : int_vector(n-1 downto 0) := (others => 0); -- return value |
begin |
for c in str_v'length downto 1 loop -- read every character in str_v |
if str_v(c) = ' ' then -- a space separates a new element |
assert i > 0 |
report "Error in assign_ints: number of elements in string is greater than n." |
severity error; |
i := i -1; -- decrease element counter to start calculate a new element |
d_power := 1; -- reset the decimal power to 1 |
else |
assert is_digit(str_v(c)) -- assert the new character is a digit |
report "Error in assign_ints: character " & str_v(c) & " is not a digit." |
severity error; |
-- add the value of the new charactar to the element calculation ( + ("<new_digit>" - "0") * d_power): |
ret(i) := ret(i) + (character'pos(str_v(c))-character'pos('0'))*d_power; |
d_power := d_power*10; -- increase the decimal power for the next digit |
end if; |
end loop; |
assert i = 0 |
report "Error in assign_ints: number of elements in string is less than n." |
severity error; |
return ret; |
end assign_ints; |
|
-- Assign values to an activation function type vector from a string: |
-- Arguments: |
-- str_v : string to be converted |
-- n : number of elements of the vector |
-- Return: assigned activation function type vector |
function assign_ftype(str_v : string; n : integer) return ftype_vector is |
variable i : integer := 0; -- element counter |
variable l : integer := 1; -- element length counter |
variable ret : ftype_vector(n-1 downto 0) := (others => "linear"); -- return value |
begin |
for c in 1 to str_v'length loop -- read every character in str_v |
if str_v(c) = ' ' then -- a space separates a new element |
i := i +1; -- increase element counter to start calculate a new element |
l := 1; -- reset element length counter |
else |
ret(i)(l) := str_v(c); |
l := l +1; -- increase element length counter |
end if; |
end loop; |
assert i = n-1 |
report "Error in assign_ftype: number of elements in string is less than n." |
severity error; |
return ret; |
end assign_ftype; |
|
-- Assign values to an layer type vector from a string: |
-- Arguments: |
-- str_v : string to be converted |
-- n : number of elements of the vector |
-- Return: assigned layer type vector |
function assign_ltype(str_v : string; n : integer) return ltype_vector is |
variable i : integer := 0; -- element counter |
variable l : integer := 1; -- element length counter |
variable ret : ltype_vector(n-1 downto 0) := (others => "SP"); -- return value |
begin |
for c in 1 to str_v'length loop |
if str_v(c) = ' ' then -- a space separates a new element |
i := i +1; -- increase element counter to start calculate a new element |
l := 1; -- reset element length counter |
else |
assert str_v(c) = 'P' or str_v(c) = 'S' |
report "Error in assign_ltype: character " & str_v(c) & " is not 'P' (parallel) or 'S' (serial)." |
severity error; |
ret(i)(l) := str_v(c); |
l := l +1; -- increase element length counter |
end if; |
end loop; |
assert i = n-1 |
report "Error in assign_ltype: number of elements do not coincide with number of introduced elements." |
severity error; |
return ret; |
end assign_ltype; |
|
-- Calculate the total weight and bias memory address length: |
-- Arguments: |
-- NumIn : number of inputs of the network |
-- NumN : number of neurons of each layer |
-- n : number of layers (number of elements of NumN) |
-- Return: total weight and bias memory address length (integer) |
function calculate_addr_l(NumIn : integer; NumN : int_vector; n : integer) return integer is -- matrix + b_sel |
variable addr_l : integer := log2(NumIn)+log2(NumN(0)); -- return value. Initialized with the weight memory length of the first layer |
begin |
-- Calculate the maximum of the weight memory length: |
for i in 1 to n-1 loop |
addr_l := max2( addr_l, log2(NumN(i-1)+log2(NumN(i))) ); |
end loop; |
addr_l := addr_l +1; -- add bias select bit |
return addr_l; |
end calculate_addr_l; |
|
-- Base two logarithm for int_vector: |
-- Arguments: |
-- v : integer vector |
-- n : number of elements of the vector |
-- Return : integer vector of the base two logarithms of each elment of v |
function log2(v : int_vector; n : integer) return int_vector is |
variable ret : int_vector(n-1 downto 0); -- return value |
begin |
-- for each element of v, calculate its base two logarithm: |
for i in 0 to n-1 loop |
ret(i) := log2(v(i)); |
end loop; |
return ret; |
end log2; |
|
-- Assign the weight and bias memory address lenght of each layer: |
-- Arguments: |
-- NumIn : number of inputs of the network |
-- NumN : number of neurons of each layer |
-- n : number of layers (number of elements of NumN and the return integer vector) |
-- Return: weight and bias memory address lenght of each layer (integer vector) |
function assign_addrl(NumIn : integer; NumN : int_vector; n : integer) return int_vector is |
variable ret : int_vector(n-1 downto 0); -- return value |
begin |
ret(0) := log2(NumIn)+log2(NumN(0)); -- Weight memory length of the first layer |
for i in 1 to n-1 loop |
ret(i) := log2(NumN(i-1))+log2(NumN(i)); |
end loop; |
return ret; |
end assign_addrl; |
|
-- Returns the max value of the input integer vector: |
function calculate_max(v : int_vector) return integer is |
variable ac_max : integer := 0; -- return value |
begin |
for i in 0 to v'length-1 loop |
ac_max := max2(ac_max,v(i)); |
end loop; |
return ac_max; |
end calculate_max; |
|
-- Calculate the maximum of the multiplications of two vectors element by element |
-- Arguments: |
-- v1 : input vector 1 |
-- v2 : input vector 2 |
-- Return: maximum of the multiplications of two vectors element by element |
function calculate_max_mul(v1 : int_vector; v2 : int_vector) return integer is |
variable ac_max : integer := 0; |
begin |
assert v1'length = v2'length |
report "Error in calculate_max_mul: vector's length do not coincide." |
severity error; |
for i in 0 to v1'length-1 loop |
ac_max := max2(ac_max,v1(i)*v2(i)); |
end loop; |
return ac_max; |
end calculate_max_mul; |
|
end layers_pkg; |
/artificial_neural_network/trunk/RTL_VHDL_files/mac.vhd
0,0 → 1,85
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: David Aledo |
-- |
-- Create Date: |
-- Design Name: Configurable ANN |
-- Module Name: mac - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Multiplier and accumulator (MAC). |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.all; |
|
|
-- Interface of the multiplier-accumulator (MAC).
entity mac is

   generic
   (
      dirload : boolean := FALSE; -- Direct load: on LOAD the accumulator takes C (TRUE) or A*B + C (FALSE)
      NbOvrf  : natural := 3;     -- Guard bits in the accumulator to avoid overflow
      NbitIn  : natural := 16;    -- Width, in bits, of the input data
      NbitC   : natural := 18     -- Width, in bits, of weight and bias
   );

   port
   (
      CLK  : in  std_logic;
      RST  : in  std_logic;
      A    : in  std_logic_vector (NbitIn-1 downto 0);             -- Input data
      B    : in  std_logic_vector (NbitC-1 downto 0);              -- Weight
      C    : in  std_logic_vector (NbitC-1 downto 0);              -- Bias
      P    : out std_logic_vector (NbitIn+NbitC+NbOvrf downto 0);  -- Accumulator value
      CE1  : in  std_logic; -- Multiplier enable
      CE2  : in  std_logic; -- Accumulator enable
      LOAD : in  std_logic  -- Restarts the accumulator with the value selected by dirload
   );

end mac;
|
-- Two-stage MAC: a registered signed multiplier (enabled by CE1) followed by
-- a signed accumulator (enabled by CE2). Synchronous, active-high reset.
architecture Behavioral of mac is

   signal acc  : signed (NbitIn+NbitC+NbOvrf downto 0) := (others => '0'); -- Accumulator register
   signal Mreg : signed (NbitIn+NbitC-1 downto 0) := (others => '0');      -- Multiplier output register

begin

   process (CLK)
   begin
      if CLK'event and CLK = '1' then
         if RST = '1' then
            acc  <= (others => '0');
            Mreg <= (others => '0');
         else
            -- Stage 1: product of input data and weight
            if CE1 = '1' then
               Mreg <= signed(A) * signed(B);
            end if;

            -- Stage 2: accumulate, or restart the accumulation on LOAD
            if CE2 = '1' then
               if LOAD /= '1' then
                  acc <= acc + Mreg;
               elsif dirload then
                  -- Restart with the sign-extended bias only
                  acc <= resize(signed(C), acc'length);
               else
                  -- Restart with the sign-extended bias plus the current product
                  acc <= resize(signed(C), acc'length) + Mreg;
               end if;
            end if;
         end if;
      end if;
   end process;

   -- The output port mirrors the accumulator:
   P <= std_logic_vector(acc);

end Behavioral;
|
/artificial_neural_network/trunk/RTL_VHDL_files/af_sigmoid.vhd
0,0 → 1,99
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: Enrique Herrero |
-- |
-- Create Date: |
-- Design Name: Configurable ANN |
-- Module Name: af_sigmoid - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Sigmoid activation function implemented as a Look-Up-Table (LUT). |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Revision 1 - David Aledo |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.ALL; |
use ieee.math_real.all; |
|
|
-- Interface of the sigmoid activation function.
entity af_sigmoid is

   generic
   (
      Nbit : natural := 8 -- Width, in bits, of input and output data
   );

   port
   (
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic; -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0);  -- Input data
      run_out : out std_logic; -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0)   -- Output data
   );

end af_sigmoid;
|
|
-- Sigmoid activation function implemented as a registered look-up table.
--
-- The table has one entry per input code. Entry k holds the function value
-- for the signed input whose two's complement bit pattern, read as an
-- unsigned number, is k.
--
-- Fixes with respect to the original:
--  * The table was addressed with to_integer(signed(inputs)), which is
--    negative for half of the input codes and therefore outside the range of
--    dataIn and of the table. The address is now the unsigned value of the
--    input bits, which is how the table is filled.
--  * The fill index relied on to_unsigned() truncating an argument that does
--    not fit in Nbit bits; it is now computed with "mod".
--  * reset is read only inside the clocked branch, so it has been removed from
--    the sensitivity list.
architecture Behavioral of af_sigmoid is

   -- Sigmoid parameters:
   constant f0 : real := 2.0; -- Slope at the origin
   constant fr : real := 2.0; -- fr = fmax - fmin

   signal dataIn : integer range (2**Nbit-1) downto 0; -- Table address derived from the input bits
   type table_t is array(0 to (2**Nbit)-1) of std_logic_vector(Nbit-1 downto 0); -- LUT type

   -- Sigmoidal: builds the look-up table.
   --    margin : the table covers x in [-margin, margin), one point per code
   --    Nbit   : data width (the table size itself is fixed by table_t)
   -- For every signed code idx the function
   --    y = fr/(1 + exp(-(4*f0/fr)*x)) - fr/2
   -- is evaluated, scaled by 2**(Nbit-1), rounded and stored as an Nbit
   -- signed value.
   function Sigmoidal(margin:real;Nbit:natural) return table_t is
      variable scale : real;    -- Distance between two consecutive x points
      variable x     : real;    -- Current abscissa
      variable y     : real;    -- Function value at x
      variable u     : integer; -- Scaled and rounded function value
      variable table : table_t;
   begin
      scale := (2.0*margin)/(2.0**Nbit);
      x := -margin;
      for idx in -(2**(Nbit-1)) to (2**(Nbit-1))-1 loop
         y := (fr/(1.0+exp(((-4.0*f0)/fr)*x)))-(fr/2.0);
         u := integer(round(y*(2.0**(Nbit-1))));
         -- "mod" maps negative codes to 2**Nbit + idx, i.e. to the unsigned
         -- reading of their two's complement bit pattern:
         table(idx mod (2**Nbit)) := std_logic_vector(to_signed(u,Nbit));
         x := x+scale;
      end loop;
      return table;
   end Sigmoidal;

   signal Table : table_t := Sigmoidal(1.0,Nbit); -- The LUT, computed at elaboration

begin

   -- Table address: the input bit pattern read as an unsigned number
   dataIn <= to_integer(unsigned(inputs));

   -- Registered table read; run_out follows run_in with one clock of delay
   Activation: process(clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            run_out <= '0';
            outputs <= (others => '0');
         elsif run_in = '1' then
            run_out <= '1';
            outputs <= Table(dataIn);
         else
            run_out <= '0'; -- outputs keep their last value
         end if;
      end if;
   end process;

end Behavioral;
/artificial_neural_network/trunk/RTL_VHDL_files/af_template.vhd
0,0 → 1,49
---------------------------------------------------------------------------------- |
-- Company: |
-- Engineer: User |
-- |
-- Create Date: |
-- Design Name: Configurable ANN |
-- Module Name: af_template - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: User activation function template. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.ALL; |
use ieee.math_real.all; |
|
-- Template interface for a user activation function.
-- Only the entity name must be changed; please do not modify the generics
-- or the ports.
entity af_template is

   generic
   (
      Nbit : natural := 8 -- Width, in bits, of input and output data
   );

   port
   (
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic; -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0);  -- Input data
      run_out : out std_logic; -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0)   -- Output data
   );

end af_template;
|
|
-- Empty template body: as delivered it drives neither run_out nor outputs.
architecture Behavioral of af_template is

   -- User constants, internal signals and other user declarations go here.

begin

   -- User logic describing the activation function goes here.

end Behavioral;
/artificial_neural_network/trunk/RTL_VHDL_files/layerPS_top.vhd
0,0 → 1,259
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: David Aledo |
-- |
-- Create Date: 12:41:19 06/10/2013 |
-- Design Name: Configurable ANN |
-- Module Name: layerPS_top - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: neuron layer top for artificial neural networks. Parallel input and |
-- serial output. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.all; |
|
-- Deprecated XPS library: |
--library proc_common_v3_00_a; |
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() ) |
|
entity layerPS_top is

   generic
   (
      NumN    : natural := 64; -- Number of neurons of the layer
      NumIn   : natural := 8;  -- Number of inputs of each neuron
      NbitIn  : natural := 12; -- Bit width of the input data
      NbitW   : natural := 8;  -- Bit width of weights and biases
      NbitOut : natural := 8;  -- Bit width of the output data
      lra_l   : natural := 10; -- Layer RAM address length. It should equal log2(NumN)+log2(NumIn)
      wra_l   : natural := 3;  -- Weight RAM address length. It should equal log2(NumIn)
      bra_l   : natural := 6;  -- Bias RAM address length. It should equal log2(NumN)
      LSbit   : natural := 4   -- Least significant bit of the outputs
   );

   port
   (
      -- Input ports
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                                      -- Start and input data validation
      m_en    : in  std_logic;                                      -- Memory enable (external interface)
      b_sel   : in  std_logic;                                      -- Bias memory select
      m_we    : in  std_logic_vector(((NbitW+7)/8)-1 downto 0);     -- Memory write enable, one bit per byte (external interface)
      inputs  : in  std_logic_vector((NbitIn*NumIn)-1 downto 0);    -- Input data (parallel)
      wdata   : in  std_logic_vector(NbitW-1 downto 0);             -- Write data of weight and bias memories
      addr    : in  std_logic_vector(lra_l-1 downto 0);             -- Address of weight and bias memories

      -- Output ports
      run_out : out std_logic;                                      -- Output data validation, run_in for the next layer
      rdata   : out std_logic_vector(NbitW-1 downto 0);             -- Read data of weight and bias memories
      outputs : out std_logic_vector(NbitOut-1 downto 0)            -- Output data (serial)
   );

end layerPS_top;
|
-- Top level of a parallel-input / serial-output neuron layer.
-- It holds the weight memories (one per input) and the bias memory, exposes
-- them through the external memory interface (m_en, m_we, b_sel, addr, wdata,
-- rdata) and generates the enable signals that sequence the layerPS datapath.
architecture Behavioral of layerPS_top is

   -- Memory types. The index ranges are exactly NumN / NumIn entries (they are
   -- not padded to a power of two), so out-of-range addresses must not be used.
   type ramd_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0);  -- Optimal: 32 or 64 spaces
   type layer_ram is array (NumIn-1 downto 0) of ramd_type;
   type outm_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0);

   signal lram  : layer_ram; -- Layer RAM. One RAM per input. It stores the weights
   signal breg  : ramd_type; -- Bias RAM. It can be RAM because biases are not accessed simultaneously
   signal outm  : outm_type; -- RAM outputs to be multiplexed into rdata
   signal m_sel : std_logic_vector(NumIn-1 downto 0);         -- RAM select
   signal Wyb   : std_logic_vector((NbitW*NumIn)-1 downto 0); -- Weight vectors
   signal bias  : std_logic_vector(NbitW-1 downto 0);         -- Bias
   signal Nouts : std_logic_vector(NbitOut-1 downto 0);       -- Outputs from neurons
   signal uaddr : unsigned(lra_l-1 downto 0);                 -- Unsigned address of weight and bias memories

   signal cont   : integer range 0 to NumN-1; -- Neuron counter
   signal cntb   : integer range 0 to NumN-1; -- Delayed counter for biases
   signal st     : bit;       -- State
   signal en1    : std_logic; -- First step enable
   signal en2    : std_logic; -- Second stage enable
   signal en3    : std_logic; -- Output register enable (driven by en_out of layerPS)
   signal en_out : std_logic;

begin

   layerPS_inst: entity work.layerPS
      generic map
      (
         NumN    => NumN,
         NumIn   => NumIn,
         NbitIn  => NbitIn,
         NbitW   => NbitW,
         NbitOut => NbitOut,
         LSbit   => LSbit
      )
      port map
      (
         -- Input ports
         reset   => reset,
         clk     => clk,
         en      => en1,
         en2     => en2,
         en_r    => en3,
         inputs  => inputs,
         Wyb     => Wyb,
         bias    => bias,

         -- Output ports
         en_out  => en_out,
         outputs => Nouts
      );

   uaddr <= unsigned(addr(lra_l-1 downto 0));

   -- One-hot RAM select, decoded from the bottom part of the memory address.
   -- No weight RAM is selected while the bias memory is selected (b_sel = '1').
   ram_selector:
   process (uaddr, b_sel)
   begin
      m_sel <= (others => '0'); -- Default
      for i in (NumIn-1) downto 0 loop
         -- The bottom part of memory address selects which RAM
         if ( (to_integer(uaddr(wra_l-1 downto 0)) = i) and (b_sel = '0') ) then
            m_sel(i) <= '1'; -- Enables the selected RAM
         end if;
      end loop;
   end process;

   rams: -- Instantiates as many weight memories as inputs there are in the layer
   for i in (NumIn-1) downto 0 generate

      -- Write port (external interface) with byte-wise write enable.
      -- The merge is done bit by bit, using m_we(b/8) as the enable of bit b, so
      -- that a last, partial byte lane (NbitW not a multiple of 8) never produces
      -- an out-of-range slice.
      process (clk)
         variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written
      begin
         if (clk'event and clk = '1') then
            if (m_en = '1' and m_sel(i) = '1') then
               for b in NbitW-1 downto 0 loop
                  if (m_we(b/8) = '1') then
                     d(b) := wdata(b); -- Byte lane enabled: takes the new data
                  else
                     d(b) := lram(i)(to_integer(uaddr(lra_l-1 downto wra_l)))(b); -- Keeps the stored data
                  end if;
               end loop;
               -- Top part of memory address selects the weight inside the selected RAM
               lram(i)(to_integer(uaddr(lra_l-1 downto wra_l))) <= d; -- Write
            end if;
         end if;
      end process;

      -- Outputs are read in parallel, resulting in a bus of weights.
      -- The read is synchronous (an asynchronous read would force distributed RAM).
      -- While reset is asserted Wyb keeps its value.
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset /= '1' then
               Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
            end if;
         end if;
      end process;

      outm(i) <= lram(i)(to_integer(uaddr(lra_l-1 downto wra_l))); -- Read all RAM

   end generate;

   -- Synchronous read of the external interface, including breg:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (m_en = '1') then
            if (b_sel = '1') then
               rdata <= breg(to_integer(uaddr(bra_l-1 downto 0))); -- Bias RAM selected
            else -- Other RAM selected:
               rdata <= outm(to_integer(uaddr(wra_l-1 downto 0))); -- Multiplexes RAM outputs
               -- It may be safer to avoid accesses to bottom addresses greater than NumIn-1
            end if;
         end if;
      end if;
   end process;

   -- Bias memory write port (external interface), same bit-wise merge as the
   -- weight memories:
   bias_ram:
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (b_sel = '1') ) then
            for b in NbitW-1 downto 0 loop
               if (m_we(b/8) = '1') then
                  d(b) := wdata(b);
               else
                  d(b) := breg(to_integer(uaddr(bra_l-1 downto 0)))(b);
               end if;
            end loop;
            -- The bottom part (extended) of memory address selects the bias
            breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;

   -- Bias read: here, parallel read of biases is not necessary, so it can be RAM.
   -- Synchronous read; while reset is asserted bias keeps its value.
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset /= '1' then
            bias <= breg(cntb);
         end if;
      end if;
   end process;

   outputs <= Nouts;

   control: -- With counter and control signal shifts
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            cont    <= 0;
            cntb    <= 0;
            st      <= '0';
            en1     <= '0';
            en2     <= '0';
            run_out <= '0';
         else
            cntb <= cont; -- Bias counter is delayed to assure correctness of pipeline data
            case st is
               when '0' => -- Waiting for run_in
                  en1 <= '0'; -- en1 is delayed 1 cycle in order to insert a register for Wyb
                  if (run_in = '1') then
                     st <= '1';
                  else
                     st <= '0';
                  end if;
               when '1' => -- Counting neurons
                  en1 <= '1'; -- en1 is delayed 1 cycle in order to insert a register for Wyb
                  -- An if is used instead of a case because NumN is a generic and
                  -- therefore not a locally static case choice.
                  if (cont = NumN-1) then
                     cont <= 0;
                     st   <= '0';
                  else
                     cont <= cont + 1;
                  end if;
            end case;

            en2 <= en1;

            run_out <= en3; -- It lasts for 1 cycle, just after the output enable of the layer (when all outputs have just updated)
         end if;
      end if;
   end process;

   en3 <= en_out;

end Behavioral;
/artificial_neural_network/trunk/RTL_VHDL_files/layerSP_top.vhd
0,0 → 1,264
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: David Aledo |
-- |
-- Create Date: 12:41:19 06/10/2013 |
-- Design Name: Configurable ANN |
-- Module Name: layerSP_top - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: neuron layer top for artificial neural networks. Serial input and |
-- parallel output. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.all; |
|
-- Deprecated XPS library: |
--library proc_common_v3_00_a; |
--use proc_common_v3_00_a.proc_common_pkg.all; -- Only for simulation ( pad_power2() ) |
|
entity layerSP_top is

   generic
   (
      NumN    : natural := 8;  -- Number of neurons of the layer
      NumIn   : natural := 64; -- Number of inputs of each neuron
      NbitIn  : natural := 8;  -- Bit width of the input data
      NbitW   : natural := 8;  -- Bit width of weights and biases
      NbitOut : natural := 12; -- Bit width of the output data
      lra_l   : natural := 10; -- Layer RAM address length. It should equal log2(NumN)+log2(NumIn)
      wra_l   : natural := 6;  -- Weight RAM address length. It should equal log2(NumIn)
      bra_l   : natural := 3;  -- Bias RAM address length. It should equal log2(NumN)
      LSbit   : natural := 4   -- Least significant bit of the outputs
   );

   port
   (
      -- Input ports
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                                      -- Start and input data validation
      m_en    : in  std_logic;                                      -- Memory enable (external interface)
      b_sel   : in  std_logic;                                      -- Bias memory select
      m_we    : in  std_logic_vector(((NbitW+7)/8)-1 downto 0);     -- Memory write enable, one bit per byte (external interface)
      inputs  : in  std_logic_vector(NbitIn-1 downto 0);            -- Input data (serial)
      wdata   : in  std_logic_vector(NbitW-1 downto 0);             -- Write data of weight and bias memories
      addr    : in  std_logic_vector(lra_l-1 downto 0);             -- Address of weight and bias memories

      -- Output ports
      run_out : out std_logic;                                      -- Output data validation, run_in for the next layer
      rdata   : out std_logic_vector(NbitW-1 downto 0);             -- Read data of weight and bias memories
      outputs : out std_logic_vector((NbitOut*NumN)-1 downto 0)     -- Output data (parallel)
   );

end layerSP_top;
|
-- Top level of a serial-input / parallel-output neuron layer.
-- It holds the weight memories (one per neuron) and the bias registers, exposes
-- them through the external memory interface (m_en, m_we, b_sel, addr, wdata,
-- rdata) and generates the enable signals that sequence the layerSP datapath.
architecture Behavioral of layerSP_top is

   -- Memory types. The index ranges are exactly NumIn / NumN entries (they are
   -- not padded to a power of two), so out-of-range addresses must not be used.
   type ramd_type is array (NumIn-1 downto 0) of std_logic_vector(NbitW-1 downto 0); -- Optimal: 32 or 64 spaces
   type layer_ram is array (NumN-1 downto 0) of ramd_type;
   type outm_type is array (NumN-1 downto 0) of std_logic_vector(NbitW-1 downto 0);

   signal lram  : layer_ram; -- Layer RAM. One RAM per neuron. It stores the weights
   signal breg  : outm_type; -- Bias registers. They can not be RAM because they are accessed simultaneously
   signal outm  : outm_type; -- RAM outputs to be multiplexed into rdata
   signal m_sel : std_logic_vector(NumN-1 downto 0);           -- RAM select
   signal Wyb   : std_logic_vector((NbitW*NumN)-1 downto 0);   -- Weight vectors
   signal bias  : std_logic_vector((NbitW*NumN)-1 downto 0);   -- Bias vector
   signal Nouts : std_logic_vector((NbitOut*NumN)-1 downto 0); -- Outputs from neurons
   signal uaddr : unsigned(lra_l-1 downto 0);                  -- Unsigned address of weight and bias memories

   -- Input data register. It delays the data 1 cycle, like en1, to match the register inserted for Wyb
   signal inreg : std_logic_vector(NbitIn-1 downto 0);

   -- Control signals
   signal cont     : integer range 0 to NumIn-1; -- Input counter
   signal en1      : std_logic; -- First step enable (multiplication of MAC)
   signal en2      : std_logic; -- Second stage enable (accumulation of MAC)
   signal en3      : std_logic; -- Shift register enable
   signal a0       : std_logic; -- Signal to load accumulators with the multiplication result
   signal aux_en3  : std_logic; -- Auxiliary signal to delay en3 two cycles
   signal aux_a0   : std_logic; -- Auxiliary signal to delay a0
   signal aux2_en3 : std_logic; -- First stage of the en3 delay line

begin

   layerSP_inst: entity work.layerSP
      generic map
      (
         NumN    => NumN,
         NumIn   => NumIn,
         NbitIn  => NbitIn,
         NbitW   => NbitW,
         NbitOut => NbitOut,
         LSbit   => LSbit
      )
      port map
      (
         -- Input ports
         reset   => reset,
         clk     => clk,
         en      => en1,
         en2     => en2,
         en_r    => en3,
         a0      => a0,
         inputs  => inreg,
         Wyb     => Wyb,
         bias    => bias,

         -- Output ports
         outputs => Nouts
      );

   uaddr <= unsigned(addr);

   -- One-hot RAM select, decoded from the top part of the memory address.
   -- No weight RAM is selected while the bias registers are selected (b_sel = '1').
   ram_selector:
   process (uaddr, b_sel)
   begin
      m_sel <= (others => '0'); -- Default
      for i in (NumN-1) downto 0 loop
         -- The top part of memory address selects which RAM
         if ( (to_integer(uaddr(lra_l-1 downto wra_l)) = i) and (b_sel = '0') ) then
            m_sel(i) <= '1'; -- Enables the selected RAM
         end if;
      end loop;
   end process;

   rams: -- Instantiates as many weight memories as neurons there are in the layer
   for i in (NumN-1) downto 0 generate

      -- Write port (external interface) with byte-wise write enable.
      -- The merge is done bit by bit, using m_we(b/8) as the enable of bit b, so
      -- that a last, partial byte lane (NbitW not a multiple of 8) never produces
      -- an out-of-range slice.
      process (clk)
         variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written
      begin
         if (clk'event and clk = '1') then
            if (m_en = '1' and m_sel(i) = '1') then
               for b in NbitW-1 downto 0 loop
                  if (m_we(b/8) = '1') then
                     d(b) := wdata(b); -- Byte lane enabled: takes the new data
                  else
                     d(b) := lram(i)(to_integer(uaddr(wra_l-1 downto 0)))(b); -- Keeps the stored data
                  end if;
               end loop;
               -- Bottom part of memory address selects the weight inside the selected RAM
               lram(i)(to_integer(uaddr(wra_l-1 downto 0))) <= d;
            end if;
         end if;
      end process;

      -- Outputs are read in parallel, resulting in a bus of weights.
      -- The read is synchronous (an asynchronous read would force distributed RAM).
      -- While reset is asserted Wyb keeps its value.
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset /= '1' then
               Wyb((NbitW*(i+1))-1 downto NbitW*i) <= lram(i)(cont);
            end if;
         end if;
      end process;

      outm(i) <= lram(i)(to_integer(uaddr(wra_l-1 downto 0))); -- Read all RAM

   end generate;

   -- Synchronous read of the external interface, including breg:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (m_en = '1') then
            if (b_sel = '1') then
               rdata <= breg(to_integer(uaddr(bra_l-1 downto 0))); -- Bias registers selected
            else -- Other RAM selected:
               rdata <= outm(to_integer(uaddr(lra_l-1 downto wra_l))); -- Multiplexes RAM outputs
               -- It may be safer to avoid accesses to top addresses greater than NumN-1
            end if;
         end if;
      end if;
   end process;

   -- Bias registers write port (external interface), same bit-wise merge as the
   -- weight memories:
   bias_reg:
   process (clk)
      variable d : std_logic_vector(NbitW-1 downto 0); -- Word to be written
   begin
      if (clk'event and clk = '1') then
         if ( (m_en = '1') and (b_sel = '1') ) then
            for b in NbitW-1 downto 0 loop
               if (m_we(b/8) = '1') then
                  d(b) := wdata(b);
               else
                  d(b) := breg(to_integer(uaddr(bra_l-1 downto 0)))(b);
               end if;
            end loop;
            -- The bottom part (reduced) of layer RAM address selects the bias
            breg(to_integer(uaddr(bra_l-1 downto 0))) <= d;
         end if;
      end if;
   end process;

   -- Synchronous read of all biases in parallel; while reset is asserted bias
   -- keeps its value.
   bias_read:
   for i in (NumN-1) downto 0 generate
      process (clk)
      begin
         if clk'event and clk = '1' then
            if reset /= '1' then
               bias((NbitW*(i+1))-1 downto NbitW*i) <= breg(i);
            end if;
         end if;
      end process;
   end generate;

   outputs <= Nouts;

   control:
   process (clk)
   begin
      if (clk'event and clk = '1') then
         if (reset = '1') then
            cont     <= 0;
            en1      <= '0';
            en2      <= '0';
            en3      <= '0';
            a0       <= '0';
            run_out  <= '0';
            aux_en3  <= '0';
            aux2_en3 <= '0';
            aux_a0   <= '0';
            inreg    <= (others => '0');
         else
            en1   <= run_in; -- en1 is delayed 1 cycle in order to insert a register for Wyb
            inreg <= inputs;

            -- Input counter. aux2_en3 pulses when the last input has been received:
            aux2_en3 <= '0'; -- Default
            if (run_in = '1') then
               if (cont = NumIn-1) then
                  cont     <= 0; -- Restarts input counter
                  aux2_en3 <= '1';
               else
                  cont <= cont + 1;
               end if;
            end if;

            en2 <= en1;

            -- aux_a0 marks the first input of a run (at the count beginning):
            if (cont = 0 and run_in = '1') then
               aux_a0 <= '1';
            else
               aux_a0 <= '0';
            end if;
            a0 <= aux_a0;

            -- Delay line from the end of the count to the output register enable:
            aux_en3 <= aux2_en3;
            en3     <= aux_en3;
            run_out <= en3; -- It lasts for 1 cycle, just after the output enable of the layer (when all outputs have just updated)
         end if;
      end if;
   end process;

end Behavioral;
/artificial_neural_network/trunk/RTL_VHDL_files/af_sigmoid2.vhd
0,0 → 1,100
---------------------------------------------------------------------------------- |
-- Company: CEI |
-- Engineer: Enrique Herrero |
-- |
-- Create Date: |
-- Design Name: Configurable ANN |
-- Module Name: af_sigmoid2 - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Sigmoid activation function implemented as a Look-Up-Table (LUT). |
-- Alternative set of parameters. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Revision 1 - David Aledo |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.ALL; |
use ieee.math_real.all; |
|
|
entity af_sigmoid2 is
   generic
   (
      Nbit : natural := 8 -- Bit width of the input and output data
   );
   port
   (
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                         -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0); -- Input data
      run_out : out std_logic;                         -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0)  -- Output data
   );
end af_sigmoid2;
|
|
-- Sigmoid activation function implemented as a registered Look-Up-Table.
-- The LUT has 2**Nbit entries and is addressed with the raw bit pattern of the
-- two's complement input; each entry holds the two's complement output value.
architecture Behavioral of af_sigmoid2 is

   -- Sigmoid parameters:
   constant f0 : real := 0.5; -- Slope at the origin
   constant fr : real := 2.0; -- fr = fmax - fmin

   -- LUT address: the input bit pattern read as an unsigned number
   signal dataIn : integer range (2**Nbit-1) downto 0;
   type table_t is array(0 to (2**Nbit)-1) of std_logic_vector(Nbit-1 downto 0); -- LUT type

   -- Function Sigmoidal: generates the Look-Up-Table for the sigmoid activation function.
   --   margin: maximum absolute value of x; the table covers [-margin, margin).
   --   Nbit  : bit width of the table address and of each table entry.
   -- Entry for the signed index idx is stored at address (idx mod 2**Nbit), i.e.
   -- at the unsigned reading of its two's complement code.
   function Sigmoidal(margin:real;Nbit:natural) return table_t is
      variable scale,x,y : real;
      variable u         : integer;
      variable table     : table_t;
   begin
      scale := (2.0*margin)/(2.0**Nbit); -- Calculates gap between two points
      x := -margin;
      for idx in -(2**(Nbit-1)) to (2**(Nbit-1))-1 loop
         y := ( fr / (1.0+exp(((-4.0*f0)/fr)*x)) ) - (fr/2.0);
         u := integer(round(y*(2.0**(Nbit-1)))); -- Scales to fixed point and rounds
         -- Saturates to the representable range, so that to_signed never
         -- truncates (it has no effect with the current parameters):
         if u > (2**(Nbit-1))-1 then
            u := (2**(Nbit-1))-1;
         elsif u < -(2**(Nbit-1)) then
            u := -(2**(Nbit-1));
         end if;
         -- Explicit wrap of the signed index into the 0 .. 2**Nbit-1 address range:
         table((idx + (2**Nbit)) mod (2**Nbit)) := std_logic_vector(to_signed(u,Nbit));
         x := x+scale;
      end loop;
      return table;
   end Sigmoidal;

   -- Generation of the LUT (computed once, when the design is elaborated)
   constant Table : table_t := Sigmoidal(1.0,Nbit);

begin

   -- The LUT is addressed with the unsigned reading of the input code. A signed
   -- conversion would give negative values, which are outside the range of
   -- dataIn and of the table index.
   dataIn <= to_integer(unsigned(inputs));

   -- Registered LUT read with synchronous reset. outputs is only updated when
   -- run_in is asserted; run_out follows run_in with one cycle of delay.
   Activacion: process(clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            run_out <= '0';
            outputs <= (others => '0');
         else
            if run_in = '1' then
               run_out <= '1';
               outputs <= Table(dataIn); -- Assigns output value from the LUT
            else
               run_out <= '0';
            end if;
         end if;
      end if;
   end process;

end Behavioral;
/artificial_neural_network/trunk/RTL_VHDL_files/activation_function.vhd
0,0 → 1,112
---------------------------------------------------------------------------------- |
-- Company: |
-- Engineer: |
-- |
-- Create Date: 16:16:02 05/14/2014 |
-- Design Name: Configurable ANN |
-- Module Name: activation_function - Structural |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Activation function selector. It instantiates the activation |
-- function type selected with f_type parameter. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
|
|
entity activation_function is
   generic
   (
      f_type : string  := "linear"; -- Activation function type
      Nbit   : natural := 8         -- Bit width
   );
   port
   (
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                         -- Start and input data validation
      inputs  : in  std_logic_vector(Nbit-1 downto 0); -- Input data
      run_out : out std_logic;                         -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0)  -- Output data
   );
end activation_function;
|
-- Selects, at elaboration time, the activation function implementation named
-- by the f_type generic. Exactly one of the generate blocks below is expected
-- to be active; each of them drives both outputs and run_out.
architecture Structural of activation_function is

begin

   -- Without this check an unrecognised f_type would silently leave outputs and
   -- run_out undriven. When a user activation function is added below, its tag
   -- must also be added to this condition.
   -- synthesis translate_off
   assert (f_type = "linear") or (f_type = "siglut") or (f_type = "siglu2")
      report "activation_function: unsupported f_type '" & f_type & "'; outputs and run_out are not driven."
      severity failure;
   -- synthesis translate_on

   -- Linear activation function. It is a direct assignment:
   linear_f:
   if (f_type = "linear") generate
      outputs <= inputs;
      run_out <= run_in;
   end generate;

   -- Example 1: sigmoid activation function implemented as a Look-Up-Table (LUT):
   Sigmoid_f:
   if (f_type = "siglut") generate
      siglut_inst: entity work.af_sigmoid
         generic map
         (
            Nbit => Nbit
         )
         port map
         (
            reset   => reset,
            clk     => clk,
            run_in  => run_in,
            inputs  => inputs,
            run_out => run_out,
            outputs => outputs
         );
   end generate;

   -- Example 2: sigmoid activation function implemented as a LUT, with a second different set of parameters:
   Sigmoid2_f:
   if (f_type = "siglu2") generate
      siglut_inst: entity work.af_sigmoid2
         generic map
         (
            Nbit => Nbit
         )
         port map
         (
            reset   => reset,
            clk     => clk,
            run_in  => run_in,
            inputs  => inputs,
            run_out => run_out,
            outputs => outputs
         );
   end generate;

   -- Template to instantiate a user activation function type ("userAF"):
   --userAF_f:
   --if (f_type = "userAF") generate
   --   yourAF_inst: entity work.--place here user module name--
   --      generic map
   --      (
   --         Nbit => Nbit
   --      )
   --      port map
   --      (
   --         reset   => reset,
   --         clk     => clk,
   --         run_in  => run_in,
   --         inputs  => inputs,
   --         run_out => run_out,
   --         outputs => outputs
   --      );
   --end generate;
   -- User can instantiate as many types of activation function as needed, each one of them must be tagged as a 6 character string

end Structural;
|
/artificial_neural_network/trunk/RTL_VHDL_files/shiftreg_pl.vhd
0,0 → 1,119
---------------------------------------------------------------------------------- |
-- Company: CEI - UPM |
-- Engineer: David Aledo |
-- |
-- Create Date: 11:31:38 05/14/2014 |
-- Design Name: Configurable ANN |
-- Module Name: shiftreg_pl - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Shift register with parallel load. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
|
|
entity shiftreg_pl is
   generic
   (
      Nreg : natural := 64; -- Number of elements
      Nbit : natural := 8   -- Bit width of each element
   );

   port
   (
      -- Input ports
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                                -- Start and input data validation
      inputs  : in  std_logic_vector((Nbit*Nreg)-1 downto 0); -- Input data (parallel)
      -- Output ports
      run_out : out std_logic;                                -- Output data validation, run_in for the next layer
      outputs : out std_logic_vector(Nbit-1 downto 0)         -- Output data (serial)
   );
end shiftreg_pl;
|
-- Parallel-in / serial-out converter.
-- After run_in is seen in the idle state, the register is enabled for Nreg
-- cycles: on the first one (count = 0) the whole inputs bus is loaded, on the
-- following ones the register is shifted towards element 0, which is the
-- serial output. run_out is en_r delayed by one clock cycle.
architecture Behavioral of shiftreg_pl is

   type dreg_type   is array (Nreg-1 downto 0) of std_logic_vector(Nbit-1 downto 0); -- Shift register type
   type reg_st_type is (idle, counting);                                              -- Register state type

   signal dreg   : dreg_type;               -- Shift register
   signal reg_st : reg_st_type;             -- Register state
   signal count  : integer range 0 to Nreg-1; -- Position inside the current run
   signal en_r   : std_logic;               -- Shift register enable

begin

   -- The register is enabled during the whole counting state:
   en_r <= '1' when reg_st = counting else '0';

   -- Element 0 is the serial output:
   outputs <= dreg(0);

   -- Shift register with parallel load:
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            dreg <= (others => (others => '0'));
         elsif en_r = '1' then
            if count = 0 then
               -- First enabled cycle: parallel load of all the elements
               for k in Nreg-1 downto 0 loop
                  dreg(k) <= inputs((Nbit*(k+1))-1 downto Nbit*k);
               end loop;
            else
               -- Remaining cycles: every element moves one position down and
               -- the top element is filled with don't care
               shift:
               for k in 0 to Nreg-2 loop
                  dreg(k) <= dreg(k+1);
               end loop;
               dreg(Nreg-1) <= (others => '-');
            end if;
         end if;
      end if;
   end process;

   -- Shift register control:
   process (clk)
   begin
      if clk'event and clk = '1' then
         if reset = '1' then
            count   <= 0;
            reg_st  <= idle;
            run_out <= '0';
         else
            run_out <= en_r;
            if reg_st = idle then
               -- Stays idle until a run is requested
               if run_in = '1' then
                  reg_st <= counting;
               end if;
            elsif count = (Nreg-1) then
               -- Last element has been shifted out
               reg_st <= idle;
               count  <= 0;
            else
               count <= count + 1;
            end if;
         end if;
      end if;
   end process;

end Behavioral;
|
/artificial_neural_network/trunk/RTL_VHDL_files/ann.vhd
0,0 → 1,451
---------------------------------------------------------------------------------- |
-- Company: CEI - UPM |
-- Engineer: David Aledo |
-- |
-- Create Date: 01.10.2015 15:15:28 |
-- Design Name: Configurable ANN |
-- Module Name: ann - config_structural |
-- Project Name: |
-- Target Devices: |
-- Tool Versions: |
-- Description: generates the structure of an ANN with the given parameters. |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
|
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use IEEE.NUMERIC_STD.ALL; |
|
use work.layers_pkg.all; |
|
entity ann is
   generic
   (
      Nlayer  : integer := 2;  -- Number of layers
      NbitW   : natural := 16; -- Bit width of weights and biases
      NumIn   : natural := 64; -- Number of inputs to the network
      NbitIn  : natural := 8;  -- Bit width of the inputs
      NumN    : int_vector;    -- Number of neurons in each layer
      l_type  : string;        -- Layer type of each layer
      f_type  : string;        -- Activation function type of each layer
      LSbit   : int_vector;    -- LSB of the output of each layer
      NbitO   : int_vector;    -- Bit width of the outputs of each layer
      NbitOut : natural := 8   -- Bit width of the network output
   );

   port
   (
      -- Input ports
      reset   : in  std_logic;
      clk     : in  std_logic;
      run_in  : in  std_logic;                                  -- Start and input data validation
      m_en    : in  std_logic;                                  -- Weight and bias memory enable (external interface)
      m_we    : in  std_logic_vector(((NbitW+7)/8)-1 downto 0); -- Weight and bias memory write enable (external interface)
      inputs  : in  std_logic_vector(NbitIn-1 downto 0);        -- Input data
      wdata   : in  std_logic_vector(NbitW-1 downto 0);         -- Weight and bias memory write data
      -- Weight and bias memory address. Its width is calculate_lra_l(NumIn, NumN, Nlayer) + log2(Nlayer) bits:
      addr    : in  std_logic_vector((calculate_lra_l(NumIn, NumN, Nlayer)+log2(Nlayer))-1 downto 0);

      -- Output ports
      run_out : out std_logic;                                  -- Output data validation
      rdata   : out std_logic_vector(NbitW-1 downto 0);         -- Weight and bias memory read data
      outputs : out std_logic_vector(NbitOut-1 downto 0)        -- Output data
   );
end ann;
|
architecture config_structural of ann is |
|
-- Arrays of configuration constants, generated from string generics: |
-- l_type and f_type are converted into one entry per layer (index 0 is the first layer). |
constant ltype_v : ltype_vector(Nlayer-1 downto 0) := assign_ltype(l_type,Nlayer); |
constant ftype_v : ftype_vector(Nlayer-1 downto 0) := assign_ftype(f_type,Nlayer); |
constant lra_l : int_vector(Nlayer-1 downto 0) := assign_addrl(NumIn,NumN,Nlayer); -- Layer RAM address length of each layer |
-- Number of inputs of each layer: NumIn for layer 0, NumN(i-1) for every following layer (i). |
constant NumIn_v : int_vector(Nlayer-1 downto 0) := NumN(Nlayer-2 downto 0) & NumIn; |
constant wra_l : int_vector(Nlayer-1 downto 0) := log2(NumIn_v, Nlayer); -- Weight RAM address length of each layer |
constant bra_l : int_vector(Nlayer-1 downto 0) := log2(NumN, Nlayer); -- Bias ram address length of each layer |
|
-- Internal signals: |
signal lm_en : std_logic_vector(Nlayer-1 downto 0); -- Weight and bias memory enable of each layer |
type lrd_type is array (Nlayer-1 downto 0) of std_logic_vector(NbitW-1 downto 0); |
signal lrdata : lrd_type; -- Weight and bias memory read data of each layer |
|
-- Inter-layer data buses. All layers share one element width, so each instance only uses the low slice it needs. |
-- NOTE(review): calculate_max_mul / calculate_max presumably return the largest NumN(i)*NbitO(i) and NbitO(i) -- confirm in layers_pkg. |
type lodata_t is array (Nlayer-1 downto 0) of std_logic_vector(calculate_max_mul(NbitO,NumN)-1 downto 0); -- Parallel or serial data |
type ladata_t is array (Nlayer-1 downto 0) of std_logic_vector(calculate_max(NbitO)-1 downto 0); -- Always serial data |
-- Index (i) of the signals below refers to the link between layer (i) and layer (i+1): |
-- runO/lodata leave layer (i), runI/lidata enter layer (i+1), runA/ladata sit between shift-register and activation function. |
signal runO : std_logic_vector(Nlayer-1 downto 0); -- Output data validation of each layer (before activation function) |
signal runI : std_logic_vector(Nlayer-1 downto 0); -- Input data validation of each layer |
signal runA : std_logic_vector(Nlayer-1 downto 0); -- Auxiliary serial data validation of each layer |
signal lodata : lodata_t; -- Output data of each layer (before activation function) |
signal lidata : lodata_t; -- Input data of each layer |
signal ladata : ladata_t; -- Auxiliary serial data of each layer |
|
begin |
|
-- Weight and bias memory layer selection (combinational mux).
-- The upper log2(Nlayer) bits of addr select which layer memory is enabled and
-- which layer read data is returned on rdata.
process (addr(addr'length-1 downto addr'length-log2(Nlayer)), m_en, lrdata)
begin
   -- Default value: without it rdata keeps its previous value (an inferred latch)
   -- whenever the layer-select field addresses a non-existent layer, which can
   -- happen when Nlayer is not a power of two.
   rdata <= (others => '0');

   for i in 0 to Nlayer-1 loop
      if to_integer(unsigned(addr(addr'length-1 downto addr'length-log2(Nlayer)))) = i then
         lm_en(i) <= m_en;      -- Only the addressed layer sees the memory enable
         rdata    <= lrdata(i); -- Return the read data of the addressed layer
      else
         lm_en(i) <= '0';
      end if;
   end loop;
   -- Note: addresses whose layer-select field is >= Nlayer enable no memory and read zeros.
end process;
|
-- ATTENTION: until the block below is implemented, the first layer must have serial input ('S').
-- A first layer declared with parallel input only stops the simulation with a failure.
parallelize_inputs:
if ltype_v(0)(1) = 'P' generate
   -- TODO: instantiate shift register with parallel output.
   -- synthesis translate_off
   -- This statement is only elaborated when the first layer input type is 'P',
   -- so the assertion fires unconditionally.
   assert false
      report "Current version does not accept parallel inputs."
      severity failure;
   -- synthesis translate_on
   -- TODO: delete the assertion once the shift register with parallel output is instantiated.
end generate;
|
-- First layer (0), serial-input parallel-output case. It is fed directly by the
-- network ports and writes its results to lodata(0) / runO(0).
first_layer_SP:
if ltype_v(0) = "SP" generate

   first_layerSP_top_inst: entity work.layerSP_top
      generic map
      (
         NumN    => NumN(0),   -- Neurons in the first layer
         NumIn   => NumIn,     -- Inputs of the first layer
         NbitIn  => NbitIn,    -- Input data bit width
         NbitW   => NbitW,     -- Weight and bias bit width
         NbitOut => NbitO(0),  -- First layer output bit width
         lra_l   => lra_l(0),  -- Layer RAM address length
         wra_l   => wra_l(0),  -- Weight RAM address length
         bra_l   => bra_l(0),  -- Bias RAM address length
         LSbit   => LSbit(0)   -- Less significant bit of the first layer outputs
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path: network inputs (serial) in, parallel results out
         run_in  => run_in,
         inputs  => inputs,
         run_out => runO(0),
         outputs => lodata(0)((NumN(0)*NbitO(0))-1 downto 0),

         -- Weight and bias memory interface
         m_en    => lm_en(0),                           -- Enable decoded for layer 0
         b_sel   => addr(addr'length-log2(Nlayer)-1),   -- Bit just below the layer-select field: layer or bias memory
         m_we    => m_we,
         wdata   => wdata,
         addr    => addr(lra_l(0)-1 downto 0),
         rdata   => lrdata(0)
      );

end generate;
|
|
layers_insts: |
for i in 1 to Nlayer-1 generate |
|
-- Previous layer (i-1) parallel output, current layer (i) serial input:
-- the parallel results are serialized first, so a single activation function is enough.
--   layer (i-1) --runO/lodata--> shift register --runA/ladata--> activation --runI/lidata--> layer (i)
serializer:
if (ltype_v(i-1)(2) = 'P') and (ltype_v(i)(1) = 'S') generate

   -- Shift register with parallel load: one register per neuron of layer (i-1)
   shiftreg_parallel_load: entity work.shiftreg_pl
      generic map
      (
         Nreg => NumN(i-1),
         Nbit => NbitO(i-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),                                         -- Validation from layer (i-1)
         inputs  => lodata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0),    -- Parallel data from layer (i-1)
         run_out => runA(i-1),                                         -- Validation to the activation function
         outputs => ladata(i-1)(NbitO(i-1)-1 downto 0)                 -- Serial data to the activation function
      );

   -- Single activation function of layer (i-1), working on the serialized data
   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),
         Nbit   => NbitO(i-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runA(i-1),                                         -- Validation from the shift register
         inputs  => ladata(i-1)(NbitO(i-1)-1 downto 0),                -- Serial data from the shift register
         run_out => runI(i-1),                                         -- Validation to layer (i)
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)                 -- Serial data to layer (i)
      );

end generate; -- serializer
|
-- Previous layer (i-1) serial output, current layer (i) serial input:
-- one activation function sits directly between both layers.
single_activation_function:
if (ltype_v(i-1)(2) = 'S') and (ltype_v(i)(1) = 'S') generate

   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),  -- Activation function type of layer (i-1)
         Nbit   => NbitO(i-1)     -- Output bit width of layer (i-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),                              -- Validation from layer (i-1)
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0),     -- Serial data from layer (i-1)
         run_out => runI(i-1),                              -- Validation to layer (i)
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)      -- Serial data to layer (i)
      );

end generate; -- single_activation_function
|
-- Previous layer (i-1) parallel output, current layer (i) parallel input:
-- one activation function per neuron of layer (i-1), all started by runO(i-1).
multiple_activation_functions:
if (ltype_v(i-1)(2) = 'P') and (ltype_v(i)(1) = 'P') generate

   -- Activation function of neuron 0. Only this instance drives the data
   -- validation towards layer (i).
   act_function_inst_0: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1),
         Nbit   => NbitO(i-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(i-1),
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0),
         run_out => runI(i-1),
         outputs => lidata(i-1)(NbitO(i-1)-1 downto 0)
      );

   -- Activation functions of neurons 1 .. NumN(i-1)-1. Neuron j uses the
   -- NbitO(i-1)-wide slice number j of both the source and destination buses.
   multiple_activation_function_insts:
   for j in 1 to NumN(i-1)-1 generate
      activation_function_inst: entity work.activation_function
         generic map
         (
            f_type => ftype_v(i-1),
            Nbit   => NbitO(i-1)
         )
         port map
         (
            clk     => clk,
            reset   => reset,
            run_in  => runO(i-1),
            inputs  => lodata(i-1)((NbitO(i-1)*(j+1))-1 downto NbitO(i-1)*j),
            run_out => open, -- A single data validation is enough; the others stay unconnected
            outputs => lidata(i-1)((NbitO(i-1)*(j+1))-1 downto NbitO(i-1)*j)
         );
   end generate;

end generate; -- multiple_activation_functions
|
-- If the previous layer (i-1) has serial outputs and the current layer (i) has parallel inputs,
-- a parallelizer is inserted after the activation function (i-1):
--   layer (i-1) --runO/lodata--> activation --runA/ladata--> shift register --runI/lidata--> layer (i)
parallelizer:
if (ltype_v(i-1)(2) = 'S') and (ltype_v(i)(1) = 'P') generate

   -- Instantiate single activation function of the previous layer (i-1):
   activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(i-1), -- Activation function type of the previous layer (i-1)
         Nbit   => NbitO(i-1)    -- Bit width of the outputs of the previous layer (i-1)
      )
      port map
      (
         reset   => reset,
         clk     => clk,
         run_in  => runO(i-1),                           -- Input data validation comes from the previous layer (i-1)
         inputs  => lodata(i-1)(NbitO(i-1)-1 downto 0),  -- Serial input data come from the previous layer (i-1)
         run_out => runA(i-1),                           -- Output data validation goes to the shift-register
         outputs => ladata(i-1)(NbitO(i-1)-1 downto 0)   -- Serial output data go to the shift-register
      );

   -- Instantiate shift-register with parallel unload:
   shiftreg_parallel_unload: entity work.shiftreg_pu
      generic map
      (
         Nreg => NumN(i-1), -- One register per neuron of the previous layer (i-1)
         Nbit => NbitO(i-1) -- Register width is the output bit width of the previous layer (i-1)
      )
      port map
      (
         reset   => reset,
         clk     => clk,
         run_in  => runA(i-1),                           -- Input data validation comes from the activation function
         inputs  => ladata(i-1)(NbitO(i-1)-1 downto 0),  -- Serial input data
         -- The parallel result feeds layer (i), which reads runI(i-1)/lidata(i-1).
         -- Driving runO(i-1)/lodata(i-1) here would add a second driver to the
         -- outputs of layer (i-1) and leave the inputs of layer (i) undriven.
         run_out => runI(i-1),                                       -- Output data validation goes to the input data validation of this layer
         outputs => lidata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0)   -- Parallel output data go to the inputs of this layer
      );

end generate; -- parallelizer
|
-- Instance the layer (i), cases SP, PS or PP: |
|
-- Layer (i) of type "SP": consumes serial data from lidata(i-1) and produces
-- all its neuron outputs in parallel on lodata(i).
SP_case:
if ltype_v(i) = "SP" generate
   layerSP_top_inst: entity work.layerSP_top
      generic map
      (
         NumN    => NumN(i),     -- Neurons in layer (i)
         NumIn   => NumN(i-1),   -- Inputs = neurons of the previous layer (i-1)
         NbitIn  => NbitO(i-1),  -- Input width = output width of layer (i-1)
         NbitW   => NbitW,       -- Weight and bias bit width
         NbitOut => NbitO(i),    -- Output bit width of layer (i)
         lra_l   => lra_l(i),    -- Layer RAM address length
         wra_l   => wra_l(i),    -- Weight RAM address length
         bra_l   => bra_l(i),    -- Bias RAM address length
         LSbit   => LSbit(i)     -- Less significant bit of layer (i) outputs
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path
         run_in  => runI(i-1),
         inputs  => lidata(i-1)(NbitO(i-1)-1 downto 0),          -- Serial data
         run_out => runO(i),
         outputs => lodata(i)((NumN(i)*NbitO(i))-1 downto 0),    -- Parallel data

         -- Weight and bias memory interface
         m_en    => lm_en(i),                           -- Enable decoded for layer (i)
         b_sel   => addr(addr'length-log2(Nlayer)-1),   -- Bit just below the layer-select field: layer or bias memory
         m_we    => m_we,
         wdata   => wdata,
         addr    => addr(lra_l(i)-1 downto 0),
         rdata   => lrdata(i)
      );
end generate;
|
-- Layer (i) of type "PS": consumes all outputs of layer (i-1) in parallel from
-- lidata(i-1) and produces its results serially on lodata(i).
PS_case:
if ltype_v(i) = "PS" generate
   layerPS_top_inst: entity work.layerPS_top
      generic map
      (
         NumN    => NumN(i),     -- Neurons in layer (i)
         NumIn   => NumN(i-1),   -- Inputs = neurons of the previous layer (i-1)
         NbitIn  => NbitO(i-1),  -- Input width = output width of layer (i-1)
         NbitW   => NbitW,       -- Weight and bias bit width
         NbitOut => NbitO(i),    -- Output bit width of layer (i)
         lra_l   => lra_l(i),    -- Layer RAM address length
         wra_l   => wra_l(i),    -- Weight RAM address length
         bra_l   => bra_l(i),    -- Bias RAM address length
         LSbit   => LSbit(i)     -- Less significant bit of layer (i) outputs
      )
      port map
      (
         -- Clock and reset
         clk     => clk,
         reset   => reset,

         -- Data path
         run_in  => runI(i-1),
         inputs  => lidata(i-1)((NumN(i-1)*NbitO(i-1))-1 downto 0),  -- Parallel data
         run_out => runO(i),
         outputs => lodata(i)(NbitO(i)-1 downto 0),                   -- Serial data

         -- Weight and bias memory interface
         m_en    => lm_en(i),                           -- Enable decoded for layer (i)
         b_sel   => addr(addr'length-log2(Nlayer)-1),   -- Bit just below the layer-select field: layer or bias memory
         m_we    => m_we,
         wdata   => wdata,
         addr    => addr(lra_l(i)-1 downto 0),
         rdata   => lrdata(i)
      );
end generate;
|
-- Parallel-input parallel-output layer:
PP_case:
if ltype_v(i) = "PP" generate
   -- TODO: instance a full parallel layer. At current version this layer type has not been developed.
   -- synthesis translate_off
   -- The check uses the per-layer constant ltype_v(i). Indexing the l_type
   -- generic instead would select a single character of the string, which
   -- cannot be compared with the two-character literal "PP".
   assert ltype_v(i) /= "PP"
      report "Current version does not accept parallel-input parallel-output (PP) layer type."
      severity failure;
   -- synthesis translate_on
   -- TODO: delete above lines when instantiate the parallel-input parallel-output layer.
end generate;
|
end generate; -- layers_insts |
|
-- Last layer (Nlayer-1) with parallel outputs: the results are serialized and
-- then passed through a single activation function that drives the network outputs.
last_serializer:
if (ltype_v(Nlayer-1)(2) = 'P') generate

   -- Shift register with parallel load: one register per neuron of the last layer
   last_shiftreg_parallel_load: entity work.shiftreg_pl
      generic map
      (
         Nreg => NumN(Nlayer-1),
         Nbit => NbitO(Nlayer-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(Nlayer-1),                                                   -- Validation from the last layer
         inputs  => lodata(Nlayer-1)((NumN(Nlayer-1)*NbitO(Nlayer-1))-1 downto 0),    -- Parallel data from the last layer
         run_out => runA(Nlayer-1),                                                   -- Validation to the last activation function
         outputs => ladata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0)                      -- Serial data to the last activation function
      );

   -- Activation function of the last layer, driving the network output ports
   last_activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(Nlayer-1),
         Nbit   => NbitO(Nlayer-1)
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runA(Nlayer-1),                                   -- Validation from the shift register
         inputs  => ladata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0),     -- Serial data from the shift register
         run_out => run_out,                                          -- Output data validation of the network
         outputs => outputs                                           -- Outputs of the network (serial data)
      );

end generate; -- last_serializer
|
-- Last layer (Nlayer-1) with serial outputs: its results go straight into the
-- activation function that drives the network output ports.
last_simple_activation_function:
if (ltype_v(Nlayer-1)(2) = 'S') generate
   last_activation_function_inst: entity work.activation_function
      generic map
      (
         f_type => ftype_v(Nlayer-1),  -- Activation function type of the last layer
         Nbit   => NbitO(Nlayer-1)     -- Output bit width of the last layer
      )
      port map
      (
         clk     => clk,
         reset   => reset,
         run_in  => runO(Nlayer-1),                                   -- Validation from the last layer
         inputs  => lodata(Nlayer-1)(NbitO(Nlayer-1)-1 downto 0),     -- Serial data from the last layer
         run_out => run_out,                                          -- Output data validation of the network
         outputs => outputs                                           -- Outputs of the network (serial data)
      );
end generate;
|
end config_structural; |
/artificial_neural_network/trunk/RTL_VHDL_files/adder_tree.vhd
0,0 → 1,134
---------------------------------------------------------------------------------- |
-- Company: |
-- Engineer: |
-- |
-- Create Date: 15:27:42 06/20/2013 |
-- Design Name: Configurable ANN |
-- Module Name: adder_tree - Behavioral |
-- Project Name: |
-- Target Devices: |
-- Tool versions: |
-- Description: Recursive adder tree |
-- |
-- Dependencies: |
-- |
-- Revision: |
-- Revision 0.01 - File Created |
-- Additional Comments: |
-- |
---------------------------------------------------------------------------------- |
|
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
use ieee.numeric_std.all; |
|
|
-- Adder tree interface: NumIn words of Nbit bits arrive packed in a single
-- vector (word k occupies bits Nbit*(k+1)-1 downto Nbit*k) and one Nbit-wide
-- sum is returned together with its validation flag.
entity adder_tree is
   generic
   (
      NumIn : integer := 9;   -- How many words are added
      Nbit  : integer := 12   -- Width of every word, and of the result
   );
   port
   (
      -- Inputs
      reset  : in  std_logic;
      clk    : in  std_logic;
      en     : in  std_logic;                                     -- Input data validation
      inputs : in  std_logic_vector((Nbit*NumIn)-1 downto 0);     -- Packed input words

      -- Outputs
      en_out : out std_logic;                                     -- Output data validation
      output : out std_logic_vector(Nbit-1 downto 0)              -- Sum of all input words
   );
end adder_tree;
|
-- Recursive, pipelined adder tree.
-- Each level registers the sums of adjacent input pairs (plus the unpaired last
-- input when NumIn is odd) and hands the NumIn/2 + (NumIn mod 2) partial results
-- to a sub-tree, until at most two inputs remain. Every level adds one clock
-- cycle of latency; the enable is delayed along with the data.
-- Sums are truncated to Nbit bits (no bit growth between levels).
architecture Behavioral of adder_tree is

   constant NumIn2 : integer := NumIn/2; -- Number of input pairs = number of sums produced by this level

   signal next_en : std_logic := '0'; -- Next adder tree layer enable
   signal res : std_logic_vector((Nbit*((NumIn2)+(NumIn mod 2)))-1 downto 0); -- Partial results

   signal resL_reg : std_logic_vector((Nbit*NumIn2)-1 downto 0); -- Registered pair sums
   signal resH_reg : std_logic_vector(Nbit-1 downto 0);          -- Registered unpaired input (odd NumIn only)
begin

   -- synthesis translate_off
   assert NumIn >= 1
      report "adder_tree: NumIn must be at least 1."
      severity failure;
   -- synthesis translate_on

   -- Additions (skipped when there is no input pair, i.e. NumIn = 1, so that no
   -- null-range slices are assigned):
   pair_adders:
   if (NumIn2 > 0) generate
      add_proc:
      process (clk) -- Synchronous to allow pipeline
      begin
         if (clk'event and clk = '1') then
            if (reset = '1') then
               resL_reg <= (others => '0');
            else
               if (en = '1') then
                  -- Addition of inputs (2*i and 2*i+1), resulting in NumIn/2 outputs of this layer of the adder tree:
                  for i in ((NumIn2)-1) downto 0 loop
                     resL_reg((Nbit*(i+1))-1 downto Nbit*i) <= std_logic_vector( signed(inputs((Nbit*((2*i)+1))-1 downto Nbit*2*i)) + signed(inputs((Nbit*((2*i)+2))-1 downto Nbit*((2*i)+1))) );
                  end loop;
               end if;
            end if;
         end if;
      end process;

      res((Nbit*NumIn2)-1 downto 0) <= resL_reg;
   end generate;

   -- Register the uneven input (if needed), so it stays aligned with the pair sums:
   uneven_register:
   if (NumIn mod 2 = 1) generate
      process (clk)
      begin
         if (clk'event and clk = '1') then
            if (reset = '1') then
               resH_reg <= (others => '0');
            else
               if (en = '1') then
                  resH_reg <= inputs((Nbit*NumIn)-1 downto Nbit*(NumIn-1));
               end if;
            end if;
         end if;
      end process;
      res((Nbit*((NumIn2)+1))-1 downto Nbit*(NumIn2)) <= resH_reg;
   end generate;

   process (clk)
   begin
      if (clk'event and clk = '1') then
         if reset = '1' then
            next_en <= '0';
         else
            next_en <= en; -- Enable is delayed 1 cycle for the next layer of the adder tree
         end if;
      end if;
   end process;

   recursion:
   if (NumIn > 2) generate

      sub_adder_tree: entity work.adder_tree
         generic map
         (
            NumIn => (NumIn2)+(NumIn mod 2),
            Nbit => Nbit
         )
         port map
         (
            clk => clk,
            reset => reset,
            en => next_en,
            inputs => res,
            en_out => en_out,
            output => output -- Solution is passed from the sub-adder trees to the top adder tree
         );
   end generate;

   -- End of the recursion. NumIn = 2 yields the registered sum of both inputs.
   -- NumIn = 1 is also handled here: res then only holds the registered input,
   -- so the single input is passed through with one cycle of latency instead of
   -- leaving output and en_out undriven.
   trivial_solution:
   if (NumIn <= 2) generate
      en_out <= next_en;
      output <= res; -- Assign the final result to the adder tree output
   end generate;

end Behavioral;
|
/artificial_neural_network/trunk/API_header_files/ann.h
0,0 → 1,108
/* |
* ann.h |
* |
* Description: This header file helps programmers to correctly access the ANN IP core weight and bias memories. |
* The user must edit ANN_BASEADDRESS, NLAYER, and the definitions of layer inputs and neurons. |
* The MAX_MUL macro can be calculated manually, or can rely on the automated calculation if NLAYER<=4. |
* One Wyb(x) macro must be declared in the code per layer of the ANN IP core. |
* These macros declare the 2D weight arrays and 1D bias arrays needed to access the ANN IP core memories. |
* |
* Created on: 17/05/2016 |
* Author: David A |
*/ |
|
#ifndef ANN_H |
#define ANN_H |
|
/* Base address of weight and bias memories of the ANN IP core */ |
// Example for Xilinx's SDK using the example wrapper for Vivado. Correct user base address must be defined here: |
// All pointers declared by the Wyb(x) macro are computed as byte offsets from this address. |
#define ANN_BASEADDRESS XPAR_ANN_0_WYB_S_AXI_BASEADDR |
|
/* Number of layers */ |
// The automated MAX_MUL calculation further below only supports NLAYER<=4. |
#define NLAYER 4 |
|
/* Number of inputs and neurons of each layer */ |
// Add or remove as many layers as needed: |
// Except for layer 0, the number of inputs of a layer is the number of neurons of the previous layer. |
#define NumIn0 16 |
#define NumN0 13 |
#define NumIn1 NumN0 |
#define NumN1 6 |
#define NumIn2 NumN1 |
#define NumN2 13 |
#define NumIn3 NumN2 |
#define NumN3 16 |
|
/* (optional) Redefine number of neurons in the last layer as number of outputs */ |
#define NumOut NumN3 |
|
/* Next-power-of-two of inputs and neurons of each layer */ |
// Define a next-power-of-two macro per parameter in the number of inputs and neurons of each layer list: |
// NOTE: next_2power(x) macro function calculates the next-power-of-two of x for x<=256. If x>256 it still returns 256. |
// NumInx_b2 is used by Wyb(x) as the row length of the weight array of layer x; both _b2 values enter the MAX_MUL products. |
#define NumN0_b2 next_2power(NumN0) |
#define NumIn0_b2 next_2power(NumIn0) |
#define NumN1_b2 next_2power(NumN1) |
#define NumIn1_b2 next_2power(NumIn1) |
#define NumN2_b2 next_2power(NumN2) |
#define NumIn2_b2 next_2power(NumIn2) |
#define NumN3_b2 next_2power(NumN3) |
#define NumIn3_b2 next_2power(NumIn3) |
|
/* Maximum multiplication of the next-power-of-two of inputs by the next-power-of-two of neurons */ |
// MAX_MUL macro can be defined manually, or automatically if NLAYER<=4. |
// To define it manually user must determine which layer has the maximum of these products, and edit MAX_MUL definition: |
// In the example is layer 0 (or layer 3 with same MAX_MUL), 256 > 128 |
// NumIn0 = 16 ==> NumIn0_b2 = 16 |
// NumN0 = 13 ==> NumN0_b2 = 16 |
// NumN0_b2*NumIn0_b2=16*16=256 |
// NumIn1 = 13 ==> NumIn1_b2 = 16 |
// NumN1 = 6 ==> NumN1_b2 = 8 |
// NumN1_b2*NumIn1_b2=16*8=128 |
// NumIn2 = 6 ==> NumIn2_b2 = 8 |
// NumN2 = 13 ==> NumN2_b2 = 16 |
// NumN2_b2*NumIn2_b2=8*16=128 |
// NumIn3 = 13 ==> NumIn3_b2 = 16 |
// NumN3 = 16 ==> NumN3_b2 = 16 |
// NumN3_b2*NumIn3_b2=16*16=256 |
|
//#define MAX_MUL (NumN0_b2*NumIn0_b2) //Uncomment and edit this manual definition of MAX_MUL for manual definition of MAX_MUL |
|
// Automated calculation of MAX_MUL for NLAYER<=4:
// MAX_k is the running maximum of NumNj_b2*NumInj_b2 over layers 0..k, and
// MAX_MUL is the running maximum of the last layer. Nothing is done here when
// the user has already defined MAX_MUL manually.
#ifndef MAX_MUL
 #if NLAYER > 4
  #error MAX_MUL cannot be automatically calculated if NLAYER>4. Define MAX_MUL manually or complete the automaed calculation of MAX_MUL preprocessor code.
 #endif
 // Every use of an argument is parenthesized so that max2 also expands
 // correctly when it is given an expression instead of a single constant.
 #define max2(x,y) ( ((x) < (y)) ? (y) : (x) )
 #define MAX_0 (NumN0_b2*NumIn0_b2)
 #if NLAYER > 1
  #define MAX_1 max2((NumN1_b2*NumIn1_b2),MAX_0)
  #if NLAYER > 2
   #define MAX_2 max2((NumN2_b2*NumIn2_b2),MAX_1)
   #if NLAYER == 4
    #define MAX_MUL max2((NumN3_b2*NumIn3_b2),MAX_2)
   #elif NLAYER == 3
    #define MAX_MUL MAX_2
   #endif //NLAYER == 4
  #elif NLAYER == 2
   #define MAX_MUL MAX_1
  #endif //NLAYER > 2
 #else //NLAYER == 1
  #define MAX_MUL MAX_0
 #endif //NLAYER > 1
#endif //MAX_MUL
|
/* Macro function next_2power(x): smallest power of two that is not lower than x */
// Only valid for x<=256: any greater x still yields 256. Values of x not greater than 1 yield 1.
// It is a plain conditional expression, so it stays a constant expression for constant x.
#define next_2power(x) ( \
    ((x) <= 1)   ?   1 : \
    ((x) <= 2)   ?   2 : \
    ((x) <= 4)   ?   4 : \
    ((x) <= 8)   ?   8 : \
    ((x) <= 16)  ?  16 : \
    ((x) <= 32)  ?  32 : \
    ((x) <= 64)  ?  64 : \
    ((x) <= 128) ? 128 : 256 )
|
/* When this macro is expanded for a particular layer x, it declares pointers to the weight 2D array, bias 1D array, and unused spaces; and initializes them with a proper address */
// Declare a Wyb(x) macro per layer on the user's ANN, each time with a different layer number x, from 0 to NLAYER-1.
// x must be a plain number, because it is also pasted into the declared identifiers.
// Example: For a two layer ANN (NLAYER 2)
// Wyb(0) // declares and initializes int (*W0)[NumN0][NumIn0_b2], (*b0)[NumN0];
// Wyb(1) // declares and initializes int (*W1)[NumN1][NumIn1_b2], (*b1)[NumN1];
// The unused spaces (*NOT_EXISTx0) and (*NOT_EXISTx1) are declared in order to prevent the use of these address spaces for other purposes, although this does not guarantee they will not be used.
// Memory map of layer x, in int-sized words counted from ANN_BASEADDRESS:
//   MAX_MUL*2*x     .. : weights, NumNx rows of NumInx_b2 words
//   MAX_MUL*(2*x+1) .. : biases, NumNx words
// The macro already ends with a semicolon, so no semicolon is needed after Wyb(x).
// Byte offsets are added to a char pointer: arithmetic on void pointers is a compiler
// extension, and the parentheses keep the cast applied to the whole base address expression.
#define Wyb(x) volatile int \
    (*W##x)[NumN##x][NumIn##x##_b2] = (void *)((char *)(ANN_BASEADDRESS) + (MAX_MUL*2*(x))*sizeof(int)), \
    (*NOT_EXIST##x##0)[MAX_MUL-NumN##x*NumIn##x##_b2] = (void *)((char *)(ANN_BASEADDRESS) + (NumN##x*NumIn##x##_b2 + MAX_MUL*2*(x))*sizeof(int)), \
    (*b##x)[NumN##x] = (void *)((char *)(ANN_BASEADDRESS) + (MAX_MUL*(2*(x)+1))*sizeof(int)), \
    (*NOT_EXIST##x##1)[MAX_MUL-NumN##x] = (void *)((char *)(ANN_BASEADDRESS) + (NumN##x + MAX_MUL*(2*(x)+1))*sizeof(int));
|
#endif // ANN_H |