----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
---- operand_ram_asym ----
|
---- operand_ram_asym ----
|
---- ----
|
---- ----
|
---- This file is part of the ----
|
---- This file is part of the ----
|
---- Modular Simultaneous Exponentiation Core project ----
|
---- Modular Simultaneous Exponentiation Core project ----
|
---- http://www.opencores.org/cores/mod_sim_exp/ ----
|
---- http://www.opencores.org/cores/mod_sim_exp/ ----
|
---- ----
|
---- ----
|
---- Description ----
|
---- Description ----
|
---- BRAM memory and logic to store the operands, due to the ----
|
---- BRAM memory and logic to store the operands, due to the ----
|
---- architecture, a minimum depth of 2 is needed for this ----
|
---- architecture, a minimum depth of 2 is needed for this ----
|
---- module to be inferred into blockram, this version is ----
|
---- module to be inferred into blockram, this version is ----
|
---- slightly more performant than operand_ram_gen and uses ----
|
---- slightly more performant than operand_ram_gen and uses ----
|
---- less resources. but does not work on every fpga, only ----
|
---- less resources. but does not work on every fpga, only ----
|
---- the ones that support asymmetric rams. ----
|
---- the ones that support asymmetric rams. ----
|
---- ----
|
---- ----
|
---- Dependencies: ----
|
---- Dependencies: ----
|
---- - tdpramblock_asym ----
|
---- - tdpramblock_asym ----
|
---- ----
|
---- ----
|
---- Authors: ----
|
---- Authors: ----
|
---- - Geoffrey Ottoy, DraMCo research group ----
|
---- - Geoffrey Ottoy, DraMCo research group ----
|
---- - Jonas De Craene, JonasDC@opencores.org ----
|
---- - Jonas De Craene, JonasDC@opencores.org ----
|
---- ----
|
---- ----
|
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
---- ----
|
---- ----
|
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
|
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
|
---- ----
|
---- ----
|
---- This source file may be used and distributed without ----
|
---- This source file may be used and distributed without ----
|
---- restriction provided that this copyright statement is not ----
|
---- restriction provided that this copyright statement is not ----
|
---- removed from the file and that any derivative work contains ----
|
---- removed from the file and that any derivative work contains ----
|
---- the original copyright notice and the associated disclaimer. ----
|
---- the original copyright notice and the associated disclaimer. ----
|
---- ----
|
---- ----
|
---- This source file is free software; you can redistribute it ----
|
---- This source file is free software; you can redistribute it ----
|
---- and/or modify it under the terms of the GNU Lesser General ----
|
---- and/or modify it under the terms of the GNU Lesser General ----
|
---- Public License as published by the Free Software Foundation; ----
|
---- Public License as published by the Free Software Foundation; ----
|
---- either version 2.1 of the License, or (at your option) any ----
|
---- either version 2.1 of the License, or (at your option) any ----
|
---- later version. ----
|
---- later version. ----
|
---- ----
|
---- ----
|
---- This source is distributed in the hope that it will be ----
|
---- This source is distributed in the hope that it will be ----
|
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
|
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
|
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
|
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
|
---- PURPOSE. See the GNU Lesser General Public License for more ----
|
---- PURPOSE. See the GNU Lesser General Public License for more ----
|
---- details. ----
|
---- details. ----
|
---- ----
|
---- ----
|
---- You should have received a copy of the GNU Lesser General ----
|
---- You should have received a copy of the GNU Lesser General ----
|
---- Public License along with this source; if not, download it ----
|
---- Public License along with this source; if not, download it ----
|
---- from http://www.opencores.org/lgpl.shtml ----
|
---- from http://www.opencores.org/lgpl.shtml ----
|
---- ----
|
---- ----
|
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
|
|
library ieee;
|
library ieee;
|
use ieee.std_logic_1164.all;
|
use ieee.std_logic_1164.all;
|
use ieee.std_logic_arith.all;
|
use ieee.std_logic_arith.all;
|
use ieee.std_logic_unsigned.all;
|
use ieee.std_logic_unsigned.all;
|
|
|
library mod_sim_exp;
|
library mod_sim_exp;
|
|
use mod_sim_exp.mod_sim_exp_pkg.all;
|
use mod_sim_exp.std_functions.all;
|
use mod_sim_exp.std_functions.all;
|
|
|
-- Structural description of a RAM to hold the operands, with adjustable
-- width (64, 128, 256, 512, 576, 640, ..) and depth (number of operands).
--   formula for available widths: (i*512 + (0 or 64 or 128 or 256)), i integer
--
-- The RAM has two sides:
--   * bus side        : 32-bit word access, one word of one operand at a time
--   * multiplier side : full (width)-bit access to one operand at a time
-- A write from the multiplier side takes priority over a simultaneous write
-- from the bus side; the "collision" output flags that situation.
entity operand_ram_asym is
  generic(
    width  : integer := 1536;     -- width of the operands (bits)
    depth  : integer := 4;        -- nr of operands
    device : string  := "xilinx"  -- passed unchanged to tdpramblock_asym
  );
  port(
    -- global ports
    clk             : in  std_logic;
    collision       : out std_logic; -- 1 if simultaneous write on RAM
    -- bus side connections (32-bit serial)
    write_operand   : in  std_logic; -- write_enable
    operand_in_sel  : in  std_logic_vector(log2(depth)-1 downto 0);    -- operand to write to
    operand_addr    : in  std_logic_vector(log2(width/32)-1 downto 0); -- address of operand word to write
    operand_in      : in  std_logic_vector(31 downto 0);               -- operand word (32-bit) to write
    result_out      : out std_logic_vector(31 downto 0);               -- operand out, reading is always result operand
    operand_out_sel : in  std_logic_vector(log2(depth)-1 downto 0);    -- operand to give to multiplier
    -- multiplier side connections (width-bit parallel)
    result_dest_op  : in  std_logic_vector(log2(depth)-1 downto 0);    -- operand select for result
    operand_out     : out std_logic_vector(width-1 downto 0);          -- operand out to multiplier
    write_result    : in  std_logic;                                   -- write enable for multiplier side
    result_in       : in  std_logic_vector(width-1 downto 0)           -- result to write from multiplier
  );
end operand_ram_asym;

architecture Behavioral of operand_ram_asym is
  -- constants
  constant RAMblock_maxwidth   : integer := 512;                       -- port B width of one full RAM block
  constant nrRAMblocks_full    : integer := width/RAMblock_maxwidth;   -- number of full-width blocks
  constant RAMblock_part       : integer := width rem RAMblock_maxwidth; -- /= 0 when a narrower last block is needed
  constant RAMblock_part_width : integer := width-(nrRAMblocks_full*RAMblock_maxwidth);

  -- internal signals
  signal mult_op_sel     : std_logic_vector(log2(depth)-1 downto 0);
  signal write_operand_i : std_logic;
begin
  -- WARNING: Very Important!
  -- wea & web signals must never be high at the same time !!
  -- web has priority
  write_operand_i <= write_operand and not write_result;  -- portB has write priority
  collision       <= write_operand and write_result;

  -- when multiplier is writing back result, select the result address,
  -- otherwise select the operand that is presented to the multiplier
  mult_op_sel <= result_dest_op when write_result = '1' else operand_out_sel;

  -- generate (width/512) ramblocks with a given depth
  -- these rams are tied together to form the following structure
  -- True dual port ram:
  --   - PORT A : 32-bit write      | 32-bit read
  --   - PORT B : (width)-bit write | (width)-bit read
  --

  -- Operand fits in one RAM block: a single block serves both ports directly.
  single_block : if (width <= RAMblock_maxwidth) generate
    -- signals for single block
    signal addrA_single : std_logic_vector(log2(width*depth/32)-1 downto 0);
  begin
    -- port A address = operand number followed by word index within the operand
    addrA_single <= operand_in_sel & operand_addr;

    ramblock : entity mod_sim_exp.tdpramblock_asym
    generic map(
      depth  => depth,
      width  => width,
      device => device
    )
    port map(
      clk => clk,
      -- port A 32-bit
      addrA => addrA_single,
      weA   => write_operand_i,
      dinA  => operand_in,
      doutA => result_out,
      -- port B (width)-bit
      addrB => mult_op_sel,
      weB   => write_result,
      dinB  => result_in,
      doutB => operand_out
    );
  end generate single_block;

  -- Operand is wider than one RAM block: several blocks side by side on
  -- port B, selected one at a time on port A by the upper bits of operand_addr.
  multiple_full_blocks : if (width > RAMblock_maxwidth) generate
    -- number of operand_addr bits that address a 32-bit word inside one full block
    constant word_aw      : integer := log2(RAMblock_maxwidth/32);
    -- number of operand_addr bits that select the RAM block; this is exactly
    -- the width of the operand_addr slice used as block select below
    -- (assumes log2 from std_functions is a ceiling log2 -- confirm against that package)
    constant RAMselect_aw : integer := log2(width/32) - word_aw;
    -- total number of instantiated blocks (full blocks + optional narrower one)
    constant nrRAMblocks  : integer := (width + RAMblock_maxwidth - 1)/RAMblock_maxwidth;

    -- signals for multiple blocks
    type wordsplit is array (nrRAMblocks_full downto 0) of std_logic_vector(31 downto 0);
    signal doutA_RAM : wordsplit;  -- port A read data of every block
    signal addrA     : std_logic_vector(log2(RAMblock_maxwidth*depth/32)-1 downto 0);
    signal weA_RAM   : std_logic_vector(nrRAMblocks_full-1 downto 0);
    signal RAMselect : std_logic_vector(RAMselect_aw-1 downto 0);
  begin
    -- port A signals shared by all full blocks; assigned once, outside the
    -- generate loop, so the signal has a single driver
    addrA     <= operand_in_sel & operand_addr(word_aw-1 downto 0);
    RAMselect <= operand_addr(log2(width/32)-1 downto word_aw);

    -- port A read mux: forward the word of the selected block; a block index
    -- beyond the last instantiated block reads as zero
    result_out <= doutA_RAM(conv_integer(RAMselect))
                    when (conv_integer(RAMselect) < nrRAMblocks)
                    else (others=>'0');

    ramblocks_full : for i in 0 to nrRAMblocks_full generate

      -- blocks of the full RAMblock_maxwidth
      full_ones : if (i < nrRAMblocks_full) generate
        ramblock_full : entity mod_sim_exp.tdpramblock_asym
        generic map(
          depth  => depth,
          width  => RAMblock_maxwidth,
          device => device
        )
        port map(
          clk => clk,
          -- port A 32-bit
          addrA => addrA,
          weA   => weA_RAM(i),
          dinA  => operand_in,
          doutA => doutA_RAM(i),
          -- port B (width)-bit
          addrB => mult_op_sel,
          weB   => write_result,
          dinB  => result_in((i+1)*RAMblock_maxwidth-1 downto i*RAMblock_maxwidth),
          doutB => operand_out((i+1)*RAMblock_maxwidth-1 downto i*RAMblock_maxwidth)
        );

        -- weA: only the block addressed by the upper address bits is written
        weA_RAM(i) <= write_operand_i
                        when RAMselect = conv_std_logic_vector(i, RAMselect_aw)
                        else '0';
      end generate full_ones;

      -- last, narrower block holding the remaining (width rem 512) bits
      optional_part : if (i = nrRAMblocks_full) and (RAMblock_part /= 0) generate
        -- signals for part
        signal addrA_part : std_logic_vector(log2(RAMblock_part_width*depth/32)-1 downto 0);
        signal weA_part   : std_logic;
      begin
        addrA_part <= operand_in_sel & operand_addr(log2(RAMblock_part_width/32)-1 downto 0);

        ramblock_part : entity mod_sim_exp.tdpramblock_asym
        generic map(
          depth  => depth,
          width  => RAMblock_part_width,
          device => device
        )
        port map(
          clk => clk,
          -- port A 32-bit
          addrA => addrA_part,
          weA   => weA_part,
          dinA  => operand_in,
          doutA => doutA_RAM(i),
          -- port B (width)-bit
          addrB => mult_op_sel,
          weB   => write_result,
          dinB  => result_in(width-1 downto i*RAMblock_maxwidth),
          doutB => operand_out(width-1 downto i*RAMblock_maxwidth)
        );

        -- weA part: written only when the upper address bits select this block
        weA_part <= write_operand_i
                      when RAMselect = conv_std_logic_vector(i, RAMselect_aw)
                      else '0';
      end generate optional_part;

    end generate ramblocks_full;
  end generate multiple_full_blocks;

end Behavioral;