1 |
67 |
JonasDC |
----------------------------------------------------------------------
|
2 |
|
|
---- operand_ram_asym ----
|
3 |
|
|
---- ----
|
4 |
|
|
---- This file is part of the ----
|
5 |
|
|
---- Modular Simultaneous Exponentiation Core project ----
|
6 |
|
|
---- http://www.opencores.org/cores/mod_sim_exp/ ----
|
7 |
|
|
---- ----
|
8 |
|
|
---- Description ----
|
9 |
|
|
---- BRAM memory and logic to store the operands, due to the ----
|
10 |
|
|
---- architecture, a minimum depth of 2 is needed for this ----
|
11 |
|
|
---- module to be inferred into blockram, this version is ----
|
12 |
|
|
---- slightly more performant than operand_ram_gen and uses ----
|
13 |
|
|
---- fewer resources, but does not work on every FPGA, only ----
|
14 |
|
|
---- the ones that support asymmetric rams. ----
|
15 |
|
|
---- ----
|
16 |
|
|
---- Dependencies: ----
|
17 |
|
|
---- - tdpramblock_asym ----
|
18 |
|
|
---- ----
|
19 |
|
|
---- Authors: ----
|
20 |
|
|
---- - Geoffrey Ottoy, DraMCo research group ----
|
21 |
|
|
---- - Jonas De Craene, JonasDC@opencores.org ----
|
22 |
|
|
---- ----
|
23 |
|
|
----------------------------------------------------------------------
|
24 |
|
|
---- ----
|
25 |
|
|
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
|
26 |
|
|
---- ----
|
27 |
|
|
---- This source file may be used and distributed without ----
|
28 |
|
|
---- restriction provided that this copyright statement is not ----
|
29 |
|
|
---- removed from the file and that any derivative work contains ----
|
30 |
|
|
---- the original copyright notice and the associated disclaimer. ----
|
31 |
|
|
---- ----
|
32 |
|
|
---- This source file is free software; you can redistribute it ----
|
33 |
|
|
---- and/or modify it under the terms of the GNU Lesser General ----
|
34 |
|
|
---- Public License as published by the Free Software Foundation; ----
|
35 |
|
|
---- either version 2.1 of the License, or (at your option) any ----
|
36 |
|
|
---- later version. ----
|
37 |
|
|
---- ----
|
38 |
|
|
---- This source is distributed in the hope that it will be ----
|
39 |
|
|
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
|
40 |
|
|
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
|
41 |
|
|
---- PURPOSE. See the GNU Lesser General Public License for more ----
|
42 |
|
|
---- details. ----
|
43 |
|
|
---- ----
|
44 |
|
|
---- You should have received a copy of the GNU Lesser General ----
|
45 |
|
|
---- Public License along with this source; if not, download it ----
|
46 |
|
|
---- from http://www.opencores.org/lgpl.shtml ----
|
47 |
|
|
---- ----
|
48 |
|
|
----------------------------------------------------------------------
|
49 |
|
|
|
50 |
|
|
library ieee;
|
51 |
|
|
use ieee.std_logic_1164.all;
|
52 |
|
|
use ieee.std_logic_arith.all;
|
53 |
|
|
use ieee.std_logic_unsigned.all;
|
54 |
|
|
|
55 |
|
|
library mod_sim_exp;
|
56 |
81 |
JonasDC |
use mod_sim_exp.mod_sim_exp_pkg.all;
|
57 |
67 |
JonasDC |
use mod_sim_exp.std_functions.all;
|
58 |
|
|
|
59 |
|
|
-- structural description of a RAM to hold the operands, with
|
60 |
|
|
-- adjustable width (64, 128, 256, 512, 576, 640,..) and depth(nr of operands)
|
61 |
|
|
-- formula for available widths: (i*512+(0 or 64 or 128 or 256)) (i=integer number)
|
62 |
|
|
--
|
63 |
|
|
entity operand_ram_asym is
  generic (
    width  : integer := 1536;     -- operand width in bits
    depth  : integer := 4;        -- number of operands held in the RAM
    device : string  := "xilinx"  -- forwarded unchanged to every tdpramblock_asym instance
  );
  port (
    ---------------------------------------------------------------------
    -- global
    ---------------------------------------------------------------------
    -- '1' while write_operand and write_result are asserted together
    collision       : out std_logic;

    ---------------------------------------------------------------------
    -- bus side (32-bit word access)
    ---------------------------------------------------------------------
    bus_clk         : in  std_logic;
    -- write enable for the 32-bit port
    write_operand   : in  std_logic;
    -- selects the operand that is written through the 32-bit port
    operand_in_sel  : in  std_logic_vector(log2(depth)-1 downto 0);
    -- index of the 32-bit word inside the selected operand
    operand_addr    : in  std_logic_vector(log2(width/32)-1 downto 0);
    -- 32-bit word to store
    operand_in      : in  std_logic_vector(31 downto 0);
    -- 32-bit word read back through the bus-side port
    result_out      : out std_logic_vector(31 downto 0);
    -- selects the operand presented on operand_out (used while write_result is not '1')
    operand_out_sel : in  std_logic_vector(log2(depth)-1 downto 0);

    ---------------------------------------------------------------------
    -- multiplier side (width-bit parallel access)
    ---------------------------------------------------------------------
    core_clk        : in  std_logic;
    -- selects the operand that receives result_in while write_result is '1'
    result_dest_op  : in  std_logic_vector(log2(depth)-1 downto 0);
    -- full-width operand delivered to the multiplier
    operand_out     : out std_logic_vector(width-1 downto 0);
    -- write enable for the width-bit port
    write_result    : in  std_logic;
    -- full-width result coming back from the multiplier
    result_in       : in  std_logic_vector(width-1 downto 0)
  );
end entity operand_ram_asym;
|
88 |
|
|
|
89 |
|
|
architecture Behavioral of operand_ram_asym is
  -- constants
  -- widest RAM block that is instantiated; wider operands are spread over
  -- several blocks placed side by side
  constant RAMblock_maxwidth   : integer := 512;
  -- number of complete RAMblock_maxwidth-bit blocks per operand
  constant nrRAMblocks_full    : integer := width/RAMblock_maxwidth;
  -- bits left over after the full blocks (0 => no partial block is generated)
  constant RAMblock_part       : integer := width rem RAMblock_maxwidth;
  -- width of the optional partial block
  constant RAMblock_part_width : integer := width-(nrRAMblocks_full*RAMblock_maxwidth);
  -- number of upper operand_addr bits that select a RAM block. This must equal
  -- the length of the slice
  --   operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32))
  -- that is compared against the block index further down.
  constant RAMselect_aw        : integer := log2(width/32)-log2(RAMblock_maxwidth/32);

  -- internal signals
  signal mult_op_sel     : std_logic_vector(log2(depth)-1 downto 0); -- port B operand select
  signal write_operand_i : std_logic;                                -- gated port A write enable
begin
  -- WARNING: Very Important!
  -- wea & web signals must never be high at the same time !!
  -- web has priority
  write_operand_i <= write_operand and not write_result; -- portB has write priority
  collision       <= write_operand and write_result;

  -- when multiplier is writing back result, select the result address
  with write_result select
    mult_op_sel <= result_dest_op  when '1',
                   operand_out_sel when others;

  -- generate (width/512) ramblocks with a given depth
  -- these rams are tied together to form the following structure
  -- True dual port ram:
  --   - PORT A : 32-bit write      | 32-bit read
  --   - PORT B : (width)-bit write | (width)-bit read
  --

  ---------------------------------------------------------------------------
  -- operand fits in one RAM block
  ---------------------------------------------------------------------------
  single_block : if (width <= RAMblock_maxwidth) generate
    -- signals for single block
    signal addrA_single : std_logic_vector(log2(width*depth/32)-1 downto 0);
  begin
    -- port A address = operand select followed by word index
    addrA_single <= operand_in_sel & operand_addr;

    ramblock : tdpramblock_asym
    generic map(
      depth  => depth,
      width  => width,
      device => device
    )
    port map(
      -- port A 32-bit
      clkA  => bus_clk,
      addrA => addrA_single,
      weA   => write_operand_i,
      dinA  => operand_in,
      doutA => result_out,
      -- port B (width)-bit
      clkB  => core_clk,
      addrB => mult_op_sel,
      weB   => write_result,
      dinB  => result_in,
      doutB => operand_out
    );
  end generate;

  ---------------------------------------------------------------------------
  -- operand is spread over several RAM blocks
  ---------------------------------------------------------------------------
  multiple_full_blocks : if (width > RAMblock_maxwidth) generate
    -- signals for multiple blocks
    -- one 32-bit port A read word per block; index nrRAMblocks_full belongs to
    -- the optional partial block
    type wordsplit is array (nrRAMblocks_full downto 0) of std_logic_vector(31 downto 0);
    signal doutA_RAM : wordsplit;
    signal addrA     : std_logic_vector(log2(RAMblock_maxwidth*depth/32)-1 downto 0);
    signal weA_RAM   : std_logic_vector(nrRAMblocks_full-1 downto 0);
  begin
    -- port A address shared by all full blocks. It is driven exactly once here;
    -- placing the assignment inside the for-generate below would add one more
    -- driver to this signal for every loop iteration.
    addrA <= operand_in_sel & operand_addr(log2(RAMblock_maxwidth/32)-1 downto 0);

    -- port A read mux: the upper operand_addr bits pick the block whose word
    -- is returned; block indices that do not exist read as zero
    only_full_blocks : if (RAMblock_part = 0) generate
      result_out <= doutA_RAM(conv_integer(operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32))))
                      when (conv_integer(operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32)))<nrRAMblocks_full)
                    else (others=>'0');
    end generate;
    with_extra_part : if (RAMblock_part /= 0) generate
      result_out <= doutA_RAM(conv_integer(operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32))))
                      when (conv_integer(operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32)))<nrRAMblocks_full+1)
                    else (others=>'0');
    end generate;

    -- iterations 0 .. nrRAMblocks_full-1 create the full blocks, the last
    -- iteration creates the partial block (only if one is needed)
    ramblocks_full : for i in 0 to nrRAMblocks_full generate

      full_ones : if (i < nrRAMblocks_full) generate
        ramblock_full : tdpramblock_asym
        generic map(
          depth  => depth,
          width  => RAMblock_maxwidth,
          device => device
        )
        port map(
          -- port A 32-bit
          clkA  => bus_clk,
          addrA => addrA,
          weA   => weA_RAM(i),
          dinA  => operand_in,
          doutA => doutA_RAM(i),
          -- port B (width)-bit
          clkB  => core_clk,
          addrB => mult_op_sel,
          weB   => write_result,
          dinB  => result_in((i+1)*RAMblock_maxwidth-1 downto i*RAMblock_maxwidth),
          doutB => operand_out((i+1)*RAMblock_maxwidth-1 downto i*RAMblock_maxwidth)
        );

        -- port A write enable: only the block addressed by the upper
        -- operand_addr bits sees the (gated) write enable
        weA_full_select : process (write_operand_i, operand_addr)
        begin
          if operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32)) = conv_std_logic_vector(i,RAMselect_aw) then
            weA_RAM(i) <= write_operand_i;
          else
            weA_RAM(i) <= '0';
          end if;
        end process;
      end generate;

      optional_part : if (i = nrRAMblocks_full) and (RAMblock_part /= 0) generate
        -- signals for part
        signal addrA_part : std_logic_vector(log2(RAMblock_part_width*depth/32)-1 downto 0);
        signal weA_part   : std_logic;
      begin
        -- the partial block holds fewer 32-bit words, so fewer low address bits are used
        addrA_part <= operand_in_sel & operand_addr(log2(RAMblock_part_width/32)-1 downto 0);

        ramblock_part : tdpramblock_asym
        generic map(
          depth  => depth,
          width  => RAMblock_part_width,
          device => device
        )
        port map(
          -- port A 32-bit
          clkA  => bus_clk,
          addrA => addrA_part,
          weA   => weA_part,
          dinA  => operand_in,
          doutA => doutA_RAM(i),
          -- port B (width)-bit
          clkB  => core_clk,
          addrB => mult_op_sel,
          weB   => write_result,
          dinB  => result_in(width-1 downto i*RAMblock_maxwidth),
          doutB => operand_out(width-1 downto i*RAMblock_maxwidth)
        );

        -- port A write enable for the partial block
        weA_part_select : process (write_operand_i, operand_addr)
        begin
          if operand_addr(log2(width/32)-1 downto log2(RAMblock_maxwidth/32)) = conv_std_logic_vector(i,RAMselect_aw) then
            weA_part <= write_operand_i;
          else
            weA_part <= '0';
          end if;
        end process;
      end generate;

    end generate;
  end generate;

end Behavioral;
|