1 |
2 |
danv |
--------------------------------------------------------------------------------
|
2 |
|
|
--
|
3 |
3 |
danv |
-- Copyright 2020
|
4 |
|
|
-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
|
5 |
|
|
-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
|
6 |
|
|
--
|
7 |
|
|
-- Licensed under the Apache License, Version 2.0 (the "License");
|
8 |
|
|
-- you may not use this file except in compliance with the License.
|
9 |
|
|
-- You may obtain a copy of the License at
|
10 |
|
|
--
|
11 |
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
12 |
|
|
--
|
13 |
|
|
-- Unless required by applicable law or agreed to in writing, software
|
14 |
|
|
-- distributed under the License is distributed on an "AS IS" BASIS,
|
15 |
|
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16 |
|
|
-- See the License for the specific language governing permissions and
|
17 |
|
|
-- limitations under the License.
|
18 |
2 |
danv |
--
|
19 |
|
|
--------------------------------------------------------------------------------
|
20 |
|
|
|
21 |
4 |
danv |
library ieee, common_pkg_lib, common_components_lib, astron_multiplier_lib;
|
22 |
2 |
danv |
use IEEE.std_logic_1164.all;
|
23 |
4 |
danv |
--USE technology_lib.technology_select_pkg.ALL;
|
24 |
2 |
danv |
use common_pkg_lib.common_pkg.all;
|
25 |
|
|
|
26 |
|
|
-- Purpose: Complex weight (twiddle) multiplier with selectable bypass.
-- Description:
--   The complex input (in_re, in_im) is multiplied by the complex weight
--   (weight_re, weight_im) and the product is quantized to the width of the
--   out_* ports. In parallel the unmodified input is delayed by the same
--   number of clock cycles. The delayed in_sel chooses per sample which of
--   the two is put on out_re/out_im:
--   . in_sel = '1' : quantized product
--   . in_sel = '0' : delayed input
-- Remarks:
-- . All vector ports are unconstrained, their widths follow from the actual
--   signals connected at instantiation. The architecture takes the data
--   width from in_re, the weight width from weight_re and the output width
--   from out_re.
entity rTwoWMul is
  generic (
    g_technology : natural := 0;    -- passed on to common_complex_mult
    g_stage      : natural := 1;    -- stage number, stage 1 always uses the RTL multiplier variant
    g_lat        : natural := 3+1   -- total in_* to out_* latency: 3 for mult, 1 for round
  );
  port (
    clk       : in  std_logic;
    rst       : in  std_logic;         -- resets the in_sel and in_val pipelines
    weight_re : in  std_logic_vector;  -- real part of the weight
    weight_im : in  std_logic_vector;  -- imaginary part of the weight
    in_re     : in  std_logic_vector;  -- real part of the input data
    in_im     : in  std_logic_vector;  -- imaginary part of the input data
    in_val    : in  std_logic;         -- input valid, delayed by g_lat to out_val
    in_sel    : in  std_logic;         -- '1' = output product, '0' = output delayed input
    out_re    : out std_logic_vector;  -- real part of the selected output
    out_im    : out std_logic_vector;  -- imaginary part of the selected output
    out_val   : out std_logic          -- in_val delayed by g_lat
  );
end entity rTwoWMul;
|
46 |
|
|
|
47 |
|
|
architecture str of rTwoWMul is

  -- Select how the multiplier product is quantized to the output width:
  -- . true  : truncate, throw away the c_round_w lower bits
  -- . false : signed rounding (= away from zero), which preserves zero DC
  -- On hardware for Fsub in Apertif and using the WG at various frequencies at subband or between
  -- subbands it appears that using truncate or sround does not make a noticeable difference in
  -- the SST.
  -- NOTE(review): the original comment stated that signed rounding is chosen, but the constant
  -- selects truncation (the previous value 'false' was left behind as a trailing comment). The
  -- value is kept unchanged here, confirm which one is intended.
  constant c_use_truncate : boolean := true;  --false;

  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat,
  -- so that g_lat defines the total latency from in_* to out_*.

  -- DSP multiplier IP
  constant c_dsp_mult_lat : natural := 3;

  -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
  constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0);  -- allocate 1 pipeline for round
  constant c_lat       : natural := g_lat - c_round_lat;                    -- allocate remaining pipeline to multiplier

  -- Distribute c_lat over the multiplier pipeline stages in order of priority
  constant c_mult_input_lat   : natural := sel_a_b(c_lat>1, 1, 0);                     -- second priority use DSP pipeline input
  constant c_mult_product_lat : natural := 0;
  constant c_mult_adder_lat   : natural := sel_a_b(c_lat>2, 1, 0);                     -- third priority use DSP internal product-sum pipeline
  constant c_mult_extra_lat   : natural := sel_a_b(c_lat>3, c_lat-3, 0);               -- remaining extra pipelining in logic
  constant c_mult_output_lat  : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
  constant c_mult_lat         : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;

  -- Total input to output latency
  constant c_total_lat : natural := c_mult_lat + c_round_lat;

  -- Quantization
  -- . c_sign_w is not declared in this file (presumably from common_pkg)
  constant c_in_dat_w  : natural := in_re'length;
  constant c_weight_w  : natural := weight_re'length;
  constant c_prod_w    : natural := c_in_dat_w+c_weight_w+1;
  constant c_round_w   : natural := c_weight_w-c_sign_w;  -- the weights are normalized
  constant c_out_dat_w : natural := out_re'length;

  -- Multiplier variant selection
  -- . c_dsp_mult_w is not declared in this file (presumably from common_pkg)
  -- . Use a single boolean for both generate conditions, so that exactly one multiplier
  --   instance exists for every value of g_stage. With the separate conditions g_stage = 1
  --   and g_stage > 1 the value g_stage = 0 could leave product_re/im without driver.
  constant c_use_rtl_mult : boolean := g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat;

  -- Delayed copy of the input data, used when the product is not selected
  signal in_re_dly  : std_logic_vector(in_re'range);
  signal in_im_dly  : std_logic_vector(in_im'range);
  -- Full width complex product
  signal product_re : std_logic_vector(c_prod_w-1 downto 0);
  signal product_im : std_logic_vector(c_prod_w-1 downto 0);
  -- Product quantized to the output width
  signal round_re   : std_logic_vector(out_re'range);
  signal round_im   : std_logic_vector(out_im'range);
  -- in_sel delayed by g_lat
  signal out_sel    : std_logic;

begin

  -- Total latency check
  assert c_total_lat = g_lat
    report "rTwoWMul: total pipeline error"
    severity failure;

  ------------------------------------------------------------------------------
  -- Complex multiplication
  -- . use the common_complex_mult(rtl) for the output stage 1 because then
  --   the multiplier instance can get optimized away for the constant
  --   weight_re = 1 and weight_im = 0 inputs.
  -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
  --   inputs.
  -- . for c_lat = 0,1,2 use the RTL multiplier
  -- . for c_lat >= 3 default best use the FPGA multiplier IP block.
  ------------------------------------------------------------------------------

  gen_rtl : if c_use_rtl_mult generate
    u_CmplxMul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "RTL",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst     => rst,
      clk     => clk,
      in_ar   => in_re,
      in_ai   => in_im,
      in_br   => weight_re,
      in_bi   => weight_im,
      in_val  => in_val,
      out_pr  => product_re,
      out_pi  => product_im,
      out_val => open  -- out_val is derived separately via u_pipeline_out_val
    );
  end generate;

  gen_ip : if not c_use_rtl_mult generate
    u_cmplx_mul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "IP",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst     => rst,
      clk     => clk,
      in_ar   => in_re,
      in_ai   => in_im,
      in_br   => weight_re,
      in_bi   => weight_im,
      in_val  => in_val,
      out_pr  => product_re,
      out_pi  => product_im,
      out_val => open  -- out_val is derived separately via u_pipeline_out_val
    );
  end generate;

  ------------------------------------------------------------------------------
  -- Round WMult output
  ------------------------------------------------------------------------------

  gen_truncate : if c_use_truncate=true generate
    -- use truncate that throws away the c_round_w lower bits as rounding function
    -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
    gen_comb : if c_round_lat=0 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;

  gen_sround : if c_use_truncate=false generate
    -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0
    -- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing
    gen_comb : if c_round_lat=0 generate
      assert false report "rTwoWMul: can probably not achieve timing for sround without pipeline" severity failure;
      round_re <= resize_svec(s_round(product_re, c_round_w), c_out_dat_w);
      round_im <= resize_svec(s_round(product_im, c_round_w), c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= resize_svec(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
      round_im <= resize_svec(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;

  ------------------------------------------------------------------------------
  -- Propagate data and control signals for input/output choice at WMult output
  ------------------------------------------------------------------------------

  -- No need to use rst for data, because initial data value is don't care
  u_re_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_re'length,
    g_out_dat_w => in_re'length
  )
  port map (
    clk     => clk,
    in_dat  => in_re,
    out_dat => in_re_dly
  );

  u_im_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_im'length,
    g_out_dat_w => in_im'length
  )
  port map (
    clk     => clk,
    in_dat  => in_im,
    out_dat => in_im_dly
  );

  -- Use rst for control to ensure initial low
  u_sel_lat : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_sel,
    out_dat => out_sel
  );

  u_pipeline_out_val : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_val,
    out_dat => out_val
  );

  ------------------------------------------------------------------------------
  -- Output real and imaginary, switch between input and product
  ------------------------------------------------------------------------------
  out_re <= round_re when out_sel = '1' else in_re_dly;
  out_im <= round_im when out_sel = '1' else in_im_dly;

end str;
|