| 1 |
2 |
danv |
--------------------------------------------------------------------------------
|
| 2 |
|
|
--
|
| 3 |
3 |
danv |
-- Copyright 2020
|
| 4 |
|
|
-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
|
| 5 |
|
|
-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
|
| 6 |
|
|
--
|
| 7 |
|
|
-- Licensed under the Apache License, Version 2.0 (the "License");
|
| 8 |
|
|
-- you may not use this file except in compliance with the License.
|
| 9 |
|
|
-- You may obtain a copy of the License at
|
| 10 |
|
|
--
|
| 11 |
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
| 12 |
|
|
--
|
| 13 |
|
|
-- Unless required by applicable law or agreed to in writing, software
|
| 14 |
|
|
-- distributed under the License is distributed on an "AS IS" BASIS,
|
| 15 |
|
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 16 |
|
|
-- See the License for the specific language governing permissions and
|
| 17 |
|
|
-- limitations under the License.
|
| 18 |
2 |
danv |
--
|
| 19 |
|
|
--------------------------------------------------------------------------------
|
| 20 |
|
|
|
| 21 |
4 |
danv |
library ieee, common_pkg_lib, common_components_lib, astron_multiplier_lib;
|
| 22 |
2 |
danv |
use IEEE.std_logic_1164.all;
|
| 23 |
4 |
danv |
--USE technology_lib.technology_select_pkg.ALL;
|
| 24 |
2 |
danv |
use common_pkg_lib.common_pkg.all;
|
| 25 |
|
|
|
| 26 |
|
|
-- Purpose: complex weight multiplier stage. The complex input (in_re, in_im)
--   is multiplied by the complex weight (weight_re, weight_im), the product
--   is quantized to the output width, and per sample either the quantized
--   product or the equally delayed, unmodified input is output.
-- Remark:
-- . All vector ports are unconstrained; the architecture derives its widths
--   from in_re'length, weight_re'length and out_re'length.
entity rTwoWMul is
  generic (
    g_technology : natural := 0;      -- forwarded to common_complex_mult.g_technology
    g_stage      : natural := 1;      -- stage index; for g_stage = 1 the RTL multiplier variant is used
    g_lat        : natural := 3 + 1   -- total in_* to out_* latency: 3 for mult, 1 for round
  );
  port (
    clk       : in  std_logic;
    rst       : in  std_logic;         -- used by the multiplier and by the control (sel, val) pipelines
    weight_re : in  std_logic_vector;  -- weight, real part
    weight_im : in  std_logic_vector;  -- weight, imaginary part
    in_re     : in  std_logic_vector;  -- input data, real part
    in_im     : in  std_logic_vector;  -- input data, imaginary part
    in_val    : in  std_logic;         -- input valid
    in_sel    : in  std_logic;         -- '1' selects the product, otherwise the delayed input is passed on
    out_re    : out std_logic_vector;  -- output data, real part
    out_im    : out std_logic_vector;  -- output data, imaginary part
    out_val   : out std_logic          -- in_val delayed by g_lat
  );
end rTwoWMul;
|
| 46 |
|
|
|
| 47 |
|
|
architecture str of rTwoWMul is

  -- Purpose: multiply the complex input by the complex weight, quantize the
  --   product to the output width and select per sample between the quantized
  --   product and the equally delayed input.
  -- Description:
  -- . The total latency from in_* to out_* is g_lat for both the product path
  --   and the bypass path, so out_sel, out_val and the data stay aligned.
  -- . The real and imaginary ports of each pair (in, weight, out) are expected
  --   to have equal widths, because the widths are derived from the real part
  --   only.

  -- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in
  -- Apertif and using the WG at various frequencies at subband or between subbands it appears that
  -- using truncate or sround does not make a noticeable difference in the SST.
  -- NOTE(review): the original rationale states that signed rounding is chosen to preserve zero DC,
  -- but the constant below selects truncate. The value is deliberately left unchanged here; confirm
  -- which of the two is intended.
  constant c_use_truncate : boolean := true;  -- false;

  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so
  -- that g_lat defines the total latency from in_* to out_*.

  -- DSP multiplier IP
  constant c_dsp_mult_lat : natural := 3;

  -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
  constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0);  -- allocate 1 pipeline for round
  constant c_lat       : natural := g_lat - c_round_lat;                    -- allocate remaining pipeline to multiplier

  constant c_mult_input_lat   : natural := sel_a_b(c_lat > 1, 1, 0);          -- second priority use DSP pipeline input
  constant c_mult_product_lat : natural := 0;
  constant c_mult_adder_lat   : natural := sel_a_b(c_lat > 2, 1, 0);          -- third priority use DSP internal product-sum pipeline
  constant c_mult_extra_lat   : natural := sel_a_b(c_lat > 3, c_lat - 3, 0);  -- remaining extra pipelining in logic
  constant c_mult_output_lat  : natural := sel_a_b(c_lat > 0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
  constant c_mult_lat         : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;

  -- Total input to output latency
  constant c_total_lat : natural := c_mult_lat + c_round_lat;

  -- Quantization
  constant c_in_dat_w  : natural := in_re'length;
  constant c_weight_w  : natural := weight_re'length;
  constant c_prod_w    : natural := c_in_dat_w + c_weight_w + 1;
  constant c_round_w   : natural := c_weight_w - c_sign_w;  -- the weights are normalized
  constant c_out_dat_w : natural := out_re'length;

  -- Each imaginary signal takes its range from its own port (not from the real part port), so
  -- that it matches the port it is connected to.
  signal in_re_dly  : std_logic_vector(in_re'range);
  signal in_im_dly  : std_logic_vector(in_im'range);
  signal product_re : std_logic_vector(c_prod_w-1 downto 0);
  signal product_im : std_logic_vector(c_prod_w-1 downto 0);
  signal round_re   : std_logic_vector(out_re'range);
  signal round_im   : std_logic_vector(out_im'range);
  signal out_sel    : std_logic;

begin

  -- Total latency check
  assert c_total_lat = g_lat
    report "rTwoWMul: total pipeline error"
    severity failure;

  ------------------------------------------------------------------------------
  -- Complex multiplication
  -- . use the common_complex_mult(rtl) for the output stage 1 because then
  --   the multiplier instance can get optimized away for the constant
  --   weight_re = 1 and weight_im = 0 inputs.
  -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
  --   inputs.
  -- . for c_lat = 0,1,2 use the RTL multiplier
  -- . for c_lat >= 3 default best use the FPGA multiplier IP block.
  -- . the gen_rtl and gen_ip conditions are each others complement, so exactly
  --   one multiplier instance drives product_re and product_im.
  ------------------------------------------------------------------------------

  gen_rtl : if g_stage = 1 or c_in_dat_w > c_dsp_mult_w or c_lat < c_dsp_mult_lat generate
    u_CmplxMul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "RTL",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst     => rst,
      clk     => clk,
      in_ar   => in_re,
      in_ai   => in_im,
      in_br   => weight_re,
      in_bi   => weight_im,
      in_val  => in_val,
      out_pr  => product_re,
      out_pi  => product_im,
      out_val => open  -- out_val is derived from in_val by u_pipeline_out_val
    );
  end generate;

  gen_ip : if g_stage > 1 and c_in_dat_w <= c_dsp_mult_w and c_lat >= c_dsp_mult_lat generate
    u_cmplx_mul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "IP",
      g_in_a_w           => c_in_dat_w,  -- same width constants as used for gen_rtl
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst     => rst,
      clk     => clk,
      in_ar   => in_re,
      in_ai   => in_im,
      in_br   => weight_re,
      in_bi   => weight_im,
      in_val  => in_val,
      out_pr  => product_re,
      out_pi  => product_im,
      out_val => open  -- out_val is derived from in_val by u_pipeline_out_val
    );
  end generate;

  ------------------------------------------------------------------------------
  -- Round WMult output
  ------------------------------------------------------------------------------

  gen_truncate : if c_use_truncate generate
    -- use truncate that throws away the c_round_w lower bits as rounding function
    -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
    gen_comb : if c_round_lat = 0 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat = 1 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;

  gen_sround : if not c_use_truncate generate
    -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0
    -- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing
    gen_comb : if c_round_lat = 0 generate
      -- Unconditional failure: signed rounding without the round pipeline stage is rejected
      assert false report "rTwoWMul: can probably not achieve timing for sround without pipeline" severity failure;
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat = 1 generate
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;

  ------------------------------------------------------------------------------
  -- Propagate data and control signals for input/output choice at WMult output
  ------------------------------------------------------------------------------

  -- No need to use rst for data, because initial data value is don't care
  u_re_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_re'length,
    g_out_dat_w => in_re'length
  )
  port map (
    clk     => clk,
    in_dat  => in_re,
    out_dat => in_re_dly
  );

  u_im_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_im'length,
    g_out_dat_w => in_im'length
  )
  port map (
    clk     => clk,
    in_dat  => in_im,
    out_dat => in_im_dly
  );

  -- Use rst for control to ensure initial low
  u_sel_lat : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_sel,
    out_dat => out_sel
  );

  u_pipeline_out_val : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_val,
    out_dat => out_val
  );

  ------------------------------------------------------------------------------
  -- Output real and imaginary, switch between input and product
  ------------------------------------------------------------------------------
  out_re <= round_re when out_sel = '1' else in_re_dly;
  out_im <= round_im when out_sel = '1' else in_im_dly;

end str;
|