URL https://opencores.org/ocsvn/astron_r2sdf_fft/astron_r2sdf_fft/trunk

Subversion Repositories astron_r2sdf_fft

[/] [astron_r2sdf_fft/] [trunk/] [rTwoWMul.vhd] - Blame information for rev 4

Details | Compare with Previous | View Log


--------------------------------------------------------------------------------
--
-- Copyright 2020
-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
-- 
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
-- 
--     http://www.apache.org/licenses/LICENSE-2.0
-- 
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
--------------------------------------------------------------------------------
 
library ieee, common_pkg_lib, common_components_lib, astron_multiplier_lib;
use IEEE.std_logic_1164.all;
--USE technology_lib.technology_select_pkg.ALL;
use common_pkg_lib.common_pkg.all;
 
-- Purpose: complex weight (twiddle) multiplier stage with a bypass path.
-- The architecture multiplies (in_re, in_im) by (weight_re, weight_im), reduces
-- the product to the output width and, steered by in_sel, outputs either that
-- result or the input data delayed by g_lat clock cycles.
entity rTwoWMul is
  generic (
    -- Passed on unchanged to the g_technology generic of common_complex_mult
    g_technology : NATURAL := 0;
    -- Stage number: for g_stage = 1 the "RTL" multiplier variant is always
    -- instantiated, for g_stage > 1 the "IP" variant may be selected
    g_stage      : natural := 1;
    -- Total latency in clock cycles from in_* to out_*
    g_lat        : natural := 3+1       -- 3 for mult, 1 for round
  );
  port (
    clk       : in  std_logic;
    -- rst is connected to the multiplier and to the in_sel / in_val pipelines;
    -- the bypass data pipelines are instantiated without rst
    rst       : in  std_logic;
    -- Complex weight, connected to the b operand of the complex multiplier
    weight_re : in  std_logic_vector;
    weight_im : in  std_logic_vector;
    -- Complex input data, connected to the a operand of the complex multiplier
    -- and also delayed by g_lat for the bypass path
    in_re     : in  std_logic_vector;
    in_im     : in  std_logic_vector;
    -- Input valid, delayed by g_lat to become out_val
    in_val    : in  std_logic;
    -- Output select, delayed by g_lat: '1' selects the multiplier result,
    -- any other value selects the delayed input data
    in_sel    : in  std_logic;
    -- Selected complex output data
    out_re    : out std_logic_vector;
    out_im    : out std_logic_vector;
    -- in_val delayed by g_lat
    out_val   : out std_logic
  );
end entity rTwoWMul;
 
architecture str of rTwoWMul is

  -- Purpose:
  --   Multiply the complex input (in_re, in_im) by the complex weight
  --   (weight_re, weight_im) using common_complex_mult, reduce the product to
  --   the output width and, per sample, output either that result or the input
  --   data delayed by g_lat. The selection is made by in_sel delayed by g_lat.
  -- Remarks:
  -- . The real and imaginary parts share one set of width constants that is
  --   derived from the *_re ports, therefore the *_im ports must have the same
  --   widths as the corresponding *_re ports (checked by an assert below).
  -- . sel_a_b, truncate_and_resize_svec, RESIZE_SVEC, s_round, c_sign_w and
  --   c_dsp_mult_w are not declared in this file; presumably they come from
  --   common_pkg_lib.common_pkg.

  -- Select multiplier product truncate (TRUE) or signed rounding (= away from zero) (FALSE). On hardware
  -- for Fsub in Apertif and using the WG at various frequencies at subband or between subbands it
  -- appears that using truncate or sround does not make a noticeable difference in the SST.
  -- NOTE: the constant is currently TRUE, so truncation is what gets built. Set it to FALSE to use
  -- signed rounding, which was the stated preference to preserve zero DC.
  constant c_use_truncate     : boolean := true; --false;

  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines
  -- the total latency from in_* to out_*.

  -- DSP multiplier IP
  constant c_dsp_mult_lat     : natural := 3;

  -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
  constant c_round_lat        : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0);  -- allocate 1 pipeline for round
  constant c_lat              : natural := g_lat - c_round_lat;                    -- allocate remaining pipeline to multiplier

  constant c_mult_input_lat   : natural := sel_a_b(c_lat>1, 1, 0);                     -- second priority use DSP pipeline input
  constant c_mult_product_lat : natural := 0;
  constant c_mult_adder_lat   : natural := sel_a_b(c_lat>2, 1, 0);                     -- third priority use DSP internal product-sum pipeline
  constant c_mult_extra_lat   : natural := sel_a_b(c_lat>3, c_lat-3, 0);               -- remaining extra pipelining in logic
  constant c_mult_output_lat  : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
  constant c_mult_lat         : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;

  -- Total input to output latency
  constant c_total_lat   : natural := c_mult_lat + c_round_lat;

  -- Quantization, the widths are taken from the real part ports and are used for the imaginary part as well
  constant c_in_dat_w         : natural:= in_re'length;
  constant c_weight_w         : natural:= weight_re'length;
  constant c_prod_w           : natural:= c_in_dat_w+c_weight_w+1;   -- NOTE(review): +1 presumably for the sum of two products, confirm
  constant c_round_w          : natural:= c_weight_w-c_sign_w;       -- the weights are normalized
  constant c_out_dat_w        : natural:= out_re'length;

  -- Delayed copies of the input data for the bypass path, each sized from its own port
  signal in_re_dly        : std_logic_vector(in_re'range);
  signal in_im_dly        : std_logic_vector(in_im'range);
  -- Full width complex product from the multiplier
  signal product_re       : std_logic_vector(c_prod_w-1 downto 0);
  signal product_im       : std_logic_vector(c_prod_w-1 downto 0);
  -- Product reduced to the output width, each sized from its own port
  signal round_re         : std_logic_vector(out_re'range);
  signal round_im         : std_logic_vector(out_im'range);
  -- in_sel delayed by g_lat
  signal out_sel          : std_logic;

begin

  -- Total latency check
  ASSERT c_total_lat = g_lat
    REPORT "rTwoWMul: total pipeline error"
    SEVERITY FAILURE;

  -- Width check: c_in_dat_w, c_weight_w and c_out_dat_w are derived from the real part ports only and are
  -- applied to the imaginary part too, so report a mismatch explicitly instead of relying on a length
  -- error somewhere further down in the design.
  ASSERT in_im'length = in_re'length and weight_im'length = weight_re'length and out_im'length = out_re'length
    REPORT "rTwoWMul: real and imaginary port widths must be equal"
    SEVERITY FAILURE;

  ------------------------------------------------------------------------------
  -- Complex multiplication
  -- . use the common_complex_mult(rtl) for the output stage 1 because then
  --   the multiplier instance can get optimized away for the constant
  --   weight_re = 1 and weight_im = 0 inputs.
  -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
  --   inputs.
  --   . for c_lat = 0,1,2 use the RTL multiplier
  --   . for c_lat >= 3 default best use the FPGA multiplier IP block.
  -- . the gen_rtl and gen_ip conditions are each others complement, so exactly
  --   one of the two multiplier instances is generated.
  ------------------------------------------------------------------------------

  gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate
    u_CmplxMul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "RTL",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst       => rst,
      clk       => clk,
      in_ar     => in_re,
      in_ai     => in_im,
      in_br     => weight_re,
      in_bi     => weight_im,
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
      out_val   => OPEN      -- out_val is derived by u_pipeline_out_val instead
    );
  end generate;

  gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate
    u_cmplx_mul : entity astron_multiplier_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "IP",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst       => rst,
      clk       => clk,
      in_ar     => in_re,
      in_ai     => in_im,
      in_br     => weight_re,
      in_bi     => weight_im,
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
      out_val   => OPEN      -- out_val is derived by u_pipeline_out_val instead
    );
  end generate;

  ------------------------------------------------------------------------------
  -- Round WMult output
  -- . c_round_lat = 0 : combinatorial
  -- . c_round_lat = 1 : registered on the rising edge of clk
  ------------------------------------------------------------------------------

  gen_truncate : if c_use_truncate generate
    -- use truncate    that throws away the c_round_w lower bits as rounding function
    -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
    gen_comb : if c_round_lat=0 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;


  gen_sround : if not c_use_truncate generate
    -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0
    -- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing
    gen_comb : if c_round_lat=0 generate
      -- This assert is only elaborated when signed rounding is selected without a round pipeline stage
      ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;


  ------------------------------------------------------------------------------
  -- Propagate data and control signals for input/output choice at WMult output
  -- . all four pipelines use g_lat, so that the bypass data, out_sel and
  --   out_val are aligned with the multiplier + round path
  ------------------------------------------------------------------------------

  -- No need to use rst for data, because initial data value is don't care
  u_re_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_re'length,
    g_out_dat_w => in_re'length
  )
  port map (
    clk     => clk,
    in_dat  => in_re,
    out_dat => in_re_dly
  );

  u_im_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_im'length,
    g_out_dat_w => in_im'length
  )
  port map (
    clk     => clk,
    in_dat  => in_im,
    out_dat => in_im_dly
  );

  -- Use rst for control to ensure initial low
  u_sel_lat : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_sel,
    out_dat => out_sel
  );

  u_pipeline_out_val : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline  => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_val,
    out_dat => out_val
  );

  ------------------------------------------------------------------------------
  -- Output real and imaginary, switch between input and product
  -- . out_sel = '1' selects the rounded product, any other value selects the
  --   delayed input, which then must have the same width as the output port
  ------------------------------------------------------------------------------
  out_re <= round_re when out_sel = '1' else in_re_dly;
  out_im <= round_im when out_sel = '1' else in_im_dly;

end str;

Browse

Tools

Subversion Repositories astron_r2sdf_fft

[/] [astron_r2sdf_fft/] [trunk/] [rTwoWMul.vhd] - Blame information for rev 4

Line No.	Rev	Author	Line
1	2	danv	`--------------------------------------------------------------------------------`
2			`--`
3	3	danv	`-- Copyright 2020`
4			`-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>`
5			`-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands`
6			`--`
7			`-- Licensed under the Apache License, Version 2.0 (the "License");`
8			`-- you may not use this file except in compliance with the License.`
9			`-- You may obtain a copy of the License at`
10			`--`
11			`-- http://www.apache.org/licenses/LICENSE-2.0`
12			`--`
13			`-- Unless required by applicable law or agreed to in writing, software`
14			`-- distributed under the License is distributed on an "AS IS" BASIS,`
15			`-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
16			`-- See the License for the specific language governing permissions and`
17			`-- limitations under the License.`
18	2	danv	`--`
19			`--------------------------------------------------------------------------------`
20
21	4	danv	`library ieee, common_pkg_lib, common_components_lib, astron_multiplier_lib;`
22	2	danv	`use IEEE.std_logic_1164.all;`
23	4	danv	`--USE technology_lib.technology_select_pkg.ALL;`
24	2	danv	`use common_pkg_lib.common_pkg.all;`
25
26			`entity rTwoWMul is`
27			`generic (`
28	4	danv	`g_technology : NATURAL := 0;`
29	2	danv	`g_stage : natural := 1;`
30			`g_lat : natural := 3+1 -- 3 for mult, 1 for round`
31			`);`
32			`port (`
33			`clk : in std_logic;`
34			`rst : in std_logic;`
35			`weight_re : in std_logic_vector;`
36			`weight_im : in std_logic_vector;`
37			`in_re : in std_logic_vector;`
38			`in_im : in std_logic_vector;`
39			`in_val : in std_logic;`
40			`in_sel : in std_logic;`
41			`out_re : out std_logic_vector;`
42			`out_im : out std_logic_vector;`
43			`out_val : out std_logic`
44			`);`
45			`end entity rTwoWMul;`
46
47			`architecture str of rTwoWMul is`
48
49			`-- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in`
50			`-- Apertif and using the WG at various frequencies at subband or between subbands it appears that`
51			`-- using truncate or sround does not make a noticable difference in the SST. Still choose to use`
52			`-- signed rounding to preserve zero DC.`
53			`constant c_use_truncate : boolean := true; --false;`
54
55			`-- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines`
56			`-- the total latency from in_* to out_*.`
57
58			`-- DSP multiplier IP`
59			`constant c_dsp_mult_lat : natural := 3;`
60
61			`-- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w`
62			`constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0); -- allocate 1 pipeline for round`
63			`constant c_lat : natural := g_lat - c_round_lat; -- allocate remaining pipeline to multiplier`
64
65			`constant c_mult_input_lat : natural := sel_a_b(c_lat>1, 1, 0); -- second priority use DSP pipeline input`
66			`constant c_mult_product_lat : natural := 0;`
67			`constant c_mult_adder_lat : natural := sel_a_b(c_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline`
68			`constant c_mult_extra_lat : natural := sel_a_b(c_lat>3, c_lat-3, 0); -- remaining extra pipelining in logic`
69			`constant c_mult_output_lat : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output`
70			`constant c_mult_lat : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;`
71
72			`-- Total input to output latency`
73			`constant c_total_lat : natural := c_mult_lat + c_round_lat;`
74
75			`-- Quantization`
76			`constant c_in_dat_w : natural:= in_re'length;`
77			`constant c_weight_w : natural:= weight_re'length;`
78			`constant c_prod_w : natural:= c_in_dat_w+c_weight_w+1;`
79			`constant c_round_w : natural:= c_weight_w-c_sign_w; -- the weights are normalized`
80			`constant c_out_dat_w : natural:= out_re'length;`
81
82			`signal in_re_dly : std_logic_vector(in_re'range);`
83			`signal in_im_dly : std_logic_vector(in_re'range);`
84			`signal product_re : std_logic_vector(c_prod_w-1 downto 0);`
85			`signal product_im : std_logic_vector(c_prod_w-1 downto 0);`
86			`signal round_re : std_logic_vector(out_re'range);`
87			`signal round_im : std_logic_vector(out_re'range);`
88			`signal out_sel : std_logic;`
89
90			`begin`
91
92			`-- Total latency check`
93			`ASSERT c_total_lat = g_lat`
94			`REPORT "rTwoWMul: total pipeline error"`
95			`SEVERITY FAILURE;`
96
97			`------------------------------------------------------------------------------`
98			`-- Complex multiplication`
99			`-- . use the common_complex_mult(rtl) for the output stage 1 because then`
100			`-- the multiplier instance can get optimized away for the constant`
101			`-- weight_re = 1 and weight_im = 0 inputs.`
102			`-- . the IP in common_complex_mult(stratix4) only supports up to 18b wide`
103			`-- inputs.`
104			`-- . for c_lat = 0,1,2 use the RTL multiplier`
105			`-- . for c_lat >= 3 default best use the FPGA multiplier IP block.`
106			`------------------------------------------------------------------------------`
107
108			`gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate`
109	4	danv	`u_CmplxMul : entity astron_multiplier_lib.common_complex_mult`
110	2	danv	`generic map (`
111			`g_technology => g_technology,`
112			`g_variant => "RTL",`
113			`g_in_a_w => c_in_dat_w,`
114			`g_in_b_w => c_weight_w,`
115			`g_out_p_w => c_prod_w,`
116			`g_conjugate_b => false,`
117			`g_pipeline_input => c_mult_input_lat,`
118			`g_pipeline_product => c_mult_product_lat,`
119			`g_pipeline_adder => c_mult_adder_lat,`
120			`g_pipeline_output => c_mult_output_lat`
121			`)`
122			`port map (`
123			`rst => rst,`
124			`clk => clk,`
125			`in_ar => in_re,`
126			`in_ai => in_im,`
127			`in_br => weight_re,`
128			`in_bi => weight_im,`
129			`in_val => in_val,`
130			`out_pr => product_re,`
131			`out_pi => product_im,`
132			`out_val => OPEN`
133			`);`
134			`end generate;`
135
136			`gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate`
137	4	danv	`u_cmplx_mul : entity astron_multiplier_lib.common_complex_mult`
138	2	danv	`generic map (`
139			`g_technology => g_technology,`
140			`g_variant => "IP",`
141			`g_in_a_w => in_re'length,`
142			`g_in_b_w => weight_re'length,`
143			`g_out_p_w => product_re'length,`
144			`g_conjugate_b => false,`
145			`g_pipeline_input => c_mult_input_lat,`
146			`g_pipeline_product => c_mult_product_lat,`
147			`g_pipeline_adder => c_mult_adder_lat,`
148			`g_pipeline_output => c_mult_output_lat`
149			`)`
150			`port map (`
151			`rst => rst,`
152			`clk => clk,`
153			`in_ar => in_re,`
154			`in_ai => in_im,`
155			`in_br => weight_re,`
156			`in_bi => weight_im,`
157			`in_val => in_val,`
158			`out_pr => product_re,`
159			`out_pi => product_im,`
160			`out_val => OPEN`
161			`);`
162			`end generate;`
163
164			`------------------------------------------------------------------------------`
165			`-- Round WMult output`
166			`------------------------------------------------------------------------------`
167
168			`gen_truncate : if c_use_truncate=true GENERATE`
169			`-- use truncate that throws away the c_round_w lower bits as rounding function`
170			`-- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width`
171			`gen_comb : if c_round_lat=0 generate`
172			`round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);`
173			`round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);`
174			`end generate;`
175			`gen_reg : if c_round_lat=1 generate`
176			`round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);`
177			`round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);`
178			`end generate;`
179			`end generate;`
180
181
182			`gen_sround : if c_use_truncate=false GENERATE`
183			`-- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0`
184			`-- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing`
185			`gen_comb : if c_round_lat=0 generate`
186			`ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;`
187			`round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);`
188			`round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);`
189			`end generate;`
190			`gen_reg : if c_round_lat=1 generate`
191			`round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);`
192			`round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);`
193			`end generate;`
194			`end generate;`
195
196
197			`------------------------------------------------------------------------------`
198			`-- Propagate data and control signals for input/output choice at WMult output`
199			`------------------------------------------------------------------------------`
200
201			`-- No need to use rst for data, because initial data value is don't care`
202			`u_re_lat : entity common_components_lib.common_pipeline`
203			`generic map (`
204			`g_pipeline => g_lat,`
205			`g_in_dat_w => in_re'length,`
206			`g_out_dat_w => in_re'length`
207			`)`
208			`port map (`
209			`clk => clk,`
210			`in_dat => in_re,`
211			`out_dat => in_re_dly`
212			`);`
213
214			`u_im_lat : entity common_components_lib.common_pipeline`
215			`generic map (`
216			`g_pipeline => g_lat,`
217			`g_in_dat_w => in_im'length,`
218			`g_out_dat_w => in_im'length`
219			`)`
220			`port map (`
221			`clk => clk,`
222			`in_dat => in_im,`
223			`out_dat => in_im_dly`
224			`);`
225
226			`-- Use rst for control to ensure initial low`
227			`u_sel_lat : entity common_components_lib.common_pipeline_sl`
228			`generic map (`
229			`g_pipeline => g_lat`
230			`)`
231			`port map (`
232			`rst => rst,`
233			`clk => clk,`
234			`in_dat => in_sel,`
235			`out_dat => out_sel`
236			`);`
237
238			`u_pipeline_out_val : entity common_components_lib.common_pipeline_sl`
239			`generic map (`
240			`g_pipeline => g_lat`
241			`)`
242			`port map (`
243			`rst => rst,`
244			`clk => clk,`
245			`in_dat => in_val,`
246			`out_dat => out_val`
247			`);`
248
249			`------------------------------------------------------------------------------`
250			`-- Output real and imaginary, switch between input and product`
251			`------------------------------------------------------------------------------`
252			`out_re <= round_re when out_sel = '1' else in_re_dly;`
253			`out_im <= round_im when out_sel = '1' else in_im_dly;`
254
255			`end str;`