URL https://opencores.org/ocsvn/astron_r2sdf_fft/astron_r2sdf_fft/trunk

Subversion Repositories astron_r2sdf_fft

[/] [astron_r2sdf_fft/] [trunk/] [rTwoWMul.vhd] - Blame information for rev 2

Go to most recent revision | Details | Compare with Previous | View Log


--------------------------------------------------------------------------------
--   Author: Raj Thilak Rajan : rajan at astron.nl: Nov 2009
--   Copyright (C) 2009-2010
--   ASTRON (Netherlands Institute for Radio Astronomy)
--   P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
--
--   This file is part of the UniBoard software suite.
--   The file is free software: you can redistribute it and/or modify
--   it under the terms of the GNU General Public License as published by
--   the Free Software Foundation, either version 3 of the License, or
--   (at your option) any later version.
--
--   This program is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--   GNU General Public License for more details.
--
--   You should have received a copy of the GNU General Public License
--   along with this program.  If not, see <http://www.gnu.org/licenses/>.
--------------------------------------------------------------------------------
 
library ieee, common_pkg_lib, common_components_lib, common_mult_lib, technology_lib;
use IEEE.std_logic_1164.all;
USE technology_lib.technology_select_pkg.ALL;
use common_pkg_lib.common_pkg.all;
 
-- Purpose: Twiddle (weight) multiplier stage.
-- Description:
--   The complex input (in_re, in_im) is multiplied by the complex weight
--   (weight_re, weight_im) and the product is quantized to the output width.
--   In parallel the unmodified input is delayed by g_lat clock cycles. The
--   delayed in_sel then chooses per sample which of the two is output:
--     . in_sel = '1' : quantized product
--     . otherwise    : delayed input
--   All vector ports are unconstrained, their widths are taken from the
--   actuals that are connected at instantiation.
entity rTwoWMul is
  generic (
    g_technology : natural := c_tech_select_default;  -- passed on to common_complex_mult
    g_stage      : natural := 1;                      -- stage number, stage 1 always gets the "RTL" multiplier variant
    g_lat        : natural := 3+1                     -- total in_* to out_* latency: 3 for mult, 1 for round
  );
  port (
    -- Clock and reset
    clk       : in  std_logic;
    rst       : in  std_logic;

    -- Complex weight, width is taken from weight_re
    weight_re : in  std_logic_vector;
    weight_im : in  std_logic_vector;

    -- Complex input data with valid, width is taken from in_re
    in_re     : in  std_logic_vector;
    in_im     : in  std_logic_vector;
    in_val    : in  std_logic;

    -- Output select, delayed by g_lat inside: '1' = product, else delayed input
    in_sel    : in  std_logic;

    -- Complex output data, width is taken from out_re; out_val is in_val delayed by g_lat
    out_re    : out std_logic_vector;
    out_im    : out std_logic_vector;
    out_val   : out std_logic
  );
end entity rTwoWMul;
 
architecture str of rTwoWMul is

  -- Select how the multiplier product is quantized:
  --   . true  : truncate, throws away the c_round_w lower bits
  --   . false : signed rounding (= away from zero), which preserves zero DC
  -- On hardware for Fsub in Apertif and using the WG at various frequencies at subband or between
  -- subbands it appears that using truncate or sround does not make a noticeable difference in the
  -- SST. Truncation is what is currently selected here; set this constant to false to use signed
  -- rounding instead.
  constant c_use_truncate     : boolean := true; --false;

  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines
  -- the total latency from in_* to out_*.

  -- DSP multiplier IP
  constant c_dsp_mult_lat     : natural := 3;

  -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
  -- . g_lat <= c_dsp_mult_lat : no round register, all latency goes to the multiplier
  -- . g_lat >  c_dsp_mult_lat : one round register, the rest goes to the multiplier
  constant c_round_lat        : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0);  -- allocate 1 pipeline for round
  constant c_lat              : natural := g_lat - c_round_lat;                    -- allocate remaining pipeline to multiplier

  constant c_mult_input_lat   : natural := sel_a_b(c_lat>1, 1, 0);                     -- second priority use DSP pipeline input
  constant c_mult_product_lat : natural := 0;
  constant c_mult_adder_lat   : natural := sel_a_b(c_lat>2, 1, 0);                     -- third priority use DSP internal product-sum pipeline
  constant c_mult_extra_lat   : natural := sel_a_b(c_lat>3, c_lat-3, 0);               -- remaining extra pipelining in logic
  constant c_mult_output_lat  : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
  constant c_mult_lat         : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;

  -- Total input to output latency
  constant c_total_lat   : natural := c_mult_lat + c_round_lat;

  -- Quantization
  constant c_in_dat_w         : natural:= in_re'length;
  constant c_weight_w         : natural:= weight_re'length;
  constant c_prod_w           : natural:= c_in_dat_w+c_weight_w+1;
  constant c_round_w          : natural:= c_weight_w-c_sign_w;   -- the weights are normalized
  constant c_out_dat_w        : natural:= out_re'length;

  -- Multiplier variant selection. Exactly one of gen_ip / gen_rtl is elaborated, because both use
  -- this single constant. Every case that does not qualify for the IP (including g_stage = 0)
  -- falls back to the RTL multiplier, so the product signals are always driven.
  constant c_use_ip           : boolean := g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat;

  -- Each signal takes its range from its own port
  signal in_re_dly        : std_logic_vector(in_re'range);
  signal in_im_dly        : std_logic_vector(in_im'range);
  signal product_re       : std_logic_vector(c_prod_w-1 downto 0);
  signal product_im       : std_logic_vector(c_prod_w-1 downto 0);
  signal round_re         : std_logic_vector(out_re'range);
  signal round_im         : std_logic_vector(out_im'range);
  signal out_sel          : std_logic;

begin

  -- Total latency check
  ASSERT c_total_lat = g_lat
    REPORT "rTwoWMul: total pipeline error"
    SEVERITY FAILURE;

  -- Port width checks. The widths are derived from the real part ports only, so the imaginary
  -- part ports must have the same width. The output multiplexer passes on the delayed input
  -- without resizing, so the input and output widths must be equal as well.
  ASSERT in_im'length = in_re'length
    REPORT "rTwoWMul: in_im and in_re must have the same width"
    SEVERITY FAILURE;
  ASSERT weight_im'length = weight_re'length
    REPORT "rTwoWMul: weight_im and weight_re must have the same width"
    SEVERITY FAILURE;
  ASSERT out_im'length = out_re'length
    REPORT "rTwoWMul: out_im and out_re must have the same width"
    SEVERITY FAILURE;
  ASSERT out_re'length = in_re'length
    REPORT "rTwoWMul: output width must equal input width for the bypass path"
    SEVERITY FAILURE;

  ------------------------------------------------------------------------------
  -- Complex multiplication
  -- . use the common_complex_mult(rtl) for the output stage 1 because then
  --   the multiplier instance can get optimized away for the constant
  --   weight_re = 1 and weight_im = 0 inputs.
  -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
  --   inputs.
  --   . for c_lat = 0,1,2 use the RTL multiplier
  --   . for c_lat >= 3 default best use the FPGA multiplier IP block.
  ------------------------------------------------------------------------------

  gen_rtl : if not c_use_ip generate
    u_CmplxMul : entity common_mult_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "RTL",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst       => rst,
      clk       => clk,
      in_ar     => in_re,
      in_ai     => in_im,
      in_br     => weight_re,
      in_bi     => weight_im,
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
      out_val   => OPEN   -- out_val is derived from in_val by u_pipeline_out_val instead
    );
  end generate;

  gen_ip : if c_use_ip generate
    u_cmplx_mul : entity common_mult_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
      g_variant          => "IP",
      g_in_a_w           => c_in_dat_w,
      g_in_b_w           => c_weight_w,
      g_out_p_w          => c_prod_w,
      g_conjugate_b      => false,
      g_pipeline_input   => c_mult_input_lat,
      g_pipeline_product => c_mult_product_lat,
      g_pipeline_adder   => c_mult_adder_lat,
      g_pipeline_output  => c_mult_output_lat
    )
    port map (
      rst       => rst,
      clk       => clk,
      in_ar     => in_re,
      in_ai     => in_im,
      in_br     => weight_re,
      in_bi     => weight_im,
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
      out_val   => OPEN   -- out_val is derived from in_val by u_pipeline_out_val instead
    );
  end generate;

  ------------------------------------------------------------------------------
  -- Round WMult output
  ------------------------------------------------------------------------------

  gen_truncate : if c_use_truncate=true GENERATE
    -- use truncate    that throws away the c_round_w lower bits as rounding function
    -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
    gen_comb : if c_round_lat=0 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;


  gen_sround : if c_use_truncate=false GENERATE
    -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0
    -- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing
    gen_comb : if c_round_lat=0 generate
      -- Deliberately stop elaboration: combinatorial signed rounding is not accepted
      ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);
    end generate;
    gen_reg : if c_round_lat=1 generate
      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
    end generate;
  end generate;


  ------------------------------------------------------------------------------
  -- Propagate data and control signals for input/output choice at WMult output
  -- . all four pipelines use g_lat, so they stay aligned with the multiplier
  --   plus round path (c_total_lat = g_lat is asserted above)
  ------------------------------------------------------------------------------

  -- No need to use rst for data, because initial data value is don't care
  u_re_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_re'length,
    g_out_dat_w => in_re'length
  )
  port map (
    clk     => clk,
    in_dat  => in_re,
    out_dat => in_re_dly
  );

  u_im_lat : entity common_components_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
    g_in_dat_w  => in_im'length,
    g_out_dat_w => in_im'length
  )
  port map (
    clk     => clk,
    in_dat  => in_im,
    out_dat => in_im_dly
  );

  -- Use rst for control to ensure initial low
  u_sel_lat : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_sel,
    out_dat => out_sel
  );

  u_pipeline_out_val : entity common_components_lib.common_pipeline_sl
  generic map (
    g_pipeline  => g_lat
  )
  port map (
    rst     => rst,
    clk     => clk,
    in_dat  => in_val,
    out_dat => out_val
  );

  ------------------------------------------------------------------------------
  -- Output real and imaginary, switch between input and product
  -- . out_sel = '1' : quantized product
  -- . otherwise     : delayed input (bypass)
  ------------------------------------------------------------------------------
  out_re <= round_re when out_sel = '1' else in_re_dly;
  out_im <= round_im when out_sel = '1' else in_im_dly;

end str;

Browse

Tools

Subversion Repositories astron_r2sdf_fft

[/] [astron_r2sdf_fft/] [trunk/] [rTwoWMul.vhd] - Blame information for rev 2

Line No.	Rev	Author	Line
1	2	danv	`--------------------------------------------------------------------------------`
2			`-- Author: Raj Thilak Rajan : rajan at astron.nl: Nov 2009`
3			`-- Copyright (C) 2009-2010`
4			`-- ASTRON (Netherlands Institute for Radio Astronomy)`
5			`-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands`
6			`--`
7			`-- This file is part of the UniBoard software suite.`
8			`-- The file is free software: you can redistribute it and/or modify`
9			`-- it under the terms of the GNU General Public License as published by`
10			`-- the Free Software Foundation, either version 3 of the License, or`
11			`-- (at your option) any later version.`
12			`--`
13			`-- This program is distributed in the hope that it will be useful,`
14			`-- but WITHOUT ANY WARRANTY; without even the implied warranty of`
15			`-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
16			`-- GNU General Public License for more details.`
17			`--`
18			`-- You should have received a copy of the GNU General Public License`
19			`-- along with this program. If not, see <http://www.gnu.org/licenses/>.`
20			`--------------------------------------------------------------------------------`
21
22			`library ieee, common_pkg_lib, common_components_lib, common_mult_lib, technology_lib;`
23			`use IEEE.std_logic_1164.all;`
24			`USE technology_lib.technology_select_pkg.ALL;`
25			`use common_pkg_lib.common_pkg.all;`
26
27			`entity rTwoWMul is`
28			`generic (`
29			`g_technology : NATURAL := c_tech_select_default;`
30			`g_stage : natural := 1;`
31			`g_lat : natural := 3+1 -- 3 for mult, 1 for round`
32			`);`
33			`port (`
34			`clk : in std_logic;`
35			`rst : in std_logic;`
36			`weight_re : in std_logic_vector;`
37			`weight_im : in std_logic_vector;`
38			`in_re : in std_logic_vector;`
39			`in_im : in std_logic_vector;`
40			`in_val : in std_logic;`
41			`in_sel : in std_logic;`
42			`out_re : out std_logic_vector;`
43			`out_im : out std_logic_vector;`
44			`out_val : out std_logic`
45			`);`
46			`end entity rTwoWMul;`
47
48			`architecture str of rTwoWMul is`
49
50			`-- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in`
51			`-- Apertif and using the WG at various frequencies at subband or between subbands it appears that`
52			`-- using truncate or sround does not make a noticable difference in the SST. Still choose to use`
53			`-- signed rounding to preserve zero DC.`
54			`constant c_use_truncate : boolean := true; --false;`
55
56			`-- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines`
57			`-- the total latency from in_* to out_*.`
58
59			`-- DSP multiplier IP`
60			`constant c_dsp_mult_lat : natural := 3;`
61
62			`-- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w`
63			`constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0); -- allocate 1 pipeline for round`
64			`constant c_lat : natural := g_lat - c_round_lat; -- allocate remaining pipeline to multiplier`
65
66			`constant c_mult_input_lat : natural := sel_a_b(c_lat>1, 1, 0); -- second priority use DSP pipeline input`
67			`constant c_mult_product_lat : natural := 0;`
68			`constant c_mult_adder_lat : natural := sel_a_b(c_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline`
69			`constant c_mult_extra_lat : natural := sel_a_b(c_lat>3, c_lat-3, 0); -- remaining extra pipelining in logic`
70			`constant c_mult_output_lat : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output`
71			`constant c_mult_lat : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;`
72
73			`-- Total input to output latency`
74			`constant c_total_lat : natural := c_mult_lat + c_round_lat;`
75
76			`-- Quantization`
77			`constant c_in_dat_w : natural:= in_re'length;`
78			`constant c_weight_w : natural:= weight_re'length;`
79			`constant c_prod_w : natural:= c_in_dat_w+c_weight_w+1;`
80			`constant c_round_w : natural:= c_weight_w-c_sign_w; -- the weights are normalized`
81			`constant c_out_dat_w : natural:= out_re'length;`
82
83			`signal in_re_dly : std_logic_vector(in_re'range);`
84			`signal in_im_dly : std_logic_vector(in_re'range);`
85			`signal product_re : std_logic_vector(c_prod_w-1 downto 0);`
86			`signal product_im : std_logic_vector(c_prod_w-1 downto 0);`
87			`signal round_re : std_logic_vector(out_re'range);`
88			`signal round_im : std_logic_vector(out_re'range);`
89			`signal out_sel : std_logic;`
90
91			`begin`
92
93			`-- Total latency check`
94			`ASSERT c_total_lat = g_lat`
95			`REPORT "rTwoWMul: total pipeline error"`
96			`SEVERITY FAILURE;`
97
98			`------------------------------------------------------------------------------`
99			`-- Complex multiplication`
100			`-- . use the common_complex_mult(rtl) for the output stage 1 because then`
101			`-- the multiplier instance can get optimized away for the constant`
102			`-- weight_re = 1 and weight_im = 0 inputs.`
103			`-- . the IP in common_complex_mult(stratix4) only supports up to 18b wide`
104			`-- inputs.`
105			`-- . for c_lat = 0,1,2 use the RTL multiplier`
106			`-- . for c_lat >= 3 default best use the FPGA multiplier IP block.`
107			`------------------------------------------------------------------------------`
108
109			`gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate`
110			`u_CmplxMul : entity common_mult_lib.common_complex_mult`
111			`generic map (`
112			`g_technology => g_technology,`
113			`g_variant => "RTL",`
114			`g_in_a_w => c_in_dat_w,`
115			`g_in_b_w => c_weight_w,`
116			`g_out_p_w => c_prod_w,`
117			`g_conjugate_b => false,`
118			`g_pipeline_input => c_mult_input_lat,`
119			`g_pipeline_product => c_mult_product_lat,`
120			`g_pipeline_adder => c_mult_adder_lat,`
121			`g_pipeline_output => c_mult_output_lat`
122			`)`
123			`port map (`
124			`rst => rst,`
125			`clk => clk,`
126			`in_ar => in_re,`
127			`in_ai => in_im,`
128			`in_br => weight_re,`
129			`in_bi => weight_im,`
130			`in_val => in_val,`
131			`out_pr => product_re,`
132			`out_pi => product_im,`
133			`out_val => OPEN`
134			`);`
135			`end generate;`
136
137			`gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate`
138			`u_cmplx_mul : entity common_mult_lib.common_complex_mult`
139			`generic map (`
140			`g_technology => g_technology,`
141			`g_variant => "IP",`
142			`g_in_a_w => in_re'length,`
143			`g_in_b_w => weight_re'length,`
144			`g_out_p_w => product_re'length,`
145			`g_conjugate_b => false,`
146			`g_pipeline_input => c_mult_input_lat,`
147			`g_pipeline_product => c_mult_product_lat,`
148			`g_pipeline_adder => c_mult_adder_lat,`
149			`g_pipeline_output => c_mult_output_lat`
150			`)`
151			`port map (`
152			`rst => rst,`
153			`clk => clk,`
154			`in_ar => in_re,`
155			`in_ai => in_im,`
156			`in_br => weight_re,`
157			`in_bi => weight_im,`
158			`in_val => in_val,`
159			`out_pr => product_re,`
160			`out_pi => product_im,`
161			`out_val => OPEN`
162			`);`
163			`end generate;`
164
165			`------------------------------------------------------------------------------`
166			`-- Round WMult output`
167			`------------------------------------------------------------------------------`
168
169			`gen_truncate : if c_use_truncate=true GENERATE`
170			`-- use truncate that throws away the c_round_w lower bits as rounding function`
171			`-- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width`
172			`gen_comb : if c_round_lat=0 generate`
173			`round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);`
174			`round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);`
175			`end generate;`
176			`gen_reg : if c_round_lat=1 generate`
177			`round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);`
178			`round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);`
179			`end generate;`
180			`end generate;`
181
182
183			`gen_sround : if c_use_truncate=false GENERATE`
184			`-- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0`
185			`-- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing`
186			`gen_comb : if c_round_lat=0 generate`
187			`ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;`
188			`round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);`
189			`round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);`
190			`end generate;`
191			`gen_reg : if c_round_lat=1 generate`
192			`round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);`
193			`round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);`
194			`end generate;`
195			`end generate;`
196
197
198			`------------------------------------------------------------------------------`
199			`-- Propagate data and control signals for input/output choice at WMult output`
200			`------------------------------------------------------------------------------`
201
202			`-- No need to use rst for data, because initial data value is don't care`
203			`u_re_lat : entity common_components_lib.common_pipeline`
204			`generic map (`
205			`g_pipeline => g_lat,`
206			`g_in_dat_w => in_re'length,`
207			`g_out_dat_w => in_re'length`
208			`)`
209			`port map (`
210			`clk => clk,`
211			`in_dat => in_re,`
212			`out_dat => in_re_dly`
213			`);`
214
215			`u_im_lat : entity common_components_lib.common_pipeline`
216			`generic map (`
217			`g_pipeline => g_lat,`
218			`g_in_dat_w => in_im'length,`
219			`g_out_dat_w => in_im'length`
220			`)`
221			`port map (`
222			`clk => clk,`
223			`in_dat => in_im,`
224			`out_dat => in_im_dly`
225			`);`
226
227			`-- Use rst for control to ensure initial low`
228			`u_sel_lat : entity common_components_lib.common_pipeline_sl`
229			`generic map (`
230			`g_pipeline => g_lat`
231			`)`
232			`port map (`
233			`rst => rst,`
234			`clk => clk,`
235			`in_dat => in_sel,`
236			`out_dat => out_sel`
237			`);`
238
239			`u_pipeline_out_val : entity common_components_lib.common_pipeline_sl`
240			`generic map (`
241			`g_pipeline => g_lat`
242			`)`
243			`port map (`
244			`rst => rst,`
245			`clk => clk,`
246			`in_dat => in_val,`
247			`out_dat => out_val`
248			`);`
249
250			`------------------------------------------------------------------------------`
251			`-- Output real and imaginary, switch between input and product`
252			`------------------------------------------------------------------------------`
253			`out_re <= round_re when out_sel = '1' else in_re_dly;`
254			`out_im <= round_im when out_sel = '1' else in_im_dly;`
255
256			`end str;`