1 |
3 |
wzab |
-------------------------------------------------------------------------------
|
2 |
|
|
-- Title : fft_top
|
3 |
|
|
-- Project : Pipelined, DP RAM based FFT processor
|
4 |
|
|
-------------------------------------------------------------------------------
|
5 |
|
|
-- File : fft_top.vhd
|
6 |
|
|
-- Author : Wojciech Zabolotny
|
7 |
|
|
-- Company :
|
8 |
|
|
-- License : BSD
|
9 |
|
|
-- Created : 2014-01-18
|
10 |
|
|
-- Platform :
|
11 |
|
|
-- Standard : VHDL'93
|
12 |
|
|
-------------------------------------------------------------------------------
|
13 |
|
|
-- Description: This file implements a FFT processor based on a dual port RAM
|
14 |
|
|
-------------------------------------------------------------------------------
|
15 |
|
|
-- Copyright (c) 2014
|
16 |
|
|
-------------------------------------------------------------------------------
|
17 |
|
|
-- Revisions :
|
18 |
|
|
-- Date Version Author Description
|
19 |
|
|
-- 2014-01-18 1.0 wzab Created
|
20 |
|
|
-------------------------------------------------------------------------------
|
21 |
|
|
library ieee;
|
22 |
|
|
use ieee.std_logic_1164.all;
|
23 |
|
|
use ieee.numeric_std.all;
|
24 |
|
|
use ieee.math_real.all;
|
25 |
|
|
use ieee.math_complex.all;
|
26 |
|
|
library work;
|
27 |
|
|
use work.fft_len.all;
|
28 |
|
|
use work.icpx.all;
|
29 |
|
|
use work.fft_support_pkg.all;
|
30 |
|
|
|
31 |
|
|
entity fft_engine is
  -- Top-level interface of the pipelined FFT engine.
  -- The port widths use LOG2_FFT_LEN-2 as the upper index, so the generic
  -- must be at least 2 for the address ports to be non-null.
  generic (
    LOG2_FFT_LEN : integer := 4         -- Defines order of FFT
    );
  port (
    -- System interface
    rst_n     : in  std_logic;          -- reset, active low
    clk       : in  std_logic;          -- clock, rising edge active
    -- Input memory interface
    din       : in  icpx_number;        -- data input (one sample per clock)
    -- Output interface
    valid     : out std_logic;          -- follows the start flag of the final stage
    saddr     : out unsigned(LOG2_FFT_LEN-2 downto 0);  -- output sample counter
    saddr_rev : out unsigned(LOG2_FFT_LEN-2 downto 0);  -- rev(saddr); rev() is
                                        -- declared outside this file,
                                        -- presumably bit reversal - confirm
    sout0     : out icpx_number;        -- spectrum output (last butterfly, dout0)
    sout1     : out icpx_number         -- spectrum output (last butterfly, dout1)
    );
end entity fft_engine;
|
48 |
|
|
|
49 |
|
|
architecture fft_engine_beh of fft_engine is
|
50 |
|
|
|
51 |
|
|
constant MULT_LATENCY : integer := 3;
|
52 |
|
|
|
53 |
|
|
-- Storage type of the twiddle factor ROM: FFT_LEN/2 complex entries.
type T_TF_TABLE is array (0 to FFT_LEN/2-1) of icpx_number;

-- Computes the contents of the twiddle factor ROM.
-- Entry k holds cos(a) + j*sin(a) with a = -2*pi*k / 2**LOG2_FFT_LEN,
-- converted to the fixed point format by cplx2icpx.
-- The function is only used to initialize a constant, so the floating point
-- math is evaluated at elaboration time and is never synthesized.
-- NOTE(review): the table length comes from FFT_LEN while the angle step
-- uses LOG2_FFT_LEN; this assumes FFT_LEN = 2**LOG2_FFT_LEN - confirm.
function tf_table_init
  return T_TF_TABLE is
  variable table : T_TF_TABLE;
  variable angle : real;
begin
  for k in table'range loop
    angle    := -real(k)*MATH_PI*2.0/(2.0 ** LOG2_FFT_LEN);
    table(k) := cplx2icpx(complex'(RE => cos(angle), IM => sin(angle)));
  end loop;
  return table;
end function tf_table_init;

-- Twiddle factor ROM contents
constant tf_table : T_TF_TABLE := tf_table_init;
|
73 |
|
|
|
74 |
|
|
-- Storage type of the window function ROM: one coefficient per input sample.
type T_WINDOW_TABLE is array (0 to FFT_LEN-1) of icpx_number;

-- Computes the window coefficients.
-- Entry n is the real value 0.5*(1 - cos(2*pi*n/(FFT_LEN-1))) with a zero
-- imaginary part, converted to the fixed point format by cplx2icpx.
-- To switch the windowing off, every entry could instead be set to
-- cplx2icpx(complex'(1.0, 0.0)).
function tw_table_init
  return T_WINDOW_TABLE is
  variable coeffs : T_WINDOW_TABLE;
  variable phase  : real;
begin
  for n in coeffs'range loop
    phase     := real(n)*2.0*MATH_PI/real(FFT_LEN-1);
    coeffs(n) := cplx2icpx(complex'(RE => 0.5*(1.0-cos(phase)), IM => 0.0));
  end loop;
  return coeffs;
end function tw_table_init;

-- Window function ROM contents
constant window_function : T_WINDOW_TABLE := tw_table_init;
|
90 |
|
|
|
91 |
|
|
-- One integer per stage index (0 .. LOG2_FFT_LEN).
type T_STEP_MULT is array (0 to LOG2_FFT_LEN) of integer;

-- Returns the table of powers of two: entry s holds 2**s.
function step_mult_init
  return T_STEP_MULT is
  variable powers : T_STEP_MULT;
begin
  for s in powers'range loop
    powers(s) := 2**s;
  end loop;
  return powers;
end function step_mult_init;
|
101 |
|
|
|
102 |
|
|
component icpx_mul is
  -- Complex multiplier with configurable latency.
  -- The port list includes rst_n so that it agrees with the way icpx_mul is
  -- instantiated in this architecture (both instances map rst_n).
  generic (
    MULT_LATENCY : integer);            -- latency generic passed to the multiplier
  port (
    din0  : in  icpx_number;            -- first operand
    din1  : in  icpx_number;            -- second operand
    dout  : out icpx_number;            -- product
    rst_n : in  std_logic;              -- reset, active low
    clk   : in  std_logic);
end component icpx_mul;
|
111 |
|
|
|
112 |
|
|
-- Latency handed to every butterfly unit; also the number of clocks a stage
-- waits after its start before enabling its data switch.
constant BF_DELAY : integer := 3;

-- Table of index multipliers used when fetching a twiddle factor from tf_table
constant STEP_MULT : T_STEP_MULT := step_mult_init;

type T_FFT_STATE is (TFS_IDLE, TFS_RUN);

-- Incoming samples are written to a cyclic buffer addressed by dptr0
-- (dptr0 cycles through 2**(LOG2_FFT_LEN-1) addresses); the buffered and
-- the current sample are then fed to the first processing stage.

-- Per-stage data paths: index = stage number.
type T_FFT_DATA_ARRAY is array (LOG2_FFT_LEN downto 0) of icpx_number;
signal in0  : T_FFT_DATA_ARRAY;         -- butterfly input 0
signal in1  : T_FFT_DATA_ARRAY;         -- butterfly input 1
signal out0 : T_FFT_DATA_ARRAY;         -- butterfly output 0
signal out1 : T_FFT_DATA_ARRAY;         -- butterfly output 1
signal tft  : T_FFT_DATA_ARRAY;         -- registered twiddle factor

signal r_din0 : icpx_number := icpx_zero;  -- sample read back from the buffer
signal r_din1 : icpx_number := icpx_zero;  -- registered copy of din
signal wf0    : icpx_number := icpx_zero;  -- window coefficient for r_din0
signal wf1    : icpx_number := icpx_zero;  -- window coefficient for r_din1

signal s_saddr : unsigned(LOG2_FFT_LEN-2 downto 0);  -- output sample counter
signal dptr0   : unsigned(LOG2_FFT_LEN-2 downto 0);  -- input buffer pointer

signal start0_del : integer range 0 to MULT_LATENCY := 0;  -- multiplier latency counter
signal start0     : std_logic                       := '0';  -- start flag of stage 0
signal start0_pre : std_logic                       := '0';  -- set when dptr0 wraps


-- started(i): stage i is running; start_dr(i): data switch of stage i enabled
signal started  : std_logic_vector(LOG2_FFT_LEN downto 0) := (others => '0');
signal start_dr : std_logic_vector(LOG2_FFT_LEN downto 0) := (others => '0');

-- Per-stage integer bookkeeping
type T_FFT_INTS is array (0 to LOG2_FFT_LEN) of integer;
signal next_delay  : T_FFT_INTS := (others => 0);  -- extra delay before the next stage starts
signal step_bf     : T_FFT_INTS := (others => 0);  -- butterfly step counter
signal start_delay : T_FFT_INTS := (others => 0);  -- clocks elapsed since the stage started
|
137 |
|
|
|
138 |
|
|
|
139 |
|
|
begin -- fft_top_beh
|
140 |
|
|
|
141 |
|
|
-- Start-up synchronization of the stages after reset.
-- Stage i hands over to stage i+1 only after an additional
-- 2**(LOG2_FFT_LEN-2-i) clocks on top of the butterfly latency.
-- Only entries 0 .. LOG2_FFT_LEN-2 are driven here; the remaining entries of
-- next_delay keep their initial value 0.
g0 : for stage in 0 to LOG2_FFT_LEN-2 generate
  next_delay(stage) <= 2**(LOG2_FFT_LEN-2-stage);
end generate g0;
|
146 |
|
|
|
147 |
|
|
|
148 |
|
|
-- Input delay line (the samples are multiplied by the window function later).
-- Port A writes din at dptr0 on every clock and returns q_a as r_din0.
-- NOTE(review): the name suggests read-before-write behaviour, i.e. q_a is
-- the word previously stored at dptr0 - confirm in dp_ram_rbw_icpx.
-- Port B is not used: it never writes and its output is left open.
dp_ram_rbw_icpx_1 : entity work.dp_ram_rbw_icpx
  generic map (
    ADDR_WIDTH => LOG2_FFT_LEN-1)
  port map (
    clk    => clk,
    -- port A: delay line access
    addr_a => std_logic_vector(dptr0),
    data_a => din,
    we_a   => '1',
    q_a    => r_din0,
    -- port B: unused
    addr_b => std_logic_vector(dptr0),
    data_b => din,
    we_b   => '0',
    q_b    => open);
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
-- Input sequencing process.
-- * registers din into r_din1 (the direct path; the delayed path comes from
--   the input RAM),
-- * advances the cyclic buffer pointer dptr0 over 2**(LOG2_FFT_LEN-1)
--   addresses,
-- * after the first wrap of dptr0 sets start0_pre, then counts
--   MULT_LATENCY-1 further clocks (latency of the window multipliers)
--   before raising start0, which starts stage 0.
-- All registers written by this process are cleared by the asynchronous
-- reset, so the start-up sequence is repeated after every reset pulse.
ip1 : process (clk, rst_n) is
begin  -- process ip1
  if rst_n = '0' then                   -- asynchronous reset (active low)
    dptr0      <= (others => '0');
    r_din1     <= icpx_zero;
    -- The start-up bookkeeping must be cleared too; otherwise start0 would
    -- be raised again right after reset, before the buffer is refilled.
    start0_pre <= '0';
    start0_del <= 0;
    start0     <= '0';
  elsif clk'event and clk = '1' then    -- rising clock edge
    r_din1 <= din;
    if dptr0 < (2**(LOG2_FFT_LEN-1))-1 then
      dptr0 <= dptr0+1;
    else
      -- pointer wraps: one full pass through the buffer is complete
      dptr0      <= (others => '0');
      start0_pre <= '1';
    end if;
    if start0_pre = '1' then
      if start0_del = MULT_LATENCY-1 then
        start0 <= '1';
      else
        start0_del <= start0_del + 1;
      end if;
    end if;
  end if;
end process ip1;
|
190 |
|
|
|
191 |
|
|
-- Window coefficient lookup (registered ROM read).
-- wf0 is the coefficient at index dptr0, wf1 the one FFT_LEN/2 entries
-- further, matching the two samples (r_din0 / r_din1) fed to the multipliers.
mw1 : process (clk) is
  variable idx : natural;
begin  -- process mw1
  if clk'event and clk = '1' then       -- rising clock edge
    idx := to_integer(dptr0);
    wf0 <= window_function(idx);
    wf1 <= window_function(idx + FFT_LEN/2);
  end if;
end process mw1;
|
199 |
|
|
-- Window multipliers: each input path is multiplied by its window
-- coefficient; the products are the inputs of stage 0.

-- Delayed sample (from the input RAM) times wf0
icpx_mul_1 : entity work.icpx_mul
  generic map (
    MULT_LATENCY => MULT_LATENCY)
  port map (
    clk   => clk,
    rst_n => rst_n,
    din0  => r_din0,
    din1  => wf0,
    dout  => in0(0));

-- Current sample (registered din) times wf1
icpx_mul_2 : entity work.icpx_mul
  generic map (
    MULT_LATENCY => MULT_LATENCY)
  port map (
    clk   => clk,
    rst_n => rst_n,
    din0  => r_din1,
    din1  => wf1,
    dout  => in1(0));

-- Stage 0 is started by the input sequencing process
started(0) <= start0;
|
220 |
|
|
-- Generation of the FFT stages.
-- Each stage consists of a butterfly unit, a control process (twiddle factor
-- stepping and start-up delays), a registered twiddle factor ROM read and,
-- except for the last stage, a data switch feeding the next stage.
-- Original author's note: for each stage three counters must be maintained:
--   phase - 0 or 1
--   step  - 0 to 2**(STAGE_N)
--   cycle - (name still to be decided)
g1 : for st in 0 to LOG2_FFT_LEN-1 generate

  -- Butterfly unit of this stage
  butterfly_1 : entity work.butterfly
    generic map (
      LATENCY => BF_DELAY)
    port map (
      din0  => in0(st),
      din1  => in1(st),
      tf    => tft(st),
      dout0 => out0(st),
      dout1 => out1(st),
      clk   => clk,
      rst_n => rst_n
      );

  -- Stage control process.
  -- Once the stage is started:
  --  * step_bf(st) cycles through 0 .. STEP_BF_LIMIT (twiddle factor step),
  --  * after BF_DELAY clocks the data switch is enabled (start_dr(st)),
  --  * after BF_DELAY+next_delay(st) clocks the next stage is started
  --    (started(st+1)) and the delay counter stops.
  -- started(st+1) is driven only by this process, so it is also cleared
  -- here on reset; otherwise the following stages would stay "started"
  -- across a reset and restart out of step with stage 0.
  process (clk, rst_n) is
    constant STEP_BF_LIMIT : integer := 2**(LOG2_FFT_LEN-st-1)-1;
  begin  -- process
    if rst_n = '0' then                 -- asynchronous reset (active low)
      step_bf(st)     <= 0;
      start_delay(st) <= 0;
      start_dr(st)    <= '0';
      started(st+1)   <= '0';
    elsif clk'event and clk = '1' then  -- rising clock edge
      if started(st) = '1' then
        if start_delay(st) = BF_DELAY then
          start_dr(st) <= '1';          -- start the "data switch"
        end if;
        if start_delay(st) = BF_DELAY+next_delay(st) then
          started(st+1) <= '1';         -- start the next stage
        else
          start_delay(st) <= start_delay(st)+1;
        end if;
        if step_bf(st) < STEP_BF_LIMIT then
          step_bf(st) <= step_bf(st) + 1;
        else
          step_bf(st) <= 0;
        end if;
      end if;
    end if;
  end process;

  -- Twiddle factor ROM: registered read, table index = step * 2**st
  process (clk) is
  begin  -- process
    if clk'event and clk = '1' then     -- rising clock edge
      tft(st) <= tf_table(step_bf(st)*STEP_MULT(st));
    end if;
  end process;

  -- Data switch between this stage and the next one (not for the last stage)
  i3 : if st /= LOG2_FFT_LEN-1 generate
    fft_switch_1 : entity work.fft_data_switch
      generic map (
        LOG2_FFT_LEN => LOG2_FFT_LEN,
        STAGE        => st)
      port map (
        in0    => out0(st),
        in1    => out1(st),
        out0   => in0(st+1),
        out1   => in1(st+1),
        enable => start_dr(st),
        rst_n  => rst_n,
        clk    => clk);
  end generate i3;

  -- Last stage: count the output samples (0 .. FFT_LEN/2-1, cyclically)
  i4 : if st = LOG2_FFT_LEN-1 generate
    process (clk, rst_n) is
    begin  -- process
      if rst_n = '0' then               -- asynchronous reset (active low)
        s_saddr <= (others => '0');
      elsif clk'event and clk = '1' then  -- rising clock edge
        if start_dr(st) = '1' then
          if s_saddr = FFT_LEN/2-1 then
            s_saddr <= (others => '0');
          else
            s_saddr <= s_saddr+1;
          end if;
        end if;
      end if;
    end process;
  end generate i4;

end generate g1;
|
315 |
|
|
-- Output port drivers
-- Spectrum samples come straight from the butterfly of the last stage
sout0     <= out0(LOG2_FFT_LEN-1);
sout1     <= out1(LOG2_FFT_LEN-1);
-- Output sample counter and its rev() counterpart
saddr     <= s_saddr;
saddr_rev <= rev(s_saddr);
-- valid follows the start flag raised by the last stage's control process
valid     <= started(LOG2_FFT_LEN);
|
320 |
|
|
|
321 |
|
|
end fft_engine_beh;
|