////////////////////////////////////////////////////////////////////////////////
//
// Filename:	fftstage.v
//
// Project:	A General Purpose Pipelined FFT Implementation
//
// Purpose:	This file is (almost) a Verilog source file.  It is meant to
//		be used by an FFT core compiler to generate FFTs which may be
//	used as part of an FFT core.  Specifically, this file encapsulates
//	the options of an FFT-stage.  For any 2^N length FFT, there shall be
//	(N-1) of these stages.
//
//
// Operation:
//	Given a stream of values, operate upon them as though they were
//	value pairs, x[n] and x[n+N/2].  The stream begins when n=0, and ends
//	when n=N/2-1 (i.e. there's a full set of N values).  When the value
//	x[0] enters, the synchronization input, i_sync, must be true as well.
//
//	For this stream, produce outputs
//	y[n    ] = x[n] + x[n+N/2], and
//	y[n+N/2] = (x[n] - x[n+N/2]) * c[n],
//			where c[n] is a complex coefficient found in the
//			external memory file COEFFILE.
//	When y[0] is output, a synchronization bit o_sync will be true as
//	well, otherwise it will be zero.
//
//	Most of the work to do this is done within the butterfly, whether the
//	hardware accelerated butterfly (uses a DSP) or not.
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.)  If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype	none
//
// fftstage
//
// One radix-2 stage of a pipelined FFT.  Input samples arrive one per
// i_ce.  While iaddr[LGSPAN] is clear the samples are stored in imem;
// once it is set, each new sample is paired with the stored sample at
// the same low address, together with a coefficient read from cmem, and
// the three values are handed to the butterfly.  The butterfly's first
// output (ob_a) is forwarded directly to o_data while oB[LGSPAN] is
// clear, and its second output (ob_b) is parked in omem and replayed
// while oB[LGSPAN] is set.
//
// Ports:
//	i_clk	Clock; all registers update on its rising edge.
//	i_reset	Synchronous reset of the address counters and sync flags.
//	i_ce	Clock enable; the pipeline only advances when this is high.
//	i_sync	Must be high together with the first sample of a frame.
//		Until it has been seen (wait_for_sync), iaddr does not
//		advance.
//	i_data	Input sample, 2*IWIDTH bits.
//		NOTE(review): presumably {real, imaginary}, matching the
//		cmem layout documented below -- confirm against the
//		butterfly modules.
//	o_data	Output sample, 2*OWIDTH bits.
//	o_sync	High together with the first output of a frame.
//
module	fftstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync);
	parameter	IWIDTH=15,CWIDTH=20,OWIDTH=16;
	// Parameters specific to the core that should be changed when this
	// core is built ... Note that the minimum LGSPAN (the base two log
	// of the span, or the base two log of the current FFT size) is 3.
	// Smaller spans (i.e. the span of 2) must use the dbl laststage module.
	parameter	LGWIDTH=10, LGSPAN=8, BFLYSHIFT=0;
	parameter [0:0]	OPT_HWMPY = 1;
	// Clocks per CE.  If your incoming data rate is less than 50% of your
	// clock speed, you can set CKPCE to 2'b10, make sure there's at least
	// one clock between cycles when i_ce is high, and then use two
	// multiplies instead of three.  Setting CKPCE to 2'b11, and insisting
	// on at least two clocks with i_ce low between cycles with i_ce high,
	// then the hardware optimized butterfly code will use one multiply
	// instead of two.
	parameter	CKPCE = 1;
	// The COEFFILE parameter contains the name of the file containing the
	// FFT twiddle factors
	parameter	COEFFILE="cmem_o2048.hex";

	// ZERO_ON_IDLE is a debugging aid (see the idle logic below).  It can
	// only be overridden when building under Verilator; otherwise it is
	// fixed to zero.
`ifdef	VERILATOR
	parameter [0:0]	ZERO_ON_IDLE = 1'b0;
`else
	localparam [0:0] ZERO_ON_IDLE = 1'b0;
`endif // VERILATOR

	// The net type is spelled out on every input since this file is
	// compiled under `default_nettype none.
	input	wire				i_clk, i_reset, i_ce, i_sync;
	input	wire	[(2*IWIDTH-1):0]	i_data;
	output	reg	[(2*OWIDTH-1):0]	o_data;
	output	reg				o_sync;

	// High from reset until the first i_sync has been accepted
	reg	wait_for_sync;
	// Registered butterfly inputs: stored sample, new sample, coefficient
	reg	[(2*IWIDTH-1):0]	ib_a, ib_b;
	reg	[(2*CWIDTH-1):0]	ib_c;
	reg	ib_sync;

	// Set once the first butterfly output sync has been seen
	reg	b_started;
	// Butterfly outputs
	wire	ob_sync;
	wire	[(2*OWIDTH-1):0]	ob_a, ob_b;

	// cmem is defined as an array of real and imaginary values,
	// where the top CWIDTH bits are the real value and the bottom
	// CWIDTH bits are the imaginary value.
	//
	// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),
	//		(2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };
	//
	reg	[(2*CWIDTH-1):0]	cmem [0:((1<<LGSPAN)-1)];
	initial	$readmemh(COEFFILE,cmem);

	// Input address counter.  The top bit selects between "store the
	// sample" (0) and "feed the butterfly" (1); the low bits index
	// imem and cmem.
	reg	[(LGSPAN):0]		iaddr;
	reg	[(2*IWIDTH-1):0]	imem	[0:((1<<LGSPAN)-1)];

	// Output address counter, organized the same way as iaddr: the top
	// bit selects between "forward ob_a" (0) and "replay omem" (1).
	reg	[LGSPAN:0]		oB;
	reg	[(2*OWIDTH-1):0]	omem	[0:((1<<LGSPAN)-1)];

	//
	// Input side: count incoming samples, starting with the first i_sync
	//
	initial wait_for_sync = 1'b1;
	initial iaddr = 0;
	always @(posedge i_clk)
	if (i_reset)
	begin
		wait_for_sync <= 1'b1;
		iaddr <= 0;
	end else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
	begin
		//
		// First step: Record what we're not ready to use yet
		//
		iaddr <= iaddr + { {(LGSPAN){1'b0}}, 1'b1 };
		wait_for_sync <= 1'b0;
	end

	// Need to make certain here that we don't read and write the same
	// address on the same clk, hence the write only takes place while
	// the top address bit is clear.
	always @(posedge i_clk)
	if ((i_ce)&&(!iaddr[LGSPAN]))
		imem[iaddr[(LGSPAN-1):0]] <= i_data;

	//
	// Now, we have all the inputs, so let's feed the butterfly
	//
	initial ib_sync = 1'b0;
	always @(posedge i_clk)
	if (i_reset)
		ib_sync <= 1'b0;
	else if (i_ce)
	begin
		// Set the sync to true on the very first
		// valid input in, and hence on the very
		// first valid data out per FFT.
		ib_sync <= (iaddr==(1<<(LGSPAN)));
	end

	always @(posedge i_clk)
	if (i_ce)
	begin
		// One input from memory, ...
		ib_a <= imem[iaddr[(LGSPAN-1):0]];
		// One input clocked in from the top
		ib_b <= i_data;
		// and the coefficient or twiddle factor
		ib_c <= cmem[iaddr[(LGSPAN-1):0]];
	end

	// The idle signal is designed to keep track of when an input
	// to the butterfly is important and going to be used.  It's used
	// in a flag following, so that when useful values are placed
	// into the butterfly they'll be non-zero (idle=0), otherwise when
	// the inputs to the butterfly are irrelevant and will be ignored,
	// then (idle=1) those inputs will be set to zero.  This
	// functionality is not designed to be used in operation, but only
	// within a Verilator simulation context when chasing a bug.
	// In this limited environment, the non-zero answers will stand
	// in a trace making it easier to highlight a bug.
	//
	// idle is a wire driven by a continuous assignment in both branches.
	// A constant must not be produced with "always @(*) idle = 0;": that
	// block reads no signals, so its implicit sensitivity list is empty
	// and an event-driven simulator never executes it, leaving idle at X.
	wire	idle;
	generate if (ZERO_ON_IDLE)
	begin : GEN_ZERO_ON_IDLE
		reg	r_idle;

		initial	r_idle = 1;
		always @(posedge i_clk)
		if (i_reset)
			r_idle <= 1'b1;
		else if (i_ce)
			r_idle <= (!iaddr[LGSPAN])&&(!wait_for_sync);

		assign	idle = r_idle;

	end else begin : GEN_NEVER_IDLE

		assign	idle = 1'b0;

	end endgenerate

	// Select the butterfly implementation.  Both take the clock, reset
	// and clock enable, then the coefficient, the two data inputs and an
	// input sync, and produce two data outputs and an output sync.
	// Note that the HWBFLY branch does not gate ib_c with i_ce, while
	// the FWBFLY branch does.
	generate if (OPT_HWMPY)
	begin : HWBFLY
		hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
				.CKPCE(CKPCE), .SHIFT(BFLYSHIFT))
			bfly(i_clk, i_reset, i_ce, (idle)?0:ib_c,
				(idle || (!i_ce)) ? 0:ib_a,
				(idle || (!i_ce)) ? 0:ib_b,
				(ib_sync)&&(i_ce),
				ob_a, ob_b, ob_sync);
	end else begin : FWBFLY
		butterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
				.CKPCE(CKPCE),.SHIFT(BFLYSHIFT))
			bfly(i_clk, i_reset, i_ce,
					(idle||(!i_ce))?0:ib_c,
					(idle||(!i_ce))?0:ib_a,
					(idle||(!i_ce))?0:ib_b,
					(ib_sync&&i_ce),
					ob_a, ob_b, ob_sync);
	end endgenerate

	//
	// Next step: recover the outputs from the butterfly
	//
	initial oB        = 0;
	initial o_sync    = 0;
	initial b_started = 0;
	always @(posedge i_clk)
	if (i_reset)
	begin
		oB <= 0;
		o_sync <= 0;
		b_started <= 0;
	end else if (i_ce)
	begin
		// Only pass the butterfly's sync along while the top bit of
		// the output counter is clear
		o_sync <= (!oB[LGSPAN])?ob_sync : 1'b0;
		// The output counter free-runs once the first sync is seen
		if (ob_sync||b_started)
			oB <= oB + { {(LGSPAN){1'b0}}, 1'b1 };
		if ((ob_sync)&&(!oB[LGSPAN]))
			// A butterfly output is available
			b_started <= 1'b1;
	end

	// The second butterfly output is registered, along with the address
	// it belongs to, before being written into omem on the following
	// i_ce.  The write takes place on every i_ce.
	reg	[(LGSPAN-1):0]		dly_addr;
	reg	[(2*OWIDTH-1):0]	dly_value;
	always @(posedge i_clk)
	if (i_ce)
	begin
		dly_addr <= oB[(LGSPAN-1):0];
		dly_value <= ob_b;
	end
	always @(posedge i_clk)
	if (i_ce)
		omem[dly_addr] <= dly_value;

	// Output mux: the butterfly's first output while the top bit of oB
	// is clear, the stored second outputs from omem otherwise.
	always @(posedge i_clk)
	if (i_ce)
		o_data <= (!oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];

endmodule