OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [fftstage.v] - Blame information for rev 36

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 36 dgisselq
////////////////////////////////////////////////////////////////////////////////
//
// Filename:    fftstage.v
//
// Project:     A General Purpose Pipelined FFT Implementation
//
// Purpose:     This file is (almost) a Verilog source file.  It is meant to
//              be used by an FFT core compiler to generate FFTs which may be
//              used as part of an FFT core.  Specifically, this file
//              encapsulates the options of an FFT-stage.  For any 2^N length
//              FFT, there shall be (N-1) of these stages.
//
//
// Operation:
//      Given a stream of values, operate upon them as though they were
//      value pairs, x[n] and x[n+N/2].  The pair index n runs from 0
//      through N/2-1 (i.e. a full set of N values arrives per FFT).  When
//      the value x[0] enters, the synchronization input, i_sync, must be
//      true as well.
//
//      For this stream, produce outputs
//      y[n    ] = x[n] + x[n+N/2], and
//      y[n+N/2] = (x[n] - x[n+N/2]) * c[n],
//                      where c[n] is a complex coefficient found in the
//                      external memory file COEFFILE.
//      When y[0] is output, a synchronization bit o_sync will be true as
//      well, otherwise it will be zero.
//
//      Most of the work to do this is done within the butterfly, whether the
//      hardware accelerated butterfly (uses a DSP) or not.
//
// Creator:     Dan Gisselquist, Ph.D.
//              Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.)  If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License:     GPL, v3, as defined and found on www.gnu.org,
//              http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype        none
//
// fftstage
//
// One pipelined FFT stage.  Incoming samples are counted by iaddr: while
// iaddr[LGSPAN] is clear they are stored in imem, and while it is set each
// incoming sample is paired with the stored sample at the same low address
// and with a twiddle coefficient from cmem, and the triple is handed to a
// butterfly.  The butterfly's first result (ob_a) is forwarded straight to
// o_data while its second result (ob_b) is buffered in omem and replayed
// afterwards.
//
// Ports:
//      i_clk   Clock; all registers are clocked on its rising edge.
//      i_reset Synchronous reset of the address counters and sync flags.
//      i_ce    Clock enable; nothing advances on a clock where it is low.
//      i_sync  Must be high together with the first sample of a frame.  It
//              is only examined while the stage is still waiting for its
//              first frame (wait_for_sync set).
//      i_data  Input sample, 2*IWIDTH bits.
//      o_data  Output sample, 2*OWIDTH bits.
//      o_sync  High together with the first output sample of a frame.
//
module  fftstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync);
        // Widths of the input samples, twiddle coefficients and output
        // samples.  Every bus in this module is twice the respective width;
        // for the coefficients the upper half is the real part and the
        // lower half the imaginary part (see cmem below).
        // NOTE(review): the data buses presumably use the same
        // {real, imaginary} packing -- confirm against the butterfly.
        parameter       IWIDTH=15,CWIDTH=20,OWIDTH=16;
        // Parameters specific to the core that should be changed when this
        // core is built ... Note that the minimum LGSPAN (the base two log
        // of the span, or the base two log of the current FFT size) is 3.
        // Smaller spans (i.e. the span of 2) must use the dbl laststage module.
        //
        // LGSPAN sizes the address counters and the three memories below.
        // BFLYSHIFT is passed to the butterfly as its SHIFT parameter.
        // LGWIDTH is referenced only by the cmem documentation below.
        parameter       LGWIDTH=10, LGSPAN=8, BFLYSHIFT=0;
        // Selects the butterfly implementation: hwbfly when set, butterfly
        // otherwise.
        parameter       [0:0]     OPT_HWMPY = 1;
        // Clocks per CE.  If your incoming data rate is less than 50% of your
        // clock speed, you can set CKPCE to 2'b10, make sure there's at least
        // one clock between cycles when i_ce is high, and then use two
        // multiplies instead of three.  Setting CKPCE to 2'b11, and insisting
        // on at least two clocks with i_ce low between cycles with i_ce high,
        // then the hardware optimized butterfly code will use one multiply
        // instead of two.
        parameter               CKPCE = 1;
        // The COEFFILE parameter contains the name of the file containing the
        // FFT twiddle factors
        parameter       COEFFILE="cmem_o2048.hex";

        // ZERO_ON_IDLE is a debugging aid (see the idle logic below).  It
        // can only be overridden when building with Verilator; everywhere
        // else it is fixed at zero.
`ifdef  VERILATOR
        parameter [0:0] ZERO_ON_IDLE = 1'b0;
`else
        localparam [0:0] ZERO_ON_IDLE = 1'b0;
`endif // VERILATOR

        input                                   i_clk, i_reset, i_ce, i_sync;
        input           [(2*IWIDTH-1):0] i_data;
        output  reg     [(2*OWIDTH-1):0] o_data;
        output  reg                             o_sync;

        // Set from reset until the first accepted i_sync
        reg     wait_for_sync;
        // Registered butterfly inputs: stored sample, incoming sample,
        // coefficient, and the flag marking the first pair of a frame
        reg     [(2*IWIDTH-1):0] ib_a, ib_b;
        reg     [(2*CWIDTH-1):0] ib_c;
        reg     ib_sync;

        // Set once the first butterfly result of the first frame was seen
        reg     b_started;
        // Butterfly outputs
        wire    ob_sync;
        wire    [(2*OWIDTH-1):0] ob_a, ob_b;

        // cmem is an array of complex coefficients, where the top CWIDTH
        // bits of each word are the real part and the bottom CWIDTH bits
        // are the imaginary part.
        //
        // cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),
        //              (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };
        //
        reg     [(2*CWIDTH-1):0] cmem [0:((1<<LGSPAN)-1)];
        initial $readmemh(COEFFILE,cmem);

        // Input side: sample counter (one bit wider than the memory
        // address) and the buffer for the first half of each frame
        reg     [(LGSPAN):0]             iaddr;
        reg     [(2*IWIDTH-1):0] imem    [0:((1<<LGSPAN)-1)];

        // Output side: result counter and the buffer for the ob_b results
        reg     [LGSPAN:0]               oB;
        reg     [(2*OWIDTH-1):0] omem    [0:((1<<LGSPAN)-1)];

        //
        // First step: count the incoming samples.  Counting starts with the
        // first i_sync after reset and then free-runs on every i_ce;
        // the counter wraps naturally after 2^(LGSPAN+1) samples.
        //
        initial wait_for_sync = 1'b1;
        initial iaddr = 0;
        always @(posedge i_clk)
        if (i_reset)
        begin
                wait_for_sync <= 1'b1;
                iaddr <= 0;
        end else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
        begin
                iaddr <= iaddr + { {(LGSPAN){1'b0}}, 1'b1 };
                wait_for_sync <= 1'b0;
        end

        // Record what we're not ready to use yet: samples arriving while
        // iaddr[LGSPAN] is clear are stored.  Writes are suppressed while
        // iaddr[LGSPAN] is set, which is when the values read back below
        // are actually consumed, so a location is never written on the
        // clock its read value is used.
        always @(posedge i_clk)
        if ((i_ce)&&(!iaddr[LGSPAN]))
                imem[iaddr[(LGSPAN-1):0]] <= i_data;

        //
        // Now, we have all the inputs, so let's feed the butterfly
        //
        // ib_sync marks the first pair of a frame: it is set on the clock
        // where iaddr equals 2^LGSPAN, i.e. the same clock on which ib_a is
        // loaded from imem[0] and ib_b from the incoming sample.
        initial ib_sync = 1'b0;
        always @(posedge i_clk)
        if (i_reset)
                ib_sync <= 1'b0;
        else if (i_ce)
                ib_sync <= (iaddr == { 1'b1, {(LGSPAN){1'b0}} });

        always  @(posedge i_clk)
        if (i_ce)
        begin
                // One input from memory, ...
                ib_a <= imem[iaddr[(LGSPAN-1):0]];
                // One input clocked in from the top
                ib_b <= i_data;
                // and the coefficient or twiddle factor
                ib_c <= cmem[iaddr[(LGSPAN-1):0]];
        end

        // The idle flag is designed to keep track of when an input
        // to the butterfly is important and going to be used.  It's used
        // in a flag following, so that when useful values are placed
        // into the butterfly they'll be non-zero (idle=0), otherwise when
        // the inputs to the butterfly are irrelevant and will be ignored,
        // then (idle=1) those inputs will be set to zero.  This
        // functionality is not designed to be used in operation, but only
        // within a Verilator simulation context when chasing a bug.
        // In this limited environment, the non-zero answers will stand
        // in a trace making it easier to highlight a bug.
        //
        // idle is a wire with a continuous driver in both branches.  A
        // constant must not be produced with "always @(*) idle = 0;": such
        // a block reads no signals, so its implicit sensitivity list is
        // empty and an event-driven simulator never executes it, leaving
        // the flag at X.
        wire    idle;
        generate if (ZERO_ON_IDLE)
        begin : GEN_ZERO_ON_IDLE
                reg     r_idle;

                initial r_idle = 1'b1;
                always @(posedge i_clk)
                if (i_reset)
                        r_idle <= 1'b1;
                else if (i_ce)
                        r_idle <= (!iaddr[LGSPAN])&&(!wait_for_sync);

                assign  idle = r_idle;
        end else begin : GEN_NO_IDLE

                assign  idle = 1'b0;

        end endgenerate

        // The butterfly itself.  Its data inputs are forced to zero while
        // idle is set or i_ce is low, and its sync input is only raised on
        // clocks with i_ce high.
        // NOTE(review): the hwbfly branch gates its coefficient input on
        // idle alone whereas the butterfly branch also gates it on i_ce;
        // kept exactly as found -- confirm the difference is intentional.
        generate if (OPT_HWMPY)
        begin : HWBFLY
                hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
                                .CKPCE(CKPCE), .SHIFT(BFLYSHIFT))
                        bfly(i_clk, i_reset, i_ce,
                                (idle)            ? {(2*CWIDTH){1'b0}} : ib_c,
                                (idle || (!i_ce)) ? {(2*IWIDTH){1'b0}} : ib_a,
                                (idle || (!i_ce)) ? {(2*IWIDTH){1'b0}} : ib_b,
                                (ib_sync)&&(i_ce),
                                ob_a, ob_b, ob_sync);
        end else begin : FWBFLY
                butterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
                                .CKPCE(CKPCE),.SHIFT(BFLYSHIFT))
                        bfly(i_clk, i_reset, i_ce,
                                (idle || (!i_ce)) ? {(2*CWIDTH){1'b0}} : ib_c,
                                (idle || (!i_ce)) ? {(2*IWIDTH){1'b0}} : ib_a,
                                (idle || (!i_ce)) ? {(2*IWIDTH){1'b0}} : ib_b,
                                (ib_sync)&&(i_ce),
                                ob_a, ob_b, ob_sync);
        end endgenerate

        //
        // Next step: recover the outputs from the butterfly
        //
        // oB counts butterfly results.  It starts on the first ob_sync and
        // free-runs from then on (b_started); o_sync repeats ob_sync, but
        // only while oB is in its lower half.
        initial oB        = 0;
        initial o_sync    = 0;
        initial b_started = 0;
        always @(posedge i_clk)
        if (i_reset)
        begin
                oB <= 0;
                o_sync <= 0;
                b_started <= 0;
        end else if (i_ce)
        begin
                o_sync <= (!oB[LGSPAN])&&(ob_sync);
                if (ob_sync||b_started)
                        oB <= oB + { {(LGSPAN){1'b0}}, 1'b1 };
                if ((ob_sync)&&(!oB[LGSPAN]))
                        // A butterfly output is available
                        b_started <= 1'b1;
        end

        // The second butterfly result is held for one i_ce together with
        // its address, and only then written into omem.
        reg     [(LGSPAN-1):0]           dly_addr;
        reg     [(2*OWIDTH-1):0] dly_value;
        always @(posedge i_clk)
        if (i_ce)
        begin
                dly_addr <= oB[(LGSPAN-1):0];
                dly_value <= ob_b;
        end
        always @(posedge i_clk)
        if (i_ce)
                omem[dly_addr] <= dly_value;

        // While oB is in its lower half the first butterfly result is
        // passed straight through; in the upper half the buffered second
        // results are replayed from omem.
        always @(posedge i_clk)
        if (i_ce)
                o_data <= (!oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.