OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 21

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 16 dgisselq
/////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    fftgen.cpp
4
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26
///////////////////////////////////////////////////////////////////////////
27
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49
///////////////////////////////////////////////////////////////////////////
50
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62
// COREDIR: a directory name string.
// NOTE(review): not referenced in the portion of the file visible here --
// presumably the default directory the generated Verilog core is written
// into; confirm against the code that uses it.
#define COREDIR "fft-core"
63
 
64
// cpyleft
//
// The copyright/license banner, already formatted as Verilog line comments.
// Each build_*() generator writes it verbatim into its output file right
// after that file's own header.
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";

// prjname
//
// Project title; substituted for the "%s" on the "// Project:" line of each
// generated file's header.
const char      prjname[] = "A Doubletime Pipelined FFT";

// creator
//
// Author credit, already formatted as Verilog line comments; substituted for
// the second "%s" of each generated file's header.
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Tecnology, LLC\n";
92
 
93
// lgval
//
// Returns the smallest lg >= 1 such that (1<<lg) >= vl, i.e. the number of
// bits needed to count vl distinct values.  Note the floor of one: every
// vl <= 2 (zero and negative values included) yields 1.
//
// The comparison is carried out in 64-bit arithmetic.  With a plain int,
// (1<<lg) overflows once lg reaches 31, which is undefined behaviour and in
// practice keeps the loop from ever terminating for vl > 2^30.
int     lgval(int vl) {
        int     lg;

        for(lg=1; (1ll<<lg) < (long long)vl; lg++)
                ;
        return lg;
}
100
 
101
// nextlg
//
// Returns the smallest power of two that is >= vl.  Any vl <= 1 (zero and
// negative values included) yields 1.
//
// Precondition: vl <= 2^30.  2^30 is the largest power of two an int can
// hold; for anything larger the doubling below would overflow (undefined
// behaviour) and the loop would never reach vl.  The assert makes that
// failure loud instead of silent.
int     nextlg(int vl) {
        int     r;

        assert(vl <= (1<<30));

        for(r=1; r<vl; r<<=1)
                ;
        return r;
}
108
 
109 14 dgisselq
// bflydelay
//
// Computes a delay figure from two bit widths:
//
//      cbits = nbits + xtra
//      delay = nbits + 4       if cbits > nbits + 1  (i.e. xtra > 1)
//      delay = cbits + 3       otherwise
//
// NOTE(review): presumably this is the pipeline delay, in clocks, of the
// generated butterfly, with nbits the data width and xtra the number of
// extra coefficient bits -- the butterfly generator is not fully visible
// here, so confirm against it.
int     bflydelay(int nbits, int xtra) {
        int     cbits = nbits + xtra;
        int     delay;

        if (nbits+1<cbits)
                delay = nbits+4;
        else
                delay = cbits+3;
        return delay;
}
118
 
119 14 dgisselq
int     lgdelay(int nbits, int xtra) {
120
        // The butterfly code needs to compare a valid address, of this
121
        // many bits, with an address two greater.  This guarantees we
122
        // have enough bits for that comparison.  We'll also end up with
123
        // more storage space to look for these values, but without a 
124
        // redesign that's just what we'll deal with.
125
        return lgval(bflydelay(nbits, xtra)+3);
126
}
127
 
128 2 dgisselq
void    build_quarters(const char *fname) {
129
        FILE    *fp = fopen(fname, "w");
130
        if (NULL == fp) {
131
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
132
                perror("O/S Err was:");
133
                return;
134
        }
135
 
136
        fprintf(fp,
137
"///////////////////////////////////////////////////////////////////////////\n"
138
"//\n"
139
"// Filename:   qtrstage.v\n"
140
"//             \n"
141
"// Project:    %s\n"
142
"//\n"
143 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
144
"//             frequency FFT.  This particular implementation is optimized\n"
145
"//             so that all of the multiplies are accomplished by additions\n"
146
"//             and multiplexers only.\n"
147
"//\n"
148 2 dgisselq
"//\n%s"
149
"//\n",
150
                prjname, creator);
151
        fprintf(fp, "%s", cpyleft);
152
 
153
        fprintf(fp,
154
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
155 5 dgisselq
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
156
        "\t// Parameters specific to the core that should be changed when this\n"
157
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
158
        "\t// spans must use the fftdoubles stage.\n"
159 15 dgisselq
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=1;\n"
160 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
161
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
162
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
163
        "\toutput\treg                          o_sync;\n"
164 14 dgisselq
        "\t\n");
165
        fprintf(fp,
166 5 dgisselq
        "\treg\t        wait_for_sync;\n"
167
        "\treg\t[2:0]   pipeline;\n"
168 2 dgisselq
"\n"
169 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
170 14 dgisselq
        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"
171
        "\tassign n_diff_r = -diff_r;\n"
172 5 dgisselq
        "\tassign n_diff_i = -diff_i;\n"
173 2 dgisselq
"\n"
174 5 dgisselq
        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"
175
        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"
176
        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"
177
        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"
178 2 dgisselq
"\n"
179 5 dgisselq
        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"
180
        "\treg\t[(2*IWIDTH-1):0]        imem;\n"
181 2 dgisselq
"\n"
182 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
183
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
184
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
185 2 dgisselq
"\n"
186 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
187
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
188
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
189 2 dgisselq
"\n"
190 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
191 14 dgisselq
"\n");
192
        fprintf(fp,
193 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
194 9 dgisselq
        "\tgenerate\n"
195
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
196
                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
197
        "\telse\n"
198
                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
199
        "\tendgenerate\n"
200 2 dgisselq
"\n"
201 5 dgisselq
        "\talways @(posedge i_clk)\n"
202
                "\t\tif (i_rst)\n"
203
                "\t\tbegin\n"
204
                        "\t\t\twait_for_sync <= 1'b1;\n"
205
                        "\t\t\tiaddr <= 0;\n"
206
                        "\t\t\tpipeline <= 3'b000;\n"
207
                "\t\tend\n"
208
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
209
                "\t\tbegin\n"
210
                        "\t\t\t// Always\n"
211
                        "\t\t\timem <= i_data;\n"
212
                        "\t\t\tiaddr <= iaddr + 1;\n"
213
                        "\t\t\twait_for_sync <= 1'b0;\n"
214 2 dgisselq
"\n"
215 5 dgisselq
                        "\t\t\t// In sequence, clock = 0\n"
216
                        "\t\t\tif (iaddr[0])\n"
217
                        "\t\t\tbegin\n"
218
                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"
219
                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"
220
                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"
221
                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"
222 2 dgisselq
"\n"
223 5 dgisselq
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"
224
                        "\t\t\tend else\n"
225
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"
226 2 dgisselq
"\n"
227 5 dgisselq
                        "\t\t\t// In sequence, clock = 1\n"
228
                        "\t\t\tif (pipeline[1])\n"
229
                        "\t\t\tbegin\n"
230 9 dgisselq
"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"
231
        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"
232 5 dgisselq
                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
233 9 dgisselq
                                "\t\t\t\tif (ODD == 0)\n"
234 5 dgisselq
                                "\t\t\t\tbegin\n"
235 2 dgisselq
"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
236
"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
237 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
238
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
239 14 dgisselq
                                "\t\t\t\tend else if (INVERSE==0) begin\n"
240 2 dgisselq
"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
241
"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
242 14 dgisselq
"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
243 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
244
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
245 5 dgisselq
                                "\t\t\t\tend else begin\n"
246 2 dgisselq
"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
247
"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
248
"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
249 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
250
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
251
 
252 5 dgisselq
                                "\t\t\t\tend\n"
253
                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"
254
                        "\t\t\tend\n"
255
                        "\t\t\t// In sequence, clock = 2\n"
256
                        "\t\t\tif (pipeline[2])\n"
257
                        "\t\t\tbegin\n"
258
                                "\t\t\t\tomem <= ob_b;\n"
259
                                "\t\t\t\to_data <= ob_a;\n"
260
                        "\t\t\tend else\n"
261
                                "\t\t\t\to_data <= omem;\n"
262 6 dgisselq
                        "\t\t\t// Don\'t forget in the sync check that we are running\n"
263
                        "\t\t\t// at two clocks per sample.  Thus we need to\n"
264
                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
265
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"
266 5 dgisselq
                "\t\tend\n"
267 2 dgisselq
"endmodule\n");
268
}
269
 
270
void    build_dblstage(const char *fname) {
271
        FILE    *fp = fopen(fname, "w");
272
        if (NULL == fp) {
273
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
274
                perror("O/S Err was:");
275
                return;
276
        }
277
 
278
        fprintf(fp,
279
"///////////////////////////////////////////////////////////////////////////\n"
280
"//\n"
281
"// Filename:   dblstage.v\n"
282
"//\n"
283
"// Project:    %s\n"
284
"//\n"
285
"// Purpose:    This is part of an FPGA implementation that will process\n"
286 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
287
"//             through the data at two samples per clock.  If you notice\n"
288
"//             from the derivation of an FFT, the only time both even and\n"
289
"//             odd samples are used at the same time is in this stage.\n"
290
"//             Therefore, other than this stage and these twiddles, all of\n"
291
"//             the other stages can run two stages at a time at one sample\n"
292
"//             per clock.\n"
293 2 dgisselq
"//\n"
294
"//             In this implementation, the output is valid one clock after\n"
295
"//             the input is valid.  The output also accumulates one bit\n"
296
"//             above and beyond the number of bits in the input.\n"
297
"//             \n"
298
"//             i_clk   A system clock\n"
299 6 dgisselq
"//             i_rst   A synchronous reset\n"
300 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
301 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
302 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
303
"//                     bits contain the real portion, low order bits the\n"
304
"//                     imaginary portion, all in two\'s complement.\n"
305
"//             i_right The next (odd) complex sample input, same format as\n"
306
"//                     i_left.\n"
307
"//             o_left  The first (even) complex output.\n"
308
"//             o_right The next (odd) complex output.\n"
309 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
310 2 dgisselq
"//\n%s"
311
"//\n", prjname, creator);
312
 
313
        fprintf(fp, "%s", cpyleft);
314
        fprintf(fp,
315 9 dgisselq
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
316 19 dgisselq
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0, ROUND=1;\n"
317 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
318 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
319 6 dgisselq
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
320
        "\toutput\treg\t\t\to_sync;\n"
321 19 dgisselq
        "\n");
322
        fprintf(fp,
323 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
324
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
325
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
326
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
327
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
328
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
329
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
330 2 dgisselq
"\n"
331 15 dgisselq
"\n"
332 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
333 15 dgisselq
"\n"
334 19 dgisselq
        "\twire\tsigned\t[(IWIDTH):0]\trnd;\n"
335
"\n"
336
        "\tgenerate\n"
337
        "\tif ((ROUND==0)||(IWIDTH+1-OWIDTH-SHIFT==0))\n"
338
                "\t\tassign rnd = { {(IWIDTH+1){1'b0}} };\n"
339
        "\telse if (IWIDTH+1-OWIDTH-SHIFT==1)\n"
340
                "\t\tassign rnd = { {(IWIDTH){1'b0}}, 1'b1 };\n"
341
        "\telse if (IWIDTH+1-OWIDTH-SHIFT>1)\n"
342
                "\t\tassign rnd = { {(IWIDTH-(IWIDTH+1-OWIDTH-SHIFT-1)){1'b0}}, 1'b1, {(IWIDTH+1-OWIDTH-SHIFT-1){1'b0}} };\n"
343
        "\tendgenerate\n"
344
"\n"
345 5 dgisselq
        "\t// Don't forget that we accumulate a bit by adding two values\n"
346
        "\t// together. Therefore our intermediate value must have one more\n"
347
        "\t// bit than the two originals.\n"
348
        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"
349 2 dgisselq
"\n"
350 6 dgisselq
        "\treg\twait_for_sync;\n"
351
"\n"
352 5 dgisselq
        "\talways @(posedge i_clk)\n"
353 6 dgisselq
                "\t\tif (i_rst)\n"
354
                        "\t\t\twait_for_sync <= 1'b1;\n"
355
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
356 5 dgisselq
                "\t\tbegin\n"
357 6 dgisselq
                        "\t\t\twait_for_sync <= 1'b0;\n"
358
                        "\t\t\t//\n"
359 19 dgisselq
                        "\t\t\tout_0r <= i_in_0r + i_in_1r + rnd;\n"
360
                        "\t\t\tout_0i <= i_in_0i + i_in_1i + rnd;\n"
361 5 dgisselq
                        "\t\t\t//\n"
362 19 dgisselq
                        "\t\t\tout_1r <= i_in_0r - i_in_1r + rnd;\n"
363
                        "\t\t\tout_1i <= i_in_0i - i_in_1i + rnd;\n"
364 6 dgisselq
                        "\t\t\t//\n"
365
                        "\t\t\to_sync <= i_sync;\n"
366 5 dgisselq
                "\t\tend\n"
367 2 dgisselq
"\n"
368 5 dgisselq
        "\t// Now, if the master control program doesn't want to keep all of\n"
369
        "\t// our bits, we can shift down to OWIDTH bits here.\n"
370
        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
371
        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
372
        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
373
        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
374 2 dgisselq
"\n"
375 5 dgisselq
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
376
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
377 2 dgisselq
"\n"
378
"endmodule\n");
379
        fclose(fp);
380
}
381
 
382
void    build_multiply(const char *fname) {
383
        FILE    *fp = fopen(fname, "w");
384
        if (NULL == fp) {
385
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
386
                perror("O/S Err was:");
387
                return;
388
        }
389
 
390
        fprintf(fp,
391
"///////////////////////////////////////////////////////////////////////////\n"
392
"//\n"
393
"// Filename:   shiftaddmpy.v\n"
394
"//\n"
395
"// Project:    %s\n"
396
"//\n"
397
"// Purpose:    A portable shift and add multiply.\n"
398
"//\n"
399
"//             While both Xilinx and Altera will offer single clock \n"
400
"//             multiplies, this simple approach will multiply two numbers\n"
401
"//             on any architecture.  The result maintains the full width\n"
402
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
403
"//             no shifted bits, etc.\n"
404
"//\n"
405
"//             Further, for those applications that can support it, this\n"
406
"//             multiply is pipelined and will produce one answer per clock.\n"
407
"//\n"
408
"//             For minimal processing delay, make the first parameter\n"
409
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
410
"//\n"
411
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
412
"//             That is, if the data is present on the input at clock t=0,\n"
413
"//             the result will be present on the output at time t=AWIDTH+1;\n"
414
"//\n"
415
"//\n%s"
416
"//\n", prjname, creator);
417
 
418
        fprintf(fp, "%s", cpyleft);
419
        fprintf(fp,
420
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
421
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
422
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
423
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
424
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
425
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
426
"\n"
427
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
428
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
429
        "\treg\t\t\tsgn;\n"
430
"\n"
431
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
432
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
433
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
434
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
435
        "\tgenvar k;\n"
436
"\n"
437 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
438
        "\t// taking the absolute value here would require an additional bit.\n"
439
        "\t// However, because our results are now unsigned, we can stay\n"
440
        "\t// within the number of bits given (for now).\n"
441 2 dgisselq
        "\talways @(posedge i_clk)\n"
442
                "\t\tif (i_ce)\n"
443
                "\t\tbegin\n"
444
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
445
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
446
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
447
                "\t\tend\n"
448
"\n"
449
        "\talways @(posedge i_clk)\n"
450
                "\t\tif (i_ce)\n"
451
                "\t\tbegin\n"
452
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"
453
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"
454
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
455
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"
456
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
457
                "\t\tend\n"
458
"\n"
459
        "\tgenerate\n"
460 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
461 2 dgisselq
        "\tbegin\n"
462 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
463
                "\t\tif (i_ce)\n"
464 2 dgisselq
                "\t\tbegin\n"
465
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"
466
                        "\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"
467
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"
468
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
469
                "\t\tend\n"
470
        "\tend\n"
471
        "\tendgenerate\n"
472
"\n"
473
        "\talways @(posedge i_clk)\n"
474
                "\t\tif (i_ce)\n"
475
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
476
"\n"
477
"endmodule\n");
478
 
479
        fclose(fp);
480
}
481
 
482
void    build_dblreverse(const char *fname) {
483
        FILE    *fp = fopen(fname, "w");
484
        if (NULL == fp) {
485
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
486
                perror("O/S Err was:");
487
                return;
488
        }
489
 
490
        fprintf(fp,
491
"///////////////////////////////////////////////////////////////////////////\n"
492
"//\n"
493
"// Filename:   dblreverse.v\n"
494
"//\n"
495
"// Project:    %s\n"
496
"//\n"
497
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
498
"//             expected as follows:\n"
499
"//\n"
500
"//             i_clk   A running clock at whatever system speed is offered.\n"
501
"//             i_rst   A synchronous reset signal, that resets all internals\n"
502
"//             i_ce    If this is one, one input is consumed and an output\n"
503
"//                     is produced.\n"
504
"//             i_in_0, i_in_1\n"
505
"//                     Two inputs to be consumed, each of width WIDTH.\n"
506
"//             o_out_0, o_out_1\n"
507
"//                     Two of the bitreversed outputs, also of the same\n"
508
"//                     width, WIDTH.  Of course, there is a delay from the\n"
509
"//                     first input to the first output.  For this purpose,\n"
510
"//                     o_sync is present.\n"
511
"//             o_sync  This will be a 1'b1 for the first value in any block.\n"
512
"//                     Following a reset, this will only become 1'b1 once\n"
513
"//                     the data has been loaded and is now valid.  After that,\n"
514
"//                     all outputs will be valid.\n"
515
"//\n%s"
516
"//\n", prjname, creator);
517
        fprintf(fp, "%s", cpyleft);
518
        fprintf(fp,
519
"\n\n"
520
"//\n"
521
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
522
"// our work into eight memory banks, writing two banks at once and reading\n"
523
"// another two banks in the same clock?\n"
524
"//\n"
525
"//     mem[00xxx0] = s_0[n]\n"
526
"//     mem[00xxx1] = s_1[n]\n"
527
"//     o_0[n] = mem[10xxx0]\n"
528
"//     o_1[n] = mem[11xxx0]\n"
529
"//     ...\n"
530
"//     mem[01xxx0] = s_0[m]\n"
531
"//     mem[01xxx1] = s_1[m]\n"
532
"//     o_0[m] = mem[10xxx1]\n"
533
"//     o_1[m] = mem[11xxx1]\n"
534
"//     ...\n"
535
"//     mem[10xxx0] = s_0[n]\n"
536
"//     mem[10xxx1] = s_1[n]\n"
537
"//     o_0[n] = mem[00xxx0]\n"
538
"//     o_1[n] = mem[01xxx0]\n"
539
"//     ...\n"
540
"//     mem[11xxx0] = s_0[m]\n"
541
"//     mem[11xxx1] = s_1[m]\n"
542
"//     o_0[m] = mem[00xxx1]\n"
543
"//     o_1[m] = mem[01xxx1]\n"
544
"//     ...\n"
545
"//\n"
546 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
547
"//     to do it properly.  These four banks are defined by the two bits\n"
548
"//     that determine the top and bottom of the correct address.  Larger\n"
549
"//     FFT\'s would require more memories.\n"
550
"//\n"
551 2 dgisselq
"//\n");
552
        fprintf(fp,
553
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
554 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
555
        "\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
556
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
557
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
558
        "\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
559
        "\toutput\treg\t\t\to_sync;\n"
560 2 dgisselq
"\n"
561 5 dgisselq
        "\treg\tin_reset;\n"
562
        "\treg\t[(LGSIZE):0]\tiaddr;\n"
563
        "\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"
564
        "\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"
565
        "\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"
566
        "\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"
567 2 dgisselq
"\n"
568 5 dgisselq
        "\twire\t[(2*LGSIZE-1):0]       braddr;\n"
569
        "\tgenvar\tk;\n"
570 21 dgisselq
        "\tgenerate for(k=0; k<LGSIZE; k=k+1)\n"
571 5 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"
572
        "\tendgenerate\n"
573 2 dgisselq
"\n"
574 5 dgisselq
        "\talways @(posedge i_clk)\n"
575
                "\t\tif (i_rst)\n"
576
                "\t\tbegin\n"
577
                        "\t\t\tiaddr <= 0;\n"
578
                        "\t\t\tin_reset <= 1'b1;\n"
579
                "\t\tend else if (i_ce)\n"
580
                "\t\tbegin\n"
581
                        "\t\t\tif (iaddr[(LGSIZE-1)])\n"
582
                        "\t\t\tbegin\n"
583
                                "\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
584
                                "\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
585
                        "\t\t\tend else begin\n"
586
                                "\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
587
                                "\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
588
                        "\t\t\tend\n"
589
                        "\t\t\tiaddr <= iaddr + 2;\n"
590
                        "\t\t\tif (&iaddr[(LGSIZE-1):1])\n"
591
                                "\t\t\t\tin_reset <= 1'b0;\n"
592
                        "\t\t\tif (in_reset)\n"
593
                        "\t\t\tbegin\n"
594
                                "\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"
595
                                "\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"
596
                                "\t\t\t\to_sync <= 1'b0;\n"
597
                        "\t\t\tend else\n"
598
                        "\t\t\tbegin\n"
599
                                "\t\t\t\tif (braddr[0])\n"
600
                                "\t\t\t\tbegin\n"
601 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
602
"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
603 5 dgisselq
                                "\t\t\t\tend else begin\n"
604 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
605
"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
606 5 dgisselq
                                "\t\t\t\tend\n"
607
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"
608
                        "\t\t\tend\n"
609
                "\t\tend\n"
610 2 dgisselq
"\n"
611 21 dgisselq
"endmodule\n");
612 2 dgisselq
 
613
        fclose(fp);
614
}
615
 
616 14 dgisselq
void    build_butterfly(const char *fname, int xtracbits) {
617 2 dgisselq
        FILE    *fp = fopen(fname, "w");
618
        if (NULL == fp) {
619
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
620
                perror("O/S Err was:");
621
                return;
622
        }
623
 
624
        fprintf(fp,
625
"///////////////////////////////////////////////////////////////////////////\n"
626
"//\n"
627
"// Filename:   butterfly.v\n"
628
"//\n"
629
"// Project:    %s\n"
630
"//\n"
631
"// Purpose:    This routine caculates a butterfly for a decimation\n"
632
"//             in frequency version of an FFT.  Specifically, given\n"
633
"//             complex Left and Right values together with a \n"
634
"//             coefficient, the output of this routine is given\n"
635
"//             by:\n"
636
"//\n"
637
"//             L' = L + R\n"
638
"//             R' = (L - R)*C\n"
639
"//\n"
640
"//             The rest of the junk below handles timing (mostly),\n"
641
"//             to make certain that L' and R' reach the output at\n"
642
"//             the same clock.  Further, just to make certain\n"
643
"//             that is the case, an 'aux' input exists.  This\n"
644
"//             aux value will come out of this routine synchronized\n"
645
"//             to the values it came in with.  (i.e., both L', R',\n"
646
"//             and aux all have the same delay.)  Hence, a caller\n"
647
"//             of this routine may set aux on the first input with\n"
648
"//             valid data, and then wait to see aux set on the output\n"
649
"//             to know when to find the first output with valid data.\n"
650
"//\n"
651
"//             All bits are preserved until the very last clock,\n"
652
"//             where any more bits than OWIDTH will be quietly\n"
653
"//             discarded.\n"
654
"//\n"
655
"//             This design features no overflow checking.\n"
656
"// \n"
657
"// Notes:\n"
658
"//             CORDIC:\n"
659
"//             Much as we would like, we can't use a cordic here.\n"
660
"//             The goal is to accomplish an FFT, as defined, and a\n"
661
"//             CORDIC places a scale factor onto the data.  Removing\n"
662
"//             the scale factor would cost a two multiplies, which\n"
663
"//             is precisely what we are trying to avoid.\n"
664
"//\n"
665
"//\n"
666
"//             3-MULTIPLIES:\n"
667
"//             It should also be possible to do this with three \n"
668
"//             multiplies and an extra two addition cycles.  \n"
669
"//\n"
670
"//             We want\n"
671
"//                     R+I = (a + jb) * (c + jd)\n"
672
"//                     R+I = (ac-bd) + j(ad+bc)\n"
673
"//             We multiply\n"
674
"//                     P1 = ac\n"
675
"//                     P2 = bd\n"
676
"//                     P3 = (a+b)(c+d)\n"
677
"//             Then \n"
678
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
679
"//\n"
680
"//             WIDTHS:\n"
681
"//             On multiplying an X width number by an\n"
682
"//             Y width number, X>Y, the result should be (X+Y)\n"
683
"//             bits, right?\n"
684
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
685
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
686
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
687
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
688
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
689
"//             YUP!  But just barely.  Do this and you'll really want\n"
690
"//             to drop a bit, although you will risk overflow in so\n"
691
"//             doing.\n"
692
"//\n%s"
693
"//\n", prjname, creator);
694
        fprintf(fp, "%s", cpyleft);
695
 
696
        fprintf(fp,
697 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
698 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
699
        "\t// Public changeable parameters ...\n"
700 14 dgisselq
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
701 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
702 14 dgisselq
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
703 15 dgisselq
                        "\t\t\tSHIFT=0, ROUND=1;\n"
704 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
705
        "\t// this value is fractional, then round up to the nearest\n"
706
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
707 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
708 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
709 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
710
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
711
        "\tinput\t\ti_aux;\n"
712
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
713 21 dgisselq
        "\toutput\treg  o_aux;\n"
714 14 dgisselq
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
715
        bflydelay(16, xtracbits), lgdelay(16,xtracbits));
716
        fprintf(fp,
717 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
718 2 dgisselq
"\n"
719 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
720
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
721
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
722
        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"
723
        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"
724
        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"
725
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
726
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
727
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
728
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
729
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
730 2 dgisselq
"\n"
731 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
732 2 dgisselq
"\n"
733 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
734
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
735 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
736 5 dgisselq
        "\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
737 6 dgisselq
        "\treg\t\t\t\tovalid;\n"
738 5 dgisselq
"\n");
739
        fprintf(fp,
740
        "\t// Set up the input to the multiply\n"
741 2 dgisselq
        "\talways @(posedge i_clk)\n"
742
                "\t\tif (i_ce)\n"
743
                "\t\tbegin\n"
744
                        "\t\t\t// One clock just latches the inputs\n"
745
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
746
                        "\t\t\tr_right <= i_right;\n"
747
                        "\t\t\tr_aux <= i_aux;\n"
748
                        "\t\t\tr_coef  <= i_coef;\n"
749
                        "\t\t\t// Next clock adds/subtracts\n"
750
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
751
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
752
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
753
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
754
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
755
                        "\t\t\tr_aux_2 <= r_aux;\n"
756
                        "\t\t\tr_coef_2<= r_coef;\n"
757
        "\t\tend\n"
758 5 dgisselq
"\n");
759
        fprintf(fp,
760
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
761
        "\t// to be multiplied, but yet we still need the results in sync\n"
762
        "\t// with the answer when it is ready.\n"
763 2 dgisselq
        "\talways @(posedge i_clk)\n"
764 6 dgisselq
                "\t\tif (i_rst)\n"
765 2 dgisselq
                "\t\tbegin\n"
766 6 dgisselq
                        "\t\t\tfifo_addr <= 0;\n"
767
                        "\t\t\tovalid <= 1'b0;\n"
768
                "\t\tend else if (i_ce)\n"
769
                "\t\tbegin\n"
770 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
771
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
772
                        "\t\t\t// right side.\n"
773
                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
774
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
775 14 dgisselq
"\n"
776
                        "\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n"
777 2 dgisselq
                "\t\tend\n"
778
"\n"
779 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
780
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
781
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
782
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
783 2 dgisselq
"\n"
784 5 dgisselq
"\n");
785
        fprintf(fp,
786
        "\t// Multiply output is always a width of the sum of the widths of\n"
787
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
788
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
789
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
790
        "\t// three multiply complex multiply cannot increase the total\n"
791
        "\t// number of bits in our final output.  We\'ll take care of\n"
792
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
793
        "\t// below.\n"
794 2 dgisselq
"\n"
795 5 dgisselq
"\n");
796
        fprintf(fp,
797
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
798
        "\t// by doing three multiplies we accomplish the work of four.\n"
799
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
800
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
801
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
802
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
803
        "\t// We do this by calculating the intermediate products P1, P2,\n"
804
        "\t// and P3 as\n"
805
        "\t//\tP1 = ac\n"
806
        "\t//\tP2 = bd\n"
807
        "\t//\tP3 = (a + b) * (c + d)\n"
808
        "\t// and then complete our final answer with\n"
809
        "\t//\tac - bd = P1 - P2 (this checks)\n"
810
        "\t//\tad + bc = P3 - P2 - P1\n"
811
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
812
        "\t//\t        = bc + ad (this checks)\n"
813 2 dgisselq
"\n"
814 5 dgisselq
"\n");
815
        fprintf(fp,
816
        "\t// This should really be based upon an IF, such as in\n"
817
        "\t// if (IWIDTH < CWIDTH) then ...\n"
818
        "\t// However, this is the only (other) way I know to do it.\n"
819 2 dgisselq
        "\tgenerate\n"
820
        "\tif (CWIDTH < IWIDTH+1)\n"
821
        "\tbegin\n"
822
                "\t\t// We need to pad these first two multiplies by an extra\n"
823 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
824
                "\t\t// simpler, multiply.\n"
825 2 dgisselq
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
826
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
827
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
828
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
829 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
830 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
831
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
832 5 dgisselq
                        "\t\t\t\tir_coef_i+ir_coef_r,\n"
833
                        "\t\t\t\tr_dif_r + r_dif_i,\n"
834
                        "\t\t\t\tp_three);\n"
835 2 dgisselq
        "\tend else begin\n"
836
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
837
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
838
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
839
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
840
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
841 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
842 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
843
                                "\t\t\t\tr_dif_r+r_dif_i,\n"
844
                                "\t\t\t\tir_coef_i+ir_coef_r,\n"
845
                                "\t\t\t\tp_three);\n"
846
        "\tend\n"
847
        "\tendgenerate\n"
848 5 dgisselq
"\n");
849
        fprintf(fp,
850
        "\t// These values are held in memory and delayed during the\n"
851
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
852
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
853
        "\t// therefore, the left_x values need to be right shifted by\n"
854
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
855
        "\t// extension.\n"
856 2 dgisselq
        "\twire aux;\n"
857 5 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
858
        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"
859
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
860
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
861
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
862 2 dgisselq
"\n"
863
"\n"
864 5 dgisselq
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
865
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
866
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
867 21 dgisselq
        "\twire\tsigned\t[(CWIDTH+IWIDTH+3-1):0]        rnd;\n"
868 5 dgisselq
        "\tgenerate\n"
869 16 dgisselq
        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"
870 5 dgisselq
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
871 16 dgisselq
        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"
872
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"
873 5 dgisselq
        "\telse\n"
874 15 dgisselq
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"
875
                "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"
876 5 dgisselq
        "\tendgenerate\n"
877
"\n");
878
        fprintf(fp,
879 2 dgisselq
        "\talways @(posedge i_clk)\n"
880
                "\t\tif (i_ce)\n"
881
                "\t\tbegin\n"
882
                        "\t\t\t// First clock, recover all values\n"
883
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
884
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
885 5 dgisselq
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
886
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
887
                        "\t\t\t// extra bits we need to get rid of.)\n"
888 2 dgisselq
                        "\t\t\tmpy_r <= p_one - p_two;\n"
889
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
890
"\n"
891
                        "\t\t\t// Second clock, round and latch for final clock\n"
892
                        "\t\t\tb_right_r <= mpy_r + rnd;\n"
893
                        "\t\t\tb_right_i <= mpy_i + rnd;\n"
894 5 dgisselq
                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"
895
                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"
896 6 dgisselq
                        "\t\t\to_aux <= aux & ovalid;\n"
897 2 dgisselq
                "\t\tend\n"
898 5 dgisselq
"\n");
899
        fprintf(fp,
900 2 dgisselq
        "\t// Final clock--clock and remove unnecessary bits.\n"
901 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
902
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
903
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
904
        "\t// them, but the actual values will never fill all these bits.\n"
905
        "\t// In particular, we only need:\n"
906
        "\t//\t IWIDTH bits for the input\n"
907
        "\t//\t     +1 bit for the add/subtract\n"
908
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
909
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
910
        "\t//\t ------\n"
911
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
912
        "\t//\n"
913
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
914
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
915
        "\t//\t   IWIDTH bits for the input\n"
916
        "\t//\t       +1 bit for the add/subtract\n"
917
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
918
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
919
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
920
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
921
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
922
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
923
        "\t// SHIFT) we accomplish that here.\n"
924
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
925
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
926
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
927
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
928 2 dgisselq
"\n"
929 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
930
        "\t// complement numbers per output word, so that each output word\n"
931
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
932
        "\t// portion and the bottom half being the imaginary portion.\n"
933 2 dgisselq
        "\tassign       o_left = { o_left_r, o_left_i };\n"
934
        "\tassign       o_right= { o_right_r,o_right_i};\n"
935
"\n"
936
"endmodule\n");
937
        fclose(fp);
938
}
939
 
940
void    build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra) {
941
        FILE    *fstage = fopen(fname, "w");
942
        int     cbits = nbits + xtra;
943
 
944
        if ((cbits * 2) >= sizeof(long long)*8) {
945
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
946
                exit(-1);
947
        }
948
 
949
        if (fstage == NULL) {
950
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
951
                perror("O/S Err was:");
952
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
953
                return;
954
        }
955
 
956
        fprintf(fstage,
957
"////////////////////////////////////////////////////////////////////////////\n"
958
"//\n"
959
"// Filename:   %sfftstage_%c%d.v\n"
960
"//\n"
961
"// Project:    %s\n"
962
"//\n"
963
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
964
"//             be used by a FFT core compiler to generate FFTs which may be\n"
965
"//             used as part of an FFT core.  Specifically, this file \n"
966
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
967
"//             FFT, there shall be (N-1) of these stages.  \n"
968
"//\n%s"
969
"//\n",
970
                (inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);
971
        fprintf(fstage, "%s", cpyleft);
972
        fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",
973
                (inv)?"i":"", (odd)?'o':'e', stage*2);
974
        // These parameter values are useless at this point--they are to be
975
        // replaced by the parameter values in the calling program.  Only
976
        // problem is, the CWIDTH needs to match exactly!
977
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
978
                nbits, cbits, nbits+1);
979
        fprintf(fstage,
980
"\t// Parameters specific to the core that should be changed when this\n"
981
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
982
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
983
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
984 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
985 2 dgisselq
        fprintf(fstage,
986
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
987
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
988
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
989
"\toutput       reg                             o_sync;\n"
990
"\n"
991
"\treg  wait_for_sync;\n"
992
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
993
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
994 8 dgisselq
"\treg  ib_sync;\n"
995 2 dgisselq
"\n"
996
"\treg  b_started;\n"
997
"\twire ob_sync;\n"
998
"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");
999
        fprintf(fstage,
1000
"\n"
1001
"\t// %scmem is defined as an array of real and complex values,\n"
1002
"\t// where the top CWIDTH bits are the real value and the bottom\n"
1003
"\t// CWIDTH bits are the imaginary value.\n"
1004
"\t//\n"
1005
"\t// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
1006
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
1007
"\t//\n"
1008
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
1009
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
1010
                (inv)?"i":"", (inv)?"i":"",
1011
                (inv)?"i":"", (odd)?'o':'e',stage<<1,
1012
                (inv)?"i":"");
1013
        {
1014
                FILE    *cmem;
1015
 
1016 14 dgisselq
                {
1017
                        char    *memfile, *ptr;
1018
 
1019
                        memfile = new char[strlen(fname)+128];
1020
                        strcpy(memfile, fname);
1021
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
1022
                                ptr++;
1023
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
1024
                        } else {
1025
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
1026
                                        COREDIR, (inv)?"i":"",
1027
                                        (odd)?'o':'e', stage*2);
1028
                        }
1029
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
1030
                        cmem = fopen(memfile, "w");
1031
                        if (NULL == cmem) {
1032
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
1033
                                perror("Err from O/S:");
1034
                                exit(-2);
1035
                        }
1036
 
1037
                        delete[] memfile;
1038 2 dgisselq
                }
1039
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
1040
                for(int i=0; i<stage/2; i++) {
1041
                        int k = 2*i+odd;
1042 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
1043 2 dgisselq
                        double  c, s;
1044
                        long long ic, is, vl;
1045
 
1046
                        c = cos(W); s = sin(W);
1047 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
1048
                        is = (long long)round((1ll<<(cbits-2)) * s);
1049 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
1050
                        vl <<= (cbits);
1051
                        vl |= (is & (~(-1ll << (cbits))));
1052
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
1053
                        /*
1054
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
1055
                                ((cbits*2+3)/4), vl, c, s,
1056
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
1057
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
1058
                        */
1059
                } fclose(cmem);
1060
        }
1061
 
1062
        fprintf(fstage,
1063 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
1064 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
1065
"\n"
1066 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
1067 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
1068
"\n"
1069
"\talways @(posedge i_clk)\n"
1070
        "\t\tif (i_rst)\n"
1071
        "\t\tbegin\n"
1072
                "\t\t\twait_for_sync <= 1'b1;\n"
1073
                "\t\t\tiaddr <= 0;\n"
1074
                "\t\t\toB <= 0;\n"
1075 8 dgisselq
                "\t\t\tib_sync   <= 1'b0;\n"
1076
                "\t\t\to_sync    <= 1'b0;\n"
1077
                "\t\t\tb_started <= 1'b0;\n"
1078 2 dgisselq
        "\t\tend\n"
1079
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1080
        "\t\tbegin\n"
1081
                "\t\t\t//\n"
1082
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
1083
                "\t\t\t//\n"
1084
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
1085
                "\t\t\tiaddr <= iaddr + 1;\n"
1086
                "\t\t\twait_for_sync <= 1'b0;\n"
1087
"\n"
1088
                "\t\t\t//\n"
1089
                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"
1090
                "\t\t\t// butterfly\n"
1091
                "\t\t\t//\n"
1092 6 dgisselq
                "\t\t\tif (iaddr[LGSPAN])\n"
1093 2 dgisselq
                "\t\t\tbegin\n"
1094
                        "\t\t\t\t// One input from memory, ...\n"
1095
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
1096
                        "\t\t\t\t// One input clocked in from the top\n"
1097
                        "\t\t\t\tib_b <= i_data;\n"
1098
                        "\t\t\t\t// Set the sync to true on the very first\n"
1099
                        "\t\t\t\t// valid input in, and hence on the very\n"
1100
                        "\t\t\t\t// first valid data out per FFT.\n"
1101 6 dgisselq
                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
1102 2 dgisselq
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
1103 8 dgisselq
                "\t\t\tend else begin\n"
1104
                        "\t\t\t\t// Just to make debugging easier, let\'s\n"
1105
                        "\t\t\t\t// clear these registers.  That\'ll make\n"
1106
                        "\t\t\t\t// the transition easier to watch.\n"
1107
                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"
1108
                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"
1109
                        "\t\t\t\tib_sync <= 1'b0;\n"
1110
                "\t\t\tend\n"
1111 2 dgisselq
"\n"
1112
                "\t\t\t//\n"
1113
                "\t\t\t// Next step: recover the outputs from the butterfly\n"
1114
                "\t\t\t//\n"
1115 8 dgisselq
                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
1116 2 dgisselq
                "\t\t\tbegin // A butterfly output is available\n"
1117
                        "\t\t\t\tb_started <= 1'b1;\n"
1118 8 dgisselq
                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
1119 2 dgisselq
                        "\t\t\t\toB <= oB+1;\n"
1120
"\n"
1121 6 dgisselq
                        "\t\t\t\to_sync <= (ob_sync);\n"
1122 2 dgisselq
                        "\t\t\t\to_data <= ob_a;\n"
1123
                "\t\t\tend else if (b_started)\n"
1124
                "\t\t\tbegin // and keep outputting once you start--at a rate\n"
1125
                "\t\t\t// of one guaranteed output per clock that has i_ce set.\n"
1126 8 dgisselq
                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
1127 2 dgisselq
                        "\t\t\t\toB <= oB + 1;\n"
1128
                        "\t\t\t\to_sync <= 1'b0;\n"
1129
                "\t\t\tend else\n"
1130
                        "\t\t\t\to_sync <= 1'b0;\n"
1131
        "\t\tend\n"
1132 5 dgisselq
"\n", (inv)?"i":"");
1133
        fprintf(fstage,
1134 2 dgisselq
"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1135 5 dgisselq
"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
1136 8 dgisselq
"\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1137 2 dgisselq
"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"
1138 21 dgisselq
"endmodule\n",
1139 14 dgisselq
        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
1140 2 dgisselq
}
1141
 
1142
//
// usage
//
// Prints the command line summary for fftgen to stderr.  The text contains
// no printf conversions, so it is emitted verbatim.
//
void	usage(void) {
	static const char	helptext[] =
		"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"
		// (An interactive "-i" mode is not advertised.)
		"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
		"\t\tlonger than the corresponding data bits, to help avoid\n"
		"\t\tcoefficient truncation errors.\n"
		"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
		"\t-f <size>\tSets the size of the FFT as the number of complex\n"
		"\t\tsamples input to the transform.\n"
		"\t-n <nbits>\tSets the number of bits in the twos complement input\n"
		"\t\tto the FFT routine.\n"
		"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
		"\t\tproduce.  Internal values greater than this value will be\n"
		"\t\ttruncated to this value.\n"
		"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
		"\t\talgorithms that need to apply a filter without needing to do\n"
		"\t\tbin shifting, as these algorithms can, with this option, just\n"
		"\t\tmultiply by a bit reversed correlation sequence and then\n"
		"\t\tinverse FFT the (still bit reversed) result.\n"
		"\t-S\tInclude the final bit reversal stage (default).\n"
		"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
		"\t\tgiven by e^{-j 2 pi k/N n }.\n"
		"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
		"\t\tgiven by e^{ j 2 pi k/N n }.\n";

	fputs(helptext, stderr);
}
1168
 
1169
// Features still needed:
1170
//      Interactivity.
1171
//      Some number of maximum bits, beyond which we won't accumulate any more.
1172
//      Obviously, the build_stage above.
1173
//      Copying the files of interest into the fft-core directory, from
1174
//              whatever directory this file is run out of.
1175
int main(int argc, char **argv) {
1176
        int     fftsize = -1, lgsize = -1;
1177
        int     nbitsin = 16, xtracbits = 4;
1178 19 dgisselq
        int     nbitsout, maxbitsout = -1, xtrapbits=0;
1179 2 dgisselq
        bool    bitreverse = true, inverse=false, interactive = false,
1180
                verbose_flag = false;
1181
        FILE    *vmain;
1182 14 dgisselq
        std::string     coredir = "fft-core", cmdline = "";
1183 2 dgisselq
 
1184
        if (argc <= 1)
1185
                usage();
1186
 
1187 14 dgisselq
        cmdline = argv[0];
1188 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1189 14 dgisselq
                cmdline += " ";
1190
                cmdline += argv[argn];
1191
        }
1192
 
1193
        for(int argn=1; argn<argc; argn++) {
1194 2 dgisselq
                if ('-' == argv[argn][0]) {
1195
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
1196
                                switch(argv[argn][j]) {
1197
                                        case '0':
1198
                                                inverse = false;
1199
                                                break;
1200
                                        case '1':
1201
                                                inverse = true;
1202
                                                break;
1203
                                        case 'c':
1204
                                                if (argn+1 >= argc) {
1205 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
1206 2 dgisselq
                                                        usage(); exit(-1);
1207
                                                }
1208
                                                xtracbits = atoi(argv[++argn]);
1209
                                                j+= 200;
1210
                                                break;
1211
                                        case 'd':
1212
                                                if (argn+1 >= argc) {
1213 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
1214 2 dgisselq
                                                        usage(); exit(-1);
1215
                                                }
1216 14 dgisselq
                                                coredir = argv[++argn];
1217 2 dgisselq
                                                j += 200;
1218
                                                break;
1219
                                        case 'f':
1220
                                                if (argn+1 >= argc) {
1221 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
1222 2 dgisselq
                                                        usage(); exit(-1);
1223
                                                }
1224
                                                fftsize = atoi(argv[++argn]);
1225
                                                { int sln = strlen(argv[argn]);
1226
                                                if (!isdigit(argv[argn][sln-1])){
1227
                                                        switch(argv[argn][sln-1]) {
1228
                                                        case 'k': case 'K':
1229
                                                                fftsize <<= 10;
1230
                                                                break;
1231
                                                        case 'm': case 'M':
1232
                                                                fftsize <<= 20;
1233
                                                                break;
1234
                                                        case 'g': case 'G':
1235
                                                                fftsize <<= 30;
1236
                                                                break;
1237
                                                        default:
1238 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
1239 2 dgisselq
                                                                exit(-1);
1240
                                                        }
1241
                                                }}
1242
                                                j += 200;
1243
                                                break;
1244
                                        case 'h':
1245
                                                usage();
1246
                                                exit(0);
1247
                                                break;
1248
                                        case 'i':
1249
                                                interactive = true;
1250
                                                break;
1251
                                        case 'm':
1252
                                                if (argn+1 >= argc) {
1253 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
1254 2 dgisselq
                                                        exit(-1);
1255
                                                }
1256
                                                maxbitsout = atoi(argv[++argn]);
1257
                                                j += 200;
1258
                                                break;
1259
                                        case 'n':
1260
                                                if (argn+1 >= argc) {
1261 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
1262 2 dgisselq
                                                        exit(-1);
1263
                                                }
1264
                                                nbitsin = atoi(argv[++argn]);
1265
                                                j += 200;
1266
                                                break;
1267
                                        case 'S':
1268
                                                bitreverse = true;
1269
                                                break;
1270
                                        case 's':
1271
                                                bitreverse = false;
1272
                                                break;
1273 19 dgisselq
                                        case 'x':
1274
                                                if (argn+1 >= argc) {
1275
                                                        printf("ERR: No extra number of bits given!\n\n");
1276
                                                        usage(); exit(-1);
1277
                                                } j+= 200;
1278
                                                xtrapbits = atoi(argv[++argn]);
1279
                                                break;
1280 2 dgisselq
                                        case 'v':
1281
                                                verbose_flag = true;
1282
                                                break;
1283
                                        default:
1284
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
1285
                                                usage();
1286
                                                exit(-1);
1287
                                }
1288
                        }
1289
                } else {
1290
                        printf("Unrecognized argument, %s\n", argv[argn]);
1291
                        usage();
1292
                        exit(-1);
1293
                }
1294
        }
1295
 
1296
        if ((lgsize < 0)&&(fftsize > 1)) {
1297
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
1298
                        ;
1299
        }
1300
 
1301
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
1302
                printf("INVALID PARAMETERS!!!!\n");
1303
                exit(-1);
1304
        }
1305
 
1306
 
1307
        if (nextlg(fftsize) != fftsize) {
1308
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
1309
                                fftsize);
1310
                exit(-1);
1311
        } else if (fftsize < 2) {
1312
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
1313
                                fftsize);
1314
                if (fftsize == 1) {
1315
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
1316
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
1317
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
1318
                        fprintf(stderr, "can be connected straight to the input.\n");
1319
                } else {
1320
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
1321
                        fprintf(stderr, "Is such an operation even defined?\n");
1322
                }
1323
                exit(-1);
1324
        }
1325
 
1326
        // Calculate how many output bits we'll have, and what the log
1327
        // based two size of our FFT is.
1328
        {
1329
                int     tmp_size = fftsize;
1330
 
1331
                // The first stage always accumulates one bit, regardless
1332
                // of whether you need to or not.
1333
                nbitsout = nbitsin + 1;
1334
                tmp_size >>= 1;
1335
 
1336
                while(tmp_size > 4) {
1337
                        nbitsout += 1;
1338
                        tmp_size >>= 2;
1339
                }
1340
 
1341
                if (tmp_size > 1)
1342
                        nbitsout ++;
1343
 
1344
                if (fftsize <= 2)
1345
                        bitreverse = false;
1346
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
1347
                nbitsout = maxbitsout;
1348
 
1349
 
1350
        {
1351
                struct stat     sbuf;
1352 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
1353 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
1354 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
1355 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
1356
                                fprintf(stderr, "To try again, please remove this file.\n");
1357
                                exit(-1);
1358
                        }
1359
                } else
1360 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
1361
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
1362
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
1363 2 dgisselq
                        exit(-1);
1364
                }
1365
        }
1366
 
1367 14 dgisselq
        {
1368
                std::string     fname_string;
1369
 
1370
                fname_string = coredir;
1371
                fname_string += "/";
1372
                if (inverse) fname_string += "i";
1373
                fname_string += "fftmain.v";
1374
 
1375
                vmain = fopen(fname_string.c_str(), "w");
1376
                if (NULL == vmain) {
1377
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
1378
                        perror("Err from O/S:");
1379
                        exit(-1);
1380
                }
1381 2 dgisselq
        }
1382
 
1383
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
1384
        fprintf(vmain, "//\n");
1385
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
1386
        fprintf(vmain, "//\n");
1387
        fprintf(vmain, "// Project:     %s\n", prjname);
1388
        fprintf(vmain, "//\n");
1389
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
1390
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
1391
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
1392
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
1393
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
1394
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
1395
        fprintf(vmain, "//\n");
1396
        fprintf(vmain, "// Parameters:\n");
1397
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
1398
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
1399
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
1400
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
1401
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
1402
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
1403
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
1404
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
1405
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
1406
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
1407
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
1408
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
1409
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
1410
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
1411
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
1412
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
1413
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
1414
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
1415
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
1416
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
1417
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
1418
        fprintf(vmain, "//\t\t\tbits total.\n");
1419
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
1420
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
1421
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
1422
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
1423
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
1424
        fprintf(vmain, "//\n");
1425 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
1426
        fprintf(vmain, "//\t\tfollowing command line:\n");
1427
        fprintf(vmain, "//\n");
1428
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
1429
        fprintf(vmain, "//\n");
1430 2 dgisselq
        fprintf(vmain, "%s", creator);
1431
        fprintf(vmain, "//\n");
1432
        fprintf(vmain, "%s", cpyleft);
1433
 
1434
 
1435
        fprintf(vmain, "//\n");
1436
        fprintf(vmain, "//\n");
1437
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
1438
        fprintf(vmain, "\t\ti_left, i_right,\n");
1439
        fprintf(vmain, "\t\to_left, o_right, o_sync);\n");
1440
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
1441
        assert(lgsize > 0);
1442
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
1443
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
1444
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
1445
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
1446
        fprintf(vmain, "\n\n");
1447
 
1448
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
1449
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
1450
        fprintf(vmain, "\n\n");
1451
 
1452
        int     tmp_size = fftsize, lgtmp = lgsize;
1453
        if (fftsize == 2) {
1454
                if (bitreverse) {
1455
                        fprintf(vmain, "\treg\tbr_start;\n");
1456
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
1457
                        fprintf(vmain, "\t\tif (i_rst)\n");
1458
                        fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");
1459
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1460
                        fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");
1461
                }
1462
                fprintf(vmain, "\n\n");
1463 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
1464
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
1465 2 dgisselq
                fprintf(vmain, "\n\n");
1466
        } else {
1467
                int     nbits = nbitsin, dropbit=0;
1468
                // Always do a first stage
1469
                fprintf(vmain, "\n\n");
1470
                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
1471 19 dgisselq
                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(nbits+1+xtrapbits)-1, fftsize, fftsize);
1472
                fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
1473 2 dgisselq
                        (inverse)?"i":"", fftsize,
1474 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
1475 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
1476
                        fftsize);
1477
                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);
1478 19 dgisselq
                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
1479 2 dgisselq
                        (inverse)?"i":"", fftsize,
1480 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
1481 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
1482
                        fftsize);
1483 9 dgisselq
                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
1484 2 dgisselq
                fprintf(vmain, "\n\n");
1485
 
1486 14 dgisselq
                {
1487
                        std::string     fname;
1488
                        char    numstr[12];
1489 2 dgisselq
 
1490 14 dgisselq
                        fname = coredir + "/";
1491
                        if (inverse) fname += "i";
1492
                        fname += "fftstage_e";
1493
                        sprintf(numstr, "%d", fftsize);
1494
                        fname += numstr;
1495
                        fname += ".v";
1496
                        build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits);     // Even stage
1497
 
1498
                        fname = coredir + "/";
1499
                        if (inverse) fname += "i";
1500
                        fname += "fftstage_o";
1501
                        sprintf(numstr, "%d", fftsize);
1502
                        fname += numstr;
1503
                        fname += ".v";
1504
                        build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits);    // Odd  stage
1505
                }
1506
 
1507 2 dgisselq
                nbits += 1;     // New number of input bits
1508
                tmp_size >>= 1; lgtmp--;
1509
                dropbit = 0;
1510
                fprintf(vmain, "\n\n");
1511
                while(tmp_size >= 8) {
1512
                        int     obits = nbits+((dropbit)?0:1);
1513
 
1514
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1515
                                obits = maxbitsout;
1516
 
1517
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
1518 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
1519 2 dgisselq
                        fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
1520
                                (inverse)?"i":"", tmp_size,
1521 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
1522
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
1523 2 dgisselq
                                tmp_size);
1524
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
1525
                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
1526
                                (inverse)?"i":"", tmp_size,
1527 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
1528
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
1529 2 dgisselq
                                tmp_size);
1530
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
1531
                        fprintf(vmain, "\n\n");
1532
 
1533 14 dgisselq
                        {
1534
                                std::string     fname;
1535
                                char            numstr[12];
1536 2 dgisselq
 
1537 14 dgisselq
                                fname = coredir + "/";
1538
                                if (inverse) fname += "i";
1539
                                fname += "fftstage_e";
1540
                                sprintf(numstr, "%d", tmp_size);
1541
                                fname += numstr;
1542
                                fname += ".v";
1543 19 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 0, nbits+xtrapbits, inverse, xtracbits);  // Even stage
1544 2 dgisselq
 
1545 14 dgisselq
                                fname = coredir + "/";
1546
                                if (inverse) fname += "i";
1547
                                fname += "fftstage_o";
1548
                                sprintf(numstr, "%d", tmp_size);
1549
                                fname += numstr;
1550
                                fname += ".v";
1551 19 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 1, nbits+xtrapbits, inverse, xtracbits); // Odd  stage
1552 14 dgisselq
                        }
1553
 
1554
 
1555 2 dgisselq
                        dropbit ^= 1;
1556
                        nbits = obits;
1557
                        tmp_size >>= 1; lgtmp--;
1558
                }
1559
 
1560
                if (tmp_size == 4) {
1561
                        int     obits = nbits+((dropbit)?0:1);
1562
 
1563
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1564
                                obits = maxbitsout;
1565
 
1566
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
1567 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
1568 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
1569 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
1570 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");
1571 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
1572 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
1573 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
1574 2 dgisselq
                        dropbit ^= 1;
1575
                        nbits = obits;
1576
                        tmp_size >>= 1; lgtmp--;
1577
                }
1578
 
1579
                {
1580
                        int obits = nbits+((dropbit)?0:1);
1581
                        if (obits > nbitsout)
1582
                                obits = nbitsout;
1583
                        if ((maxbitsout>0)&&(obits > maxbitsout))
1584
                                obits = maxbitsout;
1585
                        fprintf(vmain, "\twire\t\tw_s2;\n");
1586
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
1587 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
1588 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
1589 2 dgisselq
 
1590
                        fprintf(vmain, "\n\n");
1591
                        nbits = obits;
1592
                }
1593
 
1594
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
1595
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
1596
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
1597
                fprintf(vmain, "\n");
1598
                if (bitreverse) {
1599
                        fprintf(vmain, "\twire\tbr_start;\n");
1600
                        fprintf(vmain, "\treg\tr_br_started;\n");
1601
                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");
1602
                        fprintf(vmain, "\t// our dblstage.\n");
1603
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
1604
                        fprintf(vmain, "\t\tif (i_rst)\n");
1605
                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
1606
                        fprintf(vmain, "\t\telse\n");
1607
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");
1608
                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");
1609
                }
1610
        }
1611
 
1612
        fprintf(vmain, "\n");
1613
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
1614
        fprintf(vmain, "\twire\tbr_sync;\n");
1615
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
1616
        if (bitreverse) {
1617
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
1618
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
1619
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
1620
        } else {
1621
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
1622
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
1623
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
1624
        }
1625
 
1626
        fprintf(vmain, "\n\n");
1627
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
1628
        fprintf(vmain, "\talways @(posedge i_clk)\n");
1629
        fprintf(vmain, "\t\tbegin\n");
1630
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
1631
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
1632
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
1633
        fprintf(vmain, "\t\tend\n");
1634
        fprintf(vmain, "\n\n");
1635
        fprintf(vmain, "endmodule\n");
1636
        fclose(vmain);
1637
 
1638 14 dgisselq
        {
1639
                std::string     fname;
1640 2 dgisselq
 
1641 14 dgisselq
                fname = coredir + "/butterfly.v";
1642
                build_butterfly(fname.c_str(), xtracbits);
1643 2 dgisselq
 
1644 14 dgisselq
                fname = coredir + "/shiftaddmpy.v";
1645
                build_multiply(fname.c_str());
1646 2 dgisselq
 
1647 14 dgisselq
                fname = coredir + "/qtrstage.v";
1648
                build_quarters(fname.c_str());
1649 2 dgisselq
 
1650 14 dgisselq
                fname = coredir + "/dblstage.v";
1651
                build_dblstage(fname.c_str());
1652
 
1653
                if (bitreverse) {
1654
                        fname = coredir + "/dblreverse.v";
1655
                        build_dblreverse(fname.c_str());
1656
                }
1657 2 dgisselq
        }
1658
}
1659
 
1660 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.