OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 37

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
2 16 dgisselq
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5 36 dgisselq
// Project:     A General Purpose Pipelined FFT Implementation
6 16 dgisselq
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9 33 dgisselq
//      Once built, this program will build an FFT (or IFFT) core of arbitrary
10
//      width, precision, etc., that will run at two samples per clock.
11
//      (Incidentally, I didn't pick two samples per clock because it was
12
//      easier, but rather because there weren't any two-sample per clock
13
//      FFT's posted on opencores.com.  Further, FFT's running at one sample
14
//      per clock aren't that hard to find.)
15 16 dgisselq
//
16 33 dgisselq
//      You can find the documentation for this program in two places.  One is
17
//      in the usage() function below.  The second is in the 'doc'uments
18
//      directory that comes with this package, specifically in the spec.pdf
19
//      file.  If it's not there, type make in the documents directory to
20
//      build it.
21 16 dgisselq
//
22 31 dgisselq
//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.
23
//              (Adjustments are at the top of the file ...)
24
//
25 16 dgisselq
// Creator:     Dan Gisselquist, Ph.D.
26 30 dgisselq
//              Gisselquist Technology, LLC
27 16 dgisselq
//
28 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
29 16 dgisselq
//
30 36 dgisselq
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
31 16 dgisselq
//
32
// This program is free software (firmware): you can redistribute it and/or
33
// modify it under the terms of  the GNU General Public License as published
34
// by the Free Software Foundation, either version 3 of the License, or (at
35
// your option) any later version.
36
//
37
// This program is distributed in the hope that it will be useful, but WITHOUT
38
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
39
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
40
// for more details.
41
//
42
// You should have received a copy of the GNU General Public License along
43 37 dgisselq
// with this program.  (It's in the $(ROOT)/doc directory.  Run make with no
44 16 dgisselq
// target there if the PDF file isn't present.)  If not, see
45
// <http://www.gnu.org/licenses/> for a copy.
46
//
47
// License:     GPL, v3, as defined and found on www.gnu.org,
48
//              http://www.gnu.org/licenses/gpl.html
49
//
50
//
51 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
52 16 dgisselq
//
53
//
54 31 dgisselq
#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen
55 2 dgisselq
#include <stdio.h>
56
#include <stdlib.h>
57 31 dgisselq
 
58
#ifdef _MSC_VER //  added for ms vs compatibility
59
 
60
#include <io.h>
61
#include <direct.h>
62
#define _USE_MATH_DEFINES
63
#define R_OK    4       /* Test for read permission.  */
64
#define W_OK    2       /* Test for write permission.  */
65
#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/
66
#define F_OK    0       /* Test for existence.  */
67
 
68
#if _MSC_VER <= 1700
69
 
70
int lstat(const char *filename, struct stat *buf) { return 1; };
71
#define S_ISDIR(A)      0
72
 
73
#else
74
 
75
#define lstat   _stat
76
#define S_ISDIR _S_IFDIR
77
 
78
#endif
79
 
80
#define mkdir(A,B)      _mkdir(A)
81
 
82
#define access _access
83
 
84
#else
85
// And for G++/Linux environment
86
 
87
#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros
88 2 dgisselq
#include <sys/stat.h>
89 31 dgisselq
#endif
90
 
91 2 dgisselq
#include <string.h>
92 14 dgisselq
#include <string>
93 2 dgisselq
#include <math.h>
94
#include <ctype.h>
95
#include <assert.h>
96
 
97 36 dgisselq
#include "defaults.h"
98
#include "legal.h"
99
#include "rounding.h"
100
#include "fftlib.h"
101
#include "bldstage.h"
102
#include "bitreverse.h"
103
#include "softmpy.h"
104
#include "butterfly.h"
105 2 dgisselq
 
106 36 dgisselq
void    build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {
107 2 dgisselq
        FILE    *fp = fopen(fname, "w");
108
        if (NULL == fp) {
109
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
110
                perror("O/S Err was:");
111
                return;
112
        }
113 23 dgisselq
        const   char    *rnd_string;
114
        if (rounding == RND_TRUNCATE)
115
                rnd_string = "truncate";
116
        else if (rounding == RND_FROMZERO)
117
                rnd_string = "roundfromzero";
118
        else if (rounding == RND_HALFUP)
119
                rnd_string = "roundhalfup";
120
        else
121
                rnd_string = "convround";
122
 
123
 
124
        fprintf(fp,
125 36 dgisselq
SLASHLINE
126 23 dgisselq
"//\n"
127 36 dgisselq
"// Filename:\tqtrstage%s.v\n"
128 2 dgisselq
"//\n"
129 36 dgisselq
"// Project:\t%s\n"
130
"//\n"
131 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
132
"//             frequency FFT.  This particular implementation is optimized\n"
133 36 dgisselq
"//     so that all of the multiplies are accomplished by additions and\n"
134
"//     multiplexers only.\n"
135 5 dgisselq
"//\n"
136 2 dgisselq
"//\n%s"
137
"//\n",
138 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
139 2 dgisselq
        fprintf(fp, "%s", cpyleft);
140 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
141 2 dgisselq
 
142 36 dgisselq
        std::string     resetw("i_reset");
143
        if (async_reset)
144
                resetw = std::string("i_areset_n");
145
 
146 2 dgisselq
        fprintf(fp,
147 36 dgisselq
"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
148 29 dgisselq
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
149 5 dgisselq
        "\t// Parameters specific to the core that should be changed when this\n"
150 36 dgisselq
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller\n"
151 5 dgisselq
        "\t// spans must use the fftdoubles stage.\n"
152 29 dgisselq
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
153 37 dgisselq
        "\tinput\twire                          i_clk, %s, i_ce, i_sync;\n"
154
        "\tinput\twire  [(2*IWIDTH-1):0]        i_data;\n"
155 5 dgisselq
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
156
        "\toutput\treg                          o_sync;\n"
157 36 dgisselq
        "\t\n", (dbg)?"_dbg":"",
158
        resetw.c_str(),
159
        (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
160
        TST_QTRSTAGE_LGWIDTH, resetw.c_str());
161 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
162
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
163
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
164
"\n");
165
        }
166 14 dgisselq
        fprintf(fp,
167 5 dgisselq
        "\treg\t        wait_for_sync;\n"
168 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
169 2 dgisselq
"\n"
170 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
171 2 dgisselq
"\n"
172 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
173
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
174
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
175
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
176 2 dgisselq
"\n"
177 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
178
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
179 2 dgisselq
"\n"
180 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
181
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
182
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
183 2 dgisselq
"\n"
184 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
185
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
186
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
187 2 dgisselq
"\n"
188 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
189 14 dgisselq
"\n");
190
        fprintf(fp,
191 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
192
        fprintf(fp,
193
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
194
        fprintf(fp,
195 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
196 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
197
        fprintf(fp,
198 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
199 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
200
        fprintf(fp,
201 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
202 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
203
        fprintf(fp,
204 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
205 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
206
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
207
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
208
/*
209
        fprintf(fp,
210 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
211 9 dgisselq
        "\tgenerate\n"
212
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
213 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
214 9 dgisselq
        "\telse\n"
215 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
216 9 dgisselq
        "\tendgenerate\n"
217 2 dgisselq
"\n"
218 23 dgisselq
*/
219
        fprintf(fp,
220 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
221 36 dgisselq
        "\tinitial iaddr = 0;\n");
222
        if (async_reset)
223
                fprintf(fp,
224
                        "\talways @(posedge i_clk, negedge i_areset_n)\n"
225
                                "\t\tif (!i_reset)\n");
226
        else
227
                fprintf(fp,
228 5 dgisselq
        "\talways @(posedge i_clk)\n"
229 36 dgisselq
                "\t\tif (i_reset)\n");
230
        fprintf(fp,
231 5 dgisselq
                "\t\tbegin\n"
232 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
233 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
234 35 dgisselq
                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
235 5 dgisselq
                "\t\tbegin\n"
236 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
237
                        "\t\t\twait_for_sync <= 1\'b0;\n"
238 36 dgisselq
                "\t\tend\n\n"
239 26 dgisselq
        "\talways @(posedge i_clk)\n"
240
                "\t\tif (i_ce)\n"
241 5 dgisselq
                        "\t\t\timem <= i_data;\n"
242 26 dgisselq
                "\n\n");
243 23 dgisselq
        fprintf(fp,
244
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
245
        "\t// Why not?  Because iaddr will always be zero until after the\n"
246
        "\t// first i_ce, so we are safe.\n"
247 36 dgisselq
        "\tinitial pipeline = 4\'h0;\n");
248
        if (async_reset)
249
                fprintf(fp,
250
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
251
                "\t\tif (!i_reset)\n");
252
        else
253
                fprintf(fp,
254 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
255 36 dgisselq
                "\t\tif (i_reset)\n");
256
 
257
        fprintf(fp,
258 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
259 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
260
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
261
        fprintf(fp,
262
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
263
        "\talways\t@(posedge i_clk)\n"
264
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
265
                "\t\tbegin\n"
266
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
267
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
268
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
269
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
270
                "\t\tend\n\n");
271
        fprintf(fp,
272
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
273
        fprintf(fp,
274 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
275
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
276
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
277
        "\t// it independent of pipeline[2].\n"
278 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
279 26 dgisselq
                "\t\tif (i_ce)\n"
280 23 dgisselq
                "\t\tbegin\n"
281
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
282
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
283
                        "\t\t\tif (ODD == 0)\n"
284 5 dgisselq
                        "\t\t\tbegin\n"
285 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
286
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
287
                        "\t\t\tend else if (INVERSE==0) begin\n"
288
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
289
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
290
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
291
                        "\t\t\tend else begin\n"
292
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
293
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
294
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
295 5 dgisselq
                        "\t\t\tend\n"
296 23 dgisselq
                "\t\tend\n\n");
297
        fprintf(fp,
298
        "\talways\t@(posedge i_clk)\n"
299
                "\t\tif (i_ce)\n"
300
                "\t\tbegin // In sequence, clock = 3\n"
301
                        "\t\t\tif (pipeline[3])\n"
302 5 dgisselq
                        "\t\t\tbegin\n"
303
                                "\t\t\t\tomem <= ob_b;\n"
304
                                "\t\t\t\to_data <= ob_a;\n"
305
                        "\t\t\tend else\n"
306
                                "\t\t\t\to_data <= omem;\n"
307 23 dgisselq
                "\t\tend\n\n");
308
 
309
        fprintf(fp,
310
        "\t// Don\'t forget in the sync check that we are running\n"
311
        "\t// at two clocks per sample.  Thus we need to\n"
312
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
313 36 dgisselq
        "\tinitial\to_sync = 1\'b0;\n");
314
 
315
        if (async_reset)
316
                fprintf(fp,
317
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
318
                "\t\tif (!i_areset_n)\n");
319
        else
320
                fprintf(fp,
321 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
322 36 dgisselq
                "\t\tif (i_reset)\n");
323
        fprintf(fp,
324 26 dgisselq
                "\t\t\to_sync <= 1\'b0;\n"
325
                "\t\telse if (i_ce)\n"
326 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
327
        fprintf(fp, "endmodule\n");
328 2 dgisselq
}
329
 
330 36 dgisselq
void    build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {
331 2 dgisselq
        FILE    *fp = fopen(fname, "w");
332
        if (NULL == fp) {
333
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
334
                perror("O/S Err was:");
335
                return;
336
        }
337 23 dgisselq
        const   char    *rnd_string;
338
        if (rounding == RND_TRUNCATE)
339
                rnd_string = "truncate";
340
        else if (rounding == RND_FROMZERO)
341
                rnd_string = "roundfromzero";
342
        else if (rounding == RND_HALFUP)
343
                rnd_string = "roundhalfup";
344
        else
345
                rnd_string = "convround";
346
 
347
 
348 2 dgisselq
        fprintf(fp,
349 36 dgisselq
SLASHLINE
350 2 dgisselq
"//\n"
351 36 dgisselq
"// Filename:\tqtrstage%s.v\n"
352 2 dgisselq
"//\n"
353 36 dgisselq
"// Project:\t%s\n"
354 2 dgisselq
"//\n"
355 36 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
356
"//             frequency FFT.  This particular implementation is optimized\n"
357
"//     so that all of the multiplies are accomplished by additions and\n"
358
"//     multiplexers only.\n"
359 2 dgisselq
"//\n"
360 36 dgisselq
"// Operation:\n"
361
"//     The operation of this stage is identical to the regular stages of\n"
362
"//     the FFT (see them for details), with one additional and critical\n"
363
"//     difference: this stage doesn't require any hardware multiplication.\n"
364
"//     The multiplies within it may all be accomplished using additions and\n"
365
"//     subtractions.\n"
366
"//\n"
367
"//     Let's see how this is done.  Given x[n] and x[n+2], cause thats the\n"
368
"//     stage we are working on, with i_sync true for x[0] being input,\n"
369
"//     produce the output:\n"
370
"//\n"
371
"//     y[n  ] = x[n] + x[n+2]\n"
372
"//     y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2}        (forward transform)\n"
373
"//            = (x[n] - x[n+2]) * -j^n\n"
374
"//\n"
375
"//     y[n].r = x[n].r + x[n+2].r      (This is the easy part)\n"
376
"//     y[n].i = x[n].i + x[n+2].i\n"
377
"//\n"
378
"//     y[2].r = x[0].r - x[2].r\n"
379
"//     y[2].i = x[0].i - x[2].i\n"
380
"//\n"
381
"//     y[3].r =   (x[1].i - x[3].i)            (forward transform)\n"
382
"//     y[3].i = - (x[1].r - x[3].r)\n"
383
"//\n"
384
"//     y[3].r = - (x[1].i - x[3].i)            (inverse transform)\n"
385
"//     y[3].i =   (x[1].r - x[3].r)            (INVERSE = 1)\n"
386
// "//\n"
387
// "//  When the FFT is run in the two samples per clock mode, this quarter\n"
388
// "//  stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"
389
// "//  x[3] (ODD = 1).  In all other cases, it will operate on all four\n"
390
// "//  values.\n"
391 2 dgisselq
"//\n%s"
392 36 dgisselq
"//\n",
393
                (dbg)?"_dbg":"", prjname, creator);
394 2 dgisselq
        fprintf(fp, "%s", cpyleft);
395 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
396 36 dgisselq
 
397
        std::string     resetw("i_reset");
398
        if (async_reset)
399
                resetw = std::string("i_areset_n");
400
 
401 33 dgisselq
        fprintf(fp,
402 36 dgisselq
"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
403
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
404
        "\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"
405 37 dgisselq
        "\tinput\twire                          i_clk, %s, i_ce, i_sync;\n"
406
        "\tinput\twire  [(2*IWIDTH-1):0]        i_data;\n"
407 36 dgisselq
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
408
        "\toutput\treg                          o_sync;\n"
409
                "\t\n", (dbg)?"_dbg":"", resetw.c_str(),
410
                (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
411
                TST_QTRSTAGE_LGWIDTH, resetw.c_str());
412 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
413 36 dgisselq
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
414
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
415 26 dgisselq
"\n");
416
        }
417 36 dgisselq
 
418 33 dgisselq
        fprintf(fp,
419 36 dgisselq
        "\treg\t        wait_for_sync;\n"
420
        "\treg\t[2:0]   pipeline;\n"
421 2 dgisselq
"\n"
422 36 dgisselq
        "\treg\tsigned [(IWIDTH):0]     sum_r, sum_i, diff_r, diff_i;\n"
423 15 dgisselq
"\n"
424 36 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
425
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
426
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
427
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
428 15 dgisselq
"\n"
429 36 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
430
        "\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"
431 2 dgisselq
"\n"
432 36 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
433
        "\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"
434
        "\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"
435 26 dgisselq
"\n"
436 36 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
437
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
438
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
439
"\n"
440
        "\treg  [(2*OWIDTH-1):0]        omem [0:1];\n"
441 28 dgisselq
"\n");
442 36 dgisselq
 
443
        fprintf(fp, "\t//\n"
444
        "\t// Round our output values down to OWIDTH bits\n"
445
        "\t//\n");
446
 
447 28 dgisselq
        fprintf(fp,
448 36 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"
449
        "\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"
450
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
451
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
452 28 dgisselq
        fprintf(fp,
453 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
454
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
455 28 dgisselq
        fprintf(fp,
456 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
457
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
458 28 dgisselq
        fprintf(fp,
459 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
460
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
461
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
462
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
463
        fprintf(fp,
464
        "\tinitial wait_for_sync = 1\'b1;\n"
465
        "\tinitial iaddr = 0;\n");
466
        if (async_reset)
467
                fprintf(fp,
468
                        "\talways @(posedge i_clk, negedge i_areset_n)\n"
469
                                "\t\tif (!i_reset)\n");
470
        else
471
                fprintf(fp,
472
        "\talways @(posedge i_clk)\n"
473
                "\t\tif (i_reset)\n");
474 28 dgisselq
 
475 36 dgisselq
        fprintf(fp, "\t\tbegin\n"
476
                        "\t\t\twait_for_sync <= 1\'b1;\n"
477
                        "\t\t\tiaddr <= 0;\n"
478
                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
479
                "\t\tbegin\n"
480
                        "\t\t\tiaddr <= iaddr + 1\'b1;\n"
481
                        "\t\t\twait_for_sync <= 1\'b0;\n"
482
                "\t\tend\n\n"
483 28 dgisselq
        "\talways @(posedge i_clk)\n"
484
                "\t\tif (i_ce)\n"
485
                "\t\tbegin\n"
486 36 dgisselq
                        "\t\t\timem[0] <= i_data;\n"
487
                        "\t\t\timem[1] <= imem[0];\n"
488 28 dgisselq
                "\t\tend\n"
489 36 dgisselq
                "\n\n");
490
        fprintf(fp,
491
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
492
        "\t// Why not?  Because iaddr will always be zero until after the\n"
493
        "\t// first i_ce, so we are safe.\n"
494
        "\tinitial pipeline = 3\'h0;\n");
495 2 dgisselq
 
496 36 dgisselq
        if (async_reset)
497
                fprintf(fp,
498
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
499
                "\t\tif (!i_reset)\n");
500
        else
501
                fprintf(fp,
502
        "\talways\t@(posedge i_clk)\n"
503
                "\t\tif (i_reset)\n");
504 2 dgisselq
 
505
        fprintf(fp,
506 36 dgisselq
                        "\t\t\tpipeline <= 3\'h0;\n"
507
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
508
                        "\t\t\tpipeline <= { pipeline[1:0], iaddr[1] };\n\n");
509
        fprintf(fp,
510
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
511
        "\talways\t@(posedge i_clk)\n"
512
                "\t\tif ((i_ce)&&(iaddr[1]))\n"
513
                "\t\tbegin\n"
514
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
515
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
516
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
517
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
518
                "\t\tend\n\n");
519
        fprintf(fp,
520
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
521 2 dgisselq
 
522 33 dgisselq
        fprintf(fp,
523 36 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
524
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
525
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
526
        "\t// it independent of pipeline[2].\n"
527
        "\talways\t@(posedge i_clk)\n"
528 2 dgisselq
                "\t\tif (i_ce)\n"
529
                "\t\tbegin\n"
530 36 dgisselq
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
531
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
532
                        "\t\t\tif (!iaddr[0])\n"
533
                        "\t\t\tbegin\n"
534
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
535
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
536
                        "\t\t\tend else if (INVERSE==0) begin\n"
537
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
538
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
539
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
540
                        "\t\t\tend else begin\n"
541
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
542
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
543
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
544
                        "\t\t\tend\n"
545
                "\t\tend\n\n");
546
        fprintf(fp,
547
        "\talways\t@(posedge i_clk)\n"
548 2 dgisselq
                "\t\tif (i_ce)\n"
549 36 dgisselq
                "\t\tbegin // In sequence, clock = 3\n"
550
                        "\t\t\tomem[0] <= ob_b;\n"
551
                        "\t\t\tomem[1] <= omem[0];\n"
552
                        "\t\t\tif (pipeline[2])\n"
553
                                "\t\t\t\to_data <= ob_a;\n"
554
                        "\t\t\telse\n"
555
                                "\t\t\t\to_data <= omem[1];\n"
556
                "\t\tend\n\n");
557 2 dgisselq
 
558 36 dgisselq
        fprintf(fp,
559
        "\tinitial\to_sync = 1\'b0;\n");
560 2 dgisselq
 
561 36 dgisselq
        if (async_reset)
562
                fprintf(fp,
563
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
564
                "\t\tif (!i_areset_n)\n");
565
        else
566
                fprintf(fp,
567
        "\talways\t@(posedge i_clk)\n"
568
                "\t\tif (i_reset)\n");
569 29 dgisselq
        fprintf(fp,
570 36 dgisselq
                "\t\t\to_sync <= 1\'b0;\n"
571
                "\t\telse if (i_ce)\n"
572
                        "\t\t\to_sync <= (iaddr[2:0] == 3'b101);\n\n");
573 29 dgisselq
 
574 36 dgisselq
        if (formal_property_flag) {
575
                fprintf(fp,
576
"`ifdef FORMAL\n"
577
        "\treg  f_past_valid;\n"
578
        "\tinitial      f_past_valid = 1'b0;\n"
579
        "\talways @(posedge i_clk)\n"
580
        "\t     f_past_valid = 1'b1;\n"
581 29 dgisselq
"\n"
582 36 dgisselq
"`ifdef QTRSTAGE\n"
583
        "\talways @(posedge i_clk)\n"
584
        "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"
585
"`endif\n"
586 29 dgisselq
"\n"
587 36 dgisselq
        "\t// The below logic only works if the rounding stage does nothing\n"
588
        "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"
589 29 dgisselq
"\n"
590 36 dgisselq
        "\treg  signed [IWIDTH-1:0]     f_piped_real    [0:7];\n"
591
        "\treg  signed [IWIDTH-1:0]     f_piped_imag    [0:7];\n"
592 29 dgisselq
"\n"
593 36 dgisselq
        "\talways @(posedge i_clk)\n"
594
        "\tif (i_ce)\n"
595
        "\tbegin\n"
596
        "\t     f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH];\n"
597
        "\t     f_piped_imag[0] <= i_data[  IWIDTH-1:0];\n"
598 29 dgisselq
"\n"
599 36 dgisselq
        "\t     f_piped_real[1] <= f_piped_real[0];\n"
600
        "\t     f_piped_imag[1] <= f_piped_imag[0];\n"
601 29 dgisselq
"\n"
602 36 dgisselq
        "\t     f_piped_real[2] <= f_piped_real[1];\n"
603
        "\t     f_piped_imag[2] <= f_piped_imag[1];\n"
604 29 dgisselq
"\n"
605 36 dgisselq
        "\t     f_piped_real[3] <= f_piped_real[2];\n"
606
        "\t     f_piped_imag[3] <= f_piped_imag[2];\n"
607 29 dgisselq
"\n"
608 36 dgisselq
        "\t     f_piped_real[4] <= f_piped_real[3];\n"
609
        "\t     f_piped_imag[4] <= f_piped_imag[3];\n"
610 29 dgisselq
"\n"
611 36 dgisselq
        "\t     f_piped_real[5] <= f_piped_real[4];\n"
612
        "\t     f_piped_imag[5] <= f_piped_imag[4];\n"
613 29 dgisselq
"\n"
614 36 dgisselq
        "\t     f_piped_real[6] <= f_piped_real[5];\n"
615
        "\t     f_piped_imag[6] <= f_piped_imag[5];\n"
616 29 dgisselq
"\n"
617 36 dgisselq
        "\t     f_piped_real[7] <= f_piped_real[6];\n"
618
        "\t     f_piped_imag[7] <= f_piped_imag[6];\n"
619
        "\tend\n"
620 29 dgisselq
"\n"
621 36 dgisselq
        "\treg  f_rsyncd;\n"
622
        "\twire f_syncd;\n"
623 29 dgisselq
"\n"
624 36 dgisselq
        "\tinitial      f_rsyncd = 0;\n"
625 29 dgisselq
        "\talways @(posedge i_clk)\n"
626 36 dgisselq
        "\tif(i_reset)\n"
627
        "\t     f_rsyncd <= 1'b0;\n"
628
        "\telse if (!f_rsyncd)\n"
629
        "\t     f_rsyncd <= (o_sync);\n"
630
        "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"
631 29 dgisselq
"\n"
632 36 dgisselq
        "\treg  [1:0]   f_state;\n"
633 35 dgisselq
"\n"
634 2 dgisselq
"\n"
635 36 dgisselq
        "\tinitial      f_state = 0;\n"
636
        "\talways @(posedge i_clk)\n"
637
        "\tif (i_reset)\n"
638
        "\t     f_state <= 0;\n"
639
        "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
640
        "\t     f_state <= f_state + 1;\n"
641 2 dgisselq
"\n"
642 36 dgisselq
        "\talways @(*)\n"
643
        "\tif (f_state != 0)\n"
644
        "\t     assume(!i_sync);\n"
645 2 dgisselq
"\n"
646 5 dgisselq
        "\talways @(posedge i_clk)\n"
647 36 dgisselq
        "\t     assert(f_state[1:0] == iaddr[1:0]);\n"
648 2 dgisselq
"\n"
649 36 dgisselq
        "\twire signed [2*IWIDTH-1:0]   f_i_real, f_i_imag;\n"
650
        "\tassign                       f_i_real = i_data[2*IWIDTH-1:IWIDTH];\n"
651
        "\tassign                       f_i_imag = i_data[  IWIDTH-1:0];\n"
652 26 dgisselq
"\n"
653 36 dgisselq
        "\twire signed [OWIDTH-1:0]     f_o_real, f_o_imag;\n"
654
        "\tassign                       f_o_real = o_data[2*OWIDTH-1:OWIDTH];\n"
655
        "\tassign                       f_o_imag = o_data[  OWIDTH-1:0];\n"
656
"\n"
657 26 dgisselq
        "\talways @(posedge i_clk)\n"
658 36 dgisselq
        "\tif (f_state == 2'b11)\n"
659
        "\tbegin\n"
660
        "\t     assume(f_piped_real[0] != 3'sb100);\n"
661
        "\t     assume(f_piped_real[2] != 3'sb100);\n"
662
        "\t     assert(sum_r  == f_piped_real[2] + f_piped_real[0]);\n"
663
        "\t     assert(sum_i  == f_piped_imag[2] + f_piped_imag[0]);\n"
664 26 dgisselq
"\n"
665 36 dgisselq
        "\t     assert(diff_r == f_piped_real[2] - f_piped_real[0]);\n"
666
        "\t     assert(diff_i == f_piped_imag[2] - f_piped_imag[0]);\n"
667
        "\tend\n"
668 26 dgisselq
"\n"
669 36 dgisselq
        "\talways @(posedge i_clk)\n"
670
        "\tif ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4)))\n"
671
        "\tbegin\n"
672
        "\t     assert(rnd_sum_r  == f_piped_real[3]+f_piped_real[1]);\n"
673
        "\t     assert(rnd_sum_i  == f_piped_imag[3]+f_piped_imag[1]);\n"
674
        "\t     assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]);\n"
675
        "\t     assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]);\n"
676
        "\tend\n"
677 26 dgisselq
"\n"
678
        "\talways @(posedge i_clk)\n"
679 36 dgisselq
        "\tif ((f_state == 2'b10)&&(f_syncd))\n"
680
        "\tbegin\n"
681
        "\t     // assert(o_sync);\n"
682
        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"
683
        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"
684
        "\tend\n"
685
"\n"
686 26 dgisselq
        "\talways @(posedge i_clk)\n"
687 36 dgisselq
        "\tif ((f_state == 2'b11)&&(f_syncd))\n"
688
        "\tbegin\n"
689
        "\t     assert(!o_sync);\n"
690
        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"
691
        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"
692
        "\tend\n"
693 26 dgisselq
"\n"
694
        "\talways @(posedge i_clk)\n"
695 36 dgisselq
        "\tif ((f_state == 2'b00)&&(f_syncd))\n"
696
        "\tbegin\n"
697
        "\t     assert(!o_sync);\n"
698
        "\t     assert(f_o_real == f_piped_real[7] - f_piped_real[5]);\n"
699
        "\t     assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]);\n"
700
        "\tend\n"
701 26 dgisselq
"\n"
702 36 dgisselq
        "\talways @(*)\n"
703
        "\tif ((iaddr[2:0] == 0)&&(!wait_for_sync))\n"
704
        "\t     assume(i_sync);\n"
705 26 dgisselq
"\n"
706 36 dgisselq
        "\talways @(*)\n"
707
        "\tif (wait_for_sync)\n"
708
        "\t     assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd));\n"
709 2 dgisselq
"\n"
710
        "\talways @(posedge i_clk)\n"
711 36 dgisselq
        "\tif ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset)))\n"
712
        "\t     assert(!wait_for_sync);\n"
713 14 dgisselq
"\n"
714 26 dgisselq
        "\talways @(posedge i_clk)\n"
715 36 dgisselq
        "\tif ((f_state == 2'b01)&&(f_syncd))\n"
716 2 dgisselq
        "\tbegin\n"
717 36 dgisselq
        "\t     assert(!o_sync);\n"
718
        "\t     if (INVERSE)\n"
719
        "\t     begin\n"
720
        "\t             assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]);\n"
721
        "\t             assert(f_o_imag ==  f_piped_real[7]-f_piped_real[5]);\n"
722
        "\t     end else begin\n"
723
        "\t             assert(f_o_real ==  f_piped_imag[7]-f_piped_imag[5]);\n"
724
        "\t             assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]);\n"
725
        "\t     end\n"
726 2 dgisselq
        "\tend\n"
727
"\n"
728 36 dgisselq
"`endif\n");
729
        }
730 23 dgisselq
 
731 36 dgisselq
        fprintf(fp, "endmodule\n");
732
}
733 26 dgisselq
 
734 24 dgisselq
 
735 36 dgisselq
void    build_sngllast(const char *fname, const bool async_reset = false) {
736 22 dgisselq
        FILE    *fp = fopen(fname, "w");
737
        if (NULL == fp) {
738
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
739
                perror("O/S Err was:");
740
                return;
741
        }
742
 
743 36 dgisselq
        std::string     resetw("i_reset");
744
        if (async_reset)
745
                resetw = std::string("i_areset_n");
746 23 dgisselq
 
747 22 dgisselq
        fprintf(fp,
748 36 dgisselq
SLASHLINE
749 22 dgisselq
"//\n"
750 36 dgisselq
"// Filename:\tlaststage.v\n"
751 22 dgisselq
"//\n"
752
"// Project:    %s\n"
753
"//\n"
754 36 dgisselq
"// Purpose:    This is part of an FPGA implementation that will process\n"
755
"//             the final stage of a decimate-in-frequency FFT, running\n"
756
"//     through the data at one sample per clock.\n"
757 22 dgisselq
"//\n"
758
"//\n%s"
759
"//\n", prjname, creator);
760
        fprintf(fp, "%s", cpyleft);
761 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
762 36 dgisselq
 
763 22 dgisselq
        fprintf(fp,
764 36 dgisselq
"module laststage(i_clk, %s, i_ce, i_sync, i_val, o_val, o_sync);\n"
765
"       parameter       IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
766 37 dgisselq
"       input   wire                            i_clk, %s, i_ce, i_sync;\n"
767
"       input   wire    [(2*IWIDTH-1):0]        i_val;\n"
768 36 dgisselq
"       output  wire    [(2*OWIDTH-1):0]        o_val;\n"
769
"       output  reg                             o_sync;\n\n",
770
                resetw.c_str(), resetw.c_str());
771
 
772 22 dgisselq
        fprintf(fp,
773 36 dgisselq
"       reg     signed  [(IWIDTH-1):0]  m_r, m_i;\n"
774
"       wire    signed  [(IWIDTH-1):0]  i_r, i_i;\n"
775 22 dgisselq
"\n"
776 36 dgisselq
"       assign  i_r = i_val[(2*IWIDTH-1):(IWIDTH)]; \n"
777
"       assign  i_i = i_val[(IWIDTH-1):0]; \n"
778 22 dgisselq
"\n"
779 36 dgisselq
"       // Don't forget that we accumulate a bit by adding two values\n"
780
"       // together. Therefore our intermediate value must have one more\n"
781
"       // bit than the two originals.\n"
782
"       reg     signed  [(IWIDTH):0]    rnd_r, rnd_i, sto_r, sto_i;\n"
783
"       reg                             wait_for_sync, stage;\n"
784
"       reg             [1:0]           sync_pipe;\n"
785 22 dgisselq
"\n"
786 36 dgisselq
"       initial wait_for_sync = 1'b1;\n"
787
"       initial stage         = 1'b0;\n");
788 22 dgisselq
 
789 36 dgisselq
        if (async_reset)
790
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
791
        else
792
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
793 33 dgisselq
        fprintf(fp,
794 36 dgisselq
"               begin\n"
795
"                       wait_for_sync <= 1'b1;\n"
796
"                       stage         <= 1'b0;\n"
797
"               end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage))\n"
798
"               begin\n"
799
"                       wait_for_sync <= 1'b0;\n"
800
"                       //\n"
801
"                       stage <= 1'b1;\n"
802
"                       //\n"
803
"               end else if (i_ce)\n"
804
"                       stage <= 1'b0;\n\n");
805 22 dgisselq
 
806 36 dgisselq
        fprintf(fp, "\tinitial\tsync_pipe = 0;\n");
807
        if (async_reset)
808
                fprintf(fp,
809
                "\talways @(posedge i_clk, negedge i_areset_n)\n"
810
                "\tif (!i_areset_n)\n");
811
        else
812
                fprintf(fp,
813
                "\talways @(posedge i_clk)\n"
814
                "\tif (i_reset)\n");
815
 
816 22 dgisselq
        fprintf(fp,
817 36 dgisselq
                "\t\tsync_pipe <= 0;\n"
818
                "\telse if (i_ce)\n"
819
                "\t\tsync_pipe <= { sync_pipe[0], i_sync };\n\n");
820 23 dgisselq
 
821 36 dgisselq
        fprintf(fp, "\tinitial\to_sync = 1\'b0;\n");
822
        if (async_reset)
823
                fprintf(fp,
824
                "\talways @(posedge i_clk, negedge i_areset_n)\n"
825
                "\tif (!i_areset_n)\n");
826
        else
827
                fprintf(fp,
828
                "\talways @(posedge i_clk)\n"
829
                "\tif (i_reset)\n");
830
 
831 23 dgisselq
        fprintf(fp,
832 36 dgisselq
                "\t\to_sync <= 1\'b0;\n"
833
                "\telse if (i_ce)\n"
834
                "\t\to_sync <= sync_pipe[1];\n\n");
835 22 dgisselq
 
836
        fprintf(fp,
837 36 dgisselq
"       always @(posedge i_clk)\n"
838
"       if (i_ce)\n"
839
"       begin\n"
840
"               if (!stage)\n"
841
"               begin\n"
842
"                       // Clock 1\n"
843
"                       m_r <= i_r;\n"
844
"                       m_i <= i_i;\n"
845
"                       // Clock 3\n"
846
"                       rnd_r <= sto_r;\n"
847
"                       rnd_i <= sto_i;\n"
848
"                       //\n"
849
"               end else begin\n"
850
"                       // Clock 2\n"
851
"                       rnd_r <= m_r + i_r;\n"
852
"                       rnd_i <= m_i + i_i;\n"
853
"                       //\n"
854
"                       sto_r <= m_r - i_r;\n"
855
"                       sto_i <= m_i - i_i;\n"
856
"                       //\n"
857
"               end\n"
858
"       end\n"
859 22 dgisselq
"\n"
860 36 dgisselq
"       // Now that we have our results, let's round them and report them\n"
861
"       wire    signed  [(OWIDTH-1):0]  o_r, o_i;\n"
862 2 dgisselq
"\n"
863 36 dgisselq
"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r);\n"
864
"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i);\n"
865 2 dgisselq
"\n"
866 36 dgisselq
"       assign  o_val  = { o_r, o_i };\n"
867
"\n");
868 2 dgisselq
 
869 14 dgisselq
 
870 36 dgisselq
        if (formal_property_flag) {
871
                fprintf(fp,
872
        "`ifdef FORMAL\n"
873
                "\treg  f_past_valid;\n"
874
                "\tinitial      f_past_valid = 1'b0;\n"
875
                "\talways @(posedge i_clk)\n"
876
                "\t     f_past_valid <= 1'b1;\n"
877
        "\n"
878
        "`ifdef LASTSTAGE\n"
879
                "\talways @(posedge i_clk)\n"
880
                "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"
881
        "`endif\n"
882
        "\n"
883
                "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"
884
        "\n"
885
                "\treg  signed  [IWIDTH-1:0]    f_piped_real    [0:3];\n"
886
                "\treg  signed  [IWIDTH-1:0]    f_piped_imag    [0:3];\n"
887
                "\talways @(posedge i_clk)\n"
888
                "\tif (i_ce)\n"
889
                "\tbegin\n"
890
                "\t     f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH];\n"
891
                "\t     f_piped_imag[0] <= i_val[  IWIDTH-1:0];\n"
892
        "\n"
893
                "\t     f_piped_real[1] <= f_piped_real[0];\n"
894
                "\t     f_piped_imag[1] <= f_piped_imag[0];\n"
895
        "\n"
896
                "\t     f_piped_real[2] <= f_piped_real[1];\n"
897
                "\t     f_piped_imag[2] <= f_piped_imag[1];\n"
898
        "\n"
899
                "\t     f_piped_real[3] <= f_piped_real[2];\n"
900
                "\t     f_piped_imag[3] <= f_piped_imag[2];\n"
901
                "\tend\n"
902
        "\n"
903
                "\twire f_syncd;\n"
904
                "\treg  f_rsyncd;\n"
905
        "\n"
906
                "\tinitial      f_rsyncd        = 0;\n"
907
                "\talways @(posedge i_clk)\n"
908
                "\tif (i_reset)\n"
909
                "\t     f_rsyncd <= 1'b0;\n"
910
                "\telse if (!f_rsyncd)\n"
911
                "\t     f_rsyncd <= o_sync;\n"
912
                "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"
913
        "\n"
914
                "\treg  f_state;\n"
915
                "\tinitial      f_state = 0;\n"
916
                "\talways @(posedge i_clk)\n"
917
                "\tif (i_reset)\n"
918
                "\t     f_state <= 0;\n"
919
                "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
920
                "\t     f_state <= f_state + 1;\n"
921
        "\n"
922
                "\talways @(*)\n"
923
                "\tif (f_state != 0)\n"
924
                "\t     assume(!i_sync);\n"
925
        "\n"
926
                "\talways @(*)\n"
927
                "\t     assert(stage == f_state[0]);\n"
928
        "\n"
929
                "\talways @(posedge i_clk)\n"
930
                "\tif ((f_state == 1'b1)&&(f_syncd))\n"
931
                "\tbegin\n"
932
                "\t     assert(o_r == f_piped_real[2] + f_piped_real[1]);\n"
933
                "\t     assert(o_i == f_piped_imag[2] + f_piped_imag[1]);\n"
934
                "\tend\n"
935
        "\n"
936
                "\talways @(posedge i_clk)\n"
937
                "\tif ((f_state == 1'b0)&&(f_syncd))\n"
938
                "\tbegin\n"
939
                "\t     assert(!o_sync);\n"
940
                "\t     assert(o_r == f_piped_real[3] - f_piped_real[2]);\n"
941
                "\t     assert(o_i == f_piped_imag[3] - f_piped_imag[2]);\n"
942
                "\tend\n"
943
        "\n"
944
                "\talways @(*)\n"
945
                "\tif (wait_for_sync)\n"
946
                "\tbegin\n"
947
                "\t     assert(!f_rsyncd);\n"
948
                "\t     assert(!o_sync);\n"
949
                "\t     assert(f_state == 0);\n"
950
                "\tend\n\n");
951 2 dgisselq
        }
952
 
953 36 dgisselq
        fprintf(fp,
954
"`endif // FORMAL\n"
955
"endmodule\n");
956 23 dgisselq
 
957 36 dgisselq
        fclose(fp);
958 2 dgisselq
}
959
 
960
void    usage(void) {
961
        fprintf(stderr,
962 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
963 2 dgisselq
// "\tfftgen -i\n"
964 36 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or\n"
965
"\t\t(for a real FFT) at one clock per two real input samples.\n"
966
"\t-a <hdrname>  Create a header of information describing the built-in\n"
967
"\t\tparameters, useful for module-level testing with Verilator\n"
968 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
969
"\t\tlonger than the corresponding data bits, to help avoid\n"
970 32 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits longer\n"
971 26 dgisselq
"\t\tthan the data bits.\n"
972 36 dgisselq
"\t-d <dir>  Places all of the generated verilog files into <dir>.\n"
973
"\t\tThe default is a subdirectory of the current directory\n"
974
"\t\tnamed %s.\n"
975
"\t-f <size>  Sets the size of the FFT as the number of complex\n"
976 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
977
"\t\ta required parameter.)\n"
978
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
979
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
980
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
981 36 dgisselq
"\t-k #\tSets # clocks per sample, used to minimize multiplies.  Also\n"
982
"\t\tsets one sample in per i_ce clock (opt -1)\n"
983 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
984
"\t\tproduce.  Internal values greater than this value will be\n"
985 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
986
"\t\tsize by one bit for every two FFT stages.)\n"
987 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
988 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
989
"\t\tcomplex values into the FFT.\n"
990 36 dgisselq
"\t-p <nmpy>  Sets the number of hardware multiplies (DSPs) to use, versus\n"
991
"\t\tshift-add emulation.  The default is not to use any hardware\n"
992
"\t\tmultipliers.\n"
993 26 dgisselq
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
994
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
995 37 dgisselq
"\t\tThis option is a place-holder.  The real-FFT has not (yet) been\n"
996
"\t\timplemented.\n"
997 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
998
"\t\talgorithms that need to apply a filter without needing to do\n"
999
"\t\tbin shifting, as these algorithms can, with this option, just\n"
1000
"\t\tmultiply by a bit reversed correlation sequence and then\n"
1001 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
1002
"\t\ta decimation in time inverse to do this, which this program does\n"
1003
"\t\tnot yet provide.)\n"
1004 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
1005 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
1006 36 dgisselq
"\t\trounding or truncation of the answer to the final number of\n"
1007
"\t\tbits.  The default is to use %d extra bits internally.\n",
1008 26 dgisselq
/*
1009 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
1010
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
1011
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
1012 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
1013
*/
1014
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
1015 2 dgisselq
}
1016
 
1017
// Features still needed:
1018
//      Interactivity.
1019
int main(int argc, char **argv) {
1020
        int     fftsize = -1, lgsize = -1;
1021 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
1022 36 dgisselq
                        nummpy=DEF_NMPY, nmpypstage=6, mpy_stages;
1023
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS, ckpce = 0;
1024
        const char *EMPTYSTR = "";
1025 26 dgisselq
        bool    bitreverse = true, inverse=false,
1026 36 dgisselq
                verbose_flag = false,
1027 37 dgisselq
                single_clock = true,
1028 36 dgisselq
                real_fft = false,
1029
                async_reset = false;
1030 2 dgisselq
        FILE    *vmain;
1031 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
1032 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
1033
        // ROUND_T      rounding = RND_HALFUP;
1034 2 dgisselq
 
1035 26 dgisselq
        bool    dbg = false;
1036
        int     dbgstage = 128;
1037
 
1038 2 dgisselq
        if (argc <= 1)
1039
                usage();
1040
 
1041 36 dgisselq
        // Copy the original command line before we mess with it
1042 14 dgisselq
        cmdline = argv[0];
1043 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1044 14 dgisselq
                cmdline += " ";
1045
                cmdline += argv[argn];
1046
        }
1047
 
1048 36 dgisselq
        { int c;
1049
        while((c = getopt(argc, argv, "12Aa:c:d:D:f:hik:m:n:p:rsSx:v")) != -1) {
1050
                switch(c) {
1051
                case '1':       single_clock = true;  break;
1052
                case '2':       single_clock = false; break;
1053
                case 'A':       async_reset  = true;  break;
1054
                case 'a':       hdrname = strdup(optarg);       break;
1055
                case 'c':       xtracbits = atoi(optarg);       break;
1056
                case 'd':       coredir = std::string(optarg);  break;
1057
                case 'D':       dbgstage = atoi(optarg);        break;
1058
                case 'f':       fftsize = atoi(optarg);
1059
                                { int sln = strlen(optarg);
1060
                                if (!isdigit(optarg[sln-1])){
1061
                                        switch(optarg[sln-1]) {
1062
                                        case 'k': case 'K':
1063
                                                fftsize <<= 10;
1064 2 dgisselq
                                                break;
1065 36 dgisselq
                                        case 'm': case 'M':
1066
                                                fftsize <<= 20;
1067 2 dgisselq
                                                break;
1068 36 dgisselq
                                        case 'g': case 'G':
1069
                                                fftsize <<= 30;
1070 28 dgisselq
                                                break;
1071 33 dgisselq
                                        default:
1072 36 dgisselq
                                                printf("ERR: Unknown FFT size, %s!\n", optarg);
1073
                                                exit(EXIT_FAILURE);
1074
                                        }
1075
                                }} break;
1076
                case 'h':       usage(); exit(EXIT_SUCCESS);    break;
1077
                case 'i':       inverse = true;                 break;
1078
                case 'k':       ckpce = atoi(optarg);
1079
                                single_clock = true;
1080
                                break;
1081
                case 'm':       maxbitsout = atoi(optarg);      break;
1082
                case 'n':       nbitsin = atoi(optarg);         break;
1083
                case 'p':       nummpy = atoi(optarg);          break;
1084
                case 'r':       real_fft = true;                break;
1085
                case 'S':       bitreverse = true;              break;
1086
                case 's':       bitreverse = false;             break;
1087
                case 'x':       xtrapbits = atoi(optarg);       break;
1088
                case 'v':       verbose_flag = true;            break;
1089
                // case 'z':    variable_size = true;           break;
1090
                default:
1091
                        printf("Unknown argument, -%c\n", c);
1092 2 dgisselq
                        usage();
1093 36 dgisselq
                        exit(EXIT_FAILURE);
1094 2 dgisselq
                }
1095 36 dgisselq
        }}
1096
 
1097
        if (verbose_flag) {
1098
                if (inverse)
1099
                        printf("Building a %d point inverse FFT module, with %s outputs\n",
1100
                                fftsize,
1101
                                (real_fft)?"real ":"complex");
1102
                else
1103
                        printf("Building a %d point %sforward FFT module\n",
1104
                                fftsize,
1105
                                (real_fft)?"real ":"");
1106
                if (!single_clock)
1107
                        printf("  that accepts two inputs per clock\n");
1108
                if (async_reset)
1109
                        printf("  using a negative logic ASYNC reset\n");
1110
 
1111
                printf("The core will be placed into the %s/ directory\n", coredir.c_str());
1112
 
1113
                if (hdrname[0])
1114
                        printf("A C header file, %s, will be written capturing these\n"
1115
                                "options for a Verilator testbench\n",
1116
                                        hdrname.c_str());
1117
                // nummpy
1118
                // xtrapbits
1119 2 dgisselq
        }
1120
 
1121 26 dgisselq
        if (real_fft) {
1122
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
1123 36 dgisselq
                exit(EXIT_FAILURE);
1124
        }
1125
 
1126
        if (ckpce < 1)
1127
                ckpce = 1;
1128
        if (!bitreverse) {
1129 26 dgisselq
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
1130
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
1131
                printf("built.\n");
1132
        }
1133
 
1134 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
1135
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
1136
                        ;
1137
        }
1138
 
1139
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
1140
                printf("INVALID PARAMETERS!!!!\n");
1141 36 dgisselq
                exit(EXIT_FAILURE);
1142 2 dgisselq
        }
1143
 
1144
 
1145
        if (nextlg(fftsize) != fftsize) {
1146
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
1147
                                fftsize);
1148 36 dgisselq
                exit(EXIT_FAILURE);
1149 2 dgisselq
        } else if (fftsize < 2) {
1150
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
1151
                                fftsize);
1152
                if (fftsize == 1) {
1153
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
1154
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
1155
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
1156
                        fprintf(stderr, "can be connected straight to the input.\n");
1157
                } else {
1158
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
1159
                        fprintf(stderr, "Is such an operation even defined?\n");
1160
                }
1161 36 dgisselq
                exit(EXIT_FAILURE);
1162 2 dgisselq
        }
1163
 
1164
        // Calculate how many output bits we'll have, and what the log
1165
        // based two size of our FFT is.
1166
        {
1167
                int     tmp_size = fftsize;
1168
 
1169
                // The first stage always accumulates one bit, regardless
1170
                // of whether you need to or not.
1171
                nbitsout = nbitsin + 1;
1172
                tmp_size >>= 1;
1173
 
1174
                while(tmp_size > 4) {
1175
                        nbitsout += 1;
1176
                        tmp_size >>= 2;
1177
                }
1178
 
1179
                if (tmp_size > 1)
1180
                        nbitsout ++;
1181
 
1182
                if (fftsize <= 2)
1183
                        bitreverse = false;
1184
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
1185
                nbitsout = maxbitsout;
1186
 
1187 36 dgisselq
        if (verbose_flag) {
1188
                printf("Output samples will be %d bits wide\n", nbitsout);
1189
                printf("This %sFFT will take %d-bit samples in, and produce %d samples out\n", (inverse)?"i":"", nbitsin, nbitsout);
1190
                if (maxbitsout > 0)
1191
                        printf("  Internally, it will allow items to accumulate to %d bits\n", maxbitsout);
1192
                printf("  Twiddle-factors of %d bits will be used\n",
1193
                        nbitsin+xtracbits);
1194
                if (!bitreverse)
1195
                printf("  The output will be left in bit-reversed order\n");
1196
        }
1197
 
1198 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
1199 36 dgisselq
        if (!single_clock) {
1200
                nmpypstage = 6;
1201
        } else if (ckpce <= 1) {
1202
                nmpypstage = 3;
1203
        } else if (ckpce == 2) {
1204
                nmpypstage = 2;
1205
        } else
1206
                nmpypstage = 1;
1207 2 dgisselq
 
1208 36 dgisselq
        mpy_stages = nummpy / nmpypstage;
1209
        if (mpy_stages > lgval(fftsize)-2)
1210
                mpy_stages = lgval(fftsize)-2;
1211 22 dgisselq
 
1212 2 dgisselq
        {
1213
                struct stat     sbuf;
1214 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
1215 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
1216 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
1217 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
1218
                                fprintf(stderr, "To try again, please remove this file.\n");
1219 36 dgisselq
                                exit(EXIT_FAILURE);
1220 2 dgisselq
                        }
1221 33 dgisselq
                } else
1222 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
1223
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
1224
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
1225 36 dgisselq
                        exit(EXIT_FAILURE);
1226 2 dgisselq
                }
1227
        }
1228
 
1229 28 dgisselq
        if (hdrname.length() > 0) {
1230
                FILE    *hdr = fopen(hdrname.c_str(), "w");
1231
                if (hdr == NULL) {
1232
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
1233
                        perror("O/S Err:");
1234 36 dgisselq
                        exit(EXIT_FAILURE);
1235 28 dgisselq
                }
1236
 
1237 36 dgisselq
                fprintf(hdr,
1238
SLASHLINE
1239
"//\n"
1240
"// Filename:\t%s\n"
1241
"//\n"
1242
"// Project:\t%s\n"
1243
"//\n"
1244
"// Purpose:    This simple header file captures the internal constants\n"
1245
"//             within the FFT that were used to build it, for the purpose\n"
1246
"//     of making C++ integration (and test bench testing) simpler.  That is,\n"
1247
"//     should the FFT change size, this will note that size change and thus\n"
1248
"//     any test bench or other C++ program dependent upon either the size of\n"
1249
"//     the FFT, the number of bits in or out of it, etc., can pick up the\n"
1250
"//     changes in the defines found within this file.\n"
1251
"//\n",
1252
                hdrname.c_str(), prjname);
1253 28 dgisselq
                fprintf(hdr, "%s", creator);
1254
                fprintf(hdr, "//\n");
1255
                fprintf(hdr, "%s", cpyleft);
1256
                fprintf(hdr, "//\n"
1257
                "//\n"
1258
                "#ifndef %sFFTHDR_H\n"
1259
                "#define %sFFTHDR_H\n"
1260
                "\n"
1261
                "#define\t%sFFT_IWIDTH\t%d\n"
1262
                "#define\t%sFFT_OWIDTH\t%d\n"
1263
                "#define\t%sFFT_LGWIDTH\t%d\n"
1264
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
1265
                        (inverse)?"I":"", (inverse)?"I":"",
1266
                        (inverse)?"I":"", nbitsin,
1267
                        (inverse)?"I":"", nbitsout,
1268
                        (inverse)?"I":"", lgsize,
1269
                        (inverse)?"I":"", (inverse)?"I":"");
1270 36 dgisselq
                if (ckpce > 0)
1271
                        fprintf(hdr, "#define\t%sFFT_CKPCE\t%d\t// Clocks per CE\n",
1272
                                (inverse)?"I":"", ckpce);
1273
                else
1274
                        fprintf(hdr, "// Two samples per i_ce\n");
1275 28 dgisselq
                if (!bitreverse)
1276
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
1277
                                (inverse)?"I":"");
1278
                if (real_fft)
1279
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
1280
                if (!single_clock)
1281
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
1282 36 dgisselq
                else
1283
                        fprintf(hdr, "// #define\tDBLCLK%sFFT // this FFT takes one input sample per clock\n\n", (inverse)?"I":"");
1284 29 dgisselq
                if (USE_OLD_MULTIPLY)
1285
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
1286 33 dgisselq
 
1287 29 dgisselq
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
1288
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
1289
#ifdef  TST_LONGBIMPY_BW
1290
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
1291
#else
1292
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
1293
#endif
1294
 
1295
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
1296
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
1297
#ifdef  TST_SHIFTADDMPY_BW
1298
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
1299
#else
1300
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
1301
#endif
1302
 
1303
#define TST_SHIFTADDMPY_AW      16
1304
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
1305
                fprintf(hdr, "// Parameters for testing the butterfly\n");
1306
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
1307
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
1308
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
1309
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
1310
                                bflydelay(TST_BUTTERFLY_IWIDTH,
1311
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
1312
 
1313
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
1314
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
1315
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
1316
 
1317
                fprintf(hdr, "// Parameters for testing the double stage\n");
1318
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
1319
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
1320
 
1321
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
1322
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
1323 28 dgisselq
                fprintf(hdr, "\n" "#endif\n\n");
1324
                fclose(hdr);
1325
        }
1326
 
1327 14 dgisselq
        {
1328
                std::string     fname_string;
1329
 
1330
                fname_string = coredir;
1331
                fname_string += "/";
1332
                if (inverse) fname_string += "i";
1333
                fname_string += "fftmain.v";
1334
 
1335
                vmain = fopen(fname_string.c_str(), "w");
1336
                if (NULL == vmain) {
1337
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
1338
                        perror("Err from O/S:");
1339 36 dgisselq
                        exit(EXIT_FAILURE);
1340 14 dgisselq
                }
1341 36 dgisselq
 
1342
                if (verbose_flag)
1343
                        printf("Opened %s\n", fname_string.c_str());
1344 2 dgisselq
        }
1345
 
1346 36 dgisselq
        fprintf(vmain,
1347
SLASHLINE
1348
"//\n"
1349
"// Filename:\t%sfftmain.v\n"
1350
"//\n"
1351
"// Project:    %s\n"
1352
"//\n"
1353
"// Purpose:    This is the main module in the General Purpose FPGA FFT\n"
1354
"//             implementation.  As such, all other modules are subordinate\n"
1355
"//     to this one.  This module accomplish a fixed size Complex FFT on\n"
1356
"//     %d data points.\n",
1357
                (inverse)?"i":"",prjname, fftsize);
1358
        if (single_clock) {
1359
        fprintf(vmain,
1360
"//     The FFT is fully pipelined, and accepts as inputs one complex two\'s\n"
1361
"//     complement sample per clock.\n");
1362
        } else {
1363
        fprintf(vmain,
1364
"//     The FFT is fully pipelined, and accepts as inputs two complex two\'s\n"
1365
"//     complement samples per clock.\n");
1366
        }
1367
 
1368
        fprintf(vmain,
1369
"//\n"
1370
"// Parameters:\n"
1371
"//     i_clk\tThe clock.  All operations are synchronous with this clock.\n"
1372
"//     i_%sreset%s\tSynchronous reset, active high.  Setting this line will\n"
1373
"//     \t\tforce the reset of all of the internals to this routine.\n"
1374
"//     \t\tFurther, following a reset, the o_sync line will go\n"
1375
"//     \t\thigh the same time the first output sample is valid.\n",
1376
                (async_reset)?"a":"", (async_reset)?"_n":"");
1377
        if (single_clock) {
1378
                fprintf(vmain,
1379
"//     i_ce\tA clock enable line.  If this line is set, this module\n"
1380
"//     \t\twill accept one complex input value, and produce\n"
1381
"//     \t\tone (possibly empty) complex output value.\n"
1382
"//     i_sample\tThe complex input sample.  This value is split\n"
1383
"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"
1384
"//     \t\tthe real portion in the high order bits, and the\n"
1385
"//     \t\timaginary portion taking the bottom %d bits.\n"
1386
"//     o_result\tThe output result, of the same format as i_sample,\n"
1387
"//     \t\tonly having %d bits for each of the real and imaginary\n"
1388
"//     \t\tcomponents, leading to %d bits total.\n"
1389
"//     o_sync\tA one bit output indicating the first sample of the FFT frame.\n"
1390
"//     \t\tIt also indicates the first valid sample out of the FFT\n"
1391
"//     \t\ton the first frame.\n", nbitsin, nbitsin, nbitsout, nbitsout*2);
1392
        } else {
1393
                fprintf(vmain,
1394
"//     i_ce\tA clock enable line.  If this line is set, this module\n"
1395
"//     \t\twill accept two complex values as inputs, and produce\n"
1396
"//     \t\ttwo (possibly empty) complex values as outputs.\n"
1397
"//     i_left\tThe first of two complex input samples.  This value is split\n"
1398
"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"
1399
"//     \t\tthe real portion in the high order bits, and the\n"
1400
"//     \t\timaginary portion taking the bottom %d bits.\n"
1401
"//     i_right\tThis is the same thing as i_left, only this is the second of\n"
1402
"//     \t\ttwo such samples.  Hence, i_left would contain input\n"
1403
"//     \t\tsample zero, i_right would contain sample one.  On the\n"
1404
"//     \t\tnext clock i_left would contain input sample two,\n"
1405
"//     \t\ti_right number three and so forth.\n"
1406
"//     o_left\tThe first of two output samples, of the same format as i_left,\n"
1407
"//     \t\tonly having %d bits for each of the real and imaginary\n"
1408
"//     \t\tcomponents, leading to %d bits total.\n"
1409
"//     o_right\tThe second of two output samples produced each clock.  This has\n"
1410
"//     \t\tthe same format as o_left.\n"
1411
"//     o_sync\tA one bit output indicating the first valid sample produced by\n"
1412
"//     \t\tthis FFT following a reset.  Ever after, this will\n"
1413
"//     \t\tindicate the first sample of an FFT frame.\n",
1414
        nbitsin, nbitsin, nbitsout, nbitsout*2);
1415
        }
1416
 
1417
        fprintf(vmain,
1418
"//\n"
1419
"// Arguments:\tThis file was computer generated using the following command\n"
1420
"//\t\tline:\n"
1421
"//\n");
1422 14 dgisselq
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
1423
        fprintf(vmain, "//\n");
1424 37 dgisselq
        fprintf(vmain, "//\tThis core will use hardware accelerated multiplies (DSPs)\n"
1425
                "//\tfor %d of the %d stages\n", mpy_stages, lgval(fftsize));
1426
        fprintf(vmain, "//\n");
1427 2 dgisselq
        fprintf(vmain, "%s", creator);
1428
        fprintf(vmain, "//\n");
1429
        fprintf(vmain, "%s", cpyleft);
1430 35 dgisselq
        fprintf(vmain, "//\n//\n`default_nettype\tnone\n//\n");
1431 2 dgisselq
 
1432
 
1433 36 dgisselq
        std::string     resetw("i_reset");
1434
        if (async_reset)
1435
                resetw = "i_areset_n";
1436
 
1437 2 dgisselq
        fprintf(vmain, "//\n");
1438
        fprintf(vmain, "//\n");
1439 36 dgisselq
        fprintf(vmain, "module %sfftmain(i_clk, %s, i_ce,\n",
1440
                (inverse)?"i":"", resetw.c_str());
1441
        if (single_clock) {
1442
                fprintf(vmain, "\t\ti_sample, o_result, o_sync%s);\n",
1443 26 dgisselq
                        (dbg)?", o_dbg":"");
1444 36 dgisselq
        } else {
1445
                fprintf(vmain, "\t\ti_left, i_right,\n");
1446
                fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
1447
                        (dbg)?", o_dbg":"");
1448
        }
1449 37 dgisselq
        fprintf(vmain,
1450
        "\t// The bit-width of the input, IWIDTH, output, OWIDTH, and the log\n"
1451
        "\t// of the FFT size.  These are localparams, rather than parameters,\n"
1452
        "\t// because once the core has been generated, they can no longer be\n"
1453
        "\t// changed.  (These values can be adjusted by running the core\n"
1454
        "\t// generator again.)  The reason is simply that these values have\n"
1455
        "\t// been hardwired into the core at several places.\n");
1456
        fprintf(vmain, "\tlocalparam\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n\t//\n", nbitsin, nbitsout, lgsize);
1457 2 dgisselq
        assert(lgsize > 0);
1458 37 dgisselq
        fprintf(vmain, "\tinput\twire\t\t\t\ti_clk, %s, i_ce;\n\t//\n",
1459 36 dgisselq
                resetw.c_str());
1460
        if (single_clock) {
1461 37 dgisselq
        fprintf(vmain, "\tinput\twire\t[(2*IWIDTH-1):0]\ti_sample;\n");
1462 36 dgisselq
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_result;\n");
1463
        } else {
1464 37 dgisselq
        fprintf(vmain, "\tinput\twire\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
1465 2 dgisselq
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
1466 36 dgisselq
        }
1467
        fprintf(vmain, "\toutput\treg\t\t\t\to_sync;\n");
1468 26 dgisselq
        if (dbg)
1469
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
1470 2 dgisselq
        fprintf(vmain, "\n\n");
1471
 
1472
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
1473 36 dgisselq
        if (single_clock)
1474
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_sample;\n");
1475
        else
1476
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
1477 2 dgisselq
        int     tmp_size = fftsize, lgtmp = lgsize;
1478
        if (fftsize == 2) {
1479
                if (bitreverse) {
1480
                        fprintf(vmain, "\treg\tbr_start;\n");
1481 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
1482 36 dgisselq
                        if (async_reset) {
1483
                                // The asynchronous reset port is named i_areset_n (see
                                // resetw, and the "if (!i_areset_n)" emitted next); the
                                // sensitivity list must use that exact net name.
                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");
1484
                                fprintf(vmain, "\t\tif (!i_areset_n)\n");
1485
                        } else {
1486
                                fprintf(vmain, "\talways @(posedge i_clk)\n");
1487
                                fprintf(vmain, "\t\tif (i_reset)\n");
1488
                        }
1489 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
1490 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1491 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
1492 2 dgisselq
                }
1493
                fprintf(vmain, "\n\n");
1494 36 dgisselq
                fprintf(vmain, "\tlaststage\t#(IWIDTH)\tstage_2(i_clk, %s, i_ce,\n", resetw.c_str());
1495
                fprintf(vmain, "\t\t\t(%s%s), i_left, i_right, br_left, br_right);\n",
1496
                        (async_reset)?"":"!", resetw.c_str());
1497 2 dgisselq
                fprintf(vmain, "\n\n");
1498
        } else {
1499
                int     nbits = nbitsin, dropbit=0;
1500 26 dgisselq
                int     obits = nbits+1+xtrapbits;
1501 36 dgisselq
                std::string     cmem;
1502
                FILE    *cmemfp;
1503 26 dgisselq
 
1504
                if ((maxbitsout > 0)&&(obits > maxbitsout))
1505
                        obits = maxbitsout;
1506
 
1507 2 dgisselq
                // Always do a first stage
1508 14 dgisselq
                {
1509 22 dgisselq
                        bool    mpystage;
1510 2 dgisselq
 
1511 22 dgisselq
                        // Last two stages are always non-multiply stages
1512
                        // since the multiplies can be done by adds
1513 36 dgisselq
                        mpystage = ((lgtmp-2) <= mpy_stages);
1514 22 dgisselq
 
1515 37 dgisselq
                        fprintf(vmain, "\n\n");
1516 28 dgisselq
                        if (mpystage)
1517
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
1518 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s%d;\n", fftsize);
1519 36 dgisselq
                        if (single_clock) {
1520
                                fprintf(vmain, "\twire\t[%d:0]\tw_d%d;\n", 2*(obits+xtrapbits)-1, fftsize);
1521 37 dgisselq
                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 1, 0, inverse);
1522 36 dgisselq
                                cmemfp = gen_coeff_open(cmem.c_str());
1523
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 1, 0, inverse);
1524 37 dgisselq
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 1, 0, inverse);
1525
                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",
1526 28 dgisselq
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
1527 36 dgisselq
                                        xtracbits, obits+xtrapbits,
1528 37 dgisselq
                                        lgtmp-1, (mpystage)?1:0,
1529 36 dgisselq
                                        ckpce, cmem.c_str(),
1530
                                        fftsize, resetw.c_str());
1531
                                fprintf(vmain, "\t\t\t(%s%s), i_sample, w_d%d, w_s%d%s);\n",
1532
                                        (async_reset)?"":"!", resetw.c_str(),
1533
                                        fftsize, fftsize,
1534
                                        ((dbg)&&(dbgstage == fftsize))
1535
                                                ? ", o_dbg":"");
1536
                        } else {
1537
                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n", fftsize);
1538
                                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
1539 37 dgisselq
                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 2, 0, inverse);
1540 36 dgisselq
                                cmemfp = gen_coeff_open(cmem.c_str());
1541
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 0, inverse);
1542 37 dgisselq
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 0, inverse);
1543
                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",
1544 36 dgisselq
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
1545
                                        xtracbits, obits+xtrapbits,
1546 37 dgisselq
                                        lgtmp-2, (mpystage)?1:0,
1547 36 dgisselq
                                        ckpce, cmem.c_str(),
1548
                                        fftsize, resetw.c_str());
1549
                                fprintf(vmain, "\t\t\t(%s%s), i_left, w_e%d, w_s%d%s);\n",
1550
                                        (async_reset)?"":"!", resetw.c_str(),
1551
                                        fftsize, fftsize,
1552
                                        ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
1553 37 dgisselq
                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 2, 1, inverse);
1554 36 dgisselq
                                cmemfp = gen_coeff_open(cmem.c_str());
1555
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 1, inverse);
1556 37 dgisselq
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 1, inverse);
1557
                                fprintf(vmain, "\tfftstage\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",
1558 36 dgisselq
                                        xtracbits, obits+xtrapbits,
1559 37 dgisselq
                                        lgtmp-2, (mpystage)?1:0,
1560 36 dgisselq
                                        ckpce, cmem.c_str(),
1561
                                        fftsize, resetw.c_str());
1562
                                fprintf(vmain, "\t\t\t(%s%s), i_right, w_o%d, w_os%d);\n",
1563
                                        (async_reset)?"":"!",resetw.c_str(),
1564
                                        fftsize, fftsize);
1565
                        }
1566 28 dgisselq
 
1567
                        std::string     fname;
1568
 
1569 14 dgisselq
                        fname = coredir + "/";
1570 36 dgisselq
                        if (inverse)
1571
                                fname += "i";
1572
                        fname += "fftstage";
1573
                        if (dbg) {
                                // Also build the debug variant of the stage.  The
                                // non-debug build below passes a path ending in ".v",
                                // so hand build_stage() the complete "_dbg.v" path
                                // rather than the extension-less base name (dbgname
                                // was previously computed but never used).
                                // NOTE(review): assumes build_stage() treats its first
                                // argument as the output file path -- confirm.
                                std::string     dbgname(fname);
                                dbgname += "_dbg";
                                dbgname += ".v";
                                if (single_clock)
                                        build_stage(dbgname.c_str(), fftsize, 1, 0, nbits, xtracbits, ckpce, async_reset, true);
                                else
                                        build_stage(dbgname.c_str(), fftsize, 2, 1, nbits, xtracbits, ckpce, async_reset, true);
                        }
1582 14 dgisselq
 
1583
                        fname += ".v";
1584 36 dgisselq
                        if (single_clock) {
1585
                                build_stage(fname.c_str(), fftsize, 1, 0,
1586
                                        nbits, xtracbits, ckpce, async_reset,
1587
                                        false);
1588
                        } else {
1589
                                // All stages use the same Verilog, so we only
1590
                                // need to build one
1591 37 dgisselq
                                build_stage(fname.c_str(), fftsize, 2, 1,
1592 36 dgisselq
                                        nbits, xtracbits, ckpce, async_reset, false);
1593
                        }
1594 14 dgisselq
                }
1595
 
1596 26 dgisselq
                nbits = obits;  // New number of input bits
1597 2 dgisselq
                tmp_size >>= 1; lgtmp--;
1598
                dropbit = 0;
1599
                fprintf(vmain, "\n\n");
1600
                while(tmp_size >= 8) {
1601 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1602 2 dgisselq
 
1603
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1604
                                obits = maxbitsout;
1605
 
1606 14 dgisselq
                        {
1607 22 dgisselq
                                bool            mpystage;
1608 2 dgisselq
 
1609 36 dgisselq
                                mpystage = ((lgtmp-2) <= mpy_stages);
1610 22 dgisselq
 
1611 28 dgisselq
                                if (mpystage)
1612
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
1613 35 dgisselq
                                fprintf(vmain, "\twire\t\tw_s%d;\n",
1614
                                        tmp_size);
1615 36 dgisselq
                                if (single_clock) {
1616
                                        fprintf(vmain,"\twire\t[%d:0]\tw_d%d;\n",
1617
                                                2*(obits+xtrapbits)-1,
1618
                                                tmp_size);
1619 37 dgisselq
                                        cmem = gen_coeff_fname(coredir.c_str(), tmp_size, 1, 0, inverse);
1620 36 dgisselq
                                        cmemfp = gen_coeff_open(cmem.c_str());
1621
                                        gen_coeffs(cmemfp, tmp_size,
1622
                                                nbits+xtracbits+xtrapbits, 1, 0, inverse);
1623 37 dgisselq
                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 1, 0, inverse);
1624
                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",
1625 36 dgisselq
                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
1626
                                                nbits+xtrapbits,
1627
                                                nbits+xtracbits+xtrapbits,
1628
                                                obits+xtrapbits,
1629 37 dgisselq
                                                lgtmp-1, (dropbit)?0:0, (mpystage)?1:0,
1630 36 dgisselq
                                                ckpce,
1631
                                                cmem.c_str(), tmp_size,
1632
                                                resetw.c_str());
1633
                                        fprintf(vmain, "\t\t\tw_s%d, w_d%d, w_d%d, w_s%d%s);\n",
1634
                                                tmp_size<<1, tmp_size<<1,
1635
                                                tmp_size, tmp_size,
1636
                                                ((dbg)&&(dbgstage == tmp_size))
1637
                                                        ?", o_dbg":"");
1638
                                } else {
1639
                                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n",
1640
                                                tmp_size);
1641
                                        fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
1642
                                                2*(obits+xtrapbits)-1,
1643
                                                tmp_size, tmp_size);
1644 37 dgisselq
                                        cmem = gen_coeff_fname(coredir.c_str(), tmp_size, 2, 0, inverse);
1645 36 dgisselq
                                        cmemfp = gen_coeff_open(cmem.c_str());
1646
                                        gen_coeffs(cmemfp, tmp_size,
1647
                                                nbits+xtracbits+xtrapbits, 2, 0, inverse);
1648 37 dgisselq
                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 2, 0, inverse);
1649
                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",
1650 36 dgisselq
                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
1651
                                                nbits+xtrapbits,
1652
                                                nbits+xtracbits+xtrapbits,
1653
                                                obits+xtrapbits,
1654 37 dgisselq
                                                lgtmp-2, (dropbit)?0:0, (mpystage)?1:0,
1655 36 dgisselq
                                                ckpce,
1656
                                                cmem.c_str(), tmp_size,
1657
                                                resetw.c_str());
1658
                                        fprintf(vmain, "\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
1659
                                                tmp_size<<1, tmp_size<<1,
1660
                                                tmp_size, tmp_size,
1661
                                                ((dbg)&&(dbgstage == tmp_size))
1662
                                                        ?", o_dbg":"");
1663 37 dgisselq
                                        cmem = gen_coeff_fname(coredir.c_str(),
1664 36 dgisselq
                                                tmp_size, 2, 1, inverse);
1665
                                        cmemfp = gen_coeff_open(cmem.c_str());
1666
                                        gen_coeffs(cmemfp, tmp_size,
1667
                                                nbits+xtracbits+xtrapbits,
1668
                                                2, 1, inverse);
1669 37 dgisselq
                                        cmem = gen_coeff_fname(EMPTYSTR,
1670
                                                tmp_size, 2, 1, inverse);
1671
                                        fprintf(vmain, "\tfftstage\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",
1672 36 dgisselq
                                                nbits+xtrapbits,
1673
                                                nbits+xtracbits+xtrapbits,
1674
                                                obits+xtrapbits,
1675 37 dgisselq
                                                lgtmp-2, (dropbit)?0:0, (mpystage)?1:0,
1676 36 dgisselq
                                                ckpce, cmem.c_str(), tmp_size,
1677
                                                resetw.c_str());
1678
                                        fprintf(vmain, "\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
1679
                                                tmp_size<<1, tmp_size<<1,
1680
                                                tmp_size, tmp_size);
1681
                                }
1682
                                fprintf(vmain, "\n");
1683 14 dgisselq
                        }
1684
 
1685
 
1686 2 dgisselq
                        dropbit ^= 1;
1687
                        nbits = obits;
1688
                        tmp_size >>= 1; lgtmp--;
1689
                }
1690
 
1691
                if (tmp_size == 4) {
1692 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1693 2 dgisselq
 
1694
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1695
                                obits = maxbitsout;
1696
 
1697 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s4;\n");
1698 36 dgisselq
                        if (single_clock) {
1699
                                fprintf(vmain, "\twire\t[%d:0]\tw_d4;\n",
1700
                                        2*(obits+xtrapbits)-1);
1701
                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,%d,%d)\tstage_4(i_clk, %s, i_ce,\n",
1702
                                        ((dbg)&&(dbgstage==4))?"_dbg":"",
1703
                                        nbits+xtrapbits, obits+xtrapbits, lgsize,
1704
                                        (inverse)?1:0, (dropbit)?0:0,
1705
                                        resetw.c_str());
1706
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_d8, w_d4, w_s4%s);\n",
1707
                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");
1708
                        } else {
1709
                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os4;\n\t// verilator lint_on  UNUSED\n");
1710
                                fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
1711
                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, %s, i_ce,\n",
1712
                                        ((dbg)&&(dbgstage==4))?"_dbg":"",
1713
                                        nbits+xtrapbits, obits+xtrapbits, lgsize,
1714
                                        (inverse)?1:0, (dropbit)?0:0,
1715
                                        resetw.c_str());
1716
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
1717
                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");
1718
                                fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, %s, i_ce,\n",
1719
                                        nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0,
1720
                                        resetw.c_str());
1721
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
1722
                        }
1723 2 dgisselq
                        dropbit ^= 1;
1724
                        nbits = obits;
1725
                        tmp_size >>= 1; lgtmp--;
1726
                }
1727
 
1728
                {
1729 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1730 2 dgisselq
                        if (obits > nbitsout)
1731
                                obits = nbitsout;
1732
                        if ((maxbitsout>0)&&(obits > maxbitsout))
1733
                                obits = maxbitsout;
1734
                        fprintf(vmain, "\twire\t\tw_s2;\n");
1735 36 dgisselq
                        if (single_clock) {
1736
                                fprintf(vmain, "\twire\t[%d:0]\tw_d2;\n",
1737
                                        2*obits-1);
1738
                        } else {
1739
                                fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n",
1740
                                        2*obits-1);
1741
                        }
1742 37 dgisselq
                        /*
1743 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
1744 37 dgisselq
                                printf("Warning: Less than optimal scaling\n");
1745
                        */
1746 2 dgisselq
 
1747 36 dgisselq
                        if (single_clock) {
1748
                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",
1749
                                        nbits+xtrapbits, obits,(dropbit)?0:1,
1750
                                        resetw.c_str());
1751
                                fprintf(vmain, "\t\t\t\t\tw_s4, w_d4, w_d2, w_s2);\n");
1752
                        } else {
1753
                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",
1754
                                        nbits+xtrapbits, obits,(dropbit)?0:1,
1755
                                        resetw.c_str());
1756
                                fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
1757
                        }
1758
 
1759 2 dgisselq
                        fprintf(vmain, "\n\n");
1760
                        nbits = obits;
1761
                }
1762
 
1763
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
1764 36 dgisselq
                if (single_clock)
1765
                        fprintf(vmain, "\tassign\tbr_sample= w_d2;\n");
1766
                else {
1767
                        fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
1768
                        fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
1769
                }
1770 2 dgisselq
                fprintf(vmain, "\n");
1771
                if (bitreverse) {
1772
                        fprintf(vmain, "\twire\tbr_start;\n");
1773
                        fprintf(vmain, "\treg\tr_br_started;\n");
1774 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
1775 36 dgisselq
                        if (async_reset) {
1776
                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");
1777
                                fprintf(vmain, "\t\tif (!i_areset_n)\n");
1778
                        } else {
1779
                                fprintf(vmain, "\talways @(posedge i_clk)\n");
1780
                                fprintf(vmain, "\t\tif (i_reset)\n");
1781
                        }
1782 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
1783
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1784 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
1785
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
1786 2 dgisselq
                }
1787
        }
1788
 
1789 36 dgisselq
 
1790 2 dgisselq
        fprintf(vmain, "\n");
1791
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
1792
        fprintf(vmain, "\twire\tbr_sync;\n");
1793
        if (bitreverse) {
1794 36 dgisselq
                if (single_clock) {
1795
                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_result;\n");
1796
                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());
1797
                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_sample,\n");
1798
                        fprintf(vmain, "\t\t\tbr_o_result, br_sync);\n");
1799
                } else {
1800
                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
1801
                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());
1802
                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
1803
                        fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
1804
                }
1805
        } else if (single_clock) {
1806
                fprintf(vmain, "\tassign\tbr_o_result = br_result;\n");
1807
                fprintf(vmain, "\tassign\tbr_sync     = w_s2;\n");
1808 2 dgisselq
        } else {
1809
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
1810
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
1811
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
1812
        }
1813
 
1814 36 dgisselq
        fprintf(vmain,
1815
"\n\n"
1816
"\t// Last clock: Register our outputs, we\'re done.\n"
1817
"\tinitial\to_sync  = 1\'b0;\n");
1818
        if (async_reset)
1819
                fprintf(vmain,
1820
"\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
1821
        else {
1822
                fprintf(vmain,
1823
"\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
1824
        }
1825
 
1826
        fprintf(vmain,
1827
"\t\t\to_sync  <= 1\'b0;\n"
1828
"\t\telse if (i_ce)\n"
1829
"\t\t\to_sync  <= br_sync;\n"
1830
"\n"
1831
"\talways @(posedge i_clk)\n"
1832
"\t\tif (i_ce)\n");
1833
        if (single_clock) {
1834
                fprintf(vmain, "\t\t\to_result  <= br_o_result;\n");
1835
        } else {
1836
                fprintf(vmain,
1837
"\t\tbegin\n"
1838
"\t\t\to_left  <= br_o_left;\n"
1839
"\t\t\to_right <= br_o_right;\n"
1840
"\t\tend\n");
1841
        }
1842
 
1843
        fprintf(vmain,
1844
"\n\n"
1845
"endmodule\n");
1846 2 dgisselq
        fclose(vmain);
1847
 
1848 36 dgisselq
 
1849 14 dgisselq
        {
1850
                std::string     fname;
1851 2 dgisselq
 
1852 14 dgisselq
                fname = coredir + "/butterfly.v";
1853 36 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding,
1854
                        ckpce, async_reset);
1855 2 dgisselq
 
1856 36 dgisselq
                fname = coredir + "/hwbfly.v";
1857
                build_hwbfly(fname.c_str(), xtracbits, rounding,
1858
                        ckpce, async_reset);
1859 22 dgisselq
 
1860 29 dgisselq
                {
1861
                        // To make debugging easier, we build both of these
1862
                        fname = coredir + "/shiftaddmpy.v";
1863
                        build_multiply(fname.c_str());
1864 2 dgisselq
 
1865 29 dgisselq
                        fname = coredir + "/longbimpy.v";
1866
                        build_longbimpy(fname.c_str());
1867
                        fname = coredir + "/bimpy.v";
1868
                        build_bimpy(fname.c_str());
1869
                }
1870
 
1871 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
1872
                        fname = coredir + "/qtrstage_dbg.v";
1873 36 dgisselq
                        if (single_clock)
1874
                                build_snglquarters(fname.c_str(), rounding,
1875
                                        async_reset, true);
1876
                        else
1877
                                build_dblquarters(fname.c_str(), rounding,
1878
                                        async_reset, true);
1879 26 dgisselq
                }
1880 14 dgisselq
                fname = coredir + "/qtrstage.v";
1881 36 dgisselq
                if (single_clock)
1882
                        build_snglquarters(fname.c_str(), rounding,
1883
                                        async_reset, false);
1884 26 dgisselq
                else
1885 36 dgisselq
                        build_dblquarters(fname.c_str(), rounding,
1886
                                        async_reset, false);
1887 14 dgisselq
 
1888 36 dgisselq
 
1889
                if (single_clock) {
1890
                        fname = coredir + "/laststage.v";
1891
                        build_sngllast(fname.c_str(), async_reset);
1892
                } else {
1893
                        if ((dbg)&&(dbgstage == 2))
1894
                                fname = coredir + "/laststage_dbg.v";
1895
                        else
1896
                                fname = coredir + "/laststage.v";
1897
                        build_dblstage(fname.c_str(), rounding,
1898
                                async_reset, (dbg)&&(dbgstage==2));
1899
                }
1900
 
1901 14 dgisselq
                if (bitreverse) {
1902 36 dgisselq
                        fname = coredir + "/bitreverse.v";
1903
                        if (single_clock)
1904
                                build_snglbrev(fname.c_str(), async_reset);
1905
                        else
1906
                                build_dblreverse(fname.c_str(), async_reset);
1907 14 dgisselq
                }
1908 23 dgisselq
 
1909
                const   char    *rnd_string = "";
1910
                switch(rounding) {
1911
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
1912
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
1913
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
1914
                        default:
1915
                                rnd_string = "/convround.v"; break;
1916
                } fname = coredir + rnd_string;
1917
                switch(rounding) {
1918
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
1919
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
1920
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
1921
                        default:
1922
                                build_convround(fname.c_str()); break;
1923
                }
1924
 
1925 2 dgisselq
        }
1926 36 dgisselq
 
1927
        if (verbose_flag)
1928
                printf("All done -- success\n");
1929 2 dgisselq
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.