OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 36

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
2 16 dgisselq
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5 36 dgisselq
// Project:     A General Purpose Pipelined FFT Implementation
6 16 dgisselq
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9 33 dgisselq
//      Once built, this program will build an FFT (or IFFT) core of arbitrary
10
//      width, precision, etc., that will run at two samples per clock.
11
//      (Incidentally, I didn't pick two samples per clock because it was
12
//      easier, but rather because there weren't any two-sample per clock
13
//      FFT's posted on opencores.org.  Further, FFT's running at one sample
14
//      per clock aren't that hard to find.)
15 16 dgisselq
//
16 33 dgisselq
//      You can find the documentation for this program in two places.  One is
17
//      in the usage() function below.  The second is in the 'doc'uments
18
//      directory that comes with this package, specifically in the spec.pdf
19
//      file.  If it's not there, type make in the documents directory to
20
//      build it.
21 16 dgisselq
//
22 31 dgisselq
//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.
23
//              (Adjustments are at the top of the file ...)
24
//
25 16 dgisselq
// Creator:     Dan Gisselquist, Ph.D.
26 30 dgisselq
//              Gisselquist Technology, LLC
27 16 dgisselq
//
28 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
29 16 dgisselq
//
30 36 dgisselq
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
31 16 dgisselq
//
32
// This program is free software (firmware): you can redistribute it and/or
33
// modify it under the terms of  the GNU General Public License as published
34
// by the Free Software Foundation, either version 3 of the License, or (at
35
// your option) any later version.
36
//
37
// This program is distributed in the hope that it will be useful, but WITHOUT
38
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
39
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
40
// for more details.
41
//
42
// You should have received a copy of the GNU General Public License along
43
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
44
// target there if the PDF file isn't present.)  If not, see
45
// <http://www.gnu.org/licenses/> for a copy.
46
//
47
// License:     GPL, v3, as defined and found on www.gnu.org,
48
//              http://www.gnu.org/licenses/gpl.html
49
//
50
//
51 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
52 16 dgisselq
//
53
//
54 31 dgisselq
#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen
55 2 dgisselq
#include <stdio.h>
56
#include <stdlib.h>
57 31 dgisselq
 
58
#ifdef _MSC_VER //  added for ms vs compatibility

// Platform shim: supply the POSIX names this file uses (access() mode
// flags, lstat, S_ISDIR, two-argument mkdir) on top of the Microsoft C
// runtime.  On every other compiler the real POSIX headers are used.

#include <io.h>
#include <direct.h>
#include <sys/stat.h>   // struct stat, _stat(), _S_IFMT, _S_IFDIR
#define _USE_MATH_DEFINES
#define R_OK    4       /* Test for read permission.  */
#define W_OK    2       /* Test for write permission.  */
#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/
#define F_OK    0       /* Test for existence.  */

#if _MSC_VER <= 1700

// Stub for older Visual Studio releases: always reports failure (non-zero),
// and S_ISDIR() always evaluates to false.
int lstat(const char *filename, struct stat *buf) { return 1; }
#define S_ISDIR(A)      0

#else

#define lstat   _stat
// S_ISDIR must be a function-like test of the file-type bits of st_mode.
// Aliasing it to the bare constant _S_IFDIR would turn S_ISDIR(mode) into
// a "call" of an integer, which does not compile.
#define S_ISDIR(A)      (((A) & _S_IFMT) == _S_IFDIR)

#endif

// The Microsoft _mkdir() takes no permission argument; drop it.
#define mkdir(A,B)      _mkdir(A)

#define access _access

#else
// And for G++/Linux environment

#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros
#include <sys/stat.h>
#endif
90
 
91 2 dgisselq
#include <string.h>
92 14 dgisselq
#include <string>
93 2 dgisselq
#include <math.h>
94
#include <ctype.h>
95
#include <assert.h>
96
 
97 36 dgisselq
#include "defaults.h"
98
#include "legal.h"
99
#include "rounding.h"
100
#include "fftlib.h"
101
#include "bldstage.h"
102
#include "bitreverse.h"
103
#include "softmpy.h"
104
#include "butterfly.h"
105 2 dgisselq
 
106 36 dgisselq
void    build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {
107 2 dgisselq
        FILE    *fp = fopen(fname, "w");
108
        if (NULL == fp) {
109
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
110
                perror("O/S Err was:");
111
                return;
112
        }
113 23 dgisselq
        const   char    *rnd_string;
114
        if (rounding == RND_TRUNCATE)
115
                rnd_string = "truncate";
116
        else if (rounding == RND_FROMZERO)
117
                rnd_string = "roundfromzero";
118
        else if (rounding == RND_HALFUP)
119
                rnd_string = "roundhalfup";
120
        else
121
                rnd_string = "convround";
122
 
123
 
124
        fprintf(fp,
125 36 dgisselq
SLASHLINE
126 23 dgisselq
"//\n"
127 36 dgisselq
"// Filename:\tqtrstage%s.v\n"
128 2 dgisselq
"//\n"
129 36 dgisselq
"// Project:\t%s\n"
130
"//\n"
131 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
132
"//             frequency FFT.  This particular implementation is optimized\n"
133 36 dgisselq
"//     so that all of the multiplies are accomplished by additions and\n"
134
"//     multiplexers only.\n"
135 5 dgisselq
"//\n"
136 2 dgisselq
"//\n%s"
137
"//\n",
138 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
139 2 dgisselq
        fprintf(fp, "%s", cpyleft);
140 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
141 2 dgisselq
 
142 36 dgisselq
        std::string     resetw("i_reset");
143
        if (async_reset)
144
                resetw = std::string("i_areset_n");
145
 
146 2 dgisselq
        fprintf(fp,
147 36 dgisselq
"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
148 29 dgisselq
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
149 5 dgisselq
        "\t// Parameters specific to the core that should be changed when this\n"
150 36 dgisselq
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller\n"
151 5 dgisselq
        "\t// spans must use the fftdoubles stage.\n"
152 29 dgisselq
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
153 36 dgisselq
        "\tinput\t                              i_clk, %s, i_ce, i_sync;\n"
154 5 dgisselq
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
155
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
156
        "\toutput\treg                          o_sync;\n"
157 36 dgisselq
        "\t\n", (dbg)?"_dbg":"",
158
        resetw.c_str(),
159
        (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
160
        TST_QTRSTAGE_LGWIDTH, resetw.c_str());
161 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
162
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
163
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
164
"\n");
165
        }
166 14 dgisselq
        fprintf(fp,
167 5 dgisselq
        "\treg\t        wait_for_sync;\n"
168 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
169 2 dgisselq
"\n"
170 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
171 2 dgisselq
"\n"
172 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
173
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
174
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
175
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
176 2 dgisselq
"\n"
177 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
178
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
179 2 dgisselq
"\n"
180 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
181
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
182
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
183 2 dgisselq
"\n"
184 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
185
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
186
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
187 2 dgisselq
"\n"
188 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
189 14 dgisselq
"\n");
190
        fprintf(fp,
191 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
192
        fprintf(fp,
193
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
194
        fprintf(fp,
195 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
196 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
197
        fprintf(fp,
198 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
199 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
200
        fprintf(fp,
201 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
202 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
203
        fprintf(fp,
204 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
205 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
206
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
207
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
208
/*
209
        fprintf(fp,
210 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
211 9 dgisselq
        "\tgenerate\n"
212
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
213 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
214 9 dgisselq
        "\telse\n"
215 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
216 9 dgisselq
        "\tendgenerate\n"
217 2 dgisselq
"\n"
218 23 dgisselq
*/
219
        fprintf(fp,
220 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
221 36 dgisselq
        "\tinitial iaddr = 0;\n");
222
        if (async_reset)
223
                fprintf(fp,
224
                        "\talways @(posedge i_clk, negedge i_areset_n)\n"
225
                                "\t\tif (!i_reset)\n");
226
        else
227
                fprintf(fp,
228 5 dgisselq
        "\talways @(posedge i_clk)\n"
229 36 dgisselq
                "\t\tif (i_reset)\n");
230
        fprintf(fp,
231 5 dgisselq
                "\t\tbegin\n"
232 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
233 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
234 35 dgisselq
                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
235 5 dgisselq
                "\t\tbegin\n"
236 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
237
                        "\t\t\twait_for_sync <= 1\'b0;\n"
238 36 dgisselq
                "\t\tend\n\n"
239 26 dgisselq
        "\talways @(posedge i_clk)\n"
240
                "\t\tif (i_ce)\n"
241 5 dgisselq
                        "\t\t\timem <= i_data;\n"
242 26 dgisselq
                "\n\n");
243 23 dgisselq
        fprintf(fp,
244
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
245
        "\t// Why not?  Because iaddr will always be zero until after the\n"
246
        "\t// first i_ce, so we are safe.\n"
247 36 dgisselq
        "\tinitial pipeline = 4\'h0;\n");
248
        if (async_reset)
249
                fprintf(fp,
250
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
251
                "\t\tif (!i_reset)\n");
252
        else
253
                fprintf(fp,
254 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
255 36 dgisselq
                "\t\tif (i_reset)\n");
256
 
257
        fprintf(fp,
258 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
259 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
260
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
261
        fprintf(fp,
262
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
263
        "\talways\t@(posedge i_clk)\n"
264
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
265
                "\t\tbegin\n"
266
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
267
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
268
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
269
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
270
                "\t\tend\n\n");
271
        fprintf(fp,
272
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
273
        fprintf(fp,
274 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
275
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
276
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
277
        "\t// it independent of pipeline[2].\n"
278 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
279 26 dgisselq
                "\t\tif (i_ce)\n"
280 23 dgisselq
                "\t\tbegin\n"
281
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
282
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
283
                        "\t\t\tif (ODD == 0)\n"
284 5 dgisselq
                        "\t\t\tbegin\n"
285 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
286
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
287
                        "\t\t\tend else if (INVERSE==0) begin\n"
288
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
289
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
290
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
291
                        "\t\t\tend else begin\n"
292
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
293
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
294
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
295 5 dgisselq
                        "\t\t\tend\n"
296 23 dgisselq
                "\t\tend\n\n");
297
        fprintf(fp,
298
        "\talways\t@(posedge i_clk)\n"
299
                "\t\tif (i_ce)\n"
300
                "\t\tbegin // In sequence, clock = 3\n"
301
                        "\t\t\tif (pipeline[3])\n"
302 5 dgisselq
                        "\t\t\tbegin\n"
303
                                "\t\t\t\tomem <= ob_b;\n"
304
                                "\t\t\t\to_data <= ob_a;\n"
305
                        "\t\t\tend else\n"
306
                                "\t\t\t\to_data <= omem;\n"
307 23 dgisselq
                "\t\tend\n\n");
308
 
309
        fprintf(fp,
310
        "\t// Don\'t forget in the sync check that we are running\n"
311
        "\t// at two clocks per sample.  Thus we need to\n"
312
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
313 36 dgisselq
        "\tinitial\to_sync = 1\'b0;\n");
314
 
315
        if (async_reset)
316
                fprintf(fp,
317
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
318
                "\t\tif (!i_areset_n)\n");
319
        else
320
                fprintf(fp,
321 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
322 36 dgisselq
                "\t\tif (i_reset)\n");
323
        fprintf(fp,
324 26 dgisselq
                "\t\t\to_sync <= 1\'b0;\n"
325
                "\t\telse if (i_ce)\n"
326 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
327
        fprintf(fp, "endmodule\n");
328 2 dgisselq
}
329
 
330 36 dgisselq
void    build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {
331 2 dgisselq
        FILE    *fp = fopen(fname, "w");
332
        if (NULL == fp) {
333
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
334
                perror("O/S Err was:");
335
                return;
336
        }
337 23 dgisselq
        const   char    *rnd_string;
338
        if (rounding == RND_TRUNCATE)
339
                rnd_string = "truncate";
340
        else if (rounding == RND_FROMZERO)
341
                rnd_string = "roundfromzero";
342
        else if (rounding == RND_HALFUP)
343
                rnd_string = "roundhalfup";
344
        else
345
                rnd_string = "convround";
346
 
347
 
348 2 dgisselq
        fprintf(fp,
349 36 dgisselq
SLASHLINE
350 2 dgisselq
"//\n"
351 36 dgisselq
"// Filename:\tqtrstage%s.v\n"
352 2 dgisselq
"//\n"
353 36 dgisselq
"// Project:\t%s\n"
354 2 dgisselq
"//\n"
355 36 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
356
"//             frequency FFT.  This particular implementation is optimized\n"
357
"//     so that all of the multiplies are accomplished by additions and\n"
358
"//     multiplexers only.\n"
359 2 dgisselq
"//\n"
360 36 dgisselq
"// Operation:\n"
361
"//     The operation of this stage is identical to the regular stages of\n"
362
"//     the FFT (see them for details), with one additional and critical\n"
363
"//     difference: this stage doesn't require any hardware multiplication.\n"
364
"//     The multiplies within it may all be accomplished using additions and\n"
365
"//     subtractions.\n"
366
"//\n"
367
"//     Let's see how this is done.  Given x[n] and x[n+2], cause thats the\n"
368
"//     stage we are working on, with i_sync true for x[0] being input,\n"
369
"//     produce the output:\n"
370
"//\n"
371
"//     y[n  ] = x[n] + x[n+2]\n"
372
"//     y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2}        (forward transform)\n"
373
"//            = (x[n] - x[n+2]) * -j^n\n"
374
"//\n"
375
"//     y[n].r = x[n].r + x[n+2].r      (This is the easy part)\n"
376
"//     y[n].i = x[n].i + x[n+2].i\n"
377
"//\n"
378
"//     y[2].r = x[0].r - x[2].r\n"
379
"//     y[2].i = x[0].i - x[2].i\n"
380
"//\n"
381
"//     y[3].r =   (x[1].i - x[3].i)            (forward transform)\n"
382
"//     y[3].i = - (x[1].r - x[3].r)\n"
383
"//\n"
384
"//     y[3].r = - (x[1].i - x[3].i)            (inverse transform)\n"
385
"//     y[3].i =   (x[1].r - x[3].r)            (INVERSE = 1)\n"
386
// "//\n"
387
// "//  When the FFT is run in the two samples per clock mode, this quarter\n"
388
// "//  stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"
389
// "//  x[3] (ODD = 1).  In all other cases, it will operate on all four\n"
390
// "//  values.\n"
391 2 dgisselq
"//\n%s"
392 36 dgisselq
"//\n",
393
                (dbg)?"_dbg":"", prjname, creator);
394 2 dgisselq
        fprintf(fp, "%s", cpyleft);
395 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
396 36 dgisselq
 
397
        std::string     resetw("i_reset");
398
        if (async_reset)
399
                resetw = std::string("i_areset_n");
400
 
401 33 dgisselq
        fprintf(fp,
402 36 dgisselq
"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
403
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
404
        "\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"
405
        "\tinput\t                              i_clk, %s, i_ce, i_sync;\n"
406
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
407
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
408
        "\toutput\treg                          o_sync;\n"
409
                "\t\n", (dbg)?"_dbg":"", resetw.c_str(),
410
                (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
411
                TST_QTRSTAGE_LGWIDTH, resetw.c_str());
412 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
413 36 dgisselq
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
414
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
415 26 dgisselq
"\n");
416
        }
417 36 dgisselq
 
418 33 dgisselq
        fprintf(fp,
419 36 dgisselq
        "\treg\t        wait_for_sync;\n"
420
        "\treg\t[2:0]   pipeline;\n"
421 2 dgisselq
"\n"
422 36 dgisselq
        "\treg\tsigned [(IWIDTH):0]     sum_r, sum_i, diff_r, diff_i;\n"
423 15 dgisselq
"\n"
424 36 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
425
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
426
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
427
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
428 15 dgisselq
"\n"
429 36 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
430
        "\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"
431 2 dgisselq
"\n"
432 36 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
433
        "\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"
434
        "\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"
435 26 dgisselq
"\n"
436 36 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
437
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
438
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
439
"\n"
440
        "\treg  [(2*OWIDTH-1):0]        omem [0:1];\n"
441 28 dgisselq
"\n");
442 36 dgisselq
 
443
        fprintf(fp, "\t//\n"
444
        "\t// Round our output values down to OWIDTH bits\n"
445
        "\t//\n");
446
 
447 28 dgisselq
        fprintf(fp,
448 36 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"
449
        "\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"
450
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
451
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
452 28 dgisselq
        fprintf(fp,
453 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
454
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
455 28 dgisselq
        fprintf(fp,
456 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
457
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
458 28 dgisselq
        fprintf(fp,
459 36 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
460
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
461
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
462
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
463
        fprintf(fp,
464
        "\tinitial wait_for_sync = 1\'b1;\n"
465
        "\tinitial iaddr = 0;\n");
466
        if (async_reset)
467
                fprintf(fp,
468
                        "\talways @(posedge i_clk, negedge i_areset_n)\n"
469
                                "\t\tif (!i_reset)\n");
470
        else
471
                fprintf(fp,
472
        "\talways @(posedge i_clk)\n"
473
                "\t\tif (i_reset)\n");
474 28 dgisselq
 
475 36 dgisselq
        fprintf(fp, "\t\tbegin\n"
476
                        "\t\t\twait_for_sync <= 1\'b1;\n"
477
                        "\t\t\tiaddr <= 0;\n"
478
                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
479
                "\t\tbegin\n"
480
                        "\t\t\tiaddr <= iaddr + 1\'b1;\n"
481
                        "\t\t\twait_for_sync <= 1\'b0;\n"
482
                "\t\tend\n\n"
483 28 dgisselq
        "\talways @(posedge i_clk)\n"
484
                "\t\tif (i_ce)\n"
485
                "\t\tbegin\n"
486 36 dgisselq
                        "\t\t\timem[0] <= i_data;\n"
487
                        "\t\t\timem[1] <= imem[0];\n"
488 28 dgisselq
                "\t\tend\n"
489 36 dgisselq
                "\n\n");
490
        fprintf(fp,
491
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
492
        "\t// Why not?  Because iaddr will always be zero until after the\n"
493
        "\t// first i_ce, so we are safe.\n"
494
        "\tinitial pipeline = 3\'h0;\n");
495 2 dgisselq
 
496 36 dgisselq
        if (async_reset)
497
                fprintf(fp,
498
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
499
                "\t\tif (!i_reset)\n");
500
        else
501
                fprintf(fp,
502
        "\talways\t@(posedge i_clk)\n"
503
                "\t\tif (i_reset)\n");
504 2 dgisselq
 
505
        fprintf(fp,
506 36 dgisselq
                        "\t\t\tpipeline <= 3\'h0;\n"
507
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
508
                        "\t\t\tpipeline <= { pipeline[1:0], iaddr[1] };\n\n");
509
        fprintf(fp,
510
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
511
        "\talways\t@(posedge i_clk)\n"
512
                "\t\tif ((i_ce)&&(iaddr[1]))\n"
513
                "\t\tbegin\n"
514
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
515
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
516
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
517
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
518
                "\t\tend\n\n");
519
        fprintf(fp,
520
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
521 2 dgisselq
 
522 33 dgisselq
        fprintf(fp,
523 36 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
524
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
525
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
526
        "\t// it independent of pipeline[2].\n"
527
        "\talways\t@(posedge i_clk)\n"
528 2 dgisselq
                "\t\tif (i_ce)\n"
529
                "\t\tbegin\n"
530 36 dgisselq
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
531
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
532
                        "\t\t\tif (!iaddr[0])\n"
533
                        "\t\t\tbegin\n"
534
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
535
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
536
                        "\t\t\tend else if (INVERSE==0) begin\n"
537
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
538
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
539
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
540
                        "\t\t\tend else begin\n"
541
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
542
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
543
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
544
                        "\t\t\tend\n"
545
                "\t\tend\n\n");
546
        fprintf(fp,
547
        "\talways\t@(posedge i_clk)\n"
548 2 dgisselq
                "\t\tif (i_ce)\n"
549 36 dgisselq
                "\t\tbegin // In sequence, clock = 3\n"
550
                        "\t\t\tomem[0] <= ob_b;\n"
551
                        "\t\t\tomem[1] <= omem[0];\n"
552
                        "\t\t\tif (pipeline[2])\n"
553
                                "\t\t\t\to_data <= ob_a;\n"
554
                        "\t\t\telse\n"
555
                                "\t\t\t\to_data <= omem[1];\n"
556
                "\t\tend\n\n");
557 2 dgisselq
 
558 36 dgisselq
        fprintf(fp,
559
        "\tinitial\to_sync = 1\'b0;\n");
560 2 dgisselq
 
561 36 dgisselq
        if (async_reset)
562
                fprintf(fp,
563
        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"
564
                "\t\tif (!i_areset_n)\n");
565
        else
566
                fprintf(fp,
567
        "\talways\t@(posedge i_clk)\n"
568
                "\t\tif (i_reset)\n");
569 29 dgisselq
        fprintf(fp,
570 36 dgisselq
                "\t\t\to_sync <= 1\'b0;\n"
571
                "\t\telse if (i_ce)\n"
572
                        "\t\t\to_sync <= (iaddr[2:0] == 3'b101);\n\n");
573 29 dgisselq
 
574 36 dgisselq
        if (formal_property_flag) {
575
                fprintf(fp,
576
"`ifdef FORMAL\n"
577
        "\treg  f_past_valid;\n"
578
        "\tinitial      f_past_valid = 1'b0;\n"
579
        "\talways @(posedge i_clk)\n"
580
        "\t     f_past_valid = 1'b1;\n"
581 29 dgisselq
"\n"
582 36 dgisselq
"`ifdef QTRSTAGE\n"
583
        "\talways @(posedge i_clk)\n"
584
        "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"
585
"`endif\n"
586 29 dgisselq
"\n"
587 36 dgisselq
        "\t// The below logic only works if the rounding stage does nothing\n"
588
        "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"
589 29 dgisselq
"\n"
590 36 dgisselq
        "\treg  signed [IWIDTH-1:0]     f_piped_real    [0:7];\n"
591
        "\treg  signed [IWIDTH-1:0]     f_piped_imag    [0:7];\n"
592 29 dgisselq
"\n"
593 36 dgisselq
        "\talways @(posedge i_clk)\n"
594
        "\tif (i_ce)\n"
595
        "\tbegin\n"
596
        "\t     f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH];\n"
597
        "\t     f_piped_imag[0] <= i_data[  IWIDTH-1:0];\n"
598 29 dgisselq
"\n"
599 36 dgisselq
        "\t     f_piped_real[1] <= f_piped_real[0];\n"
600
        "\t     f_piped_imag[1] <= f_piped_imag[0];\n"
601 29 dgisselq
"\n"
602 36 dgisselq
        "\t     f_piped_real[2] <= f_piped_real[1];\n"
603
        "\t     f_piped_imag[2] <= f_piped_imag[1];\n"
604 29 dgisselq
"\n"
605 36 dgisselq
        "\t     f_piped_real[3] <= f_piped_real[2];\n"
606
        "\t     f_piped_imag[3] <= f_piped_imag[2];\n"
607 29 dgisselq
"\n"
608 36 dgisselq
        "\t     f_piped_real[4] <= f_piped_real[3];\n"
609
        "\t     f_piped_imag[4] <= f_piped_imag[3];\n"
610 29 dgisselq
"\n"
611 36 dgisselq
        "\t     f_piped_real[5] <= f_piped_real[4];\n"
612
        "\t     f_piped_imag[5] <= f_piped_imag[4];\n"
613 29 dgisselq
"\n"
614 36 dgisselq
        "\t     f_piped_real[6] <= f_piped_real[5];\n"
615
        "\t     f_piped_imag[6] <= f_piped_imag[5];\n"
616 29 dgisselq
"\n"
617 36 dgisselq
        "\t     f_piped_real[7] <= f_piped_real[6];\n"
618
        "\t     f_piped_imag[7] <= f_piped_imag[6];\n"
619
        "\tend\n"
620 29 dgisselq
"\n"
621 36 dgisselq
        "\treg  f_rsyncd;\n"
622
        "\twire f_syncd;\n"
623 29 dgisselq
"\n"
624 36 dgisselq
        "\tinitial      f_rsyncd = 0;\n"
625 29 dgisselq
        "\talways @(posedge i_clk)\n"
626 36 dgisselq
        "\tif(i_reset)\n"
627
        "\t     f_rsyncd <= 1'b0;\n"
628
        "\telse if (!f_rsyncd)\n"
629
        "\t     f_rsyncd <= (o_sync);\n"
630
        "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"
631 29 dgisselq
"\n"
632 36 dgisselq
        "\treg  [1:0]   f_state;\n"
633 35 dgisselq
"\n"
634 2 dgisselq
"\n"
635 36 dgisselq
        "\tinitial      f_state = 0;\n"
636
        "\talways @(posedge i_clk)\n"
637
        "\tif (i_reset)\n"
638
        "\t     f_state <= 0;\n"
639
        "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
640
        "\t     f_state <= f_state + 1;\n"
641 2 dgisselq
"\n"
642 36 dgisselq
        "\talways @(*)\n"
643
        "\tif (f_state != 0)\n"
644
        "\t     assume(!i_sync);\n"
645 2 dgisselq
"\n"
646 5 dgisselq
        "\talways @(posedge i_clk)\n"
647 36 dgisselq
        "\t     assert(f_state[1:0] == iaddr[1:0]);\n"
648 2 dgisselq
"\n"
649 36 dgisselq
        "\twire signed [2*IWIDTH-1:0]   f_i_real, f_i_imag;\n"
650
        "\tassign                       f_i_real = i_data[2*IWIDTH-1:IWIDTH];\n"
651
        "\tassign                       f_i_imag = i_data[  IWIDTH-1:0];\n"
652 26 dgisselq
"\n"
653 36 dgisselq
        "\twire signed [OWIDTH-1:0]     f_o_real, f_o_imag;\n"
654
        "\tassign                       f_o_real = o_data[2*OWIDTH-1:OWIDTH];\n"
655
        "\tassign                       f_o_imag = o_data[  OWIDTH-1:0];\n"
656
"\n"
657 26 dgisselq
        "\talways @(posedge i_clk)\n"
658 36 dgisselq
        "\tif (f_state == 2'b11)\n"
659
        "\tbegin\n"
660
        "\t     assume(f_piped_real[0] != 3'sb100);\n"
661
        "\t     assume(f_piped_real[2] != 3'sb100);\n"
662
        "\t     assert(sum_r  == f_piped_real[2] + f_piped_real[0]);\n"
663
        "\t     assert(sum_i  == f_piped_imag[2] + f_piped_imag[0]);\n"
664 26 dgisselq
"\n"
665 36 dgisselq
        "\t     assert(diff_r == f_piped_real[2] - f_piped_real[0]);\n"
666
        "\t     assert(diff_i == f_piped_imag[2] - f_piped_imag[0]);\n"
667
        "\tend\n"
668 26 dgisselq
"\n"
669 36 dgisselq
        "\talways @(posedge i_clk)\n"
670
        "\tif ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4)))\n"
671
        "\tbegin\n"
672
        "\t     assert(rnd_sum_r  == f_piped_real[3]+f_piped_real[1]);\n"
673
        "\t     assert(rnd_sum_i  == f_piped_imag[3]+f_piped_imag[1]);\n"
674
        "\t     assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]);\n"
675
        "\t     assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]);\n"
676
        "\tend\n"
677 26 dgisselq
"\n"
678
        "\talways @(posedge i_clk)\n"
679 36 dgisselq
        "\tif ((f_state == 2'b10)&&(f_syncd))\n"
680
        "\tbegin\n"
681
        "\t     // assert(o_sync);\n"
682
        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"
683
        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"
684
        "\tend\n"
685
"\n"
686 26 dgisselq
        "\talways @(posedge i_clk)\n"
687 36 dgisselq
        "\tif ((f_state == 2'b11)&&(f_syncd))\n"
688
        "\tbegin\n"
689
        "\t     assert(!o_sync);\n"
690
        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"
691
        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"
692
        "\tend\n"
693 26 dgisselq
"\n"
694
        "\talways @(posedge i_clk)\n"
695 36 dgisselq
        "\tif ((f_state == 2'b00)&&(f_syncd))\n"
696
        "\tbegin\n"
697
        "\t     assert(!o_sync);\n"
698
        "\t     assert(f_o_real == f_piped_real[7] - f_piped_real[5]);\n"
699
        "\t     assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]);\n"
700
        "\tend\n"
701 26 dgisselq
"\n"
702 36 dgisselq
        "\talways @(*)\n"
703
        "\tif ((iaddr[2:0] == 0)&&(!wait_for_sync))\n"
704
        "\t     assume(i_sync);\n"
705 26 dgisselq
"\n"
706 36 dgisselq
        "\talways @(*)\n"
707
        "\tif (wait_for_sync)\n"
708
        "\t     assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd));\n"
709 2 dgisselq
"\n"
710
        "\talways @(posedge i_clk)\n"
711 36 dgisselq
        "\tif ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset)))\n"
712
        "\t     assert(!wait_for_sync);\n"
713 14 dgisselq
"\n"
714 26 dgisselq
        "\talways @(posedge i_clk)\n"
715 36 dgisselq
        "\tif ((f_state == 2'b01)&&(f_syncd))\n"
716 2 dgisselq
        "\tbegin\n"
717 36 dgisselq
        "\t     assert(!o_sync);\n"
718
        "\t     if (INVERSE)\n"
719
        "\t     begin\n"
720
        "\t             assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]);\n"
721
        "\t             assert(f_o_imag ==  f_piped_real[7]-f_piped_real[5]);\n"
722
        "\t     end else begin\n"
723
        "\t             assert(f_o_real ==  f_piped_imag[7]-f_piped_imag[5]);\n"
724
        "\t             assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]);\n"
725
        "\t     end\n"
726 2 dgisselq
        "\tend\n"
727
"\n"
728 36 dgisselq
"`endif\n");
729
        }
730 23 dgisselq
 
731 36 dgisselq
        fprintf(fp, "endmodule\n");
732
}
733 26 dgisselq
 
734 24 dgisselq
 
735 36 dgisselq
void    build_sngllast(const char *fname, const bool async_reset = false) {
736 22 dgisselq
        FILE    *fp = fopen(fname, "w");
737
        if (NULL == fp) {
738
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
739
                perror("O/S Err was:");
740
                return;
741
        }
742
 
743 36 dgisselq
        std::string     resetw("i_reset");
744
        if (async_reset)
745
                resetw = std::string("i_areset_n");
746 23 dgisselq
 
747 22 dgisselq
        fprintf(fp,
748 36 dgisselq
SLASHLINE
749 22 dgisselq
"//\n"
750 36 dgisselq
"// Filename:\tlaststage.v\n"
751 22 dgisselq
"//\n"
752
"// Project:    %s\n"
753
"//\n"
754 36 dgisselq
"// Purpose:    This is part of an FPGA implementation that will process\n"
755
"//             the final stage of a decimate-in-frequency FFT, running\n"
756
"//     through the data at one sample per clock.\n"
757 22 dgisselq
"//\n"
758
"//\n%s"
759
"//\n", prjname, creator);
760
        fprintf(fp, "%s", cpyleft);
761 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
762 36 dgisselq
 
763 22 dgisselq
        fprintf(fp,
764 36 dgisselq
"module laststage(i_clk, %s, i_ce, i_sync, i_val, o_val, o_sync);\n"
765
"       parameter       IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
766
"       input                                   i_clk, %s, i_ce, i_sync;\n"
767
"       input           [(2*IWIDTH-1):0]        i_val;\n"
768
"       output  wire    [(2*OWIDTH-1):0]        o_val;\n"
769
"       output  reg                             o_sync;\n\n",
770
                resetw.c_str(), resetw.c_str());
771
 
772 22 dgisselq
        fprintf(fp,
773 36 dgisselq
"       reg     signed  [(IWIDTH-1):0]  m_r, m_i;\n"
774
"       wire    signed  [(IWIDTH-1):0]  i_r, i_i;\n"
775 22 dgisselq
"\n"
776 36 dgisselq
"       assign  i_r = i_val[(2*IWIDTH-1):(IWIDTH)]; \n"
777
"       assign  i_i = i_val[(IWIDTH-1):0]; \n"
778 22 dgisselq
"\n"
779 36 dgisselq
"       // Don't forget that we accumulate a bit by adding two values\n"
780
"       // together. Therefore our intermediate value must have one more\n"
781
"       // bit than the two originals.\n"
782
"       reg     signed  [(IWIDTH):0]    rnd_r, rnd_i, sto_r, sto_i;\n"
783
"       reg                             wait_for_sync, stage;\n"
784
"       reg             [1:0]           sync_pipe;\n"
785 22 dgisselq
"\n"
786 36 dgisselq
"       initial wait_for_sync = 1'b1;\n"
787
"       initial stage         = 1'b0;\n");
788 22 dgisselq
 
789 36 dgisselq
        if (async_reset)
790
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
791
        else
792
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
793 33 dgisselq
        fprintf(fp,
794 36 dgisselq
"               begin\n"
795
"                       wait_for_sync <= 1'b1;\n"
796
"                       stage         <= 1'b0;\n"
797
"               end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage))\n"
798
"               begin\n"
799
"                       wait_for_sync <= 1'b0;\n"
800
"                       //\n"
801
"                       stage <= 1'b1;\n"
802
"                       //\n"
803
"               end else if (i_ce)\n"
804
"                       stage <= 1'b0;\n\n");
805 22 dgisselq
 
806 36 dgisselq
        fprintf(fp, "\tinitial\tsync_pipe = 0;\n");
807
        if (async_reset)
808
                fprintf(fp,
809
                "\talways @(posedge i_clk, negedge i_areset_n)\n"
810
                "\tif (!i_areset_n)\n");
811
        else
812
                fprintf(fp,
813
                "\talways @(posedge i_clk)\n"
814
                "\tif (i_reset)\n");
815
 
816 22 dgisselq
        fprintf(fp,
817 36 dgisselq
                "\t\tsync_pipe <= 0;\n"
818
                "\telse if (i_ce)\n"
819
                "\t\tsync_pipe <= { sync_pipe[0], i_sync };\n\n");
820 23 dgisselq
 
821 36 dgisselq
        fprintf(fp, "\tinitial\to_sync = 1\'b0;\n");
822
        if (async_reset)
823
                fprintf(fp,
824
                "\talways @(posedge i_clk, negedge i_areset_n)\n"
825
                "\tif (!i_areset_n)\n");
826
        else
827
                fprintf(fp,
828
                "\talways @(posedge i_clk)\n"
829
                "\tif (i_reset)\n");
830
 
831 23 dgisselq
        fprintf(fp,
832 36 dgisselq
                "\t\to_sync <= 1\'b0;\n"
833
                "\telse if (i_ce)\n"
834
                "\t\to_sync <= sync_pipe[1];\n\n");
835 22 dgisselq
 
836
        fprintf(fp,
837 36 dgisselq
"       always @(posedge i_clk)\n"
838
"       if (i_ce)\n"
839
"       begin\n"
840
"               if (!stage)\n"
841
"               begin\n"
842
"                       // Clock 1\n"
843
"                       m_r <= i_r;\n"
844
"                       m_i <= i_i;\n"
845
"                       // Clock 3\n"
846
"                       rnd_r <= sto_r;\n"
847
"                       rnd_i <= sto_i;\n"
848
"                       //\n"
849
"               end else begin\n"
850
"                       // Clock 2\n"
851
"                       rnd_r <= m_r + i_r;\n"
852
"                       rnd_i <= m_i + i_i;\n"
853
"                       //\n"
854
"                       sto_r <= m_r - i_r;\n"
855
"                       sto_i <= m_i - i_i;\n"
856
"                       //\n"
857
"               end\n"
858
"       end\n"
859 22 dgisselq
"\n"
860 36 dgisselq
"       // Now that we have our results, let's round them and report them\n"
861
"       wire    signed  [(OWIDTH-1):0]  o_r, o_i;\n"
862 2 dgisselq
"\n"
863 36 dgisselq
"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r);\n"
864
"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i);\n"
865 2 dgisselq
"\n"
866 36 dgisselq
"       assign  o_val  = { o_r, o_i };\n"
867
"\n");
868 2 dgisselq
 
869 14 dgisselq
 
870 36 dgisselq
        if (formal_property_flag) {
871
                fprintf(fp,
872
        "`ifdef FORMAL\n"
873
                "\treg  f_past_valid;\n"
874
                "\tinitial      f_past_valid = 1'b0;\n"
875
                "\talways @(posedge i_clk)\n"
876
                "\t     f_past_valid <= 1'b1;\n"
877
        "\n"
878
        "`ifdef LASTSTAGE\n"
879
                "\talways @(posedge i_clk)\n"
880
                "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"
881
        "`endif\n"
882
        "\n"
883
                "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"
884
        "\n"
885
                "\treg  signed  [IWIDTH-1:0]    f_piped_real    [0:3];\n"
886
                "\treg  signed  [IWIDTH-1:0]    f_piped_imag    [0:3];\n"
887
                "\talways @(posedge i_clk)\n"
888
                "\tif (i_ce)\n"
889
                "\tbegin\n"
890
                "\t     f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH];\n"
891
                "\t     f_piped_imag[0] <= i_val[  IWIDTH-1:0];\n"
892
        "\n"
893
                "\t     f_piped_real[1] <= f_piped_real[0];\n"
894
                "\t     f_piped_imag[1] <= f_piped_imag[0];\n"
895
        "\n"
896
                "\t     f_piped_real[2] <= f_piped_real[1];\n"
897
                "\t     f_piped_imag[2] <= f_piped_imag[1];\n"
898
        "\n"
899
                "\t     f_piped_real[3] <= f_piped_real[2];\n"
900
                "\t     f_piped_imag[3] <= f_piped_imag[2];\n"
901
                "\tend\n"
902
        "\n"
903
                "\twire f_syncd;\n"
904
                "\treg  f_rsyncd;\n"
905
        "\n"
906
                "\tinitial      f_rsyncd        = 0;\n"
907
                "\talways @(posedge i_clk)\n"
908
                "\tif (i_reset)\n"
909
                "\t     f_rsyncd <= 1'b0;\n"
910
                "\telse if (!f_rsyncd)\n"
911
                "\t     f_rsyncd <= o_sync;\n"
912
                "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"
913
        "\n"
914
                "\treg  f_state;\n"
915
                "\tinitial      f_state = 0;\n"
916
                "\talways @(posedge i_clk)\n"
917
                "\tif (i_reset)\n"
918
                "\t     f_state <= 0;\n"
919
                "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
920
                "\t     f_state <= f_state + 1;\n"
921
        "\n"
922
                "\talways @(*)\n"
923
                "\tif (f_state != 0)\n"
924
                "\t     assume(!i_sync);\n"
925
        "\n"
926
                "\talways @(*)\n"
927
                "\t     assert(stage == f_state[0]);\n"
928
        "\n"
929
                "\talways @(posedge i_clk)\n"
930
                "\tif ((f_state == 1'b1)&&(f_syncd))\n"
931
                "\tbegin\n"
932
                "\t     assert(o_r == f_piped_real[2] + f_piped_real[1]);\n"
933
                "\t     assert(o_i == f_piped_imag[2] + f_piped_imag[1]);\n"
934
                "\tend\n"
935
        "\n"
936
                "\talways @(posedge i_clk)\n"
937
                "\tif ((f_state == 1'b0)&&(f_syncd))\n"
938
                "\tbegin\n"
939
                "\t     assert(!o_sync);\n"
940
                "\t     assert(o_r == f_piped_real[3] - f_piped_real[2]);\n"
941
                "\t     assert(o_i == f_piped_imag[3] - f_piped_imag[2]);\n"
942
                "\tend\n"
943
        "\n"
944
                "\talways @(*)\n"
945
                "\tif (wait_for_sync)\n"
946
                "\tbegin\n"
947
                "\t     assert(!f_rsyncd);\n"
948
                "\t     assert(!o_sync);\n"
949
                "\t     assert(f_state == 0);\n"
950
                "\tend\n\n");
951 2 dgisselq
        }
952
 
953 36 dgisselq
        fprintf(fp,
954
"`endif // FORMAL\n"
955
"endmodule\n");
956 23 dgisselq
 
957 36 dgisselq
        fclose(fp);
958 2 dgisselq
}
959
 
960
void    usage(void) {
961
        fprintf(stderr,
962 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
963 2 dgisselq
// "\tfftgen -i\n"
964 36 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or\n"
965
"\t\t(for a real FFT) at one clock per two real input samples.\n"
966
"\t-a <hdrname>  Create a header of information describing the built-in\n"
967
"\t\tparameters, useful for module-level testing with Verilator\n"
968 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
969
"\t\tlonger than the corresponding data bits, to help avoid\n"
970 32 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits longer\n"
971 26 dgisselq
"\t\tthan the data bits.\n"
972 36 dgisselq
"\t-d <dir>  Places all of the generated verilog files into <dir>.\n"
973
"\t\tThe default is a subdirectory of the current directory\n"
974
"\t\tnamed %s.\n"
975
"\t-f <size>  Sets the size of the FFT as the number of complex\n"
976 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
977
"\t\ta required parameter.)\n"
978
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
979
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
980
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
981 36 dgisselq
"\t-k #\tSets # clocks per sample, used to minimize multiplies.  Also\n"
982
"\t\tsets one sample in per i_ce clock (opt -1)\n"
983 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
984
"\t\tproduce.  Internal values greater than this value will be\n"
985 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
986
"\t\tsize by one bit for every two FFT stages.)\n"
987 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
988 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
989
"\t\tcomplex values into the FFT.\n"
990 36 dgisselq
"\t-p <nmpy>  Sets the number of hardware multiplies (DSPs) to use, versus\n"
991
"\t\tshift-add emulation.  The default is not to use any hardware\n"
992
"\t\tmultipliers.\n"
993 26 dgisselq
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
994
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
995 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
996
"\t\talgorithms that need to apply a filter without needing to do\n"
997
"\t\tbin shifting, as these algorithms can, with this option, just\n"
998
"\t\tmultiply by a bit reversed correlation sequence and then\n"
999 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
1000
"\t\ta decimation in time inverse to do this, which this program does\n"
1001
"\t\tnot yet provide.)\n"
1002 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
1003 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
1004 36 dgisselq
"\t\trounding or truncation of the answer to the final number of\n"
1005
"\t\tbits.  The default is to use %d extra bits internally.\n",
1006 26 dgisselq
/*
1007 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
1008
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
1009
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
1010 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
1011
*/
1012
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
1013 2 dgisselq
}
1014
 
1015
// Features still needed:
1016
//      Interactivity.
1017
int main(int argc, char **argv) {
1018
        int     fftsize = -1, lgsize = -1;
1019 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
1020 36 dgisselq
                        nummpy=DEF_NMPY, nmpypstage=6, mpy_stages;
1021
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS, ckpce = 0;
1022
        const char *EMPTYSTR = "";
1023 26 dgisselq
        bool    bitreverse = true, inverse=false,
1024 36 dgisselq
                verbose_flag = false,
1025
                single_clock = false,
1026
                real_fft = false,
1027
                async_reset = false;
1028 2 dgisselq
        FILE    *vmain;
1029 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
1030 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
1031
        // ROUND_T      rounding = RND_HALFUP;
1032 2 dgisselq
 
1033 26 dgisselq
        bool    dbg = false;
1034
        int     dbgstage = 128;
1035
 
1036 2 dgisselq
        if (argc <= 1)
1037
                usage();
1038
 
1039 36 dgisselq
        // Copy the original command line before we mess with it
1040 14 dgisselq
        cmdline = argv[0];
1041 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1042 14 dgisselq
                cmdline += " ";
1043
                cmdline += argv[argn];
1044
        }
1045
 
1046 36 dgisselq
        { int c;
1047
        while((c = getopt(argc, argv, "12Aa:c:d:D:f:hik:m:n:p:rsSx:v")) != -1) {
1048
                switch(c) {
1049
                case '1':       single_clock = true;  break;
1050
                case '2':       single_clock = false; break;
1051
                case 'A':       async_reset  = true;  break;
1052
                case 'a':       hdrname = strdup(optarg);       break;
1053
                case 'c':       xtracbits = atoi(optarg);       break;
1054
                case 'd':       coredir = std::string(optarg);  break;
1055
                case 'D':       dbgstage = atoi(optarg);        break;
1056
                case 'f':       fftsize = atoi(optarg);
1057
                                { int sln = strlen(optarg);
1058
                                if (!isdigit(optarg[sln-1])){
1059
                                        switch(optarg[sln-1]) {
1060
                                        case 'k': case 'K':
1061
                                                fftsize <<= 10;
1062 2 dgisselq
                                                break;
1063 36 dgisselq
                                        case 'm': case 'M':
1064
                                                fftsize <<= 20;
1065 2 dgisselq
                                                break;
1066 36 dgisselq
                                        case 'g': case 'G':
1067
                                                fftsize <<= 30;
1068 28 dgisselq
                                                break;
1069 33 dgisselq
                                        default:
1070 36 dgisselq
                                                printf("ERR: Unknown FFT size, %s!\n", optarg);
1071
                                                exit(EXIT_FAILURE);
1072
                                        }
1073
                                }} break;
1074
                case 'h':       usage(); exit(EXIT_SUCCESS);    break;
1075
                case 'i':       inverse = true;                 break;
1076
                case 'k':       ckpce = atoi(optarg);
1077
                                single_clock = true;
1078
                                break;
1079
                case 'm':       maxbitsout = atoi(optarg);      break;
1080
                case 'n':       nbitsin = atoi(optarg);         break;
1081
                case 'p':       nummpy = atoi(optarg);          break;
1082
                case 'r':       real_fft = true;                break;
1083
                case 'S':       bitreverse = true;              break;
1084
                case 's':       bitreverse = false;             break;
1085
                case 'x':       xtrapbits = atoi(optarg);       break;
1086
                case 'v':       verbose_flag = true;            break;
1087
                // case 'z':    variable_size = true;           break;
1088
                default:
1089
                        printf("Unknown argument, -%c\n", c);
1090 2 dgisselq
                        usage();
1091 36 dgisselq
                        exit(EXIT_FAILURE);
1092 2 dgisselq
                }
1093 36 dgisselq
        }}
1094
 
1095
        if (verbose_flag) {
1096
                if (inverse)
1097
                        printf("Building a %d point inverse FFT module, with %s outputs\n",
1098
                                fftsize,
1099
                                (real_fft)?"real ":"complex");
1100
                else
1101
                        printf("Building a %d point %sforward FFT module\n",
1102
                                fftsize,
1103
                                (real_fft)?"real ":"");
1104
                if (!single_clock)
1105
                        printf("  that accepts two inputs per clock\n");
1106
                if (async_reset)
1107
                        printf("  using a negative logic ASYNC reset\n");
1108
 
1109
                printf("The core will be placed into the %s/ directory\n", coredir.c_str());
1110
 
1111
                if (hdrname[0])
1112
                        printf("A C header file, %s, will be written capturing these\n"
1113
                                "options for a Verilator testbench\n",
1114
                                        hdrname.c_str());
1115
                // nummpy
1116
                // xtrapbits
1117 2 dgisselq
        }
1118
 
1119 26 dgisselq
        if (real_fft) {
1120
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
1121 36 dgisselq
                exit(EXIT_FAILURE);
1122
        }
1123
 
1124
        if (ckpce < 1)
1125
                ckpce = 1;
1126
        if (!bitreverse) {
1127 26 dgisselq
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
1128
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
1129
                printf("built.\n");
1130
        }
1131
 
1132 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
1133
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
1134
                        ;
1135
        }
1136
 
1137
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
1138
                printf("INVALID PARAMETERS!!!!\n");
1139 36 dgisselq
                exit(EXIT_FAILURE);
1140 2 dgisselq
        }
1141
 
1142
 
1143
        if (nextlg(fftsize) != fftsize) {
1144
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
1145
                                fftsize);
1146 36 dgisselq
                exit(EXIT_FAILURE);
1147 2 dgisselq
        } else if (fftsize < 2) {
1148
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
1149
                                fftsize);
1150
                if (fftsize == 1) {
1151
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
1152
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
1153
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
1154
                        fprintf(stderr, "can be connected straight to the input.\n");
1155
                } else {
1156
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
1157
                        fprintf(stderr, "Is such an operation even defined?\n");
1158
                }
1159 36 dgisselq
                exit(EXIT_FAILURE);
1160 2 dgisselq
        }
1161
 
1162
        // Calculate how many output bits we'll have, and what the log
1163
        // based two size of our FFT is.
1164
        {
1165
                int     tmp_size = fftsize;
1166
 
1167
                // The first stage always accumulates one bit, regardless
1168
                // of whether you need to or not.
1169
                nbitsout = nbitsin + 1;
1170
                tmp_size >>= 1;
1171
 
1172
                while(tmp_size > 4) {
1173
                        nbitsout += 1;
1174
                        tmp_size >>= 2;
1175
                }
1176
 
1177
                if (tmp_size > 1)
1178
                        nbitsout ++;
1179
 
1180
                if (fftsize <= 2)
1181
                        bitreverse = false;
1182
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
1183
                nbitsout = maxbitsout;
1184
 
1185 36 dgisselq
        if (verbose_flag) {
1186
                printf("Output samples will be %d bits wide\n", nbitsout);
1187
                printf("This %sFFT will take %d-bit samples in, and produce %d samples out\n", (inverse)?"i":"", nbitsin, nbitsout);
1188
                if (maxbitsout > 0)
1189
                        printf("  Internally, it will allow items to accumulate to %d bits\n", maxbitsout);
1190
                printf("  Twiddle-factors of %d bits will be used\n",
1191
                        nbitsin+xtracbits);
1192
                if (!bitreverse)
1193
                printf("  The output will be left in bit-reversed order\n");
1194
        }
1195
 
1196 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
1197 36 dgisselq
        if (!single_clock) {
1198
                nmpypstage = 6;
1199
        } else if (ckpce <= 1) {
1200
                nmpypstage = 3;
1201
        } else if (ckpce == 2) {
1202
                nmpypstage = 2;
1203
        } else
1204
                nmpypstage = 1;
1205 2 dgisselq
 
1206 36 dgisselq
        mpy_stages = nummpy / nmpypstage;
1207
        if (mpy_stages > lgval(fftsize)-2)
1208
                mpy_stages = lgval(fftsize)-2;
1209 22 dgisselq
 
1210 2 dgisselq
        {
1211
                struct stat     sbuf;
1212 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
1213 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
1214 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
1215 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
1216
                                fprintf(stderr, "To try again, please remove this file.\n");
1217 36 dgisselq
                                exit(EXIT_FAILURE);
1218 2 dgisselq
                        }
1219 33 dgisselq
                } else
1220 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
1221
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
1222
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
1223 36 dgisselq
                        exit(EXIT_FAILURE);
1224 2 dgisselq
                }
1225
        }
1226
 
1227 28 dgisselq
        if (hdrname.length() > 0) {
1228
                FILE    *hdr = fopen(hdrname.c_str(), "w");
1229
                if (hdr == NULL) {
1230
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
1231
                        perror("O/S Err:");
1232 36 dgisselq
                        exit(EXIT_FAILURE);
1233 28 dgisselq
                }
1234
 
1235 36 dgisselq
                fprintf(hdr,
1236
SLASHLINE
1237
"//\n"
1238
"// Filename:\t%s\n"
1239
"//\n"
1240
"// Project:\t%s\n"
1241
"//\n"
1242
"// Purpose:    This simple header file captures the internal constants\n"
1243
"//             within the FFT that were used to build it, for the purpose\n"
1244
"//     of making C++ integration (and test bench testing) simpler.  That is,\n"
1245
"//     should the FFT change size, this will note that size change and thus\n"
1246
"//     any test bench or other C++ program dependent upon either the size of\n"
1247
"//     the FFT, the number of bits in or out of it, etc., can pick up the\n"
1248
"//     changes in the defines found within this file.\n"
1249
"//\n",
1250
                hdrname.c_str(), prjname);
1251 28 dgisselq
                fprintf(hdr, "%s", creator);
1252
                fprintf(hdr, "//\n");
1253
                fprintf(hdr, "%s", cpyleft);
1254
                fprintf(hdr, "//\n"
1255
                "//\n"
1256
                "#ifndef %sFFTHDR_H\n"
1257
                "#define %sFFTHDR_H\n"
1258
                "\n"
1259
                "#define\t%sFFT_IWIDTH\t%d\n"
1260
                "#define\t%sFFT_OWIDTH\t%d\n"
1261
                "#define\t%sFFT_LGWIDTH\t%d\n"
1262
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
1263
                        (inverse)?"I":"", (inverse)?"I":"",
1264
                        (inverse)?"I":"", nbitsin,
1265
                        (inverse)?"I":"", nbitsout,
1266
                        (inverse)?"I":"", lgsize,
1267
                        (inverse)?"I":"", (inverse)?"I":"");
1268 36 dgisselq
                if (ckpce > 0)
1269
                        fprintf(hdr, "#define\t%sFFT_CKPCE\t%d\t// Clocks per CE\n",
1270
                                (inverse)?"I":"", ckpce);
1271
                else
1272
                        fprintf(hdr, "// Two samples per i_ce\n");
1273 28 dgisselq
                if (!bitreverse)
1274
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
1275
                                (inverse)?"I":"");
1276
                if (real_fft)
1277
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
1278
                if (!single_clock)
1279
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
1280 36 dgisselq
                else
1281
                        fprintf(hdr, "// #define\tDBLCLK%sFFT // this FFT takes one input sample per clock\n\n", (inverse)?"I":"");
1282 29 dgisselq
                if (USE_OLD_MULTIPLY)
1283
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
1284 33 dgisselq
 
1285 29 dgisselq
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
1286
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
1287
#ifdef  TST_LONGBIMPY_BW
1288
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
1289
#else
1290
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
1291
#endif
1292
 
1293
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
1294
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
1295
#ifdef  TST_SHIFTADDMPY_BW
1296
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
1297
#else
1298
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
1299
#endif
1300
 
1301
#define TST_SHIFTADDMPY_AW      16
1302
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
1303
                fprintf(hdr, "// Parameters for testing the butterfly\n");
1304
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
1305
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
1306
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
1307
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
1308
                                bflydelay(TST_BUTTERFLY_IWIDTH,
1309
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
1310
 
1311
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
1312
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
1313
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
1314
 
1315
                fprintf(hdr, "// Parameters for testing the double stage\n");
1316
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
1317
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
1318
 
1319
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
1320
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
1321 28 dgisselq
                fprintf(hdr, "\n" "#endif\n\n");
1322
                fclose(hdr);
1323
        }
1324
 
1325 14 dgisselq
        {
1326
                std::string     fname_string;
1327
 
1328
                fname_string = coredir;
1329
                fname_string += "/";
1330
                if (inverse) fname_string += "i";
1331
                fname_string += "fftmain.v";
1332
 
1333
                vmain = fopen(fname_string.c_str(), "w");
1334
                if (NULL == vmain) {
1335
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
1336
                        perror("Err from O/S:");
1337 36 dgisselq
                        exit(EXIT_FAILURE);
1338 14 dgisselq
                }
1339 36 dgisselq
 
1340
                if (verbose_flag)
1341
                        printf("Opened %s\n", fname_string.c_str());
1342 2 dgisselq
        }
1343
 
1344 36 dgisselq
        fprintf(vmain,
1345
SLASHLINE
1346
"//\n"
1347
"// Filename:\t%sfftmain.v\n"
1348
"//\n"
1349
"// Project:    %s\n"
1350
"//\n"
1351
"// Purpose:    This is the main module in the General Purpose FPGA FFT\n"
1352
"//             implementation.  As such, all other modules are subordinate\n"
1353
"//     to this one.  This module accomplish a fixed size Complex FFT on\n"
1354
"//     %d data points.\n",
1355
                (inverse)?"i":"",prjname, fftsize);
1356
        if (single_clock) {
1357
        fprintf(vmain,
1358
"//     The FFT is fully pipelined, and accepts as inputs one complex two\'s\n"
1359
"//     complement sample per clock.\n");
1360
        } else {
1361
        fprintf(vmain,
1362
"//     The FFT is fully pipelined, and accepts as inputs two complex two\'s\n"
1363
"//     complement samples per clock.\n");
1364
        }
1365
 
1366
        fprintf(vmain,
1367
"//\n"
1368
"// Parameters:\n"
1369
"//     i_clk\tThe clock.  All operations are synchronous with this clock.\n"
1370
"//     i_%sreset%s\tSynchronous reset, active high.  Setting this line will\n"
1371
"//     \t\tforce the reset of all of the internals to this routine.\n"
1372
"//     \t\tFurther, following a reset, the o_sync line will go\n"
1373
"//     \t\thigh the same time the first output sample is valid.\n",
1374
                (async_reset)?"a":"", (async_reset)?"_n":"");
1375
        if (single_clock) {
1376
                fprintf(vmain,
1377
"//     i_ce\tA clock enable line.  If this line is set, this module\n"
1378
"//     \t\twill accept one complex input value, and produce\n"
1379
"//     \t\tone (possibly empty) complex output value.\n"
1380
"//     i_sample\tThe complex input sample.  This value is split\n"
1381
"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"
1382
"//     \t\tthe real portion in the high order bits, and the\n"
1383
"//     \t\timaginary portion taking the bottom %d bits.\n"
1384
"//     o_result\tThe output result, of the same format as i_sample,\n"
1385
"//     \t\tonly having %d bits for each of the real and imaginary\n"
1386
"//     \t\tcomponents, leading to %d bits total.\n"
1387
"//     o_sync\tA one bit output indicating the first sample of the FFT frame.\n"
1388
"//     \t\tIt also indicates the first valid sample out of the FFT\n"
1389
"//     \t\ton the first frame.\n", nbitsin, nbitsin, nbitsout, nbitsout*2);
1390
        } else {
1391
                fprintf(vmain,
1392
"//     i_ce\tA clock enable line.  If this line is set, this module\n"
1393
"//     \t\twill accept two complex values as inputs, and produce\n"
1394
"//     \t\ttwo (possibly empty) complex values as outputs.\n"
1395
"//     i_left\tThe first of two complex input samples.  This value is split\n"
1396
"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"
1397
"//     \t\tthe real portion in the high order bits, and the\n"
1398
"//     \t\timaginary portion taking the bottom %d bits.\n"
1399
"//     i_right\tThis is the same thing as i_left, only this is the second of\n"
1400
"//     \t\ttwo such samples.  Hence, i_left would contain input\n"
1401
"//     \t\tsample zero, i_right would contain sample one.  On the\n"
1402
"//     \t\tnext clock i_left would contain input sample two,\n"
1403
"//     \t\ti_right number three and so forth.\n"
1404
"//     o_left\tThe first of two output samples, of the same format as i_left,\n"
1405
"//     \t\tonly having %d bits for each of the real and imaginary\n"
1406
"//     \t\tcomponents, leading to %d bits total.\n"
1407
"//     o_right\tThe second of two output samples produced each clock.  This has\n"
1408
"//     \t\tthe same format as o_left.\n"
1409
"//     o_sync\tA one bit output indicating the first valid sample produced by\n"
1410
"//     \t\tthis FFT following a reset.  Ever after, this will\n"
1411
"//     \t\tindicate the first sample of an FFT frame.\n",
1412
        nbitsin, nbitsin, nbitsout, nbitsout*2);
1413
        }
1414
 
1415
        fprintf(vmain,
1416
"//\n"
1417
"// Arguments:\tThis file was computer generated using the following command\n"
1418
"//\t\tline:\n"
1419
"//\n");
1420 14 dgisselq
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
1421
        fprintf(vmain, "//\n");
1422 2 dgisselq
        fprintf(vmain, "%s", creator);
1423
        fprintf(vmain, "//\n");
1424
        fprintf(vmain, "%s", cpyleft);
1425 35 dgisselq
        fprintf(vmain, "//\n//\n`default_nettype\tnone\n//\n");
1426 2 dgisselq
 
1427
 
1428 36 dgisselq
        std::string     resetw("i_reset");
1429
        if (async_reset)
1430
                resetw = "i_areset_n";
1431
 
1432 2 dgisselq
        fprintf(vmain, "//\n");
1433
        fprintf(vmain, "//\n");
1434 36 dgisselq
        fprintf(vmain, "module %sfftmain(i_clk, %s, i_ce,\n",
1435
                (inverse)?"i":"", resetw.c_str());
1436
        if (single_clock) {
1437
                fprintf(vmain, "\t\ti_sample, o_result, o_sync%s);\n",
1438 26 dgisselq
                        (dbg)?", o_dbg":"");
1439 36 dgisselq
        } else {
1440
                fprintf(vmain, "\t\ti_left, i_right,\n");
1441
                fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
1442
                        (dbg)?", o_dbg":"");
1443
        }
1444
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n\t//\n", nbitsin, nbitsout, lgsize);
1445 2 dgisselq
        assert(lgsize > 0);
1446 36 dgisselq
        fprintf(vmain, "\tinput\t\t\t\t\ti_clk, %s, i_ce;\n\t//\n",
1447
                resetw.c_str());
1448
        if (single_clock) {
1449
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_sample;\n");
1450
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_result;\n");
1451
        } else {
1452 2 dgisselq
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
1453
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
1454 36 dgisselq
        }
1455
        fprintf(vmain, "\toutput\treg\t\t\t\to_sync;\n");
1456 26 dgisselq
        if (dbg)
1457
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
1458 2 dgisselq
        fprintf(vmain, "\n\n");
1459
 
1460
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
1461 36 dgisselq
        if (single_clock)
1462
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_sample;\n");
1463
        else
1464
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
1465 2 dgisselq
        int     tmp_size = fftsize, lgtmp = lgsize;
1466
        if (fftsize == 2) {
1467
                if (bitreverse) {
1468
                        fprintf(vmain, "\treg\tbr_start;\n");
1469 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
1470 36 dgisselq
                        if (async_reset) {
1471
                                // The asynchronous reset port is named i_areset_n (see resetw and
                                // the "if (!i_areset_n)" emitted next); the sensitivity list must
                                // reference that same net or the generated module will not elaborate.
                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");
1472
                                fprintf(vmain, "\t\tif (!i_areset_n)\n");
1473
                        } else {
1474
                                fprintf(vmain, "\talways @(posedge i_clk)\n");
1475
                                fprintf(vmain, "\t\tif (i_reset)\n");
1476
                        }
1477 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
1478 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1479 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
1480 2 dgisselq
                }
1481
                fprintf(vmain, "\n\n");
1482 36 dgisselq
                fprintf(vmain, "\tlaststage\t#(IWIDTH)\tstage_2(i_clk, %s, i_ce,\n", resetw.c_str());
1483
                fprintf(vmain, "\t\t\t(%s%s), i_left, i_right, br_left, br_right);\n",
1484
                        (async_reset)?"":"!", resetw.c_str());
1485 2 dgisselq
                fprintf(vmain, "\n\n");
1486
        } else {
1487
                int     nbits = nbitsin, dropbit=0;
1488 26 dgisselq
                int     obits = nbits+1+xtrapbits;
1489 36 dgisselq
                std::string     cmem;
1490
                FILE    *cmemfp;
1491 26 dgisselq
 
1492
                if ((maxbitsout > 0)&&(obits > maxbitsout))
1493
                        obits = maxbitsout;
1494
 
1495 2 dgisselq
                // Always do a first stage
1496 14 dgisselq
                {
1497 22 dgisselq
                        bool    mpystage;
1498 2 dgisselq
 
1499 22 dgisselq
                        // Last two stages are always non-multiply stages
1500
                        // since the multiplies can be done by adds
1501 36 dgisselq
                        mpystage = ((lgtmp-2) <= mpy_stages);
1502 22 dgisselq
 
1503 28 dgisselq
                        if (mpystage)
1504
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
1505
                        fprintf(vmain, "\n\n");
1506 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s%d;\n", fftsize);
1507 36 dgisselq
                        if (single_clock) {
1508
                                fprintf(vmain, "\twire\t[%d:0]\tw_d%d;\n", 2*(obits+xtrapbits)-1, fftsize);
1509
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 1, 0, inverse);
1510
                                cmemfp = gen_coeff_open(cmem.c_str());
1511
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 1, 0, inverse);
1512
                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",
1513 28 dgisselq
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
1514 36 dgisselq
                                        xtracbits, obits+xtrapbits,
1515
                                        lgsize, lgtmp-1,
1516
                                        (mpystage)?1:0,
1517
                                        ckpce, cmem.c_str(),
1518
                                        fftsize, resetw.c_str());
1519
                                fprintf(vmain, "\t\t\t(%s%s), i_sample, w_d%d, w_s%d%s);\n",
1520
                                        (async_reset)?"":"!", resetw.c_str(),
1521
                                        fftsize, fftsize,
1522
                                        ((dbg)&&(dbgstage == fftsize))
1523
                                                ? ", o_dbg":"");
1524
                        } else {
1525
                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n", fftsize);
1526
                                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
1527
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 0, inverse);
1528
                                cmemfp = gen_coeff_open(cmem.c_str());
1529
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 0, inverse);
1530
                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",
1531
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
1532
                                        xtracbits, obits+xtrapbits,
1533
                                        lgsize, lgtmp-2,
1534
                                        (mpystage)?1:0,
1535
                                        ckpce, cmem.c_str(),
1536
                                        fftsize, resetw.c_str());
1537
                                fprintf(vmain, "\t\t\t(%s%s), i_left, w_e%d, w_s%d%s);\n",
1538
                                        (async_reset)?"":"!", resetw.c_str(),
1539
                                        fftsize, fftsize,
1540
                                        ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
1541
                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 1, inverse);
1542
                                cmemfp = gen_coeff_open(cmem.c_str());
1543
                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 1, inverse);
1544
                                fprintf(vmain, "\tfftstage\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",
1545
                                        xtracbits, obits+xtrapbits,
1546
                                        lgsize, lgtmp-2,
1547
                                        (mpystage)?1:0,
1548
                                        ckpce, cmem.c_str(),
1549
                                        fftsize, resetw.c_str());
1550
                                fprintf(vmain, "\t\t\t(%s%s), i_right, w_o%d, w_os%d);\n",
1551
                                        (async_reset)?"":"!",resetw.c_str(),
1552
                                        fftsize, fftsize);
1553
                        }
1554 28 dgisselq
 
1555
                        std::string     fname;
1556
 
1557 14 dgisselq
                        fname = coredir + "/";
1558 36 dgisselq
                        if (inverse)
1559
                                fname += "i";
1560
                        fname += "fftstage";
1561
                        if (dbg) {
1562
                                std::string     dbgname(fname);
1563
                                dbgname += "_dbg";
1564
                                dbgname += ".v";
1565
                                // Write the debug variant of the stage to dbgname
                                // ("...fftstage_dbg.v").  fname has no ".v" suffix yet at this
                                // point and is reserved for the non-debug stage built below.
                                if (single_clock)
1566
                                        build_stage(dbgname.c_str(), fftsize, 1, 0, nbits, xtracbits, ckpce, async_reset, true);
1567
                                else
1568
                                        build_stage(dbgname.c_str(), fftsize/2, 2, 1, nbits, xtracbits, ckpce, async_reset, true);
1569
                        }
1570 14 dgisselq
 
1571
                        fname += ".v";
1572 36 dgisselq
                        if (single_clock) {
1573
                                build_stage(fname.c_str(), fftsize, 1, 0,
1574
                                        nbits, xtracbits, ckpce, async_reset,
1575
                                        false);
1576
                        } else {
1577
                                // All stages use the same Verilog, so we only
1578
                                // need to build one
1579
                                build_stage(fname.c_str(), fftsize/2, 2, 1,
1580
                                        nbits, xtracbits, ckpce, async_reset, false);
1581
                        }
1582 14 dgisselq
                }
1583
 
1584 26 dgisselq
                nbits = obits;  // New number of input bits
1585 2 dgisselq
                tmp_size >>= 1; lgtmp--;
1586
                dropbit = 0;
1587
                fprintf(vmain, "\n\n");
1588
                while(tmp_size >= 8) {
1589 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1590 2 dgisselq
 
1591
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1592
                                obits = maxbitsout;
1593
 
1594 14 dgisselq
                        {
1595 22 dgisselq
                                bool            mpystage;
1596 2 dgisselq
 
1597 36 dgisselq
                                mpystage = ((lgtmp-2) <= mpy_stages);
1598 22 dgisselq
 
1599 28 dgisselq
                                if (mpystage)
1600
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
1601 35 dgisselq
                                fprintf(vmain, "\twire\t\tw_s%d;\n",
1602
                                        tmp_size);
1603 36 dgisselq
                                if (single_clock) {
1604
                                        fprintf(vmain,"\twire\t[%d:0]\tw_d%d;\n",
1605
                                                2*(obits+xtrapbits)-1,
1606
                                                tmp_size);
1607
                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 1, 0, inverse);
1608
                                        cmemfp = gen_coeff_open(cmem.c_str());
1609
                                        gen_coeffs(cmemfp, tmp_size,
1610
                                                nbits+xtracbits+xtrapbits, 1, 0, inverse);
1611
                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",
1612
                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
1613
                                                nbits+xtrapbits,
1614
                                                nbits+xtracbits+xtrapbits,
1615
                                                obits+xtrapbits,
1616
                                                lgsize, lgtmp-1,
1617
                                                (dropbit)?0:0, (mpystage)?1:0,
1618
                                                ckpce,
1619
                                                cmem.c_str(), tmp_size,
1620
                                                resetw.c_str());
1621
                                        fprintf(vmain, "\t\t\tw_s%d, w_d%d, w_d%d, w_s%d%s);\n",
1622
                                                tmp_size<<1, tmp_size<<1,
1623
                                                tmp_size, tmp_size,
1624
                                                ((dbg)&&(dbgstage == tmp_size))
1625
                                                        ?", o_dbg":"");
1626
                                } else {
1627
                                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n",
1628
                                                tmp_size);
1629
                                        fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
1630
                                                2*(obits+xtrapbits)-1,
1631
                                                tmp_size, tmp_size);
1632
                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 2, 0, inverse);
1633
                                        cmemfp = gen_coeff_open(cmem.c_str());
1634
                                        gen_coeffs(cmemfp, tmp_size,
1635
                                                nbits+xtracbits+xtrapbits, 2, 0, inverse);
1636
                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",
1637
                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
1638
                                                nbits+xtrapbits,
1639
                                                nbits+xtracbits+xtrapbits,
1640
                                                obits+xtrapbits,
1641
                                                lgsize, lgtmp-2,
1642
                                                (dropbit)?0:0, (mpystage)?1:0,
1643
                                                ckpce,
1644
                                                cmem.c_str(), tmp_size,
1645
                                                resetw.c_str());
1646
                                        fprintf(vmain, "\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
1647
                                                tmp_size<<1, tmp_size<<1,
1648
                                                tmp_size, tmp_size,
1649
                                                ((dbg)&&(dbgstage == tmp_size))
1650
                                                        ?", o_dbg":"");
1651
                                        cmem = gen_coeff_fname(EMPTYSTR,
1652
                                                tmp_size, 2, 1, inverse);
1653
                                        cmemfp = gen_coeff_open(cmem.c_str());
1654
                                        gen_coeffs(cmemfp, tmp_size,
1655
                                                nbits+xtracbits+xtrapbits,
1656
                                                2, 1, inverse);
1657
                                        fprintf(vmain, "\tfftstage\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",
1658
                                                nbits+xtrapbits,
1659
                                                nbits+xtracbits+xtrapbits,
1660
                                                obits+xtrapbits,
1661
                                                lgsize, lgtmp-2,
1662
                                                (dropbit)?0:0, (mpystage)?1:0,
1663
                                                ckpce, cmem.c_str(), tmp_size,
1664
                                                resetw.c_str());
1665
                                        fprintf(vmain, "\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
1666
                                                tmp_size<<1, tmp_size<<1,
1667
                                                tmp_size, tmp_size);
1668
                                }
1669
                                fprintf(vmain, "\n");
1670 14 dgisselq
                        }
1671
 
1672
 
1673 2 dgisselq
                        dropbit ^= 1;
1674
                        nbits = obits;
1675
                        tmp_size >>= 1; lgtmp--;
1676
                }
1677
 
1678
                if (tmp_size == 4) {
1679 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1680 2 dgisselq
 
1681
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1682
                                obits = maxbitsout;
1683
 
1684 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s4;\n");
1685 36 dgisselq
                        if (single_clock) {
1686
                                fprintf(vmain, "\twire\t[%d:0]\tw_d4;\n",
1687
                                        2*(obits+xtrapbits)-1);
1688
                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,%d,%d)\tstage_4(i_clk, %s, i_ce,\n",
1689
                                        ((dbg)&&(dbgstage==4))?"_dbg":"",
1690
                                        nbits+xtrapbits, obits+xtrapbits, lgsize,
1691
                                        (inverse)?1:0, (dropbit)?0:0,
1692
                                        resetw.c_str());
1693
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_d8, w_d4, w_s4%s);\n",
1694
                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");
1695
                        } else {
1696
                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os4;\n\t// verilator lint_on  UNUSED\n");
1697
                                fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
1698
                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, %s, i_ce,\n",
1699
                                        ((dbg)&&(dbgstage==4))?"_dbg":"",
1700
                                        nbits+xtrapbits, obits+xtrapbits, lgsize,
1701
                                        (inverse)?1:0, (dropbit)?0:0,
1702
                                        resetw.c_str());
1703
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
1704
                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");
1705
                                fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, %s, i_ce,\n",
1706
                                        nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0,
1707
                                        resetw.c_str());
1708
                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
1709
                        }
1710 2 dgisselq
                        dropbit ^= 1;
1711
                        nbits = obits;
1712
                        tmp_size >>= 1; lgtmp--;
1713
                }
1714
 
1715
                {
1716 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
1717 2 dgisselq
                        if (obits > nbitsout)
1718
                                obits = nbitsout;
1719
                        if ((maxbitsout>0)&&(obits > maxbitsout))
1720
                                obits = maxbitsout;
1721
                        fprintf(vmain, "\twire\t\tw_s2;\n");
1722 36 dgisselq
                        if (single_clock) {
1723
                                fprintf(vmain, "\twire\t[%d:0]\tw_d2;\n",
1724
                                        2*obits-1);
1725
                        } else {
1726
                                fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n",
1727
                                        2*obits-1);
1728
                        }
1729 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
1730
                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
1731 2 dgisselq
 
1732 36 dgisselq
                        if (single_clock) {
1733
                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",
1734
                                        nbits+xtrapbits, obits,(dropbit)?0:1,
1735
                                        resetw.c_str());
1736
                                fprintf(vmain, "\t\t\t\t\tw_s4, w_d4, w_d2, w_s2);\n");
1737
                        } else {
1738
                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",
1739
                                        nbits+xtrapbits, obits,(dropbit)?0:1,
1740
                                        resetw.c_str());
1741
                                fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
1742
                        }
1743
 
1744 2 dgisselq
                        fprintf(vmain, "\n\n");
1745
                        nbits = obits;
1746
                }
1747
 
1748
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
1749 36 dgisselq
                if (single_clock)
1750
                        fprintf(vmain, "\tassign\tbr_sample= w_d2;\n");
1751
                else {
1752
                        fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
1753
                        fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
1754
                }
1755 2 dgisselq
                fprintf(vmain, "\n");
1756
                if (bitreverse) {
1757
                        fprintf(vmain, "\twire\tbr_start;\n");
1758
                        fprintf(vmain, "\treg\tr_br_started;\n");
1759 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
1760 36 dgisselq
                        if (async_reset) {
1761
                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");
1762
                                fprintf(vmain, "\t\tif (!i_areset_n)\n");
1763
                        } else {
1764
                                fprintf(vmain, "\talways @(posedge i_clk)\n");
1765
                                fprintf(vmain, "\t\tif (i_reset)\n");
1766
                        }
1767 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
1768
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1769 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
1770
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
1771 2 dgisselq
                }
1772
        }
1773
 
1774 36 dgisselq
 
1775 2 dgisselq
        fprintf(vmain, "\n");
1776
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
1777
        fprintf(vmain, "\twire\tbr_sync;\n");
1778
        if (bitreverse) {
1779 36 dgisselq
                if (single_clock) {
1780
                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_result;\n");
1781
                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());
1782
                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_sample,\n");
1783
                        fprintf(vmain, "\t\t\tbr_o_result, br_sync);\n");
1784
                } else {
1785
                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
1786
                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());
1787
                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
1788
                        fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
1789
                }
1790
        } else if (single_clock) {
1791
                // Without a bit-reversal stage nothing has declared br_o_result yet,
                // and the generated file sets `default_nettype none, so declare it
                // here.  The single-sample stage output is br_sample (there is no
                // br_result net in the generated module).
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_result;\n");
                fprintf(vmain, "\tassign\tbr_o_result = br_sample;\n");
1792
                fprintf(vmain, "\tassign\tbr_sync     = w_s2;\n");
1793 2 dgisselq
        } else {
1794
                // Without a bit-reversal stage nothing has declared br_o_left or
                // br_o_right yet, and the generated file sets `default_nettype none,
                // so declare both before the pass-through assignments.
                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
1795
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
1796
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
1797
        }
1798
 
1799 36 dgisselq
        fprintf(vmain,
1800
"\n\n"
1801
"\t// Last clock: Register our outputs, we\'re done.\n"
1802
"\tinitial\to_sync  = 1\'b0;\n");
1803
        if (async_reset)
1804
                fprintf(vmain,
1805
"\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
1806
        else {
1807
                fprintf(vmain,
1808
"\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
1809
        }
1810
 
1811
        fprintf(vmain,
1812
"\t\t\to_sync  <= 1\'b0;\n"
1813
"\t\telse if (i_ce)\n"
1814
"\t\t\to_sync  <= br_sync;\n"
1815
"\n"
1816
"\talways @(posedge i_clk)\n"
1817
"\t\tif (i_ce)\n");
1818
        if (single_clock) {
1819
                fprintf(vmain, "\t\t\to_result  <= br_o_result;\n");
1820
        } else {
1821
                fprintf(vmain,
1822
"\t\tbegin\n"
1823
"\t\t\to_left  <= br_o_left;\n"
1824
"\t\t\to_right <= br_o_right;\n"
1825
"\t\tend\n");
1826
        }
1827
 
1828
        fprintf(vmain,
1829
"\n\n"
1830
"endmodule\n");
1831 2 dgisselq
        fclose(vmain);
1832
 
1833 36 dgisselq
 
1834 14 dgisselq
        {
1835
                std::string     fname;
1836 2 dgisselq
 
1837 14 dgisselq
                fname = coredir + "/butterfly.v";
1838 36 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding,
1839
                        ckpce, async_reset);
1840 2 dgisselq
 
1841 36 dgisselq
                fname = coredir + "/hwbfly.v";
1842
                build_hwbfly(fname.c_str(), xtracbits, rounding,
1843
                        ckpce, async_reset);
1844 22 dgisselq
 
1845 29 dgisselq
                {
1846
                        // To make debugging easier, we build both of these
1847
                        fname = coredir + "/shiftaddmpy.v";
1848
                        build_multiply(fname.c_str());
1849 2 dgisselq
 
1850 29 dgisselq
                        fname = coredir + "/longbimpy.v";
1851
                        build_longbimpy(fname.c_str());
1852
                        fname = coredir + "/bimpy.v";
1853
                        build_bimpy(fname.c_str());
1854
                }
1855
 
1856 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
1857
                        fname = coredir + "/qtrstage_dbg.v";
1858 36 dgisselq
                        if (single_clock)
1859
                                build_snglquarters(fname.c_str(), rounding,
1860
                                        async_reset, true);
1861
                        else
1862
                                build_dblquarters(fname.c_str(), rounding,
1863
                                        async_reset, true);
1864 26 dgisselq
                }
1865 14 dgisselq
                fname = coredir + "/qtrstage.v";
1866 36 dgisselq
                if (single_clock)
1867
                        build_snglquarters(fname.c_str(), rounding,
1868
                                        async_reset, false);
1869 26 dgisselq
                else
1870 36 dgisselq
                        build_dblquarters(fname.c_str(), rounding,
1871
                                        async_reset, false);
1872 14 dgisselq
 
1873 36 dgisselq
 
1874
                if (single_clock) {
1875
                        fname = coredir + "/laststage.v";
1876
                        build_sngllast(fname.c_str(), async_reset);
1877
                } else {
1878
                        if ((dbg)&&(dbgstage == 2))
1879
                                fname = coredir + "/laststage_dbg.v";
1880
                        else
1881
                                fname = coredir + "/laststage.v";
1882
                        build_dblstage(fname.c_str(), rounding,
1883
                                async_reset, (dbg)&&(dbgstage==2));
1884
                }
1885
 
1886 14 dgisselq
                if (bitreverse) {
1887 36 dgisselq
                        fname = coredir + "/bitreverse.v";
1888
                        if (single_clock)
1889
                                build_snglbrev(fname.c_str(), async_reset);
1890
                        else
1891
                                build_dblreverse(fname.c_str(), async_reset);
1892 14 dgisselq
                }
1893 23 dgisselq
 
1894
                const   char    *rnd_string = "";
1895
                switch(rounding) {
1896
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
1897
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
1898
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
1899
                        default:
1900
                                rnd_string = "/convround.v"; break;
1901
                } fname = coredir + rnd_string;
1902
                switch(rounding) {
1903
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
1904
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
1905
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
1906
                        default:
1907
                                build_convround(fname.c_str()); break;
1908
                }
1909
 
1910 2 dgisselq
        }
1911 36 dgisselq
 
1912
        if (verbose_flag)
1913
                printf("All done -- success\n");
1914 2 dgisselq
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.