OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 35

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
2 16 dgisselq
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9 33 dgisselq
//      Once built, this program will build an FFT (or IFFT) core of arbitrary
10
//      width, precision, etc., that will run at two samples per clock.
11
//      (Incidentally, I didn't pick two samples per clock because it was
12
//      easier, but rather because there weren't any two-sample per clock
13
//      FFT's posted on opencores.org.  Further, FFT's running at one sample
14
//      per clock aren't that hard to find.)
15 16 dgisselq
//
16 33 dgisselq
//      You can find the documentation for this program in two places.  One is
17
//      in the usage() function below.  The second is in the 'doc'uments
18
//      directory that comes with this package, specifically in the spec.pdf
19
//      file.  If it's not there, type make in the documents directory to
20
//      build it.
21 16 dgisselq
//
22 31 dgisselq
//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.
23
//              (Adjustments are at the top of the file ...)
24
//
25 16 dgisselq
// Creator:     Dan Gisselquist, Ph.D.
26 30 dgisselq
//              Gisselquist Technology, LLC
27 16 dgisselq
//
28 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
29 16 dgisselq
//
30 33 dgisselq
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
31 16 dgisselq
//
32
// This program is free software (firmware): you can redistribute it and/or
33
// modify it under the terms of  the GNU General Public License as published
34
// by the Free Software Foundation, either version 3 of the License, or (at
35
// your option) any later version.
36
//
37
// This program is distributed in the hope that it will be useful, but WITHOUT
38
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
39
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
40
// for more details.
41
//
42
// You should have received a copy of the GNU General Public License along
43
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
44
// target there if the PDF file isn't present.)  If not, see
45
// <http://www.gnu.org/licenses/> for a copy.
46
//
47
// License:     GPL, v3, as defined and found on www.gnu.org,
48
//              http://www.gnu.org/licenses/gpl.html
49
//
50
//
51 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
52 16 dgisselq
//
53
//
54 31 dgisselq
#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen
55 2 dgisselq
#include <stdio.h>
56
#include <stdlib.h>
57 31 dgisselq
 
58
#ifdef _MSC_VER //  added for ms vs compatibility
59
 
60
#include <io.h>
61
#include <direct.h>
62
#define _USE_MATH_DEFINES
63
#define R_OK    4       /* Test for read permission.  */
64
#define W_OK    2       /* Test for write permission.  */
65
#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/
66
#define F_OK    0       /* Test for existence.  */
67
 
68
#if _MSC_VER <= 1700
69
 
70
// Fallback llround() for the old MSVC releases selected by the enclosing
// #if: adds one half to the magnitude, truncates, then restores the sign, so
// halfway cases round away from zero.
long long llround(double d) {
        const double    magnitude = (d < 0) ? -d : d;
        const long long nearest = (long long)(magnitude + 0.5);

        return (d < 0) ? -nearest : nearest;
}
73
// Stub lstat() for the old MSVC releases selected by the enclosing #if.
// No file information is looked up: the call always returns 1 (non-zero)
// and *buf is left untouched.
int lstat(const char *filename, struct stat *buf) {
        // Both parameters are intentionally unused in this stub.
        (void)filename;
        (void)buf;
        return 1;
}
74
#define S_ISDIR(A)      0
75
 
76
#else
77
 
78
#define lstat   _stat
79
#define S_ISDIR _S_IFDIR
80
 
81
#endif
82
 
83
#define mkdir(A,B)      _mkdir(A)
84
 
85
#define access _access
86
 
87
#else
88
// And for G++/Linux environment
89
 
90
#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros
91 2 dgisselq
#include <sys/stat.h>
92 31 dgisselq
#endif
93
 
94 2 dgisselq
#include <string.h>
95 14 dgisselq
#include <string>
96 2 dgisselq
#include <math.h>
97
#include <ctype.h>
98
#include <assert.h>
99
 
100 26 dgisselq
#define DEF_NBITSIN     16
101
#define DEF_COREDIR     "fft-core"
102
#define DEF_XTRACBITS   4
103
#define DEF_NMPY        0
104
#define DEF_XTRAPBITS   0
105 29 dgisselq
#define USE_OLD_MULTIPLY        false
106 2 dgisselq
 
107 29 dgisselq
// To coordinate testing, it helps to have some defines in our header file that
108
// are common with the default parameters found within the various subroutines.
109
// We'll define those common parameters here.  These values, however, have no
110
// effect on anything other than bench testing.  They do, though, allow us to
111
// bench test exact copies of what is going on within the FFT when necessary
112
// in order to find problems.
113
// First, parameters for the new multiply based upon the bi-multiply structure
114
// (2-bits/2-tableau rows at a time).
115
#define TST_LONGBIMPY_AW        16
116
#define TST_LONGBIMPY_BW        20      // Leave undefined to match AW
117
 
118
//  We also include parameters for the shift add multiply
119
#define TST_SHIFTADDMPY_AW      16
120
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
121
 
122
// Now for parameters matching the butterfly
123
#define TST_BUTTERFLY_IWIDTH    16
124
#define TST_BUTTERFLY_CWIDTH    20
125
#define TST_BUTTERFLY_OWIDTH    17
126
 
127
// Now for parameters matching the qtrstage
128
#define TST_QTRSTAGE_IWIDTH     16
129
#define TST_QTRSTAGE_LGWIDTH    8
130
 
131
// Parameters for the dblstage
132
#define TST_DBLSTAGE_IWIDTH     16
133
#define TST_DBLSTAGE_SHIFT      0
134
 
135
// Now for parameters matching the dblreverse stage
136
#define TST_DBLREVERSE_LGSIZE   5
137
 
138 23 dgisselq
// Selects which rounding module the generated stages instantiate.
typedef enum {
        RND_TRUNCATE   = 0,     // "truncate"
        RND_FROMZERO   = 1,     // "roundfromzero"
        RND_HALFUP     = 2,     // "roundhalfup"
        RND_CONVERGENT = 3      // "convround"
} ROUND_T;
141
 
142 33 dgisselq
// GPL v3 notice written verbatim into the header of the generated Verilog
// files.  ("MERCHANTABILITY" is spelled as in the official GPL notice.)
const char      cpyleft[] =
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015-2017, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"////////////////////////////////////////////////////////////////////////////////\n";
167 14 dgisselq
// Project title substituted into the "Project:" line of the generated file
// headers.
const char      prjname[] = "A Doubletime Pipelined FFT";

// Author credit lines appended to the generated file headers; each line is
// already formatted as a Verilog comment.
const char      creator[] =
"// Creator:    Dan Gisselquist, Ph.D.\n"
"//             Gisselquist Technology, LLC\n";
170 2 dgisselq
 
171
// Returns the smallest lg >= 1 for which (2 to the power lg) >= vl.
// The result is never less than 1, even when vl is 1, zero or negative.
int     lgval(int vl) {
        int     lg = 1;

        // The power of two is formed as a long long so that it cannot
        // overflow for any int value of vl; an int-typed (1<<lg) is undefined
        // once the shift reaches the sign bit, which happened for vl > 2^30.
        // (Assumes long long is wider than int.)
        while ((1LL << lg) < vl)
                lg++;
        return lg;
}
178
 
179
// Returns the smallest power of two that is >= vl (1 when vl <= 1).
//
// Despite the name, the value returned is the power of two itself, not its
// logarithm.  If vl exceeds the largest power of two an int can hold, that
// largest power is returned instead: doubling it further would overflow a
// signed int, which is undefined behaviour and could loop forever.
int     nextlg(int vl) {
        // Largest power of two representable in an int (2^30 for 32-bit int).
        const int       top = (int)((~0u >> 2) + 1u);
        int             r = 1;

        while (r < vl && r < top)
                r <<= 1;
        return r;
}
186
 
187 14 dgisselq
int     bflydelay(int nbits, int xtra) {
188 2 dgisselq
        int     cbits = nbits + xtra;
189 14 dgisselq
        int     delay;
190 29 dgisselq
 
191
        if (USE_OLD_MULTIPLY) {
192
                if (nbits+1<cbits)
193
                        delay = nbits+4;
194
                else
195
                        delay = cbits+3;
196
        } else {
197
                int     na=nbits+2, nb=cbits+1;
198
                if (nb<na) {
199
                        int tmp = nb;
200
                        nb = na; na = tmp;
201
                } delay = ((na)/2+(na&1)+2);
202
        }
203 14 dgisselq
        return delay;
204 2 dgisselq
}
205
 
206 14 dgisselq
int     lgdelay(int nbits, int xtra) {
207
        // The butterfly code needs to compare a valid address, of this
208
        // many bits, with an address two greater.  This guarantees we
209
        // have enough bits for that comparison.  We'll also end up with
210 33 dgisselq
        // more storage space to look for these values, but without a
211 14 dgisselq
        // redesign that's just what we'll deal with.
212
        return lgval(bflydelay(nbits, xtra)+3);
213
}
214
 
215 23 dgisselq
void    build_truncator(const char *fname) {
216
        printf("TRUNCATING!\n");
217 2 dgisselq
        FILE    *fp = fopen(fname, "w");
218
        if (NULL == fp) {
219
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
220
                perror("O/S Err was:");
221
                return;
222
        }
223
 
224
        fprintf(fp,
225
"///////////////////////////////////////////////////////////////////////////\n"
226
"//\n"
227 23 dgisselq
"// Filename:   truncate.v\n"
228
"//             \n"
229
"// Project:    %s\n"
230
"//\n"
231
"// Purpose:    Truncation is one of several options that can be used\n"
232
"//             internal to the various FFT stages to drop bits from one \n"
233
"//             stage to the next.  In general, it is the simplest method\n"
234
"//             of dropping bits, since it requires only a bit selection.\n"
235
"//\n"
236
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
237
"//             since it tends to produce a DC bias in the result.  (Other\n"
238
"//             less pronounced biases may also exist.)\n"
239
"//\n"
240
"//             This particular version also registers the output with the\n"
241
"//             clock, so there will be a delay of one going through this\n"
242
"//             module.  This will keep it in line with the other forms of\n"
243
"//             rounding that can be used.\n"
244
"//\n"
245
"//\n%s"
246
"//\n",
247
                prjname, creator);
248
 
249
        fprintf(fp, "%s", cpyleft);
250 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
251 23 dgisselq
        fprintf(fp,
252
"module truncate(i_clk, i_ce, i_val, o_val);\n"
253
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
254
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
255
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
256
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
257
"\n"
258
        "\talways @(posedge i_clk)\n"
259
                "\t\tif (i_ce)\n"
260
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
261
"\n"
262
"endmodule\n");
263
}
264
 
265
 
266
void    build_roundhalfup(const char *fname) {
267
        FILE    *fp = fopen(fname, "w");
268
        if (NULL == fp) {
269
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
270
                perror("O/S Err was:");
271
                return;
272
        }
273
 
274
        fprintf(fp,
275
"///////////////////////////////////////////////////////////////////////////\n"
276
"//\n"
277
"// Filename:   roundhalfup.v\n"
278
"//             \n"
279
"// Project:    %s\n"
280
"//\n"
281
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
282
"//             school.  A one half value is added to the result, and then\n"
283
"//             the result is truncated.  When used in an FFT, this produces\n"
284
"//             less bias than the truncation method, although a bias still\n"
285
"//             tends to remain.\n"
286
"//\n"
287
"//\n%s"
288
"//\n",
289
                prjname, creator);
290
 
291
        fprintf(fp, "%s", cpyleft);
292 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
293 23 dgisselq
        fprintf(fp,
294
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
295
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
296
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
297
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
298
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
299
"\n"
300
        "\t// Let's deal with two cases to be as general as we can be here\n"
301
        "\t//\n"
302
        "\t//   1. The desired output would lose no bits at all\n"
303
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
304
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
305
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
306
        "\t//\t\tvalue.\n"
307
        "\tgenerate\n"
308
        "\tif (IWID-SHIFT == OWID)\n"
309
        "\tbegin // No truncation or rounding, output drops no bits\n"
310
"\n"
311
                "\t\talways @(posedge i_clk)\n"
312
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
313
"\n"
314
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
315
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
316
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
317
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
318
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
319 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
320 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
321
"\n"
322
                "\t\talways @(posedge i_clk)\n"
323
                "\t\t\tif (i_ce)\n"
324
                "\t\t\tbegin\n"
325 35 dgisselq
                        "\t\t\t\tif (!first_lost_bit) // Round down / truncate\n"
326 23 dgisselq
                        "\t\t\t\t\to_val <= truncated_value;\n"
327
                        "\t\t\t\telse\n"
328
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
329
                "\t\t\tend\n"
330
"\n"
331
        "\tend\n"
332
        "\tendgenerate\n"
333
"\n"
334
"endmodule\n");
335
}
336
 
337
void    build_roundfromzero(const char *fname) {
338
        FILE    *fp = fopen(fname, "w");
339
        if (NULL == fp) {
340
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
341
                perror("O/S Err was:");
342
                return;
343
        }
344
 
345
        fprintf(fp,
346
"///////////////////////////////////////////////////////////////////////////\n"
347
"//\n"
348
"// Filename:   roundfromzero.v\n"
349
"//             \n"
350
"// Project:    %s\n"
351
"//\n"
352
"// Purpose:    Truncation is one of several options that can be used\n"
353
"//             internal to the various FFT stages to drop bits from one \n"
354
"//             stage to the next.  In general, it is the simplest method\n"
355
"//             of dropping bits, since it requires only a bit selection.\n"
356
"//\n"
357
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
358
"//             since it tends to produce a DC bias in the result.  (Other\n"
359
"//             less pronounced biases may also exist.)\n"
360
"//\n"
361
"//             This particular version also registers the output with the\n"
362
"//             clock, so there will be a delay of one going through this\n"
363
"//             module.  This will keep it in line with the other forms of\n"
364
"//             rounding that can be used.\n"
365
"//\n"
366
"//\n%s"
367
"//\n",
368
                prjname, creator);
369
 
370
        fprintf(fp, "%s", cpyleft);
371 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
372 23 dgisselq
        fprintf(fp,
373 33 dgisselq
"module roundfromzero(i_clk, i_ce, i_val, o_val);\n"
374 23 dgisselq
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
375
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
376
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
377
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
378
"\n"
379
        "\t// Let's deal with three cases to be as general as we can be here\n"
380
        "\t//\n"
381
        "\t//\t1. The desired output would lose no bits at all\n"
382
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
383
        "\t//\t\tadjusting the value to be the closer to zero in\n"
384
        "\t//\t\tcases of being halfway between two.  If identically\n"
385
        "\t//\t\tequal to a number, we just leave it as is.\n"
386
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
387
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
388
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
389
        "\t//\t\tround away from zero.\n"
390
        "\tgenerate\n"
391 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
392
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
393
        "\t// effect here.\n"
394
"\n"
395
                "\t\talways @(posedge i_clk)\n"
396
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
397
"\n"
398
        "\tend else if (IWID-SHIFT == OWID)\n"
399 23 dgisselq
        "\tbegin // No truncation or rounding, output drops no bits\n"
400
"\n"
401
                "\t\talways @(posedge i_clk)\n"
402
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
403
"\n"
404
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
405
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
406
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
407
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
408
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
409 26 dgisselq
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
410 23 dgisselq
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
411
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
412
"\n"
413
        "\t\talways @(posedge i_clk)\n"
414
                "\t\t\tif (i_ce)\n"
415
                "\t\t\tbegin\n"
416 35 dgisselq
                        "\t\t\t\tif (!first_lost_bit) // Round down / truncate\n"
417 23 dgisselq
                                "\t\t\t\t\to_val <= truncated_value;\n"
418
                        "\t\t\t\telse if (sign_bit)\n"
419
                                "\t\t\t\t\to_val <= truncated_value;\n"
420
                        "\t\t\t\telse\n"
421
                                "\t\t\t\t\to_val <= rounded_up;\n"
422
                "\t\t\tend\n"
423
"\n"
424
        "\tend else // If there's more than one bit we are dropping\n"
425
        "\tbegin\n"
426
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
427
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
428
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
429 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
430 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
431
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
432
"\n"
433
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
434
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
435
"\n"
436
                "\t\talways @(posedge i_clk)\n"
437
                        "\t\t\tif (i_ce)\n"
438
                        "\t\t\tbegin\n"
439 35 dgisselq
                        "\t\t\t\tif (!first_lost_bit) // Round down / truncate\n"
440 23 dgisselq
                                "\t\t\t\t\to_val <= truncated_value;\n"
441
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
442
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
443
                        "\t\t\t\telse if (sign_bit)\n"
444
                                "\t\t\t\t\to_val <= truncated_value;\n"
445
                        "\t\t\t\telse\n"
446
                                "\t\t\t\t\to_val <= rounded_up;\n"
447
                        "\t\t\tend\n"
448
        "\tend\n"
449
        "\tendgenerate\n"
450
"\n"
451
"endmodule\n");
452
}
453
 
454
void    build_convround(const char *fname) {
455
        FILE    *fp = fopen(fname, "w");
456
        if (NULL == fp) {
457
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
458
                perror("O/S Err was:");
459
                return;
460
        }
461
 
462
        fprintf(fp,
463
"///////////////////////////////////////////////////////////////////////////\n"
464
"//\n"
465
"// Filename:   convround.v\n"
466
"//             \n"
467
"// Project:    %s\n"
468
"//\n"
469
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
470
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
471 33 dgisselq
"//     rounding, or ... more, at least according to Wikipedia.\n"
472 23 dgisselq
"//\n"
473 33 dgisselq
"//     This form of rounding works by rounding, when the direction is in\n"
474
"//     question, towards the nearest even value.\n"
475 23 dgisselq
"//\n"
476
"//\n%s"
477
"//\n",
478
                prjname, creator);
479
 
480
        fprintf(fp, "%s", cpyleft);
481 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
482 23 dgisselq
        fprintf(fp,
483
"module convround(i_clk, i_ce, i_val, o_val);\n"
484
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
485
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
486
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
487
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
488
"\n"
489
"\t// Let's deal with three cases to be as general as we can be here\n"
490
"\t//\n"
491
"\t//\t1. The desired output would lose no bits at all\n"
492
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
493
"\t//\t\tadjusting the value to be the nearest even number in\n"
494
"\t//\t\tcases of being halfway between two.  If identically\n"
495
"\t//\t\tequal to a number, we just leave it as is.\n"
496
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
497
"\t//\t\tnormally unless we are rounding a value of exactly\n"
498
"\t//\t\thalfway between the two.  In the halfway case we round\n"
499
"\t//\t\tto the nearest even number.\n"
500
"\tgenerate\n"
501 33 dgisselq
// What if IWID < OWID?  We should expand here ... somehow
502 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
503
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
504
        "\t// effect here.\n"
505
"\n"
506
                "\t\talways @(posedge i_clk)\n"
507
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
508
"\n"
509 33 dgisselq
// What if IWID-SHIFT < OWID?  Shouldn't we also shift here as well?
510 28 dgisselq
"\tend else if (IWID-SHIFT == OWID)\n"
511 23 dgisselq
"\tbegin // No truncation or rounding, output drops no bits\n"
512
"\n"
513
"\t\talways @(posedge i_clk)\n"
514
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
515
"\n"
516
"\tend else if (IWID-SHIFT-1 == OWID)\n"
517 33 dgisselq
// Is there any way to limit the number of bits that are examined here, for the
518
// purpose of simplifying/reducing logic?  I mean, if we go from 32 to 16 bits,
519
// must we check all 15 bits for equality to zero?
520 23 dgisselq
"\tbegin // Output drops one bit, can only add one or ... not.\n"
521
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
522
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
523
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
524 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
525 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
526
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
527
"\n"
528
"\t\talways @(posedge i_clk)\n"
529
"\t\t\tif (i_ce)\n"
530
"\t\t\tbegin\n"
531 35 dgisselq
"\t\t\t\tif (!first_lost_bit) // Round down / truncate\n"
532 23 dgisselq
"\t\t\t\t\to_val <= truncated_value;\n"
533
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
534
"\t\t\t\t\to_val <= rounded_up; // even value\n"
535
"\t\t\t\telse // else round down to the nearest\n"
536
"\t\t\t\t\to_val <= truncated_value; // even value\n"
537
"\t\t\tend\n"
538
"\n"
539
"\tend else // If there's more than one bit we are dropping\n"
540
"\tbegin\n"
541
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
542
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
543
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
544 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
545 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
546
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
547
"\n"
548
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
549
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
550
"\n"
551
"\t\talways @(posedge i_clk)\n"
552
"\t\t\tif (i_ce)\n"
553
"\t\t\tbegin\n"
554 35 dgisselq
"\t\t\t\tif (!first_lost_bit) // Round down / truncate\n"
555 23 dgisselq
"\t\t\t\t\to_val <= truncated_value;\n"
556
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
557
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
558
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
559
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
560
"\t\t\t\telse   // else round down to nearest even\n"
561
"\t\t\t\t\to_val <= truncated_value;\n"
562
"\t\t\tend\n"
563
"\tend\n"
564
"\tendgenerate\n"
565
"\n"
566
"endmodule\n");
567
}
568
 
569 26 dgisselq
void    build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
570 23 dgisselq
        FILE    *fp = fopen(fname, "w");
571
        if (NULL == fp) {
572
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
573
                perror("O/S Err was:");
574
                return;
575
        }
576
        const   char    *rnd_string;
577
        if (rounding == RND_TRUNCATE)
578
                rnd_string = "truncate";
579
        else if (rounding == RND_FROMZERO)
580
                rnd_string = "roundfromzero";
581
        else if (rounding == RND_HALFUP)
582
                rnd_string = "roundhalfup";
583
        else
584
                rnd_string = "convround";
585
 
586
 
587
        fprintf(fp,
588
"///////////////////////////////////////////////////////////////////////////\n"
589
"//\n"
590 26 dgisselq
"// Filename:   qtrstage%s.v\n"
591 2 dgisselq
"//             \n"
592
"// Project:    %s\n"
593
"//\n"
594 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
595
"//             frequency FFT.  This particular implementation is optimized\n"
596
"//             so that all of the multiplies are accomplished by additions\n"
597
"//             and multiplexers only.\n"
598
"//\n"
599 2 dgisselq
"//\n%s"
600
"//\n",
601 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
602 2 dgisselq
        fprintf(fp, "%s", cpyleft);
603 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
604 2 dgisselq
 
605
        fprintf(fp,
606 26 dgisselq
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
607 29 dgisselq
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
608 5 dgisselq
        "\t// Parameters specific to the core that should be changed when this\n"
609
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
610
        "\t// spans must use the fftdoubles stage.\n"
611 29 dgisselq
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
612 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
613
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
614
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
615
        "\toutput\treg                          o_sync;\n"
616 29 dgisselq
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
617
        TST_QTRSTAGE_LGWIDTH);
618 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
619
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
620
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
621
"\n");
622
        }
623 14 dgisselq
        fprintf(fp,
624 5 dgisselq
        "\treg\t        wait_for_sync;\n"
625 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
626 2 dgisselq
"\n"
627 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
628 2 dgisselq
"\n"
629 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
630
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
631
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
632
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
633 2 dgisselq
"\n"
634 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
635
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
636 2 dgisselq
"\n"
637 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
638
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
639
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
640 2 dgisselq
"\n"
641 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
642
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
643
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
644 2 dgisselq
"\n"
645 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
646 14 dgisselq
"\n");
647
        fprintf(fp,
648 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
649
        fprintf(fp,
650
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
651
        fprintf(fp,
652 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
653 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
654
        fprintf(fp,
655 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
656 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
657
        fprintf(fp,
658 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
659 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
660
        fprintf(fp,
661 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
662 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
663
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
664
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
665
/*
666
        fprintf(fp,
667 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
668 9 dgisselq
        "\tgenerate\n"
669
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
670 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
671 9 dgisselq
        "\telse\n"
672 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
673 9 dgisselq
        "\tendgenerate\n"
674 2 dgisselq
"\n"
675 23 dgisselq
*/
676
        fprintf(fp,
677 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
678
        "\tinitial iaddr = 0;\n"
679 5 dgisselq
        "\talways @(posedge i_clk)\n"
680
                "\t\tif (i_rst)\n"
681
                "\t\tbegin\n"
682 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
683 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
684 35 dgisselq
                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
685 5 dgisselq
                "\t\tbegin\n"
686 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
687
                        "\t\t\twait_for_sync <= 1\'b0;\n"
688
                "\t\tend\n"
689
        "\talways @(posedge i_clk)\n"
690
                "\t\tif (i_ce)\n"
691 5 dgisselq
                        "\t\t\timem <= i_data;\n"
692 26 dgisselq
                "\n\n");
693 23 dgisselq
        fprintf(fp,
694
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
695
        "\t// Why not?  Because iaddr will always be zero until after the\n"
696
        "\t// first i_ce, so we are safe.\n"
697 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
698 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
699
                "\t\tif (i_rst)\n"
700 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
701 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
702
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
703
        fprintf(fp,
704
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
705
        "\talways\t@(posedge i_clk)\n"
706
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
707
                "\t\tbegin\n"
708
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
709
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
710
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
711
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
712
                "\t\tend\n\n");
713
        fprintf(fp,
714
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
715
        fprintf(fp,
716 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
717
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
718
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
719
        "\t// it independent of pipeline[2].\n"
720 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
721 26 dgisselq
                "\t\tif (i_ce)\n"
722 23 dgisselq
                "\t\tbegin\n"
723
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
724
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
725
                        "\t\t\tif (ODD == 0)\n"
726 5 dgisselq
                        "\t\t\tbegin\n"
727 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
728
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
729
                        "\t\t\tend else if (INVERSE==0) begin\n"
730
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
731
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
732
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
733
                        "\t\t\tend else begin\n"
734
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
735
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
736
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
737 5 dgisselq
                        "\t\t\tend\n"
738 23 dgisselq
                "\t\tend\n\n");
739
        fprintf(fp,
740
        "\talways\t@(posedge i_clk)\n"
741
                "\t\tif (i_ce)\n"
742
                "\t\tbegin // In sequence, clock = 3\n"
743
                        "\t\t\tif (pipeline[3])\n"
744 5 dgisselq
                        "\t\t\tbegin\n"
745
                                "\t\t\t\tomem <= ob_b;\n"
746
                                "\t\t\t\to_data <= ob_a;\n"
747
                        "\t\t\tend else\n"
748
                                "\t\t\t\to_data <= omem;\n"
749 23 dgisselq
                "\t\tend\n\n");
750
 
751
        fprintf(fp,
752
        "\t// Don\'t forget in the sync check that we are running\n"
753
        "\t// at two clocks per sample.  Thus we need to\n"
754
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
755 26 dgisselq
        "\tinitial\to_sync = 1\'b0;\n"
756 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
757 26 dgisselq
                "\t\tif (i_rst)\n"
758
                "\t\t\to_sync <= 1\'b0;\n"
759
                "\t\telse if (i_ce)\n"
760 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
761
        fprintf(fp, "endmodule\n");
762 2 dgisselq
}
763
 
764 26 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
765 2 dgisselq
        FILE    *fp = fopen(fname, "w");
766
        if (NULL == fp) {
767
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
768
                perror("O/S Err was:");
769
                return;
770
        }
771
 
772 23 dgisselq
        const   char    *rnd_string;
773
        if (rounding == RND_TRUNCATE)
774
                rnd_string = "truncate";
775
        else if (rounding == RND_FROMZERO)
776
                rnd_string = "roundfromzero";
777
        else if (rounding == RND_HALFUP)
778
                rnd_string = "roundhalfup";
779
        else
780
                rnd_string = "convround";
781
 
782
 
783 2 dgisselq
        fprintf(fp,
784
"///////////////////////////////////////////////////////////////////////////\n"
785
"//\n"
786 26 dgisselq
"// Filename:   dblstage%s.v\n"
787 2 dgisselq
"//\n"
788
"// Project:    %s\n"
789
"//\n"
790
"// Purpose:    This is part of an FPGA implementation that will process\n"
791 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
792
"//             through the data at two samples per clock.  If you notice\n"
793
"//             from the derivation of an FFT, the only time both even and\n"
794
"//             odd samples are used at the same time is in this stage.\n"
795
"//             Therefore, other than this stage and these twiddles, all of\n"
796
"//             the other stages can run two stages at a time at one sample\n"
797
"//             per clock.\n"
798 2 dgisselq
"//\n"
799
"//             In this implementation, the output is valid one clock after\n"
800
"//             the input is valid.  The output also accumulates one bit\n"
801
"//             above and beyond the number of bits in the input.\n"
802
"//             \n"
803
"//             i_clk   A system clock\n"
804 6 dgisselq
"//             i_rst   A synchronous reset\n"
805 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
806 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
807 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
808
"//                     bits contain the real portion, low order bits the\n"
809
"//                     imaginary portion, all in two\'s complement.\n"
810
"//             i_right The next (odd) complex sample input, same format as\n"
811
"//                     i_left.\n"
812
"//             o_left  The first (even) complex output.\n"
813
"//             o_right The next (odd) complex output.\n"
814 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
815 2 dgisselq
"//\n%s"
816 26 dgisselq
"//\n", (dbg)?"_dbg":"", prjname, creator);
817 2 dgisselq
 
818
        fprintf(fp, "%s", cpyleft);
819 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
820 33 dgisselq
        fprintf(fp,
821 26 dgisselq
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
822 29 dgisselq
        "\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"
823 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
824 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
825 28 dgisselq
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
826 6 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
827 29 dgisselq
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",
828
        TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);
829 26 dgisselq
 
830
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
831
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
832
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
833
"\n");
834
        }
835 33 dgisselq
        fprintf(fp,
836 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
837
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
838
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
839
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
840
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
841
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
842
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
843 2 dgisselq
"\n"
844 15 dgisselq
"\n"
845 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
846 15 dgisselq
"\n"
847 23 dgisselq
"\n");
848
        fprintf(fp,
849 26 dgisselq
        "\n"
850
        "\t// As with any register connected to the sync pulse, these must\n"
851
        "\t// have initial values and be reset on the i_rst signal.\n"
852
        "\t// Other data values need only restrict their updates to i_ce\n"
853
        "\t// enabled clocks, but sync\'s must obey resets and initial\n"
854
        "\t// conditions as well.\n"
855 28 dgisselq
        "\treg\trnd_sync, r_sync;\n"
856 2 dgisselq
"\n"
857 28 dgisselq
        "\tinitial\trnd_sync      = 1\'b0; // Sync into rounding\n"
858
        "\tinitial\tr_sync        = 1\'b0; // Sync coming out\n"
859 5 dgisselq
        "\talways @(posedge i_clk)\n"
860 6 dgisselq
                "\t\tif (i_rst)\n"
861 23 dgisselq
                "\t\tbegin\n"
862 26 dgisselq
                        "\t\t\trnd_sync <= 1\'b0;\n"
863 28 dgisselq
                        "\t\t\tr_sync <= 1\'b0;\n"
864
                "\t\tend else if (i_ce)\n"
865 5 dgisselq
                "\t\tbegin\n"
866 26 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
867 28 dgisselq
                        "\t\t\tr_sync <= rnd_sync;\n"
868 26 dgisselq
                "\t\tend\n"
869
"\n"
870
        "\t// As with other variables, these are really only updated when in\n"
871
        "\t// the processing pipeline, after the first i_sync.  However, to\n"
872
        "\t// eliminate as much unnecessary logic as possible, we toggle\n"
873 28 dgisselq
        "\t// these any time the i_ce line is enabled, and don\'t reset.\n"
874
        "\t// them on i_rst.\n");
875
        fprintf(fp,
876
        "\t// Don't forget that we accumulate a bit by adding two values\n"
877
        "\t// together. Therefore our intermediate value must have one more\n"
878
        "\t// bit than the two originals.\n"
879
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n"
880
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n"
881 26 dgisselq
        "\talways @(posedge i_clk)\n"
882
                "\t\tif (i_ce)\n"
883
                "\t\tbegin\n"
884
                        "\t\t\t//\n"
885 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
886
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
887 5 dgisselq
                        "\t\t\t//\n"
888 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
889
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
890 6 dgisselq
                        "\t\t\t//\n"
891 5 dgisselq
                "\t\tend\n"
892 28 dgisselq
"\n");
893
        fprintf(fp,
894
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
895
        "\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
896
        fprintf(fp,
897
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
898
        "\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
899
        fprintf(fp,
900
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
901
        "\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
902
        fprintf(fp,
903
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
904
        "\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
905
 
906
        fprintf(fp, "\n"
907
        "\t// Prior versions of this routine did not include the extra\n"
908
        "\t// clock and register/flip-flops that this routine requires.\n"
909
        "\t// These are placed in here to correct a bug in Verilator, that\n"
910
        "\t// otherwise struggles.  (Hopefully this will fix the problem ...)\n"
911
        "\talways @(posedge i_clk)\n"
912
                "\t\tif (i_ce)\n"
913
                "\t\tbegin\n"
914
                        "\t\t\to_left  <= { o_out_0r, o_out_0i };\n"
915
                        "\t\t\to_right <= { o_out_1r, o_out_1i };\n"
916
                "\t\tend\n"
917 2 dgisselq
"\n"
918 28 dgisselq
        "\tinitial\to_sync = 1'b0; // Final sync coming out of module\n"
919
        "\talways @(posedge i_clk)\n"
920
                "\t\tif (i_rst)\n"
921
                "\t\t\to_sync <= 1'b0;\n"
922
                "\t\telse if (i_ce)\n"
923
                "\t\t\to_sync <= r_sync;\n"
924 2 dgisselq
"\n"
925
"endmodule\n");
926
        fclose(fp);
927
}
928
 
929
void    build_multiply(const char *fname) {
930
        FILE    *fp = fopen(fname, "w");
931
        if (NULL == fp) {
932
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
933
                perror("O/S Err was:");
934
                return;
935
        }
936
 
937
        fprintf(fp,
938
"///////////////////////////////////////////////////////////////////////////\n"
939
"//\n"
940
"// Filename:   shiftaddmpy.v\n"
941
"//\n"
942
"// Project:    %s\n"
943
"//\n"
944
"// Purpose:    A portable shift and add multiply.\n"
945
"//\n"
946
"//             While both Xilinx and Altera will offer single clock \n"
947
"//             multiplies, this simple approach will multiply two numbers\n"
948
"//             on any architecture.  The result maintains the full width\n"
949
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
950
"//             no shifted bits, etc.\n"
951
"//\n"
952
"//             Further, for those applications that can support it, this\n"
953
"//             multiply is pipelined and will produce one answer per clock.\n"
954
"//\n"
955
"//             For minimal processing delay, make the first parameter\n"
956
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
957
"//\n"
958
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
959
"//             That is, if the data is present on the input at clock t=0,\n"
960
"//             the result will be present on the output at time t=AWIDTH+1;\n"
961
"//\n"
962
"//\n%s"
963
"//\n", prjname, creator);
964
 
965
        fprintf(fp, "%s", cpyleft);
966 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
967 33 dgisselq
        fprintf(fp,
968 2 dgisselq
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
969 29 dgisselq
        "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);
970
#ifdef  TST_SHIFTADDMPY_BW
971
        fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);
972
#else
973
        fprintf(fp, "AWIDTH;\n");
974
#endif
975
        fprintf(fp,
976 2 dgisselq
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
977
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
978
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
979
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
980
"\n"
981
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
982
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
983
        "\treg\t\t\tsgn;\n"
984
"\n"
985
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
986
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
987
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
988
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
989
        "\tgenvar k;\n"
990
"\n"
991 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
992
        "\t// taking the absolute value here would require an additional bit.\n"
993
        "\t// However, because our results are now unsigned, we can stay\n"
994
        "\t// within the number of bits given (for now).\n"
995 2 dgisselq
        "\talways @(posedge i_clk)\n"
996
                "\t\tif (i_ce)\n"
997
                "\t\tbegin\n"
998
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
999
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
1000
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
1001
                "\t\tend\n"
1002
"\n"
1003
        "\talways @(posedge i_clk)\n"
1004
                "\t\tif (i_ce)\n"
1005
                "\t\tbegin\n"
1006 26 dgisselq
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
1007
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
1008 2 dgisselq
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
1009 26 dgisselq
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
1010 2 dgisselq
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
1011
                "\t\tend\n"
1012
"\n"
1013
        "\tgenerate\n"
1014 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
1015 25 dgisselq
        "\tbegin : genstages\n"
1016 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
1017
                "\t\tif (i_ce)\n"
1018 2 dgisselq
                "\t\tbegin\n"
1019 26 dgisselq
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
1020
                        "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
1021
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
1022 2 dgisselq
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
1023
                "\t\tend\n"
1024
        "\tend\n"
1025
        "\tendgenerate\n"
1026
"\n"
1027
        "\talways @(posedge i_clk)\n"
1028
                "\t\tif (i_ce)\n"
1029
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
1030
"\n"
1031
"endmodule\n");
1032
 
1033
        fclose(fp);
1034
}
1035
 
1036 29 dgisselq
void    build_bimpy(const char *fname) {
1037
        FILE    *fp = fopen(fname, "w");
1038
        if (NULL == fp) {
1039
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1040
                perror("O/S Err was:");
1041
                return;
1042
        }
1043
 
1044
        fprintf(fp,
1045
"////////////////////////////////////////////////////////////////////////////////\n"
1046
"//\n"
1047
"// Filename:   %s\n"
1048
"//\n"
1049
"// Project:    %s\n"
1050
"//\n"
1051
"// Purpose:    A simple 2-bit multiply based upon the fact that LUT's allow\n"
1052
"//             6-bits of input.  In other words, I could build a 3-bit\n"
1053
"//             multiply from 6 LUTs (5 actually, since the first could have\n"
1054
"//             two outputs).  This would allow multiplication of three bit\n"
1055
"//             digits, save only for the fact that you would need two bits\n"
1056
"//             of carry.  The bimpy approach throttles back a bit and does\n"
1057
"//             a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"
1058
"//             carry more than one bit.  While this multiply is hardware\n"
1059
"//             independent (and can still run under Verilator therefore),\n"
1060
"//             it is really motivated by trying to optimize for a specific\n"
1061
"//             piece of hardware (Xilinx-7 series ...) that has at least\n"
1062
"//             4-input LUT's with carry chains.\n"
1063
"//\n"
1064
"//\n"
1065
"//\n%s"
1066
"//\n", fname, prjname, creator);
1067
 
1068
        fprintf(fp, "%s", cpyleft);
1069 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1070 33 dgisselq
        fprintf(fp,
1071 29 dgisselq
"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1072
"\tparameter\tBW=18, // Number of bits in i_b\n"
1073
"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"
1074
"\tinput\t\t\t\ti_clk, i_ce;\n"
1075
"\tinput\t\t[(LUTB-1):0]\ti_a;\n"
1076
"\tinput\t\t[(BW-1):0]\ti_b;\n"
1077
"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"
1078
"\n"
1079
"\twire [(BW+LUTB-2):0] w_r;\n"
1080
"\twire [(BW+LUTB-3):1] c;\n"
1081
"\n"
1082
"\tassign\tw_r =  { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"
1083
"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"
1084
"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"
1085
"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"
1086
"\n"
1087
"\talways @(posedge i_clk)\n"
1088
"\t\tif (i_ce)\n"
1089
"\t\t\to_r <= w_r + { c, 2'b0 };\n"
1090
"\n"
1091
"endmodule\n");
1092
 
1093
        fclose(fp);
1094
}
1095
 
1096
void    build_longbimpy(const char *fname) {
1097
        FILE    *fp = fopen(fname, "w");
1098
        if (NULL == fp) {
1099
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1100
                perror("O/S Err was:");
1101
                return;
1102
        }
1103
 
1104
        fprintf(fp,
1105
"////////////////////////////////////////////////////////////////////////////////\n"
1106
"//\n"
1107
"// Filename:   %s\n"
1108
"//\n"
1109
"// Project:    %s\n"
1110
"//\n"
1111
"// Purpose:    A portable shift and add multiply, built with the knowledge\n"
1112
"//             of the existence of a six bit LUT and carry chain.  That\n"
1113
"//             knowledge allows us to multiply two bits from one value\n"
1114
"//             at a time against all of the bits of the other value.  This\n"
1115
"//             sub multiply is called the bimpy.\n"
1116
"//\n"
1117
"//             For minimal processing delay, make the first parameter\n"
1118
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
1119
"//\n"
1120
"//\n"
1121
"//\n%s"
1122
"//\n", fname, prjname, creator);
1123
 
1124
        fprintf(fp, "%s", cpyleft);
1125 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1126 33 dgisselq
        fprintf(fp,
1127 29 dgisselq
"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1128
        "\tparameter    AW=%d,  // The width of i_a, min width is 5\n"
1129
                        "\t\t\tBW=", TST_LONGBIMPY_AW);
1130
#ifdef  TST_LONGBIMPY_BW
1131
        fprintf(fp, "%d", TST_LONGBIMPY_BW);
1132
#else
1133
        fprintf(fp, "AW");
1134
#endif
1135
 
1136
        fprintf(fp, ",  // The width of i_b, can be anything\n"
1137
                        "\t\t\t// The following three parameters should not be changed\n"
1138
                        "\t\t\t// by any implementation, but are based upon hardware\n"
1139
                        "\t\t\t// and the above values:\n"
1140
                        "\t\t\tOW=AW+BW,        // The output width\n"
1141
                        "\t\t\tIW=(AW+1)&(-2),  // Internal width of A\n"
1142
                        "\t\t\tLUTB=2,  // How many bits we can multiply by at once\n"
1143
                        "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"
1144
        "\tinput\t\t\t\ti_clk, i_ce;\n"
1145
        "\tinput\t\t[(AW-1):0]\ti_a;\n"
1146
        "\tinput\t\t[(BW-1):0]\ti_b;\n"
1147
        "\toutput\treg\t[(AW+BW-1):0]\to_r;\n"
1148
"\n"
1149
        "\treg\t[(IW-1):0]\tu_a;\n"
1150
        "\treg\t[(BW-1):0]\tu_b;\n"
1151
        "\treg\t\t\tsgn;\n"
1152
"\n"
1153
        "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"
1154
        "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"
1155
        "\treg\t[(TLEN-1):0]\t\tr_s;\n"
1156
        "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"
1157
        "\tgenvar k;\n"
1158
"\n"
1159
        "\t// First step:\n"
1160
        "\t// Switch to unsigned arithmetic for our multiply, keeping track\n"
1161
        "\t// of the along the way.  We'll then add the sign again later at\n"
1162
        "\t// the end.\n"
1163
        "\t//\n"
1164
        "\t// If we were forced to stay within two's complement arithmetic,\n"
1165
        "\t// taking the absolute value here would require an additional bit.\n"
1166
        "\t// However, because our results are now unsigned, we can stay\n"
1167
        "\t// within the number of bits given (for now).\n"
1168
        "\tgenerate if (IW > AW)\n"
1169
        "\tbegin\n"
1170
                "\t\talways @(posedge i_clk)\n"
1171
                        "\t\t\tif (i_ce)\n"
1172
                        "\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"
1173
        "\tend else begin\n"
1174
                "\t\talways @(posedge i_clk)\n"
1175
                        "\t\t\tif (i_ce)\n"
1176
                        "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"
1177
        "\tend endgenerate\n"
1178
"\n"
1179
        "\talways @(posedge i_clk)\n"
1180
                "\t\tif (i_ce)\n"
1181
                "\t\tbegin\n"
1182
                        "\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"
1183
                        "\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"
1184
                "\t\tend\n"
1185
"\n"
1186
        "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"
1187
"\n"
1188
        "\t//\n"
1189
        "\t// Second step: First two 2xN products.\n"
1190
        "\t//\n"
1191
        "\t// Since we have no tableau of additions (yet), we can do both\n"
1192
        "\t// of the first two rows at the same time and add them together.\n"
1193
        "\t// For the next round, we'll then have a previous sum to accumulate\n"
1194
        "\t// with new and subsequent product, and so only do one product at\n"
1195
        "\t// a time can follow this--but the first clock can do two at a time.\n"
1196
        "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);\n"
1197
        "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"
1198
        "\talways @(posedge i_clk)\n"
1199
                "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"
1200
        "\talways @(posedge i_clk)\n"
1201
                "\t\tif (i_ce) r_b[0] <= u_b;\n"
1202
        "\talways @(posedge i_clk)\n"
1203
                "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"
1204
        "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"
1205
                "\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"
1206
                        "\t\t\t  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"
1207
"\n"
1208
        "\tgenerate // Keep track of intermediate values, before multiplying them\n"
1209
        "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"
1210
        "\tbegin : gencopies\n"
1211
                "\t\talways @(posedge i_clk)\n"
1212
                "\t\tif (i_ce)\n"
1213
                "\t\tbegin\n"
1214
                        "\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"
1215
                                "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"
1216
                        "\t\t\tr_b[k+1] <= r_b[k];\n"
1217
                        "\t\tend\n"
1218
        "\tend endgenerate\n"
1219
"\n"
1220
        "\tgenerate // The actual multiply and accumulate stage\n"
1221
        "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"
1222
        "\tbegin : genstages\n"
1223
                "\t\t// First, the multiply: 2-bits times BW bits\n"
1224
                "\t\twire\t[(BW+LUTB-1):0] genp;\n"
1225
                "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"
1226
"\n"
1227
                "\t\t// Then the accumulate step -- on the next clock\n"
1228
                "\t\talways @(posedge i_clk)\n"
1229
                        "\t\t\tif (i_ce)\n"
1230
                                "\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"
1231 34 dgisselq
                                        "\t\t\t\t\tgenp, {(LUTB*(k+2)){1'b0}} };\n"
1232 29 dgisselq
        "\tend endgenerate\n"
1233
"\n"
1234
        "\twire [(IW+BW-1):0]   w_r;\n"
1235
        "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"
1236
        "\talways @(posedge i_clk)\n"
1237
                "\t\tif (i_ce)\n"
1238
                        "\t\t\to_r <= w_r[(AW+BW-1):0];\n"
1239
"\n"
1240 35 dgisselq
        "\tgenerate if (IW > AW)\n"
1241
        "\tbegin : VUNUSED\n"
1242
        "\t\t// verilator lint_off UNUSED\n"
1243
        "\t\twire\t[(IW-AW)-1:0]\tunused;\n"
1244
        "\t\tassign\tunused = w_r[(IW+BW-1):(AW+BW)];\n"
1245
        "\t\t// verilator lint_on UNUSED\n"
1246
        "\tend endgenerate\n"
1247
"\n"
1248 29 dgisselq
"endmodule\n");
1249
 
1250
        fclose(fp);
1251
}
1252
 
1253 2 dgisselq
void    build_dblreverse(const char *fname) {
1254
        FILE    *fp = fopen(fname, "w");
1255
        if (NULL == fp) {
1256
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1257
                perror("O/S Err was:");
1258
                return;
1259
        }
1260
 
1261
        fprintf(fp,
1262
"///////////////////////////////////////////////////////////////////////////\n"
1263
"//\n"
1264
"// Filename:   dblreverse.v\n"
1265
"//\n"
1266
"// Project:    %s\n"
1267
"//\n"
1268
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
1269
"//             expected as follows:\n"
1270
"//\n"
1271
"//             i_clk   A running clock at whatever system speed is offered.\n"
1272
"//             i_rst   A synchronous reset signal, that resets all internals\n"
1273
"//             i_ce    If this is one, one input is consumed and an output\n"
1274
"//                     is produced.\n"
1275
"//             i_in_0, i_in_1\n"
1276
"//                     Two inputs to be consumed, each of width WIDTH.\n"
1277
"//             o_out_0, o_out_1\n"
1278
"//                     Two of the bitreversed outputs, also of the same\n"
1279
"//                     width, WIDTH.  Of course, there is a delay from the\n"
1280
"//                     first input to the first output.  For this purpose,\n"
1281
"//                     o_sync is present.\n"
1282 26 dgisselq
"//             o_sync  This will be a 1\'b1 for the first value in any block.\n"
1283
"//                     Following a reset, this will only become 1\'b1 once\n"
1284 2 dgisselq
"//                     the data has been loaded and is now valid.  After that,\n"
1285
"//                     all outputs will be valid.\n"
1286 26 dgisselq
"//\n"
1287
"//     20150602 -- This module has undergone massive rework in order to\n"
1288
"//             ensure that it uses resources efficiently.  As a result, \n"
1289
"//             it now optimizes nicely into block RAMs.  As an unfortunately\n"
1290
"//             side effect, it now passes it\'s bench test (dblrev_tb) but\n"
1291
"//             fails the integration bench test (fft_tb).\n"
1292
"//\n"
1293 2 dgisselq
"//\n%s"
1294
"//\n", prjname, creator);
1295
        fprintf(fp, "%s", cpyleft);
1296 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1297 2 dgisselq
        fprintf(fp,
1298
"\n\n"
1299
"//\n"
1300
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
1301
"// our work into eight memory banks, writing two banks at once and reading\n"
1302
"// another two banks in the same clock?\n"
1303
"//\n"
1304
"//     mem[00xxx0] = s_0[n]\n"
1305
"//     mem[00xxx1] = s_1[n]\n"
1306
"//     o_0[n] = mem[10xxx0]\n"
1307
"//     o_1[n] = mem[11xxx0]\n"
1308
"//     ...\n"
1309
"//     mem[01xxx0] = s_0[m]\n"
1310
"//     mem[01xxx1] = s_1[m]\n"
1311
"//     o_0[m] = mem[10xxx1]\n"
1312
"//     o_1[m] = mem[11xxx1]\n"
1313
"//     ...\n"
1314
"//     mem[10xxx0] = s_0[n]\n"
1315
"//     mem[10xxx1] = s_1[n]\n"
1316
"//     o_0[n] = mem[00xxx0]\n"
1317
"//     o_1[n] = mem[01xxx0]\n"
1318
"//     ...\n"
1319
"//     mem[11xxx0] = s_0[m]\n"
1320
"//     mem[11xxx1] = s_1[m]\n"
1321
"//     o_0[m] = mem[00xxx1]\n"
1322
"//     o_1[m] = mem[01xxx1]\n"
1323
"//     ...\n"
1324
"//\n"
1325 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
1326
"//     to do it properly.  These four banks are defined by the two bits\n"
1327
"//     that determine the top and bottom of the correct address.  Larger\n"
1328
"//     FFT\'s would require more memories.\n"
1329
"//\n"
1330 2 dgisselq
"//\n");
1331 33 dgisselq
        fprintf(fp,
1332 2 dgisselq
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
1333 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
1334 29 dgisselq
        "\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"
1335 5 dgisselq
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
1336
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
1337 26 dgisselq
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
1338 29 dgisselq
        "\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);
1339
 
1340 33 dgisselq
        fprintf(fp,
1341 2 dgisselq
"\n"
1342 26 dgisselq
        "\treg\t\t\tin_reset;\n"
1343
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
1344
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
1345 2 dgisselq
"\n"
1346 5 dgisselq
        "\tgenvar\tk;\n"
1347 26 dgisselq
        "\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
1348 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
1349 26 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
1350 25 dgisselq
        "\tend endgenerate\n"
1351 2 dgisselq
"\n"
1352 25 dgisselq
        "\tinitial iaddr = 0;\n"
1353
        "\tinitial in_reset = 1\'b1;\n"
1354 26 dgisselq
        "\tinitial o_sync = 1\'b0;\n"
1355 5 dgisselq
        "\talways @(posedge i_clk)\n"
1356
                "\t\tif (i_rst)\n"
1357
                "\t\tbegin\n"
1358
                        "\t\t\tiaddr <= 0;\n"
1359 26 dgisselq
                        "\t\t\tin_reset <= 1\'b1;\n"
1360
                        "\t\t\to_sync <= 1\'b0;\n"
1361 5 dgisselq
                "\t\tend else if (i_ce)\n"
1362
                "\t\tbegin\n"
1363 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
1364
                        "\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
1365
                                "\t\t\t\tin_reset <= 1\'b0;\n"
1366 5 dgisselq
                        "\t\t\tif (in_reset)\n"
1367 26 dgisselq
                                "\t\t\t\to_sync <= 1\'b0;\n"
1368
                        "\t\t\telse\n"
1369
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
1370 5 dgisselq
                "\t\tend\n"
1371 2 dgisselq
"\n"
1372 26 dgisselq
        "\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
1373
        "\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
1374
"\n"
1375
        "\talways @(posedge i_clk)\n"
1376
                "\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
1377
        "\talways @(posedge i_clk)\n"
1378
                "\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
1379
"\n"
1380
"\n"
1381
        "\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
1382
"\n"
1383
        "\talways @(posedge i_clk)\n"
1384
                "\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1385
        "\talways @(posedge i_clk)\n"
1386
                "\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1387
        "\talways @(posedge i_clk)\n"
1388
                "\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1389
        "\talways @(posedge i_clk)\n"
1390
                "\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1391
"\n"
1392
        "\treg\tadrz;\n"
1393
        "\talways @(posedge i_clk)\n"
1394 28 dgisselq
                "\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n"
1395 26 dgisselq
"\n"
1396
        "\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
1397
        "\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1398
"\n"
1399 21 dgisselq
"endmodule\n");
1400 2 dgisselq
 
1401
        fclose(fp);
1402
}
1403
 
1404 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1405 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1406
        if (NULL == fp) {
1407
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1408
                perror("O/S Err was:");
1409
                return;
1410
        }
1411 23 dgisselq
        const   char    *rnd_string;
1412
        if (rounding == RND_TRUNCATE)
1413
                rnd_string = "truncate";
1414
        else if (rounding == RND_FROMZERO)
1415
                rnd_string = "roundfromzero";
1416
        else if (rounding == RND_HALFUP)
1417
                rnd_string = "roundhalfup";
1418
        else
1419
                rnd_string = "convround";
1420 2 dgisselq
 
1421
        fprintf(fp,
1422
"///////////////////////////////////////////////////////////////////////////\n"
1423
"//\n"
1424
"// Filename:   butterfly.v\n"
1425
"//\n"
1426
"// Project:    %s\n"
1427
"//\n"
1428
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1429
"//             in frequency version of an FFT.  Specifically, given\n"
1430
"//             complex Left and Right values together with a \n"
1431
"//             coefficient, the output of this routine is given\n"
1432
"//             by:\n"
1433
"//\n"
1434
"//             L' = L + R\n"
1435
"//             R' = (L - R)*C\n"
1436
"//\n"
1437
"//             The rest of the junk below handles timing (mostly),\n"
1438
"//             to make certain that L' and R' reach the output at\n"
1439
"//             the same clock.  Further, just to make certain\n"
1440
"//             that is the case, an 'aux' input exists.  This\n"
1441
"//             aux value will come out of this routine synchronized\n"
1442
"//             to the values it came in with.  (i.e., both L', R',\n"
1443
"//             and aux all have the same delay.)  Hence, a caller\n"
1444
"//             of this routine may set aux on the first input with\n"
1445
"//             valid data, and then wait to see aux set on the output\n"
1446
"//             to know when to find the first output with valid data.\n"
1447
"//\n"
1448
"//             All bits are preserved until the very last clock,\n"
1449
"//             where any more bits than OWIDTH will be quietly\n"
1450
"//             discarded.\n"
1451
"//\n"
1452
"//             This design features no overflow checking.\n"
1453
"// \n"
1454
"// Notes:\n"
1455
"//             CORDIC:\n"
1456
"//             Much as we would like, we can't use a cordic here.\n"
1457
"//             The goal is to accomplish an FFT, as defined, and a\n"
1458
"//             CORDIC places a scale factor onto the data.  Removing\n"
1459
"//             the scale factor would cost a two multiplies, which\n"
1460
"//             is precisely what we are trying to avoid.\n"
1461
"//\n"
1462
"//\n"
1463
"//             3-MULTIPLIES:\n"
1464
"//             It should also be possible to do this with three \n"
1465
"//             multiplies and an extra two addition cycles.  \n"
1466
"//\n"
1467
"//             We want\n"
1468
"//                     R+I = (a + jb) * (c + jd)\n"
1469
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1470
"//             We multiply\n"
1471
"//                     P1 = ac\n"
1472
"//                     P2 = bd\n"
1473
"//                     P3 = (a+b)(c+d)\n"
1474
"//             Then \n"
1475
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1476
"//\n"
1477
"//             WIDTHS:\n"
1478
"//             On multiplying an X width number by an\n"
1479
"//             Y width number, X>Y, the result should be (X+Y)\n"
1480
"//             bits, right?\n"
1481
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1482
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1483
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1484
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1485
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1486
"//             YUP!  But just barely.  Do this and you'll really want\n"
1487
"//             to drop a bit, although you will risk overflow in so\n"
1488
"//             doing.\n"
1489 26 dgisselq
"//\n"
1490
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
1491
"//             synchronization lines no longer go through the FIFO with the\n"
1492
"//             left hand sum, but are kept out of memory.  This allows the\n"
1493
"//             butterfly to use more optimal memory resources, while also\n"
1494
"//             guaranteeing that the sync lines can be properly reset upon\n"
1495
"//             any reset signal.\n"
1496
"//\n"
1497 2 dgisselq
"//\n%s"
1498
"//\n", prjname, creator);
1499
        fprintf(fp, "%s", cpyleft);
1500 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1501 2 dgisselq
 
1502
        fprintf(fp,
1503 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1504 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1505
        "\t// Public changeable parameters ...\n"
1506 29 dgisselq
        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
1507
#ifdef  TST_BUTTERFLY_CWIDTH
1508
        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
1509
#else
1510
        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
1511
#endif
1512
#ifdef  TST_BUTTERFLY_OWIDTH
1513
        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
1514
#else
1515
        fprintf(fp, "OWIDTH=IWIDTH+1;\n");
1516
#endif
1517
        fprintf(fp,
1518 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1519 29 dgisselq
        "\tparameter    MPYDELAY=%d'd%d,\n"
1520 28 dgisselq
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
1521 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1522
        "\t// this value is fractional, then round up to the nearest\n"
1523
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1524 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1525 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1526 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1527
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1528
        "\tinput\t\ti_aux;\n"
1529
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1530 26 dgisselq
        "\toutput\treg\to_aux;\n"
1531 29 dgisselq
        "\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),
1532
                lgdelay(16,xtracbits));
1533 14 dgisselq
        fprintf(fp,
1534 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1535
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1536
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1537
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1538
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1539
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1540
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1541 2 dgisselq
"\n"
1542 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1543 2 dgisselq
"\n"
1544 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1545
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1546 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1547 26 dgisselq
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1548 5 dgisselq
"\n");
1549
        fprintf(fp,
1550
        "\t// Set up the input to the multiply\n"
1551 2 dgisselq
        "\talways @(posedge i_clk)\n"
1552
                "\t\tif (i_ce)\n"
1553
                "\t\tbegin\n"
1554
                        "\t\t\t// One clock just latches the inputs\n"
1555
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1556
                        "\t\t\tr_right <= i_right;\n"
1557
                        "\t\t\tr_coef  <= i_coef;\n"
1558
                        "\t\t\t// Next clock adds/subtracts\n"
1559
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1560
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1561
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1562
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1563
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1564
                        "\t\t\tr_coef_2<= r_coef;\n"
1565
        "\t\tend\n"
1566 5 dgisselq
"\n");
1567
        fprintf(fp,
1568
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1569
        "\t// to be multiplied, but yet we still need the results in sync\n"
1570
        "\t// with the answer when it is ready.\n"
1571 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1572 2 dgisselq
        "\talways @(posedge i_clk)\n"
1573 6 dgisselq
                "\t\tif (i_rst)\n"
1574
                        "\t\t\tfifo_addr <= 0;\n"
1575 26 dgisselq
                "\t\telse if (i_ce)\n"
1576 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1577
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1578
                        "\t\t\t// right side.\n"
1579
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1580 14 dgisselq
"\n"
1581 26 dgisselq
        "\talways @(posedge i_clk)\n"
1582
                "\t\tif (i_ce)\n"
1583
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
1584 2 dgisselq
"\n"
1585 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1586
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1587
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1588
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1589 2 dgisselq
"\n"
1590 5 dgisselq
"\n");
1591
        fprintf(fp,
1592
        "\t// Multiply output is always a width of the sum of the widths of\n"
1593
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1594
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1595
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1596
        "\t// three multiply complex multiply cannot increase the total\n"
1597
        "\t// number of bits in our final output.  We\'ll take care of\n"
1598
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1599
        "\t// below.\n"
1600 2 dgisselq
"\n"
1601 5 dgisselq
"\n");
1602
        fprintf(fp,
1603
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1604
        "\t// by doing three multiplies we accomplish the work of four.\n"
1605
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1606
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1607
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1608
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1609
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1610
        "\t// and P3 as\n"
1611
        "\t//\tP1 = ac\n"
1612
        "\t//\tP2 = bd\n"
1613
        "\t//\tP3 = (a + b) * (c + d)\n"
1614
        "\t// and then complete our final answer with\n"
1615
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1616
        "\t//\tad + bc = P3 - P2 - P1\n"
1617
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1618
        "\t//\t        = bc + ad (this checks)\n"
1619 2 dgisselq
"\n"
1620 5 dgisselq
"\n");
1621
        fprintf(fp,
1622
        "\t// This should really be based upon an IF, such as in\n"
1623
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1624
        "\t// However, this is the only (other) way I know to do it.\n"
1625 29 dgisselq
        "\tgenerate if (CWIDTH < IWIDTH+1)\n"
1626 2 dgisselq
        "\tbegin\n"
1627 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1628
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1629
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1630
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1631
                "\n"
1632 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1633 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1634
                "\t\t// simpler, multiply.\n"
1635 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1636 2 dgisselq
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1637
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1638 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1639 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1640 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1641 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1642 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1643 2 dgisselq
        "\tend else begin\n"
1644 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1645
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1646
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1647
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1648
                "\n"
1649 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1650 2 dgisselq
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1651
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1652 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1653 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1654 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1655 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1656 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1657 2 dgisselq
        "\tend\n"
1658
        "\tendgenerate\n"
1659 29 dgisselq
"\n",
1660
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1661
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1662
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1663
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1664
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1665
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");
1666 5 dgisselq
        fprintf(fp,
1667
        "\t// These values are held in memory and delayed during the\n"
1668
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1669
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1670
        "\t// therefore, the left_x values need to be right shifted by\n"
1671
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1672
        "\t// extension.\n"
1673
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1674 26 dgisselq
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
1675
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1676
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1677 2 dgisselq
"\n"
1678
"\n"
1679 5 dgisselq
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1680
"\n");
1681
        fprintf(fp,
1682 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1683 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1684
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1685
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1686
        "\t// them, but the actual values will never fill all these bits.\n"
1687
        "\t// In particular, we only need:\n"
1688
        "\t//\t IWIDTH bits for the input\n"
1689
        "\t//\t     +1 bit for the add/subtract\n"
1690
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1691
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1692
        "\t//\t ------\n"
1693
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1694
        "\t//\n"
1695
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1696
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1697
        "\t//\t   IWIDTH bits for the input\n"
1698
        "\t//\t       +1 bit for the add/subtract\n"
1699
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1700
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1701
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1702
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1703
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1704
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1705
        "\t// SHIFT) we accomplish that here.\n"
1706 23 dgisselq
"\n");
1707
        fprintf(fp,
1708
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1709
 
1710
        fprintf(fp,
1711 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
1712 23 dgisselq
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1713
                rnd_string);
1714
        fprintf(fp,
1715 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
1716 23 dgisselq
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1717
                rnd_string);
1718
        fprintf(fp,
1719 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1720 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1721
        fprintf(fp,
1722 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1723 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1724
        fprintf(fp,
1725
        "\talways @(posedge i_clk)\n"
1726
                "\t\tif (i_ce)\n"
1727
                "\t\tbegin\n"
1728
                        "\t\t\t// First clock, recover all values\n"
1729
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1730
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1731
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1732
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1733
                        "\t\t\t// extra bits we need to get rid of.)\n"
1734
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1735
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1736 24 dgisselq
                "\t\tend\n"
1737
"\n");
1738 26 dgisselq
 
1739 24 dgisselq
        fprintf(fp,
1740 26 dgisselq
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
1741
        "\tinitial\taux_pipeline = 0;\n"
1742
        "\talways @(posedge i_clk)\n"
1743
        "\t\tif (i_rst)\n"
1744
        "\t\t\taux_pipeline <= 0;\n"
1745
        "\t\telse if (i_ce)\n"
1746
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
1747
"\n");
1748
        fprintf(fp,
1749 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1750 24 dgisselq
        "\talways @(posedge i_clk)\n"
1751
                "\t\tif (i_rst)\n"
1752
                "\t\t\to_aux <= 1\'b0;\n"
1753
                "\t\telse if (i_ce)\n"
1754
                "\t\tbegin\n"
1755
                        "\t\t\t// Second clock, latch for final clock\n"
1756 26 dgisselq
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
1757 23 dgisselq
                "\t\tend\n"
1758
"\n");
1759 24 dgisselq
 
1760 23 dgisselq
        fprintf(fp,
1761 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1762
        "\t// complement numbers per output word, so that each output word\n"
1763
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1764
        "\t// portion and the bottom half being the imaginary portion.\n"
1765 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1766
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1767 2 dgisselq
"\n"
1768
"endmodule\n");
1769
        fclose(fp);
1770
}
1771
 
1772 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1773 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1774
        if (NULL == fp) {
1775
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1776
                perror("O/S Err was:");
1777
                return;
1778
        }
1779
 
1780 23 dgisselq
        const   char    *rnd_string;
1781
        if (rounding == RND_TRUNCATE)
1782
                rnd_string = "truncate";
1783
        else if (rounding == RND_FROMZERO)
1784
                rnd_string = "roundfromzero";
1785
        else if (rounding == RND_HALFUP)
1786
                rnd_string = "roundhalfup";
1787
        else
1788
                rnd_string = "convround";
1789
 
1790
 
1791 22 dgisselq
        fprintf(fp,
1792
"///////////////////////////////////////////////////////////////////////////\n"
1793
"//\n"
1794
"// Filename:   hwbfly.v\n"
1795
"//\n"
1796
"// Project:    %s\n"
1797
"//\n"
1798
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1799
"//             in 'butterfly.v', save only that it uses the verilog \n"
1800 35 dgisselq
"//     operator '*' in hopes that the synthesizer would be able to optimize\n"
1801
"//     it with hardware resources.\n"
1802 22 dgisselq
"//\n"
1803 35 dgisselq
"//     It is understood that a hardware multiply can complete its operation in\n"
1804
"//     a single clock.\n"
1805 22 dgisselq
"//\n"
1806
"//\n%s"
1807
"//\n", prjname, creator);
1808
        fprintf(fp, "%s", cpyleft);
1809 35 dgisselq
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1810 22 dgisselq
        fprintf(fp,
1811
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1812
                "\t\to_left, o_right, o_aux);\n"
1813
        "\t// Public changeable parameters ...\n"
1814
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1815
        "\t// Parameters specific to the core that should not be changed.\n"
1816 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1817 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1818
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1819
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1820
        "\tinput\t\ti_aux;\n"
1821
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1822
        "\toutput\treg\to_aux;\n"
1823
"\n", xtracbits);
1824
        fprintf(fp,
1825
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1826
        "\treg\t                        r_aux, r_aux_2;\n"
1827 35 dgisselq
        "\treg\t[(2*CWIDTH-1):0]        r_coef;\n"
1828 22 dgisselq
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1829
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1830
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1831
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1832
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1833 26 dgisselq
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1834 22 dgisselq
"\n"
1835
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1836
"\n"
1837
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1838
"\n"
1839
        "\t// Set up the input to the multiply\n"
1840 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1841
        "\tinitial r_aux_2 = 1\'b0;\n"
1842 22 dgisselq
        "\talways @(posedge i_clk)\n"
1843 25 dgisselq
                "\t\tif (i_rst)\n"
1844
                "\t\tbegin\n"
1845 26 dgisselq
                        "\t\t\tr_aux <= 1\'b0;\n"
1846
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1847 25 dgisselq
                "\t\tend else if (i_ce)\n"
1848
                "\t\tbegin\n"
1849
                        "\t\t\t// One clock just latches the inputs\n"
1850 26 dgisselq
                        "\t\t\tr_aux <= i_aux;\n"
1851
                        "\t\t\t// Next clock adds/subtracts\n"
1852
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1853
                        "\t\t\tr_aux_2 <= r_aux;\n"
1854
                "\t\tend\n"
1855
        "\talways @(posedge i_clk)\n"
1856
                "\t\tif (i_ce)\n"
1857
                "\t\tbegin\n"
1858
                        "\t\t\t// One clock just latches the inputs\n"
1859 25 dgisselq
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1860
                        "\t\t\tr_right <= i_right;\n"
1861
                        "\t\t\tr_coef  <= i_coef;\n"
1862
                        "\t\t\t// Next clock adds/subtracts\n"
1863
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1864
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1865
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1866
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1867
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1868 26 dgisselq
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1869
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1870 25 dgisselq
                "\t\tend\n"
1871 22 dgisselq
        "\n\n");
1872
        fprintf(fp,
1873
"\t// See comments in the butterfly.v source file for a discussion of\n"
1874
"\t// these operations and the appropriate bit widths.\n\n");
1875 33 dgisselq
        fprintf(fp,
1876 26 dgisselq
        "\treg\tsigned  [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1877
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1878 22 dgisselq
"\n"
1879 26 dgisselq
        "\treg\tsigned  [(CWIDTH-1):0]  p1c_in, p2c_in; // Coefficient multiply inputs\n"
1880
        "\treg\tsigned  [(IWIDTH):0]    p1d_in, p2d_in; // Data multiply inputs\n"
1881
        "\treg\tsigned  [(CWIDTH):0]    p3c_in; // Product 3, coefficient input\n"
1882
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in; // Product 3, data input\n"
1883 22 dgisselq
"\n"
1884 25 dgisselq
        "\tinitial leftv    = 0;\n"
1885
        "\tinitial leftvv   = 0;\n"
1886 22 dgisselq
        "\talways @(posedge i_clk)\n"
1887
        "\tbegin\n"
1888
                "\t\tif (i_rst)\n"
1889
                "\t\tbegin\n"
1890
                        "\t\t\tleftv <= 0;\n"
1891
                        "\t\t\tleftvv <= 0;\n"
1892 26 dgisselq
                "\t\tend else if (i_ce)\n"
1893 22 dgisselq
                "\t\tbegin\n"
1894
                        "\t\t\t// Second clock, pipeline = 1\n"
1895 26 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1896
"\n"
1897
                        "\t\t\t// Third clock, pipeline = 3\n"
1898
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1899
                        "\t\t\tleftvv <= leftv;\n"
1900
                "\t\tend\n"
1901
        "\tend\n"
1902
"\n"
1903
        "\talways @(posedge i_clk)\n"
1904
                "\t\tif (i_ce)\n"
1905
                "\t\tbegin\n"
1906
                        "\t\t\t// Second clock, pipeline = 1\n"
1907
                        "\t\t\tp1c_in <= ir_coef_r;\n"
1908
                        "\t\t\tp2c_in <= ir_coef_i;\n"
1909
                        "\t\t\tp1d_in <= r_dif_r;\n"
1910
                        "\t\t\tp2d_in <= r_dif_i;\n"
1911 22 dgisselq
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1912
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1913 23 dgisselq
"\n"
1914
"\n"
1915 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1916 26 dgisselq
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1917 22 dgisselq
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1918
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1919
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1920 26 dgisselq
                "\t\tend\n"
1921 22 dgisselq
"\n"
1922 26 dgisselq
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1923
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1924
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1925 22 dgisselq
"\n");
1926
 
1927 33 dgisselq
        fprintf(fp,
1928 22 dgisselq
        "\t// These values are held in memory and delayed during the\n"
1929
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1930
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1931
        "\t// therefore, the left_x values need to be right shifted by\n"
1932
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1933
        "\t// extension.\n"
1934 24 dgisselq
        "\twire\taux_s;\n"
1935 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1936
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1937 26 dgisselq
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1938
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1939 22 dgisselq
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1940
"\n"
1941
"\n"
1942 26 dgisselq
        "\t(* use_dsp48=\"no\" *)\n"
1943 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1944
        fprintf(fp,
1945
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1946 22 dgisselq
 
1947
        fprintf(fp,
1948 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1949
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1950 23 dgisselq
                rnd_string);
1951
        fprintf(fp,
1952 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1953
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1954 23 dgisselq
                rnd_string);
1955
        fprintf(fp,
1956 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1957 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1958
        fprintf(fp,
1959 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1960 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1961
 
1962
        fprintf(fp,
1963 25 dgisselq
        "\tinitial left_saved = 0;\n"
1964
        "\tinitial o_aux      = 1\'b0;\n"
1965 22 dgisselq
        "\talways @(posedge i_clk)\n"
1966
        "\t\tif (i_rst)\n"
1967
        "\t\tbegin\n"
1968
                "\t\t\tleft_saved <= 0;\n"
1969 26 dgisselq
                "\t\t\to_aux <= 1\'b0;\n"
1970 22 dgisselq
        "\t\tend else if (i_ce)\n"
1971
        "\t\tbegin\n"
1972
                "\t\t\t// First clock, recover all values\n"
1973
                "\t\t\tleft_saved <= leftvv;\n"
1974 26 dgisselq
"\n"
1975
                "\t\t\t// Second clock, round and latch for final clock\n"
1976
                "\t\t\to_aux <= aux_s;\n"
1977
        "\t\tend\n"
1978
        "\talways @(posedge i_clk)\n"
1979
        "\t\tif (i_ce)\n"
1980
        "\t\tbegin\n"
1981 22 dgisselq
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1982
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1983
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1984
                "\t\t\t// extra bits we need to get rid of.)\n"
1985 26 dgisselq
                "\n"
1986
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1987
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1988
                "\t\t\t// they are prevented from using DSP48\'s\n"
1989
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1990
                "\t\t\tmpy_r <= w_one - w_two;\n"
1991
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1992 22 dgisselq
        "\t\tend\n"
1993
        "\n");
1994
 
1995
        fprintf(fp,
1996
        "\t// As a final step, we pack our outputs into two packed two's\n"
1997
        "\t// complement numbers per output word, so that each output word\n"
1998
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1999
        "\t// portion and the bottom half being the imaginary portion.\n"
2000 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
2001
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
2002 22 dgisselq
"\n"
2003
"endmodule\n");
2004
 
2005
}
2006
 
2007 26 dgisselq
void    build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
2008 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
2009
        int     cbits = nbits + xtra;
2010
 
2011
        if ((cbits * 2) >= sizeof(long long)*8) {
2012
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
2013
                exit(-1);
2014
        }
2015
 
2016
        if (fstage == NULL) {
2017
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
2018
                perror("O/S Err was:");
2019
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
2020
                return;
2021
        }
2022
 
2023
        fprintf(fstage,
2024
"////////////////////////////////////////////////////////////////////////////\n"
2025
"//\n"
2026 26 dgisselq
"// Filename:   %sfftstage_%c%d%s.v\n"
2027 2 dgisselq
"//\n"
2028
"// Project:    %s\n"
2029
"//\n"
2030
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
2031
"//             be used by a FFT core compiler to generate FFTs which may be\n"
2032
"//             used as part of an FFT core.  Specifically, this file \n"
2033
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
2034
"//             FFT, there shall be (N-1) of these stages.  \n"
2035
"//\n%s"
2036
"//\n",
2037 26 dgisselq
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
2038 2 dgisselq
        fprintf(fstage, "%s", cpyleft);
2039 35 dgisselq
        fprintf(fstage, "//\n//\n`default_nettype\tnone\n//\n");
2040 26 dgisselq
        fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
2041
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
2042
                (dbg)?", o_dbg":"");
2043 2 dgisselq
        // These parameter values are useless at this point--they are to be
2044
        // replaced by the parameter values in the calling program.  Only
2045
        // problem is, the CWIDTH needs to match exactly!
2046
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
2047
                nbits, cbits, nbits+1);
2048
        fprintf(fstage,
2049
"\t// Parameters specific to the core that should be changed when this\n"
2050
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
2051
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
2052
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
2053 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
2054 33 dgisselq
        fprintf(fstage,
2055 2 dgisselq
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
2056
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
2057
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
2058
"\toutput       reg                             o_sync;\n"
2059 26 dgisselq
"\n");
2060
        if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
2061
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
2062
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
2063
"\n");
2064
        }
2065 33 dgisselq
        fprintf(fstage,
2066 2 dgisselq
"\treg  wait_for_sync;\n"
2067
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
2068
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
2069 8 dgisselq
"\treg  ib_sync;\n"
2070 2 dgisselq
"\n"
2071
"\treg  b_started;\n"
2072
"\twire ob_sync;\n"
2073 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
2074 33 dgisselq
        fprintf(fstage,
2075 2 dgisselq
"\n"
2076
"\t// %scmem is defined as an array of real and complex values,\n"
2077
"\t// where the top CWIDTH bits are the real value and the bottom\n"
2078
"\t// CWIDTH bits are the imaginary value.\n"
2079
"\t//\n"
2080 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
2081 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
2082
"\t//\n"
2083
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
2084
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
2085 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
2086
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
2087 2 dgisselq
        {
2088
                FILE    *cmem;
2089
 
2090 14 dgisselq
                {
2091
                        char    *memfile, *ptr;
2092
 
2093
                        memfile = new char[strlen(fname)+128];
2094
                        strcpy(memfile, fname);
2095
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
2096
                                ptr++;
2097
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
2098
                        } else {
2099
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
2100 26 dgisselq
                                        coredir, (inv)?"i":"",
2101 14 dgisselq
                                        (odd)?'o':'e', stage*2);
2102
                        }
2103
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
2104
                        cmem = fopen(memfile, "w");
2105
                        if (NULL == cmem) {
2106
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
2107
                                perror("Err from O/S:");
2108
                                exit(-2);
2109
                        }
2110
 
2111
                        delete[] memfile;
2112 2 dgisselq
                }
2113
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
2114
                for(int i=0; i<stage/2; i++) {
2115
                        int k = 2*i+odd;
2116 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
2117 2 dgisselq
                        double  c, s;
2118
                        long long ic, is, vl;
2119
 
2120
                        c = cos(W); s = sin(W);
2121 31 dgisselq
                        ic = (long long)llround((1ll<<(cbits-2)) * c);
2122
                        is = (long long)llround((1ll<<(cbits-2)) * s);
2123 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
2124
                        vl <<= (cbits);
2125
                        vl |= (is & (~(-1ll << (cbits))));
2126
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
2127
                        /*
2128
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
2129
                                ((cbits*2+3)/4), vl, c, s,
2130
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
2131
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
2132
                        */
2133
                } fclose(cmem);
2134
        }
2135
 
2136
        fprintf(fstage,
2137 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
2138 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
2139
"\n"
2140 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
2141 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
2142
"\n"
2143 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
2144
"\tinitial iaddr = 0;\n"
2145 2 dgisselq
"\talways @(posedge i_clk)\n"
2146
        "\t\tif (i_rst)\n"
2147
        "\t\tbegin\n"
2148 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b1;\n"
2149 2 dgisselq
                "\t\t\tiaddr <= 0;\n"
2150
        "\t\tend\n"
2151 35 dgisselq
        "\t\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"
2152 2 dgisselq
        "\t\tbegin\n"
2153
                "\t\t\t//\n"
2154
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
2155
                "\t\t\t//\n"
2156 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
2157 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b0;\n"
2158
        "\t\tend\n"
2159
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
2160 35 dgisselq
        "\t\tif ((i_ce)&&(!iaddr[LGSPAN])) // and write the same address on\n"
2161 26 dgisselq
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
2162
        "\n");
2163 23 dgisselq
 
2164
        fprintf(fstage,
2165
        "\t//\n"
2166
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
2167
        "\t//\n"
2168 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
2169 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2170 26 dgisselq
                "\t\tif (i_rst)\n"
2171
                        "\t\t\tib_sync <= 1\'b0;\n"
2172
                "\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
2173
                        "\t\t\tbegin\n"
2174
                                "\t\t\t\t// Set the sync to true on the very first\n"
2175
                                "\t\t\t\t// valid input in, and hence on the very\n"
2176
                                "\t\t\t\t// first valid data out per FFT.\n"
2177
                                "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
2178
                        "\t\t\tend\n"
2179 24 dgisselq
        "\talways\t@(posedge i_clk)\n"
2180 26 dgisselq
                "\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
2181
                "\t\t\tbegin\n"
2182
                        "\t\t\t\t// One input from memory, ...\n"
2183
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
2184
                        "\t\t\t\t// One input clocked in from the top\n"
2185
                        "\t\t\t\tib_b <= i_data;\n"
2186
                        "\t\t\t\t// and the coefficient or twiddle factor\n"
2187
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
2188
                "\t\t\tend\n\n", (inv)?"i":"");
2189 23 dgisselq
 
2190
        if (hwmpy) {
2191
                fprintf(fstage,
2192
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2193
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
2194
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2195
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
2196
        } else {
2197
        fprintf(fstage,
2198
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2199
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
2200
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2201
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
2202
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
2203
        }
2204
 
2205
        fprintf(fstage,
2206
        "\t//\n"
2207
        "\t// Next step: recover the outputs from the butterfly\n"
2208
        "\t//\n"
2209 25 dgisselq
        "\tinitial oB        = 0;\n"
2210
        "\tinitial o_sync    = 0;\n"
2211
        "\tinitial b_started = 0;\n"
2212 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2213
        "\t\tif (i_rst)\n"
2214
        "\t\tbegin\n"
2215
                "\t\t\toB <= 0;\n"
2216
                "\t\t\to_sync <= 0;\n"
2217
                "\t\t\tb_started <= 0;\n"
2218
        "\t\tend else if (i_ce)\n"
2219
        "\t\tbegin\n"
2220 35 dgisselq
        "\t\t\to_sync <= (!oB[LGSPAN])?ob_sync : 1\'b0;\n"
2221 26 dgisselq
        "\t\t\tif (ob_sync||b_started)\n"
2222
                "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
2223 35 dgisselq
        "\t\t\tif ((ob_sync)&&(!oB[LGSPAN]))\n"
2224 26 dgisselq
                "\t\t\t// A butterfly output is available\n"
2225
                        "\t\t\t\tb_started <= 1\'b1;\n"
2226 23 dgisselq
        "\t\tend\n\n");
2227 26 dgisselq
        fprintf(fstage,
2228
        "\treg  [(LGSPAN-1):0]\t\tdly_addr;\n"
2229
        "\treg  [(2*OWIDTH-1):0]\tdly_value;\n"
2230
        "\talways @(posedge i_clk)\n"
2231
        "\t\tif (i_ce)\n"
2232
        "\t\tbegin\n"
2233
        "\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
2234
        "\t\t\tdly_value <= ob_b;\n"
2235
        "\t\tend\n"
2236
        "\talways @(posedge i_clk)\n"
2237
        "\t\tif (i_ce)\n"
2238
                "\t\t\tomem[dly_addr] <= dly_value;\n"
2239
"\n");
2240
        fprintf(fstage,
2241
        "\talways @(posedge i_clk)\n"
2242
        "\t\tif (i_ce)\n"
2243 35 dgisselq
        "\t\t\to_data <= (!oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
2244 26 dgisselq
"\n");
2245 22 dgisselq
        fprintf(fstage, "endmodule\n");
2246 2 dgisselq
}
2247
 
2248
void    usage(void) {
2249
        fprintf(stderr,
2250 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
2251 2 dgisselq
// "\tfftgen -i\n"
2252 26 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
2253
"\t\ta real FFT) at one clock per two real input samples.\n"
2254 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
2255
"\t\tlonger than the corresponding data bits, to help avoid\n"
2256 32 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits longer\n"
2257 26 dgisselq
"\t\tthan the data bits.\n"
2258 2 dgisselq
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
2259 26 dgisselq
"\t\tThe default is a subdirectory of the current directory named %s.\n"
2260 2 dgisselq
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
2261 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
2262
"\t\ta required parameter.)\n"
2263
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
2264
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
2265
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
2266 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
2267
"\t\tproduce.  Internal values greater than this value will be\n"
2268 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
2269
"\t\tsize by one bit for every two FFT stages.)\n"
2270 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
2271 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
2272
"\t\tcomplex values into the FFT.\n"
2273 22 dgisselq
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
2274
"\t\tmultiplication facility, instead of shift-add emulation.\n"
2275 26 dgisselq
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
2276
"\t\tbe accelerated in this fashion.  The default is not to use any\n"
2277
"\t\thardware multipliers.\n"
2278
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
2279
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
2280 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
2281
"\t\talgorithms that need to apply a filter without needing to do\n"
2282
"\t\tbin shifting, as these algorithms can, with this option, just\n"
2283
"\t\tmultiply by a bit reversed correlation sequence and then\n"
2284 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
2285
"\t\ta decimation in time inverse to do this, which this program does\n"
2286
"\t\tnot yet provide.)\n"
2287 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
2288 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
2289
"\t\trounding or truncation of the answer to the final number of bits.\n"
2290 26 dgisselq
"\t\tThe default is to use %d extra bits internally.\n",
2291
/*
2292 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
2293
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
2294
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
2295 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
2296
*/
2297
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
2298 2 dgisselq
}
2299
 
2300
// Features still needed:
2301
//      Interactivity.
2302
int main(int argc, char **argv) {
2303
        int     fftsize = -1, lgsize = -1;
2304 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
2305
                        nummpy=DEF_NMPY, nonmpy=2;
2306
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
2307
        bool    bitreverse = true, inverse=false,
2308
                verbose_flag = false, single_clock = false,
2309
                real_fft = false;
2310 2 dgisselq
        FILE    *vmain;
2311 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
2312 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
2313
        // ROUND_T      rounding = RND_HALFUP;
2314 2 dgisselq
 
2315 26 dgisselq
        bool    dbg = false;
2316
        int     dbgstage = 128;
2317
 
2318 2 dgisselq
        if (argc <= 1)
2319
                usage();
2320
 
2321 14 dgisselq
        cmdline = argv[0];
2322 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
2323 14 dgisselq
                cmdline += " ";
2324
                cmdline += argv[argn];
2325
        }
2326
 
2327
        for(int argn=1; argn<argc; argn++) {
2328 2 dgisselq
                if ('-' == argv[argn][0]) {
2329
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
2330
                                switch(argv[argn][j]) {
2331 26 dgisselq
                                        /*
2332 2 dgisselq
                                        case '0':
2333
                                                inverse = false;
2334
                                                break;
2335 26 dgisselq
                                        */
2336 2 dgisselq
                                        case '1':
2337 26 dgisselq
                                                single_clock = true;
2338 2 dgisselq
                                                break;
2339 28 dgisselq
                                        case 'a':
2340
                                                if (argn+1 >= argc) {
2341
                                                        printf("ERR: No header filename given\n\n");
2342
                                                        usage(); exit(-1);
2343
                                                }
2344
                                                hdrname = argv[++argn];
2345
                                                j+= 200;
2346
                                                break;
2347 2 dgisselq
                                        case 'c':
2348
                                                if (argn+1 >= argc) {
2349 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
2350 2 dgisselq
                                                        usage(); exit(-1);
2351
                                                }
2352
                                                xtracbits = atoi(argv[++argn]);
2353
                                                j+= 200;
2354
                                                break;
2355
                                        case 'd':
2356
                                                if (argn+1 >= argc) {
2357 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
2358 2 dgisselq
                                                        usage(); exit(-1);
2359
                                                }
2360 14 dgisselq
                                                coredir = argv[++argn];
2361 2 dgisselq
                                                j += 200;
2362
                                                break;
2363 26 dgisselq
                                        case 'D':
2364
                                                dbg = true;
2365
                                                if (argn+1 >= argc) {
2366
                                                        printf("ERR: No debug stage number given!\n\n");
2367
                                                        usage(); exit(-1);
2368
                                                }
2369
                                                dbgstage = atoi(argv[++argn]);
2370
                                                j+= 200;
2371
                                                break;
2372 2 dgisselq
                                        case 'f':
2373
                                                if (argn+1 >= argc) {
2374 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
2375 2 dgisselq
                                                        usage(); exit(-1);
2376
                                                }
2377
                                                fftsize = atoi(argv[++argn]);
2378
                                                { int sln = strlen(argv[argn]);
2379
                                                if (!isdigit(argv[argn][sln-1])){
2380
                                                        switch(argv[argn][sln-1]) {
2381
                                                        case 'k': case 'K':
2382
                                                                fftsize <<= 10;
2383
                                                                break;
2384
                                                        case 'm': case 'M':
2385
                                                                fftsize <<= 20;
2386
                                                                break;
2387
                                                        case 'g': case 'G':
2388
                                                                fftsize <<= 30;
2389
                                                                break;
2390
                                                        default:
2391 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
2392 2 dgisselq
                                                                exit(-1);
2393
                                                        }
2394
                                                }}
2395
                                                j += 200;
2396
                                                break;
2397
                                        case 'h':
2398
                                                usage();
2399
                                                exit(0);
2400
                                                break;
2401
                                        case 'i':
2402 26 dgisselq
                                                inverse = true;
2403 2 dgisselq
                                                break;
2404
                                        case 'm':
2405
                                                if (argn+1 >= argc) {
2406 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
2407 2 dgisselq
                                                        exit(-1);
2408
                                                }
2409
                                                maxbitsout = atoi(argv[++argn]);
2410
                                                j += 200;
2411
                                                break;
2412
                                        case 'n':
2413
                                                if (argn+1 >= argc) {
2414 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
2415 2 dgisselq
                                                        exit(-1);
2416
                                                }
2417
                                                nbitsin = atoi(argv[++argn]);
2418
                                                j += 200;
2419
                                                break;
2420 22 dgisselq
                                        case 'p':
2421
                                                if (argn+1 >= argc) {
2422
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
2423
                                                        exit(-1);
2424
                                                }
2425
                                                nummpy = atoi(argv[++argn]);
2426
                                                j += 200;
2427
                                                break;
2428 26 dgisselq
                                        case 'r':
2429
                                                real_fft = true;
2430
                                                break;
2431 2 dgisselq
                                        case 'S':
2432
                                                bitreverse = true;
2433
                                                break;
2434
                                        case 's':
2435
                                                bitreverse = false;
2436
                                                break;
2437 19 dgisselq
                                        case 'x':
2438
                                                if (argn+1 >= argc) {
2439
                                                        printf("ERR: No extra number of bits given!\n\n");
2440
                                                        usage(); exit(-1);
2441
                                                } j+= 200;
2442
                                                xtrapbits = atoi(argv[++argn]);
2443
                                                break;
2444 2 dgisselq
                                        case 'v':
2445
                                                verbose_flag = true;
2446
                                                break;
2447 33 dgisselq
                                        default:
2448 2 dgisselq
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
2449
                                                usage();
2450
                                                exit(-1);
2451
                                }
2452
                        }
2453
                } else {
2454
                        printf("Unrecognized argument, %s\n", argv[argn]);
2455
                        usage();
2456
                        exit(-1);
2457
                }
2458
        }
2459
 
2460 26 dgisselq
        if (real_fft) {
2461
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2462
                exit(0);
2463
        } if (single_clock) {
2464
                printf("The single clock FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2465
                exit(0);
2466
        } if (!bitreverse) {
2467
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
2468
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
2469
                printf("built.\n");
2470
        }
2471
 
2472 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
2473
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
2474
                        ;
2475
        }
2476
 
2477
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
2478
                printf("INVALID PARAMETERS!!!!\n");
2479
                exit(-1);
2480
        }
2481
 
2482
 
2483
        if (nextlg(fftsize) != fftsize) {
2484
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
2485
                                fftsize);
2486
                exit(-1);
2487
        } else if (fftsize < 2) {
2488
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
2489
                                fftsize);
2490
                if (fftsize == 1) {
2491
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
2492
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
2493
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
2494
                        fprintf(stderr, "can be connected straight to the input.\n");
2495
                } else {
2496
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
2497
                        fprintf(stderr, "Is such an operation even defined?\n");
2498
                }
2499
                exit(-1);
2500
        }
2501
 
2502
        // Calculate how many output bits we'll have, and what the log
2503
        // based two size of our FFT is.
2504
        {
2505
                int     tmp_size = fftsize;
2506
 
2507
                // The first stage always accumulates one bit, regardless
2508
                // of whether you need to or not.
2509
                nbitsout = nbitsin + 1;
2510
                tmp_size >>= 1;
2511
 
2512
                while(tmp_size > 4) {
2513
                        nbitsout += 1;
2514
                        tmp_size >>= 2;
2515
                }
2516
 
2517
                if (tmp_size > 1)
2518
                        nbitsout ++;
2519
 
2520
                if (fftsize <= 2)
2521
                        bitreverse = false;
2522
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2523
                nbitsout = maxbitsout;
2524
 
2525 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2526
        {
2527
                int     lgv = lgval(fftsize);
2528 2 dgisselq
 
2529 22 dgisselq
                nonmpy = lgv - nummpy;
2530
                if (nonmpy < 2) nonmpy = 2;
2531
                nummpy = lgv - nonmpy;
2532
        }
2533
 
2534 2 dgisselq
        {
2535
                struct stat     sbuf;
2536 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2537 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2538 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2539 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2540
                                fprintf(stderr, "To try again, please remove this file.\n");
2541
                                exit(-1);
2542
                        }
2543 33 dgisselq
                } else
2544 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2545
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2546
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2547 2 dgisselq
                        exit(-1);
2548
                }
2549
        }
2550
 
2551 28 dgisselq
        if (hdrname.length() > 0) {
2552
                FILE    *hdr = fopen(hdrname.c_str(), "w");
2553
                if (hdr == NULL) {
2554
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
2555
                        perror("O/S Err:");
2556
                        exit(-2);
2557
                }
2558
 
2559
                fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n");
2560
                fprintf(hdr, "//\n");
2561
                fprintf(hdr, "// Filename:      %s\n", hdrname.c_str());
2562
                fprintf(hdr, "//\n");
2563
                fprintf(hdr, "// Project:       %s\n", prjname);
2564
                fprintf(hdr, "//\n");
2565
                fprintf(hdr, "// Purpose:       This simple header file captures the internal constants\n");
2566
                fprintf(hdr, "//                within the FFT that were used to build it, for the purpose\n");
2567
                fprintf(hdr, "//                of making C++ integration (and test bench testing) simpler.  That\n");
2568
                fprintf(hdr, "//                is, should the FFT change size, this will note that size change\n");
2569
                fprintf(hdr, "//                and thus any test bench or other C++ program dependent upon\n");
2570
                fprintf(hdr, "//                either the size of the FFT, the number of bits in or out of\n");
2571
                fprintf(hdr, "//                it, etc., can pick up the changes in the defines found within\n");
2572
                fprintf(hdr, "//                this file.\n");
2573
                fprintf(hdr, "//\n");
2574
                fprintf(hdr, "%s", creator);
2575
                fprintf(hdr, "//\n");
2576
                fprintf(hdr, "%s", cpyleft);
2577
                fprintf(hdr, "//\n"
2578
                "//\n"
2579
                "#ifndef %sFFTHDR_H\n"
2580
                "#define %sFFTHDR_H\n"
2581
                "\n"
2582
                "#define\t%sFFT_IWIDTH\t%d\n"
2583
                "#define\t%sFFT_OWIDTH\t%d\n"
2584
                "#define\t%sFFT_LGWIDTH\t%d\n"
2585
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
2586
                        (inverse)?"I":"", (inverse)?"I":"",
2587
                        (inverse)?"I":"", nbitsin,
2588
                        (inverse)?"I":"", nbitsout,
2589
                        (inverse)?"I":"", lgsize,
2590
                        (inverse)?"I":"", (inverse)?"I":"");
2591
                if (!bitreverse)
2592
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
2593
                                (inverse)?"I":"");
2594
                if (real_fft)
2595
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
2596
                if (!single_clock)
2597
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
2598 29 dgisselq
                if (USE_OLD_MULTIPLY)
2599
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
2600 33 dgisselq
 
2601 29 dgisselq
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
2602
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
2603
#ifdef  TST_LONGBIMPY_BW
2604
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
2605
#else
2606
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
2607
#endif
2608
 
2609
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
2610
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
2611
#ifdef  TST_SHIFTADDMPY_BW
2612
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
2613
#else
2614
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
2615
#endif
2616
 
2617
#define TST_SHIFTADDMPY_AW      16
2618
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
2619
                fprintf(hdr, "// Parameters for testing the butterfly\n");
2620
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
2621
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
2622
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
2623
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
2624
                                bflydelay(TST_BUTTERFLY_IWIDTH,
2625
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
2626
 
2627
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
2628
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
2629
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
2630
 
2631
                fprintf(hdr, "// Parameters for testing the double stage\n");
2632
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
2633
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
2634
 
2635
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
2636
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
2637 28 dgisselq
                fprintf(hdr, "\n" "#endif\n\n");
2638
                fclose(hdr);
2639
        }
2640
 
2641 14 dgisselq
        {
2642
                std::string     fname_string;
2643
 
2644
                fname_string = coredir;
2645
                fname_string += "/";
2646
                if (inverse) fname_string += "i";
2647
                fname_string += "fftmain.v";
2648
 
2649
                vmain = fopen(fname_string.c_str(), "w");
2650
                if (NULL == vmain) {
2651
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2652
                        perror("Err from O/S:");
2653
                        exit(-1);
2654
                }
2655 2 dgisselq
        }
2656
 
2657
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2658
        fprintf(vmain, "//\n");
2659
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2660
        fprintf(vmain, "//\n");
2661
        fprintf(vmain, "// Project:     %s\n", prjname);
2662
        fprintf(vmain, "//\n");
2663
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2664
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2665
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2666
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2667
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2668
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2669
        fprintf(vmain, "//\n");
2670
        fprintf(vmain, "// Parameters:\n");
2671
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2672
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2673
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2674
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2675
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2676 32 dgisselq
        fprintf(vmain, "//\ti_ce\tA clock enable line.  If this line is set, this module\n");
2677 2 dgisselq
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2678
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2679 32 dgisselq
        fprintf(vmain, "//\ti_left\tThe first of two complex input samples.  This value is split\n");
2680
        fprintf(vmain, "//\t\t\tinto two two\'s complement numbers, %d bits each, with\n", nbitsin);
2681
        fprintf(vmain, "//\t\t\tthe real portion in the high order bits, and the\n");
2682
        fprintf(vmain, "//\t\t\timaginary portion taking the bottom %d bits.\n", nbitsin);
2683
        fprintf(vmain, "//\ti_right\tThis is the same thing as i_left, only this is the second of\n");
2684
        fprintf(vmain, "//\t\t\ttwo such samples.  Hence, i_left would contain input\n");
2685
        fprintf(vmain, "//\t\t\tsample zero, i_right would contain sample one.  On the\n");
2686
        fprintf(vmain, "//\t\t\tnext clock i_left would contain input sample two,\n");
2687
        fprintf(vmain, "//\t\t\ti_right number three and so forth.\n");
2688
        fprintf(vmain, "//\to_left\tThe first of two output samples, of the same format as i_left,\n");
2689
        fprintf(vmain, "//\t\t\tonly having %d bits for each of the real and imaginary\n", nbitsout);
2690
        fprintf(vmain, "//\t\t\tcomponents, leading to %d bits total.\n", nbitsout*2);
2691
        fprintf(vmain, "//\to_right\tThe second of two output samples produced each clock.  This has\n");
2692
        fprintf(vmain, "//\t\t\tthe same format as o_left.\n");
2693
        fprintf(vmain, "//\to_sync\tA one bit output indicating the first valid sample produced by\n");
2694
        fprintf(vmain, "//\t\t\tthis FFT following a reset.  Ever after, this will\n");
2695
        fprintf(vmain, "//\t\t\tindicate the first sample of an FFT frame.\n");
2696 2 dgisselq
        fprintf(vmain, "//\n");
2697 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2698
        fprintf(vmain, "//\t\tfollowing command line:\n");
2699
        fprintf(vmain, "//\n");
2700
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2701
        fprintf(vmain, "//\n");
2702 2 dgisselq
        fprintf(vmain, "%s", creator);
2703
        fprintf(vmain, "//\n");
2704
        fprintf(vmain, "%s", cpyleft);
2705 35 dgisselq
        fprintf(vmain, "//\n//\n`default_nettype\tnone\n//\n");
2706 2 dgisselq
 
2707
 
2708
        fprintf(vmain, "//\n");
2709
        fprintf(vmain, "//\n");
2710
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2711
        fprintf(vmain, "\t\ti_left, i_right,\n");
2712 26 dgisselq
        fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
2713
                        (dbg)?", o_dbg":"");
2714 2 dgisselq
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2715
        assert(lgsize > 0);
2716
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2717
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2718
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2719
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2720 26 dgisselq
        if (dbg)
2721
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
2722 2 dgisselq
        fprintf(vmain, "\n\n");
2723
 
2724
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2725 33 dgisselq
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2726 2 dgisselq
        fprintf(vmain, "\n\n");
2727
 
2728
        int     tmp_size = fftsize, lgtmp = lgsize;
2729
        if (fftsize == 2) {
2730
                if (bitreverse) {
2731
                        fprintf(vmain, "\treg\tbr_start;\n");
2732 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2733 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2734
                        fprintf(vmain, "\t\tif (i_rst)\n");
2735 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
2736 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2737 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
2738 2 dgisselq
                }
2739
                fprintf(vmain, "\n\n");
2740 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2741 35 dgisselq
                fprintf(vmain, "\t\t\t(!i_rst), i_left, i_right, br_left, br_right);\n");
2742 2 dgisselq
                fprintf(vmain, "\n\n");
2743
        } else {
2744
                int     nbits = nbitsin, dropbit=0;
2745 26 dgisselq
                int     obits = nbits+1+xtrapbits;
2746
 
2747
                if ((maxbitsout > 0)&&(obits > maxbitsout))
2748
                        obits = maxbitsout;
2749
 
2750 2 dgisselq
                // Always do a first stage
2751 14 dgisselq
                {
2752 22 dgisselq
                        bool    mpystage;
2753 2 dgisselq
 
2754 22 dgisselq
                        // Last two stages are always non-multiply stages
2755
                        // since the multiplies can be done by adds
2756
                        mpystage = ((lgtmp-2) <= nummpy);
2757
 
2758 28 dgisselq
                        if (mpystage)
2759
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2760
                        fprintf(vmain, "\n\n");
2761 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s%d;\n", fftsize);
2762
                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n", fftsize);
2763 28 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
2764
                        fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2765
                                (inverse)?"i":"", fftsize,
2766
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
2767
                                xtracbits, obits+xtrapbits,
2768
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2769
                                fftsize);
2770 35 dgisselq
                        fprintf(vmain, "\t\t\t(!i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
2771 28 dgisselq
                        fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2772
                                (inverse)?"i":"", fftsize,
2773
                                xtracbits, obits+xtrapbits,
2774
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2775
                                fftsize);
2776 35 dgisselq
                        fprintf(vmain, "\t\t\t(!i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2777 28 dgisselq
                        fprintf(vmain, "\n\n");
2778
 
2779
 
2780
                        std::string     fname;
2781
                        char    numstr[12];
2782
 
2783 14 dgisselq
                        fname = coredir + "/";
2784
                        if (inverse) fname += "i";
2785
                        fname += "fftstage_e";
2786
                        sprintf(numstr, "%d", fftsize);
2787
                        fname += numstr;
2788 26 dgisselq
                        if ((dbg)&&(dbgstage == fftsize))
2789
                                fname += "_dbg";
2790 14 dgisselq
                        fname += ".v";
2791 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize));    // Even stage
2792 14 dgisselq
 
2793
                        fname = coredir + "/";
2794
                        if (inverse) fname += "i";
2795
                        fname += "fftstage_o";
2796
                        sprintf(numstr, "%d", fftsize);
2797
                        fname += numstr;
2798
                        fname += ".v";
2799 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false);  // Odd  stage
2800 14 dgisselq
                }
2801
 
2802 26 dgisselq
                nbits = obits;  // New number of input bits
2803 2 dgisselq
                tmp_size >>= 1; lgtmp--;
2804
                dropbit = 0;
2805
                fprintf(vmain, "\n\n");
2806
                while(tmp_size >= 8) {
2807 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2808 2 dgisselq
 
2809
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2810
                                obits = maxbitsout;
2811
 
2812 14 dgisselq
                        {
2813 22 dgisselq
                                bool            mpystage;
2814 2 dgisselq
 
2815 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2816
 
2817 28 dgisselq
                                if (mpystage)
2818
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2819 35 dgisselq
                                fprintf(vmain, "\twire\t\tw_s%d;\n",
2820
                                        tmp_size);
2821
                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n",
2822
                                        tmp_size);
2823 28 dgisselq
                                fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
2824
                                        2*(obits+xtrapbits)-1,
2825
                                        tmp_size, tmp_size);
2826
                                fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2827
                                        (inverse)?"i":"", tmp_size,
2828
                                        ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
2829
                                        nbits+xtrapbits,
2830
                                        nbits+xtracbits+xtrapbits,
2831
                                        obits+xtrapbits,
2832
                                        lgsize, lgtmp-2,
2833
                                        lgdelay(nbits+xtrapbits,xtracbits),
2834
                                        (dropbit)?0:0, tmp_size);
2835
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
2836
                                        tmp_size<<1, tmp_size<<1,
2837
                                        tmp_size, tmp_size,
2838
                                        ((dbg)&&(dbgstage == tmp_size))
2839
                                                ?", o_dbg":"");
2840
                                fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2841
                                        (inverse)?"i":"", tmp_size,
2842
                                        nbits+xtrapbits,
2843
                                        nbits+xtracbits+xtrapbits,
2844
                                        obits+xtrapbits,
2845
                                        lgsize, lgtmp-2,
2846
                                        lgdelay(nbits+xtrapbits,xtracbits),
2847
                                        (dropbit)?0:0, tmp_size);
2848
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
2849
                                        tmp_size<<1, tmp_size<<1,
2850
                                        tmp_size, tmp_size);
2851
                                fprintf(vmain, "\n\n");
2852
 
2853
                                std::string     fname;
2854
                                char            numstr[12];
2855
 
2856 14 dgisselq
                                fname = coredir + "/";
2857
                                if (inverse) fname += "i";
2858
                                fname += "fftstage_e";
2859
                                sprintf(numstr, "%d", tmp_size);
2860
                                fname += numstr;
2861 26 dgisselq
                                if ((dbg)&&(dbgstage == tmp_size))
2862
                                        fname += "_dbg";
2863 14 dgisselq
                                fname += ".v";
2864 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
2865 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2866 26 dgisselq
                                        mpystage, ((dbg)&&(dbgstage == tmp_size)));     // Even stage
2867 2 dgisselq
 
2868 14 dgisselq
                                fname = coredir + "/";
2869
                                if (inverse) fname += "i";
2870
                                fname += "fftstage_o";
2871
                                sprintf(numstr, "%d", tmp_size);
2872
                                fname += numstr;
2873
                                fname += ".v";
2874 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
2875 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2876 26 dgisselq
                                        mpystage, false);       // Odd  stage
2877 14 dgisselq
                        }
2878
 
2879
 
2880 2 dgisselq
                        dropbit ^= 1;
2881
                        nbits = obits;
2882
                        tmp_size >>= 1; lgtmp--;
2883
                }
2884
 
2885
                if (tmp_size == 4) {
2886 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2887 2 dgisselq
 
2888
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2889
                                obits = maxbitsout;
2890
 
2891 35 dgisselq
                        fprintf(vmain, "\twire\t\tw_s4;\n");
2892
                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os4;\n\t// verilator lint_on  UNUSED\n");
2893 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2894 26 dgisselq
                        fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2895
                                ((dbg)&&(dbgstage==4))?"_dbg":"",
2896
                                nbits+xtrapbits, obits+xtrapbits, lgsize,
2897
                                (inverse)?1:0, (dropbit)?0:0);
2898
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
2899
                                ((dbg)&&(dbgstage==4))?", o_dbg":"");
2900 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2901 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2902 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2903 2 dgisselq
                        dropbit ^= 1;
2904
                        nbits = obits;
2905
                        tmp_size >>= 1; lgtmp--;
2906
                }
2907
 
2908
                {
2909 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2910 2 dgisselq
                        if (obits > nbitsout)
2911
                                obits = nbitsout;
2912
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2913
                                obits = maxbitsout;
2914
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2915
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2916 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
2917
                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
2918 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2919 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2920 2 dgisselq
 
2921
                        fprintf(vmain, "\n\n");
2922
                        nbits = obits;
2923
                }
2924
 
2925
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2926
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2927
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2928
                fprintf(vmain, "\n");
2929
                if (bitreverse) {
2930
                        fprintf(vmain, "\twire\tbr_start;\n");
2931
                        fprintf(vmain, "\treg\tr_br_started;\n");
2932 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2933 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2934
                        fprintf(vmain, "\t\tif (i_rst)\n");
2935 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
2936
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2937 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2938
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2939 2 dgisselq
                }
2940
        }
2941
 
2942
        fprintf(vmain, "\n");
2943
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2944
        fprintf(vmain, "\twire\tbr_sync;\n");
2945
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2946
        if (bitreverse) {
2947
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2948
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2949
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2950
        } else {
2951
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2952
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2953
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2954
        }
2955
 
2956
        fprintf(vmain, "\n\n");
2957
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2958 26 dgisselq
        fprintf(vmain, "\tinitial\to_sync  = 1\'b0;\n");
2959 2 dgisselq
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2960 26 dgisselq
        fprintf(vmain, "\t\tif (i_rst)\n");
2961
        fprintf(vmain, "\t\t\to_sync  <= 1\'b0;\n");
2962
        fprintf(vmain, "\t\telse if (i_ce)\n");
2963
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2964
        fprintf(vmain, "\n");
2965
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2966
        fprintf(vmain, "\t\tif (i_ce)\n");
2967 2 dgisselq
        fprintf(vmain, "\t\tbegin\n");
2968
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2969
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2970
        fprintf(vmain, "\t\tend\n");
2971
        fprintf(vmain, "\n\n");
2972
        fprintf(vmain, "endmodule\n");
2973
        fclose(vmain);
2974
 
2975 14 dgisselq
        {
2976
                std::string     fname;
2977 2 dgisselq
 
2978 14 dgisselq
                fname = coredir + "/butterfly.v";
2979 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2980 2 dgisselq
 
2981 22 dgisselq
                if (nummpy > 0) {
2982
                        fname = coredir + "/hwbfly.v";
2983 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2984 22 dgisselq
                }
2985
 
2986 29 dgisselq
                {
2987
                        // To make debugging easier, we build both of these
2988
                        fname = coredir + "/shiftaddmpy.v";
2989
                        build_multiply(fname.c_str());
2990 2 dgisselq
 
2991 29 dgisselq
                        fname = coredir + "/longbimpy.v";
2992
                        build_longbimpy(fname.c_str());
2993
                        fname = coredir + "/bimpy.v";
2994
                        build_bimpy(fname.c_str());
2995
                }
2996
 
2997 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
2998
                        fname = coredir + "/qtrstage_dbg.v";
2999
                        build_quarters(fname.c_str(), rounding, true);
3000
                }
3001 14 dgisselq
                fname = coredir + "/qtrstage.v";
3002 26 dgisselq
                build_quarters(fname.c_str(), rounding, false);
3003 2 dgisselq
 
3004 26 dgisselq
                if ((dbg)&&(dbgstage == 2))
3005
                        fname = coredir + "/dblstage_dbg.v";
3006
                else
3007
                        fname = coredir + "/dblstage.v";
3008
                build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
3009 14 dgisselq
 
3010
                if (bitreverse) {
3011
                        fname = coredir + "/dblreverse.v";
3012
                        build_dblreverse(fname.c_str());
3013
                }
3014 23 dgisselq
 
3015
                const   char    *rnd_string = "";
3016
                switch(rounding) {
3017
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
3018
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
3019
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
3020
                        default:
3021
                                rnd_string = "/convround.v"; break;
3022
                } fname = coredir + rnd_string;
3023
                switch(rounding) {
3024
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
3025
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
3026
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
3027
                        default:
3028
                                build_convround(fname.c_str()); break;
3029
                }
3030
 
3031 2 dgisselq
        }
3032
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.