OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 31

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
2 16 dgisselq
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23 31 dgisselq
//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.
24
//              (Adjustments are at the top of the file ...)
25
//
26 16 dgisselq
// Creator:     Dan Gisselquist, Ph.D.
27 30 dgisselq
//              Gisselquist Technology, LLC
28 16 dgisselq
//
29 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
30 16 dgisselq
//
31
// Copyright (C) 2015, Gisselquist Technology, LLC
32
//
33
// This program is free software (firmware): you can redistribute it and/or
34
// modify it under the terms of  the GNU General Public License as published
35
// by the Free Software Foundation, either version 3 of the License, or (at
36
// your option) any later version.
37
//
38
// This program is distributed in the hope that it will be useful, but WITHOUT
39
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
40
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
41
// for more details.
42
//
43
// You should have received a copy of the GNU General Public License along
44
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
45
// target there if the PDF file isn't present.)  If not, see
46
// <http://www.gnu.org/licenses/> for a copy.
47
//
48
// License:     GPL, v3, as defined and found on www.gnu.org,
49
//              http://www.gnu.org/licenses/gpl.html
50
//
51
//
52 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
53 16 dgisselq
//
54
//
55 31 dgisselq
#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen
56 2 dgisselq
#include <stdio.h>
57
#include <stdlib.h>
58 31 dgisselq
 
59
#ifdef _MSC_VER //  added for ms vs compatibility
60
 
61
#include <io.h>
62
#include <direct.h>
63
#define _USE_MATH_DEFINES
64
#define R_OK    4       /* Test for read permission.  */
65
#define W_OK    2       /* Test for write permission.  */
66
#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/
67
#define F_OK    0       /* Test for existence.  */
68
 
69
#if _MSC_VER <= 1700

// Compilers reporting _MSC_VER <= 1700 get local stand-ins for the pieces
// this file needs: llround(), lstat() and S_ISDIR().

// llround
//
// Rounds d to the nearest integer, with halfway cases rounded away from
// zero, and returns the result as a long long.
long long llround(double d) {
	if (d < 0)
		return -(long long)(-d + 0.5);
	return (long long)(d + 0.5);
}

// lstat
//
// Stub only: never examines the file, never touches *buf, and always
// returns 1.
int lstat(const char *filename, struct stat *buf) { return 1; }

// With the lstat() stub above there is no mode word to test, so nothing is
// ever reported to be a directory.
#define S_ISDIR(A)	0

#else

#define lstat	_stat
// S_ISDIR is a function-like macro taking a st_mode value.  Aliasing it to
// the bare _S_IFDIR constant would expand S_ISDIR(m) to a constant followed
// by "(m)", which does not compile, so test the file-type bits instead.
// NOTE(review): _S_IFMT/_S_IFDIR come from <sys/stat.h>, which is not
// included on this branch within the visible part of the file -- confirm.
#define S_ISDIR(A)	((((A)) & _S_IFMT) == _S_IFDIR)

#endif
83
 
84
#define mkdir(A,B)      _mkdir(A)
85
 
86
#define access _access
87
 
88
#else
89
// And for G++/Linux environment
90
 
91
#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros
92 2 dgisselq
#include <sys/stat.h>
93 31 dgisselq
#endif
94
 
95 2 dgisselq
#include <string.h>
96 14 dgisselq
#include <string>
97 2 dgisselq
#include <math.h>
98
#include <ctype.h>
99
#include <assert.h>
100
 
101 26 dgisselq
// Built-in settings.  The code that reads the DEF_* values lies outside
// this part of the file.
// NOTE(review): going by the names these are defaults for the input bit
// width, the output directory, extra coefficient bits, the number of
// multiplies and extra precision bits -- confirm against their users.
#define DEF_NBITSIN     16
#define DEF_COREDIR     "fft-core"
#define DEF_XTRACBITS   4
#define DEF_NMPY        0
#define DEF_XTRAPBITS   0
// Selects which of the two delay formulas bflydelay() uses.
#define USE_OLD_MULTIPLY        false
107 2 dgisselq
 
108 29 dgisselq
// To coordinate testing, it helps to have some defines in our header file that
// are common with the default parameters found within the various subroutines.
// We'll define those common parameters here.  These values, however, have no
// effect on anything other than bench testing.  They do, though, allow us to
// bench test exact copies of what is going on within the FFT when necessary
// in order to find problems.
//
// First, parameters for the new multiply based upon the bi-multiply structure
// (2-bits/2-tableau rows at a time).
#define TST_LONGBIMPY_AW        16
#define TST_LONGBIMPY_BW        20      // Leave undefined to match AW

// We also include parameters for the shift add multiply
#define TST_SHIFTADDMPY_AW      16
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

// Now for parameters matching the butterfly
#define TST_BUTTERFLY_IWIDTH    16
#define TST_BUTTERFLY_CWIDTH    20
#define TST_BUTTERFLY_OWIDTH    17

// Now for parameters matching the qtrstage.  build_quarters() writes these
// two values into the generated module as the defaults of its IWIDTH and
// LGWIDTH parameters.
#define TST_QTRSTAGE_IWIDTH     16
#define TST_QTRSTAGE_LGWIDTH    8

// Parameters for the dblstage
#define TST_DBLSTAGE_IWIDTH     16
#define TST_DBLSTAGE_SHIFT      0

// Now for parameters matching the dblreverse stage
#define TST_DBLREVERSE_LGSIZE   5
138
 
139 23 dgisselq
// ROUND_T
//
// Names the bit-dropping method a generated stage uses.  The stage builders
// translate each value into the name of the Verilog module they instantiate.
typedef enum {
	RND_TRUNCATE   = 0,	// instantiates "truncate"
	RND_FROMZERO   = 1,	// instantiates "roundfromzero"
	RND_HALFUP     = 2,	// instantiates "roundhalfup"
	RND_CONVERGENT = 3	// instantiates "convround"
} ROUND_T;
142
 
143 2 dgisselq
// License banner.  Each build_*() routine writes this text, unchanged, into
// the Verilog file it generates, directly after that file's own header.
const char      cpyleft[] =
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"////////////////////////////////////////////////////////////////////////////////\n";
// Project title, substituted into the "// Project:" line of each generated
// file header.
const char      prjname[] = "A Doubletime Pipelined FFT";
// Creator lines, substituted near the end of each generated file header.
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Technology, LLC\n";
171 2 dgisselq
 
172
// lgval
//
// Returns the smallest lg >= 1 for which (1<<lg) >= vl.  Every vl of two or
// less, including zero and negative values, yields 1.
//
// The comparison is done on unsigned values so that inputs above 2^30 never
// require shifting a signed one into (or past) the sign bit, which would be
// undefined behaviour.
int	lgval(int vl) {
	unsigned	target, lg;

	if (vl <= 2)
		return 1;

	target = (unsigned)vl;
	for (lg = 1; (1u << lg) < target; lg++)
		;
	return (int)lg;
}
179
 
180
// nextlg
//
// Returns the smallest power of two that is greater than or equal to vl.
// Any vl of one or less yields 1.
//
// No power of two at or above vl fits in an int once vl exceeds the largest
// representable power of two.  Doubling further would overflow a signed int
// and could leave the loop spinning forever, so in that case the result
// stops at that largest power of two instead.
int	nextlg(int vl) {
	// Largest power of two an int can hold (2^30 for a 32-bit int)
	const int	top = (int)((~0u >> 2) + 1u);
	int		r = 1;

	while ((r < vl) && (r < top))
		r <<= 1;
	return r;
}
187
 
188 14 dgisselq
int     bflydelay(int nbits, int xtra) {
189 2 dgisselq
        int     cbits = nbits + xtra;
190 14 dgisselq
        int     delay;
191 29 dgisselq
 
192
        if (USE_OLD_MULTIPLY) {
193
                if (nbits+1<cbits)
194
                        delay = nbits+4;
195
                else
196
                        delay = cbits+3;
197
        } else {
198
                int     na=nbits+2, nb=cbits+1;
199
                if (nb<na) {
200
                        int tmp = nb;
201
                        nb = na; na = tmp;
202
                } delay = ((na)/2+(na&1)+2);
203
        }
204 14 dgisselq
        return delay;
205 2 dgisselq
}
206
 
207 14 dgisselq
int     lgdelay(int nbits, int xtra) {
208
        // The butterfly code needs to compare a valid address, of this
209
        // many bits, with an address two greater.  This guarantees we
210
        // have enough bits for that comparison.  We'll also end up with
211
        // more storage space to look for these values, but without a 
212
        // redesign that's just what we'll deal with.
213
        return lgval(bflydelay(nbits, xtra)+3);
214
}
215
 
216 23 dgisselq
void    build_truncator(const char *fname) {
217
        printf("TRUNCATING!\n");
218 2 dgisselq
        FILE    *fp = fopen(fname, "w");
219
        if (NULL == fp) {
220
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
221
                perror("O/S Err was:");
222
                return;
223
        }
224
 
225
        fprintf(fp,
226
"///////////////////////////////////////////////////////////////////////////\n"
227
"//\n"
228 23 dgisselq
"// Filename:   truncate.v\n"
229
"//             \n"
230
"// Project:    %s\n"
231
"//\n"
232
"// Purpose:    Truncation is one of several options that can be used\n"
233
"//             internal to the various FFT stages to drop bits from one \n"
234
"//             stage to the next.  In general, it is the simplest method\n"
235
"//             of dropping bits, since it requires only a bit selection.\n"
236
"//\n"
237
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
238
"//             since it tends to produce a DC bias in the result.  (Other\n"
239
"//             less pronounced biases may also exist.)\n"
240
"//\n"
241
"//             This particular version also registers the output with the\n"
242
"//             clock, so there will be a delay of one going through this\n"
243
"//             module.  This will keep it in line with the other forms of\n"
244
"//             rounding that can be used.\n"
245
"//\n"
246
"//\n%s"
247
"//\n",
248
                prjname, creator);
249
 
250
        fprintf(fp, "%s", cpyleft);
251
        fprintf(fp,
252
"module truncate(i_clk, i_ce, i_val, o_val);\n"
253
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
254
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
255
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
256
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
257
"\n"
258
        "\talways @(posedge i_clk)\n"
259
                "\t\tif (i_ce)\n"
260
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
261
"\n"
262
"endmodule\n");
263
}
264
 
265
 
266
void    build_roundhalfup(const char *fname) {
267
        FILE    *fp = fopen(fname, "w");
268
        if (NULL == fp) {
269
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
270
                perror("O/S Err was:");
271
                return;
272
        }
273
 
274
        fprintf(fp,
275
"///////////////////////////////////////////////////////////////////////////\n"
276
"//\n"
277
"// Filename:   roundhalfup.v\n"
278
"//             \n"
279
"// Project:    %s\n"
280
"//\n"
281
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
282
"//             school.  A one half value is added to the result, and then\n"
283
"//             the result is truncated.  When used in an FFT, this produces\n"
284
"//             less bias than the truncation method, although a bias still\n"
285
"//             tends to remain.\n"
286
"//\n"
287
"//\n%s"
288
"//\n",
289
                prjname, creator);
290
 
291
        fprintf(fp, "%s", cpyleft);
292
        fprintf(fp,
293
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
294
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
295
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
296
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
297
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
298
"\n"
299
        "\t// Let's deal with two cases to be as general as we can be here\n"
300
        "\t//\n"
301
        "\t//   1. The desired output would lose no bits at all\n"
302
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
303
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
304
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
305
        "\t//\t\tvalue.\n"
306
        "\tgenerate\n"
307
        "\tif (IWID-SHIFT == OWID)\n"
308
        "\tbegin // No truncation or rounding, output drops no bits\n"
309
"\n"
310
                "\t\talways @(posedge i_clk)\n"
311
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
312
"\n"
313
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
314
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
315
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
316
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
317
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
318 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
319 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
320
"\n"
321
                "\t\talways @(posedge i_clk)\n"
322
                "\t\t\tif (i_ce)\n"
323
                "\t\t\tbegin\n"
324
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
325
                        "\t\t\t\t\to_val <= truncated_value;\n"
326
                        "\t\t\t\telse\n"
327
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
328
                "\t\t\tend\n"
329
"\n"
330
        "\tend\n"
331
        "\tendgenerate\n"
332
"\n"
333
"endmodule\n");
334
}
335
 
336
void    build_roundfromzero(const char *fname) {
337
        FILE    *fp = fopen(fname, "w");
338
        if (NULL == fp) {
339
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
340
                perror("O/S Err was:");
341
                return;
342
        }
343
 
344
        fprintf(fp,
345
"///////////////////////////////////////////////////////////////////////////\n"
346
"//\n"
347
"// Filename:   roundfromzero.v\n"
348
"//             \n"
349
"// Project:    %s\n"
350
"//\n"
351
"// Purpose:    Truncation is one of several options that can be used\n"
352
"//             internal to the various FFT stages to drop bits from one \n"
353
"//             stage to the next.  In general, it is the simplest method\n"
354
"//             of dropping bits, since it requires only a bit selection.\n"
355
"//\n"
356
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
357
"//             since it tends to produce a DC bias in the result.  (Other\n"
358
"//             less pronounced biases may also exist.)\n"
359
"//\n"
360
"//             This particular version also registers the output with the\n"
361
"//             clock, so there will be a delay of one going through this\n"
362
"//             module.  This will keep it in line with the other forms of\n"
363
"//             rounding that can be used.\n"
364
"//\n"
365
"//\n%s"
366
"//\n",
367
                prjname, creator);
368
 
369
        fprintf(fp, "%s", cpyleft);
370
        fprintf(fp,
371
"module convround(i_clk, i_ce, i_val, o_val);\n"
372
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
373
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
374
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
375
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
376
"\n"
377
        "\t// Let's deal with three cases to be as general as we can be here\n"
378
        "\t//\n"
379
        "\t//\t1. The desired output would lose no bits at all\n"
380
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
381
        "\t//\t\tadjusting the value to be the closer to zero in\n"
382
        "\t//\t\tcases of being halfway between two.  If identically\n"
383
        "\t//\t\tequal to a number, we just leave it as is.\n"
384
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
385
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
386
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
387
        "\t//\t\tround away from zero.\n"
388
        "\tgenerate\n"
389 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
390
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
391
        "\t// effect here.\n"
392
"\n"
393
                "\t\talways @(posedge i_clk)\n"
394
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
395
"\n"
396
        "\tend else if (IWID-SHIFT == OWID)\n"
397 23 dgisselq
        "\tbegin // No truncation or rounding, output drops no bits\n"
398
"\n"
399
                "\t\talways @(posedge i_clk)\n"
400
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
401
"\n"
402
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
403
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
404
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
405
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
406
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
407 26 dgisselq
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
408 23 dgisselq
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
409
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
410
"\n"
411
        "\t\talways @(posedge i_clk)\n"
412
                "\t\t\tif (i_ce)\n"
413
                "\t\t\tbegin\n"
414
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
415
                                "\t\t\t\t\to_val <= truncated_value;\n"
416
                        "\t\t\t\telse if (sign_bit)\n"
417
                                "\t\t\t\t\to_val <= truncated_value;\n"
418
                        "\t\t\t\telse\n"
419
                                "\t\t\t\t\to_val <= rounded_up;\n"
420
                "\t\t\tend\n"
421
"\n"
422
        "\tend else // If there's more than one bit we are dropping\n"
423
        "\tbegin\n"
424
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
425
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
426
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
427 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
428 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
429
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
430
"\n"
431
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
432
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
433
"\n"
434
                "\t\talways @(posedge i_clk)\n"
435
                        "\t\t\tif (i_ce)\n"
436
                        "\t\t\tbegin\n"
437
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
438
                                "\t\t\t\t\to_val <= truncated_value;\n"
439
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
440
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
441
                        "\t\t\t\telse if (sign_bit)\n"
442
                                "\t\t\t\t\to_val <= truncated_value;\n"
443
                        "\t\t\t\telse\n"
444
                                "\t\t\t\t\to_val <= rounded_up;\n"
445
                        "\t\t\tend\n"
446
        "\tend\n"
447
        "\tendgenerate\n"
448
"\n"
449
"endmodule\n");
450
}
451
 
452
void    build_convround(const char *fname) {
453
        FILE    *fp = fopen(fname, "w");
454
        if (NULL == fp) {
455
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
456
                perror("O/S Err was:");
457
                return;
458
        }
459
 
460
        fprintf(fp,
461
"///////////////////////////////////////////////////////////////////////////\n"
462
"//\n"
463
"// Filename:   convround.v\n"
464
"//             \n"
465
"// Project:    %s\n"
466
"//\n"
467
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
468
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
469
"//             rounding, or ... more, at least according to Wikipedia.\n"
470
"//\n"
471
"//             This form of rounding works by rounding, when the direction\n"
472
"//             is in question, towards the nearest even value.\n"
473
"//\n"
474
"//\n%s"
475
"//\n",
476
                prjname, creator);
477
 
478
        fprintf(fp, "%s", cpyleft);
479
        fprintf(fp,
480
"module convround(i_clk, i_ce, i_val, o_val);\n"
481
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
482
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
483
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
484
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
485
"\n"
486
"\t// Let's deal with three cases to be as general as we can be here\n"
487
"\t//\n"
488
"\t//\t1. The desired output would lose no bits at all\n"
489
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
490
"\t//\t\tadjusting the value to be the nearest even number in\n"
491
"\t//\t\tcases of being halfway between two.  If identically\n"
492
"\t//\t\tequal to a number, we just leave it as is.\n"
493
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
494
"\t//\t\tnormally unless we are rounding a value of exactly\n"
495
"\t//\t\thalfway between the two.  In the halfway case we round\n"
496
"\t//\t\tto the nearest even number.\n"
497
"\tgenerate\n"
498 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
499
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
500
        "\t// effect here.\n"
501
"\n"
502
                "\t\talways @(posedge i_clk)\n"
503
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
504
"\n"
505
"\tend else if (IWID-SHIFT == OWID)\n"
506 23 dgisselq
"\tbegin // No truncation or rounding, output drops no bits\n"
507
"\n"
508
"\t\talways @(posedge i_clk)\n"
509
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
510
"\n"
511
"\tend else if (IWID-SHIFT-1 == OWID)\n"
512
"\tbegin // Output drops one bit, can only add one or ... not.\n"
513
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
514
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
515
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
516 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
517 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
518
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
519
"\n"
520
"\t\talways @(posedge i_clk)\n"
521
"\t\t\tif (i_ce)\n"
522
"\t\t\tbegin\n"
523
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
524
"\t\t\t\t\to_val <= truncated_value;\n"
525
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
526
"\t\t\t\t\to_val <= rounded_up; // even value\n"
527
"\t\t\t\telse // else round down to the nearest\n"
528
"\t\t\t\t\to_val <= truncated_value; // even value\n"
529
"\t\t\tend\n"
530
"\n"
531
"\tend else // If there's more than one bit we are dropping\n"
532
"\tbegin\n"
533
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
534
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
535
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
536 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
537 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
538
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
539
"\n"
540
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
541
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
542
"\n"
543
"\t\talways @(posedge i_clk)\n"
544
"\t\t\tif (i_ce)\n"
545
"\t\t\tbegin\n"
546
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
547
"\t\t\t\t\to_val <= truncated_value;\n"
548
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
549
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
550
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
551
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
552
"\t\t\t\telse   // else round down to nearest even\n"
553
"\t\t\t\t\to_val <= truncated_value;\n"
554
"\t\t\tend\n"
555
"\tend\n"
556
"\tendgenerate\n"
557
"\n"
558
"endmodule\n");
559
}
560
 
561 26 dgisselq
void    build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
562 23 dgisselq
        FILE    *fp = fopen(fname, "w");
563
        if (NULL == fp) {
564
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
565
                perror("O/S Err was:");
566
                return;
567
        }
568
        const   char    *rnd_string;
569
        if (rounding == RND_TRUNCATE)
570
                rnd_string = "truncate";
571
        else if (rounding == RND_FROMZERO)
572
                rnd_string = "roundfromzero";
573
        else if (rounding == RND_HALFUP)
574
                rnd_string = "roundhalfup";
575
        else
576
                rnd_string = "convround";
577
 
578
 
579
        fprintf(fp,
580
"///////////////////////////////////////////////////////////////////////////\n"
581
"//\n"
582 26 dgisselq
"// Filename:   qtrstage%s.v\n"
583 2 dgisselq
"//             \n"
584
"// Project:    %s\n"
585
"//\n"
586 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
587
"//             frequency FFT.  This particular implementation is optimized\n"
588
"//             so that all of the multiplies are accomplished by additions\n"
589
"//             and multiplexers only.\n"
590
"//\n"
591 2 dgisselq
"//\n%s"
592
"//\n",
593 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
594 2 dgisselq
        fprintf(fp, "%s", cpyleft);
595
 
596
        fprintf(fp,
597 26 dgisselq
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
598 29 dgisselq
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
599 5 dgisselq
        "\t// Parameters specific to the core that should be changed when this\n"
600
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
601
        "\t// spans must use the fftdoubles stage.\n"
602 29 dgisselq
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
603 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
604
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
605
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
606
        "\toutput\treg                          o_sync;\n"
607 29 dgisselq
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
608
        TST_QTRSTAGE_LGWIDTH);
609 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
610
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
611
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
612
"\n");
613
        }
614 14 dgisselq
        fprintf(fp,
615 5 dgisselq
        "\treg\t        wait_for_sync;\n"
616 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
617 2 dgisselq
"\n"
618 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
619 2 dgisselq
"\n"
620 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
621
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
622
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
623
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
624 2 dgisselq
"\n"
625 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
626
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
627 2 dgisselq
"\n"
628 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
629
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
630
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
631 2 dgisselq
"\n"
632 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
633
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
634
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
635 2 dgisselq
"\n"
636 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
637 14 dgisselq
"\n");
638
        fprintf(fp,
639 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
640
        fprintf(fp,
641
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
642
        fprintf(fp,
643 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
644 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
645
        fprintf(fp,
646 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
647 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
648
        fprintf(fp,
649 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
650 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
651
        fprintf(fp,
652 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
653 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
654
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
655
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
656
/*
657
        fprintf(fp,
658 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
659 9 dgisselq
        "\tgenerate\n"
660
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
661 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
662 9 dgisselq
        "\telse\n"
663 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
664 9 dgisselq
        "\tendgenerate\n"
665 2 dgisselq
"\n"
666 23 dgisselq
*/
667
        fprintf(fp,
668 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
669
        "\tinitial iaddr = 0;\n"
670 5 dgisselq
        "\talways @(posedge i_clk)\n"
671
                "\t\tif (i_rst)\n"
672
                "\t\tbegin\n"
673 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
674 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
675 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
676 5 dgisselq
                "\t\tbegin\n"
677 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
678
                        "\t\t\twait_for_sync <= 1\'b0;\n"
679
                "\t\tend\n"
680
        "\talways @(posedge i_clk)\n"
681
                "\t\tif (i_ce)\n"
682 5 dgisselq
                        "\t\t\timem <= i_data;\n"
683 26 dgisselq
                "\n\n");
684 23 dgisselq
        fprintf(fp,
685
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
686
        "\t// Why not?  Because iaddr will always be zero until after the\n"
687
        "\t// first i_ce, so we are safe.\n"
688 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
689 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
690
                "\t\tif (i_rst)\n"
691 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
692 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
693
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
694
        fprintf(fp,
695
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
696
        "\talways\t@(posedge i_clk)\n"
697
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
698
                "\t\tbegin\n"
699
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
700
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
701
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
702
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
703
                "\t\tend\n\n");
704
        fprintf(fp,
705
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
706
        fprintf(fp,
707 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
708
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
709
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
710
        "\t// it independent of pipeline[2].\n"
711 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
712 26 dgisselq
                "\t\tif (i_ce)\n"
713 23 dgisselq
                "\t\tbegin\n"
714
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
715
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
716
                        "\t\t\tif (ODD == 0)\n"
717 5 dgisselq
                        "\t\t\tbegin\n"
718 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
719
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
720
                        "\t\t\tend else if (INVERSE==0) begin\n"
721
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
722
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
723
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
724
                        "\t\t\tend else begin\n"
725
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
726
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
727
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
728 5 dgisselq
                        "\t\t\tend\n"
729 23 dgisselq
                "\t\tend\n\n");
730
        fprintf(fp,
731
        "\talways\t@(posedge i_clk)\n"
732
                "\t\tif (i_ce)\n"
733
                "\t\tbegin // In sequence, clock = 3\n"
734
                        "\t\t\tif (pipeline[3])\n"
735 5 dgisselq
                        "\t\t\tbegin\n"
736
                                "\t\t\t\tomem <= ob_b;\n"
737
                                "\t\t\t\to_data <= ob_a;\n"
738
                        "\t\t\tend else\n"
739
                                "\t\t\t\to_data <= omem;\n"
740 23 dgisselq
                "\t\tend\n\n");
741
 
742
        fprintf(fp,
743
        "\t// Don\'t forget in the sync check that we are running\n"
744
        "\t// at two clocks per sample.  Thus we need to\n"
745
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
746 26 dgisselq
        "\tinitial\to_sync = 1\'b0;\n"
747 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
748 26 dgisselq
                "\t\tif (i_rst)\n"
749
                "\t\t\to_sync <= 1\'b0;\n"
750
                "\t\telse if (i_ce)\n"
751 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
752
        fprintf(fp, "endmodule\n");
753 2 dgisselq
}
754
 
755 26 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
756 2 dgisselq
        FILE    *fp = fopen(fname, "w");
757
        if (NULL == fp) {
758
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
759
                perror("O/S Err was:");
760
                return;
761
        }
762
 
763 23 dgisselq
        const   char    *rnd_string;
764
        if (rounding == RND_TRUNCATE)
765
                rnd_string = "truncate";
766
        else if (rounding == RND_FROMZERO)
767
                rnd_string = "roundfromzero";
768
        else if (rounding == RND_HALFUP)
769
                rnd_string = "roundhalfup";
770
        else
771
                rnd_string = "convround";
772
 
773
 
774 2 dgisselq
        fprintf(fp,
775
"///////////////////////////////////////////////////////////////////////////\n"
776
"//\n"
777 26 dgisselq
"// Filename:   dblstage%s.v\n"
778 2 dgisselq
"//\n"
779
"// Project:    %s\n"
780
"//\n"
781
"// Purpose:    This is part of an FPGA implementation that will process\n"
782 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
783
"//             through the data at two samples per clock.  If you notice\n"
784
"//             from the derivation of an FFT, the only time both even and\n"
785
"//             odd samples are used at the same time is in this stage.\n"
786
"//             Therefore, other than this stage and these twiddles, all of\n"
787
"//             the other stages can run two stages at a time at one sample\n"
788
"//             per clock.\n"
789 2 dgisselq
"//\n"
790
"//             In this implementation, the output is valid one clock after\n"
791
"//             the input is valid.  The output also accumulates one bit\n"
792
"//             above and beyond the number of bits in the input.\n"
793
"//             \n"
794
"//             i_clk   A system clock\n"
795 6 dgisselq
"//             i_rst   A synchronous reset\n"
796 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
797 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
798 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
799
"//                     bits contain the real portion, low order bits the\n"
800
"//                     imaginary portion, all in two\'s complement.\n"
801
"//             i_right The next (odd) complex sample input, same format as\n"
802
"//                     i_left.\n"
803
"//             o_left  The first (even) complex output.\n"
804
"//             o_right The next (odd) complex output.\n"
805 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
806 2 dgisselq
"//\n%s"
807 26 dgisselq
"//\n", (dbg)?"_dbg":"", prjname, creator);
808 2 dgisselq
 
809
        fprintf(fp, "%s", cpyleft);
810
        fprintf(fp,
811 26 dgisselq
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
812 29 dgisselq
        "\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"
813 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
814 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
815 28 dgisselq
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
816 6 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
817 29 dgisselq
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",
818
        TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);
819 26 dgisselq
 
820
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
821
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
822
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
823
"\n");
824
        }
825 19 dgisselq
        fprintf(fp,
826 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
827
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
828
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
829
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
830
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
831
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
832
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
833 2 dgisselq
"\n"
834 15 dgisselq
"\n"
835 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
836 15 dgisselq
"\n"
837 23 dgisselq
"\n");
838
        fprintf(fp,
839 26 dgisselq
        "\n"
840
        "\t// As with any register connected to the sync pulse, these must\n"
841
        "\t// have initial values and be reset on the i_rst signal.\n"
842
        "\t// Other data values need only restrict their updates to i_ce\n"
843
        "\t// enabled clocks, but sync\'s must obey resets and initial\n"
844
        "\t// conditions as well.\n"
845 28 dgisselq
        "\treg\trnd_sync, r_sync;\n"
846 2 dgisselq
"\n"
847 28 dgisselq
        "\tinitial\trnd_sync      = 1\'b0; // Sync into rounding\n"
848
        "\tinitial\tr_sync        = 1\'b0; // Sync coming out\n"
849 5 dgisselq
        "\talways @(posedge i_clk)\n"
850 6 dgisselq
                "\t\tif (i_rst)\n"
851 23 dgisselq
                "\t\tbegin\n"
852 26 dgisselq
                        "\t\t\trnd_sync <= 1\'b0;\n"
853 28 dgisselq
                        "\t\t\tr_sync <= 1\'b0;\n"
854
                "\t\tend else if (i_ce)\n"
855 5 dgisselq
                "\t\tbegin\n"
856 26 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
857 28 dgisselq
                        "\t\t\tr_sync <= rnd_sync;\n"
858 26 dgisselq
                "\t\tend\n"
859
"\n"
860
        "\t// As with other variables, these are really only updated when in\n"
861
        "\t// the processing pipeline, after the first i_sync.  However, to\n"
862
        "\t// eliminate as much unnecessary logic as possible, we toggle\n"
863 28 dgisselq
        "\t// these any time the i_ce line is enabled, and don\'t reset.\n"
864
        "\t// them on i_rst.\n");
865
        fprintf(fp,
866
        "\t// Don't forget that we accumulate a bit by adding two values\n"
867
        "\t// together. Therefore our intermediate value must have one more\n"
868
        "\t// bit than the two originals.\n"
869
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n"
870
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n"
871 26 dgisselq
        "\talways @(posedge i_clk)\n"
872
                "\t\tif (i_ce)\n"
873
                "\t\tbegin\n"
874
                        "\t\t\t//\n"
875 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
876
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
877 5 dgisselq
                        "\t\t\t//\n"
878 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
879
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
880 6 dgisselq
                        "\t\t\t//\n"
881 5 dgisselq
                "\t\tend\n"
882 28 dgisselq
"\n");
883
        fprintf(fp,
884
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
885
        "\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
886
        fprintf(fp,
887
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
888
        "\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
889
        fprintf(fp,
890
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
891
        "\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
892
        fprintf(fp,
893
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
894
        "\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
895
 
896
        fprintf(fp, "\n"
897
        "\t// Prior versions of this routine did not include the extra\n"
898
        "\t// clock and register/flip-flops that this routine requires.\n"
899
        "\t// These are placed in here to correct a bug in Verilator, that\n"
900
        "\t// otherwise struggles.  (Hopefully this will fix the problem ...)\n"
901
        "\talways @(posedge i_clk)\n"
902
                "\t\tif (i_ce)\n"
903
                "\t\tbegin\n"
904
                        "\t\t\to_left  <= { o_out_0r, o_out_0i };\n"
905
                        "\t\t\to_right <= { o_out_1r, o_out_1i };\n"
906
                "\t\tend\n"
907 2 dgisselq
"\n"
908 28 dgisselq
        "\tinitial\to_sync = 1'b0; // Final sync coming out of module\n"
909
        "\talways @(posedge i_clk)\n"
910
                "\t\tif (i_rst)\n"
911
                "\t\t\to_sync <= 1'b0;\n"
912
                "\t\telse if (i_ce)\n"
913
                "\t\t\to_sync <= r_sync;\n"
914 2 dgisselq
"\n"
915
"endmodule\n");
916
        fclose(fp);
917
}
918
 
919
void    build_multiply(const char *fname) {
920
        FILE    *fp = fopen(fname, "w");
921
        if (NULL == fp) {
922
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
923
                perror("O/S Err was:");
924
                return;
925
        }
926
 
927
        fprintf(fp,
928
"///////////////////////////////////////////////////////////////////////////\n"
929
"//\n"
930
"// Filename:   shiftaddmpy.v\n"
931
"//\n"
932
"// Project:    %s\n"
933
"//\n"
934
"// Purpose:    A portable shift and add multiply.\n"
935
"//\n"
936
"//             While both Xilinx and Altera will offer single clock \n"
937
"//             multiplies, this simple approach will multiply two numbers\n"
938
"//             on any architecture.  The result maintains the full width\n"
939
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
940
"//             no shifted bits, etc.\n"
941
"//\n"
942
"//             Further, for those applications that can support it, this\n"
943
"//             multiply is pipelined and will produce one answer per clock.\n"
944
"//\n"
945
"//             For minimal processing delay, make the first parameter\n"
946
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
947
"//\n"
948
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
949
"//             That is, if the data is present on the input at clock t=0,\n"
950
"//             the result will be present on the output at time t=AWIDTH+1;\n"
951
"//\n"
952
"//\n%s"
953
"//\n", prjname, creator);
954
 
955
        fprintf(fp, "%s", cpyleft);
956
        fprintf(fp,
957
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
958 29 dgisselq
        "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);
959
#ifdef  TST_SHIFTADDMPY_BW
960
        fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);
961
#else
962
        fprintf(fp, "AWIDTH;\n");
963
#endif
964
        fprintf(fp,
965 2 dgisselq
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
966
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
967
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
968
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
969
"\n"
970
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
971
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
972
        "\treg\t\t\tsgn;\n"
973
"\n"
974
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
975
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
976
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
977
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
978
        "\tgenvar k;\n"
979
"\n"
980 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
981
        "\t// taking the absolute value here would require an additional bit.\n"
982
        "\t// However, because our results are now unsigned, we can stay\n"
983
        "\t// within the number of bits given (for now).\n"
984 2 dgisselq
        "\talways @(posedge i_clk)\n"
985
                "\t\tif (i_ce)\n"
986
                "\t\tbegin\n"
987
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
988
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
989
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
990
                "\t\tend\n"
991
"\n"
992
        "\talways @(posedge i_clk)\n"
993
                "\t\tif (i_ce)\n"
994
                "\t\tbegin\n"
995 26 dgisselq
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
996
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
997 2 dgisselq
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
998 26 dgisselq
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
999 2 dgisselq
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
1000
                "\t\tend\n"
1001
"\n"
1002
        "\tgenerate\n"
1003 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
1004 25 dgisselq
        "\tbegin : genstages\n"
1005 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
1006
                "\t\tif (i_ce)\n"
1007 2 dgisselq
                "\t\tbegin\n"
1008 26 dgisselq
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
1009
                        "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
1010
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
1011 2 dgisselq
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
1012
                "\t\tend\n"
1013
        "\tend\n"
1014
        "\tendgenerate\n"
1015
"\n"
1016
        "\talways @(posedge i_clk)\n"
1017
                "\t\tif (i_ce)\n"
1018
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
1019
"\n"
1020
"endmodule\n");
1021
 
1022
        fclose(fp);
1023
}
1024
 
1025 29 dgisselq
void    build_bimpy(const char *fname) {
1026
        FILE    *fp = fopen(fname, "w");
1027
        if (NULL == fp) {
1028
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1029
                perror("O/S Err was:");
1030
                return;
1031
        }
1032
 
1033
        fprintf(fp,
1034
"////////////////////////////////////////////////////////////////////////////////\n"
1035
"//\n"
1036
"// Filename:   %s\n"
1037
"//\n"
1038
"// Project:    %s\n"
1039
"//\n"
1040
"// Purpose:    A simple 2-bit multiply based upon the fact that LUT's allow\n"
1041
"//             6-bits of input.  In other words, I could build a 3-bit\n"
1042
"//             multiply from 6 LUTs (5 actually, since the first could have\n"
1043
"//             two outputs).  This would allow multiplication of three bit\n"
1044
"//             digits, save only for the fact that you would need two bits\n"
1045
"//             of carry.  The bimpy approach throttles back a bit and does\n"
1046
"//             a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"
1047
"//             carry more than one bit.  While this multiply is hardware\n"
1048
"//             independent (and can still run under Verilator therefore),\n"
1049
"//             it is really motivated by trying to optimize for a specific\n"
1050
"//             piece of hardware (Xilinx-7 series ...) that has at least\n"
1051
"//             4-input LUT's with carry chains.\n"
1052
"//\n"
1053
"//\n"
1054
"//\n%s"
1055
"//\n", fname, prjname, creator);
1056
 
1057
        fprintf(fp, "%s", cpyleft);
1058
        fprintf(fp,
1059
"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1060
"\tparameter\tBW=18, // Number of bits in i_b\n"
1061
"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"
1062
"\tinput\t\t\t\ti_clk, i_ce;\n"
1063
"\tinput\t\t[(LUTB-1):0]\ti_a;\n"
1064
"\tinput\t\t[(BW-1):0]\ti_b;\n"
1065
"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"
1066
"\n"
1067
"\twire [(BW+LUTB-2):0] w_r;\n"
1068
"\twire [(BW+LUTB-3):1] c;\n"
1069
"\n"
1070
"\tassign\tw_r =  { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"
1071
"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"
1072
"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"
1073
"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"
1074
"\n"
1075
"\talways @(posedge i_clk)\n"
1076
"\t\tif (i_ce)\n"
1077
"\t\t\to_r <= w_r + { c, 2'b0 };\n"
1078
"\n"
1079
"endmodule\n");
1080
 
1081
        fclose(fp);
1082
}
1083
 
1084
void    build_longbimpy(const char *fname) {
1085
        FILE    *fp = fopen(fname, "w");
1086
        if (NULL == fp) {
1087
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1088
                perror("O/S Err was:");
1089
                return;
1090
        }
1091
 
1092
        fprintf(fp,
1093
"////////////////////////////////////////////////////////////////////////////////\n"
1094
"//\n"
1095
"// Filename:   %s\n"
1096
"//\n"
1097
"// Project:    %s\n"
1098
"//\n"
1099
"// Purpose:    A portable shift and add multiply, built with the knowledge\n"
1100
"//             of the existence of a six bit LUT and carry chain.  That\n"
1101
"//             knowledge allows us to multiply two bits from one value\n"
1102
"//             at a time against all of the bits of the other value.  This\n"
1103
"//             sub multiply is called the bimpy.\n"
1104
"//\n"
1105
"//             For minimal processing delay, make the first parameter\n"
1106
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
1107
"//\n"
1108
"//\n"
1109
"//\n%s"
1110
"//\n", fname, prjname, creator);
1111
 
1112
        fprintf(fp, "%s", cpyleft);
1113
        fprintf(fp,
1114
"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1115
        "\tparameter    AW=%d,  // The width of i_a, min width is 5\n"
1116
                        "\t\t\tBW=", TST_LONGBIMPY_AW);
1117
#ifdef  TST_LONGBIMPY_BW
1118
        fprintf(fp, "%d", TST_LONGBIMPY_BW);
1119
#else
1120
        fprintf(fp, "AW");
1121
#endif
1122
 
1123
        fprintf(fp, ",  // The width of i_b, can be anything\n"
1124
                        "\t\t\t// The following three parameters should not be changed\n"
1125
                        "\t\t\t// by any implementation, but are based upon hardware\n"
1126
                        "\t\t\t// and the above values:\n"
1127
                        "\t\t\tOW=AW+BW,        // The output width\n"
1128
                        "\t\t\tIW=(AW+1)&(-2),  // Internal width of A\n"
1129
                        "\t\t\tLUTB=2,  // How many bits we can multiply by at once\n"
1130
                        "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"
1131
        "\tinput\t\t\t\ti_clk, i_ce;\n"
1132
        "\tinput\t\t[(AW-1):0]\ti_a;\n"
1133
        "\tinput\t\t[(BW-1):0]\ti_b;\n"
1134
        "\toutput\treg\t[(AW+BW-1):0]\to_r;\n"
1135
"\n"
1136
        "\treg\t[(IW-1):0]\tu_a;\n"
1137
        "\treg\t[(BW-1):0]\tu_b;\n"
1138
        "\treg\t\t\tsgn;\n"
1139
"\n"
1140
        "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"
1141
        "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"
1142
        "\treg\t[(TLEN-1):0]\t\tr_s;\n"
1143
        "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"
1144
        "\tgenvar k;\n"
1145
"\n"
1146
        "\t// First step:\n"
1147
        "\t// Switch to unsigned arithmetic for our multiply, keeping track\n"
1148
        "\t// of the along the way.  We'll then add the sign again later at\n"
1149
        "\t// the end.\n"
1150
        "\t//\n"
1151
        "\t// If we were forced to stay within two's complement arithmetic,\n"
1152
        "\t// taking the absolute value here would require an additional bit.\n"
1153
        "\t// However, because our results are now unsigned, we can stay\n"
1154
        "\t// within the number of bits given (for now).\n"
1155
        "\tgenerate if (IW > AW)\n"
1156
        "\tbegin\n"
1157
                "\t\talways @(posedge i_clk)\n"
1158
                        "\t\t\tif (i_ce)\n"
1159
                        "\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"
1160
        "\tend else begin\n"
1161
                "\t\talways @(posedge i_clk)\n"
1162
                        "\t\t\tif (i_ce)\n"
1163
                        "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"
1164
        "\tend endgenerate\n"
1165
"\n"
1166
        "\talways @(posedge i_clk)\n"
1167
                "\t\tif (i_ce)\n"
1168
                "\t\tbegin\n"
1169
                        "\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"
1170
                        "\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"
1171
                "\t\tend\n"
1172
"\n"
1173
        "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"
1174
"\n"
1175
        "\t//\n"
1176
        "\t// Second step: First two 2xN products.\n"
1177
        "\t//\n"
1178
        "\t// Since we have no tableau of additions (yet), we can do both\n"
1179
        "\t// of the first two rows at the same time and add them together.\n"
1180
        "\t// For the next round, we'll then have a previous sum to accumulate\n"
1181
        "\t// with new and subsequent product, and so only do one product at\n"
1182
        "\t// a time can follow this--but the first clock can do two at a time.\n"
1183
        "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);\n"
1184
        "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"
1185
        "\talways @(posedge i_clk)\n"
1186
                "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"
1187
        "\talways @(posedge i_clk)\n"
1188
                "\t\tif (i_ce) r_b[0] <= u_b;\n"
1189
        "\talways @(posedge i_clk)\n"
1190
                "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"
1191
        "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"
1192
                "\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"
1193
                        "\t\t\t  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"
1194
"\n"
1195
        "\tgenerate // Keep track of intermediate values, before multiplying them\n"
1196
        "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"
1197
        "\tbegin : gencopies\n"
1198
                "\t\talways @(posedge i_clk)\n"
1199
                "\t\tif (i_ce)\n"
1200
                "\t\tbegin\n"
1201
                        "\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"
1202
                                "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"
1203
                        "\t\t\tr_b[k+1] <= r_b[k];\n"
1204
                        "\t\tend\n"
1205
        "\tend endgenerate\n"
1206
"\n"
1207
        "\tgenerate // The actual multiply and accumulate stage\n"
1208
        "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"
1209
        "\tbegin : genstages\n"
1210
                "\t\t// First, the multiply: 2-bits times BW bits\n"
1211
                "\t\twire\t[(BW+LUTB-1):0] genp;\n"
1212
                "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"
1213
"\n"
1214
                "\t\t// Then the accumulate step -- on the next clock\n"
1215
                "\t\talways @(posedge i_clk)\n"
1216
                        "\t\t\tif (i_ce)\n"
1217
                                "\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"
1218
                                        "\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"
1219
        "\tend endgenerate\n"
1220
"\n"
1221
        "\twire [(IW+BW-1):0]   w_r;\n"
1222
        "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"
1223
        "\talways @(posedge i_clk)\n"
1224
                "\t\tif (i_ce)\n"
1225
                        "\t\t\to_r <= w_r[(AW+BW-1):0];\n"
1226
"\n"
1227
"endmodule\n");
1228
 
1229
        fclose(fp);
1230
}
1231
 
1232 2 dgisselq
void    build_dblreverse(const char *fname) {
1233
        FILE    *fp = fopen(fname, "w");
1234
        if (NULL == fp) {
1235
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1236
                perror("O/S Err was:");
1237
                return;
1238
        }
1239
 
1240
        fprintf(fp,
1241
"///////////////////////////////////////////////////////////////////////////\n"
1242
"//\n"
1243
"// Filename:   dblreverse.v\n"
1244
"//\n"
1245
"// Project:    %s\n"
1246
"//\n"
1247
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
1248
"//             expected as follows:\n"
1249
"//\n"
1250
"//             i_clk   A running clock at whatever system speed is offered.\n"
1251
"//             i_rst   A synchronous reset signal, that resets all internals\n"
1252
"//             i_ce    If this is one, one input is consumed and an output\n"
1253
"//                     is produced.\n"
1254
"//             i_in_0, i_in_1\n"
1255
"//                     Two inputs to be consumed, each of width WIDTH.\n"
1256
"//             o_out_0, o_out_1\n"
1257
"//                     Two of the bitreversed outputs, also of the same\n"
1258
"//                     width, WIDTH.  Of course, there is a delay from the\n"
1259
"//                     first input to the first output.  For this purpose,\n"
1260
"//                     o_sync is present.\n"
1261 26 dgisselq
"//             o_sync  This will be a 1\'b1 for the first value in any block.\n"
1262
"//                     Following a reset, this will only become 1\'b1 once\n"
1263 2 dgisselq
"//                     the data has been loaded and is now valid.  After that,\n"
1264
"//                     all outputs will be valid.\n"
1265 26 dgisselq
"//\n"
1266
"//     20150602 -- This module has undergone massive rework in order to\n"
1267
"//             ensure that it uses resources efficiently.  As a result, \n"
1268
"//             it now optimizes nicely into block RAMs.  As an unfortunately\n"
1269
"//             side effect, it now passes it\'s bench test (dblrev_tb) but\n"
1270
"//             fails the integration bench test (fft_tb).\n"
1271
"//\n"
1272 2 dgisselq
"//\n%s"
1273
"//\n", prjname, creator);
1274
        fprintf(fp, "%s", cpyleft);
1275
        fprintf(fp,
1276
"\n\n"
1277
"//\n"
1278
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
1279
"// our work into eight memory banks, writing two banks at once and reading\n"
1280
"// another two banks in the same clock?\n"
1281
"//\n"
1282
"//     mem[00xxx0] = s_0[n]\n"
1283
"//     mem[00xxx1] = s_1[n]\n"
1284
"//     o_0[n] = mem[10xxx0]\n"
1285
"//     o_1[n] = mem[11xxx0]\n"
1286
"//     ...\n"
1287
"//     mem[01xxx0] = s_0[m]\n"
1288
"//     mem[01xxx1] = s_1[m]\n"
1289
"//     o_0[m] = mem[10xxx1]\n"
1290
"//     o_1[m] = mem[11xxx1]\n"
1291
"//     ...\n"
1292
"//     mem[10xxx0] = s_0[n]\n"
1293
"//     mem[10xxx1] = s_1[n]\n"
1294
"//     o_0[n] = mem[00xxx0]\n"
1295
"//     o_1[n] = mem[01xxx0]\n"
1296
"//     ...\n"
1297
"//     mem[11xxx0] = s_0[m]\n"
1298
"//     mem[11xxx1] = s_1[m]\n"
1299
"//     o_0[m] = mem[00xxx1]\n"
1300
"//     o_1[m] = mem[01xxx1]\n"
1301
"//     ...\n"
1302
"//\n"
1303 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
1304
"//     to do it properly.  These four banks are defined by the two bits\n"
1305
"//     that determine the top and bottom of the correct address.  Larger\n"
1306
"//     FFT\'s would require more memories.\n"
1307
"//\n"
1308 2 dgisselq
"//\n");
1309
        fprintf(fp,
1310
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
1311 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
1312 29 dgisselq
        "\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"
1313 5 dgisselq
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
1314
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
1315 26 dgisselq
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
1316 29 dgisselq
        "\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);
1317
 
1318
        fprintf(fp,
1319 2 dgisselq
"\n"
1320 26 dgisselq
        "\treg\t\t\tin_reset;\n"
1321
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
1322
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
1323 2 dgisselq
"\n"
1324 5 dgisselq
        "\tgenvar\tk;\n"
1325 26 dgisselq
        "\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
1326 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
1327 26 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
1328 25 dgisselq
        "\tend endgenerate\n"
1329 2 dgisselq
"\n"
1330 25 dgisselq
        "\tinitial iaddr = 0;\n"
1331
        "\tinitial in_reset = 1\'b1;\n"
1332 26 dgisselq
        "\tinitial o_sync = 1\'b0;\n"
1333 5 dgisselq
        "\talways @(posedge i_clk)\n"
1334
                "\t\tif (i_rst)\n"
1335
                "\t\tbegin\n"
1336
                        "\t\t\tiaddr <= 0;\n"
1337 26 dgisselq
                        "\t\t\tin_reset <= 1\'b1;\n"
1338
                        "\t\t\to_sync <= 1\'b0;\n"
1339 5 dgisselq
                "\t\tend else if (i_ce)\n"
1340
                "\t\tbegin\n"
1341 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
1342
                        "\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
1343
                                "\t\t\t\tin_reset <= 1\'b0;\n"
1344 5 dgisselq
                        "\t\t\tif (in_reset)\n"
1345 26 dgisselq
                                "\t\t\t\to_sync <= 1\'b0;\n"
1346
                        "\t\t\telse\n"
1347
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
1348 5 dgisselq
                "\t\tend\n"
1349 2 dgisselq
"\n"
1350 26 dgisselq
        "\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
1351
        "\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
1352
"\n"
1353
        "\talways @(posedge i_clk)\n"
1354
                "\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
1355
        "\talways @(posedge i_clk)\n"
1356
                "\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
1357
"\n"
1358
"\n"
1359
        "\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
1360
"\n"
1361
        "\talways @(posedge i_clk)\n"
1362
                "\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1363
        "\talways @(posedge i_clk)\n"
1364
                "\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1365
        "\talways @(posedge i_clk)\n"
1366
                "\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1367
        "\talways @(posedge i_clk)\n"
1368
                "\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1369
"\n"
1370
        "\treg\tadrz;\n"
1371
        "\talways @(posedge i_clk)\n"
1372 28 dgisselq
                "\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n"
1373 26 dgisselq
"\n"
1374
        "\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
1375
        "\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1376
"\n"
1377 21 dgisselq
"endmodule\n");
1378 2 dgisselq
 
1379
        fclose(fp);
1380
}
1381
 
1382 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1383 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1384
        if (NULL == fp) {
1385
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1386
                perror("O/S Err was:");
1387
                return;
1388
        }
1389 23 dgisselq
        const   char    *rnd_string;
1390
        if (rounding == RND_TRUNCATE)
1391
                rnd_string = "truncate";
1392
        else if (rounding == RND_FROMZERO)
1393
                rnd_string = "roundfromzero";
1394
        else if (rounding == RND_HALFUP)
1395
                rnd_string = "roundhalfup";
1396
        else
1397
                rnd_string = "convround";
1398 2 dgisselq
 
1399
        fprintf(fp,
1400
"///////////////////////////////////////////////////////////////////////////\n"
1401
"//\n"
1402
"// Filename:   butterfly.v\n"
1403
"//\n"
1404
"// Project:    %s\n"
1405
"//\n"
1406
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1407
"//             in frequency version of an FFT.  Specifically, given\n"
1408
"//             complex Left and Right values together with a \n"
1409
"//             coefficient, the output of this routine is given\n"
1410
"//             by:\n"
1411
"//\n"
1412
"//             L' = L + R\n"
1413
"//             R' = (L - R)*C\n"
1414
"//\n"
1415
"//             The rest of the junk below handles timing (mostly),\n"
1416
"//             to make certain that L' and R' reach the output at\n"
1417
"//             the same clock.  Further, just to make certain\n"
1418
"//             that is the case, an 'aux' input exists.  This\n"
1419
"//             aux value will come out of this routine synchronized\n"
1420
"//             to the values it came in with.  (i.e., both L', R',\n"
1421
"//             and aux all have the same delay.)  Hence, a caller\n"
1422
"//             of this routine may set aux on the first input with\n"
1423
"//             valid data, and then wait to see aux set on the output\n"
1424
"//             to know when to find the first output with valid data.\n"
1425
"//\n"
1426
"//             All bits are preserved until the very last clock,\n"
1427
"//             where any more bits than OWIDTH will be quietly\n"
1428
"//             discarded.\n"
1429
"//\n"
1430
"//             This design features no overflow checking.\n"
1431
"// \n"
1432
"// Notes:\n"
1433
"//             CORDIC:\n"
1434
"//             Much as we would like, we can't use a cordic here.\n"
1435
"//             The goal is to accomplish an FFT, as defined, and a\n"
1436
"//             CORDIC places a scale factor onto the data.  Removing\n"
1437
"//             the scale factor would cost a two multiplies, which\n"
1438
"//             is precisely what we are trying to avoid.\n"
1439
"//\n"
1440
"//\n"
1441
"//             3-MULTIPLIES:\n"
1442
"//             It should also be possible to do this with three \n"
1443
"//             multiplies and an extra two addition cycles.  \n"
1444
"//\n"
1445
"//             We want\n"
1446
"//                     R+I = (a + jb) * (c + jd)\n"
1447
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1448
"//             We multiply\n"
1449
"//                     P1 = ac\n"
1450
"//                     P2 = bd\n"
1451
"//                     P3 = (a+b)(c+d)\n"
1452
"//             Then \n"
1453
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1454
"//\n"
1455
"//             WIDTHS:\n"
1456
"//             On multiplying an X width number by an\n"
1457
"//             Y width number, X>Y, the result should be (X+Y)\n"
1458
"//             bits, right?\n"
1459
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1460
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1461
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1462
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1463
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1464
"//             YUP!  But just barely.  Do this and you'll really want\n"
1465
"//             to drop a bit, although you will risk overflow in so\n"
1466
"//             doing.\n"
1467 26 dgisselq
"//\n"
1468
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
1469
"//             synchronization lines no longer go through the FIFO with the\n"
1470
"//             left hand sum, but are kept out of memory.  This allows the\n"
1471
"//             butterfly to use more optimal memory resources, while also\n"
1472
"//             guaranteeing that the sync lines can be properly reset upon\n"
1473
"//             any reset signal.\n"
1474
"//\n"
1475 2 dgisselq
"//\n%s"
1476
"//\n", prjname, creator);
1477
        fprintf(fp, "%s", cpyleft);
1478
 
1479
        fprintf(fp,
1480 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1481 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1482
        "\t// Public changeable parameters ...\n"
1483 29 dgisselq
        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
1484
#ifdef  TST_BUTTERFLY_CWIDTH
1485
        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
1486
#else
1487
        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
1488
#endif
1489
#ifdef  TST_BUTTERFLY_OWIDTH
1490
        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
1491
#else
1492
        fprintf(fp, "OWIDTH=IWIDTH+1;\n");
1493
#endif
1494
        fprintf(fp,
1495 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1496 29 dgisselq
        "\tparameter    MPYDELAY=%d'd%d,\n"
1497 28 dgisselq
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
1498 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1499
        "\t// this value is fractional, then round up to the nearest\n"
1500
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1501 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1502 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1503 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1504
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1505
        "\tinput\t\ti_aux;\n"
1506
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1507 26 dgisselq
        "\toutput\treg\to_aux;\n"
1508 29 dgisselq
        "\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),
1509
                lgdelay(16,xtracbits));
1510 14 dgisselq
        fprintf(fp,
1511 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1512 2 dgisselq
"\n"
1513 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1514
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
1515
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1516
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1517
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1518
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1519
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1520
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1521 2 dgisselq
"\n"
1522 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1523 2 dgisselq
"\n"
1524 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1525
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1526 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1527 26 dgisselq
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1528 5 dgisselq
"\n");
1529
        fprintf(fp,
1530
        "\t// Set up the input to the multiply\n"
1531 2 dgisselq
        "\talways @(posedge i_clk)\n"
1532
                "\t\tif (i_ce)\n"
1533
                "\t\tbegin\n"
1534
                        "\t\t\t// One clock just latches the inputs\n"
1535
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1536
                        "\t\t\tr_right <= i_right;\n"
1537
                        "\t\t\tr_coef  <= i_coef;\n"
1538
                        "\t\t\t// Next clock adds/subtracts\n"
1539
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1540
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1541
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1542
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1543
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1544
                        "\t\t\tr_coef_2<= r_coef;\n"
1545
        "\t\tend\n"
1546 5 dgisselq
"\n");
1547
        fprintf(fp,
1548
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1549
        "\t// to be multiplied, but yet we still need the results in sync\n"
1550
        "\t// with the answer when it is ready.\n"
1551 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1552 2 dgisselq
        "\talways @(posedge i_clk)\n"
1553 6 dgisselq
                "\t\tif (i_rst)\n"
1554
                        "\t\t\tfifo_addr <= 0;\n"
1555 26 dgisselq
                "\t\telse if (i_ce)\n"
1556 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1557
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1558
                        "\t\t\t// right side.\n"
1559
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1560 14 dgisselq
"\n"
1561 26 dgisselq
        "\talways @(posedge i_clk)\n"
1562
                "\t\tif (i_ce)\n"
1563
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
1564 2 dgisselq
"\n"
1565 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1566
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1567
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1568
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1569 2 dgisselq
"\n"
1570 5 dgisselq
"\n");
1571
        fprintf(fp,
1572
        "\t// Multiply output is always a width of the sum of the widths of\n"
1573
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1574
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1575
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1576
        "\t// three multiply complex multiply cannot increase the total\n"
1577
        "\t// number of bits in our final output.  We\'ll take care of\n"
1578
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1579
        "\t// below.\n"
1580 2 dgisselq
"\n"
1581 5 dgisselq
"\n");
1582
        fprintf(fp,
1583
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1584
        "\t// by doing three multiplies we accomplish the work of four.\n"
1585
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1586
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1587
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1588
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1589
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1590
        "\t// and P3 as\n"
1591
        "\t//\tP1 = ac\n"
1592
        "\t//\tP2 = bd\n"
1593
        "\t//\tP3 = (a + b) * (c + d)\n"
1594
        "\t// and then complete our final answer with\n"
1595
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1596
        "\t//\tad + bc = P3 - P2 - P1\n"
1597
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1598
        "\t//\t        = bc + ad (this checks)\n"
1599 2 dgisselq
"\n"
1600 5 dgisselq
"\n");
1601
        fprintf(fp,
1602
        "\t// This should really be based upon an IF, such as in\n"
1603
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1604
        "\t// However, this is the only (other) way I know to do it.\n"
1605 29 dgisselq
        "\tgenerate if (CWIDTH < IWIDTH+1)\n"
1606 2 dgisselq
        "\tbegin\n"
1607 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1608
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1609
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1610
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1611
                "\n"
1612 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1613 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1614
                "\t\t// simpler, multiply.\n"
1615 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1616 2 dgisselq
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1617
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1618 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1619 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1620 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1621 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1622 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1623 2 dgisselq
        "\tend else begin\n"
1624 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1625
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1626
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1627
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1628
                "\n"
1629 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1630 2 dgisselq
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1631
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1632 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1633 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1634 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1635 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1636 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1637 2 dgisselq
        "\tend\n"
1638
        "\tendgenerate\n"
1639 29 dgisselq
"\n",
1640
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1641
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1642
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1643
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1644
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1645
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");
1646 5 dgisselq
        fprintf(fp,
1647
        "\t// These values are held in memory and delayed during the\n"
1648
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1649
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1650
        "\t// therefore, the left_x values need to be right shifted by\n"
1651
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1652
        "\t// extension.\n"
1653
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1654 26 dgisselq
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
1655
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1656
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1657 2 dgisselq
"\n"
1658
"\n"
1659 23 dgisselq
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
1660 5 dgisselq
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1661
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1662
"\n");
1663
        fprintf(fp,
1664 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1665 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1666
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1667
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1668
        "\t// them, but the actual values will never fill all these bits.\n"
1669
        "\t// In particular, we only need:\n"
1670
        "\t//\t IWIDTH bits for the input\n"
1671
        "\t//\t     +1 bit for the add/subtract\n"
1672
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1673
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1674
        "\t//\t ------\n"
1675
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1676
        "\t//\n"
1677
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1678
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1679
        "\t//\t   IWIDTH bits for the input\n"
1680
        "\t//\t       +1 bit for the add/subtract\n"
1681
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1682
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1683
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1684
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1685
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1686
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1687
        "\t// SHIFT) we accomplish that here.\n"
1688 23 dgisselq
"\n");
1689
        fprintf(fp,
1690
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1691
 
1692
        fprintf(fp,
1693 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
1694 23 dgisselq
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1695
                rnd_string);
1696
        fprintf(fp,
1697 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
1698 23 dgisselq
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1699
                rnd_string);
1700
        fprintf(fp,
1701 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1702 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1703
        fprintf(fp,
1704 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1705 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1706
        fprintf(fp,
1707
        "\talways @(posedge i_clk)\n"
1708
                "\t\tif (i_ce)\n"
1709
                "\t\tbegin\n"
1710
                        "\t\t\t// First clock, recover all values\n"
1711
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1712
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1713
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1714
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1715
                        "\t\t\t// extra bits we need to get rid of.)\n"
1716
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1717
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1718 2 dgisselq
"\n"
1719 23 dgisselq
                        "\t\t\t// Second clock, round and latch for final clock\n"
1720
                        "\t\t\tb_right_r <= rnd_right_r;\n"
1721
                        "\t\t\tb_right_i <= rnd_right_i;\n"
1722
                        "\t\t\tb_left_r <= rnd_left_r;\n"
1723
                        "\t\t\tb_left_i <= rnd_left_i;\n"
1724 24 dgisselq
                "\t\tend\n"
1725
"\n");
1726 26 dgisselq
 
1727 24 dgisselq
        fprintf(fp,
1728 26 dgisselq
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
1729
        "\tinitial\taux_pipeline = 0;\n"
1730
        "\talways @(posedge i_clk)\n"
1731
        "\t\tif (i_rst)\n"
1732
        "\t\t\taux_pipeline <= 0;\n"
1733
        "\t\telse if (i_ce)\n"
1734
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
1735
"\n");
1736
        fprintf(fp,
1737 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1738 24 dgisselq
        "\talways @(posedge i_clk)\n"
1739
                "\t\tif (i_rst)\n"
1740
                "\t\t\to_aux <= 1\'b0;\n"
1741
                "\t\telse if (i_ce)\n"
1742
                "\t\tbegin\n"
1743
                        "\t\t\t// Second clock, latch for final clock\n"
1744 26 dgisselq
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
1745 23 dgisselq
                "\t\tend\n"
1746
"\n");
1747 24 dgisselq
 
1748 23 dgisselq
        fprintf(fp,
1749 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1750
        "\t// complement numbers per output word, so that each output word\n"
1751
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1752
        "\t// portion and the bottom half being the imaginary portion.\n"
1753 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1754
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1755 2 dgisselq
"\n"
1756
"endmodule\n");
1757
        fclose(fp);
1758
}
1759
 
1760 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1761 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1762
        if (NULL == fp) {
1763
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1764
                perror("O/S Err was:");
1765
                return;
1766
        }
1767
 
1768 23 dgisselq
        const   char    *rnd_string;
1769
        if (rounding == RND_TRUNCATE)
1770
                rnd_string = "truncate";
1771
        else if (rounding == RND_FROMZERO)
1772
                rnd_string = "roundfromzero";
1773
        else if (rounding == RND_HALFUP)
1774
                rnd_string = "roundhalfup";
1775
        else
1776
                rnd_string = "convround";
1777
 
1778
 
1779 22 dgisselq
        fprintf(fp,
1780
"///////////////////////////////////////////////////////////////////////////\n"
1781
"//\n"
1782
"// Filename:   hwbfly.v\n"
1783
"//\n"
1784
"// Project:    %s\n"
1785
"//\n"
1786
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1787
"//             in 'butterfly.v', save only that it uses the verilog \n"
1788
"//             operator '*' in hopes that the synthesizer would be able\n"
1789
"//             to optimize it with hardware resources.\n"
1790
"//\n"
1791
"//             It is understood that a hardware multiply can complete its\n"
1792
"//             operation in a single clock.\n"
1793
"//\n"
1794
"//\n%s"
1795
"//\n", prjname, creator);
1796
        fprintf(fp, "%s", cpyleft);
1797
        fprintf(fp,
1798
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1799
                "\t\to_left, o_right, o_aux);\n"
1800
        "\t// Public changeable parameters ...\n"
1801
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1802
        "\t// Parameters specific to the core that should not be changed.\n"
1803 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1804 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1805
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1806
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1807
        "\tinput\t\ti_aux;\n"
1808
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1809
        "\toutput\treg\to_aux;\n"
1810
"\n", xtracbits);
1811
        fprintf(fp,
1812
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1813
"\n"
1814
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1815
        "\treg\t                        r_aux, r_aux_2;\n"
1816
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
1817
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1818
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1819
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1820
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1821
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1822 26 dgisselq
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1823 22 dgisselq
"\n"
1824
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1825
"\n"
1826
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1827
"\n"
1828
        "\t// Set up the input to the multiply\n"
1829 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1830
        "\tinitial r_aux_2 = 1\'b0;\n"
1831 22 dgisselq
        "\talways @(posedge i_clk)\n"
1832 25 dgisselq
                "\t\tif (i_rst)\n"
1833
                "\t\tbegin\n"
1834 26 dgisselq
                        "\t\t\tr_aux <= 1\'b0;\n"
1835
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1836 25 dgisselq
                "\t\tend else if (i_ce)\n"
1837
                "\t\tbegin\n"
1838
                        "\t\t\t// One clock just latches the inputs\n"
1839 26 dgisselq
                        "\t\t\tr_aux <= i_aux;\n"
1840
                        "\t\t\t// Next clock adds/subtracts\n"
1841
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1842
                        "\t\t\tr_aux_2 <= r_aux;\n"
1843
                "\t\tend\n"
1844
        "\talways @(posedge i_clk)\n"
1845
                "\t\tif (i_ce)\n"
1846
                "\t\tbegin\n"
1847
                        "\t\t\t// One clock just latches the inputs\n"
1848 25 dgisselq
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1849
                        "\t\t\tr_right <= i_right;\n"
1850
                        "\t\t\tr_coef  <= i_coef;\n"
1851
                        "\t\t\t// Next clock adds/subtracts\n"
1852
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1853
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1854
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1855
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1856
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1857 26 dgisselq
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1858
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1859 25 dgisselq
                "\t\tend\n"
1860 22 dgisselq
        "\n\n");
1861
        fprintf(fp,
1862
"\t// See comments in the butterfly.v source file for a discussion of\n"
1863
"\t// these operations and the appropriate bit widths.\n\n");
1864
        fprintf(fp,
1865 26 dgisselq
        "\treg\tsigned  [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1866
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1867 22 dgisselq
"\n"
1868 26 dgisselq
        "\treg\tsigned  [(CWIDTH-1):0]  p1c_in, p2c_in; // Coefficient multiply inputs\n"
1869
        "\treg\tsigned  [(IWIDTH):0]    p1d_in, p2d_in; // Data multiply inputs\n"
1870
        "\treg\tsigned  [(CWIDTH):0]    p3c_in; // Product 3, coefficient input\n"
1871
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in; // Product 3, data input\n"
1872 22 dgisselq
"\n"
1873 25 dgisselq
        "\tinitial leftv    = 0;\n"
1874
        "\tinitial leftvv   = 0;\n"
1875 22 dgisselq
        "\talways @(posedge i_clk)\n"
1876
        "\tbegin\n"
1877
                "\t\tif (i_rst)\n"
1878
                "\t\tbegin\n"
1879
                        "\t\t\tleftv <= 0;\n"
1880
                        "\t\t\tleftvv <= 0;\n"
1881 26 dgisselq
                "\t\tend else if (i_ce)\n"
1882 22 dgisselq
                "\t\tbegin\n"
1883
                        "\t\t\t// Second clock, pipeline = 1\n"
1884 26 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1885
"\n"
1886
                        "\t\t\t// Third clock, pipeline = 3\n"
1887
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1888
                        "\t\t\tleftvv <= leftv;\n"
1889
                "\t\tend\n"
1890
        "\tend\n"
1891
"\n"
1892
        "\talways @(posedge i_clk)\n"
1893
                "\t\tif (i_ce)\n"
1894
                "\t\tbegin\n"
1895
                        "\t\t\t// Second clock, pipeline = 1\n"
1896
                        "\t\t\tp1c_in <= ir_coef_r;\n"
1897
                        "\t\t\tp2c_in <= ir_coef_i;\n"
1898
                        "\t\t\tp1d_in <= r_dif_r;\n"
1899
                        "\t\t\tp2d_in <= r_dif_i;\n"
1900 22 dgisselq
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1901
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1902 23 dgisselq
"\n"
1903
"\n"
1904 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1905 26 dgisselq
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1906 22 dgisselq
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1907
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1908
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1909 26 dgisselq
                "\t\tend\n"
1910 22 dgisselq
"\n"
1911 26 dgisselq
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1912
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1913
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1914 22 dgisselq
"\n");
1915
 
1916
        fprintf(fp,
1917
        "\t// These values are held in memory and delayed during the\n"
1918
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1919
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1920
        "\t// therefore, the left_x values need to be right shifted by\n"
1921
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1922
        "\t// extension.\n"
1923 24 dgisselq
        "\twire\taux_s;\n"
1924 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1925
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1926 26 dgisselq
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1927
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1928 22 dgisselq
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1929
"\n"
1930
"\n"
1931 26 dgisselq
        "\t(* use_dsp48=\"no\" *)\n"
1932 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1933
        fprintf(fp,
1934
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1935 22 dgisselq
 
1936
        fprintf(fp,
1937 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1938
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1939 23 dgisselq
                rnd_string);
1940
        fprintf(fp,
1941 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1942
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1943 23 dgisselq
                rnd_string);
1944
        fprintf(fp,
1945 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1946 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1947
        fprintf(fp,
1948 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1949 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1950
 
1951
        fprintf(fp,
1952 25 dgisselq
        "\tinitial left_saved = 0;\n"
1953
        "\tinitial o_aux      = 1\'b0;\n"
1954 22 dgisselq
        "\talways @(posedge i_clk)\n"
1955
        "\t\tif (i_rst)\n"
1956
        "\t\tbegin\n"
1957
                "\t\t\tleft_saved <= 0;\n"
1958 26 dgisselq
                "\t\t\to_aux <= 1\'b0;\n"
1959 22 dgisselq
        "\t\tend else if (i_ce)\n"
1960
        "\t\tbegin\n"
1961
                "\t\t\t// First clock, recover all values\n"
1962
                "\t\t\tleft_saved <= leftvv;\n"
1963 26 dgisselq
"\n"
1964
                "\t\t\t// Second clock, round and latch for final clock\n"
1965
                "\t\t\to_aux <= aux_s;\n"
1966
        "\t\tend\n"
1967
        "\talways @(posedge i_clk)\n"
1968
        "\t\tif (i_ce)\n"
1969
        "\t\tbegin\n"
1970 22 dgisselq
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1971
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1972
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1973
                "\t\t\t// extra bits we need to get rid of.)\n"
1974 26 dgisselq
                "\n"
1975
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1976
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1977
                "\t\t\t// they are prevented from using DSP48\'s\n"
1978
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1979
                "\t\t\tmpy_r <= w_one - w_two;\n"
1980
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1981 22 dgisselq
        "\t\tend\n"
1982
        "\n");
1983
 
1984
        fprintf(fp,
1985
        "\t// As a final step, we pack our outputs into two packed two's\n"
1986
        "\t// complement numbers per output word, so that each output word\n"
1987
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1988
        "\t// portion and the bottom half being the imaginary portion.\n"
1989 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1990
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1991 22 dgisselq
"\n"
1992
"endmodule\n");
1993
 
1994
}
1995
 
1996 26 dgisselq
void    build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
1997 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1998
        int     cbits = nbits + xtra;
1999
 
2000
        if ((cbits * 2) >= sizeof(long long)*8) {
2001
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
2002
                exit(-1);
2003
        }
2004
 
2005
        if (fstage == NULL) {
2006
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
2007
                perror("O/S Err was:");
2008
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
2009
                return;
2010
        }
2011
 
2012
        fprintf(fstage,
2013
"////////////////////////////////////////////////////////////////////////////\n"
2014
"//\n"
2015 26 dgisselq
"// Filename:   %sfftstage_%c%d%s.v\n"
2016 2 dgisselq
"//\n"
2017
"// Project:    %s\n"
2018
"//\n"
2019
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
2020
"//             be used by a FFT core compiler to generate FFTs which may be\n"
2021
"//             used as part of an FFT core.  Specifically, this file \n"
2022
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
2023
"//             FFT, there shall be (N-1) of these stages.  \n"
2024
"//\n%s"
2025
"//\n",
2026 26 dgisselq
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
2027 2 dgisselq
        fprintf(fstage, "%s", cpyleft);
2028 26 dgisselq
        fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
2029
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
2030
                (dbg)?", o_dbg":"");
2031 2 dgisselq
        // These parameter values are useless at this point--they are to be
2032
        // replaced by the parameter values in the calling program.  Only
2033
        // problem is, the CWIDTH needs to match exactly!
2034
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
2035
                nbits, cbits, nbits+1);
2036
        fprintf(fstage,
2037
"\t// Parameters specific to the core that should be changed when this\n"
2038
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
2039
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
2040
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
2041 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
2042 2 dgisselq
        fprintf(fstage,
2043
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
2044
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
2045
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
2046
"\toutput       reg                             o_sync;\n"
2047 26 dgisselq
"\n");
2048
        if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
2049
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
2050
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
2051
"\n");
2052
        }
2053
        fprintf(fstage,
2054 2 dgisselq
"\treg  wait_for_sync;\n"
2055
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
2056
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
2057 8 dgisselq
"\treg  ib_sync;\n"
2058 2 dgisselq
"\n"
2059
"\treg  b_started;\n"
2060
"\twire ob_sync;\n"
2061 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
2062 2 dgisselq
        fprintf(fstage,
2063
"\n"
2064
"\t// %scmem is defined as an array of real and complex values,\n"
2065
"\t// where the top CWIDTH bits are the real value and the bottom\n"
2066
"\t// CWIDTH bits are the imaginary value.\n"
2067
"\t//\n"
2068 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
2069 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
2070
"\t//\n"
2071
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
2072
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
2073 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
2074
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
2075 2 dgisselq
        {
2076
                FILE    *cmem;
2077
 
2078 14 dgisselq
                {
2079
                        char    *memfile, *ptr;
2080
 
2081
                        memfile = new char[strlen(fname)+128];
2082
                        strcpy(memfile, fname);
2083
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
2084
                                ptr++;
2085
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
2086
                        } else {
2087
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
2088 26 dgisselq
                                        coredir, (inv)?"i":"",
2089 14 dgisselq
                                        (odd)?'o':'e', stage*2);
2090
                        }
2091
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
2092
                        cmem = fopen(memfile, "w");
2093
                        if (NULL == cmem) {
2094
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
2095
                                perror("Err from O/S:");
2096
                                exit(-2);
2097
                        }
2098
 
2099
                        delete[] memfile;
2100 2 dgisselq
                }
2101
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
2102
                for(int i=0; i<stage/2; i++) {
2103
                        int k = 2*i+odd;
2104 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
2105 2 dgisselq
                        double  c, s;
2106
                        long long ic, is, vl;
2107
 
2108
                        c = cos(W); s = sin(W);
2109 31 dgisselq
                        ic = (long long)llround((1ll<<(cbits-2)) * c);
2110
                        is = (long long)llround((1ll<<(cbits-2)) * s);
2111 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
2112
                        vl <<= (cbits);
2113
                        vl |= (is & (~(-1ll << (cbits))));
2114
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
2115
                        /*
2116
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
2117
                                ((cbits*2+3)/4), vl, c, s,
2118
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
2119
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
2120
                        */
2121
                } fclose(cmem);
2122
        }
2123
 
2124
        fprintf(fstage,
2125 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
2126 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
2127
"\n"
2128 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
2129 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
2130
"\n"
2131 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
2132
"\tinitial iaddr = 0;\n"
2133 2 dgisselq
"\talways @(posedge i_clk)\n"
2134
        "\t\tif (i_rst)\n"
2135
        "\t\tbegin\n"
2136 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b1;\n"
2137 2 dgisselq
                "\t\t\tiaddr <= 0;\n"
2138
        "\t\tend\n"
2139
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
2140
        "\t\tbegin\n"
2141
                "\t\t\t//\n"
2142
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
2143
                "\t\t\t//\n"
2144 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
2145 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b0;\n"
2146
        "\t\tend\n"
2147
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
2148
        "\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n"
2149
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
2150
        "\n");
2151 23 dgisselq
 
2152
        fprintf(fstage,
2153
        "\t//\n"
2154
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
2155
        "\t//\n"
2156 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
2157 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2158 26 dgisselq
                "\t\tif (i_rst)\n"
2159
                        "\t\t\tib_sync <= 1\'b0;\n"
2160
                "\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
2161
                        "\t\t\tbegin\n"
2162
                                "\t\t\t\t// Set the sync to true on the very first\n"
2163
                                "\t\t\t\t// valid input in, and hence on the very\n"
2164
                                "\t\t\t\t// first valid data out per FFT.\n"
2165
                                "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
2166
                        "\t\t\tend\n"
2167 24 dgisselq
        "\talways\t@(posedge i_clk)\n"
2168 26 dgisselq
                "\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
2169
                "\t\t\tbegin\n"
2170
                        "\t\t\t\t// One input from memory, ...\n"
2171
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
2172
                        "\t\t\t\t// One input clocked in from the top\n"
2173
                        "\t\t\t\tib_b <= i_data;\n"
2174
                        "\t\t\t\t// and the coefficient or twiddle factor\n"
2175
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
2176
                "\t\t\tend\n\n", (inv)?"i":"");
2177 23 dgisselq
 
2178
        if (hwmpy) {
2179
                fprintf(fstage,
2180
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2181
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
2182
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2183
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
2184
        } else {
2185
        fprintf(fstage,
2186
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2187
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
2188
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2189
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
2190
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
2191
        }
2192
 
2193
        fprintf(fstage,
2194
        "\t//\n"
2195
        "\t// Next step: recover the outputs from the butterfly\n"
2196
        "\t//\n"
2197 25 dgisselq
        "\tinitial oB        = 0;\n"
2198
        "\tinitial o_sync    = 0;\n"
2199
        "\tinitial b_started = 0;\n"
2200 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2201
        "\t\tif (i_rst)\n"
2202
        "\t\tbegin\n"
2203
                "\t\t\toB <= 0;\n"
2204
                "\t\t\to_sync <= 0;\n"
2205
                "\t\t\tb_started <= 0;\n"
2206
        "\t\tend else if (i_ce)\n"
2207
        "\t\tbegin\n"
2208 26 dgisselq
        "\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n"
2209
        "\t\t\tif (ob_sync||b_started)\n"
2210
                "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
2211
        "\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n"
2212
                "\t\t\t// A butterfly output is available\n"
2213
                        "\t\t\t\tb_started <= 1\'b1;\n"
2214 23 dgisselq
        "\t\tend\n\n");
2215 26 dgisselq
        fprintf(fstage,
2216
        "\treg  [(LGSPAN-1):0]\t\tdly_addr;\n"
2217
        "\treg  [(2*OWIDTH-1):0]\tdly_value;\n"
2218
        "\talways @(posedge i_clk)\n"
2219
        "\t\tif (i_ce)\n"
2220
        "\t\tbegin\n"
2221
        "\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
2222
        "\t\t\tdly_value <= ob_b;\n"
2223
        "\t\tend\n"
2224
        "\talways @(posedge i_clk)\n"
2225
        "\t\tif (i_ce)\n"
2226
                "\t\t\tomem[dly_addr] <= dly_value;\n"
2227
"\n");
2228
        fprintf(fstage,
2229
        "\talways @(posedge i_clk)\n"
2230
        "\t\tif (i_ce)\n"
2231
        "\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
2232
"\n");
2233 22 dgisselq
        fprintf(fstage, "endmodule\n");
2234 2 dgisselq
}
2235
 
2236
void    usage(void) {
2237
        fprintf(stderr,
2238 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
2239 2 dgisselq
// "\tfftgen -i\n"
2240 26 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
2241
"\t\ta real FFT) at one clock per two real input samples.\n"
2242 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
2243
"\t\tlonger than the corresponding data bits, to help avoid\n"
2244 26 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits lnoger\n"
2245
"\t\tthan the data bits.\n"
2246 2 dgisselq
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
2247 26 dgisselq
"\t\tThe default is a subdirectory of the current directory named %s.\n"
2248 2 dgisselq
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
2249 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
2250
"\t\ta required parameter.)\n"
2251
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
2252
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
2253
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
2254 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
2255
"\t\tproduce.  Internal values greater than this value will be\n"
2256 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
2257
"\t\tsize by one bit for every two FFT stages.)\n"
2258 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
2259 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
2260
"\t\tcomplex values into the FFT.\n"
2261 22 dgisselq
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
2262
"\t\tmultiplication facility, instead of shift-add emulation.\n"
2263 26 dgisselq
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
2264
"\t\tbe accelerated in this fashion.  The default is not to use any\n"
2265
"\t\thardware multipliers.\n"
2266
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
2267
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
2268 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
2269
"\t\talgorithms that need to apply a filter without needing to do\n"
2270
"\t\tbin shifting, as these algorithms can, with this option, just\n"
2271
"\t\tmultiply by a bit reversed correlation sequence and then\n"
2272 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
2273
"\t\ta decimation in time inverse to do this, which this program does\n"
2274
"\t\tnot yet provide.)\n"
2275 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
2276 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
2277
"\t\trounding or truncation of the answer to the final number of bits.\n"
2278 26 dgisselq
"\t\tThe default is to use %d extra bits internally.\n",
2279
/*
2280 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
2281
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
2282
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
2283 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
2284
*/
2285
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
2286 2 dgisselq
}
2287
 
2288
// Features still needed:
2289
//      Interactivity.
2290
int main(int argc, char **argv) {
2291
        int     fftsize = -1, lgsize = -1;
2292 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
2293
                        nummpy=DEF_NMPY, nonmpy=2;
2294
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
2295
        bool    bitreverse = true, inverse=false,
2296
                verbose_flag = false, single_clock = false,
2297
                real_fft = false;
2298 2 dgisselq
        FILE    *vmain;
2299 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
2300 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
2301
        // ROUND_T      rounding = RND_HALFUP;
2302 2 dgisselq
 
2303 26 dgisselq
        bool    dbg = false;
2304
        int     dbgstage = 128;
2305
 
2306 2 dgisselq
        if (argc <= 1)
2307
                usage();
2308
 
2309 14 dgisselq
        cmdline = argv[0];
2310 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
2311 14 dgisselq
                cmdline += " ";
2312
                cmdline += argv[argn];
2313
        }
2314
 
2315
        for(int argn=1; argn<argc; argn++) {
2316 2 dgisselq
                if ('-' == argv[argn][0]) {
2317
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
2318
                                switch(argv[argn][j]) {
2319 26 dgisselq
                                        /*
2320 2 dgisselq
                                        case '0':
2321
                                                inverse = false;
2322
                                                break;
2323 26 dgisselq
                                        */
2324 2 dgisselq
                                        case '1':
2325 26 dgisselq
                                                single_clock = true;
2326 2 dgisselq
                                                break;
2327 28 dgisselq
                                        case 'a':
2328
                                                if (argn+1 >= argc) {
2329
                                                        printf("ERR: No header filename given\n\n");
2330
                                                        usage(); exit(-1);
2331
                                                }
2332
                                                hdrname = argv[++argn];
2333
                                                j+= 200;
2334
                                                break;
2335 2 dgisselq
                                        case 'c':
2336
                                                if (argn+1 >= argc) {
2337 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
2338 2 dgisselq
                                                        usage(); exit(-1);
2339
                                                }
2340
                                                xtracbits = atoi(argv[++argn]);
2341
                                                j+= 200;
2342
                                                break;
2343
                                        case 'd':
2344
                                                if (argn+1 >= argc) {
2345 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
2346 2 dgisselq
                                                        usage(); exit(-1);
2347
                                                }
2348 14 dgisselq
                                                coredir = argv[++argn];
2349 2 dgisselq
                                                j += 200;
2350
                                                break;
2351 26 dgisselq
                                        case 'D':
2352
                                                dbg = true;
2353
                                                if (argn+1 >= argc) {
2354
                                                        printf("ERR: No debug stage number given!\n\n");
2355
                                                        usage(); exit(-1);
2356
                                                }
2357
                                                dbgstage = atoi(argv[++argn]);
2358
                                                j+= 200;
2359
                                                break;
2360 2 dgisselq
                                        case 'f':
2361
                                                if (argn+1 >= argc) {
2362 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
2363 2 dgisselq
                                                        usage(); exit(-1);
2364
                                                }
2365
                                                fftsize = atoi(argv[++argn]);
2366
                                                { int sln = strlen(argv[argn]);
2367
                                                if (!isdigit(argv[argn][sln-1])){
2368
                                                        switch(argv[argn][sln-1]) {
2369
                                                        case 'k': case 'K':
2370
                                                                fftsize <<= 10;
2371
                                                                break;
2372
                                                        case 'm': case 'M':
2373
                                                                fftsize <<= 20;
2374
                                                                break;
2375
                                                        case 'g': case 'G':
2376
                                                                fftsize <<= 30;
2377
                                                                break;
2378
                                                        default:
2379 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
2380 2 dgisselq
                                                                exit(-1);
2381
                                                        }
2382
                                                }}
2383
                                                j += 200;
2384
                                                break;
2385
                                        case 'h':
2386
                                                usage();
2387
                                                exit(0);
2388
                                                break;
2389
                                        case 'i':
2390 26 dgisselq
                                                inverse = true;
2391 2 dgisselq
                                                break;
2392
                                        case 'm':
2393
                                                if (argn+1 >= argc) {
2394 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
2395 2 dgisselq
                                                        exit(-1);
2396
                                                }
2397
                                                maxbitsout = atoi(argv[++argn]);
2398
                                                j += 200;
2399
                                                break;
2400
                                        case 'n':
2401
                                                if (argn+1 >= argc) {
2402 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
2403 2 dgisselq
                                                        exit(-1);
2404
                                                }
2405
                                                nbitsin = atoi(argv[++argn]);
2406
                                                j += 200;
2407
                                                break;
2408 22 dgisselq
                                        case 'p':
2409
                                                if (argn+1 >= argc) {
2410
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
2411
                                                        exit(-1);
2412
                                                }
2413
                                                nummpy = atoi(argv[++argn]);
2414
                                                j += 200;
2415
                                                break;
2416 26 dgisselq
                                        case 'r':
2417
                                                real_fft = true;
2418
                                                break;
2419 2 dgisselq
                                        case 'S':
2420
                                                bitreverse = true;
2421
                                                break;
2422
                                        case 's':
2423
                                                bitreverse = false;
2424
                                                break;
2425 19 dgisselq
                                        case 'x':
2426
                                                if (argn+1 >= argc) {
2427
                                                        printf("ERR: No extra number of bits given!\n\n");
2428
                                                        usage(); exit(-1);
2429
                                                } j+= 200;
2430
                                                xtrapbits = atoi(argv[++argn]);
2431
                                                break;
2432 2 dgisselq
                                        case 'v':
2433
                                                verbose_flag = true;
2434
                                                break;
2435
                                        default:
2436
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
2437
                                                usage();
2438
                                                exit(-1);
2439
                                }
2440
                        }
2441
                } else {
2442
                        printf("Unrecognized argument, %s\n", argv[argn]);
2443
                        usage();
2444
                        exit(-1);
2445
                }
2446
        }
2447
 
2448 26 dgisselq
        if (real_fft) {
2449
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2450
                exit(0);
2451
        } if (single_clock) {
2452
                printf("The single clock FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2453
                exit(0);
2454
        } if (!bitreverse) {
2455
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
2456
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
2457
                printf("built.\n");
2458
        }
2459
 
2460 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
2461
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
2462
                        ;
2463
        }
2464
 
2465
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
2466
                printf("INVALID PARAMETERS!!!!\n");
2467
                exit(-1);
2468
        }
2469
 
2470
 
2471
        if (nextlg(fftsize) != fftsize) {
2472
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
2473
                                fftsize);
2474
                exit(-1);
2475
        } else if (fftsize < 2) {
2476
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
2477
                                fftsize);
2478
                if (fftsize == 1) {
2479
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
2480
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
2481
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
2482
                        fprintf(stderr, "can be connected straight to the input.\n");
2483
                } else {
2484
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
2485
                        fprintf(stderr, "Is such an operation even defined?\n");
2486
                }
2487
                exit(-1);
2488
        }
2489
 
2490
        // Calculate how many output bits we'll have, and what the log
2491
        // based two size of our FFT is.
2492
        {
2493
                int     tmp_size = fftsize;
2494
 
2495
                // The first stage always accumulates one bit, regardless
2496
                // of whether you need to or not.
2497
                nbitsout = nbitsin + 1;
2498
                tmp_size >>= 1;
2499
 
2500
                while(tmp_size > 4) {
2501
                        nbitsout += 1;
2502
                        tmp_size >>= 2;
2503
                }
2504
 
2505
                if (tmp_size > 1)
2506
                        nbitsout ++;
2507
 
2508
                if (fftsize <= 2)
2509
                        bitreverse = false;
2510
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2511
                nbitsout = maxbitsout;
2512
 
2513 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2514
        {
2515
                int     lgv = lgval(fftsize);
2516 2 dgisselq
 
2517 22 dgisselq
                nonmpy = lgv - nummpy;
2518
                if (nonmpy < 2) nonmpy = 2;
2519
                nummpy = lgv - nonmpy;
2520
        }
2521
 
2522 2 dgisselq
        {
2523
                struct stat     sbuf;
2524 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2525 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2526 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2527 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2528
                                fprintf(stderr, "To try again, please remove this file.\n");
2529
                                exit(-1);
2530
                        }
2531
                } else
2532 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2533
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2534
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2535 2 dgisselq
                        exit(-1);
2536
                }
2537
        }
2538
 
2539 28 dgisselq
        if (hdrname.length() > 0) {
2540
                FILE    *hdr = fopen(hdrname.c_str(), "w");
2541
                if (hdr == NULL) {
2542
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
2543
                        perror("O/S Err:");
2544
                        exit(-2);
2545
                }
2546
 
2547
                fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n");
2548
                fprintf(hdr, "//\n");
2549
                fprintf(hdr, "// Filename:      %s\n", hdrname.c_str());
2550
                fprintf(hdr, "//\n");
2551
                fprintf(hdr, "// Project:       %s\n", prjname);
2552
                fprintf(hdr, "//\n");
2553
                fprintf(hdr, "// Purpose:       This simple header file captures the internal constants\n");
2554
                fprintf(hdr, "//                within the FFT that were used to build it, for the purpose\n");
2555
                fprintf(hdr, "//                of making C++ integration (and test bench testing) simpler.  That\n");
2556
                fprintf(hdr, "//                is, should the FFT change size, this will note that size change\n");
2557
                fprintf(hdr, "//                and thus any test bench or other C++ program dependent upon\n");
2558
                fprintf(hdr, "//                either the size of the FFT, the number of bits in or out of\n");
2559
                fprintf(hdr, "//                it, etc., can pick up the changes in the defines found within\n");
2560
                fprintf(hdr, "//                this file.\n");
2561
                fprintf(hdr, "//\n");
2562
                fprintf(hdr, "%s", creator);
2563
                fprintf(hdr, "//\n");
2564
                fprintf(hdr, "%s", cpyleft);
2565
                fprintf(hdr, "//\n"
2566
                "//\n"
2567
                "#ifndef %sFFTHDR_H\n"
2568
                "#define %sFFTHDR_H\n"
2569
                "\n"
2570
                "#define\t%sFFT_IWIDTH\t%d\n"
2571
                "#define\t%sFFT_OWIDTH\t%d\n"
2572
                "#define\t%sFFT_LGWIDTH\t%d\n"
2573
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
2574
                        (inverse)?"I":"", (inverse)?"I":"",
2575
                        (inverse)?"I":"", nbitsin,
2576
                        (inverse)?"I":"", nbitsout,
2577
                        (inverse)?"I":"", lgsize,
2578
                        (inverse)?"I":"", (inverse)?"I":"");
2579
                if (!bitreverse)
2580
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
2581
                                (inverse)?"I":"");
2582
                if (real_fft)
2583
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
2584
                if (!single_clock)
2585
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
2586 29 dgisselq
                if (USE_OLD_MULTIPLY)
2587
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
2588
 
2589
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
2590
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
2591
#ifdef  TST_LONGBIMPY_BW
2592
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
2593
#else
2594
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
2595
#endif
2596
 
2597
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
2598
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
2599
#ifdef  TST_SHIFTADDMPY_BW
2600
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
2601
#else
2602
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
2603
#endif
2604
 
2605
#define TST_SHIFTADDMPY_AW      16
2606
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
2607
                fprintf(hdr, "// Parameters for testing the butterfly\n");
2608
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
2609
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
2610
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
2611
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
2612
                                bflydelay(TST_BUTTERFLY_IWIDTH,
2613
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
2614
 
2615
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
2616
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
2617
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
2618
 
2619
                fprintf(hdr, "// Parameters for testing the double stage\n");
2620
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
2621
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
2622
 
2623
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
2624
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
2625 28 dgisselq
                fprintf(hdr, "\n" "#endif\n\n");
2626
                fclose(hdr);
2627
        }
2628
 
2629 14 dgisselq
        {
2630
                std::string     fname_string;
2631
 
2632
                fname_string = coredir;
2633
                fname_string += "/";
2634
                if (inverse) fname_string += "i";
2635
                fname_string += "fftmain.v";
2636
 
2637
                vmain = fopen(fname_string.c_str(), "w");
2638
                if (NULL == vmain) {
2639
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2640
                        perror("Err from O/S:");
2641
                        exit(-1);
2642
                }
2643 2 dgisselq
        }
2644
 
2645
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2646
        fprintf(vmain, "//\n");
2647
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2648
        fprintf(vmain, "//\n");
2649
        fprintf(vmain, "// Project:     %s\n", prjname);
2650
        fprintf(vmain, "//\n");
2651
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2652
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2653
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2654
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2655
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2656
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2657
        fprintf(vmain, "//\n");
2658
        fprintf(vmain, "// Parameters:\n");
2659
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2660
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2661
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2662
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2663
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2664
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
2665
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2666
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2667
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
2668
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
2669
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
2670
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
2671
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
2672
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
2673
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
2674
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
2675
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
2676
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
2677
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
2678
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
2679
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
2680
        fprintf(vmain, "//\t\t\tbits total.\n");
2681
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
2682
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
2683
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
2684
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
2685
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
2686
        fprintf(vmain, "//\n");
2687 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2688
        fprintf(vmain, "//\t\tfollowing command line:\n");
2689
        fprintf(vmain, "//\n");
2690
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2691
        fprintf(vmain, "//\n");
2692 2 dgisselq
        fprintf(vmain, "%s", creator);
2693
        fprintf(vmain, "//\n");
2694
        fprintf(vmain, "%s", cpyleft);
2695
 
2696
 
2697
        fprintf(vmain, "//\n");
2698
        fprintf(vmain, "//\n");
2699
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2700
        fprintf(vmain, "\t\ti_left, i_right,\n");
2701 26 dgisselq
        fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
2702
                        (dbg)?", o_dbg":"");
2703 2 dgisselq
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2704
        assert(lgsize > 0);
2705
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2706
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2707
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2708
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2709 26 dgisselq
        if (dbg)
2710
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
2711 2 dgisselq
        fprintf(vmain, "\n\n");
2712
 
2713
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2714
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2715
        fprintf(vmain, "\n\n");
2716
 
2717
        int     tmp_size = fftsize, lgtmp = lgsize;
2718
        if (fftsize == 2) {
2719
                if (bitreverse) {
2720
                        fprintf(vmain, "\treg\tbr_start;\n");
2721 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2722 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2723
                        fprintf(vmain, "\t\tif (i_rst)\n");
2724 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
2725 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2726 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
2727 2 dgisselq
                }
2728
                fprintf(vmain, "\n\n");
2729 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2730
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
2731 2 dgisselq
                fprintf(vmain, "\n\n");
2732
        } else {
2733
                int     nbits = nbitsin, dropbit=0;
2734 26 dgisselq
                int     obits = nbits+1+xtrapbits;
2735
 
2736
                if ((maxbitsout > 0)&&(obits > maxbitsout))
2737
                        obits = maxbitsout;
2738
 
2739 2 dgisselq
                // Always do a first stage
2740 14 dgisselq
                {
2741 22 dgisselq
                        bool    mpystage;
2742 2 dgisselq
 
2743 22 dgisselq
                        // Last two stages are always non-multiply stages
2744
                        // since the multiplies can be done by adds
2745
                        mpystage = ((lgtmp-2) <= nummpy);
2746
 
2747 28 dgisselq
                        if (mpystage)
2748
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2749
                        fprintf(vmain, "\n\n");
2750
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
2751
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
2752
                        fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2753
                                (inverse)?"i":"", fftsize,
2754
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
2755
                                xtracbits, obits+xtrapbits,
2756
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2757
                                fftsize);
2758
                        fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
2759
                        fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2760
                                (inverse)?"i":"", fftsize,
2761
                                xtracbits, obits+xtrapbits,
2762
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2763
                                fftsize);
2764
                        fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2765
                        fprintf(vmain, "\n\n");
2766
 
2767
 
2768
                        std::string     fname;
2769
                        char    numstr[12];
2770
 
2771 14 dgisselq
                        fname = coredir + "/";
2772
                        if (inverse) fname += "i";
2773
                        fname += "fftstage_e";
2774
                        sprintf(numstr, "%d", fftsize);
2775
                        fname += numstr;
2776 26 dgisselq
                        if ((dbg)&&(dbgstage == fftsize))
2777
                                fname += "_dbg";
2778 14 dgisselq
                        fname += ".v";
2779 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize));    // Even stage
2780 14 dgisselq
 
2781
                        fname = coredir + "/";
2782
                        if (inverse) fname += "i";
2783
                        fname += "fftstage_o";
2784
                        sprintf(numstr, "%d", fftsize);
2785
                        fname += numstr;
2786
                        fname += ".v";
2787 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false);  // Odd  stage
2788 14 dgisselq
                }
2789
 
2790 26 dgisselq
                nbits = obits;  // New number of input bits
2791 2 dgisselq
                tmp_size >>= 1; lgtmp--;
2792
                dropbit = 0;
2793
                fprintf(vmain, "\n\n");
2794
                while(tmp_size >= 8) {
2795 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2796 2 dgisselq
 
2797
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2798
                                obits = maxbitsout;
2799
 
2800 14 dgisselq
                        {
2801 22 dgisselq
                                bool            mpystage;
2802 2 dgisselq
 
2803 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2804
 
2805 28 dgisselq
                                if (mpystage)
2806
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2807
                                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n",
2808
                                        tmp_size, tmp_size);
2809
                                fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
2810
                                        2*(obits+xtrapbits)-1,
2811
                                        tmp_size, tmp_size);
2812
                                fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2813
                                        (inverse)?"i":"", tmp_size,
2814
                                        ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
2815
                                        nbits+xtrapbits,
2816
                                        nbits+xtracbits+xtrapbits,
2817
                                        obits+xtrapbits,
2818
                                        lgsize, lgtmp-2,
2819
                                        lgdelay(nbits+xtrapbits,xtracbits),
2820
                                        (dropbit)?0:0, tmp_size);
2821
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
2822
                                        tmp_size<<1, tmp_size<<1,
2823
                                        tmp_size, tmp_size,
2824
                                        ((dbg)&&(dbgstage == tmp_size))
2825
                                                ?", o_dbg":"");
2826
                                fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2827
                                        (inverse)?"i":"", tmp_size,
2828
                                        nbits+xtrapbits,
2829
                                        nbits+xtracbits+xtrapbits,
2830
                                        obits+xtrapbits,
2831
                                        lgsize, lgtmp-2,
2832
                                        lgdelay(nbits+xtrapbits,xtracbits),
2833
                                        (dropbit)?0:0, tmp_size);
2834
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
2835
                                        tmp_size<<1, tmp_size<<1,
2836
                                        tmp_size, tmp_size);
2837
                                fprintf(vmain, "\n\n");
2838
 
2839
                                std::string     fname;
2840
                                char            numstr[12];
2841
 
2842 14 dgisselq
                                fname = coredir + "/";
2843
                                if (inverse) fname += "i";
2844
                                fname += "fftstage_e";
2845
                                sprintf(numstr, "%d", tmp_size);
2846
                                fname += numstr;
2847 26 dgisselq
                                if ((dbg)&&(dbgstage == tmp_size))
2848
                                        fname += "_dbg";
2849 14 dgisselq
                                fname += ".v";
2850 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
2851 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2852 26 dgisselq
                                        mpystage, ((dbg)&&(dbgstage == tmp_size)));     // Even stage
2853 2 dgisselq
 
2854 14 dgisselq
                                fname = coredir + "/";
2855
                                if (inverse) fname += "i";
2856
                                fname += "fftstage_o";
2857
                                sprintf(numstr, "%d", tmp_size);
2858
                                fname += numstr;
2859
                                fname += ".v";
2860 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
2861 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2862 26 dgisselq
                                        mpystage, false);       // Odd  stage
2863 14 dgisselq
                        }
2864
 
2865
 
2866 2 dgisselq
                        dropbit ^= 1;
2867
                        nbits = obits;
2868
                        tmp_size >>= 1; lgtmp--;
2869
                }
2870
 
2871
                if (tmp_size == 4) {
2872 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2873 2 dgisselq
 
2874
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2875
                                obits = maxbitsout;
2876
 
2877
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
2878 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2879 26 dgisselq
                        fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2880
                                ((dbg)&&(dbgstage==4))?"_dbg":"",
2881
                                nbits+xtrapbits, obits+xtrapbits, lgsize,
2882
                                (inverse)?1:0, (dropbit)?0:0);
2883
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
2884
                                ((dbg)&&(dbgstage==4))?", o_dbg":"");
2885 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2886 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2887 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2888 2 dgisselq
                        dropbit ^= 1;
2889
                        nbits = obits;
2890
                        tmp_size >>= 1; lgtmp--;
2891
                }
2892
 
2893
                {
2894 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2895 2 dgisselq
                        if (obits > nbitsout)
2896
                                obits = nbitsout;
2897
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2898
                                obits = maxbitsout;
2899
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2900
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2901 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
2902
                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
2903 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2904 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2905 2 dgisselq
 
2906
                        fprintf(vmain, "\n\n");
2907
                        nbits = obits;
2908
                }
2909
 
2910
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2911
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2912
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2913
                fprintf(vmain, "\n");
2914
                if (bitreverse) {
2915
                        fprintf(vmain, "\twire\tbr_start;\n");
2916
                        fprintf(vmain, "\treg\tr_br_started;\n");
2917 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2918 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2919
                        fprintf(vmain, "\t\tif (i_rst)\n");
2920 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
2921
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2922 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2923
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2924 2 dgisselq
                }
2925
        }
2926
 
2927
        fprintf(vmain, "\n");
2928
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2929
        fprintf(vmain, "\twire\tbr_sync;\n");
2930
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2931
        if (bitreverse) {
2932
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2933
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2934
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2935
        } else {
2936
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2937
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2938
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2939
        }
2940
 
2941
        fprintf(vmain, "\n\n");
2942
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2943 26 dgisselq
        fprintf(vmain, "\tinitial\to_sync  = 1\'b0;\n");
2944 2 dgisselq
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2945 26 dgisselq
        fprintf(vmain, "\t\tif (i_rst)\n");
2946
        fprintf(vmain, "\t\t\to_sync  <= 1\'b0;\n");
2947
        fprintf(vmain, "\t\telse if (i_ce)\n");
2948
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2949
        fprintf(vmain, "\n");
2950
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2951
        fprintf(vmain, "\t\tif (i_ce)\n");
2952 2 dgisselq
        fprintf(vmain, "\t\tbegin\n");
2953
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2954
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2955
        fprintf(vmain, "\t\tend\n");
2956
        fprintf(vmain, "\n\n");
2957
        fprintf(vmain, "endmodule\n");
2958
        fclose(vmain);
2959
 
2960 14 dgisselq
        {
2961
                std::string     fname;
2962 2 dgisselq
 
2963 14 dgisselq
                fname = coredir + "/butterfly.v";
2964 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2965 2 dgisselq
 
2966 22 dgisselq
                if (nummpy > 0) {
2967
                        fname = coredir + "/hwbfly.v";
2968 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2969 22 dgisselq
                }
2970
 
2971 29 dgisselq
                {
2972
                        // To make debugging easier, we build both of these
2973
                        fname = coredir + "/shiftaddmpy.v";
2974
                        build_multiply(fname.c_str());
2975 2 dgisselq
 
2976 29 dgisselq
                        fname = coredir + "/longbimpy.v";
2977
                        build_longbimpy(fname.c_str());
2978
                        fname = coredir + "/bimpy.v";
2979
                        build_bimpy(fname.c_str());
2980
                }
2981
 
2982 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
2983
                        fname = coredir + "/qtrstage_dbg.v";
2984
                        build_quarters(fname.c_str(), rounding, true);
2985
                }
2986 14 dgisselq
                fname = coredir + "/qtrstage.v";
2987 26 dgisselq
                build_quarters(fname.c_str(), rounding, false);
2988 2 dgisselq
 
2989 26 dgisselq
                if ((dbg)&&(dbgstage == 2))
2990
                        fname = coredir + "/dblstage_dbg.v";
2991
                else
2992
                        fname = coredir + "/dblstage.v";
2993
                build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
2994 14 dgisselq
 
2995
                if (bitreverse) {
2996
                        fname = coredir + "/dblreverse.v";
2997
                        build_dblreverse(fname.c_str());
2998
                }
2999 23 dgisselq
 
3000
                const   char    *rnd_string = "";
3001
                switch(rounding) {
3002
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
3003
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
3004
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
3005
                        default:
3006
                                rnd_string = "/convround.v"; break;
3007
                } fname = coredir + rnd_string;
3008
                switch(rounding) {
3009
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
3010
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
3011
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
3012
                        default:
3013
                                build_convround(fname.c_str()); break;
3014
                }
3015
 
3016 2 dgisselq
        }
3017
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.