OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 29

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
2 16 dgisselq
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
27 16 dgisselq
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49 29 dgisselq
////////////////////////////////////////////////////////////////////////////////
50 16 dgisselq
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62 26 dgisselq
// Generator-wide constants.
// NOTE(review): the DEF_ prefix suggests these are the fall-back values for
// the generator's options; the code that consumes them is not visible in
// this part of the file -- confirm against the option parser.
#define DEF_NBITSIN     16
#define DEF_COREDIR     "fft-core"
#define DEF_XTRACBITS   4
#define DEF_NMPY        0
#define DEF_XTRAPBITS   0
// Selects which of the two delay formulas bflydelay() uses.
#define USE_OLD_MULTIPLY        false
68 2 dgisselq
 
69 29 dgisselq
// To coordinate testing, it helps to have some defines in our header file that
// are common with the default parameters found within the various subroutines.
// We'll define those common parameters here.  These values, however, have no
// effect on anything other than bench testing.  They do, though, allow us to
// bench test exact copies of what is going on within the FFT when necessary
// in order to find problems.

// First, parameters for the new multiply based upon the bi-multiply structure
// (2-bits/2-tableau rows at a time).
#define TST_LONGBIMPY_AW        16
#define TST_LONGBIMPY_BW        20      // Leave undefined to match AW

//  We also include parameters for the shift add multiply
#define TST_SHIFTADDMPY_AW      16
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

// Now for parameters matching the butterfly
#define TST_BUTTERFLY_IWIDTH    16
#define TST_BUTTERFLY_CWIDTH    20
#define TST_BUTTERFLY_OWIDTH    17

// Now for parameters matching the qtrstage
// (build_quarters() emits these as the IWIDTH and LGWIDTH parameter defaults)
#define TST_QTRSTAGE_IWIDTH     16
#define TST_QTRSTAGE_LGWIDTH    8

// Parameters for the dblstage
#define TST_DBLSTAGE_IWIDTH     16
#define TST_DBLSTAGE_SHIFT      0

// Now for parameters matching the dblreverse stage
#define TST_DBLREVERSE_LGSIZE   5
99
 
100 23 dgisselq
// Bit-dropping strategy requested for a generated stage.  The stage builders
// map these onto the module they instantiate: RND_TRUNCATE -> "truncate",
// RND_FROMZERO -> "roundfromzero", RND_HALFUP -> "roundhalfup", and anything
// else (RND_CONVERGENT) -> "convround".
typedef enum {
	RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT
} ROUND_T;
103
 
104 2 dgisselq
// License banner written verbatim (via "%s") into every generated Verilog
// file, immediately after that file's own title block.
const char	cpyleft[] =
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"////////////////////////////////////////////////////////////////////////////////\n";
129 14 dgisselq
// Project title substituted into the "Project:" line of each generated file.
const char	prjname[] = "A Doubletime Pipelined FFT";
// Author lines substituted into the title block of each generated file.
const char	creator[] =	"// Creator:    Dan Gisselquist, Ph.D.\n"
				"//             Gisselquist Technology, LLC\n";
132
 
133
// Returns the smallest lg >= 1 for which 2^lg >= vl.  Any vl of two or
// less (including zero and negative values) therefore yields 1.
int	lgval(int vl) {
	int	lg;

	// The comparison is done in long long so that the shifted one cannot
	// overflow a (32-bit) int when vl is larger than 2^30; shifting a
	// signed int that far is undefined and could loop forever.
	for(lg=1; (1LL<<lg) < (long long)vl; lg++)
		;
	return lg;
}
140
 
141
// Returns the smallest power of two that is >= vl (never less than 1).
int	nextlg(int vl) {
	long long	r;

	// Accumulate in long long: doubling an int past its top power of two
	// is a signed overflow, which is undefined behavior.
	for(r=1; r<vl; r<<=1)
		;

	// The answer only fails to fit in an int when vl exceeds the largest
	// power of two an int can hold; such a request cannot be honored.
	assert((long long)(int)r == r);
	return (int)r;
}
148
 
149 14 dgisselq
int     bflydelay(int nbits, int xtra) {
150 2 dgisselq
        int     cbits = nbits + xtra;
151 14 dgisselq
        int     delay;
152 29 dgisselq
 
153
        if (USE_OLD_MULTIPLY) {
154
                if (nbits+1<cbits)
155
                        delay = nbits+4;
156
                else
157
                        delay = cbits+3;
158
        } else {
159
                int     na=nbits+2, nb=cbits+1;
160
                if (nb<na) {
161
                        int tmp = nb;
162
                        nb = na; na = tmp;
163
                } delay = ((na)/2+(na&1)+2);
164
        }
165 14 dgisselq
        return delay;
166 2 dgisselq
}
167
 
168 14 dgisselq
int     lgdelay(int nbits, int xtra) {
169
        // The butterfly code needs to compare a valid address, of this
170
        // many bits, with an address two greater.  This guarantees we
171
        // have enough bits for that comparison.  We'll also end up with
172
        // more storage space to look for these values, but without a 
173
        // redesign that's just what we'll deal with.
174
        return lgval(bflydelay(nbits, xtra)+3);
175
}
176
 
177 23 dgisselq
void    build_truncator(const char *fname) {
178
        printf("TRUNCATING!\n");
179 2 dgisselq
        FILE    *fp = fopen(fname, "w");
180
        if (NULL == fp) {
181
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
182
                perror("O/S Err was:");
183
                return;
184
        }
185
 
186
        fprintf(fp,
187
"///////////////////////////////////////////////////////////////////////////\n"
188
"//\n"
189 23 dgisselq
"// Filename:   truncate.v\n"
190
"//             \n"
191
"// Project:    %s\n"
192
"//\n"
193
"// Purpose:    Truncation is one of several options that can be used\n"
194
"//             internal to the various FFT stages to drop bits from one \n"
195
"//             stage to the next.  In general, it is the simplest method\n"
196
"//             of dropping bits, since it requires only a bit selection.\n"
197
"//\n"
198
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
199
"//             since it tends to produce a DC bias in the result.  (Other\n"
200
"//             less pronounced biases may also exist.)\n"
201
"//\n"
202
"//             This particular version also registers the output with the\n"
203
"//             clock, so there will be a delay of one going through this\n"
204
"//             module.  This will keep it in line with the other forms of\n"
205
"//             rounding that can be used.\n"
206
"//\n"
207
"//\n%s"
208
"//\n",
209
                prjname, creator);
210
 
211
        fprintf(fp, "%s", cpyleft);
212
        fprintf(fp,
213
"module truncate(i_clk, i_ce, i_val, o_val);\n"
214
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
215
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
216
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
217
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
218
"\n"
219
        "\talways @(posedge i_clk)\n"
220
                "\t\tif (i_ce)\n"
221
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
222
"\n"
223
"endmodule\n");
224
}
225
 
226
 
227
void    build_roundhalfup(const char *fname) {
228
        FILE    *fp = fopen(fname, "w");
229
        if (NULL == fp) {
230
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
231
                perror("O/S Err was:");
232
                return;
233
        }
234
 
235
        fprintf(fp,
236
"///////////////////////////////////////////////////////////////////////////\n"
237
"//\n"
238
"// Filename:   roundhalfup.v\n"
239
"//             \n"
240
"// Project:    %s\n"
241
"//\n"
242
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
243
"//             school.  A one half value is added to the result, and then\n"
244
"//             the result is truncated.  When used in an FFT, this produces\n"
245
"//             less bias than the truncation method, although a bias still\n"
246
"//             tends to remain.\n"
247
"//\n"
248
"//\n%s"
249
"//\n",
250
                prjname, creator);
251
 
252
        fprintf(fp, "%s", cpyleft);
253
        fprintf(fp,
254
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
255
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
256
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
257
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
258
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
259
"\n"
260
        "\t// Let's deal with two cases to be as general as we can be here\n"
261
        "\t//\n"
262
        "\t//   1. The desired output would lose no bits at all\n"
263
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
264
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
265
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
266
        "\t//\t\tvalue.\n"
267
        "\tgenerate\n"
268
        "\tif (IWID-SHIFT == OWID)\n"
269
        "\tbegin // No truncation or rounding, output drops no bits\n"
270
"\n"
271
                "\t\talways @(posedge i_clk)\n"
272
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
273
"\n"
274
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
275
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
276
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
277
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
278
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
279 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
280 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
281
"\n"
282
                "\t\talways @(posedge i_clk)\n"
283
                "\t\t\tif (i_ce)\n"
284
                "\t\t\tbegin\n"
285
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
286
                        "\t\t\t\t\to_val <= truncated_value;\n"
287
                        "\t\t\t\telse\n"
288
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
289
                "\t\t\tend\n"
290
"\n"
291
        "\tend\n"
292
        "\tendgenerate\n"
293
"\n"
294
"endmodule\n");
295
}
296
 
297
void    build_roundfromzero(const char *fname) {
298
        FILE    *fp = fopen(fname, "w");
299
        if (NULL == fp) {
300
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
301
                perror("O/S Err was:");
302
                return;
303
        }
304
 
305
        fprintf(fp,
306
"///////////////////////////////////////////////////////////////////////////\n"
307
"//\n"
308
"// Filename:   roundfromzero.v\n"
309
"//             \n"
310
"// Project:    %s\n"
311
"//\n"
312
"// Purpose:    Truncation is one of several options that can be used\n"
313
"//             internal to the various FFT stages to drop bits from one \n"
314
"//             stage to the next.  In general, it is the simplest method\n"
315
"//             of dropping bits, since it requires only a bit selection.\n"
316
"//\n"
317
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
318
"//             since it tends to produce a DC bias in the result.  (Other\n"
319
"//             less pronounced biases may also exist.)\n"
320
"//\n"
321
"//             This particular version also registers the output with the\n"
322
"//             clock, so there will be a delay of one going through this\n"
323
"//             module.  This will keep it in line with the other forms of\n"
324
"//             rounding that can be used.\n"
325
"//\n"
326
"//\n%s"
327
"//\n",
328
                prjname, creator);
329
 
330
        fprintf(fp, "%s", cpyleft);
331
        fprintf(fp,
332
"module convround(i_clk, i_ce, i_val, o_val);\n"
333
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
334
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
335
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
336
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
337
"\n"
338
        "\t// Let's deal with three cases to be as general as we can be here\n"
339
        "\t//\n"
340
        "\t//\t1. The desired output would lose no bits at all\n"
341
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
342
        "\t//\t\tadjusting the value to be the closer to zero in\n"
343
        "\t//\t\tcases of being halfway between two.  If identically\n"
344
        "\t//\t\tequal to a number, we just leave it as is.\n"
345
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
346
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
347
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
348
        "\t//\t\tround away from zero.\n"
349
        "\tgenerate\n"
350 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
351
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
352
        "\t// effect here.\n"
353
"\n"
354
                "\t\talways @(posedge i_clk)\n"
355
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
356
"\n"
357
        "\tend else if (IWID-SHIFT == OWID)\n"
358 23 dgisselq
        "\tbegin // No truncation or rounding, output drops no bits\n"
359
"\n"
360
                "\t\talways @(posedge i_clk)\n"
361
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
362
"\n"
363
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
364
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
365
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
366
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
367
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
368 26 dgisselq
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
369 23 dgisselq
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
370
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
371
"\n"
372
        "\t\talways @(posedge i_clk)\n"
373
                "\t\t\tif (i_ce)\n"
374
                "\t\t\tbegin\n"
375
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
376
                                "\t\t\t\t\to_val <= truncated_value;\n"
377
                        "\t\t\t\telse if (sign_bit)\n"
378
                                "\t\t\t\t\to_val <= truncated_value;\n"
379
                        "\t\t\t\telse\n"
380
                                "\t\t\t\t\to_val <= rounded_up;\n"
381
                "\t\t\tend\n"
382
"\n"
383
        "\tend else // If there's more than one bit we are dropping\n"
384
        "\tbegin\n"
385
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
386
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
387
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
388 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
389 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
390
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
391
"\n"
392
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
393
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
394
"\n"
395
                "\t\talways @(posedge i_clk)\n"
396
                        "\t\t\tif (i_ce)\n"
397
                        "\t\t\tbegin\n"
398
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
399
                                "\t\t\t\t\to_val <= truncated_value;\n"
400
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
401
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
402
                        "\t\t\t\telse if (sign_bit)\n"
403
                                "\t\t\t\t\to_val <= truncated_value;\n"
404
                        "\t\t\t\telse\n"
405
                                "\t\t\t\t\to_val <= rounded_up;\n"
406
                        "\t\t\tend\n"
407
        "\tend\n"
408
        "\tendgenerate\n"
409
"\n"
410
"endmodule\n");
411
}
412
 
413
void    build_convround(const char *fname) {
414
        FILE    *fp = fopen(fname, "w");
415
        if (NULL == fp) {
416
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
417
                perror("O/S Err was:");
418
                return;
419
        }
420
 
421
        fprintf(fp,
422
"///////////////////////////////////////////////////////////////////////////\n"
423
"//\n"
424
"// Filename:   convround.v\n"
425
"//             \n"
426
"// Project:    %s\n"
427
"//\n"
428
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
429
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
430
"//             rounding, or ... more, at least according to Wikipedia.\n"
431
"//\n"
432
"//             This form of rounding works by rounding, when the direction\n"
433
"//             is in question, towards the nearest even value.\n"
434
"//\n"
435
"//\n%s"
436
"//\n",
437
                prjname, creator);
438
 
439
        fprintf(fp, "%s", cpyleft);
440
        fprintf(fp,
441
"module convround(i_clk, i_ce, i_val, o_val);\n"
442
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
443
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
444
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
445
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
446
"\n"
447
"\t// Let's deal with three cases to be as general as we can be here\n"
448
"\t//\n"
449
"\t//\t1. The desired output would lose no bits at all\n"
450
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
451
"\t//\t\tadjusting the value to be the nearest even number in\n"
452
"\t//\t\tcases of being halfway between two.  If identically\n"
453
"\t//\t\tequal to a number, we just leave it as is.\n"
454
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
455
"\t//\t\tnormally unless we are rounding a value of exactly\n"
456
"\t//\t\thalfway between the two.  In the halfway case we round\n"
457
"\t//\t\tto the nearest even number.\n"
458
"\tgenerate\n"
459 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
460
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
461
        "\t// effect here.\n"
462
"\n"
463
                "\t\talways @(posedge i_clk)\n"
464
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
465
"\n"
466
"\tend else if (IWID-SHIFT == OWID)\n"
467 23 dgisselq
"\tbegin // No truncation or rounding, output drops no bits\n"
468
"\n"
469
"\t\talways @(posedge i_clk)\n"
470
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
471
"\n"
472
"\tend else if (IWID-SHIFT-1 == OWID)\n"
473
"\tbegin // Output drops one bit, can only add one or ... not.\n"
474
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
475
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
476
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
477 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
478 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
479
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
480
"\n"
481
"\t\talways @(posedge i_clk)\n"
482
"\t\t\tif (i_ce)\n"
483
"\t\t\tbegin\n"
484
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
485
"\t\t\t\t\to_val <= truncated_value;\n"
486
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
487
"\t\t\t\t\to_val <= rounded_up; // even value\n"
488
"\t\t\t\telse // else round down to the nearest\n"
489
"\t\t\t\t\to_val <= truncated_value; // even value\n"
490
"\t\t\tend\n"
491
"\n"
492
"\tend else // If there's more than one bit we are dropping\n"
493
"\tbegin\n"
494
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
495
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
496
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
497 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
498 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
499
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
500
"\n"
501
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
502
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
503
"\n"
504
"\t\talways @(posedge i_clk)\n"
505
"\t\t\tif (i_ce)\n"
506
"\t\t\tbegin\n"
507
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
508
"\t\t\t\t\to_val <= truncated_value;\n"
509
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
510
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
511
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
512
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
513
"\t\t\t\telse   // else round down to nearest even\n"
514
"\t\t\t\t\to_val <= truncated_value;\n"
515
"\t\t\tend\n"
516
"\tend\n"
517
"\tendgenerate\n"
518
"\n"
519
"endmodule\n");
520
}
521
 
522 26 dgisselq
void    build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
523 23 dgisselq
        FILE    *fp = fopen(fname, "w");
524
        if (NULL == fp) {
525
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
526
                perror("O/S Err was:");
527
                return;
528
        }
529
        const   char    *rnd_string;
530
        if (rounding == RND_TRUNCATE)
531
                rnd_string = "truncate";
532
        else if (rounding == RND_FROMZERO)
533
                rnd_string = "roundfromzero";
534
        else if (rounding == RND_HALFUP)
535
                rnd_string = "roundhalfup";
536
        else
537
                rnd_string = "convround";
538
 
539
 
540
        fprintf(fp,
541
"///////////////////////////////////////////////////////////////////////////\n"
542
"//\n"
543 26 dgisselq
"// Filename:   qtrstage%s.v\n"
544 2 dgisselq
"//             \n"
545
"// Project:    %s\n"
546
"//\n"
547 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
548
"//             frequency FFT.  This particular implementation is optimized\n"
549
"//             so that all of the multiplies are accomplished by additions\n"
550
"//             and multiplexers only.\n"
551
"//\n"
552 2 dgisselq
"//\n%s"
553
"//\n",
554 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
555 2 dgisselq
        fprintf(fp, "%s", cpyleft);
556
 
557
        fprintf(fp,
558 26 dgisselq
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
559 29 dgisselq
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
560 5 dgisselq
        "\t// Parameters specific to the core that should be changed when this\n"
561
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
562
        "\t// spans must use the fftdoubles stage.\n"
563 29 dgisselq
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
564 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
565
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
566
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
567
        "\toutput\treg                          o_sync;\n"
568 29 dgisselq
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
569
        TST_QTRSTAGE_LGWIDTH);
570 26 dgisselq
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
571
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
572
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
573
"\n");
574
        }
575 14 dgisselq
        fprintf(fp,
576 5 dgisselq
        "\treg\t        wait_for_sync;\n"
577 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
578 2 dgisselq
"\n"
579 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
580 2 dgisselq
"\n"
581 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
582
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
583
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
584
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
585 2 dgisselq
"\n"
586 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
587
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
588 2 dgisselq
"\n"
589 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
590
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
591
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
592 2 dgisselq
"\n"
593 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
594
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
595
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
596 2 dgisselq
"\n"
597 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
598 14 dgisselq
"\n");
599
        fprintf(fp,
600 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
601
        fprintf(fp,
602
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
603
        fprintf(fp,
604 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
605 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
606
        fprintf(fp,
607 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
608 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
609
        fprintf(fp,
610 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
611 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
612
        fprintf(fp,
613 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
614 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
615
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
616
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
617
/*
618
        fprintf(fp,
619 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
620 9 dgisselq
        "\tgenerate\n"
621
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
622 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
623 9 dgisselq
        "\telse\n"
624 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
625 9 dgisselq
        "\tendgenerate\n"
626 2 dgisselq
"\n"
627 23 dgisselq
*/
628
        fprintf(fp,
629 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
630
        "\tinitial iaddr = 0;\n"
631 5 dgisselq
        "\talways @(posedge i_clk)\n"
632
                "\t\tif (i_rst)\n"
633
                "\t\tbegin\n"
634 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
635 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
636 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
637 5 dgisselq
                "\t\tbegin\n"
638 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
639
                        "\t\t\twait_for_sync <= 1\'b0;\n"
640
                "\t\tend\n"
641
        "\talways @(posedge i_clk)\n"
642
                "\t\tif (i_ce)\n"
643 5 dgisselq
                        "\t\t\timem <= i_data;\n"
644 26 dgisselq
                "\n\n");
645 23 dgisselq
        fprintf(fp,
646
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
647
        "\t// Why not?  Because iaddr will always be zero until after the\n"
648
        "\t// first i_ce, so we are safe.\n"
649 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
650 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
651
                "\t\tif (i_rst)\n"
652 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
653 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
654
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
655
        fprintf(fp,
656
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
657
        "\talways\t@(posedge i_clk)\n"
658
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
659
                "\t\tbegin\n"
660
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
661
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
662
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
663
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
664
                "\t\tend\n\n");
665
        fprintf(fp,
666
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
667
        fprintf(fp,
668 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
669
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
670
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
671
        "\t// it independent of pipeline[2].\n"
672 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
673 26 dgisselq
                "\t\tif (i_ce)\n"
674 23 dgisselq
                "\t\tbegin\n"
675
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
676
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
677
                        "\t\t\tif (ODD == 0)\n"
678 5 dgisselq
                        "\t\t\tbegin\n"
679 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
680
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
681
                        "\t\t\tend else if (INVERSE==0) begin\n"
682
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
683
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
684
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
685
                        "\t\t\tend else begin\n"
686
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
687
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
688
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
689 5 dgisselq
                        "\t\t\tend\n"
690 23 dgisselq
                "\t\tend\n\n");
691
        fprintf(fp,
692
        "\talways\t@(posedge i_clk)\n"
693
                "\t\tif (i_ce)\n"
694
                "\t\tbegin // In sequence, clock = 3\n"
695
                        "\t\t\tif (pipeline[3])\n"
696 5 dgisselq
                        "\t\t\tbegin\n"
697
                                "\t\t\t\tomem <= ob_b;\n"
698
                                "\t\t\t\to_data <= ob_a;\n"
699
                        "\t\t\tend else\n"
700
                                "\t\t\t\to_data <= omem;\n"
701 23 dgisselq
                "\t\tend\n\n");
702
 
703
        fprintf(fp,
704
        "\t// Don\'t forget in the sync check that we are running\n"
705
        "\t// at two clocks per sample.  Thus we need to\n"
706
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
707 26 dgisselq
        "\tinitial\to_sync = 1\'b0;\n"
708 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
709 26 dgisselq
                "\t\tif (i_rst)\n"
710
                "\t\t\to_sync <= 1\'b0;\n"
711
                "\t\telse if (i_ce)\n"
712 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
713
        fprintf(fp, "endmodule\n");
714 2 dgisselq
}
715
 
716 26 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
717 2 dgisselq
        FILE    *fp = fopen(fname, "w");
718
        if (NULL == fp) {
719
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
720
                perror("O/S Err was:");
721
                return;
722
        }
723
 
724 23 dgisselq
        const   char    *rnd_string;
725
        if (rounding == RND_TRUNCATE)
726
                rnd_string = "truncate";
727
        else if (rounding == RND_FROMZERO)
728
                rnd_string = "roundfromzero";
729
        else if (rounding == RND_HALFUP)
730
                rnd_string = "roundhalfup";
731
        else
732
                rnd_string = "convround";
733
 
734
 
735 2 dgisselq
        fprintf(fp,
736
"///////////////////////////////////////////////////////////////////////////\n"
737
"//\n"
738 26 dgisselq
"// Filename:   dblstage%s.v\n"
739 2 dgisselq
"//\n"
740
"// Project:    %s\n"
741
"//\n"
742
"// Purpose:    This is part of an FPGA implementation that will process\n"
743 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
744
"//             through the data at two samples per clock.  If you notice\n"
745
"//             from the derivation of an FFT, the only time both even and\n"
746
"//             odd samples are used at the same time is in this stage.\n"
747
"//             Therefore, other than this stage and these twiddles, all of\n"
748
"//             the other stages can run two stages at a time at one sample\n"
749
"//             per clock.\n"
750 2 dgisselq
"//\n"
751
"//             In this implementation, the output is valid one clock after\n"
752
"//             the input is valid.  The output also accumulates one bit\n"
753
"//             above and beyond the number of bits in the input.\n"
754
"//             \n"
755
"//             i_clk   A system clock\n"
756 6 dgisselq
"//             i_rst   A synchronous reset\n"
757 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
758 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
759 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
760
"//                     bits contain the real portion, low order bits the\n"
761
"//                     imaginary portion, all in two\'s complement.\n"
762
"//             i_right The next (odd) complex sample input, same format as\n"
763
"//                     i_left.\n"
764
"//             o_left  The first (even) complex output.\n"
765
"//             o_right The next (odd) complex output.\n"
766 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
767 2 dgisselq
"//\n%s"
768 26 dgisselq
"//\n", (dbg)?"_dbg":"", prjname, creator);
769 2 dgisselq
 
770
        fprintf(fp, "%s", cpyleft);
771
        fprintf(fp,
772 26 dgisselq
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
773 29 dgisselq
        "\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"
774 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
775 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
776 28 dgisselq
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
777 6 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
778 29 dgisselq
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",
779
        TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);
780 26 dgisselq
 
781
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
782
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
783
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
784
"\n");
785
        }
786 19 dgisselq
        fprintf(fp,
787 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
788
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
789
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
790
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
791
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
792
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
793
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
794 2 dgisselq
"\n"
795 15 dgisselq
"\n"
796 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
797 15 dgisselq
"\n"
798 23 dgisselq
"\n");
799
        fprintf(fp,
800 26 dgisselq
        "\n"
801
        "\t// As with any register connected to the sync pulse, these must\n"
802
        "\t// have initial values and be reset on the i_rst signal.\n"
803
        "\t// Other data values need only restrict their updates to i_ce\n"
804
        "\t// enabled clocks, but sync\'s must obey resets and initial\n"
805
        "\t// conditions as well.\n"
806 28 dgisselq
        "\treg\trnd_sync, r_sync;\n"
807 2 dgisselq
"\n"
808 28 dgisselq
        "\tinitial\trnd_sync      = 1\'b0; // Sync into rounding\n"
809
        "\tinitial\tr_sync        = 1\'b0; // Sync coming out\n"
810 5 dgisselq
        "\talways @(posedge i_clk)\n"
811 6 dgisselq
                "\t\tif (i_rst)\n"
812 23 dgisselq
                "\t\tbegin\n"
813 26 dgisselq
                        "\t\t\trnd_sync <= 1\'b0;\n"
814 28 dgisselq
                        "\t\t\tr_sync <= 1\'b0;\n"
815
                "\t\tend else if (i_ce)\n"
816 5 dgisselq
                "\t\tbegin\n"
817 26 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
818 28 dgisselq
                        "\t\t\tr_sync <= rnd_sync;\n"
819 26 dgisselq
                "\t\tend\n"
820
"\n"
821
        "\t// As with other variables, these are really only updated when in\n"
822
        "\t// the processing pipeline, after the first i_sync.  However, to\n"
823
        "\t// eliminate as much unnecessary logic as possible, we toggle\n"
824 28 dgisselq
        "\t// these any time the i_ce line is enabled, and don\'t reset.\n"
825
        "\t// them on i_rst.\n");
826
        fprintf(fp,
827
        "\t// Don't forget that we accumulate a bit by adding two values\n"
828
        "\t// together. Therefore our intermediate value must have one more\n"
829
        "\t// bit than the two originals.\n"
830
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n"
831
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n"
832 26 dgisselq
        "\talways @(posedge i_clk)\n"
833
                "\t\tif (i_ce)\n"
834
                "\t\tbegin\n"
835
                        "\t\t\t//\n"
836 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
837
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
838 5 dgisselq
                        "\t\t\t//\n"
839 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
840
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
841 6 dgisselq
                        "\t\t\t//\n"
842 5 dgisselq
                "\t\tend\n"
843 28 dgisselq
"\n");
844
        fprintf(fp,
845
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
846
        "\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
847
        fprintf(fp,
848
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
849
        "\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
850
        fprintf(fp,
851
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
852
        "\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
853
        fprintf(fp,
854
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
855
        "\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
856
 
857
        fprintf(fp, "\n"
858
        "\t// Prior versions of this routine did not include the extra\n"
859
        "\t// clock and register/flip-flops that this routine requires.\n"
860
        "\t// These are placed in here to correct a bug in Verilator, that\n"
861
        "\t// otherwise struggles.  (Hopefully this will fix the problem ...)\n"
862
        "\talways @(posedge i_clk)\n"
863
                "\t\tif (i_ce)\n"
864
                "\t\tbegin\n"
865
                        "\t\t\to_left  <= { o_out_0r, o_out_0i };\n"
866
                        "\t\t\to_right <= { o_out_1r, o_out_1i };\n"
867
                "\t\tend\n"
868 2 dgisselq
"\n"
869 28 dgisselq
        "\tinitial\to_sync = 1'b0; // Final sync coming out of module\n"
870
        "\talways @(posedge i_clk)\n"
871
                "\t\tif (i_rst)\n"
872
                "\t\t\to_sync <= 1'b0;\n"
873
                "\t\telse if (i_ce)\n"
874
                "\t\t\to_sync <= r_sync;\n"
875 2 dgisselq
"\n"
876
"endmodule\n");
877
        fclose(fp);
878
}
879
 
880
void    build_multiply(const char *fname) {
881
        FILE    *fp = fopen(fname, "w");
882
        if (NULL == fp) {
883
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
884
                perror("O/S Err was:");
885
                return;
886
        }
887
 
888
        fprintf(fp,
889
"///////////////////////////////////////////////////////////////////////////\n"
890
"//\n"
891
"// Filename:   shiftaddmpy.v\n"
892
"//\n"
893
"// Project:    %s\n"
894
"//\n"
895
"// Purpose:    A portable shift and add multiply.\n"
896
"//\n"
897
"//             While both Xilinx and Altera will offer single clock \n"
898
"//             multiplies, this simple approach will multiply two numbers\n"
899
"//             on any architecture.  The result maintains the full width\n"
900
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
901
"//             no shifted bits, etc.\n"
902
"//\n"
903
"//             Further, for those applications that can support it, this\n"
904
"//             multiply is pipelined and will produce one answer per clock.\n"
905
"//\n"
906
"//             For minimal processing delay, make the first parameter\n"
907
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
908
"//\n"
909
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
910
"//             That is, if the data is present on the input at clock t=0,\n"
911
"//             the result will be present on the output at time t=AWIDTH+1;\n"
912
"//\n"
913
"//\n%s"
914
"//\n", prjname, creator);
915
 
916
        fprintf(fp, "%s", cpyleft);
917
        fprintf(fp,
918
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
919 29 dgisselq
        "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);
920
#ifdef  TST_SHIFTADDMPY_BW
921
        fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);
922
#else
923
        fprintf(fp, "AWIDTH;\n");
924
#endif
925
        fprintf(fp,
926 2 dgisselq
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
927
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
928
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
929
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
930
"\n"
931
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
932
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
933
        "\treg\t\t\tsgn;\n"
934
"\n"
935
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
936
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
937
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
938
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
939
        "\tgenvar k;\n"
940
"\n"
941 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
942
        "\t// taking the absolute value here would require an additional bit.\n"
943
        "\t// However, because our results are now unsigned, we can stay\n"
944
        "\t// within the number of bits given (for now).\n"
945 2 dgisselq
        "\talways @(posedge i_clk)\n"
946
                "\t\tif (i_ce)\n"
947
                "\t\tbegin\n"
948
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
949
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
950
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
951
                "\t\tend\n"
952
"\n"
953
        "\talways @(posedge i_clk)\n"
954
                "\t\tif (i_ce)\n"
955
                "\t\tbegin\n"
956 26 dgisselq
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
957
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
958 2 dgisselq
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
959 26 dgisselq
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
960 2 dgisselq
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
961
                "\t\tend\n"
962
"\n"
963
        "\tgenerate\n"
964 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
965 25 dgisselq
        "\tbegin : genstages\n"
966 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
967
                "\t\tif (i_ce)\n"
968 2 dgisselq
                "\t\tbegin\n"
969 26 dgisselq
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
970
                        "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
971
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
972 2 dgisselq
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
973
                "\t\tend\n"
974
        "\tend\n"
975
        "\tendgenerate\n"
976
"\n"
977
        "\talways @(posedge i_clk)\n"
978
                "\t\tif (i_ce)\n"
979
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
980
"\n"
981
"endmodule\n");
982
 
983
        fclose(fp);
984
}
985
 
986 29 dgisselq
void    build_bimpy(const char *fname) {
987
        FILE    *fp = fopen(fname, "w");
988
        if (NULL == fp) {
989
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
990
                perror("O/S Err was:");
991
                return;
992
        }
993
 
994
        fprintf(fp,
995
"////////////////////////////////////////////////////////////////////////////////\n"
996
"//\n"
997
"// Filename:   %s\n"
998
"//\n"
999
"// Project:    %s\n"
1000
"//\n"
1001
"// Purpose:    A simple 2-bit multiply based upon the fact that LUT's allow\n"
1002
"//             6-bits of input.  In other words, I could build a 3-bit\n"
1003
"//             multiply from 6 LUTs (5 actually, since the first could have\n"
1004
"//             two outputs).  This would allow multiplication of three bit\n"
1005
"//             digits, save only for the fact that you would need two bits\n"
1006
"//             of carry.  The bimpy approach throttles back a bit and does\n"
1007
"//             a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"
1008
"//             carry more than one bit.  While this multiply is hardware\n"
1009
"//             independent (and can still run under Verilator therefore),\n"
1010
"//             it is really motivated by trying to optimize for a specific\n"
1011
"//             piece of hardware (Xilinx-7 series ...) that has at least\n"
1012
"//             4-input LUT's with carry chains.\n"
1013
"//\n"
1014
"//\n"
1015
"//\n%s"
1016
"//\n", fname, prjname, creator);
1017
 
1018
        fprintf(fp, "%s", cpyleft);
1019
        fprintf(fp,
1020
"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1021
"\tparameter\tBW=18, // Number of bits in i_b\n"
1022
"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"
1023
"\tinput\t\t\t\ti_clk, i_ce;\n"
1024
"\tinput\t\t[(LUTB-1):0]\ti_a;\n"
1025
"\tinput\t\t[(BW-1):0]\ti_b;\n"
1026
"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"
1027
"\n"
1028
"\twire [(BW+LUTB-2):0] w_r;\n"
1029
"\twire [(BW+LUTB-3):1] c;\n"
1030
"\n"
1031
"\tassign\tw_r =  { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"
1032
"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"
1033
"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"
1034
"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"
1035
"\n"
1036
"\talways @(posedge i_clk)\n"
1037
"\t\tif (i_ce)\n"
1038
"\t\t\to_r <= w_r + { c, 2'b0 };\n"
1039
"\n"
1040
"endmodule\n");
1041
 
1042
        fclose(fp);
1043
}
1044
 
1045
void    build_longbimpy(const char *fname) {
1046
        FILE    *fp = fopen(fname, "w");
1047
        if (NULL == fp) {
1048
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1049
                perror("O/S Err was:");
1050
                return;
1051
        }
1052
 
1053
        fprintf(fp,
1054
"////////////////////////////////////////////////////////////////////////////////\n"
1055
"//\n"
1056
"// Filename:   %s\n"
1057
"//\n"
1058
"// Project:    %s\n"
1059
"//\n"
1060
"// Purpose:    A portable shift and add multiply, built with the knowledge\n"
1061
"//             of the existence of a six bit LUT and carry chain.  That\n"
1062
"//             knowledge allows us to multiply two bits from one value\n"
1063
"//             at a time against all of the bits of the other value.  This\n"
1064
"//             sub multiply is called the bimpy.\n"
1065
"//\n"
1066
"//             For minimal processing delay, make the first parameter\n"
1067
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
1068
"//\n"
1069
"//\n"
1070
"//\n%s"
1071
"//\n", fname, prjname, creator);
1072
 
1073
        fprintf(fp, "%s", cpyleft);
1074
        fprintf(fp,
1075
"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
1076
        "\tparameter    AW=%d,  // The width of i_a, min width is 5\n"
1077
                        "\t\t\tBW=", TST_LONGBIMPY_AW);
1078
#ifdef  TST_LONGBIMPY_BW
1079
        fprintf(fp, "%d", TST_LONGBIMPY_BW);
1080
#else
1081
        fprintf(fp, "AW");
1082
#endif
1083
 
1084
        fprintf(fp, ",  // The width of i_b, can be anything\n"
1085
                        "\t\t\t// The following three parameters should not be changed\n"
1086
                        "\t\t\t// by any implementation, but are based upon hardware\n"
1087
                        "\t\t\t// and the above values:\n"
1088
                        "\t\t\tOW=AW+BW,        // The output width\n"
1089
                        "\t\t\tIW=(AW+1)&(-2),  // Internal width of A\n"
1090
                        "\t\t\tLUTB=2,  // How many bits we can multiply by at once\n"
1091
                        "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"
1092
        "\tinput\t\t\t\ti_clk, i_ce;\n"
1093
        "\tinput\t\t[(AW-1):0]\ti_a;\n"
1094
        "\tinput\t\t[(BW-1):0]\ti_b;\n"
1095
        "\toutput\treg\t[(AW+BW-1):0]\to_r;\n"
1096
"\n"
1097
        "\treg\t[(IW-1):0]\tu_a;\n"
1098
        "\treg\t[(BW-1):0]\tu_b;\n"
1099
        "\treg\t\t\tsgn;\n"
1100
"\n"
1101
        "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"
1102
        "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"
1103
        "\treg\t[(TLEN-1):0]\t\tr_s;\n"
1104
        "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"
1105
        "\tgenvar k;\n"
1106
"\n"
1107
        "\t// First step:\n"
1108
        "\t// Switch to unsigned arithmetic for our multiply, keeping track\n"
1109
        "\t// of the along the way.  We'll then add the sign again later at\n"
1110
        "\t// the end.\n"
1111
        "\t//\n"
1112
        "\t// If we were forced to stay within two's complement arithmetic,\n"
1113
        "\t// taking the absolute value here would require an additional bit.\n"
1114
        "\t// However, because our results are now unsigned, we can stay\n"
1115
        "\t// within the number of bits given (for now).\n"
1116
        "\tgenerate if (IW > AW)\n"
1117
        "\tbegin\n"
1118
                "\t\talways @(posedge i_clk)\n"
1119
                        "\t\t\tif (i_ce)\n"
1120
                        "\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"
1121
        "\tend else begin\n"
1122
                "\t\talways @(posedge i_clk)\n"
1123
                        "\t\t\tif (i_ce)\n"
1124
                        "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"
1125
        "\tend endgenerate\n"
1126
"\n"
1127
        "\talways @(posedge i_clk)\n"
1128
                "\t\tif (i_ce)\n"
1129
                "\t\tbegin\n"
1130
                        "\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"
1131
                        "\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"
1132
                "\t\tend\n"
1133
"\n"
1134
        "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"
1135
"\n"
1136
        "\t//\n"
1137
        "\t// Second step: First two 2xN products.\n"
1138
        "\t//\n"
1139
        "\t// Since we have no tableau of additions (yet), we can do both\n"
1140
        "\t// of the first two rows at the same time and add them together.\n"
1141
        "\t// For the next round, we'll then have a previous sum to accumulate\n"
1142
        "\t// with new and subsequent product, and so only do one product at\n"
1143
        "\t// a time can follow this--but the first clock can do two at a time.\n"
1144
        "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);\n"
1145
        "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"
1146
        "\talways @(posedge i_clk)\n"
1147
                "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"
1148
        "\talways @(posedge i_clk)\n"
1149
                "\t\tif (i_ce) r_b[0] <= u_b;\n"
1150
        "\talways @(posedge i_clk)\n"
1151
                "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"
1152
        "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"
1153
                "\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"
1154
                        "\t\t\t  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"
1155
"\n"
1156
        "\tgenerate // Keep track of intermediate values, before multiplying them\n"
1157
        "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"
1158
        "\tbegin : gencopies\n"
1159
                "\t\talways @(posedge i_clk)\n"
1160
                "\t\tif (i_ce)\n"
1161
                "\t\tbegin\n"
1162
                        "\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"
1163
                                "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"
1164
                        "\t\t\tr_b[k+1] <= r_b[k];\n"
1165
                        "\t\tend\n"
1166
        "\tend endgenerate\n"
1167
"\n"
1168
        "\tgenerate // The actual multiply and accumulate stage\n"
1169
        "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"
1170
        "\tbegin : genstages\n"
1171
                "\t\t// First, the multiply: 2-bits times BW bits\n"
1172
                "\t\twire\t[(BW+LUTB-1):0] genp;\n"
1173
                "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"
1174
"\n"
1175
                "\t\t// Then the accumulate step -- on the next clock\n"
1176
                "\t\talways @(posedge i_clk)\n"
1177
                        "\t\t\tif (i_ce)\n"
1178
                                "\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"
1179
                                        "\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"
1180
        "\tend endgenerate\n"
1181
"\n"
1182
        "\twire [(IW+BW-1):0]   w_r;\n"
1183
        "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"
1184
        "\talways @(posedge i_clk)\n"
1185
                "\t\tif (i_ce)\n"
1186
                        "\t\t\to_r <= w_r[(AW+BW-1):0];\n"
1187
"\n"
1188
"endmodule\n");
1189
 
1190
        fclose(fp);
1191
}
1192
 
1193 2 dgisselq
void    build_dblreverse(const char *fname) {
1194
        FILE    *fp = fopen(fname, "w");
1195
        if (NULL == fp) {
1196
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1197
                perror("O/S Err was:");
1198
                return;
1199
        }
1200
 
1201
        fprintf(fp,
1202
"///////////////////////////////////////////////////////////////////////////\n"
1203
"//\n"
1204
"// Filename:   dblreverse.v\n"
1205
"//\n"
1206
"// Project:    %s\n"
1207
"//\n"
1208
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
1209
"//             expected as follows:\n"
1210
"//\n"
1211
"//             i_clk   A running clock at whatever system speed is offered.\n"
1212
"//             i_rst   A synchronous reset signal, that resets all internals\n"
1213
"//             i_ce    If this is one, one input is consumed and an output\n"
1214
"//                     is produced.\n"
1215
"//             i_in_0, i_in_1\n"
1216
"//                     Two inputs to be consumed, each of width WIDTH.\n"
1217
"//             o_out_0, o_out_1\n"
1218
"//                     Two of the bitreversed outputs, also of the same\n"
1219
"//                     width, WIDTH.  Of course, there is a delay from the\n"
1220
"//                     first input to the first output.  For this purpose,\n"
1221
"//                     o_sync is present.\n"
1222 26 dgisselq
"//             o_sync  This will be a 1\'b1 for the first value in any block.\n"
1223
"//                     Following a reset, this will only become 1\'b1 once\n"
1224 2 dgisselq
"//                     the data has been loaded and is now valid.  After that,\n"
1225
"//                     all outputs will be valid.\n"
1226 26 dgisselq
"//\n"
1227
"//     20150602 -- This module has undergone massive rework in order to\n"
1228
"//             ensure that it uses resources efficiently.  As a result, \n"
1229
"//             it now optimizes nicely into block RAMs.  As an unfortunately\n"
1230
"//             side effect, it now passes it\'s bench test (dblrev_tb) but\n"
1231
"//             fails the integration bench test (fft_tb).\n"
1232
"//\n"
1233 2 dgisselq
"//\n%s"
1234
"//\n", prjname, creator);
1235
        fprintf(fp, "%s", cpyleft);
1236
        fprintf(fp,
1237
"\n\n"
1238
"//\n"
1239
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
1240
"// our work into eight memory banks, writing two banks at once and reading\n"
1241
"// another two banks in the same clock?\n"
1242
"//\n"
1243
"//     mem[00xxx0] = s_0[n]\n"
1244
"//     mem[00xxx1] = s_1[n]\n"
1245
"//     o_0[n] = mem[10xxx0]\n"
1246
"//     o_1[n] = mem[11xxx0]\n"
1247
"//     ...\n"
1248
"//     mem[01xxx0] = s_0[m]\n"
1249
"//     mem[01xxx1] = s_1[m]\n"
1250
"//     o_0[m] = mem[10xxx1]\n"
1251
"//     o_1[m] = mem[11xxx1]\n"
1252
"//     ...\n"
1253
"//     mem[10xxx0] = s_0[n]\n"
1254
"//     mem[10xxx1] = s_1[n]\n"
1255
"//     o_0[n] = mem[00xxx0]\n"
1256
"//     o_1[n] = mem[01xxx0]\n"
1257
"//     ...\n"
1258
"//     mem[11xxx0] = s_0[m]\n"
1259
"//     mem[11xxx1] = s_1[m]\n"
1260
"//     o_0[m] = mem[00xxx1]\n"
1261
"//     o_1[m] = mem[01xxx1]\n"
1262
"//     ...\n"
1263
"//\n"
1264 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
1265
"//     to do it properly.  These four banks are defined by the two bits\n"
1266
"//     that determine the top and bottom of the correct address.  Larger\n"
1267
"//     FFT\'s would require more memories.\n"
1268
"//\n"
1269 2 dgisselq
"//\n");
1270
        fprintf(fp,
1271
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
1272 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
1273 29 dgisselq
        "\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"
1274 5 dgisselq
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
1275
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
1276 26 dgisselq
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
1277 29 dgisselq
        "\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);
1278
 
1279
        fprintf(fp,
1280 2 dgisselq
"\n"
1281 26 dgisselq
        "\treg\t\t\tin_reset;\n"
1282
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
1283
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
1284 2 dgisselq
"\n"
1285 5 dgisselq
        "\tgenvar\tk;\n"
1286 26 dgisselq
        "\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
1287 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
1288 26 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
1289 25 dgisselq
        "\tend endgenerate\n"
1290 2 dgisselq
"\n"
1291 25 dgisselq
        "\tinitial iaddr = 0;\n"
1292
        "\tinitial in_reset = 1\'b1;\n"
1293 26 dgisselq
        "\tinitial o_sync = 1\'b0;\n"
1294 5 dgisselq
        "\talways @(posedge i_clk)\n"
1295
                "\t\tif (i_rst)\n"
1296
                "\t\tbegin\n"
1297
                        "\t\t\tiaddr <= 0;\n"
1298 26 dgisselq
                        "\t\t\tin_reset <= 1\'b1;\n"
1299
                        "\t\t\to_sync <= 1\'b0;\n"
1300 5 dgisselq
                "\t\tend else if (i_ce)\n"
1301
                "\t\tbegin\n"
1302 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
1303
                        "\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
1304
                                "\t\t\t\tin_reset <= 1\'b0;\n"
1305 5 dgisselq
                        "\t\t\tif (in_reset)\n"
1306 26 dgisselq
                                "\t\t\t\to_sync <= 1\'b0;\n"
1307
                        "\t\t\telse\n"
1308
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
1309 5 dgisselq
                "\t\tend\n"
1310 2 dgisselq
"\n"
1311 26 dgisselq
        "\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
1312
        "\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
1313
"\n"
1314
        "\talways @(posedge i_clk)\n"
1315
                "\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
1316
        "\talways @(posedge i_clk)\n"
1317
                "\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
1318
"\n"
1319
"\n"
1320
        "\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
1321
"\n"
1322
        "\talways @(posedge i_clk)\n"
1323
                "\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1324
        "\talways @(posedge i_clk)\n"
1325
                "\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1326
        "\talways @(posedge i_clk)\n"
1327
                "\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1328
        "\talways @(posedge i_clk)\n"
1329
                "\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1330
"\n"
1331
        "\treg\tadrz;\n"
1332
        "\talways @(posedge i_clk)\n"
1333 28 dgisselq
                "\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n"
1334 26 dgisselq
"\n"
1335
        "\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
1336
        "\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1337
"\n"
1338 21 dgisselq
"endmodule\n");
1339 2 dgisselq
 
1340
        fclose(fp);
1341
}
1342
 
1343 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1344 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1345
        if (NULL == fp) {
1346
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1347
                perror("O/S Err was:");
1348
                return;
1349
        }
1350 23 dgisselq
        const   char    *rnd_string;
1351
        if (rounding == RND_TRUNCATE)
1352
                rnd_string = "truncate";
1353
        else if (rounding == RND_FROMZERO)
1354
                rnd_string = "roundfromzero";
1355
        else if (rounding == RND_HALFUP)
1356
                rnd_string = "roundhalfup";
1357
        else
1358
                rnd_string = "convround";
1359 2 dgisselq
 
1360
        fprintf(fp,
1361
"///////////////////////////////////////////////////////////////////////////\n"
1362
"//\n"
1363
"// Filename:   butterfly.v\n"
1364
"//\n"
1365
"// Project:    %s\n"
1366
"//\n"
1367
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1368
"//             in frequency version of an FFT.  Specifically, given\n"
1369
"//             complex Left and Right values together with a \n"
1370
"//             coefficient, the output of this routine is given\n"
1371
"//             by:\n"
1372
"//\n"
1373
"//             L' = L + R\n"
1374
"//             R' = (L - R)*C\n"
1375
"//\n"
1376
"//             The rest of the junk below handles timing (mostly),\n"
1377
"//             to make certain that L' and R' reach the output at\n"
1378
"//             the same clock.  Further, just to make certain\n"
1379
"//             that is the case, an 'aux' input exists.  This\n"
1380
"//             aux value will come out of this routine synchronized\n"
1381
"//             to the values it came in with.  (i.e., both L', R',\n"
1382
"//             and aux all have the same delay.)  Hence, a caller\n"
1383
"//             of this routine may set aux on the first input with\n"
1384
"//             valid data, and then wait to see aux set on the output\n"
1385
"//             to know when to find the first output with valid data.\n"
1386
"//\n"
1387
"//             All bits are preserved until the very last clock,\n"
1388
"//             where any more bits than OWIDTH will be quietly\n"
1389
"//             discarded.\n"
1390
"//\n"
1391
"//             This design features no overflow checking.\n"
1392
"// \n"
1393
"// Notes:\n"
1394
"//             CORDIC:\n"
1395
"//             Much as we would like, we can't use a cordic here.\n"
1396
"//             The goal is to accomplish an FFT, as defined, and a\n"
1397
"//             CORDIC places a scale factor onto the data.  Removing\n"
1398
"//             the scale factor would cost a two multiplies, which\n"
1399
"//             is precisely what we are trying to avoid.\n"
1400
"//\n"
1401
"//\n"
1402
"//             3-MULTIPLIES:\n"
1403
"//             It should also be possible to do this with three \n"
1404
"//             multiplies and an extra two addition cycles.  \n"
1405
"//\n"
1406
"//             We want\n"
1407
"//                     R+I = (a + jb) * (c + jd)\n"
1408
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1409
"//             We multiply\n"
1410
"//                     P1 = ac\n"
1411
"//                     P2 = bd\n"
1412
"//                     P3 = (a+b)(c+d)\n"
1413
"//             Then \n"
1414
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1415
"//\n"
1416
"//             WIDTHS:\n"
1417
"//             On multiplying an X width number by an\n"
1418
"//             Y width number, X>Y, the result should be (X+Y)\n"
1419
"//             bits, right?\n"
1420
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1421
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1422
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1423
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1424
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1425
"//             YUP!  But just barely.  Do this and you'll really want\n"
1426
"//             to drop a bit, although you will risk overflow in so\n"
1427
"//             doing.\n"
1428 26 dgisselq
"//\n"
1429
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
1430
"//             synchronization lines no longer go through the FIFO with the\n"
1431
"//             left hand sum, but are kept out of memory.  This allows the\n"
1432
"//             butterfly to use more optimal memory resources, while also\n"
1433
"//             guaranteeing that the sync lines can be properly reset upon\n"
1434
"//             any reset signal.\n"
1435
"//\n"
1436 2 dgisselq
"//\n%s"
1437
"//\n", prjname, creator);
1438
        fprintf(fp, "%s", cpyleft);
1439
 
1440
        fprintf(fp,
1441 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1442 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1443
        "\t// Public changeable parameters ...\n"
1444 29 dgisselq
        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
1445
#ifdef  TST_BUTTERFLY_CWIDTH
1446
        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
1447
#else
1448
        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
1449
#endif
1450
#ifdef  TST_BUTTERFLY_OWIDTH
1451
        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
1452
#else
1453
        fprintf(fp, "OWIDTH=IWIDTH+1;\n");
1454
#endif
1455
        fprintf(fp,
1456 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1457 29 dgisselq
        "\tparameter    MPYDELAY=%d'd%d,\n"
1458 28 dgisselq
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
1459 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1460
        "\t// this value is fractional, then round up to the nearest\n"
1461
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1462 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1463 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1464 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1465
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1466
        "\tinput\t\ti_aux;\n"
1467
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1468 26 dgisselq
        "\toutput\treg\to_aux;\n"
1469 29 dgisselq
        "\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),
1470
                lgdelay(16,xtracbits));
1471 14 dgisselq
        fprintf(fp,
1472 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1473 2 dgisselq
"\n"
1474 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1475
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
1476
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1477
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1478
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1479
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1480
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1481
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1482 2 dgisselq
"\n"
1483 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1484 2 dgisselq
"\n"
1485 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1486
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1487 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1488 26 dgisselq
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1489 5 dgisselq
"\n");
1490
        fprintf(fp,
1491
        "\t// Set up the input to the multiply\n"
1492 2 dgisselq
        "\talways @(posedge i_clk)\n"
1493
                "\t\tif (i_ce)\n"
1494
                "\t\tbegin\n"
1495
                        "\t\t\t// One clock just latches the inputs\n"
1496
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1497
                        "\t\t\tr_right <= i_right;\n"
1498
                        "\t\t\tr_coef  <= i_coef;\n"
1499
                        "\t\t\t// Next clock adds/subtracts\n"
1500
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1501
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1502
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1503
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1504
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1505
                        "\t\t\tr_coef_2<= r_coef;\n"
1506
        "\t\tend\n"
1507 5 dgisselq
"\n");
1508
        fprintf(fp,
1509
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1510
        "\t// to be multiplied, but yet we still need the results in sync\n"
1511
        "\t// with the answer when it is ready.\n"
1512 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1513 2 dgisselq
        "\talways @(posedge i_clk)\n"
1514 6 dgisselq
                "\t\tif (i_rst)\n"
1515
                        "\t\t\tfifo_addr <= 0;\n"
1516 26 dgisselq
                "\t\telse if (i_ce)\n"
1517 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1518
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1519
                        "\t\t\t// right side.\n"
1520
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1521 14 dgisselq
"\n"
1522 26 dgisselq
        "\talways @(posedge i_clk)\n"
1523
                "\t\tif (i_ce)\n"
1524
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
1525 2 dgisselq
"\n"
1526 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1527
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1528
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1529
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1530 2 dgisselq
"\n"
1531 5 dgisselq
"\n");
1532
        fprintf(fp,
1533
        "\t// Multiply output is always a width of the sum of the widths of\n"
1534
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1535
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1536
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1537
        "\t// three multiply complex multiply cannot increase the total\n"
1538
        "\t// number of bits in our final output.  We\'ll take care of\n"
1539
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1540
        "\t// below.\n"
1541 2 dgisselq
"\n"
1542 5 dgisselq
"\n");
1543
        fprintf(fp,
1544
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1545
        "\t// by doing three multiplies we accomplish the work of four.\n"
1546
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1547
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1548
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1549
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1550
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1551
        "\t// and P3 as\n"
1552
        "\t//\tP1 = ac\n"
1553
        "\t//\tP2 = bd\n"
1554
        "\t//\tP3 = (a + b) * (c + d)\n"
1555
        "\t// and then complete our final answer with\n"
1556
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1557
        "\t//\tad + bc = P3 - P2 - P1\n"
1558
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1559
        "\t//\t        = bc + ad (this checks)\n"
1560 2 dgisselq
"\n"
1561 5 dgisselq
"\n");
1562
        fprintf(fp,
1563
        "\t// This should really be based upon an IF, such as in\n"
1564
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1565
        "\t// However, this is the only (other) way I know to do it.\n"
1566 29 dgisselq
        "\tgenerate if (CWIDTH < IWIDTH+1)\n"
1567 2 dgisselq
        "\tbegin\n"
1568 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1569
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1570
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1571
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1572
                "\n"
1573 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1574 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1575
                "\t\t// simpler, multiply.\n"
1576 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1577 2 dgisselq
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1578
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1579 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1580 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1581 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1582 29 dgisselq
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1583 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1584 2 dgisselq
        "\tend else begin\n"
1585 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1586
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1587
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1588
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1589
                "\n"
1590 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1591 2 dgisselq
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1592
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1593 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1594 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1595 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1596 29 dgisselq
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1597 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1598 2 dgisselq
        "\tend\n"
1599
        "\tendgenerate\n"
1600 29 dgisselq
"\n",
1601
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1602
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1603
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1604
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1605
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
1606
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");
1607 5 dgisselq
        fprintf(fp,
1608
        "\t// These values are held in memory and delayed during the\n"
1609
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1610
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1611
        "\t// therefore, the left_x values need to be right shifted by\n"
1612
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1613
        "\t// extension.\n"
1614
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1615 26 dgisselq
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
1616
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1617
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1618 2 dgisselq
"\n"
1619
"\n"
1620 23 dgisselq
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
1621 5 dgisselq
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1622
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1623
"\n");
1624
        fprintf(fp,
1625 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1626 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1627
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1628
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1629
        "\t// them, but the actual values will never fill all these bits.\n"
1630
        "\t// In particular, we only need:\n"
1631
        "\t//\t IWIDTH bits for the input\n"
1632
        "\t//\t     +1 bit for the add/subtract\n"
1633
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1634
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1635
        "\t//\t ------\n"
1636
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1637
        "\t//\n"
1638
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1639
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1640
        "\t//\t   IWIDTH bits for the input\n"
1641
        "\t//\t       +1 bit for the add/subtract\n"
1642
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1643
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1644
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1645
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1646
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1647
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1648
        "\t// SHIFT) we accomplish that here.\n"
1649 23 dgisselq
"\n");
1650
        fprintf(fp,
1651
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1652
 
1653
        fprintf(fp,
1654 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
1655 23 dgisselq
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1656
                rnd_string);
1657
        fprintf(fp,
1658 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
1659 23 dgisselq
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1660
                rnd_string);
1661
        fprintf(fp,
1662 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1663 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1664
        fprintf(fp,
1665 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1666 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1667
        fprintf(fp,
1668
        "\talways @(posedge i_clk)\n"
1669
                "\t\tif (i_ce)\n"
1670
                "\t\tbegin\n"
1671
                        "\t\t\t// First clock, recover all values\n"
1672
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1673
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1674
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1675
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1676
                        "\t\t\t// extra bits we need to get rid of.)\n"
1677
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1678
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1679 2 dgisselq
"\n"
1680 23 dgisselq
                        "\t\t\t// Second clock, round and latch for final clock\n"
1681
                        "\t\t\tb_right_r <= rnd_right_r;\n"
1682
                        "\t\t\tb_right_i <= rnd_right_i;\n"
1683
                        "\t\t\tb_left_r <= rnd_left_r;\n"
1684
                        "\t\t\tb_left_i <= rnd_left_i;\n"
1685 24 dgisselq
                "\t\tend\n"
1686
"\n");
1687 26 dgisselq
 
1688 24 dgisselq
        fprintf(fp,
1689 26 dgisselq
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
1690
        "\tinitial\taux_pipeline = 0;\n"
1691
        "\talways @(posedge i_clk)\n"
1692
        "\t\tif (i_rst)\n"
1693
        "\t\t\taux_pipeline <= 0;\n"
1694
        "\t\telse if (i_ce)\n"
1695
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
1696
"\n");
1697
        fprintf(fp,
1698 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1699 24 dgisselq
        "\talways @(posedge i_clk)\n"
1700
                "\t\tif (i_rst)\n"
1701
                "\t\t\to_aux <= 1\'b0;\n"
1702
                "\t\telse if (i_ce)\n"
1703
                "\t\tbegin\n"
1704
                        "\t\t\t// Second clock, latch for final clock\n"
1705 26 dgisselq
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
1706 23 dgisselq
                "\t\tend\n"
1707
"\n");
1708 24 dgisselq
 
1709 23 dgisselq
        fprintf(fp,
1710 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1711
        "\t// complement numbers per output word, so that each output word\n"
1712
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1713
        "\t// portion and the bottom half being the imaginary portion.\n"
1714 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1715
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1716 2 dgisselq
"\n"
1717
"endmodule\n");
1718
        fclose(fp);
1719
}
1720
 
1721 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1722 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1723
        if (NULL == fp) {
1724
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1725
                perror("O/S Err was:");
1726
                return;
1727
        }
1728
 
1729 23 dgisselq
        const   char    *rnd_string;
1730
        if (rounding == RND_TRUNCATE)
1731
                rnd_string = "truncate";
1732
        else if (rounding == RND_FROMZERO)
1733
                rnd_string = "roundfromzero";
1734
        else if (rounding == RND_HALFUP)
1735
                rnd_string = "roundhalfup";
1736
        else
1737
                rnd_string = "convround";
1738
 
1739
 
1740 22 dgisselq
        fprintf(fp,
1741
"///////////////////////////////////////////////////////////////////////////\n"
1742
"//\n"
1743
"// Filename:   hwbfly.v\n"
1744
"//\n"
1745
"// Project:    %s\n"
1746
"//\n"
1747
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1748
"//             in 'butterfly.v', save only that it uses the verilog \n"
1749
"//             operator '*' in hopes that the synthesizer would be able\n"
1750
"//             to optimize it with hardware resources.\n"
1751
"//\n"
1752
"//             It is understood that a hardware multiply can complete its\n"
1753
"//             operation in a single clock.\n"
1754
"//\n"
1755
"//\n%s"
1756
"//\n", prjname, creator);
1757
        fprintf(fp, "%s", cpyleft);
1758
        fprintf(fp,
1759
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1760
                "\t\to_left, o_right, o_aux);\n"
1761
        "\t// Public changeable parameters ...\n"
1762
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1763
        "\t// Parameters specific to the core that should not be changed.\n"
1764 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1765 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1766
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1767
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1768
        "\tinput\t\ti_aux;\n"
1769
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1770
        "\toutput\treg\to_aux;\n"
1771
"\n", xtracbits);
1772
        fprintf(fp,
1773
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1774
"\n"
1775
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1776
        "\treg\t                        r_aux, r_aux_2;\n"
1777
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
1778
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1779
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1780
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1781
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1782
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1783 26 dgisselq
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1784 22 dgisselq
"\n"
1785
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1786
"\n"
1787
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1788
"\n"
1789
        "\t// Set up the input to the multiply\n"
1790 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1791
        "\tinitial r_aux_2 = 1\'b0;\n"
1792 22 dgisselq
        "\talways @(posedge i_clk)\n"
1793 25 dgisselq
                "\t\tif (i_rst)\n"
1794
                "\t\tbegin\n"
1795 26 dgisselq
                        "\t\t\tr_aux <= 1\'b0;\n"
1796
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1797 25 dgisselq
                "\t\tend else if (i_ce)\n"
1798
                "\t\tbegin\n"
1799
                        "\t\t\t// One clock just latches the inputs\n"
1800 26 dgisselq
                        "\t\t\tr_aux <= i_aux;\n"
1801
                        "\t\t\t// Next clock adds/subtracts\n"
1802
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1803
                        "\t\t\tr_aux_2 <= r_aux;\n"
1804
                "\t\tend\n"
1805
        "\talways @(posedge i_clk)\n"
1806
                "\t\tif (i_ce)\n"
1807
                "\t\tbegin\n"
1808
                        "\t\t\t// One clock just latches the inputs\n"
1809 25 dgisselq
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1810
                        "\t\t\tr_right <= i_right;\n"
1811
                        "\t\t\tr_coef  <= i_coef;\n"
1812
                        "\t\t\t// Next clock adds/subtracts\n"
1813
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1814
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1815
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1816
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1817
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1818 26 dgisselq
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1819
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1820 25 dgisselq
                "\t\tend\n"
1821 22 dgisselq
        "\n\n");
1822
        fprintf(fp,
1823
"\t// See comments in the butterfly.v source file for a discussion of\n"
1824
"\t// these operations and the appropriate bit widths.\n\n");
1825
        fprintf(fp,
1826 26 dgisselq
        "\treg\tsigned  [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1827
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1828 22 dgisselq
"\n"
1829 26 dgisselq
        "\treg\tsigned  [(CWIDTH-1):0]  p1c_in, p2c_in; // Coefficient multiply inputs\n"
1830
        "\treg\tsigned  [(IWIDTH):0]    p1d_in, p2d_in; // Data multiply inputs\n"
1831
        "\treg\tsigned  [(CWIDTH):0]    p3c_in; // Product 3, coefficient input\n"
1832
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in; // Product 3, data input\n"
1833 22 dgisselq
"\n"
1834 25 dgisselq
        "\tinitial leftv    = 0;\n"
1835
        "\tinitial leftvv   = 0;\n"
1836 22 dgisselq
        "\talways @(posedge i_clk)\n"
1837
        "\tbegin\n"
1838
                "\t\tif (i_rst)\n"
1839
                "\t\tbegin\n"
1840
                        "\t\t\tleftv <= 0;\n"
1841
                        "\t\t\tleftvv <= 0;\n"
1842 26 dgisselq
                "\t\tend else if (i_ce)\n"
1843 22 dgisselq
                "\t\tbegin\n"
1844
                        "\t\t\t// Second clock, pipeline = 1\n"
1845 26 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1846
"\n"
1847
                        "\t\t\t// Third clock, pipeline = 3\n"
1848
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1849
                        "\t\t\tleftvv <= leftv;\n"
1850
                "\t\tend\n"
1851
        "\tend\n"
1852
"\n"
1853
        "\talways @(posedge i_clk)\n"
1854
                "\t\tif (i_ce)\n"
1855
                "\t\tbegin\n"
1856
                        "\t\t\t// Second clock, pipeline = 1\n"
1857
                        "\t\t\tp1c_in <= ir_coef_r;\n"
1858
                        "\t\t\tp2c_in <= ir_coef_i;\n"
1859
                        "\t\t\tp1d_in <= r_dif_r;\n"
1860
                        "\t\t\tp2d_in <= r_dif_i;\n"
1861 22 dgisselq
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1862
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1863 23 dgisselq
"\n"
1864
"\n"
1865 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1866 26 dgisselq
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1867 22 dgisselq
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1868
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1869
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1870 26 dgisselq
                "\t\tend\n"
1871 22 dgisselq
"\n"
1872 26 dgisselq
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1873
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1874
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1875 22 dgisselq
"\n");
1876
 
1877
        fprintf(fp,
1878
        "\t// These values are held in memory and delayed during the\n"
1879
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1880
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1881
        "\t// therefore, the left_x values need to be right shifted by\n"
1882
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1883
        "\t// extension.\n"
1884 24 dgisselq
        "\twire\taux_s;\n"
1885 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1886
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1887 26 dgisselq
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1888
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1889 22 dgisselq
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1890
"\n"
1891
"\n"
1892 26 dgisselq
        "\t(* use_dsp48=\"no\" *)\n"
1893 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1894
        fprintf(fp,
1895
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1896 22 dgisselq
 
1897
        fprintf(fp,
1898 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1899
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1900 23 dgisselq
                rnd_string);
1901
        fprintf(fp,
1902 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1903
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1904 23 dgisselq
                rnd_string);
1905
        fprintf(fp,
1906 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1907 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1908
        fprintf(fp,
1909 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1910 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1911
 
1912
        fprintf(fp,
1913 25 dgisselq
        "\tinitial left_saved = 0;\n"
1914
        "\tinitial o_aux      = 1\'b0;\n"
1915 22 dgisselq
        "\talways @(posedge i_clk)\n"
1916
        "\t\tif (i_rst)\n"
1917
        "\t\tbegin\n"
1918
                "\t\t\tleft_saved <= 0;\n"
1919 26 dgisselq
                "\t\t\to_aux <= 1\'b0;\n"
1920 22 dgisselq
        "\t\tend else if (i_ce)\n"
1921
        "\t\tbegin\n"
1922
                "\t\t\t// First clock, recover all values\n"
1923
                "\t\t\tleft_saved <= leftvv;\n"
1924 26 dgisselq
"\n"
1925
                "\t\t\t// Second clock, round and latch for final clock\n"
1926
                "\t\t\to_aux <= aux_s;\n"
1927
        "\t\tend\n"
1928
        "\talways @(posedge i_clk)\n"
1929
        "\t\tif (i_ce)\n"
1930
        "\t\tbegin\n"
1931 22 dgisselq
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1932
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1933
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1934
                "\t\t\t// extra bits we need to get rid of.)\n"
1935 26 dgisselq
                "\n"
1936
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1937
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1938
                "\t\t\t// they are prevented from using DSP48\'s\n"
1939
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1940
                "\t\t\tmpy_r <= w_one - w_two;\n"
1941
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1942 22 dgisselq
        "\t\tend\n"
1943
        "\n");
1944
 
1945
        fprintf(fp,
1946
        "\t// As a final step, we pack our outputs into two packed two's\n"
1947
        "\t// complement numbers per output word, so that each output word\n"
1948
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1949
        "\t// portion and the bottom half being the imaginary portion.\n"
1950 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1951
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1952 22 dgisselq
"\n"
1953
"endmodule\n");
1954
 
1955
}
1956
 
1957 26 dgisselq
void    build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
1958 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1959
        int     cbits = nbits + xtra;
1960
 
1961
        if ((cbits * 2) >= sizeof(long long)*8) {
1962
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
1963
                exit(-1);
1964
        }
1965
 
1966
        if (fstage == NULL) {
1967
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
1968
                perror("O/S Err was:");
1969
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
1970
                return;
1971
        }
1972
 
1973
        fprintf(fstage,
1974
"////////////////////////////////////////////////////////////////////////////\n"
1975
"//\n"
1976 26 dgisselq
"// Filename:   %sfftstage_%c%d%s.v\n"
1977 2 dgisselq
"//\n"
1978
"// Project:    %s\n"
1979
"//\n"
1980
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
1981
"//             be used by a FFT core compiler to generate FFTs which may be\n"
1982
"//             used as part of an FFT core.  Specifically, this file \n"
1983
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
1984
"//             FFT, there shall be (N-1) of these stages.  \n"
1985
"//\n%s"
1986
"//\n",
1987 26 dgisselq
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
1988 2 dgisselq
        fprintf(fstage, "%s", cpyleft);
1989 26 dgisselq
        fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
1990
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
1991
                (dbg)?", o_dbg":"");
1992 2 dgisselq
        // These parameter values are useless at this point--they are to be
1993
        // replaced by the parameter values in the calling program.  Only
1994
        // problem is, the CWIDTH needs to match exactly!
1995
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
1996
                nbits, cbits, nbits+1);
1997
        fprintf(fstage,
1998
"\t// Parameters specific to the core that should be changed when this\n"
1999
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
2000
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
2001
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
2002 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
2003 2 dgisselq
        fprintf(fstage,
2004
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
2005
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
2006
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
2007
"\toutput       reg                             o_sync;\n"
2008 26 dgisselq
"\n");
2009
        if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
2010
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
2011
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
2012
"\n");
2013
        }
2014
        fprintf(fstage,
2015 2 dgisselq
"\treg  wait_for_sync;\n"
2016
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
2017
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
2018 8 dgisselq
"\treg  ib_sync;\n"
2019 2 dgisselq
"\n"
2020
"\treg  b_started;\n"
2021
"\twire ob_sync;\n"
2022 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
2023 2 dgisselq
        fprintf(fstage,
2024
"\n"
2025
"\t// %scmem is defined as an array of real and complex values,\n"
2026
"\t// where the top CWIDTH bits are the real value and the bottom\n"
2027
"\t// CWIDTH bits are the imaginary value.\n"
2028
"\t//\n"
2029 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
2030 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
2031
"\t//\n"
2032
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
2033
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
2034 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
2035
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
2036 2 dgisselq
        {
2037
                FILE    *cmem;
2038
 
2039 14 dgisselq
                {
2040
                        char    *memfile, *ptr;
2041
 
2042
                        memfile = new char[strlen(fname)+128];
2043
                        strcpy(memfile, fname);
2044
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
2045
                                ptr++;
2046
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
2047
                        } else {
2048
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
2049 26 dgisselq
                                        coredir, (inv)?"i":"",
2050 14 dgisselq
                                        (odd)?'o':'e', stage*2);
2051
                        }
2052
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
2053
                        cmem = fopen(memfile, "w");
2054
                        if (NULL == cmem) {
2055
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
2056
                                perror("Err from O/S:");
2057
                                exit(-2);
2058
                        }
2059
 
2060
                        delete[] memfile;
2061 2 dgisselq
                }
2062
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
2063
                for(int i=0; i<stage/2; i++) {
2064
                        int k = 2*i+odd;
2065 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
2066 2 dgisselq
                        double  c, s;
2067
                        long long ic, is, vl;
2068
 
2069
                        c = cos(W); s = sin(W);
2070 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
2071
                        is = (long long)round((1ll<<(cbits-2)) * s);
2072 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
2073
                        vl <<= (cbits);
2074
                        vl |= (is & (~(-1ll << (cbits))));
2075
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
2076
                        /*
2077
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
2078
                                ((cbits*2+3)/4), vl, c, s,
2079
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
2080
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
2081
                        */
2082
                } fclose(cmem);
2083
        }
2084
 
2085
        fprintf(fstage,
2086 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
2087 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
2088
"\n"
2089 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
2090 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
2091
"\n"
2092 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
2093
"\tinitial iaddr = 0;\n"
2094 2 dgisselq
"\talways @(posedge i_clk)\n"
2095
        "\t\tif (i_rst)\n"
2096
        "\t\tbegin\n"
2097 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b1;\n"
2098 2 dgisselq
                "\t\t\tiaddr <= 0;\n"
2099
        "\t\tend\n"
2100
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
2101
        "\t\tbegin\n"
2102
                "\t\t\t//\n"
2103
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
2104
                "\t\t\t//\n"
2105 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
2106 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b0;\n"
2107
        "\t\tend\n"
2108
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
2109
        "\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n"
2110
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
2111
        "\n");
2112 23 dgisselq
 
2113
        fprintf(fstage,
2114
        "\t//\n"
2115
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
2116
        "\t//\n"
2117 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
2118 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2119 26 dgisselq
                "\t\tif (i_rst)\n"
2120
                        "\t\t\tib_sync <= 1\'b0;\n"
2121
                "\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
2122
                        "\t\t\tbegin\n"
2123
                                "\t\t\t\t// Set the sync to true on the very first\n"
2124
                                "\t\t\t\t// valid input in, and hence on the very\n"
2125
                                "\t\t\t\t// first valid data out per FFT.\n"
2126
                                "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
2127
                        "\t\t\tend\n"
2128 24 dgisselq
        "\talways\t@(posedge i_clk)\n"
2129 26 dgisselq
                "\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
2130
                "\t\t\tbegin\n"
2131
                        "\t\t\t\t// One input from memory, ...\n"
2132
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
2133
                        "\t\t\t\t// One input clocked in from the top\n"
2134
                        "\t\t\t\tib_b <= i_data;\n"
2135
                        "\t\t\t\t// and the coefficient or twiddle factor\n"
2136
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
2137
                "\t\t\tend\n\n", (inv)?"i":"");
2138 23 dgisselq
 
2139
        if (hwmpy) {
2140
                fprintf(fstage,
2141
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2142
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
2143
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2144
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
2145
        } else {
2146
        fprintf(fstage,
2147
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
2148
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
2149
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
2150
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
2151
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
2152
        }
2153
 
2154
        fprintf(fstage,
2155
        "\t//\n"
2156
        "\t// Next step: recover the outputs from the butterfly\n"
2157
        "\t//\n"
2158 25 dgisselq
        "\tinitial oB        = 0;\n"
2159
        "\tinitial o_sync    = 0;\n"
2160
        "\tinitial b_started = 0;\n"
2161 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
2162
        "\t\tif (i_rst)\n"
2163
        "\t\tbegin\n"
2164
                "\t\t\toB <= 0;\n"
2165
                "\t\t\to_sync <= 0;\n"
2166
                "\t\t\tb_started <= 0;\n"
2167
        "\t\tend else if (i_ce)\n"
2168
        "\t\tbegin\n"
2169 26 dgisselq
        "\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n"
2170
        "\t\t\tif (ob_sync||b_started)\n"
2171
                "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
2172
        "\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n"
2173
                "\t\t\t// A butterfly output is available\n"
2174
                        "\t\t\t\tb_started <= 1\'b1;\n"
2175 23 dgisselq
        "\t\tend\n\n");
2176 26 dgisselq
        fprintf(fstage,
2177
        "\treg  [(LGSPAN-1):0]\t\tdly_addr;\n"
2178
        "\treg  [(2*OWIDTH-1):0]\tdly_value;\n"
2179
        "\talways @(posedge i_clk)\n"
2180
        "\t\tif (i_ce)\n"
2181
        "\t\tbegin\n"
2182
        "\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
2183
        "\t\t\tdly_value <= ob_b;\n"
2184
        "\t\tend\n"
2185
        "\talways @(posedge i_clk)\n"
2186
        "\t\tif (i_ce)\n"
2187
                "\t\t\tomem[dly_addr] <= dly_value;\n"
2188
"\n");
2189
        fprintf(fstage,
2190
        "\talways @(posedge i_clk)\n"
2191
        "\t\tif (i_ce)\n"
2192
        "\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
2193
"\n");
2194 22 dgisselq
        fprintf(fstage, "endmodule\n");
2195 2 dgisselq
}
2196
 
2197
void    usage(void) {
2198
        fprintf(stderr,
2199 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
2200 2 dgisselq
// "\tfftgen -i\n"
2201 26 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
2202
"\t\ta real FFT) at one clock per two real input samples.\n"
2203 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
2204
"\t\tlonger than the corresponding data bits, to help avoid\n"
2205 26 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits lnoger\n"
2206
"\t\tthan the data bits.\n"
2207 2 dgisselq
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
2208 26 dgisselq
"\t\tThe default is a subdirectory of the current directory named %s.\n"
2209 2 dgisselq
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
2210 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
2211
"\t\ta required parameter.)\n"
2212
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
2213
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
2214
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
2215 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
2216
"\t\tproduce.  Internal values greater than this value will be\n"
2217 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
2218
"\t\tsize by one bit for every two FFT stages.)\n"
2219 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
2220 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
2221
"\t\tcomplex values into the FFT.\n"
2222 22 dgisselq
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
2223
"\t\tmultiplication facility, instead of shift-add emulation.\n"
2224 26 dgisselq
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
2225
"\t\tbe accelerated in this fashion.  The default is not to use any\n"
2226
"\t\thardware multipliers.\n"
2227
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
2228
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
2229 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
2230
"\t\talgorithms that need to apply a filter without needing to do\n"
2231
"\t\tbin shifting, as these algorithms can, with this option, just\n"
2232
"\t\tmultiply by a bit reversed correlation sequence and then\n"
2233 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
2234
"\t\ta decimation in time inverse to do this, which this program does\n"
2235
"\t\tnot yet provide.)\n"
2236 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
2237 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
2238
"\t\trounding or truncation of the answer to the final number of bits.\n"
2239 26 dgisselq
"\t\tThe default is to use %d extra bits internally.\n",
2240
/*
2241 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
2242
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
2243
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
2244 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
2245
*/
2246
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
2247 2 dgisselq
}
2248
 
2249
// Features still needed:
2250
//      Interactivity.
2251
int main(int argc, char **argv) {
2252
        int     fftsize = -1, lgsize = -1;
2253 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
2254
                        nummpy=DEF_NMPY, nonmpy=2;
2255
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
2256
        bool    bitreverse = true, inverse=false,
2257
                verbose_flag = false, single_clock = false,
2258
                real_fft = false;
2259 2 dgisselq
        FILE    *vmain;
2260 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
2261 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
2262
        // ROUND_T      rounding = RND_HALFUP;
2263 2 dgisselq
 
2264 26 dgisselq
        bool    dbg = false;
2265
        int     dbgstage = 128;
2266
 
2267 2 dgisselq
        if (argc <= 1)
2268
                usage();
2269
 
2270 14 dgisselq
        cmdline = argv[0];
2271 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
2272 14 dgisselq
                cmdline += " ";
2273
                cmdline += argv[argn];
2274
        }
2275
 
2276
        for(int argn=1; argn<argc; argn++) {
2277 2 dgisselq
                if ('-' == argv[argn][0]) {
2278
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
2279
                                switch(argv[argn][j]) {
2280 26 dgisselq
                                        /*
2281 2 dgisselq
                                        case '0':
2282
                                                inverse = false;
2283
                                                break;
2284 26 dgisselq
                                        */
2285 2 dgisselq
                                        case '1':
2286 26 dgisselq
                                                single_clock = true;
2287 2 dgisselq
                                                break;
2288 28 dgisselq
                                        case 'a':
2289
                                                if (argn+1 >= argc) {
2290
                                                        printf("ERR: No header filename given\n\n");
2291
                                                        usage(); exit(-1);
2292
                                                }
2293
                                                hdrname = argv[++argn];
2294
                                                j+= 200;
2295
                                                break;
2296 2 dgisselq
                                        case 'c':
2297
                                                if (argn+1 >= argc) {
2298 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
2299 2 dgisselq
                                                        usage(); exit(-1);
2300
                                                }
2301
                                                xtracbits = atoi(argv[++argn]);
2302
                                                j+= 200;
2303
                                                break;
2304
                                        case 'd':
2305
                                                if (argn+1 >= argc) {
2306 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
2307 2 dgisselq
                                                        usage(); exit(-1);
2308
                                                }
2309 14 dgisselq
                                                coredir = argv[++argn];
2310 2 dgisselq
                                                j += 200;
2311
                                                break;
2312 26 dgisselq
                                        case 'D':
2313
                                                dbg = true;
2314
                                                if (argn+1 >= argc) {
2315
                                                        printf("ERR: No debug stage number given!\n\n");
2316
                                                        usage(); exit(-1);
2317
                                                }
2318
                                                dbgstage = atoi(argv[++argn]);
2319
                                                j+= 200;
2320
                                                break;
2321 2 dgisselq
                                        case 'f':
2322
                                                if (argn+1 >= argc) {
2323 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
2324 2 dgisselq
                                                        usage(); exit(-1);
2325
                                                }
2326
                                                fftsize = atoi(argv[++argn]);
2327
                                                { int sln = strlen(argv[argn]);
2328
                                                if (!isdigit(argv[argn][sln-1])){
2329
                                                        switch(argv[argn][sln-1]) {
2330
                                                        case 'k': case 'K':
2331
                                                                fftsize <<= 10;
2332
                                                                break;
2333
                                                        case 'm': case 'M':
2334
                                                                fftsize <<= 20;
2335
                                                                break;
2336
                                                        case 'g': case 'G':
2337
                                                                fftsize <<= 30;
2338
                                                                break;
2339
                                                        default:
2340 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
2341 2 dgisselq
                                                                exit(-1);
2342
                                                        }
2343
                                                }}
2344
                                                j += 200;
2345
                                                break;
2346
                                        case 'h':
2347
                                                usage();
2348
                                                exit(0);
2349
                                                break;
2350
                                        case 'i':
2351 26 dgisselq
                                                inverse = true;
2352 2 dgisselq
                                                break;
2353
                                        case 'm':
2354
                                                if (argn+1 >= argc) {
2355 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
2356 2 dgisselq
                                                        exit(-1);
2357
                                                }
2358
                                                maxbitsout = atoi(argv[++argn]);
2359
                                                j += 200;
2360
                                                break;
2361
                                        case 'n':
2362
                                                if (argn+1 >= argc) {
2363 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
2364 2 dgisselq
                                                        exit(-1);
2365
                                                }
2366
                                                nbitsin = atoi(argv[++argn]);
2367
                                                j += 200;
2368
                                                break;
2369 22 dgisselq
                                        case 'p':
2370
                                                if (argn+1 >= argc) {
2371
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
2372
                                                        exit(-1);
2373
                                                }
2374
                                                nummpy = atoi(argv[++argn]);
2375
                                                j += 200;
2376
                                                break;
2377 26 dgisselq
                                        case 'r':
2378
                                                real_fft = true;
2379
                                                break;
2380 2 dgisselq
                                        case 'S':
2381
                                                bitreverse = true;
2382
                                                break;
2383
                                        case 's':
2384
                                                bitreverse = false;
2385
                                                break;
2386 19 dgisselq
                                        case 'x':
2387
                                                if (argn+1 >= argc) {
2388
                                                        printf("ERR: No extra number of bits given!\n\n");
2389
                                                        usage(); exit(-1);
2390
                                                } j+= 200;
2391
                                                xtrapbits = atoi(argv[++argn]);
2392
                                                break;
2393 2 dgisselq
                                        case 'v':
2394
                                                verbose_flag = true;
2395
                                                break;
2396
                                        default:
2397
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
2398
                                                usage();
2399
                                                exit(-1);
2400
                                }
2401
                        }
2402
                } else {
2403
                        printf("Unrecognized argument, %s\n", argv[argn]);
2404
                        usage();
2405
                        exit(-1);
2406
                }
2407
        }
2408
 
2409 26 dgisselq
        if (real_fft) {
2410
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2411
                exit(0);
2412
        } if (single_clock) {
2413
                printf("The single clock FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2414
                exit(0);
2415
        } if (!bitreverse) {
2416
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
2417
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
2418
                printf("built.\n");
2419
        }
2420
 
2421 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
2422
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
2423
                        ;
2424
        }
2425
 
2426
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
2427
                printf("INVALID PARAMETERS!!!!\n");
2428
                exit(-1);
2429
        }
2430
 
2431
 
2432
        if (nextlg(fftsize) != fftsize) {
2433
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
2434
                                fftsize);
2435
                exit(-1);
2436
        } else if (fftsize < 2) {
2437
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
2438
                                fftsize);
2439
                if (fftsize == 1) {
2440
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
2441
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
2442
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
2443
                        fprintf(stderr, "can be connected straight to the input.\n");
2444
                } else {
2445
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
2446
                        fprintf(stderr, "Is such an operation even defined?\n");
2447
                }
2448
                exit(-1);
2449
        }
2450
 
2451
        // Calculate how many output bits we'll have, and what the log
2452
        // based two size of our FFT is.
2453
        {
2454
                int     tmp_size = fftsize;
2455
 
2456
                // The first stage always accumulates one bit, regardless
2457
                // of whether you need to or not.
2458
                nbitsout = nbitsin + 1;
2459
                tmp_size >>= 1;
2460
 
2461
                while(tmp_size > 4) {
2462
                        nbitsout += 1;
2463
                        tmp_size >>= 2;
2464
                }
2465
 
2466
                if (tmp_size > 1)
2467
                        nbitsout ++;
2468
 
2469
                if (fftsize <= 2)
2470
                        bitreverse = false;
2471
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2472
                nbitsout = maxbitsout;
2473
 
2474 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2475
        {
2476
                int     lgv = lgval(fftsize);
2477 2 dgisselq
 
2478 22 dgisselq
                nonmpy = lgv - nummpy;
2479
                if (nonmpy < 2) nonmpy = 2;
2480
                nummpy = lgv - nonmpy;
2481
        }
2482
 
2483 2 dgisselq
        {
2484
                struct stat     sbuf;
2485 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2486 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2487 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2488 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2489
                                fprintf(stderr, "To try again, please remove this file.\n");
2490
                                exit(-1);
2491
                        }
2492
                } else
2493 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2494
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2495
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2496 2 dgisselq
                        exit(-1);
2497
                }
2498
        }
2499
 
2500 28 dgisselq
        if (hdrname.length() > 0) {
2501
                FILE    *hdr = fopen(hdrname.c_str(), "w");
2502
                if (hdr == NULL) {
2503
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
2504
                        perror("O/S Err:");
2505
                        exit(-2);
2506
                }
2507
 
2508
                fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n");
2509
                fprintf(hdr, "//\n");
2510
                fprintf(hdr, "// Filename:      %s\n", hdrname.c_str());
2511
                fprintf(hdr, "//\n");
2512
                fprintf(hdr, "// Project:       %s\n", prjname);
2513
                fprintf(hdr, "//\n");
2514
                fprintf(hdr, "// Purpose:       This simple header file captures the internal constants\n");
2515
                fprintf(hdr, "//                within the FFT that were used to build it, for the purpose\n");
2516
                fprintf(hdr, "//                of making C++ integration (and test bench testing) simpler.  That\n");
2517
                fprintf(hdr, "//                is, should the FFT change size, this will note that size change\n");
2518
                fprintf(hdr, "//                and thus any test bench or other C++ program dependent upon\n");
2519
                fprintf(hdr, "//                either the size of the FFT, the number of bits in or out of\n");
2520
                fprintf(hdr, "//                it, etc., can pick up the changes in the defines found within\n");
2521
                fprintf(hdr, "//                this file.\n");
2522
                fprintf(hdr, "//\n");
2523
                fprintf(hdr, "%s", creator);
2524
                fprintf(hdr, "//\n");
2525
                fprintf(hdr, "%s", cpyleft);
2526
                fprintf(hdr, "//\n"
2527
                "//\n"
2528
                "#ifndef %sFFTHDR_H\n"
2529
                "#define %sFFTHDR_H\n"
2530
                "\n"
2531
                "#define\t%sFFT_IWIDTH\t%d\n"
2532
                "#define\t%sFFT_OWIDTH\t%d\n"
2533
                "#define\t%sFFT_LGWIDTH\t%d\n"
2534
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
2535
                        (inverse)?"I":"", (inverse)?"I":"",
2536
                        (inverse)?"I":"", nbitsin,
2537
                        (inverse)?"I":"", nbitsout,
2538
                        (inverse)?"I":"", lgsize,
2539
                        (inverse)?"I":"", (inverse)?"I":"");
2540
                if (!bitreverse)
2541
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
2542
                                (inverse)?"I":"");
2543
                if (real_fft)
2544
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
2545
                if (!single_clock)
2546
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
2547 29 dgisselq
                if (USE_OLD_MULTIPLY)
2548
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
2549
 
2550
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
2551
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
2552
#ifdef  TST_LONGBIMPY_BW
2553
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
2554
#else
2555
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
2556
#endif
2557
 
2558
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
2559
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
2560
#ifdef  TST_SHIFTADDMPY_BW
2561
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
2562
#else
2563
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
2564
#endif
2565
 
2566
#define TST_SHIFTADDMPY_AW      16
2567
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
2568
                fprintf(hdr, "// Parameters for testing the butterfly\n");
2569
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
2570
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
2571
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
2572
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
2573
                                bflydelay(TST_BUTTERFLY_IWIDTH,
2574
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
2575
 
2576
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
2577
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
2578
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
2579
 
2580
                fprintf(hdr, "// Parameters for testing the double stage\n");
2581
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
2582
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
2583
 
2584
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
2585
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
2586 28 dgisselq
                fprintf(hdr, "\n" "#endif\n\n");
2587
                fclose(hdr);
2588
        }
2589
 
2590 14 dgisselq
        {
2591
                std::string     fname_string;
2592
 
2593
                fname_string = coredir;
2594
                fname_string += "/";
2595
                if (inverse) fname_string += "i";
2596
                fname_string += "fftmain.v";
2597
 
2598
                vmain = fopen(fname_string.c_str(), "w");
2599
                if (NULL == vmain) {
2600
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2601
                        perror("Err from O/S:");
2602
                        exit(-1);
2603
                }
2604 2 dgisselq
        }
2605
 
2606
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2607
        fprintf(vmain, "//\n");
2608
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2609
        fprintf(vmain, "//\n");
2610
        fprintf(vmain, "// Project:     %s\n", prjname);
2611
        fprintf(vmain, "//\n");
2612
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2613
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2614
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2615
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2616
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2617
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2618
        fprintf(vmain, "//\n");
2619
        fprintf(vmain, "// Parameters:\n");
2620
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2621
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2622
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2623
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2624
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2625
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
2626
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2627
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2628
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
2629
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
2630
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
2631
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
2632
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
2633
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
2634
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
2635
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
2636
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
2637
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
2638
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
2639
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
2640
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
2641
        fprintf(vmain, "//\t\t\tbits total.\n");
2642
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
2643
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
2644
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
2645
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
2646
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
2647
        fprintf(vmain, "//\n");
2648 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2649
        fprintf(vmain, "//\t\tfollowing command line:\n");
2650
        fprintf(vmain, "//\n");
2651
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2652
        fprintf(vmain, "//\n");
2653 2 dgisselq
        fprintf(vmain, "%s", creator);
2654
        fprintf(vmain, "//\n");
2655
        fprintf(vmain, "%s", cpyleft);
2656
 
2657
 
2658
        fprintf(vmain, "//\n");
2659
        fprintf(vmain, "//\n");
2660
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2661
        fprintf(vmain, "\t\ti_left, i_right,\n");
2662 26 dgisselq
        fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
2663
                        (dbg)?", o_dbg":"");
2664 2 dgisselq
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2665
        assert(lgsize > 0);
2666
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2667
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2668
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2669
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2670 26 dgisselq
        if (dbg)
2671
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
2672 2 dgisselq
        fprintf(vmain, "\n\n");
2673
 
2674
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2675
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2676
        fprintf(vmain, "\n\n");
2677
 
2678
        int     tmp_size = fftsize, lgtmp = lgsize;
2679
        if (fftsize == 2) {
2680
                if (bitreverse) {
2681
                        fprintf(vmain, "\treg\tbr_start;\n");
2682 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2683 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2684
                        fprintf(vmain, "\t\tif (i_rst)\n");
2685 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
2686 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2687 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
2688 2 dgisselq
                }
2689
                fprintf(vmain, "\n\n");
2690 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2691
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
2692 2 dgisselq
                fprintf(vmain, "\n\n");
2693
        } else {
2694
                int     nbits = nbitsin, dropbit=0;
2695 26 dgisselq
                int     obits = nbits+1+xtrapbits;
2696
 
2697
                if ((maxbitsout > 0)&&(obits > maxbitsout))
2698
                        obits = maxbitsout;
2699
 
2700 2 dgisselq
                // Always do a first stage
2701 14 dgisselq
                {
2702 22 dgisselq
                        bool    mpystage;
2703 2 dgisselq
 
2704 22 dgisselq
                        // Last two stages are always non-multiply stages
2705
                        // since the multiplies can be done by adds
2706
                        mpystage = ((lgtmp-2) <= nummpy);
2707
 
2708 28 dgisselq
                        if (mpystage)
2709
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2710
                        fprintf(vmain, "\n\n");
2711
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
2712
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
2713
                        fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2714
                                (inverse)?"i":"", fftsize,
2715
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
2716
                                xtracbits, obits+xtrapbits,
2717
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2718
                                fftsize);
2719
                        fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
2720
                        fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2721
                                (inverse)?"i":"", fftsize,
2722
                                xtracbits, obits+xtrapbits,
2723
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2724
                                fftsize);
2725
                        fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2726
                        fprintf(vmain, "\n\n");
2727
 
2728
 
2729
                        std::string     fname;
2730
                        char    numstr[12];
2731
 
2732 14 dgisselq
                        fname = coredir + "/";
2733
                        if (inverse) fname += "i";
2734
                        fname += "fftstage_e";
2735
                        sprintf(numstr, "%d", fftsize);
2736
                        fname += numstr;
2737 26 dgisselq
                        if ((dbg)&&(dbgstage == fftsize))
2738
                                fname += "_dbg";
2739 14 dgisselq
                        fname += ".v";
2740 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize));    // Even stage
2741 14 dgisselq
 
2742
                        fname = coredir + "/";
2743
                        if (inverse) fname += "i";
2744
                        fname += "fftstage_o";
2745
                        sprintf(numstr, "%d", fftsize);
2746
                        fname += numstr;
2747
                        fname += ".v";
2748 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false);  // Odd  stage
2749 14 dgisselq
                }
2750
 
2751 26 dgisselq
                nbits = obits;  // New number of input bits
2752 2 dgisselq
                tmp_size >>= 1; lgtmp--;
2753
                dropbit = 0;
2754
                fprintf(vmain, "\n\n");
2755
                while(tmp_size >= 8) {
2756 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2757 2 dgisselq
 
2758
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2759
                                obits = maxbitsout;
2760
 
2761 14 dgisselq
                        {
2762 22 dgisselq
                                bool            mpystage;
2763 2 dgisselq
 
2764 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2765
 
2766 28 dgisselq
                                if (mpystage)
2767
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2768
                                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n",
2769
                                        tmp_size, tmp_size);
2770
                                fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
2771
                                        2*(obits+xtrapbits)-1,
2772
                                        tmp_size, tmp_size);
2773
                                fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2774
                                        (inverse)?"i":"", tmp_size,
2775
                                        ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
2776
                                        nbits+xtrapbits,
2777
                                        nbits+xtracbits+xtrapbits,
2778
                                        obits+xtrapbits,
2779
                                        lgsize, lgtmp-2,
2780
                                        lgdelay(nbits+xtrapbits,xtracbits),
2781
                                        (dropbit)?0:0, tmp_size);
2782
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
2783
                                        tmp_size<<1, tmp_size<<1,
2784
                                        tmp_size, tmp_size,
2785
                                        ((dbg)&&(dbgstage == tmp_size))
2786
                                                ?", o_dbg":"");
2787
                                fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2788
                                        (inverse)?"i":"", tmp_size,
2789
                                        nbits+xtrapbits,
2790
                                        nbits+xtracbits+xtrapbits,
2791
                                        obits+xtrapbits,
2792
                                        lgsize, lgtmp-2,
2793
                                        lgdelay(nbits+xtrapbits,xtracbits),
2794
                                        (dropbit)?0:0, tmp_size);
2795
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
2796
                                        tmp_size<<1, tmp_size<<1,
2797
                                        tmp_size, tmp_size);
2798
                                fprintf(vmain, "\n\n");
2799
 
2800
                                std::string     fname;
2801
                                char            numstr[12];
2802
 
2803 14 dgisselq
                                fname = coredir + "/";
2804
                                if (inverse) fname += "i";
2805
                                fname += "fftstage_e";
2806
                                sprintf(numstr, "%d", tmp_size);
2807
                                fname += numstr;
2808 26 dgisselq
                                if ((dbg)&&(dbgstage == tmp_size))
2809
                                        fname += "_dbg";
2810 14 dgisselq
                                fname += ".v";
2811 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
2812 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2813 26 dgisselq
                                        mpystage, ((dbg)&&(dbgstage == tmp_size)));     // Even stage
2814 2 dgisselq
 
2815 14 dgisselq
                                fname = coredir + "/";
2816
                                if (inverse) fname += "i";
2817
                                fname += "fftstage_o";
2818
                                sprintf(numstr, "%d", tmp_size);
2819
                                fname += numstr;
2820
                                fname += ".v";
2821 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
2822 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2823 26 dgisselq
                                        mpystage, false);       // Odd  stage
2824 14 dgisselq
                        }
2825
 
2826
 
2827 2 dgisselq
                        dropbit ^= 1;
2828
                        nbits = obits;
2829
                        tmp_size >>= 1; lgtmp--;
2830
                }
2831
 
2832
                if (tmp_size == 4) {
2833 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2834 2 dgisselq
 
2835
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2836
                                obits = maxbitsout;
2837
 
2838
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
2839 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2840 26 dgisselq
                        fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2841
                                ((dbg)&&(dbgstage==4))?"_dbg":"",
2842
                                nbits+xtrapbits, obits+xtrapbits, lgsize,
2843
                                (inverse)?1:0, (dropbit)?0:0);
2844
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
2845
                                ((dbg)&&(dbgstage==4))?", o_dbg":"");
2846 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2847 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2848 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2849 2 dgisselq
                        dropbit ^= 1;
2850
                        nbits = obits;
2851
                        tmp_size >>= 1; lgtmp--;
2852
                }
2853
 
2854
                {
2855 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2856 2 dgisselq
                        if (obits > nbitsout)
2857
                                obits = nbitsout;
2858
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2859
                                obits = maxbitsout;
2860
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2861
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2862 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
2863
                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
2864 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2865 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2866 2 dgisselq
 
2867
                        fprintf(vmain, "\n\n");
2868
                        nbits = obits;
2869
                }
2870
 
2871
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2872
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2873
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2874
                fprintf(vmain, "\n");
2875
                if (bitreverse) {
2876
                        fprintf(vmain, "\twire\tbr_start;\n");
2877
                        fprintf(vmain, "\treg\tr_br_started;\n");
2878 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2879 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2880
                        fprintf(vmain, "\t\tif (i_rst)\n");
2881 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
2882
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2883 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2884
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2885 2 dgisselq
                }
2886
        }
2887
 
2888
        fprintf(vmain, "\n");
2889
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2890
        fprintf(vmain, "\twire\tbr_sync;\n");
2891
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2892
        if (bitreverse) {
2893
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2894
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2895
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2896
        } else {
2897
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2898
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2899
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2900
        }
2901
 
2902
        fprintf(vmain, "\n\n");
2903
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2904 26 dgisselq
        fprintf(vmain, "\tinitial\to_sync  = 1\'b0;\n");
2905 2 dgisselq
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2906 26 dgisselq
        fprintf(vmain, "\t\tif (i_rst)\n");
2907
        fprintf(vmain, "\t\t\to_sync  <= 1\'b0;\n");
2908
        fprintf(vmain, "\t\telse if (i_ce)\n");
2909
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2910
        fprintf(vmain, "\n");
2911
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2912
        fprintf(vmain, "\t\tif (i_ce)\n");
2913 2 dgisselq
        fprintf(vmain, "\t\tbegin\n");
2914
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2915
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2916
        fprintf(vmain, "\t\tend\n");
2917
        fprintf(vmain, "\n\n");
2918
        fprintf(vmain, "endmodule\n");
2919
        fclose(vmain);
2920
 
2921 14 dgisselq
        {
2922
                std::string     fname;
2923 2 dgisselq
 
2924 14 dgisselq
                fname = coredir + "/butterfly.v";
2925 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2926 2 dgisselq
 
2927 22 dgisselq
                if (nummpy > 0) {
2928
                        fname = coredir + "/hwbfly.v";
2929 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2930 22 dgisselq
                }
2931
 
2932 29 dgisselq
                {
2933
                        // To make debugging easier, we unconditionally build all of
                        // the multiply cores: shiftaddmpy.v, longbimpy.v and bimpy.v
2934
                        fname = coredir + "/shiftaddmpy.v";
2935
                        build_multiply(fname.c_str());
2936 2 dgisselq
 
2937 29 dgisselq
                        fname = coredir + "/longbimpy.v";
2938
                        build_longbimpy(fname.c_str());
2939
                        fname = coredir + "/bimpy.v";
2940
                        build_bimpy(fname.c_str());
2941
                }
2942
 
2943 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
2944
                        fname = coredir + "/qtrstage_dbg.v";
2945
                        build_quarters(fname.c_str(), rounding, true);
2946
                }
2947 14 dgisselq
                fname = coredir + "/qtrstage.v";
2948 26 dgisselq
                build_quarters(fname.c_str(), rounding, false);
2949 2 dgisselq
 
2950 26 dgisselq
                if ((dbg)&&(dbgstage == 2))
2951
                        fname = coredir + "/dblstage_dbg.v";
2952
                else
2953
                        fname = coredir + "/dblstage.v";
2954
                build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
2955 14 dgisselq
 
2956
                if (bitreverse) {
2957
                        fname = coredir + "/dblreverse.v";
2958
                        build_dblreverse(fname.c_str());
2959
                }
2960 23 dgisselq
 
2961
                const   char    *rnd_string = "";
2962
                switch(rounding) {
2963
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
2964
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
2965
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
2966
                        default:
2967
                                rnd_string = "/convround.v"; break;
2968
                } fname = coredir + rnd_string;
2969
                switch(rounding) {
2970
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
2971
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
2972
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
2973
                        default:
2974
                                build_convround(fname.c_str()); break;
2975
                }
2976
 
2977 2 dgisselq
        }
2978
}
2979
 
2980 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.