OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 28

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 16 dgisselq
/////////////////////////////////////////////////////////////////////////////
2
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26
///////////////////////////////////////////////////////////////////////////
27
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49
///////////////////////////////////////////////////////////////////////////
50
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62 26 dgisselq
// Built-in default settings.  NOTE(review): the code that consumes these
// is outside this view; the per-macro descriptions are inferred from the
// names only and should be confirmed against the option parser.
#define DEF_NBITSIN     16              // presumably: default input bit width
#define DEF_COREDIR     "fft-core"      // presumably: default output directory
#define DEF_XTRACBITS   4               // presumably: extra coefficient bits
#define DEF_NMPY        0               // presumably: default multiplier count
#define DEF_XTRAPBITS   0               // presumably: extra precision bits
67 2 dgisselq
 
68 23 dgisselq
// Selects which rounding module the generated stages instantiate when
// they drop bits.
typedef enum {
        RND_TRUNCATE,   // "truncate":      plain bit selection
        RND_FROMZERO,   // "roundfromzero": halfway cases go away from zero
        RND_HALFUP,     // "roundhalfup":   add one half, then truncate
        RND_CONVERGENT  // "convround":     halfway cases go to nearest even
} ROUND_T;
71
 
72 2 dgisselq
// GPL v3 notice that is written, verbatim, into every generated Verilog
// file right after that file's descriptive header.
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
97 14 dgisselq
// Project title substituted into the "Project:" line of each generated
// file's header.
const char      prjname[] = "A Doubletime Pipelined FFT";
// Author credit lines substituted into each generated file's header.
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Technology, LLC\n";
100
 
101
// Returns the smallest lg >= 1 such that 2^lg >= vl, i.e. the number of
// bits needed to count vl items (never less than one).
//
// Any vl <= 2, including zero and negative values, yields 1.
int     lgval(int vl) {
        if (vl <= 2)
                return 1;

        // Shift in unsigned arithmetic: with a signed int, (1<<lg)
        // overflows (undefined behaviour) once vl exceeds 2^30.
        const unsigned  target = (unsigned)vl;
        int             lg = 1;

        while ((1u << lg) < target)
                lg++;
        return lg;
}
108
 
109
// Returns the smallest power of two that is >= vl (despite the name, the
// result is the power of two itself, not its logarithm).  Any vl <= 1,
// including zero and negative values, yields 1.
//
// NOTE(review): for vl above 2^30 the doubling overflows a 32-bit int,
// since no int result can satisfy the contract; callers must stay below
// that.
int     nextlg(int vl) {
        int     pow2 = 1;

        while (pow2 < vl)
                pow2 <<= 1;
        return pow2;
}
116
 
117 14 dgisselq
// Computes a delay figure from a data width (nbits) and a count of extra
// bits (xtra):
//
//      nbits + 4               when nbits + 1 < nbits + xtra
//      nbits + xtra + 3        otherwise
//
// NOTE(review): presumably the clock latency of the butterfly, given the
// name and its use in lgdelay(); confirm against the butterfly generator.
int     bflydelay(int nbits, int xtra) {
        const int       cbits = nbits + xtra;

        return (nbits + 1 < cbits) ? (nbits + 4) : (cbits + 3);
}
126
 
127 14 dgisselq
int     lgdelay(int nbits, int xtra) {
128
        // The butterfly code needs to compare a valid address, of this
129
        // many bits, with an address two greater.  This guarantees we
130
        // have enough bits for that comparison.  We'll also end up with
131
        // more storage space to look for these values, but without a 
132
        // redesign that's just what we'll deal with.
133
        return lgval(bflydelay(nbits, xtra)+3);
134
}
135
 
136 23 dgisselq
void    build_truncator(const char *fname) {
137
        printf("TRUNCATING!\n");
138 2 dgisselq
        FILE    *fp = fopen(fname, "w");
139
        if (NULL == fp) {
140
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
141
                perror("O/S Err was:");
142
                return;
143
        }
144
 
145
        fprintf(fp,
146
"///////////////////////////////////////////////////////////////////////////\n"
147
"//\n"
148 23 dgisselq
"// Filename:   truncate.v\n"
149
"//             \n"
150
"// Project:    %s\n"
151
"//\n"
152
"// Purpose:    Truncation is one of several options that can be used\n"
153
"//             internal to the various FFT stages to drop bits from one \n"
154
"//             stage to the next.  In general, it is the simplest method\n"
155
"//             of dropping bits, since it requires only a bit selection.\n"
156
"//\n"
157
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
158
"//             since it tends to produce a DC bias in the result.  (Other\n"
159
"//             less pronounced biases may also exist.)\n"
160
"//\n"
161
"//             This particular version also registers the output with the\n"
162
"//             clock, so there will be a delay of one going through this\n"
163
"//             module.  This will keep it in line with the other forms of\n"
164
"//             rounding that can be used.\n"
165
"//\n"
166
"//\n%s"
167
"//\n",
168
                prjname, creator);
169
 
170
        fprintf(fp, "%s", cpyleft);
171
        fprintf(fp,
172
"module truncate(i_clk, i_ce, i_val, o_val);\n"
173
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
174
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
175
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
176
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
177
"\n"
178
        "\talways @(posedge i_clk)\n"
179
                "\t\tif (i_ce)\n"
180
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
181
"\n"
182
"endmodule\n");
183
}
184
 
185
 
186
void    build_roundhalfup(const char *fname) {
187
        FILE    *fp = fopen(fname, "w");
188
        if (NULL == fp) {
189
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
190
                perror("O/S Err was:");
191
                return;
192
        }
193
 
194
        fprintf(fp,
195
"///////////////////////////////////////////////////////////////////////////\n"
196
"//\n"
197
"// Filename:   roundhalfup.v\n"
198
"//             \n"
199
"// Project:    %s\n"
200
"//\n"
201
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
202
"//             school.  A one half value is added to the result, and then\n"
203
"//             the result is truncated.  When used in an FFT, this produces\n"
204
"//             less bias than the truncation method, although a bias still\n"
205
"//             tends to remain.\n"
206
"//\n"
207
"//\n%s"
208
"//\n",
209
                prjname, creator);
210
 
211
        fprintf(fp, "%s", cpyleft);
212
        fprintf(fp,
213
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
214
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
215
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
216
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
217
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
218
"\n"
219
        "\t// Let's deal with two cases to be as general as we can be here\n"
220
        "\t//\n"
221
        "\t//   1. The desired output would lose no bits at all\n"
222
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
223
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
224
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
225
        "\t//\t\tvalue.\n"
226
        "\tgenerate\n"
227
        "\tif (IWID-SHIFT == OWID)\n"
228
        "\tbegin // No truncation or rounding, output drops no bits\n"
229
"\n"
230
                "\t\talways @(posedge i_clk)\n"
231
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
232
"\n"
233
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
234
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
235
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
236
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
237
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
238 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
239 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
240
"\n"
241
                "\t\talways @(posedge i_clk)\n"
242
                "\t\t\tif (i_ce)\n"
243
                "\t\t\tbegin\n"
244
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
245
                        "\t\t\t\t\to_val <= truncated_value;\n"
246
                        "\t\t\t\telse\n"
247
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
248
                "\t\t\tend\n"
249
"\n"
250
        "\tend\n"
251
        "\tendgenerate\n"
252
"\n"
253
"endmodule\n");
254
}
255
 
256
void    build_roundfromzero(const char *fname) {
257
        FILE    *fp = fopen(fname, "w");
258
        if (NULL == fp) {
259
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
260
                perror("O/S Err was:");
261
                return;
262
        }
263
 
264
        fprintf(fp,
265
"///////////////////////////////////////////////////////////////////////////\n"
266
"//\n"
267
"// Filename:   roundfromzero.v\n"
268
"//             \n"
269
"// Project:    %s\n"
270
"//\n"
271
"// Purpose:    Truncation is one of several options that can be used\n"
272
"//             internal to the various FFT stages to drop bits from one \n"
273
"//             stage to the next.  In general, it is the simplest method\n"
274
"//             of dropping bits, since it requires only a bit selection.\n"
275
"//\n"
276
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
277
"//             since it tends to produce a DC bias in the result.  (Other\n"
278
"//             less pronounced biases may also exist.)\n"
279
"//\n"
280
"//             This particular version also registers the output with the\n"
281
"//             clock, so there will be a delay of one going through this\n"
282
"//             module.  This will keep it in line with the other forms of\n"
283
"//             rounding that can be used.\n"
284
"//\n"
285
"//\n%s"
286
"//\n",
287
                prjname, creator);
288
 
289
        fprintf(fp, "%s", cpyleft);
290
        fprintf(fp,
291
"module convround(i_clk, i_ce, i_val, o_val);\n"
292
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
293
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
294
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
295
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
296
"\n"
297
        "\t// Let's deal with three cases to be as general as we can be here\n"
298
        "\t//\n"
299
        "\t//\t1. The desired output would lose no bits at all\n"
300
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
301
        "\t//\t\tadjusting the value to be the closer to zero in\n"
302
        "\t//\t\tcases of being halfway between two.  If identically\n"
303
        "\t//\t\tequal to a number, we just leave it as is.\n"
304
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
305
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
306
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
307
        "\t//\t\tround away from zero.\n"
308
        "\tgenerate\n"
309 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
310
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
311
        "\t// effect here.\n"
312
"\n"
313
                "\t\talways @(posedge i_clk)\n"
314
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
315
"\n"
316
        "\tend else if (IWID-SHIFT == OWID)\n"
317 23 dgisselq
        "\tbegin // No truncation or rounding, output drops no bits\n"
318
"\n"
319
                "\t\talways @(posedge i_clk)\n"
320
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
321
"\n"
322
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
323
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
324
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
325
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
326
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
327 26 dgisselq
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
328 23 dgisselq
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
329
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
330
"\n"
331
        "\t\talways @(posedge i_clk)\n"
332
                "\t\t\tif (i_ce)\n"
333
                "\t\t\tbegin\n"
334
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
335
                                "\t\t\t\t\to_val <= truncated_value;\n"
336
                        "\t\t\t\telse if (sign_bit)\n"
337
                                "\t\t\t\t\to_val <= truncated_value;\n"
338
                        "\t\t\t\telse\n"
339
                                "\t\t\t\t\to_val <= rounded_up;\n"
340
                "\t\t\tend\n"
341
"\n"
342
        "\tend else // If there's more than one bit we are dropping\n"
343
        "\tbegin\n"
344
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
345
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
346
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
347 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
348 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
349
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
350
"\n"
351
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
352
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
353
"\n"
354
                "\t\talways @(posedge i_clk)\n"
355
                        "\t\t\tif (i_ce)\n"
356
                        "\t\t\tbegin\n"
357
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
358
                                "\t\t\t\t\to_val <= truncated_value;\n"
359
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
360
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
361
                        "\t\t\t\telse if (sign_bit)\n"
362
                                "\t\t\t\t\to_val <= truncated_value;\n"
363
                        "\t\t\t\telse\n"
364
                                "\t\t\t\t\to_val <= rounded_up;\n"
365
                        "\t\t\tend\n"
366
        "\tend\n"
367
        "\tendgenerate\n"
368
"\n"
369
"endmodule\n");
370
}
371
 
372
void    build_convround(const char *fname) {
373
        FILE    *fp = fopen(fname, "w");
374
        if (NULL == fp) {
375
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
376
                perror("O/S Err was:");
377
                return;
378
        }
379
 
380
        fprintf(fp,
381
"///////////////////////////////////////////////////////////////////////////\n"
382
"//\n"
383
"// Filename:   convround.v\n"
384
"//             \n"
385
"// Project:    %s\n"
386
"//\n"
387
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
388
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
389
"//             rounding, or ... more, at least according to Wikipedia.\n"
390
"//\n"
391
"//             This form of rounding works by rounding, when the direction\n"
392
"//             is in question, towards the nearest even value.\n"
393
"//\n"
394
"//\n%s"
395
"//\n",
396
                prjname, creator);
397
 
398
        fprintf(fp, "%s", cpyleft);
399
        fprintf(fp,
400
"module convround(i_clk, i_ce, i_val, o_val);\n"
401
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
402
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
403
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
404
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
405
"\n"
406
"\t// Let's deal with three cases to be as general as we can be here\n"
407
"\t//\n"
408
"\t//\t1. The desired output would lose no bits at all\n"
409
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
410
"\t//\t\tadjusting the value to be the nearest even number in\n"
411
"\t//\t\tcases of being halfway between two.  If identically\n"
412
"\t//\t\tequal to a number, we just leave it as is.\n"
413
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
414
"\t//\t\tnormally unless we are rounding a value of exactly\n"
415
"\t//\t\thalfway between the two.  In the halfway case we round\n"
416
"\t//\t\tto the nearest even number.\n"
417
"\tgenerate\n"
418 28 dgisselq
        "\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
419
        "\tbegin // cannot be applied.  No truncation or rounding takes\n"
420
        "\t// effect here.\n"
421
"\n"
422
                "\t\talways @(posedge i_clk)\n"
423
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
424
"\n"
425
"\tend else if (IWID-SHIFT == OWID)\n"
426 23 dgisselq
"\tbegin // No truncation or rounding, output drops no bits\n"
427
"\n"
428
"\t\talways @(posedge i_clk)\n"
429
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
430
"\n"
431
"\tend else if (IWID-SHIFT-1 == OWID)\n"
432
"\tbegin // Output drops one bit, can only add one or ... not.\n"
433
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
434
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
435
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
436 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
437 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
438
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
439
"\n"
440
"\t\talways @(posedge i_clk)\n"
441
"\t\t\tif (i_ce)\n"
442
"\t\t\tbegin\n"
443
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
444
"\t\t\t\t\to_val <= truncated_value;\n"
445
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
446
"\t\t\t\t\to_val <= rounded_up; // even value\n"
447
"\t\t\t\telse // else round down to the nearest\n"
448
"\t\t\t\t\to_val <= truncated_value; // even value\n"
449
"\t\t\tend\n"
450
"\n"
451
"\tend else // If there's more than one bit we are dropping\n"
452
"\tbegin\n"
453
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
454
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
455
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
456 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
457 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
458
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
459
"\n"
460
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
461
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
462
"\n"
463
"\t\talways @(posedge i_clk)\n"
464
"\t\t\tif (i_ce)\n"
465
"\t\t\tbegin\n"
466
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
467
"\t\t\t\t\to_val <= truncated_value;\n"
468
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
469
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
470
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
471
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
472
"\t\t\t\telse   // else round down to nearest even\n"
473
"\t\t\t\t\to_val <= truncated_value;\n"
474
"\t\t\tend\n"
475
"\tend\n"
476
"\tendgenerate\n"
477
"\n"
478
"endmodule\n");
479
}
480
 
481 26 dgisselq
void    build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
482 23 dgisselq
        FILE    *fp = fopen(fname, "w");
483
        if (NULL == fp) {
484
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
485
                perror("O/S Err was:");
486
                return;
487
        }
488
        const   char    *rnd_string;
489
        if (rounding == RND_TRUNCATE)
490
                rnd_string = "truncate";
491
        else if (rounding == RND_FROMZERO)
492
                rnd_string = "roundfromzero";
493
        else if (rounding == RND_HALFUP)
494
                rnd_string = "roundhalfup";
495
        else
496
                rnd_string = "convround";
497
 
498
 
499
        fprintf(fp,
500
"///////////////////////////////////////////////////////////////////////////\n"
501
"//\n"
502 26 dgisselq
"// Filename:   qtrstage%s.v\n"
503 2 dgisselq
"//             \n"
504
"// Project:    %s\n"
505
"//\n"
506 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
507
"//             frequency FFT.  This particular implementation is optimized\n"
508
"//             so that all of the multiplies are accomplished by additions\n"
509
"//             and multiplexers only.\n"
510
"//\n"
511 2 dgisselq
"//\n%s"
512
"//\n",
513 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
514 2 dgisselq
        fprintf(fp, "%s", cpyleft);
515
 
516
        fprintf(fp,
517 26 dgisselq
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
518 5 dgisselq
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
519
        "\t// Parameters specific to the core that should be changed when this\n"
520
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
521
        "\t// spans must use the fftdoubles stage.\n"
522 23 dgisselq
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
523 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
524
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
525
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
526
        "\toutput\treg                          o_sync;\n"
527 26 dgisselq
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
528
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
529
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
530
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
531
"\n");
532
        }
533 14 dgisselq
        fprintf(fp,
534 5 dgisselq
        "\treg\t        wait_for_sync;\n"
535 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
536 2 dgisselq
"\n"
537 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
538 2 dgisselq
"\n"
539 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
540
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
541
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
542
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
543 2 dgisselq
"\n"
544 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
545
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
546 2 dgisselq
"\n"
547 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
548
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
549
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
550 2 dgisselq
"\n"
551 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
552
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
553
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
554 2 dgisselq
"\n"
555 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
556 14 dgisselq
"\n");
557
        fprintf(fp,
558 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
559
        fprintf(fp,
560
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
561
        fprintf(fp,
562 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
563 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
564
        fprintf(fp,
565 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
566 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
567
        fprintf(fp,
568 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
569 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
570
        fprintf(fp,
571 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
572 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
573
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
574
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
575
/*
576
        fprintf(fp,
577 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
578 9 dgisselq
        "\tgenerate\n"
579
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
580 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
581 9 dgisselq
        "\telse\n"
582 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
583 9 dgisselq
        "\tendgenerate\n"
584 2 dgisselq
"\n"
585 23 dgisselq
*/
586
        fprintf(fp,
587 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
588
        "\tinitial iaddr = 0;\n"
589 5 dgisselq
        "\talways @(posedge i_clk)\n"
590
                "\t\tif (i_rst)\n"
591
                "\t\tbegin\n"
592 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
593 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
594 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
595 5 dgisselq
                "\t\tbegin\n"
596 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
597
                        "\t\t\twait_for_sync <= 1\'b0;\n"
598
                "\t\tend\n"
599
        "\talways @(posedge i_clk)\n"
600
                "\t\tif (i_ce)\n"
601 5 dgisselq
                        "\t\t\timem <= i_data;\n"
602 26 dgisselq
                "\n\n");
603 23 dgisselq
        fprintf(fp,
604
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
605
        "\t// Why not?  Because iaddr will always be zero until after the\n"
606
        "\t// first i_ce, so we are safe.\n"
607 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
608 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
609
                "\t\tif (i_rst)\n"
610 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
611 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
612
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
613
        fprintf(fp,
614
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
615
        "\talways\t@(posedge i_clk)\n"
616
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
617
                "\t\tbegin\n"
618
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
619
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
620
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
621
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
622
                "\t\tend\n\n");
623
        fprintf(fp,
624
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
625
        fprintf(fp,
626 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
627
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
628
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
629
        "\t// it independent of pipeline[2].\n"
630 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
631 26 dgisselq
                "\t\tif (i_ce)\n"
632 23 dgisselq
                "\t\tbegin\n"
633
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
634
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
635
                        "\t\t\tif (ODD == 0)\n"
636 5 dgisselq
                        "\t\t\tbegin\n"
637 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
638
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
639
                        "\t\t\tend else if (INVERSE==0) begin\n"
640
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
641
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
642
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
643
                        "\t\t\tend else begin\n"
644
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
645
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
646
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
647 5 dgisselq
                        "\t\t\tend\n"
648 23 dgisselq
                "\t\tend\n\n");
649
        fprintf(fp,
650
        "\talways\t@(posedge i_clk)\n"
651
                "\t\tif (i_ce)\n"
652
                "\t\tbegin // In sequence, clock = 3\n"
653
                        "\t\t\tif (pipeline[3])\n"
654 5 dgisselq
                        "\t\t\tbegin\n"
655
                                "\t\t\t\tomem <= ob_b;\n"
656
                                "\t\t\t\to_data <= ob_a;\n"
657
                        "\t\t\tend else\n"
658
                                "\t\t\t\to_data <= omem;\n"
659 23 dgisselq
                "\t\tend\n\n");
660
 
661
        fprintf(fp,
662
        "\t// Don\'t forget in the sync check that we are running\n"
663
        "\t// at two clocks per sample.  Thus we need to\n"
664
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
665 26 dgisselq
        "\tinitial\to_sync = 1\'b0;\n"
666 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
667 26 dgisselq
                "\t\tif (i_rst)\n"
668
                "\t\t\to_sync <= 1\'b0;\n"
669
                "\t\telse if (i_ce)\n"
670 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
671
        fprintf(fp, "endmodule\n");
672 2 dgisselq
}
673
 
674 26 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
675 2 dgisselq
        FILE    *fp = fopen(fname, "w");
676
        if (NULL == fp) {
677
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
678
                perror("O/S Err was:");
679
                return;
680
        }
681
 
682 23 dgisselq
        const   char    *rnd_string;
683
        if (rounding == RND_TRUNCATE)
684
                rnd_string = "truncate";
685
        else if (rounding == RND_FROMZERO)
686
                rnd_string = "roundfromzero";
687
        else if (rounding == RND_HALFUP)
688
                rnd_string = "roundhalfup";
689
        else
690
                rnd_string = "convround";
691
 
692
 
693 2 dgisselq
        fprintf(fp,
694
"///////////////////////////////////////////////////////////////////////////\n"
695
"//\n"
696 26 dgisselq
"// Filename:   dblstage%s.v\n"
697 2 dgisselq
"//\n"
698
"// Project:    %s\n"
699
"//\n"
700
"// Purpose:    This is part of an FPGA implementation that will process\n"
701 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
702
"//             through the data at two samples per clock.  If you notice\n"
703
"//             from the derivation of an FFT, the only time both even and\n"
704
"//             odd samples are used at the same time is in this stage.\n"
705
"//             Therefore, other than this stage and these twiddles, all of\n"
706
"//             the other stages can run two stages at a time at one sample\n"
707
"//             per clock.\n"
708 2 dgisselq
"//\n"
709
"//             In this implementation, the output is valid one clock after\n"
710
"//             the input is valid.  The output also accumulates one bit\n"
711
"//             above and beyond the number of bits in the input.\n"
712
"//             \n"
713
"//             i_clk   A system clock\n"
714 6 dgisselq
"//             i_rst   A synchronous reset\n"
715 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
716 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
717 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
718
"//                     bits contain the real portion, low order bits the\n"
719
"//                     imaginary portion, all in two\'s complement.\n"
720
"//             i_right The next (odd) complex sample input, same format as\n"
721
"//                     i_left.\n"
722
"//             o_left  The first (even) complex output.\n"
723
"//             o_right The next (odd) complex output.\n"
724 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
725 2 dgisselq
"//\n%s"
726 26 dgisselq
"//\n", (dbg)?"_dbg":"", prjname, creator);
727 2 dgisselq
 
728
        fprintf(fp, "%s", cpyleft);
729
        fprintf(fp,
730 26 dgisselq
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
731 23 dgisselq
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
732 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
733 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
734 28 dgisselq
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
735 6 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
736 26 dgisselq
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
737
 
738
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
739
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
740
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
741
"\n");
742
        }
743 19 dgisselq
        fprintf(fp,
744 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
745
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
746
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
747
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
748
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
749
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
750
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
751 2 dgisselq
"\n"
752 15 dgisselq
"\n"
753 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
754 15 dgisselq
"\n"
755 23 dgisselq
"\n");
756
        fprintf(fp,
757 26 dgisselq
        "\n"
758
        "\t// As with any register connected to the sync pulse, these must\n"
759
        "\t// have initial values and be reset on the i_rst signal.\n"
760
        "\t// Other data values need only restrict their updates to i_ce\n"
761
        "\t// enabled clocks, but sync\'s must obey resets and initial\n"
762
        "\t// conditions as well.\n"
763 28 dgisselq
        "\treg\trnd_sync, r_sync;\n"
764 2 dgisselq
"\n"
765 28 dgisselq
        "\tinitial\trnd_sync      = 1\'b0; // Sync into rounding\n"
766
        "\tinitial\tr_sync        = 1\'b0; // Sync coming out\n"
767 5 dgisselq
        "\talways @(posedge i_clk)\n"
768 6 dgisselq
                "\t\tif (i_rst)\n"
769 23 dgisselq
                "\t\tbegin\n"
770 26 dgisselq
                        "\t\t\trnd_sync <= 1\'b0;\n"
771 28 dgisselq
                        "\t\t\tr_sync <= 1\'b0;\n"
772
                "\t\tend else if (i_ce)\n"
773 5 dgisselq
                "\t\tbegin\n"
774 26 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
775 28 dgisselq
                        "\t\t\tr_sync <= rnd_sync;\n"
776 26 dgisselq
                "\t\tend\n"
777
"\n"
778
        "\t// As with other variables, these are really only updated when in\n"
779
        "\t// the processing pipeline, after the first i_sync.  However, to\n"
780
        "\t// eliminate as much unnecessary logic as possible, we toggle\n"
781 28 dgisselq
        "\t// these any time the i_ce line is enabled, and don\'t reset.\n"
782
        "\t// them on i_rst.\n");
783
        fprintf(fp,
784
        "\t// Don't forget that we accumulate a bit by adding two values\n"
785
        "\t// together. Therefore our intermediate value must have one more\n"
786
        "\t// bit than the two originals.\n"
787
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n"
788
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n"
789 26 dgisselq
        "\talways @(posedge i_clk)\n"
790
                "\t\tif (i_ce)\n"
791
                "\t\tbegin\n"
792
                        "\t\t\t//\n"
793 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
794
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
795 5 dgisselq
                        "\t\t\t//\n"
796 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
797
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
798 6 dgisselq
                        "\t\t\t//\n"
799 5 dgisselq
                "\t\tend\n"
800 28 dgisselq
"\n");
801
        fprintf(fp,
802
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
803
        "\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
804
        fprintf(fp,
805
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
806
        "\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
807
        fprintf(fp,
808
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
809
        "\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
810
        fprintf(fp,
811
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
812
        "\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
813
 
814
        fprintf(fp, "\n"
815
        "\t// Prior versions of this routine did not include the extra\n"
816
        "\t// clock and register/flip-flops that this routine requires.\n"
817
        "\t// These are placed in here to correct a bug in Verilator, that\n"
818
        "\t// otherwise struggles.  (Hopefully this will fix the problem ...)\n"
819
        "\talways @(posedge i_clk)\n"
820
                "\t\tif (i_ce)\n"
821
                "\t\tbegin\n"
822
                        "\t\t\to_left  <= { o_out_0r, o_out_0i };\n"
823
                        "\t\t\to_right <= { o_out_1r, o_out_1i };\n"
824
                "\t\tend\n"
825 2 dgisselq
"\n"
826 28 dgisselq
        "\tinitial\to_sync = 1'b0; // Final sync coming out of module\n"
827
        "\talways @(posedge i_clk)\n"
828
                "\t\tif (i_rst)\n"
829
                "\t\t\to_sync <= 1'b0;\n"
830
                "\t\telse if (i_ce)\n"
831
                "\t\t\to_sync <= r_sync;\n"
832 2 dgisselq
"\n"
833
"endmodule\n");
834
        fclose(fp);
835
}
836
 
837
void    build_multiply(const char *fname) {
838
        FILE    *fp = fopen(fname, "w");
839
        if (NULL == fp) {
840
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
841
                perror("O/S Err was:");
842
                return;
843
        }
844
 
845
        fprintf(fp,
846
"///////////////////////////////////////////////////////////////////////////\n"
847
"//\n"
848
"// Filename:   shiftaddmpy.v\n"
849
"//\n"
850
"// Project:    %s\n"
851
"//\n"
852
"// Purpose:    A portable shift and add multiply.\n"
853
"//\n"
854
"//             While both Xilinx and Altera will offer single clock \n"
855
"//             multiplies, this simple approach will multiply two numbers\n"
856
"//             on any architecture.  The result maintains the full width\n"
857
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
858
"//             no shifted bits, etc.\n"
859
"//\n"
860
"//             Further, for those applications that can support it, this\n"
861
"//             multiply is pipelined and will produce one answer per clock.\n"
862
"//\n"
863
"//             For minimal processing delay, make the first parameter\n"
864
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
865
"//\n"
866
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
867
"//             That is, if the data is present on the input at clock t=0,\n"
868
"//             the result will be present on the output at time t=AWIDTH+1;\n"
869
"//\n"
870
"//\n%s"
871
"//\n", prjname, creator);
872
 
873
        fprintf(fp, "%s", cpyleft);
874
        fprintf(fp,
875
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
876
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
877
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
878
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
879
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
880
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
881
"\n"
882
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
883
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
884
        "\treg\t\t\tsgn;\n"
885
"\n"
886
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
887
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
888
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
889
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
890
        "\tgenvar k;\n"
891
"\n"
892 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
893
        "\t// taking the absolute value here would require an additional bit.\n"
894
        "\t// However, because our results are now unsigned, we can stay\n"
895
        "\t// within the number of bits given (for now).\n"
896 2 dgisselq
        "\talways @(posedge i_clk)\n"
897
                "\t\tif (i_ce)\n"
898
                "\t\tbegin\n"
899
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
900
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
901
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
902
                "\t\tend\n"
903
"\n"
904
        "\talways @(posedge i_clk)\n"
905
                "\t\tif (i_ce)\n"
906
                "\t\tbegin\n"
907 26 dgisselq
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
908
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
909 2 dgisselq
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
910 26 dgisselq
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
911 2 dgisselq
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
912
                "\t\tend\n"
913
"\n"
914
        "\tgenerate\n"
915 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
916 25 dgisselq
        "\tbegin : genstages\n"
917 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
918
                "\t\tif (i_ce)\n"
919 2 dgisselq
                "\t\tbegin\n"
920 26 dgisselq
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
921
                        "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
922
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
923 2 dgisselq
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
924
                "\t\tend\n"
925
        "\tend\n"
926
        "\tendgenerate\n"
927
"\n"
928
        "\talways @(posedge i_clk)\n"
929
                "\t\tif (i_ce)\n"
930
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
931
"\n"
932
"endmodule\n");
933
 
934
        fclose(fp);
935
}
936
 
937
void    build_dblreverse(const char *fname) {
938
        FILE    *fp = fopen(fname, "w");
939
        if (NULL == fp) {
940
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
941
                perror("O/S Err was:");
942
                return;
943
        }
944
 
945
        fprintf(fp,
946
"///////////////////////////////////////////////////////////////////////////\n"
947
"//\n"
948
"// Filename:   dblreverse.v\n"
949
"//\n"
950
"// Project:    %s\n"
951
"//\n"
952
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
953
"//             expected as follows:\n"
954
"//\n"
955
"//             i_clk   A running clock at whatever system speed is offered.\n"
956
"//             i_rst   A synchronous reset signal, that resets all internals\n"
957
"//             i_ce    If this is one, one input is consumed and an output\n"
958
"//                     is produced.\n"
959
"//             i_in_0, i_in_1\n"
960
"//                     Two inputs to be consumed, each of width WIDTH.\n"
961
"//             o_out_0, o_out_1\n"
962
"//                     Two of the bitreversed outputs, also of the same\n"
963
"//                     width, WIDTH.  Of course, there is a delay from the\n"
964
"//                     first input to the first output.  For this purpose,\n"
965
"//                     o_sync is present.\n"
966 26 dgisselq
"//             o_sync  This will be a 1\'b1 for the first value in any block.\n"
967
"//                     Following a reset, this will only become 1\'b1 once\n"
968 2 dgisselq
"//                     the data has been loaded and is now valid.  After that,\n"
969
"//                     all outputs will be valid.\n"
970 26 dgisselq
"//\n"
971
"//     20150602 -- This module has undergone massive rework in order to\n"
972
"//             ensure that it uses resources efficiently.  As a result, \n"
973
"//             it now optimizes nicely into block RAMs.  As an unfortunately\n"
974
"//             side effect, it now passes it\'s bench test (dblrev_tb) but\n"
975
"//             fails the integration bench test (fft_tb).\n"
976
"//\n"
977 2 dgisselq
"//\n%s"
978
"//\n", prjname, creator);
979
        fprintf(fp, "%s", cpyleft);
980
        fprintf(fp,
981
"\n\n"
982
"//\n"
983
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
984
"// our work into eight memory banks, writing two banks at once and reading\n"
985
"// another two banks in the same clock?\n"
986
"//\n"
987
"//     mem[00xxx0] = s_0[n]\n"
988
"//     mem[00xxx1] = s_1[n]\n"
989
"//     o_0[n] = mem[10xxx0]\n"
990
"//     o_1[n] = mem[11xxx0]\n"
991
"//     ...\n"
992
"//     mem[01xxx0] = s_0[m]\n"
993
"//     mem[01xxx1] = s_1[m]\n"
994
"//     o_0[m] = mem[10xxx1]\n"
995
"//     o_1[m] = mem[11xxx1]\n"
996
"//     ...\n"
997
"//     mem[10xxx0] = s_0[n]\n"
998
"//     mem[10xxx1] = s_1[n]\n"
999
"//     o_0[n] = mem[00xxx0]\n"
1000
"//     o_1[n] = mem[01xxx0]\n"
1001
"//     ...\n"
1002
"//     mem[11xxx0] = s_0[m]\n"
1003
"//     mem[11xxx1] = s_1[m]\n"
1004
"//     o_0[m] = mem[00xxx1]\n"
1005
"//     o_1[m] = mem[01xxx1]\n"
1006
"//     ...\n"
1007
"//\n"
1008 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
1009
"//     to do it properly.  These four banks are defined by the two bits\n"
1010
"//     that determine the top and bottom of the correct address.  Larger\n"
1011
"//     FFT\'s would require more memories.\n"
1012
"//\n"
1013 2 dgisselq
"//\n");
1014
        fprintf(fp,
1015
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
1016 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
1017 26 dgisselq
        "\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"
1018 5 dgisselq
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
1019
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
1020 26 dgisselq
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
1021 5 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
1022 2 dgisselq
"\n"
1023 26 dgisselq
        "\treg\t\t\tin_reset;\n"
1024
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
1025
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
1026 2 dgisselq
"\n"
1027 5 dgisselq
        "\tgenvar\tk;\n"
1028 26 dgisselq
        "\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
1029 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
1030 26 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
1031 25 dgisselq
        "\tend endgenerate\n"
1032 2 dgisselq
"\n"
1033 25 dgisselq
        "\tinitial iaddr = 0;\n"
1034
        "\tinitial in_reset = 1\'b1;\n"
1035 26 dgisselq
        "\tinitial o_sync = 1\'b0;\n"
1036 5 dgisselq
        "\talways @(posedge i_clk)\n"
1037
                "\t\tif (i_rst)\n"
1038
                "\t\tbegin\n"
1039
                        "\t\t\tiaddr <= 0;\n"
1040 26 dgisselq
                        "\t\t\tin_reset <= 1\'b1;\n"
1041
                        "\t\t\to_sync <= 1\'b0;\n"
1042 5 dgisselq
                "\t\tend else if (i_ce)\n"
1043
                "\t\tbegin\n"
1044 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
1045
                        "\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
1046
                                "\t\t\t\tin_reset <= 1\'b0;\n"
1047 5 dgisselq
                        "\t\t\tif (in_reset)\n"
1048 26 dgisselq
                                "\t\t\t\to_sync <= 1\'b0;\n"
1049
                        "\t\t\telse\n"
1050
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
1051 5 dgisselq
                "\t\tend\n"
1052 2 dgisselq
"\n"
1053 26 dgisselq
        "\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
1054
        "\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
1055
"\n"
1056
        "\talways @(posedge i_clk)\n"
1057
                "\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
1058
        "\talways @(posedge i_clk)\n"
1059
                "\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
1060
"\n"
1061
"\n"
1062
        "\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
1063
"\n"
1064
        "\talways @(posedge i_clk)\n"
1065
                "\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1066
        "\talways @(posedge i_clk)\n"
1067
                "\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1068
        "\talways @(posedge i_clk)\n"
1069
                "\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1070
        "\talways @(posedge i_clk)\n"
1071
                "\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1072
"\n"
1073
        "\treg\tadrz;\n"
1074
        "\talways @(posedge i_clk)\n"
1075 28 dgisselq
                "\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n"
1076 26 dgisselq
"\n"
1077
        "\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
1078
        "\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1079
"\n"
1080 21 dgisselq
"endmodule\n");
1081 2 dgisselq
 
1082
        fclose(fp);
1083
}
1084
 
1085 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1086 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1087
        if (NULL == fp) {
1088
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1089
                perror("O/S Err was:");
1090
                return;
1091
        }
1092 23 dgisselq
        const   char    *rnd_string;
1093
        if (rounding == RND_TRUNCATE)
1094
                rnd_string = "truncate";
1095
        else if (rounding == RND_FROMZERO)
1096
                rnd_string = "roundfromzero";
1097
        else if (rounding == RND_HALFUP)
1098
                rnd_string = "roundhalfup";
1099
        else
1100
                rnd_string = "convround";
1101 2 dgisselq
 
1102
        fprintf(fp,
1103
"///////////////////////////////////////////////////////////////////////////\n"
1104
"//\n"
1105
"// Filename:   butterfly.v\n"
1106
"//\n"
1107
"// Project:    %s\n"
1108
"//\n"
1109
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1110
"//             in frequency version of an FFT.  Specifically, given\n"
1111
"//             complex Left and Right values together with a \n"
1112
"//             coefficient, the output of this routine is given\n"
1113
"//             by:\n"
1114
"//\n"
1115
"//             L' = L + R\n"
1116
"//             R' = (L - R)*C\n"
1117
"//\n"
1118
"//             The rest of the junk below handles timing (mostly),\n"
1119
"//             to make certain that L' and R' reach the output at\n"
1120
"//             the same clock.  Further, just to make certain\n"
1121
"//             that is the case, an 'aux' input exists.  This\n"
1122
"//             aux value will come out of this routine synchronized\n"
1123
"//             to the values it came in with.  (i.e., both L', R',\n"
1124
"//             and aux all have the same delay.)  Hence, a caller\n"
1125
"//             of this routine may set aux on the first input with\n"
1126
"//             valid data, and then wait to see aux set on the output\n"
1127
"//             to know when to find the first output with valid data.\n"
1128
"//\n"
1129
"//             All bits are preserved until the very last clock,\n"
1130
"//             where any more bits than OWIDTH will be quietly\n"
1131
"//             discarded.\n"
1132
"//\n"
1133
"//             This design features no overflow checking.\n"
1134
"// \n"
1135
"// Notes:\n"
1136
"//             CORDIC:\n"
1137
"//             Much as we would like, we can't use a cordic here.\n"
1138
"//             The goal is to accomplish an FFT, as defined, and a\n"
1139
"//             CORDIC places a scale factor onto the data.  Removing\n"
1140
"//             the scale factor would cost a two multiplies, which\n"
1141
"//             is precisely what we are trying to avoid.\n"
1142
"//\n"
1143
"//\n"
1144
"//             3-MULTIPLIES:\n"
1145
"//             It should also be possible to do this with three \n"
1146
"//             multiplies and an extra two addition cycles.  \n"
1147
"//\n"
1148
"//             We want\n"
1149
"//                     R+I = (a + jb) * (c + jd)\n"
1150
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1151
"//             We multiply\n"
1152
"//                     P1 = ac\n"
1153
"//                     P2 = bd\n"
1154
"//                     P3 = (a+b)(c+d)\n"
1155
"//             Then \n"
1156
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1157
"//\n"
1158
"//             WIDTHS:\n"
1159
"//             On multiplying an X width number by an\n"
1160
"//             Y width number, X>Y, the result should be (X+Y)\n"
1161
"//             bits, right?\n"
1162
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1163
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1164
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1165
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1166
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1167
"//             YUP!  But just barely.  Do this and you'll really want\n"
1168
"//             to drop a bit, although you will risk overflow in so\n"
1169
"//             doing.\n"
1170 26 dgisselq
"//\n"
1171
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
1172
"//             synchronization lines no longer go through the FIFO with the\n"
1173
"//             left hand sum, but are kept out of memory.  This allows the\n"
1174
"//             butterfly to use more optimal memory resources, while also\n"
1175
"//             guaranteeing that the sync lines can be properly reset upon\n"
1176
"//             any reset signal.\n"
1177
"//\n"
1178 2 dgisselq
"//\n%s"
1179
"//\n", prjname, creator);
1180
        fprintf(fp, "%s", cpyleft);
1181
 
1182
        fprintf(fp,
1183 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1184 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1185
        "\t// Public changeable parameters ...\n"
1186 14 dgisselq
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1187 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1188 14 dgisselq
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
1189 28 dgisselq
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
1190 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1191
        "\t// this value is fractional, then round up to the nearest\n"
1192
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1193 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1194 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1195 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1196
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1197
        "\tinput\t\ti_aux;\n"
1198
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1199 26 dgisselq
        "\toutput\treg\to_aux;\n"
1200 14 dgisselq
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
1201 28 dgisselq
        bflydelay(16, xtracbits), lgdelay(16,xtracbits));
1202 14 dgisselq
        fprintf(fp,
1203 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1204 2 dgisselq
"\n"
1205 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1206
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
1207
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1208
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1209
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1210
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1211
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1212
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1213 2 dgisselq
"\n"
1214 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1215 2 dgisselq
"\n"
1216 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1217
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1218 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1219 26 dgisselq
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1220 5 dgisselq
"\n");
1221
        fprintf(fp,
1222
        "\t// Set up the input to the multiply\n"
1223 2 dgisselq
        "\talways @(posedge i_clk)\n"
1224
                "\t\tif (i_ce)\n"
1225
                "\t\tbegin\n"
1226
                        "\t\t\t// One clock just latches the inputs\n"
1227
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1228
                        "\t\t\tr_right <= i_right;\n"
1229
                        "\t\t\tr_coef  <= i_coef;\n"
1230
                        "\t\t\t// Next clock adds/subtracts\n"
1231
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1232
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1233
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1234
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1235
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1236
                        "\t\t\tr_coef_2<= r_coef;\n"
1237
        "\t\tend\n"
1238 5 dgisselq
"\n");
1239
        fprintf(fp,
1240
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1241
        "\t// to be multiplied, but yet we still need the results in sync\n"
1242
        "\t// with the answer when it is ready.\n"
1243 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1244 2 dgisselq
        "\talways @(posedge i_clk)\n"
1245 6 dgisselq
                "\t\tif (i_rst)\n"
1246
                        "\t\t\tfifo_addr <= 0;\n"
1247 26 dgisselq
                "\t\telse if (i_ce)\n"
1248 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1249
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1250
                        "\t\t\t// right side.\n"
1251
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1252 14 dgisselq
"\n"
1253 26 dgisselq
        "\talways @(posedge i_clk)\n"
1254
                "\t\tif (i_ce)\n"
1255
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
1256 2 dgisselq
"\n"
1257 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1258
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1259
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1260
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1261 2 dgisselq
"\n"
1262 5 dgisselq
"\n");
1263
        fprintf(fp,
1264
        "\t// Multiply output is always a width of the sum of the widths of\n"
1265
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1266
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1267
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1268
        "\t// three multiply complex multiply cannot increase the total\n"
1269
        "\t// number of bits in our final output.  We\'ll take care of\n"
1270
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1271
        "\t// below.\n"
1272 2 dgisselq
"\n"
1273 5 dgisselq
"\n");
1274
        fprintf(fp,
1275
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1276
        "\t// by doing three multiplies we accomplish the work of four.\n"
1277
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1278
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1279
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1280
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1281
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1282
        "\t// and P3 as\n"
1283
        "\t//\tP1 = ac\n"
1284
        "\t//\tP2 = bd\n"
1285
        "\t//\tP3 = (a + b) * (c + d)\n"
1286
        "\t// and then complete our final answer with\n"
1287
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1288
        "\t//\tad + bc = P3 - P2 - P1\n"
1289
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1290
        "\t//\t        = bc + ad (this checks)\n"
1291 2 dgisselq
"\n"
1292 5 dgisselq
"\n");
1293
        fprintf(fp,
1294
        "\t// This should really be based upon an IF, such as in\n"
1295
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1296
        "\t// However, this is the only (other) way I know to do it.\n"
1297 2 dgisselq
        "\tgenerate\n"
1298
        "\tif (CWIDTH < IWIDTH+1)\n"
1299
        "\tbegin\n"
1300 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1301
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1302
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1303
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1304
                "\n"
1305 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1306 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1307
                "\t\t// simpler, multiply.\n"
1308 2 dgisselq
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1309
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1310
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1311
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1312 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1313 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1314
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1315 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1316 2 dgisselq
        "\tend else begin\n"
1317 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1318
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1319
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1320
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1321
                "\n"
1322 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1323
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1324
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1325
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1326
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1327 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1328 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1329 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1330 2 dgisselq
        "\tend\n"
1331
        "\tendgenerate\n"
1332 5 dgisselq
"\n");
1333
        fprintf(fp,
1334
        "\t// These values are held in memory and delayed during the\n"
1335
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1336
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1337
        "\t// therefore, the left_x values need to be right shifted by\n"
1338
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1339
        "\t// extension.\n"
1340
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1341 26 dgisselq
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
1342
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1343
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1344 2 dgisselq
"\n"
1345
"\n"
1346 23 dgisselq
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
1347 5 dgisselq
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1348
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1349
"\n");
1350
        fprintf(fp,
1351 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1352 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1353
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1354
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1355
        "\t// them, but the actual values will never fill all these bits.\n"
1356
        "\t// In particular, we only need:\n"
1357
        "\t//\t IWIDTH bits for the input\n"
1358
        "\t//\t     +1 bit for the add/subtract\n"
1359
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1360
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1361
        "\t//\t ------\n"
1362
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1363
        "\t//\n"
1364
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1365
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1366
        "\t//\t   IWIDTH bits for the input\n"
1367
        "\t//\t       +1 bit for the add/subtract\n"
1368
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1369
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1370
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1371
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1372
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1373
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1374
        "\t// SHIFT) we accomplish that here.\n"
1375 23 dgisselq
"\n");
1376
        fprintf(fp,
1377
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1378
 
1379
        fprintf(fp,
1380 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
1381 23 dgisselq
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1382
                rnd_string);
1383
        fprintf(fp,
1384 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
1385 23 dgisselq
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1386
                rnd_string);
1387
        fprintf(fp,
1388 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1389 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1390
        fprintf(fp,
1391 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1392 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1393
        fprintf(fp,
1394
        "\talways @(posedge i_clk)\n"
1395
                "\t\tif (i_ce)\n"
1396
                "\t\tbegin\n"
1397
                        "\t\t\t// First clock, recover all values\n"
1398
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1399
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1400
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1401
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1402
                        "\t\t\t// extra bits we need to get rid of.)\n"
1403
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1404
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1405 2 dgisselq
"\n"
1406 23 dgisselq
                        "\t\t\t// Second clock, round and latch for final clock\n"
1407
                        "\t\t\tb_right_r <= rnd_right_r;\n"
1408
                        "\t\t\tb_right_i <= rnd_right_i;\n"
1409
                        "\t\t\tb_left_r <= rnd_left_r;\n"
1410
                        "\t\t\tb_left_i <= rnd_left_i;\n"
1411 24 dgisselq
                "\t\tend\n"
1412
"\n");
1413 26 dgisselq
 
1414 24 dgisselq
        fprintf(fp,
1415 26 dgisselq
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
1416
        "\tinitial\taux_pipeline = 0;\n"
1417
        "\talways @(posedge i_clk)\n"
1418
        "\t\tif (i_rst)\n"
1419
        "\t\t\taux_pipeline <= 0;\n"
1420
        "\t\telse if (i_ce)\n"
1421
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
1422
"\n");
1423
        fprintf(fp,
1424 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1425 24 dgisselq
        "\talways @(posedge i_clk)\n"
1426
                "\t\tif (i_rst)\n"
1427
                "\t\t\to_aux <= 1\'b0;\n"
1428
                "\t\telse if (i_ce)\n"
1429
                "\t\tbegin\n"
1430
                        "\t\t\t// Second clock, latch for final clock\n"
1431 26 dgisselq
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
1432 23 dgisselq
                "\t\tend\n"
1433
"\n");
1434 24 dgisselq
 
1435 23 dgisselq
        fprintf(fp,
1436 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1437
        "\t// complement numbers per output word, so that each output word\n"
1438
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1439
        "\t// portion and the bottom half being the imaginary portion.\n"
1440 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1441
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1442 2 dgisselq
"\n"
1443
"endmodule\n");
1444
        fclose(fp);
1445
}
1446
 
1447 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1448 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1449
        if (NULL == fp) {
1450
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1451
                perror("O/S Err was:");
1452
                return;
1453
        }
1454
 
1455 23 dgisselq
        const   char    *rnd_string;
1456
        if (rounding == RND_TRUNCATE)
1457
                rnd_string = "truncate";
1458
        else if (rounding == RND_FROMZERO)
1459
                rnd_string = "roundfromzero";
1460
        else if (rounding == RND_HALFUP)
1461
                rnd_string = "roundhalfup";
1462
        else
1463
                rnd_string = "convround";
1464
 
1465
 
1466 22 dgisselq
        fprintf(fp,
1467
"///////////////////////////////////////////////////////////////////////////\n"
1468
"//\n"
1469
"// Filename:   hwbfly.v\n"
1470
"//\n"
1471
"// Project:    %s\n"
1472
"//\n"
1473
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1474
"//             in 'butterfly.v', save only that it uses the verilog \n"
1475
"//             operator '*' in hopes that the synthesizer would be able\n"
1476
"//             to optimize it with hardware resources.\n"
1477
"//\n"
1478
"//             It is understood that a hardware multiply can complete its\n"
1479
"//             operation in a single clock.\n"
1480
"//\n"
1481
"//\n%s"
1482
"//\n", prjname, creator);
1483
        fprintf(fp, "%s", cpyleft);
1484
        fprintf(fp,
1485
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1486
                "\t\to_left, o_right, o_aux);\n"
1487
        "\t// Public changeable parameters ...\n"
1488
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1489
        "\t// Parameters specific to the core that should not be changed.\n"
1490 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1491 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1492
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1493
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1494
        "\tinput\t\ti_aux;\n"
1495
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1496
        "\toutput\treg\to_aux;\n"
1497
"\n", xtracbits);
1498
        fprintf(fp,
1499
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1500
"\n"
1501
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1502
        "\treg\t                        r_aux, r_aux_2;\n"
1503
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
1504
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1505
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1506
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1507
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1508
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1509 26 dgisselq
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1510 22 dgisselq
"\n"
1511
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1512
"\n"
1513
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1514
"\n"
1515
        "\t// Set up the input to the multiply\n"
1516 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1517
        "\tinitial r_aux_2 = 1\'b0;\n"
1518 22 dgisselq
        "\talways @(posedge i_clk)\n"
1519 25 dgisselq
                "\t\tif (i_rst)\n"
1520
                "\t\tbegin\n"
1521 26 dgisselq
                        "\t\t\tr_aux <= 1\'b0;\n"
1522
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1523 25 dgisselq
                "\t\tend else if (i_ce)\n"
1524
                "\t\tbegin\n"
1525
                        "\t\t\t// One clock just latches the inputs\n"
1526 26 dgisselq
                        "\t\t\tr_aux <= i_aux;\n"
1527
                        "\t\t\t// Next clock adds/subtracts\n"
1528
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1529
                        "\t\t\tr_aux_2 <= r_aux;\n"
1530
                "\t\tend\n"
1531
        "\talways @(posedge i_clk)\n"
1532
                "\t\tif (i_ce)\n"
1533
                "\t\tbegin\n"
1534
                        "\t\t\t// One clock just latches the inputs\n"
1535 25 dgisselq
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1536
                        "\t\t\tr_right <= i_right;\n"
1537
                        "\t\t\tr_coef  <= i_coef;\n"
1538
                        "\t\t\t// Next clock adds/subtracts\n"
1539
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1540
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1541
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1542
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1543
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1544 26 dgisselq
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1545
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1546 25 dgisselq
                "\t\tend\n"
1547 22 dgisselq
        "\n\n");
1548
        fprintf(fp,
1549
"\t// See comments in the butterfly.v source file for a discussion of\n"
1550
"\t// these operations and the appropriate bit widths.\n\n");
1551
        fprintf(fp,
1552 26 dgisselq
        "\treg\tsigned  [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1553
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1554 22 dgisselq
"\n"
1555 26 dgisselq
        "\treg\tsigned  [(CWIDTH-1):0]  p1c_in, p2c_in; // Coefficient multiply inputs\n"
1556
        "\treg\tsigned  [(IWIDTH):0]    p1d_in, p2d_in; // Data multiply inputs\n"
1557
        "\treg\tsigned  [(CWIDTH):0]    p3c_in; // Product 3, coefficient input\n"
1558
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in; // Product 3, data input\n"
1559 22 dgisselq
"\n"
1560 25 dgisselq
        "\tinitial leftv    = 0;\n"
1561
        "\tinitial leftvv   = 0;\n"
1562 22 dgisselq
        "\talways @(posedge i_clk)\n"
1563
        "\tbegin\n"
1564
                "\t\tif (i_rst)\n"
1565
                "\t\tbegin\n"
1566
                        "\t\t\tleftv <= 0;\n"
1567
                        "\t\t\tleftvv <= 0;\n"
1568 26 dgisselq
                "\t\tend else if (i_ce)\n"
1569 22 dgisselq
                "\t\tbegin\n"
1570
                        "\t\t\t// Second clock, pipeline = 1\n"
1571 26 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1572
"\n"
1573
                        "\t\t\t// Third clock, pipeline = 3\n"
1574
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1575
                        "\t\t\tleftvv <= leftv;\n"
1576
                "\t\tend\n"
1577
        "\tend\n"
1578
"\n"
1579
        "\talways @(posedge i_clk)\n"
1580
                "\t\tif (i_ce)\n"
1581
                "\t\tbegin\n"
1582
                        "\t\t\t// Second clock, pipeline = 1\n"
1583
                        "\t\t\tp1c_in <= ir_coef_r;\n"
1584
                        "\t\t\tp2c_in <= ir_coef_i;\n"
1585
                        "\t\t\tp1d_in <= r_dif_r;\n"
1586
                        "\t\t\tp2d_in <= r_dif_i;\n"
1587 22 dgisselq
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1588
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1589 23 dgisselq
"\n"
1590
"\n"
1591 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1592 26 dgisselq
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1593 22 dgisselq
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1594
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1595
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1596 26 dgisselq
                "\t\tend\n"
1597 22 dgisselq
"\n"
1598 26 dgisselq
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1599
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1600
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1601 22 dgisselq
"\n");
1602
 
1603
        fprintf(fp,
1604
        "\t// These values are held in memory and delayed during the\n"
1605
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1606
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1607
        "\t// therefore, the left_x values need to be right shifted by\n"
1608
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1609
        "\t// extension.\n"
1610 24 dgisselq
        "\twire\taux_s;\n"
1611 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1612
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1613 26 dgisselq
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1614
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1615 22 dgisselq
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1616
"\n"
1617
"\n"
1618 26 dgisselq
        "\t(* use_dsp48=\"no\" *)\n"
1619 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1620
        fprintf(fp,
1621
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1622 22 dgisselq
 
1623
        fprintf(fp,
1624 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1625
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1626 23 dgisselq
                rnd_string);
1627
        fprintf(fp,
1628 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1629
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1630 23 dgisselq
                rnd_string);
1631
        fprintf(fp,
1632 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1633 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1634
        fprintf(fp,
1635 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1636 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1637
 
1638
        fprintf(fp,
1639 25 dgisselq
        "\tinitial left_saved = 0;\n"
1640
        "\tinitial o_aux      = 1\'b0;\n"
1641 22 dgisselq
        "\talways @(posedge i_clk)\n"
1642
        "\t\tif (i_rst)\n"
1643
        "\t\tbegin\n"
1644
                "\t\t\tleft_saved <= 0;\n"
1645 26 dgisselq
                "\t\t\to_aux <= 1\'b0;\n"
1646 22 dgisselq
        "\t\tend else if (i_ce)\n"
1647
        "\t\tbegin\n"
1648
                "\t\t\t// First clock, recover all values\n"
1649
                "\t\t\tleft_saved <= leftvv;\n"
1650 26 dgisselq
"\n"
1651
                "\t\t\t// Second clock, round and latch for final clock\n"
1652
                "\t\t\to_aux <= aux_s;\n"
1653
        "\t\tend\n"
1654
        "\talways @(posedge i_clk)\n"
1655
        "\t\tif (i_ce)\n"
1656
        "\t\tbegin\n"
1657 22 dgisselq
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1658
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1659
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1660
                "\t\t\t// extra bits we need to get rid of.)\n"
1661 26 dgisselq
                "\n"
1662
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1663
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1664
                "\t\t\t// they are prevented from using DSP48\'s\n"
1665
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1666
                "\t\t\tmpy_r <= w_one - w_two;\n"
1667
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1668 22 dgisselq
        "\t\tend\n"
1669
        "\n");
1670
 
1671
        fprintf(fp,
1672
        "\t// As a final step, we pack our outputs into two packed two's\n"
1673
        "\t// complement numbers per output word, so that each output word\n"
1674
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1675
        "\t// portion and the bottom half being the imaginary portion.\n"
1676 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1677
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1678 22 dgisselq
"\n"
1679
"endmodule\n");
1680
 
1681
}
1682
 
1683 26 dgisselq
void    build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
1684 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1685
        int     cbits = nbits + xtra;
1686
 
1687
        if ((cbits * 2) >= sizeof(long long)*8) {
1688
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
1689
                exit(-1);
1690
        }
1691
 
1692
        if (fstage == NULL) {
1693
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
1694
                perror("O/S Err was:");
1695
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
1696
                return;
1697
        }
1698
 
1699
        fprintf(fstage,
1700
"////////////////////////////////////////////////////////////////////////////\n"
1701
"//\n"
1702 26 dgisselq
"// Filename:   %sfftstage_%c%d%s.v\n"
1703 2 dgisselq
"//\n"
1704
"// Project:    %s\n"
1705
"//\n"
1706
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
1707
"//             be used by a FFT core compiler to generate FFTs which may be\n"
1708
"//             used as part of an FFT core.  Specifically, this file \n"
1709
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
1710
"//             FFT, there shall be (N-1) of these stages.  \n"
1711
"//\n%s"
1712
"//\n",
1713 26 dgisselq
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
1714 2 dgisselq
        fprintf(fstage, "%s", cpyleft);
1715 26 dgisselq
        fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
1716
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
1717
                (dbg)?", o_dbg":"");
1718 2 dgisselq
        // These parameter values are useless at this point--they are to be
1719
        // replaced by the parameter values in the calling program.  Only
1720
        // problem is, the CWIDTH needs to match exactly!
1721
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
1722
                nbits, cbits, nbits+1);
1723
        fprintf(fstage,
1724
"\t// Parameters specific to the core that should be changed when this\n"
1725
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
1726
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
1727
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
1728 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
1729 2 dgisselq
        fprintf(fstage,
1730
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
1731
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
1732
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
1733
"\toutput       reg                             o_sync;\n"
1734 26 dgisselq
"\n");
1735
        if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
1736
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
1737
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
1738
"\n");
1739
        }
1740
        fprintf(fstage,
1741 2 dgisselq
"\treg  wait_for_sync;\n"
1742
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
1743
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
1744 8 dgisselq
"\treg  ib_sync;\n"
1745 2 dgisselq
"\n"
1746
"\treg  b_started;\n"
1747
"\twire ob_sync;\n"
1748 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
1749 2 dgisselq
        fprintf(fstage,
1750
"\n"
1751
"\t// %scmem is defined as an array of real and complex values,\n"
1752
"\t// where the top CWIDTH bits are the real value and the bottom\n"
1753
"\t// CWIDTH bits are the imaginary value.\n"
1754
"\t//\n"
1755 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
1756 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
1757
"\t//\n"
1758
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
1759
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
1760 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
1761
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
1762 2 dgisselq
        {
1763
                FILE    *cmem;
1764
 
1765 14 dgisselq
                {
1766
                        char    *memfile, *ptr;
1767
 
1768
                        memfile = new char[strlen(fname)+128];
1769
                        strcpy(memfile, fname);
1770
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
1771
                                ptr++;
1772
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
1773
                        } else {
1774
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
1775 26 dgisselq
                                        coredir, (inv)?"i":"",
1776 14 dgisselq
                                        (odd)?'o':'e', stage*2);
1777
                        }
1778
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
1779
                        cmem = fopen(memfile, "w");
1780
                        if (NULL == cmem) {
1781
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
1782
                                perror("Err from O/S:");
1783
                                exit(-2);
1784
                        }
1785
 
1786
                        delete[] memfile;
1787 2 dgisselq
                }
1788
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
1789
                for(int i=0; i<stage/2; i++) {
1790
                        int k = 2*i+odd;
1791 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
1792 2 dgisselq
                        double  c, s;
1793
                        long long ic, is, vl;
1794
 
1795
                        c = cos(W); s = sin(W);
1796 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
1797
                        is = (long long)round((1ll<<(cbits-2)) * s);
1798 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
1799
                        vl <<= (cbits);
1800
                        vl |= (is & (~(-1ll << (cbits))));
1801
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
1802
                        /*
1803
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
1804
                                ((cbits*2+3)/4), vl, c, s,
1805
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
1806
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
1807
                        */
1808
                } fclose(cmem);
1809
        }
1810
 
1811
        fprintf(fstage,
1812 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
1813 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
1814
"\n"
1815 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
1816 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
1817
"\n"
1818 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
1819
"\tinitial iaddr = 0;\n"
1820 2 dgisselq
"\talways @(posedge i_clk)\n"
1821
        "\t\tif (i_rst)\n"
1822
        "\t\tbegin\n"
1823 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b1;\n"
1824 2 dgisselq
                "\t\t\tiaddr <= 0;\n"
1825
        "\t\tend\n"
1826
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1827
        "\t\tbegin\n"
1828
                "\t\t\t//\n"
1829
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
1830
                "\t\t\t//\n"
1831 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
1832 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b0;\n"
1833
        "\t\tend\n"
1834
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
1835
        "\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n"
1836
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
1837
        "\n");
1838 23 dgisselq
 
1839
        fprintf(fstage,
1840
        "\t//\n"
1841
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
1842
        "\t//\n"
1843 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
1844 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1845 26 dgisselq
                "\t\tif (i_rst)\n"
1846
                        "\t\t\tib_sync <= 1\'b0;\n"
1847
                "\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
1848
                        "\t\t\tbegin\n"
1849
                                "\t\t\t\t// Set the sync to true on the very first\n"
1850
                                "\t\t\t\t// valid input in, and hence on the very\n"
1851
                                "\t\t\t\t// first valid data out per FFT.\n"
1852
                                "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
1853
                        "\t\t\tend\n"
1854 24 dgisselq
        "\talways\t@(posedge i_clk)\n"
1855 26 dgisselq
                "\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
1856
                "\t\t\tbegin\n"
1857
                        "\t\t\t\t// One input from memory, ...\n"
1858
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
1859
                        "\t\t\t\t// One input clocked in from the top\n"
1860
                        "\t\t\t\tib_b <= i_data;\n"
1861
                        "\t\t\t\t// and the coefficient or twiddle factor\n"
1862
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
1863
                "\t\t\tend\n\n", (inv)?"i":"");
1864 23 dgisselq
 
1865
        if (hwmpy) {
1866
                fprintf(fstage,
1867
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1868
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
1869
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1870
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
1871
        } else {
1872
        fprintf(fstage,
1873
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1874
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
1875
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1876
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
1877
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
1878
        }
1879
 
1880
        fprintf(fstage,
1881
        "\t//\n"
1882
        "\t// Next step: recover the outputs from the butterfly\n"
1883
        "\t//\n"
1884 25 dgisselq
        "\tinitial oB        = 0;\n"
1885
        "\tinitial o_sync    = 0;\n"
1886
        "\tinitial b_started = 0;\n"
1887 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1888
        "\t\tif (i_rst)\n"
1889
        "\t\tbegin\n"
1890
                "\t\t\toB <= 0;\n"
1891
                "\t\t\to_sync <= 0;\n"
1892
                "\t\t\tb_started <= 0;\n"
1893
        "\t\tend else if (i_ce)\n"
1894
        "\t\tbegin\n"
1895 26 dgisselq
        "\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n"
1896
        "\t\t\tif (ob_sync||b_started)\n"
1897
                "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
1898
        "\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n"
1899
                "\t\t\t// A butterfly output is available\n"
1900
                        "\t\t\t\tb_started <= 1\'b1;\n"
1901 23 dgisselq
        "\t\tend\n\n");
1902 26 dgisselq
        fprintf(fstage,
1903
        "\treg  [(LGSPAN-1):0]\t\tdly_addr;\n"
1904
        "\treg  [(2*OWIDTH-1):0]\tdly_value;\n"
1905
        "\talways @(posedge i_clk)\n"
1906
        "\t\tif (i_ce)\n"
1907
        "\t\tbegin\n"
1908
        "\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
1909
        "\t\t\tdly_value <= ob_b;\n"
1910
        "\t\tend\n"
1911
        "\talways @(posedge i_clk)\n"
1912
        "\t\tif (i_ce)\n"
1913
                "\t\t\tomem[dly_addr] <= dly_value;\n"
1914
"\n");
1915
        fprintf(fstage,
1916
        "\talways @(posedge i_clk)\n"
1917
        "\t\tif (i_ce)\n"
1918
        "\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
1919
"\n");
1920 22 dgisselq
        fprintf(fstage, "endmodule\n");
1921 2 dgisselq
}
1922
 
1923
void    usage(void) {
1924
        fprintf(stderr,
1925 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
1926 2 dgisselq
// "\tfftgen -i\n"
1927 26 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
1928
"\t\ta real FFT) at one clock per two real input samples.\n"
1929 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
1930
"\t\tlonger than the corresponding data bits, to help avoid\n"
1931 26 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits lnoger\n"
1932
"\t\tthan the data bits.\n"
1933 2 dgisselq
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
1934 26 dgisselq
"\t\tThe default is a subdirectory of the current directory named %s.\n"
1935 2 dgisselq
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
1936 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
1937
"\t\ta required parameter.)\n"
1938
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
1939
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
1940
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
1941 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
1942
"\t\tproduce.  Internal values greater than this value will be\n"
1943 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
1944
"\t\tsize by one bit for every two FFT stages.)\n"
1945 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
1946 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
1947
"\t\tcomplex values into the FFT.\n"
1948 22 dgisselq
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
1949
"\t\tmultiplication facility, instead of shift-add emulation.\n"
1950 26 dgisselq
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
1951
"\t\tbe accelerated in this fashion.  The default is not to use any\n"
1952
"\t\thardware multipliers.\n"
1953
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
1954
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
1955 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
1956
"\t\talgorithms that need to apply a filter without needing to do\n"
1957
"\t\tbin shifting, as these algorithms can, with this option, just\n"
1958
"\t\tmultiply by a bit reversed correlation sequence and then\n"
1959 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
1960
"\t\ta decimation in time inverse to do this, which this program does\n"
1961
"\t\tnot yet provide.)\n"
1962 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
1963 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
1964
"\t\trounding or truncation of the answer to the final number of bits.\n"
1965 26 dgisselq
"\t\tThe default is to use %d extra bits internally.\n",
1966
/*
1967 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
1968
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
1969
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
1970 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
1971
*/
1972
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
1973 2 dgisselq
}
1974
 
1975
// Features still needed:
1976
//      Interactivity.
1977
int main(int argc, char **argv) {
1978
        int     fftsize = -1, lgsize = -1;
1979 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
1980
                        nummpy=DEF_NMPY, nonmpy=2;
1981
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
1982
        bool    bitreverse = true, inverse=false,
1983
                verbose_flag = false, single_clock = false,
1984
                real_fft = false;
1985 2 dgisselq
        FILE    *vmain;
1986 28 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";
1987 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
1988
        // ROUND_T      rounding = RND_HALFUP;
1989 2 dgisselq
 
1990 26 dgisselq
        bool    dbg = false;
1991
        int     dbgstage = 128;
1992
 
1993 2 dgisselq
        if (argc <= 1)
1994
                usage();
1995
 
1996 14 dgisselq
        cmdline = argv[0];
1997 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1998 14 dgisselq
                cmdline += " ";
1999
                cmdline += argv[argn];
2000
        }
2001
 
2002
        for(int argn=1; argn<argc; argn++) {
2003 2 dgisselq
                if ('-' == argv[argn][0]) {
2004
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
2005
                                switch(argv[argn][j]) {
2006 26 dgisselq
                                        /*
2007 2 dgisselq
                                        case '0':
2008
                                                inverse = false;
2009
                                                break;
2010 26 dgisselq
                                        */
2011 2 dgisselq
                                        case '1':
2012 26 dgisselq
                                                single_clock = true;
2013 2 dgisselq
                                                break;
2014 28 dgisselq
                                        case 'a':
2015
                                                if (argn+1 >= argc) {
2016
                                                        printf("ERR: No header filename given\n\n");
2017
                                                        usage(); exit(-1);
2018
                                                }
2019
                                                hdrname = argv[++argn];
2020
                                                j+= 200;
2021
                                                break;
2022 2 dgisselq
                                        case 'c':
2023
                                                if (argn+1 >= argc) {
2024 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
2025 2 dgisselq
                                                        usage(); exit(-1);
2026
                                                }
2027
                                                xtracbits = atoi(argv[++argn]);
2028
                                                j+= 200;
2029
                                                break;
2030
                                        case 'd':
2031
                                                if (argn+1 >= argc) {
2032 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
2033 2 dgisselq
                                                        usage(); exit(-1);
2034
                                                }
2035 14 dgisselq
                                                coredir = argv[++argn];
2036 2 dgisselq
                                                j += 200;
2037
                                                break;
2038 26 dgisselq
                                        case 'D':
2039
                                                dbg = true;
2040
                                                if (argn+1 >= argc) {
2041
                                                        printf("ERR: No debug stage number given!\n\n");
2042
                                                        usage(); exit(-1);
2043
                                                }
2044
                                                dbgstage = atoi(argv[++argn]);
2045
                                                j+= 200;
2046
                                                break;
2047 2 dgisselq
                                        case 'f':
2048
                                                if (argn+1 >= argc) {
2049 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
2050 2 dgisselq
                                                        usage(); exit(-1);
2051
                                                }
2052
                                                fftsize = atoi(argv[++argn]);
2053
                                                { int sln = strlen(argv[argn]);
2054
                                                if (!isdigit(argv[argn][sln-1])){
2055
                                                        switch(argv[argn][sln-1]) {
2056
                                                        case 'k': case 'K':
2057
                                                                fftsize <<= 10;
2058
                                                                break;
2059
                                                        case 'm': case 'M':
2060
                                                                fftsize <<= 20;
2061
                                                                break;
2062
                                                        case 'g': case 'G':
2063
                                                                fftsize <<= 30;
2064
                                                                break;
2065
                                                        default:
2066 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
2067 2 dgisselq
                                                                exit(-1);
2068
                                                        }
2069
                                                }}
2070
                                                j += 200;
2071
                                                break;
2072
                                        case 'h':
2073
                                                usage();
2074
                                                exit(0);
2075
                                                break;
2076
                                        case 'i':
2077 26 dgisselq
                                                inverse = true;
2078 2 dgisselq
                                                break;
2079
                                        case 'm':
2080
                                                if (argn+1 >= argc) {
2081 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
2082 2 dgisselq
                                                        exit(-1);
2083
                                                }
2084
                                                maxbitsout = atoi(argv[++argn]);
2085
                                                j += 200;
2086
                                                break;
2087
                                        case 'n':
2088
                                                if (argn+1 >= argc) {
2089 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
2090 2 dgisselq
                                                        exit(-1);
2091
                                                }
2092
                                                nbitsin = atoi(argv[++argn]);
2093
                                                j += 200;
2094
                                                break;
2095 22 dgisselq
                                        case 'p':
2096
                                                if (argn+1 >= argc) {
2097
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
2098
                                                        exit(-1);
2099
                                                }
2100
                                                nummpy = atoi(argv[++argn]);
2101
                                                j += 200;
2102
                                                break;
2103 26 dgisselq
                                        case 'r':
2104
                                                real_fft = true;
2105
                                                break;
2106 2 dgisselq
                                        case 'S':
2107
                                                bitreverse = true;
2108
                                                break;
2109
                                        case 's':
2110
                                                bitreverse = false;
2111
                                                break;
2112 19 dgisselq
                                        case 'x':
2113
                                                if (argn+1 >= argc) {
2114
                                                        printf("ERR: No extra number of bits given!\n\n");
2115
                                                        usage(); exit(-1);
2116
                                                } j+= 200;
2117
                                                xtrapbits = atoi(argv[++argn]);
2118
                                                break;
2119 2 dgisselq
                                        case 'v':
2120
                                                verbose_flag = true;
2121
                                                break;
2122
                                        default:
2123
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
2124
                                                usage();
2125
                                                exit(-1);
2126
                                }
2127
                        }
2128
                } else {
2129
                        printf("Unrecognized argument, %s\n", argv[argn]);
2130
                        usage();
2131
                        exit(-1);
2132
                }
2133
        }
2134
 
2135 26 dgisselq
        if (real_fft) {
2136
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2137
                exit(0);
2138
        } if (single_clock) {
2139
                printf("The single clock FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2140
                exit(0);
2141
        } if (!bitreverse) {
2142
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
2143
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
2144
                printf("built.\n");
2145
        }
2146
 
2147 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
2148
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
2149
                        ;
2150
        }
2151
 
2152
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
2153
                printf("INVALID PARAMETERS!!!!\n");
2154
                exit(-1);
2155
        }
2156
 
2157
 
2158
        if (nextlg(fftsize) != fftsize) {
2159
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
2160
                                fftsize);
2161
                exit(-1);
2162
        } else if (fftsize < 2) {
2163
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
2164
                                fftsize);
2165
                if (fftsize == 1) {
2166
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
2167
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
2168
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
2169
                        fprintf(stderr, "can be connected straight to the input.\n");
2170
                } else {
2171
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
2172
                        fprintf(stderr, "Is such an operation even defined?\n");
2173
                }
2174
                exit(-1);
2175
        }
2176
 
2177
        // Calculate how many output bits we'll have, and what the log
2178
        // based two size of our FFT is.
2179
        {
2180
                int     tmp_size = fftsize;
2181
 
2182
                // The first stage always accumulates one bit, regardless
2183
                // of whether you need to or not.
2184
                nbitsout = nbitsin + 1;
2185
                tmp_size >>= 1;
2186
 
2187
                while(tmp_size > 4) {
2188
                        nbitsout += 1;
2189
                        tmp_size >>= 2;
2190
                }
2191
 
2192
                if (tmp_size > 1)
2193
                        nbitsout ++;
2194
 
2195
                if (fftsize <= 2)
2196
                        bitreverse = false;
2197
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2198
                nbitsout = maxbitsout;
2199
 
2200 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2201
        {
2202
                int     lgv = lgval(fftsize);
2203 2 dgisselq
 
2204 22 dgisselq
                nonmpy = lgv - nummpy;
2205
                if (nonmpy < 2) nonmpy = 2;
2206
                nummpy = lgv - nonmpy;
2207
        }
2208
 
2209 2 dgisselq
        {
2210
                struct stat     sbuf;
2211 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2212 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2213 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2214 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2215
                                fprintf(stderr, "To try again, please remove this file.\n");
2216
                                exit(-1);
2217
                        }
2218
                } else
2219 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2220
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2221
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2222 2 dgisselq
                        exit(-1);
2223
                }
2224
        }
2225
 
2226 28 dgisselq
        if (hdrname.length() > 0) {
2227
                FILE    *hdr = fopen(hdrname.c_str(), "w");
2228
                if (hdr == NULL) {
2229
                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
2230
                        perror("O/S Err:");
2231
                        exit(-2);
2232
                }
2233
 
2234
                fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n");
2235
                fprintf(hdr, "//\n");
2236
                fprintf(hdr, "// Filename:      %s\n", hdrname.c_str());
2237
                fprintf(hdr, "//\n");
2238
                fprintf(hdr, "// Project:       %s\n", prjname);
2239
                fprintf(hdr, "//\n");
2240
                fprintf(hdr, "// Purpose:       This simple header file captures the internal constants\n");
2241
                fprintf(hdr, "//                within the FFT that were used to build it, for the purpose\n");
2242
                fprintf(hdr, "//                of making C++ integration (and test bench testing) simpler.  That\n");
2243
                fprintf(hdr, "//                is, should the FFT change size, this will note that size change\n");
2244
                fprintf(hdr, "//                and thus any test bench or other C++ program dependent upon\n");
2245
                fprintf(hdr, "//                either the size of the FFT, the number of bits in or out of\n");
2246
                fprintf(hdr, "//                it, etc., can pick up the changes in the defines found within\n");
2247
                fprintf(hdr, "//                this file.\n");
2248
                fprintf(hdr, "//\n");
2249
                fprintf(hdr, "%s", creator);
2250
                fprintf(hdr, "//\n");
2251
                fprintf(hdr, "%s", cpyleft);
2252
                fprintf(hdr, "//\n"
2253
                "//\n"
2254
                "#ifndef %sFFTHDR_H\n"
2255
                "#define %sFFTHDR_H\n"
2256
                "\n"
2257
                "#define\t%sFFT_IWIDTH\t%d\n"
2258
                "#define\t%sFFT_OWIDTH\t%d\n"
2259
                "#define\t%sFFT_LGWIDTH\t%d\n"
2260
                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
2261
                        (inverse)?"I":"", (inverse)?"I":"",
2262
                        (inverse)?"I":"", nbitsin,
2263
                        (inverse)?"I":"", nbitsout,
2264
                        (inverse)?"I":"", lgsize,
2265
                        (inverse)?"I":"", (inverse)?"I":"");
2266
                if (!bitreverse)
2267
                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
2268
                                (inverse)?"I":"");
2269
                if (real_fft)
2270
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
2271
                if (!single_clock)
2272
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
2273
                fprintf(hdr, "\n" "#endif\n\n");
2274
                fclose(hdr);
2275
        }
2276
 
2277 14 dgisselq
        {
2278
                std::string     fname_string;
2279
 
2280
                fname_string = coredir;
2281
                fname_string += "/";
2282
                if (inverse) fname_string += "i";
2283
                fname_string += "fftmain.v";
2284
 
2285
                vmain = fopen(fname_string.c_str(), "w");
2286
                if (NULL == vmain) {
2287
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2288
                        perror("Err from O/S:");
2289
                        exit(-1);
2290
                }
2291 2 dgisselq
        }
2292
 
2293
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2294
        fprintf(vmain, "//\n");
2295
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2296
        fprintf(vmain, "//\n");
2297
        fprintf(vmain, "// Project:     %s\n", prjname);
2298
        fprintf(vmain, "//\n");
2299
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2300
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2301
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2302
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2303
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2304
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2305
        fprintf(vmain, "//\n");
2306
        fprintf(vmain, "// Parameters:\n");
2307
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2308
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2309
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2310
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2311
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2312
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
2313
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2314
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2315
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
2316
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
2317
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
2318
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
2319
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
2320
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
2321
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
2322
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
2323
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
2324
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
2325
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
2326
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
2327
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
2328
        fprintf(vmain, "//\t\t\tbits total.\n");
2329
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
2330
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
2331
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
2332
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
2333
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
2334
        fprintf(vmain, "//\n");
2335 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2336
        fprintf(vmain, "//\t\tfollowing command line:\n");
2337
        fprintf(vmain, "//\n");
2338
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2339
        fprintf(vmain, "//\n");
2340 2 dgisselq
        fprintf(vmain, "%s", creator);
2341
        fprintf(vmain, "//\n");
2342
        fprintf(vmain, "%s", cpyleft);
2343
 
2344
 
2345
        fprintf(vmain, "//\n");
2346
        fprintf(vmain, "//\n");
2347
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2348
        fprintf(vmain, "\t\ti_left, i_right,\n");
2349 26 dgisselq
        fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
2350
                        (dbg)?", o_dbg":"");
2351 2 dgisselq
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2352
        assert(lgsize > 0);
2353
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2354
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2355
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2356
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2357 26 dgisselq
        if (dbg)
2358
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
2359 2 dgisselq
        fprintf(vmain, "\n\n");
2360
 
2361
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2362
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2363
        fprintf(vmain, "\n\n");
2364
 
2365
        int     tmp_size = fftsize, lgtmp = lgsize;
2366
        if (fftsize == 2) {
2367
                if (bitreverse) {
2368
                        fprintf(vmain, "\treg\tbr_start;\n");
2369 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2370 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2371
                        fprintf(vmain, "\t\tif (i_rst)\n");
2372 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
2373 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2374 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
2375 2 dgisselq
                }
2376
                fprintf(vmain, "\n\n");
2377 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2378
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
2379 2 dgisselq
                fprintf(vmain, "\n\n");
2380
        } else {
2381
                int     nbits = nbitsin, dropbit=0;
2382 26 dgisselq
                int     obits = nbits+1+xtrapbits;
2383
 
2384
                if ((maxbitsout > 0)&&(obits > maxbitsout))
2385
                        obits = maxbitsout;
2386
 
2387 2 dgisselq
                // Always do a first stage
2388 14 dgisselq
                {
2389 22 dgisselq
                        bool    mpystage;
2390 2 dgisselq
 
2391 22 dgisselq
                        // Last two stages are always non-multiply stages
2392
                        // since the multiplies can be done by adds
2393
                        mpystage = ((lgtmp-2) <= nummpy);
2394
 
2395 28 dgisselq
                        if (mpystage)
2396
                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2397
                        fprintf(vmain, "\n\n");
2398
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
2399
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
2400
                        fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2401
                                (inverse)?"i":"", fftsize,
2402
                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
2403
                                xtracbits, obits+xtrapbits,
2404
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2405
                                fftsize);
2406
                        fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
2407
                        fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2408
                                (inverse)?"i":"", fftsize,
2409
                                xtracbits, obits+xtrapbits,
2410
                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2411
                                fftsize);
2412
                        fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2413
                        fprintf(vmain, "\n\n");
2414
 
2415
 
2416
                        std::string     fname;
2417
                        char    numstr[12];
2418
 
2419 14 dgisselq
                        fname = coredir + "/";
2420
                        if (inverse) fname += "i";
2421
                        fname += "fftstage_e";
2422
                        sprintf(numstr, "%d", fftsize);
2423
                        fname += numstr;
2424 26 dgisselq
                        if ((dbg)&&(dbgstage == fftsize))
2425
                                fname += "_dbg";
2426 14 dgisselq
                        fname += ".v";
2427 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize));    // Even stage
2428 14 dgisselq
 
2429
                        fname = coredir + "/";
2430
                        if (inverse) fname += "i";
2431
                        fname += "fftstage_o";
2432
                        sprintf(numstr, "%d", fftsize);
2433
                        fname += numstr;
2434
                        fname += ".v";
2435 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false);  // Odd  stage
2436 14 dgisselq
                }
2437
 
2438 26 dgisselq
                nbits = obits;  // New number of input bits
2439 2 dgisselq
                tmp_size >>= 1; lgtmp--;
2440
                dropbit = 0;
2441
                fprintf(vmain, "\n\n");
2442
                while(tmp_size >= 8) {
2443 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2444 2 dgisselq
 
2445
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2446
                                obits = maxbitsout;
2447
 
2448 14 dgisselq
                        {
2449 22 dgisselq
                                bool            mpystage;
2450 2 dgisselq
 
2451 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2452
 
2453 28 dgisselq
                                if (mpystage)
2454
                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");
2455
                                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n",
2456
                                        tmp_size, tmp_size);
2457
                                fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
2458
                                        2*(obits+xtrapbits)-1,
2459
                                        tmp_size, tmp_size);
2460
                                fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2461
                                        (inverse)?"i":"", tmp_size,
2462
                                        ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
2463
                                        nbits+xtrapbits,
2464
                                        nbits+xtracbits+xtrapbits,
2465
                                        obits+xtrapbits,
2466
                                        lgsize, lgtmp-2,
2467
                                        lgdelay(nbits+xtrapbits,xtracbits),
2468
                                        (dropbit)?0:0, tmp_size);
2469
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
2470
                                        tmp_size<<1, tmp_size<<1,
2471
                                        tmp_size, tmp_size,
2472
                                        ((dbg)&&(dbgstage == tmp_size))
2473
                                                ?", o_dbg":"");
2474
                                fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2475
                                        (inverse)?"i":"", tmp_size,
2476
                                        nbits+xtrapbits,
2477
                                        nbits+xtracbits+xtrapbits,
2478
                                        obits+xtrapbits,
2479
                                        lgsize, lgtmp-2,
2480
                                        lgdelay(nbits+xtrapbits,xtracbits),
2481
                                        (dropbit)?0:0, tmp_size);
2482
                                fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
2483
                                        tmp_size<<1, tmp_size<<1,
2484
                                        tmp_size, tmp_size);
2485
                                fprintf(vmain, "\n\n");
2486
 
2487
                                std::string     fname;
2488
                                char            numstr[12];
2489
 
2490 14 dgisselq
                                fname = coredir + "/";
2491
                                if (inverse) fname += "i";
2492
                                fname += "fftstage_e";
2493
                                sprintf(numstr, "%d", tmp_size);
2494
                                fname += numstr;
2495 26 dgisselq
                                if ((dbg)&&(dbgstage == tmp_size))
2496
                                        fname += "_dbg";
2497 14 dgisselq
                                fname += ".v";
2498 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
2499 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2500 26 dgisselq
                                        mpystage, ((dbg)&&(dbgstage == tmp_size)));     // Even stage
2501 2 dgisselq
 
2502 14 dgisselq
                                fname = coredir + "/";
2503
                                if (inverse) fname += "i";
2504
                                fname += "fftstage_o";
2505
                                sprintf(numstr, "%d", tmp_size);
2506
                                fname += numstr;
2507
                                fname += ".v";
2508 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
2509 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2510 26 dgisselq
                                        mpystage, false);       // Odd  stage
2511 14 dgisselq
                        }
2512
 
2513
 
2514 2 dgisselq
                        dropbit ^= 1;
2515
                        nbits = obits;
2516
                        tmp_size >>= 1; lgtmp--;
2517
                }
2518
 
2519
                if (tmp_size == 4) {
2520 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2521 2 dgisselq
 
2522
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2523
                                obits = maxbitsout;
2524
 
2525
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
2526 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2527 26 dgisselq
                        fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2528
                                ((dbg)&&(dbgstage==4))?"_dbg":"",
2529
                                nbits+xtrapbits, obits+xtrapbits, lgsize,
2530
                                (inverse)?1:0, (dropbit)?0:0);
2531
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
2532
                                ((dbg)&&(dbgstage==4))?", o_dbg":"");
2533 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2534 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2535 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2536 2 dgisselq
                        dropbit ^= 1;
2537
                        nbits = obits;
2538
                        tmp_size >>= 1; lgtmp--;
2539
                }
2540
 
2541
                {
2542 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2543 2 dgisselq
                        if (obits > nbitsout)
2544
                                obits = nbitsout;
2545
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2546
                                obits = maxbitsout;
2547
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2548
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2549 28 dgisselq
                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
2550
                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
2551 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2552 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2553 2 dgisselq
 
2554
                        fprintf(vmain, "\n\n");
2555
                        nbits = obits;
2556
                }
2557
 
2558
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2559
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2560
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2561
                fprintf(vmain, "\n");
2562
                if (bitreverse) {
2563
                        fprintf(vmain, "\twire\tbr_start;\n");
2564
                        fprintf(vmain, "\treg\tr_br_started;\n");
2565 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2566 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2567
                        fprintf(vmain, "\t\tif (i_rst)\n");
2568 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
2569
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2570 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2571
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2572 2 dgisselq
                }
2573
        }
2574
 
2575
        fprintf(vmain, "\n");
2576
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2577
        fprintf(vmain, "\twire\tbr_sync;\n");
2578
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2579
        if (bitreverse) {
2580
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2581
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2582
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2583
        } else {
2584
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2585
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2586
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2587
        }
2588
 
2589
        fprintf(vmain, "\n\n");
2590
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2591 26 dgisselq
        fprintf(vmain, "\tinitial\to_sync  = 1\'b0;\n");
2592 2 dgisselq
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2593 26 dgisselq
        fprintf(vmain, "\t\tif (i_rst)\n");
2594
        fprintf(vmain, "\t\t\to_sync  <= 1\'b0;\n");
2595
        fprintf(vmain, "\t\telse if (i_ce)\n");
2596
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2597
        fprintf(vmain, "\n");
2598
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2599
        fprintf(vmain, "\t\tif (i_ce)\n");
2600 2 dgisselq
        fprintf(vmain, "\t\tbegin\n");
2601
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2602
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2603
        fprintf(vmain, "\t\tend\n");
2604
        fprintf(vmain, "\n\n");
2605
        fprintf(vmain, "endmodule\n");
2606
        fclose(vmain);
2607
 
2608 14 dgisselq
        {
2609
                std::string     fname;
2610 2 dgisselq
 
2611 14 dgisselq
                fname = coredir + "/butterfly.v";
2612 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2613 2 dgisselq
 
2614 22 dgisselq
                if (nummpy > 0) {
2615
                        fname = coredir + "/hwbfly.v";
2616 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2617 22 dgisselq
                }
2618
 
2619 14 dgisselq
                fname = coredir + "/shiftaddmpy.v";
2620
                build_multiply(fname.c_str());
2621 2 dgisselq
 
2622 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
2623
                        fname = coredir + "/qtrstage_dbg.v";
2624
                        build_quarters(fname.c_str(), rounding, true);
2625
                }
2626 14 dgisselq
                fname = coredir + "/qtrstage.v";
2627 26 dgisselq
                build_quarters(fname.c_str(), rounding, false);
2628 2 dgisselq
 
2629 26 dgisselq
                if ((dbg)&&(dbgstage == 2))
2630
                        fname = coredir + "/dblstage_dbg.v";
2631
                else
2632
                        fname = coredir + "/dblstage.v";
2633
                build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
2634 14 dgisselq
 
2635
                if (bitreverse) {
2636
                        fname = coredir + "/dblreverse.v";
2637
                        build_dblreverse(fname.c_str());
2638
                }
2639 23 dgisselq
 
2640
                const   char    *rnd_string = "";
2641
                switch(rounding) {
2642
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
2643
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
2644
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
2645
                        default:
2646
                                rnd_string = "/convround.v"; break;
2647
                } fname = coredir + rnd_string;
2648
                switch(rounding) {
2649
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
2650
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
2651
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
2652
                        default:
2653
                                build_convround(fname.c_str()); break;
2654
                }
2655
 
2656 2 dgisselq
        }
2657
}
2658
 
2659 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.