OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 26

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 16 dgisselq
/////////////////////////////////////////////////////////////////////////////
2
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26
///////////////////////////////////////////////////////////////////////////
27
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49
///////////////////////////////////////////////////////////////////////////
50
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62 26 dgisselq
#define DEF_NBITSIN     16
63
#define DEF_COREDIR     "fft-core"
64
#define DEF_XTRACBITS   4
65
#define DEF_NMPY        0
66
#define DEF_XTRAPBITS   0
67 2 dgisselq
 
68 23 dgisselq
// Selects how bits are dropped when a value is narrowed.  Each value
// corresponds to one of the generated Verilog rounding modules.
typedef enum {
        RND_TRUNCATE,           // the "truncate" module
        RND_FROMZERO,           // the "roundfromzero" module
        RND_HALFUP,             // the "roundhalfup" module
        RND_CONVERGENT          // the "convround" module
} ROUND_T;
71
 
72 2 dgisselq
// GPL v3 notice, formatted as Verilog comments, that is written into every
// generated source file.
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
97 14 dgisselq
// Project title placed on the "Project:" line of each generated file header.
const char      prjname[] = "A Doubletime Pipelined FFT";
// Creator lines placed in each generated file header.  The company name is
// spelled as in the copyright notice ("Technology").
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Technology, LLC\n";
100
 
101
// Returns the smallest exponent, never less than one, whose power of two
// is greater than or equal to vl.
int     lgval(int vl) {
        int     nbits = 1;

        while ((1 << nbits) < vl)
                nbits++;
        return nbits;
}
108
 
109
// Returns the smallest power of two that is greater than or equal to vl
// (one, for any vl of one or less).
int     nextlg(int vl) {
        int     pow2 = 1;

        while (pow2 < vl)
                pow2 <<= 1;
        return pow2;
}
116
 
117 14 dgisselq
// Returns the butterfly delay for the given bit widths: nbits+4 when
// xtra exceeds one, and nbits+xtra+3 otherwise.
int     bflydelay(int nbits, int xtra) {
        const int       cbits = nbits + xtra;

        return (nbits + 1 < cbits) ? (nbits + 4) : (cbits + 3);
}
126
 
127 14 dgisselq
int     lgdelay(int nbits, int xtra) {
128
        // The butterfly code needs to compare a valid address, of this
129
        // many bits, with an address two greater.  This guarantees we
130
        // have enough bits for that comparison.  We'll also end up with
131
        // more storage space to look for these values, but without a 
132
        // redesign that's just what we'll deal with.
133
        return lgval(bflydelay(nbits, xtra)+3);
134
}
135
 
136 23 dgisselq
void    build_truncator(const char *fname) {
137
        printf("TRUNCATING!\n");
138 2 dgisselq
        FILE    *fp = fopen(fname, "w");
139
        if (NULL == fp) {
140
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
141
                perror("O/S Err was:");
142
                return;
143
        }
144
 
145
        fprintf(fp,
146
"///////////////////////////////////////////////////////////////////////////\n"
147
"//\n"
148 23 dgisselq
"// Filename:   truncate.v\n"
149
"//             \n"
150
"// Project:    %s\n"
151
"//\n"
152
"// Purpose:    Truncation is one of several options that can be used\n"
153
"//             internal to the various FFT stages to drop bits from one \n"
154
"//             stage to the next.  In general, it is the simplest method\n"
155
"//             of dropping bits, since it requires only a bit selection.\n"
156
"//\n"
157
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
158
"//             since it tends to produce a DC bias in the result.  (Other\n"
159
"//             less pronounced biases may also exist.)\n"
160
"//\n"
161
"//             This particular version also registers the output with the\n"
162
"//             clock, so there will be a delay of one going through this\n"
163
"//             module.  This will keep it in line with the other forms of\n"
164
"//             rounding that can be used.\n"
165
"//\n"
166
"//\n%s"
167
"//\n",
168
                prjname, creator);
169
 
170
        fprintf(fp, "%s", cpyleft);
171
        fprintf(fp,
172
"module truncate(i_clk, i_ce, i_val, o_val);\n"
173
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
174
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
175
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
176
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
177
"\n"
178
        "\talways @(posedge i_clk)\n"
179
                "\t\tif (i_ce)\n"
180
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
181
"\n"
182
"endmodule\n");
183
}
184
 
185
 
186
void    build_roundhalfup(const char *fname) {
187
        FILE    *fp = fopen(fname, "w");
188
        if (NULL == fp) {
189
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
190
                perror("O/S Err was:");
191
                return;
192
        }
193
 
194
        fprintf(fp,
195
"///////////////////////////////////////////////////////////////////////////\n"
196
"//\n"
197
"// Filename:   roundhalfup.v\n"
198
"//             \n"
199
"// Project:    %s\n"
200
"//\n"
201
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
202
"//             school.  A one half value is added to the result, and then\n"
203
"//             the result is truncated.  When used in an FFT, this produces\n"
204
"//             less bias than the truncation method, although a bias still\n"
205
"//             tends to remain.\n"
206
"//\n"
207
"//\n%s"
208
"//\n",
209
                prjname, creator);
210
 
211
        fprintf(fp, "%s", cpyleft);
212
        fprintf(fp,
213
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
214
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
215
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
216
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
217
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
218
"\n"
219
        "\t// Let's deal with two cases to be as general as we can be here\n"
220
        "\t//\n"
221
        "\t//   1. The desired output would lose no bits at all\n"
222
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
223
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
224
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
225
        "\t//\t\tvalue.\n"
226
        "\tgenerate\n"
227
        "\tif (IWID-SHIFT == OWID)\n"
228
        "\tbegin // No truncation or rounding, output drops no bits\n"
229
"\n"
230
                "\t\talways @(posedge i_clk)\n"
231
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
232
"\n"
233
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
234
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
235
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
236
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
237
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
238 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
239 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
240
"\n"
241
                "\t\talways @(posedge i_clk)\n"
242
                "\t\t\tif (i_ce)\n"
243
                "\t\t\tbegin\n"
244
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
245
                        "\t\t\t\t\to_val <= truncated_value;\n"
246
                        "\t\t\t\telse\n"
247
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
248
                "\t\t\tend\n"
249
"\n"
250
        "\tend\n"
251
        "\tendgenerate\n"
252
"\n"
253
"endmodule\n");
254
}
255
 
256
void    build_roundfromzero(const char *fname) {
257
        FILE    *fp = fopen(fname, "w");
258
        if (NULL == fp) {
259
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
260
                perror("O/S Err was:");
261
                return;
262
        }
263
 
264
        fprintf(fp,
265
"///////////////////////////////////////////////////////////////////////////\n"
266
"//\n"
267
"// Filename:   roundfromzero.v\n"
268
"//             \n"
269
"// Project:    %s\n"
270
"//\n"
271
"// Purpose:    Truncation is one of several options that can be used\n"
272
"//             internal to the various FFT stages to drop bits from one \n"
273
"//             stage to the next.  In general, it is the simplest method\n"
274
"//             of dropping bits, since it requires only a bit selection.\n"
275
"//\n"
276
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
277
"//             since it tends to produce a DC bias in the result.  (Other\n"
278
"//             less pronounced biases may also exist.)\n"
279
"//\n"
280
"//             This particular version also registers the output with the\n"
281
"//             clock, so there will be a delay of one going through this\n"
282
"//             module.  This will keep it in line with the other forms of\n"
283
"//             rounding that can be used.\n"
284
"//\n"
285
"//\n%s"
286
"//\n",
287
                prjname, creator);
288
 
289
        fprintf(fp, "%s", cpyleft);
290
        fprintf(fp,
291
"module convround(i_clk, i_ce, i_val, o_val);\n"
292
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
293
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
294
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
295
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
296
"\n"
297
        "\t// Let's deal with three cases to be as general as we can be here\n"
298
        "\t//\n"
299
        "\t//\t1. The desired output would lose no bits at all\n"
300
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
301
        "\t//\t\tadjusting the value to be the closer to zero in\n"
302
        "\t//\t\tcases of being halfway between two.  If identically\n"
303
        "\t//\t\tequal to a number, we just leave it as is.\n"
304
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
305
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
306
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
307
        "\t//\t\tround away from zero.\n"
308
        "\tgenerate\n"
309
        "\tif (IWID-SHIFT == OWID)\n"
310
        "\tbegin // No truncation or rounding, output drops no bits\n"
311
"\n"
312
                "\t\talways @(posedge i_clk)\n"
313
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
314
"\n"
315
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
316
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
317
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
318
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
319
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
320 26 dgisselq
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
321 23 dgisselq
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
322
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
323
"\n"
324
        "\t\talways @(posedge i_clk)\n"
325
                "\t\t\tif (i_ce)\n"
326
                "\t\t\tbegin\n"
327
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
328
                                "\t\t\t\t\to_val <= truncated_value;\n"
329
                        "\t\t\t\telse if (sign_bit)\n"
330
                                "\t\t\t\t\to_val <= truncated_value;\n"
331
                        "\t\t\t\telse\n"
332
                                "\t\t\t\t\to_val <= rounded_up;\n"
333
                "\t\t\tend\n"
334
"\n"
335
        "\tend else // If there's more than one bit we are dropping\n"
336
        "\tbegin\n"
337
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
338
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
339
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
340 26 dgisselq
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
341 23 dgisselq
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
342
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
343
"\n"
344
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
345
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
346
"\n"
347
                "\t\talways @(posedge i_clk)\n"
348
                        "\t\t\tif (i_ce)\n"
349
                        "\t\t\tbegin\n"
350
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
351
                                "\t\t\t\t\to_val <= truncated_value;\n"
352
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
353
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
354
                        "\t\t\t\telse if (sign_bit)\n"
355
                                "\t\t\t\t\to_val <= truncated_value;\n"
356
                        "\t\t\t\telse\n"
357
                                "\t\t\t\t\to_val <= rounded_up;\n"
358
                        "\t\t\tend\n"
359
        "\tend\n"
360
        "\tendgenerate\n"
361
"\n"
362
"endmodule\n");
363
}
364
 
365
void    build_convround(const char *fname) {
366
        FILE    *fp = fopen(fname, "w");
367
        if (NULL == fp) {
368
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
369
                perror("O/S Err was:");
370
                return;
371
        }
372
 
373
        fprintf(fp,
374
"///////////////////////////////////////////////////////////////////////////\n"
375
"//\n"
376
"// Filename:   convround.v\n"
377
"//             \n"
378
"// Project:    %s\n"
379
"//\n"
380
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
381
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
382
"//             rounding, or ... more, at least according to Wikipedia.\n"
383
"//\n"
384
"//             This form of rounding works by rounding, when the direction\n"
385
"//             is in question, towards the nearest even value.\n"
386
"//\n"
387
"//\n%s"
388
"//\n",
389
                prjname, creator);
390
 
391
        fprintf(fp, "%s", cpyleft);
392
        fprintf(fp,
393
"module convround(i_clk, i_ce, i_val, o_val);\n"
394
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
395
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
396
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
397
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
398
"\n"
399
"\t// Let's deal with three cases to be as general as we can be here\n"
400
"\t//\n"
401
"\t//\t1. The desired output would lose no bits at all\n"
402
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
403
"\t//\t\tadjusting the value to be the nearest even number in\n"
404
"\t//\t\tcases of being halfway between two.  If identically\n"
405
"\t//\t\tequal to a number, we just leave it as is.\n"
406
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
407
"\t//\t\tnormally unless we are rounding a value of exactly\n"
408
"\t//\t\thalfway between the two.  In the halfway case we round\n"
409
"\t//\t\tto the nearest even number.\n"
410
"\tgenerate\n"
411
"\tif (IWID-SHIFT == OWID)\n"
412
"\tbegin // No truncation or rounding, output drops no bits\n"
413
"\n"
414
"\t\talways @(posedge i_clk)\n"
415
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
416
"\n"
417
"\tend else if (IWID-SHIFT-1 == OWID)\n"
418
"\tbegin // Output drops one bit, can only add one or ... not.\n"
419
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
420
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
421
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
422 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
423 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
424
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
425
"\n"
426
"\t\talways @(posedge i_clk)\n"
427
"\t\t\tif (i_ce)\n"
428
"\t\t\tbegin\n"
429
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
430
"\t\t\t\t\to_val <= truncated_value;\n"
431
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
432
"\t\t\t\t\to_val <= rounded_up; // even value\n"
433
"\t\t\t\telse // else round down to the nearest\n"
434
"\t\t\t\t\to_val <= truncated_value; // even value\n"
435
"\t\t\tend\n"
436
"\n"
437
"\tend else // If there's more than one bit we are dropping\n"
438
"\tbegin\n"
439
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
440
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
441
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
442 26 dgisselq
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
443 23 dgisselq
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
444
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
445
"\n"
446
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
447
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
448
"\n"
449
"\t\talways @(posedge i_clk)\n"
450
"\t\t\tif (i_ce)\n"
451
"\t\t\tbegin\n"
452
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
453
"\t\t\t\t\to_val <= truncated_value;\n"
454
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
455
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
456
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
457
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
458
"\t\t\t\telse   // else round down to nearest even\n"
459
"\t\t\t\t\to_val <= truncated_value;\n"
460
"\t\t\tend\n"
461
"\tend\n"
462
"\tendgenerate\n"
463
"\n"
464
"endmodule\n");
465
}
466
 
467 26 dgisselq
void    build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
468 23 dgisselq
        FILE    *fp = fopen(fname, "w");
469
        if (NULL == fp) {
470
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
471
                perror("O/S Err was:");
472
                return;
473
        }
474
        const   char    *rnd_string;
475
        if (rounding == RND_TRUNCATE)
476
                rnd_string = "truncate";
477
        else if (rounding == RND_FROMZERO)
478
                rnd_string = "roundfromzero";
479
        else if (rounding == RND_HALFUP)
480
                rnd_string = "roundhalfup";
481
        else
482
                rnd_string = "convround";
483
 
484
 
485
        fprintf(fp,
486
"///////////////////////////////////////////////////////////////////////////\n"
487
"//\n"
488 26 dgisselq
"// Filename:   qtrstage%s.v\n"
489 2 dgisselq
"//             \n"
490
"// Project:    %s\n"
491
"//\n"
492 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
493
"//             frequency FFT.  This particular implementation is optimized\n"
494
"//             so that all of the multiplies are accomplished by additions\n"
495
"//             and multiplexers only.\n"
496
"//\n"
497 2 dgisselq
"//\n%s"
498
"//\n",
499 26 dgisselq
                (dbg)?"_dbg":"", prjname, creator);
500 2 dgisselq
        fprintf(fp, "%s", cpyleft);
501
 
502
        fprintf(fp,
503 26 dgisselq
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
504 5 dgisselq
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
505
        "\t// Parameters specific to the core that should be changed when this\n"
506
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
507
        "\t// spans must use the fftdoubles stage.\n"
508 23 dgisselq
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
509 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
510
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
511
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
512
        "\toutput\treg                          o_sync;\n"
513 26 dgisselq
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
514
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
515
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
516
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
517
"\n");
518
        }
519 14 dgisselq
        fprintf(fp,
520 5 dgisselq
        "\treg\t        wait_for_sync;\n"
521 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
522 2 dgisselq
"\n"
523 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
524 2 dgisselq
"\n"
525 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
526
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
527
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
528
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
529 2 dgisselq
"\n"
530 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
531
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
532 2 dgisselq
"\n"
533 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
534
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
535
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
536 2 dgisselq
"\n"
537 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
538
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
539
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
540 2 dgisselq
"\n"
541 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
542 14 dgisselq
"\n");
543
        fprintf(fp,
544 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
545
        fprintf(fp,
546
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
547
        fprintf(fp,
548 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
549 23 dgisselq
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
550
        fprintf(fp,
551 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
552 23 dgisselq
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
553
        fprintf(fp,
554 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
555 23 dgisselq
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
556
        fprintf(fp,
557 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
558 23 dgisselq
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
559
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
560
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
561
/*
562
        fprintf(fp,
563 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
564 9 dgisselq
        "\tgenerate\n"
565
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
566 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
567 9 dgisselq
        "\telse\n"
568 26 dgisselq
                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
569 9 dgisselq
        "\tendgenerate\n"
570 2 dgisselq
"\n"
571 23 dgisselq
*/
572
        fprintf(fp,
573 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
574
        "\tinitial iaddr = 0;\n"
575 5 dgisselq
        "\talways @(posedge i_clk)\n"
576
                "\t\tif (i_rst)\n"
577
                "\t\tbegin\n"
578 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b1;\n"
579 5 dgisselq
                        "\t\t\tiaddr <= 0;\n"
580 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
581 5 dgisselq
                "\t\tbegin\n"
582 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
583
                        "\t\t\twait_for_sync <= 1\'b0;\n"
584
                "\t\tend\n"
585
        "\talways @(posedge i_clk)\n"
586
                "\t\tif (i_ce)\n"
587 5 dgisselq
                        "\t\t\timem <= i_data;\n"
588 26 dgisselq
                "\n\n");
589 23 dgisselq
        fprintf(fp,
590
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
591
        "\t// Why not?  Because iaddr will always be zero until after the\n"
592
        "\t// first i_ce, so we are safe.\n"
593 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
594 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
595
                "\t\tif (i_rst)\n"
596 26 dgisselq
                        "\t\t\tpipeline <= 4\'h0;\n"
597 23 dgisselq
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
598
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
599
        fprintf(fp,
600
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
601
        "\talways\t@(posedge i_clk)\n"
602
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
603
                "\t\tbegin\n"
604
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
605
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
606
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
607
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
608
                "\t\tend\n\n");
609
        fprintf(fp,
610
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
611
        fprintf(fp,
612 26 dgisselq
        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"
613
        "\t// clock times, since nothing will listen unless pipeline[3]\n"
614
        "\t// on the next clock.  Thus, we simplify this logic and do\n"
615
        "\t// it independent of pipeline[2].\n"
616 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
617 26 dgisselq
                "\t\tif (i_ce)\n"
618 23 dgisselq
                "\t\tbegin\n"
619
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
620
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
621
                        "\t\t\tif (ODD == 0)\n"
622 5 dgisselq
                        "\t\t\tbegin\n"
623 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
624
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
625
                        "\t\t\tend else if (INVERSE==0) begin\n"
626
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
627
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
628
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
629
                        "\t\t\tend else begin\n"
630
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
631
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
632
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
633 5 dgisselq
                        "\t\t\tend\n"
634 23 dgisselq
                "\t\tend\n\n");
635
        fprintf(fp,
636
        "\talways\t@(posedge i_clk)\n"
637
                "\t\tif (i_ce)\n"
638
                "\t\tbegin // In sequence, clock = 3\n"
639
                        "\t\t\tif (pipeline[3])\n"
640 5 dgisselq
                        "\t\t\tbegin\n"
641
                                "\t\t\t\tomem <= ob_b;\n"
642
                                "\t\t\t\to_data <= ob_a;\n"
643
                        "\t\t\tend else\n"
644
                                "\t\t\t\to_data <= omem;\n"
645 23 dgisselq
                "\t\tend\n\n");
646
 
647
        fprintf(fp,
648
        "\t// Don\'t forget in the sync check that we are running\n"
649
        "\t// at two clocks per sample.  Thus we need to\n"
650
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
651 26 dgisselq
        "\tinitial\to_sync = 1\'b0;\n"
652 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
653 26 dgisselq
                "\t\tif (i_rst)\n"
654
                "\t\t\to_sync <= 1\'b0;\n"
655
                "\t\telse if (i_ce)\n"
656 23 dgisselq
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
657
        fprintf(fp, "endmodule\n");
658 2 dgisselq
}
659
 
660 26 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
661 2 dgisselq
        FILE    *fp = fopen(fname, "w");
662
        if (NULL == fp) {
663
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
664
                perror("O/S Err was:");
665
                return;
666
        }
667
 
668 23 dgisselq
        const   char    *rnd_string;
669
        if (rounding == RND_TRUNCATE)
670
                rnd_string = "truncate";
671
        else if (rounding == RND_FROMZERO)
672
                rnd_string = "roundfromzero";
673
        else if (rounding == RND_HALFUP)
674
                rnd_string = "roundhalfup";
675
        else
676
                rnd_string = "convround";
677
 
678
 
679 2 dgisselq
        fprintf(fp,
680
"///////////////////////////////////////////////////////////////////////////\n"
681
"//\n"
682 26 dgisselq
"// Filename:   dblstage%s.v\n"
683 2 dgisselq
"//\n"
684
"// Project:    %s\n"
685
"//\n"
686
"// Purpose:    This is part of an FPGA implementation that will process\n"
687 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
688
"//             through the data at two samples per clock.  If you notice\n"
689
"//             from the derivation of an FFT, the only time both even and\n"
690
"//             odd samples are used at the same time is in this stage.\n"
691
"//             Therefore, other than this stage and these twiddles, all of\n"
692
"//             the other stages can run two stages at a time at one sample\n"
693
"//             per clock.\n"
694 2 dgisselq
"//\n"
695
"//             In this implementation, the output is valid one clock after\n"
696
"//             the input is valid.  The output also accumulates one bit\n"
697
"//             above and beyond the number of bits in the input.\n"
698
"//             \n"
699
"//             i_clk   A system clock\n"
700 6 dgisselq
"//             i_rst   A synchronous reset\n"
701 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
702 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
703 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
704
"//                     bits contain the real portion, low order bits the\n"
705
"//                     imaginary portion, all in two\'s complement.\n"
706
"//             i_right The next (odd) complex sample input, same format as\n"
707
"//                     i_left.\n"
708
"//             o_left  The first (even) complex output.\n"
709
"//             o_right The next (odd) complex output.\n"
710 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
711 2 dgisselq
"//\n%s"
712 26 dgisselq
"//\n", (dbg)?"_dbg":"", prjname, creator);
713 2 dgisselq
 
714
        fprintf(fp, "%s", cpyleft);
715
        fprintf(fp,
716 26 dgisselq
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
717 23 dgisselq
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
718 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
719 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
720 6 dgisselq
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
721
        "\toutput\treg\t\t\to_sync;\n"
722 26 dgisselq
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
723
 
724
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
725
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
726
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
727
"\n");
728
        }
729 19 dgisselq
        fprintf(fp,
730 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
731
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
732
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
733
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
734
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
735
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
736
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
737 2 dgisselq
"\n"
738 15 dgisselq
"\n"
739 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
740 15 dgisselq
"\n"
741 23 dgisselq
"\n");
742
        fprintf(fp,
743 5 dgisselq
        "\t// Don't forget that we accumulate a bit by adding two values\n"
744
        "\t// together. Therefore our intermediate value must have one more\n"
745
        "\t// bit than the two originals.\n"
746 25 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");
747 23 dgisselq
        fprintf(fp,
748 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
749 23 dgisselq
        "\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
750
        fprintf(fp,
751 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
752 23 dgisselq
        "\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
753
        fprintf(fp,
754 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
755 23 dgisselq
        "\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
756
        fprintf(fp,
757 26 dgisselq
        "\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
758 23 dgisselq
        "\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
759
 
760
        fprintf(fp,
761 26 dgisselq
        "\n"
762
        "\t// As with any register connected to the sync pulse, these must\n"
763
        "\t// have initial values and be reset on the i_rst signal.\n"
764
        "\t// Other data values need only restrict their updates to i_ce\n"
765
        "\t// enabled clocks, but sync\'s must obey resets and initial\n"
766
        "\t// conditions as well.\n"
767 23 dgisselq
        "\treg\twait_for_sync, rnd_sync;\n"
768 2 dgisselq
"\n"
769 25 dgisselq
        "\tinitial begin\n"
770
        "\t\trnd_sync      = 1\'b0;\n"
771
        "\t\to_sync        = 1\'b0;\n"
772
        "\t\twait_for_sync = 1\'b1;\n"
773
        "\tend\n"
774 5 dgisselq
        "\talways @(posedge i_clk)\n"
775 6 dgisselq
                "\t\tif (i_rst)\n"
776 23 dgisselq
                "\t\tbegin\n"
777 26 dgisselq
                        "\t\t\trnd_sync <= 1\'b0;\n"
778
                        "\t\t\to_sync <= 1\'b0;\n"
779
                        "\t\t\twait_for_sync <= 1\'b1;\n"
780 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
781 5 dgisselq
                "\t\tbegin\n"
782 26 dgisselq
                        "\t\t\twait_for_sync <= 1\'b0;\n"
783 6 dgisselq
                        "\t\t\t//\n"
784 26 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
785
                        "\t\t\to_sync <= rnd_sync;\n"
786
                "\t\tend\n"
787
"\n"
788
        "\t// As with other variables, these are really only updated when in\n"
789
        "\t// the processing pipeline, after the first i_sync.  However, to\n"
790
        "\t// eliminate as much unnecessary logic as possible, we toggle\n"
791
        "\t// these any time the i_ce line is enabled.\n"
792
        "\talways @(posedge i_clk)\n"
793
                "\t\tif (i_ce)\n"
794
                "\t\tbegin\n"
795
                        "\t\t\t//\n"
796 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
797
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
798 5 dgisselq
                        "\t\t\t//\n"
799 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
800
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
801 6 dgisselq
                        "\t\t\t//\n"
802 5 dgisselq
                "\t\tend\n"
803 2 dgisselq
"\n"
804 5 dgisselq
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
805
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
806 2 dgisselq
"\n"
807
"endmodule\n");
808
        fclose(fp);
809
}
810
 
811
void    build_multiply(const char *fname) {
812
        FILE    *fp = fopen(fname, "w");
813
        if (NULL == fp) {
814
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
815
                perror("O/S Err was:");
816
                return;
817
        }
818
 
819
        fprintf(fp,
820
"///////////////////////////////////////////////////////////////////////////\n"
821
"//\n"
822
"// Filename:   shiftaddmpy.v\n"
823
"//\n"
824
"// Project:    %s\n"
825
"//\n"
826
"// Purpose:    A portable shift and add multiply.\n"
827
"//\n"
828
"//             While both Xilinx and Altera will offer single clock \n"
829
"//             multiplies, this simple approach will multiply two numbers\n"
830
"//             on any architecture.  The result maintains the full width\n"
831
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
832
"//             no shifted bits, etc.\n"
833
"//\n"
834
"//             Further, for those applications that can support it, this\n"
835
"//             multiply is pipelined and will produce one answer per clock.\n"
836
"//\n"
837
"//             For minimal processing delay, make the first parameter\n"
838
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
839
"//\n"
840
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
841
"//             That is, if the data is present on the input at clock t=0,\n"
842
"//             the result will be present on the output at time t=AWIDTH+1;\n"
843
"//\n"
844
"//\n%s"
845
"//\n", prjname, creator);
846
 
847
        fprintf(fp, "%s", cpyleft);
848
        fprintf(fp,
849
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
850
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
851
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
852
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
853
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
854
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
855
"\n"
856
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
857
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
858
        "\treg\t\t\tsgn;\n"
859
"\n"
860
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
861
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
862
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
863
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
864
        "\tgenvar k;\n"
865
"\n"
866 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
867
        "\t// taking the absolute value here would require an additional bit.\n"
868
        "\t// However, because our results are now unsigned, we can stay\n"
869
        "\t// within the number of bits given (for now).\n"
870 2 dgisselq
        "\talways @(posedge i_clk)\n"
871
                "\t\tif (i_ce)\n"
872
                "\t\tbegin\n"
873
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
874
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
875
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
876
                "\t\tend\n"
877
"\n"
878
        "\talways @(posedge i_clk)\n"
879
                "\t\tif (i_ce)\n"
880
                "\t\tbegin\n"
881 26 dgisselq
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
882
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
883 2 dgisselq
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
884 26 dgisselq
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
885 2 dgisselq
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
886
                "\t\tend\n"
887
"\n"
888
        "\tgenerate\n"
889 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
890 25 dgisselq
        "\tbegin : genstages\n"
891 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
892
                "\t\tif (i_ce)\n"
893 2 dgisselq
                "\t\tbegin\n"
894 26 dgisselq
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
895
                        "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
896
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
897 2 dgisselq
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
898
                "\t\tend\n"
899
        "\tend\n"
900
        "\tendgenerate\n"
901
"\n"
902
        "\talways @(posedge i_clk)\n"
903
                "\t\tif (i_ce)\n"
904
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
905
"\n"
906
"endmodule\n");
907
 
908
        fclose(fp);
909
}
910
 
911
void    build_dblreverse(const char *fname) {
912
        FILE    *fp = fopen(fname, "w");
913
        if (NULL == fp) {
914
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
915
                perror("O/S Err was:");
916
                return;
917
        }
918
 
919
        fprintf(fp,
920
"///////////////////////////////////////////////////////////////////////////\n"
921
"//\n"
922
"// Filename:   dblreverse.v\n"
923
"//\n"
924
"// Project:    %s\n"
925
"//\n"
926
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
927
"//             expected as follows:\n"
928
"//\n"
929
"//             i_clk   A running clock at whatever system speed is offered.\n"
930
"//             i_rst   A synchronous reset signal, that resets all internals\n"
931
"//             i_ce    If this is one, one input is consumed and an output\n"
932
"//                     is produced.\n"
933
"//             i_in_0, i_in_1\n"
934
"//                     Two inputs to be consumed, each of width WIDTH.\n"
935
"//             o_out_0, o_out_1\n"
936
"//                     Two of the bitreversed outputs, also of the same\n"
937
"//                     width, WIDTH.  Of course, there is a delay from the\n"
938
"//                     first input to the first output.  For this purpose,\n"
939
"//                     o_sync is present.\n"
940 26 dgisselq
"//             o_sync  This will be a 1\'b1 for the first value in any block.\n"
941
"//                     Following a reset, this will only become 1\'b1 once\n"
942 2 dgisselq
"//                     the data has been loaded and is now valid.  After that,\n"
943
"//                     all outputs will be valid.\n"
944 26 dgisselq
"//\n"
945
"//     20150602 -- This module has undergone massive rework in order to\n"
946
"//             ensure that it uses resources efficiently.  As a result, \n"
947
"//             it now optimizes nicely into block RAMs.  As an unfortunately\n"
948
"//             side effect, it now passes it\'s bench test (dblrev_tb) but\n"
949
"//             fails the integration bench test (fft_tb).\n"
950
"//\n"
951 2 dgisselq
"//\n%s"
952
"//\n", prjname, creator);
953
        fprintf(fp, "%s", cpyleft);
954
        fprintf(fp,
955
"\n\n"
956
"//\n"
957
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
958
"// our work into eight memory banks, writing two banks at once and reading\n"
959
"// another two banks in the same clock?\n"
960
"//\n"
961
"//     mem[00xxx0] = s_0[n]\n"
962
"//     mem[00xxx1] = s_1[n]\n"
963
"//     o_0[n] = mem[10xxx0]\n"
964
"//     o_1[n] = mem[11xxx0]\n"
965
"//     ...\n"
966
"//     mem[01xxx0] = s_0[m]\n"
967
"//     mem[01xxx1] = s_1[m]\n"
968
"//     o_0[m] = mem[10xxx1]\n"
969
"//     o_1[m] = mem[11xxx1]\n"
970
"//     ...\n"
971
"//     mem[10xxx0] = s_0[n]\n"
972
"//     mem[10xxx1] = s_1[n]\n"
973
"//     o_0[n] = mem[00xxx0]\n"
974
"//     o_1[n] = mem[01xxx0]\n"
975
"//     ...\n"
976
"//     mem[11xxx0] = s_0[m]\n"
977
"//     mem[11xxx1] = s_1[m]\n"
978
"//     o_0[m] = mem[00xxx1]\n"
979
"//     o_1[m] = mem[01xxx1]\n"
980
"//     ...\n"
981
"//\n"
982 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
983
"//     to do it properly.  These four banks are defined by the two bits\n"
984
"//     that determine the top and bottom of the correct address.  Larger\n"
985
"//     FFT\'s would require more memories.\n"
986
"//\n"
987 2 dgisselq
"//\n");
988
        fprintf(fp,
989
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
990 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
991 26 dgisselq
        "\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"
992 5 dgisselq
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
993
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
994 26 dgisselq
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
995 5 dgisselq
        "\toutput\treg\t\t\to_sync;\n"
996 2 dgisselq
"\n"
997 26 dgisselq
        "\treg\t\t\tin_reset;\n"
998
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
999
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
1000 2 dgisselq
"\n"
1001 5 dgisselq
        "\tgenvar\tk;\n"
1002 26 dgisselq
        "\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
1003 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
1004 26 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
1005 25 dgisselq
        "\tend endgenerate\n"
1006 2 dgisselq
"\n"
1007 25 dgisselq
        "\tinitial iaddr = 0;\n"
1008
        "\tinitial in_reset = 1\'b1;\n"
1009 26 dgisselq
        "\tinitial o_sync = 1\'b0;\n"
1010 5 dgisselq
        "\talways @(posedge i_clk)\n"
1011
                "\t\tif (i_rst)\n"
1012
                "\t\tbegin\n"
1013
                        "\t\t\tiaddr <= 0;\n"
1014 26 dgisselq
                        "\t\t\tin_reset <= 1\'b1;\n"
1015
                        "\t\t\to_sync <= 1\'b0;\n"
1016 5 dgisselq
                "\t\tend else if (i_ce)\n"
1017
                "\t\tbegin\n"
1018 26 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
1019
                        "\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
1020
                                "\t\t\t\tin_reset <= 1\'b0;\n"
1021 5 dgisselq
                        "\t\t\tif (in_reset)\n"
1022 26 dgisselq
                                "\t\t\t\to_sync <= 1\'b0;\n"
1023
                        "\t\t\telse\n"
1024
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
1025 5 dgisselq
                "\t\tend\n"
1026 2 dgisselq
"\n"
1027 26 dgisselq
        "\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
1028
        "\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
1029
"\n"
1030
        "\talways @(posedge i_clk)\n"
1031
                "\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
1032
        "\talways @(posedge i_clk)\n"
1033
                "\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
1034
"\n"
1035
"\n"
1036
        "\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
1037
"\n"
1038
        "\talways @(posedge i_clk)\n"
1039
                "\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1040
        "\talways @(posedge i_clk)\n"
1041
                "\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1042
        "\talways @(posedge i_clk)\n"
1043
                "\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
1044
        "\talways @(posedge i_clk)\n"
1045
                "\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
1046
"\n"
1047
        "\treg\tadrz;\n"
1048
        "\talways @(posedge i_clk)\n"
1049
                "\t\tif (i_ce) adrz = iaddr[LGSIZE-2];\n"
1050
"\n"
1051
        "\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
1052
        "\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1053
"\n"
1054 21 dgisselq
"endmodule\n");
1055 2 dgisselq
 
1056
        fclose(fp);
1057
}
1058
 
1059 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1060 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1061
        if (NULL == fp) {
1062
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1063
                perror("O/S Err was:");
1064
                return;
1065
        }
1066 23 dgisselq
        const   char    *rnd_string;
1067
        if (rounding == RND_TRUNCATE)
1068
                rnd_string = "truncate";
1069
        else if (rounding == RND_FROMZERO)
1070
                rnd_string = "roundfromzero";
1071
        else if (rounding == RND_HALFUP)
1072
                rnd_string = "roundhalfup";
1073
        else
1074
                rnd_string = "convround";
1075 2 dgisselq
 
1076
        fprintf(fp,
1077
"///////////////////////////////////////////////////////////////////////////\n"
1078
"//\n"
1079
"// Filename:   butterfly.v\n"
1080
"//\n"
1081
"// Project:    %s\n"
1082
"//\n"
1083
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1084
"//             in frequency version of an FFT.  Specifically, given\n"
1085
"//             complex Left and Right values together with a \n"
1086
"//             coefficient, the output of this routine is given\n"
1087
"//             by:\n"
1088
"//\n"
1089
"//             L' = L + R\n"
1090
"//             R' = (L - R)*C\n"
1091
"//\n"
1092
"//             The rest of the junk below handles timing (mostly),\n"
1093
"//             to make certain that L' and R' reach the output at\n"
1094
"//             the same clock.  Further, just to make certain\n"
1095
"//             that is the case, an 'aux' input exists.  This\n"
1096
"//             aux value will come out of this routine synchronized\n"
1097
"//             to the values it came in with.  (i.e., both L', R',\n"
1098
"//             and aux all have the same delay.)  Hence, a caller\n"
1099
"//             of this routine may set aux on the first input with\n"
1100
"//             valid data, and then wait to see aux set on the output\n"
1101
"//             to know when to find the first output with valid data.\n"
1102
"//\n"
1103
"//             All bits are preserved until the very last clock,\n"
1104
"//             where any more bits than OWIDTH will be quietly\n"
1105
"//             discarded.\n"
1106
"//\n"
1107
"//             This design features no overflow checking.\n"
1108
"// \n"
1109
"// Notes:\n"
1110
"//             CORDIC:\n"
1111
"//             Much as we would like, we can't use a cordic here.\n"
1112
"//             The goal is to accomplish an FFT, as defined, and a\n"
1113
"//             CORDIC places a scale factor onto the data.  Removing\n"
1114
"//             the scale factor would cost a two multiplies, which\n"
1115
"//             is precisely what we are trying to avoid.\n"
1116
"//\n"
1117
"//\n"
1118
"//             3-MULTIPLIES:\n"
1119
"//             It should also be possible to do this with three \n"
1120
"//             multiplies and an extra two addition cycles.  \n"
1121
"//\n"
1122
"//             We want\n"
1123
"//                     R+I = (a + jb) * (c + jd)\n"
1124
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1125
"//             We multiply\n"
1126
"//                     P1 = ac\n"
1127
"//                     P2 = bd\n"
1128
"//                     P3 = (a+b)(c+d)\n"
1129
"//             Then \n"
1130
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1131
"//\n"
1132
"//             WIDTHS:\n"
1133
"//             On multiplying an X width number by an\n"
1134
"//             Y width number, X>Y, the result should be (X+Y)\n"
1135
"//             bits, right?\n"
1136
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1137
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1138
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1139
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1140
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1141
"//             YUP!  But just barely.  Do this and you'll really want\n"
1142
"//             to drop a bit, although you will risk overflow in so\n"
1143
"//             doing.\n"
1144 26 dgisselq
"//\n"
1145
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
1146
"//             synchronization lines no longer go through the FIFO with the\n"
1147
"//             left hand sum, but are kept out of memory.  This allows the\n"
1148
"//             butterfly to use more optimal memory resources, while also\n"
1149
"//             guaranteeing that the sync lines can be properly reset upon\n"
1150
"//             any reset signal.\n"
1151
"//\n"
1152 2 dgisselq
"//\n%s"
1153
"//\n", prjname, creator);
1154
        fprintf(fp, "%s", cpyleft);
1155
 
1156
        fprintf(fp,
1157 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1158 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1159
        "\t// Public changeable parameters ...\n"
1160 14 dgisselq
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1161 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1162 14 dgisselq
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
1163 26 dgisselq
                        "\t\t\tSHIFT=0, AUXLEN=%d;\n"
1164 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1165
        "\t// this value is fractional, then round up to the nearest\n"
1166
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1167 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1168 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1169 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1170
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1171
        "\tinput\t\ti_aux;\n"
1172
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1173 26 dgisselq
        "\toutput\treg\to_aux;\n"
1174 14 dgisselq
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
1175 26 dgisselq
        bflydelay(16, xtracbits), bflydelay(16, xtracbits)+3,
1176
                lgdelay(16,xtracbits));
1177 14 dgisselq
        fprintf(fp,
1178 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1179 2 dgisselq
"\n"
1180 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1181
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
1182
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1183
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1184
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1185
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1186
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1187
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1188 2 dgisselq
"\n"
1189 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1190 2 dgisselq
"\n"
1191 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1192
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1193 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1194 26 dgisselq
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1195 5 dgisselq
"\n");
1196
        fprintf(fp,
1197
        "\t// Set up the input to the multiply\n"
1198 2 dgisselq
        "\talways @(posedge i_clk)\n"
1199
                "\t\tif (i_ce)\n"
1200
                "\t\tbegin\n"
1201
                        "\t\t\t// One clock just latches the inputs\n"
1202
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1203
                        "\t\t\tr_right <= i_right;\n"
1204
                        "\t\t\tr_coef  <= i_coef;\n"
1205
                        "\t\t\t// Next clock adds/subtracts\n"
1206
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1207
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1208
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1209
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1210
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1211
                        "\t\t\tr_coef_2<= r_coef;\n"
1212
        "\t\tend\n"
1213 5 dgisselq
"\n");
1214
        fprintf(fp,
1215
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1216
        "\t// to be multiplied, but yet we still need the results in sync\n"
1217
        "\t// with the answer when it is ready.\n"
1218 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1219 2 dgisselq
        "\talways @(posedge i_clk)\n"
1220 6 dgisselq
                "\t\tif (i_rst)\n"
1221
                        "\t\t\tfifo_addr <= 0;\n"
1222 26 dgisselq
                "\t\telse if (i_ce)\n"
1223 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1224
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1225
                        "\t\t\t// right side.\n"
1226
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1227 14 dgisselq
"\n"
1228 26 dgisselq
        "\talways @(posedge i_clk)\n"
1229
                "\t\tif (i_ce)\n"
1230
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
1231 2 dgisselq
"\n"
1232 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1233
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1234
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1235
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1236 2 dgisselq
"\n"
1237 5 dgisselq
"\n");
1238
        fprintf(fp,
1239
        "\t// Multiply output is always a width of the sum of the widths of\n"
1240
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1241
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1242
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1243
        "\t// three multiply complex multiply cannot increase the total\n"
1244
        "\t// number of bits in our final output.  We\'ll take care of\n"
1245
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1246
        "\t// below.\n"
1247 2 dgisselq
"\n"
1248 5 dgisselq
"\n");
1249
        fprintf(fp,
1250
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1251
        "\t// by doing three multiplies we accomplish the work of four.\n"
1252
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1253
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1254
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1255
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1256
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1257
        "\t// and P3 as\n"
1258
        "\t//\tP1 = ac\n"
1259
        "\t//\tP2 = bd\n"
1260
        "\t//\tP3 = (a + b) * (c + d)\n"
1261
        "\t// and then complete our final answer with\n"
1262
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1263
        "\t//\tad + bc = P3 - P2 - P1\n"
1264
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1265
        "\t//\t        = bc + ad (this checks)\n"
1266 2 dgisselq
"\n"
1267 5 dgisselq
"\n");
1268
        fprintf(fp,
1269
        "\t// This should really be based upon an IF, such as in\n"
1270
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1271
        "\t// However, this is the only (other) way I know to do it.\n"
1272 2 dgisselq
        "\tgenerate\n"
1273
        "\tif (CWIDTH < IWIDTH+1)\n"
1274
        "\tbegin\n"
1275 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1276
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1277
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1278
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1279
                "\n"
1280 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1281 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1282
                "\t\t// simpler, multiply.\n"
1283 2 dgisselq
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1284
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1285
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1286
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1287 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1288 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1289
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1290 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1291 2 dgisselq
        "\tend else begin\n"
1292 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1293
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1294
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1295
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1296
                "\n"
1297 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1298
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1299
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1300
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1301
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1302 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1303 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1304 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1305 2 dgisselq
        "\tend\n"
1306
        "\tendgenerate\n"
1307 5 dgisselq
"\n");
1308
        fprintf(fp,
1309
        "\t// These values are held in memory and delayed during the\n"
1310
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1311
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1312
        "\t// therefore, the left_x values need to be right shifted by\n"
1313
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1314
        "\t// extension.\n"
1315
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1316 26 dgisselq
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
1317
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1318
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1319 2 dgisselq
"\n"
1320
"\n"
1321 23 dgisselq
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
1322 5 dgisselq
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1323
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1324
"\n");
1325
        fprintf(fp,
1326 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1327 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1328
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1329
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1330
        "\t// them, but the actual values will never fill all these bits.\n"
1331
        "\t// In particular, we only need:\n"
1332
        "\t//\t IWIDTH bits for the input\n"
1333
        "\t//\t     +1 bit for the add/subtract\n"
1334
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1335
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1336
        "\t//\t ------\n"
1337
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1338
        "\t//\n"
1339
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1340
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1341
        "\t//\t   IWIDTH bits for the input\n"
1342
        "\t//\t       +1 bit for the add/subtract\n"
1343
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1344
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1345
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1346
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1347
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1348
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1349
        "\t// SHIFT) we accomplish that here.\n"
1350 23 dgisselq
"\n");
1351
        fprintf(fp,
1352
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1353
 
1354
        fprintf(fp,
1355 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
1356 23 dgisselq
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1357
                rnd_string);
1358
        fprintf(fp,
1359 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
1360 23 dgisselq
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1361
                rnd_string);
1362
        fprintf(fp,
1363 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1364 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1365
        fprintf(fp,
1366 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1367 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1368
        fprintf(fp,
1369
        "\talways @(posedge i_clk)\n"
1370
                "\t\tif (i_ce)\n"
1371
                "\t\tbegin\n"
1372
                        "\t\t\t// First clock, recover all values\n"
1373
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1374
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1375
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1376
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1377
                        "\t\t\t// extra bits we need to get rid of.)\n"
1378
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1379
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1380 2 dgisselq
"\n"
1381 23 dgisselq
                        "\t\t\t// Second clock, round and latch for final clock\n"
1382
                        "\t\t\tb_right_r <= rnd_right_r;\n"
1383
                        "\t\t\tb_right_i <= rnd_right_i;\n"
1384
                        "\t\t\tb_left_r <= rnd_left_r;\n"
1385
                        "\t\t\tb_left_i <= rnd_left_i;\n"
1386 24 dgisselq
                "\t\tend\n"
1387
"\n");
1388 26 dgisselq
 
1389 24 dgisselq
        fprintf(fp,
1390 26 dgisselq
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
1391
        "\tinitial\taux_pipeline = 0;\n"
1392
        "\talways @(posedge i_clk)\n"
1393
        "\t\tif (i_rst)\n"
1394
        "\t\t\taux_pipeline <= 0;\n"
1395
        "\t\telse if (i_ce)\n"
1396
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
1397
"\n");
1398
        fprintf(fp,
1399 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1400 24 dgisselq
        "\talways @(posedge i_clk)\n"
1401
                "\t\tif (i_rst)\n"
1402
                "\t\t\to_aux <= 1\'b0;\n"
1403
                "\t\telse if (i_ce)\n"
1404
                "\t\tbegin\n"
1405
                        "\t\t\t// Second clock, latch for final clock\n"
1406 26 dgisselq
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
1407 23 dgisselq
                "\t\tend\n"
1408
"\n");
1409 24 dgisselq
 
1410 23 dgisselq
        fprintf(fp,
1411 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1412
        "\t// complement numbers per output word, so that each output word\n"
1413
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1414
        "\t// portion and the bottom half being the imaginary portion.\n"
1415 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1416
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1417 2 dgisselq
"\n"
1418
"endmodule\n");
1419
        fclose(fp);
1420
}
1421
 
1422 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1423 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1424
        if (NULL == fp) {
1425
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1426
                perror("O/S Err was:");
1427
                return;
1428
        }
1429
 
1430 23 dgisselq
        const   char    *rnd_string;
1431
        if (rounding == RND_TRUNCATE)
1432
                rnd_string = "truncate";
1433
        else if (rounding == RND_FROMZERO)
1434
                rnd_string = "roundfromzero";
1435
        else if (rounding == RND_HALFUP)
1436
                rnd_string = "roundhalfup";
1437
        else
1438
                rnd_string = "convround";
1439
 
1440
 
1441 22 dgisselq
        fprintf(fp,
1442
"///////////////////////////////////////////////////////////////////////////\n"
1443
"//\n"
1444
"// Filename:   hwbfly.v\n"
1445
"//\n"
1446
"// Project:    %s\n"
1447
"//\n"
1448
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1449
"//             in 'butterfly.v', save only that it uses the verilog \n"
1450
"//             operator '*' in hopes that the synthesizer would be able\n"
1451
"//             to optimize it with hardware resources.\n"
1452
"//\n"
1453
"//             It is understood that a hardware multiply can complete its\n"
1454
"//             operation in a single clock.\n"
1455
"//\n"
1456
"//\n%s"
1457
"//\n", prjname, creator);
1458
        fprintf(fp, "%s", cpyleft);
1459
        fprintf(fp,
1460
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1461
                "\t\to_left, o_right, o_aux);\n"
1462
        "\t// Public changeable parameters ...\n"
1463
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1464
        "\t// Parameters specific to the core that should not be changed.\n"
1465 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1466 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1467
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1468
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1469
        "\tinput\t\ti_aux;\n"
1470
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1471
        "\toutput\treg\to_aux;\n"
1472
"\n", xtracbits);
1473
        fprintf(fp,
1474
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1475
"\n"
1476
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1477
        "\treg\t                        r_aux, r_aux_2;\n"
1478
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
1479
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1480
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1481
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1482
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1483
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1484 26 dgisselq
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1485 22 dgisselq
"\n"
1486
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1487
"\n"
1488
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1489
"\n"
1490
        "\t// Set up the input to the multiply\n"
1491 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1492
        "\tinitial r_aux_2 = 1\'b0;\n"
1493 22 dgisselq
        "\talways @(posedge i_clk)\n"
1494 25 dgisselq
                "\t\tif (i_rst)\n"
1495
                "\t\tbegin\n"
1496 26 dgisselq
                        "\t\t\tr_aux <= 1\'b0;\n"
1497
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1498 25 dgisselq
                "\t\tend else if (i_ce)\n"
1499
                "\t\tbegin\n"
1500
                        "\t\t\t// One clock just latches the inputs\n"
1501 26 dgisselq
                        "\t\t\tr_aux <= i_aux;\n"
1502
                        "\t\t\t// Next clock adds/subtracts\n"
1503
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1504
                        "\t\t\tr_aux_2 <= r_aux;\n"
1505
                "\t\tend\n"
1506
        "\talways @(posedge i_clk)\n"
1507
                "\t\tif (i_ce)\n"
1508
                "\t\tbegin\n"
1509
                        "\t\t\t// One clock just latches the inputs\n"
1510 25 dgisselq
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1511
                        "\t\t\tr_right <= i_right;\n"
1512
                        "\t\t\tr_coef  <= i_coef;\n"
1513
                        "\t\t\t// Next clock adds/subtracts\n"
1514
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1515
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1516
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1517
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1518
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1519 26 dgisselq
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1520
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1521 25 dgisselq
                "\t\tend\n"
1522 22 dgisselq
        "\n\n");
1523
        fprintf(fp,
1524
"\t// See comments in the butterfly.v source file for a discussion of\n"
1525
"\t// these operations and the appropriate bit widths.\n\n");
1526
        fprintf(fp,
1527 26 dgisselq
        "\treg\tsigned  [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1528
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1529 22 dgisselq
"\n"
1530 26 dgisselq
        "\treg\tsigned  [(CWIDTH-1):0]  p1c_in, p2c_in; // Coefficient multiply inputs\n"
1531
        "\treg\tsigned  [(IWIDTH):0]    p1d_in, p2d_in; // Data multiply inputs\n"
1532
        "\treg\tsigned  [(CWIDTH):0]    p3c_in; // Product 3, coefficient input\n"
1533
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in; // Product 3, data input\n"
1534 22 dgisselq
"\n"
1535 25 dgisselq
        "\tinitial leftv    = 0;\n"
1536
        "\tinitial leftvv   = 0;\n"
1537 22 dgisselq
        "\talways @(posedge i_clk)\n"
1538
        "\tbegin\n"
1539
                "\t\tif (i_rst)\n"
1540
                "\t\tbegin\n"
1541
                        "\t\t\tleftv <= 0;\n"
1542
                        "\t\t\tleftvv <= 0;\n"
1543 26 dgisselq
                "\t\tend else if (i_ce)\n"
1544 22 dgisselq
                "\t\tbegin\n"
1545
                        "\t\t\t// Second clock, pipeline = 1\n"
1546 26 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1547
"\n"
1548
                        "\t\t\t// Third clock, pipeline = 3\n"
1549
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1550
                        "\t\t\tleftvv <= leftv;\n"
1551
                "\t\tend\n"
1552
        "\tend\n"
1553
"\n"
1554
        "\talways @(posedge i_clk)\n"
1555
                "\t\tif (i_ce)\n"
1556
                "\t\tbegin\n"
1557
                        "\t\t\t// Second clock, pipeline = 1\n"
1558
                        "\t\t\tp1c_in <= ir_coef_r;\n"
1559
                        "\t\t\tp2c_in <= ir_coef_i;\n"
1560
                        "\t\t\tp1d_in <= r_dif_r;\n"
1561
                        "\t\t\tp2d_in <= r_dif_i;\n"
1562 22 dgisselq
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1563
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1564 23 dgisselq
"\n"
1565
"\n"
1566 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1567 26 dgisselq
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1568 22 dgisselq
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1569
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1570
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1571 26 dgisselq
                "\t\tend\n"
1572 22 dgisselq
"\n"
1573 26 dgisselq
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1574
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1575
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1576 22 dgisselq
"\n");
1577
 
1578
        fprintf(fp,
1579
        "\t// These values are held in memory and delayed during the\n"
1580
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1581
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1582
        "\t// therefore, the left_x values need to be right shifted by\n"
1583
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1584
        "\t// extension.\n"
1585 24 dgisselq
        "\twire\taux_s;\n"
1586 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1587
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1588 26 dgisselq
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1589
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1590 22 dgisselq
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1591
"\n"
1592
"\n"
1593 26 dgisselq
        "\t(* use_dsp48=\"no\" *)\n"
1594 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1595
        fprintf(fp,
1596
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1597 22 dgisselq
 
1598
        fprintf(fp,
1599 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1600
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1601 23 dgisselq
                rnd_string);
1602
        fprintf(fp,
1603 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1604
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1605 23 dgisselq
                rnd_string);
1606
        fprintf(fp,
1607 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1608 23 dgisselq
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1609
        fprintf(fp,
1610 26 dgisselq
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1611 23 dgisselq
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1612
 
1613
        fprintf(fp,
1614 25 dgisselq
        "\tinitial left_saved = 0;\n"
1615
        "\tinitial o_aux      = 1\'b0;\n"
1616 22 dgisselq
        "\talways @(posedge i_clk)\n"
1617
        "\t\tif (i_rst)\n"
1618
        "\t\tbegin\n"
1619
                "\t\t\tleft_saved <= 0;\n"
1620 26 dgisselq
                "\t\t\to_aux <= 1\'b0;\n"
1621 22 dgisselq
        "\t\tend else if (i_ce)\n"
1622
        "\t\tbegin\n"
1623
                "\t\t\t// First clock, recover all values\n"
1624
                "\t\t\tleft_saved <= leftvv;\n"
1625 26 dgisselq
"\n"
1626
                "\t\t\t// Second clock, round and latch for final clock\n"
1627
                "\t\t\to_aux <= aux_s;\n"
1628
        "\t\tend\n"
1629
        "\talways @(posedge i_clk)\n"
1630
        "\t\tif (i_ce)\n"
1631
        "\t\tbegin\n"
1632 22 dgisselq
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1633
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1634
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1635
                "\t\t\t// extra bits we need to get rid of.)\n"
1636 26 dgisselq
                "\n"
1637
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1638
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1639
                "\t\t\t// they are prevented from using DSP48\'s\n"
1640
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1641
                "\t\t\tmpy_r <= w_one - w_two;\n"
1642
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1643 22 dgisselq
        "\t\tend\n"
1644
        "\n");
1645
 
1646
        fprintf(fp,
1647
        "\t// As a final step, we pack our outputs into two packed two's\n"
1648
        "\t// complement numbers per output word, so that each output word\n"
1649
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1650
        "\t// portion and the bottom half being the imaginary portion.\n"
1651 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1652
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1653 22 dgisselq
"\n"
1654
"endmodule\n");
1655
 
1656
}
1657
 
1658 26 dgisselq
void    build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
1659 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1660
        int     cbits = nbits + xtra;
1661
 
1662
        if ((cbits * 2) >= sizeof(long long)*8) {
1663
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
1664
                exit(-1);
1665
        }
1666
 
1667
        if (fstage == NULL) {
1668
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
1669
                perror("O/S Err was:");
1670
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
1671
                return;
1672
        }
1673
 
1674
        fprintf(fstage,
1675
"////////////////////////////////////////////////////////////////////////////\n"
1676
"//\n"
1677 26 dgisselq
"// Filename:   %sfftstage_%c%d%s.v\n"
1678 2 dgisselq
"//\n"
1679
"// Project:    %s\n"
1680
"//\n"
1681
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
1682
"//             be used by a FFT core compiler to generate FFTs which may be\n"
1683
"//             used as part of an FFT core.  Specifically, this file \n"
1684
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
1685
"//             FFT, there shall be (N-1) of these stages.  \n"
1686
"//\n%s"
1687
"//\n",
1688 26 dgisselq
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
1689 2 dgisselq
        fprintf(fstage, "%s", cpyleft);
1690 26 dgisselq
        fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
1691
                (inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
1692
                (dbg)?", o_dbg":"");
1693 2 dgisselq
        // These parameter values are useless at this point--they are to be
1694
        // replaced by the parameter values in the calling program.  Only
1695
        // problem is, the CWIDTH needs to match exactly!
1696
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
1697
                nbits, cbits, nbits+1);
1698
        fprintf(fstage,
1699
"\t// Parameters specific to the core that should be changed when this\n"
1700
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
1701
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
1702
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
1703 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
1704 2 dgisselq
        fprintf(fstage,
1705
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
1706
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
1707
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
1708
"\toutput       reg                             o_sync;\n"
1709 26 dgisselq
"\n");
1710
        if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
1711
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
1712
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
1713
"\n");
1714
        }
1715
        fprintf(fstage,
1716 2 dgisselq
"\treg  wait_for_sync;\n"
1717
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
1718
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
1719 8 dgisselq
"\treg  ib_sync;\n"
1720 2 dgisselq
"\n"
1721
"\treg  b_started;\n"
1722
"\twire ob_sync;\n"
1723 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
1724 2 dgisselq
        fprintf(fstage,
1725
"\n"
1726
"\t// %scmem is defined as an array of real and complex values,\n"
1727
"\t// where the top CWIDTH bits are the real value and the bottom\n"
1728
"\t// CWIDTH bits are the imaginary value.\n"
1729
"\t//\n"
1730 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
1731 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
1732
"\t//\n"
1733
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
1734
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
1735 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
1736
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
1737 2 dgisselq
        {
1738
                FILE    *cmem;
1739
 
1740 14 dgisselq
                {
1741
                        char    *memfile, *ptr;
1742
 
1743
                        memfile = new char[strlen(fname)+128];
1744
                        strcpy(memfile, fname);
1745
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
1746
                                ptr++;
1747
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
1748
                        } else {
1749
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
1750 26 dgisselq
                                        coredir, (inv)?"i":"",
1751 14 dgisselq
                                        (odd)?'o':'e', stage*2);
1752
                        }
1753
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
1754
                        cmem = fopen(memfile, "w");
1755
                        if (NULL == cmem) {
1756
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
1757
                                perror("Err from O/S:");
1758
                                exit(-2);
1759
                        }
1760
 
1761
                        delete[] memfile;
1762 2 dgisselq
                }
1763
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
1764
                for(int i=0; i<stage/2; i++) {
1765
                        int k = 2*i+odd;
1766 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
1767 2 dgisselq
                        double  c, s;
1768
                        long long ic, is, vl;
1769
 
1770
                        c = cos(W); s = sin(W);
1771 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
1772
                        is = (long long)round((1ll<<(cbits-2)) * s);
1773 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
1774
                        vl <<= (cbits);
1775
                        vl |= (is & (~(-1ll << (cbits))));
1776
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
1777
                        /*
1778
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
1779
                                ((cbits*2+3)/4), vl, c, s,
1780
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
1781
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
1782
                        */
1783
                } fclose(cmem);
1784
        }
1785
 
1786
        fprintf(fstage,
1787 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
1788 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
1789
"\n"
1790 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
1791 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
1792
"\n"
1793 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
1794
"\tinitial iaddr = 0;\n"
1795 2 dgisselq
"\talways @(posedge i_clk)\n"
1796
        "\t\tif (i_rst)\n"
1797
        "\t\tbegin\n"
1798 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b1;\n"
1799 2 dgisselq
                "\t\t\tiaddr <= 0;\n"
1800
        "\t\tend\n"
1801
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1802
        "\t\tbegin\n"
1803
                "\t\t\t//\n"
1804
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
1805
                "\t\t\t//\n"
1806 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
1807 26 dgisselq
                "\t\t\twait_for_sync <= 1\'b0;\n"
1808
        "\t\tend\n"
1809
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
1810
        "\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n"
1811
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
1812
        "\n");
1813 23 dgisselq
 
1814
        fprintf(fstage,
1815
        "\t//\n"
1816
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
1817
        "\t//\n"
1818 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
1819 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1820 26 dgisselq
                "\t\tif (i_rst)\n"
1821
                        "\t\t\tib_sync <= 1\'b0;\n"
1822
                "\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
1823
                        "\t\t\tbegin\n"
1824
                                "\t\t\t\t// Set the sync to true on the very first\n"
1825
                                "\t\t\t\t// valid input in, and hence on the very\n"
1826
                                "\t\t\t\t// first valid data out per FFT.\n"
1827
                                "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
1828
                        "\t\t\tend\n"
1829 24 dgisselq
        "\talways\t@(posedge i_clk)\n"
1830 26 dgisselq
                "\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
1831
                "\t\t\tbegin\n"
1832
                        "\t\t\t\t// One input from memory, ...\n"
1833
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
1834
                        "\t\t\t\t// One input clocked in from the top\n"
1835
                        "\t\t\t\tib_b <= i_data;\n"
1836
                        "\t\t\t\t// and the coefficient or twiddle factor\n"
1837
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
1838
                "\t\t\tend\n\n", (inv)?"i":"");
1839 23 dgisselq
 
1840
        if (hwmpy) {
1841
                fprintf(fstage,
1842
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1843
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
1844
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1845
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
1846
        } else {
1847
        fprintf(fstage,
1848
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1849
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
1850
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1851
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
1852
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
1853
        }
1854
 
1855
        fprintf(fstage,
1856
        "\t//\n"
1857
        "\t// Next step: recover the outputs from the butterfly\n"
1858
        "\t//\n"
1859 25 dgisselq
        "\tinitial oB        = 0;\n"
1860
        "\tinitial o_sync    = 0;\n"
1861
        "\tinitial b_started = 0;\n"
1862 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1863
        "\t\tif (i_rst)\n"
1864
        "\t\tbegin\n"
1865
                "\t\t\toB <= 0;\n"
1866
                "\t\t\to_sync <= 0;\n"
1867
                "\t\t\tb_started <= 0;\n"
1868
        "\t\tend else if (i_ce)\n"
1869
        "\t\tbegin\n"
1870 26 dgisselq
        "\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n"
1871
        "\t\t\tif (ob_sync||b_started)\n"
1872
                "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
1873
        "\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n"
1874
                "\t\t\t// A butterfly output is available\n"
1875
                        "\t\t\t\tb_started <= 1\'b1;\n"
1876 23 dgisselq
        "\t\tend\n\n");
1877 26 dgisselq
        fprintf(fstage,
1878
        "\treg  [(LGSPAN-1):0]\t\tdly_addr;\n"
1879
        "\treg  [(2*OWIDTH-1):0]\tdly_value;\n"
1880
        "\talways @(posedge i_clk)\n"
1881
        "\t\tif (i_ce)\n"
1882
        "\t\tbegin\n"
1883
        "\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
1884
        "\t\t\tdly_value <= ob_b;\n"
1885
        "\t\tend\n"
1886
        "\talways @(posedge i_clk)\n"
1887
        "\t\tif (i_ce)\n"
1888
                "\t\t\tomem[dly_addr] <= dly_value;\n"
1889
"\n");
1890
        fprintf(fstage,
1891
        "\talways @(posedge i_clk)\n"
1892
        "\t\tif (i_ce)\n"
1893
        "\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
1894
"\n");
1895 22 dgisselq
        fprintf(fstage, "endmodule\n");
1896 2 dgisselq
}
1897
 
1898
void    usage(void) {
1899
        fprintf(stderr,
1900 26 dgisselq
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
1901 2 dgisselq
// "\tfftgen -i\n"
1902 26 dgisselq
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
1903
"\t\ta real FFT) at one clock per two real input samples.\n"
1904 2 dgisselq
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
1905
"\t\tlonger than the corresponding data bits, to help avoid\n"
1906 26 dgisselq
"\t\tcoefficient truncation errors.  The default is %d bits lnoger\n"
1907
"\t\tthan the data bits.\n"
1908 2 dgisselq
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
1909 26 dgisselq
"\t\tThe default is a subdirectory of the current directory named %s.\n"
1910 2 dgisselq
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
1911 26 dgisselq
"\t\tsamples input to the transform.  (No default value, this is\n"
1912
"\t\ta required parameter.)\n"
1913
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
1914
"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"
1915
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
1916 2 dgisselq
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
1917
"\t\tproduce.  Internal values greater than this value will be\n"
1918 26 dgisselq
"\t\ttruncated to this value.  (The default value grows the input\n"
1919
"\t\tsize by one bit for every two FFT stages.)\n"
1920 22 dgisselq
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
1921 26 dgisselq
"\t\tThe default is %d bits input for each component of the two\n"
1922
"\t\tcomplex values into the FFT.\n"
1923 22 dgisselq
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
1924
"\t\tmultiplication facility, instead of shift-add emulation.\n"
1925 26 dgisselq
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
1926
"\t\tbe accelerated in this fashion.  The default is not to use any\n"
1927
"\t\thardware multipliers.\n"
1928
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
1929
"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"
1930 2 dgisselq
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
1931
"\t\talgorithms that need to apply a filter without needing to do\n"
1932
"\t\tbin shifting, as these algorithms can, with this option, just\n"
1933
"\t\tmultiply by a bit reversed correlation sequence and then\n"
1934 22 dgisselq
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
1935
"\t\ta decimation in time inverse to do this, which this program does\n"
1936
"\t\tnot yet provide.)\n"
1937 2 dgisselq
"\t-S\tInclude the final bit reversal stage (default).\n"
1938 22 dgisselq
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
1939
"\t\trounding or truncation of the answer to the final number of bits.\n"
1940 26 dgisselq
"\t\tThe default is to use %d extra bits internally.\n",
1941
/*
1942 2 dgisselq
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
1943
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
1944
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
1945 26 dgisselq
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
1946
*/
1947
        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
1948 2 dgisselq
}
1949
 
1950
// Features still needed:
1951
//      Interactivity.
1952
int main(int argc, char **argv) {
1953
        int     fftsize = -1, lgsize = -1;
1954 26 dgisselq
        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
1955
                        nummpy=DEF_NMPY, nonmpy=2;
1956
        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
1957
        bool    bitreverse = true, inverse=false,
1958
                verbose_flag = false, single_clock = false,
1959
                real_fft = false;
1960 2 dgisselq
        FILE    *vmain;
1961 26 dgisselq
        std::string     coredir = DEF_COREDIR, cmdline = "";
1962 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
1963
        // ROUND_T      rounding = RND_HALFUP;
1964 2 dgisselq
 
1965 26 dgisselq
        bool    dbg = false;
1966
        int     dbgstage = 128;
1967
 
1968 2 dgisselq
        if (argc <= 1)
1969
                usage();
1970
 
1971 14 dgisselq
        cmdline = argv[0];
1972 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1973 14 dgisselq
                cmdline += " ";
1974
                cmdline += argv[argn];
1975
        }
1976
 
1977
        for(int argn=1; argn<argc; argn++) {
1978 2 dgisselq
                if ('-' == argv[argn][0]) {
1979
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
1980
                                switch(argv[argn][j]) {
1981 26 dgisselq
                                        /*
1982 2 dgisselq
                                        case '0':
1983
                                                inverse = false;
1984
                                                break;
1985 26 dgisselq
                                        */
1986 2 dgisselq
                                        case '1':
1987 26 dgisselq
                                                single_clock = true;
1988 2 dgisselq
                                                break;
1989
                                        case 'c':
1990
                                                if (argn+1 >= argc) {
1991 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
1992 2 dgisselq
                                                        usage(); exit(-1);
1993
                                                }
1994
                                                xtracbits = atoi(argv[++argn]);
1995
                                                j+= 200;
1996
                                                break;
1997
                                        case 'd':
1998
                                                if (argn+1 >= argc) {
1999 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
2000 2 dgisselq
                                                        usage(); exit(-1);
2001
                                                }
2002 14 dgisselq
                                                coredir = argv[++argn];
2003 2 dgisselq
                                                j += 200;
2004
                                                break;
2005 26 dgisselq
                                        case 'D':
2006
                                                dbg = true;
2007
                                                if (argn+1 >= argc) {
2008
                                                        printf("ERR: No debug stage number given!\n\n");
2009
                                                        usage(); exit(-1);
2010
                                                }
2011
                                                dbgstage = atoi(argv[++argn]);
2012
                                                j+= 200;
2013
                                                break;
2014 2 dgisselq
                                        case 'f':
2015
                                                if (argn+1 >= argc) {
2016 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
2017 2 dgisselq
                                                        usage(); exit(-1);
2018
                                                }
2019
                                                fftsize = atoi(argv[++argn]);
2020
                                                { int sln = strlen(argv[argn]);
2021
                                                if (!isdigit(argv[argn][sln-1])){
2022
                                                        switch(argv[argn][sln-1]) {
2023
                                                        case 'k': case 'K':
2024
                                                                fftsize <<= 10;
2025
                                                                break;
2026
                                                        case 'm': case 'M':
2027
                                                                fftsize <<= 20;
2028
                                                                break;
2029
                                                        case 'g': case 'G':
2030
                                                                fftsize <<= 30;
2031
                                                                break;
2032
                                                        default:
2033 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
2034 2 dgisselq
                                                                exit(-1);
2035
                                                        }
2036
                                                }}
2037
                                                j += 200;
2038
                                                break;
2039
                                        case 'h':
2040
                                                usage();
2041
                                                exit(0);
2042
                                                break;
2043
                                        case 'i':
2044 26 dgisselq
                                                inverse = true;
2045 2 dgisselq
                                                break;
2046
                                        case 'm':
2047
                                                if (argn+1 >= argc) {
2048 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
2049 2 dgisselq
                                                        exit(-1);
2050
                                                }
2051
                                                maxbitsout = atoi(argv[++argn]);
2052
                                                j += 200;
2053
                                                break;
2054
                                        case 'n':
2055
                                                if (argn+1 >= argc) {
2056 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
2057 2 dgisselq
                                                        exit(-1);
2058
                                                }
2059
                                                nbitsin = atoi(argv[++argn]);
2060
                                                j += 200;
2061
                                                break;
2062 22 dgisselq
                                        case 'p':
2063
                                                if (argn+1 >= argc) {
2064
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
2065
                                                        exit(-1);
2066
                                                }
2067
                                                nummpy = atoi(argv[++argn]);
2068
                                                j += 200;
2069
                                                break;
2070 26 dgisselq
                                        case 'r':
2071
                                                real_fft = true;
2072
                                                break;
2073 2 dgisselq
                                        case 'S':
2074
                                                bitreverse = true;
2075
                                                break;
2076
                                        case 's':
2077
                                                bitreverse = false;
2078
                                                break;
2079 19 dgisselq
                                        case 'x':
2080
                                                if (argn+1 >= argc) {
2081
                                                        printf("ERR: No extra number of bits given!\n\n");
2082
                                                        usage(); exit(-1);
2083
                                                } j+= 200;
2084
                                                xtrapbits = atoi(argv[++argn]);
2085
                                                break;
2086 2 dgisselq
                                        case 'v':
2087
                                                verbose_flag = true;
2088
                                                break;
2089
                                        default:
2090
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
2091
                                                usage();
2092
                                                exit(-1);
2093
                                }
2094
                        }
2095
                } else {
2096
                        printf("Unrecognized argument, %s\n", argv[argn]);
2097
                        usage();
2098
                        exit(-1);
2099
                }
2100
        }
2101
 
2102 26 dgisselq
        if (real_fft) {
2103
                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2104
                exit(0);
2105
        } if (single_clock) {
2106
                printf("The single clock FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");
2107
                exit(0);
2108
        } if (!bitreverse) {
2109
                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
2110
                printf("an inverse FFT on a bit--reversed input has not yet been\n");
2111
                printf("built.\n");
2112
        }
2113
 
2114 2 dgisselq
        if ((lgsize < 0)&&(fftsize > 1)) {
2115
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
2116
                        ;
2117
        }
2118
 
2119
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
2120
                printf("INVALID PARAMETERS!!!!\n");
2121
                exit(-1);
2122
        }
2123
 
2124
 
2125
        if (nextlg(fftsize) != fftsize) {
2126
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
2127
                                fftsize);
2128
                exit(-1);
2129
        } else if (fftsize < 2) {
2130
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
2131
                                fftsize);
2132
                if (fftsize == 1) {
2133
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
2134
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
2135
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
2136
                        fprintf(stderr, "can be connected straight to the input.\n");
2137
                } else {
2138
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
2139
                        fprintf(stderr, "Is such an operation even defined?\n");
2140
                }
2141
                exit(-1);
2142
        }
2143
 
2144
        // Calculate how many output bits we'll have, and what the log
2145
        // based two size of our FFT is.
2146
        {
2147
                int     tmp_size = fftsize;
2148
 
2149
                // The first stage always accumulates one bit, regardless
2150
                // of whether you need to or not.
2151
                nbitsout = nbitsin + 1;
2152
                tmp_size >>= 1;
2153
 
2154
                while(tmp_size > 4) {
2155
                        nbitsout += 1;
2156
                        tmp_size >>= 2;
2157
                }
2158
 
2159
                if (tmp_size > 1)
2160
                        nbitsout ++;
2161
 
2162
                if (fftsize <= 2)
2163
                        bitreverse = false;
2164
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2165
                nbitsout = maxbitsout;
2166
 
2167 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2168
        {
2169
                int     lgv = lgval(fftsize);
2170 2 dgisselq
 
2171 22 dgisselq
                nonmpy = lgv - nummpy;
2172
                if (nonmpy < 2) nonmpy = 2;
2173
                nummpy = lgv - nonmpy;
2174
        }
2175
 
2176 2 dgisselq
        {
2177
                struct stat     sbuf;
2178 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2179 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2180 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2181 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2182
                                fprintf(stderr, "To try again, please remove this file.\n");
2183
                                exit(-1);
2184
                        }
2185
                } else
2186 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2187
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2188
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2189 2 dgisselq
                        exit(-1);
2190
                }
2191
        }
2192
 
2193 14 dgisselq
        {
2194
                std::string     fname_string;
2195
 
2196
                fname_string = coredir;
2197
                fname_string += "/";
2198
                if (inverse) fname_string += "i";
2199
                fname_string += "fftmain.v";
2200
 
2201
                vmain = fopen(fname_string.c_str(), "w");
2202
                if (NULL == vmain) {
2203
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2204
                        perror("Err from O/S:");
2205
                        exit(-1);
2206
                }
2207 2 dgisselq
        }
2208
 
2209
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2210
        fprintf(vmain, "//\n");
2211
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2212
        fprintf(vmain, "//\n");
2213
        fprintf(vmain, "// Project:     %s\n", prjname);
2214
        fprintf(vmain, "//\n");
2215
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2216
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2217
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2218
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2219
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2220
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2221
        fprintf(vmain, "//\n");
2222
        fprintf(vmain, "// Parameters:\n");
2223
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2224
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2225
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2226
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2227
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2228
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
2229
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2230
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2231
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
2232
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
2233
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
2234
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
2235
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
2236
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
2237
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
2238
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
2239
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
2240
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
2241
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
2242
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
2243
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
2244
        fprintf(vmain, "//\t\t\tbits total.\n");
2245
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
2246
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
2247
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
2248
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
2249
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
2250
        fprintf(vmain, "//\n");
2251 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2252
        fprintf(vmain, "//\t\tfollowing command line:\n");
2253
        fprintf(vmain, "//\n");
2254
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2255
        fprintf(vmain, "//\n");
2256 2 dgisselq
        fprintf(vmain, "%s", creator);
2257
        fprintf(vmain, "//\n");
2258
        fprintf(vmain, "%s", cpyleft);
2259
 
2260
 
2261
        fprintf(vmain, "//\n");
2262
        fprintf(vmain, "//\n");
2263
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2264
        fprintf(vmain, "\t\ti_left, i_right,\n");
2265 26 dgisselq
        fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
2266
                        (dbg)?", o_dbg":"");
2267 2 dgisselq
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2268
        assert(lgsize > 0);
2269
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2270
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2271
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2272
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2273 26 dgisselq
        if (dbg)
2274
                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
2275 2 dgisselq
        fprintf(vmain, "\n\n");
2276
 
2277
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2278
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2279
        fprintf(vmain, "\n\n");
2280
 
2281
        int     tmp_size = fftsize, lgtmp = lgsize;
2282
        if (fftsize == 2) {
2283
                if (bitreverse) {
2284
                        fprintf(vmain, "\treg\tbr_start;\n");
2285 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2286 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2287
                        fprintf(vmain, "\t\tif (i_rst)\n");
2288 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
2289 2 dgisselq
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2290 26 dgisselq
                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
2291 2 dgisselq
                }
2292
                fprintf(vmain, "\n\n");
2293 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2294
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
2295 2 dgisselq
                fprintf(vmain, "\n\n");
2296
        } else {
2297
                int     nbits = nbitsin, dropbit=0;
2298 26 dgisselq
                int     obits = nbits+1+xtrapbits;
2299
 
2300
                if ((maxbitsout > 0)&&(obits > maxbitsout))
2301
                        obits = maxbitsout;
2302
 
2303 2 dgisselq
                // Always do a first stage
2304
                fprintf(vmain, "\n\n");
2305
                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
2306 26 dgisselq
                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
2307
                fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2308 2 dgisselq
                        (inverse)?"i":"", fftsize,
2309 26 dgisselq
                                ((dbg)&&(dbgstage == fftsize))?"_dbg":"",
2310
                        xtracbits, obits+xtrapbits,
2311 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2312
                        fftsize);
2313 26 dgisselq
                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
2314 19 dgisselq
                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2315 2 dgisselq
                        (inverse)?"i":"", fftsize,
2316 26 dgisselq
                        xtracbits, obits+xtrapbits,
2317 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2318
                        fftsize);
2319 9 dgisselq
                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2320 2 dgisselq
                fprintf(vmain, "\n\n");
2321
 
2322 14 dgisselq
                {
2323
                        std::string     fname;
2324
                        char    numstr[12];
2325 22 dgisselq
                        bool    mpystage;
2326 2 dgisselq
 
2327 22 dgisselq
                        // Last two stages are always non-multiply stages
2328
                        // since the multiplies can be done by adds
2329
                        mpystage = ((lgtmp-2) <= nummpy);
2330
 
2331 14 dgisselq
                        fname = coredir + "/";
2332
                        if (inverse) fname += "i";
2333
                        fname += "fftstage_e";
2334
                        sprintf(numstr, "%d", fftsize);
2335
                        fname += numstr;
2336 26 dgisselq
                        if ((dbg)&&(dbgstage == fftsize))
2337
                                fname += "_dbg";
2338 14 dgisselq
                        fname += ".v";
2339 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize));    // Even stage
2340 14 dgisselq
 
2341
                        fname = coredir + "/";
2342
                        if (inverse) fname += "i";
2343
                        fname += "fftstage_o";
2344
                        sprintf(numstr, "%d", fftsize);
2345
                        fname += numstr;
2346
                        fname += ".v";
2347 26 dgisselq
                        build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false);  // Odd  stage
2348 14 dgisselq
                }
2349
 
2350 26 dgisselq
                nbits = obits;  // New number of input bits
2351 2 dgisselq
                tmp_size >>= 1; lgtmp--;
2352
                dropbit = 0;
2353
                fprintf(vmain, "\n\n");
2354
                while(tmp_size >= 8) {
2355 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2356 2 dgisselq
 
2357
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2358
                                obits = maxbitsout;
2359
 
2360
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
2361 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
2362 26 dgisselq
                        fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2363 2 dgisselq
                                (inverse)?"i":"", tmp_size,
2364 26 dgisselq
                                ((dbg)&&(dbgstage == tmp_size))?"_dbg":"",
2365 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
2366
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
2367 2 dgisselq
                                tmp_size);
2368 26 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size, ((dbg)&&(dbgstage == tmp_size))?", o_dbg":"");
2369 2 dgisselq
                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2370
                                (inverse)?"i":"", tmp_size,
2371 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
2372
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
2373 2 dgisselq
                                tmp_size);
2374
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
2375
                        fprintf(vmain, "\n\n");
2376
 
2377 14 dgisselq
                        {
2378
                                std::string     fname;
2379
                                char            numstr[12];
2380 22 dgisselq
                                bool            mpystage;
2381 2 dgisselq
 
2382 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2383
 
2384 14 dgisselq
                                fname = coredir + "/";
2385
                                if (inverse) fname += "i";
2386
                                fname += "fftstage_e";
2387
                                sprintf(numstr, "%d", tmp_size);
2388
                                fname += numstr;
2389 26 dgisselq
                                if ((dbg)&&(dbgstage == tmp_size))
2390
                                        fname += "_dbg";
2391 14 dgisselq
                                fname += ".v";
2392 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
2393 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2394 26 dgisselq
                                        mpystage, ((dbg)&&(dbgstage == tmp_size)));     // Even stage
2395 2 dgisselq
 
2396 14 dgisselq
                                fname = coredir + "/";
2397
                                if (inverse) fname += "i";
2398
                                fname += "fftstage_o";
2399
                                sprintf(numstr, "%d", tmp_size);
2400
                                fname += numstr;
2401
                                fname += ".v";
2402 26 dgisselq
                                build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
2403 22 dgisselq
                                        nbits+xtrapbits, inverse, xtracbits,
2404 26 dgisselq
                                        mpystage, false);       // Odd  stage
2405 14 dgisselq
                        }
2406
 
2407
 
2408 2 dgisselq
                        dropbit ^= 1;
2409
                        nbits = obits;
2410
                        tmp_size >>= 1; lgtmp--;
2411
                }
2412
 
2413
                if (tmp_size == 4) {
2414 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2415 2 dgisselq
 
2416
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2417
                                obits = maxbitsout;
2418
 
2419
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
2420 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2421 26 dgisselq
                        fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2422
                                ((dbg)&&(dbgstage==4))?"_dbg":"",
2423
                                nbits+xtrapbits, obits+xtrapbits, lgsize,
2424
                                (inverse)?1:0, (dropbit)?0:0);
2425
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
2426
                                ((dbg)&&(dbgstage==4))?", o_dbg":"");
2427 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2428 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2429 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2430 2 dgisselq
                        dropbit ^= 1;
2431
                        nbits = obits;
2432
                        tmp_size >>= 1; lgtmp--;
2433
                }
2434
 
2435
                {
2436 26 dgisselq
                        obits = nbits+((dropbit)?0:1);
2437 2 dgisselq
                        if (obits > nbitsout)
2438
                                obits = nbitsout;
2439
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2440
                                obits = maxbitsout;
2441
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2442
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2443 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2444 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2445 2 dgisselq
 
2446
                        fprintf(vmain, "\n\n");
2447
                        nbits = obits;
2448
                }
2449
 
2450
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2451
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2452
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2453
                fprintf(vmain, "\n");
2454
                if (bitreverse) {
2455
                        fprintf(vmain, "\twire\tbr_start;\n");
2456
                        fprintf(vmain, "\treg\tr_br_started;\n");
2457 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2458 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2459
                        fprintf(vmain, "\t\tif (i_rst)\n");
2460 26 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
2461
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2462 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2463
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2464 2 dgisselq
                }
2465
        }
2466
 
2467
        fprintf(vmain, "\n");
2468
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2469
        fprintf(vmain, "\twire\tbr_sync;\n");
2470
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2471
        if (bitreverse) {
2472
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2473
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2474
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2475
        } else {
2476
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2477
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2478
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2479
        }
2480
 
2481
        fprintf(vmain, "\n\n");
2482
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2483 26 dgisselq
        fprintf(vmain, "\tinitial\to_sync  = 1\'b0;\n");
2484 2 dgisselq
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2485 26 dgisselq
        fprintf(vmain, "\t\tif (i_rst)\n");
2486
        fprintf(vmain, "\t\t\to_sync  <= 1\'b0;\n");
2487
        fprintf(vmain, "\t\telse if (i_ce)\n");
2488
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2489
        fprintf(vmain, "\n");
2490
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2491
        fprintf(vmain, "\t\tif (i_ce)\n");
2492 2 dgisselq
        fprintf(vmain, "\t\tbegin\n");
2493
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2494
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2495
        fprintf(vmain, "\t\tend\n");
2496
        fprintf(vmain, "\n\n");
2497
        fprintf(vmain, "endmodule\n");
2498
        fclose(vmain);
2499
 
2500 14 dgisselq
        {
2501
                std::string     fname;
2502 2 dgisselq
 
2503 14 dgisselq
                fname = coredir + "/butterfly.v";
2504 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2505 2 dgisselq
 
2506 22 dgisselq
                if (nummpy > 0) {
2507
                        fname = coredir + "/hwbfly.v";
2508 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2509 22 dgisselq
                }
2510
 
2511 14 dgisselq
                fname = coredir + "/shiftaddmpy.v";
2512
                build_multiply(fname.c_str());
2513 2 dgisselq
 
2514 26 dgisselq
                if ((dbg)&&(dbgstage == 4)) {
2515
                        fname = coredir + "/qtrstage_dbg.v";
2516
                        build_quarters(fname.c_str(), rounding, true);
2517
                }
2518 14 dgisselq
                fname = coredir + "/qtrstage.v";
2519 26 dgisselq
                build_quarters(fname.c_str(), rounding, false);
2520 2 dgisselq
 
2521 26 dgisselq
                if ((dbg)&&(dbgstage == 2))
2522
                        fname = coredir + "/dblstage_dbg.v";
2523
                else
2524
                        fname = coredir + "/dblstage.v";
2525
                build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
2526 14 dgisselq
 
2527
                if (bitreverse) {
2528
                        fname = coredir + "/dblreverse.v";
2529
                        build_dblreverse(fname.c_str());
2530
                }
2531 23 dgisselq
 
2532
                const   char    *rnd_string = "";
2533
                switch(rounding) {
2534
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
2535
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
2536
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
2537
                        default:
2538
                                rnd_string = "/convround.v"; break;
2539
                } fname = coredir + rnd_string;
2540
                switch(rounding) {
2541
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
2542
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
2543
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
2544
                        default:
2545
                                build_convround(fname.c_str()); break;
2546
                }
2547
 
2548 2 dgisselq
        }
2549
}
2550
 
2551 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.