OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 25

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 16 dgisselq
/////////////////////////////////////////////////////////////////////////////
2
//
3 24 dgisselq
// Filename:    fftgen.cpp
4 16 dgisselq
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.org.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26
///////////////////////////////////////////////////////////////////////////
27
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49
///////////////////////////////////////////////////////////////////////////
50
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62
#define COREDIR "fft-core"
63
 
64 23 dgisselq
// Selects which rounding module the generated FFT stages instantiate when
// they drop bits.  Each value maps onto the name of one generated Verilog
// module.
typedef enum {
	RND_TRUNCATE,	// "truncate":      plain bit selection
	RND_FROMZERO,	// "roundfromzero": halfway cases round away from zero
	RND_HALFUP,	// "roundhalfup":   add one half, then truncate
	RND_CONVERGENT	// "convround":     halfway cases round to nearest even
} ROUND_T;
67
 
68 2 dgisselq
// License banner written (as Verilog comments) into every generated file.
const char	cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";

// Project name substituted into the "Project:" line of each generated header.
const char	prjname[] = "A Doubletime Pipelined FFT";

// Creator lines substituted into each generated header.
const char	creator[] =	"// Creator:    Dan Gisselquist, Ph.D.\n"
				"//             Gisselquist Technology, LLC\n";
96
 
97
// lgval
//
// Returns the smallest lg >= 1 such that (1<<lg) >= vl.  Note that the
// result is never less than one, so any vl of two or less (including zero
// and negative values) yields one.
//
// The count is made by shifting (vl-1) down rather than shifting a one up,
// so no intermediate value can overflow an int no matter how large vl is.
int	lgval(int vl) {
	int	lg = 0, rem;

	if (vl <= 2)
		return 1;

	// The number of significant bits in (vl-1) is the ceiling of log_2(vl)
	for(rem = vl-1; rem > 0; rem >>= 1)
		lg++;
	return lg;
}
104
 
105
// nextlg
//
// Despite the name, this returns a power of two and not a logarithm: the
// smallest power of two that is greater than or equal to vl.  Values of
// vl of one or less return one.
//
// No power of two above 2^30 fits in a (32-bit) int, so larger requests
// are rejected up front rather than overflowing the shift below.
int	nextlg(int vl) {
	int	r = 1;

	assert(vl <= (1<<30));
	while(r < vl)
		r <<= 1;
	return r;
}
112
 
113 14 dgisselq
// bflydelay
//
// Returns the delay value used for a butterfly given nbits and xtra:
//	nbits+4		when xtra is two or more (i.e. nbits+1 < nbits+xtra),
//	nbits+xtra+3	otherwise.
// NOTE(review): nbits+xtra is presumably the coefficient width, and the
// result a count of clocks -- confirm against the butterfly generator.
int	bflydelay(int nbits, int xtra) {
	int	cbits = nbits + xtra;

	if (nbits+1 < cbits)
		return nbits+4;
	return cbits+3;
}
122
 
123 14 dgisselq
int     lgdelay(int nbits, int xtra) {
124
        // The butterfly code needs to compare a valid address, of this
125
        // many bits, with an address two greater.  This guarantees we
126
        // have enough bits for that comparison.  We'll also end up with
127
        // more storage space to look for these values, but without a 
128
        // redesign that's just what we'll deal with.
129
        return lgval(bflydelay(nbits, xtra)+3);
130
}
131
 
132 23 dgisselq
void    build_truncator(const char *fname) {
133
        printf("TRUNCATING!\n");
134 2 dgisselq
        FILE    *fp = fopen(fname, "w");
135
        if (NULL == fp) {
136
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
137
                perror("O/S Err was:");
138
                return;
139
        }
140
 
141
        fprintf(fp,
142
"///////////////////////////////////////////////////////////////////////////\n"
143
"//\n"
144 23 dgisselq
"// Filename:   truncate.v\n"
145
"//             \n"
146
"// Project:    %s\n"
147
"//\n"
148
"// Purpose:    Truncation is one of several options that can be used\n"
149
"//             internal to the various FFT stages to drop bits from one \n"
150
"//             stage to the next.  In general, it is the simplest method\n"
151
"//             of dropping bits, since it requires only a bit selection.\n"
152
"//\n"
153
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
154
"//             since it tends to produce a DC bias in the result.  (Other\n"
155
"//             less pronounced biases may also exist.)\n"
156
"//\n"
157
"//             This particular version also registers the output with the\n"
158
"//             clock, so there will be a delay of one going through this\n"
159
"//             module.  This will keep it in line with the other forms of\n"
160
"//             rounding that can be used.\n"
161
"//\n"
162
"//\n%s"
163
"//\n",
164
                prjname, creator);
165
 
166
        fprintf(fp, "%s", cpyleft);
167
        fprintf(fp,
168
"module truncate(i_clk, i_ce, i_val, o_val);\n"
169
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
170
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
171
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
172
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
173
"\n"
174
        "\talways @(posedge i_clk)\n"
175
                "\t\tif (i_ce)\n"
176
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
177
"\n"
178
"endmodule\n");
179
}
180
 
181
 
182
void    build_roundhalfup(const char *fname) {
183
        FILE    *fp = fopen(fname, "w");
184
        if (NULL == fp) {
185
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
186
                perror("O/S Err was:");
187
                return;
188
        }
189
 
190
        fprintf(fp,
191
"///////////////////////////////////////////////////////////////////////////\n"
192
"//\n"
193
"// Filename:   roundhalfup.v\n"
194
"//             \n"
195
"// Project:    %s\n"
196
"//\n"
197
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
198
"//             school.  A one half value is added to the result, and then\n"
199
"//             the result is truncated.  When used in an FFT, this produces\n"
200
"//             less bias than the truncation method, although a bias still\n"
201
"//             tends to remain.\n"
202
"//\n"
203
"//\n%s"
204
"//\n",
205
                prjname, creator);
206
 
207
        fprintf(fp, "%s", cpyleft);
208
        fprintf(fp,
209
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
210
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
211
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
212
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
213
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
214
"\n"
215
        "\t// Let's deal with two cases to be as general as we can be here\n"
216
        "\t//\n"
217
        "\t//   1. The desired output would lose no bits at all\n"
218
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
219
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
220
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
221
        "\t//\t\tvalue.\n"
222
        "\tgenerate\n"
223
        "\tif (IWID-SHIFT == OWID)\n"
224
        "\tbegin // No truncation or rounding, output drops no bits\n"
225
"\n"
226
                "\t\talways @(posedge i_clk)\n"
227
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
228
"\n"
229
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
230
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
231
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
232
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
233
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
234
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
235
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
236
"\n"
237
                "\t\talways @(posedge i_clk)\n"
238
                "\t\t\tif (i_ce)\n"
239
                "\t\t\tbegin\n"
240
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
241
                        "\t\t\t\t\to_val <= truncated_value;\n"
242
                        "\t\t\t\telse\n"
243
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
244
                "\t\t\tend\n"
245
"\n"
246
        "\tend\n"
247
        "\tendgenerate\n"
248
"\n"
249
"endmodule\n");
250
}
251
 
252
void    build_roundfromzero(const char *fname) {
253
        FILE    *fp = fopen(fname, "w");
254
        if (NULL == fp) {
255
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
256
                perror("O/S Err was:");
257
                return;
258
        }
259
 
260
        fprintf(fp,
261
"///////////////////////////////////////////////////////////////////////////\n"
262
"//\n"
263
"// Filename:   roundfromzero.v\n"
264
"//             \n"
265
"// Project:    %s\n"
266
"//\n"
267
"// Purpose:    Truncation is one of several options that can be used\n"
268
"//             internal to the various FFT stages to drop bits from one \n"
269
"//             stage to the next.  In general, it is the simplest method\n"
270
"//             of dropping bits, since it requires only a bit selection.\n"
271
"//\n"
272
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
273
"//             since it tends to produce a DC bias in the result.  (Other\n"
274
"//             less pronounced biases may also exist.)\n"
275
"//\n"
276
"//             This particular version also registers the output with the\n"
277
"//             clock, so there will be a delay of one going through this\n"
278
"//             module.  This will keep it in line with the other forms of\n"
279
"//             rounding that can be used.\n"
280
"//\n"
281
"//\n%s"
282
"//\n",
283
                prjname, creator);
284
 
285
        fprintf(fp, "%s", cpyleft);
286
        fprintf(fp,
287
"module convround(i_clk, i_ce, i_val, o_val);\n"
288
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
289
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
290
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
291
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
292
"\n"
293
        "\t// Let's deal with three cases to be as general as we can be here\n"
294
        "\t//\n"
295
        "\t//\t1. The desired output would lose no bits at all\n"
296
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
297
        "\t//\t\tadjusting the value to be the closer to zero in\n"
298
        "\t//\t\tcases of being halfway between two.  If identically\n"
299
        "\t//\t\tequal to a number, we just leave it as is.\n"
300
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
301
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
302
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
303
        "\t//\t\tround away from zero.\n"
304
        "\tgenerate\n"
305
        "\tif (IWID-SHIFT == OWID)\n"
306
        "\tbegin // No truncation or rounding, output drops no bits\n"
307
"\n"
308
                "\t\talways @(posedge i_clk)\n"
309
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
310
"\n"
311
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
312
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
313
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
314
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
315
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
316
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
317
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
318
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
319
"\n"
320
        "\t\talways @(posedge i_clk)\n"
321
                "\t\t\tif (i_ce)\n"
322
                "\t\t\tbegin\n"
323
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
324
                                "\t\t\t\t\to_val <= truncated_value;\n"
325
                        "\t\t\t\telse if (sign_bit)\n"
326
                                "\t\t\t\t\to_val <= truncated_value;\n"
327
                        "\t\t\t\telse\n"
328
                                "\t\t\t\t\to_val <= rounded_up;\n"
329
                "\t\t\tend\n"
330
"\n"
331
        "\tend else // If there's more than one bit we are dropping\n"
332
        "\tbegin\n"
333
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
334
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
335
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
336
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
337
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
338
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
339
"\n"
340
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
341
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
342
"\n"
343
                "\t\talways @(posedge i_clk)\n"
344
                        "\t\t\tif (i_ce)\n"
345
                        "\t\t\tbegin\n"
346
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
347
                                "\t\t\t\t\to_val <= truncated_value;\n"
348
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
349
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
350
                        "\t\t\t\telse if (sign_bit)\n"
351
                                "\t\t\t\t\to_val <= truncated_value;\n"
352
                        "\t\t\t\telse\n"
353
                                "\t\t\t\t\to_val <= rounded_up;\n"
354
                        "\t\t\tend\n"
355
        "\tend\n"
356
        "\tendgenerate\n"
357
"\n"
358
"endmodule\n");
359
}
360
 
361
void    build_convround(const char *fname) {
362
        FILE    *fp = fopen(fname, "w");
363
        if (NULL == fp) {
364
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
365
                perror("O/S Err was:");
366
                return;
367
        }
368
 
369
        fprintf(fp,
370
"///////////////////////////////////////////////////////////////////////////\n"
371
"//\n"
372
"// Filename:   convround.v\n"
373
"//             \n"
374
"// Project:    %s\n"
375
"//\n"
376
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
377
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
378
"//             rounding, or ... more, at least according to Wikipedia.\n"
379
"//\n"
380
"//             This form of rounding works by rounding, when the direction\n"
381
"//             is in question, towards the nearest even value.\n"
382
"//\n"
383
"//\n%s"
384
"//\n",
385
                prjname, creator);
386
 
387
        fprintf(fp, "%s", cpyleft);
388
        fprintf(fp,
389
"module convround(i_clk, i_ce, i_val, o_val);\n"
390
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
391
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
392
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
393
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
394
"\n"
395
"\t// Let's deal with three cases to be as general as we can be here\n"
396
"\t//\n"
397
"\t//\t1. The desired output would lose no bits at all\n"
398
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
399
"\t//\t\tadjusting the value to be the nearest even number in\n"
400
"\t//\t\tcases of being halfway between two.  If identically\n"
401
"\t//\t\tequal to a number, we just leave it as is.\n"
402
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
403
"\t//\t\tnormally unless we are rounding a value of exactly\n"
404
"\t//\t\thalfway between the two.  In the halfway case we round\n"
405
"\t//\t\tto the nearest even number.\n"
406
"\tgenerate\n"
407
"\tif (IWID-SHIFT == OWID)\n"
408
"\tbegin // No truncation or rounding, output drops no bits\n"
409
"\n"
410
"\t\talways @(posedge i_clk)\n"
411
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
412
"\n"
413
"\tend else if (IWID-SHIFT-1 == OWID)\n"
414
"\tbegin // Output drops one bit, can only add one or ... not.\n"
415
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
416
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
417
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
418
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
419
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
420
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
421
"\n"
422
"\t\talways @(posedge i_clk)\n"
423
"\t\t\tif (i_ce)\n"
424
"\t\t\tbegin\n"
425
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
426
"\t\t\t\t\to_val <= truncated_value;\n"
427
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
428
"\t\t\t\t\to_val <= rounded_up; // even value\n"
429
"\t\t\t\telse // else round down to the nearest\n"
430
"\t\t\t\t\to_val <= truncated_value; // even value\n"
431
"\t\t\tend\n"
432
"\n"
433
"\tend else // If there's more than one bit we are dropping\n"
434
"\tbegin\n"
435
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
436
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
437
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
438
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
439
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
440
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
441
"\n"
442
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
443
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
444
"\n"
445
"\t\talways @(posedge i_clk)\n"
446
"\t\t\tif (i_ce)\n"
447
"\t\t\tbegin\n"
448
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
449
"\t\t\t\t\to_val <= truncated_value;\n"
450
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
451
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
452
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
453
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
454
"\t\t\t\telse   // else round down to nearest even\n"
455
"\t\t\t\t\to_val <= truncated_value;\n"
456
"\t\t\tend\n"
457
"\tend\n"
458
"\tendgenerate\n"
459
"\n"
460
"endmodule\n");
461
}
462
 
463
void    build_quarters(const char *fname, ROUND_T rounding) {
464
        FILE    *fp = fopen(fname, "w");
465
        if (NULL == fp) {
466
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
467
                perror("O/S Err was:");
468
                return;
469
        }
470
        const   char    *rnd_string;
471
        if (rounding == RND_TRUNCATE)
472
                rnd_string = "truncate";
473
        else if (rounding == RND_FROMZERO)
474
                rnd_string = "roundfromzero";
475
        else if (rounding == RND_HALFUP)
476
                rnd_string = "roundhalfup";
477
        else
478
                rnd_string = "convround";
479
 
480
 
481
        fprintf(fp,
482
"///////////////////////////////////////////////////////////////////////////\n"
483
"//\n"
484 2 dgisselq
"// Filename:   qtrstage.v\n"
485
"//             \n"
486
"// Project:    %s\n"
487
"//\n"
488 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
489
"//             frequency FFT.  This particular implementation is optimized\n"
490
"//             so that all of the multiplies are accomplished by additions\n"
491
"//             and multiplexers only.\n"
492
"//\n"
493 2 dgisselq
"//\n%s"
494
"//\n",
495
                prjname, creator);
496
        fprintf(fp, "%s", cpyleft);
497
 
498
        fprintf(fp,
499
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
500 5 dgisselq
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
501
        "\t// Parameters specific to the core that should be changed when this\n"
502
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
503
        "\t// spans must use the fftdoubles stage.\n"
504 23 dgisselq
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
505 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
506
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
507
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
508
        "\toutput\treg                          o_sync;\n"
509 14 dgisselq
        "\t\n");
510
        fprintf(fp,
511 5 dgisselq
        "\treg\t        wait_for_sync;\n"
512 23 dgisselq
        "\treg\t[3:0]   pipeline;\n"
513 2 dgisselq
"\n"
514 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
515 2 dgisselq
"\n"
516 23 dgisselq
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
517
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
518
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
519
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
520 2 dgisselq
"\n"
521 23 dgisselq
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
522
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
523 2 dgisselq
"\n"
524 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
525
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
526
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
527 2 dgisselq
"\n"
528 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
529
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
530
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
531 2 dgisselq
"\n"
532 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
533 14 dgisselq
"\n");
534
        fprintf(fp,
535 23 dgisselq
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
536
        fprintf(fp,
537
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
538
        fprintf(fp,
539
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
540
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
541
        fprintf(fp,
542
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
543
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
544
        fprintf(fp,
545
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
546
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
547
        fprintf(fp,
548
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
549
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
550
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
551
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
552
/*
553
        fprintf(fp,
554 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
555 9 dgisselq
        "\tgenerate\n"
556
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
557
                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
558
        "\telse\n"
559
                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
560
        "\tendgenerate\n"
561 2 dgisselq
"\n"
562 23 dgisselq
*/
563
        fprintf(fp,
564 25 dgisselq
        "\tinitial wait_for_sync = 1\'b1;\n"
565
        "\tinitial iaddr = 0;\n"
566 5 dgisselq
        "\talways @(posedge i_clk)\n"
567
                "\t\tif (i_rst)\n"
568
                "\t\tbegin\n"
569
                        "\t\t\twait_for_sync <= 1'b1;\n"
570
                        "\t\t\tiaddr <= 0;\n"
571 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
572 5 dgisselq
                "\t\tbegin\n"
573
                        "\t\t\timem <= i_data;\n"
574 25 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
575 5 dgisselq
                        "\t\t\twait_for_sync <= 1'b0;\n"
576 23 dgisselq
                "\t\tend\n\n");
577
        fprintf(fp,
578
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
579
        "\t// Why not?  Because iaddr will always be zero until after the\n"
580
        "\t// first i_ce, so we are safe.\n"
581 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
582 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
583
                "\t\tif (i_rst)\n"
584
                        "\t\t\tpipeline <= 4'h0;\n"
585
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
586
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
587
        fprintf(fp,
588
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
589
        "\talways\t@(posedge i_clk)\n"
590
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
591
                "\t\tbegin\n"
592
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
593
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
594
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
595
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
596
                "\t\tend\n\n");
597
        fprintf(fp,
598
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
599
        fprintf(fp,
600
        "\t// Now for pipeline[2]\n"
601
        "\talways\t@(posedge i_clk)\n"
602
                "\t\tif ((i_ce)&&(pipeline[2]))\n"
603
                "\t\tbegin\n"
604
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
605
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
606
                        "\t\t\tif (ODD == 0)\n"
607 5 dgisselq
                        "\t\t\tbegin\n"
608 23 dgisselq
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
609
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
610
                        "\t\t\tend else if (INVERSE==0) begin\n"
611
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
612
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
613
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
614
                        "\t\t\tend else begin\n"
615
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
616
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
617
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
618 5 dgisselq
                        "\t\t\tend\n"
619 23 dgisselq
                "\t\tend\n\n");
620
        fprintf(fp,
621
        "\talways\t@(posedge i_clk)\n"
622
                "\t\tif (i_ce)\n"
623
                "\t\tbegin // In sequence, clock = 3\n"
624
                        "\t\t\tif (pipeline[3])\n"
625 5 dgisselq
                        "\t\t\tbegin\n"
626
                                "\t\t\t\tomem <= ob_b;\n"
627
                                "\t\t\t\to_data <= ob_a;\n"
628
                        "\t\t\tend else\n"
629
                                "\t\t\t\to_data <= omem;\n"
630 23 dgisselq
                "\t\tend\n\n");
631
 
632
        fprintf(fp,
633
        "\t// Don\'t forget in the sync check that we are running\n"
634
        "\t// at two clocks per sample.  Thus we need to\n"
635
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
636
        "\talways\t@(posedge i_clk)\n"
637
                "\t\tif (i_ce)\n"
638
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
639
        fprintf(fp, "endmodule\n");
640 2 dgisselq
}
641
 
642 23 dgisselq
void    build_dblstage(const char *fname, ROUND_T rounding) {
643 2 dgisselq
        FILE    *fp = fopen(fname, "w");
644
        if (NULL == fp) {
645
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
646
                perror("O/S Err was:");
647
                return;
648
        }
649
 
650 23 dgisselq
        const   char    *rnd_string;
651
        if (rounding == RND_TRUNCATE)
652
                rnd_string = "truncate";
653
        else if (rounding == RND_FROMZERO)
654
                rnd_string = "roundfromzero";
655
        else if (rounding == RND_HALFUP)
656
                rnd_string = "roundhalfup";
657
        else
658
                rnd_string = "convround";
659
 
660
 
661 2 dgisselq
        fprintf(fp,
662
"///////////////////////////////////////////////////////////////////////////\n"
663
"//\n"
664
"// Filename:   dblstage.v\n"
665
"//\n"
666
"// Project:    %s\n"
667
"//\n"
668
"// Purpose:    This is part of an FPGA implementation that will process\n"
669 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
670
"//             through the data at two samples per clock.  If you notice\n"
671
"//             from the derivation of an FFT, the only time both even and\n"
672
"//             odd samples are used at the same time is in this stage.\n"
673
"//             Therefore, other than this stage and these twiddles, all of\n"
674
"//             the other stages can run two stages at a time at one sample\n"
675
"//             per clock.\n"
676 2 dgisselq
"//\n"
677
"//             In this implementation, the output is valid one clock after\n"
678
"//             the input is valid.  The output also accumulates one bit\n"
679
"//             above and beyond the number of bits in the input.\n"
680
"//             \n"
681
"//             i_clk   A system clock\n"
682 6 dgisselq
"//             i_rst   A synchronous reset\n"
683 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
684 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
685 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
686
"//                     bits contain the real portion, low order bits the\n"
687
"//                     imaginary portion, all in two\'s complement.\n"
688
"//             i_right The next (odd) complex sample input, same format as\n"
689
"//                     i_left.\n"
690
"//             o_left  The first (even) complex output.\n"
691
"//             o_right The next (odd) complex output.\n"
692 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
693 2 dgisselq
"//\n%s"
694
"//\n", prjname, creator);
695
 
696
        fprintf(fp, "%s", cpyleft);
697
        fprintf(fp,
698 9 dgisselq
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
699 23 dgisselq
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
700 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
701 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
702 6 dgisselq
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
703
        "\toutput\treg\t\t\to_sync;\n"
704 19 dgisselq
        "\n");
705
        fprintf(fp,
706 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
707
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
708
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
709
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
710
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
711
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
712
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
713 2 dgisselq
"\n"
714 15 dgisselq
"\n"
715 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
716 15 dgisselq
"\n"
717 23 dgisselq
"\n");
718
        fprintf(fp,
719 5 dgisselq
        "\t// Don't forget that we accumulate a bit by adding two values\n"
720
        "\t// together. Therefore our intermediate value must have one more\n"
721
        "\t// bit than the two originals.\n"
722 25 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");
723 23 dgisselq
        fprintf(fp,
724
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0r(i_clk, i_ce,\n"
725
        "\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
726
        fprintf(fp,
727
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0i(i_clk, i_ce,\n"
728
        "\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
729
        fprintf(fp,
730
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1r(i_clk, i_ce,\n"
731
        "\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
732
        fprintf(fp,
733
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1i(i_clk, i_ce,\n"
734
        "\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
735
 
736
        fprintf(fp,
737
        "\treg\twait_for_sync, rnd_sync;\n"
738 2 dgisselq
"\n"
739 25 dgisselq
        "\tinitial begin\n"
740
        "\t\trnd_sync      = 1\'b0;\n"
741
        "\t\to_sync        = 1\'b0;\n"
742
        "\t\twait_for_sync = 1\'b1;\n"
743
        "\tend\n"
744 5 dgisselq
        "\talways @(posedge i_clk)\n"
745 6 dgisselq
                "\t\tif (i_rst)\n"
746 23 dgisselq
                "\t\tbegin\n"
747
                        "\t\t\trnd_sync <= 1'b0;\n"
748
                        "\t\t\to_sync <= 1'b0;\n"
749 6 dgisselq
                        "\t\t\twait_for_sync <= 1'b1;\n"
750 23 dgisselq
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
751 5 dgisselq
                "\t\tbegin\n"
752 6 dgisselq
                        "\t\t\twait_for_sync <= 1'b0;\n"
753
                        "\t\t\t//\n"
754 23 dgisselq
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
755
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
756 5 dgisselq
                        "\t\t\t//\n"
757 23 dgisselq
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
758
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
759 6 dgisselq
                        "\t\t\t//\n"
760 23 dgisselq
                        "\t\t\trnd_sync <= i_sync;\n"
761
                        "\t\t\to_sync <= rnd_sync;\n"
762 5 dgisselq
                "\t\tend\n"
763 2 dgisselq
"\n"
764 5 dgisselq
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
765
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
766 2 dgisselq
"\n"
767
"endmodule\n");
768
        fclose(fp);
769
}
770
 
771
void    build_multiply(const char *fname) {
772
        FILE    *fp = fopen(fname, "w");
773
        if (NULL == fp) {
774
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
775
                perror("O/S Err was:");
776
                return;
777
        }
778
 
779
        fprintf(fp,
780
"///////////////////////////////////////////////////////////////////////////\n"
781
"//\n"
782
"// Filename:   shiftaddmpy.v\n"
783
"//\n"
784
"// Project:    %s\n"
785
"//\n"
786
"// Purpose:    A portable shift and add multiply.\n"
787
"//\n"
788
"//             While both Xilinx and Altera will offer single clock \n"
789
"//             multiplies, this simple approach will multiply two numbers\n"
790
"//             on any architecture.  The result maintains the full width\n"
791
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
792
"//             no shifted bits, etc.\n"
793
"//\n"
794
"//             Further, for those applications that can support it, this\n"
795
"//             multiply is pipelined and will produce one answer per clock.\n"
796
"//\n"
797
"//             For minimal processing delay, make the first parameter\n"
798
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
799
"//\n"
800
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
801
"//             That is, if the data is present on the input at clock t=0,\n"
802
"//             the result will be present on the output at time t=AWIDTH+1;\n"
803
"//\n"
804
"//\n%s"
805
"//\n", prjname, creator);
806
 
807
        fprintf(fp, "%s", cpyleft);
808
        fprintf(fp,
809
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
810
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
811
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
812
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
813
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
814
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
815
"\n"
816
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
817
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
818
        "\treg\t\t\tsgn;\n"
819
"\n"
820
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
821
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
822
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
823
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
824
        "\tgenvar k;\n"
825
"\n"
826 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
827
        "\t// taking the absolute value here would require an additional bit.\n"
828
        "\t// However, because our results are now unsigned, we can stay\n"
829
        "\t// within the number of bits given (for now).\n"
830 2 dgisselq
        "\talways @(posedge i_clk)\n"
831
                "\t\tif (i_ce)\n"
832
                "\t\tbegin\n"
833
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
834
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
835
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
836
                "\t\tend\n"
837
"\n"
838
        "\talways @(posedge i_clk)\n"
839
                "\t\tif (i_ce)\n"
840
                "\t\tbegin\n"
841
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"
842
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"
843
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
844
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"
845
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
846
                "\t\tend\n"
847
"\n"
848
        "\tgenerate\n"
849 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
850 25 dgisselq
        "\tbegin : genstages\n"
851 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
852
                "\t\tif (i_ce)\n"
853 2 dgisselq
                "\t\tbegin\n"
854
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"
855
                        "\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"
856
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"
857
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
858
                "\t\tend\n"
859
        "\tend\n"
860
        "\tendgenerate\n"
861
"\n"
862
        "\talways @(posedge i_clk)\n"
863
                "\t\tif (i_ce)\n"
864
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
865
"\n"
866
"endmodule\n");
867
 
868
        fclose(fp);
869
}
870
 
871
void    build_dblreverse(const char *fname) {
872
        FILE    *fp = fopen(fname, "w");
873
        if (NULL == fp) {
874
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
875
                perror("O/S Err was:");
876
                return;
877
        }
878
 
879
        fprintf(fp,
880
"///////////////////////////////////////////////////////////////////////////\n"
881
"//\n"
882
"// Filename:   dblreverse.v\n"
883
"//\n"
884
"// Project:    %s\n"
885
"//\n"
886
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
887
"//             expected as follows:\n"
888
"//\n"
889
"//             i_clk   A running clock at whatever system speed is offered.\n"
890
"//             i_rst   A synchronous reset signal, that resets all internals\n"
891
"//             i_ce    If this is one, one input is consumed and an output\n"
892
"//                     is produced.\n"
893
"//             i_in_0, i_in_1\n"
894
"//                     Two inputs to be consumed, each of width WIDTH.\n"
895
"//             o_out_0, o_out_1\n"
896
"//                     Two of the bitreversed outputs, also of the same\n"
897
"//                     width, WIDTH.  Of course, there is a delay from the\n"
898
"//                     first input to the first output.  For this purpose,\n"
899
"//                     o_sync is present.\n"
900
"//             o_sync  This will be a 1'b1 for the first value in any block.\n"
901
"//                     Following a reset, this will only become 1'b1 once\n"
902
"//                     the data has been loaded and is now valid.  After that,\n"
903
"//                     all outputs will be valid.\n"
904
"//\n%s"
905
"//\n", prjname, creator);
906
        fprintf(fp, "%s", cpyleft);
907
        fprintf(fp,
908
"\n\n"
909
"//\n"
910
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
911
"// our work into eight memory banks, writing two banks at once and reading\n"
912
"// another two banks in the same clock?\n"
913
"//\n"
914
"//     mem[00xxx0] = s_0[n]\n"
915
"//     mem[00xxx1] = s_1[n]\n"
916
"//     o_0[n] = mem[10xxx0]\n"
917
"//     o_1[n] = mem[11xxx0]\n"
918
"//     ...\n"
919
"//     mem[01xxx0] = s_0[m]\n"
920
"//     mem[01xxx1] = s_1[m]\n"
921
"//     o_0[m] = mem[10xxx1]\n"
922
"//     o_1[m] = mem[11xxx1]\n"
923
"//     ...\n"
924
"//     mem[10xxx0] = s_0[n]\n"
925
"//     mem[10xxx1] = s_1[n]\n"
926
"//     o_0[n] = mem[00xxx0]\n"
927
"//     o_1[n] = mem[01xxx0]\n"
928
"//     ...\n"
929
"//     mem[11xxx0] = s_0[m]\n"
930
"//     mem[11xxx1] = s_1[m]\n"
931
"//     o_0[m] = mem[00xxx1]\n"
932
"//     o_1[m] = mem[01xxx1]\n"
933
"//     ...\n"
934
"//\n"
935 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
936
"//     to do it properly.  These four banks are defined by the two bits\n"
937
"//     that determine the top and bottom of the correct address.  Larger\n"
938
"//     FFT\'s would require more memories.\n"
939
"//\n"
940 2 dgisselq
"//\n");
941
        fprintf(fp,
942
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
943 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
944
        "\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
945
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
946
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
947
        "\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
948
        "\toutput\treg\t\t\to_sync;\n"
949 2 dgisselq
"\n"
950 5 dgisselq
        "\treg\tin_reset;\n"
951
        "\treg\t[(LGSIZE):0]\tiaddr;\n"
952
        "\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"
953
        "\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"
954
        "\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"
955
        "\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"
956 2 dgisselq
"\n"
957 5 dgisselq
        "\twire\t[(2*LGSIZE-1):0]       braddr;\n"
958
        "\tgenvar\tk;\n"
959 21 dgisselq
        "\tgenerate for(k=0; k<LGSIZE; k=k+1)\n"
960 25 dgisselq
        "\tbegin : gen_a_bit_reversed_value\n"
961 5 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"
962 25 dgisselq
        "\tend endgenerate\n"
963 2 dgisselq
"\n"
964 25 dgisselq
        "\tinitial iaddr = 0;\n"
965
        "\tinitial in_reset = 1\'b1;\n"
966 5 dgisselq
        "\talways @(posedge i_clk)\n"
967
                "\t\tif (i_rst)\n"
968
                "\t\tbegin\n"
969
                        "\t\t\tiaddr <= 0;\n"
970
                        "\t\t\tin_reset <= 1'b1;\n"
971
                "\t\tend else if (i_ce)\n"
972
                "\t\tbegin\n"
973
                        "\t\t\tif (iaddr[(LGSIZE-1)])\n"
974
                        "\t\t\tbegin\n"
975
                                "\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
976
                                "\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
977
                        "\t\t\tend else begin\n"
978
                                "\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
979
                                "\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
980
                        "\t\t\tend\n"
981 25 dgisselq
                        "\t\t\tiaddr <= iaddr + { {(LGSIZE-2){1\'b0}}, 2\'h2 };\n"
982 5 dgisselq
                        "\t\t\tif (&iaddr[(LGSIZE-1):1])\n"
983
                                "\t\t\t\tin_reset <= 1'b0;\n"
984
                        "\t\t\tif (in_reset)\n"
985
                        "\t\t\tbegin\n"
986
                                "\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"
987
                                "\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"
988
                                "\t\t\t\to_sync <= 1'b0;\n"
989
                        "\t\t\tend else\n"
990
                        "\t\t\tbegin\n"
991
                                "\t\t\t\tif (braddr[0])\n"
992
                                "\t\t\t\tbegin\n"
993 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
994
"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
995 5 dgisselq
                                "\t\t\t\tend else begin\n"
996 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
997
"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
998 5 dgisselq
                                "\t\t\t\tend\n"
999
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"
1000
                        "\t\t\tend\n"
1001
                "\t\tend\n"
1002 2 dgisselq
"\n"
1003 21 dgisselq
"endmodule\n");
1004 2 dgisselq
 
1005
        fclose(fp);
1006
}
1007
 
1008 23 dgisselq
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
1009 2 dgisselq
        FILE    *fp = fopen(fname, "w");
1010
        if (NULL == fp) {
1011
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1012
                perror("O/S Err was:");
1013
                return;
1014
        }
1015 23 dgisselq
        const   char    *rnd_string;
1016
        if (rounding == RND_TRUNCATE)
1017
                rnd_string = "truncate";
1018
        else if (rounding == RND_FROMZERO)
1019
                rnd_string = "roundfromzero";
1020
        else if (rounding == RND_HALFUP)
1021
                rnd_string = "roundhalfup";
1022
        else
1023
                rnd_string = "convround";
1024 2 dgisselq
 
1025
        fprintf(fp,
1026
"///////////////////////////////////////////////////////////////////////////\n"
1027
"//\n"
1028
"// Filename:   butterfly.v\n"
1029
"//\n"
1030
"// Project:    %s\n"
1031
"//\n"
1032
"// Purpose:    This routine caculates a butterfly for a decimation\n"
1033
"//             in frequency version of an FFT.  Specifically, given\n"
1034
"//             complex Left and Right values together with a \n"
1035
"//             coefficient, the output of this routine is given\n"
1036
"//             by:\n"
1037
"//\n"
1038
"//             L' = L + R\n"
1039
"//             R' = (L - R)*C\n"
1040
"//\n"
1041
"//             The rest of the junk below handles timing (mostly),\n"
1042
"//             to make certain that L' and R' reach the output at\n"
1043
"//             the same clock.  Further, just to make certain\n"
1044
"//             that is the case, an 'aux' input exists.  This\n"
1045
"//             aux value will come out of this routine synchronized\n"
1046
"//             to the values it came in with.  (i.e., both L', R',\n"
1047
"//             and aux all have the same delay.)  Hence, a caller\n"
1048
"//             of this routine may set aux on the first input with\n"
1049
"//             valid data, and then wait to see aux set on the output\n"
1050
"//             to know when to find the first output with valid data.\n"
1051
"//\n"
1052
"//             All bits are preserved until the very last clock,\n"
1053
"//             where any more bits than OWIDTH will be quietly\n"
1054
"//             discarded.\n"
1055
"//\n"
1056
"//             This design features no overflow checking.\n"
1057
"// \n"
1058
"// Notes:\n"
1059
"//             CORDIC:\n"
1060
"//             Much as we would like, we can't use a cordic here.\n"
1061
"//             The goal is to accomplish an FFT, as defined, and a\n"
1062
"//             CORDIC places a scale factor onto the data.  Removing\n"
1063
"//             the scale factor would cost a two multiplies, which\n"
1064
"//             is precisely what we are trying to avoid.\n"
1065
"//\n"
1066
"//\n"
1067
"//             3-MULTIPLIES:\n"
1068
"//             It should also be possible to do this with three \n"
1069
"//             multiplies and an extra two addition cycles.  \n"
1070
"//\n"
1071
"//             We want\n"
1072
"//                     R+I = (a + jb) * (c + jd)\n"
1073
"//                     R+I = (ac-bd) + j(ad+bc)\n"
1074
"//             We multiply\n"
1075
"//                     P1 = ac\n"
1076
"//                     P2 = bd\n"
1077
"//                     P3 = (a+b)(c+d)\n"
1078
"//             Then \n"
1079
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
1080
"//\n"
1081
"//             WIDTHS:\n"
1082
"//             On multiplying an X width number by an\n"
1083
"//             Y width number, X>Y, the result should be (X+Y)\n"
1084
"//             bits, right?\n"
1085
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
1086
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
1087
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
1088
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
1089
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
1090
"//             YUP!  But just barely.  Do this and you'll really want\n"
1091
"//             to drop a bit, although you will risk overflow in so\n"
1092
"//             doing.\n"
1093
"//\n%s"
1094
"//\n", prjname, creator);
1095
        fprintf(fp, "%s", cpyleft);
1096
 
1097
        fprintf(fp,
1098 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1099 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
1100
        "\t// Public changeable parameters ...\n"
1101 14 dgisselq
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1102 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
1103 14 dgisselq
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
1104 23 dgisselq
                        "\t\t\tSHIFT=0;\n"
1105 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
1106
        "\t// this value is fractional, then round up to the nearest\n"
1107
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1108 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
1109 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1110 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
1111
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
1112
        "\tinput\t\ti_aux;\n"
1113
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
1114 21 dgisselq
        "\toutput\treg  o_aux;\n"
1115 14 dgisselq
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
1116
        bflydelay(16, xtracbits), lgdelay(16,xtracbits));
1117
        fprintf(fp,
1118 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1119 2 dgisselq
"\n"
1120 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
1121
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
1122
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
1123
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
1124
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1125
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1126
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1127
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1128 2 dgisselq
"\n"
1129 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1130 2 dgisselq
"\n"
1131 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
1132
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
1133 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
1134 5 dgisselq
        "\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
1135 6 dgisselq
        "\treg\t\t\t\tovalid;\n"
1136 5 dgisselq
"\n");
1137
        fprintf(fp,
1138
        "\t// Set up the input to the multiply\n"
1139 2 dgisselq
        "\talways @(posedge i_clk)\n"
1140
                "\t\tif (i_ce)\n"
1141
                "\t\tbegin\n"
1142
                        "\t\t\t// One clock just latches the inputs\n"
1143
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1144
                        "\t\t\tr_right <= i_right;\n"
1145
                        "\t\t\tr_aux <= i_aux;\n"
1146
                        "\t\t\tr_coef  <= i_coef;\n"
1147
                        "\t\t\t// Next clock adds/subtracts\n"
1148
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1149
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1150
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1151
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1152
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1153
                        "\t\t\tr_aux_2 <= r_aux;\n"
1154
                        "\t\t\tr_coef_2<= r_coef;\n"
1155
        "\t\tend\n"
1156 5 dgisselq
"\n");
1157
        fprintf(fp,
1158
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
1159
        "\t// to be multiplied, but yet we still need the results in sync\n"
1160
        "\t// with the answer when it is ready.\n"
1161 25 dgisselq
        "\tinitial fifo_addr = 0;\n"
1162
        "\tinitial ovalid = 1'b0;\n"
1163 2 dgisselq
        "\talways @(posedge i_clk)\n"
1164 6 dgisselq
                "\t\tif (i_rst)\n"
1165 2 dgisselq
                "\t\tbegin\n"
1166 6 dgisselq
                        "\t\t\tfifo_addr <= 0;\n"
1167
                        "\t\t\tovalid <= 1'b0;\n"
1168
                "\t\tend else if (i_ce)\n"
1169
                "\t\tbegin\n"
1170 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
1171
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
1172
                        "\t\t\t// right side.\n"
1173
                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
1174
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
1175 14 dgisselq
"\n"
1176
                        "\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n"
1177 2 dgisselq
                "\t\tend\n"
1178
"\n"
1179 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
1180
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1181
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1182
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
1183 2 dgisselq
"\n"
1184 5 dgisselq
"\n");
1185
        fprintf(fp,
1186
        "\t// Multiply output is always a width of the sum of the widths of\n"
1187
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
1188
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
1189
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
1190
        "\t// three multiply complex multiply cannot increase the total\n"
1191
        "\t// number of bits in our final output.  We\'ll take care of\n"
1192
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
1193
        "\t// below.\n"
1194 2 dgisselq
"\n"
1195 5 dgisselq
"\n");
1196
        fprintf(fp,
1197
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
1198
        "\t// by doing three multiplies we accomplish the work of four.\n"
1199
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
1200
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
1201
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
1202
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
1203
        "\t// We do this by calculating the intermediate products P1, P2,\n"
1204
        "\t// and P3 as\n"
1205
        "\t//\tP1 = ac\n"
1206
        "\t//\tP2 = bd\n"
1207
        "\t//\tP3 = (a + b) * (c + d)\n"
1208
        "\t// and then complete our final answer with\n"
1209
        "\t//\tac - bd = P1 - P2 (this checks)\n"
1210
        "\t//\tad + bc = P3 - P2 - P1\n"
1211
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
1212
        "\t//\t        = bc + ad (this checks)\n"
1213 2 dgisselq
"\n"
1214 5 dgisselq
"\n");
1215
        fprintf(fp,
1216
        "\t// This should really be based upon an IF, such as in\n"
1217
        "\t// if (IWIDTH < CWIDTH) then ...\n"
1218
        "\t// However, this is the only (other) way I know to do it.\n"
1219 2 dgisselq
        "\tgenerate\n"
1220
        "\tif (CWIDTH < IWIDTH+1)\n"
1221
        "\tbegin\n"
1222 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1223
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1224
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1225
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1226
                "\n"
1227 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
1228 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
1229
                "\t\t// simpler, multiply.\n"
1230 2 dgisselq
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
1231
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
1232
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
1233
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
1234 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
1235 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
1236
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
1237 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
1238 2 dgisselq
        "\tend else begin\n"
1239 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
1240
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
1241
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
1242
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
1243
                "\n"
1244 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
1245
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
1246
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
1247
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
1248
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
1249 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
1250 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
1251 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
1252 2 dgisselq
        "\tend\n"
1253
        "\tendgenerate\n"
1254 5 dgisselq
"\n");
1255
        fprintf(fp,
1256
        "\t// These values are held in memory and delayed during the\n"
1257
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1258
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1259
        "\t// therefore, the left_x values need to be right shifted by\n"
1260
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1261
        "\t// extension.\n"
1262 2 dgisselq
        "\twire aux;\n"
1263 5 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
1264
        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"
1265
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
1266
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
1267
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
1268 2 dgisselq
"\n"
1269
"\n"
1270 23 dgisselq
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
1271 5 dgisselq
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1272
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1273
"\n");
1274
        fprintf(fp,
1275 23 dgisselq
        "\t// Let's do some rounding and remove unnecessary bits.\n"
1276 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
1277
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
1278
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
1279
        "\t// them, but the actual values will never fill all these bits.\n"
1280
        "\t// In particular, we only need:\n"
1281
        "\t//\t IWIDTH bits for the input\n"
1282
        "\t//\t     +1 bit for the add/subtract\n"
1283
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
1284
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
1285
        "\t//\t ------\n"
1286
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
1287
        "\t//\n"
1288
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
1289
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
1290
        "\t//\t   IWIDTH bits for the input\n"
1291
        "\t//\t       +1 bit for the add/subtract\n"
1292
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
1293
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
1294
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
1295
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
1296
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
1297
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
1298
        "\t// SHIFT) we accomplish that here.\n"
1299 23 dgisselq
"\n");
1300
        fprintf(fp,
1301
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1302
 
1303
        fprintf(fp,
1304
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
1305
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
1306
                rnd_string);
1307
        fprintf(fp,
1308
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
1309
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
1310
                rnd_string);
1311
        fprintf(fp,
1312
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
1313
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1314
        fprintf(fp,
1315
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
1316
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1317
        fprintf(fp,
1318
        "\talways @(posedge i_clk)\n"
1319
                "\t\tif (i_ce)\n"
1320
                "\t\tbegin\n"
1321
                        "\t\t\t// First clock, recover all values\n"
1322
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
1323
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1324
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
1325
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
1326
                        "\t\t\t// extra bits we need to get rid of.)\n"
1327
                        "\t\t\tmpy_r <= p_one - p_two;\n"
1328
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1329 2 dgisselq
"\n"
1330 23 dgisselq
                        "\t\t\t// Second clock, round and latch for final clock\n"
1331
                        "\t\t\tb_right_r <= rnd_right_r;\n"
1332
                        "\t\t\tb_right_i <= rnd_right_i;\n"
1333
                        "\t\t\tb_left_r <= rnd_left_r;\n"
1334
                        "\t\t\tb_left_i <= rnd_left_i;\n"
1335 24 dgisselq
                "\t\tend\n"
1336
"\n");
1337
        fprintf(fp,
1338 25 dgisselq
        "\tinitial o_aux = 1\'b0;\n"
1339 24 dgisselq
        "\talways @(posedge i_clk)\n"
1340
                "\t\tif (i_rst)\n"
1341
                "\t\t\to_aux <= 1\'b0;\n"
1342
                "\t\telse if (i_ce)\n"
1343
                "\t\tbegin\n"
1344
                        "\t\t\t// Second clock, latch for final clock\n"
1345 23 dgisselq
                        "\t\t\to_aux <= aux & ovalid;\n"
1346
                "\t\tend\n"
1347
"\n");
1348 24 dgisselq
 
1349 23 dgisselq
        fprintf(fp,
1350 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
1351
        "\t// complement numbers per output word, so that each output word\n"
1352
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1353
        "\t// portion and the bottom half being the imaginary portion.\n"
1354 23 dgisselq
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
1355
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
1356 2 dgisselq
"\n"
1357
"endmodule\n");
1358
        fclose(fp);
1359
}
1360
 
1361 23 dgisselq
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
1362 22 dgisselq
        FILE    *fp = fopen(fname, "w");
1363
        if (NULL == fp) {
1364
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
1365
                perror("O/S Err was:");
1366
                return;
1367
        }
1368
 
1369 23 dgisselq
        const   char    *rnd_string;
1370
        if (rounding == RND_TRUNCATE)
1371
                rnd_string = "truncate";
1372
        else if (rounding == RND_FROMZERO)
1373
                rnd_string = "roundfromzero";
1374
        else if (rounding == RND_HALFUP)
1375
                rnd_string = "roundhalfup";
1376
        else
1377
                rnd_string = "convround";
1378
 
1379
 
1380 22 dgisselq
        fprintf(fp,
1381
"///////////////////////////////////////////////////////////////////////////\n"
1382
"//\n"
1383
"// Filename:   hwbfly.v\n"
1384
"//\n"
1385
"// Project:    %s\n"
1386
"//\n"
1387
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
1388
"//             in 'butterfly.v', save only that it uses the verilog \n"
1389
"//             operator '*' in hopes that the synthesizer would be able\n"
1390
"//             to optimize it with hardware resources.\n"
1391
"//\n"
1392
"//             It is understood that a hardware multiply can complete its\n"
1393
"//             operation in a single clock.\n"
1394
"//\n"
1395
"//\n%s"
1396
"//\n", prjname, creator);
1397
        fprintf(fp, "%s", cpyleft);
1398
        fprintf(fp,
1399
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
1400
                "\t\to_left, o_right, o_aux);\n"
1401
        "\t// Public changeable parameters ...\n"
1402
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1403
        "\t// Parameters specific to the core that should not be changed.\n"
1404 23 dgisselq
        "\tparameter\tSHIFT=0;\n"
1405 22 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
1406
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1407
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1408
        "\tinput\t\ti_aux;\n"
1409
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1410
        "\toutput\treg\to_aux;\n"
1411
"\n", xtracbits);
1412
        fprintf(fp,
1413
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
1414
"\n"
1415
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1416
        "\treg\t                        r_aux, r_aux_2;\n"
1417
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
1418
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1419
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1420
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1421
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1422
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1423
"\n"
1424
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1425
"\n"
1426
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1427
"\n"
1428
        "\t// Set up the input to the multiply\n"
1429 25 dgisselq
        "\tinitial r_aux   = 1\'b0;\n"
1430
        "\tinitial r_aux_2 = 1\'b0;\n"
1431 22 dgisselq
        "\talways @(posedge i_clk)\n"
1432 25 dgisselq
                "\t\tif (i_rst)\n"
1433
                "\t\tbegin\n"
1434
                        "\t\t\tr_aux <= 1'b0;\n"
1435
                        "\t\t\tr_aux_2 <= 1'b0;\n"
1436
                "\t\tend else if (i_ce)\n"
1437
                "\t\tbegin\n"
1438
                        "\t\t\t// One clock just latches the inputs\n"
1439
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1440
                        "\t\t\tr_right <= i_right;\n"
1441
                        "\t\t\tr_aux <= i_aux;\n"
1442
                        "\t\t\tr_coef  <= i_coef;\n"
1443
                        "\t\t\t// Next clock adds/subtracts\n"
1444
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1445
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1446
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1447
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1448
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1449
                        "\t\t\tr_aux_2 <= r_aux;\n"
1450
                        "\t\t\tr_coef_2<= r_coef;\n"
1451
                "\t\tend\n"
1452 22 dgisselq
        "\n\n");
1453
        fprintf(fp,
1454
"\t// See comments in the butterfly.v source file for a discussion of\n"
1455
"\t// these operations and the appropriate bit widths.\n\n");
1456
        fprintf(fp,
1457
        "\twire signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1458
        "\tassign       ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1459
        "\tassign       ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1460
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_one, p_two, p_three;\n"
1461
"\n"
1462
        "\treg\tsigned  [(CWIDTH):0]    p3c_in, p1c_in, p2c_in;\n"
1463
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in, p1d_in, p2d_in;\n"
1464
        "\treg\t[3:0]           pipeline;\n"
1465
"\n"
1466 25 dgisselq
        "\tinitial pipeline = 4\'h0;\n"
1467
        "\tinitial leftv    = 0;\n"
1468
        "\tinitial leftvv   = 0;\n"
1469 22 dgisselq
        "\talways @(posedge i_clk)\n"
1470
        "\tbegin\n"
1471
                "\t\tif (i_rst)\n"
1472
                "\t\tbegin\n"
1473
                        "\t\t\tpipeline <= 4'h0;\n"
1474
                        "\t\t\tleftv <= 0;\n"
1475
                        "\t\t\tleftvv <= 0;\n"
1476
                "\t\tend else if (i_clk)\n"
1477
                "\t\tbegin\n"
1478
                        "\t\t\t// Second clock, pipeline = 1\n"
1479
                        "\t\t\tp1c_in <= { ir_coef_r[(CWIDTH-1)], ir_coef_r };\n"
1480
                        "\t\t\tp2c_in <= { ir_coef_i[(CWIDTH-1)], ir_coef_i };\n"
1481
                        "\t\t\tp1d_in <= { r_dif_r[(IWIDTH)], r_dif_r };\n"
1482
                        "\t\t\tp2d_in <= { r_dif_i[(IWIDTH)], r_dif_i };\n"
1483
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1484
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1485 23 dgisselq
"\n"
1486 22 dgisselq
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1487 23 dgisselq
"\n"
1488 22 dgisselq
                        "\t\t\t// Third clock, pipeline = 3\n"
1489
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1490
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1491
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1492
                        "\t\t\tleftvv <= leftv;\n"
1493
"\n"
1494
                        "\t\t\tpipeline <= { pipeline[2:0], 1'b1 };\n"
1495
                "\t\tend\n"
1496
        "\tend\n"
1497
"\n");
1498
 
1499
        fprintf(fp,
1500
        "\t// These values are held in memory and delayed during the\n"
1501
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1502
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1503
        "\t// therefore, the left_x values need to be right shifted by\n"
1504
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1505
        "\t// extension.\n"
1506 24 dgisselq
        "\twire\taux_s;\n"
1507 22 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1508
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1509
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
1510
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
1511
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1512
"\n"
1513
"\n"
1514 23 dgisselq
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
1515
        fprintf(fp,
1516
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1517 22 dgisselq
 
1518
        fprintf(fp,
1519 23 dgisselq
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
1520
        "\t\t\t\t{ {2{left_sr[(IWIDTH+CWIDTH)]}}, left_sr }, rnd_left_r);\n\n",
1521
                rnd_string);
1522
        fprintf(fp,
1523
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
1524
        "\t\t\t\t{ {2{left_si[(IWIDTH+CWIDTH)]}}, left_si }, rnd_left_i);\n\n",
1525
                rnd_string);
1526
        fprintf(fp,
1527
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
1528
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1529
        fprintf(fp,
1530
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
1531
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1532
 
1533
        fprintf(fp,
1534 25 dgisselq
        "\tinitial left_saved = 0;\n"
1535
        "\tinitial o_aux      = 1\'b0;\n"
1536 22 dgisselq
        "\talways @(posedge i_clk)\n"
1537
        "\t\tif (i_rst)\n"
1538
        "\t\tbegin\n"
1539
                "\t\t\tleft_saved <= 0;\n"
1540
                "\t\t\to_aux <= 1'b0;\n"
1541
        "\t\tend else if (i_ce)\n"
1542
        "\t\tbegin\n"
1543
                "\t\t\t// First clock, recover all values\n"
1544
                "\t\t\tleft_saved <= leftvv;\n"
1545
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1546
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1547
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1548
                "\t\t\t// extra bits we need to get rid of.)\n"
1549
                "\t\t\tmpy_r <= p_one - p_two;\n"
1550
                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1551
"\n"
1552
                "\t\t\t// Second clock, round and latch for final clock\n"
1553
"\n"
1554
                "\t\t\to_aux <= aux_s;\n"
1555
        "\t\tend\n"
1556
        "\n");
1557
 
1558
        fprintf(fp,
1559
        "\t// As a final step, we pack our outputs into two packed two's\n"
1560
        "\t// complement numbers per output word, so that each output word\n"
1561
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1562
        "\t// portion and the bottom half being the imaginary portion.\n"
1563 23 dgisselq
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1564
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1565 22 dgisselq
"\n"
1566
"endmodule\n");
1567
 
1568
}
1569
 
1570
void    build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false) {
1571 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1572
        int     cbits = nbits + xtra;
1573
 
1574
        if ((cbits * 2) >= sizeof(long long)*8) {
1575
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
1576
                exit(-1);
1577
        }
1578
 
1579
        if (fstage == NULL) {
1580
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
1581
                perror("O/S Err was:");
1582
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
1583
                return;
1584
        }
1585
 
1586
        fprintf(fstage,
1587
"////////////////////////////////////////////////////////////////////////////\n"
1588
"//\n"
1589
"// Filename:   %sfftstage_%c%d.v\n"
1590
"//\n"
1591
"// Project:    %s\n"
1592
"//\n"
1593
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
1594
"//             be used by a FFT core compiler to generate FFTs which may be\n"
1595
"//             used as part of an FFT core.  Specifically, this file \n"
1596
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
1597
"//             FFT, there shall be (N-1) of these stages.  \n"
1598
"//\n%s"
1599
"//\n",
1600
                (inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);
1601
        fprintf(fstage, "%s", cpyleft);
1602
        fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",
1603
                (inv)?"i":"", (odd)?'o':'e', stage*2);
1604
        // These parameter values are useless at this point--they are to be
1605
        // replaced by the parameter values in the calling program.  Only
1606
        // problem is, the CWIDTH needs to match exactly!
1607
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
1608
                nbits, cbits, nbits+1);
1609
        fprintf(fstage,
1610
"\t// Parameters specific to the core that should be changed when this\n"
1611
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
1612
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
1613
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
1614 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
1615 2 dgisselq
        fprintf(fstage,
1616
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
1617
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
1618
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
1619
"\toutput       reg                             o_sync;\n"
1620
"\n"
1621
"\treg  wait_for_sync;\n"
1622
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
1623
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
1624 8 dgisselq
"\treg  ib_sync;\n"
1625 2 dgisselq
"\n"
1626
"\treg  b_started;\n"
1627
"\twire ob_sync;\n"
1628 23 dgisselq
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
1629 2 dgisselq
        fprintf(fstage,
1630
"\n"
1631
"\t// %scmem is defined as an array of real and complex values,\n"
1632
"\t// where the top CWIDTH bits are the real value and the bottom\n"
1633
"\t// CWIDTH bits are the imaginary value.\n"
1634
"\t//\n"
1635 24 dgisselq
"\t// %scmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
1636 2 dgisselq
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
1637
"\t//\n"
1638
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
1639
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
1640 24 dgisselq
                (inv)?"i":"", (inv)?"i":"", (inv)?"i":"",
1641
                (inv)?"i":"", (odd)?'o':'e',stage<<1, (inv)?"i":"");
1642 2 dgisselq
        {
1643
                FILE    *cmem;
1644
 
1645 14 dgisselq
                {
1646
                        char    *memfile, *ptr;
1647
 
1648
                        memfile = new char[strlen(fname)+128];
1649
                        strcpy(memfile, fname);
1650
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
1651
                                ptr++;
1652
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
1653
                        } else {
1654
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
1655
                                        COREDIR, (inv)?"i":"",
1656
                                        (odd)?'o':'e', stage*2);
1657
                        }
1658
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
1659
                        cmem = fopen(memfile, "w");
1660
                        if (NULL == cmem) {
1661
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
1662
                                perror("Err from O/S:");
1663
                                exit(-2);
1664
                        }
1665
 
1666
                        delete[] memfile;
1667 2 dgisselq
                }
1668
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
1669
                for(int i=0; i<stage/2; i++) {
1670
                        int k = 2*i+odd;
1671 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
1672 2 dgisselq
                        double  c, s;
1673
                        long long ic, is, vl;
1674
 
1675
                        c = cos(W); s = sin(W);
1676 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
1677
                        is = (long long)round((1ll<<(cbits-2)) * s);
1678 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
1679
                        vl <<= (cbits);
1680
                        vl |= (is & (~(-1ll << (cbits))));
1681
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
1682
                        /*
1683
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
1684
                                ((cbits*2+3)/4), vl, c, s,
1685
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
1686
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
1687
                        */
1688
                } fclose(cmem);
1689
        }
1690
 
1691
        fprintf(fstage,
1692 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
1693 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
1694
"\n"
1695 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
1696 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
1697
"\n"
1698 25 dgisselq
"\tinitial wait_for_sync = 1\'b1;\n"
1699
"\tinitial iaddr = 0;\n"
1700 2 dgisselq
"\talways @(posedge i_clk)\n"
1701
        "\t\tif (i_rst)\n"
1702
        "\t\tbegin\n"
1703
                "\t\t\twait_for_sync <= 1'b1;\n"
1704
                "\t\t\tiaddr <= 0;\n"
1705
        "\t\tend\n"
1706
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1707
        "\t\tbegin\n"
1708
                "\t\t\t//\n"
1709
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
1710
                "\t\t\t//\n"
1711
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
1712 25 dgisselq
                "\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
1713 2 dgisselq
                "\t\t\twait_for_sync <= 1'b0;\n"
1714 23 dgisselq
        "\t\tend\n\n");
1715
 
1716
        fprintf(fstage,
1717
        "\t//\n"
1718
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
1719
        "\t//\n"
1720 25 dgisselq
        "\tinitial ib_sync = 1\'b0;\n"
1721 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1722 24 dgisselq
        "\tif (i_rst)\n"
1723
                "\t\tib_sync <= 1\'b0;\n"
1724
        "\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
1725
                "\t\tbegin\n"
1726
                        "\t\t\t// Set the sync to true on the very first\n"
1727
                        "\t\t\t// valid input in, and hence on the very\n"
1728
                        "\t\t\t// first valid data out per FFT.\n"
1729
                        "\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
1730
                "\t\tend\n"
1731
        "\talways\t@(posedge i_clk)\n"
1732 23 dgisselq
        "\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
1733
                "\t\tbegin\n"
1734
                        "\t\t\t// One input from memory, ...\n"
1735
                        "\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
1736
                        "\t\t\t// One input clocked in from the top\n"
1737
                        "\t\t\tib_b <= i_data;\n"
1738
                        "\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
1739
                "\t\tend\n\n", (inv)?"i":"");
1740
 
1741
        if (hwmpy) {
1742
                fprintf(fstage,
1743
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1744
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
1745
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1746
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
1747
        } else {
1748
        fprintf(fstage,
1749
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1750
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
1751
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1752
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
1753
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
1754
        }
1755
 
1756
        fprintf(fstage,
1757
        "\t//\n"
1758
        "\t// Next step: recover the outputs from the butterfly\n"
1759
        "\t//\n"
1760 25 dgisselq
        "\tinitial oB        = 0;\n"
1761
        "\tinitial o_sync    = 0;\n"
1762
        "\tinitial b_started = 0;\n"
1763 23 dgisselq
        "\talways\t@(posedge i_clk)\n"
1764
        "\t\tif (i_rst)\n"
1765
        "\t\tbegin\n"
1766
                "\t\t\toB <= 0;\n"
1767
                "\t\t\to_sync <= 0;\n"
1768
                "\t\t\tb_started <= 0;\n"
1769
        "\t\tend else if (i_ce)\n"
1770
        "\t\tbegin\n"
1771
        "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
1772 2 dgisselq
                "\t\t\tbegin // A butterfly output is available\n"
1773
                        "\t\t\t\tb_started <= 1'b1;\n"
1774 8 dgisselq
                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
1775 25 dgisselq
                        "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
1776 2 dgisselq
"\n"
1777 6 dgisselq
                        "\t\t\t\to_sync <= (ob_sync);\n"
1778 2 dgisselq
                        "\t\t\t\to_data <= ob_a;\n"
1779
                "\t\t\tend else if (b_started)\n"
1780
                "\t\t\tbegin // and keep outputting once you start--at a rate\n"
1781
                "\t\t\t// of one guaranteed output per clock that has i_ce set.\n"
1782 8 dgisselq
                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
1783 25 dgisselq
                        "\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
1784 2 dgisselq
                        "\t\t\t\to_sync <= 1'b0;\n"
1785
                "\t\t\tend else\n"
1786
                        "\t\t\t\to_sync <= 1'b0;\n"
1787 23 dgisselq
        "\t\tend\n\n");
1788 22 dgisselq
        fprintf(fstage, "endmodule\n");
1789 2 dgisselq
}
1790
 
1791
//
// usage
//
// Prints the command line synopsis, followed by a description of each
// option, to stderr.  The synopsis lists every option described below it.
//
void    usage(void) {
        fprintf(stderr,
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits]\n"
"\t\t[-p nmpy] [-x xtrabits] [-sS01]\n"
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
"\t\tlonger than the corresponding data bits, to help avoid\n"
"\t\tcoefficient truncation errors.\n"
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
"\t\tsamples input to the transform.\n"
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
"\t\tproduce.  Internal values greater than this value will be\n"
"\t\ttruncated to this value.\n"
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
"\t\tmultiplication facility, instead of shift-add emulation.\n"
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
"\t\talgorithms that need to apply a filter without needing to do\n"
"\t\tbin shifting, as these algorithms can, with this option, just\n"
"\t\tmultiply by a bit reversed correlation sequence and then\n"
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
"\t\ta decimation in time inverse to do this, which this program does\n"
"\t\tnot yet provide.)\n"
"\t-S\tInclude the final bit reversal stage (default).\n"
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
"\t\trounding or truncation of the answer to the final number of bits.\n"
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
"\t\tgiven by e^{ j 2 pi k/N n }.\n");
}
1822
 
1823
// Features still needed:
1824
//      Interactivity.
1825
int main(int argc, char **argv) {
1826
        int     fftsize = -1, lgsize = -1;
1827 22 dgisselq
        int     nbitsin = 16, xtracbits = 4, nummpy=0, nonmpy=2;
1828 19 dgisselq
        int     nbitsout, maxbitsout = -1, xtrapbits=0;
1829 2 dgisselq
        bool    bitreverse = true, inverse=false, interactive = false,
1830
                verbose_flag = false;
1831
        FILE    *vmain;
1832 14 dgisselq
        std::string     coredir = "fft-core", cmdline = "";
1833 23 dgisselq
        ROUND_T rounding = RND_CONVERGENT;
1834
        // ROUND_T      rounding = RND_HALFUP;
1835 2 dgisselq
 
1836
        if (argc <= 1)
1837
                usage();
1838
 
1839 14 dgisselq
        cmdline = argv[0];
1840 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1841 14 dgisselq
                cmdline += " ";
1842
                cmdline += argv[argn];
1843
        }
1844
 
1845
        for(int argn=1; argn<argc; argn++) {
1846 2 dgisselq
                if ('-' == argv[argn][0]) {
1847
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
1848
                                switch(argv[argn][j]) {
1849
                                        case '0':
1850
                                                inverse = false;
1851
                                                break;
1852
                                        case '1':
1853
                                                inverse = true;
1854
                                                break;
1855
                                        case 'c':
1856
                                                if (argn+1 >= argc) {
1857 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
1858 2 dgisselq
                                                        usage(); exit(-1);
1859
                                                }
1860
                                                xtracbits = atoi(argv[++argn]);
1861
                                                j+= 200;
1862
                                                break;
1863
                                        case 'd':
1864
                                                if (argn+1 >= argc) {
1865 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
1866 2 dgisselq
                                                        usage(); exit(-1);
1867
                                                }
1868 14 dgisselq
                                                coredir = argv[++argn];
1869 2 dgisselq
                                                j += 200;
1870
                                                break;
1871
                                        case 'f':
1872
                                                if (argn+1 >= argc) {
1873 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
1874 2 dgisselq
                                                        usage(); exit(-1);
1875
                                                }
1876
                                                fftsize = atoi(argv[++argn]);
1877
                                                { int sln = strlen(argv[argn]);
1878
                                                if (!isdigit(argv[argn][sln-1])){
1879
                                                        switch(argv[argn][sln-1]) {
1880
                                                        case 'k': case 'K':
1881
                                                                fftsize <<= 10;
1882
                                                                break;
1883
                                                        case 'm': case 'M':
1884
                                                                fftsize <<= 20;
1885
                                                                break;
1886
                                                        case 'g': case 'G':
1887
                                                                fftsize <<= 30;
1888
                                                                break;
1889
                                                        default:
1890 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
1891 2 dgisselq
                                                                exit(-1);
1892
                                                        }
1893
                                                }}
1894
                                                j += 200;
1895
                                                break;
1896
                                        case 'h':
1897
                                                usage();
1898
                                                exit(0);
1899
                                                break;
1900
                                        case 'i':
1901
                                                interactive = true;
1902
                                                break;
1903
                                        case 'm':
1904
                                                if (argn+1 >= argc) {
1905 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
1906 2 dgisselq
                                                        exit(-1);
1907
                                                }
1908
                                                maxbitsout = atoi(argv[++argn]);
1909
                                                j += 200;
1910
                                                break;
1911
                                        case 'n':
1912
                                                if (argn+1 >= argc) {
1913 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
1914 2 dgisselq
                                                        exit(-1);
1915
                                                }
1916
                                                nbitsin = atoi(argv[++argn]);
1917
                                                j += 200;
1918
                                                break;
1919 22 dgisselq
                                        case 'p':
1920
                                                if (argn+1 >= argc) {
1921
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
1922
                                                        exit(-1);
1923
                                                }
1924
                                                nummpy = atoi(argv[++argn]);
1925
                                                j += 200;
1926
                                                break;
1927 2 dgisselq
                                        case 'S':
1928
                                                bitreverse = true;
1929
                                                break;
1930
                                        case 's':
1931
                                                bitreverse = false;
1932
                                                break;
1933 19 dgisselq
                                        case 'x':
1934
                                                if (argn+1 >= argc) {
1935
                                                        printf("ERR: No extra number of bits given!\n\n");
1936
                                                        usage(); exit(-1);
1937
                                                } j+= 200;
1938
                                                xtrapbits = atoi(argv[++argn]);
1939
                                                break;
1940 2 dgisselq
                                        case 'v':
1941
                                                verbose_flag = true;
1942
                                                break;
1943
                                        default:
1944
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
1945
                                                usage();
1946
                                                exit(-1);
1947
                                }
1948
                        }
1949
                } else {
1950
                        printf("Unrecognized argument, %s\n", argv[argn]);
1951
                        usage();
1952
                        exit(-1);
1953
                }
1954
        }
1955
 
1956
        if ((lgsize < 0)&&(fftsize > 1)) {
1957
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
1958
                        ;
1959
        }
1960
 
1961
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
1962
                printf("INVALID PARAMETERS!!!!\n");
1963
                exit(-1);
1964
        }
1965
 
1966
 
1967
        if (nextlg(fftsize) != fftsize) {
1968
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
1969
                                fftsize);
1970
                exit(-1);
1971
        } else if (fftsize < 2) {
1972
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
1973
                                fftsize);
1974
                if (fftsize == 1) {
1975
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
1976
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
1977
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
1978
                        fprintf(stderr, "can be connected straight to the input.\n");
1979
                } else {
1980
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
1981
                        fprintf(stderr, "Is such an operation even defined?\n");
1982
                }
1983
                exit(-1);
1984
        }
1985
 
1986
        // Calculate how many output bits we'll have, and what the log
1987
        // based two size of our FFT is.
1988
        {
1989
                int     tmp_size = fftsize;
1990
 
1991
                // The first stage always accumulates one bit, regardless
1992
                // of whether you need to or not.
1993
                nbitsout = nbitsin + 1;
1994
                tmp_size >>= 1;
1995
 
1996
                while(tmp_size > 4) {
1997
                        nbitsout += 1;
1998
                        tmp_size >>= 2;
1999
                }
2000
 
2001
                if (tmp_size > 1)
2002
                        nbitsout ++;
2003
 
2004
                if (fftsize <= 2)
2005
                        bitreverse = false;
2006
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
2007
                nbitsout = maxbitsout;
2008
 
2009 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
2010
        {
2011
                int     lgv = lgval(fftsize);
2012 2 dgisselq
 
2013 22 dgisselq
                nonmpy = lgv - nummpy;
2014
                if (nonmpy < 2) nonmpy = 2;
2015
                nummpy = lgv - nonmpy;
2016
        }
2017
 
2018 2 dgisselq
        {
2019
                struct stat     sbuf;
2020 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
2021 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
2022 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
2023 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
2024
                                fprintf(stderr, "To try again, please remove this file.\n");
2025
                                exit(-1);
2026
                        }
2027
                } else
2028 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
2029
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
2030
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
2031 2 dgisselq
                        exit(-1);
2032
                }
2033
        }
2034
 
2035 14 dgisselq
        {
2036
                std::string     fname_string;
2037
 
2038
                fname_string = coredir;
2039
                fname_string += "/";
2040
                if (inverse) fname_string += "i";
2041
                fname_string += "fftmain.v";
2042
 
2043
                vmain = fopen(fname_string.c_str(), "w");
2044
                if (NULL == vmain) {
2045
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
2046
                        perror("Err from O/S:");
2047
                        exit(-1);
2048
                }
2049 2 dgisselq
        }
2050
 
2051
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
2052
        fprintf(vmain, "//\n");
2053
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
2054
        fprintf(vmain, "//\n");
2055
        fprintf(vmain, "// Project:     %s\n", prjname);
2056
        fprintf(vmain, "//\n");
2057
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
2058
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
2059
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
2060
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
2061
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
2062
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
2063
        fprintf(vmain, "//\n");
2064
        fprintf(vmain, "// Parameters:\n");
2065
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
2066
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
2067
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
2068
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
2069
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
2070
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
2071
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
2072
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
2073
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
2074
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
2075
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
2076
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
2077
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
2078
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
2079
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
2080
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
2081
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
2082
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
2083
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
2084
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
2085
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
2086
        fprintf(vmain, "//\t\t\tbits total.\n");
2087
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
2088
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
2089
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
2090
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
2091
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
2092
        fprintf(vmain, "//\n");
2093 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
2094
        fprintf(vmain, "//\t\tfollowing command line:\n");
2095
        fprintf(vmain, "//\n");
2096
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
2097
        fprintf(vmain, "//\n");
2098 2 dgisselq
        fprintf(vmain, "%s", creator);
2099
        fprintf(vmain, "//\n");
2100
        fprintf(vmain, "%s", cpyleft);
2101
 
2102
 
2103
        fprintf(vmain, "//\n");
2104
        fprintf(vmain, "//\n");
2105
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
2106
        fprintf(vmain, "\t\ti_left, i_right,\n");
2107
        fprintf(vmain, "\t\to_left, o_right, o_sync);\n");
2108
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
2109
        assert(lgsize > 0);
2110
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2111
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
2112
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
2113
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
2114
        fprintf(vmain, "\n\n");
2115
 
2116
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2117
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
2118
        fprintf(vmain, "\n\n");
2119
 
2120
        int     tmp_size = fftsize, lgtmp = lgsize;
2121
        if (fftsize == 2) {
2122
                if (bitreverse) {
2123
                        fprintf(vmain, "\treg\tbr_start;\n");
2124 25 dgisselq
                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
2125 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2126
                        fprintf(vmain, "\t\tif (i_rst)\n");
2127
                        fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");
2128
                        fprintf(vmain, "\t\telse if (i_ce)\n");
2129
                        fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");
2130
                }
2131
                fprintf(vmain, "\n\n");
2132 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2133
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
2134 2 dgisselq
                fprintf(vmain, "\n\n");
2135
        } else {
2136
                int     nbits = nbitsin, dropbit=0;
2137
                // Always do a first stage
2138
                fprintf(vmain, "\n\n");
2139
                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
2140 19 dgisselq
                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(nbits+1+xtrapbits)-1, fftsize, fftsize);
2141
                fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2142 2 dgisselq
                        (inverse)?"i":"", fftsize,
2143 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
2144 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2145
                        fftsize);
2146
                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);
2147 19 dgisselq
                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2148 2 dgisselq
                        (inverse)?"i":"", fftsize,
2149 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
2150 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
2151
                        fftsize);
2152 9 dgisselq
                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2153 2 dgisselq
                fprintf(vmain, "\n\n");
2154
 
2155 14 dgisselq
                {
2156
                        std::string     fname;
2157
                        char    numstr[12];
2158 22 dgisselq
                        bool    mpystage;
2159 2 dgisselq
 
2160 22 dgisselq
                        // Last two stages are always non-multiply stages
2161
                        // since the multiplies can be done by adds
2162
                        mpystage = ((lgtmp-2) <= nummpy);
2163
 
2164 14 dgisselq
                        fname = coredir + "/";
2165
                        if (inverse) fname += "i";
2166
                        fname += "fftstage_e";
2167
                        sprintf(numstr, "%d", fftsize);
2168
                        fname += numstr;
2169
                        fname += ".v";
2170 22 dgisselq
                        build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage);   // Even stage
2171 14 dgisselq
 
2172
                        fname = coredir + "/";
2173
                        if (inverse) fname += "i";
2174
                        fname += "fftstage_o";
2175
                        sprintf(numstr, "%d", fftsize);
2176
                        fname += numstr;
2177
                        fname += ".v";
2178 22 dgisselq
                        build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage);  // Odd  stage
2179 14 dgisselq
                }
2180
 
2181 2 dgisselq
                nbits += 1;     // New number of input bits
2182
                tmp_size >>= 1; lgtmp--;
2183
                dropbit = 0;
2184
                fprintf(vmain, "\n\n");
2185
                while(tmp_size >= 8) {
2186
                        int     obits = nbits+((dropbit)?0:1);
2187
 
2188
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2189
                                obits = maxbitsout;
2190
 
2191
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
2192 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
2193 2 dgisselq
                        fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
2194
                                (inverse)?"i":"", tmp_size,
2195 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
2196
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
2197 2 dgisselq
                                tmp_size);
2198
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
2199
                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
2200
                                (inverse)?"i":"", tmp_size,
2201 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
2202
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
2203 2 dgisselq
                                tmp_size);
2204
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
2205
                        fprintf(vmain, "\n\n");
2206
 
2207 14 dgisselq
                        {
2208
                                std::string     fname;
2209
                                char            numstr[12];
2210 22 dgisselq
                                bool            mpystage;
2211 2 dgisselq
 
2212 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
2213
 
2214 14 dgisselq
                                fname = coredir + "/";
2215
                                if (inverse) fname += "i";
2216
                                fname += "fftstage_e";
2217
                                sprintf(numstr, "%d", tmp_size);
2218
                                fname += numstr;
2219
                                fname += ".v";
2220 22 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 0,
2221
                                        nbits+xtrapbits, inverse, xtracbits,
2222
                                        mpystage);      // Even stage
2223 2 dgisselq
 
2224 14 dgisselq
                                fname = coredir + "/";
2225
                                if (inverse) fname += "i";
2226
                                fname += "fftstage_o";
2227
                                sprintf(numstr, "%d", tmp_size);
2228
                                fname += numstr;
2229
                                fname += ".v";
2230 22 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 1,
2231
                                        nbits+xtrapbits, inverse, xtracbits,
2232
                                        mpystage);      // Odd  stage
2233 14 dgisselq
                        }
2234
 
2235
 
2236 2 dgisselq
                        dropbit ^= 1;
2237
                        nbits = obits;
2238
                        tmp_size >>= 1; lgtmp--;
2239
                }
2240
 
2241
                if (tmp_size == 4) {
2242
                        int     obits = nbits+((dropbit)?0:1);
2243
 
2244
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
2245
                                obits = maxbitsout;
2246
 
2247
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
2248 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
2249 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
2250 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2251 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");
2252 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
2253 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
2254 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2255 2 dgisselq
                        dropbit ^= 1;
2256
                        nbits = obits;
2257
                        tmp_size >>= 1; lgtmp--;
2258
                }
2259
 
2260
                {
2261
                        int obits = nbits+((dropbit)?0:1);
2262
                        if (obits > nbitsout)
2263
                                obits = nbitsout;
2264
                        if ((maxbitsout>0)&&(obits > maxbitsout))
2265
                                obits = maxbitsout;
2266
                        fprintf(vmain, "\twire\t\tw_s2;\n");
2267
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
2268 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
2269 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
2270 2 dgisselq
 
2271
                        fprintf(vmain, "\n\n");
2272
                        nbits = obits;
2273
                }
2274
 
2275
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
2276
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
2277
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
2278
                fprintf(vmain, "\n");
2279
                if (bitreverse) {
2280
                        fprintf(vmain, "\twire\tbr_start;\n");
2281
                        fprintf(vmain, "\treg\tr_br_started;\n");
2282 25 dgisselq
                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
2283 2 dgisselq
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
2284
                        fprintf(vmain, "\t\tif (i_rst)\n");
2285
                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
2286
                        fprintf(vmain, "\t\telse\n");
2287 23 dgisselq
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
2288
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
2289 2 dgisselq
                }
2290
        }
2291
 
2292
        fprintf(vmain, "\n");
2293
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
2294
        fprintf(vmain, "\twire\tbr_sync;\n");
2295
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
2296
        if (bitreverse) {
2297
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
2298
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
2299
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
2300
        } else {
2301
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
2302
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
2303
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
2304
        }
2305
 
2306
        fprintf(vmain, "\n\n");
2307
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
2308
        fprintf(vmain, "\talways @(posedge i_clk)\n");
2309
        fprintf(vmain, "\t\tbegin\n");
2310
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
2311
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
2312
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
2313
        fprintf(vmain, "\t\tend\n");
2314
        fprintf(vmain, "\n\n");
2315
        fprintf(vmain, "endmodule\n");
2316
        fclose(vmain);
2317
 
2318 14 dgisselq
        {
2319
                std::string     fname;
2320 2 dgisselq
 
2321 14 dgisselq
                fname = coredir + "/butterfly.v";
2322 23 dgisselq
                build_butterfly(fname.c_str(), xtracbits, rounding);
2323 2 dgisselq
 
2324 22 dgisselq
                if (nummpy > 0) {
2325
                        fname = coredir + "/hwbfly.v";
2326 23 dgisselq
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
2327 22 dgisselq
                }
2328
 
2329 14 dgisselq
                fname = coredir + "/shiftaddmpy.v";
2330
                build_multiply(fname.c_str());
2331 2 dgisselq
 
2332 14 dgisselq
                fname = coredir + "/qtrstage.v";
2333 23 dgisselq
                build_quarters(fname.c_str(), rounding);
2334 2 dgisselq
 
2335 14 dgisselq
                fname = coredir + "/dblstage.v";
2336 23 dgisselq
                build_dblstage(fname.c_str(), rounding);
2337 14 dgisselq
 
2338
                if (bitreverse) {
2339
                        fname = coredir + "/dblreverse.v";
2340
                        build_dblreverse(fname.c_str());
2341
                }
2342 23 dgisselq
 
2343
                const   char    *rnd_string = "";
2344
                switch(rounding) {
2345
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
2346
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
2347
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
2348
                        default:
2349
                                rnd_string = "/convround.v"; break;
2350
                } fname = coredir + rnd_string;
2351
                switch(rounding) {
2352
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
2353
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
2354
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
2355
                        default:
2356
                                build_convround(fname.c_str()); break;
2357
                }
2358
 
2359 2 dgisselq
        }
2360
}
2361
 
2362 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.