OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Blame information for rev 22

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 16 dgisselq
/////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    fftgen.cpp
4
//
5
// Project:     A Doubletime Pipelined FFT
6
//
7
// Purpose:     This is the core generator for the project.  Every part
8
//              and piece of this project begins and ends in this program.
9
//              Once built, this program will build an FFT (or IFFT) core
10
//              of arbitrary width, precision, etc., that will run at
11
//              two samples per clock.  (Incidentally, I didn't pick two
12
//              samples per clock because it was easier, but rather because
13
//              there weren't any two-sample per clock FFT's posted on 
14
//              opencores.com.  Further, FFT's running at one sample per
15
//              clock aren't that hard to find.)
16
//
17
//              You can find the documentation for this program in two places.
18
//              One is in the usage() function below.  The second is in the
19
//              'doc'uments directory that comes with this package, 
20
//              specifically in the spec.pdf file.  If it's not there, type
21
//              make in the documents directory to build it.
22
//
23
// Creator:     Dan Gisselquist, Ph.D.
24
//              Gisselquist Technology, LLC
25
//
26
///////////////////////////////////////////////////////////////////////////
27
//
28
// Copyright (C) 2015, Gisselquist Technology, LLC
29
//
30
// This program is free software (firmware): you can redistribute it and/or
31
// modify it under the terms of  the GNU General Public License as published
32
// by the Free Software Foundation, either version 3 of the License, or (at
33
// your option) any later version.
34
//
35
// This program is distributed in the hope that it will be useful, but WITHOUT
36
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
37
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
38
// for more details.
39
//
40
// You should have received a copy of the GNU General Public License along
41
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
42
// target there if the PDF file isn't present.)  If not, see
43
// <http://www.gnu.org/licenses/> for a copy.
44
//
45
// License:     GPL, v3, as defined and found on www.gnu.org,
46
//              http://www.gnu.org/licenses/gpl.html
47
//
48
//
49
///////////////////////////////////////////////////////////////////////////
50
//
51
//
52 2 dgisselq
#include <stdio.h>
53
#include <stdlib.h>
54
#include <unistd.h>
55
#include <sys/stat.h>
56
#include <string.h>
57 14 dgisselq
#include <string>
58 2 dgisselq
#include <math.h>
59
#include <ctype.h>
60
#include <assert.h>
61
 
62
#define COREDIR "fft-core"
63
 
64
// cpyleft
//
// The GPL v3 notice, already formatted as Verilog/C++ line comments, that the
// build_* generators below write into each file they create.
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
89 14 dgisselq
// prjname
//
// The project title substituted into the "Project:" line of each generated
// file's header comment.
const char      prjname[] = "A Doubletime Pipelined FFT";

// creator
//
// The attribution lines, already formatted as line comments, substituted
// into each generated file's header comment.
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Technology, LLC\n";
92
 
93
// lgval
//
// Returns the smallest exponent lg, with lg >= 1, such that (1<<lg) >= vl.
// Every vl of two or less (zero and negative values included) yields 1.
int     lgval(int vl) {
        int     lg;

        // Shift a 64-bit one: with a plain int, (1<<lg) overflows once vl
        // exceeds 2^30, which is undefined and may never terminate.
        for(lg=1; (1ll<<lg) < vl; lg++)
                ;
        return lg;
}
100
 
101
// nextlg
//
// Rounds vl up to a power of two: the result is the smallest power of two
// that is greater than or equal to vl.  Any vl of one or less yields one.
int     nextlg(int vl) {
        int     pow2 = 1;

        // Keep doubling until the candidate reaches vl
        while(pow2 < vl)
                pow2 <<= 1;
        return pow2;
}
108
 
109 14 dgisselq
// bflydelay
//
// Computes a delay count from a bit width (nbits) and a number of extra
// bits (xtra).  With wide = nbits + xtra, the result is nbits+4 whenever
// wide is more than one bit larger than nbits, and wide+3 otherwise.
// NOTE(review): presumably this is the clock latency of the generated
// butterfly, with xtra being the extra coefficient bits -- confirm against
// build_butterfly.
int     bflydelay(int nbits, int xtra) {
        const int       wide = nbits + xtra;

        return (nbits+1 < wide) ? (nbits+4) : (wide+3);
}
118
 
119 14 dgisselq
int     lgdelay(int nbits, int xtra) {
120
        // The butterfly code needs to compare a valid address, of this
121
        // many bits, with an address two greater.  This guarantees we
122
        // have enough bits for that comparison.  We'll also end up with
123
        // more storage space to look for these values, but without a 
124
        // redesign that's just what we'll deal with.
125
        return lgval(bflydelay(nbits, xtra)+3);
126
}
127
 
128 2 dgisselq
void    build_quarters(const char *fname) {
129
        FILE    *fp = fopen(fname, "w");
130
        if (NULL == fp) {
131
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
132
                perror("O/S Err was:");
133
                return;
134
        }
135
 
136
        fprintf(fp,
137
"///////////////////////////////////////////////////////////////////////////\n"
138
"//\n"
139
"// Filename:   qtrstage.v\n"
140
"//             \n"
141
"// Project:    %s\n"
142
"//\n"
143 5 dgisselq
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
144
"//             frequency FFT.  This particular implementation is optimized\n"
145
"//             so that all of the multiplies are accomplished by additions\n"
146
"//             and multiplexers only.\n"
147
"//\n"
148 2 dgisselq
"//\n%s"
149
"//\n",
150
                prjname, creator);
151
        fprintf(fp, "%s", cpyleft);
152
 
153
        fprintf(fp,
154
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
155 5 dgisselq
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
156
        "\t// Parameters specific to the core that should be changed when this\n"
157
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
158
        "\t// spans must use the fftdoubles stage.\n"
159 15 dgisselq
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=1;\n"
160 5 dgisselq
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
161
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
162
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
163
        "\toutput\treg                          o_sync;\n"
164 14 dgisselq
        "\t\n");
165
        fprintf(fp,
166 5 dgisselq
        "\treg\t        wait_for_sync;\n"
167
        "\treg\t[2:0]   pipeline;\n"
168 2 dgisselq
"\n"
169 5 dgisselq
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
170 14 dgisselq
        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"
171
        "\tassign n_diff_r = -diff_r;\n"
172 5 dgisselq
        "\tassign n_diff_i = -diff_i;\n"
173 2 dgisselq
"\n"
174 5 dgisselq
        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"
175
        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"
176
        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"
177
        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"
178 2 dgisselq
"\n"
179 5 dgisselq
        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"
180
        "\treg\t[(2*IWIDTH-1):0]        imem;\n"
181 2 dgisselq
"\n"
182 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
183
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
184
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
185 2 dgisselq
"\n"
186 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"
187
        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"
188
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
189 2 dgisselq
"\n"
190 5 dgisselq
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
191 14 dgisselq
"\n");
192
        fprintf(fp,
193 5 dgisselq
        "\twire [(IWIDTH-1):0]  rnd;\n"
194 9 dgisselq
        "\tgenerate\n"
195
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
196
                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
197
        "\telse\n"
198
                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
199
        "\tendgenerate\n"
200 2 dgisselq
"\n"
201 5 dgisselq
        "\talways @(posedge i_clk)\n"
202
                "\t\tif (i_rst)\n"
203
                "\t\tbegin\n"
204
                        "\t\t\twait_for_sync <= 1'b1;\n"
205
                        "\t\t\tiaddr <= 0;\n"
206
                        "\t\t\tpipeline <= 3'b000;\n"
207
                "\t\tend\n"
208
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
209
                "\t\tbegin\n"
210
                        "\t\t\t// Always\n"
211
                        "\t\t\timem <= i_data;\n"
212
                        "\t\t\tiaddr <= iaddr + 1;\n"
213
                        "\t\t\twait_for_sync <= 1'b0;\n"
214 2 dgisselq
"\n"
215 5 dgisselq
                        "\t\t\t// In sequence, clock = 0\n"
216
                        "\t\t\tif (iaddr[0])\n"
217
                        "\t\t\tbegin\n"
218
                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"
219
                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"
220
                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"
221
                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"
222 2 dgisselq
"\n"
223 5 dgisselq
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"
224
                        "\t\t\tend else\n"
225
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"
226 2 dgisselq
"\n"
227 5 dgisselq
                        "\t\t\t// In sequence, clock = 1\n"
228
                        "\t\t\tif (pipeline[1])\n"
229
                        "\t\t\tbegin\n"
230 9 dgisselq
"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"
231
        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"
232 5 dgisselq
                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
233 9 dgisselq
                                "\t\t\t\tif (ODD == 0)\n"
234 5 dgisselq
                                "\t\t\t\tbegin\n"
235 2 dgisselq
"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
236
"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
237 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
238
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
239 14 dgisselq
                                "\t\t\t\tend else if (INVERSE==0) begin\n"
240 2 dgisselq
"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
241
"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
242 14 dgisselq
"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
243 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
244
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
245 5 dgisselq
                                "\t\t\t\tend else begin\n"
246 2 dgisselq
"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
247
"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
248
"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
249 9 dgisselq
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
250
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
251
 
252 5 dgisselq
                                "\t\t\t\tend\n"
253
                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"
254
                        "\t\t\tend\n"
255
                        "\t\t\t// In sequence, clock = 2\n"
256
                        "\t\t\tif (pipeline[2])\n"
257
                        "\t\t\tbegin\n"
258
                                "\t\t\t\tomem <= ob_b;\n"
259
                                "\t\t\t\to_data <= ob_a;\n"
260
                        "\t\t\tend else\n"
261
                                "\t\t\t\to_data <= omem;\n"
262 6 dgisselq
                        "\t\t\t// Don\'t forget in the sync check that we are running\n"
263
                        "\t\t\t// at two clocks per sample.  Thus we need to\n"
264
                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
265
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"
266 5 dgisselq
                "\t\tend\n"
267 2 dgisselq
"endmodule\n");
268
}
269
 
270
void    build_dblstage(const char *fname) {
271
        FILE    *fp = fopen(fname, "w");
272
        if (NULL == fp) {
273
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
274
                perror("O/S Err was:");
275
                return;
276
        }
277
 
278
        fprintf(fp,
279
"///////////////////////////////////////////////////////////////////////////\n"
280
"//\n"
281
"// Filename:   dblstage.v\n"
282
"//\n"
283
"// Project:    %s\n"
284
"//\n"
285
"// Purpose:    This is part of an FPGA implementation that will process\n"
286 5 dgisselq
"//             the final stage of a decimate-in-frequency FFT, running\n"
287
"//             through the data at two samples per clock.  If you notice\n"
288
"//             from the derivation of an FFT, the only time both even and\n"
289
"//             odd samples are used at the same time is in this stage.\n"
290
"//             Therefore, other than this stage and these twiddles, all of\n"
291
"//             the other stages can run two stages at a time at one sample\n"
292
"//             per clock.\n"
293 2 dgisselq
"//\n"
294
"//             In this implementation, the output is valid one clock after\n"
295
"//             the input is valid.  The output also accumulates one bit\n"
296
"//             above and beyond the number of bits in the input.\n"
297
"//             \n"
298
"//             i_clk   A system clock\n"
299 6 dgisselq
"//             i_rst   A synchronous reset\n"
300 2 dgisselq
"//             i_ce    Circuit enable--nothing happens unless this line is high\n"
301 6 dgisselq
"//             i_sync  A synchronization signal, high once per FFT at the start\n"
302 2 dgisselq
"//             i_left  The first (even) complex sample input.  The higher order\n"
303
"//                     bits contain the real portion, low order bits the\n"
304
"//                     imaginary portion, all in two\'s complement.\n"
305
"//             i_right The next (odd) complex sample input, same format as\n"
306
"//                     i_left.\n"
307
"//             o_left  The first (even) complex output.\n"
308
"//             o_right The next (odd) complex output.\n"
309 6 dgisselq
"//             o_sync  Output synchronization signal.\n"
310 2 dgisselq
"//\n%s"
311
"//\n", prjname, creator);
312
 
313
        fprintf(fp, "%s", cpyleft);
314
        fprintf(fp,
315 9 dgisselq
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
316 19 dgisselq
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0, ROUND=1;\n"
317 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
318 5 dgisselq
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
319 6 dgisselq
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
320
        "\toutput\treg\t\t\to_sync;\n"
321 19 dgisselq
        "\n");
322
        fprintf(fp,
323 5 dgisselq
        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
324
        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
325
        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"
326
        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
327
        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
328
        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
329
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
330 2 dgisselq
"\n"
331 15 dgisselq
"\n"
332 19 dgisselq
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
333 15 dgisselq
"\n"
334 19 dgisselq
        "\twire\tsigned\t[(IWIDTH):0]\trnd;\n"
335
"\n"
336
        "\tgenerate\n"
337
        "\tif ((ROUND==0)||(IWIDTH+1-OWIDTH-SHIFT==0))\n"
338
                "\t\tassign rnd = { {(IWIDTH+1){1'b0}} };\n"
339
        "\telse if (IWIDTH+1-OWIDTH-SHIFT==1)\n"
340
                "\t\tassign rnd = { {(IWIDTH){1'b0}}, 1'b1 };\n"
341
        "\telse if (IWIDTH+1-OWIDTH-SHIFT>1)\n"
342
                "\t\tassign rnd = { {(IWIDTH-(IWIDTH+1-OWIDTH-SHIFT-1)){1'b0}}, 1'b1, {(IWIDTH+1-OWIDTH-SHIFT-1){1'b0}} };\n"
343
        "\tendgenerate\n"
344
"\n"
345 5 dgisselq
        "\t// Don't forget that we accumulate a bit by adding two values\n"
346
        "\t// together. Therefore our intermediate value must have one more\n"
347
        "\t// bit than the two originals.\n"
348
        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"
349 2 dgisselq
"\n"
350 6 dgisselq
        "\treg\twait_for_sync;\n"
351
"\n"
352 5 dgisselq
        "\talways @(posedge i_clk)\n"
353 6 dgisselq
                "\t\tif (i_rst)\n"
354
                        "\t\t\twait_for_sync <= 1'b1;\n"
355
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
356 5 dgisselq
                "\t\tbegin\n"
357 6 dgisselq
                        "\t\t\twait_for_sync <= 1'b0;\n"
358
                        "\t\t\t//\n"
359 19 dgisselq
                        "\t\t\tout_0r <= i_in_0r + i_in_1r + rnd;\n"
360
                        "\t\t\tout_0i <= i_in_0i + i_in_1i + rnd;\n"
361 5 dgisselq
                        "\t\t\t//\n"
362 19 dgisselq
                        "\t\t\tout_1r <= i_in_0r - i_in_1r + rnd;\n"
363
                        "\t\t\tout_1i <= i_in_0i - i_in_1i + rnd;\n"
364 6 dgisselq
                        "\t\t\t//\n"
365
                        "\t\t\to_sync <= i_sync;\n"
366 5 dgisselq
                "\t\tend\n"
367 2 dgisselq
"\n"
368 5 dgisselq
        "\t// Now, if the master control program doesn't want to keep all of\n"
369
        "\t// our bits, we can shift down to OWIDTH bits here.\n"
370
        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
371
        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
372
        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
373
        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
374 2 dgisselq
"\n"
375 5 dgisselq
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
376
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
377 2 dgisselq
"\n"
378
"endmodule\n");
379
        fclose(fp);
380
}
381
 
382
void    build_multiply(const char *fname) {
383
        FILE    *fp = fopen(fname, "w");
384
        if (NULL == fp) {
385
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
386
                perror("O/S Err was:");
387
                return;
388
        }
389
 
390
        fprintf(fp,
391
"///////////////////////////////////////////////////////////////////////////\n"
392
"//\n"
393
"// Filename:   shiftaddmpy.v\n"
394
"//\n"
395
"// Project:    %s\n"
396
"//\n"
397
"// Purpose:    A portable shift and add multiply.\n"
398
"//\n"
399
"//             While both Xilinx and Altera will offer single clock \n"
400
"//             multiplies, this simple approach will multiply two numbers\n"
401
"//             on any architecture.  The result maintains the full width\n"
402
"//             of the multiply, there are no extra stuff bits, no rounding,\n"
403
"//             no shifted bits, etc.\n"
404
"//\n"
405
"//             Further, for those applications that can support it, this\n"
406
"//             multiply is pipelined and will produce one answer per clock.\n"
407
"//\n"
408
"//             For minimal processing delay, make the first parameter\n"
409
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
410
"//\n"
411
"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"
412
"//             That is, if the data is present on the input at clock t=0,\n"
413
"//             the result will be present on the output at time t=AWIDTH+1;\n"
414
"//\n"
415
"//\n%s"
416
"//\n", prjname, creator);
417
 
418
        fprintf(fp, "%s", cpyleft);
419
        fprintf(fp,
420
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
421
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
422
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
423
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
424
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
425
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
426
"\n"
427
        "\treg\t[(AWIDTH-1):0]\tu_a;\n"
428
        "\treg\t[(BWIDTH-1):0]\tu_b;\n"
429
        "\treg\t\t\tsgn;\n"
430
"\n"
431
        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"
432
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
433
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
434
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
435
        "\tgenvar k;\n"
436
"\n"
437 5 dgisselq
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
438
        "\t// taking the absolute value here would require an additional bit.\n"
439
        "\t// However, because our results are now unsigned, we can stay\n"
440
        "\t// within the number of bits given (for now).\n"
441 2 dgisselq
        "\talways @(posedge i_clk)\n"
442
                "\t\tif (i_ce)\n"
443
                "\t\tbegin\n"
444
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
445
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
446
                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"
447
                "\t\tend\n"
448
"\n"
449
        "\talways @(posedge i_clk)\n"
450
                "\t\tif (i_ce)\n"
451
                "\t\tbegin\n"
452
                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"
453
                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"
454
                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
455
                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"
456
                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
457
                "\t\tend\n"
458
"\n"
459
        "\tgenerate\n"
460 21 dgisselq
        "\tfor(k=0; k<AWIDTH-1; k=k+1)\n"
461 2 dgisselq
        "\tbegin\n"
462 21 dgisselq
                "\t\talways @(posedge i_clk)\n"
463
                "\t\tif (i_ce)\n"
464 2 dgisselq
                "\t\tbegin\n"
465
                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"
466
                        "\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"
467
                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"
468
                        "\t\t\tr_s[k+1] <= r_s[k];\n"
469
                "\t\tend\n"
470
        "\tend\n"
471
        "\tendgenerate\n"
472
"\n"
473
        "\talways @(posedge i_clk)\n"
474
                "\t\tif (i_ce)\n"
475
                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"
476
"\n"
477
"endmodule\n");
478
 
479
        fclose(fp);
480
}
481
 
482
void    build_dblreverse(const char *fname) {
483
        FILE    *fp = fopen(fname, "w");
484
        if (NULL == fp) {
485
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
486
                perror("O/S Err was:");
487
                return;
488
        }
489
 
490
        fprintf(fp,
491
"///////////////////////////////////////////////////////////////////////////\n"
492
"//\n"
493
"// Filename:   dblreverse.v\n"
494
"//\n"
495
"// Project:    %s\n"
496
"//\n"
497
"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"
498
"//             expected as follows:\n"
499
"//\n"
500
"//             i_clk   A running clock at whatever system speed is offered.\n"
501
"//             i_rst   A synchronous reset signal, that resets all internals\n"
502
"//             i_ce    If this is one, one input is consumed and an output\n"
503
"//                     is produced.\n"
504
"//             i_in_0, i_in_1\n"
505
"//                     Two inputs to be consumed, each of width WIDTH.\n"
506
"//             o_out_0, o_out_1\n"
507
"//                     Two of the bitreversed outputs, also of the same\n"
508
"//                     width, WIDTH.  Of course, there is a delay from the\n"
509
"//                     first input to the first output.  For this purpose,\n"
510
"//                     o_sync is present.\n"
511
"//             o_sync  This will be a 1'b1 for the first value in any block.\n"
512
"//                     Following a reset, this will only become 1'b1 once\n"
513
"//                     the data has been loaded and is now valid.  After that,\n"
514
"//                     all outputs will be valid.\n"
515
"//\n%s"
516
"//\n", prjname, creator);
517
        fprintf(fp, "%s", cpyleft);
518
        fprintf(fp,
519
"\n\n"
520
"//\n"
521
"// How do we do bit reversing at two smples per clock?  Can we separate out\n"
522
"// our work into eight memory banks, writing two banks at once and reading\n"
523
"// another two banks in the same clock?\n"
524
"//\n"
525
"//     mem[00xxx0] = s_0[n]\n"
526
"//     mem[00xxx1] = s_1[n]\n"
527
"//     o_0[n] = mem[10xxx0]\n"
528
"//     o_1[n] = mem[11xxx0]\n"
529
"//     ...\n"
530
"//     mem[01xxx0] = s_0[m]\n"
531
"//     mem[01xxx1] = s_1[m]\n"
532
"//     o_0[m] = mem[10xxx1]\n"
533
"//     o_1[m] = mem[11xxx1]\n"
534
"//     ...\n"
535
"//     mem[10xxx0] = s_0[n]\n"
536
"//     mem[10xxx1] = s_1[n]\n"
537
"//     o_0[n] = mem[00xxx0]\n"
538
"//     o_1[n] = mem[01xxx0]\n"
539
"//     ...\n"
540
"//     mem[11xxx0] = s_0[m]\n"
541
"//     mem[11xxx1] = s_1[m]\n"
542
"//     o_0[m] = mem[00xxx1]\n"
543
"//     o_1[m] = mem[01xxx1]\n"
544
"//     ...\n"
545
"//\n"
546 5 dgisselq
"//     The answer is that, yes we can but: we need to use four memory banks\n"
547
"//     to do it properly.  These four banks are defined by the two bits\n"
548
"//     that determine the top and bottom of the correct address.  Larger\n"
549
"//     FFT\'s would require more memories.\n"
550
"//\n"
551 2 dgisselq
"//\n");
552
        fprintf(fp,
553
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
554 5 dgisselq
        "\t\to_out_0, o_out_1, o_sync);\n"
555
        "\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
556
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
557
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
558
        "\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
559
        "\toutput\treg\t\t\to_sync;\n"
560 2 dgisselq
"\n"
561 5 dgisselq
        "\treg\tin_reset;\n"
562
        "\treg\t[(LGSIZE):0]\tiaddr;\n"
563
        "\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"
564
        "\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"
565
        "\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"
566
        "\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"
567 2 dgisselq
"\n"
568 5 dgisselq
        "\twire\t[(2*LGSIZE-1):0]       braddr;\n"
569
        "\tgenvar\tk;\n"
570 21 dgisselq
        "\tgenerate for(k=0; k<LGSIZE; k=k+1)\n"
571 5 dgisselq
                "\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"
572
        "\tendgenerate\n"
573 2 dgisselq
"\n"
574 5 dgisselq
        "\talways @(posedge i_clk)\n"
575
                "\t\tif (i_rst)\n"
576
                "\t\tbegin\n"
577
                        "\t\t\tiaddr <= 0;\n"
578
                        "\t\t\tin_reset <= 1'b1;\n"
579
                "\t\tend else if (i_ce)\n"
580
                "\t\tbegin\n"
581
                        "\t\t\tif (iaddr[(LGSIZE-1)])\n"
582
                        "\t\t\tbegin\n"
583
                                "\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
584
                                "\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
585
                        "\t\t\tend else begin\n"
586
                                "\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
587
                                "\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
588
                        "\t\t\tend\n"
589
                        "\t\t\tiaddr <= iaddr + 2;\n"
590
                        "\t\t\tif (&iaddr[(LGSIZE-1):1])\n"
591
                                "\t\t\t\tin_reset <= 1'b0;\n"
592
                        "\t\t\tif (in_reset)\n"
593
                        "\t\t\tbegin\n"
594
                                "\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"
595
                                "\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"
596
                                "\t\t\t\to_sync <= 1'b0;\n"
597
                        "\t\t\tend else\n"
598
                        "\t\t\tbegin\n"
599
                                "\t\t\t\tif (braddr[0])\n"
600
                                "\t\t\t\tbegin\n"
601 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
602
"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
603 5 dgisselq
                                "\t\t\t\tend else begin\n"
604 2 dgisselq
"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
605
"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
606 5 dgisselq
                                "\t\t\t\tend\n"
607
                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"
608
                        "\t\t\tend\n"
609
                "\t\tend\n"
610 2 dgisselq
"\n"
611 21 dgisselq
"endmodule\n");
612 2 dgisselq
 
613
        fclose(fp);
614
}
615
 
616 14 dgisselq
void    build_butterfly(const char *fname, int xtracbits) {
617 2 dgisselq
        FILE    *fp = fopen(fname, "w");
618
        if (NULL == fp) {
619
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
620
                perror("O/S Err was:");
621
                return;
622
        }
623
 
624
        fprintf(fp,
625
"///////////////////////////////////////////////////////////////////////////\n"
626
"//\n"
627
"// Filename:   butterfly.v\n"
628
"//\n"
629
"// Project:    %s\n"
630
"//\n"
631
"// Purpose:    This routine caculates a butterfly for a decimation\n"
632
"//             in frequency version of an FFT.  Specifically, given\n"
633
"//             complex Left and Right values together with a \n"
634
"//             coefficient, the output of this routine is given\n"
635
"//             by:\n"
636
"//\n"
637
"//             L' = L + R\n"
638
"//             R' = (L - R)*C\n"
639
"//\n"
640
"//             The rest of the junk below handles timing (mostly),\n"
641
"//             to make certain that L' and R' reach the output at\n"
642
"//             the same clock.  Further, just to make certain\n"
643
"//             that is the case, an 'aux' input exists.  This\n"
644
"//             aux value will come out of this routine synchronized\n"
645
"//             to the values it came in with.  (i.e., both L', R',\n"
646
"//             and aux all have the same delay.)  Hence, a caller\n"
647
"//             of this routine may set aux on the first input with\n"
648
"//             valid data, and then wait to see aux set on the output\n"
649
"//             to know when to find the first output with valid data.\n"
650
"//\n"
651
"//             All bits are preserved until the very last clock,\n"
652
"//             where any more bits than OWIDTH will be quietly\n"
653
"//             discarded.\n"
654
"//\n"
655
"//             This design features no overflow checking.\n"
656
"// \n"
657
"// Notes:\n"
658
"//             CORDIC:\n"
659
"//             Much as we would like, we can't use a cordic here.\n"
660
"//             The goal is to accomplish an FFT, as defined, and a\n"
661
"//             CORDIC places a scale factor onto the data.  Removing\n"
662
"//             the scale factor would cost a two multiplies, which\n"
663
"//             is precisely what we are trying to avoid.\n"
664
"//\n"
665
"//\n"
666
"//             3-MULTIPLIES:\n"
667
"//             It should also be possible to do this with three \n"
668
"//             multiplies and an extra two addition cycles.  \n"
669
"//\n"
670
"//             We want\n"
671
"//                     R+I = (a + jb) * (c + jd)\n"
672
"//                     R+I = (ac-bd) + j(ad+bc)\n"
673
"//             We multiply\n"
674
"//                     P1 = ac\n"
675
"//                     P2 = bd\n"
676
"//                     P3 = (a+b)(c+d)\n"
677
"//             Then \n"
678
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
679
"//\n"
680
"//             WIDTHS:\n"
681
"//             On multiplying an X width number by an\n"
682
"//             Y width number, X>Y, the result should be (X+Y)\n"
683
"//             bits, right?\n"
684
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
685
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
686
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
687
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
688
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
689
"//             YUP!  But just barely.  Do this and you'll really want\n"
690
"//             to drop a bit, although you will risk overflow in so\n"
691
"//             doing.\n"
692
"//\n%s"
693
"//\n", prjname, creator);
694
        fprintf(fp, "%s", cpyleft);
695
 
696
        fprintf(fp,
697 6 dgisselq
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
698 5 dgisselq
                "\t\to_left, o_right, o_aux);\n"
699
        "\t// Public changeable parameters ...\n"
700 14 dgisselq
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
701 5 dgisselq
        "\t// Parameters specific to the core that should not be changed.\n"
702 14 dgisselq
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
703 15 dgisselq
                        "\t\t\tSHIFT=0, ROUND=1;\n"
704 5 dgisselq
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
705
        "\t// this value is fractional, then round up to the nearest\n"
706
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
707 14 dgisselq
        "\tparameter\tLGDELAY=%d;\n"
708 6 dgisselq
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
709 5 dgisselq
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
710
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
711
        "\tinput\t\ti_aux;\n"
712
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
713 21 dgisselq
        "\toutput\treg  o_aux;\n"
714 14 dgisselq
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
715
        bflydelay(16, xtracbits), lgdelay(16,xtracbits));
716
        fprintf(fp,
717 5 dgisselq
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
718 2 dgisselq
"\n"
719 5 dgisselq
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
720
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
721
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
722
        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"
723
        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"
724
        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"
725
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
726
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
727
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
728
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
729
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
730 2 dgisselq
"\n"
731 5 dgisselq
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
732 2 dgisselq
"\n"
733 5 dgisselq
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
734
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
735 6 dgisselq
        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
736 5 dgisselq
        "\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
737 6 dgisselq
        "\treg\t\t\t\tovalid;\n"
738 5 dgisselq
"\n");
739
        fprintf(fp,
740
        "\t// Set up the input to the multiply\n"
741 2 dgisselq
        "\talways @(posedge i_clk)\n"
742
                "\t\tif (i_ce)\n"
743
                "\t\tbegin\n"
744
                        "\t\t\t// One clock just latches the inputs\n"
745
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
746
                        "\t\t\tr_right <= i_right;\n"
747
                        "\t\t\tr_aux <= i_aux;\n"
748
                        "\t\t\tr_coef  <= i_coef;\n"
749
                        "\t\t\t// Next clock adds/subtracts\n"
750
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
751
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
752
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
753
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
754
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
755
                        "\t\t\tr_aux_2 <= r_aux;\n"
756
                        "\t\t\tr_coef_2<= r_coef;\n"
757
        "\t\tend\n"
758 5 dgisselq
"\n");
759
        fprintf(fp,
760
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
761
        "\t// to be multiplied, but yet we still need the results in sync\n"
762
        "\t// with the answer when it is ready.\n"
763 2 dgisselq
        "\talways @(posedge i_clk)\n"
764 6 dgisselq
                "\t\tif (i_rst)\n"
765 2 dgisselq
                "\t\tbegin\n"
766 6 dgisselq
                        "\t\t\tfifo_addr <= 0;\n"
767
                        "\t\t\tovalid <= 1'b0;\n"
768
                "\t\tend else if (i_ce)\n"
769
                "\t\tbegin\n"
770 2 dgisselq
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
771
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
772
                        "\t\t\t// right side.\n"
773
                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
774
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
775 14 dgisselq
"\n"
776
                        "\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n"
777 2 dgisselq
                "\t\tend\n"
778
"\n"
779 5 dgisselq
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
780
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
781
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
782
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
783 2 dgisselq
"\n"
784 5 dgisselq
"\n");
785
        fprintf(fp,
786
        "\t// Multiply output is always a width of the sum of the widths of\n"
787
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
788
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
789
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
790
        "\t// three multiply complex multiply cannot increase the total\n"
791
        "\t// number of bits in our final output.  We\'ll take care of\n"
792
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
793
        "\t// below.\n"
794 2 dgisselq
"\n"
795 5 dgisselq
"\n");
796
        fprintf(fp,
797
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
798
        "\t// by doing three multiplies we accomplish the work of four.\n"
799
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
800
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
801
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
802
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
803
        "\t// We do this by calculating the intermediate products P1, P2,\n"
804
        "\t// and P3 as\n"
805
        "\t//\tP1 = ac\n"
806
        "\t//\tP2 = bd\n"
807
        "\t//\tP3 = (a + b) * (c + d)\n"
808
        "\t// and then complete our final answer with\n"
809
        "\t//\tac - bd = P1 - P2 (this checks)\n"
810
        "\t//\tad + bc = P3 - P2 - P1\n"
811
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
812
        "\t//\t        = bc + ad (this checks)\n"
813 2 dgisselq
"\n"
814 5 dgisselq
"\n");
815
        fprintf(fp,
816
        "\t// This should really be based upon an IF, such as in\n"
817
        "\t// if (IWIDTH < CWIDTH) then ...\n"
818
        "\t// However, this is the only (other) way I know to do it.\n"
819 2 dgisselq
        "\tgenerate\n"
820
        "\tif (CWIDTH < IWIDTH+1)\n"
821
        "\tbegin\n"
822 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
823
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
824
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
825
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
826
                "\n"
827 2 dgisselq
                "\t\t// We need to pad these first two multiplies by an extra\n"
828 5 dgisselq
                "\t\t// bit just to keep them aligned with the third,\n"
829
                "\t\t// simpler, multiply.\n"
830 2 dgisselq
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
831
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
832
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
833
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
834 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
835 2 dgisselq
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
836
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
837 22 dgisselq
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
838 2 dgisselq
        "\tend else begin\n"
839 22 dgisselq
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
840
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
841
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
842
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
843
                "\n"
844 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
845
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
846
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
847
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
848
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
849 5 dgisselq
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
850 2 dgisselq
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
851 22 dgisselq
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
852 2 dgisselq
        "\tend\n"
853
        "\tendgenerate\n"
854 5 dgisselq
"\n");
855
        fprintf(fp,
856
        "\t// These values are held in memory and delayed during the\n"
857
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
858
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
859
        "\t// therefore, the left_x values need to be right shifted by\n"
860
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
861
        "\t// extension.\n"
862 2 dgisselq
        "\twire aux;\n"
863 5 dgisselq
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
864
        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"
865
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
866
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
867
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
868 2 dgisselq
"\n"
869
"\n"
870 5 dgisselq
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
871
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
872
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
873 21 dgisselq
        "\twire\tsigned\t[(CWIDTH+IWIDTH+3-1):0]        rnd;\n"
874 5 dgisselq
        "\tgenerate\n"
875 16 dgisselq
        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"
876 5 dgisselq
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
877 16 dgisselq
        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"
878
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"
879 5 dgisselq
        "\telse\n"
880 15 dgisselq
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"
881
                "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"
882 5 dgisselq
        "\tendgenerate\n"
883
"\n");
884
        fprintf(fp,
885 2 dgisselq
        "\talways @(posedge i_clk)\n"
886
                "\t\tif (i_ce)\n"
887
                "\t\tbegin\n"
888
                        "\t\t\t// First clock, recover all values\n"
889
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
890
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
891 5 dgisselq
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
892
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
893
                        "\t\t\t// extra bits we need to get rid of.)\n"
894 2 dgisselq
                        "\t\t\tmpy_r <= p_one - p_two;\n"
895
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
896
"\n"
897
                        "\t\t\t// Second clock, round and latch for final clock\n"
898
                        "\t\t\tb_right_r <= mpy_r + rnd;\n"
899
                        "\t\t\tb_right_i <= mpy_i + rnd;\n"
900 5 dgisselq
                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"
901
                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"
902 6 dgisselq
                        "\t\t\to_aux <= aux & ovalid;\n"
903 2 dgisselq
                "\t\tend\n"
904 5 dgisselq
"\n");
905
        fprintf(fp,
906 2 dgisselq
        "\t// Final clock--clock and remove unnecessary bits.\n"
907 5 dgisselq
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
908
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
909
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
910
        "\t// them, but the actual values will never fill all these bits.\n"
911
        "\t// In particular, we only need:\n"
912
        "\t//\t IWIDTH bits for the input\n"
913
        "\t//\t     +1 bit for the add/subtract\n"
914
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
915
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
916
        "\t//\t ------\n"
917
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
918
        "\t//\n"
919
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
920
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
921
        "\t//\t   IWIDTH bits for the input\n"
922
        "\t//\t       +1 bit for the add/subtract\n"
923
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
924
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
925
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
926
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
927
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
928
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
929
        "\t// SHIFT) we accomplish that here.\n"
930
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
931
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
932
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
933
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
934 2 dgisselq
"\n"
935 5 dgisselq
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
936
        "\t// complement numbers per output word, so that each output word\n"
937
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
938
        "\t// portion and the bottom half being the imaginary portion.\n"
939 2 dgisselq
        "\tassign       o_left = { o_left_r, o_left_i };\n"
940
        "\tassign       o_right= { o_right_r,o_right_i};\n"
941
"\n"
942
"endmodule\n");
943
        fclose(fp);
944
}
945
 
946 22 dgisselq
void    build_hwbfly(const char *fname, int xtracbits) {
947
        FILE    *fp = fopen(fname, "w");
948
        if (NULL == fp) {
949
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
950
                perror("O/S Err was:");
951
                return;
952
        }
953
 
954
        fprintf(fp,
955
"///////////////////////////////////////////////////////////////////////////\n"
956
"//\n"
957
"// Filename:   hwbfly.v\n"
958
"//\n"
959
"// Project:    %s\n"
960
"//\n"
961
"// Purpose:    This routine is identical to the butterfly.v routine found\n"
962
"//             in 'butterfly.v', save only that it uses the verilog \n"
963
"//             operator '*' in hopes that the synthesizer would be able\n"
964
"//             to optimize it with hardware resources.\n"
965
"//\n"
966
"//             It is understood that a hardware multiply can complete its\n"
967
"//             operation in a single clock.\n"
968
"//\n"
969
"//\n%s"
970
"//\n", prjname, creator);
971
        fprintf(fp, "%s", cpyleft);
972
        fprintf(fp,
973
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
974
                "\t\to_left, o_right, o_aux);\n"
975
        "\t// Public changeable parameters ...\n"
976
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
977
        "\t// Parameters specific to the core that should not be changed.\n"
978
        "\tparameter\tSHIFT=0, ROUND=1;\n"
979
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
980
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
981
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
982
        "\tinput\t\ti_aux;\n"
983
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
984
        "\toutput\treg\to_aux;\n"
985
"\n", xtracbits);
986
        fprintf(fp,
987
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
988
"\n"
989
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
990
        "\treg\t                        r_aux, r_aux_2;\n"
991
        "\treg\t[(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
992
        "\twire\tsigned [(CWIDTH-1):0]  r_coef_r, r_coef_i;\n"
993
        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"
994
        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"
995
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
996
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
997
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
998
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
999
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1000
"\n"
1001
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1002
"\n"
1003
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1004
"\n"
1005
        "\t// Set up the input to the multiply\n"
1006
        "\talways @(posedge i_clk)\n"
1007
        "\t\tif (i_rst)\n"
1008
        "\t\tbegin\n"
1009
        "\t\t\tr_aux <= 1'b0;\n"
1010
        "\t\t\tr_aux_2 <= 1'b0;\n"
1011
        "\t\tend else if (i_ce)\n"
1012
        "\t\tbegin\n"
1013
        "\t\t\t// One clock just latches the inputs\n"
1014
        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1015
        "\t\t\tr_right <= i_right;\n"
1016
        "\t\t\tr_aux <= i_aux;\n"
1017
        "\t\t\tr_coef  <= i_coef;\n"
1018
        "\t\t\t// Next clock adds/subtracts\n"
1019
        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1020
        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1021
        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1022
        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1023
        "\t\t\t// Other inputs are simply delayed on second clock\n"
1024
        "\t\t\tr_aux_2 <= r_aux;\n"
1025
        "\t\t\tr_coef_2<= r_coef;\n"
1026
        "\t\tend\n"
1027
        "\n\n");
1028
        fprintf(fp,
1029
"\t// See comments in the butterfly.v source file for a discussion of\n"
1030
"\t// these operations and the appropriate bit widths.\n\n");
1031
        fprintf(fp,
1032
        "\twire signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1033
        "\tassign       ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1034
        "\tassign       ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
1035
        "\treg\tsigned  [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_one, p_two, p_three;\n"
1036
"\n"
1037
        "\treg\tsigned  [(CWIDTH):0]    p3c_in, p1c_in, p2c_in;\n"
1038
        "\treg\tsigned  [(IWIDTH+1):0]  p3d_in, p1d_in, p2d_in;\n"
1039
        "\treg\t[3:0]           pipeline;\n"
1040
"\n"
1041
        "\talways @(posedge i_clk)\n"
1042
        "\tbegin\n"
1043
                "\t\tif (i_rst)\n"
1044
                "\t\tbegin\n"
1045
                        "\t\t\tpipeline <= 4'h0;\n"
1046
                        "\t\t\tleftv <= 0;\n"
1047
                        "\t\t\tleftvv <= 0;\n"
1048
                "\t\tend else if (i_clk)\n"
1049
                "\t\tbegin\n"
1050
                        "\t\t\t// Second clock, pipeline = 1\n"
1051
                        "\t\t\tp1c_in <= { ir_coef_r[(CWIDTH-1)], ir_coef_r };\n"
1052
                        "\t\t\tp2c_in <= { ir_coef_i[(CWIDTH-1)], ir_coef_i };\n"
1053
                        "\t\t\tp1d_in <= { r_dif_r[(IWIDTH)], r_dif_r };\n"
1054
                        "\t\t\tp2d_in <= { r_dif_i[(IWIDTH)], r_dif_i };\n"
1055
                        "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1056
                        "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1057
"\n     "
1058
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1059
"\n     "
1060
                        "\t\t\t// Third clock, pipeline = 3\n"
1061
                        "\t\t\tp_one   <= p1c_in * p1d_in;\n"
1062
                        "\t\t\tp_two   <= p2c_in * p2d_in;\n"
1063
                        "\t\t\tp_three <= p3c_in * p3d_in;\n"
1064
                        "\t\t\tleftvv <= leftv;\n"
1065
"\n"
1066
                        "\t\t\tpipeline <= { pipeline[2:0], 1'b1 };\n"
1067
                "\t\tend\n"
1068
        "\tend\n"
1069
"\n");
1070
 
1071
        fprintf(fp,
1072
        "\t// These values are held in memory and delayed during the\n"
1073
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1074
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1075
        "\t// therefore, the left_x values need to be right shifted by\n"
1076
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1077
        "\t// extension.\n"
1078
        "\twire\taux_s;\n"
1079
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1080
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1081
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
1082
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
1083
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1084
"\n"
1085
"\n"
1086
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
1087
                                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
1088
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1089
        "\twire signed  [(CWIDTH+IWIDTH+3-1):0] rnd;\n"
1090
        "\tgenerate\n"
1091
        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"
1092
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
1093
        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"
1094
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"
1095
        "\telse\n"
1096
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"
1097
                        "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"
1098
        "\tendgenerate\n"
1099
"\n");
1100
 
1101
        fprintf(fp,
1102
        "\talways @(posedge i_clk)\n"
1103
        "\t\tif (i_rst)\n"
1104
        "\t\tbegin\n"
1105
                "\t\t\tleft_saved <= 0;\n"
1106
                "\t\t\tb_left_r <= 0;\n"
1107
                "\t\t\tb_left_i <= 0;\n"
1108
                "\t\t\tb_right_r <= 0;\n"
1109
                "\t\t\tb_right_i <= 0;\n"
1110
                "\t\t\to_aux <= 1'b0;\n"
1111
        "\t\tend else if (i_ce)\n"
1112
        "\t\tbegin\n"
1113
                "\t\t\t// First clock, recover all values\n"
1114
                "\t\t\tleft_saved <= leftvv;\n"
1115
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1116
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1117
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1118
                "\t\t\t// extra bits we need to get rid of.)\n"
1119
                "\t\t\tmpy_r <= p_one - p_two;\n"
1120
                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
1121
"\n"
1122
                "\t\t\t// Second clock, round and latch for final clock\n"
1123
                "\t\t\tb_right_r <= mpy_r + rnd;\n"
1124
                "\t\t\tb_right_i <= mpy_i + rnd;\n"
1125
                "\t\t\tb_left_r <= { {2{left_sr[(IWIDTH+CWIDTH)]}},left_sr } + rnd;\n"
1126
                "\t\t\tb_left_i <= { {2{left_si[(IWIDTH+CWIDTH)]}},left_si } + rnd;\n"
1127
"\n"
1128
                "\t\t\to_aux <= aux_s;\n"
1129
        "\t\tend\n"
1130
        "\n");
1131
 
1132
        fprintf(fp,
1133
        "\t// Final step--remove unnecessary bits.\n"
1134
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
1135
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
1136
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
1137
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
1138
"\n"
1139
        "\t// As a final step, we pack our outputs into two packed two's\n"
1140
        "\t// complement numbers per output word, so that each output word\n"
1141
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1142
        "\t// portion and the bottom half being the imaginary portion.\n"
1143
        "\tassign\to_left = { o_left_r, o_left_i };\n"
1144
        "\tassign\to_right= { o_right_r,o_right_i};\n"
1145
"\n"
1146
"endmodule\n");
1147
 
1148
}
1149
 
1150
void    build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false) {
1151 2 dgisselq
        FILE    *fstage = fopen(fname, "w");
1152
        int     cbits = nbits + xtra;
1153
 
1154
        if ((cbits * 2) >= sizeof(long long)*8) {
1155
                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");
1156
                exit(-1);
1157
        }
1158
 
1159
        if (fstage == NULL) {
1160
                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);
1161
                perror("O/S Err was:");
1162
                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");
1163
                return;
1164
        }
1165
 
1166
        fprintf(fstage,
1167
"////////////////////////////////////////////////////////////////////////////\n"
1168
"//\n"
1169
"// Filename:   %sfftstage_%c%d.v\n"
1170
"//\n"
1171
"// Project:    %s\n"
1172
"//\n"
1173
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
1174
"//             be used by a FFT core compiler to generate FFTs which may be\n"
1175
"//             used as part of an FFT core.  Specifically, this file \n"
1176
"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"
1177
"//             FFT, there shall be (N-1) of these stages.  \n"
1178
"//\n%s"
1179
"//\n",
1180
                (inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);
1181
        fprintf(fstage, "%s", cpyleft);
1182
        fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",
1183
                (inv)?"i":"", (odd)?'o':'e', stage*2);
1184
        // These parameter values are useless at this point--they are to be
1185
        // replaced by the parameter values in the calling program.  Only
1186
        // problem is, the CWIDTH needs to match exactly!
1187
        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",
1188
                nbits, cbits, nbits+1);
1189
        fprintf(fstage,
1190
"\t// Parameters specific to the core that should be changed when this\n"
1191
"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"
1192
"\t// of the span, or the base two log of the current FFT size) is 3.\n"
1193
"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"
1194 6 dgisselq
"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");
1195 2 dgisselq
        fprintf(fstage,
1196
"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"
1197
"\tinput                [(2*IWIDTH-1):0]        i_data;\n"
1198
"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"
1199
"\toutput       reg                             o_sync;\n"
1200
"\n"
1201
"\treg  wait_for_sync;\n"
1202
"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"
1203
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
1204 8 dgisselq
"\treg  ib_sync;\n"
1205 2 dgisselq
"\n"
1206
"\treg  b_started;\n"
1207
"\twire ob_sync;\n"
1208
"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");
1209
        fprintf(fstage,
1210
"\n"
1211
"\t// %scmem is defined as an array of real and complex values,\n"
1212
"\t// where the top CWIDTH bits are the real value and the bottom\n"
1213
"\t// CWIDTH bits are the imaginary value.\n"
1214
"\t//\n"
1215
"\t// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"
1216
"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"
1217
"\t//\n"
1218
"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"
1219
"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",
1220
                (inv)?"i":"", (inv)?"i":"",
1221
                (inv)?"i":"", (odd)?'o':'e',stage<<1,
1222
                (inv)?"i":"");
1223
        {
1224
                FILE    *cmem;
1225
 
1226 14 dgisselq
                {
1227
                        char    *memfile, *ptr;
1228
 
1229
                        memfile = new char[strlen(fname)+128];
1230
                        strcpy(memfile, fname);
1231
                        if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {
1232
                                ptr++;
1233
                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
1234
                        } else {
1235
                                sprintf(memfile, "%s/%scmem_%c%d.hex",
1236
                                        COREDIR, (inv)?"i":"",
1237
                                        (odd)?'o':'e', stage*2);
1238
                        }
1239
                        // strcpy(&memfile[strlen(memfile)-2], ".hex");
1240
                        cmem = fopen(memfile, "w");
1241
                        if (NULL == cmem) {
1242
                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);
1243
                                perror("Err from O/S:");
1244
                                exit(-2);
1245
                        }
1246
 
1247
                        delete[] memfile;
1248 2 dgisselq
                }
1249
                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");
1250
                for(int i=0; i<stage/2; i++) {
1251
                        int k = 2*i+odd;
1252 9 dgisselq
                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);
1253 2 dgisselq
                        double  c, s;
1254
                        long long ic, is, vl;
1255
 
1256
                        c = cos(W); s = sin(W);
1257 20 dgisselq
                        ic = (long long)round((1ll<<(cbits-2)) * c);
1258
                        is = (long long)round((1ll<<(cbits-2)) * s);
1259 2 dgisselq
                        vl = (ic & (~(-1ll << (cbits))));
1260
                        vl <<= (cbits);
1261
                        vl |= (is & (~(-1ll << (cbits))));
1262
                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);
1263
                        /*
1264
                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",
1265
                                ((cbits*2+3)/4), vl, c, s,
1266
                                ic & (~(-1ll<<(((cbits+3)/4)*4))),
1267
                                is & (~(-1ll<<(((cbits+3)/4)*4))));
1268
                        */
1269
                } fclose(cmem);
1270
        }
1271
 
1272
        fprintf(fstage,
1273 6 dgisselq
"\treg  [(LGWIDTH-2):0]         iaddr;\n"
1274 2 dgisselq
"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"
1275
"\n"
1276 8 dgisselq
"\treg  [LGSPAN:0]              oB;\n"
1277 2 dgisselq
"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"
1278
"\n"
1279
"\talways @(posedge i_clk)\n"
1280
        "\t\tif (i_rst)\n"
1281
        "\t\tbegin\n"
1282
                "\t\t\twait_for_sync <= 1'b1;\n"
1283
                "\t\t\tiaddr <= 0;\n"
1284
                "\t\t\toB <= 0;\n"
1285 8 dgisselq
                "\t\t\tib_sync   <= 1'b0;\n"
1286
                "\t\t\to_sync    <= 1'b0;\n"
1287
                "\t\t\tb_started <= 1'b0;\n"
1288 2 dgisselq
        "\t\tend\n"
1289
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1290
        "\t\tbegin\n"
1291
                "\t\t\t//\n"
1292
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
1293
                "\t\t\t//\n"
1294
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
1295
                "\t\t\tiaddr <= iaddr + 1;\n"
1296
                "\t\t\twait_for_sync <= 1'b0;\n"
1297
"\n"
1298
                "\t\t\t//\n"
1299
                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"
1300
                "\t\t\t// butterfly\n"
1301
                "\t\t\t//\n"
1302 6 dgisselq
                "\t\t\tif (iaddr[LGSPAN])\n"
1303 2 dgisselq
                "\t\t\tbegin\n"
1304
                        "\t\t\t\t// One input from memory, ...\n"
1305
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
1306
                        "\t\t\t\t// One input clocked in from the top\n"
1307
                        "\t\t\t\tib_b <= i_data;\n"
1308
                        "\t\t\t\t// Set the sync to true on the very first\n"
1309
                        "\t\t\t\t// valid input in, and hence on the very\n"
1310
                        "\t\t\t\t// first valid data out per FFT.\n"
1311 6 dgisselq
                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
1312 2 dgisselq
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
1313 8 dgisselq
                "\t\t\tend else begin\n"
1314
                        "\t\t\t\t// Just to make debugging easier, let\'s\n"
1315
                        "\t\t\t\t// clear these registers.  That\'ll make\n"
1316
                        "\t\t\t\t// the transition easier to watch.\n"
1317
                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"
1318
                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"
1319
                        "\t\t\t\tib_sync <= 1'b0;\n"
1320
                "\t\t\tend\n"
1321 2 dgisselq
"\n"
1322
                "\t\t\t//\n"
1323
                "\t\t\t// Next step: recover the outputs from the butterfly\n"
1324
                "\t\t\t//\n"
1325 8 dgisselq
                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
1326 2 dgisselq
                "\t\t\tbegin // A butterfly output is available\n"
1327
                        "\t\t\t\tb_started <= 1'b1;\n"
1328 8 dgisselq
                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
1329 2 dgisselq
                        "\t\t\t\toB <= oB+1;\n"
1330
"\n"
1331 6 dgisselq
                        "\t\t\t\to_sync <= (ob_sync);\n"
1332 2 dgisselq
                        "\t\t\t\to_data <= ob_a;\n"
1333
                "\t\t\tend else if (b_started)\n"
1334
                "\t\t\tbegin // and keep outputting once you start--at a rate\n"
1335
                "\t\t\t// of one guaranteed output per clock that has i_ce set.\n"
1336 8 dgisselq
                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
1337 2 dgisselq
                        "\t\t\t\toB <= oB + 1;\n"
1338
                        "\t\t\t\to_sync <= 1'b0;\n"
1339
                "\t\t\tend else\n"
1340
                        "\t\t\t\to_sync <= 1'b0;\n"
1341
        "\t\tend\n"
1342 5 dgisselq
"\n", (inv)?"i":"");
1343 22 dgisselq
        if (hwmpy) {
1344
                fprintf(fstage,
1345
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1346
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
1347
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1348
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
1349
        } else {
1350 5 dgisselq
        fprintf(fstage,
1351 22 dgisselq
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
1352
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
1353
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
1354
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
1355
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
1356
        }
1357
        fprintf(fstage, "endmodule\n");
1358 2 dgisselq
}
1359
 
1360
// usage
//
// Prints a summary of the command line options to stderr.  The options are
// listed in the order: lower case letters, their upper case counterparts,
// and then digits.  Keep this text in step with the argument parser in
// main() whenever an option is added or changed.
void    usage(void) {
        fprintf(stderr,
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits]\n"
"\t\t[-p nmpy] [-x xtrabits] [-sS01h]\n"
// "\tfftgen -i\n"
"\t-c <cbits>\tCauses all internal complex coefficients to be <cbits> bits\n"
"\t\tlonger than the corresponding data bits, to help avoid\n"
"\t\tcoefficient truncation errors.\n"
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
"\t\tsamples input to the transform.\n"
"\t-h\tPrints this usage statement and exits.\n"
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
"\t\tproduce.  Internal values greater than this value will be\n"
"\t\ttruncated to this value.\n"
"\t-n <nbits>\tSets the number of bits in the twos complement input\n"
"\t\tto the (i)FFT routine.\n"
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
"\t\tmultiplication facility, instead of shift-add emulation.\n"
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
"\t\talgorithms that need to apply a filter without needing to do\n"
"\t\tbin shifting, as these algorithms can, with this option, just\n"
"\t\tmultiply by a bit reversed correlation sequence and then\n"
"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"
"\t\ta decimation in time inverse to do this, which this program does\n"
"\t\tnot yet provide.)\n"
"\t-S\tInclude the final bit reversal stage (default).\n"
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
"\t\trounding or truncation of the answer to the final number of bits.\n"
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
"\t\tgiven by e^{ j 2 pi k/N n }.\n");
}
1393
 
1394
// Features still needed:
1395
//      Interactivity.
1396
//      Some number of maximum bits, beyond which we won't accumulate any more.
1397
//      Obviously, the build_stage above.
1398
//      Copying the files of interest into the fft-core directory, from
1399
//              whatever directory this file is run out of.
1400
int main(int argc, char **argv) {
1401
        int     fftsize = -1, lgsize = -1;
1402 22 dgisselq
        int     nbitsin = 16, xtracbits = 4, nummpy=0, nonmpy=2;
1403 19 dgisselq
        int     nbitsout, maxbitsout = -1, xtrapbits=0;
1404 2 dgisselq
        bool    bitreverse = true, inverse=false, interactive = false,
1405
                verbose_flag = false;
1406
        FILE    *vmain;
1407 14 dgisselq
        std::string     coredir = "fft-core", cmdline = "";
1408 2 dgisselq
 
1409
        if (argc <= 1)
1410
                usage();
1411
 
1412 14 dgisselq
        cmdline = argv[0];
1413 2 dgisselq
        for(int argn=1; argn<argc; argn++) {
1414 14 dgisselq
                cmdline += " ";
1415
                cmdline += argv[argn];
1416
        }
1417
 
1418
        for(int argn=1; argn<argc; argn++) {
1419 2 dgisselq
                if ('-' == argv[argn][0]) {
1420
                        for(int j=1; (argv[argn][j])&&(j<100); j++) {
1421
                                switch(argv[argn][j]) {
1422
                                        case '0':
1423
                                                inverse = false;
1424
                                                break;
1425
                                        case '1':
1426
                                                inverse = true;
1427
                                                break;
1428
                                        case 'c':
1429
                                                if (argn+1 >= argc) {
1430 19 dgisselq
                                                        printf("ERR: No extra number of coefficient bits given!\n\n");
1431 2 dgisselq
                                                        usage(); exit(-1);
1432
                                                }
1433
                                                xtracbits = atoi(argv[++argn]);
1434
                                                j+= 200;
1435
                                                break;
1436
                                        case 'd':
1437
                                                if (argn+1 >= argc) {
1438 19 dgisselq
                                                        printf("ERR: No directory given into which to place the core!\n\n");
1439 2 dgisselq
                                                        usage(); exit(-1);
1440
                                                }
1441 14 dgisselq
                                                coredir = argv[++argn];
1442 2 dgisselq
                                                j += 200;
1443
                                                break;
1444
                                        case 'f':
1445
                                                if (argn+1 >= argc) {
1446 19 dgisselq
                                                        printf("ERR: No FFT Size given!\n\n");
1447 2 dgisselq
                                                        usage(); exit(-1);
1448
                                                }
1449
                                                fftsize = atoi(argv[++argn]);
1450
                                                { int sln = strlen(argv[argn]);
1451
                                                if (!isdigit(argv[argn][sln-1])){
1452
                                                        switch(argv[argn][sln-1]) {
1453
                                                        case 'k': case 'K':
1454
                                                                fftsize <<= 10;
1455
                                                                break;
1456
                                                        case 'm': case 'M':
1457
                                                                fftsize <<= 20;
1458
                                                                break;
1459
                                                        case 'g': case 'G':
1460
                                                                fftsize <<= 30;
1461
                                                                break;
1462
                                                        default:
1463 19 dgisselq
                                                                printf("ERR: Unknown FFT size, %s!\n", argv[argn]);
1464 2 dgisselq
                                                                exit(-1);
1465
                                                        }
1466
                                                }}
1467
                                                j += 200;
1468
                                                break;
1469
                                        case 'h':
1470
                                                usage();
1471
                                                exit(0);
1472
                                                break;
1473
                                        case 'i':
1474
                                                interactive = true;
1475
                                                break;
1476
                                        case 'm':
1477
                                                if (argn+1 >= argc) {
1478 19 dgisselq
                                                        printf("ERR: No maximum output bit value given!\n\n");
1479 2 dgisselq
                                                        exit(-1);
1480
                                                }
1481
                                                maxbitsout = atoi(argv[++argn]);
1482
                                                j += 200;
1483
                                                break;
1484
                                        case 'n':
1485
                                                if (argn+1 >= argc) {
1486 19 dgisselq
                                                        printf("ERR: No input bit size given!\n\n");
1487 2 dgisselq
                                                        exit(-1);
1488
                                                }
1489
                                                nbitsin = atoi(argv[++argn]);
1490
                                                j += 200;
1491
                                                break;
1492 22 dgisselq
                                        case 'p':
1493
                                                if (argn+1 >= argc) {
1494
                                                        printf("ERR: No number given for number of hardware multiply stages!\n\n");
1495
                                                        exit(-1);
1496
                                                }
1497
                                                nummpy = atoi(argv[++argn]);
1498
                                                j += 200;
1499
                                                break;
1500 2 dgisselq
                                        case 'S':
1501
                                                bitreverse = true;
1502
                                                break;
1503
                                        case 's':
1504
                                                bitreverse = false;
1505
                                                break;
1506 19 dgisselq
                                        case 'x':
1507
                                                if (argn+1 >= argc) {
1508
                                                        printf("ERR: No extra number of bits given!\n\n");
1509
                                                        usage(); exit(-1);
1510
                                                } j+= 200;
1511
                                                xtrapbits = atoi(argv[++argn]);
1512
                                                break;
1513 2 dgisselq
                                        case 'v':
1514
                                                verbose_flag = true;
1515
                                                break;
1516
                                        default:
1517
                                                printf("Unknown argument, -%c\n", argv[argn][j]);
1518
                                                usage();
1519
                                                exit(-1);
1520
                                }
1521
                        }
1522
                } else {
1523
                        printf("Unrecognized argument, %s\n", argv[argn]);
1524
                        usage();
1525
                        exit(-1);
1526
                }
1527
        }
1528
 
1529
        if ((lgsize < 0)&&(fftsize > 1)) {
1530
                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
1531
                        ;
1532
        }
1533
 
1534
        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {
1535
                printf("INVALID PARAMETERS!!!!\n");
1536
                exit(-1);
1537
        }
1538
 
1539
 
1540
        if (nextlg(fftsize) != fftsize) {
1541
                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",
1542
                                fftsize);
1543
                exit(-1);
1544
        } else if (fftsize < 2) {
1545
                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",
1546
                                fftsize);
1547
                if (fftsize == 1) {
1548
                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");
1549
                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");
1550
                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");
1551
                        fprintf(stderr, "can be connected straight to the input.\n");
1552
                } else {
1553
                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);
1554
                        fprintf(stderr, "Is such an operation even defined?\n");
1555
                }
1556
                exit(-1);
1557
        }
1558
 
1559
        // Calculate how many output bits we'll have, and what the log
1560
        // based two size of our FFT is.
1561
        {
1562
                int     tmp_size = fftsize;
1563
 
1564
                // The first stage always accumulates one bit, regardless
1565
                // of whether you need to or not.
1566
                nbitsout = nbitsin + 1;
1567
                tmp_size >>= 1;
1568
 
1569
                while(tmp_size > 4) {
1570
                        nbitsout += 1;
1571
                        tmp_size >>= 2;
1572
                }
1573
 
1574
                if (tmp_size > 1)
1575
                        nbitsout ++;
1576
 
1577
                if (fftsize <= 2)
1578
                        bitreverse = false;
1579
        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))
1580
                nbitsout = maxbitsout;
1581
 
1582 22 dgisselq
        // Figure out how many multiply stages to use, and how many to skip
1583
        {
1584
                int     lgv = lgval(fftsize);
1585 2 dgisselq
 
1586 22 dgisselq
                nonmpy = lgv - nummpy;
1587
                if (nonmpy < 2) nonmpy = 2;
1588
                nummpy = lgv - nonmpy;
1589
        }
1590
 
1591 2 dgisselq
        {
1592
                struct stat     sbuf;
1593 14 dgisselq
                if (lstat(coredir.c_str(), &sbuf)==0) {
1594 2 dgisselq
                        if (!S_ISDIR(sbuf.st_mode)) {
1595 14 dgisselq
                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());
1596 2 dgisselq
                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");
1597
                                fprintf(stderr, "To try again, please remove this file.\n");
1598
                                exit(-1);
1599
                        }
1600
                } else
1601 14 dgisselq
                        mkdir(coredir.c_str(), 0755);
1602
                if (access(coredir.c_str(), X_OK|W_OK) != 0) {
1603
                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());
1604 2 dgisselq
                        exit(-1);
1605
                }
1606
        }
1607
 
1608 14 dgisselq
        {
1609
                std::string     fname_string;
1610
 
1611
                fname_string = coredir;
1612
                fname_string += "/";
1613
                if (inverse) fname_string += "i";
1614
                fname_string += "fftmain.v";
1615
 
1616
                vmain = fopen(fname_string.c_str(), "w");
1617
                if (NULL == vmain) {
1618
                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());
1619
                        perror("Err from O/S:");
1620
                        exit(-1);
1621
                }
1622 2 dgisselq
        }
1623
 
1624
        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");
1625
        fprintf(vmain, "//\n");
1626
        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");
1627
        fprintf(vmain, "//\n");
1628
        fprintf(vmain, "// Project:     %s\n", prjname);
1629
        fprintf(vmain, "//\n");
1630
        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");
1631
        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");
1632
        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");
1633
        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);
1634
        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");
1635
        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");
1636
        fprintf(vmain, "//\n");
1637
        fprintf(vmain, "// Parameters:\n");
1638
        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");
1639
        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");
1640
        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");
1641
        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");
1642
        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");
1643
        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");
1644
        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");
1645
        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");
1646
        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");
1647
        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");
1648
        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);
1649
        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");
1650
        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);
1651
        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");
1652
        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");
1653
        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");
1654
        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");
1655
        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");
1656
        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");
1657
        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);
1658
        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);
1659
        fprintf(vmain, "//\t\t\tbits total.\n");
1660
        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");
1661
        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");
1662
        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");
1663
        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");
1664
        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");
1665
        fprintf(vmain, "//\n");
1666 14 dgisselq
        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");
1667
        fprintf(vmain, "//\t\tfollowing command line:\n");
1668
        fprintf(vmain, "//\n");
1669
        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());
1670
        fprintf(vmain, "//\n");
1671 2 dgisselq
        fprintf(vmain, "%s", creator);
1672
        fprintf(vmain, "//\n");
1673
        fprintf(vmain, "%s", cpyleft);
1674
 
1675
 
1676
        fprintf(vmain, "//\n");
1677
        fprintf(vmain, "//\n");
1678
        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
1679
        fprintf(vmain, "\t\ti_left, i_right,\n");
1680
        fprintf(vmain, "\t\to_left, o_right, o_sync);\n");
1681
        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
1682
        assert(lgsize > 0);
1683
        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
1684
        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
1685
        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
1686
        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
1687
        fprintf(vmain, "\n\n");
1688
 
1689
        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
1690
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");
1691
        fprintf(vmain, "\n\n");
1692
 
1693
        int     tmp_size = fftsize, lgtmp = lgsize;
1694
        if (fftsize == 2) {
1695
                if (bitreverse) {
1696
                        fprintf(vmain, "\treg\tbr_start;\n");
1697
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
1698
                        fprintf(vmain, "\t\tif (i_rst)\n");
1699
                        fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");
1700
                        fprintf(vmain, "\t\telse if (i_ce)\n");
1701
                        fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");
1702
                }
1703
                fprintf(vmain, "\n\n");
1704 6 dgisselq
                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
1705
                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");
1706 2 dgisselq
                fprintf(vmain, "\n\n");
1707
        } else {
1708
                int     nbits = nbitsin, dropbit=0;
1709
                // Always do a first stage
1710
                fprintf(vmain, "\n\n");
1711
                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
1712 19 dgisselq
                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(nbits+1+xtrapbits)-1, fftsize, fftsize);
1713
                fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
1714 2 dgisselq
                        (inverse)?"i":"", fftsize,
1715 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
1716 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
1717
                        fftsize);
1718
                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);
1719 19 dgisselq
                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
1720 2 dgisselq
                        (inverse)?"i":"", fftsize,
1721 19 dgisselq
                        xtracbits, nbits+1+xtrapbits,
1722 2 dgisselq
                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
1723
                        fftsize);
1724 9 dgisselq
                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
1725 2 dgisselq
                fprintf(vmain, "\n\n");
1726
 
1727 14 dgisselq
                {
1728
                        std::string     fname;
1729
                        char    numstr[12];
1730 22 dgisselq
                        bool    mpystage;
1731 2 dgisselq
 
1732 22 dgisselq
                        // Last two stages are always non-multiply stages
1733
                        // since the multiplies can be done by adds
1734
                        mpystage = ((lgtmp-2) <= nummpy);
1735
 
1736 14 dgisselq
                        fname = coredir + "/";
1737
                        if (inverse) fname += "i";
1738
                        fname += "fftstage_e";
1739
                        sprintf(numstr, "%d", fftsize);
1740
                        fname += numstr;
1741
                        fname += ".v";
1742 22 dgisselq
                        build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage);   // Even stage
1743 14 dgisselq
 
1744
                        fname = coredir + "/";
1745
                        if (inverse) fname += "i";
1746
                        fname += "fftstage_o";
1747
                        sprintf(numstr, "%d", fftsize);
1748
                        fname += numstr;
1749
                        fname += ".v";
1750 22 dgisselq
                        build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage);  // Odd  stage
1751 14 dgisselq
                }
1752
 
1753 2 dgisselq
                nbits += 1;     // New number of input bits
1754
                tmp_size >>= 1; lgtmp--;
1755
                dropbit = 0;
1756
                fprintf(vmain, "\n\n");
1757
                while(tmp_size >= 8) {
1758
                        int     obits = nbits+((dropbit)?0:1);
1759
 
1760
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1761
                                obits = maxbitsout;
1762
 
1763
                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
1764 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
1765 2 dgisselq
                        fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
1766
                                (inverse)?"i":"", tmp_size,
1767 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
1768
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
1769 2 dgisselq
                                tmp_size);
1770
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
1771
                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
1772
                                (inverse)?"i":"", tmp_size,
1773 19 dgisselq
                                nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
1774
                                lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
1775 2 dgisselq
                                tmp_size);
1776
                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
1777
                        fprintf(vmain, "\n\n");
1778
 
1779 14 dgisselq
                        {
1780
                                std::string     fname;
1781
                                char            numstr[12];
1782 22 dgisselq
                                bool            mpystage;
1783 2 dgisselq
 
1784 22 dgisselq
                                mpystage = ((lgtmp-2) <= nummpy);
1785
 
1786 14 dgisselq
                                fname = coredir + "/";
1787
                                if (inverse) fname += "i";
1788
                                fname += "fftstage_e";
1789
                                sprintf(numstr, "%d", tmp_size);
1790
                                fname += numstr;
1791
                                fname += ".v";
1792 22 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 0,
1793
                                        nbits+xtrapbits, inverse, xtracbits,
1794
                                        mpystage);      // Even stage
1795 2 dgisselq
 
1796 14 dgisselq
                                fname = coredir + "/";
1797
                                if (inverse) fname += "i";
1798
                                fname += "fftstage_o";
1799
                                sprintf(numstr, "%d", tmp_size);
1800
                                fname += numstr;
1801
                                fname += ".v";
1802 22 dgisselq
                                build_stage(fname.c_str(), tmp_size/2, 1,
1803
                                        nbits+xtrapbits, inverse, xtracbits,
1804
                                        mpystage);      // Odd  stage
1805 14 dgisselq
                        }
1806
 
1807
 
1808 2 dgisselq
                        dropbit ^= 1;
1809
                        nbits = obits;
1810
                        tmp_size >>= 1; lgtmp--;
1811
                }
1812
 
1813
                if (tmp_size == 4) {
1814
                        int     obits = nbits+((dropbit)?0:1);
1815
 
1816
                        if ((maxbitsout > 0)&&(obits > maxbitsout))
1817
                                obits = maxbitsout;
1818
 
1819
                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
1820 19 dgisselq
                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
1821 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
1822 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
1823 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");
1824 2 dgisselq
                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
1825 19 dgisselq
                                nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
1826 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
1827 2 dgisselq
                        dropbit ^= 1;
1828
                        nbits = obits;
1829
                        tmp_size >>= 1; lgtmp--;
1830
                }
1831
 
1832
                {
1833
                        int obits = nbits+((dropbit)?0:1);
1834
                        if (obits > nbitsout)
1835
                                obits = nbitsout;
1836
                        if ((maxbitsout>0)&&(obits > maxbitsout))
1837
                                obits = maxbitsout;
1838
                        fprintf(vmain, "\twire\t\tw_s2;\n");
1839
                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
1840 19 dgisselq
                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
1841 6 dgisselq
                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
1842 2 dgisselq
 
1843
                        fprintf(vmain, "\n\n");
1844
                        nbits = obits;
1845
                }
1846
 
1847
                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");
1848
                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");
1849
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
1850
                fprintf(vmain, "\n");
1851
                if (bitreverse) {
1852
                        fprintf(vmain, "\twire\tbr_start;\n");
1853
                        fprintf(vmain, "\treg\tr_br_started;\n");
1854
                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");
1855
                        fprintf(vmain, "\t// our dblstage.\n");
1856
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
1857
                        fprintf(vmain, "\t\tif (i_rst)\n");
1858
                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
1859
                        fprintf(vmain, "\t\telse\n");
1860
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");
1861
                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");
1862
                }
1863
        }
1864
 
1865
        fprintf(vmain, "\n");
1866
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
1867
        fprintf(vmain, "\twire\tbr_sync;\n");
1868
        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");
1869
        if (bitreverse) {
1870
                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);
1871
                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");
1872
                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");
1873
        } else {
1874
                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");
1875
                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");
1876
                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");
1877
        }
1878
 
1879
        fprintf(vmain, "\n\n");
1880
        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
1881
        fprintf(vmain, "\talways @(posedge i_clk)\n");
1882
        fprintf(vmain, "\t\tbegin\n");
1883
        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");
1884
        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
1885
        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");
1886
        fprintf(vmain, "\t\tend\n");
1887
        fprintf(vmain, "\n\n");
1888
        fprintf(vmain, "endmodule\n");
1889
        fclose(vmain);
1890
 
1891 14 dgisselq
        {
1892
                std::string     fname;
1893 2 dgisselq
 
1894 14 dgisselq
                fname = coredir + "/butterfly.v";
1895
                build_butterfly(fname.c_str(), xtracbits);
1896 2 dgisselq
 
1897 22 dgisselq
                if (nummpy > 0) {
1898
                        fname = coredir + "/hwbfly.v";
1899
                        build_hwbfly(fname.c_str(), xtracbits);
1900
                }
1901
 
1902 14 dgisselq
                fname = coredir + "/shiftaddmpy.v";
1903
                build_multiply(fname.c_str());
1904 2 dgisselq
 
1905 14 dgisselq
                fname = coredir + "/qtrstage.v";
1906
                build_quarters(fname.c_str());
1907 2 dgisselq
 
1908 14 dgisselq
                fname = coredir + "/dblstage.v";
1909
                build_dblstage(fname.c_str());
1910
 
1911
                if (bitreverse) {
1912
                        fname = coredir + "/dblreverse.v";
1913
                        build_dblreverse(fname.c_str());
1914
                }
1915 2 dgisselq
        }
1916
}
1917
 
1918 16 dgisselq
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.