OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [butterfly.cpp] - Blame information for rev 36

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 36 dgisselq
////////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    butterfly.cpp
4
//
5
// Project:     A General Purpose Pipelined FFT Implementation
6
//
7
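// Purpose:     Defines build_butterfly(), which writes the Verilog source
//              for the FFT butterfly module (butterfly.v) to a file.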
8
//
9
// Creator:     Dan Gisselquist, Ph.D.
10
//              Gisselquist Technology, LLC
11
//
12
////////////////////////////////////////////////////////////////////////////////
13
//
14
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
15
//
16
// This program is free software (firmware): you can redistribute it and/or
17
// modify it under the terms of  the GNU General Public License as published
18
// by the Free Software Foundation, either version 3 of the License, or (at
19
// your option) any later version.
20
//
21
// This program is distributed in the hope that it will be useful, but WITHOUT
22
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
24
// for more details.
25
//
26
// You should have received a copy of the GNU General Public License along
27
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
28
// target there if the PDF file isn't present.)  If not, see
29
// <http://www.gnu.org/licenses/> for a copy.
30
//
31
// License:     GPL, v3, as defined and found on www.gnu.org,
32
//              http://www.gnu.org/licenses/gpl.html
33
//
34
//
35
////////////////////////////////////////////////////////////////////////////////
36
//
37
//
38
#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen
39
#include <stdio.h>
40
#include <stdlib.h>
41
 
42
#ifdef _MSC_VER //  added for ms vs compatibility
43
 
44
#include <io.h>
45
#include <direct.h>
46
#define _USE_MATH_DEFINES
47
#define R_OK    4       /* Test for read permission.  */
48
#define W_OK    2       /* Test for write permission.  */
49
#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/
50
#define F_OK    0       /* Test for existence.  */
51
 
52
#if _MSC_VER <= 1700
53
 
54
// Stand-in for lstat() on older Microsoft toolchains (_MSC_VER <= 1700).
// It never examines the path and never writes through buf; it always
// returns 1 (nonzero), so a caller that treats nonzero as failure will see
// every lookup fail.
int lstat(const char *filename, struct stat *buf) {
	(void)filename;	// Deliberately unused
	(void)buf;	// Deliberately unused
	return 1;
}
55
#define S_ISDIR(A)      0
56
 
57
#else
58
 
59
#define lstat   _stat
60
// _S_IFDIR is a file-type bit constant, not a predicate.  S_ISDIR must be a
// function-like macro that masks the file-type bits of a mode value and
// compares them against the directory type.
#define S_ISDIR(m)	(((m) & _S_IFMT) == _S_IFDIR)
61
 
62
#endif
63
 
64
#define mkdir(A,B)      _mkdir(A)
65
 
66
#define access _access
67
 
68
#else
69
// And for G++/Linux environment
70
 
71
#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros
72
#include <sys/stat.h>
73
#endif
74
 
75
#include <string.h>
76
#include <string>
77
#include <math.h>
78
#include <ctype.h>
79
#include <assert.h>
80
 
81
#include "defaults.h"
82
#include "legal.h"
83
#include "rounding.h"
84
#include "fftlib.h"
85
#include "bldstage.h"
86
#include "bitreverse.h"
87
#include "softmpy.h"
88
#include "butterfly.h"
89
 
90
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding,
91
                        int     ckpce, const bool async_reset) {
92
        FILE    *fp = fopen(fname, "w");
93
        if (NULL == fp) {
94
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
95
                perror("O/S Err was:");
96
                return;
97
        }
98
        const   char    *rnd_string;
99
        if (rounding == RND_TRUNCATE)
100
                rnd_string = "truncate";
101
        else if (rounding == RND_FROMZERO)
102
                rnd_string = "roundfromzero";
103
        else if (rounding == RND_HALFUP)
104
                rnd_string = "roundhalfup";
105
        else
106
                rnd_string = "convround";
107
 
108
        //if (ckpce >= 3)
109
                //ckpce = 3;
110
        if (ckpce <= 1)
111
                ckpce = 1;
112
 
113
        std::string     resetw("i_reset");
114
        if (async_reset)
115
                resetw = std::string("i_areset_n");
116
 
117
 
118
        fprintf(fp,
119
SLASHLINE
120
"//\n"
121
"// Filename:\tbutterfly.v\n"
122
"//\n"
123
"// Project:\t%s\n"
124
"//\n"
125
"// Purpose:\tThis routine caculates a butterfly for a decimation\n"
126
"//             in frequency version of an FFT.  Specifically, given\n"
127
"//     complex Left and Right values together with a coefficient, the output\n"
128
"//     of this routine is given by:\n"
129
"//\n"
130
"//             L' = L + R\n"
131
"//             R' = (L - R)*C\n"
132
"//\n"
133
"//     The rest of the junk below handles timing (mostly), to make certain\n"
134
"//     that L' and R' reach the output at the same clock.  Further, just to\n"
135
"//     make certain that is the case, an 'aux' input exists.  This aux value\n"
136
"//     will come out of this routine synchronized to the values it came in\n"
137
"//     with.  (i.e., both L', R', and aux all have the same delay.)  Hence,\n"
138
"//     a caller of this routine may set aux on the first input with valid\n"
139
"//     data, and then wait to see aux set on the output to know when to find\n"
140
"//     the first output with valid data.\n"
141
"//\n"
142
"//     All bits are preserved until the very last clock, where any more bits\n"
143
"//     than OWIDTH will be quietly discarded.\n"
144
"//\n"
145
"//     This design features no overflow checking.\n"
146
"//\n"
147
"// Notes:\n"
148
"//     CORDIC:\n"
149
"//             Much as we might like, we can't use a cordic here.\n"
150
"//             The goal is to accomplish an FFT, as defined, and a\n"
151
"//             CORDIC places a scale factor onto the data.  Removing\n"
152
"//             the scale factor would cost two multiplies, which\n"
153
"//             is precisely what we are trying to avoid.\n"
154
"//\n"
155
"//\n"
156
"//     3-MULTIPLIES:\n"
157
"//             It should also be possible to do this with three multiplies\n"
158
"//             and an extra two addition cycles.\n"
159
"//\n"
160
"//             We want\n"
161
"//                     R+I = (a + jb) * (c + jd)\n"
162
"//                     R+I = (ac-bd) + j(ad+bc)\n"
163
"//             We multiply\n"
164
"//                     P1 = ac\n"
165
"//                     P2 = bd\n"
166
"//                     P3 = (a+b)(c+d)\n"
167
"//             Then\n"
168
"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"
169
"//\n"
170
"//             WIDTHS:\n"
171
"//             On multiplying an X width number by an\n"
172
"//             Y width number, X>Y, the result should be (X+Y)\n"
173
"//             bits, right?\n"
174
"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"
175
"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"
176
"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"
177
"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"
178
"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"
179
"//             YUP!  But just barely.  Do this and you'll really want\n"
180
"//             to drop a bit, although you will risk overflow in so\n"
181
"//             doing.\n"
182
"//\n"
183
"//     20150602 -- The sync logic lines have been completely redone.  The\n"
184
"//             synchronization lines no longer go through the FIFO with the\n"
185
"//             left hand sum, but are kept out of memory.  This allows the\n"
186
"//             butterfly to use more optimal memory resources, while also\n"
187
"//             guaranteeing that the sync lines can be properly reset upon\n"
188
"//             any reset signal.\n"
189
"//\n"
190
"//\n%s"
191
"//\n", prjname, creator);
192
        fprintf(fp, "%s", cpyleft);
193
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
194
 
195
        fprintf(fp,
196
"module\tbutterfly(i_clk, %s, i_ce, i_coef, i_left, i_right, i_aux,\n"
197
                "\t\to_left, o_right, o_aux);\n"
198
        "\t// Public changeable parameters ...\n", resetw.c_str());
199
 
200
        fprintf(fp,
201
        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
202
#ifdef  TST_BUTTERFLY_CWIDTH
203
        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
204
#else
205
        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
206
#endif
207
#ifdef  TST_BUTTERFLY_OWIDTH
208
        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
209
        // OWIDTH = TST_BUTTERFLY_OWIDTH;
210
#else
211
        fprintf(fp, "OWIDTH=IWIDTH+1;\n");
212
#endif
213
        fprintf(fp, "\tparameter\tSHIFT=0;\n");
214
 
215
        fprintf(fp,
216
        "\t// The number of clocks per each i_ce.  The actual number can be\n"
217
        "\t// more, but the algorithm depends upon at least this many for\n"
218
        "\t// extra internal processing.\n"
219
        "\tparameter    CKPCE=%d;\n", ckpce);
220
 
221
        fprintf(fp,
222
        "\t//\n"
223
        "\t// Local/derived parameters that are calculated from the above\n"
224
        "\t// params.  Apart from algorithmic changes below, these should not\n"
225
        "\t// be adjusted\n"
226
        "\t//\n"
227
        "\t// The first step is to calculate how many clocks it takes our\n"
228
        "\t// multiply to come back with an answer within.  The time in the\n"
229
        "\t// multiply depends upon the input value with the fewest number of\n"
230
        "\t// bits--to keep the pipeline depth short.  So, let's find the\n"
231
        "\t// fewest number of bits here.\n"
232
        "\tlocalparam MXMPYBITS = \n"
233
                "\t\t((IWIDTH+2)>(CWIDTH+1)) ? (CWIDTH+1) : (IWIDTH + 2);\n"
234
        "\t//\n"
235
        "\t// Given this \"fewest\" number of bits, we can calculate the\n"
236
        "\t// number of clocks the multiply itself will take.\n"
237
        "\tlocalparam   MPYDELAY=((MXMPYBITS+1)/2)+2;\n"
238
        "\t//\n"
239
        "\t// In an environment when CKPCE > 1, the multiply delay isn\'t\n"
240
        "\t// necessarily the delay felt by this algorithm--measured in\n"
241
        "\t// i_ce\'s.  In particular, if the multiply can operate with more\n"
242
        "\t// operations per clock, it can appear to finish \"faster\".\n"
243
        "\t// Since most of the logic in this core operates on the slower\n"
244
        "\t// clock, we'll need to map that speed into the number of slower\n"
245
        "\t// clock ticks that it takes.\n"
246
        "\tlocalparam   LCLDELAY = (CKPCE == 1) ? MPYDELAY\n"
247
                "\t\t: (CKPCE == 2) ? (MPYDELAY/2+2)\n"
248
                "\t\t: (MPYDELAY/3 + 2);\n"
249
        "\tlocalparam   LGDELAY = (MPYDELAY>64) ? 7\n"
250
                        "\t\t\t: (MPYDELAY > 32) ? 6\n"
251
                        "\t\t\t: (MPYDELAY > 16) ? 5\n"
252
                        "\t\t\t: (MPYDELAY >  8) ? 4\n"
253
                        "\t\t\t: (MPYDELAY >  4) ? 3\n"
254
                        "\t\t\t: 2;\n"
255
        "\tlocalparam   AUXLEN=(LCLDELAY+3);\n"
256
        "\tlocalparam   MPYREMAINDER = MPYDELAY - CKPCE*(MPYDELAY/CKPCE);\n"
257
"\n\n");
258
 
259
 
260
        fprintf(fp,
261
        "\tinput\t\ti_clk, %s, i_ce;\n"
262
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
263
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
264
        "\tinput\t\ti_aux;\n"
265
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
266
        "\toutput\treg\to_aux;\n\n", resetw.c_str());
267
        fprintf(fp,
268
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
269
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
270
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
271
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
272
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
273
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
274
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
275
"\n"
276
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
277
"\n"
278
        "\treg  [(LGDELAY-1):0] fifo_addr;\n"
279
        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"
280
        "\tassign\tfifo_read_addr = fifo_addr - LCLDELAY[(LGDELAY-1):0];\n"
281
        "\treg  [(2*IWIDTH+1):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
282
"\n");
283
        fprintf(fp,
284
        "\t// Set up the input to the multiply\n"
285
        "\talways @(posedge i_clk)\n"
286
                "\t\tif (i_ce)\n"
287
                "\t\tbegin\n"
288
                        "\t\t\t// One clock just latches the inputs\n"
289
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
290
                        "\t\t\tr_right <= i_right;\n"
291
                        "\t\t\tr_coef  <= i_coef;\n"
292
                        "\t\t\t// Next clock adds/subtracts\n"
293
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
294
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
295
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
296
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
297
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
298
                        "\t\t\tr_coef_2<= r_coef;\n"
299
        "\t\tend\n"
300
"\n");
301
        fprintf(fp,
302
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
303
        "\t// to be multiplied, but yet we still need the results in sync\n"
304
        "\t// with the answer when it is ready.\n"
305
        "\tinitial fifo_addr = 0;\n");
306
        if (async_reset)
307
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
308
        else
309
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
310
        fprintf(fp,
311
                        "\t\t\tfifo_addr <= 0;\n"
312
                "\t\telse if (i_ce)\n"
313
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
314
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
315
                        "\t\t\t// right side.\n"
316
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
317
"\n"
318
        "\talways @(posedge i_clk)\n"
319
                "\t\tif (i_ce)\n"
320
                        "\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
321
"\n"
322
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
323
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
324
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
325
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
326
"\n"
327
"\n");
328
        fprintf(fp,
329
        "\t// Multiply output is always a width of the sum of the widths of\n"
330
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
331
        "\t// bits in p_one, p_two, or p_three.  These values needed to\n"
332
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
333
        "\t// three multiply complex multiply cannot increase the total\n"
334
        "\t// number of bits in our final output.  We\'ll take care of\n"
335
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
336
        "\t// below.\n"
337
"\n"
338
"\n");
339
        fprintf(fp,
340
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
341
        "\t// by doing three multiplies we accomplish the work of four.\n"
342
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
343
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
344
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
345
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
346
        "\t// We do this by calculating the intermediate products P1, P2,\n"
347
        "\t// and P3 as\n"
348
        "\t//\tP1 = ac\n"
349
        "\t//\tP2 = bd\n"
350
        "\t//\tP3 = (a + b) * (c + d)\n"
351
        "\t// and then complete our final answer with\n"
352
        "\t//\tac - bd = P1 - P2 (this checks)\n"
353
        "\t//\tad + bc = P3 - P2 - P1\n"
354
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
355
        "\t//\t        = bc + ad (this checks)\n"
356
"\n"
357
"\n");
358
        fprintf(fp,
359
        "\t// This should really be based upon an IF, such as in\n"
360
        "\t// if (IWIDTH < CWIDTH) then ...\n"
361
        "\t// However, this is the only (other) way I know to do it.\n"
362
        "\tgenerate if (CKPCE <= 1)\n"
363
        "\tbegin\n"
364
"\n"
365
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
366
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
367
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
368
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
369
                "\n"
370
                "\t\t// We need to pad these first two multiplies by an extra\n"
371
                "\t\t// bit just to keep them aligned with the third,\n"
372
                "\t\t// simpler, multiply.\n"
373
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
374
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
375
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
376
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
377
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
378
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
379
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
380
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
381
"\n");
382
 
383
        ///////////////////////////////////////////
384
        ///
385
        ///     Two clocks per CE, so CE, no-ce, CE, no-ce, etc
386
        ///
387
        fprintf(fp,
388
        "\tend else if (CKPCE == 2)\n"
389
        "\tbegin : CKPCE_TWO\n"
390
                "\t\t// Coefficient multiply inputs\n"
391
                "\t\treg                [2*(CWIDTH)-1:0]        mpy_pipe_c;\n"
392
                "\t\t// Data multiply inputs\n"
393
                "\t\treg                [2*(IWIDTH+1)-1:0]      mpy_pipe_d;\n"
394
                "\t\twire       signed  [(CWIDTH-1):0]  mpy_pipe_vc;\n"
395
                "\t\twire       signed  [(IWIDTH):0]    mpy_pipe_vd;\n"
396
                "\t\t//\n"
397
                "\t\treg        signed  [(CWIDTH+1)-1:0]        mpy_cof_sum;\n"
398
                "\t\treg        signed  [(IWIDTH+2)-1:0]        mpy_dif_sum;\n"
399
"\n"
400
                "\t\tassign     mpy_pipe_vc =  mpy_pipe_c[2*(CWIDTH)-1:CWIDTH];\n"
401
                "\t\tassign     mpy_pipe_vd =  mpy_pipe_d[2*(IWIDTH+1)-1:IWIDTH+1];\n"
402
"\n"
403
                "\t\treg                        mpy_pipe_v;\n"
404
                "\t\treg                        ce_phase;\n"
405
"\n"
406
                "\t\treg        signed  [(CWIDTH+IWIDTH+3)-1:0] mpy_pipe_out;\n"
407
                "\t\treg        signed [IWIDTH+CWIDTH+3-1:0]    longmpy;\n"
408
"\n"
409
"\n"
410
                "\t\tinitial    ce_phase = 1'b0;\n"
411
                "\t\talways @(posedge i_clk)\n"
412
                "\t\tif (i_reset)\n"
413
                        "\t\t\tce_phase <= 1'b0;\n"
414
                "\t\telse if (i_ce)\n"
415
                        "\t\t\tce_phase <= 1'b1;\n"
416
                "\t\telse\n"
417
                        "\t\t\tce_phase <= 1'b0;\n"
418
"\n"
419
                "\t\talways @(*)\n"
420
                        "\t\t\tmpy_pipe_v = (i_ce)||(ce_phase);\n"
421
"\n"
422
                "\t\talways @(posedge i_clk)\n"
423
                "\t\tif (ce_phase)\n"
424
                "\t\tbegin\n"
425
                        "\t\t\tmpy_pipe_c[2*CWIDTH-1:0] <=\n"
426
                                "\t\t\t\t\t{ ir_coef_r, ir_coef_i };\n"
427
                        "\t\t\tmpy_pipe_d[2*(IWIDTH+1)-1:0] <=\n"
428
                                "\t\t\t\t\t{ r_dif_r, r_dif_i };\n"
429
"\n"
430
                        "\t\t\tmpy_cof_sum  <= ir_coef_i + ir_coef_r;\n"
431
                        "\t\t\tmpy_dif_sum <= r_dif_r + r_dif_i;\n"
432
"\n"
433
                "\t\tend else if (i_ce)\n"
434
                "\t\tbegin\n"
435
                        "\t\t\tmpy_pipe_c[2*(CWIDTH)-1:0] <= {\n"
436
                                "\t\t\t\tmpy_pipe_c[(CWIDTH)-1:0], {(CWIDTH){1'b0}} };\n"
437
                        "\t\t\tmpy_pipe_d[2*(IWIDTH+1)-1:0] <= {\n"
438
                                "\t\t\t\tmpy_pipe_d[(IWIDTH+1)-1:0], {(IWIDTH+1){1'b0}} };\n"
439
                "\t\tend\n"
440
"\n");
441
        fprintf(fp,
442
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) mpy0(i_clk, mpy_pipe_v,\n"
443
                        "\t\t\t\tmpy_cof_sum, mpy_dif_sum, longmpy);\n"
444
"\n");
445
 
446
        fprintf(fp,
447
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) mpy1(i_clk, mpy_pipe_v,\n"
448
                        "\t\t\t\t{ mpy_pipe_vc[CWIDTH-1], mpy_pipe_vc },\n"
449
                        "\t\t\t\t{ mpy_pipe_vd[IWIDTH  ], mpy_pipe_vd },\n"
450
                        "\t\t\t\tmpy_pipe_out);\n\n");
451
 
452
        fprintf(fp,
453
                "\t\treg\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\n"
454
                        "\t\t\t\t\trp_one, rp_two, rp_three,\n"
455
                        "\t\t\t\t\trp2_one, rp2_two, rp2_three;\n"
456
"\n"
457
                "\t\talways @(posedge i_clk)\n"
458
                "\t\tif (((i_ce)&&(!MPYDELAY[0]))\n"
459
                "\t\t\t||((ce_phase)&&(MPYDELAY[0])))\n"
460
                        "\t\t\trp_one <= mpy_pipe_out;\n"
461
                "\t\talways @(posedge i_clk)\n"
462
                "\t\tif (((i_ce)&&(MPYDELAY[0]))\n"
463
                "\t\t\t||((ce_phase)&&(!MPYDELAY[0])))\n"
464
                        "\t\t\trp_two <= mpy_pipe_out;\n"
465
                "\t\talways @(posedge i_clk)\n"
466
                "\t\tif (i_ce)\n"
467
                        "\t\t\trp_three <= longmpy;\n"
468
"\n"
469
                "\t\t// Our outputs *MUST* be set on a clock where i_ce is\n"
470
                "\t\t// true for the following logic to work.  Make that\n"
471
                "\t\t// happen here.\n"
472
                "\t\talways @(posedge i_clk)\n"
473
                "\t\tif (i_ce)\n"
474
                        "\t\t\trp2_one<= rp_one;\n"
475
                "\t\talways @(posedge i_clk)\n"
476
                "\t\tif (i_ce)\n"
477
                        "\t\t\trp2_two <= rp_two;\n"
478
                "\t\talways @(posedge i_clk)\n"
479
                "\t\tif (i_ce)\n"
480
                        "\t\t\trp2_three<= rp_three;\n"
481
"\n"
482
                "\t\tassign     p_one   = rp2_one;\n"
483
                "\t\tassign     p_two   = (!MPYDELAY[0])? rp2_two  : rp_two;\n"
484
                "\t\tassign     p_three = ( MPYDELAY[0])? rp_three : rp2_three;\n"
485
"\n"
486
                "\t\t// verilator lint_off UNUSED\n"
487
                "\t\twire\t[2*(IWIDTH+CWIDTH+3)-1:0]\tunused;\n"
488
                "\t\tassign\tunused = { rp2_two, rp2_three };\n"
489
                "\t\t// verilator lint_on  UNUSED\n"
490
"\n");
491
 
492
        /////////////////////////
493
        ///
494
        ///     Three clock per CE, so CE, no-ce, no-ce*, CE
495
        ///
496
        fprintf(fp,
497
"\tend else if (CKPCE <= 3)\n\tbegin : CKPCE_THREE\n");
498
 
499
        fprintf(fp,
500
        "\t\t// Coefficient multiply inputs\n"
501
        "\t\treg\t\t[3*(CWIDTH+1)-1:0]\tmpy_pipe_c;\n"
502
        "\t\t// Data multiply inputs\n"
503
        "\t\treg\t\t[3*(IWIDTH+2)-1:0]\tmpy_pipe_d;\n"
504
        "\t\twire\tsigned       [(CWIDTH):0]    mpy_pipe_vc;\n"
505
        "\t\twire\tsigned       [(IWIDTH+1):0]  mpy_pipe_vd;\n"
506
        "\n"
507
        "\t\tassign\tmpy_pipe_vc =  mpy_pipe_c[3*(CWIDTH+1)-1:2*(CWIDTH+1)];\n"
508
        "\t\tassign\tmpy_pipe_vd =  mpy_pipe_d[3*(IWIDTH+2)-1:2*(IWIDTH+2)];\n"
509
        "\n"
510
        "\t\treg\t\t\tmpy_pipe_v;\n"
511
        "\t\treg\t\t[2:0]\tce_phase;\n"
512
        "\n"
513
        "\t\treg\tsigned        [  (CWIDTH+IWIDTH+3)-1:0]       mpy_pipe_out;\n"
514
"\n");
515
        fprintf(fp,
516
        "\t\tinitial\tce_phase = 3'b011;\n"
517
        "\t\talways @(posedge i_clk)\n"
518
        "\t\tif (i_reset)\n"
519
                "\t\t\tce_phase <= 3'b011;\n"
520
        "\t\telse if (i_ce)\n"
521
                "\t\t\tce_phase <= 3'b000;\n"
522
        "\t\telse if (ce_phase != 3'b011)\n"
523
                "\t\t\tce_phase <= ce_phase + 1'b1;\n"
524
"\n"
525
        "\t\talways @(*)\n"
526
                "\t\t\tmpy_pipe_v = (i_ce)||(ce_phase < 3'b010);\n"
527
"\n");
528
 
529
        fprintf(fp,
530
        "\t\talways @(posedge i_clk)\n"
531
                "\t\t\tif (ce_phase == 3\'b000)\n"
532
                "\t\t\tbegin\n"
533
                        "\t\t\t\t// Second clock\n"
534
                        "\t\t\t\tmpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= {\n"
535
                        "\t\t\t\t\tir_coef_r[CWIDTH-1], ir_coef_r,\n"
536
                        "\t\t\t\t\tir_coef_i[CWIDTH-1], ir_coef_i };\n"
537
                        "\t\t\t\tmpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r;\n"
538
                        "\t\t\t\tmpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= {\n"
539
                        "\t\t\t\t\tr_dif_r[IWIDTH], r_dif_r,\n"
540
                        "\t\t\t\t\tr_dif_i[IWIDTH], r_dif_i };\n"
541
                        "\t\t\t\tmpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i;\n"
542
"\n"
543
                "\t\t\tend else if (mpy_pipe_v)\n"
544
                "\t\t\tbegin\n"
545
                        "\t\t\t\tmpy_pipe_c[3*(CWIDTH+1)-1:0] <= {\n"
546
                        "\t\t\t\t\tmpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1\'b0}} };\n"
547
                        "\t\t\t\tmpy_pipe_d[3*(IWIDTH+2)-1:0] <= {\n"
548
                        "\t\t\t\t\tmpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1\'b0}} };\n"
549
                "\t\t\tend\n"
550
"\n");
551
        fprintf(fp,
552
                "\t\tlongbimpy #(CWIDTH+1,IWIDTH+2) mpy(i_clk, mpy_pipe_v,\n"
553
                        "\t\t\t\tmpy_pipe_vc, mpy_pipe_vd, mpy_pipe_out);\n"
554
"\n");
555
 
556
        fprintf(fp,
557
        "\t\treg\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\n"
558
                                "\t\t\t\trp_one,  rp_two,  rp_three,\n"
559
                                "\t\t\t\trp2_one, rp2_two, rp2_three,\n"
560
                                "\t\t\t\trp3_one;\n"
561
"\n");
562
 
563
        fprintf(fp,
564
        "\t\talways @(posedge i_clk)\n"
565
        "\t\tif (MPYREMAINDER == 0)\n"
566
        "\t\tbegin\n\n"
567
        "\t\t   if (i_ce)\n"
568
        "\t\t           rp_two   <= mpy_pipe_out;\n"
569
        "\t\t   else if (ce_phase == 3'b000)\n"
570
        "\t\t           rp_three <= mpy_pipe_out;\n"
571
        "\t\t   else if (ce_phase == 3'b001)\n"
572
        "\t\t           rp_one   <= mpy_pipe_out;\n\n"
573
        "\t\tend else if (MPYREMAINDER == 1)\n"
574
        "\t\tbegin\n\n"
575
        "\t\t   if (i_ce)\n"
576
        "\t\t           rp_one   <= mpy_pipe_out;\n"
577
        "\t\t   else if (ce_phase == 3'b000)\n"
578
        "\t\t           rp_two   <= mpy_pipe_out;\n"
579
        "\t\t   else if (ce_phase == 3'b001)\n"
580
        "\t\t           rp_three <= mpy_pipe_out;\n\n"
581
        "\t\tend else // if (MPYREMAINDER == 2)\n"
582
        "\t\tbegin\n\n"
583
        "\t\t   if (i_ce)\n"
584
        "\t\t           rp_three <= mpy_pipe_out;\n"
585
        "\t\t   else if (ce_phase == 3'b000)\n"
586
        "\t\t           rp_one   <= mpy_pipe_out;\n"
587
        "\t\t   else if (ce_phase == 3'b001)\n"
588
        "\t\t           rp_two   <= mpy_pipe_out;\n\n"
589
        "\t\tend\n\n");
590
 
591
        fprintf(fp,
592
        "\t\talways @(posedge i_clk)\n"
593
        "\t\tif (i_ce)\n"
594
        "\t\tbegin\n"
595
                "\t\t\trp2_one   <= rp_one;\n"
596
                "\t\t\trp2_two   <= rp_two;\n"
597
                "\t\t\trp2_three <= (MPYREMAINDER == 2) ? mpy_pipe_out : rp_three;\n"
598
                "\t\t\trp3_one   <= (MPYREMAINDER == 0) ? rp2_one : rp_one;\n"
599
        "\t\tend\n");
600
        fprintf(fp,
601
 
602
        "\t\tassign\tp_one   = rp3_one;\n"
603
        "\t\tassign\tp_two   = rp2_two;\n"
604
        "\t\tassign\tp_three = rp2_three;\n"
605
"\n");
606
 
607
        fprintf(fp,
608
"\tend endgenerate\n");
609
 
610
        fprintf(fp,
611
        "\t// These values are held in memory and delayed during the\n"
612
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
613
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
614
        "\t// therefore, the left_x values need to be right shifted by\n"
615
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
616
        "\t// extension.\n"
617
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
618
        "\treg\t\t[(2*IWIDTH+1):0]      fifo_read;\n"
619
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
620
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
621
"\n"
622
"\n"
623
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
624
"\n");
625
        fprintf(fp,
626
        "\t// Let's do some rounding and remove unnecessary bits.\n"
627
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
628
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
629
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
630
        "\t// them, but the actual values will never fill all these bits.\n"
631
        "\t// In particular, we only need:\n"
632
        "\t//\t IWIDTH bits for the input\n"
633
        "\t//\t     +1 bit for the add/subtract\n"
634
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
635
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
636
        "\t//\t ------\n"
637
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
638
        "\t//\n"
639
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
640
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
641
        "\t//\t   IWIDTH bits for the input\n"
642
        "\t//\t       +1 bit for the add/subtract\n"
643
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
644
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
645
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
646
        "\t//\t (IWIDTH+CWIDTH) valid output bits.\n"
647
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
648
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
649
        "\t// SHIFT) we accomplish that here.\n"
650
"\n");
651
        fprintf(fp,
652
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
653
 
654
        fprintf(fp,
655
        "\twire\tsigned\t[(CWIDTH+IWIDTH+3-1):0]\tleft_sr, left_si;\n"
656
        "\tassign       left_sr = { {(2){fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r };\n"
657
        "\tassign       left_si = { {(2){fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i };\n\n");
658
 
659
        fprintf(fp,
660
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
661
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
662
                rnd_string);
663
        fprintf(fp,
664
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
665
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
666
                rnd_string);
667
        fprintf(fp,
668
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
669
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
670
        fprintf(fp,
671
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
672
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
673
        fprintf(fp,
674
        "\talways @(posedge i_clk)\n"
675
                "\t\tif (i_ce)\n"
676
                "\t\tbegin\n"
677
                        "\t\t\t// First clock, recover all values\n"
678
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
679
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
680
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
681
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
682
                        "\t\t\t// extra bits we need to get rid of.)\n"
683
                        "\t\t\tmpy_r <= p_one - p_two;\n"
684
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
685
                "\t\tend\n"
686
"\n");
687
 
688
        fprintf(fp,
689
        "\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
690
        "\tinitial\taux_pipeline = 0;\n");
691
        if (async_reset)
692
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
693
        else
694
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
695
        fprintf(fp,
696
        "\t\t\taux_pipeline <= 0;\n"
697
        "\t\telse if (i_ce)\n"
698
        "\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
699
"\n");
700
        fprintf(fp,
701
        "\tinitial o_aux = 1\'b0;\n");
702
        if (async_reset)
703
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
704
        else
705
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
706
        fprintf(fp,
707
                "\t\t\to_aux <= 1\'b0;\n"
708
                "\t\telse if (i_ce)\n"
709
                "\t\tbegin\n"
710
                        "\t\t\t// Second clock, latch for final clock\n"
711
                        "\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
712
                "\t\tend\n"
713
"\n");
714
 
715
        fprintf(fp,
716
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
717
        "\t// complement numbers per output word, so that each output word\n"
718
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
719
        "\t// portion and the bottom half being the imaginary portion.\n"
720
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
721
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
722
"\n");
723
 
724
        if (formal_property_flag) {
725
                fprintf(fp,
726
"`ifdef VERILATOR\n"
727
"`define FORMAL\n"
728
"`endif\n"
729
"`ifdef FORMAL\n"
730
        "\tlocalparam   F_LGDEPTH = (AUXLEN > 64) ? 7\n"
731
                        "\t\t\t: (AUXLEN > 32) ? 6\n"
732
                        "\t\t\t: (AUXLEN > 16) ? 5\n"
733
                        "\t\t\t: (AUXLEN >  8) ? 4\n"
734
                        "\t\t\t: (AUXLEN >  4) ? 3 : 2;\n\n"
735
        "\tlocalparam   F_DEPTH = AUXLEN;\n"
736
        "\tlocalparam   [F_LGDEPTH-1:0] F_D = F_DEPTH[F_LGDEPTH-1:0]-1;\n"
737
"\n"
738
        "\treg  signed  [IWIDTH-1:0]    f_dlyleft_r  [0:F_DEPTH-1];\n"
739
        "\treg  signed  [IWIDTH-1:0]    f_dlyleft_i  [0:F_DEPTH-1];\n"
740
        "\treg  signed  [IWIDTH-1:0]    f_dlyright_r [0:F_DEPTH-1];\n"
741
        "\treg  signed  [IWIDTH-1:0]    f_dlyright_i [0:F_DEPTH-1];\n"
742
        "\treg  signed  [CWIDTH-1:0]    f_dlycoeff_r [0:F_DEPTH-1];\n"
743
        "\treg  signed  [CWIDTH-1:0]    f_dlycoeff_i [0:F_DEPTH-1];\n"
744
        "\treg  signed  [F_DEPTH-1:0]   f_dlyaux;\n"
745
"\n"
746
        "\tinitial\tf_dlyaux[0] = 0;\n"
747
        "\talways @(posedge i_clk)\n"
748
        "\tif (i_reset)\n"
749
                "\t\tf_dlyaux\t<= 0;\n"
750
        "\telse if (i_ce)\n"
751
                "\t\tf_dlyaux\t<= { f_dlyaux[F_DEPTH-2:0], i_aux };\n"
752
"\n"
753
        "\talways @(posedge i_clk)\n"
754
        "\tif (i_ce)\n"
755
        "\tbegin\n"
756
        "\t     f_dlyleft_r[0]   <= i_left[ (2*IWIDTH-1):IWIDTH];\n"
757
        "\t     f_dlyleft_i[0]   <= i_left[ (  IWIDTH-1):0];\n"
758
        "\t     f_dlyright_r[0]  <= i_right[(2*IWIDTH-1):IWIDTH];\n"
759
        "\t     f_dlyright_i[0]  <= i_right[(  IWIDTH-1):0];\n"
760
        "\t     f_dlycoeff_r[0]  <= i_coef[ (2*CWIDTH-1):CWIDTH];\n"
761
        "\t     f_dlycoeff_i[0]  <= i_coef[ (  CWIDTH-1):0];\n"
762
        "\tend\n"
763
"\n"
764
        "\tgenvar       k;\n"
765
        "\tgenerate for(k=1; k<F_DEPTH; k=k+1)\n"
766
        "\tbegin : F_PROPAGATE_DELAY_LINES\n"
767
"\n"
768
"\n"
769
                "\t\talways @(posedge i_clk)\n"
770
                "\t\tif (i_ce)\n"
771
                "\t\tbegin\n"
772
                "\t\t   f_dlyleft_r[k]  <= f_dlyleft_r[ k-1];\n"
773
                "\t\t   f_dlyleft_i[k]  <= f_dlyleft_i[ k-1];\n"
774
                "\t\t   f_dlyright_r[k] <= f_dlyright_r[k-1];\n"
775
                "\t\t   f_dlyright_i[k] <= f_dlyright_i[k-1];\n"
776
                "\t\t   f_dlycoeff_r[k] <= f_dlycoeff_r[k-1];\n"
777
                "\t\t   f_dlycoeff_i[k] <= f_dlycoeff_i[k-1];\n"
778
                "\t\tend\n"
779
"\n"
780
        "\tend endgenerate\n"
781
"\n"
782
"`ifndef VERILATOR\n"
783
        "\talways @(posedge i_clk)\n"
784
        "\tif ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))\n"
785
        "\t             &&(!$past(i_ce,4)))\n"
786
        "\t     assume(i_ce);\n"
787
"\n"
788
        "\tgenerate if (CKPCE <= 1)\n"
789
        "\tbegin\n"
790
"\n"
791
        "\t     // i_ce is allowed to be anything in this mode\n"
792
"\n"
793
        "\tend else if (CKPCE == 2)\n"
794
        "\tbegin : F_CKPCE_TWO\n"
795
"\n"
796
        "\t     always @(posedge i_clk)\n"
797
        "\t             if ($past(i_ce))\n"
798
        "\t                     assume(!i_ce);\n"
799
"\n"
800
        "\tend else if (CKPCE == 3)\n"
801
        "\tbegin : F_CKPCE_THREE\n"
802
"\n"
803
        "\t     always @(posedge i_clk)\n"
804
        "\t             if (($past(i_ce))||($past(i_ce,2)))\n"
805
        "\t                     assume(!i_ce);\n"
806
"\n"
807
        "\tend endgenerate\n"
808
"`endif\n"
809
"\n"
810
        "\treg  [F_LGDEPTH:0]   f_startup_counter;\n"
811
        "\tinitial      f_startup_counter = 0;\n"
812
        "\talways @(posedge i_clk)\n"
813
        "\tif (i_reset)\n"
814
        "\t     f_startup_counter <= 0;\n"
815
        "\telse if ((i_ce)&&(!(&f_startup_counter)))\n"
816
        "\t     f_startup_counter <= f_startup_counter + 1;\n"
817
"\n"
818
        "\twire signed  [IWIDTH:0]      f_sumr, f_sumi;\n"
819
        "\talways @(*)\n"
820
        "\tbegin\n"
821
        "\t     f_sumr = f_dlyleft_r[F_D] + f_dlyright_r[F_D];\n"
822
        "\t     f_sumi = f_dlyleft_i[F_D] + f_dlyright_i[F_D];\n"
823
        "\tend\n"
824
"\n"
825
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_sumrx, f_sumix;\n"
826
        "\tassign\tf_sumrx = { {(4){f_sumr[IWIDTH]}}, f_sumr, {(CWIDTH-2){1'b0}} };\n"
827
        "\tassign\tf_sumix = { {(4){f_sumi[IWIDTH]}}, f_sumi, {(CWIDTH-2){1'b0}} };\n"
828
"\n"
829
        "\twire signed  [IWIDTH:0]      f_difr, f_difi;\n"
830
        "\talways @(*)\n"
831
        "\tbegin\n"
832
        "\t     f_difr = f_dlyleft_r[F_D] - f_dlyright_r[F_D];\n"
833
        "\t     f_difi = f_dlyleft_i[F_D] - f_dlyright_i[F_D];\n"
834
        "\tend\n"
835
"\n"
836
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_difrx, f_difix;\n"
837
        "\tassign\tf_difrx = { {(CWIDTH+2){f_difr[IWIDTH]}}, f_difr };\n"
838
        "\tassign\tf_difix = { {(CWIDTH+2){f_difi[IWIDTH]}}, f_difi };\n"
839
"\n"
840
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_widecoeff_r, f_widecoeff_i;\n"
841
        "\tassign\tf_widecoeff_r ={ {(IWIDTH+3){f_dlycoeff_r[F_D][CWIDTH-1]}},\n"
842
                                        "\t\t\t\t\t\tf_dlycoeff_r[F_D] };\n"
843
        "\tassign\tf_widecoeff_i ={ {(IWIDTH+3){f_dlycoeff_i[F_D][CWIDTH-1]}},\n"
844
                                        "\t\t\t\t\t\tf_dlycoeff_i[F_D] };\n"
845
"\n"
846
        "\talways @(posedge i_clk)\n"
847
        "\tif (f_startup_counter > {1'b0, F_D})\n"
848
        "\tbegin\n"
849
        "\t     assert(aux_pipeline == f_dlyaux);\n"
850
        "\t     assert(left_sr == f_sumrx);\n"
851
        "\t     assert(left_si == f_sumix);\n"
852
        "\t     assert(aux_pipeline[AUXLEN-1] == f_dlyaux[F_D]);\n"
853
"\n"
854
        "\t     if ((f_difr == 0)&&(f_difi == 0))\n"
855
        "\t     begin\n"
856
        "\t             assert(mpy_r == 0);\n"
857
        "\t             assert(mpy_i == 0);\n"
858
        "\t     end else if ((f_dlycoeff_r[F_D] == 0)\n"
859
        "\t                     &&(f_dlycoeff_i[F_D] == 0))\n"
860
        "\t     begin\n"
861
        "\t             assert(mpy_r == 0);\n"
862
        "\t             assert(mpy_i == 0);\n"
863
        "\t     end\n"
864
"\n"
865
        "\t     if ((f_dlycoeff_r[F_D] == 1)&&(f_dlycoeff_i[F_D] == 0))\n"
866
        "\t     begin\n"
867
        "\t             assert(mpy_r == f_difrx);\n"
868
        "\t             assert(mpy_i == f_difix);\n"
869
        "\t     end\n"
870
"\n"
871
        "\t     if ((f_dlycoeff_r[F_D] == 0)&&(f_dlycoeff_i[F_D] == 1))\n"
872
        "\t     begin\n"
873
        "\t             assert(mpy_r == -f_difix);\n"
874
        "\t             assert(mpy_i ==  f_difrx);\n"
875
        "\t     end\n"
876
"\n"
877
        "\t     if ((f_difr == 1)&&(f_difi == 0))\n"
878
        "\t     begin\n"
879
        "\t             assert(mpy_r == f_widecoeff_r);\n"
880
        "\t             assert(mpy_i == f_widecoeff_i);\n"
881
        "\t     end\n"
882
"\n"
883
        "\t     if ((f_difr == 0)&&(f_difi == 1))\n"
884
        "\t     begin\n"
885
        "\t             assert(mpy_r == -f_widecoeff_i);\n"
886
        "\t             assert(mpy_i ==  f_widecoeff_r);\n"
887
        "\t     end\n"
888
        "\tend\n"
889
"\n");
890
 
891
                fprintf(fp,
892
        "\t// Let's see if we can improve our performance at all by\n"
893
        "\t// moving our test one clock earlier.  If nothing else, it should\n"
894
        "\t// help induction finish one (or more) clocks ealier than\n"
895
        "\t// otherwise\n"
896
"\n\n"
897
        "\twire signed  [IWIDTH:0]      f_predifr, f_predifi;\n"
898
        "\talways @(*)\n"
899
        "\tbegin\n"
900
                "\t\tf_predifr = f_dlyleft_r[F_D-1] - f_dlyright_r[F_D-1];\n"
901
                "\t\tf_predifi = f_dlyleft_i[F_D-1] - f_dlyright_i[F_D-1];\n"
902
        "\tend\n"
903
"\n"
904
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_predifrx, f_predifix;\n"
905
        "\tassign       f_predifrx = { {(CWIDTH+2){f_predifr[IWIDTH]}}, f_predifr };\n"
906
        "\tassign       f_predifix = { {(CWIDTH+2){f_predifi[IWIDTH]}}, f_predifi };\n"
907
"\n"
908
        "\twire signed  [CWIDTH:0]      f_sumcoef;\n"
909
        "\twire signed  [IWIDTH+1:0]    f_sumdiff;\n"
910
        "\talways @(*)\n"
911
        "\tbegin\n"
912
                "\t\tf_sumcoef = f_dlycoeff_r[F_D-1] + f_dlycoeff_i[F_D-1];\n"
913
                "\t\tf_sumdiff = f_predifr + f_predifi;\n"
914
        "\tend\n"
915
"\n"
916
        "\t// Induction helpers\n"
917
        "\talways @(posedge i_clk)\n"
918
        "\tif (f_startup_counter >= { 1'b0, F_D })\n"
919
        "\tbegin\n"
920
                "\t\tif (f_dlycoeff_r[F_D-1] == 0)\n"
921
                        "\t\t\tassert(p_one == 0);\n"
922
                "\t\tif (f_dlycoeff_i[F_D-1] == 0)\n"
923
                        "\t\t\tassert(p_two == 0);\n"
924
"\n"
925
                "\t\tif (f_dlycoeff_r[F_D-1] == 1)\n"
926
                        "\t\t\tassert(p_one == f_predifrx);\n"
927
                "\t\tif (f_dlycoeff_i[F_D-1] == 1)\n"
928
                        "\t\t\tassert(p_two == f_predifix);\n"
929
"\n"
930
                "\t\tif (f_predifr == 0)\n"
931
                        "\t\t\tassert(p_one == 0);\n"
932
                "\t\tif (f_predifi == 0)\n"
933
                        "\t\t\tassert(p_two == 0);\n"
934
"\n"
935
                "\t\t// verilator lint_off WIDTH\n"
936
                "\t\tif (f_predifr == 1)\n"
937
                        "\t\t\tassert(p_one == f_dlycoeff_r[F_D-1]);\n"
938
                "\t\tif (f_predifi == 1)\n"
939
                        "\t\t\tassert(p_two == f_dlycoeff_i[F_D-1]);\n"
940
                "\t\t// verilator lint_on  WIDTH\n"
941
"\n"
942
                "\t\tif (f_sumcoef == 0)\n"
943
                        "\t\t\tassert(p_three == 0);\n"
944
                "\t\tif (f_sumdiff == 0)\n"
945
                        "\t\t\tassert(p_three == 0);\n"
946
                "\t\t// verilator lint_off WIDTH\n"
947
                "\t\tif (f_sumcoef == 1)\n"
948
                        "\t\t\tassert(p_three == f_sumdiff);\n"
949
                "\t\tif (f_sumdiff == 1)\n"
950
                        "\t\t\tassert(p_three == f_sumcoef);\n"
951
                "\t\t// verilator lint_on  WIDTH\n"
952
"`ifdef VERILATOR\n"
953
                "\t\tassert(p_one   == f_predifr * f_dlycoeff_r[F_D-1]);\n"
954
                "\t\tassert(p_two   == f_predifi * f_dlycoeff_i[F_D-1]);\n"
955
                "\t\tassert(p_three == f_sumdiff * f_sumcoef);\n"
956
"`endif // VERILATOR\n"
957
        "\tend\n\n");
958
 
959
                fprintf(fp,
960
        "\t// F_CHECK will be set externally by the solver, so that we can\n"
961
        "\t// double check that the solver is actually testing what we think\n"
962
        "\t// it is testing.  We'll set it here to MPYREMAINDER, which will\n"
963
        "\t// essentially eliminate the check--unless overridden by the\n"
964
        "\t// solver.\n"
965
        "\tparameter    F_CHECK = MPYREMAINDER;\n"
966
        "\tinitial      assert(MPYREMAINDER == F_CHECK);\n\n");
967
 
968
                fprintf(fp,
969
"`endif // FORMAL\n");
970
        }
971
 
972
        fprintf(fp,
973
"endmodule\n");
974
        fclose(fp);
975
}
976
 
977
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding,
978
                int ckpce, const bool async_reset) {
979
        FILE    *fp = fopen(fname, "w");
980
        if (NULL == fp) {
981
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
982
                perror("O/S Err was:");
983
                return;
984
        }
985
 
986
        const   char    *rnd_string;
987
        if (rounding == RND_TRUNCATE)
988
                rnd_string = "truncate";
989
        else if (rounding == RND_FROMZERO)
990
                rnd_string = "roundfromzero";
991
        else if (rounding == RND_HALFUP)
992
                rnd_string = "roundhalfup";
993
        else
994
                rnd_string = "convround";
995
 
996
        std::string     resetw("i_reset");
997
        if (async_reset)
998
                resetw = std::string("i_areset_n");
999
 
1000
 
1001
        fprintf(fp,
1002
SLASHLINE
1003
"//\n"
1004
"// Filename:\thwbfly.v\n"
1005
"//\n"
1006
"// Project:\t%s\n"
1007
"//\n"
1008
"// Purpose:\tThis routine is identical to the butterfly.v routine found\n"
1009
"//             in 'butterfly.v', save only that it uses the verilog\n"
1010
"//     operator '*' in hopes that the synthesizer would be able to optimize\n"
1011
"//     it with hardware resources.\n"
1012
"//\n"
1013
"//     It is understood that a hardware multiply can complete its operation in\n"
1014
"//     a single clock.\n"
1015
"//\n"
1016
"// Operation:\n"
1017
"//\n"
1018
"//     Given two inputs, A (i_left) and B (i_right), and a complex\n"
1019
"//     coefficient C (i_coeff), return two outputs, O1 and O2, where:\n"
1020
"//\n"
1021
"//             O1 = A + B, and\n"
1022
"//             O2 = (A - B)*C\n"
1023
"//\n"
1024
"//     This operation is commonly known as a Decimation in Frequency (DIF)\n"
1025
"//     Radix-2 Butterfly.\n"
1026
"//     O1 and O2 are rounded before being returned in (o_left) and o_right\n"
1027
"//     to OWIDTH bits.  If SHIFT is one, an extra bit is dropped from these\n"
1028
"//     values during the rounding process.\n"
1029
"//\n"
1030
"//     Further, since these outputs will take some number of clocks to\n"
1031
"//     calculate, we'll pipe a value (i_aux) through the system and return\n"
1032
"//     it with the results (o_aux), so you can synchronize to the outgoing\n"
1033
"//     output stream.\n"
1034
"//\n"
1035
"//\n%s"
1036
"//\n", prjname, creator);
1037
        fprintf(fp, "%s", cpyleft);
1038
        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");
1039
        fprintf(fp,
1040
"module hwbfly(i_clk, %s, i_ce, i_coef, i_left, i_right, i_aux,\n"
1041
                "\t\to_left, o_right, o_aux);\n"
1042
        "\t// Public changeable parameters ...\n"
1043
        "\t//   - IWIDTH, number of bits in each component of the input\n"
1044
        "\t//   - CWIDTH, number of bits in each component of the twiddle factor\n"
1045
        "\t//   - OWIDTH, number of bits in each component of the output\n"
1046
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
1047
        "\t// Drop an additional bit on the output?\n"
1048
        "\tparameter\t\tSHIFT=0;\n"
1049
        "\t// The number of clocks per clock enable, 1, 2, or 3.\n"
1050
        "\tparameter\t[1:0]\tCKPCE=%d;\n\t//\n", resetw.c_str(), xtracbits,
1051
                ckpce);
1052
 
1053
        fprintf(fp,
1054
        "\tinput\t\ti_clk, %s, i_ce;\n"
1055
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
1056
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
1057
        "\tinput\t\ti_aux;\n"
1058
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
1059
        "\toutput\treg\to_aux;\n\n"
1060
"\n", resetw.c_str());
1061
 
1062
        fprintf(fp,
1063
        "\treg\t[(2*IWIDTH-1):0]        r_left, r_right;\n"
1064
        "\treg\t                        r_aux, r_aux_2;\n"
1065
        "\treg\t[(2*CWIDTH-1):0]        r_coef;\n"
1066
        "\twire signed  [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
1067
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
1068
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
1069
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
1070
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
1071
        "\treg  signed  [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
1072
"\n"
1073
        "\treg  signed  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
1074
"\n"
1075
        "\treg  [(2*IWIDTH+2):0]        leftv, leftvv;\n"
1076
"\n"
1077
        "\t// Set up the input to the multiply\n"
1078
        "\tinitial r_aux   = 1\'b0;\n"
1079
        "\tinitial r_aux_2 = 1\'b0;\n");
1080
        if (async_reset)
1081
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
1082
        else
1083
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
1084
        fprintf(fp,
1085
                "\t\tbegin\n"
1086
                        "\t\t\tr_aux <= 1\'b0;\n"
1087
                        "\t\t\tr_aux_2 <= 1\'b0;\n"
1088
                "\t\tend else if (i_ce)\n"
1089
                "\t\tbegin\n"
1090
                        "\t\t\t// One clock just latches the inputs\n"
1091
                        "\t\t\tr_aux <= i_aux;\n"
1092
                        "\t\t\t// Next clock adds/subtracts\n"
1093
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1094
                        "\t\t\tr_aux_2 <= r_aux;\n"
1095
                "\t\tend\n"
1096
        "\talways @(posedge i_clk)\n"
1097
                "\t\tif (i_ce)\n"
1098
                "\t\tbegin\n"
1099
                        "\t\t\t// One clock just latches the inputs\n"
1100
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
1101
                        "\t\t\tr_right <= i_right;\n"
1102
                        "\t\t\tr_coef  <= i_coef;\n"
1103
                        "\t\t\t// Next clock adds/subtracts\n"
1104
                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1105
                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"
1106
                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
1107
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
1108
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
1109
                        "\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
1110
                        "\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
1111
                "\t\tend\n"
1112
        "\n\n");
1113
        fprintf(fp,
1114
"\t// See comments in the butterfly.v source file for a discussion of\n"
1115
"\t// these operations and the appropriate bit widths.\n\n");
1116
        fprintf(fp,
1117
        "\twire\tsigned [((IWIDTH+1)+(CWIDTH)-1):0]     p_one, p_two;\n"
1118
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   p_three;\n"
1119
"\n"
1120
        "\tinitial leftv    = 0;\n"
1121
        "\tinitial leftvv   = 0;\n");
1122
        if (async_reset)
1123
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
1124
        else
1125
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
1126
        fprintf(fp,
1127
                "\t\tbegin\n"
1128
                        "\t\t\tleftv <= 0;\n"
1129
                        "\t\t\tleftvv <= 0;\n"
1130
                "\t\tend else if (i_ce)\n"
1131
                "\t\tbegin\n"
1132
                        "\t\t\t// Second clock, pipeline = 1\n"
1133
                        "\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
1134
"\n"
1135
                        "\t\t\t// Third clock, pipeline = 3\n"
1136
                        "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1137
                        "\t\t\tleftvv <= leftv;\n"
1138
                "\t\tend\n"
1139
"\n");
1140
 
1141
        // Nominally, we should handle code for 1, 2, or 3 clocks per CE, with
1142
        // one clock per CE meaning CE could be constant.  The generate
1143
        // branches emitted below cover all three cases in turn: CKPCE_ONE,
1144
        // CKPCE_TWO, and CKPCE_THREE.
1145
 
1146
//      fprintf(fp,
1147
//"\tend else if (CKPCI == 2'b01)\n\tbegin\n");
1148
 
1149
        ///////////////////////////////////////////
1150
        ///
1151
        ///     One clock per CE, so CE, CE, CE, CE, CE is possible
1152
        ///
1153
        fprintf(fp,
1154
"\tgenerate if (CKPCE <= 1)\n\tbegin : CKPCE_ONE\n");
1155
 
1156
        fprintf(fp,
1157
        "\t\t// Coefficient multiply inputs\n"
1158
        "\t\treg\tsigned        [(CWIDTH-1):0]  p1c_in, p2c_in;\n"
1159
        "\t\t// Data multiply inputs\n"
1160
        "\t\treg\tsigned        [(IWIDTH):0]    p1d_in, p2d_in;\n"
1161
        "\t\t// Product 3, coefficient input\n"
1162
        "\t\treg\tsigned        [(CWIDTH):0]    p3c_in;\n"
1163
        "\t\t// Product 3, data input\n"
1164
        "\t\treg\tsigned        [(IWIDTH+1):0]  p3d_in;\n"
1165
"\n");
1166
        fprintf(fp,
1167
        "\t\treg\tsigned        [((IWIDTH+1)+(CWIDTH)-1):0]     rp_one, rp_two;\n"
1168
        "\t\treg\tsigned        [((IWIDTH+2)+(CWIDTH+1)-1):0]   rp_three;\n"
1169
"\n");
1170
 
1171
        fprintf(fp,
1172
        "\t\talways @(posedge i_clk)\n"
1173
        "\t\tif (i_ce)\n"
1174
        "\t\tbegin\n"
1175
                "\t\t\t// Second clock, pipeline = 1\n"
1176
                "\t\t\tp1c_in <= ir_coef_r;\n"
1177
                "\t\t\tp2c_in <= ir_coef_i;\n"
1178
                "\t\t\tp1d_in <= r_dif_r;\n"
1179
                "\t\t\tp2d_in <= r_dif_i;\n"
1180
                "\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
1181
                "\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
1182
        "\t\tend\n\n");
1183
 
1184
        if (formal_property_flag)
1185
                fprintf(fp,
1186
"`ifndef        FORMAL\n");
1187
 
1188
        fprintf(fp,
1189
        "\t\talways @(posedge i_clk)\n"
1190
        "\t\tif (i_ce)\n"
1191
        "\t\tbegin\n"
1192
                "\t\t\t// Third clock, pipeline = 3\n"
1193
                "\t\t\t//   As desired, each of these lines infers a DSP48\n"
1194
                "\t\t\trp_one   <= p1c_in * p1d_in;\n"
1195
                "\t\t\trp_two   <= p2c_in * p2d_in;\n"
1196
                "\t\t\trp_three <= p3c_in * p3d_in;\n"
1197
        "\t\tend\n");
1198
 
1199
        if (formal_property_flag)
1200
                fprintf(fp,
1201
"`else\n"
1202
                "\t\twire       signed  [((IWIDTH+1)+(CWIDTH)-1):0]     pre_rp_one, pre_rp_two;\n"
1203
                "\t\twire       signed  [((IWIDTH+2)+(CWIDTH+1)-1):0]   pre_rp_three;\n"
1204
"\n"
1205
                "\t\tabs_mpy #(CWIDTH,IWIDTH+1,1'b1)\n"
1206
                "\t\t   onei(p1c_in, p1d_in, pre_rp_one);\n"
1207
                "\t\tabs_mpy #(CWIDTH,IWIDTH+1,1'b1)\n"
1208
                "\t\t   twoi(p2c_in, p2d_in, pre_rp_two);\n"
1209
                "\t\tabs_mpy #(CWIDTH+1,IWIDTH+2,1'b1)\n"
1210
                "\t\t   threei(p3c_in, p3d_in, pre_rp_three);\n"
1211
"\n"
1212
                "\t\talways @(posedge i_clk)\n"
1213
                "\t\tif (i_ce)\n"
1214
                "\t\tbegin\n"
1215
                "\t\t   rp_one   = pre_rp_one;\n"
1216
                "\t\t   rp_two   = pre_rp_two;\n"
1217
                "\t\t   rp_three = pre_rp_three;\n"
1218
                "\t\tend\n"
1219
"`endif // FORMAL\n");
1220
 
1221
        fprintf(fp,"\n"
1222
        "\t\tassign\tp_one   = rp_one;\n"
1223
        "\t\tassign\tp_two   = rp_two;\n"
1224
        "\t\tassign\tp_three = rp_three;\n"
1225
"\n");
1226
 
1227
        ///////////////////////////////////////////
1228
        ///
1229
        ///     Two clocks per CE, so CE, no-ce, CE, no-ce, etc
1230
        ///
1231
        fprintf(fp,
1232
        "\tend else if (CKPCE <= 2)\n"
1233
        "\tbegin : CKPCE_TWO\n"
1234
                "\t\t// Coefficient multiply inputs\n"
1235
                "\t\treg                [2*(CWIDTH)-1:0]        mpy_pipe_c;\n"
1236
                "\t\t// Data multiply inputs\n"
1237
                "\t\treg                [2*(IWIDTH+1)-1:0]      mpy_pipe_d;\n"
1238
                "\t\twire       signed  [(CWIDTH-1):0]  mpy_pipe_vc;\n"
1239
                "\t\twire       signed  [(IWIDTH):0]    mpy_pipe_vd;\n"
1240
                "\t\t//\n"
1241
                "\t\treg        signed  [(CWIDTH+1)-1:0]        mpy_cof_sum;\n"
1242
                "\t\treg        signed  [(IWIDTH+2)-1:0]        mpy_dif_sum;\n"
1243
"\n"
1244
                "\t\tassign     mpy_pipe_vc =  mpy_pipe_c[2*(CWIDTH)-1:CWIDTH];\n"
1245
                "\t\tassign     mpy_pipe_vd =  mpy_pipe_d[2*(IWIDTH+1)-1:IWIDTH+1];\n"
1246
"\n"
1247
                "\t\treg                        mpy_pipe_v;\n"
1248
                "\t\treg                        ce_phase;\n"
1249
"\n"
1250
                "\t\treg        signed  [(CWIDTH+IWIDTH+1)-1:0] mpy_pipe_out;\n"
1251
                "\t\treg        signed [IWIDTH+CWIDTH+3-1:0]    longmpy;\n"
1252
"\n"
1253
"\n"
1254
                "\t\tinitial    ce_phase = 1'b1;\n"
1255
                "\t\talways @(posedge i_clk)\n"
1256
                "\t\tif (i_reset)\n"
1257
                        "\t\t\tce_phase <= 1'b1;\n"
1258
                "\t\telse if (i_ce)\n"
1259
                        "\t\t\tce_phase <= 1'b0;\n"
1260
                "\t\telse\n"
1261
                        "\t\t\tce_phase <= 1'b1;\n"
1262
"\n"
1263
                "\t\talways @(*)\n"
1264
                        "\t\t\tmpy_pipe_v = (i_ce)||(!ce_phase);\n"
1265
"\n"
1266
                "\t\talways @(posedge i_clk)\n"
1267
                "\t\tif (!ce_phase)\n"
1268
                "\t\tbegin\n"
1269
                        "\t\t\t// Pre-clock\n"
1270
                        "\t\t\tmpy_pipe_c[2*CWIDTH-1:0] <=\n"
1271
                                "\t\t\t\t\t{ ir_coef_r, ir_coef_i };\n"
1272
                        "\t\t\tmpy_pipe_d[2*(IWIDTH+1)-1:0] <=\n"
1273
                                "\t\t\t\t\t{ r_dif_r, r_dif_i };\n"
1274
"\n"
1275
                        "\t\t\tmpy_cof_sum  <= ir_coef_i + ir_coef_r;\n"
1276
                        "\t\t\tmpy_dif_sum <= r_dif_r + r_dif_i;\n"
1277
"\n"
1278
                "\t\tend else if (i_ce)\n"
1279
                "\t\tbegin\n"
1280
                        "\t\t\t// First clock\n"
1281
                        "\t\t\tmpy_pipe_c[2*(CWIDTH)-1:0] <= {\n"
1282
                                "\t\t\t\tmpy_pipe_c[(CWIDTH)-1:0], {(CWIDTH){1'b0}} };\n"
1283
                        "\t\t\tmpy_pipe_d[2*(IWIDTH+1)-1:0] <= {\n"
1284
                                "\t\t\t\tmpy_pipe_d[(IWIDTH+1)-1:0], {(IWIDTH+1){1'b0}} };\n"
1285
                "\t\tend\n\n");
1286
 
1287
        if (formal_property_flag)
1288
                fprintf(fp, "`ifndef    FORMAL\n");
1289
 
1290
        fprintf(fp,
1291
                "\t\talways @(posedge i_clk)\n"
1292
                "\t\tif (i_ce) // First clock\n"
1293
                        "\t\t\tlongmpy <= mpy_cof_sum * mpy_dif_sum;\n"
1294
"\n"
1295
                "\t\talways @(posedge i_clk)\n"
1296
                "\t\tif (mpy_pipe_v)\n"
1297
                        "\t\t\tmpy_pipe_out <= mpy_pipe_vc * mpy_pipe_vd;\n");
1298
 
1299
        if (formal_property_flag)
1300
                fprintf(fp, "`else\n"
1301
                "\t\twire       signed [IWIDTH+CWIDTH+3-1:0]    pre_longmpy;\n"
1302
                "\t\twire       signed  [(CWIDTH+IWIDTH+1)-1:0] pre_mpy_pipe_out;\n"
1303
"\n"
1304
                "\t\tabs_mpy    #(CWIDTH+1,IWIDTH+2,1)\n"
1305
                "\t\t   longmpyi(mpy_cof_sum, mpy_dif_sum, pre_longmpy);\n"
1306
"\n"
1307
                "\t\talways @(posedge i_clk)\n"
1308
                "\t\tif (i_ce)\n"
1309
                "\t\t   longmpy <= pre_longmpy;\n"
1310
"\n"
1311
"\n"
1312
                "\t\tabs_mpy #(CWIDTH,IWIDTH+1,1)\n"
1313
                "\t\t   mpy_pipe_outi(mpy_pipe_vc, mpy_pipe_vd, pre_mpy_pipe_out);\n"
1314
"\n"
1315
                "\t\talways @(posedge i_clk)\n"
1316
                "\t\tif (mpy_pipe_v)\n"
1317
                "\t\t   mpy_pipe_out <= pre_mpy_pipe_out;\n"
1318
"`endif\n");
1319
 
1320
        fprintf(fp,"\n"
1321
                "\t\treg\tsigned\t[((IWIDTH+1)+(CWIDTH)-1):0]   rp_one,\n"
1322
                                "\t\t\t\t\t\t\trp2_one, rp_two;\n"
1323
                "\t\treg\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0] rp_three;\n"
1324
"\n"
1325
                "\t\talways @(posedge i_clk)\n"
1326
                "\t\tif (!ce_phase) // 1.5 clock\n"
1327
                        "\t\t\trp_one <= mpy_pipe_out;\n"
1328
                "\t\talways @(posedge i_clk)\n"
1329
                "\t\tif (i_ce) // two clocks\n"
1330
                        "\t\t\trp_two <= mpy_pipe_out;\n"
1331
                "\t\talways @(posedge i_clk)\n"
1332
                "\t\tif (i_ce) // Second clock\n"
1333
                        "\t\t\trp_three<= longmpy;\n"
1334
                "\t\talways @(posedge i_clk)\n"
1335
                "\t\tif (i_ce)\n"
1336
                        "\t\t\trp2_one<= rp_one;\n"
1337
"\n"
1338
                "\t\tassign     p_one  = rp2_one;\n"
1339
                "\t\tassign     p_two  = rp_two;\n"
1340
                "\t\tassign     p_three= rp_three;\n"
1341
"\n");
1342
 
1343
        /////////////////////////
1344
        ///
1345
        ///     Three clocks per CE, so CE, no-ce, no-ce*, CE
1346
        ///
1347
        fprintf(fp,
1348
"\tend else if (CKPCE <= 2'b11)\n\tbegin : CKPCE_THREE\n");
1349
 
1350
        fprintf(fp,
1351
        "\t\t// Coefficient multiply inputs\n"
1352
        "\t\treg\t\t[3*(CWIDTH+1)-1:0]\tmpy_pipe_c;\n"
1353
        "\t\t// Data multiply inputs\n"
1354
        "\t\treg\t\t[3*(IWIDTH+2)-1:0]\tmpy_pipe_d;\n"
1355
        "\t\twire\tsigned       [(CWIDTH):0]    mpy_pipe_vc;\n"
1356
        "\t\twire\tsigned       [(IWIDTH+1):0]  mpy_pipe_vd;\n"
1357
        "\n"
1358
        "\t\tassign\tmpy_pipe_vc =  mpy_pipe_c[3*(CWIDTH+1)-1:2*(CWIDTH+1)];\n"
1359
        "\t\tassign\tmpy_pipe_vd =  mpy_pipe_d[3*(IWIDTH+2)-1:2*(IWIDTH+2)];\n"
1360
        "\n"
1361
        "\t\treg\t\t\tmpy_pipe_v;\n"
1362
        "\t\treg\t\t[2:0]\tce_phase;\n"
1363
        "\n"
1364
        "\t\treg\tsigned        [  (CWIDTH+IWIDTH+3)-1:0]       mpy_pipe_out;\n"
1365
"\n");
1366
        fprintf(fp,
1367
        "\t\tinitial\tce_phase = 3'b011;\n"
1368
        "\t\talways @(posedge i_clk)\n"
1369
        "\t\tif (i_reset)\n"
1370
                "\t\t\tce_phase <= 3'b011;\n"
1371
        "\t\telse if (i_ce)\n"
1372
                "\t\t\tce_phase <= 3'b000;\n"
1373
        "\t\telse if (ce_phase != 3'b011)\n"
1374
                "\t\t\tce_phase <= ce_phase + 1'b1;\n"
1375
"\n"
1376
        "\t\talways @(*)\n"
1377
                "\t\t\tmpy_pipe_v = (i_ce)||(ce_phase < 3'b010);\n"
1378
"\n");
1379
 
1380
        fprintf(fp,
1381
        "\t\talways @(posedge i_clk)\n"
1382
                "\t\t\tif (ce_phase == 3\'b000)\n"
1383
                "\t\t\tbegin\n"
1384
                        "\t\t\t\t// Second clock\n"
1385
                        "\t\t\t\tmpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= {\n"
1386
                        "\t\t\t\t\tir_coef_r[CWIDTH-1], ir_coef_r,\n"
1387
                        "\t\t\t\t\tir_coef_i[CWIDTH-1], ir_coef_i };\n"
1388
                        "\t\t\t\tmpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r;\n"
1389
                        "\t\t\t\tmpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= {\n"
1390
                        "\t\t\t\t\tr_dif_r[IWIDTH], r_dif_r,\n"
1391
                        "\t\t\t\t\tr_dif_i[IWIDTH], r_dif_i };\n"
1392
                        "\t\t\t\tmpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i;\n"
1393
"\n"
1394
                "\t\t\tend else if (mpy_pipe_v)\n"
1395
                "\t\t\tbegin\n"
1396
                        "\t\t\t\tmpy_pipe_c[3*(CWIDTH+1)-1:0] <= {\n"
1397
                        "\t\t\t\t\tmpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1\'b0}} };\n"
1398
                        "\t\t\t\tmpy_pipe_d[3*(IWIDTH+2)-1:0] <= {\n"
1399
                        "\t\t\t\t\tmpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1\'b0}} };\n"
1400
                "\t\t\tend\n\n");
1401
 
1402
        if (formal_property_flag)
1403
                fprintf(fp, "`ifndef\tFORMAL\n");
1404
 
1405
        fprintf(fp,
1406
        "\t\talways @(posedge i_clk)\n"
1407
        "\t\t\tif (mpy_pipe_v)\n"
1408
                        "\t\t\t\tmpy_pipe_out <= mpy_pipe_vc * mpy_pipe_vd;\n"
1409
"\n");
1410
 
1411
        if (formal_property_flag)
1412
                fprintf(fp,
1413
"`else\t// FORMAL\n"
1414
                "\t\twire       signed  [  (CWIDTH+IWIDTH+3)-1:0] pre_mpy_pipe_out;\n"
1415
"\n"
1416
                "\t\tabs_mpy #(CWIDTH+1,IWIDTH+2,1)\n"
1417
                "\t\t   mpy_pipe_outi(mpy_pipe_vc, mpy_pipe_vd, pre_mpy_pipe_out);\n"
1418
                "\t\talways @(posedge i_clk)\n"
1419
                "\t\t   if (mpy_pipe_v)\n"
1420
                "\t\t           mpy_pipe_out <= pre_mpy_pipe_out;\n"
1421
"`endif\t// FORMAL\n\n");
1422
 
1423
 
1424
        fprintf(fp,
1425
        "\t\treg\tsigned\t[((IWIDTH+1)+(CWIDTH)-1):0]\trp_one, rp_two,\n"
1426
                                        "\t\t\t\t\t\trp2_one, rp2_two;\n"
1427
        "\t\treg\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\trp_three, rp2_three;\n"
1428
 
1429
"\n");
1430
 
1431
        fprintf(fp,
1432
        "\t\talways @(posedge i_clk)\n"
1433
        "\t\tif(i_ce)\n"
1434
                "\t\t\trp_one <= mpy_pipe_out[(CWIDTH+IWIDTH):0];\n"
1435
        "\t\talways @(posedge i_clk)\n"
1436
        "\t\tif(ce_phase == 3'b000)\n"
1437
                "\t\t\trp_two <= mpy_pipe_out[(CWIDTH+IWIDTH):0];\n"
1438
        "\t\talways @(posedge i_clk)\n"
1439
        "\t\tif(ce_phase == 3'b001)\n"
1440
                "\t\t\trp_three <= mpy_pipe_out;\n"
1441
        "\t\talways @(posedge i_clk)\n"
1442
        "\t\tif (i_ce)\n"
1443
        "\t\tbegin\n"
1444
                "\t\t\trp2_one<= rp_one;\n"
1445
                "\t\t\trp2_two<= rp_two;\n"
1446
                "\t\t\trp2_three<= rp_three;\n"
1447
        "\t\tend\n");
1448
        fprintf(fp,
1449
        "\t\tassign     p_one\t= rp2_one;\n"
1450
        "\t\tassign     p_two\t= rp2_two;\n"
1451
        "\t\tassign\tp_three\t= rp2_three;\n"
1452
"\n");
1453
 
1454
        fprintf(fp,
1455
"\tend endgenerate\n");
1456
 
1457
        fprintf(fp,
1458
        "\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0]   w_one, w_two;\n"
1459
        "\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
1460
        "\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
1461
"\n");
1462
 
1463
        fprintf(fp,
1464
        "\t// These values are held in memory and delayed during the\n"
1465
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
1466
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
1467
        "\t// therefore, the left_x values need to be right shifted by\n"
1468
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
1469
        "\t// extension.\n"
1470
        "\twire\taux_s;\n"
1471
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
1472
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
1473
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
1474
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
1475
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
1476
"\n"
1477
        "\t(* use_dsp48=\"no\" *)\n"
1478
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
1479
"\n");
1480
 
1481
        fprintf(fp,
1482
        "\tinitial left_saved = 0;\n"
1483
        "\tinitial o_aux      = 1\'b0;\n");
1484
        if (async_reset)
1485
                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");
1486
        else
1487
                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");
1488
        fprintf(fp,
1489
        "\t\tbegin\n"
1490
                "\t\t\tleft_saved <= 0;\n"
1491
                "\t\t\to_aux <= 1\'b0;\n"
1492
        "\t\tend else if (i_ce)\n"
1493
        "\t\tbegin\n"
1494
                "\t\t\t// First clock, recover all values\n"
1495
                "\t\t\tleft_saved <= leftvv;\n"
1496
"\n"
1497
                "\t\t\t// Second clock, round and latch for final clock\n"
1498
                "\t\t\to_aux <= aux_s;\n"
1499
        "\t\tend\n"
1500
        "\talways @(posedge i_clk)\n"
1501
        "\t\tif (i_ce)\n"
1502
        "\t\tbegin\n"
1503
                "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
1504
                "\t\t\t// although they only need to be (IWIDTH+1)\n"
1505
                "\t\t\t// + (CWIDTH) bits wide.  (We've got two\n"
1506
                "\t\t\t// extra bits we need to get rid of.)\n"
1507
                "\n"
1508
                "\t\t\t// These two lines also infer DSP48\'s.\n"
1509
                "\t\t\t// To keep from using extra DSP48 resources,\n"
1510
                "\t\t\t// they are prevented from using DSP48\'s\n"
1511
                "\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
1512
                "\t\t\tmpy_r <= w_one - w_two;\n"
1513
                "\t\t\tmpy_i <= p_three - w_one - w_two;\n"
1514
        "\t\tend\n"
1515
        "\n");
1516
 
1517
        fprintf(fp,
1518
        "\t// Round the results\n"
1519
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
1520
        fprintf(fp,
1521
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
1522
        "\t\t\t\tleft_sr, rnd_left_r);\n\n",
1523
                rnd_string);
1524
        fprintf(fp,
1525
        "\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
1526
        "\t\t\t\tleft_si, rnd_left_i);\n\n",
1527
                rnd_string);
1528
        fprintf(fp,
1529
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
1530
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
1531
        fprintf(fp,
1532
        "\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
1533
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
1534
 
1535
 
1536
        fprintf(fp,
1537
        "\t// As a final step, we pack our outputs into two packed two's\n"
1538
        "\t// complement numbers per output word, so that each output word\n"
1539
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
1540
        "\t// portion and the bottom half being the imaginary portion.\n"
1541
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
1542
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
1543
"\n");
1544
 
1545
        if (formal_property_flag) {
1546
                fprintf(fp,
1547
"`ifdef VERILATOR\n"
1548
"`define        FORMAL\n"
1549
"`endif\n"
1550
"`ifdef FORMAL\n"
1551
        "\tlocalparam   F_LGDEPTH = 3;\n"
1552
        "\tlocalparam   F_DEPTH = 5;\n"
1553
        "\tlocalparam   [F_LGDEPTH-1:0] F_D = F_DEPTH-1;\n"
1554
"\n"
1555
        "\treg  signed  [IWIDTH-1:0]    f_dlyleft_r  [0:F_DEPTH-1];\n"
1556
        "\treg  signed  [IWIDTH-1:0]    f_dlyleft_i  [0:F_DEPTH-1];\n"
1557
        "\treg  signed  [IWIDTH-1:0]    f_dlyright_r [0:F_DEPTH-1];\n"
1558
        "\treg  signed  [IWIDTH-1:0]    f_dlyright_i [0:F_DEPTH-1];\n"
1559
        "\treg  signed  [CWIDTH-1:0]    f_dlycoeff_r [0:F_DEPTH-1];\n"
1560
        "\treg  signed  [CWIDTH-1:0]    f_dlycoeff_i [0:F_DEPTH-1];\n"
1561
        "\treg  signed  [F_DEPTH-1:0]   f_dlyaux;\n"
1562
"\n"
1563
        "\talways @(posedge i_clk)\n"
1564
        "\tif (i_reset)\n"
1565
                "\t\tf_dlyaux <= 0;\n"
1566
        "\telse if (i_ce)\n"
1567
                "\t\tf_dlyaux <= { f_dlyaux[F_DEPTH-2:0], i_aux };\n"
1568
"\n"
1569
        "\talways @(posedge i_clk)\n"
1570
        "\tif (i_ce)\n"
1571
        "\tbegin\n"
1572
                "\t\tf_dlyleft_r[0]   <= i_left[ (2*IWIDTH-1):IWIDTH];\n"
1573
                "\t\tf_dlyleft_i[0]   <= i_left[ (  IWIDTH-1):0];\n"
1574
                "\t\tf_dlyright_r[0]  <= i_right[(2*IWIDTH-1):IWIDTH];\n"
1575
                "\t\tf_dlyright_i[0]  <= i_right[(  IWIDTH-1):0];\n"
1576
                "\t\tf_dlycoeff_r[0]  <= i_coef[ (2*CWIDTH-1):CWIDTH];\n"
1577
                "\t\tf_dlycoeff_i[0]  <= i_coef[ (  CWIDTH-1):0];\n"
1578
        "\tend\n"
1579
"\n"
1580
        "\tgenvar       k;\n"
1581
        "\tgenerate for(k=1; k<F_DEPTH; k=k+1)\n"
1582
"\n"
1583
                "\t\talways @(posedge i_clk)\n"
1584
                "\t\tif (i_ce)\n"
1585
                "\t\tbegin\n"
1586
                        "\t\t\tf_dlyleft_r[k]  <= f_dlyleft_r[ k-1];\n"
1587
                        "\t\t\tf_dlyleft_i[k]  <= f_dlyleft_i[ k-1];\n"
1588
                        "\t\t\tf_dlyright_r[k] <= f_dlyright_r[k-1];\n"
1589
                        "\t\t\tf_dlyright_i[k] <= f_dlyright_i[k-1];\n"
1590
                        "\t\t\tf_dlycoeff_r[k] <= f_dlycoeff_r[k-1];\n"
1591
                        "\t\t\tf_dlycoeff_i[k] <= f_dlycoeff_i[k-1];\n"
1592
                "\t\tend\n"
1593
"\n"
1594
        "\tendgenerate\n"
1595
"\n"
1596
"`ifdef VERILATOR"
1597
/*
1598
        "\tgenerate if (CKPCE <= 1)\n"
1599
        "\tbegin\n"
1600
"\n"
1601
        "\t\t// i_ce is allowed to be anything in this mode\n"
1602
"\n"
1603
        "\tend else if (CKPCE == 2)\n"
1604
        "\tbegin : F_CKPCE_TWO\n"
1605
"\n"
1606
                "\t\tassert property (@(posedge i_clk)\n"
1607
                "\t\t   i_ce |=> !i_ce);\n"
1608
        "\n"
1609
        "\tend else if (CKPCE == 3)\n"
1610
        "\tbegin : F_CKPCE_THREE\n"
1611
"\n"
1612
                "\t\tassert property (@(posedge i_clk)\n"
1613
                "\t\t   i_ce |=> !i_ce ##1 !i_ce);\n"
1614
"\n"
1615
        "\tend endgenerate\n"
1616
*/
1617
"\n"
1618
"`else\n"
1619
        "\talways @(posedge i_clk)\n"
1620
        "\tif ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))\n"
1621
                        "\t\t\t&&(!$past(i_ce,4)))\n"
1622
                "\t\tassume(i_ce);\n"
1623
"\n"
1624
        "\tgenerate if (CKPCE <= 1)\n"
1625
        "\tbegin\n"
1626
"\n"
1627
        "\t\t// i_ce is allowed to be anything in this mode\n"
1628
"\n"
1629
        "\tend else if (CKPCE == 2)\n"
1630
        "\tbegin : F_CKPCE_TWO\n"
1631
"\n"
1632
                "\t\talways @(posedge i_clk)\n"
1633
                "\t\t   if ($past(i_ce))\n"
1634
                "\t\t           assume(!i_ce);\n"
1635
        "\n"
1636
        "\tend else if (CKPCE == 3)\n"
1637
        "\tbegin : F_CKPCE_THREE\n"
1638
"\n"
1639
                "\t\talways @(posedge i_clk)\n"
1640
                "\t\t   if (($past(i_ce))||($past(i_ce,2)))\n"
1641
                "\t\t           assume(!i_ce);\n"
1642
"\n"
1643
        "\tend endgenerate\n"
1644
"`endif"
1645
"\n"
1646
        "\treg  [F_LGDEPTH-1:0] f_startup_counter;\n"
1647
        "\tinitial      f_startup_counter = 0;\n"
1648
        "\talways @(posedge i_clk)\n"
1649
        "\tif (i_reset)\n"
1650
                "\t\tf_startup_counter <= 0;\n"
1651
        "\telse if ((i_ce)&&(!(&f_startup_counter)))\n"
1652
                "\t\tf_startup_counter <= f_startup_counter + 1;\n"
1653
"\n"
1654
        "\twire signed  [IWIDTH:0]      f_sumr, f_sumi;\n"
1655
        "\talways @(*)\n"
1656
        "\tbegin\n"
1657
                "\t\tf_sumr = f_dlyleft_r[F_D] + f_dlyright_r[F_D];\n"
1658
                "\t\tf_sumi = f_dlyleft_i[F_D] + f_dlyright_i[F_D];\n"
1659
        "\tend\n"
1660
"\n"
1661
        "\twire signed  [IWIDTH+CWIDTH:0]       f_sumrx, f_sumix;\n"
1662
        "\tassign       f_sumrx = { {(2){f_sumr[IWIDTH]}}, f_sumr, {(CWIDTH-2){1'b0}} };\n"
1663
        "\tassign       f_sumix = { {(2){f_sumi[IWIDTH]}}, f_sumi, {(CWIDTH-2){1'b0}} };\n"
1664
        "\n"
1665
        "\twire signed  [IWIDTH:0]      f_difr, f_difi;\n"
1666
        "\talways @(*)\n"
1667
        "\tbegin\n"
1668
                "\t\tf_difr = f_dlyleft_r[F_D] - f_dlyright_r[F_D];\n"
1669
                "\t\tf_difi = f_dlyleft_i[F_D] - f_dlyright_i[F_D];\n"
1670
        "\tend\n"
1671
"\n"
1672
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_difrx, f_difix;\n"
1673
        "\tassign       f_difrx = { {(CWIDTH+2){f_difr[IWIDTH]}}, f_difr };\n"
1674
        "\tassign       f_difix = { {(CWIDTH+2){f_difi[IWIDTH]}}, f_difi };\n"
1675
"\n"
1676
        "\twire signed  [IWIDTH+CWIDTH+3-1:0]   f_widecoeff_r, f_widecoeff_i;\n"
1677
        "\tassign       f_widecoeff_r = {{(IWIDTH+3){f_dlycoeff_r[F_D][CWIDTH-1]}},\n"
1678
        "\t             f_dlycoeff_r[F_D] };\n"
1679
        "\tassign       f_widecoeff_i = {{(IWIDTH+3){f_dlycoeff_i[F_D][CWIDTH-1]}},\n"
1680
        "\t             f_dlycoeff_i[F_D] };\n"
1681
"\n"
1682
        "\talways @(posedge i_clk)\n"
1683
        "\tif (f_startup_counter > F_D)\n"
1684
        "\tbegin\n"
1685
                "\t\tassert(left_sr == f_sumrx);\n"
1686
                "\t\tassert(left_si == f_sumix);\n"
1687
                "\t\tassert(aux_s == f_dlyaux[F_D]);\n"
1688
"\n"
1689
                "\t\tif ((f_difr == 0)&&(f_difi == 0))\n"
1690
                "\t\tbegin\n"
1691
                "\t\t   assert(mpy_r == 0);\n"
1692
                "\t\t   assert(mpy_i == 0);\n"
1693
                "\t\tend else if ((f_dlycoeff_r[F_D] == 0)\n"
1694
                "\t\t           &&(f_dlycoeff_i[F_D] == 0))\n"
1695
                "\t\tbegin\n"
1696
                "\t             assert(mpy_r == 0);\n"
1697
                "\t\t   assert(mpy_i == 0);\n"
1698
                "\t\tend\n"
1699
"\n"
1700
                "\t\tif ((f_dlycoeff_r[F_D] == 1)&&(f_dlycoeff_i[F_D] == 0))\n"
1701
                "\t\tbegin\n"
1702
                "\t\t   assert(mpy_r == f_difrx);\n"
1703
                "\t\t   assert(mpy_i == f_difix);\n"
1704
                "\t\tend\n"
1705
"\n"
1706
                "\t\tif ((f_dlycoeff_r[F_D] == 0)&&(f_dlycoeff_i[F_D] == 1))\n"
1707
                "\t\tbegin\n"
1708
                "\t\t   assert(mpy_r == -f_difix);\n"
1709
                "\t\t   assert(mpy_i ==  f_difrx);\n"
1710
                "\t\tend\n"
1711
"\n"
1712
                "\t\tif ((f_difr == 1)&&(f_difi == 0))\n"
1713
                "\t\tbegin\n"
1714
                "\t\t   assert(mpy_r == f_widecoeff_r);\n"
1715
                "\t\t   assert(mpy_i == f_widecoeff_i);\n"
1716
                "\t\tend\n"
1717
"\n"
1718
                "\t\tif ((f_difr == 0)&&(f_difi == 1))\n"
1719
                "\t\tbegin\n"
1720
                "\t\t   assert(mpy_r == -f_widecoeff_i);\n"
1721
                "\t\t   assert(mpy_i ==  f_widecoeff_r);\n"
1722
                "\t\tend\n"
1723
        "\tend\n"
1724
"\n");
1725
 
1726
                fprintf(fp,
1727
        "\t// Let's see if we can improve our performance at all by\n"
1728
        "\t// moving our test one clock earlier.  If nothing else, it should\n"
1729
        "\t// help induction finish one (or more) clocks ealier than\n"
1730
        "\t// otherwise\n"
1731
"\n\n"
1732
        "\twire signed  [IWIDTH:0]      f_predifr, f_predifi;\n"
1733
        "\talways @(*)\n"
1734
        "\tbegin\n"
1735
                "\t\tf_predifr = f_dlyleft_r[F_D-1] - f_dlyright_r[F_D-1];\n"
1736
                "\t\tf_predifi = f_dlyleft_i[F_D-1] - f_dlyright_i[F_D-1];\n"
1737
        "\tend\n"
1738
"\n"
1739
        "\twire signed  [IWIDTH+CWIDTH+1-1:0]   f_predifrx, f_predifix;\n"
1740
        "\tassign       f_predifrx = { {(CWIDTH){f_predifr[IWIDTH]}}, f_predifr };\n"
1741
        "\tassign       f_predifix = { {(CWIDTH){f_predifi[IWIDTH]}}, f_predifi };\n"
1742
"\n"
1743
        "\twire signed  [CWIDTH:0]      f_sumcoef;\n"
1744
        "\twire signed  [IWIDTH+1:0]    f_sumdiff;\n"
1745
        "\talways @(*)\n"
1746
        "\tbegin\n"
1747
                "\t\tf_sumcoef = f_dlycoeff_r[F_D-1] + f_dlycoeff_i[F_D-1];\n"
1748
                "\t\tf_sumdiff = f_predifr + f_predifi;\n"
1749
        "\tend\n"
1750
"\n"
1751
        "\t// Induction helpers\n"
1752
        "\talways @(posedge i_clk)\n"
1753
        "\tif (f_startup_counter >= F_D)\n"
1754
        "\tbegin\n"
1755
                "\t\tif (f_dlycoeff_r[F_D-1] == 0)\n"
1756
                        "\t\t\tassert(p_one == 0);\n"
1757
                "\t\tif (f_dlycoeff_i[F_D-1] == 0)\n"
1758
                        "\t\t\tassert(p_two == 0);\n"
1759
"\n"
1760
                "\t\tif (f_dlycoeff_r[F_D-1] == 1)\n"
1761
                        "\t\t\tassert(p_one == f_predifrx);\n"
1762
                "\t\tif (f_dlycoeff_i[F_D-1] == 1)\n"
1763
                        "\t\t\tassert(p_two == f_predifix);\n"
1764
"\n"
1765
                "\t\tif (f_predifr == 0)\n"
1766
                        "\t\t\tassert(p_one == 0);\n"
1767
                "\t\tif (f_predifi == 0)\n"
1768
                        "\t\t\tassert(p_two == 0);\n"
1769
"\n"
1770
                "\t\t// verilator lint_off WIDTH\n"
1771
                "\t\tif (f_predifr == 1)\n"
1772
                        "\t\t\tassert(p_one == f_dlycoeff_r[F_D-1]);\n"
1773
                "\t\tif (f_predifi == 1)\n"
1774
                        "\t\t\tassert(p_two == f_dlycoeff_i[F_D-1]);\n"
1775
                "\t\t// verilator lint_on  WIDTH\n"
1776
"\n"
1777
                "\t\tif (f_sumcoef == 0)\n"
1778
                        "\t\t\tassert(p_three == 0);\n"
1779
                "\t\tif (f_sumdiff == 0)\n"
1780
                        "\t\t\tassert(p_three == 0);\n"
1781
                "\t\t// verilator lint_off WIDTH\n"
1782
                "\t\tif (f_sumcoef == 1)\n"
1783
                        "\t\t\tassert(p_three == f_sumdiff);\n"
1784
                "\t\tif (f_sumdiff == 1)\n"
1785
                        "\t\t\tassert(p_three == f_sumcoef);\n"
1786
                "\t\t// verilator lint_on  WIDTH\n"
1787
"`ifdef VERILATOR\n"
1788
                "\t\tassert(p_one   == f_predifr * f_dlycoeff_r[F_D-1]);\n"
1789
                "\t\tassert(p_two   == f_predifi * f_dlycoeff_i[F_D-1]);\n"
1790
                "\t\tassert(p_three == f_sumdiff * f_sumcoef);\n"
1791
"`endif // VERILATOR\n"
1792
        "\tend\n\n"
1793
"`endif // FORMAL\n");
1794
        }
1795
 
1796
        fprintf(fp,
1797
"endmodule\n");
1798
 
1799
        fclose(fp);
1800
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.