OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /dblclockfft/trunk
    from Rev 25 to Rev 26
    Reverse comparison

Rev 25 → Rev 26

/bench/cpp/fft_tb.m
8,8 → 8,12
% Reshape the matrix into one line per FFT
% Assume an FFT length of 2048
ftlen = 2048;
ndat = reshape(datc, ftlen, length(datc)/ftlen);
% ftlen = 128;
ndat = reshape(datc, ftlen*2, length(datc)/(ftlen*2));
 
truth = ndat((ftlen+1):(2*ftlen), :);
output = ndat(1:ftlen,:);
 
% Create a time axis, for use in plotting if desired
tm = 0:(ftlen-1);
 
/bench/cpp/hwbfly_tb.cpp
109,6 → 109,7
m_bfly->i_right = rht;
m_bfly->i_aux = aux & 1;
 
m_bfly->i_ce = 1;
tick();
 
if ((m_bfly->o_aux)&&(!m_lastaux))
/bench/cpp/dblrev_tb.cpp
42,8 → 42,12
#include "Vdblreverse.h"
#include "verilated.h"
 
#define FFTBITS 4
#define FFTMASK ((1<<(FFTBITS))-1)
#define FFTBITS 5
#define FFTSIZE (1<<(FFTBITS))
#define FFTMASK (FFTSIZE-1)
#define DATALEN (1<<(FFTBITS+1))
#define DATAMSK (DATALEN-1)
#define PAGEMSK (FFTSIZE)
 
void tick(Vdblreverse *dblrev) {
dblrev->i_clk = 0;
50,6 → 54,8
dblrev->eval();
dblrev->i_clk = 1;
dblrev->eval();
 
dblrev->i_ce = 0;
}
 
void reset(Vdblreverse *dblrev) {
61,8 → 67,9
tick(dblrev);
}
 
unsigned long bitrev(int nbits, unsigned long val) {
int r = 0;
unsigned long bitrev(const int nbits, const unsigned long vl) {
unsigned long r = 0;
unsigned long val = vl;
 
for(int k=0; k<nbits; k++) {
r <<= 1;
77,35 → 84,84
Verilated::commandArgs(argc, argv);
Vdblreverse *dblrev = new Vdblreverse;
int syncd = 0;
unsigned long datastore[DATALEN], dataidx=0;
 
reset(dblrev);
 
for(int k=0; k<64; k++) {
printf("FFTSIZE = %08x\n", FFTSIZE);
printf("FFTMASK = %08x\n", FFTMASK);
printf("DATALEN = %08x\n", DATALEN);
printf("DATAMSK = %08x\n", DATAMSK);
 
for(int k=0; k<4*(FFTSIZE); k++) {
dblrev->i_ce = 1;
dblrev->i_in_0 = 2*k;
dblrev->i_in_1 = 2*k+1;
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_0;
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_1;
tick(dblrev);
 
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\n",
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\t(%x)\n",
k, dblrev->i_in_0, dblrev->i_in_1,
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync);
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync,
dblrev->v__DOT__iaddr);
 
if ((k>0)&&(((0==(k&(FFTMASK>>1)))?1:0) != dblrev->o_sync)) {
fprintf(stderr, "FAIL, BAD SYNC\n");
fprintf(stdout, "FAIL, BAD SYNC\n");
exit(-1);
} else if (dblrev->o_sync)
} else if (dblrev->o_sync) {
syncd = 1;
}
if ((syncd)&&((dblrev->o_out_0&FFTMASK) != bitrev(FFTBITS, 2*k))) {
fprintf(stderr, "FAIL: BITREV of k (%2x) = %2lx, not %2lx\n",
fprintf(stdout, "FAIL: BITREV.0 of k (%2x) = %2lx, not %2lx\n",
k, dblrev->o_out_0, bitrev(FFTBITS, 2*k));
exit(-1);
// exit(-1);
}
 
if ((syncd)&&((dblrev->o_out_1&FFTMASK) != bitrev(FFTBITS, 2*k+1))) {
fprintf(stderr, "FAIL: BITREV of k (%2x) = %2lx, not %2lx\n",
fprintf(stdout, "FAIL: BITREV.1 of k (%2x) = %2lx, not %2lx\n",
k, dblrev->o_out_1, bitrev(FFTBITS, 2*k+1));
// exit(-1);
}
}
 
for(int k=0; k<4*(FFTSIZE); k++) {
dblrev->i_ce = 1;
dblrev->i_in_0 = rand() & 0x0ffffff;
dblrev->i_in_1 = rand() & 0x0ffffff;
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_0;
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_1;
tick(dblrev);
 
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\n",
k, dblrev->i_in_0, dblrev->i_in_1,
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync);
 
if ((k>0)&&(((0==(k&(FFTMASK>>1)))?1:0) != dblrev->o_sync)) {
fprintf(stdout, "FAIL, BAD SYNC\n");
exit(-1);
} else if (dblrev->o_sync)
syncd = 1;
if ((syncd)&&(dblrev->o_out_0 != datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) + bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK))])) {
fprintf(stdout, "FAIL: BITREV.0 of k (%2x) = %2lx, not %2lx (expected %lx -> %lx)\n",
k, dblrev->o_out_0,
datastore[(((dataidx-2-FFTSIZE)&PAGEMSK)
+ bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK))],
(dataidx-2)&DATAMSK,
(((dataidx-2)&PAGEMSK)
+ bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK)));
// exit(-1);
}
 
if ((syncd)&&(dblrev->o_out_1 != datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) + bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK))])) {
fprintf(stdout, "FAIL: BITREV.1 of k (%2x) = %2lx, not %2lx (expected %lx)\n",
k, dblrev->o_out_1,
datastore[(((dataidx-2-FFTSIZE)&PAGEMSK)
+ bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK))],
(((dataidx-1)&PAGEMSK)
+ bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK)));
// exit(-1);
}
}
 
delete dblrev;
/bench/cpp/fft_tb.cpp
49,16 → 49,30
 
#define LGWIDTH 11
#define IWIDTH 16
// #define OWIDTH 16
#define OWIDTH 22
 
#define NFTLOG 8
#define FFTLEN (1<<LGWIDTH)
 
// Reverse the order of the low-order `nbits` bits of `vl`.
//
// Bit k of the input (counting from the least significant bit) ends up
// in bit (nbits-1-k) of the result.  Any input bits at or above position
// `nbits` are ignored, and a non-positive `nbits` yields zero.
unsigned long bitrev(const int nbits, const unsigned long vl) {
	unsigned long reversed = 0;
	unsigned long remaining = vl;

	// Peel bits off the bottom of the input, pushing each one onto the
	// bottom of the result so that the first bit out ends up highest.
	for (int left = nbits; left > 0; --left) {
		reversed = (reversed << 1) | (remaining & 1ul);
		remaining >>= 1;
	}

	return reversed;
}
 
class FFT_TB {
public:
Vfftmain *m_fft;
long m_data[FFTLEN], m_log[NFTLOG*FFTLEN];
int m_iaddr, m_oaddr, m_ntest;
int m_iaddr, m_oaddr, m_ntest, m_logbase;
FILE *m_dumpfp;
fftw_plan m_plan;
double *m_fft_buf;
82,6 → 96,17
m_fft->eval();
m_fft->i_clk = 1;
m_fft->eval();
 
/*
int nrpt = (rand()&0x01f) + 1;
m_fft->i_ce = 0;
for(int i=0; i<nrpt; i++) {
m_fft->i_clk = 0;
m_fft->eval();
m_fft->i_clk = 1;
m_fft->eval();
}
*/
}
 
void reset(void) {
91,7 → 116,7
m_fft->i_rst = 0;
tick();
 
m_iaddr = m_oaddr = 0;
m_iaddr = m_oaddr = m_logbase = 0;
m_syncd = false;
}
 
106,8 → 131,9
long *lp;
 
// Fill up our test array from the log array
// printf("%3d : CHECK: %8d %5x\n", m_ntest, m_iaddr, m_iaddr);
dp = m_fft_buf; lp = &m_log[(m_iaddr-FFTLEN*3)&((NFTLOG*FFTLEN-1)&(-FFTLEN))];
printf("%3d : CHECK: %8d %5x m_log[-%x=%x]\n", m_ntest, m_iaddr, m_iaddr,
m_logbase, (m_iaddr-m_logbase)&((NFTLOG*FFTLEN-1)&(-FFTLEN)));
dp = m_fft_buf; lp = &m_log[(m_iaddr-m_logbase)&((NFTLOG*FFTLEN-1)&(-FFTLEN))];
for(int i=0; i<FFTLEN; i++) {
long tv = *lp++;
 
123,8 → 149,9
 
// Let's measure ... are we the zero vector? If not, how close?
dp = m_fft_buf;
for(int i=0; i<FFTLEN; i++)
isq += (*dp) * (*dp);
for(int i=0; i<FFTLEN*2; i++) {
isq += (*dp) * (*dp); dp++;
}
 
fftw_execute(m_plan);
 
131,17 → 158,10
// Let's load up the output we received into vout
dp = vout;
for(int i=0; i<FFTLEN; i++) {
long tv = m_data[i];
 
// printf("OUT[%4d = %4x] = ", i, i);
// printf("%12lx = ", tv);
*dp = sbits(tv >> OWIDTH, OWIDTH);
// printf("%10.1f + ", *dp);
*dp = rdata(i);
osq += (*dp) * (*dp); dp++;
*dp = sbits(tv, OWIDTH);
// printf("%10.1f j", *dp);
*dp = idata(i);
osq += (*dp) * (*dp); dp++;
// printf(" <-> %12.1f %12.1f\n", m_fft_buf[2*i], m_fft_buf[2*i+1]);
}
 
 
157,6 → 177,19
 
double xisq = 0.0;
sp = m_fft_buf; dp = vout;
 
if ((true)&&(m_dumpfp)) {
double tmp[FFTLEN*2], nscl;
 
if (fabs(scale) < 1e-4)
nscl = 1.0;
else
nscl = scale;
for(int i=0; i<FFTLEN*2; i++)
tmp[i] = m_fft_buf[i] * nscl;
fwrite(tmp, sizeof(double), FFTLEN*2, m_dumpfp);
}
 
for(int i=0; i<FFTLEN*2; i++) {
double vl = (*sp++) * scale - (*dp++);
xisq += vl * vl;
186,24 → 219,31
tick();
 
if (m_fft->o_sync) {
if (!m_syncd) {
m_logbase = m_iaddr;
} // else printf("RESYNC AT %lx\n", m_fft->m_tickcount);
m_oaddr &= (-1<<LGWIDTH);
m_syncd = true;
} else m_oaddr += 2;
 
printf("%8x,%5d: %08x,%08x -> %011lx,%011lx"
// "\t%011lx,%011lx"
"\t%011lx,%011lx"
printf("%8x,%5d: %08x,%08x -> %011lx,%011lx",
m_iaddr, m_oaddr,
lft, rht, m_fft->o_left, m_fft->o_right);
printf( // "\t%011lx,%011lx"
"\t%3x"
"\t%011lx,%011lx" // w_e128, w_o128
// "\t%011lx,%011lx" // w_e4, w_o4
// "\t%06x,%06x"
// "\t%06x,%06x"
// "\t%011lx,%06x,%06x"
"\t%011lx,%06x,%06x"
"\t%06x,%06x,%06x,%06x"
"\t%011lx,%011lx"
" %s%s%s%s%s%s%s%s%s%s%s %s%s\n",
m_iaddr, m_oaddr,
lft, rht, m_fft->o_left, m_fft->o_right,
m_fft->v__DOT__w_e4,
m_fft->v__DOT__w_o4,
"\t%011lx,%06x,%06x" // ob_a, ob_b_r, ob_b_i
"\t%06x,%06x,%06x,%06x", // o_out_xx
// "\t%011lx,%011lx"
m_fft->v__DOT__revstage__DOT__iaddr,
m_fft->v__DOT__w_e128,
m_fft->v__DOT__w_o128,
// m_fft->v__DOT__w_e4,
// m_fft->v__DOT__w_o4,
// m_fft->v__DOT__stage_e512__DOT__ib_a,
// m_fft->v__DOT__stage_e512__DOT__ib_b,
// m_fft->v__DOT__stage_e256__DOT__ib_a,
233,13 → 273,23
m_fft->v__DOT__stage_2__DOT__o_out_0r,
m_fft->v__DOT__stage_2__DOT__o_out_0i,
m_fft->v__DOT__stage_2__DOT__o_out_1r,
m_fft->v__DOT__stage_2__DOT__o_out_1i,
m_fft->v__DOT__br_o_left,
m_fft->v__DOT__br_o_right,
(m_fft->v__DOT__w_s2048)?"S":"-",
(m_fft->v__DOT__w_s1024)?"S":"-",
(m_fft->v__DOT__w_s512)?"S":"-",
(m_fft->v__DOT__w_s256)?"S":"-",
m_fft->v__DOT__stage_2__DOT__o_out_1i);
/*
printf(" DBG:%c%c:%08x [%6d,%6d]",
(m_fft->o_dbg&(1l<<33))?'T':' ',
(m_fft->o_dbg&(1l<<32))?'C':' ',
(unsigned)(m_fft->o_dbg&((-1l<<32)-1)),
((int)(m_fft->o_dbg))>>16,
(((unsigned)(m_fft->o_dbg&0x0ffff))
|((m_fft->o_dbg&0x08000)?(-1<<16):0)));
*/
printf(" %s%s%s%s%s%s%s %s%s\n",
// m_fft->v__DOT__br_o_left,
// m_fft->v__DOT__br_o_right,
// (m_fft->v__DOT__w_s2048)?"S":"-",
// (m_fft->v__DOT__w_s1024)?"S":"-",
// (m_fft->v__DOT__w_s512)?"S":"-",
// (m_fft->v__DOT__w_s256)?"S":"-",
(m_fft->v__DOT__w_s128)?"S":"-",
(m_fft->v__DOT__w_s64)?"S":"-",
(m_fft->v__DOT__w_s32)?"S":"-",
276,11 → 326,17
}
 
double rdata(int addr) {
return (double)sbits(m_data[addr&(FFTLEN-1)]>>OWIDTH, OWIDTH);
int index = addr & (FFTLEN-1);
 
// index = bitrev(LGWIDTH, index);
return (double)sbits(m_data[index]>>OWIDTH, OWIDTH);
}
 
double idata(int addr) {
return (double)sbits(m_data[addr&(FFTLEN-1)], OWIDTH);
int index = addr & (FFTLEN-1);
 
// index = bitrev(LGWIDTH, index);
return (double)sbits(m_data[index], OWIDTH);
}
 
void dump(FILE *fp) {
319,6 → 375,96
fft->reset();
fft->dump(fpout);
 
// 1.
fft->test(0.0, 0.0, 32767.0, 0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 2.
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 3.
fft->test(0.0,0.0,0.0,0.0);
fft->test(32767.0, 0.0, 0.0, 0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 4.
for(int k=0; k<8; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=8; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 5.
if (FFTLEN/2 >= 16) {
for(int k=0; k<16; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=16; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
// 6.
if (FFTLEN/2 >= 32) {
for(int k=0; k<32; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=32; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
// 7.
if (FFTLEN/2 >= 64) {
for(int k=0; k<64; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=64; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
if (FFTLEN/2 >= 128) {
for(int k=0; k<128; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=128; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
if (FFTLEN/2 >= 256) {
for(int k=0; k<256; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=256; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
if (FFTLEN/2 >= 512) {
for(int k=0; k<256+128; k++)
fft->test(32767.0, 0.0, 32767.0, 0.0);
for(int k=256+128; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
}
 
/*
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int k=0; k<FFTLEN/2; k++)
fft->test(0.0,0.0,0.0,0.0);
*/
 
#ifndef NO_JUNK
// 7.
 
// 1 -> 0x0001
// 2 -> 0x0002
// 4 -> 0x0004
352,6 → 498,10
// 8192 -> 0xe000
// 16384 -> 0xc000
// 32768 -> 0x8000
fft->test(0.0,0.0,16384.0,0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
for(int v=1; v<=32768; v<<=1) for(int k=0; k<FFTLEN/2; k++)
fft->test(-(double)v,0.0,-(double)v,0.0);
// 1 -> 0x000040 CORRECT!!
431,6 → 581,16
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 72. And another one on the next clock (FAILS, ugly)
fft->test(0.0, 0.0, 8192.0, 0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 72. And another one on the next clock (FAILS, ugly)
fft->test(0.0, 0.0, 512.0, 0.0);
for(int k=0; k<FFTLEN/2-1; k++)
fft->test(0.0,0.0,0.0,0.0);
 
// 73. And an imaginary one on the second clock
fft->test(0.0, 0.0, 0.0, 16384.0);
for(int k=0; k<FFTLEN/2-1; k++)
504,7 → 664,7
sr = sin(W * (2*k+1)) * 4.0;
fft->test(cl, sl, cr, sr);
}
 
#endif
// 19.--24. And finally, let's clear out our results / buffer
for(int k=0; k<(FFTLEN/2) * 5; k++)
fft->test(0.0,0.0,0.0,0.0);
/bench/cpp/fftstage_o2048_tb.cpp
282,6 → 282,18
// Largest negative imaginary value
for(int k=1; k<FFTSIZE; k+=2)
ftstage->test((k==1), 0x000010000l);
// Let's try an impulse
for(int k=0; k<FFTSIZE; k+=2)
ftstage->test((k==0), (k==0)?0x020000000l:0l);
// Now, let's clear out the result
for(int k=0; k<FFTSIZE; k+=2)
ftstage->test((k==0), 0x000000000l);
for(int k=0; k<FFTSIZE; k+=2)
ftstage->test((k==0), 0x000000000l);
for(int k=0; k<FFTSIZE; k+=2)
ftstage->test((k==0), 0x000000000l);
for(int k=0; k<FFTSIZE; k+=2)
ftstage->test((k==0), 0x000000000l);
 
printf("SUCCESS! (Offset = %d)\n", ftstage->m_offset);
delete ftstage;
/bench/cpp/butterfly_tb.cpp
89,10 → 89,11
// we'll never get an aux=1 output.
//
m_bfly->i_rst = 1;
m_bfly->i_ce = 1;
m_bfly->i_aux = 1;
for(int i=0; i<200; i++)
for(int i=0; i<200; i++) {
m_bfly->i_ce = 1;
tick();
}
 
// Now here's the RESET line, so let's see what the test does
m_bfly->i_rst = 1;
117,6 → 118,7
m_addr = 0;
}
 
m_bfly->i_ce = 1;
tick();
 
if ((m_bfly->o_aux)&&(!m_lastaux))
/doc/spec.pdf Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
/doc/src/gqtekspec.cls
27,6 → 27,7
\usepackage{datetime}
\usepackage{graphicx}
\usepackage[dvips]{pstricks}
\usepackage{hhline}
\usepackage{colortbl}
\newdateformat{headerdate}{\THEYEAR/\twodigit{\THEMONTH}/\twodigit{\THEDAY}}
\setlength{\hoffset}{0.25in}
208,9 → 209,9
\vskip 2\baselineskip
\vspace*{10pt}\vfil
\begin{minipage}{\textwidth}\raggedleft
\ifproject{\scalebox{1.2}{\Huge\bfseries\MakeUppercase\@project}} \\\fi
\ifproject{\Huge\bfseries\MakeUppercase\@project} \\\fi
\vspace*{15pt}
\scalebox{1.2}{\Huge\bfseries\MakeUppercase\@title} \\
{\Huge\bfseries\MakeUppercase\@title} \\
\vskip 10\baselineskip
\Large \@author \\
\ifemail{\Large \@email}\\\fi
234,13 → 235,13
\@afterindentfalse
\secdef\@chapter\@schapter}
\renewcommand\@makechapterhead[1]{%
\hbox to \textwidth{\hfil\scalebox{1.8}{\Huge\bfseries \thechapter.}}\vskip 10\p@
\hbox to \textwidth{\hfil{\Huge\bfseries \thechapter.}}\vskip 10\p@
\hbox to \textwidth{\rput(0,0){\psline[linewidth=0.04in](0,0)(\textwidth,0)}}\vskip \p@
\hbox to \textwidth{\rput(0,0){\psline[linewidth=0.04in](0,0)(\textwidth,0)}}\vskip 10\p@
\hbox to \textwidth{\hfill\scalebox{1.8}{\Huge\bfseries #1}}%
\hbox to \textwidth{\hfill{\Huge\bfseries #1}}%
\par\nobreak\vskip 40\p@}
\renewcommand\@makeschapterhead[1]{%
\hbox to \textwidth{\hfill\scalebox{1.8}{\Huge\bfseries #1}}%
\hbox to \textwidth{\hfill{\Huge\bfseries #1}}%
\par\nobreak\vskip 40\p@}
% ****************************************
% * INITIALIZATION *
275,8 → 276,21
\begin{tabular}{|p{0.5in}|p{1in}|p{1in}|p{2.875in}|}\hline
\rowcolor[gray]{0.8} Rev. & Date & Author & Description\\\hline\hline}
{\end{tabular}\clearpage}
\newenvironment{clocklist}{\begin{tabular}{|p{0.75in}|p{0.5in}|l|l|p{2.875in}|}\hline
\rowcolor[gray]{0.85} Name & Source & \multicolumn{2}{l|}{Rates (MHz)} & Description \\\hhline{~|~|-|-|~}%
\rowcolor[gray]{0.85} & & Max & Min & \\\hline\hline}%
{\end{tabular}}
\newenvironment{reglist}{\begin{tabular}{|p{0.75in}|p{0.5in}|p{0.5in}|p{0.5in}|p{2.875in}|}\hline
\rowcolor[gray]{0.85} Name & Address & Width & Access & Description \\\hline\hline}%
{\end{tabular}}
\newenvironment{bitlist}{\begin{tabular}{|p{0.5in}|p{0.5in}|p{3.875in}|}\hline
\rowcolor[gray]{0.85} Bit \# & Access & Description \\\hline\hline}%
{\end{tabular}}
\newenvironment{portlist}{\begin{tabular}{|p{0.75in}|p{0.5in}|p{0.75in}|p{3.375in}|}\hline
\rowcolor[gray]{0.85} Port & Width & Direction & Description \\\hline\hline}%
{\end{tabular}}
\newenvironment{wishboneds}{\begin{tabular}{|p{2.5in}|p{2.5in}|}\hline
\rowcolor[gray]{0.85} Description & Specification \\\hline\hline}%
{\end{tabular}}
\newenvironment{preface}{\chapter*{Preface}}{\par\bigskip\bigskip\leftline{\hfill\@author}}
\endinput
/doc/src/spec.tex
3,7 → 3,7
\title{Specification}
\author{Dan Gisselquist, Ph.D.}
\email{dgisselq\at opencores.org}
\revision{Rev.~0.1}
\revision{Rev.~0.2}
\begin{document}
\pagestyle{gqtekspecplain}
\titlepage
24,6 → 24,7
with this program. If not, see \hbox{<http://www.gnu.org/licenses/>} for a copy.
\end{license}
\begin{revisionhistory}
0.2 & 6/2/2015 & Gisselquist & Superficial formatting changes\\\hline
0.1 & 3/3/2015 & Gisselquist & First Draft \\\hline
\end{revisionhistory}
% Revision History
/sw/fftgen.cpp
59,7 → 59,11
#include <ctype.h>
#include <assert.h>
 
#define COREDIR "fft-core"
#define DEF_NBITSIN 16
#define DEF_COREDIR "fft-core"
#define DEF_XTRACBITS 4
#define DEF_NMPY 0
#define DEF_XTRAPBITS 0
 
typedef enum {
RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT
231,7 → 235,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
"\n"
"\t\talways @(posedge i_clk)\n"
313,7 → 317,7
"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
"\t\twire\t\t\tsign_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
"\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
"\n"
333,7 → 337,7
"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
"\t\twire\t\t\tsign_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
"\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
"\n"
415,7 → 419,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
"\n"
435,7 → 439,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n"
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
"\n"
460,7 → 464,7
"endmodule\n");
}
 
void build_quarters(const char *fname, ROUND_T rounding) {
void build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) {
FILE *fp = fopen(fname, "w");
if (NULL == fp) {
fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
481,7 → 485,7
fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: qtrstage.v\n"
"// Filename: qtrstage%s.v\n"
"// \n"
"// Project: %s\n"
"//\n"
492,11 → 496,11
"//\n"
"//\n%s"
"//\n",
prjname, creator);
(dbg)?"_dbg":"", prjname, creator);
fprintf(fp, "%s", cpyleft);
 
fprintf(fp,
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"
"\t// Parameters specific to the core that should be changed when this\n"
"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"
506,7 → 510,12
"\tinput\t [(2*IWIDTH-1):0] i_data;\n"
"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"
"\toutput\treg o_sync;\n"
"\t\n");
"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
"\n");
}
fprintf(fp,
"\treg\t wait_for_sync;\n"
"\treg\t[3:0] pipeline;\n"
536,16 → 545,16
fprintf(fp,
"\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
"\tassign n_rnd_diff_i = - rnd_diff_i;\n");
554,9 → 563,9
"\twire [(IWIDTH-1):0] rnd;\n"
"\tgenerate\n"
"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
"\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"
"\telse\n"
"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
"\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"
"\tendgenerate\n"
"\n"
*/
566,14 → 575,17
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\twait_for_sync <= 1'b1;\n"
"\t\t\twait_for_sync <= 1\'b1;\n"
"\t\t\tiaddr <= 0;\n"
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
"\t\tbegin\n"
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
"\t\t\twait_for_sync <= 1\'b0;\n"
"\t\tend\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\timem <= i_data;\n"
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"
"\t\t\twait_for_sync <= 1'b0;\n"
"\t\tend\n\n");
"\n\n");
fprintf(fp,
"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
"\t// Why not? Because iaddr will always be zero until after the\n"
581,7 → 593,7
"\tinitial pipeline = 4\'h0;\n"
"\talways\t@(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\t\tpipeline <= 4'h0;\n"
"\t\t\tpipeline <= 4\'h0;\n"
"\t\telse if (i_ce) // is our pipeline process full? Which stages?\n"
"\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
fprintf(fp,
597,9 → 609,12
fprintf(fp,
"\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
fprintf(fp,
"\t// Now for pipeline[2]\n"
"\t// Now for pipeline[2]. We can actually do this at all i_ce\n"
"\t// clock times, since nothing will listen unless pipeline[3]\n"
"\t// on the next clock. Thus, we simplify this logic and do\n"
"\t// it independent of pipeline[2].\n"
"\talways\t@(posedge i_clk)\n"
"\t\tif ((i_ce)&&(pipeline[2]))\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
"\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
633,13 → 648,16
"\t// Don\'t forget in the sync check that we are running\n"
"\t// at two clocks per sample. Thus we need to\n"
"\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
"\tinitial\to_sync = 1\'b0;\n"
"\talways\t@(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tif (i_rst)\n"
"\t\t\to_sync <= 1\'b0;\n"
"\t\telse if (i_ce)\n"
"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
fprintf(fp, "endmodule\n");
}
 
void build_dblstage(const char *fname, ROUND_T rounding) {
void build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) {
FILE *fp = fopen(fname, "w");
if (NULL == fp) {
fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
661,7 → 679,7
fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: dblstage.v\n"
"// Filename: dblstage%s.v\n"
"//\n"
"// Project: %s\n"
"//\n"
691,17 → 709,23
"// o_right The next (odd) complex output.\n"
"// o_sync Output synchronization signal.\n"
"//\n%s"
"//\n", prjname, creator);
"//\n", (dbg)?"_dbg":"", prjname, creator);
 
fprintf(fp, "%s", cpyleft);
fprintf(fp,
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
"\toutput\treg\t\t\to_sync;\n"
"\n");
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
 
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
"\n");
}
fprintf(fp,
"\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"
"\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"
721,19 → 745,25
"\t// bit than the two originals.\n"
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0r(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0i(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1r(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1i(i_clk, i_ce,\n"
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
 
fprintf(fp,
"\n"
"\t// As with any register connected to the sync pulse, these must\n"
"\t// have initial values and be reset on the i_rst signal.\n"
"\t// Other data values need only restrict their updates to i_ce\n"
"\t// enabled clocks, but sync\'s must obey resets and initial\n"
"\t// conditions as well.\n"
"\treg\twait_for_sync, rnd_sync;\n"
"\n"
"\tinitial begin\n"
744,13 → 774,25
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\trnd_sync <= 1'b0;\n"
"\t\t\to_sync <= 1'b0;\n"
"\t\t\twait_for_sync <= 1'b1;\n"
"\t\t\trnd_sync <= 1\'b0;\n"
"\t\t\to_sync <= 1\'b0;\n"
"\t\t\twait_for_sync <= 1\'b1;\n"
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
"\t\tbegin\n"
"\t\t\twait_for_sync <= 1'b0;\n"
"\t\t\twait_for_sync <= 1\'b0;\n"
"\t\t\t//\n"
"\t\t\trnd_sync <= i_sync;\n"
"\t\t\to_sync <= rnd_sync;\n"
"\t\tend\n"
"\n"
"\t// As with other variables, these are really only updated when in\n"
"\t// the processing pipeline, after the first i_sync. However, to\n"
"\t// eliminate as much unnecessary logic as possible, we toggle\n"
"\t// these any time the i_ce line is enabled.\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\t//\n"
"\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
"\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
"\t\t\t//\n"
757,8 → 799,6
"\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
"\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
"\t\t\t//\n"
"\t\t\trnd_sync <= i_sync;\n"
"\t\t\to_sync <= rnd_sync;\n"
"\t\tend\n"
"\n"
"\tassign\to_left = { o_out_0r, o_out_0i };\n"
838,10 → 878,10
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"
"\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"
"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n"
"\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n"
"\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"
"\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"
"\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n"
"\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"
"\t\tend\n"
"\n"
851,9 → 891,9
"\t\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"
"\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"
"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"
"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n"
"\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n"
"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n"
"\t\t\tr_s[k+1] <= r_s[k];\n"
"\t\tend\n"
"\tend\n"
897,10 → 937,17
"// width, WIDTH. Of course, there is a delay from the\n"
"// first input to the first output. For this purpose,\n"
"// o_sync is present.\n"
"// o_sync This will be a 1'b1 for the first value in any block.\n"
"// Following a reset, this will only become 1'b1 once\n"
"// o_sync This will be a 1\'b1 for the first value in any block.\n"
"// Following a reset, this will only become 1\'b1 once\n"
"// the data has been loaded and is now valid. After that,\n"
"// all outputs will be valid.\n"
"//\n"
"// 20150602 -- This module has undergone massive rework in order to\n"
"// ensure that it uses resources efficiently. As a result, \n"
"// it now optimizes nicely into block RAMs. As an unfortunately\n"
"// side effect, it now passes it\'s bench test (dblrev_tb) but\n"
"// fails the integration bench test (fft_tb).\n"
"//\n"
"//\n%s"
"//\n", prjname, creator);
fprintf(fp, "%s", cpyleft);
941,65 → 988,69
fprintf(fp,
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
"\t\to_out_0, o_out_1, o_sync);\n"
"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
"\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"
"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
"\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
"\toutput\treg\t\t\to_sync;\n"
"\n"
"\treg\tin_reset;\n"
"\treg\t[(LGSIZE):0]\tiaddr;\n"
"\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"
"\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"
"\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"
"\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"
"\treg\t\t\tin_reset;\n"
"\treg\t[(LGSIZE-1):0]\tiaddr;\n"
"\twire\t[(LGSIZE-3):0]\tbraddr;\n"
"\n"
"\twire\t[(2*LGSIZE-1):0] braddr;\n"
"\tgenvar\tk;\n"
"\tgenerate for(k=0; k<LGSIZE; k=k+1)\n"
"\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n"
"\tbegin : gen_a_bit_reversed_value\n"
"\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"
"\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n"
"\tend endgenerate\n"
"\n"
"\tinitial iaddr = 0;\n"
"\tinitial in_reset = 1\'b1;\n"
"\tinitial o_sync = 1\'b0;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\tiaddr <= 0;\n"
"\t\t\tin_reset <= 1'b1;\n"
"\t\t\tin_reset <= 1\'b1;\n"
"\t\t\to_sync <= 1\'b0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\tif (iaddr[(LGSIZE-1)])\n"
"\t\t\tbegin\n"
"\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
"\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
"\t\t\tend else begin\n"
"\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"
"\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"
"\t\t\tend\n"
"\t\t\tiaddr <= iaddr + { {(LGSIZE-2){1\'b0}}, 2\'h2 };\n"
"\t\t\tif (&iaddr[(LGSIZE-1):1])\n"
"\t\t\t\tin_reset <= 1'b0;\n"
"\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n"
"\t\t\tif (&iaddr[(LGSIZE-2):0])\n"
"\t\t\t\tin_reset <= 1\'b0;\n"
"\t\t\tif (in_reset)\n"
"\t\t\tbegin\n"
"\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"
"\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"
"\t\t\t\to_sync <= 1'b0;\n"
"\t\t\tend else\n"
"\t\t\tbegin\n"
"\t\t\t\tif (braddr[0])\n"
"\t\t\t\tbegin\n"
"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
"\t\t\t\tend else begin\n"
"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"
"\t\t\t\tend\n"
"\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"
"\t\t\tend\n"
"\t\t\t\to_sync <= 1\'b0;\n"
"\t\t\telse\n"
"\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n"
"\t\tend\n"
"\n"
"\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n"
"\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n"
"\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n"
"\n"
"\n"
"\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n"
"\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n"
"\n"
"\treg\tadrz;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce) adrz = iaddr[LGSIZE-2];\n"
"\n"
"\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
"\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
"\n"
"endmodule\n");
 
fclose(fp);
1090,6 → 1141,14
"// YUP! But just barely. Do this and you'll really want\n"
"// to drop a bit, although you will risk overflow in so\n"
"// doing.\n"
"//\n"
"// 20150602 -- The sync logic lines have been completely redone. The\n"
"// synchronization lines no longer go through the FIFO with the\n"
"// left hand sum, but are kept out of memory. This allows the\n"
"// butterfly to use more optimal memory resources, while also\n"
"// guaranteeing that the sync lines can be properly reset upon\n"
"// any reset signal.\n"
"//\n"
"//\n%s"
"//\n", prjname, creator);
fprintf(fp, "%s", cpyleft);
1101,7 → 1160,7
"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
"\t// Parameters specific to the core that should not be changed.\n"
"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
"\t\t\tSHIFT=0;\n"
"\t\t\tSHIFT=0, AUXLEN=%d;\n"
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"
"\t// this value is fractional, then round up to the nearest\n"
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1111,9 → 1170,10
"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
"\tinput\t\ti_aux;\n"
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
"\toutput\treg o_aux;\n"
"\toutput\treg\to_aux;\n"
"\n", 16, xtracbits, lgdelay(16,xtracbits),
bflydelay(16, xtracbits), lgdelay(16,xtracbits));
bflydelay(16, xtracbits), bflydelay(16, xtracbits)+3,
lgdelay(16,xtracbits));
fprintf(fp,
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
"\n"
1131,8 → 1191,7
"\treg [(LGDELAY-1):0] fifo_addr;\n"
"\twire [(LGDELAY-1):0] fifo_read_addr;\n"
"\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"
"\treg [(2*IWIDTH+2):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n"
"\treg\t\t\t\tovalid;\n"
"\treg [(2*IWIDTH+1):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n"
"\n");
fprintf(fp,
"\t// Set up the input to the multiply\n"
1142,7 → 1201,6
"\t\t\t// One clock just latches the inputs\n"
"\t\t\tr_left <= i_left; // No change in # of bits\n"
"\t\t\tr_right <= i_right;\n"
"\t\t\tr_aux <= i_aux;\n"
"\t\t\tr_coef <= i_coef;\n"
"\t\t\t// Next clock adds/subtracts\n"
"\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1150,7 → 1208,6
"\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
"\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
"\t\t\t// Other inputs are simply delayed on second clock\n"
"\t\t\tr_aux_2 <= r_aux;\n"
"\t\t\tr_coef_2<= r_coef;\n"
"\t\tend\n"
"\n");
1159,22 → 1216,18
"\t// to be multiplied, but yet we still need the results in sync\n"
"\t// with the answer when it is ready.\n"
"\tinitial fifo_addr = 0;\n"
"\tinitial ovalid = 1'b0;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\tfifo_addr <= 0;\n"
"\t\t\tovalid <= 1'b0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\telse if (i_ce)\n"
"\t\t\t// Need to delay the sum side--nothing else happens\n"
"\t\t\t// to it, but it needs to stay synchronized with the\n"
"\t\t\t// right side.\n"
"\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
"\t\t\tfifo_addr <= fifo_addr + 1;\n"
"\n"
"\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n"
"\t\tend\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n"
"\n"
"\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
"\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
1259,12 → 1312,10
"\t// therefore, the left_x values need to be right shifted by\n"
"\t// CWIDTH-2 as well. The additional bits come from a sign\n"
"\t// extension.\n"
"\twire aux;\n"
"\twire\tsigned\t[(IWIDTH+CWIDTH):0] fifo_i, fifo_r;\n"
"\treg\t\t[(2*IWIDTH+2):0] fifo_read;\n"
"\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
"\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
"\tassign\taux = fifo_read[2*IWIDTH+2];\n"
"\treg\t\t[(2*IWIDTH+1):0] fifo_read;\n"
"\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
"\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
"\n"
"\n"
"\treg\tsigned\t[(OWIDTH-1):0] b_left_r, b_left_i,\n"
1301,18 → 1352,18
"\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
 
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n"
"\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n"
"\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
"\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
"\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
fprintf(fp,
"\talways @(posedge i_clk)\n"
1334,7 → 1385,17
"\t\t\tb_left_i <= rnd_left_i;\n"
"\t\tend\n"
"\n");
 
fprintf(fp,
"\treg\t[(AUXLEN-1):0]\taux_pipeline;\n"
"\tinitial\taux_pipeline = 0;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\t\taux_pipeline <= 0;\n"
"\t\telse if (i_ce)\n"
"\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n"
"\n");
fprintf(fp,
"\tinitial o_aux = 1\'b0;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
1342,7 → 1403,7
"\t\telse if (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// Second clock, latch for final clock\n"
"\t\t\to_aux <= aux & ovalid;\n"
"\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n"
"\t\tend\n"
"\n");
 
1420,6 → 1481,7
"\tassign\tr_left_i = r_left[ (IWIDTH-1):0];\n"
"\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
"\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
"\treg signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
"\n"
"\treg signed [(IWIDTH):0] r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
"\n"
1431,14 → 1493,22
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\tr_aux <= 1'b0;\n"
"\t\t\tr_aux_2 <= 1'b0;\n"
"\t\t\tr_aux <= 1\'b0;\n"
"\t\t\tr_aux_2 <= 1\'b0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// One clock just latches the inputs\n"
"\t\t\tr_aux <= i_aux;\n"
"\t\t\t// Next clock adds/subtracts\n"
"\t\t\t// Other inputs are simply delayed on second clock\n"
"\t\t\tr_aux_2 <= r_aux;\n"
"\t\tend\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// One clock just latches the inputs\n"
"\t\t\tr_left <= i_left; // No change in # of bits\n"
"\t\t\tr_right <= i_right;\n"
"\t\t\tr_aux <= i_aux;\n"
"\t\t\tr_coef <= i_coef;\n"
"\t\t\t// Next clock adds/subtracts\n"
"\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"
1446,8 → 1516,8
"\t\t\tr_dif_r <= r_left_r - r_right_r;\n"
"\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
"\t\t\t// Other inputs are simply delayed on second clock\n"
"\t\t\tr_aux_2 <= r_aux;\n"
"\t\t\tr_coef_2<= r_coef;\n"
"\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n"
"\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n"
"\t\tend\n"
"\n\n");
fprintf(fp,
1454,16 → 1524,14
"\t// See comments in the butterfly.v source file for a discussion of\n"
"\t// these operations and the appropriate bit widths.\n\n");
fprintf(fp,
"\twire signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
"\tassign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
"\tassign ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
"\treg\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] p_one, p_two, p_three;\n"
"\treg\tsigned [((IWIDTH+1)+(CWIDTH)-1):0] p_one, p_two;\n"
"\treg\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] p_three;\n"
"\n"
"\treg\tsigned [(CWIDTH):0] p3c_in, p1c_in, p2c_in;\n"
"\treg\tsigned [(IWIDTH+1):0] p3d_in, p1d_in, p2d_in;\n"
"\treg\t[3:0] pipeline;\n"
"\treg\tsigned [(CWIDTH-1):0] p1c_in, p2c_in; // Coefficient multiply inputs\n"
"\treg\tsigned [(IWIDTH):0] p1d_in, p2d_in; // Data multiply inputs\n"
"\treg\tsigned [(CWIDTH):0] p3c_in; // Product 3, coefficient input\n"
"\treg\tsigned [(IWIDTH+1):0] p3d_in; // Product 3, data input\n"
"\n"
"\tinitial pipeline = 4\'h0;\n"
"\tinitial leftv = 0;\n"
"\tinitial leftvv = 0;\n"
"\talways @(posedge i_clk)\n"
1470,30 → 1538,41
"\tbegin\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\tpipeline <= 4'h0;\n"
"\t\t\tleftv <= 0;\n"
"\t\t\tleftvv <= 0;\n"
"\t\tend else if (i_clk)\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// Second clock, pipeline = 1\n"
"\t\t\tp1c_in <= { ir_coef_r[(CWIDTH-1)], ir_coef_r };\n"
"\t\t\tp2c_in <= { ir_coef_i[(CWIDTH-1)], ir_coef_i };\n"
"\t\t\tp1d_in <= { r_dif_r[(IWIDTH)], r_dif_r };\n"
"\t\t\tp2d_in <= { r_dif_i[(IWIDTH)], r_dif_i };\n"
"\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
"\n"
"\t\t\t// Third clock, pipeline = 3\n"
"\t\t\t// As desired, each of these lines infers a DSP48\n"
"\t\t\tleftvv <= leftv;\n"
"\t\tend\n"
"\tend\n"
"\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// Second clock, pipeline = 1\n"
"\t\t\tp1c_in <= ir_coef_r;\n"
"\t\t\tp2c_in <= ir_coef_i;\n"
"\t\t\tp1d_in <= r_dif_r;\n"
"\t\t\tp2d_in <= r_dif_i;\n"
"\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n"
"\t\t\tp3d_in <= r_dif_r + r_dif_i;\n"
"\n"
"\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n"
"\n"
"\t\t\t// Third clock, pipeline = 3\n"
"\t\t\t// As desired, each of these lines infers a DSP48\n"
"\t\t\tp_one <= p1c_in * p1d_in;\n"
"\t\t\tp_two <= p2c_in * p2d_in;\n"
"\t\t\tp_three <= p3c_in * p3d_in;\n"
"\t\t\tleftvv <= leftv;\n"
"\t\tend\n"
"\n"
"\t\t\tpipeline <= { pipeline[2:0], 1'b1 };\n"
"\t\tend\n"
"\tend\n"
"\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] w_one, w_two;\n"
"\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n"
"\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n"
"\n");
 
fprintf(fp,
1506,28 → 1585,29
"\twire\taux_s;\n"
"\twire\tsigned\t[(IWIDTH+CWIDTH):0] left_si, left_sr;\n"
"\treg\t\t[(2*IWIDTH+2):0] left_saved;\n"
"\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
"\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
"\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n"
"\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n"
"\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
"\n"
"\n"
"\t(* use_dsp48=\"no\" *)\n"
"\treg signed [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
fprintf(fp,
"\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
 
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
"\t\t\t\t{ {2{left_sr[(IWIDTH+CWIDTH)]}}, left_sr }, rnd_left_r);\n\n",
"\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n"
"\t\t\t\tleft_sr, rnd_left_r);\n\n",
rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
"\t\t\t\t{ {2{left_si[(IWIDTH+CWIDTH)]}}, left_si }, rnd_left_i);\n\n",
"\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n"
"\t\t\t\tleft_si, rnd_left_i);\n\n",
rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n"
"\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
fprintf(fp,
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n"
"\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
 
fprintf(fp,
1537,21 → 1617,29
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\tleft_saved <= 0;\n"
"\t\t\to_aux <= 1'b0;\n"
"\t\t\to_aux <= 1\'b0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// First clock, recover all values\n"
"\t\t\tleft_saved <= leftvv;\n"
"\n"
"\t\t\t// Second clock, round and latch for final clock\n"
"\t\t\to_aux <= aux_s;\n"
"\t\tend\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
"\t\t\t// although they only need to be (IWIDTH+1)\n"
"\t\t\t// + (CWIDTH) bits wide. (We've got two\n"
"\t\t\t// extra bits we need to get rid of.)\n"
"\t\t\tmpy_r <= p_one - p_two;\n"
"\t\t\tmpy_i <= p_three - p_one - p_two;\n"
"\n"
"\t\t\t// Second clock, round and latch for final clock\n"
"\n"
"\t\t\to_aux <= aux_s;\n"
"\n"
"\t\t\t// These two lines also infer DSP48\'s.\n"
"\t\t\t// To keep from using extra DSP48 resources,\n"
"\t\t\t// they are prevented from using DSP48\'s\n"
"\t\t\t// by the (* use_dsp48 ... *) comment above.\n"
"\t\t\tmpy_r <= w_one - w_two;\n"
"\t\t\tmpy_i <= p_three - w_one - w_two;\n"
"\t\tend\n"
"\n");
 
1567,7 → 1655,7
 
}
 
void build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false) {
void build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) {
FILE *fstage = fopen(fname, "w");
int cbits = nbits + xtra;
 
1586,7 → 1674,7
fprintf(fstage,
"////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: %sfftstage_%c%d.v\n"
"// Filename: %sfftstage_%c%d%s.v\n"
"//\n"
"// Project: %s\n"
"//\n"
1597,10 → 1685,11
"// FFT, there shall be (N-1) of these stages. \n"
"//\n%s"
"//\n",
(inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);
(inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator);
fprintf(fstage, "%s", cpyleft);
fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",
(inv)?"i":"", (odd)?'o':'e', stage*2);
fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n",
(inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"",
(dbg)?", o_dbg":"");
// These parameter values are useless at this point--they are to be
// replaced by the parameter values in the calling program. Only
// problem is, the CWIDTH needs to match exactly!
1617,7 → 1706,13
"\tinput [(2*IWIDTH-1):0] i_data;\n"
"\toutput reg [(2*OWIDTH-1):0] o_data;\n"
"\toutput reg o_sync;\n"
"\n"
"\n");
if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
"\n");
}
fprintf(fstage,
"\treg wait_for_sync;\n"
"\treg [(2*IWIDTH-1):0] ib_a, ib_b;\n"
"\treg [(2*CWIDTH-1):0] ib_c;\n"
1652,7 → 1747,7
sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);
} else {
sprintf(memfile, "%s/%scmem_%c%d.hex",
COREDIR, (inv)?"i":"",
coredir, (inv)?"i":"",
(odd)?'o':'e', stage*2);
}
// strcpy(&memfile[strlen(memfile)-2], ".hex");
1700,7 → 1795,7
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\twait_for_sync <= 1'b1;\n"
"\t\t\twait_for_sync <= 1\'b1;\n"
"\t\t\tiaddr <= 0;\n"
"\t\tend\n"
"\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
1708,10 → 1803,13
"\t\t\t//\n"
"\t\t\t// First step: Record what we\'re not ready to use yet\n"
"\t\t\t//\n"
"\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n"
"\t\t\twait_for_sync <= 1'b0;\n"
"\t\tend\n\n");
"\t\t\twait_for_sync <= 1\'b0;\n"
"\t\tend\n"
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n"
"\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n"
"\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n"
"\n");
 
fprintf(fstage,
"\t//\n"
1719,24 → 1817,25
"\t//\n"
"\tinitial ib_sync = 1\'b0;\n"
"\talways\t@(posedge i_clk)\n"
"\tif (i_rst)\n"
"\t\tib_sync <= 1\'b0;\n"
"\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
"\t\tbegin\n"
"\t\t\t// Set the sync to true on the very first\n"
"\t\t\t// valid input in, and hence on the very\n"
"\t\t\t// first valid data out per FFT.\n"
"\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
"\t\tend\n"
"\t\tif (i_rst)\n"
"\t\t\tib_sync <= 1\'b0;\n"
"\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n"
"\t\t\tbegin\n"
"\t\t\t\t// Set the sync to true on the very first\n"
"\t\t\t\t// valid input in, and hence on the very\n"
"\t\t\t\t// first valid data out per FFT.\n"
"\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
"\t\t\tend\n"
"\talways\t@(posedge i_clk)\n"
"\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
"\t\tbegin\n"
"\t\t\t// One input from memory, ...\n"
"\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
"\t\t\t// One input clocked in from the top\n"
"\t\t\tib_b <= i_data;\n"
"\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
"\t\tend\n\n", (inv)?"i":"");
"\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
"\t\t\tbegin\n"
"\t\t\t\t// One input from memory, ...\n"
"\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
"\t\t\t\t// One input clocked in from the top\n"
"\t\t\t\tib_b <= i_data;\n"
"\t\t\t\t// and the coefficient or twiddle factor\n"
"\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
"\t\t\tend\n\n", (inv)?"i":"");
 
if (hwmpy) {
fprintf(fstage,
1768,42 → 1867,66
"\t\t\tb_started <= 0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
"\t\t\tbegin // A butterfly output is available\n"
"\t\t\t\tb_started <= 1'b1;\n"
"\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
"\n"
"\t\t\t\to_sync <= (ob_sync);\n"
"\t\t\t\to_data <= ob_a;\n"
"\t\t\tend else if (b_started)\n"
"\t\t\tbegin // and keep outputting once you start--at a rate\n"
"\t\t\t// of one guaranteed output per clock that has i_ce set.\n"
"\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
"\t\t\t\to_sync <= 1'b0;\n"
"\t\t\tend else\n"
"\t\t\t\to_sync <= 1'b0;\n"
"\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n"
"\t\t\tif (ob_sync||b_started)\n"
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n"
"\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n"
"\t\t\t// A butterfly output is available\n"
"\t\t\t\tb_started <= 1\'b1;\n"
"\t\tend\n\n");
fprintf(fstage,
"\treg [(LGSPAN-1):0]\t\tdly_addr;\n"
"\treg [(2*OWIDTH-1):0]\tdly_value;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n"
"\t\t\tdly_value <= ob_b;\n"
"\t\tend\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\tomem[dly_addr] <= dly_value;\n"
"\n");
fprintf(fstage,
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n"
"\n");
fprintf(fstage, "endmodule\n");
}
 
void usage(void) {
fprintf(stderr,
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"
// "\tfftgen -i\n"
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n"
"\t\ta real FFT) at one clock per two real input samples.\n"
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
"\t\tlonger than the corresponding data bits, to help avoid\n"
"\t\tcoefficient truncation errors.\n"
"\t\tcoefficient truncation errors. The default is %d bits lnoger\n"
"\t\tthan the data bits.\n"
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
"\t\tThe default is a subdirectory of the current directory named %s.\n"
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
"\t\tsamples input to the transform.\n"
"\t\tsamples input to the transform. (No default value, this is\n"
"\t\ta required parameter.)\n"
"\t-i\tAn inverse FFT, meaning that the coefficients are\n"
"\t\tgiven by e^{ j 2 pi k/N n }. The default is a forward FFT, with\n"
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
"\t\tproduce. Internal values greater than this value will be\n"
"\t\ttruncated to this value.\n"
"\t\ttruncated to this value. (The default value grows the input\n"
"\t\tsize by one bit for every two FFT stages.)\n"
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"
"\t\tThe default is %d bits input for each component of the two\n"
"\t\tcomplex values into the FFT.\n"
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n"
"\t\tmultiplication facility, instead of shift-add emulation.\n"
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n"
"\t\tbe accelerated in this fashion. The default is not to use any\n"
"\t\thardware multipliers.\n"
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"
"\t\tcomplex FFT. (Default is a Complex FFT.)\n"
"\t-s\tSkip the final bit reversal stage. This is useful in\n"
"\t\talgorithms that need to apply a filter without needing to do\n"
"\t\tbin shifting, as these algorithms can, with this option, just\n"
1814,10 → 1937,14
"\t-S\tInclude the final bit reversal stage (default).\n"
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"
"\t\trounding or truncation of the answer to the final number of bits.\n"
"\t\tThe default is to use %d extra bits internally.\n",
/*
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
"\t\tgiven by e^{ j 2 pi k/N n }.\n");
"\t\tgiven by e^{ j 2 pi k/N n }.\n",
*/
DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);
}
 
// Features still needed:
1824,15 → 1951,20
// Interactivity.
int main(int argc, char **argv) {
int fftsize = -1, lgsize = -1;
int nbitsin = 16, xtracbits = 4, nummpy=0, nonmpy=2;
int nbitsout, maxbitsout = -1, xtrapbits=0;
bool bitreverse = true, inverse=false, interactive = false,
verbose_flag = false;
int nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,
nummpy=DEF_NMPY, nonmpy=2;
int nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS;
bool bitreverse = true, inverse=false,
verbose_flag = false, single_clock = false,
real_fft = false;
FILE *vmain;
std::string coredir = "fft-core", cmdline = "";
std::string coredir = DEF_COREDIR, cmdline = "";
ROUND_T rounding = RND_CONVERGENT;
// ROUND_T rounding = RND_HALFUP;
 
bool dbg = false;
int dbgstage = 128;
 
if (argc <= 1)
usage();
 
1846,11 → 1978,13
if ('-' == argv[argn][0]) {
for(int j=1; (argv[argn][j])&&(j<100); j++) {
switch(argv[argn][j]) {
/*
case '0':
inverse = false;
break;
*/
case '1':
inverse = true;
single_clock = true;
break;
case 'c':
if (argn+1 >= argc) {
1868,6 → 2002,15
coredir = argv[++argn];
j += 200;
break;
case 'D':
dbg = true;
if (argn+1 >= argc) {
printf("ERR: No debug stage number given!\n\n");
usage(); exit(-1);
}
dbgstage = atoi(argv[++argn]);
j+= 200;
break;
case 'f':
if (argn+1 >= argc) {
printf("ERR: No FFT Size given!\n\n");
1898,7 → 2041,7
exit(0);
break;
case 'i':
interactive = true;
inverse = true;
break;
case 'm':
if (argn+1 >= argc) {
1924,6 → 2067,9
nummpy = atoi(argv[++argn]);
j += 200;
break;
case 'r':
real_fft = true;
break;
case 'S':
bitreverse = true;
break;
1953,6 → 2099,18
}
}
 
if (real_fft) {
printf("The real FFT option is not implemented yet, but still on\nmy to do list. Please try again later.\n");
exit(0);
} if (single_clock) {
printf("The single clock FFT option is not implemented yet, but still on\nmy to do list. Please try again later.\n");
exit(0);
} if (!bitreverse) {
printf("WARNING: While I can skip the bit reverse stage, the code to do\n");
printf("an inverse FFT on a bit--reversed input has not yet been\n");
printf("built.\n");
}
 
if ((lgsize < 0)&&(fftsize > 1)) {
for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)
;
2104,7 → 2262,8
fprintf(vmain, "//\n");
fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");
fprintf(vmain, "\t\ti_left, i_right,\n");
fprintf(vmain, "\t\to_left, o_right, o_sync);\n");
fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",
(dbg)?", o_dbg":"");
fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);
assert(lgsize > 0);
fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");
2111,6 → 2270,8
fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");
fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");
fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");
if (dbg)
fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");
fprintf(vmain, "\n\n");
 
fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");
2124,9 → 2285,9
fprintf(vmain, "\tinitial br_start = 1\'b0;\n");
fprintf(vmain, "\talways @(posedge i_clk)\n");
fprintf(vmain, "\t\tif (i_rst)\n");
fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");
fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");
fprintf(vmain, "\t\telse if (i_ce)\n");
fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");
fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");
}
fprintf(vmain, "\n\n");
fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");
2134,19 → 2295,25
fprintf(vmain, "\n\n");
} else {
int nbits = nbitsin, dropbit=0;
int obits = nbits+1+xtrapbits;
 
if ((maxbitsout > 0)&&(obits > maxbitsout))
obits = maxbitsout;
 
// Always do a first stage
fprintf(vmain, "\n\n");
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(nbits+1+xtrapbits)-1, fftsize, fftsize);
fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
xtracbits, nbits+1+xtrapbits,
((dbg)&&(dbgstage == fftsize))?"_dbg":"",
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
xtracbits, nbits+1+xtrapbits,
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
2166,8 → 2333,10
fname += "fftstage_e";
sprintf(numstr, "%d", fftsize);
fname += numstr;
if ((dbg)&&(dbgstage == fftsize))
fname += "_dbg";
fname += ".v";
build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage); // Even stage
build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize)); // Even stage
 
fname = coredir + "/";
if (inverse) fname += "i";
2175,15 → 2344,15
sprintf(numstr, "%d", fftsize);
fname += numstr;
fname += ".v";
build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage); // Odd stage
build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false); // Odd stage
}
 
nbits += 1; // New number of input bits
nbits = obits; // New number of input bits
tmp_size >>= 1; lgtmp--;
dropbit = 0;
fprintf(vmain, "\n\n");
while(tmp_size >= 8) {
int obits = nbits+((dropbit)?0:1);
obits = nbits+((dropbit)?0:1);
 
if ((maxbitsout > 0)&&(obits > maxbitsout))
obits = maxbitsout;
2190,12 → 2359,13
 
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
((dbg)&&(dbgstage == tmp_size))?"_dbg":"",
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size, ((dbg)&&(dbgstage == tmp_size))?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
2216,10 → 2386,12
fname += "fftstage_e";
sprintf(numstr, "%d", tmp_size);
fname += numstr;
if ((dbg)&&(dbgstage == tmp_size))
fname += "_dbg";
fname += ".v";
build_stage(fname.c_str(), tmp_size/2, 0,
build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0,
nbits+xtrapbits, inverse, xtracbits,
mpystage); // Even stage
mpystage, ((dbg)&&(dbgstage == tmp_size))); // Even stage
 
fname = coredir + "/";
if (inverse) fname += "i";
2227,9 → 2399,9
sprintf(numstr, "%d", tmp_size);
fname += numstr;
fname += ".v";
build_stage(fname.c_str(), tmp_size/2, 1,
build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1,
nbits+xtrapbits, inverse, xtracbits,
mpystage); // Odd stage
mpystage, false); // Odd stage
}
 
 
2239,7 → 2411,7
}
 
if (tmp_size == 4) {
int obits = nbits+((dropbit)?0:1);
obits = nbits+((dropbit)?0:1);
 
if ((maxbitsout > 0)&&(obits > maxbitsout))
obits = maxbitsout;
2246,9 → 2418,12
 
fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");
fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);
fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");
fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",
((dbg)&&(dbgstage==4))?"_dbg":"",
nbits+xtrapbits, obits+xtrapbits, lgsize,
(inverse)?1:0, (dropbit)?0:0);
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",
((dbg)&&(dbgstage==4))?", o_dbg":"");
fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",
nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0);
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");
2258,7 → 2433,7
}
 
{
int obits = nbits+((dropbit)?0:1);
obits = nbits+((dropbit)?0:1);
if (obits > nbitsout)
obits = nbitsout;
if ((maxbitsout>0)&&(obits > maxbitsout))
2282,8 → 2457,8
fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");
fprintf(vmain, "\talways @(posedge i_clk)\n");
fprintf(vmain, "\t\tif (i_rst)\n");
fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
fprintf(vmain, "\t\telse\n");
fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");
fprintf(vmain, "\t\telse if (i_ce)\n");
fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
}
2305,11 → 2480,18
 
fprintf(vmain, "\n\n");
fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");
fprintf(vmain, "\tinitial\to_sync = 1\'b0;\n");
fprintf(vmain, "\talways @(posedge i_clk)\n");
fprintf(vmain, "\t\tif (i_rst)\n");
fprintf(vmain, "\t\t\to_sync <= 1\'b0;\n");
fprintf(vmain, "\t\telse if (i_ce)\n");
fprintf(vmain, "\t\t\to_sync <= br_sync;\n");
fprintf(vmain, "\n");
fprintf(vmain, "\talways @(posedge i_clk)\n");
fprintf(vmain, "\t\tif (i_ce)\n");
fprintf(vmain, "\t\tbegin\n");
fprintf(vmain, "\t\t\to_left <= br_o_left;\n");
fprintf(vmain, "\t\t\to_right <= br_o_right;\n");
fprintf(vmain, "\t\t\to_sync <= br_sync;\n");
fprintf(vmain, "\t\tend\n");
fprintf(vmain, "\n\n");
fprintf(vmain, "endmodule\n");
2329,11 → 2511,18
fname = coredir + "/shiftaddmpy.v";
build_multiply(fname.c_str());
 
if ((dbg)&&(dbgstage == 4)) {
fname = coredir + "/qtrstage_dbg.v";
build_quarters(fname.c_str(), rounding, true);
}
fname = coredir + "/qtrstage.v";
build_quarters(fname.c_str(), rounding);
build_quarters(fname.c_str(), rounding, false);
 
fname = coredir + "/dblstage.v";
build_dblstage(fname.c_str(), rounding);
if ((dbg)&&(dbgstage == 2))
fname = coredir + "/dblstage_dbg.v";
else
fname = coredir + "/dblstage.v";
build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2));
 
if (bitreverse) {
fname = coredir + "/dblreverse.v";
/sw/Makefile
66,13 → 66,13
#
.PHONY: fft
fft: fftgen
./fftgen -f 2048 -n 16 -p 1
./fftgen -f 2048 -n 16 -p 4
cd $(CORED)/; verilator -cc fftmain.v
cd $(OBJDR); make -f Vfftmain.mk
 
.PHONY: ifft
ifft: fftgen
./fftgen -f 2048 -1 -n 22 -p 4
./fftgen -f 2048 -i -n 22 -p 4
cd $(CORED)/; verilator -cc ifftmain.v
cd $(OBJDR); make -f Vifftmain.mk
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.