URL
https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk
Subversion Repositories dblclockfft
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 25 to Rev 26
- ↔ Reverse comparison
Rev 25 → Rev 26
/dblclockfft/trunk/bench/cpp/fft_tb.m
8,8 → 8,12
% Reshape the matrix into one line per FFT |
% Assume an FFT length of 2048 |
ftlen = 2048; |
ndat = reshape(datc, ftlen, length(datc)/ftlen); |
% ftlen = 128; |
ndat = reshape(datc, ftlen*2, length(datc)/(ftlen*2)); |
|
truth = ndat((ftlen+1):(2*ftlen), :); |
output = ndat(1:ftlen,:); |
|
% Create a time axis, for use in plotting if desired |
tm = 0:(ftlen-1); |
|
/dblclockfft/trunk/bench/cpp/hwbfly_tb.cpp
109,6 → 109,7
m_bfly->i_right = rht; |
m_bfly->i_aux = aux & 1; |
|
m_bfly->i_ce = 1; |
tick(); |
|
if ((m_bfly->o_aux)&&(!m_lastaux)) |
/dblclockfft/trunk/bench/cpp/dblrev_tb.cpp
42,8 → 42,12
#include "Vdblreverse.h" |
#include "verilated.h" |
|
#define FFTBITS 4 |
#define FFTMASK ((1<<(FFTBITS))-1) |
#define FFTBITS 5 |
#define FFTSIZE (1<<(FFTBITS)) |
#define FFTMASK (FFTSIZE-1) |
#define DATALEN (1<<(FFTBITS+1)) |
#define DATAMSK (DATALEN-1) |
#define PAGEMSK (FFTSIZE) |
|
void tick(Vdblreverse *dblrev) { |
dblrev->i_clk = 0; |
50,6 → 54,8
dblrev->eval(); |
dblrev->i_clk = 1; |
dblrev->eval(); |
|
dblrev->i_ce = 0; |
} |
|
void reset(Vdblreverse *dblrev) { |
61,8 → 67,9
tick(dblrev); |
} |
|
unsigned long bitrev(int nbits, unsigned long val) { |
int r = 0; |
unsigned long bitrev(const int nbits, const unsigned long vl) { |
unsigned long r = 0; |
unsigned long val = vl; |
|
for(int k=0; k<nbits; k++) { |
r <<= 1; |
77,35 → 84,84
Verilated::commandArgs(argc, argv); |
Vdblreverse *dblrev = new Vdblreverse; |
int syncd = 0; |
unsigned long datastore[DATALEN], dataidx=0; |
|
reset(dblrev); |
|
for(int k=0; k<64; k++) { |
printf("FFTSIZE = %08x\n", FFTSIZE); |
printf("FFTMASK = %08x\n", FFTMASK); |
printf("DATALEN = %08x\n", DATALEN); |
printf("DATAMSK = %08x\n", DATAMSK); |
|
for(int k=0; k<4*(FFTSIZE); k++) { |
dblrev->i_ce = 1; |
dblrev->i_in_0 = 2*k; |
dblrev->i_in_1 = 2*k+1; |
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_0; |
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_1; |
tick(dblrev); |
|
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\n", |
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\t(%x)\n", |
k, dblrev->i_in_0, dblrev->i_in_1, |
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync); |
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync, |
dblrev->v__DOT__iaddr); |
|
if ((k>0)&&(((0==(k&(FFTMASK>>1)))?1:0) != dblrev->o_sync)) { |
fprintf(stderr, "FAIL, BAD SYNC\n"); |
fprintf(stdout, "FAIL, BAD SYNC\n"); |
exit(-1); |
} else if (dblrev->o_sync) |
} else if (dblrev->o_sync) { |
syncd = 1; |
} |
if ((syncd)&&((dblrev->o_out_0&FFTMASK) != bitrev(FFTBITS, 2*k))) { |
fprintf(stderr, "FAIL: BITREV of k (%2x) = %2lx, not %2lx\n", |
fprintf(stdout, "FAIL: BITREV.0 of k (%2x) = %2lx, not %2lx\n", |
k, dblrev->o_out_0, bitrev(FFTBITS, 2*k)); |
exit(-1); |
// exit(-1); |
} |
|
if ((syncd)&&((dblrev->o_out_1&FFTMASK) != bitrev(FFTBITS, 2*k+1))) { |
fprintf(stderr, "FAIL: BITREV of k (%2x) = %2lx, not %2lx\n", |
fprintf(stdout, "FAIL: BITREV.1 of k (%2x) = %2lx, not %2lx\n", |
k, dblrev->o_out_1, bitrev(FFTBITS, 2*k+1)); |
// exit(-1); |
} |
} |
|
for(int k=0; k<4*(FFTSIZE); k++) { |
dblrev->i_ce = 1; |
dblrev->i_in_0 = rand() & 0x0ffffff; |
dblrev->i_in_1 = rand() & 0x0ffffff; |
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_0; |
datastore[(dataidx++)&(DATAMSK)] = dblrev->i_in_1; |
tick(dblrev); |
|
printf("k=%3d: IN = %6lx : %6lx, OUT = %6lx : %6lx, SYNC = %d\n", |
k, dblrev->i_in_0, dblrev->i_in_1, |
dblrev->o_out_0, dblrev->o_out_1, dblrev->o_sync); |
|
if ((k>0)&&(((0==(k&(FFTMASK>>1)))?1:0) != dblrev->o_sync)) { |
fprintf(stdout, "FAIL, BAD SYNC\n"); |
exit(-1); |
} else if (dblrev->o_sync) |
syncd = 1; |
if ((syncd)&&(dblrev->o_out_0 != datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) + bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK))])) { |
fprintf(stdout, "FAIL: BITREV.0 of k (%2x) = %2lx, not %2lx (expected %lx -> %lx)\n", |
k, dblrev->o_out_0, |
datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) |
+ bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK))], |
(dataidx-2)&DATAMSK, |
(((dataidx-2)&PAGEMSK) |
+ bitrev(FFTBITS, (dataidx-FFTSIZE-2)&FFTMASK))); |
// exit(-1); |
} |
|
if ((syncd)&&(dblrev->o_out_1 != datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) + bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK))])) { |
fprintf(stdout, "FAIL: BITREV.1 of k (%2x) = %2lx, not %2lx (expected %lx)\n", |
k, dblrev->o_out_1, |
datastore[(((dataidx-2-FFTSIZE)&PAGEMSK) |
+ bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK))], |
(((dataidx-1)&PAGEMSK) |
+ bitrev(FFTBITS, (dataidx-FFTSIZE-1)&FFTMASK))); |
// exit(-1); |
} |
} |
|
delete dblrev; |
/dblclockfft/trunk/bench/cpp/fft_tb.cpp
49,16 → 49,30
|
#define LGWIDTH 11 |
#define IWIDTH 16 |
// #define OWIDTH 16 |
#define OWIDTH 22 |
|
#define NFTLOG 8 |
#define FFTLEN (1<<LGWIDTH) |
|
// Reverse the order of the low `nbits` bits of `vl`.
//
// Bit 0 of the input becomes bit (nbits-1) of the result, bit 1 becomes
// bit (nbits-2), and so on.  Any input bits at or above position `nbits`
// are ignored and do not appear in the result.
//
//   nbits  Number of low-order bits to reverse.  A value of zero (or less)
//          yields a result of zero.
//   vl     The value whose low bits are to be reversed.
//
// Returns the bit-reversed value.
unsigned long bitrev(const int nbits, const unsigned long vl) {
	unsigned long	r = 0;
	unsigned long	val = vl;

	for(int k=0; k<nbits; k++) {
		// Make room in the result, then move the current lowest
		// input bit into the result's lowest position.  Bits
		// consumed first therefore end up highest in the result.
		r <<= 1;
		r |= (val & 1);
		val >>= 1;
	}

	return r;
}
|
class FFT_TB { |
public: |
Vfftmain *m_fft; |
long m_data[FFTLEN], m_log[NFTLOG*FFTLEN]; |
int m_iaddr, m_oaddr, m_ntest; |
int m_iaddr, m_oaddr, m_ntest, m_logbase; |
FILE *m_dumpfp; |
fftw_plan m_plan; |
double *m_fft_buf; |
82,6 → 96,17
m_fft->eval(); |
m_fft->i_clk = 1; |
m_fft->eval(); |
|
/* |
int nrpt = (rand()&0x01f) + 1; |
m_fft->i_ce = 0; |
for(int i=0; i<nrpt; i++) { |
m_fft->i_clk = 0; |
m_fft->eval(); |
m_fft->i_clk = 1; |
m_fft->eval(); |
} |
*/ |
} |
|
void reset(void) { |
91,7 → 116,7
m_fft->i_rst = 0; |
tick(); |
|
m_iaddr = m_oaddr = 0; |
m_iaddr = m_oaddr = m_logbase = 0; |
m_syncd = false; |
} |
|
106,8 → 131,9
long *lp; |
|
// Fill up our test array from the log array |
// printf("%3d : CHECK: %8d %5x\n", m_ntest, m_iaddr, m_iaddr); |
dp = m_fft_buf; lp = &m_log[(m_iaddr-FFTLEN*3)&((NFTLOG*FFTLEN-1)&(-FFTLEN))]; |
printf("%3d : CHECK: %8d %5x m_log[-%x=%x]\n", m_ntest, m_iaddr, m_iaddr, |
m_logbase, (m_iaddr-m_logbase)&((NFTLOG*FFTLEN-1)&(-FFTLEN))); |
dp = m_fft_buf; lp = &m_log[(m_iaddr-m_logbase)&((NFTLOG*FFTLEN-1)&(-FFTLEN))]; |
for(int i=0; i<FFTLEN; i++) { |
long tv = *lp++; |
|
123,8 → 149,9
|
// Let's measure ... are we the zero vector? If not, how close? |
dp = m_fft_buf; |
for(int i=0; i<FFTLEN; i++) |
isq += (*dp) * (*dp); |
for(int i=0; i<FFTLEN*2; i++) { |
isq += (*dp) * (*dp); dp++; |
} |
|
fftw_execute(m_plan); |
|
131,17 → 158,10
// Let's load up the output we received into vout |
dp = vout; |
for(int i=0; i<FFTLEN; i++) { |
long tv = m_data[i]; |
|
// printf("OUT[%4d = %4x] = ", i, i); |
// printf("%12lx = ", tv); |
*dp = sbits(tv >> OWIDTH, OWIDTH); |
// printf("%10.1f + ", *dp); |
*dp = rdata(i); |
osq += (*dp) * (*dp); dp++; |
*dp = sbits(tv, OWIDTH); |
// printf("%10.1f j", *dp); |
*dp = idata(i); |
osq += (*dp) * (*dp); dp++; |
// printf(" <-> %12.1f %12.1f\n", m_fft_buf[2*i], m_fft_buf[2*i+1]); |
} |
|
|
157,6 → 177,19
|
double xisq = 0.0; |
sp = m_fft_buf; dp = vout; |
|
if ((true)&&(m_dumpfp)) { |
double tmp[FFTLEN*2], nscl; |
|
if (fabs(scale) < 1e-4) |
nscl = 1.0; |
else |
nscl = scale; |
for(int i=0; i<FFTLEN*2; i++) |
tmp[i] = m_fft_buf[i] * nscl; |
fwrite(tmp, sizeof(double), FFTLEN*2, m_dumpfp); |
} |
|
for(int i=0; i<FFTLEN*2; i++) { |
double vl = (*sp++) * scale - (*dp++); |
xisq += vl * vl; |
186,24 → 219,31
tick(); |
|
if (m_fft->o_sync) { |
if (!m_syncd) { |
m_logbase = m_iaddr; |
} // else printf("RESYNC AT %lx\n", m_fft->m_tickcount); |
m_oaddr &= (-1<<LGWIDTH); |
m_syncd = true; |
} else m_oaddr += 2; |
|
printf("%8x,%5d: %08x,%08x -> %011lx,%011lx" |
// "\t%011lx,%011lx" |
"\t%011lx,%011lx" |
printf("%8x,%5d: %08x,%08x -> %011lx,%011lx", |
m_iaddr, m_oaddr, |
lft, rht, m_fft->o_left, m_fft->o_right); |
printf( // "\t%011lx,%011lx" |
"\t%3x" |
"\t%011lx,%011lx" // w_e128, w_o128 |
// "\t%011lx,%011lx" // w_e4, w_o4 |
// "\t%06x,%06x" |
// "\t%06x,%06x" |
// "\t%011lx,%06x,%06x" |
"\t%011lx,%06x,%06x" |
"\t%06x,%06x,%06x,%06x" |
"\t%011lx,%011lx" |
" %s%s%s%s%s%s%s%s%s%s%s %s%s\n", |
m_iaddr, m_oaddr, |
lft, rht, m_fft->o_left, m_fft->o_right, |
m_fft->v__DOT__w_e4, |
m_fft->v__DOT__w_o4, |
"\t%011lx,%06x,%06x" // ob_a, ob_b_r, ob_b_i |
"\t%06x,%06x,%06x,%06x", // o_out_xx |
// "\t%011lx,%011lx" |
m_fft->v__DOT__revstage__DOT__iaddr, |
m_fft->v__DOT__w_e128, |
m_fft->v__DOT__w_o128, |
// m_fft->v__DOT__w_e4, |
// m_fft->v__DOT__w_o4, |
// m_fft->v__DOT__stage_e512__DOT__ib_a, |
// m_fft->v__DOT__stage_e512__DOT__ib_b, |
// m_fft->v__DOT__stage_e256__DOT__ib_a, |
233,13 → 273,23
m_fft->v__DOT__stage_2__DOT__o_out_0r, |
m_fft->v__DOT__stage_2__DOT__o_out_0i, |
m_fft->v__DOT__stage_2__DOT__o_out_1r, |
m_fft->v__DOT__stage_2__DOT__o_out_1i, |
m_fft->v__DOT__br_o_left, |
m_fft->v__DOT__br_o_right, |
(m_fft->v__DOT__w_s2048)?"S":"-", |
(m_fft->v__DOT__w_s1024)?"S":"-", |
(m_fft->v__DOT__w_s512)?"S":"-", |
(m_fft->v__DOT__w_s256)?"S":"-", |
m_fft->v__DOT__stage_2__DOT__o_out_1i); |
/* |
printf(" DBG:%c%c:%08x [%6d,%6d]", |
(m_fft->o_dbg&(1l<<33))?'T':' ', |
(m_fft->o_dbg&(1l<<32))?'C':' ', |
(unsigned)(m_fft->o_dbg&((-1l<<32)-1)), |
((int)(m_fft->o_dbg))>>16, |
(((unsigned)(m_fft->o_dbg&0x0ffff)) |
|((m_fft->o_dbg&0x08000)?(-1<<16):0))); |
*/ |
printf(" %s%s%s%s%s%s%s %s%s\n", |
// m_fft->v__DOT__br_o_left, |
// m_fft->v__DOT__br_o_right, |
// (m_fft->v__DOT__w_s2048)?"S":"-", |
// (m_fft->v__DOT__w_s1024)?"S":"-", |
// (m_fft->v__DOT__w_s512)?"S":"-", |
// (m_fft->v__DOT__w_s256)?"S":"-", |
(m_fft->v__DOT__w_s128)?"S":"-", |
(m_fft->v__DOT__w_s64)?"S":"-", |
(m_fft->v__DOT__w_s32)?"S":"-", |
276,11 → 326,17
} |
|
double rdata(int addr) { |
return (double)sbits(m_data[addr&(FFTLEN-1)]>>OWIDTH, OWIDTH); |
int index = addr & (FFTLEN-1); |
|
// index = bitrev(LGWIDTH, index); |
return (double)sbits(m_data[index]>>OWIDTH, OWIDTH); |
} |
|
double idata(int addr) { |
return (double)sbits(m_data[addr&(FFTLEN-1)], OWIDTH); |
int index = addr & (FFTLEN-1); |
|
// index = bitrev(LGWIDTH, index); |
return (double)sbits(m_data[index], OWIDTH); |
} |
|
void dump(FILE *fp) { |
319,6 → 375,96
fft->reset(); |
fft->dump(fpout); |
|
// 1. |
fft->test(0.0, 0.0, 32767.0, 0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 2. |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 3. |
fft->test(0.0,0.0,0.0,0.0); |
fft->test(32767.0, 0.0, 0.0, 0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 4. |
for(int k=0; k<8; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=8; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 5. |
if (FFTLEN/2 >= 16) { |
for(int k=0; k<16; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=16; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
// 6. |
if (FFTLEN/2 >= 32) { |
for(int k=0; k<32; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=32; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
// 7. |
if (FFTLEN/2 >= 64) { |
for(int k=0; k<64; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=64; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
if (FFTLEN/2 >= 128) { |
for(int k=0; k<128; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=128; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
if (FFTLEN/2 >= 256) { |
for(int k=0; k<256; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=256; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
if (FFTLEN/2 >= 512) { |
for(int k=0; k<256+128; k++) |
fft->test(32767.0, 0.0, 32767.0, 0.0); |
for(int k=256+128; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
} |
|
/* |
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int k=0; k<FFTLEN/2; k++) |
fft->test(0.0,0.0,0.0,0.0); |
*/ |
|
#ifndef NO_JUNK |
// 7. |
|
// 1 -> 0x0001 |
// 2 -> 0x0002 |
// 4 -> 0x0004 |
352,6 → 498,10
// 8192 -> 0xe000 |
// 16384 -> 0xc000 |
// 32768 -> 0x8000 |
fft->test(0.0,0.0,16384.0,0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
for(int v=1; v<=32768; v<<=1) for(int k=0; k<FFTLEN/2; k++) |
fft->test(-(double)v,0.0,-(double)v,0.0); |
// 1 -> 0x000040 CORRECT!! |
431,6 → 581,16
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 72. And another one on the next clock (FAILS, ugly) |
fft->test(0.0, 0.0, 8192.0, 0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 72. And another one on the next clock (FAILS, ugly) |
fft->test(0.0, 0.0, 512.0, 0.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
fft->test(0.0,0.0,0.0,0.0); |
|
// 73. And an imaginary one on the second clock |
fft->test(0.0, 0.0, 0.0, 16384.0); |
for(int k=0; k<FFTLEN/2-1; k++) |
504,7 → 664,7
sr = sin(W * (2*k+1)) * 4.0; |
fft->test(cl, sl, cr, sr); |
} |
|
#endif |
// 19.--24. And finally, let's clear out our results / buffer |
for(int k=0; k<(FFTLEN/2) * 5; k++) |
fft->test(0.0,0.0,0.0,0.0); |
/dblclockfft/trunk/bench/cpp/fftstage_o2048_tb.cpp
282,6 → 282,18
// Largest negative imaginary value |
for(int k=1; k<FFTSIZE; k+=2) |
ftstage->test((k==1), 0x000010000l); |
// Let's try an impulse |
for(int k=0; k<FFTSIZE; k+=2) |
ftstage->test((k==0), (k==0)?0x020000000l:0l); |
// Now, let's clear out the result |
for(int k=0; k<FFTSIZE; k+=2) |
ftstage->test((k==0), 0x000000000l); |
for(int k=0; k<FFTSIZE; k+=2) |
ftstage->test((k==0), 0x000000000l); |
for(int k=0; k<FFTSIZE; k+=2) |
ftstage->test((k==0), 0x000000000l); |
for(int k=0; k<FFTSIZE; k+=2) |
ftstage->test((k==0), 0x000000000l); |
|
printf("SUCCESS! (Offset = %d)\n", ftstage->m_offset); |
delete ftstage; |
/dblclockfft/trunk/bench/cpp/butterfly_tb.cpp
89,10 → 89,11
// we'll never get an aux=1 output. |
// |
m_bfly->i_rst = 1; |
m_bfly->i_ce = 1; |
m_bfly->i_aux = 1; |
for(int i=0; i<200; i++) |
for(int i=0; i<200; i++) { |
m_bfly->i_ce = 1; |
tick(); |
} |
|
// Now here's the RESET line, so let's see what the test does |
m_bfly->i_rst = 1; |
117,6 → 118,7
m_addr = 0; |
} |
|
m_bfly->i_ce = 1; |
tick(); |
|
if ((m_bfly->o_aux)&&(!m_lastaux)) |
/dblclockfft/trunk/doc/spec.pdf
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/dblclockfft/trunk/doc/src/gqtekspec.cls
27,6 → 27,7
\usepackage{datetime} |
\usepackage{graphicx} |
\usepackage[dvips]{pstricks} |
\usepackage{hhline} |
\usepackage{colortbl} |
\newdateformat{headerdate}{\THEYEAR/\twodigit{\THEMONTH}/\twodigit{\THEDAY}} |
\setlength{\hoffset}{0.25in} |
208,9 → 209,9
\vskip 2\baselineskip |
\vspace*{10pt}\vfil |
\begin{minipage}{\textwidth}\raggedleft |
\ifproject{\scalebox{1.2}{\Huge\bfseries\MakeUppercase\@project}} \\\fi |
\ifproject{\Huge\bfseries\MakeUppercase\@project} \\\fi |
\vspace*{15pt} |
\scalebox{1.2}{\Huge\bfseries\MakeUppercase\@title} \\ |
{\Huge\bfseries\MakeUppercase\@title} \\ |
\vskip 10\baselineskip |
\Large \@author \\ |
\ifemail{\Large \@email}\\\fi |
234,13 → 235,13
\@afterindentfalse |
\secdef\@chapter\@schapter} |
\renewcommand\@makechapterhead[1]{% |
\hbox to \textwidth{\hfil\scalebox{1.8}{\Huge\bfseries \thechapter.}}\vskip 10\p@ |
\hbox to \textwidth{\hfil{\Huge\bfseries \thechapter.}}\vskip 10\p@ |
\hbox to \textwidth{\rput(0,0){\psline[linewidth=0.04in](0,0)(\textwidth,0)}}\vskip \p@ |
\hbox to \textwidth{\rput(0,0){\psline[linewidth=0.04in](0,0)(\textwidth,0)}}\vskip 10\p@ |
\hbox to \textwidth{\hfill\scalebox{1.8}{\Huge\bfseries #1}}% |
\hbox to \textwidth{\hfill{\Huge\bfseries #1}}% |
\par\nobreak\vskip 40\p@} |
\renewcommand\@makeschapterhead[1]{% |
\hbox to \textwidth{\hfill\scalebox{1.8}{\Huge\bfseries #1}}% |
\hbox to \textwidth{\hfill{\Huge\bfseries #1}}% |
\par\nobreak\vskip 40\p@} |
% **************************************** |
% * INITIALIZATION * |
275,8 → 276,21
\begin{tabular}{|p{0.5in}|p{1in}|p{1in}|p{2.875in}|}\hline |
\rowcolor[gray]{0.8} Rev. & Date & Author & Description\\\hline\hline} |
{\end{tabular}\clearpage} |
\newenvironment{clocklist}{\begin{tabular}{|p{0.75in}|p{0.5in}|l|l|p{2.875in}|}\hline |
\rowcolor[gray]{0.85} Name & Source & \multicolumn{2}{l|}{Rates (MHz)} & Description \\\hhline{~|~|-|-|~}% |
\rowcolor[gray]{0.85} & & Max & Min & \\\hline\hline}% |
{\end{tabular}} |
\newenvironment{reglist}{\begin{tabular}{|p{0.75in}|p{0.5in}|p{0.5in}|p{0.5in}|p{2.875in}|}\hline |
\rowcolor[gray]{0.85} Name & Address & Width & Access & Description \\\hline\hline}% |
{\end{tabular}} |
\newenvironment{bitlist}{\begin{tabular}{|p{0.5in}|p{0.5in}|p{3.875in}|}\hline |
\rowcolor[gray]{0.85} Bit \# & Access & Description \\\hline\hline}% |
{\end{tabular}} |
\newenvironment{portlist}{\begin{tabular}{|p{0.75in}|p{0.5in}|p{0.75in}|p{3.375in}|}\hline |
\rowcolor[gray]{0.85} Port & Width & Direction & Description \\\hline\hline}% |
{\end{tabular}} |
\newenvironment{wishboneds}{\begin{tabular}{|p{2.5in}|p{2.5in}|}\hline |
\rowcolor[gray]{0.85} Description & Specification \\\hline\hline}% |
{\end{tabular}} |
\newenvironment{preface}{\chapter*{Preface}}{\par\bigskip\bigskip\leftline{\hfill\@author}} |
\endinput |
/dblclockfft/trunk/doc/src/spec.tex
3,7 → 3,7
\title{Specification} |
\author{Dan Gisselquist, Ph.D.} |
\email{dgisselq\at opencores.org} |
\revision{Rev.~0.1} |
\revision{Rev.~0.2} |
\begin{document} |
\pagestyle{gqtekspecplain} |
\titlepage |
24,6 → 24,7
with this program. If not, see \hbox{<http://www.gnu.org/licenses/>} for a copy. |
\end{license} |
\begin{revisionhistory} |
0.2 & 6/2/2015 & Gisselquist & Superficial formatting changes\\\hline |
0.1 & 3/3/2015 & Gisselquist & First Draft \\\hline |
\end{revisionhistory} |
% Revision History |
/dblclockfft/trunk/sw/fftgen.cpp
59,7 → 59,11
#include <ctype.h> |
#include <assert.h> |
|
#define COREDIR "fft-core" |
#define DEF_NBITSIN 16 |
#define DEF_COREDIR "fft-core" |
#define DEF_XTRACBITS 4 |
#define DEF_NMPY 0 |
#define DEF_XTRAPBITS 0 |
|
typedef enum { |
RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT |
231,7 → 235,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n" |
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n" |
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n" |
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n" |
"\n" |
"\t\talways @(posedge i_clk)\n" |
313,7 → 317,7
"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n" |
"\t\twire\t\t\tsign_bit, first_lost_bit;\n" |
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n" |
"\t\tassign\tfirst_lost_bit = i_val[0];\n" |
"\t\tassign\tsign_bit = i_val[(IWID-1)];\n" |
"\n" |
333,7 → 337,7
"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n" |
"\t\twire\t\t\tsign_bit, first_lost_bit;\n" |
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n" |
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n" |
"\t\tassign\tsign_bit = i_val[(IWID-1)];\n" |
"\n" |
415,7 → 419,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n" |
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n" |
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n" |
"\t\tassign\tlast_valid_bit = truncated_value[0];\n" |
"\t\tassign\tfirst_lost_bit = i_val[0];\n" |
"\n" |
435,7 → 439,7
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n" |
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n" |
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n" |
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1\'b0}}, 1\'b1 };\n" |
"\t\tassign\tlast_valid_bit = truncated_value[0];\n" |
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n" |
"\n" |
460,7 → 464,7
"endmodule\n"); |
} |
|
void build_quarters(const char *fname, ROUND_T rounding) { |
void build_quarters(const char *fname, ROUND_T rounding, bool dbg=false) { |
FILE *fp = fopen(fname, "w"); |
if (NULL == fp) { |
fprintf(stderr, "Could not open \'%s\' for writing\n", fname); |
481,7 → 485,7
fprintf(fp, |
"///////////////////////////////////////////////////////////////////////////\n" |
"//\n" |
"// Filename: qtrstage.v\n" |
"// Filename: qtrstage%s.v\n" |
"// \n" |
"// Project: %s\n" |
"//\n" |
492,11 → 496,11
"//\n" |
"//\n%s" |
"//\n", |
prjname, creator); |
(dbg)?"_dbg":"", prjname, creator); |
fprintf(fp, "%s", cpyleft); |
|
fprintf(fp, |
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n" |
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n" |
"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n" |
"\t// Parameters specific to the core that should be changed when this\n" |
"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n" |
506,7 → 510,12
"\tinput\t [(2*IWIDTH-1):0] i_data;\n" |
"\toutput\treg [(2*OWIDTH-1):0] o_data;\n" |
"\toutput\treg o_sync;\n" |
"\t\n"); |
"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":""); |
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n" |
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n" |
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n" |
"\n"); |
} |
fprintf(fp, |
"\treg\t wait_for_sync;\n" |
"\treg\t[3:0] pipeline;\n" |
536,16 → 545,16
fprintf(fp, |
"\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n"); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n" |
"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n" |
"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n" |
"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n" |
"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string); |
fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n" |
"\tassign n_rnd_diff_i = - rnd_diff_i;\n"); |
554,9 → 563,9
"\twire [(IWIDTH-1):0] rnd;\n" |
"\tgenerate\n" |
"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n" |
"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n" |
"\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n" |
"\telse\n" |
"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n" |
"\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n" |
"\tendgenerate\n" |
"\n" |
*/ |
566,14 → 575,17
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\twait_for_sync <= 1'b1;\n" |
"\t\t\twait_for_sync <= 1\'b1;\n" |
"\t\t\tiaddr <= 0;\n" |
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n" |
"\t\tbegin\n" |
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n" |
"\t\t\twait_for_sync <= 1\'b0;\n" |
"\t\tend\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\t\timem <= i_data;\n" |
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n" |
"\t\t\twait_for_sync <= 1'b0;\n" |
"\t\tend\n\n"); |
"\n\n"); |
fprintf(fp, |
"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n" |
"\t// Why not? Because iaddr will always be zero until after the\n" |
581,7 → 593,7
"\tinitial pipeline = 4\'h0;\n" |
"\talways\t@(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\t\tpipeline <= 4'h0;\n" |
"\t\t\tpipeline <= 4\'h0;\n" |
"\t\telse if (i_ce) // is our pipeline process full? Which stages?\n" |
"\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n"); |
fprintf(fp, |
597,9 → 609,12
fprintf(fp, |
"\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n"); |
fprintf(fp, |
"\t// Now for pipeline[2]\n" |
"\t// Now for pipeline[2]. We can actually do this at all i_ce\n" |
"\t// clock times, since nothing will listen unless pipeline[3]\n" |
"\t// on the next clock. Thus, we simplify this logic and do\n" |
"\t// it independent of pipeline[2].\n" |
"\talways\t@(posedge i_clk)\n" |
"\t\tif ((i_ce)&&(pipeline[2]))\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n" |
"\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n" |
633,13 → 648,16
"\t// Don\'t forget in the sync check that we are running\n" |
"\t// at two clocks per sample. Thus we need to\n" |
"\t// produce a sync every 2^(LGWIDTH-1) clocks.\n" |
"\tinitial\to_sync = 1\'b0;\n" |
"\talways\t@(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tif (i_rst)\n" |
"\t\t\to_sync <= 1\'b0;\n" |
"\t\telse if (i_ce)\n" |
"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n"); |
fprintf(fp, "endmodule\n"); |
} |
|
void build_dblstage(const char *fname, ROUND_T rounding) { |
void build_dblstage(const char *fname, ROUND_T rounding, const bool dbg = false) { |
FILE *fp = fopen(fname, "w"); |
if (NULL == fp) { |
fprintf(stderr, "Could not open \'%s\' for writing\n", fname); |
661,7 → 679,7
fprintf(fp, |
"///////////////////////////////////////////////////////////////////////////\n" |
"//\n" |
"// Filename: dblstage.v\n" |
"// Filename: dblstage%s.v\n" |
"//\n" |
"// Project: %s\n" |
"//\n" |
691,17 → 709,23
"// o_right The next (odd) complex output.\n" |
"// o_sync Output synchronization signal.\n" |
"//\n%s" |
"//\n", prjname, creator); |
"//\n", (dbg)?"_dbg":"", prjname, creator); |
|
fprintf(fp, "%s", cpyleft); |
fprintf(fp, |
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n" |
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n" |
"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n" |
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n" |
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n" |
"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n" |
"\toutput\treg\t\t\to_sync;\n" |
"\n"); |
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":""); |
|
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n" |
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n" |
"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n" |
"\n"); |
} |
fprintf(fp, |
"\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n" |
"\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n" |
721,19 → 745,25
"\t// bit than the two originals.\n" |
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n"); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0r(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0i(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1r(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1i(i_clk, i_ce,\n" |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string); |
|
fprintf(fp, |
"\n" |
"\t// As with any register connected to the sync pulse, these must\n" |
"\t// have initial values and be reset on the i_rst signal.\n" |
"\t// Other data values need only restrict their updates to i_ce\n" |
"\t// enabled clocks, but sync\'s must obey resets and initial\n" |
"\t// conditions as well.\n" |
"\treg\twait_for_sync, rnd_sync;\n" |
"\n" |
"\tinitial begin\n" |
744,13 → 774,25
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\trnd_sync <= 1'b0;\n" |
"\t\t\to_sync <= 1'b0;\n" |
"\t\t\twait_for_sync <= 1'b1;\n" |
"\t\t\trnd_sync <= 1\'b0;\n" |
"\t\t\to_sync <= 1\'b0;\n" |
"\t\t\twait_for_sync <= 1\'b1;\n" |
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n" |
"\t\tbegin\n" |
"\t\t\twait_for_sync <= 1'b0;\n" |
"\t\t\twait_for_sync <= 1\'b0;\n" |
"\t\t\t//\n" |
"\t\t\trnd_sync <= i_sync;\n" |
"\t\t\to_sync <= rnd_sync;\n" |
"\t\tend\n" |
"\n" |
"\t// As with other variables, these are really only updated when in\n" |
"\t// the processing pipeline, after the first i_sync. However, to\n" |
"\t// eliminate as much unnecessary logic as possible, we toggle\n" |
"\t// these any time the i_ce line is enabled.\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t//\n" |
"\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n" |
"\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n" |
"\t\t\t//\n" |
757,8 → 799,6
"\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n" |
"\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n" |
"\t\t\t//\n" |
"\t\t\trnd_sync <= i_sync;\n" |
"\t\t\to_sync <= rnd_sync;\n" |
"\t\tend\n" |
"\n" |
"\tassign\to_left = { o_out_0r, o_out_0i };\n" |
838,10 → 878,10
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n" |
"\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n" |
"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n" |
"\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n" |
"\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n" |
"\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n" |
"\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n" |
"\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n" |
"\t\tend\n" |
"\n" |
851,9 → 891,9
"\t\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n" |
"\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n" |
"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n" |
"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n" |
"\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n" |
"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n" |
"\t\t\tr_s[k+1] <= r_s[k];\n" |
"\t\tend\n" |
"\tend\n" |
897,10 → 937,17
"// width, WIDTH. Of course, there is a delay from the\n" |
"// first input to the first output. For this purpose,\n" |
"// o_sync is present.\n" |
"// o_sync This will be a 1'b1 for the first value in any block.\n" |
"// Following a reset, this will only become 1'b1 once\n" |
"// o_sync This will be a 1\'b1 for the first value in any block.\n" |
"// Following a reset, this will only become 1\'b1 once\n" |
"// the data has been loaded and is now valid. After that,\n" |
"// all outputs will be valid.\n" |
"//\n" |
"// 20150602 -- This module has undergone massive rework in order to\n" |
"// ensure that it uses resources efficiently. As a result, \n" |
"// it now optimizes nicely into block RAMs. As an unfortunately\n" |
"// side effect, it now passes it\'s bench test (dblrev_tb) but\n" |
"// fails the integration bench test (fft_tb).\n" |
"//\n" |
"//\n%s" |
"//\n", prjname, creator); |
fprintf(fp, "%s", cpyleft); |
941,65 → 988,69
fprintf(fp, |
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n" |
"\t\to_out_0, o_out_1, o_sync);\n" |
"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n" |
"\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n" |
"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n" |
"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n" |
"\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n" |
"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n" |
"\toutput\treg\t\t\to_sync;\n" |
"\n" |
"\treg\tin_reset;\n" |
"\treg\t[(LGSIZE):0]\tiaddr;\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n" |
"\treg\t\t\tin_reset;\n" |
"\treg\t[(LGSIZE-1):0]\tiaddr;\n" |
"\twire\t[(LGSIZE-3):0]\tbraddr;\n" |
"\n" |
"\twire\t[(2*LGSIZE-1):0] braddr;\n" |
"\tgenvar\tk;\n" |
"\tgenerate for(k=0; k<LGSIZE; k=k+1)\n" |
"\tgenerate for(k=0; k<LGSIZE-2; k=k+1)\n" |
"\tbegin : gen_a_bit_reversed_value\n" |
"\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n" |
"\t\tassign braddr[k] = iaddr[LGSIZE-3-k];\n" |
"\tend endgenerate\n" |
"\n" |
"\tinitial iaddr = 0;\n" |
"\tinitial in_reset = 1\'b1;\n" |
"\tinitial o_sync = 1\'b0;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\tiaddr <= 0;\n" |
"\t\t\tin_reset <= 1'b1;\n" |
"\t\t\tin_reset <= 1\'b1;\n" |
"\t\t\to_sync <= 1\'b0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tif (iaddr[(LGSIZE-1)])\n" |
"\t\t\tbegin\n" |
"\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n" |
"\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n" |
"\t\t\tend else begin\n" |
"\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n" |
"\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n" |
"\t\t\tend\n" |
"\t\t\tiaddr <= iaddr + { {(LGSIZE-2){1\'b0}}, 2\'h2 };\n" |
"\t\t\tif (&iaddr[(LGSIZE-1):1])\n" |
"\t\t\t\tin_reset <= 1'b0;\n" |
"\t\t\tiaddr <= iaddr + { {(LGSIZE-1){1\'b0}}, 1\'b1 };\n" |
"\t\t\tif (&iaddr[(LGSIZE-2):0])\n" |
"\t\t\t\tin_reset <= 1\'b0;\n" |
"\t\t\tif (in_reset)\n" |
"\t\t\tbegin\n" |
"\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n" |
"\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n" |
"\t\t\t\to_sync <= 1'b0;\n" |
"\t\t\tend else\n" |
"\t\t\tbegin\n" |
"\t\t\t\tif (braddr[0])\n" |
"\t\t\t\tbegin\n" |
"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n" |
"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n" |
"\t\t\t\tend else begin\n" |
"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n" |
"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n" |
"\t\t\t\tend\n" |
"\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n" |
"\t\t\tend\n" |
"\t\t\t\to_sync <= 1\'b0;\n" |
"\t\t\telse\n" |
"\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-2):0]);\n" |
"\t\tend\n" |
"\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_e [0:((1<<(LGSIZE))-1)];\n" |
"\treg\t[(2*WIDTH-1):0]\tmem_o [0:((1<<(LGSIZE))-1)];\n" |
"\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\tmem_e[iaddr] <= i_in_0;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\tmem_o[iaddr] <= i_in_1;\n" |
"\n" |
"\n" |
"\treg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;\n" |
"\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n\t\t\tevn_out_0 <= mem_e[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n\t\t\tevn_out_1 <= mem_e[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n\t\t\todd_out_0 <= mem_o[{~iaddr[LGSIZE-1],1\'b0,braddr}];\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n\t\t\todd_out_1 <= mem_o[{~iaddr[LGSIZE-1],1\'b1,braddr}];\n" |
"\n" |
"\treg\tadrz;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce) adrz = iaddr[LGSIZE-2];\n" |
"\n" |
"\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n" |
"\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n" |
"\n" |
"endmodule\n"); |
|
fclose(fp); |
1090,6 → 1141,14
"// YUP! But just barely. Do this and you'll really want\n" |
"// to drop a bit, although you will risk overflow in so\n" |
"// doing.\n" |
"//\n" |
"// 20150602 -- The sync logic lines have been completely redone. The\n" |
"// synchronization lines no longer go through the FIFO with the\n" |
"// left hand sum, but are kept out of memory. This allows the\n" |
"// butterfly to use more optimal memory resources, while also\n" |
"// guaranteeing that the sync lines can be properly reset upon\n" |
"// any reset signal.\n" |
"//\n" |
"//\n%s" |
"//\n", prjname, creator); |
fprintf(fp, "%s", cpyleft); |
1101,7 → 1160,7
"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n" |
"\t// Parameters specific to the core that should not be changed.\n" |
"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n" |
"\t\t\tSHIFT=0;\n" |
"\t\t\tSHIFT=0, AUXLEN=%d;\n" |
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n" |
"\t// this value is fractional, then round up to the nearest\n" |
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n" |
1111,9 → 1170,10
"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n" |
"\tinput\t\ti_aux;\n" |
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n" |
"\toutput\treg o_aux;\n" |
"\toutput\treg\to_aux;\n" |
"\n", 16, xtracbits, lgdelay(16,xtracbits), |
bflydelay(16, xtracbits), lgdelay(16,xtracbits)); |
bflydelay(16, xtracbits), bflydelay(16, xtracbits)+3, |
lgdelay(16,xtracbits)); |
fprintf(fp, |
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n" |
"\n" |
1131,8 → 1191,7
"\treg [(LGDELAY-1):0] fifo_addr;\n" |
"\twire [(LGDELAY-1):0] fifo_read_addr;\n" |
"\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n" |
"\treg [(2*IWIDTH+2):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n" |
"\treg\t\t\t\tovalid;\n" |
"\treg [(2*IWIDTH+1):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n" |
"\n"); |
fprintf(fp, |
"\t// Set up the input to the multiply\n" |
1142,7 → 1201,6
"\t\t\t// One clock just latches the inputs\n" |
"\t\t\tr_left <= i_left; // No change in # of bits\n" |
"\t\t\tr_right <= i_right;\n" |
"\t\t\tr_aux <= i_aux;\n" |
"\t\t\tr_coef <= i_coef;\n" |
"\t\t\t// Next clock adds/subtracts\n" |
"\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n" |
1150,7 → 1208,6
"\t\t\tr_dif_r <= r_left_r - r_right_r;\n" |
"\t\t\tr_dif_i <= r_left_i - r_right_i;\n" |
"\t\t\t// Other inputs are simply delayed on second clock\n" |
"\t\t\tr_aux_2 <= r_aux;\n" |
"\t\t\tr_coef_2<= r_coef;\n" |
"\t\tend\n" |
"\n"); |
1159,22 → 1216,18
"\t// to be multiplied, but yet we still need the results in sync\n" |
"\t// with the answer when it is ready.\n" |
"\tinitial fifo_addr = 0;\n" |
"\tinitial ovalid = 1'b0;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\tfifo_addr <= 0;\n" |
"\t\t\tovalid <= 1'b0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\telse if (i_ce)\n" |
"\t\t\t// Need to delay the sum side--nothing else happens\n" |
"\t\t\t// to it, but it needs to stay synchronized with the\n" |
"\t\t\t// right side.\n" |
"\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n" |
"\t\t\tfifo_addr <= fifo_addr + 1;\n" |
"\n" |
"\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n" |
"\t\tend\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\t\tfifo_left[fifo_addr] <= { r_sum_r, r_sum_i };\n" |
"\n" |
"\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n" |
"\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n" |
1259,12 → 1312,10
"\t// therefore, the left_x values need to be right shifted by\n" |
"\t// CWIDTH-2 as well. The additional bits come from a sign\n" |
"\t// extension.\n" |
"\twire aux;\n" |
"\twire\tsigned\t[(IWIDTH+CWIDTH):0] fifo_i, fifo_r;\n" |
"\treg\t\t[(2*IWIDTH+2):0] fifo_read;\n" |
"\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n" |
"\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n" |
"\tassign\taux = fifo_read[2*IWIDTH+2];\n" |
"\treg\t\t[(2*IWIDTH+1):0] fifo_read;\n" |
"\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n" |
"\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n" |
"\n" |
"\n" |
"\treg\tsigned\t[(OWIDTH-1):0] b_left_r, b_left_i,\n" |
1301,18 → 1352,18
"\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n"); |
|
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_r(i_clk, i_ce,\n" |
"\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n", |
rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_left_i(i_clk, i_ce,\n" |
"\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n", |
rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n" |
"\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n" |
"\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string); |
fprintf(fp, |
"\talways @(posedge i_clk)\n" |
1334,7 → 1385,17
"\t\t\tb_left_i <= rnd_left_i;\n" |
"\t\tend\n" |
"\n"); |
|
fprintf(fp, |
"\treg\t[(AUXLEN-1):0]\taux_pipeline;\n" |
"\tinitial\taux_pipeline = 0;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\t\taux_pipeline <= 0;\n" |
"\t\telse if (i_ce)\n" |
"\t\t\taux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };\n" |
"\n"); |
fprintf(fp, |
"\tinitial o_aux = 1\'b0;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
1342,7 → 1403,7
"\t\telse if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// Second clock, latch for final clock\n" |
"\t\t\to_aux <= aux & ovalid;\n" |
"\t\t\to_aux <= aux_pipeline[AUXLEN-1];\n" |
"\t\tend\n" |
"\n"); |
|
1420,6 → 1481,7
"\tassign\tr_left_i = r_left[ (IWIDTH-1):0];\n" |
"\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n" |
"\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n" |
"\treg signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n" |
"\n" |
"\treg signed [(IWIDTH):0] r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n" |
"\n" |
1431,14 → 1493,22
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\tr_aux <= 1'b0;\n" |
"\t\t\tr_aux_2 <= 1'b0;\n" |
"\t\t\tr_aux <= 1\'b0;\n" |
"\t\t\tr_aux_2 <= 1\'b0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// One clock just latches the inputs\n" |
"\t\t\tr_aux <= i_aux;\n" |
"\t\t\t// Next clock adds/subtracts\n" |
"\t\t\t// Other inputs are simply delayed on second clock\n" |
"\t\t\tr_aux_2 <= r_aux;\n" |
"\t\tend\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// One clock just latches the inputs\n" |
"\t\t\tr_left <= i_left; // No change in # of bits\n" |
"\t\t\tr_right <= i_right;\n" |
"\t\t\tr_aux <= i_aux;\n" |
"\t\t\tr_coef <= i_coef;\n" |
"\t\t\t// Next clock adds/subtracts\n" |
"\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n" |
1446,8 → 1516,8
"\t\t\tr_dif_r <= r_left_r - r_right_r;\n" |
"\t\t\tr_dif_i <= r_left_i - r_right_i;\n" |
"\t\t\t// Other inputs are simply delayed on second clock\n" |
"\t\t\tr_aux_2 <= r_aux;\n" |
"\t\t\tr_coef_2<= r_coef;\n" |
"\t\t\tir_coef_r <= r_coef[(2*CWIDTH-1):CWIDTH];\n" |
"\t\t\tir_coef_i <= r_coef[(CWIDTH-1):0];\n" |
"\t\tend\n" |
"\n\n"); |
fprintf(fp, |
1454,16 → 1524,14
"\t// See comments in the butterfly.v source file for a discussion of\n" |
"\t// these operations and the appropriate bit widths.\n\n"); |
fprintf(fp, |
"\twire signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n" |
"\tassign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n" |
"\tassign ir_coef_i = r_coef_2[(CWIDTH-1):0];\n" |
"\treg\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] p_one, p_two, p_three;\n" |
"\treg\tsigned [((IWIDTH+1)+(CWIDTH)-1):0] p_one, p_two;\n" |
"\treg\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] p_three;\n" |
"\n" |
"\treg\tsigned [(CWIDTH):0] p3c_in, p1c_in, p2c_in;\n" |
"\treg\tsigned [(IWIDTH+1):0] p3d_in, p1d_in, p2d_in;\n" |
"\treg\t[3:0] pipeline;\n" |
"\treg\tsigned [(CWIDTH-1):0] p1c_in, p2c_in; // Coefficient multiply inputs\n" |
"\treg\tsigned [(IWIDTH):0] p1d_in, p2d_in; // Data multiply inputs\n" |
"\treg\tsigned [(CWIDTH):0] p3c_in; // Product 3, coefficient input\n" |
"\treg\tsigned [(IWIDTH+1):0] p3d_in; // Product 3, data input\n" |
"\n" |
"\tinitial pipeline = 4\'h0;\n" |
"\tinitial leftv = 0;\n" |
"\tinitial leftvv = 0;\n" |
"\talways @(posedge i_clk)\n" |
1470,30 → 1538,41
"\tbegin\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\tpipeline <= 4'h0;\n" |
"\t\t\tleftv <= 0;\n" |
"\t\t\tleftvv <= 0;\n" |
"\t\tend else if (i_clk)\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// Second clock, pipeline = 1\n" |
"\t\t\tp1c_in <= { ir_coef_r[(CWIDTH-1)], ir_coef_r };\n" |
"\t\t\tp2c_in <= { ir_coef_i[(CWIDTH-1)], ir_coef_i };\n" |
"\t\t\tp1d_in <= { r_dif_r[(IWIDTH)], r_dif_r };\n" |
"\t\t\tp2d_in <= { r_dif_i[(IWIDTH)], r_dif_i };\n" |
"\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n" |
"\n" |
"\t\t\t// Third clock, pipeline = 3\n" |
"\t\t\t// As desired, each of these lines infers a DSP48\n" |
"\t\t\tleftvv <= leftv;\n" |
"\t\tend\n" |
"\tend\n" |
"\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// Second clock, pipeline = 1\n" |
"\t\t\tp1c_in <= ir_coef_r;\n" |
"\t\t\tp2c_in <= ir_coef_i;\n" |
"\t\t\tp1d_in <= r_dif_r;\n" |
"\t\t\tp2d_in <= r_dif_i;\n" |
"\t\t\tp3c_in <= ir_coef_i + ir_coef_r;\n" |
"\t\t\tp3d_in <= r_dif_r + r_dif_i;\n" |
"\n" |
"\t\t\tleftv <= { r_aux_2, r_sum_r, r_sum_i };\n" |
"\n" |
"\t\t\t// Third clock, pipeline = 3\n" |
"\t\t\t// As desired, each of these lines infers a DSP48\n" |
"\t\t\tp_one <= p1c_in * p1d_in;\n" |
"\t\t\tp_two <= p2c_in * p2d_in;\n" |
"\t\t\tp_three <= p3c_in * p3d_in;\n" |
"\t\t\tleftvv <= leftv;\n" |
"\t\tend\n" |
"\n" |
"\t\t\tpipeline <= { pipeline[2:0], 1'b1 };\n" |
"\t\tend\n" |
"\tend\n" |
"\twire\tsigned [((IWIDTH+2)+(CWIDTH+1)-1):0] w_one, w_two;\n" |
"\tassign\tw_one = { {(2){p_one[((IWIDTH+1)+(CWIDTH)-1)]}}, p_one };\n" |
"\tassign\tw_two = { {(2){p_two[((IWIDTH+1)+(CWIDTH)-1)]}}, p_two };\n" |
"\n"); |
|
fprintf(fp, |
1506,28 → 1585,29
"\twire\taux_s;\n" |
"\twire\tsigned\t[(IWIDTH+CWIDTH):0] left_si, left_sr;\n" |
"\treg\t\t[(2*IWIDTH+2):0] left_saved;\n" |
"\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n" |
"\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n" |
"\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1\'b0}} };\n" |
"\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1\'b0}} };\n" |
"\tassign\taux_s = left_saved[2*IWIDTH+2];\n" |
"\n" |
"\n" |
"\t(* use_dsp48=\"no\" *)\n" |
"\treg signed [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"); |
fprintf(fp, |
"\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n"); |
|
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n" |
"\t\t\t\t{ {2{left_sr[(IWIDTH+CWIDTH)]}}, left_sr }, rnd_left_r);\n\n", |
"\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_r(i_clk, i_ce,\n" |
"\t\t\t\tleft_sr, rnd_left_r);\n\n", |
rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n" |
"\t\t\t\t{ {2{left_si[(IWIDTH+CWIDTH)]}}, left_si }, rnd_left_i);\n\n", |
"\t%s #(CWIDTH+IWIDTH+1,OWIDTH,SHIFT+2) do_rnd_left_i(i_clk, i_ce,\n" |
"\t\t\t\tleft_si, rnd_left_i);\n\n", |
rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_r(i_clk, i_ce,\n" |
"\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n" |
"\t%s #(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4) do_rnd_right_i(i_clk, i_ce,\n" |
"\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string); |
|
fprintf(fp, |
1537,21 → 1617,29
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\tleft_saved <= 0;\n" |
"\t\t\to_aux <= 1'b0;\n" |
"\t\t\to_aux <= 1\'b0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// First clock, recover all values\n" |
"\t\t\tleft_saved <= leftvv;\n" |
"\n" |
"\t\t\t// Second clock, round and latch for final clock\n" |
"\t\t\to_aux <= aux_s;\n" |
"\t\tend\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n" |
"\t\t\t// although they only need to be (IWIDTH+1)\n" |
"\t\t\t// + (CWIDTH) bits wide. (We've got two\n" |
"\t\t\t// extra bits we need to get rid of.)\n" |
"\t\t\tmpy_r <= p_one - p_two;\n" |
"\t\t\tmpy_i <= p_three - p_one - p_two;\n" |
"\n" |
"\t\t\t// Second clock, round and latch for final clock\n" |
"\n" |
"\t\t\to_aux <= aux_s;\n" |
"\n" |
"\t\t\t// These two lines also infer DSP48\'s.\n" |
"\t\t\t// To keep from using extra DSP48 resources,\n" |
"\t\t\t// they are prevented from using DSP48\'s\n" |
"\t\t\t// by the (* use_dsp48 ... *) comment above.\n" |
"\t\t\tmpy_r <= w_one - w_two;\n" |
"\t\t\tmpy_i <= p_three - w_one - w_two;\n" |
"\t\tend\n" |
"\n"); |
|
1567,7 → 1655,7
|
} |
|
void build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false) { |
void build_stage(const char *fname, const char *coredir, int stage, bool odd, int nbits, bool inv, int xtra, bool hwmpy=false, bool dbg=false) { |
FILE *fstage = fopen(fname, "w"); |
int cbits = nbits + xtra; |
|
1586,7 → 1674,7
fprintf(fstage, |
"////////////////////////////////////////////////////////////////////////////\n" |
"//\n" |
"// Filename: %sfftstage_%c%d.v\n" |
"// Filename: %sfftstage_%c%d%s.v\n" |
"//\n" |
"// Project: %s\n" |
"//\n" |
1597,10 → 1685,11
"// FFT, there shall be (N-1) of these stages. \n" |
"//\n%s" |
"//\n", |
(inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator); |
(inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", prjname, creator); |
fprintf(fstage, "%s", cpyleft); |
fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n", |
(inv)?"i":"", (odd)?'o':'e', stage*2); |
fprintf(fstage, "module\t%sfftstage_%c%d%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n", |
(inv)?"i":"", (odd)?'o':'e', stage*2, (dbg)?"_dbg":"", |
(dbg)?", o_dbg":""); |
// These parameter values are useless at this point--they are to be |
// replaced by the parameter values in the calling program. Only |
// problem is, the CWIDTH needs to match exactly! |
1617,7 → 1706,13
"\tinput [(2*IWIDTH-1):0] i_data;\n" |
"\toutput reg [(2*OWIDTH-1):0] o_data;\n" |
"\toutput reg o_sync;\n" |
"\n" |
"\n"); |
if (dbg) { fprintf(fstage, "\toutput\twire\t[33:0]\t\t\to_dbg;\n" |
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n" |
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n" |
"\n"); |
} |
fprintf(fstage, |
"\treg wait_for_sync;\n" |
"\treg [(2*IWIDTH-1):0] ib_a, ib_b;\n" |
"\treg [(2*CWIDTH-1):0] ib_c;\n" |
1652,7 → 1747,7
sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2); |
} else { |
sprintf(memfile, "%s/%scmem_%c%d.hex", |
COREDIR, (inv)?"i":"", |
coredir, (inv)?"i":"", |
(odd)?'o':'e', stage*2); |
} |
// strcpy(&memfile[strlen(memfile)-2], ".hex"); |
1700,7 → 1795,7
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\twait_for_sync <= 1'b1;\n" |
"\t\t\twait_for_sync <= 1\'b1;\n" |
"\t\t\tiaddr <= 0;\n" |
"\t\tend\n" |
"\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n" |
1708,10 → 1803,13
"\t\t\t//\n" |
"\t\t\t// First step: Record what we\'re not ready to use yet\n" |
"\t\t\t//\n" |
"\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n" |
"\t\t\tiaddr <= iaddr + { {(LGWIDTH-2){1\'b0}}, 1\'b1 };\n" |
"\t\t\twait_for_sync <= 1'b0;\n" |
"\t\tend\n\n"); |
"\t\t\twait_for_sync <= 1\'b0;\n" |
"\t\tend\n" |
"\talways @(posedge i_clk) // Need to make certain here that we don\'t read\n" |
"\t\tif ((i_ce)&&(~iaddr[LGSPAN])) // and write the same address on\n" |
"\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk\n" |
"\n"); |
|
fprintf(fstage, |
"\t//\n" |
1719,24 → 1817,25
"\t//\n" |
"\tinitial ib_sync = 1\'b0;\n" |
"\talways\t@(posedge i_clk)\n" |
"\tif (i_rst)\n" |
"\t\tib_sync <= 1\'b0;\n" |
"\telse if ((i_ce)&&(iaddr[LGSPAN]))\n" |
"\t\tbegin\n" |
"\t\t\t// Set the sync to true on the very first\n" |
"\t\t\t// valid input in, and hence on the very\n" |
"\t\t\t// first valid data out per FFT.\n" |
"\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n" |
"\t\tend\n" |
"\t\tif (i_rst)\n" |
"\t\t\tib_sync <= 1\'b0;\n" |
"\t\telse if ((i_ce)&&(iaddr[LGSPAN]))\n" |
"\t\t\tbegin\n" |
"\t\t\t\t// Set the sync to true on the very first\n" |
"\t\t\t\t// valid input in, and hence on the very\n" |
"\t\t\t\t// first valid data out per FFT.\n" |
"\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n" |
"\t\t\tend\n" |
"\talways\t@(posedge i_clk)\n" |
"\tif ((i_ce)&&(iaddr[LGSPAN]))\n" |
"\t\tbegin\n" |
"\t\t\t// One input from memory, ...\n" |
"\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n" |
"\t\t\t// One input clocked in from the top\n" |
"\t\t\tib_b <= i_data;\n" |
"\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n" |
"\t\tend\n\n", (inv)?"i":""); |
"\t\tif ((i_ce)&&(iaddr[LGSPAN]))\n" |
"\t\t\tbegin\n" |
"\t\t\t\t// One input from memory, ...\n" |
"\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n" |
"\t\t\t\t// One input clocked in from the top\n" |
"\t\t\t\tib_b <= i_data;\n" |
"\t\t\t\t// and the coefficient or twiddle factor\n" |
"\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n" |
"\t\t\tend\n\n", (inv)?"i":""); |
|
if (hwmpy) { |
fprintf(fstage, |
1768,42 → 1867,66
"\t\t\tb_started <= 0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n" |
"\t\t\tbegin // A butterfly output is available\n" |
"\t\t\t\tb_started <= 1'b1;\n" |
"\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n" |
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n" |
"\n" |
"\t\t\t\to_sync <= (ob_sync);\n" |
"\t\t\t\to_data <= ob_a;\n" |
"\t\t\tend else if (b_started)\n" |
"\t\t\tbegin // and keep outputting once you start--at a rate\n" |
"\t\t\t// of one guaranteed output per clock that has i_ce set.\n" |
"\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n" |
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n" |
"\t\t\t\to_sync <= 1'b0;\n" |
"\t\t\tend else\n" |
"\t\t\t\to_sync <= 1'b0;\n" |
"\t\t\to_sync <= (~oB[LGSPAN])?ob_sync : 1\'b0;\n" |
"\t\t\tif (ob_sync||b_started)\n" |
"\t\t\t\toB <= oB + { {(LGSPAN){1\'b0}}, 1\'b1 };\n" |
"\t\t\tif ((ob_sync)&&(~oB[LGSPAN]))\n" |
"\t\t\t// A butterfly output is available\n" |
"\t\t\t\tb_started <= 1\'b1;\n" |
"\t\tend\n\n"); |
fprintf(fstage, |
"\treg [(LGSPAN-1):0]\t\tdly_addr;\n" |
"\treg [(2*OWIDTH-1):0]\tdly_value;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\tdly_addr <= oB[(LGSPAN-1):0];\n" |
"\t\t\tdly_value <= ob_b;\n" |
"\t\tend\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\t\tomem[dly_addr] <= dly_value;\n" |
"\n"); |
fprintf(fstage, |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\t\to_data <= (~oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];\n" |
"\n"); |
fprintf(fstage, "endmodule\n"); |
} |
|
void usage(void) { |
fprintf(stderr, |
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n" |
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n" |
// "\tfftgen -i\n" |
"\t-1\tBuild a normal FFT, running at one clock per complex sample, or (for\n" |
"\t\ta real FFT) at one clock per two real input samples.\n" |
"\t-c <cbits>\tCauses all internal complex coefficients to be\n" |
"\t\tlonger than the corresponding data bits, to help avoid\n" |
"\t\tcoefficient truncation errors.\n" |
"\t\tcoefficient truncation errors. The default is %d bits lnoger\n" |
"\t\tthan the data bits.\n" |
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n" |
"\t\tThe default is a subdirectory of the current directory named %s.\n" |
"\t-f <size>\tSets the size of the FFT as the number of complex\n" |
"\t\tsamples input to the transform.\n" |
"\t\tsamples input to the transform. (No default value, this is\n" |
"\t\ta required parameter.)\n" |
"\t-i\tAn inverse FFT, meaning that the coefficients are\n" |
"\t\tgiven by e^{ j 2 pi k/N n }. The default is a forward FFT, with\n" |
"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n" |
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n" |
"\t\tproduce. Internal values greater than this value will be\n" |
"\t\ttruncated to this value.\n" |
"\t\ttruncated to this value. (The default value grows the input\n" |
"\t\tsize by one bit for every two FFT stages.)\n" |
"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n" |
"\t\tThe default is %d bits input for each component of the two\n" |
"\t\tcomplex values into the FFT.\n" |
"\t-p <nmpy>\tSets the number of stages that will use any hardware \n" |
"\t\tmultiplication facility, instead of shift-add emulation.\n" |
"\t\tThree multiplies per butterfly, or six multiplies per stage will\n" |
"\t\tbe accelerated in this fashion. The default is not to use any\n" |
"\t\thardware multipliers.\n" |
"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n" |
"\t\tcomplex FFT. (Default is a Complex FFT.)\n" |
"\t-s\tSkip the final bit reversal stage. This is useful in\n" |
"\t\talgorithms that need to apply a filter without needing to do\n" |
"\t\tbin shifting, as these algorithms can, with this option, just\n" |
1814,10 → 1937,14
"\t-S\tInclude the final bit reversal stage (default).\n" |
"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n" |
"\t\trounding or truncation of the answer to the final number of bits.\n" |
"\t\tThe default is to use %d extra bits internally.\n", |
/* |
"\t-0\tA forward FFT (default), meaning that the coefficients are\n" |
"\t\tgiven by e^{-j 2 pi k/N n }.\n" |
"\t-1\tAn inverse FFT, meaning that the coefficients are\n" |
"\t\tgiven by e^{ j 2 pi k/N n }.\n"); |
"\t\tgiven by e^{ j 2 pi k/N n }.\n", |
*/ |
DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS); |
} |
|
// Features still needed: |
1824,15 → 1951,20
// Interactivity. |
int main(int argc, char **argv) { |
int fftsize = -1, lgsize = -1; |
int nbitsin = 16, xtracbits = 4, nummpy=0, nonmpy=2; |
int nbitsout, maxbitsout = -1, xtrapbits=0; |
bool bitreverse = true, inverse=false, interactive = false, |
verbose_flag = false; |
int nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS, |
nummpy=DEF_NMPY, nonmpy=2; |
int nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS; |
bool bitreverse = true, inverse=false, |
verbose_flag = false, single_clock = false, |
real_fft = false; |
FILE *vmain; |
std::string coredir = "fft-core", cmdline = ""; |
std::string coredir = DEF_COREDIR, cmdline = ""; |
ROUND_T rounding = RND_CONVERGENT; |
// ROUND_T rounding = RND_HALFUP; |
|
bool dbg = false; |
int dbgstage = 128; |
|
if (argc <= 1) |
usage(); |
|
1846,11 → 1978,13
if ('-' == argv[argn][0]) { |
for(int j=1; (argv[argn][j])&&(j<100); j++) { |
switch(argv[argn][j]) { |
/* |
case '0': |
inverse = false; |
break; |
*/ |
case '1': |
inverse = true; |
single_clock = true; |
break; |
case 'c': |
if (argn+1 >= argc) { |
1868,6 → 2002,15
coredir = argv[++argn]; |
j += 200; |
break; |
case 'D': |
dbg = true; |
if (argn+1 >= argc) { |
printf("ERR: No debug stage number given!\n\n"); |
usage(); exit(-1); |
} |
dbgstage = atoi(argv[++argn]); |
j+= 200; |
break; |
case 'f': |
if (argn+1 >= argc) { |
printf("ERR: No FFT Size given!\n\n"); |
1898,7 → 2041,7
exit(0); |
break; |
case 'i': |
interactive = true; |
inverse = true; |
break; |
case 'm': |
if (argn+1 >= argc) { |
1924,6 → 2067,9
nummpy = atoi(argv[++argn]); |
j += 200; |
break; |
case 'r': |
real_fft = true; |
break; |
case 'S': |
bitreverse = true; |
break; |
1953,6 → 2099,18
} |
} |
|
if (real_fft) { |
printf("The real FFT option is not implemented yet, but still on\nmy to do list. Please try again later.\n"); |
exit(0); |
} if (single_clock) { |
printf("The single clock FFT option is not implemented yet, but still on\nmy to do list. Please try again later.\n"); |
exit(0); |
} if (!bitreverse) { |
printf("WARNING: While I can skip the bit reverse stage, the code to do\n"); |
printf("an inverse FFT on a bit--reversed input has not yet been\n"); |
printf("built.\n"); |
} |
|
if ((lgsize < 0)&&(fftsize > 1)) { |
for(lgsize=1; (1<<lgsize) < fftsize; lgsize++) |
; |
2104,7 → 2262,8
fprintf(vmain, "//\n"); |
fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":""); |
fprintf(vmain, "\t\ti_left, i_right,\n"); |
fprintf(vmain, "\t\to_left, o_right, o_sync);\n"); |
fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n", |
(dbg)?", o_dbg":""); |
fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize); |
assert(lgsize > 0); |
fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n"); |
2111,6 → 2270,8
fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"); |
fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"); |
fprintf(vmain, "\toutput\treg\t\t\to_sync;\n"); |
if (dbg) |
fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n"); |
fprintf(vmain, "\n\n"); |
|
fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n"); |
2124,9 → 2285,9
fprintf(vmain, "\tinitial br_start = 1\'b0;\n"); |
fprintf(vmain, "\talways @(posedge i_clk)\n"); |
fprintf(vmain, "\t\tif (i_rst)\n"); |
fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n"); |
fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n"); |
fprintf(vmain, "\t\telse if (i_ce)\n"); |
fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n"); |
fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n"); |
} |
fprintf(vmain, "\n\n"); |
fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n"); |
2134,19 → 2295,25
fprintf(vmain, "\n\n"); |
} else { |
int nbits = nbitsin, dropbit=0; |
int obits = nbits+1+xtrapbits; |
|
if ((maxbitsout > 0)&&(obits > maxbitsout)) |
obits = maxbitsout; |
|
// Always do a first stage |
fprintf(vmain, "\n\n"); |
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize); |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(nbits+1+xtrapbits)-1, fftsize, fftsize); |
fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize); |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
xtracbits, nbits+1+xtrapbits, |
((dbg)&&(dbgstage == fftsize))?"_dbg":"", |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
xtracbits, nbits+1+xtrapbits, |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize); |
2166,8 → 2333,10
fname += "fftstage_e"; |
sprintf(numstr, "%d", fftsize); |
fname += numstr; |
if ((dbg)&&(dbgstage == fftsize)) |
fname += "_dbg"; |
fname += ".v"; |
build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage); // Even stage |
build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 0, nbits, inverse, xtracbits, mpystage, (dbg)&&(dbgstage == fftsize)); // Even stage |
|
fname = coredir + "/"; |
if (inverse) fname += "i"; |
2175,15 → 2344,15
sprintf(numstr, "%d", fftsize); |
fname += numstr; |
fname += ".v"; |
build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage); // Odd stage |
build_stage(fname.c_str(), coredir.c_str(), fftsize/2, 1, nbits, inverse, xtracbits, mpystage, false); // Odd stage |
} |
|
nbits += 1; // New number of input bits |
nbits = obits; // New number of input bits |
tmp_size >>= 1; lgtmp--; |
dropbit = 0; |
fprintf(vmain, "\n\n"); |
while(tmp_size >= 8) { |
int obits = nbits+((dropbit)?0:1); |
obits = nbits+((dropbit)?0:1); |
|
if ((maxbitsout > 0)&&(obits > maxbitsout)) |
obits = maxbitsout; |
2190,12 → 2359,13
|
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size); |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size); |
fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
((dbg)&&(dbgstage == tmp_size))?"_dbg":"", |
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0, |
tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size, ((dbg)&&(dbgstage == tmp_size))?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits, |
2216,10 → 2386,12
fname += "fftstage_e"; |
sprintf(numstr, "%d", tmp_size); |
fname += numstr; |
if ((dbg)&&(dbgstage == tmp_size)) |
fname += "_dbg"; |
fname += ".v"; |
build_stage(fname.c_str(), tmp_size/2, 0, |
build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 0, |
nbits+xtrapbits, inverse, xtracbits, |
mpystage); // Even stage |
mpystage, ((dbg)&&(dbgstage == tmp_size))); // Even stage |
|
fname = coredir + "/"; |
if (inverse) fname += "i"; |
2227,9 → 2399,9
sprintf(numstr, "%d", tmp_size); |
fname += numstr; |
fname += ".v"; |
build_stage(fname.c_str(), tmp_size/2, 1, |
build_stage(fname.c_str(), coredir.c_str(), tmp_size/2, 1, |
nbits+xtrapbits, inverse, xtracbits, |
mpystage); // Odd stage |
mpystage, false); // Odd stage |
} |
|
|
2239,7 → 2411,7
} |
|
if (tmp_size == 4) { |
int obits = nbits+((dropbit)?0:1); |
obits = nbits+((dropbit)?0:1); |
|
if ((maxbitsout > 0)&&(obits > maxbitsout)) |
obits = maxbitsout; |
2246,9 → 2418,12
|
fprintf(vmain, "\twire\t\tw_s4, w_os4;\n"); |
fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1); |
fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n", |
nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0); |
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n"); |
fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n", |
((dbg)&&(dbgstage==4))?"_dbg":"", |
nbits+xtrapbits, obits+xtrapbits, lgsize, |
(inverse)?1:0, (dropbit)?0:0); |
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n", |
((dbg)&&(dbgstage==4))?", o_dbg":""); |
fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n", |
nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0); |
fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n"); |
2258,7 → 2433,7
} |
|
{ |
int obits = nbits+((dropbit)?0:1); |
obits = nbits+((dropbit)?0:1); |
if (obits > nbitsout) |
obits = nbitsout; |
if ((maxbitsout>0)&&(obits > maxbitsout)) |
2282,8 → 2457,8
fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n"); |
fprintf(vmain, "\talways @(posedge i_clk)\n"); |
fprintf(vmain, "\t\tif (i_rst)\n"); |
fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n"); |
fprintf(vmain, "\t\telse\n"); |
fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n"); |
fprintf(vmain, "\t\telse if (i_ce)\n"); |
fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n"); |
fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n"); |
} |
2305,11 → 2480,18
|
fprintf(vmain, "\n\n"); |
fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n"); |
fprintf(vmain, "\tinitial\to_sync = 1\'b0;\n"); |
fprintf(vmain, "\talways @(posedge i_clk)\n"); |
fprintf(vmain, "\t\tif (i_rst)\n"); |
fprintf(vmain, "\t\t\to_sync <= 1\'b0;\n"); |
fprintf(vmain, "\t\telse if (i_ce)\n"); |
fprintf(vmain, "\t\t\to_sync <= br_sync;\n"); |
fprintf(vmain, "\n"); |
fprintf(vmain, "\talways @(posedge i_clk)\n"); |
fprintf(vmain, "\t\tif (i_ce)\n"); |
fprintf(vmain, "\t\tbegin\n"); |
fprintf(vmain, "\t\t\to_left <= br_o_left;\n"); |
fprintf(vmain, "\t\t\to_right <= br_o_right;\n"); |
fprintf(vmain, "\t\t\to_sync <= br_sync;\n"); |
fprintf(vmain, "\t\tend\n"); |
fprintf(vmain, "\n\n"); |
fprintf(vmain, "endmodule\n"); |
2329,11 → 2511,18
fname = coredir + "/shiftaddmpy.v"; |
build_multiply(fname.c_str()); |
|
if ((dbg)&&(dbgstage == 4)) { |
fname = coredir + "/qtrstage_dbg.v"; |
build_quarters(fname.c_str(), rounding, true); |
} |
fname = coredir + "/qtrstage.v"; |
build_quarters(fname.c_str(), rounding); |
build_quarters(fname.c_str(), rounding, false); |
|
fname = coredir + "/dblstage.v"; |
build_dblstage(fname.c_str(), rounding); |
if ((dbg)&&(dbgstage == 2)) |
fname = coredir + "/dblstage_dbg.v"; |
else |
fname = coredir + "/dblstage.v"; |
build_dblstage(fname.c_str(), rounding, (dbg)&&(dbgstage==2)); |
|
if (bitreverse) { |
fname = coredir + "/dblreverse.v"; |
/dblclockfft/trunk/sw/Makefile
66,13 → 66,13
# |
.PHONY: fft |
fft: fftgen |
./fftgen -f 2048 -n 16 -p 1 |
./fftgen -f 2048 -n 16 -p 4 |
cd $(CORED)/; verilator -cc fftmain.v |
cd $(OBJDR); make -f Vfftmain.mk |
|
.PHONY: ifft |
ifft: fftgen |
./fftgen -f 2048 -1 -n 22 -p 4 |
./fftgen -f 2048 -i -n 22 -p 4 |
cd $(CORED)/; verilator -cc ifftmain.v |
cd $(OBJDR); make -f Vifftmain.mk |
|