OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /dblclockfft/trunk
    from Rev 38 to Rev 39
    Reverse comparison

Rev 38 → Rev 39

/rtl/README.md
15,6 → 15,7
multiply at the cost of more logic and a greater delay.
 
- [longbimpy](longbimpy.v) is the logic binary multiply.
 
- [bimpy](bimpy.v) multiplies a small set of bits together. It is a
component of [longbimpy](longbimpy.v)
 
/rtl/bimpy.v
25,23 → 25,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
50,11 → 52,11
`default_nettype none
//
module bimpy(i_clk, i_ce, i_a, i_b, o_r);
parameter BW=18, // Number of bits in i_b
LUTB=2; // Number of bits in i_a for our LUT multiply
input i_clk, i_ce;
input [(LUTB-1):0] i_a;
input [(BW-1):0] i_b;
parameter BW=18; // Number of bits in i_b
localparam LUTB=2; // Number of bits in i_a for our LUT multiply
input wire i_clk, i_ce;
input wire [(LUTB-1):0] i_a;
input wire [(BW-1):0] i_b;
output reg [(BW+LUTB-1):0] o_r;
 
wire [(BW+LUTB-2):0] w_r;
65,8 → 67,32
assign c = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }
& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});
 
initial o_r = 0;
always @(posedge i_clk)
if (i_ce)
o_r <= w_r + { c, 2'b0 };
if (i_ce)
o_r <= w_r + { c, 2'b0 };
 
`ifdef FORMAL
reg f_past_valid;
 
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
`define ASSERT assert
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce)))
begin
if ($past(i_a)==0)
`ASSERT(o_r == 0);
else if ($past(i_a) == 1)
`ASSERT(o_r == $past(i_b));
 
if ($past(i_b)==0)
`ASSERT(o_r == 0);
else if ($past(i_b) == 1)
`ASSERT(o_r[(LUTB-1):0] == $past(i_a));
end
`endif
endmodule
/rtl/bitreverse.v
4,32 → 4,12
//
// Project: A General Purpose Pipelined FFT Implementation
//
// Purpose: This module bitreverses a pipelined FFT input. Operation is
// expected as follows:
// Purpose: This module bitreverses a pipelined FFT input. It differes
// from the dblreverse module in that this is just a simple and
// straightforward bitreverse, rather than one written to handle two
// words at once.
//
// i_clk A running clock at whatever system speed is offered.
// i_reset A synchronous reset signal, that resets all internals
// i_ce If this is one, one input is consumed and an output
// is produced.
// i_in_0, i_in_1
// Two inputs to be consumed, each of width WIDTH.
// o_out_0, o_out_1
// Two of the bitreversed outputs, also of the same
// width, WIDTH. Of course, there is a delay from the
// first input to the first output. For this purpose,
// o_sync is present.
// o_sync This will be a 1'b1 for the first value in any block.
// Following a reset, this will only become 1'b1 once
// the data has been loaded and is now valid. After that,
// all outputs will be valid.
//
// 20150602 -- This module has undergone massive rework in order to
// ensure that it uses resources efficiently. As a result,
// it now optimizes nicely into block RAMs. As an unfortunately
// side effect, it now passes it's bench test (dblrev_tb) but
// fails the integration bench test (fft_tb).
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
37,23 → 17,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
61,116 → 43,59
//
`default_nettype none
//
 
 
//
// How do we do bit reversing at two smples per clock? Can we separate out
// our work into eight memory banks, writing two banks at once and reading
// another two banks in the same clock?
//
// mem[00xxx0] = s_0[n]
// mem[00xxx1] = s_1[n]
// o_0[n] = mem[10xxx0]
// o_1[n] = mem[11xxx0]
// ...
// mem[01xxx0] = s_0[m]
// mem[01xxx1] = s_1[m]
// o_0[m] = mem[10xxx1]
// o_1[m] = mem[11xxx1]
// ...
// mem[10xxx0] = s_0[n]
// mem[10xxx1] = s_1[n]
// o_0[n] = mem[00xxx0]
// o_1[n] = mem[01xxx0]
// ...
// mem[11xxx0] = s_0[m]
// mem[11xxx1] = s_1[m]
// o_0[m] = mem[00xxx1]
// o_1[m] = mem[01xxx1]
// ...
//
// The answer is that, yes we can but: we need to use four memory banks
// to do it properly. These four banks are defined by the two bits
// that determine the top and bottom of the correct address. Larger
// FFT's would require more memories.
//
//
module bitreverse(i_clk, i_reset, i_ce, i_in_0, i_in_1,
o_out_0, o_out_1, o_sync);
module bitreverse(i_clk, i_reset, i_ce, i_in, o_out, o_sync);
parameter LGSIZE=5, WIDTH=24;
input i_clk, i_reset, i_ce;
input [(2*WIDTH-1):0] i_in_0, i_in_1;
output wire [(2*WIDTH-1):0] o_out_0, o_out_1;
input wire i_clk, i_reset, i_ce;
input wire [(2*WIDTH-1):0] i_in;
output reg [(2*WIDTH-1):0] o_out;
output reg o_sync;
reg [(LGSIZE):0] wraddr;
wire [(LGSIZE):0] rdaddr;
 
reg in_reset;
reg [(LGSIZE-1):0] iaddr;
wire [(LGSIZE-3):0] braddr;
reg [(2*WIDTH-1):0] brmem [0:((1<<(LGSIZE+1))-1)];
 
genvar k;
generate for(k=0; k<LGSIZE-2; k=k+1)
begin : gen_a_bit_reversed_value
assign braddr[k] = iaddr[LGSIZE-3-k];
end endgenerate
generate for(k=0; k<LGSIZE; k=k+1)
assign rdaddr[k] = wraddr[LGSIZE-1-k];
endgenerate
assign rdaddr[LGSIZE] = !wraddr[LGSIZE];
 
initial iaddr = 0;
initial in_reset = 1'b1;
initial o_sync = 1'b0;
reg in_reset;
 
initial in_reset = 1'b1;
always @(posedge i_clk)
if (i_reset)
begin
iaddr <= 0;
in_reset <= 1'b1;
o_sync <= 1'b0;
end else if (i_ce)
else if ((i_ce)&&(&wraddr[(LGSIZE-1):0]))
in_reset <= 1'b0;
 
initial wraddr = 0;
always @(posedge i_clk)
if (i_reset)
wraddr <= 0;
else if (i_ce)
begin
iaddr <= iaddr + { {(LGSIZE-1){1'b0}}, 1'b1 };
if (&iaddr[(LGSIZE-2):0])
in_reset <= 1'b0;
if (in_reset)
o_sync <= 1'b0;
else
o_sync <= ~(|iaddr[(LGSIZE-2):0]);
brmem[wraddr] <= i_in;
wraddr <= wraddr + 1;
end
 
reg [(2*WIDTH-1):0] mem_e [0:((1<<(LGSIZE))-1)];
reg [(2*WIDTH-1):0] mem_o [0:((1<<(LGSIZE))-1)];
 
always @(posedge i_clk)
if (i_ce) mem_e[iaddr] <= i_in_0;
always @(posedge i_clk)
if (i_ce) mem_o[iaddr] <= i_in_1;
if (i_ce) // If (i_reset) we just output junk ... not a problem
o_out <= brmem[rdaddr]; // w/o a sync pulse
 
 
reg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1;
 
initial o_sync = 1'b0;
always @(posedge i_clk)
if (i_ce)
evn_out_0 <= mem_e[{!iaddr[LGSIZE-1],1'b0,braddr}];
always @(posedge i_clk)
if (i_ce)
evn_out_1 <= mem_e[{!iaddr[LGSIZE-1],1'b1,braddr}];
always @(posedge i_clk)
if (i_ce)
odd_out_0 <= mem_o[{!iaddr[LGSIZE-1],1'b0,braddr}];
always @(posedge i_clk)
if (i_ce)
odd_out_1 <= mem_o[{!iaddr[LGSIZE-1],1'b1,braddr}];
if (i_reset)
o_sync <= 1'b0;
else if ((i_ce)&&(!in_reset))
o_sync <= (wraddr[(LGSIZE-1):0] == 0);
 
reg adrz;
always @(posedge i_clk)
if (i_ce) adrz <= iaddr[LGSIZE-2];
 
assign o_out_0 = (adrz)?odd_out_0:evn_out_0;
assign o_out_1 = (adrz)?odd_out_1:evn_out_1;
 
`ifdef FORMAL
`define ASSERT assert
`ifdef BITREVERSE
`define ASSUME assume
`define ASSERT assert
`else
`define ASSUME assert
`define ASSERT assume
`endif
 
reg f_past_valid;
182,7 → 107,7
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
`ASSERT(iaddr == 0);
`ASSERT(wraddr == 0);
`ASSERT(in_reset);
`ASSERT(!o_sync);
end
191,135 → 116,60
assume((i_ce)||($past(i_ce))||($past(i_ce,2)));
`endif // BITREVERSE
 
(* anyconst *) reg [LGSIZE-1:0] f_const_addr;
wire [LGSIZE-3:0] f_reversed_addr;
// reg [LGSIZE:0] f_now;
reg f_addr_loaded_0, f_addr_loaded_1;
reg [(2*WIDTH-1):0] f_data_0, f_data_1;
wire f_writing, f_reading;
(* anyconst *) reg [LGSIZE:0] f_const_addr;
wire [LGSIZE:0] f_reversed_addr;
reg f_addr_loaded;
reg [(2*WIDTH-1):0] f_addr_value;
 
generate for(k=0; k<LGSIZE-2; k=k+1)
assign f_reversed_addr[k] = f_const_addr[LGSIZE-3-k];
generate for(k=0; k<LGSIZE; k=k+1)
assign f_reversed_addr[k] = f_const_addr[LGSIZE-1-k];
endgenerate
assign f_reversed_addr[LGSIZE] = f_const_addr[LGSIZE];
 
assign f_writing=(f_const_addr[LGSIZE-1]==iaddr[LGSIZE-1]);
assign f_reading=(f_const_addr[LGSIZE-1]!=iaddr[LGSIZE-1]);
initial f_addr_loaded_0 = 1'b0;
initial f_addr_loaded_1 = 1'b0;
initial f_addr_loaded = 1'b0;
always @(posedge i_clk)
if (i_reset)
f_addr_loaded <= 1'b0;
else if (i_ce)
begin
f_addr_loaded_0 <= 1'b0;
f_addr_loaded_1 <= 1'b0;
end else if (i_ce)
begin
if (iaddr == f_const_addr)
begin
f_addr_loaded_0 <= 1'b1;
f_addr_loaded_1 <= 1'b1;
end
 
if (f_reading)
begin
if ((braddr == f_const_addr[LGSIZE-3:0])
&&(iaddr[LGSIZE-2] == 1'b0))
f_addr_loaded_0 <= 1'b0;
 
if ((braddr == f_const_addr[LGSIZE-3:0])
&&(iaddr[LGSIZE-2] == 1'b1))
f_addr_loaded_1 <= 1'b0;
end
if (wraddr == f_const_addr)
f_addr_loaded <= 1'b1;
else if (rdaddr == f_const_addr)
f_addr_loaded <= 1'b0;
end
 
always @(posedge i_clk)
if ((i_ce)&&(iaddr == f_const_addr))
if ((i_ce)&&(wraddr == f_const_addr))
begin
f_data_0 <= i_in_0;
f_data_1 <= i_in_1;
`ASSERT(!f_addr_loaded_0);
`ASSERT(!f_addr_loaded_1);
f_addr_value <= i_in;
`ASSERT(!f_addr_loaded);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&($past(f_addr_loaded_0))&&(!f_addr_loaded_0))
begin
assert(!$past(iaddr[LGSIZE-2]));
if (f_const_addr[LGSIZE-2])
assert(o_out_1 == f_data_0);
else
assert(o_out_0 == f_data_0);
end
&&($past(f_addr_loaded))&&(!f_addr_loaded))
assert(o_out == f_addr_value);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&($past(f_addr_loaded_1))&&(!f_addr_loaded_1))
begin
assert($past(iaddr[LGSIZE-2]));
if (f_const_addr[LGSIZE-2])
assert(o_out_1 == f_data_1);
else
assert(o_out_0 == f_data_1);
end
 
always @(*)
`ASSERT(o_sync == ((iaddr[LGSIZE-2:0] == 1)&&(!in_reset)));
if (o_sync)
assert(wraddr[LGSIZE-1:0] == 1);
 
// Before writing to a section, the loaded flags should be
// zero
always @(*)
if (f_writing)
begin
`ASSERT(f_addr_loaded_0 == (iaddr[LGSIZE-2:0]
> f_const_addr[LGSIZE-2:0]));
`ASSERT(f_addr_loaded_1 == (iaddr[LGSIZE-2:0]
> f_const_addr[LGSIZE-2:0]));
end
if ((wraddr[LGSIZE]==f_const_addr[LGSIZE])
&&(wraddr[LGSIZE-1:0]
<= f_const_addr[LGSIZE-1:0]))
`ASSERT(!f_addr_loaded);
 
// If we were writing, and now we are reading, then both
// f_addr_loaded flags must be set
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&($past(f_writing))&&(f_reading))
begin
`ASSERT(f_addr_loaded_0);
`ASSERT(f_addr_loaded_1);
end
 
always @(*)
if (f_writing)
`ASSERT(f_addr_loaded_0 == f_addr_loaded_1);
if ((rdaddr[LGSIZE]==f_const_addr[LGSIZE])&&(f_addr_loaded))
`ASSERT(wraddr[LGSIZE-1:0]
<= f_reversed_addr[LGSIZE-1:0]+1);
 
// When reading, and the loaded flag is zero, our pointer
// must not have hit the address of interest yet
always @(*)
if ((!in_reset)&&(f_reading))
`ASSERT(f_addr_loaded_0 ==
((!iaddr[LGSIZE-2])&&(iaddr[LGSIZE-3:0]
<= f_reversed_addr[LGSIZE-3:0])));
always @(*)
if ((!in_reset)&&(f_reading))
`ASSERT(f_addr_loaded_1 ==
((!iaddr[LGSIZE-2])||(iaddr[LGSIZE-3:0]
<= f_reversed_addr[LGSIZE-3:0])));
always @(*)
if ((in_reset)&&(f_reading))
begin
`ASSERT(!f_addr_loaded_0);
`ASSERT(!f_addr_loaded_1);
end
if (f_addr_loaded)
`ASSERT(brmem[f_const_addr] == f_addr_value);
 
always @(*)
if(iaddr[LGSIZE-1])
`ASSERT(!in_reset);
 
always @(*)
if (f_addr_loaded_0)
`ASSERT(mem_e[f_const_addr] == f_data_0);
always @(*)
if (f_addr_loaded_1)
`ASSERT(mem_o[f_const_addr] == f_data_1);
 
 
`endif // FORMAL
endmodule
/rtl/butterfly.v
77,23 → 77,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
147,13 → 149,45
localparam MPYREMAINDER = MPYDELAY - CKPCE*(MPYDELAY/CKPCE);
 
 
input i_clk, i_reset, i_ce;
input [(2*CWIDTH-1):0] i_coef;
input [(2*IWIDTH-1):0] i_left, i_right;
input i_aux;
input wire i_clk, i_reset, i_ce;
input wire [(2*CWIDTH-1):0] i_coef;
input wire [(2*IWIDTH-1):0] i_left, i_right;
input wire i_aux;
output wire [(2*OWIDTH-1):0] o_left, o_right;
output reg o_aux;
 
`ifdef FORMAL
localparam F_LGDEPTH = (AUXLEN > 64) ? 7
: (AUXLEN > 32) ? 6
: (AUXLEN > 16) ? 5
: (AUXLEN > 8) ? 4
: (AUXLEN > 4) ? 3 : 2;
 
localparam F_DEPTH = AUXLEN;
localparam [F_LGDEPTH-1:0] F_D = F_DEPTH[F_LGDEPTH-1:0]-1;
 
reg signed [IWIDTH-1:0] f_dlyleft_r [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyleft_i [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyright_r [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyright_i [0:F_DEPTH-1];
reg signed [CWIDTH-1:0] f_dlycoeff_r [0:F_DEPTH-1];
reg signed [CWIDTH-1:0] f_dlycoeff_i [0:F_DEPTH-1];
reg signed [F_DEPTH-1:0] f_dlyaux;
 
wire signed [IWIDTH:0] f_predifr, f_predifi;
wire signed [IWIDTH+CWIDTH+3-1:0] f_predifrx, f_predifix;
wire signed [CWIDTH:0] f_sumcoef;
wire signed [IWIDTH+1:0] f_sumdiff;
wire signed [IWIDTH:0] f_sumr, f_sumi;
wire signed [IWIDTH+CWIDTH+3-1:0] f_sumrx, f_sumix;
wire signed [IWIDTH:0] f_difr, f_difi;
wire signed [IWIDTH+CWIDTH+3-1:0] f_difrx, f_difix;
wire signed [IWIDTH+CWIDTH+3-1:0] f_widecoeff_r, f_widecoeff_i;
 
wire [(CWIDTH):0] fp_one_ic, fp_two_ic, fp_three_ic, f_p3c_in;
wire [(IWIDTH+1):0] fp_one_id, fp_two_id, fp_three_id, f_p3d_in;
`endif
 
reg [(2*IWIDTH-1):0] r_left, r_right;
reg [(2*CWIDTH-1):0] r_coef, r_coef_2;
wire signed [(IWIDTH-1):0] r_left_r, r_left_i, r_right_r, r_right_i;
171,20 → 205,20
 
// Set up the input to the multiply
always @(posedge i_clk)
if (i_ce)
begin
// One clock just latches the inputs
r_left <= i_left; // No change in # of bits
r_right <= i_right;
r_coef <= i_coef;
// Next clock adds/subtracts
r_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits
r_sum_i <= r_left_i + r_right_i;
r_dif_r <= r_left_r - r_right_r;
r_dif_i <= r_left_i - r_right_i;
// Other inputs are simply delayed on second clock
r_coef_2<= r_coef;
end
if (i_ce)
begin
// One clock just latches the inputs
r_left <= i_left; // No change in # of bits
r_right <= i_right;
r_coef <= i_coef;
// Next clock adds/subtracts
r_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits
r_sum_i <= r_left_i + r_right_i;
r_dif_r <= r_left_r - r_right_r;
r_dif_i <= r_left_i - r_right_i;
// Other inputs are simply delayed on second clock
r_coef_2<= r_coef;
end
 
// Don't forget to record the even side, since it doesn't need
// to be multiplied, but yet we still need the results in sync
191,17 → 225,17
// with the answer when it is ready.
initial fifo_addr = 0;
always @(posedge i_clk)
if (i_reset)
fifo_addr <= 0;
else if (i_ce)
// Need to delay the sum side--nothing else happens
// to it, but it needs to stay synchronized with the
// right side.
fifo_addr <= fifo_addr + 1;
if (i_reset)
fifo_addr <= 0;
else if (i_ce)
// Need to delay the sum side--nothing else happens
// to it, but it needs to stay synchronized with the
// right side.
fifo_addr <= fifo_addr + 1;
 
always @(posedge i_clk)
if (i_ce)
fifo_left[fifo_addr] <= { r_sum_r, r_sum_i };
if (i_ce)
fifo_left[fifo_addr] <= { r_sum_r, r_sum_i };
 
wire signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i;
assign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];
253,12 → 287,24
// simpler, multiply.
longbimpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,
{ir_coef_r[CWIDTH-1],ir_coef_r},
{r_dif_r[IWIDTH],r_dif_r}, p_one);
{r_dif_r[IWIDTH],r_dif_r}, p_one
`ifdef FORMAL
, fp_one_ic, fp_one_id
`endif
);
longbimpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,
{ir_coef_i[CWIDTH-1],ir_coef_i},
{r_dif_i[IWIDTH],r_dif_i}, p_two);
{r_dif_i[IWIDTH],r_dif_i}, p_two
`ifdef FORMAL
, fp_two_ic, fp_two_id
`endif
);
longbimpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,
p3c_in, p3d_in, p_three);
p3c_in, p3d_in, p_three
`ifdef FORMAL
, fp_three_ic, fp_three_id
`endif
);
 
end else if (CKPCE == 2)
begin : CKPCE_TWO
281,7 → 327,19
reg signed [(CWIDTH+IWIDTH+3)-1:0] mpy_pipe_out;
reg signed [IWIDTH+CWIDTH+3-1:0] longmpy;
 
`ifdef FORMAL
wire [CWIDTH:0] f_past_ic;
wire [IWIDTH+1:0] f_past_id;
wire [CWIDTH:0] f_past_mux_ic;
wire [IWIDTH+1:0] f_past_mux_id;
 
reg [CWIDTH:0] f_rpone_ic, f_rptwo_ic, f_rpthree_ic,
f_rp2one_ic, f_rp2two_ic, f_rp2three_ic;
reg [IWIDTH+1:0] f_rpone_id, f_rptwo_id, f_rpthree_id,
f_rp2one_id, f_rp2two_id, f_rp2three_id;
`endif
 
 
initial ce_phase = 1'b0;
always @(posedge i_clk)
if (i_reset)
314,12 → 372,20
end
 
longbimpy #(CWIDTH+1,IWIDTH+2) mpy0(i_clk, mpy_pipe_v,
mpy_cof_sum, mpy_dif_sum, longmpy);
mpy_cof_sum, mpy_dif_sum, longmpy
`ifdef FORMAL
, f_past_ic, f_past_id
`endif
);
 
longbimpy #(CWIDTH+1,IWIDTH+2) mpy1(i_clk, mpy_pipe_v,
{ mpy_pipe_vc[CWIDTH-1], mpy_pipe_vc },
{ mpy_pipe_vd[IWIDTH ], mpy_pipe_vd },
mpy_pipe_out);
mpy_pipe_out
`ifdef FORMAL
, f_past_mux_ic, f_past_mux_id
`endif
);
 
reg signed [((IWIDTH+2)+(CWIDTH+1)-1):0]
rp_one, rp_two, rp_three,
328,28 → 394,57
always @(posedge i_clk)
if (((i_ce)&&(!MPYDELAY[0]))
||((ce_phase)&&(MPYDELAY[0])))
begin
rp_one <= mpy_pipe_out;
`ifdef FORMAL
f_rpone_ic <= f_past_mux_ic;
f_rpone_id <= f_past_mux_id;
`endif
end
 
always @(posedge i_clk)
if (((i_ce)&&(MPYDELAY[0]))
||((ce_phase)&&(!MPYDELAY[0])))
begin
rp_two <= mpy_pipe_out;
`ifdef FORMAL
f_rptwo_ic <= f_past_mux_ic;
f_rptwo_id <= f_past_mux_id;
`endif
end
 
always @(posedge i_clk)
if (i_ce)
begin
rp_three <= longmpy;
`ifdef FORMAL
f_rpthree_ic <= f_past_ic;
f_rpthree_id <= f_past_id;
`endif
end
 
 
// Our outputs *MUST* be set on a clock where i_ce is
// true for the following logic to work. Make that
// happen here.
always @(posedge i_clk)
if (i_ce)
begin
rp2_one<= rp_one;
always @(posedge i_clk)
if (i_ce)
rp2_two <= rp_two;
always @(posedge i_clk)
if (i_ce)
rp2_three<= rp_three;
`ifdef FORMAL
f_rp2one_ic <= f_rpone_ic;
f_rp2one_id <= f_rpone_id;
 
f_rp2two_ic <= f_rptwo_ic;
f_rp2two_id <= f_rptwo_id;
 
f_rp2three_ic <= f_rpthree_ic;
f_rp2three_id <= f_rpthree_id;
`endif
end
 
assign p_one = rp2_one;
assign p_two = (!MPYDELAY[0])? rp2_two : rp_two;
assign p_three = ( MPYDELAY[0])? rp_three : rp2_three;
359,6 → 454,17
assign unused = { rp2_two, rp2_three };
// verilator lint_on UNUSED
 
`ifdef FORMAL
assign fp_one_ic = f_rp2one_ic;
assign fp_one_id = f_rp2one_id;
 
assign fp_two_ic = (!MPYDELAY[0])? f_rp2two_ic : f_rptwo_ic;
assign fp_two_id = (!MPYDELAY[0])? f_rp2two_id : f_rptwo_id;
 
assign fp_three_ic= (MPYDELAY[0])? f_rpthree_ic : f_rp2three_ic;
assign fp_three_id= (MPYDELAY[0])? f_rpthree_id : f_rp2three_id;
`endif
 
end else if (CKPCE <= 3)
begin : CKPCE_THREE
// Coefficient multiply inputs
376,6 → 482,18
 
reg signed [ (CWIDTH+IWIDTH+3)-1:0] mpy_pipe_out;
 
`ifdef FORMAL
wire [CWIDTH:0] f_past_ic;
wire [IWIDTH+1:0] f_past_id;
 
reg [CWIDTH:0] f_rpone_ic, f_rptwo_ic, f_rpthree_ic,
f_rp2one_ic, f_rp2two_ic, f_rp2three_ic,
f_rp3one_ic;
reg [IWIDTH+1:0] f_rpone_id, f_rptwo_id, f_rpthree_id,
f_rp2one_id, f_rp2two_id, f_rp2three_id,
f_rp3one_id;
`endif
 
initial ce_phase = 3'b011;
always @(posedge i_clk)
if (i_reset)
389,28 → 507,32
mpy_pipe_v = (i_ce)||(ce_phase < 3'b010);
 
always @(posedge i_clk)
if (ce_phase == 3'b000)
begin
// Second clock
mpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= {
ir_coef_r[CWIDTH-1], ir_coef_r,
ir_coef_i[CWIDTH-1], ir_coef_i };
mpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r;
mpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= {
r_dif_r[IWIDTH], r_dif_r,
r_dif_i[IWIDTH], r_dif_i };
mpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i;
if (ce_phase == 3'b000)
begin
// Second clock
mpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= {
ir_coef_r[CWIDTH-1], ir_coef_r,
ir_coef_i[CWIDTH-1], ir_coef_i };
mpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r;
mpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= {
r_dif_r[IWIDTH], r_dif_r,
r_dif_i[IWIDTH], r_dif_i };
mpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i;
 
end else if (mpy_pipe_v)
begin
mpy_pipe_c[3*(CWIDTH+1)-1:0] <= {
mpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1'b0}} };
mpy_pipe_d[3*(IWIDTH+2)-1:0] <= {
mpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1'b0}} };
end
end else if (mpy_pipe_v)
begin
mpy_pipe_c[3*(CWIDTH+1)-1:0] <= {
mpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1'b0}} };
mpy_pipe_d[3*(IWIDTH+2)-1:0] <= {
mpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1'b0}} };
end
 
longbimpy #(CWIDTH+1,IWIDTH+2) mpy(i_clk, mpy_pipe_v,
mpy_pipe_vc, mpy_pipe_vd, mpy_pipe_out);
mpy_pipe_vc, mpy_pipe_vd, mpy_pipe_out
`ifdef FORMAL
, f_past_ic, f_past_id
`endif
);
 
reg signed [((IWIDTH+2)+(CWIDTH+1)-1):0]
rp_one, rp_two, rp_three,
422,32 → 544,77
begin
 
if (i_ce)
begin
rp_two <= mpy_pipe_out;
else if (ce_phase == 3'b000)
`ifdef FORMAL
f_rptwo_ic <= f_past_ic;
f_rptwo_id <= f_past_id;
`endif
end else if (ce_phase == 3'b000)
begin
rp_three <= mpy_pipe_out;
else if (ce_phase == 3'b001)
`ifdef FORMAL
f_rpthree_ic <= f_past_ic;
f_rpthree_id <= f_past_id;
`endif
end else if (ce_phase == 3'b001)
begin
rp_one <= mpy_pipe_out;
 
`ifdef FORMAL
f_rpone_ic <= f_past_ic;
f_rpone_id <= f_past_id;
`endif
end
end else if (MPYREMAINDER == 1)
begin
 
if (i_ce)
begin
rp_one <= mpy_pipe_out;
else if (ce_phase == 3'b000)
`ifdef FORMAL
f_rpone_ic <= f_past_ic;
f_rpone_id <= f_past_id;
`endif
end else if (ce_phase == 3'b000)
begin
rp_two <= mpy_pipe_out;
else if (ce_phase == 3'b001)
`ifdef FORMAL
f_rptwo_ic <= f_past_ic;
f_rptwo_id <= f_past_id;
`endif
end else if (ce_phase == 3'b001)
begin
rp_three <= mpy_pipe_out;
 
`ifdef FORMAL
f_rpthree_ic <= f_past_ic;
f_rpthree_id <= f_past_id;
`endif
end
end else // if (MPYREMAINDER == 2)
begin
 
if (i_ce)
begin
rp_three <= mpy_pipe_out;
else if (ce_phase == 3'b000)
`ifdef FORMAL
f_rpthree_ic <= f_past_ic;
f_rpthree_id <= f_past_id;
`endif
end else if (ce_phase == 3'b000)
begin
rp_one <= mpy_pipe_out;
else if (ce_phase == 3'b001)
`ifdef FORMAL
f_rpone_ic <= f_past_ic;
f_rpone_id <= f_past_id;
`endif
end else if (ce_phase == 3'b001)
begin
rp_two <= mpy_pipe_out;
 
`ifdef FORMAL
f_rptwo_ic <= f_past_ic;
f_rptwo_id <= f_past_id;
`endif
end
end
 
always @(posedge i_clk)
457,11 → 624,35
rp2_two <= rp_two;
rp2_three <= (MPYREMAINDER == 2) ? mpy_pipe_out : rp_three;
rp3_one <= (MPYREMAINDER == 0) ? rp2_one : rp_one;
`ifdef FORMAL
f_rp2one_ic <= f_rpone_ic;
f_rp2one_id <= f_rpone_id;
 
f_rp2two_ic <= f_rptwo_ic;
f_rp2two_id <= f_rptwo_id;
 
f_rp2three_ic <= (MPYREMAINDER==2) ? f_past_ic : f_rpthree_ic;
f_rp2three_id <= (MPYREMAINDER==2) ? f_past_id : f_rpthree_id;
f_rp3one_ic <= (MPYREMAINDER==0) ? f_rp2one_ic : f_rpone_ic;
f_rp3one_id <= (MPYREMAINDER==0) ? f_rp2one_id : f_rpone_id;
`endif
end
 
assign p_one = rp3_one;
assign p_two = rp2_two;
assign p_three = rp2_three;
 
`ifdef FORMAL
assign fp_one_ic = f_rp3one_ic;
assign fp_one_id = f_rp3one_id;
 
assign fp_two_ic = f_rp2two_ic;
assign fp_two_id = f_rp2two_id;
 
assign fp_three_ic = f_rp2three_ic;
assign fp_three_id = f_rp2three_id;
`endif
 
end endgenerate
// These values are held in memory and delayed during the
// multiply. Here, we recover them. During the multiply,
471,8 → 662,10
// extension.
wire signed [(IWIDTH+CWIDTH):0] fifo_i, fifo_r;
reg [(2*IWIDTH+1):0] fifo_read;
assign fifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };
assign fifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };
assign fifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}},
fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };
assign fifo_i = { {2{fifo_read[(IWIDTH+1)-1]}},
fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };
 
 
reg signed [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;
521,35 → 714,35
mpy_i, rnd_right_i);
 
always @(posedge i_clk)
if (i_ce)
begin
// First clock, recover all values
fifo_read <= fifo_left[fifo_read_addr];
// These values are IWIDTH+CWIDTH+3 bits wide
// although they only need to be (IWIDTH+1)
// + (CWIDTH) bits wide. (We've got two
// extra bits we need to get rid of.)
mpy_r <= p_one - p_two;
mpy_i <= p_three - p_one - p_two;
end
if (i_ce)
begin
// First clock, recover all values
fifo_read <= fifo_left[fifo_read_addr];
// These values are IWIDTH+CWIDTH+3 bits wide
// although they only need to be (IWIDTH+1)
// + (CWIDTH) bits wide. (We've got two
// extra bits we need to get rid of.)
mpy_r <= p_one - p_two;
mpy_i <= p_three - p_one - p_two;
end
 
reg [(AUXLEN-1):0] aux_pipeline;
initial aux_pipeline = 0;
always @(posedge i_clk)
if (i_reset)
aux_pipeline <= 0;
else if (i_ce)
aux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };
if (i_reset)
aux_pipeline <= 0;
else if (i_ce)
aux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux };
 
initial o_aux = 1'b0;
always @(posedge i_clk)
if (i_reset)
o_aux <= 1'b0;
else if (i_ce)
begin
// Second clock, latch for final clock
o_aux <= aux_pipeline[AUXLEN-1];
end
if (i_reset)
o_aux <= 1'b0;
else if (i_ce)
begin
// Second clock, latch for final clock
o_aux <= aux_pipeline[AUXLEN-1];
end
 
// As a final step, we pack our outputs into two packed two's
// complement numbers per output word, so that each output word
558,27 → 751,7
assign o_left = { rnd_left_r, rnd_left_i };
assign o_right= { rnd_right_r,rnd_right_i};
 
`ifdef VERILATOR
`define FORMAL
`endif
`ifdef FORMAL
localparam F_LGDEPTH = (AUXLEN > 64) ? 7
: (AUXLEN > 32) ? 6
: (AUXLEN > 16) ? 5
: (AUXLEN > 8) ? 4
: (AUXLEN > 4) ? 3 : 2;
 
localparam F_DEPTH = AUXLEN;
localparam [F_LGDEPTH-1:0] F_D = F_DEPTH[F_LGDEPTH-1:0]-1;
 
reg signed [IWIDTH-1:0] f_dlyleft_r [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyleft_i [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyright_r [0:F_DEPTH-1];
reg signed [IWIDTH-1:0] f_dlyright_i [0:F_DEPTH-1];
reg signed [CWIDTH-1:0] f_dlycoeff_r [0:F_DEPTH-1];
reg signed [CWIDTH-1:0] f_dlycoeff_i [0:F_DEPTH-1];
reg signed [F_DEPTH-1:0] f_dlyaux;
 
initial f_dlyaux[0] = 0;
always @(posedge i_clk)
if (i_reset)
616,30 → 789,77
end endgenerate
 
`ifndef VERILATOR
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))
&&(!$past(i_ce,4)))
assume(i_ce);
 
//
// Make some i_ce restraining assumptions. These are necessary
// to get the design to pass induction.
//
generate if (CKPCE <= 1)
begin
 
// i_ce is allowed to be anything in this mode
// No primary i_ce assumption. i_ce can be anything
//
// First induction i_ce assumption: No more than one
// empty cycle between used cycles. Without this
// assumption, or one like it, induction would never
// complete.
always @(posedge i_clk)
if ((!$past(i_ce)))
assume(i_ce);
 
// Second induction i_ce assumption: avoid skipping an
// i_ce and thus stretching out the i_ce cycle two i_ce
// cycles in a row. Without this assumption, induction
// would still complete, it would just take longer
always @(posedge i_clk)
if (($past(i_ce))&&(!$past(i_ce,2)))
assume(i_ce);
 
end else if (CKPCE == 2)
begin : F_CKPCE_TWO
 
// Primary i_ce assumption: Every i_ce cycle is followed
// by a non-i_ce cycle, so the multiplies can be
// multiplexed
always @(posedge i_clk)
if ($past(i_ce))
assume(!i_ce);
if ($past(i_ce))
assume(!i_ce);
// First induction assumption: Don't let this stretch
// out too far. This is necessary to pass induction
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2)))
assume(i_ce);
 
always @(posedge i_clk)
if ((!$past(i_ce))&&($past(i_ce,2))
&&(!$past(i_ce,3))&&(!$past(i_ce,4)))
assume(i_ce);
 
end else if (CKPCE == 3)
begin : F_CKPCE_THREE
 
// Primary i_ce assumption: Following any i_ce cycle,
// there must be two clock cycles with i_ce de-asserted
always @(posedge i_clk)
if (($past(i_ce))||($past(i_ce,2)))
assume(!i_ce);
if (($past(i_ce))||($past(i_ce,2)))
assume(!i_ce);
 
// Induction assumption: Allow i_ce's every third or
// fourth clock, but don't allow them to be separated
// further than that
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3)))
assume(i_ce);
 
// Second induction assumption, to speed up the proof:
// If it's the earliest possible opportunity for an
// i_ce, and the last i_ce was late, don't let this one
// be late as well.
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2))
&&($past(i_ce,3))&&(!$past(i_ce,4))
&&(!$past(i_ce,5))&&(!$past(i_ce,6)))
assume(i_ce);
 
end endgenerate
`endif
 
651,7 → 871,6
else if ((i_ce)&&(!(&f_startup_counter)))
f_startup_counter <= f_startup_counter + 1;
 
wire signed [IWIDTH:0] f_sumr, f_sumi;
always @(*)
begin
f_sumr = f_dlyleft_r[F_D] + f_dlyright_r[F_D];
658,11 → 877,9
f_sumi = f_dlyleft_i[F_D] + f_dlyright_i[F_D];
end
 
wire signed [IWIDTH+CWIDTH+3-1:0] f_sumrx, f_sumix;
assign f_sumrx = { {(4){f_sumr[IWIDTH]}}, f_sumr, {(CWIDTH-2){1'b0}} };
assign f_sumix = { {(4){f_sumi[IWIDTH]}}, f_sumi, {(CWIDTH-2){1'b0}} };
 
wire signed [IWIDTH:0] f_difr, f_difi;
always @(*)
begin
f_difr = f_dlyleft_r[F_D] - f_dlyright_r[F_D];
669,11 → 886,9
f_difi = f_dlyleft_i[F_D] - f_dlyright_i[F_D];
end
 
wire signed [IWIDTH+CWIDTH+3-1:0] f_difrx, f_difix;
assign f_difrx = { {(CWIDTH+2){f_difr[IWIDTH]}}, f_difr };
assign f_difix = { {(CWIDTH+2){f_difi[IWIDTH]}}, f_difi };
 
wire signed [IWIDTH+CWIDTH+3-1:0] f_widecoeff_r, f_widecoeff_i;
assign f_widecoeff_r ={ {(IWIDTH+3){f_dlycoeff_r[F_D][CWIDTH-1]}},
f_dlycoeff_r[F_D] };
assign f_widecoeff_i ={ {(IWIDTH+3){f_dlycoeff_i[F_D][CWIDTH-1]}},
729,7 → 944,6
// otherwise
 
 
wire signed [IWIDTH:0] f_predifr, f_predifi;
always @(*)
begin
f_predifr = f_dlyleft_r[F_D-1] - f_dlyright_r[F_D-1];
736,12 → 950,9
f_predifi = f_dlyleft_i[F_D-1] - f_dlyright_i[F_D-1];
end
 
wire signed [IWIDTH+CWIDTH+3-1:0] f_predifrx, f_predifix;
assign f_predifrx = { {(CWIDTH+2){f_predifr[IWIDTH]}}, f_predifr };
assign f_predifix = { {(CWIDTH+2){f_predifi[IWIDTH]}}, f_predifi };
 
wire signed [CWIDTH:0] f_sumcoef;
wire signed [IWIDTH+1:0] f_sumdiff;
always @(*)
begin
f_sumcoef = f_dlycoeff_r[F_D-1] + f_dlycoeff_i[F_D-1];
785,6 → 996,8
assert(p_three == f_sumcoef);
// verilator lint_on WIDTH
`ifdef VERILATOR
// Check that the multiplies match--but *ONLY* if using
// Verilator, and not if using formal proper
assert(p_one == f_predifr * f_dlycoeff_r[F_D-1]);
assert(p_two == f_predifi * f_dlycoeff_i[F_D-1]);
assert(p_three == f_sumdiff * f_sumcoef);
791,6 → 1004,28
`endif // VERILATOR
end
 
// The following logic formally insists that our version of the
// inputs to the multiply matches what the (multiclock) multiply
// thinks its inputs were. While this may seem redundant, the
// proof will not complete in any reasonable amount of time
// without these assertions.
 
assign f_p3c_in = f_dlycoeff_i[F_D-1] + f_dlycoeff_r[F_D-1];
assign f_p3d_in = f_predifi + f_predifr;
 
always @(*)
if (f_startup_counter >= { 1'b0, F_D })
begin
assert(fp_one_ic == { f_dlycoeff_r[F_D-1][CWIDTH-1],
f_dlycoeff_r[F_D-1][CWIDTH-1:0] });
assert(fp_two_ic == { f_dlycoeff_i[F_D-1][CWIDTH-1],
f_dlycoeff_i[F_D-1][CWIDTH-1:0] });
assert(fp_one_id == { f_predifr[IWIDTH], f_predifr });
assert(fp_two_id == { f_predifi[IWIDTH], f_predifi });
assert(fp_three_ic == f_p3c_in);
assert(fp_three_id == f_p3d_in);
end
 
// F_CHECK will be set externally by the solver, so that we can
// double check that the solver is actually testing what we think
// it is testing. We'll set it here to MPYREMAINDER, which will
/rtl/convround.v
19,23 → 19,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
45,8 → 47,8
//
module convround(i_clk, i_ce, i_val, o_val);
parameter IWID=16, OWID=8, SHIFT=0;
input i_clk, i_ce;
input signed [(IWID-1):0] i_val;
input wire i_clk, i_ce;
input wire signed [(IWID-1):0] i_val;
output reg signed [(OWID-1):0] o_val;
 
// Let's deal with three cases to be as general as we can be here
/rtl/fftmain.v
8,8 → 8,8
// implementation. As such, all other modules are subordinate
// to this one. This module accomplish a fixed size Complex FFT on
// 2048 data points.
// The FFT is fully pipelined, and accepts as inputs two complex two's
// complement samples per clock.
// The FFT is fully pipelined, and accepts as inputs one complex two's
// complement sample per clock.
//
// Parameters:
// i_clk The clock. All operations are synchronous with this clock.
18,31 → 18,27
// Further, following a reset, the o_sync line will go
// high the same time the first output sample is valid.
// i_ce A clock enable line. If this line is set, this module
// will accept two complex values as inputs, and produce
// two (possibly empty) complex values as outputs.
// i_left The first of two complex input samples. This value is split
// will accept one complex input value, and produce
// one (possibly empty) complex output value.
// i_sample The complex input sample. This value is split
// into two two's complement numbers, 15 bits each, with
// the real portion in the high order bits, and the
// imaginary portion taking the bottom 15 bits.
// i_right This is the same thing as i_left, only this is the second of
// two such samples. Hence, i_left would contain input
// sample zero, i_right would contain sample one. On the
// next clock i_left would contain input sample two,
// i_right number three and so forth.
// o_left The first of two output samples, of the same format as i_left,
// o_result The output result, of the same format as i_sample,
// only having 21 bits for each of the real and imaginary
// components, leading to 42 bits total.
// o_right The second of two output samples produced each clock. This has
// the same format as o_left.
// o_sync A one bit output indicating the first valid sample produced by
// this FFT following a reset. Ever after, this will
// indicate the first sample of an FFT frame.
// o_sync A one bit output indicating the first sample of the FFT frame.
// It also indicates the first valid sample out of the FFT
// on the first frame.
//
// Arguments: This file was computer generated using the following command
// line:
//
// % ./fftgen -v -d ../rtl -f 2048 -2 -p 0 -n 15 -a ../bench/cpp/fftsize.h
// % ./fftgen -v -d ../rtl -f 2048 -1 -k 1 -p 0 -n 15 -a ../bench/cpp/fftsize.h
//
// This core will use hardware accelerated multiplies (DSPs)
// for 0 of the 11 stages
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
50,23 → 46,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
77,166 → 75,102
//
//
module fftmain(i_clk, i_reset, i_ce,
i_left, i_right,
o_left, o_right, o_sync);
parameter IWIDTH=15, OWIDTH=21, LGWIDTH=11;
i_sample, o_result, o_sync);
// The bit-width of the input, IWIDTH, output, OWIDTH, and the log
// of the FFT size. These are localparams, rather than parameters,
// because once the core has been generated, they can no longer be
// changed. (These values can be adjusted by running the core
// generator again.) The reason is simply that these values have
// been hardwired into the core at several places.
localparam IWIDTH=15, OWIDTH=21, LGWIDTH=11;
//
input i_clk, i_reset, i_ce;
input wire i_clk, i_reset, i_ce;
//
input [(2*IWIDTH-1):0] i_left, i_right;
output reg [(2*OWIDTH-1):0] o_left, o_right;
input wire [(2*IWIDTH-1):0] i_sample;
output reg [(2*OWIDTH-1):0] o_result;
output reg o_sync;
 
 
// Outputs of the FFT, ready for bit reversal.
wire [(2*OWIDTH-1):0] br_left, br_right;
wire [(2*OWIDTH-1):0] br_sample;
 
 
wire w_s2048;
// verilator lint_off UNUSED
wire w_os2048;
// verilator lint_on UNUSED
wire [31:0] w_e2048, w_o2048;
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0,
0, 1, "cmem_e4096.hex")
stage_e2048(i_clk, i_reset, i_ce,
(!i_reset), i_left, w_e2048, w_s2048);
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0,
0, 1, "cmem_o4096.hex")
stage_o2048(i_clk, i_reset, i_ce,
(!i_reset), i_right, w_o2048, w_os2048);
wire [31:0] w_d2048;
fftstage #(IWIDTH,IWIDTH+4,16,10,0,
0, 1, "cmem_2048.hex")
stage_2048(i_clk, i_reset, i_ce,
(!i_reset), i_sample, w_d2048, w_s2048);
 
 
wire w_s1024;
// verilator lint_off UNUSED
wire w_os1024;
// verilator lint_on UNUSED
wire [33:0] w_e1024, w_o1024;
fftstage #(16,20,17,11,8,0,
0, 1, "cmem_e2048.hex")
stage_e1024(i_clk, i_reset, i_ce,
w_s2048, w_e2048, w_e1024, w_s1024);
fftstage #(16,20,17,11,8,0,
0, 1, "cmem_o2048.hex")
stage_o1024(i_clk, i_reset, i_ce,
w_s2048, w_o2048, w_o1024, w_os1024);
wire [33:0] w_d1024;
fftstage #(16,20,17,9,0,
0, 1, "cmem_1024.hex")
stage_1024(i_clk, i_reset, i_ce,
w_s2048, w_d2048, w_d1024, w_s1024);
 
wire w_s512;
// verilator lint_off UNUSED
wire w_os512;
// verilator lint_on UNUSED
wire [33:0] w_e512, w_o512;
fftstage #(17,21,17,11,7,0,
0, 1, "cmem_e1024.hex")
stage_e512(i_clk, i_reset, i_ce,
w_s1024, w_e1024, w_e512, w_s512);
fftstage #(17,21,17,11,7,0,
0, 1, "cmem_o1024.hex")
stage_o512(i_clk, i_reset, i_ce,
w_s1024, w_o1024, w_o512, w_os512);
wire [33:0] w_d512;
fftstage #(17,21,17,8,0,
0, 1, "cmem_512.hex")
stage_512(i_clk, i_reset, i_ce,
w_s1024, w_d1024, w_d512, w_s512);
 
wire w_s256;
// verilator lint_off UNUSED
wire w_os256;
// verilator lint_on UNUSED
wire [35:0] w_e256, w_o256;
fftstage #(17,21,18,11,6,0,
0, 1, "cmem_e512.hex")
stage_e256(i_clk, i_reset, i_ce,
w_s512, w_e512, w_e256, w_s256);
fftstage #(17,21,18,11,6,0,
0, 1, "cmem_o512.hex")
stage_o256(i_clk, i_reset, i_ce,
w_s512, w_o512, w_o256, w_os256);
wire [35:0] w_d256;
fftstage #(17,21,18,7,0,
0, 1, "cmem_256.hex")
stage_256(i_clk, i_reset, i_ce,
w_s512, w_d512, w_d256, w_s256);
 
wire w_s128;
// verilator lint_off UNUSED
wire w_os128;
// verilator lint_on UNUSED
wire [35:0] w_e128, w_o128;
fftstage #(18,22,18,11,5,0,
0, 1, "cmem_e256.hex")
stage_e128(i_clk, i_reset, i_ce,
w_s256, w_e256, w_e128, w_s128);
fftstage #(18,22,18,11,5,0,
0, 1, "cmem_o256.hex")
stage_o128(i_clk, i_reset, i_ce,
w_s256, w_o256, w_o128, w_os128);
wire [35:0] w_d128;
fftstage #(18,22,18,6,0,
0, 1, "cmem_128.hex")
stage_128(i_clk, i_reset, i_ce,
w_s256, w_d256, w_d128, w_s128);
 
wire w_s64;
// verilator lint_off UNUSED
wire w_os64;
// verilator lint_on UNUSED
wire [37:0] w_e64, w_o64;
fftstage #(18,22,19,11,4,0,
0, 1, "cmem_e128.hex")
stage_e64(i_clk, i_reset, i_ce,
w_s128, w_e128, w_e64, w_s64);
fftstage #(18,22,19,11,4,0,
0, 1, "cmem_o128.hex")
stage_o64(i_clk, i_reset, i_ce,
w_s128, w_o128, w_o64, w_os64);
wire [37:0] w_d64;
fftstage #(18,22,19,5,0,
0, 1, "cmem_64.hex")
stage_64(i_clk, i_reset, i_ce,
w_s128, w_d128, w_d64, w_s64);
 
wire w_s32;
// verilator lint_off UNUSED
wire w_os32;
// verilator lint_on UNUSED
wire [37:0] w_e32, w_o32;
fftstage #(19,23,19,11,3,0,
0, 1, "cmem_e64.hex")
stage_e32(i_clk, i_reset, i_ce,
w_s64, w_e64, w_e32, w_s32);
fftstage #(19,23,19,11,3,0,
0, 1, "cmem_o64.hex")
stage_o32(i_clk, i_reset, i_ce,
w_s64, w_o64, w_o32, w_os32);
wire [37:0] w_d32;
fftstage #(19,23,19,4,0,
0, 1, "cmem_32.hex")
stage_32(i_clk, i_reset, i_ce,
w_s64, w_d64, w_d32, w_s32);
 
wire w_s16;
// verilator lint_off UNUSED
wire w_os16;
// verilator lint_on UNUSED
wire [39:0] w_e16, w_o16;
fftstage #(19,23,20,11,2,0,
0, 1, "cmem_e32.hex")
stage_e16(i_clk, i_reset, i_ce,
w_s32, w_e32, w_e16, w_s16);
fftstage #(19,23,20,11,2,0,
0, 1, "cmem_o32.hex")
stage_o16(i_clk, i_reset, i_ce,
w_s32, w_o32, w_o16, w_os16);
wire [39:0] w_d16;
fftstage #(19,23,20,3,0,
0, 1, "cmem_16.hex")
stage_16(i_clk, i_reset, i_ce,
w_s32, w_d32, w_d16, w_s16);
 
wire w_s8;
// verilator lint_off UNUSED
wire w_os8;
// verilator lint_on UNUSED
wire [39:0] w_e8, w_o8;
fftstage #(20,24,20,11,1,0,
0, 1, "cmem_e16.hex")
stage_e8(i_clk, i_reset, i_ce,
w_s16, w_e16, w_e8, w_s8);
fftstage #(20,24,20,11,1,0,
0, 1, "cmem_o16.hex")
stage_o8(i_clk, i_reset, i_ce,
w_s16, w_o16, w_o8, w_os8);
wire [39:0] w_d8;
fftstage #(20,24,20,2,0,
0, 1, "cmem_8.hex")
stage_8(i_clk, i_reset, i_ce,
w_s16, w_d16, w_d8, w_s8);
 
wire w_s4;
// verilator lint_off UNUSED
wire w_os4;
// verilator lint_on UNUSED
wire [41:0] w_e4, w_o4;
qtrstage #(20,21,11,0,0,0) stage_e4(i_clk, i_reset, i_ce,
w_s8, w_e8, w_e4, w_s4);
qtrstage #(20,21,11,1,0,0) stage_o4(i_clk, i_reset, i_ce,
w_s8, w_o8, w_o4, w_os4);
wire [41:0] w_d4;
qtrstage #(20,21,11,0,0) stage_4(i_clk, i_reset, i_ce,
w_s8, w_d8, w_d4, w_s4);
wire w_s2;
wire [41:0] w_e2, w_o2;
wire [41:0] w_d2;
laststage #(21,21,0) stage_2(i_clk, i_reset, i_ce,
w_s4, w_e4, w_o4, w_e2, w_o2, w_s2);
w_s4, w_d4, w_d2, w_s2);
 
 
// Prepare for a (potential) bit-reverse stage.
assign br_left = w_e2;
assign br_right = w_o2;
assign br_sample= w_d2;
 
wire br_start;
reg r_br_started;
250,11 → 184,11
 
// Now for the bit-reversal stage.
wire br_sync;
wire [(2*OWIDTH-1):0] br_o_left, br_o_right;
wire [(2*OWIDTH-1):0] br_o_result;
bitreverse #(11,21)
revstage(i_clk, i_reset,
(i_ce & br_start), br_left, br_right,
br_o_left, br_o_right, br_sync);
(i_ce & br_start), br_sample,
br_o_result, br_sync);
 
 
// Last clock: Register our outputs, we're done.
267,10 → 201,7
 
always @(posedge i_clk)
if (i_ce)
begin
o_left <= br_o_left;
o_right <= br_o_right;
end
o_result <= br_o_result;
 
 
endmodule
/rtl/fftstage.v
35,23 → 35,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
65,7 → 67,7
// core is built ... Note that the minimum LGSPAN (the base two log
// of the span, or the base two log of the current FFT size) is 3.
// Smaller spans (i.e. the span of 2) must use the dbl laststage module.
parameter LGWIDTH=10, LGSPAN=8, BFLYSHIFT=0;
parameter LGSPAN=10, BFLYSHIFT=0; // LGWIDTH=11
parameter [0:0] OPT_HWMPY = 1;
// Clocks per CE. If your incoming data rate is less than 50% of your
// clock speed, you can set CKPCE to 2'b10, make sure there's at least
77,7 → 79,7
parameter CKPCE = 1;
// The COEFFILE parameter contains the name of the file containing the
// FFT twiddle factors
parameter COEFFILE="cmem_o2048.hex";
parameter COEFFILE="cmem_2048.hex";
 
`ifdef VERILATOR
parameter [0:0] ZERO_ON_IDLE = 1'b0;
85,11 → 87,14
localparam [0:0] ZERO_ON_IDLE = 1'b0;
`endif // VERILATOR
 
input i_clk, i_reset, i_ce, i_sync;
input [(2*IWIDTH-1):0] i_data;
input wire i_clk, i_reset, i_ce, i_sync;
input wire [(2*IWIDTH-1):0] i_data;
output reg [(2*OWIDTH-1):0] o_data;
output reg o_sync;
 
// I am using the prefixes
// ib_* to reference the inputs to the butterfly, and
// ob_* to reference the outputs from the butterfly
reg wait_for_sync;
reg [(2*IWIDTH-1):0] ib_a, ib_b;
reg [(2*CWIDTH-1):0] ib_c;
107,21 → 112,26
// (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };
//
reg [(2*CWIDTH-1):0] cmem [0:((1<<LGSPAN)-1)];
`ifdef FORMAL
// Let the formal tool pick the coefficients
`else
initial $readmemh(COEFFILE,cmem);
 
`endif
 
reg [(LGSPAN):0] iaddr;
reg [(2*IWIDTH-1):0] imem [0:((1<<LGSPAN)-1)];
 
reg [LGSPAN:0] oB;
reg [LGSPAN:0] oaddr;
reg [(2*OWIDTH-1):0] omem [0:((1<<LGSPAN)-1)];
 
initial wait_for_sync = 1'b1;
initial iaddr = 0;
always @(posedge i_clk)
if (i_reset)
if (i_reset)
begin
wait_for_sync <= 1'b1;
iaddr <= 0;
wait_for_sync <= 1'b1;
iaddr <= 0;
end else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
begin
//
137,6 → 147,9
//
// Now, we have all the inputs, so let's feed the butterfly
//
// ib_sync is the synchronization bit to the butterfly. It will
// be tracked within the butterfly, and used to create the o_sync
// value when the results from this output are produced
initial ib_sync = 1'b0;
always @(posedge i_clk)
if (i_reset)
149,6 → 162,8
ib_sync <= (iaddr==(1<<(LGSPAN)));
end
 
// Read the values from our input memory, and use them to feed first of two
// butterfly inputs
always @(posedge i_clk)
if (i_ce)
begin
186,6 → 201,12
 
end endgenerate
 
// For the formal proof, we'll assume the outputs of hwbfly and/or
// butterfly, rather than actually calculating them. This will simplify
// the proof and (if done properly) will be equivalent. Be careful of
// defining FORMAL if you want the full logic!
`ifndef FORMAL
//
generate if (OPT_HWMPY)
begin : HWBFLY
hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
205,43 → 226,213
(ib_sync&&i_ce),
ob_a, ob_b, ob_sync);
end endgenerate
`endif
 
//
// Next step: recover the outputs from the butterfly
//
initial oB = 0;
// The first output can go immediately to the output of this routine
// The second output must wait until this time in the idle cycle
// oaddr is the output memory address, keeping track of where we are
// in this output cycle.
initial oaddr = 0;
initial o_sync = 0;
initial b_started = 0;
always @(posedge i_clk)
if (i_reset)
if (i_reset)
begin
oB <= 0;
o_sync <= 0;
oaddr <= 0;
o_sync <= 0;
// b_started will be true once we've seen the first ob_sync
b_started <= 0;
end else if (i_ce)
begin
o_sync <= (!oB[LGSPAN])?ob_sync : 1'b0;
o_sync <= (!oaddr[LGSPAN])?ob_sync : 1'b0;
if (ob_sync||b_started)
oB <= oB + { {(LGSPAN){1'b0}}, 1'b1 };
if ((ob_sync)&&(!oB[LGSPAN]))
// A butterfly output is available
oaddr <= oaddr + 1'b1;
if ((ob_sync)&&(!oaddr[LGSPAN]))
// If b_started is true, then a butterfly output is available
b_started <= 1'b1;
end
 
reg [(LGSPAN-1):0] dly_addr;
reg [(2*OWIDTH-1):0] dly_value;
reg [(LGSPAN-1):0] nxt_oaddr;
reg [(2*OWIDTH-1):0] pre_ovalue;
always @(posedge i_clk)
if (i_ce)
nxt_oaddr[0] <= oaddr[0];
generate if (LGSPAN>1)
begin
dly_addr <= oB[(LGSPAN-1):0];
dly_value <= ob_b;
end
 
always @(posedge i_clk)
if (i_ce)
nxt_oaddr[LGSPAN-1:1] <= oaddr[LGSPAN-1:1] + 1'b1;
 
end endgenerate
 
// Only write to the memory on the first half of the outputs
// We'll use the memory value on the second half of the outputs
always @(posedge i_clk)
if ((i_ce)&&(!oaddr[LGSPAN]))
omem[oaddr[(LGSPAN-1):0]] <= ob_b;
 
always @(posedge i_clk)
if (i_ce)
omem[dly_addr] <= dly_value;
pre_ovalue <= omem[nxt_oaddr[(LGSPAN-1):0]];
 
always @(posedge i_clk)
if (i_ce)
o_data <= (!oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]];
o_data <= (!oaddr[LGSPAN]) ? ob_a : pre_ovalue;
 
`ifdef FORMAL
// An arbitrary processing delay from butterfly input to
// butterfly output(s)
(* anyconst *) reg [LGSPAN:0] f_mpydelay;
always @(*)
assume(f_mpydelay > 1);
 
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
assert(iaddr == 0);
assert(wait_for_sync);
assert(o_sync == 0);
assert(oaddr == 0);
assert(!b_started);
assert(!o_sync);
end
 
/////////////////////////////////////////
//
// Formally verify the input half, from the inputs to this module
// to the inputs of the butterfly
//
/////////////////////////////////////////
//
// Let's verify a specific set of inputs
(* anyconst *) reg [LGSPAN:0] f_addr;
reg [2*IWIDTH-1:0] f_left, f_right;
wire [LGSPAN:0] f_next_addr;
 
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))&&(!$past(i_ce,4)))
assume(!i_ce);
 
always @(*)
assume(f_addr[LGSPAN]==1'b0);
 
assign f_next_addr = f_addr + 1'b1;
 
always @(posedge i_clk)
if ((i_ce)&&(iaddr[LGSPAN:0] == f_addr))
f_left <= i_data;
 
always @(*)
if (wait_for_sync)
assert(iaddr == 0);
 
wire [LGSPAN:0] f_last_addr = iaddr - 1'b1;
 
always @(posedge i_clk)
if ((!wait_for_sync)&&(f_last_addr >= { 1'b0, f_addr[LGSPAN-1:0]}))
assert(f_left == imem[f_addr[LGSPAN-1:0]]);
 
always @(posedge i_clk)
if ((i_ce)&&(iaddr == { 1'b1, f_addr[LGSPAN-1:0]}))
f_right <= i_data;
 
always @(posedge i_clk)
if ((i_ce)&&(!wait_for_sync)&&(f_last_addr == { 1'b1, f_addr[LGSPAN-1:0]}))
begin
assert(ib_a == f_left);
assert(ib_b == f_right);
assert(ib_c == cmem[f_addr[LGSPAN-1:0]]);
end
 
/////////////////////////////////////////
//
// Formally verify the output half, from the output of the butterfly
// to the outputs of this module
//
/////////////////////////////////////////
reg [2*OWIDTH-1:0] f_oleft, f_oright;
reg [LGSPAN:0] f_oaddr;
wire [LGSPAN:0] f_oaddr_m1 = f_oaddr - 1'b1;
 
always @(*)
f_oaddr = iaddr - f_mpydelay + {1'b1,{(LGSPAN-1){1'b0}}};
 
assign f_oaddr_m1 = f_oaddr - 1'b1;
 
reg f_output_active;
initial f_output_active = 1'b0;
always @(posedge i_clk)
if (i_reset)
f_output_active <= 1'b0;
else if ((i_ce)&&(ob_sync))
f_output_active <= 1'b1;
 
always @(*)
assert(f_output_active == b_started);
 
always @(*)
if (wait_for_sync)
assert(!f_output_active);
 
always @(*)
if (f_output_active)
assert(oaddr == f_oaddr);
else
assert(oaddr == 0);
 
always @(*)
if (wait_for_sync)
assume(!ob_sync);
 
always @(*)
assume(ob_sync == (f_oaddr == 0));
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_ce)))
begin
assume($stable(ob_a));
assume($stable(ob_b));
end
 
initial f_oleft = 0;
initial f_oright = 0;
always @(posedge i_clk)
if ((i_ce)&&(f_oaddr == f_addr))
begin
f_oleft <= ob_a;
f_oright <= ob_b;
end
 
always @(posedge i_clk)
if ((f_output_active)&&(f_oaddr_m1 >= { 1'b0, f_addr[LGSPAN-1:0]}))
assert(omem[f_addr[LGSPAN-1:0]] == f_oright);
 
always @(posedge i_clk)
if ((i_ce)&&(f_oaddr_m1 == 0)&&(f_output_active))
assert(o_sync);
else if ((i_ce)||(!f_output_active))
assert(!o_sync);
 
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1 == f_addr))
assert(o_data == f_oleft);
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr[LGSPAN])
&&(f_oaddr[LGSPAN-1:0] == f_addr[LGSPAN-1:0]))
assert(pre_ovalue == f_oright);
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1[LGSPAN])
&&(f_oaddr_m1[LGSPAN-1:0] == f_addr[LGSPAN-1:0]))
assert(o_data == f_oright);
 
`endif
endmodule
/rtl/hwbfly.v
39,23 → 39,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
75,10 → 77,10
// The number of clocks per clock enable, 1, 2, or 3.
parameter [1:0] CKPCE=1;
//
input i_clk, i_reset, i_ce;
input [(2*CWIDTH-1):0] i_coef;
input [(2*IWIDTH-1):0] i_left, i_right;
input i_aux;
input wire i_clk, i_reset, i_ce;
input wire [(2*CWIDTH-1):0] i_coef;
input wire [(2*IWIDTH-1):0] i_left, i_right;
input wire i_aux;
output wire [(2*OWIDTH-1):0] o_left, o_right;
output reg o_aux;
 
481,9 → 483,6
assign o_left = { rnd_left_r, rnd_left_i };
assign o_right= { rnd_right_r,rnd_right_i};
 
`ifdef VERILATOR
`define FORMAL
`endif
`ifdef FORMAL
localparam F_LGDEPTH = 3;
localparam F_DEPTH = 5;
/rtl/ifftmain.v
8,8 → 8,8
// implementation. As such, all other modules are subordinate
// to this one. This module accomplish a fixed size Complex FFT on
// 2048 data points.
// The FFT is fully pipelined, and accepts as inputs two complex two's
// complement samples per clock.
// The FFT is fully pipelined, and accepts as inputs one complex two's
// complement sample per clock.
//
// Parameters:
// i_clk The clock. All operations are synchronous with this clock.
18,31 → 18,27
// Further, following a reset, the o_sync line will go
// high the same time the first output sample is valid.
// i_ce A clock enable line. If this line is set, this module
// will accept two complex values as inputs, and produce
// two (possibly empty) complex values as outputs.
// i_left The first of two complex input samples. This value is split
// will accept one complex input value, and produce
// one (possibly empty) complex output value.
// i_sample The complex input sample. This value is split
// into two two's complement numbers, 15 bits each, with
// the real portion in the high order bits, and the
// imaginary portion taking the bottom 15 bits.
// i_right This is the same thing as i_left, only this is the second of
// two such samples. Hence, i_left would contain input
// sample zero, i_right would contain sample one. On the
// next clock i_left would contain input sample two,
// i_right number three and so forth.
// o_left The first of two output samples, of the same format as i_left,
// o_result The output result, of the same format as i_sample,
// only having 21 bits for each of the real and imaginary
// components, leading to 42 bits total.
// o_right The second of two output samples produced each clock. This has
// the same format as o_left.
// o_sync A one bit output indicating the first valid sample produced by
// this FFT following a reset. Ever after, this will
// indicate the first sample of an FFT frame.
// o_sync A one bit output indicating the first sample of the FFT frame.
// It also indicates the first valid sample out of the FFT
// on the first frame.
//
// Arguments: This file was computer generated using the following command
// line:
//
// % ./fftgen -i -d ../rtl -f 2048 -2 -p 0 -n 15 -a ../bench/cpp/ifftsize.h
// % ./fftgen -i -d ../rtl -f 2048 -1 -k 1 -p 0 -n 15 -a ../bench/cpp/ifftsize.h
//
// This core will use hardware accelerated multiplies (DSPs)
// for 0 of the 11 stages
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
50,23 → 46,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
77,166 → 75,102
//
//
module ifftmain(i_clk, i_reset, i_ce,
i_left, i_right,
o_left, o_right, o_sync);
parameter IWIDTH=15, OWIDTH=21, LGWIDTH=11;
i_sample, o_result, o_sync);
// The bit-width of the input, IWIDTH, output, OWIDTH, and the log
// of the FFT size. These are localparams, rather than parameters,
// because once the core has been generated, they can no longer be
// changed. (These values can be adjusted by running the core
// generator again.) The reason is simply that these values have
// been hardwired into the core at several places.
localparam IWIDTH=15, OWIDTH=21, LGWIDTH=11;
//
input i_clk, i_reset, i_ce;
input wire i_clk, i_reset, i_ce;
//
input [(2*IWIDTH-1):0] i_left, i_right;
output reg [(2*OWIDTH-1):0] o_left, o_right;
input wire [(2*IWIDTH-1):0] i_sample;
output reg [(2*OWIDTH-1):0] o_result;
output reg o_sync;
 
 
// Outputs of the FFT, ready for bit reversal.
wire [(2*OWIDTH-1):0] br_left, br_right;
wire [(2*OWIDTH-1):0] br_sample;
 
 
wire w_s2048;
// verilator lint_off UNUSED
wire w_os2048;
// verilator lint_on UNUSED
wire [31:0] w_e2048, w_o2048;
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0,
0, 1, "icmem_e4096.hex")
stage_e2048(i_clk, i_reset, i_ce,
(!i_reset), i_left, w_e2048, w_s2048);
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0,
0, 1, "icmem_o4096.hex")
stage_o2048(i_clk, i_reset, i_ce,
(!i_reset), i_right, w_o2048, w_os2048);
wire [31:0] w_d2048;
fftstage #(IWIDTH,IWIDTH+4,16,10,0,
0, 1, "icmem_2048.hex")
stage_2048(i_clk, i_reset, i_ce,
(!i_reset), i_sample, w_d2048, w_s2048);
 
 
wire w_s1024;
// verilator lint_off UNUSED
wire w_os1024;
// verilator lint_on UNUSED
wire [33:0] w_e1024, w_o1024;
fftstage #(16,20,17,11,8,0,
0, 1, "icmem_e2048.hex")
stage_e1024(i_clk, i_reset, i_ce,
w_s2048, w_e2048, w_e1024, w_s1024);
fftstage #(16,20,17,11,8,0,
0, 1, "icmem_o2048.hex")
stage_o1024(i_clk, i_reset, i_ce,
w_s2048, w_o2048, w_o1024, w_os1024);
wire [33:0] w_d1024;
fftstage #(16,20,17,9,0,
0, 1, "icmem_1024.hex")
stage_1024(i_clk, i_reset, i_ce,
w_s2048, w_d2048, w_d1024, w_s1024);
 
wire w_s512;
// verilator lint_off UNUSED
wire w_os512;
// verilator lint_on UNUSED
wire [33:0] w_e512, w_o512;
fftstage #(17,21,17,11,7,0,
0, 1, "icmem_e1024.hex")
stage_e512(i_clk, i_reset, i_ce,
w_s1024, w_e1024, w_e512, w_s512);
fftstage #(17,21,17,11,7,0,
0, 1, "icmem_o1024.hex")
stage_o512(i_clk, i_reset, i_ce,
w_s1024, w_o1024, w_o512, w_os512);
wire [33:0] w_d512;
fftstage #(17,21,17,8,0,
0, 1, "icmem_512.hex")
stage_512(i_clk, i_reset, i_ce,
w_s1024, w_d1024, w_d512, w_s512);
 
wire w_s256;
// verilator lint_off UNUSED
wire w_os256;
// verilator lint_on UNUSED
wire [35:0] w_e256, w_o256;
fftstage #(17,21,18,11,6,0,
0, 1, "icmem_e512.hex")
stage_e256(i_clk, i_reset, i_ce,
w_s512, w_e512, w_e256, w_s256);
fftstage #(17,21,18,11,6,0,
0, 1, "icmem_o512.hex")
stage_o256(i_clk, i_reset, i_ce,
w_s512, w_o512, w_o256, w_os256);
wire [35:0] w_d256;
fftstage #(17,21,18,7,0,
0, 1, "icmem_256.hex")
stage_256(i_clk, i_reset, i_ce,
w_s512, w_d512, w_d256, w_s256);
 
wire w_s128;
// verilator lint_off UNUSED
wire w_os128;
// verilator lint_on UNUSED
wire [35:0] w_e128, w_o128;
fftstage #(18,22,18,11,5,0,
0, 1, "icmem_e256.hex")
stage_e128(i_clk, i_reset, i_ce,
w_s256, w_e256, w_e128, w_s128);
fftstage #(18,22,18,11,5,0,
0, 1, "icmem_o256.hex")
stage_o128(i_clk, i_reset, i_ce,
w_s256, w_o256, w_o128, w_os128);
wire [35:0] w_d128;
fftstage #(18,22,18,6,0,
0, 1, "icmem_128.hex")
stage_128(i_clk, i_reset, i_ce,
w_s256, w_d256, w_d128, w_s128);
 
wire w_s64;
// verilator lint_off UNUSED
wire w_os64;
// verilator lint_on UNUSED
wire [37:0] w_e64, w_o64;
fftstage #(18,22,19,11,4,0,
0, 1, "icmem_e128.hex")
stage_e64(i_clk, i_reset, i_ce,
w_s128, w_e128, w_e64, w_s64);
fftstage #(18,22,19,11,4,0,
0, 1, "icmem_o128.hex")
stage_o64(i_clk, i_reset, i_ce,
w_s128, w_o128, w_o64, w_os64);
wire [37:0] w_d64;
fftstage #(18,22,19,5,0,
0, 1, "icmem_64.hex")
stage_64(i_clk, i_reset, i_ce,
w_s128, w_d128, w_d64, w_s64);
 
wire w_s32;
// verilator lint_off UNUSED
wire w_os32;
// verilator lint_on UNUSED
wire [37:0] w_e32, w_o32;
fftstage #(19,23,19,11,3,0,
0, 1, "icmem_e64.hex")
stage_e32(i_clk, i_reset, i_ce,
w_s64, w_e64, w_e32, w_s32);
fftstage #(19,23,19,11,3,0,
0, 1, "icmem_o64.hex")
stage_o32(i_clk, i_reset, i_ce,
w_s64, w_o64, w_o32, w_os32);
wire [37:0] w_d32;
fftstage #(19,23,19,4,0,
0, 1, "icmem_32.hex")
stage_32(i_clk, i_reset, i_ce,
w_s64, w_d64, w_d32, w_s32);
 
wire w_s16;
// verilator lint_off UNUSED
wire w_os16;
// verilator lint_on UNUSED
wire [39:0] w_e16, w_o16;
fftstage #(19,23,20,11,2,0,
0, 1, "icmem_e32.hex")
stage_e16(i_clk, i_reset, i_ce,
w_s32, w_e32, w_e16, w_s16);
fftstage #(19,23,20,11,2,0,
0, 1, "icmem_o32.hex")
stage_o16(i_clk, i_reset, i_ce,
w_s32, w_o32, w_o16, w_os16);
wire [39:0] w_d16;
fftstage #(19,23,20,3,0,
0, 1, "icmem_16.hex")
stage_16(i_clk, i_reset, i_ce,
w_s32, w_d32, w_d16, w_s16);
 
wire w_s8;
// verilator lint_off UNUSED
wire w_os8;
// verilator lint_on UNUSED
wire [39:0] w_e8, w_o8;
fftstage #(20,24,20,11,1,0,
0, 1, "icmem_e16.hex")
stage_e8(i_clk, i_reset, i_ce,
w_s16, w_e16, w_e8, w_s8);
fftstage #(20,24,20,11,1,0,
0, 1, "icmem_o16.hex")
stage_o8(i_clk, i_reset, i_ce,
w_s16, w_o16, w_o8, w_os8);
wire [39:0] w_d8;
fftstage #(20,24,20,2,0,
0, 1, "icmem_8.hex")
stage_8(i_clk, i_reset, i_ce,
w_s16, w_d16, w_d8, w_s8);
 
wire w_s4;
// verilator lint_off UNUSED
wire w_os4;
// verilator lint_on UNUSED
wire [41:0] w_e4, w_o4;
qtrstage #(20,21,11,0,1,0) stage_e4(i_clk, i_reset, i_ce,
w_s8, w_e8, w_e4, w_s4);
qtrstage #(20,21,11,1,1,0) stage_o4(i_clk, i_reset, i_ce,
w_s8, w_o8, w_o4, w_os4);
wire [41:0] w_d4;
qtrstage #(20,21,11,1,0) stage_4(i_clk, i_reset, i_ce,
w_s8, w_d8, w_d4, w_s4);
wire w_s2;
wire [41:0] w_e2, w_o2;
wire [41:0] w_d2;
laststage #(21,21,0) stage_2(i_clk, i_reset, i_ce,
w_s4, w_e4, w_o4, w_e2, w_o2, w_s2);
w_s4, w_d4, w_d2, w_s2);
 
 
// Prepare for a (potential) bit-reverse stage.
assign br_left = w_e2;
assign br_right = w_o2;
assign br_sample= w_d2;
 
wire br_start;
reg r_br_started;
250,11 → 184,11
 
// Now for the bit-reversal stage.
wire br_sync;
wire [(2*OWIDTH-1):0] br_o_left, br_o_right;
wire [(2*OWIDTH-1):0] br_o_result;
bitreverse #(11,21)
revstage(i_clk, i_reset,
(i_ce & br_start), br_left, br_right,
br_o_left, br_o_right, br_sync);
(i_ce & br_start), br_sample,
br_o_result, br_sync);
 
 
// Last clock: Register our outputs, we're done.
267,10 → 201,7
 
always @(posedge i_clk)
if (i_ce)
begin
o_left <= br_o_left;
o_right <= br_o_right;
end
o_result <= br_o_result;
 
 
endmodule
/rtl/ifftstage.v
0,0 → 1,436
////////////////////////////////////////////////////////////////////////////////
//
// Filename: fftstage.v
//
// Project: A General Purpose Pipelined FFT Implementation
//
// Purpose: This file is (almost) a Verilog source file. It is meant to
// be used by a FFT core compiler to generate FFTs which may be
// used as part of an FFT core. Specifically, this file encapsulates
// the options of an FFT-stage. For any 2^N length FFT, there shall be
// (N-1) of these stages.
//
//
// Operation:
// Given a stream of values, operate upon them as though they were
// value pairs, x[n] and x[n+N/2]. The stream begins when n=0, and ends
// when n=N/2-1 (i.e. there's a full set of N values). When the value
// x[0] enters, the synchronization input, i_sync, must be true as well.
//
// For this stream, produce outputs
// y[n ] = x[n] + x[n+N/2], and
// y[n+N/2] = (x[n] - x[n+N/2]) * c[n],
// where c[n] is a complex coefficient found in the
// external memory file COEFFILE.
// When y[0] is output, a synchronization bit o_sync will be true as
// well, otherwise it will be zero.
//
// Most of the work to do this is done within the butterfly, whether the
// hardware accelerated butterfly (uses a DSP) or not.
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
module fftstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync);
parameter IWIDTH=15,CWIDTH=20,OWIDTH=16;
// Parameters specific to the core that should be changed when this
// core is built ... Note that the minimum LGSPAN (the base two log
// of the span, or the base two log of the current FFT size) is 3.
// Smaller spans (i.e. the span of 2) must use the dbl laststage module.
parameter LGSPAN=10, BFLYSHIFT=0; // LGWIDTH=11
parameter [0:0] OPT_HWMPY = 1;
// Clocks per CE. If your incoming data rate is less than 50% of your
// clock speed, you can set CKPCE to 2'b10, make sure there's at least
// one clock between cycles when i_ce is high, and then use two
// multiplies instead of three. Setting CKPCE to 2'b11, and insisting
// on at least two clocks with i_ce low between cycles with i_ce high,
// then the hardware optimized butterfly code will used one multiply
// instead of two.
parameter CKPCE = 1;
// The COEFFILE parameter contains the name of the file containing the
// FFT twiddle factors
parameter COEFFILE="cmem_2048.hex";
 
`ifdef VERILATOR
parameter [0:0] ZERO_ON_IDLE = 1'b0;
`else
localparam [0:0] ZERO_ON_IDLE = 1'b0;
`endif // VERILATOR
 
input wire i_clk, i_reset, i_ce, i_sync;
input wire [(2*IWIDTH-1):0] i_data;
output reg [(2*OWIDTH-1):0] o_data;
output reg o_sync;
 
// I am using the prefixes
// ib_* to reference the inputs to the butterfly, and
// ob_* to reference the outputs from the butterfly
reg wait_for_sync;
reg [(2*IWIDTH-1):0] ib_a, ib_b;
reg [(2*CWIDTH-1):0] ib_c;
reg ib_sync;
 
reg b_started;
wire ob_sync;
wire [(2*OWIDTH-1):0] ob_a, ob_b;
 
// cmem is defined as an array of real and complex values,
// where the top CWIDTH bits are the real value and the bottom
// CWIDTH bits are the imaginary value.
//
// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),
// (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };
//
reg [(2*CWIDTH-1):0] cmem [0:((1<<LGSPAN)-1)];
`ifdef FORMAL
// Let the formal tool pick the coefficients
`else
initial $readmemh(COEFFILE,cmem);
 
`endif
 
reg [(LGSPAN):0] iaddr;
reg [(2*IWIDTH-1):0] imem [0:((1<<LGSPAN)-1)];
 
reg [LGSPAN:0] oaddr;
reg [(2*OWIDTH-1):0] omem [0:((1<<LGSPAN)-1)];
 
initial wait_for_sync = 1'b1;
initial iaddr = 0;
always @(posedge i_clk)
if (i_reset)
begin
wait_for_sync <= 1'b1;
iaddr <= 0;
end else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
begin
//
// First step: Record what we're not ready to use yet
//
iaddr <= iaddr + { {(LGSPAN){1'b0}}, 1'b1 };
wait_for_sync <= 1'b0;
end
always @(posedge i_clk) // Need to make certain here that we don't read
if ((i_ce)&&(!iaddr[LGSPAN])) // and write the same address on
imem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk
 
//
// Now, we have all the inputs, so let's feed the butterfly
//
// ib_sync is the synchronization bit to the butterfly. It will
// be tracked within the butterfly, and used to create the o_sync
// value when the results from this output are produced
initial ib_sync = 1'b0;
always @(posedge i_clk)
if (i_reset)
ib_sync <= 1'b0;
else if (i_ce)
begin
// Set the sync to true on the very first
// valid input in, and hence on the very
// first valid data out per FFT.
ib_sync <= (iaddr==(1<<(LGSPAN)));
end
 
// Read the values from our input memory, and use them to feed first of two
// butterfly inputs
always @(posedge i_clk)
if (i_ce)
begin
// One input from memory, ...
ib_a <= imem[iaddr[(LGSPAN-1):0]];
// One input clocked in from the top
ib_b <= i_data;
// and the coefficient or twiddle factor
ib_c <= cmem[iaddr[(LGSPAN-1):0]];
end
 
// The idle register is designed to keep track of when an input
// to the butterfly is important and going to be used. It's used
// in a flag following, so that when useful values are placed
// into the butterfly they'll be non-zero (idle=0), otherwise when
// the inputs to the butterfly are irrelevant and will be ignored,
// then (idle=1) those inputs will be set to zero. This
// functionality is not designed to be used in operation, but only
// within a Verilator simulation context when chasing a bug.
// In this limited environment, the non-zero answers will stand
// in a trace making it easier to highlight a bug.
reg idle;
generate if (ZERO_ON_IDLE)
begin
initial idle = 1;
always @(posedge i_clk)
if (i_reset)
idle <= 1'b1;
else if (i_ce)
idle <= (!iaddr[LGSPAN])&&(!wait_for_sync);
 
end else begin
 
always @(*) idle = 0;
 
end endgenerate
 
// For the formal proof, we'll assume the outputs of hwbfly and/or
// butterfly, rather than actually calculating them. This will simplify
// the proof and (if done properly) will be equivalent. Be careful of
// defining FORMAL if you want the full logic!
`ifndef FORMAL
//
generate if (OPT_HWMPY)
begin : HWBFLY
hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
.CKPCE(CKPCE), .SHIFT(BFLYSHIFT))
bfly(i_clk, i_reset, i_ce, (idle)?0:ib_c,
(idle || (!i_ce)) ? 0:ib_a,
(idle || (!i_ce)) ? 0:ib_b,
(ib_sync)&&(i_ce),
ob_a, ob_b, ob_sync);
end else begin : FWBFLY
butterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),
.CKPCE(CKPCE),.SHIFT(BFLYSHIFT))
bfly(i_clk, i_reset, i_ce,
(idle||(!i_ce))?0:ib_c,
(idle||(!i_ce))?0:ib_a,
(idle||(!i_ce))?0:ib_b,
(ib_sync&&i_ce),
ob_a, ob_b, ob_sync);
end endgenerate
`endif
 
//
// Next step: recover the outputs from the butterfly
//
// The first output can go immediately to the output of this routine
// The second output must wait until this time in the idle cycle
// oaddr is the output memory address, keeping track of where we are
// in this output cycle.
initial oaddr = 0;
initial o_sync = 0;
initial b_started = 0;
always @(posedge i_clk)
if (i_reset)
begin
oaddr <= 0;
o_sync <= 0;
// b_started will be true once we've seen the first ob_sync
b_started <= 0;
end else if (i_ce)
begin
o_sync <= (!oaddr[LGSPAN])?ob_sync : 1'b0;
if (ob_sync||b_started)
oaddr <= oaddr + 1'b1;
if ((ob_sync)&&(!oaddr[LGSPAN]))
// If b_started is true, then a butterfly output is available
b_started <= 1'b1;
end
 
reg [(LGSPAN-1):0] nxt_oaddr;
reg [(2*OWIDTH-1):0] pre_ovalue;
always @(posedge i_clk)
if (i_ce)
nxt_oaddr[0] <= oaddr[0];
generate if (LGSPAN>1)
begin
 
always @(posedge i_clk)
if (i_ce)
nxt_oaddr[LGSPAN-1:1] <= oaddr[LGSPAN-1:1] + 1'b1;
 
end endgenerate
 
// Only write to the memory on the first half of the outputs
// We'll use the memory value on the second half of the outputs
always @(posedge i_clk)
if ((i_ce)&&(!oaddr[LGSPAN]))
omem[oaddr[(LGSPAN-1):0]] <= ob_b;
 
always @(posedge i_clk)
if (i_ce)
pre_ovalue <= omem[nxt_oaddr[(LGSPAN-1):0]];
 
always @(posedge i_clk)
if (i_ce)
o_data <= (!oaddr[LGSPAN]) ? ob_a : pre_ovalue;
 
`ifdef FORMAL
// An arbitrary processing delay from butterfly input to
// butterfly output(s)
(* anyconst *) reg [LGSPAN:0] f_mpydelay;
always @(*)
assume(f_mpydelay > 1);
 
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
always @(posedge i_clk)
if ((!f_past_valid)||($past(i_reset)))
begin
assert(iaddr == 0);
assert(wait_for_sync);
assert(o_sync == 0);
assert(oaddr == 0);
assert(!b_started);
assert(!o_sync);
end
 
/////////////////////////////////////////
//
// Formally verify the input half, from the inputs to this module
// to the inputs of the butterfly
//
/////////////////////////////////////////
//
// Let's verify a specific set of inputs
(* anyconst *) reg [LGSPAN:0] f_addr;
reg [2*IWIDTH-1:0] f_left, f_right;
wire [LGSPAN:0] f_next_addr;
 
always @(posedge i_clk)
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))&&(!$past(i_ce,4)))
assume(!i_ce);
 
always @(*)
assume(f_addr[LGSPAN]==1'b0);
 
assign f_next_addr = f_addr + 1'b1;
 
always @(posedge i_clk)
if ((i_ce)&&(iaddr[LGSPAN:0] == f_addr))
f_left <= i_data;
 
always @(*)
if (wait_for_sync)
assert(iaddr == 0);
 
wire [LGSPAN:0] f_last_addr = iaddr - 1'b1;
 
always @(posedge i_clk)
if ((!wait_for_sync)&&(f_last_addr >= { 1'b0, f_addr[LGSPAN-1:0]}))
assert(f_left == imem[f_addr[LGSPAN-1:0]]);
 
always @(posedge i_clk)
if ((i_ce)&&(iaddr == { 1'b1, f_addr[LGSPAN-1:0]}))
f_right <= i_data;
 
always @(posedge i_clk)
if ((i_ce)&&(!wait_for_sync)&&(f_last_addr == { 1'b1, f_addr[LGSPAN-1:0]}))
begin
assert(ib_a == f_left);
assert(ib_b == f_right);
assert(ib_c == cmem[f_addr[LGSPAN-1:0]]);
end
 
/////////////////////////////////////////
//
// Formally verify the output half, from the output of the butterfly
// to the outputs of this module
//
/////////////////////////////////////////
reg [2*OWIDTH-1:0] f_oleft, f_oright;
reg [LGSPAN:0] f_oaddr;
wire [LGSPAN:0] f_oaddr_m1 = f_oaddr - 1'b1;
 
always @(*)
f_oaddr = iaddr - f_mpydelay + {1'b1,{(LGSPAN-1){1'b0}}};
 
assign f_oaddr_m1 = f_oaddr - 1'b1;
 
reg f_output_active;
initial f_output_active = 1'b0;
always @(posedge i_clk)
if (i_reset)
f_output_active <= 1'b0;
else if ((i_ce)&&(ob_sync))
f_output_active <= 1'b1;
 
always @(*)
assert(f_output_active == b_started);
 
always @(*)
if (wait_for_sync)
assert(!f_output_active);
 
always @(*)
if (f_output_active)
assert(oaddr == f_oaddr);
else
assert(oaddr == 0);
 
always @(*)
if (wait_for_sync)
assume(!ob_sync);
 
always @(*)
assume(ob_sync == (f_oaddr == 0));
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_ce)))
begin
assume($stable(ob_a));
assume($stable(ob_b));
end
 
initial f_oleft = 0;
initial f_oright = 0;
always @(posedge i_clk)
if ((i_ce)&&(f_oaddr == f_addr))
begin
f_oleft <= ob_a;
f_oright <= ob_b;
end
 
always @(posedge i_clk)
if ((f_output_active)&&(f_oaddr_m1 >= { 1'b0, f_addr[LGSPAN-1:0]}))
assert(omem[f_addr[LGSPAN-1:0]] == f_oright);
 
always @(posedge i_clk)
if ((i_ce)&&(f_oaddr_m1 == 0)&&(f_output_active))
assert(o_sync);
else if ((i_ce)||(!f_output_active))
assert(!o_sync);
 
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1 == f_addr))
assert(o_data == f_oleft);
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr[LGSPAN])
&&(f_oaddr[LGSPAN-1:0] == f_addr[LGSPAN-1:0]))
assert(pre_ovalue == f_oright);
always @(posedge i_clk)
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1[LGSPAN])
&&(f_oaddr_m1[LGSPAN-1:0] == f_addr[LGSPAN-1:0]))
assert(o_data == f_oright);
 
`endif
endmodule
/rtl/laststage.v
6,43 → 6,9
//
// Purpose: This is part of an FPGA implementation that will process
// the final stage of a decimate-in-frequency FFT, running
// through the data at two samples per clock. If you notice from the
// derivation of an FFT, the only time both even and odd samples are
// used at the same time is in this stage. Therefore, other than this
// stage and these twiddles, all of the other stages can run two stages
// at a time at one sample per clock.
// through the data at one sample per clock.
//
// Operation:
// Given a stream of values, operate upon them as though they were
// value pairs, x[2n] and x[2n+1]. The stream begins when n=0, and ends
// when n=1. When the first x[0] value enters, the synchronization
// input, i_sync, must be true as well.
//
// For this stream, produce outputs
// y[2n ] = x[2n] + x[2n+1], and
// y[2n+1] = x[2n] - x[2n+1]
//
// When y[0] is output, a synchronization bit o_sync will be true as
// well, otherwise it will be zero.
//
//
// In this implementation, the output is valid one clock after the input
// is valid. The output also accumulates one bit above and beyond the
// number of bits in the input.
//
// i_clk A system clock
// i_reset A synchronous reset
// i_ce Circuit enable--nothing happens unless this line is high
// i_sync A synchronization signal, high once per FFT at the start
// i_left The first (even) complex sample input. The higher order
// bits contain the real portion, low order bits the
// imaginary portion, all in two's complement.
// i_right The next (odd) complex sample input, same format as
// i_left.
// o_left The first (even) complex output.
// o_right The next (odd) complex output.
// o_sync Output synchronization signal.
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
50,23 → 16,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
74,98 → 42,166
//
`default_nettype none
//
module laststage(i_clk, i_reset, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);
module laststage(i_clk, i_reset, i_ce, i_sync, i_val, o_val, o_sync);
parameter IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;
input i_clk, i_reset, i_ce, i_sync;
input [(2*IWIDTH-1):0] i_left, i_right;
output reg [(2*OWIDTH-1):0] o_left, o_right;
output reg o_sync;
input wire i_clk, i_reset, i_ce, i_sync;
input wire [(2*IWIDTH-1):0] i_val;
output wire [(2*OWIDTH-1):0] o_val;
output reg o_sync;
 
wire signed [(IWIDTH-1):0] i_in_0r, i_in_0i, i_in_1r, i_in_1i;
assign i_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)];
assign i_in_0i = i_left[(IWIDTH-1):0];
assign i_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)];
assign i_in_1i = i_right[(IWIDTH-1):0];
wire [(OWIDTH-1):0] o_out_0r, o_out_0i,
o_out_1r, o_out_1i;
reg signed [(IWIDTH-1):0] m_r, m_i;
wire signed [(IWIDTH-1):0] i_r, i_i;
 
assign i_r = i_val[(2*IWIDTH-1):(IWIDTH)];
assign i_i = i_val[(IWIDTH-1):0];
 
// Handle a potential rounding situation, when IWIDTH>=OWIDTH.
// Don't forget that we accumulate a bit by adding two values
// together. Therefore our intermediate value must have one more
// bit than the two originals.
reg signed [(IWIDTH):0] rnd_r, rnd_i, sto_r, sto_i;
reg wait_for_sync, stage;
reg [1:0] sync_pipe;
 
 
 
// As with any register connected to the sync pulse, these must
// have initial values and be reset on the i_reset signal.
// Other data values need only restrict their updates to i_ce
// enabled clocks, but sync's must obey resets and initial
// conditions as well.
reg rnd_sync, r_sync;
 
initial rnd_sync = 1'b0; // Sync into rounding
initial r_sync = 1'b0; // Sync coming out
initial wait_for_sync = 1'b1;
initial stage = 1'b0;
always @(posedge i_clk)
if (i_reset)
begin
rnd_sync <= 1'b0;
r_sync <= 1'b0;
wait_for_sync <= 1'b1;
stage <= 1'b0;
end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage))
begin
wait_for_sync <= 1'b0;
//
stage <= 1'b1;
//
end else if (i_ce)
begin
rnd_sync <= i_sync;
r_sync <= rnd_sync;
end
stage <= 1'b0;
 
// As with other variables, these are really only updated when in
// the processing pipeline, after the first i_sync. However, to
// eliminate as much unnecessary logic as possible, we toggle
// these any time the i_ce line is enabled, and don't reset.
// them on i_reset.
// Don't forget that we accumulate a bit by adding two values
// together. Therefore our intermediate value must have one more
// bit than the two originals.
reg signed [(IWIDTH):0] rnd_in_0r, rnd_in_0i;
reg signed [(IWIDTH):0] rnd_in_1r, rnd_in_1i;
initial sync_pipe = 0;
always @(posedge i_clk)
if (i_reset)
sync_pipe <= 0;
else if (i_ce)
sync_pipe <= { sync_pipe[0], i_sync };
 
initial o_sync = 1'b0;
always @(posedge i_clk)
if (i_ce)
if (i_reset)
o_sync <= 1'b0;
else if (i_ce)
o_sync <= sync_pipe[1];
 
always @(posedge i_clk)
if (i_ce)
begin
if (!stage)
begin
// Clock 1
m_r <= i_r;
m_i <= i_i;
// Clock 3
rnd_r <= sto_r;
rnd_i <= sto_i;
//
rnd_in_0r <= i_in_0r + i_in_1r;
rnd_in_0i <= i_in_0i + i_in_1i;
end else begin
// Clock 2
rnd_r <= m_r + i_r;
rnd_i <= m_i + i_i;
//
rnd_in_1r <= i_in_0r - i_in_1r;
rnd_in_1i <= i_in_0i - i_in_1i;
sto_r <= m_r - i_r;
sto_i <= m_i - i_i;
//
end
end
 
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,
rnd_in_0r, o_out_0r);
// Now that we have our results, let's round them and report them
wire signed [(OWIDTH-1):0] o_r, o_i;
 
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,
rnd_in_0i, o_out_0i);
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r);
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i);
 
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,
rnd_in_1r, o_out_1r);
assign o_val = { o_r, o_i };
 
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,
rnd_in_1i, o_out_1i);
`ifdef FORMAL
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
`ifdef LASTSTAGE
always @(posedge i_clk)
assume((i_ce)||($past(i_ce))||($past(i_ce,2)));
`endif
 
// Prior versions of this routine did not include the extra
// clock and register/flip-flops that this routine requires.
// These are placed in here to correct a bug in Verilator, that
// otherwise struggles. (Hopefully this will fix the problem ...)
initial assert(IWIDTH+1 == OWIDTH);
 
reg signed [IWIDTH-1:0] f_piped_real [0:3];
reg signed [IWIDTH-1:0] f_piped_imag [0:3];
always @(posedge i_clk)
if (i_ce)
begin
o_left <= { o_out_0r, o_out_0i };
o_right <= { o_out_1r, o_out_1i };
end
if (i_ce)
begin
f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH];
f_piped_imag[0] <= i_val[ IWIDTH-1:0];
 
initial o_sync = 1'b0; // Final sync coming out of module
f_piped_real[1] <= f_piped_real[0];
f_piped_imag[1] <= f_piped_imag[0];
 
f_piped_real[2] <= f_piped_real[1];
f_piped_imag[2] <= f_piped_imag[1];
 
f_piped_real[3] <= f_piped_real[2];
f_piped_imag[3] <= f_piped_imag[2];
end
 
wire f_syncd;
reg f_rsyncd;
 
initial f_rsyncd = 0;
always @(posedge i_clk)
if (i_reset)
o_sync <= 1'b0;
else if (i_ce)
o_sync <= r_sync;
if (i_reset)
f_rsyncd <= 1'b0;
else if (!f_rsyncd)
f_rsyncd <= o_sync;
assign f_syncd = (f_rsyncd)||(o_sync);
 
reg f_state;
initial f_state = 0;
always @(posedge i_clk)
if (i_reset)
f_state <= 0;
else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
f_state <= f_state + 1;
 
always @(*)
if (f_state != 0)
assume(!i_sync);
 
always @(*)
assert(stage == f_state[0]);
 
always @(posedge i_clk)
if ((f_state == 1'b1)&&(f_syncd))
begin
assert(o_r == f_piped_real[2] + f_piped_real[1]);
assert(o_i == f_piped_imag[2] + f_piped_imag[1]);
end
 
always @(posedge i_clk)
if ((f_state == 1'b0)&&(f_syncd))
begin
assert(!o_sync);
assert(o_r == f_piped_real[3] - f_piped_real[2]);
assert(o_i == f_piped_imag[3] - f_piped_imag[2]);
end
 
always @(*)
if (wait_for_sync)
begin
assert(!f_rsyncd);
assert(!o_sync);
assert(f_state == 0);
end
 
`endif // FORMAL
endmodule
/rtl/longbimpy.v
22,23 → 22,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
46,7 → 48,11
//
`default_nettype none
//
module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r);
module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r
`ifdef FORMAL
, f_past_a_unsorted, f_past_b_unsorted
`endif
);
parameter IAW=8, // The width of i_a, min width is 5
IBW=12, // The width of i_b, can be anything
// The following three parameters should not be changed
58,11 → 64,16
IW=(AW+1)&(-2), // Internal width of A
LUTB=2, // How many bits we can multiply by at once
TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau
input i_clk, i_ce;
input [(IAW-1):0] i_a_unsorted;
input [(IBW-1):0] i_b_unsorted;
input wire i_clk, i_ce;
input wire [(IAW-1):0] i_a_unsorted;
input wire [(IBW-1):0] i_b_unsorted;
output reg [(AW+BW-1):0] o_r;
 
`ifdef FORMAL
output wire [(IAW-1):0] f_past_a_unsorted;
output wire [(IBW-1):0] f_past_b_unsorted;
`endif
 
//
// Swap parameter order, so that AW <= BW -- for performance
// reasons
96,23 → 107,26
// taking the absolute value here would require an additional bit.
// However, because our results are now unsigned, we can stay
// within the number of bits given (for now).
initial u_a = 0;
generate if (IW > AW)
begin
begin : ABS_AND_ADD_BIT_TO_A
always @(posedge i_clk)
if (i_ce)
u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };
end else begin
end else begin : ABS_A
always @(posedge i_clk)
if (i_ce)
u_a <= (i_a[AW-1])?(-i_a):(i_a);
end endgenerate
 
initial sgn = 0;
initial u_b = 0;
always @(posedge i_clk)
if (i_ce)
begin
u_b <= (i_b[BW-1])?(-i_b):(i_b);
sgn <= i_a[AW-1] ^ i_b[BW-1];
end
if (i_ce)
begin : ABS_B
u_b <= (i_b[BW-1])?(-i_b):(i_b);
sgn <= i_a[AW-1] ^ i_b[BW-1];
end
 
wire [(BW+LUTB-1):0] pr_a, pr_b;
 
126,6 → 140,10
// a time can follow this--but the first clock can do two at a time.
bimpy #(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a);
bimpy #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);
 
initial r_s = 0;
initial r_a[0] = 0;
initial r_b[0] = 0;
always @(posedge i_clk)
if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];
always @(posedge i_clk)
132,13 → 150,18
if (i_ce) r_b[0] <= u_b;
always @(posedge i_clk)
if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };
 
initial acc[0] = 0;
always @(posedge i_clk) // One clk after p[0],p[1] become valid
if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
+{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };
if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
+{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };
 
generate // Keep track of intermediate values, before multiplying them
if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
begin : gencopies
begin : GENCOPIES
 
initial r_a[k+1] = 0;
initial r_b[k+1] = 0;
always @(posedge i_clk)
if (i_ce)
begin
150,23 → 173,27
 
generate // The actual multiply and accumulate stage
if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
begin : genstages
begin : GENSTAGES
wire [(BW+LUTB-1):0] genp;
 
// First, the multiply: 2-bits times BW bits
wire [(BW+LUTB-1):0] genp;
bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);
 
// Then the accumulate step -- on the next clock
initial acc[k+1] = 0;
always @(posedge i_clk)
if (i_ce)
acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
genp, {(LUTB*(k+2)){1'b0}} };
if (i_ce)
acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
genp, {(LUTB*(k+2)){1'b0}} };
end endgenerate
 
wire [(IW+BW-1):0] w_r;
assign w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];
 
initial o_r = 0;
always @(posedge i_clk)
if (i_ce)
o_r <= w_r[(AW+BW-1):0];
if (i_ce)
o_r <= w_r[(AW+BW-1):0];
 
generate if (IW > AW)
begin : VUNUSED
176,4 → 203,231
// verilator lint_on UNUSED
end endgenerate
 
`ifdef FORMAL
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
`define ASSERT assert
`ifdef LONGBIMPY
 
always @(posedge i_clk)
if (!$past(i_ce))
assume(i_ce);
 
`endif
 
reg [AW-1:0] f_past_a [0:TLEN];
reg [BW-1:0] f_past_b [0:TLEN];
reg [TLEN+1:0] f_sgn_a, f_sgn_b;
 
initial f_past_a[0] = 0;
initial f_past_b[0] = 0;
initial f_sgn_a = 0;
initial f_sgn_b = 0;
always @(posedge i_clk)
if (i_ce)
begin
f_past_a[0] <= u_a;
f_past_b[0] <= u_b;
f_sgn_a[0] <= i_a[AW-1];
f_sgn_b[0] <= i_b[BW-1];
end
 
generate for(k=0; k<TLEN; k=k+1)
begin
initial f_past_a[k+1] = 0;
initial f_past_b[k+1] = 0;
initial f_sgn_a[k+1] = 0;
initial f_sgn_b[k+1] = 0;
always @(posedge i_clk)
if (i_ce)
begin
f_past_a[k+1] <= f_past_a[k];
f_past_b[k+1] <= f_past_b[k];
 
f_sgn_a[k+1] <= f_sgn_a[k];
f_sgn_b[k+1] <= f_sgn_b[k];
end
end endgenerate
 
always @(posedge i_clk)
if (i_ce)
begin
f_sgn_a[TLEN+1] <= f_sgn_a[TLEN];
f_sgn_b[TLEN+1] <= f_sgn_b[TLEN];
end
 
always @(posedge i_clk)
begin
assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));
assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
end
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce)))
begin
if ($past(i_a)==0)
`ASSERT(u_a == 0);
else if ($past(i_a[AW-1]) == 1'b0)
`ASSERT(u_a == $past(i_a));
 
if ($past(i_b)==0)
`ASSERT(u_b == 0);
else if ($past(i_b[BW-1]) == 1'b0)
`ASSERT(u_b == $past(i_b));
end
 
generate // Keep track of intermediate values, before multiplying them
if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
begin : ASSERT_GENCOPY
always @(posedge i_clk)
if (i_ce)
begin
if (f_past_a[k]==0)
`ASSERT(r_a[k] == 0);
else if (f_past_a[k]==1)
`ASSERT(r_a[k] == 0);
`ASSERT(r_b[k] == f_past_b[k]);
end
end endgenerate
 
generate // The actual multiply and accumulate stage
if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
begin : ASSERT_GENSTAGE
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce)))
begin
if (f_past_a[k+1]==0)
`ASSERT(acc[k] == 0);
if (f_past_a[k+1]==1)
`ASSERT(acc[k] == f_past_b[k+1]);
if (f_past_b[k+1]==0)
`ASSERT(acc[k] == 0);
if (f_past_b[k+1]==1)
begin
`ASSERT(acc[k][(2*k)+3:0]
== f_past_a[k+1][(2*k)+3:0]);
`ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);
end
end
end endgenerate
 
wire [AW-1:0] f_past_a_neg = - f_past_a[TLEN];
wire [BW-1:0] f_past_b_neg = - f_past_b[TLEN];
 
wire [AW-1:0] f_past_a_pos = f_past_a[TLEN][AW-1]
? f_past_a_neg : f_past_a[TLEN];
wire [BW-1:0] f_past_b_pos = f_past_b[TLEN][BW-1]
? f_past_b_neg : f_past_b[TLEN];
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce)))
begin
if ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0))
`ASSERT(o_r == 0);
else if (f_past_a[TLEN]==1)
begin
if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)
begin
`ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);
`ASSERT(o_r[AW+BW-1:BW] == 0);
end else begin // if (f_sgn_b[TLEN+1]) begin
`ASSERT(o_r[BW-1:0] == f_past_b_neg);
`ASSERT(o_r[AW+BW-1:BW]
== {(AW){f_past_b_neg[BW-1]}});
end
end else if (f_past_b[TLEN]==1)
begin
if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)
begin
`ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);
`ASSERT(o_r[AW+BW-1:AW] == 0);
end else begin
`ASSERT(o_r[AW-1:0] == f_past_a_neg);
`ASSERT(o_r[AW+BW-1:AW]
== {(BW){f_past_a_neg[AW-1]}});
end
end else begin
`ASSERT(o_r != 0);
if (!o_r[AW+BW-1:0])
begin
`ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])
||(o_r[AW+BW-1:AW]!=0));
`ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])
||(o_r[AW+BW-1:BW]!=0));
end else begin
`ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])
||(! (&o_r[AW+BW-1:AW])));
`ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])
||(! (&o_r[AW+BW-1:BW]!=0)));
end
end
end
 
generate if (IAW <= IBW)
begin : NO_PARAM_CHANGE
assign f_past_a_unsorted = (!f_sgn_a[TLEN+1])
? f_past_a[TLEN] : f_past_a_neg;
assign f_past_b_unsorted = (!f_sgn_b[TLEN+1])
? f_past_b[TLEN] : f_past_b_neg;
end else begin : SWAP_PARAMETERS
assign f_past_a_unsorted = (!f_sgn_b[TLEN+1])
? f_past_b[TLEN] : f_past_b_neg;
assign f_past_b_unsorted = (!f_sgn_a[TLEN+1])
? f_past_a[TLEN] : f_past_a_neg;
end endgenerate
`ifdef BUTTERFLY
// The following properties artificially restrict the inputs
// to this long binary multiplier to only those values whose
// absolute value is 0..7. It is used by the formal proof of
// the BUTTERFLY to artificially limit the scope of the proof.
// By the time the butterfly sees this code, it will be known
// that the long binary multiply works. At issue will no longer
// be whether or not this works, but rather whether it works in
// context. For that purpose, we'll need to check timing, not
// results. Checking against inputs of +/- 1 and 0 are perfect
// for that task. The below assumptions (yes they are assumptions
// just go a little bit further.
//
// THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT
// THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.
//
// This just limits the proof for the butterfly, the parent.
// module that calls this one
//
// Start by assuming that all inputs have an absolute value less
// than eight.
always @(*)
assume(u_a[AW-1:3] == 0);
always @(*)
assume(u_b[BW-1:3] == 0);
 
// If the inputs have these properties, then so too do many of
// our internal values. ASSERT therefore that we never get out
// of bounds
generate for(k=0; k<TLEN; k=k+1)
begin
always @(*)
begin
assert(f_past_a[k][AW-1:3] == 0);
assert(f_past_b[k][BW-1:3] == 0);
end
end endgenerate
 
generate for(k=0; k<TLEN-1; k=k+1)
begin
always @(*)
assert(acc[k][IW+BW-1:6] == 0);
end endgenerate
 
generate for(k=0; k<TLEN-2; k=k+1)
begin
always @(*)
assert(r_b[k][BW-1:3] == 0);
end endgenerate
`endif // BUTTERFLY
`endif // FORMAL
endmodule
/rtl/qtrstage.v
9,7 → 9,33
// so that all of the multiplies are accomplished by additions and
// multiplexers only.
//
// Operation:
// The operation of this stage is identical to the regular stages of
// the FFT (see them for details), with one additional and critical
// difference: this stage doesn't require any hardware multiplication.
// The multiplies within it may all be accomplished using additions and
// subtractions.
//
// Let's see how this is done. Given x[n] and x[n+2], cause thats the
// stage we are working on, with i_sync true for x[0] being input,
// produce the output:
//
// y[n ] = x[n] + x[n+2]
// y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2} (forward transform)
// = (x[n] - x[n+2]) * -j^n
//
// y[n].r = x[n].r + x[n+2].r (This is the easy part)
// y[n].i = x[n].i + x[n+2].i
//
// y[2].r = x[0].r - x[2].r
// y[2].i = x[0].i - x[2].i
//
// y[3].r = (x[1].i - x[3].i) (forward transform)
// y[3].i = - (x[1].r - x[3].r)
//
// y[3].r = - (x[1].i - x[3].i) (inverse transform)
// y[3].i = (x[1].r - x[3].r) (INVERSE = 1)
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
17,23 → 43,25
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
// This file is part of the general purpose pipelined FFT project.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
43,19 → 71,16
//
module qtrstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync);
parameter IWIDTH=16, OWIDTH=IWIDTH+1;
// Parameters specific to the core that should be changed when this
// core is built ... Note that the minimum LGSPAN is 2. Smaller
// spans must use the fftdoubles stage.
parameter LGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;
input i_clk, i_reset, i_ce, i_sync;
input [(2*IWIDTH-1):0] i_data;
parameter LGWIDTH=8, INVERSE=0,SHIFT=0;
input wire i_clk, i_reset, i_ce, i_sync;
input wire [(2*IWIDTH-1):0] i_data;
output reg [(2*OWIDTH-1):0] o_data;
output reg o_sync;
reg wait_for_sync;
reg [3:0] pipeline;
reg [2:0] pipeline;
 
reg [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;
reg signed [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;
 
reg [(2*OWIDTH-1):0] ob_a;
wire [(2*OWIDTH-1):0] ob_b;
63,20 → 88,23
assign ob_b = { ob_b_r, ob_b_i };
 
reg [(LGWIDTH-1):0] iaddr;
reg [(2*IWIDTH-1):0] imem;
reg [(2*IWIDTH-1):0] imem [0:1];
 
wire signed [(IWIDTH-1):0] imem_r, imem_i;
assign imem_r = imem[(2*IWIDTH-1):(IWIDTH)];
assign imem_i = imem[(IWIDTH-1):0];
assign imem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];
assign imem_i = imem[1][(IWIDTH-1):0];
 
wire signed [(IWIDTH-1):0] i_data_r, i_data_i;
assign i_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];
assign i_data_i = i_data[(IWIDTH-1):0];
 
reg [(2*OWIDTH-1):0] omem;
reg [(2*OWIDTH-1):0] omem [0:1];
 
wire signed [(OWIDTH-1):0] rnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,
n_rnd_diff_r, n_rnd_diff_i;
//
// Round our output values down to OWIDTH bits
//
wire signed [(OWIDTH-1):0] rnd_sum_r, rnd_sum_i,
rnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_sum_r(i_clk, i_ce,
sum_r, rnd_sum_r);
 
100,28 → 128,31
iaddr <= 0;
end else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
begin
iaddr <= iaddr + { {(LGWIDTH-1){1'b0}}, 1'b1 };
iaddr <= iaddr + 1'b1;
wait_for_sync <= 1'b0;
end
 
always @(posedge i_clk)
if (i_ce)
imem <= i_data;
begin
imem[0] <= i_data;
imem[1] <= imem[0];
end
 
 
// Note that we don't check on wait_for_sync or i_sync here.
// Why not? Because iaddr will always be zero until after the
// first i_ce, so we are safe.
initial pipeline = 4'h0;
initial pipeline = 3'h0;
always @(posedge i_clk)
if (i_reset)
pipeline <= 4'h0;
pipeline <= 3'h0;
else if (i_ce) // is our pipeline process full? Which stages?
pipeline <= { pipeline[2:0], iaddr[0] };
pipeline <= { pipeline[1:0], iaddr[1] };
 
// This is the pipeline[-1] stage, pipeline[0] will be set next.
always @(posedge i_clk)
if ((i_ce)&&(iaddr[0]))
if ((i_ce)&&(iaddr[1]))
begin
sum_r <= imem_r + i_data_r;
sum_i <= imem_i + i_data_i;
140,7 → 171,7
begin
ob_a <= { rnd_sum_r, rnd_sum_i };
// on Even, W = e^{-j2pi 1/4 0} = 1
if (ODD == 0)
if (!iaddr[0])
begin
ob_b_r <= rnd_diff_r;
ob_b_i <= rnd_diff_i;
158,21 → 189,172
always @(posedge i_clk)
if (i_ce)
begin // In sequence, clock = 3
if (pipeline[3])
begin
omem <= ob_b;
omem[0] <= ob_b;
omem[1] <= omem[0];
if (pipeline[2])
o_data <= ob_a;
end else
o_data <= omem;
else
o_data <= omem[1];
end
 
// Don't forget in the sync check that we are running
// at two clocks per sample. Thus we need to
// produce a sync every 2^(LGWIDTH-1) clocks.
initial o_sync = 1'b0;
always @(posedge i_clk)
if (i_reset)
o_sync <= 1'b0;
else if (i_ce)
o_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);
o_sync <= (iaddr[2:0] == 3'b101);
 
`ifdef FORMAL
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid = 1'b1;
 
`ifdef QTRSTAGE
always @(posedge i_clk)
assume((i_ce)||($past(i_ce))||($past(i_ce,2)));
`endif
 
// The below logic only works if the rounding stage does nothing
initial assert(IWIDTH+1 == OWIDTH);
 
reg signed [IWIDTH-1:0] f_piped_real [0:7];
reg signed [IWIDTH-1:0] f_piped_imag [0:7];
 
always @(posedge i_clk)
if (i_ce)
begin
f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH];
f_piped_imag[0] <= i_data[ IWIDTH-1:0];
 
f_piped_real[1] <= f_piped_real[0];
f_piped_imag[1] <= f_piped_imag[0];
 
f_piped_real[2] <= f_piped_real[1];
f_piped_imag[2] <= f_piped_imag[1];
 
f_piped_real[3] <= f_piped_real[2];
f_piped_imag[3] <= f_piped_imag[2];
 
f_piped_real[4] <= f_piped_real[3];
f_piped_imag[4] <= f_piped_imag[3];
 
f_piped_real[5] <= f_piped_real[4];
f_piped_imag[5] <= f_piped_imag[4];
 
f_piped_real[6] <= f_piped_real[5];
f_piped_imag[6] <= f_piped_imag[5];
 
f_piped_real[7] <= f_piped_real[6];
f_piped_imag[7] <= f_piped_imag[6];
end
 
reg f_rsyncd;
wire f_syncd;
 
initial f_rsyncd = 0;
always @(posedge i_clk)
if(i_reset)
f_rsyncd <= 1'b0;
else if (!f_rsyncd)
f_rsyncd <= (o_sync);
assign f_syncd = (f_rsyncd)||(o_sync);
 
reg [1:0] f_state;
 
 
initial f_state = 0;
always @(posedge i_clk)
if (i_reset)
f_state <= 0;
else if ((i_ce)&&((!wait_for_sync)||(i_sync)))
f_state <= f_state + 1;
 
always @(*)
if (f_state != 0)
assume(!i_sync);
 
always @(posedge i_clk)
assert(f_state[1:0] == iaddr[1:0]);
 
wire signed [2*IWIDTH-1:0] f_i_real, f_i_imag;
assign f_i_real = i_data[2*IWIDTH-1:IWIDTH];
assign f_i_imag = i_data[ IWIDTH-1:0];
 
wire signed [OWIDTH-1:0] f_o_real, f_o_imag;
assign f_o_real = o_data[2*OWIDTH-1:OWIDTH];
assign f_o_imag = o_data[ OWIDTH-1:0];
 
always @(posedge i_clk)
if (f_state == 2'b11)
begin
assume(f_piped_real[0] != 3'sb100);
assume(f_piped_real[2] != 3'sb100);
assert(sum_r == f_piped_real[2] + f_piped_real[0]);
assert(sum_i == f_piped_imag[2] + f_piped_imag[0]);
 
assert(diff_r == f_piped_real[2] - f_piped_real[0]);
assert(diff_i == f_piped_imag[2] - f_piped_imag[0]);
end
 
always @(posedge i_clk)
if ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4)))
begin
assert(rnd_sum_r == f_piped_real[3]+f_piped_real[1]);
assert(rnd_sum_i == f_piped_imag[3]+f_piped_imag[1]);
assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]);
assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]);
end
 
always @(posedge i_clk)
if ((f_state == 2'b10)&&(f_syncd))
begin
// assert(o_sync);
assert(f_o_real == f_piped_real[5] + f_piped_real[3]);
assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);
end
 
always @(posedge i_clk)
if ((f_state == 2'b11)&&(f_syncd))
begin
assert(!o_sync);
assert(f_o_real == f_piped_real[5] + f_piped_real[3]);
assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);
end
 
always @(posedge i_clk)
if ((f_state == 2'b00)&&(f_syncd))
begin
assert(!o_sync);
assert(f_o_real == f_piped_real[7] - f_piped_real[5]);
assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]);
end
 
always @(*)
if ((iaddr[2:0] == 0)&&(!wait_for_sync))
assume(i_sync);
 
always @(*)
if (wait_for_sync)
assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd));
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset)))
assert(!wait_for_sync);
 
always @(posedge i_clk)
if ((f_state == 2'b01)&&(f_syncd))
begin
assert(!o_sync);
if (INVERSE)
begin
assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]);
assert(f_o_imag == f_piped_real[7]-f_piped_real[5]);
end else begin
assert(f_o_real == f_piped_imag[7]-f_piped_imag[5]);
assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]);
end
end
 
`endif
endmodule
/rtl/windowfn.v
0,0 → 1,679
////////////////////////////////////////////////////////////////////////////////
//
// Filename: windowfn.v
//
// Project: A General Purpose Pipelined FFT Implementation
//
// Purpose: Apply a window function to incoming real data points, so as
// to create an outgoing stream of data samples that can be used
// in an FFT construct using 50% overlap. The overlap, coupled with the
// FFT's requirements, can make for somewhat of a problem. Hence, there
// are two 'ce' signals coming into the core. A primary ce signal when
// new data is ready, and an alternate that must take place between
// primary signals. This allows the second/alternate CE signal to be
// appropriately spaced between the primary CE signals so that the
// outgoing signals to the FFT will still meet separation
// requirements--whatever they would be for the application.
//
// For this module, the window size is the FFT length.
//
// Ports:
// i_clk, i_reset Should be self explanatory. The reset is assumed to
// be synchronous.
//
// i_tap_wr, i_tap For use when OPT_FIXED_TAPS is zero, i_tap_wr signals
// that a "tap" or "coefficient" of the filter should be
// written. When i_tap_wr is high, i_tap is taken to be
// a coefficient to the core. There's an internal address
// counter, so no address need be given. However, the counter is
// reset on an i_reset signal.
//
// i_ce, i_alt_ce As discussed above, these signals need to alternate
// back and forth. Following a reset, the first signal coming in
// should be an i_ce signal.
//
// i_sample The incoming sample data, valid any time i_ce is true,
// and accepted into the core on that clock tick.
//
// o_ce True when the core has a valid output
// o_sample The output calculated by the core, ready to pass to
// the FFT
// o_frame True on the first sample of any frame. Following a
// reset, o_ce will remain false until o_frame is also
// true with it. From then on out, o_frame will be true once
// every FFT length.
//
// For a timing/signaling diagram, please feel free to run the formal
// tools in cover mode for this module, 'sby -f windowfn.sby cover',
// and then check out the generated trace.
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2018, Gisselquist Technology, LLC
//
// This file is part of the general purpose pipelined FFT project.
//
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program. (It's in the $(ROOT)/doc directory. Run make
// with no target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: LGPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
module windowfn(i_clk, i_reset, i_tap_wr, i_tap,
i_ce, i_sample, i_alt_ce,
o_frame, o_ce, o_sample);
parameter IW=16, OW=16, TW=16, LGNFFT = 4;
parameter [0:0] OPT_FIXED_TAPS = 1'b0;
parameter INITIAL_COEFFS = "";
//
localparam AW=IW+TW;
//
input wire i_clk, i_reset;
//
input wire i_tap_wr;
input wire [(TW-1):0] i_tap;
//
input wire i_ce;
input wire [(IW-1):0] i_sample;
input wire i_alt_ce;
//
output reg o_frame, o_ce;
output reg [(OW-1):0] o_sample;
 
 
reg [(TW-1):0] cmem [0:(1<<LGNFFT)-1];
reg [(TW-1):0] dmem [0:(1<<LGNFFT)-1];
 
//
// LOAD THE TAPS
//
wire [LGNFFT-1:0] tapwidx;
generate if (OPT_FIXED_TAPS)
begin : SET_FIXED_TAPS
 
initial $readmemh(INITIAL_COEFFS, cmem);
 
assign tapwidx = 0;
// Make Verilators -Wall happy
// Verilator lint_off UNUSED
wire [TW:0] ignored_inputs;
assign ignored_inputs = { i_tap_wr, i_tap };
// Verilator lint_on UNUSED
 
end else begin : DYNAMICALLY_SET_TAPS
 
// Coef memory write index
reg [(LGNFFT-1):0] r_tapwidx;
 
initial r_tapwidx = 0;
always @(posedge i_clk)
if(i_reset)
r_tapwidx <= 0;
else if (i_tap_wr)
r_tapwidx <= r_tapwidx + 1'b1;
 
if (INITIAL_COEFFS != 0)
initial $readmemh(INITIAL_COEFFS, cmem);
always @(posedge i_clk)
if (i_tap_wr)
cmem[r_tapwidx] <= i_tap;
 
assign tapwidx = r_tapwidx;
end endgenerate
 
 
reg [LGNFFT-1:0] dwidx, didx;
reg [LGNFFT-1:0] tidx;
reg top_of_block, first_block;
reg [1:0] frame_count;
reg p_ce, d_ce;
reg signed [IW-1:0] data;
reg signed [TW-1:0] tap;
 
 
 
//
//
// Record the incoming data into a local memory
//
//
 
// Notice how this data writing section is *independent* of the reset,
// depending only upon new sample data.
 
initial dwidx = 0;
always @(posedge i_clk)
if (i_reset)
dwidx <= 0;
else if (i_ce)
dwidx <= dwidx + 1'b1;
always @(posedge i_clk)
if (i_ce)
dmem[dwidx] <= i_sample;
 
initial first_block = 1'b1;
always @(posedge i_clk)
if (i_reset)
first_block <= 1'b1;
else if ((i_alt_ce)&&(&tidx)&&(dwidx==0))
first_block <= 1'b0;
 
 
//
//
// Keep track of the top of the block. The top of the block is the
// first incoming data sample on an FFT or half FFT boundary. This
// is where data processing starts from.
//
initial top_of_block = 1'b0;
always @(posedge i_clk)
if (i_reset)
top_of_block <= 1'b0;
else if (i_alt_ce)
top_of_block <= (&tidx)&&((!first_block)||(dwidx==0));
else if (i_ce)
top_of_block <= 1'b0;
 
//
// Data and coefficient memory indices.
//
initial didx = 0;
always @(posedge i_clk)
if (i_reset)
didx <= 0;
else if ((i_alt_ce)&&(dwidx[LGNFFT-2:0]==0))
begin
didx[LGNFFT-2:0] <= 0;
didx[LGNFFT-1] <= dwidx[LGNFFT-1];
end else if ((i_ce)||(i_alt_ce))
// Process the next point in this FFT
didx <= didx + 1'b1;
 
initial tidx = 0;
always @(posedge i_clk)
if (i_reset)
tidx <= 0;
else if ((i_alt_ce)&&(dwidx[LGNFFT-2:0]==0))
begin
// // At the beginning of processing for a given FFT
tidx <= 0;
end else if ((i_ce)||(i_alt_ce))
// Process the next point in the window function
tidx <= tidx + 1'b1;
 
initial frame_count = 0;
always @(posedge i_clk)
if (i_reset)
frame_count <= 0;
else if ((i_ce)&&(top_of_block)&&(!first_block))
frame_count <= 3;
else if (frame_count != 0)
frame_count <= frame_count - 1'b1;
 
initial o_frame = 1'b0;
always @(posedge i_clk)
if (i_reset)
o_frame <= 1'b0;
else if (frame_count == 2)
o_frame <= 1'b1;
else
o_frame <= 1'b0;
 
//
// Following any initial i_ce, ...
// d_ce: The data (and coefficient), read from memory,will be valid
// p_ce: The produc of data and coefficient is valid
//
initial { p_ce, d_ce } = 2'h0;
always @(posedge i_clk)
if (i_reset)
{ p_ce, d_ce } <= 2'h0;
else
{ p_ce, d_ce } <= { d_ce, (!first_block)&&((i_ce)||(i_alt_ce)) };
 
// Read the data sample point, and the filter coefficient, from block
// RAM. Because this is block RAM, we have to be careful not to
// do anything else here.
initial data = 0;
initial tap = 0;
always @(posedge i_clk)
begin
data <= dmem[didx];
tap <= cmem[tidx];
end
 
//
// Multiply the two values together
reg signed [IW+TW-1:0] product;
`ifdef FORMAL
// We'll implement an abstract multiply below--just to make sure the
// timing is right.
`else
always @(posedge i_clk)
product <= data * tap;
`endif
 
//
// Output CE
//
initial o_ce = 0;
always @(posedge i_clk)
if (i_reset)
o_ce <= 0;
else
o_ce <= p_ce;
 
generate if (OW == AW)
begin : BIT_ADJUSTMENT_NONE
 
initial o_sample = 0;
always @(posedge i_clk)
if (i_reset)
o_sample <= 0;
else if (p_ce)
o_sample <= product;
 
end else if (OW < AW)
begin : BIT_ADJUSTMENT_ROUNDING
wire [AW-1:0] rounded;
 
assign rounded = product + { {(OW){1'b0}}, product[AW-OW],
{(AW-OW-1){!product[AW-OW]}} };
 
initial o_sample = 0;
always @(posedge i_clk)
if (i_reset)
o_sample <= 0;
else if (p_ce)
o_sample <= rounded[(AW-1):(AW-OW)];
 
// Make Verilator happy
// verilator lint_off UNUSED
wire [AW-OW-1:0] unused_rounding_bits;
assign unused_rounding_bits = rounded[AW-OW-1:0];
// verilator lint_on UNUSED
 
end else // if (OW > AW)
begin : BIT_ADJUSTMENT_EXTENDING
 
always @(posedge i_clk)
if (i_reset)
o_sample <= 0;
else if (p_ce)
o_sample <= { product, {(OW-AW){1'b0}} };
 
end endgenerate
 
// Make Verilator happy
// verilator lint_off UNUSED
wire [LGNFFT-1:0] unused;
assign unused = tapwidx;
// verilator lint_on UNUSED
 
`ifdef FORMAL
reg f_past_valid;
initial f_past_valid = 1'b0;
always @(posedge i_clk)
f_past_valid <= 1'b1;
 
// Keep track of the phase of this operation
reg [LGNFFT:0] f_phase;
 
initial f_phase = 0;
always @(posedge i_clk)
if (i_reset)
begin
f_phase <= 0;
end else if ((i_ce)&&(top_of_block))
f_phase[LGNFFT-1:0] <= 1;
else if ((i_ce)||(i_alt_ce))
f_phase <= f_phase + 1;
 
///////
//
// Assumptions about the input
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))&&(!$past(i_ce)))
restrict($stable(i_sample));
 
always @(*)
if (tapwidx != 0)
assume(!i_ce);
 
always @(posedge i_clk)
if (f_phase[0])
assume(!i_ce);
 
always @(posedge i_clk)
if (!f_phase[0])
assume(!i_alt_ce);
 
always @(*)
if (i_tap_wr)
assume((!i_ce)&&(!i_alt_ce));
 
always @(*)
assume(!i_tap_wr);
 
always @(*)
if ((i_ce)&&(top_of_block))
begin
assert(dwidx[LGNFFT-2:0]==0);
assert(didx[LGNFFT-2:0]==0);
end else if (dwidx[LGNFFT-2:0]!=0)
assert(!top_of_block);
 
always @(*)
if ((dwidx[LGNFFT-2:0]==0)&&(!didx[0]))
assert(top_of_block||f_first_block||first_block);
else
assert(!top_of_block);
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(first_block))&&(!first_block))
assert(top_of_block);
 
/*
always @(posedge i_clk)
if (f_past_valid)
begin
if ($past(i_reset))
assert(!top_of_block);
else if ($past(i_ce))
assert(top_of_block == ((!first_block)&&(dwidx[LGNFFT-2:0]==0)));
else
 
assert(top_of_block == ((!first_block)&&(&dwidx[LGNFFT-2:1])));
end
*/
 
always @(posedge i_clk)
if ((f_past_valid)&&($past(first_block))&&(!$past(first_block)))
assert(top_of_block);
 
//////////////////////////////////////////////////////////////////
//
// Assertions about our outputs
//
/////////////////////////
//
//
// always @(*)
// if (o_frame)
// assert(o_ce);
always @(*)
if (first_block)
assert(!o_ce);
 
reg f_waiting_for_first_frame;
 
initial f_waiting_for_first_frame = 1;
always @(posedge i_clk)
if ((i_reset)||(first_block))
f_waiting_for_first_frame <= 1'b1;
else if (o_ce)
f_waiting_for_first_frame <= 1'b0;
 
always @(*)
if ((f_waiting_for_first_frame)&&(o_ce))
assert(o_frame);
always @(*)
if (f_phase == 0)
assert((top_of_block)||(first_block));
 
always @(*)
if (f_waiting_for_first_frame)
begin
if (f_phase[LGNFFT-1:0] > 3)
begin
assert((first_block)&&(!o_frame));
assert({o_ce, p_ce, d_ce} == 0);
assert(frame_count == 0);
end else if (f_phase == 0)
begin
assert((!o_frame)&&({o_ce, p_ce, d_ce} == 0));
assert(frame_count == 0);
end else if ((f_phase > 0)&&(!first_block))
assert(|{o_ce, p_ce, d_ce });
else if ((frame_count != 0)||(first_block))
assert(!o_frame);
else
assert(o_frame);
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset)))
assert(o_ce || $stable(o_sample));
 
/*
always @(*)
assert(m_ce == (f_phase == 0));
always @(*)
assert(d_ce == (f_phase == 1));
always @(*)
assert(p_ce == (f_phase == 2));
always @(*)
assert(o_ce == (f_phase == 3));
always @(*)
assert(o_frame == (f_phase == 3));
*/
 
always @(*)
assert(didx[LGNFFT-2:0] == tidx[LGNFFT-2:0]);
 
always @(posedge i_clk)
if ((f_past_valid)&&(!$past(i_reset))
&&($past(i_ce))&&($past(top_of_block)))
assert(tidx == 1);
 
 
wire [LGNFFT:0] f_phase_plus_one;
always @(*)
f_phase_plus_one = f_phase + 1;
/*
end else if ($past(i_ce))
begin
// 1 0 // 0 0 Top of block 0
// 2 1 // 1 1 ALT 0
// 3 1 // 2 1 1
// 4 2 // 3 2 ALT 1
// 5 2 // 4 2 2
// 6 3 // 5 3 ALT 2
// 7 3 // 6 3 3
// 0 4 // 7 4 ALT 3
// 1 // 4 4 Top of block 0
// 2 // 5 5 0
// // 6 5 ALT 1
// // 7 6 1
// // 0 6 ALT 2
// // 1 7 2
// // 2 7 ALT
// // 3 0
// // 0 0 ALT Top of block
// // 1 1
// // 2 1
// // 3 2
// // 4 2
*/
always @(*)
assert(f_phase_plus_one[LGNFFT:1] == dwidx[LGNFFT-1:0]);
always @(*)
assert(f_phase[0] == didx[0]);
always @(*)
if (f_phase[LGNFFT])
begin
assert(f_phase[LGNFFT-1:0] == {!didx[LGNFFT-1],didx[LGNFFT-2:0]});
assert((dwidx[LGNFFT-1]==1)
||(dwidx[LGNFFT-2:0]==0));
end else begin
assert((dwidx[LGNFFT-1]==0)
||((dwidx[LGNFFT-1])&&(dwidx[LGNFFT-2:0]==0)&&(&f_phase[LGNFFT-1:0])));
assert(f_phase[LGNFFT-1:0] == didx[LGNFFT-1:0]);
end
 
always @(*)
assert(f_phase[LGNFFT-1:0] == tidx[LGNFFT-1:0]);
always @(*)
assert(f_phase[LGNFFT-2:0] == didx[LGNFFT-2:0]);
wire [LGNFFT-1:0] f_diff_idx;
assign f_diff_idx = didx - dwidx;
always @(*)
if (!first_block)
assert(f_diff_idx < { 1'b1, {(LGNFFT-1){1'b0}} });
 
 
always @(*)
if (top_of_block)
assert(!first_block);
 
reg f_first_block;
initial f_first_block = 1'b1;
always @(posedge i_clk)
if (i_ce)
f_first_block <= first_block;
 
always @(*)
if ((top_of_block)&&(i_ce)&&(!f_first_block))
assert(didx[LGNFFT-2:0] == dwidx[LGNFFT-2:0]);
 
always @(*)
if ((!f_first_block)&&(!first_block)&&(dwidx[LGNFFT-2:0]==0)
&&(didx[LGNFFT-1:0]==0))
assert(top_of_block);
 
////////////////////////////////////////////////////////////////////////////////
//
// Abstract Multiply
//
////////////////////////////////////////////////////////////////////////////////
// Gin up a really quick abstract multiply for formal testing
// only. always @(posedge i_clk)
(* anyconst *) signed reg [IW+TW-1:0] pre_product;
always @(posedge i_clk)
if (data == 0)
assume(pre_product == 0);
always @(posedge i_clk)
if (tap == 0)
assume(pre_product == 0);
always @(posedge i_clk)
if (data == 1)
assume(pre_product == tap);
always @(posedge i_clk)
if (tap == 1)
assume(pre_product == data);
always @(posedge i_clk)
if ((i_ce)||(i_alt_ce))
product <= pre_product;
 
////////////////////////////////////////////////////////////////////////////////
//
// Arbitrary memory test
//
////////////////////////////////////////////////////////////////////////////////
(* anyconst *) reg [LGNFFT-1:0] f_addr;
signed reg [TW-1:0] f_tap;
signed reg [IW-1:0] f_value, f_tap;
reg f_this_dce, f_this_pce,
f_this_oce, f_this_tap;
 
initial assume(f_tap == cmem[f_addr]);
always @(*)
assert(f_tap == cmem[f_addr]);
always @(posedge i_clk)
if ((i_tap_wr)&&(f_addr == tapwidx))
f_tap <= i_tap;
 
initial f_value = 0;
initial assume(dmem[f_addr] == f_value);
always @(*)
assert(f_value == dmem[f_addr]);
always @(posedge i_clk)
if ((i_ce)&&(dwidx == f_addr))
f_value <= i_sample;
initial { f_this_oce, f_this_pce, f_this_dce } = 3'h0;
always @(posedge i_clk)
if ((i_reset)||(i_tap_wr))
{ f_this_oce, f_this_pce, f_this_dce } <= 3'h0;
else
{ f_this_oce, f_this_pce, f_this_dce }
<= { f_this_pce, f_this_dce,
(((i_ce)||(i_alt_ce))
&&(f_past_valid)&&(f_addr == didx)) };
initial f_this_tap = 0;
always @(posedge i_clk)
if (i_reset)
f_this_tap <= 0;
else if ((i_ce)||(i_alt_ce))
f_this_tap <= (f_past_valid)&&(f_addr == tidx);
else
f_this_tap <= 0;
 
 
always @(posedge i_clk)
if (f_this_tap)
assert(tap == f_tap);
 
always @(posedge i_clk)
if ((f_past_valid)&&(f_this_dce))
assert(data == $past(f_value));
 
reg signed [IW-1:0] f_past_data;
reg signed [TW-1:0] f_past_tap;
 
always @(posedge i_clk)
begin
f_past_data <= data;
f_past_tap <= tap;
end
 
always @(posedge i_clk)
if ((f_past_valid)&&(f_this_pce))
begin
if (f_past_tap == 0)
assert(product == 0);
if (f_past_data == 0)
assert(product == 0);
if (f_past_tap == 1)
assert(product=={{(TW){f_past_data[IW-1]}},f_past_data});
if (f_past_data == 1)
assert(product=={{{IW}{f_past_tap[TW-1]}},f_past_tap});
end
 
 
////////////////////////////////////////////////////////////////////////////////
//
// Cover tests
//
////////////////////////////////////////////////////////////////////////////////
reg f_second_frame;
initial f_second_frame = 1'b0;
always @(posedge i_clk)
if ((o_ce)&&(o_frame))
f_second_frame <= 1'b1;
 
always @(posedge i_clk)
cover((o_ce)&&(o_frame)&&(f_second_frame));
`endif
endmodule
rtl Property changes : Added: svn:ignore ## -0,0 +1 ## +obj_dir

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.