URL
https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk
Subversion Repositories dblclockfft
Compare Revisions
- This comparison shows the changes necessary to convert path
/dblclockfft/trunk
- from Rev 38 to Rev 39
- ↔ Reverse comparison
Rev 38 → Rev 39
/rtl/README.md
15,6 → 15,7
multiply at the cost of more logic and a greater delay. |
|
- [longbimpy](longbimpy.v) is the logic binary multiply. |
|
- [bimpy](bimpy.v) multiplies a small set of bits together. It is a |
component of [longbimpy](longbimpy.v) |
|
/rtl/bimpy.v
25,23 → 25,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
50,11 → 52,11
`default_nettype none |
// |
module bimpy(i_clk, i_ce, i_a, i_b, o_r); |
parameter BW=18, // Number of bits in i_b |
LUTB=2; // Number of bits in i_a for our LUT multiply |
input i_clk, i_ce; |
input [(LUTB-1):0] i_a; |
input [(BW-1):0] i_b; |
parameter BW=18; // Number of bits in i_b |
localparam LUTB=2; // Number of bits in i_a for our LUT multiply |
input wire i_clk, i_ce; |
input wire [(LUTB-1):0] i_a; |
input wire [(BW-1):0] i_b; |
output reg [(BW+LUTB-1):0] o_r; |
|
wire [(BW+LUTB-2):0] w_r; |
65,8 → 67,32
assign c = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) } |
& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}}); |
|
initial o_r = 0; |
always @(posedge i_clk) |
if (i_ce) |
o_r <= w_r + { c, 2'b0 }; |
if (i_ce) |
o_r <= w_r + { c, 2'b0 }; |
|
`ifdef FORMAL |
reg f_past_valid; |
|
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
`define ASSERT assert |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(i_ce))) |
begin |
if ($past(i_a)==0) |
`ASSERT(o_r == 0); |
else if ($past(i_a) == 1) |
`ASSERT(o_r == $past(i_b)); |
|
if ($past(i_b)==0) |
`ASSERT(o_r == 0); |
else if ($past(i_b) == 1) |
`ASSERT(o_r[(LUTB-1):0] == $past(i_a)); |
end |
`endif |
endmodule |
/rtl/bitreverse.v
4,32 → 4,12
// |
// Project: A General Purpose Pipelined FFT Implementation |
// |
// Purpose: This module bitreverses a pipelined FFT input. Operation is |
// expected as follows: |
// Purpose: This module bitreverses a pipelined FFT input. It differes |
// from the dblreverse module in that this is just a simple and |
// straightforward bitreverse, rather than one written to handle two |
// words at once. |
// |
// i_clk A running clock at whatever system speed is offered. |
// i_reset A synchronous reset signal, that resets all internals |
// i_ce If this is one, one input is consumed and an output |
// is produced. |
// i_in_0, i_in_1 |
// Two inputs to be consumed, each of width WIDTH. |
// o_out_0, o_out_1 |
// Two of the bitreversed outputs, also of the same |
// width, WIDTH. Of course, there is a delay from the |
// first input to the first output. For this purpose, |
// o_sync is present. |
// o_sync This will be a 1'b1 for the first value in any block. |
// Following a reset, this will only become 1'b1 once |
// the data has been loaded and is now valid. After that, |
// all outputs will be valid. |
// |
// 20150602 -- This module has undergone massive rework in order to |
// ensure that it uses resources efficiently. As a result, |
// it now optimizes nicely into block RAMs. As an unfortunately |
// side effect, it now passes it's bench test (dblrev_tb) but |
// fails the integration bench test (fft_tb). |
// |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
37,23 → 17,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
61,116 → 43,59
// |
`default_nettype none |
// |
|
|
// |
// How do we do bit reversing at two smples per clock? Can we separate out |
// our work into eight memory banks, writing two banks at once and reading |
// another two banks in the same clock? |
// |
// mem[00xxx0] = s_0[n] |
// mem[00xxx1] = s_1[n] |
// o_0[n] = mem[10xxx0] |
// o_1[n] = mem[11xxx0] |
// ... |
// mem[01xxx0] = s_0[m] |
// mem[01xxx1] = s_1[m] |
// o_0[m] = mem[10xxx1] |
// o_1[m] = mem[11xxx1] |
// ... |
// mem[10xxx0] = s_0[n] |
// mem[10xxx1] = s_1[n] |
// o_0[n] = mem[00xxx0] |
// o_1[n] = mem[01xxx0] |
// ... |
// mem[11xxx0] = s_0[m] |
// mem[11xxx1] = s_1[m] |
// o_0[m] = mem[00xxx1] |
// o_1[m] = mem[01xxx1] |
// ... |
// |
// The answer is that, yes we can but: we need to use four memory banks |
// to do it properly. These four banks are defined by the two bits |
// that determine the top and bottom of the correct address. Larger |
// FFT's would require more memories. |
// |
// |
module bitreverse(i_clk, i_reset, i_ce, i_in_0, i_in_1, |
o_out_0, o_out_1, o_sync); |
module bitreverse(i_clk, i_reset, i_ce, i_in, o_out, o_sync); |
parameter LGSIZE=5, WIDTH=24; |
input i_clk, i_reset, i_ce; |
input [(2*WIDTH-1):0] i_in_0, i_in_1; |
output wire [(2*WIDTH-1):0] o_out_0, o_out_1; |
input wire i_clk, i_reset, i_ce; |
input wire [(2*WIDTH-1):0] i_in; |
output reg [(2*WIDTH-1):0] o_out; |
output reg o_sync; |
reg [(LGSIZE):0] wraddr; |
wire [(LGSIZE):0] rdaddr; |
|
reg in_reset; |
reg [(LGSIZE-1):0] iaddr; |
wire [(LGSIZE-3):0] braddr; |
reg [(2*WIDTH-1):0] brmem [0:((1<<(LGSIZE+1))-1)]; |
|
genvar k; |
generate for(k=0; k<LGSIZE-2; k=k+1) |
begin : gen_a_bit_reversed_value |
assign braddr[k] = iaddr[LGSIZE-3-k]; |
end endgenerate |
generate for(k=0; k<LGSIZE; k=k+1) |
assign rdaddr[k] = wraddr[LGSIZE-1-k]; |
endgenerate |
assign rdaddr[LGSIZE] = !wraddr[LGSIZE]; |
|
initial iaddr = 0; |
initial in_reset = 1'b1; |
initial o_sync = 1'b0; |
reg in_reset; |
|
initial in_reset = 1'b1; |
always @(posedge i_clk) |
if (i_reset) |
begin |
iaddr <= 0; |
in_reset <= 1'b1; |
o_sync <= 1'b0; |
end else if (i_ce) |
else if ((i_ce)&&(&wraddr[(LGSIZE-1):0])) |
in_reset <= 1'b0; |
|
initial wraddr = 0; |
always @(posedge i_clk) |
if (i_reset) |
wraddr <= 0; |
else if (i_ce) |
begin |
iaddr <= iaddr + { {(LGSIZE-1){1'b0}}, 1'b1 }; |
if (&iaddr[(LGSIZE-2):0]) |
in_reset <= 1'b0; |
if (in_reset) |
o_sync <= 1'b0; |
else |
o_sync <= ~(|iaddr[(LGSIZE-2):0]); |
brmem[wraddr] <= i_in; |
wraddr <= wraddr + 1; |
end |
|
reg [(2*WIDTH-1):0] mem_e [0:((1<<(LGSIZE))-1)]; |
reg [(2*WIDTH-1):0] mem_o [0:((1<<(LGSIZE))-1)]; |
|
always @(posedge i_clk) |
if (i_ce) mem_e[iaddr] <= i_in_0; |
always @(posedge i_clk) |
if (i_ce) mem_o[iaddr] <= i_in_1; |
if (i_ce) // If (i_reset) we just output junk ... not a problem |
o_out <= brmem[rdaddr]; // w/o a sync pulse |
|
|
reg [(2*WIDTH-1):0] evn_out_0, evn_out_1, odd_out_0, odd_out_1; |
|
initial o_sync = 1'b0; |
always @(posedge i_clk) |
if (i_ce) |
evn_out_0 <= mem_e[{!iaddr[LGSIZE-1],1'b0,braddr}]; |
always @(posedge i_clk) |
if (i_ce) |
evn_out_1 <= mem_e[{!iaddr[LGSIZE-1],1'b1,braddr}]; |
always @(posedge i_clk) |
if (i_ce) |
odd_out_0 <= mem_o[{!iaddr[LGSIZE-1],1'b0,braddr}]; |
always @(posedge i_clk) |
if (i_ce) |
odd_out_1 <= mem_o[{!iaddr[LGSIZE-1],1'b1,braddr}]; |
if (i_reset) |
o_sync <= 1'b0; |
else if ((i_ce)&&(!in_reset)) |
o_sync <= (wraddr[(LGSIZE-1):0] == 0); |
|
reg adrz; |
always @(posedge i_clk) |
if (i_ce) adrz <= iaddr[LGSIZE-2]; |
|
assign o_out_0 = (adrz)?odd_out_0:evn_out_0; |
assign o_out_1 = (adrz)?odd_out_1:evn_out_1; |
|
`ifdef FORMAL |
`define ASSERT assert |
`ifdef BITREVERSE |
`define ASSUME assume |
`define ASSERT assert |
`else |
`define ASSUME assert |
`define ASSERT assume |
`endif |
|
reg f_past_valid; |
182,7 → 107,7
always @(posedge i_clk) |
if ((!f_past_valid)||($past(i_reset))) |
begin |
`ASSERT(iaddr == 0); |
`ASSERT(wraddr == 0); |
`ASSERT(in_reset); |
`ASSERT(!o_sync); |
end |
191,135 → 116,60
assume((i_ce)||($past(i_ce))||($past(i_ce,2))); |
`endif // BITREVERSE |
|
(* anyconst *) reg [LGSIZE-1:0] f_const_addr; |
wire [LGSIZE-3:0] f_reversed_addr; |
// reg [LGSIZE:0] f_now; |
reg f_addr_loaded_0, f_addr_loaded_1; |
reg [(2*WIDTH-1):0] f_data_0, f_data_1; |
wire f_writing, f_reading; |
(* anyconst *) reg [LGSIZE:0] f_const_addr; |
wire [LGSIZE:0] f_reversed_addr; |
reg f_addr_loaded; |
reg [(2*WIDTH-1):0] f_addr_value; |
|
generate for(k=0; k<LGSIZE-2; k=k+1) |
assign f_reversed_addr[k] = f_const_addr[LGSIZE-3-k]; |
generate for(k=0; k<LGSIZE; k=k+1) |
assign f_reversed_addr[k] = f_const_addr[LGSIZE-1-k]; |
endgenerate |
assign f_reversed_addr[LGSIZE] = f_const_addr[LGSIZE]; |
|
assign f_writing=(f_const_addr[LGSIZE-1]==iaddr[LGSIZE-1]); |
assign f_reading=(f_const_addr[LGSIZE-1]!=iaddr[LGSIZE-1]); |
initial f_addr_loaded_0 = 1'b0; |
initial f_addr_loaded_1 = 1'b0; |
initial f_addr_loaded = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
f_addr_loaded <= 1'b0; |
else if (i_ce) |
begin |
f_addr_loaded_0 <= 1'b0; |
f_addr_loaded_1 <= 1'b0; |
end else if (i_ce) |
begin |
if (iaddr == f_const_addr) |
begin |
f_addr_loaded_0 <= 1'b1; |
f_addr_loaded_1 <= 1'b1; |
end |
|
if (f_reading) |
begin |
if ((braddr == f_const_addr[LGSIZE-3:0]) |
&&(iaddr[LGSIZE-2] == 1'b0)) |
f_addr_loaded_0 <= 1'b0; |
|
if ((braddr == f_const_addr[LGSIZE-3:0]) |
&&(iaddr[LGSIZE-2] == 1'b1)) |
f_addr_loaded_1 <= 1'b0; |
end |
if (wraddr == f_const_addr) |
f_addr_loaded <= 1'b1; |
else if (rdaddr == f_const_addr) |
f_addr_loaded <= 1'b0; |
end |
|
always @(posedge i_clk) |
if ((i_ce)&&(iaddr == f_const_addr)) |
if ((i_ce)&&(wraddr == f_const_addr)) |
begin |
f_data_0 <= i_in_0; |
f_data_1 <= i_in_1; |
`ASSERT(!f_addr_loaded_0); |
`ASSERT(!f_addr_loaded_1); |
f_addr_value <= i_in; |
`ASSERT(!f_addr_loaded); |
end |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset)) |
&&($past(f_addr_loaded_0))&&(!f_addr_loaded_0)) |
begin |
assert(!$past(iaddr[LGSIZE-2])); |
if (f_const_addr[LGSIZE-2]) |
assert(o_out_1 == f_data_0); |
else |
assert(o_out_0 == f_data_0); |
end |
&&($past(f_addr_loaded))&&(!f_addr_loaded)) |
assert(o_out == f_addr_value); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset)) |
&&($past(f_addr_loaded_1))&&(!f_addr_loaded_1)) |
begin |
assert($past(iaddr[LGSIZE-2])); |
if (f_const_addr[LGSIZE-2]) |
assert(o_out_1 == f_data_1); |
else |
assert(o_out_0 == f_data_1); |
end |
|
always @(*) |
`ASSERT(o_sync == ((iaddr[LGSIZE-2:0] == 1)&&(!in_reset))); |
if (o_sync) |
assert(wraddr[LGSIZE-1:0] == 1); |
|
// Before writing to a section, the loaded flags should be |
// zero |
always @(*) |
if (f_writing) |
begin |
`ASSERT(f_addr_loaded_0 == (iaddr[LGSIZE-2:0] |
> f_const_addr[LGSIZE-2:0])); |
`ASSERT(f_addr_loaded_1 == (iaddr[LGSIZE-2:0] |
> f_const_addr[LGSIZE-2:0])); |
end |
if ((wraddr[LGSIZE]==f_const_addr[LGSIZE]) |
&&(wraddr[LGSIZE-1:0] |
<= f_const_addr[LGSIZE-1:0])) |
`ASSERT(!f_addr_loaded); |
|
// If we were writing, and now we are reading, then both |
// f_addr_loaded flags must be set |
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset)) |
&&($past(f_writing))&&(f_reading)) |
begin |
`ASSERT(f_addr_loaded_0); |
`ASSERT(f_addr_loaded_1); |
end |
|
always @(*) |
if (f_writing) |
`ASSERT(f_addr_loaded_0 == f_addr_loaded_1); |
if ((rdaddr[LGSIZE]==f_const_addr[LGSIZE])&&(f_addr_loaded)) |
`ASSERT(wraddr[LGSIZE-1:0] |
<= f_reversed_addr[LGSIZE-1:0]+1); |
|
// When reading, and the loaded flag is zero, our pointer |
// must not have hit the address of interest yet |
always @(*) |
if ((!in_reset)&&(f_reading)) |
`ASSERT(f_addr_loaded_0 == |
((!iaddr[LGSIZE-2])&&(iaddr[LGSIZE-3:0] |
<= f_reversed_addr[LGSIZE-3:0]))); |
always @(*) |
if ((!in_reset)&&(f_reading)) |
`ASSERT(f_addr_loaded_1 == |
((!iaddr[LGSIZE-2])||(iaddr[LGSIZE-3:0] |
<= f_reversed_addr[LGSIZE-3:0]))); |
always @(*) |
if ((in_reset)&&(f_reading)) |
begin |
`ASSERT(!f_addr_loaded_0); |
`ASSERT(!f_addr_loaded_1); |
end |
if (f_addr_loaded) |
`ASSERT(brmem[f_const_addr] == f_addr_value); |
|
always @(*) |
if(iaddr[LGSIZE-1]) |
`ASSERT(!in_reset); |
|
always @(*) |
if (f_addr_loaded_0) |
`ASSERT(mem_e[f_const_addr] == f_data_0); |
always @(*) |
if (f_addr_loaded_1) |
`ASSERT(mem_o[f_const_addr] == f_data_1); |
|
|
`endif // FORMAL |
endmodule |
/rtl/butterfly.v
77,23 → 77,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
147,13 → 149,45
localparam MPYREMAINDER = MPYDELAY - CKPCE*(MPYDELAY/CKPCE); |
|
|
input i_clk, i_reset, i_ce; |
input [(2*CWIDTH-1):0] i_coef; |
input [(2*IWIDTH-1):0] i_left, i_right; |
input i_aux; |
input wire i_clk, i_reset, i_ce; |
input wire [(2*CWIDTH-1):0] i_coef; |
input wire [(2*IWIDTH-1):0] i_left, i_right; |
input wire i_aux; |
output wire [(2*OWIDTH-1):0] o_left, o_right; |
output reg o_aux; |
|
`ifdef FORMAL |
localparam F_LGDEPTH = (AUXLEN > 64) ? 7 |
: (AUXLEN > 32) ? 6 |
: (AUXLEN > 16) ? 5 |
: (AUXLEN > 8) ? 4 |
: (AUXLEN > 4) ? 3 : 2; |
|
localparam F_DEPTH = AUXLEN; |
localparam [F_LGDEPTH-1:0] F_D = F_DEPTH[F_LGDEPTH-1:0]-1; |
|
reg signed [IWIDTH-1:0] f_dlyleft_r [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyleft_i [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyright_r [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyright_i [0:F_DEPTH-1]; |
reg signed [CWIDTH-1:0] f_dlycoeff_r [0:F_DEPTH-1]; |
reg signed [CWIDTH-1:0] f_dlycoeff_i [0:F_DEPTH-1]; |
reg signed [F_DEPTH-1:0] f_dlyaux; |
|
wire signed [IWIDTH:0] f_predifr, f_predifi; |
wire signed [IWIDTH+CWIDTH+3-1:0] f_predifrx, f_predifix; |
wire signed [CWIDTH:0] f_sumcoef; |
wire signed [IWIDTH+1:0] f_sumdiff; |
wire signed [IWIDTH:0] f_sumr, f_sumi; |
wire signed [IWIDTH+CWIDTH+3-1:0] f_sumrx, f_sumix; |
wire signed [IWIDTH:0] f_difr, f_difi; |
wire signed [IWIDTH+CWIDTH+3-1:0] f_difrx, f_difix; |
wire signed [IWIDTH+CWIDTH+3-1:0] f_widecoeff_r, f_widecoeff_i; |
|
wire [(CWIDTH):0] fp_one_ic, fp_two_ic, fp_three_ic, f_p3c_in; |
wire [(IWIDTH+1):0] fp_one_id, fp_two_id, fp_three_id, f_p3d_in; |
`endif |
|
reg [(2*IWIDTH-1):0] r_left, r_right; |
reg [(2*CWIDTH-1):0] r_coef, r_coef_2; |
wire signed [(IWIDTH-1):0] r_left_r, r_left_i, r_right_r, r_right_i; |
171,20 → 205,20
|
// Set up the input to the multiply |
always @(posedge i_clk) |
if (i_ce) |
begin |
// One clock just latches the inputs |
r_left <= i_left; // No change in # of bits |
r_right <= i_right; |
r_coef <= i_coef; |
// Next clock adds/subtracts |
r_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits |
r_sum_i <= r_left_i + r_right_i; |
r_dif_r <= r_left_r - r_right_r; |
r_dif_i <= r_left_i - r_right_i; |
// Other inputs are simply delayed on second clock |
r_coef_2<= r_coef; |
end |
if (i_ce) |
begin |
// One clock just latches the inputs |
r_left <= i_left; // No change in # of bits |
r_right <= i_right; |
r_coef <= i_coef; |
// Next clock adds/subtracts |
r_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits |
r_sum_i <= r_left_i + r_right_i; |
r_dif_r <= r_left_r - r_right_r; |
r_dif_i <= r_left_i - r_right_i; |
// Other inputs are simply delayed on second clock |
r_coef_2<= r_coef; |
end |
|
// Don't forget to record the even side, since it doesn't need |
// to be multiplied, but yet we still need the results in sync |
191,17 → 225,17
// with the answer when it is ready. |
initial fifo_addr = 0; |
always @(posedge i_clk) |
if (i_reset) |
fifo_addr <= 0; |
else if (i_ce) |
// Need to delay the sum side--nothing else happens |
// to it, but it needs to stay synchronized with the |
// right side. |
fifo_addr <= fifo_addr + 1; |
if (i_reset) |
fifo_addr <= 0; |
else if (i_ce) |
// Need to delay the sum side--nothing else happens |
// to it, but it needs to stay synchronized with the |
// right side. |
fifo_addr <= fifo_addr + 1; |
|
always @(posedge i_clk) |
if (i_ce) |
fifo_left[fifo_addr] <= { r_sum_r, r_sum_i }; |
if (i_ce) |
fifo_left[fifo_addr] <= { r_sum_r, r_sum_i }; |
|
wire signed [(CWIDTH-1):0] ir_coef_r, ir_coef_i; |
assign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH]; |
253,12 → 287,24
// simpler, multiply. |
longbimpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce, |
{ir_coef_r[CWIDTH-1],ir_coef_r}, |
{r_dif_r[IWIDTH],r_dif_r}, p_one); |
{r_dif_r[IWIDTH],r_dif_r}, p_one |
`ifdef FORMAL |
, fp_one_ic, fp_one_id |
`endif |
); |
longbimpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce, |
{ir_coef_i[CWIDTH-1],ir_coef_i}, |
{r_dif_i[IWIDTH],r_dif_i}, p_two); |
{r_dif_i[IWIDTH],r_dif_i}, p_two |
`ifdef FORMAL |
, fp_two_ic, fp_two_id |
`endif |
); |
longbimpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce, |
p3c_in, p3d_in, p_three); |
p3c_in, p3d_in, p_three |
`ifdef FORMAL |
, fp_three_ic, fp_three_id |
`endif |
); |
|
end else if (CKPCE == 2) |
begin : CKPCE_TWO |
281,7 → 327,19
reg signed [(CWIDTH+IWIDTH+3)-1:0] mpy_pipe_out; |
reg signed [IWIDTH+CWIDTH+3-1:0] longmpy; |
|
`ifdef FORMAL |
wire [CWIDTH:0] f_past_ic; |
wire [IWIDTH+1:0] f_past_id; |
wire [CWIDTH:0] f_past_mux_ic; |
wire [IWIDTH+1:0] f_past_mux_id; |
|
reg [CWIDTH:0] f_rpone_ic, f_rptwo_ic, f_rpthree_ic, |
f_rp2one_ic, f_rp2two_ic, f_rp2three_ic; |
reg [IWIDTH+1:0] f_rpone_id, f_rptwo_id, f_rpthree_id, |
f_rp2one_id, f_rp2two_id, f_rp2three_id; |
`endif |
|
|
initial ce_phase = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
314,12 → 372,20
end |
|
longbimpy #(CWIDTH+1,IWIDTH+2) mpy0(i_clk, mpy_pipe_v, |
mpy_cof_sum, mpy_dif_sum, longmpy); |
mpy_cof_sum, mpy_dif_sum, longmpy |
`ifdef FORMAL |
, f_past_ic, f_past_id |
`endif |
); |
|
longbimpy #(CWIDTH+1,IWIDTH+2) mpy1(i_clk, mpy_pipe_v, |
{ mpy_pipe_vc[CWIDTH-1], mpy_pipe_vc }, |
{ mpy_pipe_vd[IWIDTH ], mpy_pipe_vd }, |
mpy_pipe_out); |
mpy_pipe_out |
`ifdef FORMAL |
, f_past_mux_ic, f_past_mux_id |
`endif |
); |
|
reg signed [((IWIDTH+2)+(CWIDTH+1)-1):0] |
rp_one, rp_two, rp_three, |
328,28 → 394,57
always @(posedge i_clk) |
if (((i_ce)&&(!MPYDELAY[0])) |
||((ce_phase)&&(MPYDELAY[0]))) |
begin |
rp_one <= mpy_pipe_out; |
`ifdef FORMAL |
f_rpone_ic <= f_past_mux_ic; |
f_rpone_id <= f_past_mux_id; |
`endif |
end |
|
always @(posedge i_clk) |
if (((i_ce)&&(MPYDELAY[0])) |
||((ce_phase)&&(!MPYDELAY[0]))) |
begin |
rp_two <= mpy_pipe_out; |
`ifdef FORMAL |
f_rptwo_ic <= f_past_mux_ic; |
f_rptwo_id <= f_past_mux_id; |
`endif |
end |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
rp_three <= longmpy; |
`ifdef FORMAL |
f_rpthree_ic <= f_past_ic; |
f_rpthree_id <= f_past_id; |
`endif |
end |
|
|
// Our outputs *MUST* be set on a clock where i_ce is |
// true for the following logic to work. Make that |
// happen here. |
always @(posedge i_clk) |
if (i_ce) |
begin |
rp2_one<= rp_one; |
always @(posedge i_clk) |
if (i_ce) |
rp2_two <= rp_two; |
always @(posedge i_clk) |
if (i_ce) |
rp2_three<= rp_three; |
`ifdef FORMAL |
f_rp2one_ic <= f_rpone_ic; |
f_rp2one_id <= f_rpone_id; |
|
f_rp2two_ic <= f_rptwo_ic; |
f_rp2two_id <= f_rptwo_id; |
|
f_rp2three_ic <= f_rpthree_ic; |
f_rp2three_id <= f_rpthree_id; |
`endif |
end |
|
assign p_one = rp2_one; |
assign p_two = (!MPYDELAY[0])? rp2_two : rp_two; |
assign p_three = ( MPYDELAY[0])? rp_three : rp2_three; |
359,6 → 454,17
assign unused = { rp2_two, rp2_three }; |
// verilator lint_on UNUSED |
|
`ifdef FORMAL |
assign fp_one_ic = f_rp2one_ic; |
assign fp_one_id = f_rp2one_id; |
|
assign fp_two_ic = (!MPYDELAY[0])? f_rp2two_ic : f_rptwo_ic; |
assign fp_two_id = (!MPYDELAY[0])? f_rp2two_id : f_rptwo_id; |
|
assign fp_three_ic= (MPYDELAY[0])? f_rpthree_ic : f_rp2three_ic; |
assign fp_three_id= (MPYDELAY[0])? f_rpthree_id : f_rp2three_id; |
`endif |
|
end else if (CKPCE <= 3) |
begin : CKPCE_THREE |
// Coefficient multiply inputs |
376,6 → 482,18
|
reg signed [ (CWIDTH+IWIDTH+3)-1:0] mpy_pipe_out; |
|
`ifdef FORMAL |
wire [CWIDTH:0] f_past_ic; |
wire [IWIDTH+1:0] f_past_id; |
|
reg [CWIDTH:0] f_rpone_ic, f_rptwo_ic, f_rpthree_ic, |
f_rp2one_ic, f_rp2two_ic, f_rp2three_ic, |
f_rp3one_ic; |
reg [IWIDTH+1:0] f_rpone_id, f_rptwo_id, f_rpthree_id, |
f_rp2one_id, f_rp2two_id, f_rp2three_id, |
f_rp3one_id; |
`endif |
|
initial ce_phase = 3'b011; |
always @(posedge i_clk) |
if (i_reset) |
389,28 → 507,32
mpy_pipe_v = (i_ce)||(ce_phase < 3'b010); |
|
always @(posedge i_clk) |
if (ce_phase == 3'b000) |
begin |
// Second clock |
mpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= { |
ir_coef_r[CWIDTH-1], ir_coef_r, |
ir_coef_i[CWIDTH-1], ir_coef_i }; |
mpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r; |
mpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= { |
r_dif_r[IWIDTH], r_dif_r, |
r_dif_i[IWIDTH], r_dif_i }; |
mpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i; |
if (ce_phase == 3'b000) |
begin |
// Second clock |
mpy_pipe_c[3*(CWIDTH+1)-1:(CWIDTH+1)] <= { |
ir_coef_r[CWIDTH-1], ir_coef_r, |
ir_coef_i[CWIDTH-1], ir_coef_i }; |
mpy_pipe_c[CWIDTH:0] <= ir_coef_i + ir_coef_r; |
mpy_pipe_d[3*(IWIDTH+2)-1:(IWIDTH+2)] <= { |
r_dif_r[IWIDTH], r_dif_r, |
r_dif_i[IWIDTH], r_dif_i }; |
mpy_pipe_d[(IWIDTH+2)-1:0] <= r_dif_r + r_dif_i; |
|
end else if (mpy_pipe_v) |
begin |
mpy_pipe_c[3*(CWIDTH+1)-1:0] <= { |
mpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1'b0}} }; |
mpy_pipe_d[3*(IWIDTH+2)-1:0] <= { |
mpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1'b0}} }; |
end |
end else if (mpy_pipe_v) |
begin |
mpy_pipe_c[3*(CWIDTH+1)-1:0] <= { |
mpy_pipe_c[2*(CWIDTH+1)-1:0], {(CWIDTH+1){1'b0}} }; |
mpy_pipe_d[3*(IWIDTH+2)-1:0] <= { |
mpy_pipe_d[2*(IWIDTH+2)-1:0], {(IWIDTH+2){1'b0}} }; |
end |
|
longbimpy #(CWIDTH+1,IWIDTH+2) mpy(i_clk, mpy_pipe_v, |
mpy_pipe_vc, mpy_pipe_vd, mpy_pipe_out); |
mpy_pipe_vc, mpy_pipe_vd, mpy_pipe_out |
`ifdef FORMAL |
, f_past_ic, f_past_id |
`endif |
); |
|
reg signed [((IWIDTH+2)+(CWIDTH+1)-1):0] |
rp_one, rp_two, rp_three, |
422,32 → 544,77
begin |
|
if (i_ce) |
begin |
rp_two <= mpy_pipe_out; |
else if (ce_phase == 3'b000) |
`ifdef FORMAL |
f_rptwo_ic <= f_past_ic; |
f_rptwo_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b000) |
begin |
rp_three <= mpy_pipe_out; |
else if (ce_phase == 3'b001) |
`ifdef FORMAL |
f_rpthree_ic <= f_past_ic; |
f_rpthree_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b001) |
begin |
rp_one <= mpy_pipe_out; |
|
`ifdef FORMAL |
f_rpone_ic <= f_past_ic; |
f_rpone_id <= f_past_id; |
`endif |
end |
end else if (MPYREMAINDER == 1) |
begin |
|
if (i_ce) |
begin |
rp_one <= mpy_pipe_out; |
else if (ce_phase == 3'b000) |
`ifdef FORMAL |
f_rpone_ic <= f_past_ic; |
f_rpone_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b000) |
begin |
rp_two <= mpy_pipe_out; |
else if (ce_phase == 3'b001) |
`ifdef FORMAL |
f_rptwo_ic <= f_past_ic; |
f_rptwo_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b001) |
begin |
rp_three <= mpy_pipe_out; |
|
`ifdef FORMAL |
f_rpthree_ic <= f_past_ic; |
f_rpthree_id <= f_past_id; |
`endif |
end |
end else // if (MPYREMAINDER == 2) |
begin |
|
if (i_ce) |
begin |
rp_three <= mpy_pipe_out; |
else if (ce_phase == 3'b000) |
`ifdef FORMAL |
f_rpthree_ic <= f_past_ic; |
f_rpthree_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b000) |
begin |
rp_one <= mpy_pipe_out; |
else if (ce_phase == 3'b001) |
`ifdef FORMAL |
f_rpone_ic <= f_past_ic; |
f_rpone_id <= f_past_id; |
`endif |
end else if (ce_phase == 3'b001) |
begin |
rp_two <= mpy_pipe_out; |
|
`ifdef FORMAL |
f_rptwo_ic <= f_past_ic; |
f_rptwo_id <= f_past_id; |
`endif |
end |
end |
|
always @(posedge i_clk) |
457,11 → 624,35
rp2_two <= rp_two; |
rp2_three <= (MPYREMAINDER == 2) ? mpy_pipe_out : rp_three; |
rp3_one <= (MPYREMAINDER == 0) ? rp2_one : rp_one; |
`ifdef FORMAL |
f_rp2one_ic <= f_rpone_ic; |
f_rp2one_id <= f_rpone_id; |
|
f_rp2two_ic <= f_rptwo_ic; |
f_rp2two_id <= f_rptwo_id; |
|
f_rp2three_ic <= (MPYREMAINDER==2) ? f_past_ic : f_rpthree_ic; |
f_rp2three_id <= (MPYREMAINDER==2) ? f_past_id : f_rpthree_id; |
f_rp3one_ic <= (MPYREMAINDER==0) ? f_rp2one_ic : f_rpone_ic; |
f_rp3one_id <= (MPYREMAINDER==0) ? f_rp2one_id : f_rpone_id; |
`endif |
end |
|
assign p_one = rp3_one; |
assign p_two = rp2_two; |
assign p_three = rp2_three; |
|
`ifdef FORMAL |
assign fp_one_ic = f_rp3one_ic; |
assign fp_one_id = f_rp3one_id; |
|
assign fp_two_ic = f_rp2two_ic; |
assign fp_two_id = f_rp2two_id; |
|
assign fp_three_ic = f_rp2three_ic; |
assign fp_three_id = f_rp2three_id; |
`endif |
|
end endgenerate |
// These values are held in memory and delayed during the |
// multiply. Here, we recover them. During the multiply, |
471,8 → 662,10
// extension. |
wire signed [(IWIDTH+CWIDTH):0] fifo_i, fifo_r; |
reg [(2*IWIDTH+1):0] fifo_read; |
assign fifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} }; |
assign fifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} }; |
assign fifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, |
fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} }; |
assign fifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, |
fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} }; |
|
|
reg signed [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i; |
521,35 → 714,35
mpy_i, rnd_right_i); |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
// First clock, recover all values |
fifo_read <= fifo_left[fifo_read_addr]; |
// These values are IWIDTH+CWIDTH+3 bits wide |
// although they only need to be (IWIDTH+1) |
// + (CWIDTH) bits wide. (We've got two |
// extra bits we need to get rid of.) |
mpy_r <= p_one - p_two; |
mpy_i <= p_three - p_one - p_two; |
end |
if (i_ce) |
begin |
// First clock, recover all values |
fifo_read <= fifo_left[fifo_read_addr]; |
// These values are IWIDTH+CWIDTH+3 bits wide |
// although they only need to be (IWIDTH+1) |
// + (CWIDTH) bits wide. (We've got two |
// extra bits we need to get rid of.) |
mpy_r <= p_one - p_two; |
mpy_i <= p_three - p_one - p_two; |
end |
|
reg [(AUXLEN-1):0] aux_pipeline; |
initial aux_pipeline = 0; |
always @(posedge i_clk) |
if (i_reset) |
aux_pipeline <= 0; |
else if (i_ce) |
aux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux }; |
if (i_reset) |
aux_pipeline <= 0; |
else if (i_ce) |
aux_pipeline <= { aux_pipeline[(AUXLEN-2):0], i_aux }; |
|
initial o_aux = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
o_aux <= 1'b0; |
else if (i_ce) |
begin |
// Second clock, latch for final clock |
o_aux <= aux_pipeline[AUXLEN-1]; |
end |
if (i_reset) |
o_aux <= 1'b0; |
else if (i_ce) |
begin |
// Second clock, latch for final clock |
o_aux <= aux_pipeline[AUXLEN-1]; |
end |
|
// As a final step, we pack our outputs into two packed two's |
// complement numbers per output word, so that each output word |
558,27 → 751,7
assign o_left = { rnd_left_r, rnd_left_i }; |
assign o_right= { rnd_right_r,rnd_right_i}; |
|
`ifdef VERILATOR |
`define FORMAL |
`endif |
`ifdef FORMAL |
localparam F_LGDEPTH = (AUXLEN > 64) ? 7 |
: (AUXLEN > 32) ? 6 |
: (AUXLEN > 16) ? 5 |
: (AUXLEN > 8) ? 4 |
: (AUXLEN > 4) ? 3 : 2; |
|
localparam F_DEPTH = AUXLEN; |
localparam [F_LGDEPTH-1:0] F_D = F_DEPTH[F_LGDEPTH-1:0]-1; |
|
reg signed [IWIDTH-1:0] f_dlyleft_r [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyleft_i [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyright_r [0:F_DEPTH-1]; |
reg signed [IWIDTH-1:0] f_dlyright_i [0:F_DEPTH-1]; |
reg signed [CWIDTH-1:0] f_dlycoeff_r [0:F_DEPTH-1]; |
reg signed [CWIDTH-1:0] f_dlycoeff_i [0:F_DEPTH-1]; |
reg signed [F_DEPTH-1:0] f_dlyaux; |
|
initial f_dlyaux[0] = 0; |
always @(posedge i_clk) |
if (i_reset) |
616,30 → 789,77
end endgenerate |
|
`ifndef VERILATOR |
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3)) |
&&(!$past(i_ce,4))) |
assume(i_ce); |
|
// |
// Make some i_ce restraining assumptions. These are necessary |
// to get the design to pass induction. |
// |
generate if (CKPCE <= 1) |
begin |
|
// i_ce is allowed to be anything in this mode |
// No primary i_ce assumption. i_ce can be anything |
// |
// First induction i_ce assumption: No more than one |
// empty cycle between used cycles. Without this |
// assumption, or one like it, induction would never |
// complete. |
always @(posedge i_clk) |
if ((!$past(i_ce))) |
assume(i_ce); |
|
// Second induction i_ce assumption: avoid skipping an |
// i_ce and thus stretching out the i_ce cycle two i_ce |
// cycles in a row. Without this assumption, induction |
// would still complete, it would just take longer |
always @(posedge i_clk) |
if (($past(i_ce))&&(!$past(i_ce,2))) |
assume(i_ce); |
|
end else if (CKPCE == 2) |
begin : F_CKPCE_TWO |
|
// Primary i_ce assumption: Every i_ce cycle is followed |
// by a non-i_ce cycle, so the multiplies can be |
// multiplexed |
always @(posedge i_clk) |
if ($past(i_ce)) |
assume(!i_ce); |
if ($past(i_ce)) |
assume(!i_ce); |
// First induction assumption: Don't let this stretch |
// out too far. This is necessary to pass induction |
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2))) |
assume(i_ce); |
|
always @(posedge i_clk) |
if ((!$past(i_ce))&&($past(i_ce,2)) |
&&(!$past(i_ce,3))&&(!$past(i_ce,4))) |
assume(i_ce); |
|
end else if (CKPCE == 3) |
begin : F_CKPCE_THREE |
|
// Primary i_ce assumption: Following any i_ce cycle, |
// there must be two clock cycles with i_ce de-asserted |
always @(posedge i_clk) |
if (($past(i_ce))||($past(i_ce,2))) |
assume(!i_ce); |
if (($past(i_ce))||($past(i_ce,2))) |
assume(!i_ce); |
|
// Induction assumption: Allow i_ce's every third or |
// fourth clock, but don't allow them to be separated |
// further than that |
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))) |
assume(i_ce); |
|
// Second induction assumption, to speed up the proof: |
// If it's the earliest possible opportunity for an |
// i_ce, and the last i_ce was late, don't let this one |
// be late as well. |
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2)) |
&&($past(i_ce,3))&&(!$past(i_ce,4)) |
&&(!$past(i_ce,5))&&(!$past(i_ce,6))) |
assume(i_ce); |
|
end endgenerate |
`endif |
|
651,7 → 871,6
else if ((i_ce)&&(!(&f_startup_counter))) |
f_startup_counter <= f_startup_counter + 1; |
|
wire signed [IWIDTH:0] f_sumr, f_sumi; |
always @(*) |
begin |
f_sumr = f_dlyleft_r[F_D] + f_dlyright_r[F_D]; |
658,11 → 877,9
f_sumi = f_dlyleft_i[F_D] + f_dlyright_i[F_D]; |
end |
|
wire signed [IWIDTH+CWIDTH+3-1:0] f_sumrx, f_sumix; |
assign f_sumrx = { {(4){f_sumr[IWIDTH]}}, f_sumr, {(CWIDTH-2){1'b0}} }; |
assign f_sumix = { {(4){f_sumi[IWIDTH]}}, f_sumi, {(CWIDTH-2){1'b0}} }; |
|
wire signed [IWIDTH:0] f_difr, f_difi; |
always @(*) |
begin |
f_difr = f_dlyleft_r[F_D] - f_dlyright_r[F_D]; |
669,11 → 886,9
f_difi = f_dlyleft_i[F_D] - f_dlyright_i[F_D]; |
end |
|
wire signed [IWIDTH+CWIDTH+3-1:0] f_difrx, f_difix; |
assign f_difrx = { {(CWIDTH+2){f_difr[IWIDTH]}}, f_difr }; |
assign f_difix = { {(CWIDTH+2){f_difi[IWIDTH]}}, f_difi }; |
|
wire signed [IWIDTH+CWIDTH+3-1:0] f_widecoeff_r, f_widecoeff_i; |
assign f_widecoeff_r ={ {(IWIDTH+3){f_dlycoeff_r[F_D][CWIDTH-1]}}, |
f_dlycoeff_r[F_D] }; |
assign f_widecoeff_i ={ {(IWIDTH+3){f_dlycoeff_i[F_D][CWIDTH-1]}}, |
729,7 → 944,6
// otherwise |
|
|
wire signed [IWIDTH:0] f_predifr, f_predifi; |
always @(*) |
begin |
f_predifr = f_dlyleft_r[F_D-1] - f_dlyright_r[F_D-1]; |
736,12 → 950,9
f_predifi = f_dlyleft_i[F_D-1] - f_dlyright_i[F_D-1]; |
end |
|
wire signed [IWIDTH+CWIDTH+3-1:0] f_predifrx, f_predifix; |
assign f_predifrx = { {(CWIDTH+2){f_predifr[IWIDTH]}}, f_predifr }; |
assign f_predifix = { {(CWIDTH+2){f_predifi[IWIDTH]}}, f_predifi }; |
|
wire signed [CWIDTH:0] f_sumcoef; |
wire signed [IWIDTH+1:0] f_sumdiff; |
always @(*) |
begin |
f_sumcoef = f_dlycoeff_r[F_D-1] + f_dlycoeff_i[F_D-1]; |
785,6 → 996,8
assert(p_three == f_sumcoef); |
// verilator lint_on WIDTH |
`ifdef VERILATOR |
// Check that the multiplies match--but *ONLY* if using |
// Verilator, and not if using formal proper |
assert(p_one == f_predifr * f_dlycoeff_r[F_D-1]); |
assert(p_two == f_predifi * f_dlycoeff_i[F_D-1]); |
assert(p_three == f_sumdiff * f_sumcoef); |
791,6 → 1004,28
`endif // VERILATOR |
end |
|
// The following logic formally insists that our version of the |
// inputs to the multiply matches what the (multiclock) multiply |
// thinks its inputs were. While this may seem redundant, the |
// proof will not complete in any reasonable amount of time |
// without these assertions. |
|
assign f_p3c_in = f_dlycoeff_i[F_D-1] + f_dlycoeff_r[F_D-1]; |
assign f_p3d_in = f_predifi + f_predifr; |
|
always @(*) |
if (f_startup_counter >= { 1'b0, F_D }) |
begin |
assert(fp_one_ic == { f_dlycoeff_r[F_D-1][CWIDTH-1], |
f_dlycoeff_r[F_D-1][CWIDTH-1:0] }); |
assert(fp_two_ic == { f_dlycoeff_i[F_D-1][CWIDTH-1], |
f_dlycoeff_i[F_D-1][CWIDTH-1:0] }); |
assert(fp_one_id == { f_predifr[IWIDTH], f_predifr }); |
assert(fp_two_id == { f_predifi[IWIDTH], f_predifi }); |
assert(fp_three_ic == f_p3c_in); |
assert(fp_three_id == f_p3d_in); |
end |
|
// F_CHECK will be set externally by the solver, so that we can |
// double check that the solver is actually testing what we think |
// it is testing. We'll set it here to MPYREMAINDER, which will |
/rtl/convround.v
19,23 → 19,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
45,8 → 47,8
// |
module convround(i_clk, i_ce, i_val, o_val); |
parameter IWID=16, OWID=8, SHIFT=0; |
input i_clk, i_ce; |
input signed [(IWID-1):0] i_val; |
input wire i_clk, i_ce; |
input wire signed [(IWID-1):0] i_val; |
output reg signed [(OWID-1):0] o_val; |
|
// Let's deal with three cases to be as general as we can be here |
/rtl/fftmain.v
8,8 → 8,8
// implementation. As such, all other modules are subordinate |
// to this one. This module accomplish a fixed size Complex FFT on |
// 2048 data points. |
// The FFT is fully pipelined, and accepts as inputs two complex two's |
// complement samples per clock. |
// The FFT is fully pipelined, and accepts as inputs one complex two's |
// complement sample per clock. |
// |
// Parameters: |
// i_clk The clock. All operations are synchronous with this clock. |
18,31 → 18,27
// Further, following a reset, the o_sync line will go |
// high the same time the first output sample is valid. |
// i_ce A clock enable line. If this line is set, this module |
// will accept two complex values as inputs, and produce |
// two (possibly empty) complex values as outputs. |
// i_left The first of two complex input samples. This value is split |
// will accept one complex input value, and produce |
// one (possibly empty) complex output value. |
// i_sample The complex input sample. This value is split |
// into two two's complement numbers, 15 bits each, with |
// the real portion in the high order bits, and the |
// imaginary portion taking the bottom 15 bits. |
// i_right This is the same thing as i_left, only this is the second of |
// two such samples. Hence, i_left would contain input |
// sample zero, i_right would contain sample one. On the |
// next clock i_left would contain input sample two, |
// i_right number three and so forth. |
// o_left The first of two output samples, of the same format as i_left, |
// o_result The output result, of the same format as i_sample, |
// only having 21 bits for each of the real and imaginary |
// components, leading to 42 bits total. |
// o_right The second of two output samples produced each clock. This has |
// the same format as o_left. |
// o_sync A one bit output indicating the first valid sample produced by |
// this FFT following a reset. Ever after, this will |
// indicate the first sample of an FFT frame. |
// o_sync A one bit output indicating the first sample of the FFT frame. |
// It also indicates the first valid sample out of the FFT |
// on the first frame. |
// |
// Arguments: This file was computer generated using the following command |
// line: |
// |
// % ./fftgen -v -d ../rtl -f 2048 -2 -p 0 -n 15 -a ../bench/cpp/fftsize.h |
// % ./fftgen -v -d ../rtl -f 2048 -1 -k 1 -p 0 -n 15 -a ../bench/cpp/fftsize.h |
// |
// This core will use hardware accelerated multiplies (DSPs) |
// for 0 of the 11 stages |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
50,23 → 46,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
77,166 → 75,102
// |
// |
module fftmain(i_clk, i_reset, i_ce, |
i_left, i_right, |
o_left, o_right, o_sync); |
parameter IWIDTH=15, OWIDTH=21, LGWIDTH=11; |
i_sample, o_result, o_sync); |
// The bit-width of the input, IWIDTH, output, OWIDTH, and the log |
// of the FFT size. These are localparams, rather than parameters, |
// because once the core has been generated, they can no longer be |
// changed. (These values can be adjusted by running the core |
// generator again.) The reason is simply that these values have |
// been hardwired into the core at several places. |
localparam IWIDTH=15, OWIDTH=21, LGWIDTH=11; |
// |
input i_clk, i_reset, i_ce; |
input wire i_clk, i_reset, i_ce; |
// |
input [(2*IWIDTH-1):0] i_left, i_right; |
output reg [(2*OWIDTH-1):0] o_left, o_right; |
input wire [(2*IWIDTH-1):0] i_sample; |
output reg [(2*OWIDTH-1):0] o_result; |
output reg o_sync; |
|
|
// Outputs of the FFT, ready for bit reversal. |
wire [(2*OWIDTH-1):0] br_left, br_right; |
wire [(2*OWIDTH-1):0] br_sample; |
|
|
wire w_s2048; |
// verilator lint_off UNUSED |
wire w_os2048; |
// verilator lint_on UNUSED |
wire [31:0] w_e2048, w_o2048; |
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0, |
0, 1, "cmem_e4096.hex") |
stage_e2048(i_clk, i_reset, i_ce, |
(!i_reset), i_left, w_e2048, w_s2048); |
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0, |
0, 1, "cmem_o4096.hex") |
stage_o2048(i_clk, i_reset, i_ce, |
(!i_reset), i_right, w_o2048, w_os2048); |
wire [31:0] w_d2048; |
fftstage #(IWIDTH,IWIDTH+4,16,10,0, |
0, 1, "cmem_2048.hex") |
stage_2048(i_clk, i_reset, i_ce, |
(!i_reset), i_sample, w_d2048, w_s2048); |
|
|
wire w_s1024; |
// verilator lint_off UNUSED |
wire w_os1024; |
// verilator lint_on UNUSED |
wire [33:0] w_e1024, w_o1024; |
fftstage #(16,20,17,11,8,0, |
0, 1, "cmem_e2048.hex") |
stage_e1024(i_clk, i_reset, i_ce, |
w_s2048, w_e2048, w_e1024, w_s1024); |
fftstage #(16,20,17,11,8,0, |
0, 1, "cmem_o2048.hex") |
stage_o1024(i_clk, i_reset, i_ce, |
w_s2048, w_o2048, w_o1024, w_os1024); |
wire [33:0] w_d1024; |
fftstage #(16,20,17,9,0, |
0, 1, "cmem_1024.hex") |
stage_1024(i_clk, i_reset, i_ce, |
w_s2048, w_d2048, w_d1024, w_s1024); |
|
wire w_s512; |
// verilator lint_off UNUSED |
wire w_os512; |
// verilator lint_on UNUSED |
wire [33:0] w_e512, w_o512; |
fftstage #(17,21,17,11,7,0, |
0, 1, "cmem_e1024.hex") |
stage_e512(i_clk, i_reset, i_ce, |
w_s1024, w_e1024, w_e512, w_s512); |
fftstage #(17,21,17,11,7,0, |
0, 1, "cmem_o1024.hex") |
stage_o512(i_clk, i_reset, i_ce, |
w_s1024, w_o1024, w_o512, w_os512); |
wire [33:0] w_d512; |
fftstage #(17,21,17,8,0, |
0, 1, "cmem_512.hex") |
stage_512(i_clk, i_reset, i_ce, |
w_s1024, w_d1024, w_d512, w_s512); |
|
wire w_s256; |
// verilator lint_off UNUSED |
wire w_os256; |
// verilator lint_on UNUSED |
wire [35:0] w_e256, w_o256; |
fftstage #(17,21,18,11,6,0, |
0, 1, "cmem_e512.hex") |
stage_e256(i_clk, i_reset, i_ce, |
w_s512, w_e512, w_e256, w_s256); |
fftstage #(17,21,18,11,6,0, |
0, 1, "cmem_o512.hex") |
stage_o256(i_clk, i_reset, i_ce, |
w_s512, w_o512, w_o256, w_os256); |
wire [35:0] w_d256; |
fftstage #(17,21,18,7,0, |
0, 1, "cmem_256.hex") |
stage_256(i_clk, i_reset, i_ce, |
w_s512, w_d512, w_d256, w_s256); |
|
wire w_s128; |
// verilator lint_off UNUSED |
wire w_os128; |
// verilator lint_on UNUSED |
wire [35:0] w_e128, w_o128; |
fftstage #(18,22,18,11,5,0, |
0, 1, "cmem_e256.hex") |
stage_e128(i_clk, i_reset, i_ce, |
w_s256, w_e256, w_e128, w_s128); |
fftstage #(18,22,18,11,5,0, |
0, 1, "cmem_o256.hex") |
stage_o128(i_clk, i_reset, i_ce, |
w_s256, w_o256, w_o128, w_os128); |
wire [35:0] w_d128; |
fftstage #(18,22,18,6,0, |
0, 1, "cmem_128.hex") |
stage_128(i_clk, i_reset, i_ce, |
w_s256, w_d256, w_d128, w_s128); |
|
wire w_s64; |
// verilator lint_off UNUSED |
wire w_os64; |
// verilator lint_on UNUSED |
wire [37:0] w_e64, w_o64; |
fftstage #(18,22,19,11,4,0, |
0, 1, "cmem_e128.hex") |
stage_e64(i_clk, i_reset, i_ce, |
w_s128, w_e128, w_e64, w_s64); |
fftstage #(18,22,19,11,4,0, |
0, 1, "cmem_o128.hex") |
stage_o64(i_clk, i_reset, i_ce, |
w_s128, w_o128, w_o64, w_os64); |
wire [37:0] w_d64; |
fftstage #(18,22,19,5,0, |
0, 1, "cmem_64.hex") |
stage_64(i_clk, i_reset, i_ce, |
w_s128, w_d128, w_d64, w_s64); |
|
wire w_s32; |
// verilator lint_off UNUSED |
wire w_os32; |
// verilator lint_on UNUSED |
wire [37:0] w_e32, w_o32; |
fftstage #(19,23,19,11,3,0, |
0, 1, "cmem_e64.hex") |
stage_e32(i_clk, i_reset, i_ce, |
w_s64, w_e64, w_e32, w_s32); |
fftstage #(19,23,19,11,3,0, |
0, 1, "cmem_o64.hex") |
stage_o32(i_clk, i_reset, i_ce, |
w_s64, w_o64, w_o32, w_os32); |
wire [37:0] w_d32; |
fftstage #(19,23,19,4,0, |
0, 1, "cmem_32.hex") |
stage_32(i_clk, i_reset, i_ce, |
w_s64, w_d64, w_d32, w_s32); |
|
wire w_s16; |
// verilator lint_off UNUSED |
wire w_os16; |
// verilator lint_on UNUSED |
wire [39:0] w_e16, w_o16; |
fftstage #(19,23,20,11,2,0, |
0, 1, "cmem_e32.hex") |
stage_e16(i_clk, i_reset, i_ce, |
w_s32, w_e32, w_e16, w_s16); |
fftstage #(19,23,20,11,2,0, |
0, 1, "cmem_o32.hex") |
stage_o16(i_clk, i_reset, i_ce, |
w_s32, w_o32, w_o16, w_os16); |
wire [39:0] w_d16; |
fftstage #(19,23,20,3,0, |
0, 1, "cmem_16.hex") |
stage_16(i_clk, i_reset, i_ce, |
w_s32, w_d32, w_d16, w_s16); |
|
wire w_s8; |
// verilator lint_off UNUSED |
wire w_os8; |
// verilator lint_on UNUSED |
wire [39:0] w_e8, w_o8; |
fftstage #(20,24,20,11,1,0, |
0, 1, "cmem_e16.hex") |
stage_e8(i_clk, i_reset, i_ce, |
w_s16, w_e16, w_e8, w_s8); |
fftstage #(20,24,20,11,1,0, |
0, 1, "cmem_o16.hex") |
stage_o8(i_clk, i_reset, i_ce, |
w_s16, w_o16, w_o8, w_os8); |
wire [39:0] w_d8; |
fftstage #(20,24,20,2,0, |
0, 1, "cmem_8.hex") |
stage_8(i_clk, i_reset, i_ce, |
w_s16, w_d16, w_d8, w_s8); |
|
wire w_s4; |
// verilator lint_off UNUSED |
wire w_os4; |
// verilator lint_on UNUSED |
wire [41:0] w_e4, w_o4; |
qtrstage #(20,21,11,0,0,0) stage_e4(i_clk, i_reset, i_ce, |
w_s8, w_e8, w_e4, w_s4); |
qtrstage #(20,21,11,1,0,0) stage_o4(i_clk, i_reset, i_ce, |
w_s8, w_o8, w_o4, w_os4); |
wire [41:0] w_d4; |
qtrstage #(20,21,11,0,0) stage_4(i_clk, i_reset, i_ce, |
w_s8, w_d8, w_d4, w_s4); |
wire w_s2; |
wire [41:0] w_e2, w_o2; |
wire [41:0] w_d2; |
laststage #(21,21,0) stage_2(i_clk, i_reset, i_ce, |
w_s4, w_e4, w_o4, w_e2, w_o2, w_s2); |
w_s4, w_d4, w_d2, w_s2); |
|
|
// Prepare for a (potential) bit-reverse stage. |
assign br_left = w_e2; |
assign br_right = w_o2; |
assign br_sample= w_d2; |
|
wire br_start; |
reg r_br_started; |
250,11 → 184,11
|
// Now for the bit-reversal stage. |
wire br_sync; |
wire [(2*OWIDTH-1):0] br_o_left, br_o_right; |
wire [(2*OWIDTH-1):0] br_o_result; |
bitreverse #(11,21) |
revstage(i_clk, i_reset, |
(i_ce & br_start), br_left, br_right, |
br_o_left, br_o_right, br_sync); |
(i_ce & br_start), br_sample, |
br_o_result, br_sync); |
|
|
// Last clock: Register our outputs, we're done. |
267,10 → 201,7
|
always @(posedge i_clk) |
if (i_ce) |
begin |
o_left <= br_o_left; |
o_right <= br_o_right; |
end |
o_result <= br_o_result; |
|
|
endmodule |
/rtl/fftstage.v
35,23 → 35,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
65,7 → 67,7
// core is built ... Note that the minimum LGSPAN (the base two log |
// of the span, or the base two log of the current FFT size) is 3. |
// Smaller spans (i.e. the span of 2) must use the dbl laststage module. |
parameter LGWIDTH=10, LGSPAN=8, BFLYSHIFT=0; |
parameter LGSPAN=10, BFLYSHIFT=0; // LGWIDTH=11 |
parameter [0:0] OPT_HWMPY = 1; |
// Clocks per CE. If your incoming data rate is less than 50% of your |
// clock speed, you can set CKPCE to 2'b10, make sure there's at least |
77,7 → 79,7
parameter CKPCE = 1; |
// The COEFFILE parameter contains the name of the file containing the |
// FFT twiddle factors |
parameter COEFFILE="cmem_o2048.hex"; |
parameter COEFFILE="cmem_2048.hex"; |
|
`ifdef VERILATOR |
parameter [0:0] ZERO_ON_IDLE = 1'b0; |
85,11 → 87,14
localparam [0:0] ZERO_ON_IDLE = 1'b0; |
`endif // VERILATOR |
|
input i_clk, i_reset, i_ce, i_sync; |
input [(2*IWIDTH-1):0] i_data; |
input wire i_clk, i_reset, i_ce, i_sync; |
input wire [(2*IWIDTH-1):0] i_data; |
output reg [(2*OWIDTH-1):0] o_data; |
output reg o_sync; |
|
// I am using the prefixes |
// ib_* to reference the inputs to the butterfly, and |
// ob_* to reference the outputs from the butterfly |
reg wait_for_sync; |
reg [(2*IWIDTH-1):0] ib_a, ib_b; |
reg [(2*CWIDTH-1):0] ib_c; |
107,21 → 112,26
// (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) }; |
// |
reg [(2*CWIDTH-1):0] cmem [0:((1<<LGSPAN)-1)]; |
`ifdef FORMAL |
// Let the formal tool pick the coefficients |
`else |
initial $readmemh(COEFFILE,cmem); |
|
`endif |
|
reg [(LGSPAN):0] iaddr; |
reg [(2*IWIDTH-1):0] imem [0:((1<<LGSPAN)-1)]; |
|
reg [LGSPAN:0] oB; |
reg [LGSPAN:0] oaddr; |
reg [(2*OWIDTH-1):0] omem [0:((1<<LGSPAN)-1)]; |
|
initial wait_for_sync = 1'b1; |
initial iaddr = 0; |
always @(posedge i_clk) |
if (i_reset) |
if (i_reset) |
begin |
wait_for_sync <= 1'b1; |
iaddr <= 0; |
wait_for_sync <= 1'b1; |
iaddr <= 0; |
end else if ((i_ce)&&((!wait_for_sync)||(i_sync))) |
begin |
// |
137,6 → 147,9
// |
// Now, we have all the inputs, so let's feed the butterfly |
// |
// ib_sync is the synchronization bit to the butterfly. It will |
// be tracked within the butterfly, and used to create the o_sync |
// value when the results from this output are produced |
initial ib_sync = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
149,6 → 162,8
ib_sync <= (iaddr==(1<<(LGSPAN))); |
end |
|
// Read the values from our input memory, and use them to feed first of two |
// butterfly inputs |
always @(posedge i_clk) |
if (i_ce) |
begin |
186,6 → 201,12
|
end endgenerate |
|
// For the formal proof, we'll assume the outputs of hwbfly and/or |
// butterfly, rather than actually calculating them. This will simplify |
// the proof and (if done properly) will be equivalent. Be careful of |
// defining FORMAL if you want the full logic! |
`ifndef FORMAL |
// |
generate if (OPT_HWMPY) |
begin : HWBFLY |
hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH), |
205,43 → 226,213
(ib_sync&&i_ce), |
ob_a, ob_b, ob_sync); |
end endgenerate |
`endif |
|
// |
// Next step: recover the outputs from the butterfly |
// |
initial oB = 0; |
// The first output can go immediately to the output of this routine |
// The second output must wait until this time in the idle cycle |
// oaddr is the output memory address, keeping track of where we are |
// in this output cycle. |
initial oaddr = 0; |
initial o_sync = 0; |
initial b_started = 0; |
always @(posedge i_clk) |
if (i_reset) |
if (i_reset) |
begin |
oB <= 0; |
o_sync <= 0; |
oaddr <= 0; |
o_sync <= 0; |
// b_started will be true once we've seen the first ob_sync |
b_started <= 0; |
end else if (i_ce) |
begin |
o_sync <= (!oB[LGSPAN])?ob_sync : 1'b0; |
o_sync <= (!oaddr[LGSPAN])?ob_sync : 1'b0; |
if (ob_sync||b_started) |
oB <= oB + { {(LGSPAN){1'b0}}, 1'b1 }; |
if ((ob_sync)&&(!oB[LGSPAN])) |
// A butterfly output is available |
oaddr <= oaddr + 1'b1; |
if ((ob_sync)&&(!oaddr[LGSPAN])) |
// If b_started is true, then a butterfly output is available |
b_started <= 1'b1; |
end |
|
reg [(LGSPAN-1):0] dly_addr; |
reg [(2*OWIDTH-1):0] dly_value; |
reg [(LGSPAN-1):0] nxt_oaddr; |
reg [(2*OWIDTH-1):0] pre_ovalue; |
always @(posedge i_clk) |
if (i_ce) |
nxt_oaddr[0] <= oaddr[0]; |
generate if (LGSPAN>1) |
begin |
dly_addr <= oB[(LGSPAN-1):0]; |
dly_value <= ob_b; |
end |
|
always @(posedge i_clk) |
if (i_ce) |
nxt_oaddr[LGSPAN-1:1] <= oaddr[LGSPAN-1:1] + 1'b1; |
|
end endgenerate |
|
// Only write to the memory on the first half of the outputs |
// We'll use the memory value on the second half of the outputs |
always @(posedge i_clk) |
if ((i_ce)&&(!oaddr[LGSPAN])) |
omem[oaddr[(LGSPAN-1):0]] <= ob_b; |
|
always @(posedge i_clk) |
if (i_ce) |
omem[dly_addr] <= dly_value; |
pre_ovalue <= omem[nxt_oaddr[(LGSPAN-1):0]]; |
|
always @(posedge i_clk) |
if (i_ce) |
o_data <= (!oB[LGSPAN])?ob_a : omem[oB[(LGSPAN-1):0]]; |
o_data <= (!oaddr[LGSPAN]) ? ob_a : pre_ovalue; |
|
`ifdef FORMAL |
// An arbitrary processing delay from butterfly input to |
// butterfly output(s) |
(* anyconst *) reg [LGSPAN:0] f_mpydelay; |
always @(*) |
assume(f_mpydelay > 1); |
|
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
always @(posedge i_clk) |
if ((!f_past_valid)||($past(i_reset))) |
begin |
assert(iaddr == 0); |
assert(wait_for_sync); |
assert(o_sync == 0); |
assert(oaddr == 0); |
assert(!b_started); |
assert(!o_sync); |
end |
|
///////////////////////////////////////// |
// |
// Formally verify the input half, from the inputs to this module |
// to the inputs of the butterfly |
// |
///////////////////////////////////////// |
// |
// Let's verify a specific set of inputs |
(* anyconst *) reg [LGSPAN:0] f_addr; |
reg [2*IWIDTH-1:0] f_left, f_right; |
wire [LGSPAN:0] f_next_addr; |
|
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))&&(!$past(i_ce,4))) |
assume(!i_ce); |
|
always @(*) |
assume(f_addr[LGSPAN]==1'b0); |
|
assign f_next_addr = f_addr + 1'b1; |
|
always @(posedge i_clk) |
if ((i_ce)&&(iaddr[LGSPAN:0] == f_addr)) |
f_left <= i_data; |
|
always @(*) |
if (wait_for_sync) |
assert(iaddr == 0); |
|
wire [LGSPAN:0] f_last_addr = iaddr - 1'b1; |
|
always @(posedge i_clk) |
if ((!wait_for_sync)&&(f_last_addr >= { 1'b0, f_addr[LGSPAN-1:0]})) |
assert(f_left == imem[f_addr[LGSPAN-1:0]]); |
|
always @(posedge i_clk) |
if ((i_ce)&&(iaddr == { 1'b1, f_addr[LGSPAN-1:0]})) |
f_right <= i_data; |
|
always @(posedge i_clk) |
if ((i_ce)&&(!wait_for_sync)&&(f_last_addr == { 1'b1, f_addr[LGSPAN-1:0]})) |
begin |
assert(ib_a == f_left); |
assert(ib_b == f_right); |
assert(ib_c == cmem[f_addr[LGSPAN-1:0]]); |
end |
|
///////////////////////////////////////// |
// |
// Formally verify the output half, from the output of the butterfly |
// to the outputs of this module |
// |
///////////////////////////////////////// |
reg [2*OWIDTH-1:0] f_oleft, f_oright; |
reg [LGSPAN:0] f_oaddr; |
wire [LGSPAN:0] f_oaddr_m1 = f_oaddr - 1'b1; |
|
always @(*) |
f_oaddr = iaddr - f_mpydelay + {1'b1,{(LGSPAN-1){1'b0}}}; |
|
assign f_oaddr_m1 = f_oaddr - 1'b1; |
|
reg f_output_active; |
initial f_output_active = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
f_output_active <= 1'b0; |
else if ((i_ce)&&(ob_sync)) |
f_output_active <= 1'b1; |
|
always @(*) |
assert(f_output_active == b_started); |
|
always @(*) |
if (wait_for_sync) |
assert(!f_output_active); |
|
always @(*) |
if (f_output_active) |
assert(oaddr == f_oaddr); |
else |
assert(oaddr == 0); |
|
always @(*) |
if (wait_for_sync) |
assume(!ob_sync); |
|
always @(*) |
assume(ob_sync == (f_oaddr == 0)); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_ce))) |
begin |
assume($stable(ob_a)); |
assume($stable(ob_b)); |
end |
|
initial f_oleft = 0; |
initial f_oright = 0; |
always @(posedge i_clk) |
if ((i_ce)&&(f_oaddr == f_addr)) |
begin |
f_oleft <= ob_a; |
f_oright <= ob_b; |
end |
|
always @(posedge i_clk) |
if ((f_output_active)&&(f_oaddr_m1 >= { 1'b0, f_addr[LGSPAN-1:0]})) |
assert(omem[f_addr[LGSPAN-1:0]] == f_oright); |
|
always @(posedge i_clk) |
if ((i_ce)&&(f_oaddr_m1 == 0)&&(f_output_active)) |
assert(o_sync); |
else if ((i_ce)||(!f_output_active)) |
assert(!o_sync); |
|
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1 == f_addr)) |
assert(o_data == f_oleft); |
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr[LGSPAN]) |
&&(f_oaddr[LGSPAN-1:0] == f_addr[LGSPAN-1:0])) |
assert(pre_ovalue == f_oright); |
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1[LGSPAN]) |
&&(f_oaddr_m1[LGSPAN-1:0] == f_addr[LGSPAN-1:0])) |
assert(o_data == f_oright); |
|
`endif |
endmodule |
/rtl/hwbfly.v
39,23 → 39,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
75,10 → 77,10
// The number of clocks per clock enable, 1, 2, or 3. |
parameter [1:0] CKPCE=1; |
// |
input i_clk, i_reset, i_ce; |
input [(2*CWIDTH-1):0] i_coef; |
input [(2*IWIDTH-1):0] i_left, i_right; |
input i_aux; |
input wire i_clk, i_reset, i_ce; |
input wire [(2*CWIDTH-1):0] i_coef; |
input wire [(2*IWIDTH-1):0] i_left, i_right; |
input wire i_aux; |
output wire [(2*OWIDTH-1):0] o_left, o_right; |
output reg o_aux; |
|
481,9 → 483,6
assign o_left = { rnd_left_r, rnd_left_i }; |
assign o_right= { rnd_right_r,rnd_right_i}; |
|
`ifdef VERILATOR |
`define FORMAL |
`endif |
`ifdef FORMAL |
localparam F_LGDEPTH = 3; |
localparam F_DEPTH = 5; |
/rtl/ifftmain.v
8,8 → 8,8
// implementation. As such, all other modules are subordinate |
// to this one. This module accomplish a fixed size Complex FFT on |
// 2048 data points. |
// The FFT is fully pipelined, and accepts as inputs two complex two's |
// complement samples per clock. |
// The FFT is fully pipelined, and accepts as inputs one complex two's |
// complement sample per clock. |
// |
// Parameters: |
// i_clk The clock. All operations are synchronous with this clock. |
18,31 → 18,27
// Further, following a reset, the o_sync line will go |
// high the same time the first output sample is valid. |
// i_ce A clock enable line. If this line is set, this module |
// will accept two complex values as inputs, and produce |
// two (possibly empty) complex values as outputs. |
// i_left The first of two complex input samples. This value is split |
// will accept one complex input value, and produce |
// one (possibly empty) complex output value. |
// i_sample The complex input sample. This value is split |
// into two two's complement numbers, 15 bits each, with |
// the real portion in the high order bits, and the |
// imaginary portion taking the bottom 15 bits. |
// i_right This is the same thing as i_left, only this is the second of |
// two such samples. Hence, i_left would contain input |
// sample zero, i_right would contain sample one. On the |
// next clock i_left would contain input sample two, |
// i_right number three and so forth. |
// o_left The first of two output samples, of the same format as i_left, |
// o_result The output result, of the same format as i_sample, |
// only having 21 bits for each of the real and imaginary |
// components, leading to 42 bits total. |
// o_right The second of two output samples produced each clock. This has |
// the same format as o_left. |
// o_sync A one bit output indicating the first valid sample produced by |
// this FFT following a reset. Ever after, this will |
// indicate the first sample of an FFT frame. |
// o_sync A one bit output indicating the first sample of the FFT frame. |
// It also indicates the first valid sample out of the FFT |
// on the first frame. |
// |
// Arguments: This file was computer generated using the following command |
// line: |
// |
// % ./fftgen -i -d ../rtl -f 2048 -2 -p 0 -n 15 -a ../bench/cpp/ifftsize.h |
// % ./fftgen -i -d ../rtl -f 2048 -1 -k 1 -p 0 -n 15 -a ../bench/cpp/ifftsize.h |
// |
// This core will use hardware accelerated multiplies (DSPs) |
// for 0 of the 11 stages |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
50,23 → 46,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
77,166 → 75,102
// |
// |
module ifftmain(i_clk, i_reset, i_ce, |
i_left, i_right, |
o_left, o_right, o_sync); |
parameter IWIDTH=15, OWIDTH=21, LGWIDTH=11; |
i_sample, o_result, o_sync); |
// The bit-width of the input, IWIDTH, output, OWIDTH, and the log |
// of the FFT size. These are localparams, rather than parameters, |
// because once the core has been generated, they can no longer be |
// changed. (These values can be adjusted by running the core |
// generator again.) The reason is simply that these values have |
// been hardwired into the core at several places. |
localparam IWIDTH=15, OWIDTH=21, LGWIDTH=11; |
// |
input i_clk, i_reset, i_ce; |
input wire i_clk, i_reset, i_ce; |
// |
input [(2*IWIDTH-1):0] i_left, i_right; |
output reg [(2*OWIDTH-1):0] o_left, o_right; |
input wire [(2*IWIDTH-1):0] i_sample; |
output reg [(2*OWIDTH-1):0] o_result; |
output reg o_sync; |
|
|
// Outputs of the FFT, ready for bit reversal. |
wire [(2*OWIDTH-1):0] br_left, br_right; |
wire [(2*OWIDTH-1):0] br_sample; |
|
|
wire w_s2048; |
// verilator lint_off UNUSED |
wire w_os2048; |
// verilator lint_on UNUSED |
wire [31:0] w_e2048, w_o2048; |
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0, |
0, 1, "icmem_e4096.hex") |
stage_e2048(i_clk, i_reset, i_ce, |
(!i_reset), i_left, w_e2048, w_s2048); |
fftstage #(IWIDTH,IWIDTH+4,16,11,9,0, |
0, 1, "icmem_o4096.hex") |
stage_o2048(i_clk, i_reset, i_ce, |
(!i_reset), i_right, w_o2048, w_os2048); |
wire [31:0] w_d2048; |
fftstage #(IWIDTH,IWIDTH+4,16,10,0, |
0, 1, "icmem_2048.hex") |
stage_2048(i_clk, i_reset, i_ce, |
(!i_reset), i_sample, w_d2048, w_s2048); |
|
|
wire w_s1024; |
// verilator lint_off UNUSED |
wire w_os1024; |
// verilator lint_on UNUSED |
wire [33:0] w_e1024, w_o1024; |
fftstage #(16,20,17,11,8,0, |
0, 1, "icmem_e2048.hex") |
stage_e1024(i_clk, i_reset, i_ce, |
w_s2048, w_e2048, w_e1024, w_s1024); |
fftstage #(16,20,17,11,8,0, |
0, 1, "icmem_o2048.hex") |
stage_o1024(i_clk, i_reset, i_ce, |
w_s2048, w_o2048, w_o1024, w_os1024); |
wire [33:0] w_d1024; |
fftstage #(16,20,17,9,0, |
0, 1, "icmem_1024.hex") |
stage_1024(i_clk, i_reset, i_ce, |
w_s2048, w_d2048, w_d1024, w_s1024); |
|
wire w_s512; |
// verilator lint_off UNUSED |
wire w_os512; |
// verilator lint_on UNUSED |
wire [33:0] w_e512, w_o512; |
fftstage #(17,21,17,11,7,0, |
0, 1, "icmem_e1024.hex") |
stage_e512(i_clk, i_reset, i_ce, |
w_s1024, w_e1024, w_e512, w_s512); |
fftstage #(17,21,17,11,7,0, |
0, 1, "icmem_o1024.hex") |
stage_o512(i_clk, i_reset, i_ce, |
w_s1024, w_o1024, w_o512, w_os512); |
wire [33:0] w_d512; |
fftstage #(17,21,17,8,0, |
0, 1, "icmem_512.hex") |
stage_512(i_clk, i_reset, i_ce, |
w_s1024, w_d1024, w_d512, w_s512); |
|
wire w_s256; |
// verilator lint_off UNUSED |
wire w_os256; |
// verilator lint_on UNUSED |
wire [35:0] w_e256, w_o256; |
fftstage #(17,21,18,11,6,0, |
0, 1, "icmem_e512.hex") |
stage_e256(i_clk, i_reset, i_ce, |
w_s512, w_e512, w_e256, w_s256); |
fftstage #(17,21,18,11,6,0, |
0, 1, "icmem_o512.hex") |
stage_o256(i_clk, i_reset, i_ce, |
w_s512, w_o512, w_o256, w_os256); |
wire [35:0] w_d256; |
fftstage #(17,21,18,7,0, |
0, 1, "icmem_256.hex") |
stage_256(i_clk, i_reset, i_ce, |
w_s512, w_d512, w_d256, w_s256); |
|
wire w_s128; |
// verilator lint_off UNUSED |
wire w_os128; |
// verilator lint_on UNUSED |
wire [35:0] w_e128, w_o128; |
fftstage #(18,22,18,11,5,0, |
0, 1, "icmem_e256.hex") |
stage_e128(i_clk, i_reset, i_ce, |
w_s256, w_e256, w_e128, w_s128); |
fftstage #(18,22,18,11,5,0, |
0, 1, "icmem_o256.hex") |
stage_o128(i_clk, i_reset, i_ce, |
w_s256, w_o256, w_o128, w_os128); |
wire [35:0] w_d128; |
fftstage #(18,22,18,6,0, |
0, 1, "icmem_128.hex") |
stage_128(i_clk, i_reset, i_ce, |
w_s256, w_d256, w_d128, w_s128); |
|
wire w_s64; |
// verilator lint_off UNUSED |
wire w_os64; |
// verilator lint_on UNUSED |
wire [37:0] w_e64, w_o64; |
fftstage #(18,22,19,11,4,0, |
0, 1, "icmem_e128.hex") |
stage_e64(i_clk, i_reset, i_ce, |
w_s128, w_e128, w_e64, w_s64); |
fftstage #(18,22,19,11,4,0, |
0, 1, "icmem_o128.hex") |
stage_o64(i_clk, i_reset, i_ce, |
w_s128, w_o128, w_o64, w_os64); |
wire [37:0] w_d64; |
fftstage #(18,22,19,5,0, |
0, 1, "icmem_64.hex") |
stage_64(i_clk, i_reset, i_ce, |
w_s128, w_d128, w_d64, w_s64); |
|
wire w_s32; |
// verilator lint_off UNUSED |
wire w_os32; |
// verilator lint_on UNUSED |
wire [37:0] w_e32, w_o32; |
fftstage #(19,23,19,11,3,0, |
0, 1, "icmem_e64.hex") |
stage_e32(i_clk, i_reset, i_ce, |
w_s64, w_e64, w_e32, w_s32); |
fftstage #(19,23,19,11,3,0, |
0, 1, "icmem_o64.hex") |
stage_o32(i_clk, i_reset, i_ce, |
w_s64, w_o64, w_o32, w_os32); |
wire [37:0] w_d32; |
fftstage #(19,23,19,4,0, |
0, 1, "icmem_32.hex") |
stage_32(i_clk, i_reset, i_ce, |
w_s64, w_d64, w_d32, w_s32); |
|
wire w_s16; |
// verilator lint_off UNUSED |
wire w_os16; |
// verilator lint_on UNUSED |
wire [39:0] w_e16, w_o16; |
fftstage #(19,23,20,11,2,0, |
0, 1, "icmem_e32.hex") |
stage_e16(i_clk, i_reset, i_ce, |
w_s32, w_e32, w_e16, w_s16); |
fftstage #(19,23,20,11,2,0, |
0, 1, "icmem_o32.hex") |
stage_o16(i_clk, i_reset, i_ce, |
w_s32, w_o32, w_o16, w_os16); |
wire [39:0] w_d16; |
fftstage #(19,23,20,3,0, |
0, 1, "icmem_16.hex") |
stage_16(i_clk, i_reset, i_ce, |
w_s32, w_d32, w_d16, w_s16); |
|
wire w_s8; |
// verilator lint_off UNUSED |
wire w_os8; |
// verilator lint_on UNUSED |
wire [39:0] w_e8, w_o8; |
fftstage #(20,24,20,11,1,0, |
0, 1, "icmem_e16.hex") |
stage_e8(i_clk, i_reset, i_ce, |
w_s16, w_e16, w_e8, w_s8); |
fftstage #(20,24,20,11,1,0, |
0, 1, "icmem_o16.hex") |
stage_o8(i_clk, i_reset, i_ce, |
w_s16, w_o16, w_o8, w_os8); |
wire [39:0] w_d8; |
fftstage #(20,24,20,2,0, |
0, 1, "icmem_8.hex") |
stage_8(i_clk, i_reset, i_ce, |
w_s16, w_d16, w_d8, w_s8); |
|
wire w_s4; |
// verilator lint_off UNUSED |
wire w_os4; |
// verilator lint_on UNUSED |
wire [41:0] w_e4, w_o4; |
qtrstage #(20,21,11,0,1,0) stage_e4(i_clk, i_reset, i_ce, |
w_s8, w_e8, w_e4, w_s4); |
qtrstage #(20,21,11,1,1,0) stage_o4(i_clk, i_reset, i_ce, |
w_s8, w_o8, w_o4, w_os4); |
wire [41:0] w_d4; |
qtrstage #(20,21,11,1,0) stage_4(i_clk, i_reset, i_ce, |
w_s8, w_d8, w_d4, w_s4); |
wire w_s2; |
wire [41:0] w_e2, w_o2; |
wire [41:0] w_d2; |
laststage #(21,21,0) stage_2(i_clk, i_reset, i_ce, |
w_s4, w_e4, w_o4, w_e2, w_o2, w_s2); |
w_s4, w_d4, w_d2, w_s2); |
|
|
// Prepare for a (potential) bit-reverse stage. |
assign br_left = w_e2; |
assign br_right = w_o2; |
assign br_sample= w_d2; |
|
wire br_start; |
reg r_br_started; |
250,11 → 184,11
|
// Now for the bit-reversal stage. |
wire br_sync; |
wire [(2*OWIDTH-1):0] br_o_left, br_o_right; |
wire [(2*OWIDTH-1):0] br_o_result; |
bitreverse #(11,21) |
revstage(i_clk, i_reset, |
(i_ce & br_start), br_left, br_right, |
br_o_left, br_o_right, br_sync); |
(i_ce & br_start), br_sample, |
br_o_result, br_sync); |
|
|
// Last clock: Register our outputs, we're done. |
267,10 → 201,7
|
always @(posedge i_clk) |
if (i_ce) |
begin |
o_left <= br_o_left; |
o_right <= br_o_right; |
end |
o_result <= br_o_result; |
|
|
endmodule |
/rtl/ifftstage.v
0,0 → 1,436
//////////////////////////////////////////////////////////////////////////////// |
// |
// Filename: fftstage.v |
// |
// Project: A General Purpose Pipelined FFT Implementation |
// |
// Purpose: This file is (almost) a Verilog source file. It is meant to |
// be used by a FFT core compiler to generate FFTs which may be |
// used as part of an FFT core. Specifically, this file encapsulates |
// the options of an FFT-stage. For any 2^N length FFT, there shall be |
// (N-1) of these stages. |
// |
// |
// Operation: |
// Given a stream of values, operate upon them as though they were |
// value pairs, x[n] and x[n+N/2]. The stream begins when n=0, and ends |
// when n=N/2-1 (i.e. there's a full set of N values). When the value |
// x[0] enters, the synchronization input, i_sync, must be true as well. |
// |
// For this stream, produce outputs |
// y[n ] = x[n] + x[n+N/2], and |
// y[n+N/2] = (x[n] - x[n+N/2]) * c[n], |
// where c[n] is a complex coefficient found in the |
// external memory file COEFFILE. |
// When y[0] is output, a synchronization bit o_sync will be true as |
// well, otherwise it will be zero. |
// |
// Most of the work to do this is done within the butterfly, whether the |
// hardware accelerated butterfly (uses a DSP) or not. |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory. Run make with no |
// target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// |
`default_nettype none |
// |
module fftstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync); |
parameter IWIDTH=15,CWIDTH=20,OWIDTH=16; |
// Parameters specific to the core that should be changed when this |
// core is built ... Note that the minimum LGSPAN (the base two log |
// of the span, or the base two log of the current FFT size) is 3. |
// Smaller spans (i.e. the span of 2) must use the dbl laststage module. |
parameter LGSPAN=10, BFLYSHIFT=0; // LGWIDTH=11 |
parameter [0:0] OPT_HWMPY = 1; |
// Clocks per CE. If your incoming data rate is less than 50% of your |
// clock speed, you can set CKPCE to 2'b10, make sure there's at least |
// one clock between cycles when i_ce is high, and then use two |
// multiplies instead of three. Setting CKPCE to 2'b11, and insisting |
// on at least two clocks with i_ce low between cycles with i_ce high, |
// then the hardware optimized butterfly code will used one multiply |
// instead of two. |
parameter CKPCE = 1; |
// The COEFFILE parameter contains the name of the file containing the |
// FFT twiddle factors |
parameter COEFFILE="cmem_2048.hex"; |
|
`ifdef VERILATOR |
parameter [0:0] ZERO_ON_IDLE = 1'b0; |
`else |
localparam [0:0] ZERO_ON_IDLE = 1'b0; |
`endif // VERILATOR |
|
input wire i_clk, i_reset, i_ce, i_sync; |
input wire [(2*IWIDTH-1):0] i_data; |
output reg [(2*OWIDTH-1):0] o_data; |
output reg o_sync; |
|
// I am using the prefixes |
// ib_* to reference the inputs to the butterfly, and |
// ob_* to reference the outputs from the butterfly |
reg wait_for_sync; |
reg [(2*IWIDTH-1):0] ib_a, ib_b; |
reg [(2*CWIDTH-1):0] ib_c; |
reg ib_sync; |
|
reg b_started; |
wire ob_sync; |
wire [(2*OWIDTH-1):0] ob_a, ob_b; |
|
// cmem is defined as an array of real and complex values, |
// where the top CWIDTH bits are the real value and the bottom |
// CWIDTH bits are the imaginary value. |
// |
// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)), |
// (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) }; |
// |
reg [(2*CWIDTH-1):0] cmem [0:((1<<LGSPAN)-1)]; |
`ifdef FORMAL |
// Let the formal tool pick the coefficients |
`else |
initial $readmemh(COEFFILE,cmem); |
|
`endif |
|
reg [(LGSPAN):0] iaddr; |
reg [(2*IWIDTH-1):0] imem [0:((1<<LGSPAN)-1)]; |
|
reg [LGSPAN:0] oaddr; |
reg [(2*OWIDTH-1):0] omem [0:((1<<LGSPAN)-1)]; |
|
initial wait_for_sync = 1'b1; |
initial iaddr = 0; |
always @(posedge i_clk) |
if (i_reset) |
begin |
wait_for_sync <= 1'b1; |
iaddr <= 0; |
end else if ((i_ce)&&((!wait_for_sync)||(i_sync))) |
begin |
// |
// First step: Record what we're not ready to use yet |
// |
iaddr <= iaddr + { {(LGSPAN){1'b0}}, 1'b1 }; |
wait_for_sync <= 1'b0; |
end |
always @(posedge i_clk) // Need to make certain here that we don't read |
if ((i_ce)&&(!iaddr[LGSPAN])) // and write the same address on |
imem[iaddr[(LGSPAN-1):0]] <= i_data; // the same clk |
|
// |
// Now, we have all the inputs, so let's feed the butterfly |
// |
// ib_sync is the synchronization bit to the butterfly. It will |
// be tracked within the butterfly, and used to create the o_sync |
// value when the results from this output are produced |
initial ib_sync = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
ib_sync <= 1'b0; |
else if (i_ce) |
begin |
// Set the sync to true on the very first |
// valid input in, and hence on the very |
// first valid data out per FFT. |
ib_sync <= (iaddr==(1<<(LGSPAN))); |
end |
|
// Read the values from our input memory, and use them to feed first of two |
// butterfly inputs |
always @(posedge i_clk) |
if (i_ce) |
begin |
// One input from memory, ... |
ib_a <= imem[iaddr[(LGSPAN-1):0]]; |
// One input clocked in from the top |
ib_b <= i_data; |
// and the coefficient or twiddle factor |
ib_c <= cmem[iaddr[(LGSPAN-1):0]]; |
end |
|
// The idle register is designed to keep track of when an input |
// to the butterfly is important and going to be used. It's used |
// in a flag following, so that when useful values are placed |
// into the butterfly they'll be non-zero (idle=0), otherwise when |
// the inputs to the butterfly are irrelevant and will be ignored, |
// then (idle=1) those inputs will be set to zero. This |
// functionality is not designed to be used in operation, but only |
// within a Verilator simulation context when chasing a bug. |
// In this limited environment, the non-zero answers will stand |
// in a trace making it easier to highlight a bug. |
reg idle; |
generate if (ZERO_ON_IDLE) |
begin |
initial idle = 1; |
always @(posedge i_clk) |
if (i_reset) |
idle <= 1'b1; |
else if (i_ce) |
idle <= (!iaddr[LGSPAN])&&(!wait_for_sync); |
|
end else begin |
|
always @(*) idle = 0; |
|
end endgenerate |
|
// For the formal proof, we'll assume the outputs of hwbfly and/or |
// butterfly, rather than actually calculating them. This will simplify |
// the proof and (if done properly) will be equivalent. Be careful of |
// defining FORMAL if you want the full logic! |
`ifndef FORMAL |
// |
generate if (OPT_HWMPY) |
begin : HWBFLY |
hwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH), |
.CKPCE(CKPCE), .SHIFT(BFLYSHIFT)) |
bfly(i_clk, i_reset, i_ce, (idle)?0:ib_c, |
(idle || (!i_ce)) ? 0:ib_a, |
(idle || (!i_ce)) ? 0:ib_b, |
(ib_sync)&&(i_ce), |
ob_a, ob_b, ob_sync); |
end else begin : FWBFLY |
butterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH), |
.CKPCE(CKPCE),.SHIFT(BFLYSHIFT)) |
bfly(i_clk, i_reset, i_ce, |
(idle||(!i_ce))?0:ib_c, |
(idle||(!i_ce))?0:ib_a, |
(idle||(!i_ce))?0:ib_b, |
(ib_sync&&i_ce), |
ob_a, ob_b, ob_sync); |
end endgenerate |
`endif |
|
// |
// Next step: recover the outputs from the butterfly |
// |
// The first output can go immediately to the output of this routine |
// The second output must wait until this time in the idle cycle |
// oaddr is the output memory address, keeping track of where we are |
// in this output cycle. |
initial oaddr = 0; |
initial o_sync = 0; |
initial b_started = 0; |
always @(posedge i_clk) |
if (i_reset) |
begin |
oaddr <= 0; |
o_sync <= 0; |
// b_started will be true once we've seen the first ob_sync |
b_started <= 0; |
end else if (i_ce) |
begin |
o_sync <= (!oaddr[LGSPAN])?ob_sync : 1'b0; |
if (ob_sync||b_started) |
oaddr <= oaddr + 1'b1; |
if ((ob_sync)&&(!oaddr[LGSPAN])) |
// If b_started is true, then a butterfly output is available |
b_started <= 1'b1; |
end |
|
reg [(LGSPAN-1):0] nxt_oaddr; |
reg [(2*OWIDTH-1):0] pre_ovalue; |
always @(posedge i_clk) |
if (i_ce) |
nxt_oaddr[0] <= oaddr[0]; |
generate if (LGSPAN>1) |
begin |
|
always @(posedge i_clk) |
if (i_ce) |
nxt_oaddr[LGSPAN-1:1] <= oaddr[LGSPAN-1:1] + 1'b1; |
|
end endgenerate |
|
// Only write to the memory on the first half of the outputs |
// We'll use the memory value on the second half of the outputs |
always @(posedge i_clk) |
if ((i_ce)&&(!oaddr[LGSPAN])) |
omem[oaddr[(LGSPAN-1):0]] <= ob_b; |
|
always @(posedge i_clk) |
if (i_ce) |
pre_ovalue <= omem[nxt_oaddr[(LGSPAN-1):0]]; |
|
always @(posedge i_clk) |
if (i_ce) |
o_data <= (!oaddr[LGSPAN]) ? ob_a : pre_ovalue; |
|
`ifdef FORMAL |
// An arbitrary processing delay from butterfly input to |
// butterfly output(s) |
(* anyconst *) reg [LGSPAN:0] f_mpydelay; |
always @(*) |
assume(f_mpydelay > 1); |
|
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
always @(posedge i_clk) |
if ((!f_past_valid)||($past(i_reset))) |
begin |
assert(iaddr == 0); |
assert(wait_for_sync); |
assert(o_sync == 0); |
assert(oaddr == 0); |
assert(!b_started); |
assert(!o_sync); |
end |
|
///////////////////////////////////////// |
// |
// Formally verify the input half, from the inputs to this module |
// to the inputs of the butterfly |
// |
///////////////////////////////////////// |
// |
// Let's verify a specific set of inputs |
(* anyconst *) reg [LGSPAN:0] f_addr; |
reg [2*IWIDTH-1:0] f_left, f_right; |
wire [LGSPAN:0] f_next_addr; |
|
always @(posedge i_clk) |
if ((!$past(i_ce))&&(!$past(i_ce,2))&&(!$past(i_ce,3))&&(!$past(i_ce,4))) |
assume(!i_ce); |
|
always @(*) |
assume(f_addr[LGSPAN]==1'b0); |
|
assign f_next_addr = f_addr + 1'b1; |
|
always @(posedge i_clk) |
if ((i_ce)&&(iaddr[LGSPAN:0] == f_addr)) |
f_left <= i_data; |
|
always @(*) |
if (wait_for_sync) |
assert(iaddr == 0); |
|
wire [LGSPAN:0] f_last_addr = iaddr - 1'b1; |
|
always @(posedge i_clk) |
if ((!wait_for_sync)&&(f_last_addr >= { 1'b0, f_addr[LGSPAN-1:0]})) |
assert(f_left == imem[f_addr[LGSPAN-1:0]]); |
|
always @(posedge i_clk) |
if ((i_ce)&&(iaddr == { 1'b1, f_addr[LGSPAN-1:0]})) |
f_right <= i_data; |
|
always @(posedge i_clk) |
if ((i_ce)&&(!wait_for_sync)&&(f_last_addr == { 1'b1, f_addr[LGSPAN-1:0]})) |
begin |
assert(ib_a == f_left); |
assert(ib_b == f_right); |
assert(ib_c == cmem[f_addr[LGSPAN-1:0]]); |
end |
|
///////////////////////////////////////// |
// |
// Formally verify the output half, from the output of the butterfly |
// to the outputs of this module |
// |
///////////////////////////////////////// |
reg [2*OWIDTH-1:0] f_oleft, f_oright; |
reg [LGSPAN:0] f_oaddr; |
wire [LGSPAN:0] f_oaddr_m1 = f_oaddr - 1'b1; |
|
always @(*) |
f_oaddr = iaddr - f_mpydelay + {1'b1,{(LGSPAN-1){1'b0}}}; |
|
assign f_oaddr_m1 = f_oaddr - 1'b1; |
|
reg f_output_active; |
initial f_output_active = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
f_output_active <= 1'b0; |
else if ((i_ce)&&(ob_sync)) |
f_output_active <= 1'b1; |
|
always @(*) |
assert(f_output_active == b_started); |
|
always @(*) |
if (wait_for_sync) |
assert(!f_output_active); |
|
always @(*) |
if (f_output_active) |
assert(oaddr == f_oaddr); |
else |
assert(oaddr == 0); |
|
always @(*) |
if (wait_for_sync) |
assume(!ob_sync); |
|
always @(*) |
assume(ob_sync == (f_oaddr == 0)); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_ce))) |
begin |
assume($stable(ob_a)); |
assume($stable(ob_b)); |
end |
|
initial f_oleft = 0; |
initial f_oright = 0; |
always @(posedge i_clk) |
if ((i_ce)&&(f_oaddr == f_addr)) |
begin |
f_oleft <= ob_a; |
f_oright <= ob_b; |
end |
|
always @(posedge i_clk) |
if ((f_output_active)&&(f_oaddr_m1 >= { 1'b0, f_addr[LGSPAN-1:0]})) |
assert(omem[f_addr[LGSPAN-1:0]] == f_oright); |
|
always @(posedge i_clk) |
if ((i_ce)&&(f_oaddr_m1 == 0)&&(f_output_active)) |
assert(o_sync); |
else if ((i_ce)||(!f_output_active)) |
assert(!o_sync); |
|
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1 == f_addr)) |
assert(o_data == f_oleft); |
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr[LGSPAN]) |
&&(f_oaddr[LGSPAN-1:0] == f_addr[LGSPAN-1:0])) |
assert(pre_ovalue == f_oright); |
always @(posedge i_clk) |
if ((i_ce)&&(f_output_active)&&(f_oaddr_m1[LGSPAN]) |
&&(f_oaddr_m1[LGSPAN-1:0] == f_addr[LGSPAN-1:0])) |
assert(o_data == f_oright); |
|
`endif |
endmodule |
/rtl/laststage.v
6,43 → 6,9
// |
// Purpose: This is part of an FPGA implementation that will process |
// the final stage of a decimate-in-frequency FFT, running |
// through the data at two samples per clock. If you notice from the |
// derivation of an FFT, the only time both even and odd samples are |
// used at the same time is in this stage. Therefore, other than this |
// stage and these twiddles, all of the other stages can run two stages |
// at a time at one sample per clock. |
// through the data at one sample per clock. |
// |
// Operation: |
// Given a stream of values, operate upon them as though they were |
// value pairs, x[2n] and x[2n+1]. The stream begins when n=0, and ends |
// when n=1. When the first x[0] value enters, the synchronization |
// input, i_sync, must be true as well. |
// |
// For this stream, produce outputs |
// y[2n ] = x[2n] + x[2n+1], and |
// y[2n+1] = x[2n] - x[2n+1] |
// |
// When y[0] is output, a synchronization bit o_sync will be true as |
// well, otherwise it will be zero. |
// |
// |
// In this implementation, the output is valid one clock after the input |
// is valid. The output also accumulates one bit above and beyond the |
// number of bits in the input. |
// |
// i_clk A system clock |
// i_reset A synchronous reset |
// i_ce Circuit enable--nothing happens unless this line is high |
// i_sync A synchronization signal, high once per FFT at the start |
// i_left The first (even) complex sample input. The higher order |
// bits contain the real portion, low order bits the |
// imaginary portion, all in two's complement. |
// i_right The next (odd) complex sample input, same format as |
// i_left. |
// o_left The first (even) complex output. |
// o_right The next (odd) complex output. |
// o_sync Output synchronization signal. |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
50,23 → 16,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
74,98 → 42,166
// |
`default_nettype none |
// |
module laststage(i_clk, i_reset, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync); |
module laststage(i_clk, i_reset, i_ce, i_sync, i_val, o_val, o_sync); |
parameter IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0; |
input i_clk, i_reset, i_ce, i_sync; |
input [(2*IWIDTH-1):0] i_left, i_right; |
output reg [(2*OWIDTH-1):0] o_left, o_right; |
output reg o_sync; |
input wire i_clk, i_reset, i_ce, i_sync; |
input wire [(2*IWIDTH-1):0] i_val; |
output wire [(2*OWIDTH-1):0] o_val; |
output reg o_sync; |
|
wire signed [(IWIDTH-1):0] i_in_0r, i_in_0i, i_in_1r, i_in_1i; |
assign i_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; |
assign i_in_0i = i_left[(IWIDTH-1):0]; |
assign i_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; |
assign i_in_1i = i_right[(IWIDTH-1):0]; |
wire [(OWIDTH-1):0] o_out_0r, o_out_0i, |
o_out_1r, o_out_1i; |
reg signed [(IWIDTH-1):0] m_r, m_i; |
wire signed [(IWIDTH-1):0] i_r, i_i; |
|
assign i_r = i_val[(2*IWIDTH-1):(IWIDTH)]; |
assign i_i = i_val[(IWIDTH-1):0]; |
|
// Handle a potential rounding situation, when IWIDTH>=OWIDTH. |
// Don't forget that we accumulate a bit by adding two values |
// together. Therefore our intermediate value must have one more |
// bit than the two originals. |
reg signed [(IWIDTH):0] rnd_r, rnd_i, sto_r, sto_i; |
reg wait_for_sync, stage; |
reg [1:0] sync_pipe; |
|
|
|
// As with any register connected to the sync pulse, these must |
// have initial values and be reset on the i_reset signal. |
// Other data values need only restrict their updates to i_ce |
// enabled clocks, but sync's must obey resets and initial |
// conditions as well. |
reg rnd_sync, r_sync; |
|
initial rnd_sync = 1'b0; // Sync into rounding |
initial r_sync = 1'b0; // Sync coming out |
initial wait_for_sync = 1'b1; |
initial stage = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
begin |
rnd_sync <= 1'b0; |
r_sync <= 1'b0; |
wait_for_sync <= 1'b1; |
stage <= 1'b0; |
end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage)) |
begin |
wait_for_sync <= 1'b0; |
// |
stage <= 1'b1; |
// |
end else if (i_ce) |
begin |
rnd_sync <= i_sync; |
r_sync <= rnd_sync; |
end |
stage <= 1'b0; |
|
// As with other variables, these are really only updated when in |
// the processing pipeline, after the first i_sync. However, to |
// eliminate as much unnecessary logic as possible, we toggle |
// these any time the i_ce line is enabled, and don't reset. |
// them on i_reset. |
// Don't forget that we accumulate a bit by adding two values |
// together. Therefore our intermediate value must have one more |
// bit than the two originals. |
reg signed [(IWIDTH):0] rnd_in_0r, rnd_in_0i; |
reg signed [(IWIDTH):0] rnd_in_1r, rnd_in_1i; |
initial sync_pipe = 0; |
always @(posedge i_clk) |
if (i_reset) |
sync_pipe <= 0; |
else if (i_ce) |
sync_pipe <= { sync_pipe[0], i_sync }; |
|
initial o_sync = 1'b0; |
always @(posedge i_clk) |
if (i_ce) |
if (i_reset) |
o_sync <= 1'b0; |
else if (i_ce) |
o_sync <= sync_pipe[1]; |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
if (!stage) |
begin |
// Clock 1 |
m_r <= i_r; |
m_i <= i_i; |
// Clock 3 |
rnd_r <= sto_r; |
rnd_i <= sto_i; |
// |
rnd_in_0r <= i_in_0r + i_in_1r; |
rnd_in_0i <= i_in_0i + i_in_1i; |
end else begin |
// Clock 2 |
rnd_r <= m_r + i_r; |
rnd_i <= m_i + i_i; |
// |
rnd_in_1r <= i_in_0r - i_in_1r; |
rnd_in_1i <= i_in_0i - i_in_1i; |
sto_r <= m_r - i_r; |
sto_i <= m_i - i_i; |
// |
end |
end |
|
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce, |
rnd_in_0r, o_out_0r); |
// Now that we have our results, let's round them and report them |
wire signed [(OWIDTH-1):0] o_r, o_i; |
|
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce, |
rnd_in_0i, o_out_0i); |
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r); |
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i); |
|
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce, |
rnd_in_1r, o_out_1r); |
assign o_val = { o_r, o_i }; |
|
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce, |
rnd_in_1i, o_out_1i); |
`ifdef FORMAL |
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
`ifdef LASTSTAGE |
always @(posedge i_clk) |
assume((i_ce)||($past(i_ce))||($past(i_ce,2))); |
`endif |
|
// Prior versions of this routine did not include the extra |
// clock and register/flip-flops that this routine requires. |
// These are placed in here to correct a bug in Verilator, that |
// otherwise struggles. (Hopefully this will fix the problem ...) |
initial assert(IWIDTH+1 == OWIDTH); |
|
reg signed [IWIDTH-1:0] f_piped_real [0:3]; |
reg signed [IWIDTH-1:0] f_piped_imag [0:3]; |
always @(posedge i_clk) |
if (i_ce) |
begin |
o_left <= { o_out_0r, o_out_0i }; |
o_right <= { o_out_1r, o_out_1i }; |
end |
if (i_ce) |
begin |
f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH]; |
f_piped_imag[0] <= i_val[ IWIDTH-1:0]; |
|
initial o_sync = 1'b0; // Final sync coming out of module |
f_piped_real[1] <= f_piped_real[0]; |
f_piped_imag[1] <= f_piped_imag[0]; |
|
f_piped_real[2] <= f_piped_real[1]; |
f_piped_imag[2] <= f_piped_imag[1]; |
|
f_piped_real[3] <= f_piped_real[2]; |
f_piped_imag[3] <= f_piped_imag[2]; |
end |
|
wire f_syncd; |
reg f_rsyncd; |
|
initial f_rsyncd = 0; |
always @(posedge i_clk) |
if (i_reset) |
o_sync <= 1'b0; |
else if (i_ce) |
o_sync <= r_sync; |
if (i_reset) |
f_rsyncd <= 1'b0; |
else if (!f_rsyncd) |
f_rsyncd <= o_sync; |
assign f_syncd = (f_rsyncd)||(o_sync); |
|
reg f_state; |
initial f_state = 0; |
always @(posedge i_clk) |
if (i_reset) |
f_state <= 0; |
else if ((i_ce)&&((!wait_for_sync)||(i_sync))) |
f_state <= f_state + 1; |
|
always @(*) |
if (f_state != 0) |
assume(!i_sync); |
|
always @(*) |
assert(stage == f_state[0]); |
|
always @(posedge i_clk) |
if ((f_state == 1'b1)&&(f_syncd)) |
begin |
assert(o_r == f_piped_real[2] + f_piped_real[1]); |
assert(o_i == f_piped_imag[2] + f_piped_imag[1]); |
end |
|
always @(posedge i_clk) |
if ((f_state == 1'b0)&&(f_syncd)) |
begin |
assert(!o_sync); |
assert(o_r == f_piped_real[3] - f_piped_real[2]); |
assert(o_i == f_piped_imag[3] - f_piped_imag[2]); |
end |
|
always @(*) |
if (wait_for_sync) |
begin |
assert(!f_rsyncd); |
assert(!o_sync); |
assert(f_state == 0); |
end |
|
`endif // FORMAL |
endmodule |
/rtl/longbimpy.v
22,23 → 22,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
46,7 → 48,11
// |
`default_nettype none |
// |
module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r); |
module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r |
`ifdef FORMAL |
, f_past_a_unsorted, f_past_b_unsorted |
`endif |
); |
parameter IAW=8, // The width of i_a, min width is 5 |
IBW=12, // The width of i_b, can be anything |
// The following three parameters should not be changed |
58,11 → 64,16
IW=(AW+1)&(-2), // Internal width of A |
LUTB=2, // How many bits we can multiply by at once |
TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau |
input i_clk, i_ce; |
input [(IAW-1):0] i_a_unsorted; |
input [(IBW-1):0] i_b_unsorted; |
input wire i_clk, i_ce; |
input wire [(IAW-1):0] i_a_unsorted; |
input wire [(IBW-1):0] i_b_unsorted; |
output reg [(AW+BW-1):0] o_r; |
|
`ifdef FORMAL |
output wire [(IAW-1):0] f_past_a_unsorted; |
output wire [(IBW-1):0] f_past_b_unsorted; |
`endif |
|
// |
// Swap parameter order, so that AW <= BW -- for performance |
// reasons |
96,23 → 107,26
// taking the absolute value here would require an additional bit. |
// However, because our results are now unsigned, we can stay |
// within the number of bits given (for now). |
initial u_a = 0; |
generate if (IW > AW) |
begin |
begin : ABS_AND_ADD_BIT_TO_A |
always @(posedge i_clk) |
if (i_ce) |
u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) }; |
end else begin |
end else begin : ABS_A |
always @(posedge i_clk) |
if (i_ce) |
u_a <= (i_a[AW-1])?(-i_a):(i_a); |
end endgenerate |
|
initial sgn = 0; |
initial u_b = 0; |
always @(posedge i_clk) |
if (i_ce) |
begin |
u_b <= (i_b[BW-1])?(-i_b):(i_b); |
sgn <= i_a[AW-1] ^ i_b[BW-1]; |
end |
if (i_ce) |
begin : ABS_B |
u_b <= (i_b[BW-1])?(-i_b):(i_b); |
sgn <= i_a[AW-1] ^ i_b[BW-1]; |
end |
|
wire [(BW+LUTB-1):0] pr_a, pr_b; |
|
126,6 → 140,10
// a time can follow this--but the first clock can do two at a time. |
bimpy #(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a); |
bimpy #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b); |
|
initial r_s = 0; |
initial r_a[0] = 0; |
initial r_b[0] = 0; |
always @(posedge i_clk) |
if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)]; |
always @(posedge i_clk) |
132,13 → 150,18
if (i_ce) r_b[0] <= u_b; |
always @(posedge i_clk) |
if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn }; |
|
initial acc[0] = 0; |
always @(posedge i_clk) // One clk after p[0],p[1] become valid |
if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a} |
+{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} }; |
if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a} |
+{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} }; |
|
generate // Keep track of intermediate values, before multiplying them |
if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1) |
begin : gencopies |
begin : GENCOPIES |
|
initial r_a[k+1] = 0; |
initial r_b[k+1] = 0; |
always @(posedge i_clk) |
if (i_ce) |
begin |
150,23 → 173,27
|
generate // The actual multiply and accumulate stage |
if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1) |
begin : genstages |
begin : GENSTAGES |
wire [(BW+LUTB-1):0] genp; |
|
// First, the multiply: 2-bits times BW bits |
wire [(BW+LUTB-1):0] genp; |
bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp); |
|
// Then the accumulate step -- on the next clock |
initial acc[k+1] = 0; |
always @(posedge i_clk) |
if (i_ce) |
acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}}, |
genp, {(LUTB*(k+2)){1'b0}} }; |
if (i_ce) |
acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}}, |
genp, {(LUTB*(k+2)){1'b0}} }; |
end endgenerate |
|
wire [(IW+BW-1):0] w_r; |
assign w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2]; |
|
initial o_r = 0; |
always @(posedge i_clk) |
if (i_ce) |
o_r <= w_r[(AW+BW-1):0]; |
if (i_ce) |
o_r <= w_r[(AW+BW-1):0]; |
|
generate if (IW > AW) |
begin : VUNUSED |
176,4 → 203,231
// verilator lint_on UNUSED |
end endgenerate |
|
`ifdef FORMAL |
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
`define ASSERT assert |
`ifdef LONGBIMPY |
|
always @(posedge i_clk) |
if (!$past(i_ce)) |
assume(i_ce); |
|
`endif |
|
reg [AW-1:0] f_past_a [0:TLEN]; |
reg [BW-1:0] f_past_b [0:TLEN]; |
reg [TLEN+1:0] f_sgn_a, f_sgn_b; |
|
initial f_past_a[0] = 0; |
initial f_past_b[0] = 0; |
initial f_sgn_a = 0; |
initial f_sgn_b = 0; |
always @(posedge i_clk) |
if (i_ce) |
begin |
f_past_a[0] <= u_a; |
f_past_b[0] <= u_b; |
f_sgn_a[0] <= i_a[AW-1]; |
f_sgn_b[0] <= i_b[BW-1]; |
end |
|
generate for(k=0; k<TLEN; k=k+1) |
begin |
initial f_past_a[k+1] = 0; |
initial f_past_b[k+1] = 0; |
initial f_sgn_a[k+1] = 0; |
initial f_sgn_b[k+1] = 0; |
always @(posedge i_clk) |
if (i_ce) |
begin |
f_past_a[k+1] <= f_past_a[k]; |
f_past_b[k+1] <= f_past_b[k]; |
|
f_sgn_a[k+1] <= f_sgn_a[k]; |
f_sgn_b[k+1] <= f_sgn_b[k]; |
end |
end endgenerate |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
f_sgn_a[TLEN+1] <= f_sgn_a[TLEN]; |
f_sgn_b[TLEN+1] <= f_sgn_b[TLEN]; |
end |
|
always @(posedge i_clk) |
begin |
assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0])); |
assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1])); |
assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1])); |
end |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(i_ce))) |
begin |
if ($past(i_a)==0) |
`ASSERT(u_a == 0); |
else if ($past(i_a[AW-1]) == 1'b0) |
`ASSERT(u_a == $past(i_a)); |
|
if ($past(i_b)==0) |
`ASSERT(u_b == 0); |
else if ($past(i_b[BW-1]) == 1'b0) |
`ASSERT(u_b == $past(i_b)); |
end |
|
generate // Keep track of intermediate values, before multiplying them |
if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1) |
begin : ASSERT_GENCOPY |
always @(posedge i_clk) |
if (i_ce) |
begin |
if (f_past_a[k]==0) |
`ASSERT(r_a[k] == 0); |
else if (f_past_a[k]==1) |
`ASSERT(r_a[k] == 0); |
`ASSERT(r_b[k] == f_past_b[k]); |
end |
end endgenerate |
|
generate // The actual multiply and accumulate stage |
if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1) |
begin : ASSERT_GENSTAGE |
always @(posedge i_clk) |
if ((f_past_valid)&&($past(i_ce))) |
begin |
if (f_past_a[k+1]==0) |
`ASSERT(acc[k] == 0); |
if (f_past_a[k+1]==1) |
`ASSERT(acc[k] == f_past_b[k+1]); |
if (f_past_b[k+1]==0) |
`ASSERT(acc[k] == 0); |
if (f_past_b[k+1]==1) |
begin |
`ASSERT(acc[k][(2*k)+3:0] |
== f_past_a[k+1][(2*k)+3:0]); |
`ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0); |
end |
end |
end endgenerate |
|
wire [AW-1:0] f_past_a_neg = - f_past_a[TLEN]; |
wire [BW-1:0] f_past_b_neg = - f_past_b[TLEN]; |
|
wire [AW-1:0] f_past_a_pos = f_past_a[TLEN][AW-1] |
? f_past_a_neg : f_past_a[TLEN]; |
wire [BW-1:0] f_past_b_pos = f_past_b[TLEN][BW-1] |
? f_past_b_neg : f_past_b[TLEN]; |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(i_ce))) |
begin |
if ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0)) |
`ASSERT(o_r == 0); |
else if (f_past_a[TLEN]==1) |
begin |
if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0) |
begin |
`ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]); |
`ASSERT(o_r[AW+BW-1:BW] == 0); |
end else begin // if (f_sgn_b[TLEN+1]) begin |
`ASSERT(o_r[BW-1:0] == f_past_b_neg); |
`ASSERT(o_r[AW+BW-1:BW] |
== {(AW){f_past_b_neg[BW-1]}}); |
end |
end else if (f_past_b[TLEN]==1) |
begin |
if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0) |
begin |
`ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]); |
`ASSERT(o_r[AW+BW-1:AW] == 0); |
end else begin |
`ASSERT(o_r[AW-1:0] == f_past_a_neg); |
`ASSERT(o_r[AW+BW-1:AW] |
== {(BW){f_past_a_neg[AW-1]}}); |
end |
end else begin |
`ASSERT(o_r != 0); |
if (!o_r[AW+BW-1:0]) |
begin |
`ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0]) |
||(o_r[AW+BW-1:AW]!=0)); |
`ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0]) |
||(o_r[AW+BW-1:BW]!=0)); |
end else begin |
`ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0]) |
||(! (&o_r[AW+BW-1:AW]))); |
`ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0]) |
||(! (&o_r[AW+BW-1:BW]!=0))); |
end |
end |
end |
|
generate if (IAW <= IBW) |
begin : NO_PARAM_CHANGE |
assign f_past_a_unsorted = (!f_sgn_a[TLEN+1]) |
? f_past_a[TLEN] : f_past_a_neg; |
assign f_past_b_unsorted = (!f_sgn_b[TLEN+1]) |
? f_past_b[TLEN] : f_past_b_neg; |
end else begin : SWAP_PARAMETERS |
assign f_past_a_unsorted = (!f_sgn_b[TLEN+1]) |
? f_past_b[TLEN] : f_past_b_neg; |
assign f_past_b_unsorted = (!f_sgn_a[TLEN+1]) |
? f_past_a[TLEN] : f_past_a_neg; |
end endgenerate |
`ifdef BUTTERFLY |
// The following properties artificially restrict the inputs |
// to this long binary multiplier to only those values whose |
// absolute value is 0..7. It is used by the formal proof of |
// the BUTTERFLY to artificially limit the scope of the proof. |
// By the time the butterfly sees this code, it will be known |
// that the long binary multiply works. At issue will no longer |
// be whether or not this works, but rather whether it works in |
// context. For that purpose, we'll need to check timing, not |
// results. Checking against inputs of +/- 1 and 0 are perfect |
// for that task. The below assumptions (yes they are assumptions |
// just go a little bit further. |
// |
// THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT |
// THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF. |
// |
// This just limits the proof for the butterfly, the parent. |
// module that calls this one |
// |
// Start by assuming that all inputs have an absolute value less |
// than eight. |
always @(*) |
assume(u_a[AW-1:3] == 0); |
always @(*) |
assume(u_b[BW-1:3] == 0); |
|
// If the inputs have these properties, then so too do many of |
// our internal values. ASSERT therefore that we never get out |
// of bounds |
generate for(k=0; k<TLEN; k=k+1) |
begin |
always @(*) |
begin |
assert(f_past_a[k][AW-1:3] == 0); |
assert(f_past_b[k][BW-1:3] == 0); |
end |
end endgenerate |
|
generate for(k=0; k<TLEN-1; k=k+1) |
begin |
always @(*) |
assert(acc[k][IW+BW-1:6] == 0); |
end endgenerate |
|
generate for(k=0; k<TLEN-2; k=k+1) |
begin |
always @(*) |
assert(r_b[k][BW-1:3] == 0); |
end endgenerate |
`endif // BUTTERFLY |
`endif // FORMAL |
endmodule |
/rtl/qtrstage.v
9,7 → 9,33
// so that all of the multiplies are accomplished by additions and |
// multiplexers only. |
// |
// Operation: |
// The operation of this stage is identical to the regular stages of |
// the FFT (see them for details), with one additional and critical |
// difference: this stage doesn't require any hardware multiplication. |
// The multiplies within it may all be accomplished using additions and |
// subtractions. |
// |
// Let's see how this is done. Given x[n] and x[n+2], cause thats the |
// stage we are working on, with i_sync true for x[0] being input, |
// produce the output: |
// |
// y[n ] = x[n] + x[n+2] |
// y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2} (forward transform) |
// = (x[n] - x[n+2]) * -j^n |
// |
// y[n].r = x[n].r + x[n+2].r (This is the easy part) |
// y[n].i = x[n].i + x[n+2].i |
// |
// y[2].r = x[0].r - x[2].r |
// y[2].i = x[0].i - x[2].i |
// |
// y[3].r = (x[1].i - x[3].i) (forward transform) |
// y[3].i = - (x[1].r - x[3].r) |
// |
// y[3].r = - (x[1].i - x[3].i) (inverse transform) |
// y[3].i = (x[1].r - x[3].r) (INVERSE = 1) |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
17,23 → 43,25
// |
// Copyright (C) 2015-2018, Gisselquist Technology, LLC |
// |
// This program is free software (firmware): you can redistribute it and/or |
// modify it under the terms of the GNU General Public License as published |
// by the Free Software Foundation, either version 3 of the License, or (at |
// your option) any later version. |
// This file is part of the general purpose pipelined FFT project. |
// |
// This program is distributed in the hope that it will be useful, but WITHOUT |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
// for more details. |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// You should have received a copy of the GNU General Public License along |
// with this program. (It's in the $(ROOT)/doc directory, run make with no |
// target there if the PDF file isn't present.) If not, see |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: GPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/gpl.html |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
43,19 → 71,16
// |
module qtrstage(i_clk, i_reset, i_ce, i_sync, i_data, o_data, o_sync); |
parameter IWIDTH=16, OWIDTH=IWIDTH+1; |
// Parameters specific to the core that should be changed when this |
// core is built ... Note that the minimum LGSPAN is 2. Smaller |
// spans must use the fftdoubles stage. |
parameter LGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0; |
input i_clk, i_reset, i_ce, i_sync; |
input [(2*IWIDTH-1):0] i_data; |
parameter LGWIDTH=8, INVERSE=0,SHIFT=0; |
input wire i_clk, i_reset, i_ce, i_sync; |
input wire [(2*IWIDTH-1):0] i_data; |
output reg [(2*OWIDTH-1):0] o_data; |
output reg o_sync; |
|
reg wait_for_sync; |
reg [3:0] pipeline; |
reg [2:0] pipeline; |
|
reg [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i; |
reg signed [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i; |
|
reg [(2*OWIDTH-1):0] ob_a; |
wire [(2*OWIDTH-1):0] ob_b; |
63,20 → 88,23
assign ob_b = { ob_b_r, ob_b_i }; |
|
reg [(LGWIDTH-1):0] iaddr; |
reg [(2*IWIDTH-1):0] imem; |
reg [(2*IWIDTH-1):0] imem [0:1]; |
|
wire signed [(IWIDTH-1):0] imem_r, imem_i; |
assign imem_r = imem[(2*IWIDTH-1):(IWIDTH)]; |
assign imem_i = imem[(IWIDTH-1):0]; |
assign imem_r = imem[1][(2*IWIDTH-1):(IWIDTH)]; |
assign imem_i = imem[1][(IWIDTH-1):0]; |
|
wire signed [(IWIDTH-1):0] i_data_r, i_data_i; |
assign i_data_r = i_data[(2*IWIDTH-1):(IWIDTH)]; |
assign i_data_i = i_data[(IWIDTH-1):0]; |
|
reg [(2*OWIDTH-1):0] omem; |
reg [(2*OWIDTH-1):0] omem [0:1]; |
|
wire signed [(OWIDTH-1):0] rnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i, |
n_rnd_diff_r, n_rnd_diff_i; |
// |
// Round our output values down to OWIDTH bits |
// |
wire signed [(OWIDTH-1):0] rnd_sum_r, rnd_sum_i, |
rnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i; |
convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_sum_r(i_clk, i_ce, |
sum_r, rnd_sum_r); |
|
100,28 → 128,31
iaddr <= 0; |
end else if ((i_ce)&&((!wait_for_sync)||(i_sync))) |
begin |
iaddr <= iaddr + { {(LGWIDTH-1){1'b0}}, 1'b1 }; |
iaddr <= iaddr + 1'b1; |
wait_for_sync <= 1'b0; |
end |
|
always @(posedge i_clk) |
if (i_ce) |
imem <= i_data; |
begin |
imem[0] <= i_data; |
imem[1] <= imem[0]; |
end |
|
|
// Note that we don't check on wait_for_sync or i_sync here. |
// Why not? Because iaddr will always be zero until after the |
// first i_ce, so we are safe. |
initial pipeline = 4'h0; |
initial pipeline = 3'h0; |
always @(posedge i_clk) |
if (i_reset) |
pipeline <= 4'h0; |
pipeline <= 3'h0; |
else if (i_ce) // is our pipeline process full? Which stages? |
pipeline <= { pipeline[2:0], iaddr[0] }; |
pipeline <= { pipeline[1:0], iaddr[1] }; |
|
// This is the pipeline[-1] stage, pipeline[0] will be set next. |
always @(posedge i_clk) |
if ((i_ce)&&(iaddr[0])) |
if ((i_ce)&&(iaddr[1])) |
begin |
sum_r <= imem_r + i_data_r; |
sum_i <= imem_i + i_data_i; |
140,7 → 171,7
begin |
ob_a <= { rnd_sum_r, rnd_sum_i }; |
// on Even, W = e^{-j2pi 1/4 0} = 1 |
if (ODD == 0) |
if (!iaddr[0]) |
begin |
ob_b_r <= rnd_diff_r; |
ob_b_i <= rnd_diff_i; |
158,21 → 189,172
always @(posedge i_clk) |
if (i_ce) |
begin // In sequence, clock = 3 |
if (pipeline[3]) |
begin |
omem <= ob_b; |
omem[0] <= ob_b; |
omem[1] <= omem[0]; |
if (pipeline[2]) |
o_data <= ob_a; |
end else |
o_data <= omem; |
else |
o_data <= omem[1]; |
end |
|
// Don't forget in the sync check that we are running |
// at two clocks per sample. Thus we need to |
// produce a sync every 2^(LGWIDTH-1) clocks. |
initial o_sync = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
o_sync <= 1'b0; |
else if (i_ce) |
o_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101); |
o_sync <= (iaddr[2:0] == 3'b101); |
|
`ifdef FORMAL |
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid = 1'b1; |
|
`ifdef QTRSTAGE |
always @(posedge i_clk) |
assume((i_ce)||($past(i_ce))||($past(i_ce,2))); |
`endif |
|
// The below logic only works if the rounding stage does nothing |
initial assert(IWIDTH+1 == OWIDTH); |
|
reg signed [IWIDTH-1:0] f_piped_real [0:7]; |
reg signed [IWIDTH-1:0] f_piped_imag [0:7]; |
|
always @(posedge i_clk) |
if (i_ce) |
begin |
f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH]; |
f_piped_imag[0] <= i_data[ IWIDTH-1:0]; |
|
f_piped_real[1] <= f_piped_real[0]; |
f_piped_imag[1] <= f_piped_imag[0]; |
|
f_piped_real[2] <= f_piped_real[1]; |
f_piped_imag[2] <= f_piped_imag[1]; |
|
f_piped_real[3] <= f_piped_real[2]; |
f_piped_imag[3] <= f_piped_imag[2]; |
|
f_piped_real[4] <= f_piped_real[3]; |
f_piped_imag[4] <= f_piped_imag[3]; |
|
f_piped_real[5] <= f_piped_real[4]; |
f_piped_imag[5] <= f_piped_imag[4]; |
|
f_piped_real[6] <= f_piped_real[5]; |
f_piped_imag[6] <= f_piped_imag[5]; |
|
f_piped_real[7] <= f_piped_real[6]; |
f_piped_imag[7] <= f_piped_imag[6]; |
end |
|
reg f_rsyncd; |
wire f_syncd; |
|
initial f_rsyncd = 0; |
always @(posedge i_clk) |
if(i_reset) |
f_rsyncd <= 1'b0; |
else if (!f_rsyncd) |
f_rsyncd <= (o_sync); |
assign f_syncd = (f_rsyncd)||(o_sync); |
|
reg [1:0] f_state; |
|
|
initial f_state = 0; |
always @(posedge i_clk) |
if (i_reset) |
f_state <= 0; |
else if ((i_ce)&&((!wait_for_sync)||(i_sync))) |
f_state <= f_state + 1; |
|
always @(*) |
if (f_state != 0) |
assume(!i_sync); |
|
always @(posedge i_clk) |
assert(f_state[1:0] == iaddr[1:0]); |
|
wire signed [2*IWIDTH-1:0] f_i_real, f_i_imag; |
assign f_i_real = i_data[2*IWIDTH-1:IWIDTH]; |
assign f_i_imag = i_data[ IWIDTH-1:0]; |
|
wire signed [OWIDTH-1:0] f_o_real, f_o_imag; |
assign f_o_real = o_data[2*OWIDTH-1:OWIDTH]; |
assign f_o_imag = o_data[ OWIDTH-1:0]; |
|
always @(posedge i_clk) |
if (f_state == 2'b11) |
begin |
assume(f_piped_real[0] != 3'sb100); |
assume(f_piped_real[2] != 3'sb100); |
assert(sum_r == f_piped_real[2] + f_piped_real[0]); |
assert(sum_i == f_piped_imag[2] + f_piped_imag[0]); |
|
assert(diff_r == f_piped_real[2] - f_piped_real[0]); |
assert(diff_i == f_piped_imag[2] - f_piped_imag[0]); |
end |
|
always @(posedge i_clk) |
if ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4))) |
begin |
assert(rnd_sum_r == f_piped_real[3]+f_piped_real[1]); |
assert(rnd_sum_i == f_piped_imag[3]+f_piped_imag[1]); |
assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]); |
assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]); |
end |
|
always @(posedge i_clk) |
if ((f_state == 2'b10)&&(f_syncd)) |
begin |
// assert(o_sync); |
assert(f_o_real == f_piped_real[5] + f_piped_real[3]); |
assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]); |
end |
|
always @(posedge i_clk) |
if ((f_state == 2'b11)&&(f_syncd)) |
begin |
assert(!o_sync); |
assert(f_o_real == f_piped_real[5] + f_piped_real[3]); |
assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]); |
end |
|
always @(posedge i_clk) |
if ((f_state == 2'b00)&&(f_syncd)) |
begin |
assert(!o_sync); |
assert(f_o_real == f_piped_real[7] - f_piped_real[5]); |
assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]); |
end |
|
always @(*) |
if ((iaddr[2:0] == 0)&&(!wait_for_sync)) |
assume(i_sync); |
|
always @(*) |
if (wait_for_sync) |
assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd)); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset))) |
assert(!wait_for_sync); |
|
always @(posedge i_clk) |
if ((f_state == 2'b01)&&(f_syncd)) |
begin |
assert(!o_sync); |
if (INVERSE) |
begin |
assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]); |
assert(f_o_imag == f_piped_real[7]-f_piped_real[5]); |
end else begin |
assert(f_o_real == f_piped_imag[7]-f_piped_imag[5]); |
assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]); |
end |
end |
|
`endif |
endmodule |
/rtl/windowfn.v
0,0 → 1,679
//////////////////////////////////////////////////////////////////////////////// |
// |
// Filename: windowfn.v |
// |
// Project: A General Purpose Pipelined FFT Implementation |
// |
// Purpose: Apply a window function to incoming real data points, so as |
// to create an outgoing stream of data samples that can be used |
// in an FFT construct using 50% overlap. The overlap, coupled with the |
// FFT's requirements, can make for somewhat of a problem. Hence, there |
// are two 'ce' signals coming into the core. A primary ce signal when |
// new data is ready, and an alternate that must take place between |
// primary signals. This allows the second/alternate CE signal to be |
// appropriately spaced between the primary CE signals so that the |
// outgoing signals to the FFT will still meet separation |
// requirements--whatever they would be for the application. |
// |
// For this module, the window size is the FFT length. |
// |
// Ports: |
// i_clk, i_reset Should be self explanatory. The reset is assumed to |
// be synchronous. |
// |
// i_tap_wr, i_tap For use when OPT_FIXED_TAPS is zero, i_tap_wr signals |
// that a "tap" or "coefficient" of the filter should be |
// written. When i_tap_wr is high, i_tap is taken to be |
// a coefficient to the core. There's an internal address |
// counter, so no address need be given. However, the counter is |
// reset on an i_reset signal. |
// |
// i_ce, i_alt_ce As discussed above, these signals need to alternate |
// back and forth. Following a reset, the first signal coming in |
// should be an i_ce signal. |
// |
// i_sample The incoming sample data, valid any time i_ce is true, |
// and accepted into the core on that clock tick. |
// |
// o_ce True when the core has a valid output |
// o_sample The output calculated by the core, ready to pass to |
// the FFT |
// o_frame True on the first sample of any frame. Following a |
// reset, o_ce will remain false until o_frame is also |
// true with it. From then on out, o_frame will be true once |
// every FFT length. |
// |
// For a timing/signaling diagram, please feel free to run the formal |
// tools in cover mode for this module, 'sby -f windowfn.sby cover', |
// and then check out the generated trace. |
// |
// |
// Creator: Dan Gisselquist, Ph.D. |
// Gisselquist Technology, LLC |
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// Copyright (C) 2018, Gisselquist Technology, LLC |
// |
// This file is part of the general purpose pipelined FFT project. |
// |
// The pipelined FFT project is free software (firmware): you can redistribute |
// it and/or modify it under the terms of the GNU Lesser General Public License |
// as published by the Free Software Foundation, either version 3 of the |
// License, or (at your option) any later version. |
// |
// The pipelined FFT project is distributed in the hope that it will be useful, |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser |
// General Public License for more details. |
// |
// You should have received a copy of the GNU Lesser General Public License |
// along with this program. (It's in the $(ROOT)/doc directory. Run make |
// with no target there if the PDF file isn't present.) If not, see |
// <http://www.gnu.org/licenses/> for a copy. |
// |
// License: LGPL, v3, as defined and found on www.gnu.org, |
// http://www.gnu.org/licenses/lgpl.html |
// |
// |
//////////////////////////////////////////////////////////////////////////////// |
// |
// |
`default_nettype none |
// |
module windowfn(i_clk, i_reset, i_tap_wr, i_tap, |
i_ce, i_sample, i_alt_ce, |
o_frame, o_ce, o_sample); |
parameter IW=16, OW=16, TW=16, LGNFFT = 4; |
parameter [0:0] OPT_FIXED_TAPS = 1'b0; |
parameter INITIAL_COEFFS = ""; |
// |
localparam AW=IW+TW; |
// |
input wire i_clk, i_reset; |
// |
input wire i_tap_wr; |
input wire [(TW-1):0] i_tap; |
// |
input wire i_ce; |
input wire [(IW-1):0] i_sample; |
input wire i_alt_ce; |
// |
output reg o_frame, o_ce; |
output reg [(OW-1):0] o_sample; |
|
|
reg [(TW-1):0] cmem [0:(1<<LGNFFT)-1]; |
reg [(TW-1):0] dmem [0:(1<<LGNFFT)-1]; |
|
// |
// LOAD THE TAPS |
// |
wire [LGNFFT-1:0] tapwidx; |
generate if (OPT_FIXED_TAPS) |
begin : SET_FIXED_TAPS |
|
initial $readmemh(INITIAL_COEFFS, cmem); |
|
assign tapwidx = 0; |
// Make Verilators -Wall happy |
// Verilator lint_off UNUSED |
wire [TW:0] ignored_inputs; |
assign ignored_inputs = { i_tap_wr, i_tap }; |
// Verilator lint_on UNUSED |
|
end else begin : DYNAMICALLY_SET_TAPS |
|
// Coef memory write index |
reg [(LGNFFT-1):0] r_tapwidx; |
|
initial r_tapwidx = 0; |
always @(posedge i_clk) |
if(i_reset) |
r_tapwidx <= 0; |
else if (i_tap_wr) |
r_tapwidx <= r_tapwidx + 1'b1; |
|
if (INITIAL_COEFFS != 0) |
initial $readmemh(INITIAL_COEFFS, cmem); |
always @(posedge i_clk) |
if (i_tap_wr) |
cmem[r_tapwidx] <= i_tap; |
|
assign tapwidx = r_tapwidx; |
end endgenerate |
|
|
reg [LGNFFT-1:0] dwidx, didx; |
reg [LGNFFT-1:0] tidx; |
reg top_of_block, first_block; |
reg [1:0] frame_count; |
reg p_ce, d_ce; |
reg signed [IW-1:0] data; |
reg signed [TW-1:0] tap; |
|
|
|
// |
// |
// Record the incoming data into a local memory |
// |
// |
|
// Notice how this data writing section is *independent* of the reset, |
// depending only upon new sample data. |
|
initial dwidx = 0; |
always @(posedge i_clk) |
if (i_reset) |
dwidx <= 0; |
else if (i_ce) |
dwidx <= dwidx + 1'b1; |
always @(posedge i_clk) |
if (i_ce) |
dmem[dwidx] <= i_sample; |
|
initial first_block = 1'b1; |
always @(posedge i_clk) |
if (i_reset) |
first_block <= 1'b1; |
else if ((i_alt_ce)&&(&tidx)&&(dwidx==0)) |
first_block <= 1'b0; |
|
|
// |
// |
// Keep track of the top of the block. The top of the block is the |
// first incoming data sample on an FFT or half FFT boundary. This |
// is where data processing starts from. |
// |
initial top_of_block = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
top_of_block <= 1'b0; |
else if (i_alt_ce) |
top_of_block <= (&tidx)&&((!first_block)||(dwidx==0)); |
else if (i_ce) |
top_of_block <= 1'b0; |
|
// |
// Data and coefficient memory indices. |
// |
initial didx = 0; |
always @(posedge i_clk) |
if (i_reset) |
didx <= 0; |
else if ((i_alt_ce)&&(dwidx[LGNFFT-2:0]==0)) |
begin |
didx[LGNFFT-2:0] <= 0; |
didx[LGNFFT-1] <= dwidx[LGNFFT-1]; |
end else if ((i_ce)||(i_alt_ce)) |
// Process the next point in this FFT |
didx <= didx + 1'b1; |
|
initial tidx = 0; |
always @(posedge i_clk) |
if (i_reset) |
tidx <= 0; |
else if ((i_alt_ce)&&(dwidx[LGNFFT-2:0]==0)) |
begin |
// // At the beginning of processing for a given FFT |
tidx <= 0; |
end else if ((i_ce)||(i_alt_ce)) |
// Process the next point in the window function |
tidx <= tidx + 1'b1; |
|
initial frame_count = 0; |
always @(posedge i_clk) |
if (i_reset) |
frame_count <= 0; |
else if ((i_ce)&&(top_of_block)&&(!first_block)) |
frame_count <= 3; |
else if (frame_count != 0) |
frame_count <= frame_count - 1'b1; |
|
initial o_frame = 1'b0; |
always @(posedge i_clk) |
if (i_reset) |
o_frame <= 1'b0; |
else if (frame_count == 2) |
o_frame <= 1'b1; |
else |
o_frame <= 1'b0; |
|
// |
// Following any initial i_ce, ... |
// d_ce: The data (and coefficient), read from memory,will be valid |
// p_ce: The produc of data and coefficient is valid |
// |
initial { p_ce, d_ce } = 2'h0; |
always @(posedge i_clk) |
if (i_reset) |
{ p_ce, d_ce } <= 2'h0; |
else |
{ p_ce, d_ce } <= { d_ce, (!first_block)&&((i_ce)||(i_alt_ce)) }; |
|
// Read the data sample point, and the filter coefficient, from block |
// RAM. Because this is block RAM, we have to be careful not to |
// do anything else here. |
initial data = 0; |
initial tap = 0; |
always @(posedge i_clk) |
begin |
data <= dmem[didx]; |
tap <= cmem[tidx]; |
end |
|
// |
// Multiply the two values together |
reg signed [IW+TW-1:0] product; |
`ifdef FORMAL |
// We'll implement an abstract multiply below--just to make sure the |
// timing is right. |
`else |
always @(posedge i_clk) |
product <= data * tap; |
`endif |
|
// |
// Output CE |
// |
initial o_ce = 0; |
always @(posedge i_clk) |
if (i_reset) |
o_ce <= 0; |
else |
o_ce <= p_ce; |
|
generate if (OW == AW) |
begin : BIT_ADJUSTMENT_NONE |
|
initial o_sample = 0; |
always @(posedge i_clk) |
if (i_reset) |
o_sample <= 0; |
else if (p_ce) |
o_sample <= product; |
|
end else if (OW < AW) |
begin : BIT_ADJUSTMENT_ROUNDING |
wire [AW-1:0] rounded; |
|
assign rounded = product + { {(OW){1'b0}}, product[AW-OW], |
{(AW-OW-1){!product[AW-OW]}} }; |
|
initial o_sample = 0; |
always @(posedge i_clk) |
if (i_reset) |
o_sample <= 0; |
else if (p_ce) |
o_sample <= rounded[(AW-1):(AW-OW)]; |
|
// Make Verilator happy |
// verilator lint_off UNUSED |
wire [AW-OW-1:0] unused_rounding_bits; |
assign unused_rounding_bits = rounded[AW-OW-1:0]; |
// verilator lint_on UNUSED |
|
end else // if (OW > AW) |
begin : BIT_ADJUSTMENT_EXTENDING |
|
always @(posedge i_clk) |
if (i_reset) |
o_sample <= 0; |
else if (p_ce) |
o_sample <= { product, {(OW-AW){1'b0}} }; |
|
end endgenerate |
|
// Make Verilator happy |
// verilator lint_off UNUSED |
wire [LGNFFT-1:0] unused; |
assign unused = tapwidx; |
// verilator lint_on UNUSED |
|
`ifdef FORMAL |
reg f_past_valid; |
initial f_past_valid = 1'b0; |
always @(posedge i_clk) |
f_past_valid <= 1'b1; |
|
// Keep track of the phase of this operation |
reg [LGNFFT:0] f_phase; |
|
initial f_phase = 0; |
always @(posedge i_clk) |
if (i_reset) |
begin |
f_phase <= 0; |
end else if ((i_ce)&&(top_of_block)) |
f_phase[LGNFFT-1:0] <= 1; |
else if ((i_ce)||(i_alt_ce)) |
f_phase <= f_phase + 1; |
|
/////// |
// |
// Assumptions about the input |
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset))&&(!$past(i_ce))) |
restrict($stable(i_sample)); |
|
always @(*) |
if (tapwidx != 0) |
assume(!i_ce); |
|
always @(posedge i_clk) |
if (f_phase[0]) |
assume(!i_ce); |
|
always @(posedge i_clk) |
if (!f_phase[0]) |
assume(!i_alt_ce); |
|
always @(*) |
if (i_tap_wr) |
assume((!i_ce)&&(!i_alt_ce)); |
|
always @(*) |
assume(!i_tap_wr); |
|
always @(*) |
if ((i_ce)&&(top_of_block)) |
begin |
assert(dwidx[LGNFFT-2:0]==0); |
assert(didx[LGNFFT-2:0]==0); |
end else if (dwidx[LGNFFT-2:0]!=0) |
assert(!top_of_block); |
|
always @(*) |
if ((dwidx[LGNFFT-2:0]==0)&&(!didx[0])) |
assert(top_of_block||f_first_block||first_block); |
else |
assert(!top_of_block); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(first_block))&&(!first_block)) |
assert(top_of_block); |
|
/* |
always @(posedge i_clk) |
if (f_past_valid) |
begin |
if ($past(i_reset)) |
assert(!top_of_block); |
else if ($past(i_ce)) |
assert(top_of_block == ((!first_block)&&(dwidx[LGNFFT-2:0]==0))); |
else |
|
assert(top_of_block == ((!first_block)&&(&dwidx[LGNFFT-2:1]))); |
end |
*/ |
|
always @(posedge i_clk) |
if ((f_past_valid)&&($past(first_block))&&(!$past(first_block))) |
assert(top_of_block); |
|
////////////////////////////////////////////////////////////////// |
// |
// Assertions about our outputs |
// |
///////////////////////// |
// |
// |
// always @(*) |
// if (o_frame) |
// assert(o_ce); |
always @(*) |
if (first_block) |
assert(!o_ce); |
|
reg f_waiting_for_first_frame; |
|
initial f_waiting_for_first_frame = 1; |
always @(posedge i_clk) |
if ((i_reset)||(first_block)) |
f_waiting_for_first_frame <= 1'b1; |
else if (o_ce) |
f_waiting_for_first_frame <= 1'b0; |
|
always @(*) |
if ((f_waiting_for_first_frame)&&(o_ce)) |
assert(o_frame); |
always @(*) |
if (f_phase == 0) |
assert((top_of_block)||(first_block)); |
|
always @(*) |
if (f_waiting_for_first_frame) |
begin |
if (f_phase[LGNFFT-1:0] > 3) |
begin |
assert((first_block)&&(!o_frame)); |
assert({o_ce, p_ce, d_ce} == 0); |
assert(frame_count == 0); |
end else if (f_phase == 0) |
begin |
assert((!o_frame)&&({o_ce, p_ce, d_ce} == 0)); |
assert(frame_count == 0); |
end else if ((f_phase > 0)&&(!first_block)) |
assert(|{o_ce, p_ce, d_ce }); |
else if ((frame_count != 0)||(first_block)) |
assert(!o_frame); |
else |
assert(o_frame); |
end |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset))) |
assert(o_ce || $stable(o_sample)); |
|
/* |
always @(*) |
assert(m_ce == (f_phase == 0)); |
always @(*) |
assert(d_ce == (f_phase == 1)); |
always @(*) |
assert(p_ce == (f_phase == 2)); |
always @(*) |
assert(o_ce == (f_phase == 3)); |
always @(*) |
assert(o_frame == (f_phase == 3)); |
*/ |
|
always @(*) |
assert(didx[LGNFFT-2:0] == tidx[LGNFFT-2:0]); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(!$past(i_reset)) |
&&($past(i_ce))&&($past(top_of_block))) |
assert(tidx == 1); |
|
|
wire [LGNFFT:0] f_phase_plus_one; |
always @(*) |
f_phase_plus_one = f_phase + 1; |
/* |
end else if ($past(i_ce)) |
begin |
// 1 0 // 0 0 Top of block 0 |
// 2 1 // 1 1 ALT 0 |
// 3 1 // 2 1 1 |
// 4 2 // 3 2 ALT 1 |
// 5 2 // 4 2 2 |
// 6 3 // 5 3 ALT 2 |
// 7 3 // 6 3 3 |
// 0 4 // 7 4 ALT 3 |
// 1 // 4 4 Top of block 0 |
// 2 // 5 5 0 |
// // 6 5 ALT 1 |
// // 7 6 1 |
// // 0 6 ALT 2 |
// // 1 7 2 |
// // 2 7 ALT |
// // 3 0 |
// // 0 0 ALT Top of block |
// // 1 1 |
// // 2 1 |
// // 3 2 |
// // 4 2 |
*/ |
always @(*) |
assert(f_phase_plus_one[LGNFFT:1] == dwidx[LGNFFT-1:0]); |
always @(*) |
assert(f_phase[0] == didx[0]); |
always @(*) |
if (f_phase[LGNFFT]) |
begin |
assert(f_phase[LGNFFT-1:0] == {!didx[LGNFFT-1],didx[LGNFFT-2:0]}); |
assert((dwidx[LGNFFT-1]==1) |
||(dwidx[LGNFFT-2:0]==0)); |
end else begin |
assert((dwidx[LGNFFT-1]==0) |
||((dwidx[LGNFFT-1])&&(dwidx[LGNFFT-2:0]==0)&&(&f_phase[LGNFFT-1:0]))); |
assert(f_phase[LGNFFT-1:0] == didx[LGNFFT-1:0]); |
end |
|
always @(*) |
assert(f_phase[LGNFFT-1:0] == tidx[LGNFFT-1:0]); |
always @(*) |
assert(f_phase[LGNFFT-2:0] == didx[LGNFFT-2:0]); |
wire [LGNFFT-1:0] f_diff_idx; |
assign f_diff_idx = didx - dwidx; |
always @(*) |
if (!first_block) |
assert(f_diff_idx < { 1'b1, {(LGNFFT-1){1'b0}} }); |
|
|
always @(*) |
if (top_of_block) |
assert(!first_block); |
|
reg f_first_block; |
initial f_first_block = 1'b1; |
always @(posedge i_clk) |
if (i_ce) |
f_first_block <= first_block; |
|
always @(*) |
if ((top_of_block)&&(i_ce)&&(!f_first_block)) |
assert(didx[LGNFFT-2:0] == dwidx[LGNFFT-2:0]); |
|
always @(*) |
if ((!f_first_block)&&(!first_block)&&(dwidx[LGNFFT-2:0]==0) |
&&(didx[LGNFFT-1:0]==0)) |
assert(top_of_block); |
|
//////////////////////////////////////////////////////////////////////////////// |
// |
// Abstract Multiply |
// |
//////////////////////////////////////////////////////////////////////////////// |
// Gin up a really quick abstract multiply for formal testing |
// only. always @(posedge i_clk) |
(* anyconst *) signed reg [IW+TW-1:0] pre_product; |
always @(posedge i_clk) |
if (data == 0) |
assume(pre_product == 0); |
always @(posedge i_clk) |
if (tap == 0) |
assume(pre_product == 0); |
always @(posedge i_clk) |
if (data == 1) |
assume(pre_product == tap); |
always @(posedge i_clk) |
if (tap == 1) |
assume(pre_product == data); |
always @(posedge i_clk) |
if ((i_ce)||(i_alt_ce)) |
product <= pre_product; |
|
//////////////////////////////////////////////////////////////////////////////// |
// |
// Arbitrary memory test |
// |
//////////////////////////////////////////////////////////////////////////////// |
(* anyconst *) reg [LGNFFT-1:0] f_addr; |
signed reg [TW-1:0] f_tap; |
signed reg [IW-1:0] f_value, f_tap; |
reg f_this_dce, f_this_pce, |
f_this_oce, f_this_tap; |
|
initial assume(f_tap == cmem[f_addr]); |
always @(*) |
assert(f_tap == cmem[f_addr]); |
always @(posedge i_clk) |
if ((i_tap_wr)&&(f_addr == tapwidx)) |
f_tap <= i_tap; |
|
initial f_value = 0; |
initial assume(dmem[f_addr] == f_value); |
always @(*) |
assert(f_value == dmem[f_addr]); |
always @(posedge i_clk) |
if ((i_ce)&&(dwidx == f_addr)) |
f_value <= i_sample; |
initial { f_this_oce, f_this_pce, f_this_dce } = 3'h0; |
always @(posedge i_clk) |
if ((i_reset)||(i_tap_wr)) |
{ f_this_oce, f_this_pce, f_this_dce } <= 3'h0; |
else |
{ f_this_oce, f_this_pce, f_this_dce } |
<= { f_this_pce, f_this_dce, |
(((i_ce)||(i_alt_ce)) |
&&(f_past_valid)&&(f_addr == didx)) }; |
initial f_this_tap = 0; |
always @(posedge i_clk) |
if (i_reset) |
f_this_tap <= 0; |
else if ((i_ce)||(i_alt_ce)) |
f_this_tap <= (f_past_valid)&&(f_addr == tidx); |
else |
f_this_tap <= 0; |
|
|
always @(posedge i_clk) |
if (f_this_tap) |
assert(tap == f_tap); |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(f_this_dce)) |
assert(data == $past(f_value)); |
|
reg signed [IW-1:0] f_past_data; |
reg signed [TW-1:0] f_past_tap; |
|
always @(posedge i_clk) |
begin |
f_past_data <= data; |
f_past_tap <= tap; |
end |
|
always @(posedge i_clk) |
if ((f_past_valid)&&(f_this_pce)) |
begin |
if (f_past_tap == 0) |
assert(product == 0); |
if (f_past_data == 0) |
assert(product == 0); |
if (f_past_tap == 1) |
assert(product=={{(TW){f_past_data[IW-1]}},f_past_data}); |
if (f_past_data == 1) |
assert(product=={{{IW}{f_past_tap[TW-1]}},f_past_tap}); |
end |
|
|
//////////////////////////////////////////////////////////////////////////////// |
// |
// Cover tests |
// |
//////////////////////////////////////////////////////////////////////////////// |
reg f_second_frame; |
initial f_second_frame = 1'b0; |
always @(posedge i_clk) |
if ((o_ce)&&(o_frame)) |
f_second_frame <= 1'b1; |
|
always @(posedge i_clk) |
cover((o_ce)&&(o_frame)&&(f_second_frame)); |
`endif |
endmodule |
rtl
Property changes :
Added: svn:ignore
## -0,0 +1 ##
+obj_dir