URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk
Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Rev 39

Compare with Previous | Blame | View Log
////////////////////////////////////////////////////////////////////////////////
//
// Filename: 	../rtl/longbimpy.v
//
// Project:	A General Purpose Pipelined FFT Implementation
//
// Purpose:	A portable shift and add multiply, built with the knowledge
//	of the existence of a six bit LUT and carry chain.  That knowledge
//	allows us to multiply two bits from one value at a time against all
//	of the bits of the other value.  This sub multiply is called the
//	bimpy.
//
//	For minimal processing delay, make the first parameter the one with
//	the least bits, so that AWIDTH <= BWIDTH.
//
//
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This file is part of the general purpose pipelined FFT project.
//
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program.  (It's in the $(ROOT)/doc directory.  Run make
// with no target there if the PDF file isn't present.)  If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License:	LGPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype	none
//
module	longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r
`ifdef	FORMAL
	, f_past_a_unsorted, f_past_b_unsorted
`endif
		);
	parameter	IAW=8,	// The width of i_a, min width is 5
			IBW=12,	// The width of i_b, can be anything
			// The following three parameters should not be changed
			// by any implementation, but are based upon hardware
			// and the above values:
			OW=IAW+IBW;	// The output width
	localparam	AW = (IAW<IBW) ? IAW : IBW,
			BW = (IAW<IBW) ? IBW : IAW,
			IW=(AW+1)&(-2),	// Internal width of A
			LUTB=2,	// How many bits we can multiply by at once
			TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau
	input	wire			i_clk, i_ce;
	input	wire	[(IAW-1):0]	i_a_unsorted;
	input	wire	[(IBW-1):0]	i_b_unsorted;
	output	reg	[(AW+BW-1):0]	o_r;
 
`ifdef	FORMAL
	output	wire	[(IAW-1):0]	f_past_a_unsorted;
	output	wire	[(IBW-1):0]	f_past_b_unsorted;
`endif
 
	//
	// Swap parameter order, so that AW <= BW -- for performance
	// reasons
	wire	[AW-1:0]	i_a;
	wire	[BW-1:0]	i_b;
	generate if (IAW <= IBW)
	begin : NO_PARAM_CHANGE
		assign i_a = i_a_unsorted;
		assign i_b = i_b_unsorted;
	end else begin : SWAP_PARAMETERS
		assign i_a = i_b_unsorted;
		assign i_b = i_a_unsorted;
	end endgenerate
 
	reg	[(IW-1):0]	u_a;
	reg	[(BW-1):0]	u_b;
	reg			sgn;
 
	reg	[(IW-1-2*(LUTB)):0]	r_a[0:(TLEN-3)];
	reg	[(BW-1):0]		r_b[0:(TLEN-3)];
	reg	[(TLEN-1):0]		r_s;
	reg	[(IW+BW-1):0]		acc[0:(TLEN-2)];
	genvar k;
 
	// First step:
	// Switch to unsigned arithmetic for our multiply, keeping track
	// of the along the way.  We'll then add the sign again later at
	// the end.
	//
	// If we were forced to stay within two's complement arithmetic,
	// taking the absolute value here would require an additional bit.
	// However, because our results are now unsigned, we can stay
	// within the number of bits given (for now).
	initial u_a = 0;
	generate if (IW > AW)
	begin : ABS_AND_ADD_BIT_TO_A
		always @(posedge i_clk)
			if (i_ce)
				u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };
	end else begin : ABS_A
		always @(posedge i_clk)
			if (i_ce)
				u_a <= (i_a[AW-1])?(-i_a):(i_a);
	end endgenerate
 
	initial sgn = 0;
	initial u_b = 0;
	always @(posedge i_clk)
	if (i_ce)
	begin : ABS_B
		u_b <= (i_b[BW-1])?(-i_b):(i_b);
		sgn <= i_a[AW-1] ^ i_b[BW-1];
	end
 
	wire	[(BW+LUTB-1):0]	pr_a, pr_b;
 
	//
	// Second step: First two 2xN products.
	//
	// Since we have no tableau of additions (yet), we can do both
	// of the first two rows at the same time and add them together.
	// For the next round, we'll then have a previous sum to accumulate
	// with new and subsequent product, and so only do one product at
	// a time can follow this--but the first clock can do two at a time.
	bimpy	#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);
	bimpy	#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);
 
	initial r_s    = 0;
	initial r_a[0] = 0;
	initial r_b[0] = 0;
	always @(posedge i_clk)
		if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];
	always @(posedge i_clk)
		if (i_ce) r_b[0] <= u_b;
	always @(posedge i_clk)
		if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };
 
	initial acc[0] = 0;
	always @(posedge i_clk) // One clk after p[0],p[1] become valid
	if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
		  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };
 
	generate // Keep track of intermediate values, before multiplying them
	if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
	begin : GENCOPIES
 
		initial r_a[k+1] = 0;
		initial r_b[k+1] = 0;
		always @(posedge i_clk)
		if (i_ce)
		begin
			r_a[k+1] <= { {(LUTB){1'b0}},
				r_a[k][(IW-1-(2*LUTB)):LUTB] };
			r_b[k+1] <= r_b[k];
		end
	end endgenerate
 
	generate // The actual multiply and accumulate stage
	if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
	begin : GENSTAGES
		wire	[(BW+LUTB-1):0] genp;
 
		// First, the multiply: 2-bits times BW bits
		bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);
 
		// Then the accumulate step -- on the next clock
		initial acc[k+1] = 0;
		always @(posedge i_clk)
		if (i_ce)
			acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
				genp, {(LUTB*(k+2)){1'b0}} };
	end endgenerate
 
	wire	[(IW+BW-1):0]	w_r;
	assign	w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];
 
	initial o_r = 0;
	always @(posedge i_clk)
	if (i_ce)
		o_r <= w_r[(AW+BW-1):0];
 
	generate if (IW > AW)
	begin : VUNUSED
		// verilator lint_off UNUSED
		wire	[(IW-AW)-1:0]	unused;
		assign	unused = w_r[(IW+BW-1):(AW+BW)];
		// verilator lint_on UNUSED
	end endgenerate
 
`ifdef	FORMAL
	reg	f_past_valid;
	initial	f_past_valid = 1'b0;
	always @(posedge i_clk)
		f_past_valid <= 1'b1;
 
`define	ASSERT	assert
`ifdef	LONGBIMPY
 
	always @(posedge i_clk)
	if (!$past(i_ce))
		assume(i_ce);
 
`endif
 
	reg	[AW-1:0]	f_past_a	[0:TLEN];
	reg	[BW-1:0]	f_past_b	[0:TLEN];
	reg	[TLEN+1:0]	f_sgn_a, f_sgn_b;
 
	initial	f_past_a[0] = 0;
	initial	f_past_b[0] = 0;
	initial	f_sgn_a = 0;
	initial	f_sgn_b = 0;
	always @(posedge i_clk)
	if (i_ce)
	begin
		f_past_a[0] <= u_a;
		f_past_b[0] <= u_b;
		f_sgn_a[0] <= i_a[AW-1];
		f_sgn_b[0] <= i_b[BW-1];
	end
 
	generate for(k=0; k<TLEN; k=k+1)
	begin
		initial	f_past_a[k+1] = 0;
		initial	f_past_b[k+1] = 0;
		initial	f_sgn_a[k+1] = 0;
		initial	f_sgn_b[k+1] = 0;
		always @(posedge i_clk)
		if (i_ce)
		begin
			f_past_a[k+1] <= f_past_a[k];
			f_past_b[k+1] <= f_past_b[k];
 
			f_sgn_a[k+1]  <= f_sgn_a[k];
			f_sgn_b[k+1]  <= f_sgn_b[k];
		end
	end endgenerate
 
	always @(posedge i_clk)
	if (i_ce)
	begin
		f_sgn_a[TLEN+1] <= f_sgn_a[TLEN];
		f_sgn_b[TLEN+1] <= f_sgn_b[TLEN];
	end
 
	always @(posedge i_clk)
	begin
		assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));
		assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
		assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
	end
 
	always @(posedge i_clk)
	if ((f_past_valid)&&($past(i_ce)))
	begin
		if ($past(i_a)==0)
			`ASSERT(u_a == 0);
		else if ($past(i_a[AW-1]) == 1'b0)
			`ASSERT(u_a == $past(i_a));
 
		if ($past(i_b)==0)
			`ASSERT(u_b == 0);
		else if ($past(i_b[BW-1]) == 1'b0)
			`ASSERT(u_b == $past(i_b));
	end
 
	generate // Keep track of intermediate values, before multiplying them
	if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
	begin : ASSERT_GENCOPY
		always @(posedge i_clk)
		if (i_ce)
		begin
			if (f_past_a[k]==0)
				`ASSERT(r_a[k] == 0);
			else if (f_past_a[k]==1)
				`ASSERT(r_a[k] == 0);
			`ASSERT(r_b[k] == f_past_b[k]);
		end
	end endgenerate
 
	generate // The actual multiply and accumulate stage
	if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
	begin : ASSERT_GENSTAGE
		always @(posedge i_clk)
		if ((f_past_valid)&&($past(i_ce)))
		begin
			if (f_past_a[k+1]==0)
				`ASSERT(acc[k] == 0);
			if (f_past_a[k+1]==1)
				`ASSERT(acc[k] == f_past_b[k+1]);
			if (f_past_b[k+1]==0)
				`ASSERT(acc[k] == 0);
			if (f_past_b[k+1]==1)
			begin
				`ASSERT(acc[k][(2*k)+3:0]
						== f_past_a[k+1][(2*k)+3:0]);
				`ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);
			end
		end
	end endgenerate
 
	wire	[AW-1:0]	f_past_a_neg = - f_past_a[TLEN];
	wire	[BW-1:0]	f_past_b_neg = - f_past_b[TLEN];
 
	wire	[AW-1:0]	f_past_a_pos = f_past_a[TLEN][AW-1]
					? f_past_a_neg : f_past_a[TLEN];
	wire	[BW-1:0]	f_past_b_pos = f_past_b[TLEN][BW-1]
					? f_past_b_neg : f_past_b[TLEN];
 
	always @(posedge i_clk)
	if ((f_past_valid)&&($past(i_ce)))
	begin
		if ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0))
			`ASSERT(o_r == 0);
		else if (f_past_a[TLEN]==1)
		begin
			if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)
			begin
				`ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);
				`ASSERT(o_r[AW+BW-1:BW] == 0);
			end else begin // if (f_sgn_b[TLEN+1]) begin
				`ASSERT(o_r[BW-1:0] == f_past_b_neg);
				`ASSERT(o_r[AW+BW-1:BW]
					== {(AW){f_past_b_neg[BW-1]}});
			end
		end else if (f_past_b[TLEN]==1)
		begin
			if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)
			begin
				`ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);
				`ASSERT(o_r[AW+BW-1:AW] == 0);
			end else begin
				`ASSERT(o_r[AW-1:0] == f_past_a_neg);
				`ASSERT(o_r[AW+BW-1:AW]
					== {(BW){f_past_a_neg[AW-1]}});
			end
		end else begin
			`ASSERT(o_r != 0);
			if (!o_r[AW+BW-1:0])
			begin
				`ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])
					||(o_r[AW+BW-1:AW]!=0));
				`ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])
					||(o_r[AW+BW-1:BW]!=0));
			end else begin
				`ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])
					||(! (&o_r[AW+BW-1:AW])));
				`ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])
					||(! (&o_r[AW+BW-1:BW]!=0)));
			end
		end
	end
 
	generate if (IAW <= IBW)
	begin : NO_PARAM_CHANGE
		assign f_past_a_unsorted = (!f_sgn_a[TLEN+1])
					? f_past_a[TLEN] : f_past_a_neg;
		assign f_past_b_unsorted = (!f_sgn_b[TLEN+1])
					? f_past_b[TLEN] : f_past_b_neg;
	end else begin : SWAP_PARAMETERS
		assign f_past_a_unsorted = (!f_sgn_b[TLEN+1])
					? f_past_b[TLEN] : f_past_b_neg;
		assign f_past_b_unsorted = (!f_sgn_a[TLEN+1])
					? f_past_a[TLEN] : f_past_a_neg;
	end endgenerate
`ifdef	BUTTERFLY
	// The following properties artificially restrict the inputs
	// to this long binary multiplier to only those values whose
	// absolute value is 0..7.  It is used by the formal proof of
	// the BUTTERFLY to artificially limit the scope of the proof.
	// By the time the butterfly sees this code, it will be known
	// that the long binary multiply works.  At issue will no longer
	// be whether or not this works, but rather whether it works in
	// context.  For that purpose, we'll need to check timing, not
	// results.  Checking against inputs of +/- 1 and 0 are perfect
	// for that task.  The below assumptions (yes they are assumptions
	// just go a little bit further.
	//
	// THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT
	// THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.
	//
	// This just limits the proof for the butterfly, the parent.
	// module that calls this one
	//
	// Start by assuming that all inputs have an absolute value less
	// than eight.
	always @(*)
		assume(u_a[AW-1:3] == 0);
	always @(*)
		assume(u_b[BW-1:3] == 0);
 
	// If the inputs have these properties, then so too do many of
	// our internal values.  ASSERT therefore that we never get out
	// of bounds
	generate for(k=0; k<TLEN; k=k+1)
	begin
		always @(*)
		begin
			assert(f_past_a[k][AW-1:3] == 0);
			assert(f_past_b[k][BW-1:3] == 0);
		end
	end endgenerate
 
	generate for(k=0; k<TLEN-1; k=k+1)
	begin
		always @(*)
			assert(acc[k][IW+BW-1:6] == 0);
	end endgenerate
 
	generate for(k=0; k<TLEN-2; k=k+1)
	begin
		always @(*)
			assert(r_b[k][BW-1:3] == 0);
	end endgenerate
`endif	// BUTTERFLY
`endif	// FORMAL
endmodule
Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Rev 39