OpenCores
URL https://opencores.org/ocsvn/cascaded_fir_filter/cascaded_fir_filter/trunk

Subversion Repositories cascaded_fir_filter

[/] [cascaded_fir_filter/] [trunk/] [FIR_cascaded.v] - Rev 3

Compare with Previous | Blame | View Log

/*
FIR filter with complex samples.
The convolution computation is divided into blocks for parallel processing,
then the sum of the per-block results is computed.
 
filter designed to evaluate convolution of echo-signal
it works in two modes:
1 - the echo signal is assumed to contain the ping signal leaked into the input. The FIR stores the first n samples of the frame (n is loadable at run time) into the pulse response RAM and convolves the remaining samples of the frame with those first n
 n is the ping signal length
 the frame begins with the inp_ping_start strobe
 
2 - the pulse response RAM is loaded through the parallel interface (Data, Address, WR, I/Q)
 
Number of cycles required to compute one sample is determined by formula
block_length + number_of_blocks + 11
it is constant for synthesized filter
 
block_length and number_of_blocks should be power of 2
for example
pulse response RAM depth is 2**11 = 2048
block size is 2**8 = 256
number of blocks is 2048/256 = 2**(11-8) = 8
 
block_length + number_of_blocks + 11 = 256 + 8 + 11 = 275
 
In any case the filter yields output samples after n = 2**PING_ADDR_WIDTH samples
 
*/
module FIR_cascaded
#(
	parameter	INP_SAMP_WIDTH = 14,				// input sample width
	parameter	PING_ADDR_WIDTH = 11,				// address width of the pulse response (coefficient) memory
	parameter	CONV_MEM_BLOCK_ADDR_WIDTH = 10,		// address width inside one block
	parameter	FRAME_ADDR_WIDTH = 18,				// width of the sample-in-frame counter
	parameter	OUT_SAMP_WIDTH = 18,				// output sample width
	parameter	CLK_TO_SAMP_ADDR_WIDTH	= 11,		// width of the clocks-since-last-sample counter
	// for debug. out_samp_A_sq is I^2 + Q^2
	parameter	OUT_SAMP_A_SQ_WIDTH = 8,			// width of out_samp_A_sq
	parameter	OUT_SAMP_A_SQ_OFFS = 8				// downscale for out_samp_A_sq: bits [OUT_SAMP_A_SQ_OFFS +: OUT_SAMP_A_SQ_WIDTH] of the full sum go to the output
)
(
	// "ping" means the first n = inp_ping_length samples of a frame, which can be loaded into the ping RAM
	// that holds the FIR coefficients (pulse response)
	input	clk,			// clock (all registers in this module are updated on its falling edge)
	input	reset,			// asynchronous reset, active high (only resets the strobe edge detectors)
	input	inp_clk,		// input sample strobe
	input	inp_ping_start, // frame strobe
	input signed	[INP_SAMP_WIDTH - 1:0]	inp_samp_I,			// input samples, Re
	input signed	[INP_SAMP_WIDTH - 1:0]	inp_samp_Q,			// input samples, Im
	input [PING_ADDR_WIDTH - 1:0] 			inp_ping_length,	// ping duration, in samples
	input									IOB_ping_from_Rx,	// 1 - take the pulse response from the input samples, 0 - coefficients are loaded through the parallel interface
	input 									IOB_ping_RAM_CS,	// coefficient RAM select
	inout signed	[INP_SAMP_WIDTH - 1:0]	IOB_ping_RAM_D,		// coefficient RAM, data
	input 									IOB_ping_RAM_IQ,	// coefficient RAM, I/Q select. 0 - I, 1 - Q
	input signed	[PING_ADDR_WIDTH - 1:0]	IOB_ping_RAM_A,		// coefficient RAM, address
	input 									IOB_ping_RAM_WR,	// coefficient RAM, write enable
	input 									IOB_ping_RAM_RD,	// coefficient RAM, read enable
	output signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_I,			// output samples, Re
	output signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_Q,			// output samples, Im
	output signed	[OUT_SAMP_A_SQ_WIDTH - 1:0]	out_samp_A_sq,	// I^2 + Q^2, for debug
	output	out_samp_strobe,									// output sample strobe
	output	out_frame_strobe									// output frame strobe
);
 
	// Structure of the module:
	//  - 2**CONV_BLOCK_ADDR_WIDTH blocks, each with a coefficient ("ping") RAM pair and a sample RAM pair (I and Q)
	//  - one multiply-accumulate pipeline per block, all running in parallel
	//  - a final sequential sum of the per-block accumulators
	//  - a sequencer driven by clk_to_samp_counter that is restarted by every input sample strobe

	//wire signed	[INP_SAMP_WIDTH - 1:0]	IOB_ping_RAM_D;
	//wire signed	[PING_ADDR_WIDTH - 1:0]	IOB_ping_RAM_A;
	// Derived value, not meant to be overridden: number of address bits that select a block
	parameter CONV_BLOCK_ADDR_WIDTH = PING_ADDR_WIDTH - CONV_MEM_BLOCK_ADDR_WIDTH;	// address width for block counting
	reg	[2**CONV_BLOCK_ADDR_WIDTH - 1:0]	IOB_ping_RAM_A_bank_sel;	// one-hot block select for writing coefficients through the parallel bus
	reg	[PING_ADDR_WIDTH - 1:0] inp_ping_length_reg;	// latched copy of inp_ping_length
	reg	[FRAME_ADDR_WIDTH - 1:0] sample_counter;		// sample-in-frame counter
	reg	inp_ping_start_str;								// one-cycle strobe on the rising edge of inp_ping_start
	reg	inp_ping_start_catch;							// previous value of inp_ping_start, used to generate inp_ping_start_str
	reg	inp_clk_str;									// one-cycle strobe on the rising edge of inp_clk
	reg	inp_clk_catch;									// previous value of inp_clk, used to generate inp_clk_str
	reg	ping_to_store;									// set from the frame start until the end of the ping; selects the input samples as coefficient RAM write data
	reg	[2**CONV_BLOCK_ADDR_WIDTH:0]	ping_to_store_n;									// one-hot select of the coefficient RAM block that receives the current sample
	reg	[CLK_TO_SAMP_ADDR_WIDTH - 1:0]	clk_to_samp_counter;								// clocks since the last sample strobe; drives the calculation sequencer
	reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_I_reg;										// output register, Re
	reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_Q_reg;										// output register, Im
	reg signed	[OUT_SAMP_WIDTH - 1:0]	samp_mult_II[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block product, coefficient Re * sample Re
	reg signed	[OUT_SAMP_WIDTH - 1:0]	samp_mult_QQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block product, coefficient Im * sample Im
	reg signed	[OUT_SAMP_WIDTH - 1:0]	samp_mult_QI[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block product, coefficient Im * sample Re
	reg signed	[OUT_SAMP_WIDTH - 1:0]	samp_mult_IQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block product, coefficient Re * sample Im
	reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_acc_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block accumulators, Re
	reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_acc_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];		// per-block accumulators, Im
	wire signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_acc_Q_selected;							// Im block accumulator currently addressed by blocks_sum_counter (not used elsewhere in this module)
	//reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_acc_result_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// register holding the computed convolution sample, channel I
	//reg signed	[OUT_SAMP_WIDTH - 1:0]	out_samp_acc_result_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// register holding the computed convolution sample, channel Q
	reg signed	[OUT_SAMP_WIDTH - 1:0]	blocks_acc_I;										// accumulator for the sum of the block sums, Re
	reg signed	[OUT_SAMP_WIDTH - 1:0]	blocks_acc_Q;										// accumulator for the sum of the block sums, Im
	reg signed	[OUT_SAMP_WIDTH*2  :0]	out_samp_A_sq_reg;									// Re^2 + Im^2 register, for debug
	reg	[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]	addr_ping;										// coefficient read address used during the convolution
	reg	[CLK_TO_SAMP_ADDR_WIDTH:0]			addr_echo;										// sample read address. TODO: width CONV_BLOCK_ADDR_WIDTH + CONV_MEM_BLOCK_ADDR_WIDTH
	reg	proc_store_samp;							// set while the new sample is being stored
	reg	proc_count_blocks;							// set while data is read from the coefficient RAM and the sample RAM
	reg	proc_count_blocks_acc;						// set while the per-block sums are accumulated
	reg	proc_count_blocks_sum;						// set while the sum of the block sums is accumulated
	reg	[CONV_BLOCK_ADDR_WIDTH - 1:0]	blocks_sum_counter;	// block index used while summing the block sums
 
	reg signed [INP_SAMP_WIDTH - 1:0]	multiplier_ping_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// Re coefficient operand register
	reg signed [INP_SAMP_WIDTH - 1:0]	multiplier_ping_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// Im coefficient operand register
	reg signed [INP_SAMP_WIDTH - 1:0]	multiplier_echo_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// Re sample operand register
	reg signed [INP_SAMP_WIDTH - 1:0]	multiplier_echo_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];	// Im sample operand register
 
	// Buses of the RAMs that store coefficients (ping_*) and data samples (samp_*)
	// the address bus is shared between Re and Im, data and control buses are separate
	// naming: _D = write data, _A = address, _Q = read data, _W = write enable
	wire signed	[INP_SAMP_WIDTH - 1:0]			ping_RAM_D_I	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			ping_RAM_D_Q	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire	[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]	ping_RAM_A		[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	//wire	[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]	ping_RAM_A_buf;
	wire signed	[INP_SAMP_WIDTH - 1:0]			ping_RAM_Q_I	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			ping_RAM_Q_Q	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire										ping_RAM_W_I	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire										ping_RAM_W_Q	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			samp_RAM_D_I	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			samp_RAM_D_Q	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire	[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]	samp_RAM_A		[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			samp_RAM_Q_I	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire signed	[INP_SAMP_WIDTH - 1:0]			samp_RAM_Q_Q	[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
	wire										samp_RAM_W		[2**CONV_BLOCK_ADDR_WIDTH - 1:0];
 
	reg	out_samp_strobe_reg;	// register that generates out_samp_strobe
	reg	out_frame_strobe_reg;	// register that generates out_frame_strobe
	// RAMs for coefficients (ping) and for data (samp)
	// number of RAM instances is 2**CONV_BLOCK_ADDR_WIDTH * 2 (ping, samp) * 2 (I, Q)
	// single_port_ram is defined outside this file; every instance is clocked with the inverted clock
	generate
		genvar i_ram;
		for (i_ram = 0; i_ram < 2**CONV_BLOCK_ADDR_WIDTH; i_ram = i_ram + 1) begin : gen_ram
			single_port_ram
				#(
				.DATA_WIDTH	(INP_SAMP_WIDTH),
				.ADDR_WIDTH	(CONV_MEM_BLOCK_ADDR_WIDTH))
			ping_RAM_I
			(
				.clk	(~clk),
				.d_wr	(ping_RAM_D_I[i_ram]),
				.addr	(ping_RAM_A[i_ram]),
				.we		(ping_RAM_W_I[i_ram]),
				.d_rd	(ping_RAM_Q_I[i_ram])
			);
			single_port_ram
				#(
				.DATA_WIDTH	(INP_SAMP_WIDTH),
				.ADDR_WIDTH	(CONV_MEM_BLOCK_ADDR_WIDTH))
			ping_RAM_Q
			(
				.clk	(~clk),
				.d_wr	(ping_RAM_D_Q[i_ram]),
				.addr	(ping_RAM_A[i_ram]),
				.we		(ping_RAM_W_Q[i_ram]),
				.d_rd	(ping_RAM_Q_Q[i_ram])
			);
			single_port_ram
				#(
				.DATA_WIDTH	(INP_SAMP_WIDTH),
				.ADDR_WIDTH	(CONV_MEM_BLOCK_ADDR_WIDTH))
			samp_RAM_I
			(
				.clk	(~clk),
				.d_wr	(samp_RAM_D_I[i_ram]),
				.addr	(samp_RAM_A[i_ram]),
				.we		(samp_RAM_W[i_ram]),
				.d_rd	(samp_RAM_Q_I[i_ram])
			);
			single_port_ram
				#(
				.DATA_WIDTH	(INP_SAMP_WIDTH),
				.ADDR_WIDTH	(CONV_MEM_BLOCK_ADDR_WIDTH))
			samp_RAM_Q
			(
				.clk	(~clk),
				.d_wr	(samp_RAM_D_Q[i_ram]),
				.addr	(samp_RAM_A[i_ram]),
				.we		(samp_RAM_W[i_ram]),
				.d_rd	(samp_RAM_Q_Q[i_ram])
			);
		end // for
	endgenerate
 
	// strobes for frame start and sample start: one-cycle pulses on the rising edges of the input strobes
	always @ (negedge clk or posedge reset) begin
		if (reset) begin
			inp_ping_start_catch <= 0;
			inp_ping_start_str <= 0;
			inp_clk_catch <= 0;
			inp_clk_str <= 0;
		end else begin
			inp_ping_start_catch <= inp_ping_start;
			inp_ping_start_str <= inp_ping_start & ~inp_ping_start_catch;
			inp_clk_catch <= inp_clk;
			inp_clk_str <= inp_clk & ~inp_clk_catch;
		end
	end //always
 
	// one-hot ping_RAM block select for access from the parallel interface
	// (decoded from the upper CONV_BLOCK_ADDR_WIDTH bits of the parallel address)
	always @* begin
		IOB_ping_RAM_A_bank_sel = {2**CONV_BLOCK_ADDR_WIDTH{1'b0}};
		IOB_ping_RAM_A_bank_sel[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]] = 1'b1;
	end //always
 
	// sample number "sample_counter", ping-present flag "ping_to_store" and
	// one-hot number of the ping_RAM block that receives the ping, "ping_to_store_n"
	// All registers use nonblocking assignments so that other clocked blocks which read
	// ping_to_store_n on the same edge see a well-defined (previous) value.
	always @ (negedge clk)
	begin
		if (inp_ping_start_str) begin
			// frame start: latch the ping length and restart the counters
			inp_ping_length_reg <= inp_ping_length;
			sample_counter <= 0;
			ping_to_store <= 1;
			ping_to_store_n <= 1;
		end else begin
			if (inp_clk_str) begin
				sample_counter <= sample_counter + 1;
				if (sample_counter[PING_ADDR_WIDTH - 1:0] == inp_ping_length_reg) begin	// ping ends, stop storing samples into the coefficient RAM
					ping_to_store <= 0;
				end
				if (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1 : 0] == {CONV_MEM_BLOCK_ADDR_WIDTH{1'b1}}) begin	// address moves on to the next bank
					ping_to_store_n <= ping_to_store_n << 1;
				end
			end
		end
	end
 
	// clock counter: counts clocks since the last sample strobe, used to sequence the convolution
	always @ (negedge clk)
	begin
		clk_to_samp_counter <= inp_clk_str ? 0 : (clk_to_samp_counter + 1);
	end
 
	//	clk_to_samp_counter
	//	0										registers initialization
	//	1										store sample into RAM
	//	2										/summands calculation (II, IQ, QI, QQ)
	//	5										|	2**CONV_MEM_BLOCK_ADDR_WIDTH + 3 cycles	/sums in blocks calculation
	//	2**CONV_MEM_BLOCK_ADDR_WIDTH + 5		\											|
	//	2**CONV_MEM_BLOCK_ADDR_WIDTH + 6		/ sum of the block sums calculation			\
	//  2**CONV_MEM_BLOCK_ADDR_WIDTH + 6		|
	//			+ 2**CONV_BLOCK_ADDR_WIDTH		\
	//	2**CONV_MEM_BLOCK_ADDR_WIDTH + 7		output result, sample strobe and frame strobe
	//			+ 2**CONV_BLOCK_ADDR_WIDTH
	// Sequencer: every flag is a register, so it becomes visible one clock after the counter value it is decoded from.
	always @ (negedge clk)
	begin
		if (inp_clk_str) begin
			proc_store_samp <= 0;
			proc_count_blocks <= 0;
			proc_count_blocks_acc <= 0;
			proc_count_blocks_sum <= 0;
		end else begin
			// nonblocking, consistent with the reset branch above (a register must not mix '=' and '<=')
			proc_store_samp <= clk_to_samp_counter == 0;
			if (clk_to_samp_counter == 2) begin
				proc_count_blocks <= 1;		// begin to calculate the convolution in blocks
			end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 5) begin
				proc_count_blocks <= 0;		// finish
			end
			if (clk_to_samp_counter == 5) begin
				proc_count_blocks_acc <= 1;		// begin to accumulate the sums in blocks
			end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7) begin
				proc_count_blocks_acc <= 0;		// finish
			end
			if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6) begin
				proc_count_blocks_sum <= 1;	// begin to accumulate the sum of the block sums
			end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin	// continue for 2**CONV_BLOCK_ADDR_WIDTH times
				proc_count_blocks_sum <= 0;	// finish
			end
		end
	end
 
	assign out_samp_acc_Q_selected = out_samp_acc_Q[blocks_sum_counter];
	// coefficient address counter and sample address counter
	// proc_store_samp acts as an asynchronous load: coefficient address restarts at 0,
	// sample address restarts 2**PING_ADDR_WIDTH - 1 positions behind the newest sample
	always @ (negedge clk or posedge proc_store_samp)
	begin
		if (proc_store_samp) begin
			addr_ping <= 0;
			addr_echo <= sample_counter - (2**PING_ADDR_WIDTH - 1);
		end else if (proc_count_blocks) begin
			addr_ping <= addr_ping + 1;
			addr_echo <= addr_echo + 1;
		end
	end
 
	// bidirectional bus for the coefficient RAM
	// On a read the bus is driven from the RAM read outputs (ping_RAM_Q_*). Driving it from the
	// write-data buses (ping_RAM_D_*) would loop IOB_ping_RAM_D back onto itself, because those
	// buses are fed from IOB_ping_RAM_D whenever the parallel interface is selected.
	assign	IOB_ping_RAM_D = (IOB_ping_RAM_RD & IOB_ping_RAM_CS) ? 	// data bus, Z if read is not selected
		(IOB_ping_RAM_IQ ?											// if read, then I or Q
				ping_RAM_Q_Q[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
			 : ping_RAM_Q_I[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
		) : {INP_SAMP_WIDTH{1'bZ}};
	wire	[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0];	// number of the sample RAM block paired with each coefficient block
	reg	[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf_reg[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0];	// delayed copy, used inside the convolution pipeline
	// convolution calculating blocks
	genvar mac_block;
	generate
		for (mac_block = 0; mac_block < 2**CONV_BLOCK_ADDR_WIDTH; mac_block = mac_block + 1)
		begin : mac_blocks
			// RAM buses
			// coefficient RAM buses
			// Data bus: if IOB_ping_from_Rx = 0 (coefficient RAM is loaded from the parallel bus) and CS is set, this is the data from the parallel bus
			//					else, while the ping-present flag is set, these are the input samples (0 otherwise)
			assign ping_RAM_D_I[mac_block] = (IOB_ping_RAM_CS & ~IOB_ping_from_Rx) ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_I : 0);
			assign ping_RAM_D_Q[mac_block] = (IOB_ping_RAM_CS & ~IOB_ping_from_Rx) ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_Q : 0);
			// address bus: if IOB_ping_from_Rx = 0 and CS is set, this is the address from the parallel bus
			//					else if proc_store_samp is set - the address for storing the input sample
			//							else the coefficient address for the convolution calculation
			assign ping_RAM_A  [mac_block] = (IOB_ping_RAM_CS & ~IOB_ping_from_Rx) ? IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] : (proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] : addr_ping[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]);
			// write strobe
			// if loading from the parallel bus is selected, WR on the parallel bus generates WE for the addressed block (I or Q by IOB_ping_RAM_IQ)
			// else WE is generated with the input samples for the block selected by ping_to_store_n
			assign ping_RAM_W_I[mac_block] = (IOB_ping_RAM_CS & ~IOB_ping_from_Rx & IOB_ping_RAM_WR & ~IOB_ping_RAM_IQ & IOB_ping_RAM_A_bank_sel[mac_block]) | (IOB_ping_from_Rx & proc_store_samp & ping_to_store_n[mac_block]);
			assign ping_RAM_W_Q[mac_block] = (IOB_ping_RAM_CS & ~IOB_ping_from_Rx & IOB_ping_RAM_WR &  IOB_ping_RAM_IQ & IOB_ping_RAM_A_bank_sel[mac_block]) | (IOB_ping_from_Rx & proc_store_samp & ping_to_store_n[mac_block]);
			// sample RAM buses
			assign samp_RAM_D_I[mac_block] = inp_samp_I;
			assign samp_RAM_D_Q[mac_block] = inp_samp_Q;
			// while a new sample is stored: the address for storing it, otherwise the read address for the convolution
			assign samp_RAM_A[mac_block] = proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] : addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0];
			// with a new sample, WE is set for the block addressed by the upper bits of sample_counter
			assign samp_RAM_W[mac_block] = proc_store_samp & (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH] == mac_block);
			// number of the block a sample is read from: own block number plus the address offset counted in blocks, floor(addr/sizeof(block)), modulo the number of blocks
			assign block_num_buf[mac_block] = (mac_block + addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH]) & {CONV_BLOCK_ADDR_WIDTH{1'b1}};
			// multiplier pipeline: operand registers, then the four partial products
			always @ (negedge clk or negedge  proc_count_blocks) begin
				// NOTE(review): this assignment sits outside the asynchronous-clear branch, so it also
				// executes when proc_count_blocks falls; kept as in the original
				block_num_buf_reg[mac_block] <= block_num_buf[mac_block];
				// registers are held at zero while the convolution is not being processed.
				// proc_count_blocks_acc is cleared two counter steps after proc_count_blocks (see the
				// sequencer), so the accumulators still run while this branch is active: all four
				// products have to be zero here, otherwise stale cross products leak into the Im sum.
				if (~proc_count_blocks) begin
					multiplier_ping_I[mac_block] <= 0;
					multiplier_ping_Q[mac_block] <= 0;
					multiplier_echo_I[mac_block] <= 0;
					multiplier_echo_Q[mac_block] <= 0;
					samp_mult_II[mac_block] <= 0;
					samp_mult_QQ[mac_block] <= 0;
					samp_mult_QI[mac_block] <= 0;
					samp_mult_IQ[mac_block] <= 0;
				end else begin
					// coefficient operands are read from the block's own RAM with no offset
					multiplier_ping_I[mac_block] <= ping_RAM_Q_I[mac_block];
					multiplier_ping_Q[mac_block] <= ping_RAM_Q_Q[mac_block];
					// sample operands are read from the block selected by the (delayed) offset
					multiplier_echo_I[mac_block] <= samp_RAM_Q_I[block_num_buf_reg[mac_block]];
					multiplier_echo_Q[mac_block] <= samp_RAM_Q_Q[block_num_buf_reg[mac_block]];
					// the four partial products of the complex multiplication; signs are applied in the accumulator below
					samp_mult_II[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_I[mac_block];
					samp_mult_QQ[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_Q[mac_block];
					samp_mult_QI[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_I[mac_block];
					samp_mult_IQ[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_Q[mac_block];
				end
			end // always
			// per-block accumulators, cleared by every input sample strobe
			always @ (negedge clk ) begin
				if (inp_clk_str) begin
					out_samp_acc_I[mac_block] <= 0;
					out_samp_acc_Q[mac_block] <= 0;
				end else if (proc_count_blocks_acc) begin
					// as written: Re += II + QQ, Im += IQ - QI, i.e. sample * conj(coefficient)
					// original note: use II - QQ and QI + IQ to get complex FIR or use II and QQ to get real FIR
					out_samp_acc_I[mac_block] <= out_samp_acc_I[mac_block] + samp_mult_II[mac_block] + samp_mult_QQ[mac_block];
					//out_samp_acc_I[mac_block] <= out_samp_acc_I[mac_block] + samp_mult_II[mac_block];
					out_samp_acc_Q[mac_block] <= out_samp_acc_Q[mac_block] - samp_mult_QI[mac_block] + samp_mult_IQ[mac_block];
					//out_samp_acc_Q[mac_block] <= out_samp_acc_Q[mac_block] + samp_mult_QQ[mac_block];
				end
			end
		end // for
	endgenerate
 
	// sum of the per-block sums, one block per clock, and transfer of the result to the output registers
	always @ (negedge clk)
	begin
		if (inp_clk_str) begin
			blocks_sum_counter <= 0;
			blocks_acc_I <= 0;
			blocks_acc_Q <= 0;
		end else begin
			if (proc_count_blocks_sum) begin	// sum of the block sums is calculated here
				blocks_sum_counter <= blocks_sum_counter + 1;
				blocks_acc_I <= blocks_acc_I + out_samp_acc_I[blocks_sum_counter];
				blocks_acc_Q <= blocks_acc_Q + out_samp_acc_Q[blocks_sum_counter];
			end
			if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin	// convolution sample is ready, move the result to the output register
				out_samp_I_reg <= blocks_acc_I;
				out_samp_Q_reg <= blocks_acc_Q;
			end
		end //if
	end // always
 
	// sample strobe, frame strobe and |output|^2 for debug
	always @ (negedge clk ) begin
		// the strobes are registered from the same counter value that loads the output registers
		out_samp_strobe_reg <= clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH;
		// frame strobe only for the first sample of a frame (sample_counter == 0 and first block selected)
		out_frame_strobe_reg <= (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) & (sample_counter == 0) & ping_to_store_n[0];
		out_samp_A_sq_reg <= out_samp_I_reg * out_samp_I_reg + out_samp_Q_reg * out_samp_Q_reg;
	end
 
	assign out_samp_strobe = out_samp_strobe_reg;
	assign out_frame_strobe = out_frame_strobe_reg;
	assign out_samp_I = out_samp_I_reg;
	assign out_samp_Q = out_samp_Q_reg;
	// debug magnitude: OUT_SAMP_A_SQ_WIDTH bits starting at bit OUT_SAMP_A_SQ_OFFS of the full-width sum
	assign out_samp_A_sq = out_samp_A_sq_reg[OUT_SAMP_A_SQ_WIDTH + OUT_SAMP_A_SQ_OFFS - 1:OUT_SAMP_A_SQ_OFFS];
endmodule
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.