1 |
3 |
Juzujka |
/*
|
2 |
|
|
FIR filter with complex samples
|
3 |
|
|
convolution computation divided into blocks for parallel processing
|
4 |
|
|
then the sum of the per-block results is computed
|
5 |
|
|
|
6 |
|
|
filter designed to evaluate convolution of echo-signal
|
7 |
|
|
it works in two modes:
|
8 |
|
|
1 - an echo signal with the ping signal leaked into the input is assumed. The FIR takes the first n (loadable at run time) samples of a frame into the pulse response RAM and convolves the other samples of the frame with those first n
|
9 |
|
|
n is ping signal length
|
10 |
|
|
frame begins with inp_ping_start strobe
|
11 |
|
|
|
12 |
|
|
2 - the pulse response RAM is loaded through a parallel interface (Data, Address, WR, I/Q)
|
13 |
|
|
|
14 |
|
|
Number of cycles required to compute one sample is determined by formula
|
15 |
|
|
block_length + number_of_blocks + 11
|
16 |
|
|
it is constant for synthesized filter
|
17 |
|
|
|
18 |
|
|
block_length and number_of_blocks should be power of 2
|
19 |
|
|
for example
|
20 |
|
|
pulse response RAM depth is 2**11 = 2048
|
21 |
|
|
block size is 2**8 = 256
|
22 |
|
|
number of blocks is 2048/256 = 2**(11-8) = 8
|
23 |
|
|
|
24 |
|
|
block_length + number_of_blocks + 11 = 256 + 8 + 11 = 275
|
25 |
|
|
|
26 |
|
|
In any case the filter yields output samples after n = 2**PING_ADDR_WIDTH samples
|
27 |
|
|
|
28 |
|
|
*/
|
29 |
|
|
module FIR_cascaded
|
30 |
|
|
#(
|
31 |
|
|
parameter INP_SAMP_WIDTH = 14, // imput samples width
|
32 |
|
|
parameter PING_ADDR_WIDTH = 11, // address width of pulse response characteristic samples
|
33 |
|
|
parameter CONV_MEM_BLOCK_ADDR_WIDTH = 10, // address width of block
|
34 |
|
|
parameter FRAME_ADDR_WIDTH = 18, // address width of counter of samples in frame
|
35 |
|
|
parameter OUT_SAMP_WIDTH = 18, // output samples width
|
36 |
|
|
parameter CLK_TO_SAMP_ADDR_WIDTH = 11, // clocks in frame counter width
|
37 |
|
|
//for debug. out_samp_A_sq is I^2 + Q^2
|
38 |
|
|
parameter OUT_SAMP_A_SQ_WIDTH = 8, // width of out_samp_A_sq
|
39 |
|
|
parameter OUT_SAMP_A_SQ_OFFS = 8 // downscale for out_samp_A_sq. OUT_SAMP_A_SQ_OFFS and next OUT_SAMP_A_SQ_WIDTH bits goes to the output
|
40 |
|
|
)
|
41 |
|
|
(
|
42 |
|
|
// ping means first n=inp_ping_length samples, which can be loaded into ping RAM, where stores FIR coefficients or pulse response
|
43 |
|
|
input clk, // clock
|
44 |
|
|
input reset, // reset
|
45 |
|
|
input inp_clk, // input samples strobes
|
46 |
|
|
input inp_ping_start, // frame strobes
|
47 |
|
|
input signed [INP_SAMP_WIDTH - 1:0] inp_samp_I, // input samples Re
|
48 |
|
|
input signed [INP_SAMP_WIDTH - 1:0] inp_samp_Q, // input samples Im
|
49 |
|
|
input [PING_ADDR_WIDTH - 1:0] inp_ping_length, // ping duration, in samples
|
50 |
|
|
input IOB_ping_from_Rx, // 1 - take pulse response from input samles, 0 - do not take pulse response from input samples, assumes load coefficient through parallel interface
|
51 |
|
|
input IOB_ping_RAM_CS, // select coefficient RAM
|
52 |
|
|
inout signed [INP_SAMP_WIDTH - 1:0] IOB_ping_RAM_D, // coefficient RAM, data
|
53 |
|
|
input IOB_ping_RAM_IQ, // coefficient RAM, I/Q select. 0 - I, 1 - Q
|
54 |
|
|
input signed [PING_ADDR_WIDTH - 1:0] IOB_ping_RAM_A, // coefficient RAM, address
|
55 |
|
|
input IOB_ping_RAM_WR, // coefficient RAM, write enable
|
56 |
|
|
input IOB_ping_RAM_RD, // coefficient RAM, read enable
|
57 |
|
|
output signed [OUT_SAMP_WIDTH - 1:0] out_samp_I, // output samples, Re
|
58 |
|
|
output signed [OUT_SAMP_WIDTH - 1:0] out_samp_Q, // output samples, Im
|
59 |
|
|
output signed [OUT_SAMP_A_SQ_WIDTH - 1:0] out_samp_A_sq, // I^2 + Q^2, for debug
|
60 |
|
|
output out_samp_strobe, // output sample strobe
|
61 |
|
|
output out_frame_strobe // output frame strobe
|
62 |
|
|
);
|
63 |
|
|
|
64 |
|
|
//wire signed [INP_SAMP_WIDTH - 1:0] IOB_ping_RAM_D;
|
65 |
|
|
//wire signed [PING_ADDR_WIDTH - 1:0] IOB_ping_RAM_A;
|
66 |
|
|
parameter CONV_BLOCK_ADDR_WIDTH = PING_ADDR_WIDTH - CONV_MEM_BLOCK_ADDR_WIDTH; // address width for blocks counting
|
67 |
|
|
reg [2**CONV_BLOCK_ADDR_WIDTH - 1:0] IOB_ping_RAM_A_bank_sel; // one-hot block select for WR coefficients through parallel bus
|
68 |
|
|
reg [PING_ADDR_WIDTH - 1:0] inp_ping_length_reg; // inp_ping_length store register
|
69 |
|
|
reg [FRAME_ADDR_WIDTH - 1:0] sample_counter; // sample in frame counter
|
70 |
|
|
reg inp_ping_start_str; // frame begins strobe
|
71 |
|
|
reg inp_ping_start_catch; // for generating inp_ping_start_catch
|
72 |
|
|
reg inp_clk_str; // sample begins strobe
|
73 |
|
|
reg inp_clk_catch; // for generating inp_clk_str
|
74 |
|
|
reg ping_to_store; // set to 1 from frame begining to the end of ping. While 1 and if should take input samples to coefficients, to store input samples into coefficient RAM
|
75 |
|
|
reg [2**CONV_BLOCK_ADDR_WIDTH:0] ping_to_store_n; // one-hot to select block in coefficients RAM to store sample
|
76 |
|
|
reg [CLK_TO_SAMP_ADDR_WIDTH - 1:0] clk_to_samp_counter; // clock between samples counter, used to calculation of output samples
|
77 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_I_reg; // register to store output Re samples
|
78 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_Q_reg; // register to store output Im samples
|
79 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] samp_mult_II[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // multipliers for output sample calculation, Re*Re
|
80 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] samp_mult_QQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // multipliers for output sample calculation, Im*Im
|
81 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] samp_mult_QI[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // multipliers for output sample calculation, Im*Re
|
82 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] samp_mult_IQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // multipliers for output sample calculation, Re*Im
|
83 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_acc_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // accumulators for calculation summ in block Re
|
84 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_acc_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // accumulators for calculation summ in block Im
|
85 |
|
|
wire signed [OUT_SAMP_WIDTH - 1:0] out_samp_acc_Q_selected; // accumulators for calculation summ in block Im
|
86 |
|
|
//reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_acc_result_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // register storing the computed convolution sample, channel I
|
87 |
|
|
//reg signed [OUT_SAMP_WIDTH - 1:0] out_samp_acc_result_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // register storing the computed convolution sample, channel Q
|
88 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] blocks_acc_I; // summ of summs in blocks accumulator, Re
|
89 |
|
|
reg signed [OUT_SAMP_WIDTH - 1:0] blocks_acc_Q; // summ of summs in blocks accumulator, Re
|
90 |
|
|
reg signed [OUT_SAMP_WIDTH*2 :0] out_samp_A_sq_reg; // Re^2 + Im^2 register, for debug
|
91 |
|
|
reg [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] addr_ping; // coefficient address register for convolution calculation
|
92 |
|
|
reg [CLK_TO_SAMP_ADDR_WIDTH:0] addr_echo; // TODO: width CONV_BLOCK_ADDR_WIDTH + CONV_MEM_BLOCK_ADDR_WIDTH
|
93 |
|
|
reg proc_store_samp; // sets for saving samples
|
94 |
|
|
reg proc_count_blocks; // sets when reading data from coefficient RAM and samples RAM
|
95 |
|
|
reg proc_count_blocks_acc; // sets for summs in blocks calculating
|
96 |
|
|
reg proc_count_blocks_sum; // sets for summs of summs in block calculating
|
97 |
|
|
reg [CONV_BLOCK_ADDR_WIDTH - 1:0] blocks_sum_counter; // block number counter for summs of summs in block calculating
|
98 |
|
|
|
99 |
|
|
reg signed [INP_SAMP_WIDTH - 1:0] multiplier_ping_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // Re coefficient register for multiplication
|
100 |
|
|
reg signed [INP_SAMP_WIDTH - 1:0] multiplier_ping_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // Im coefficient register for multiplication
|
101 |
|
|
reg signed [INP_SAMP_WIDTH - 1:0] multiplier_echo_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // Re sample register for multiplication
|
102 |
|
|
reg signed [INP_SAMP_WIDTH - 1:0] multiplier_echo_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0]; // Im sample register for multiplication
|
103 |
|
|
|
104 |
|
|
// Buses of RAM for storing coefficients and data samples
|
105 |
|
|
// address bus is shared, data and control buses are separated for Re and Im
|
106 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] ping_RAM_D_I [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
107 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] ping_RAM_D_Q [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
108 |
|
|
wire [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] ping_RAM_A [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
109 |
|
|
//wire [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] ping_RAM_A_buf;
|
110 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] ping_RAM_Q_I [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
111 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] ping_RAM_Q_Q [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
112 |
|
|
wire ping_RAM_W_I [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
113 |
|
|
wire ping_RAM_W_Q [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
114 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] samp_RAM_D_I [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
115 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] samp_RAM_D_Q [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
116 |
|
|
wire [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] samp_RAM_A [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
117 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] samp_RAM_Q_I [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
118 |
|
|
wire signed [INP_SAMP_WIDTH - 1:0] samp_RAM_Q_Q [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
119 |
|
|
wire samp_RAM_W [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
|
120 |
|
|
|
121 |
|
|
reg out_samp_strobe_reg; // register for generating out_samp_strobe
|
122 |
|
|
reg out_frame_strobe_reg; // register for generating out_frame_strobe
|
123 |
|
|
// Memories: one group of four single-port RAMs per convolution block -
//   ping_RAM_I / ping_RAM_Q hold the coefficients (pulse response), Re and Im parts,
//   samp_RAM_I / samp_RAM_Q hold the input samples, Re and Im parts.
// Total count is 2**CONV_BLOCK_ADDR_WIDTH * 2 (ping, samp) * 2 (I, Q).
// Within a group the I and Q memories share one address bus; the two sample
// memories additionally share a single write enable.
// Every memory receives the inverted module clock on its clk port.
generate
    genvar ram_idx;
    for (ram_idx = 0; ram_idx < 2**CONV_BLOCK_ADDR_WIDTH; ram_idx = ram_idx + 1) begin : gen_ram
        // coefficients, Re
        single_port_ram #(
            .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH),
            .DATA_WIDTH (INP_SAMP_WIDTH)
        ) ping_RAM_I (
            .clk  (~clk),
            .we   (ping_RAM_W_I[ram_idx]),
            .addr (ping_RAM_A[ram_idx]),
            .d_wr (ping_RAM_D_I[ram_idx]),
            .d_rd (ping_RAM_Q_I[ram_idx])
        );

        // coefficients, Im
        single_port_ram #(
            .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH),
            .DATA_WIDTH (INP_SAMP_WIDTH)
        ) ping_RAM_Q (
            .clk  (~clk),
            .we   (ping_RAM_W_Q[ram_idx]),
            .addr (ping_RAM_A[ram_idx]),
            .d_wr (ping_RAM_D_Q[ram_idx]),
            .d_rd (ping_RAM_Q_Q[ram_idx])
        );

        // input samples, Re
        single_port_ram #(
            .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH),
            .DATA_WIDTH (INP_SAMP_WIDTH)
        ) samp_RAM_I (
            .clk  (~clk),
            .we   (samp_RAM_W[ram_idx]),
            .addr (samp_RAM_A[ram_idx]),
            .d_wr (samp_RAM_D_I[ram_idx]),
            .d_rd (samp_RAM_Q_I[ram_idx])
        );

        // input samples, Im
        single_port_ram #(
            .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH),
            .DATA_WIDTH (INP_SAMP_WIDTH)
        ) samp_RAM_Q (
            .clk  (~clk),
            .we   (samp_RAM_W[ram_idx]),
            .addr (samp_RAM_A[ram_idx]),
            .d_wr (samp_RAM_D_Q[ram_idx]),
            .d_rd (samp_RAM_Q_Q[ram_idx])
        );
    end // for ram_idx
endgenerate
|
178 |
|
|
|
179 |
|
|
// Rising-edge detectors for the frame strobe (inp_ping_start) and the sample
// strobe (inp_clk).
//   *_catch : the input as it was sampled on the previous falling edge of clk
//   *_str   : registered pulse, set when the input is high now and was low at
//             the previous sample
// All four registers are cleared asynchronously by reset.
always @ (negedge clk or posedge reset) begin
    if (reset) begin
        // frame strobe detector
        inp_ping_start_catch <= 1'b0;
        inp_ping_start_str   <= 1'b0;
        // sample strobe detector
        inp_clk_catch        <= 1'b0;
        inp_clk_str          <= 1'b0;
    end
    else begin
        // frame strobe detector
        inp_ping_start_str   <= ~inp_ping_start_catch & inp_ping_start;
        inp_ping_start_catch <= inp_ping_start;
        // sample strobe detector
        inp_clk_str          <= ~inp_clk_catch & inp_clk;
        inp_clk_catch        <= inp_clk;
    end
end // always
|
193 |
|
|
|
194 |
|
|
// Bank decoder for the parallel interface: the address bits above the
// in-block address select one coefficient RAM block; the matching bit of
// IOB_ping_RAM_A_bank_sel is set, all other bits are cleared.
always @* begin
    IOB_ping_RAM_A_bank_sel = {2**CONV_BLOCK_ADDR_WIDTH{1'b0}};   // default: no bank selected
    IOB_ping_RAM_A_bank_sel[
        IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]
    ] = 1'b1;
end // always
|
199 |
|
|
|
200 |
|
|
// Frame bookkeeping, updated on the falling edge of clk:
//   inp_ping_length_reg - ping length latched at the frame strobe
//   sample_counter      - index of the current sample inside the frame
//   ping_to_store       - 1 from the frame strobe until sample_counter reaches the
//                         latched ping length
//   ping_to_store_n     - one-hot select of the coefficient RAM block that receives
//                         the current sample; starts at block 0 and is shifted left
//                         every time the in-block address is about to wrap
//
// All registers are written with nonblocking assignments. ping_to_store_n used to
// be written with blocking assignments; it is read by another process clocked on
// the same edge (the frame strobe generation), so the value seen there depended on
// simulator process ordering and could differ from the synthesized flip-flop.
always @ (negedge clk)
begin
    if (inp_ping_start_str) begin
        // start of frame
        inp_ping_length_reg <= inp_ping_length;
        sample_counter      <= 0;
        ping_to_store       <= 1;
        ping_to_store_n     <= 1;
    end else begin
        if (inp_clk_str) begin
            sample_counter <= sample_counter + 1;
            // ping ends, stop storing samples to the coefficient RAM
            if (sample_counter[PING_ADDR_WIDTH - 1:0] == inp_ping_length_reg) begin
                ping_to_store <= 0;
            end
            // in-block address is all ones: the next sample goes to the next bank
            if (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1 : 0] == {CONV_MEM_BLOCK_ADDR_WIDTH{1'b1}}) begin
                ping_to_store_n <= ping_to_store_n << 1;
            end
        end
    end
end
|
220 |
|
|
|
221 |
|
|
// Counts falling edges of clk since the last input sample strobe.
// It is restarted from 0 by inp_clk_str and is the time base that
// sequences the phases of the convolution computation.
always @ (negedge clk)
begin
    if (inp_clk_str)
        clk_to_samp_counter <= 0;
    else
        clk_to_samp_counter <= clk_to_samp_counter + 1;
end
|
226 |
|
|
|
227 |
|
|
// clk_to_samp_counter
|
228 |
|
|
// 0 registers initialization
|
229 |
|
|
// 1 store sample into RAM
|
230 |
|
|
// 2 /summands calculation (II, IQ, QI, QQ)
|
231 |
|
|
// 5 | 2**CONV_MEM_BLOCK_ADDR_WIDTH + 3 cycles /sums in blocks calculation
|
232 |
|
|
// 2**CONV_MEM_BLOCK_ADDR_WIDTH + 5 \ |
|
233 |
|
|
// 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6 / summs of summs in blocks calculation \
|
234 |
|
|
// 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6 |
|
235 |
|
|
// + 2**CONV_BLOCK_ADDR_WIDTH \
|
236 |
|
|
// 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 output result, sample strobe and frame strobe
|
237 |
|
|
// + 2**CONV_BLOCK_ADDR_WIDTH
|
238 |
|
|
// Phase flags of the per-sample computation, decoded from clk_to_samp_counter.
// Every flag is a register, so it becomes visible one clk period after the
// counter value that sets or clears it.
//   proc_store_samp       - high for the period following counter == 0
//   proc_count_blocks     - set at counter == 2, cleared at 2**CONV_MEM_BLOCK_ADDR_WIDTH + 5
//   proc_count_blocks_acc - set at counter == 5, cleared at 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7
//   proc_count_blocks_sum - set at 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6, cleared at
//                           2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH
// A new input sample strobe clears all flags.
//
// proc_store_samp is written with a nonblocking assignment in both branches.
// It used to be written with a blocking assignment in the else branch, i.e. the
// same register was driven with both assignment kinds in one clocked process,
// and processes reading it on the same edge raced against this one in simulation.
always @ (negedge clk)
begin
    if (inp_clk_str) begin
        proc_store_samp       <= 0;
        proc_count_blocks     <= 0;
        proc_count_blocks_acc <= 0;
        proc_count_blocks_sum <= 0;
    end else begin
        proc_store_samp <= (clk_to_samp_counter == 0);

        if (clk_to_samp_counter == 2) begin
            proc_count_blocks <= 1;        // begin to calculate convolution in blocks
        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 5) begin
            proc_count_blocks <= 0;        // finish
        end

        if (clk_to_samp_counter == 5) begin
            proc_count_blocks_acc <= 1;    // begin to calculate sums in blocks
        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7) begin
            proc_count_blocks_acc <= 0;    // finish
        end

        if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6) begin
            proc_count_blocks_sum <= 1;    // begin to calculate the sum of the block sums
        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin
            proc_count_blocks_sum <= 0;    // finish after 2**CONV_BLOCK_ADDR_WIDTH more periods
        end
    end
end
|
264 |
|
|
|
265 |
|
|
assign out_samp_acc_Q_selected = out_samp_acc_Q[blocks_sum_counter];
|
266 |
|
|
// Coefficient address counter (addr_ping) and sample address counter (addr_echo).
//   - while proc_store_samp is high the counters are loaded with their start
//     values: addr_ping with 0, addr_echo with sample_counter - (2**PING_ADDR_WIDTH - 1)
//     (truncated to the width of addr_echo);
//   - while proc_count_blocks is high both counters advance by one per clk.
//
// The load is synchronous. It used to be written as an asynchronous control
// (posedge proc_store_samp in the event list) that loaded a non-constant value,
// which does not correspond to a flip-flop with asynchronous set/reset and makes
// a locally generated flag act as an extra clock. The RAM address multiplexers
// select sample_counter, not these counters, while proc_store_samp is high, and
// proc_count_blocks is raised only later, so loading on the clock edge gives the
// same start values when counting begins.
always @ (negedge clk)
begin
    if (proc_store_samp) begin
        addr_ping <= 0;
        addr_echo <= sample_counter - (2**PING_ADDR_WIDTH - 1);
    end else if (proc_count_blocks) begin
        addr_ping <= addr_ping + 1;
        addr_echo <= addr_echo + 1;
    end
end
|
277 |
|
|
|
278 |
|
|
// Bidirectional data bus of the coefficient RAM parallel interface.
// The bus is driven only while both read enable and chip select are high,
// otherwise it is released (Z). On a read, IOB_ping_RAM_IQ picks the Im (1) or
// Re (0) memory and the address bits above the in-block address pick the block.
//
// The value returned is the RAM read data (ping_RAM_Q_Q / ping_RAM_Q_I).
// Previously the write-data nets (ping_RAM_D_Q / ping_RAM_D_I) were put on the
// bus; those nets are themselves driven from IOB_ping_RAM_D when the parallel
// interface is selected, so a read looped the bus back onto itself and never
// returned the stored coefficient.
// NOTE(review): the RAM address follows IOB_ping_RAM_A only while
// IOB_ping_RAM_CS is high and IOB_ping_from_Rx is low - confirm that reads are
// only issued in that mode.
assign IOB_ping_RAM_D = (IOB_ping_RAM_RD & IOB_ping_RAM_CS) ?                    // Data bus, Z if read not selected
    (IOB_ping_RAM_IQ ?                                                            // if read, then I or Q
          ping_RAM_Q_Q[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
        : ping_RAM_Q_I[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
    ) : {INP_SAMP_WIDTH{1'bZ}};
|
284 |
|
|
wire [2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0]; // block number for coefficient
|
285 |
|
|
reg [2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf_reg[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0]; // delayed for using in convolution calculation pipe
|
286 |
|
|
// convolution calculating blocks
|
287 |
|
|
genvar mac_block;
|
288 |
|
|
generate
|
289 |
|
|
for (mac_block = 0; mac_block < 2**CONV_BLOCK_ADDR_WIDTH; mac_block = mac_block + 1)
|
290 |
|
|
begin : mac_blocks
|
291 |
|
|
// ---------------- RAM buses of this block ----------------

// The parallel interface owns the coefficient RAM when it is selected and the
// coefficients are not taken from the received samples.
wire iob_owns_ping_ram = IOB_ping_RAM_CS & ~IOB_ping_from_Rx;
// Parallel-interface write aimed at this block
wire iob_write_this_block = iob_owns_ping_ram & IOB_ping_RAM_WR & IOB_ping_RAM_A_bank_sel[mac_block];
// Write of a received sample into this block of the coefficient RAM
wire rx_write_this_block = IOB_ping_from_Rx & proc_store_samp & ping_to_store_n[mac_block];

// Coefficient RAM write data: the parallel bus when it owns the RAM, otherwise
// the input sample while the ping is being stored and zero after that.
assign ping_RAM_D_I[mac_block] = iob_owns_ping_ram ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_I : 0);
assign ping_RAM_D_Q[mac_block] = iob_owns_ping_ram ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_Q : 0);

// Coefficient RAM address, in priority order:
//   parallel bus address (in-block part) when the bus owns the RAM,
//   in-block sample index while a sample is being stored,
//   coefficient read address of the convolution otherwise.
assign ping_RAM_A [mac_block] = iob_owns_ping_ram ? IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]
                              : (proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]
                                                 : addr_ping[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]);

// Coefficient RAM write enables: a bus write goes to the Re memory when
// IOB_ping_RAM_IQ is 0 and to the Im memory when it is 1; a received-sample
// write goes to both memories.
assign ping_RAM_W_I[mac_block] = (iob_write_this_block & ~IOB_ping_RAM_IQ) | rx_write_this_block;
assign ping_RAM_W_Q[mac_block] = (iob_write_this_block &  IOB_ping_RAM_IQ) | rx_write_this_block;

// Sample RAM write data is always the current input sample
assign samp_RAM_D_I[mac_block] = inp_samp_I;
assign samp_RAM_D_Q[mac_block] = inp_samp_Q;

// Sample RAM address: in-block sample index while storing, convolution read address otherwise
assign samp_RAM_A[mac_block] = proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]
                                               : addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0];

// Sample RAM write enable: only the block addressed by the upper bits of the sample index is written
assign samp_RAM_W[mac_block] = proc_store_samp
                             & (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH] == mac_block);

// Index of the sample RAM block this MAC reads from: own block number plus the
// block part of the sample read address, wrapped to CONV_BLOCK_ADDR_WIDTH bits
assign block_num_buf[mac_block] = (mac_block + addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH])
                                & {CONV_BLOCK_ADDR_WIDTH{1'b1}};
|
315 |
|
|
// Pipeline register for the index of the sample RAM block read by this MAC.
// It is updated on every falling edge of clk. It is kept in a process of its
// own so that the process below contains nothing but the asynchronous-clear
// branch and the clocked branch.
always @ (negedge clk) begin
    block_num_buf_reg[mac_block] <= block_num_buf[mac_block];
end // always

// Multiplier pipeline of this MAC block.
// While proc_count_blocks is low all operand and product registers are held at
// zero (asynchronous clear). While it is high, on every falling edge of clk:
//   stage 1 - operands are taken from the RAM outputs: the coefficient from this
//             block's own RAM, the sample from the RAM selected by block_num_buf_reg;
//   stage 2 - the four partial products of the complex multiplication are formed
//             from the operands registered on the previous edge.
//
// samp_mult_QI and samp_mult_IQ are cleared together with samp_mult_II and
// samp_mult_QQ. They used to keep their last value after proc_count_blocks went
// low, while the accumulators are still enabled for some periods after that, so
// the Im accumulator kept adding a stale product that the Re accumulator did not.
// NOTE(review): the products are truncated to OUT_SAMP_WIDTH bits - confirm the
// parameter choice leaves enough headroom.
always @ (negedge clk or negedge proc_count_blocks) begin
    if (~proc_count_blocks) begin
        // registers initialization if convolution is not being processed
        multiplier_ping_I[mac_block] <= 0;
        multiplier_ping_Q[mac_block] <= 0;
        multiplier_echo_I[mac_block] <= 0;
        multiplier_echo_Q[mac_block] <= 0;
        samp_mult_II[mac_block] <= 0;
        samp_mult_QQ[mac_block] <= 0;
        samp_mult_QI[mac_block] <= 0;
        samp_mult_IQ[mac_block] <= 0;
    end else begin
        // coefficients are read from this block's own RAM, no offset
        multiplier_ping_I[mac_block] <= ping_RAM_Q_I[mac_block];
        multiplier_ping_Q[mac_block] <= ping_RAM_Q_Q[mac_block];
        // samples are read from the RAM block selected by the block offset
        multiplier_echo_I[mac_block] <= samp_RAM_Q_I[block_num_buf_reg[mac_block]];
        multiplier_echo_Q[mac_block] <= samp_RAM_Q_Q[block_num_buf_reg[mac_block]];
        // partial products (ping * echo) for the complex multiply
        samp_mult_II[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_I[mac_block];
        samp_mult_QQ[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_Q[mac_block];
        samp_mult_QI[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_I[mac_block];
        samp_mult_IQ[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_Q[mac_block];
    end
end // always
|
339 |
|
|
// Per-block accumulators of the partial products. A new input sample strobe
// clears them; while proc_count_blocks_acc is high they add the current
// products on every falling edge of clk.

// Re part: acc_I += II + QQ
always @ (negedge clk) begin
    if (inp_clk_str)
        out_samp_acc_I[mac_block] <= 0;
    else if (proc_count_blocks_acc)
        out_samp_acc_I[mac_block] <= out_samp_acc_I[mac_block]
                                   + samp_mult_II[mac_block]
                                   + samp_mult_QQ[mac_block];
end

// Im part: acc_Q += IQ - QI
always @ (negedge clk) begin
    if (inp_clk_str)
        out_samp_acc_Q[mac_block] <= 0;
    else if (proc_count_blocks_acc)
        out_samp_acc_Q[mac_block] <= out_samp_acc_Q[mac_block]
                                   - samp_mult_QI[mac_block]
                                   + samp_mult_IQ[mac_block];
end
|
351 |
|
|
end // for
|
352 |
|
|
endgenerate
|
353 |
|
|
|
354 |
|
|
// Final reduction: adds the per-block accumulators one block per clk period
// and latches the total into the output sample registers.
//   - a new input sample strobe clears the block counter and both totals;
//   - while proc_count_blocks_sum is high, the accumulator selected by
//     blocks_sum_counter is added to the totals and the counter advances;
//   - when clk_to_samp_counter reaches
//     2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH the totals held
//     at that moment are copied to out_samp_I_reg / out_samp_Q_reg.
always @ (negedge clk)
begin
    if (inp_clk_str) begin
        blocks_acc_I       <= 0;
        blocks_acc_Q       <= 0;
        blocks_sum_counter <= 0;
    end
    else begin
        // sum of the block sums
        if (proc_count_blocks_sum) begin
            blocks_acc_I       <= blocks_acc_I + out_samp_acc_I[blocks_sum_counter];
            blocks_acc_Q       <= blocks_acc_Q + out_samp_acc_Q[blocks_sum_counter];
            blocks_sum_counter <= blocks_sum_counter + 1;
        end

        // convolution sample ready: move the result to the output registers
        if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin
            out_samp_Q_reg <= blocks_acc_Q;
            out_samp_I_reg <= blocks_acc_I;
        end
    end // if
end // always
|
372 |
|
|
|
373 |
|
|
// Output strobes and squared magnitude of the output sample (debug).

// High while clk_to_samp_counter holds the value at which the convolution
// result is ready.
wire out_result_ready = (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH);

always @ (negedge clk) begin
    // sample strobe: one registered pulse per computed output sample
    out_samp_strobe_reg  <= out_result_ready;
    // frame strobe: same pulse, but only for sample 0 of a frame while the
    // coefficient store select still points at block 0
    out_frame_strobe_reg <= out_result_ready & (sample_counter == 0) & ping_to_store_n[0];
    // Re^2 + Im^2 of the registered output sample
    out_samp_A_sq_reg    <= out_samp_I_reg * out_samp_I_reg + out_samp_Q_reg * out_samp_Q_reg;
end
|
380 |
|
|
|
381 |
|
|
assign out_samp_strobe = out_samp_strobe_reg;
|
382 |
|
|
assign out_frame_strobe = out_frame_strobe_reg;
|
383 |
|
|
assign out_samp_I = out_samp_I_reg;
|
384 |
|
|
assign out_samp_Q = out_samp_Q_reg;
|
385 |
|
|
assign out_samp_A_sq = out_samp_A_sq_reg[OUT_SAMP_A_SQ_WIDTH + OUT_SAMP_A_SQ_OFFS - 1:OUT_SAMP_A_SQ_OFFS];
|
386 |
|
|
endmodule
|