OpenCores
URL https://opencores.org/ocsvn/cascaded_fir_filter/cascaded_fir_filter/trunk

Subversion Repositories cascaded_fir_filter

[/] [cascaded_fir_filter/] [trunk/] [FIR_cascaded.v] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 Juzujka
/*
2
FIR filter with complex samples
3
convolution computation divided into blocks for parallel processing
4
then the sum of the block results is computed
5
 
6
filter designed to evaluate convolution of echo-signal
7
it works in two modes:
8
1 - echo-signal with ping signal leaked into input assumed. FIR takes first n (loadable runtime) samples in frame into pulse response RAM and convolutes other samples in frame with first n
9
 n is ping signal length
10
 frame begins with inp_ping_start strobe
11
 
12
2 - pulse response RAM loaded through parallel interface (Data, Address, WR, I/Q)
13
 
14
Number of cycles required to compute one sample is determined by formula
15
block_length + number_of_blocks + 11
16
it is constant for synthesized filter
17
 
18
block_length and number_of_blocks should be power of 2
19
for example
20
pulse response RAM depth is 2**11 = 2048
21
block size is 2**8 = 256
22
number of blocks is 2048/256 = 2**(11-8) = 8
23
 
24
block_length + number_of_blocks + 11 = 256 + 8 + 11 = 275
25
 
26
In any case the filter yields output samples after n = 2**PING_ADDR_WIDTH samples
27
 
28
*/
29
// Module interface of FIR_cascaded: parameters first, then ports.
module FIR_cascaded
30
#(
31
        parameter       INP_SAMP_WIDTH = 14,                            // input samples width, bits
32
        parameter       PING_ADDR_WIDTH = 11,                           // address width of pulse response (coefficient) samples
33
        parameter       CONV_MEM_BLOCK_ADDR_WIDTH = 10,         // address width inside one block
34
        parameter       FRAME_ADDR_WIDTH = 18,                          // width of the counter of samples in a frame
35
        parameter       OUT_SAMP_WIDTH = 18,                            // output samples width, bits
36
        parameter       CLK_TO_SAMP_ADDR_WIDTH  = 11,           // width of the counter of clocks elapsed since the last input sample strobe
37
        //for debug. out_samp_A_sq is I^2 + Q^2
38
        parameter       OUT_SAMP_A_SQ_WIDTH = 8,                        // width of out_samp_A_sq
39
        parameter       OUT_SAMP_A_SQ_OFFS = 8                          // downscale for out_samp_A_sq: OUT_SAMP_A_SQ_WIDTH bits starting at bit OUT_SAMP_A_SQ_OFFS go to the output
40
)
41
(
42
        // "ping" means the first n=inp_ping_length samples, which can be loaded into the ping RAM that stores the FIR coefficients (pulse response)
43
        input   clk,                    // clock (internal registers are updated on its falling edge)
44
        input   reset,                  // reset, active high
45
        input   inp_clk,                // input samples strobes (rising edge is detected)
46
        input   inp_ping_start, // frame strobes (rising edge is detected)
47
        input signed    [INP_SAMP_WIDTH - 1:0]   inp_samp_I,                     // input samples Re
48
        input signed    [INP_SAMP_WIDTH - 1:0]   inp_samp_Q,                     // input samples Im
49
        input [PING_ADDR_WIDTH - 1:0]                    inp_ping_length,        // ping duration, in samples
50
        input                                                                   IOB_ping_from_Rx,       // 1 - take pulse response from input samples, 0 - do not take pulse response from input samples, coefficients are loaded through the parallel interface
51
        input                                                                   IOB_ping_RAM_CS,        // select coefficient RAM
52
        inout signed    [INP_SAMP_WIDTH - 1:0]   IOB_ping_RAM_D,         // coefficient RAM, data (bidirectional, high-Z unless RD and CS are set)
53
        input                                                                   IOB_ping_RAM_IQ,        // coefficient RAM, I/Q select. 0 - I, 1 - Q
54
        input signed    [PING_ADDR_WIDTH - 1:0]  IOB_ping_RAM_A,         // coefficient RAM, address
55
        input                                                                   IOB_ping_RAM_WR,        // coefficient RAM, write enable
56
        input                                                                   IOB_ping_RAM_RD,        // coefficient RAM, read enable
57
        output signed   [OUT_SAMP_WIDTH - 1:0]   out_samp_I,                     // output samples, Re
58
        output signed   [OUT_SAMP_WIDTH - 1:0]   out_samp_Q,                     // output samples, Im
59
        output signed   [OUT_SAMP_A_SQ_WIDTH - 1:0]      out_samp_A_sq,  // I^2 + Q^2, for debug
60
        output  out_samp_strobe,                                                                        // output sample strobe
61
        output  out_frame_strobe                                                                        // output frame strobe
62
);
63
 
64
        // Internal parameters, registers and RAM buses of the filter.
        //wire signed   [INP_SAMP_WIDTH - 1:0]  IOB_ping_RAM_D;
65
        //wire signed   [PING_ADDR_WIDTH - 1:0] IOB_ping_RAM_A;
66
        parameter CONV_BLOCK_ADDR_WIDTH = PING_ADDR_WIDTH - CONV_MEM_BLOCK_ADDR_WIDTH;  // address width for blocks counting; number of blocks is 2**CONV_BLOCK_ADDR_WIDTH
67
        reg     [2**CONV_BLOCK_ADDR_WIDTH - 1:0] IOB_ping_RAM_A_bank_sel;        // one-hot block select for WR coefficients through parallel bus
68
        reg     [PING_ADDR_WIDTH - 1:0] inp_ping_length_reg;     // inp_ping_length store register
69
        reg     [FRAME_ADDR_WIDTH - 1:0] sample_counter;         // sample in frame counter
70
        reg     inp_ping_start_str;                                                             // frame begins strobe
71
        reg     inp_ping_start_catch;                                                   // previous value of inp_ping_start, for generating inp_ping_start_str
72
        reg     inp_clk_str;                                                                    // sample begins strobe
73
        reg     inp_clk_catch;                                                                  // previous value of inp_clk, for generating inp_clk_str
74
        reg     ping_to_store;                                                                  // set to 1 from frame beginning to the end of ping. While 1, and if input samples are to be taken as coefficients, input samples are stored into the coefficient RAM
75
        reg     [2**CONV_BLOCK_ADDR_WIDTH:0]     ping_to_store_n;                                                                        // one-hot to select block in coefficients RAM to store sample (one bit wider than the number of blocks)
76
        reg     [CLK_TO_SAMP_ADDR_WIDTH - 1:0]   clk_to_samp_counter;                                                            // counter of clocks since the last sample strobe, sequences the calculation of output samples
77
        reg signed      [OUT_SAMP_WIDTH - 1:0]   out_samp_I_reg;                                                                         // register to store output Re samples
78
        reg signed      [OUT_SAMP_WIDTH - 1:0]   out_samp_Q_reg;                                                                         // register to store output Im samples
79
        reg signed      [OUT_SAMP_WIDTH - 1:0]   samp_mult_II[2**CONV_BLOCK_ADDR_WIDTH - 1:0];            // products for output sample calculation, Re*Re
80
        reg signed      [OUT_SAMP_WIDTH - 1:0]   samp_mult_QQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0];            // products for output sample calculation, Im*Im
81
        reg signed      [OUT_SAMP_WIDTH - 1:0]   samp_mult_QI[2**CONV_BLOCK_ADDR_WIDTH - 1:0];            // products for output sample calculation, Im*Re
82
        reg signed      [OUT_SAMP_WIDTH - 1:0]   samp_mult_IQ[2**CONV_BLOCK_ADDR_WIDTH - 1:0];            // products for output sample calculation, Re*Im
83
        reg signed      [OUT_SAMP_WIDTH - 1:0]   out_samp_acc_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];          // accumulators for the sum inside a block, Re
84
        reg signed      [OUT_SAMP_WIDTH - 1:0]   out_samp_acc_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];          // accumulators for the sum inside a block, Im
85
        wire signed     [OUT_SAMP_WIDTH - 1:0]   out_samp_acc_Q_selected;                                                        // Im block accumulator selected by blocks_sum_counter
86
        //reg signed    [OUT_SAMP_WIDTH - 1:0]  out_samp_acc_result_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];  // register storing the computed convolution sample of channel I
87
        //reg signed    [OUT_SAMP_WIDTH - 1:0]  out_samp_acc_result_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];  // register storing the computed convolution sample of channel Q
88
        reg signed      [OUT_SAMP_WIDTH - 1:0]   blocks_acc_I;                                                                           // accumulator of the sum of block sums, Re
89
        reg signed      [OUT_SAMP_WIDTH - 1:0]   blocks_acc_Q;                                                                           // accumulator of the sum of block sums, Im
90
        reg signed      [OUT_SAMP_WIDTH*2  :0]   out_samp_A_sq_reg;                                                                      // Re^2 + Im^2 register, for debug
91
        reg     [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]        addr_ping;                                                                              // coefficient address register for convolution calculation
92
        reg     [CLK_TO_SAMP_ADDR_WIDTH:0]                       addr_echo;                                                                              // sample address register for convolution calculation. TODO: width CONV_BLOCK_ADDR_WIDTH + CONV_MEM_BLOCK_ADDR_WIDTH
93
        reg     proc_store_samp;                                                        // set while the new sample is being saved
94
        reg     proc_count_blocks;                                                      // set while reading data from coefficient RAM and samples RAM
95
        reg     proc_count_blocks_acc;                                          // set while the sums inside blocks are accumulated
96
        reg     proc_count_blocks_sum;                                          // set while the sum of block sums is accumulated
97
        reg     [CONV_BLOCK_ADDR_WIDTH - 1:0]    blocks_sum_counter;     // block number counter for the sum of block sums
98
 
99
        reg signed [INP_SAMP_WIDTH - 1:0]        multiplier_ping_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];       // Re coefficient register for multiplication
100
        reg signed [INP_SAMP_WIDTH - 1:0]        multiplier_ping_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];       // Im coefficient register for multiplication
101
        reg signed [INP_SAMP_WIDTH - 1:0]        multiplier_echo_I[2**CONV_BLOCK_ADDR_WIDTH - 1:0];       // Re sample register for multiplication
102
        reg signed [INP_SAMP_WIDTH - 1:0]        multiplier_echo_Q[2**CONV_BLOCK_ADDR_WIDTH - 1:0];       // Im sample register for multiplication
103
 
104
        // Buses of RAM for storing coefficients and data samples
105
        // address bus is shared, data and control buses are separated for Re and Im
        // naming: _D = RAM write data, _Q = RAM read data, _A = address, _W = write enable
106
        wire signed     [INP_SAMP_WIDTH - 1:0]                   ping_RAM_D_I    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
107
        wire signed     [INP_SAMP_WIDTH - 1:0]                   ping_RAM_D_Q    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
108
        wire    [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]        ping_RAM_A              [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
109
        //wire  [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]       ping_RAM_A_buf;
110
        wire signed     [INP_SAMP_WIDTH - 1:0]                   ping_RAM_Q_I    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
111
        wire signed     [INP_SAMP_WIDTH - 1:0]                   ping_RAM_Q_Q    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
112
        wire                                                                            ping_RAM_W_I    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
113
        wire                                                                            ping_RAM_W_Q    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
114
        wire signed     [INP_SAMP_WIDTH - 1:0]                   samp_RAM_D_I    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
115
        wire signed     [INP_SAMP_WIDTH - 1:0]                   samp_RAM_D_Q    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
116
        wire    [CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]        samp_RAM_A              [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
117
        wire signed     [INP_SAMP_WIDTH - 1:0]                   samp_RAM_Q_I    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
118
        wire signed     [INP_SAMP_WIDTH - 1:0]                   samp_RAM_Q_Q    [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
119
        wire                                                                            samp_RAM_W              [2**CONV_BLOCK_ADDR_WIDTH - 1:0];
120
 
121
        reg     out_samp_strobe_reg;    // register for generating out_samp_strobe
122
        reg     out_frame_strobe_reg;   // register for generating out_frame_strobe
123
        // RAM for coefficients - ping and for data - samp
124
        // number of blocks is 2**CONV_BLOCK_ADDR_WIDTH * 2 (ping, samp) * 2 (I, Q)
125
        // Memory array: for every convolution block there are four single-port RAMs,
        // coefficient ("ping") I/Q and data ("samp") I/Q. The two ping RAMs of a block
        // share one address bus and have separate write enables; the two samp RAMs
        // share both address and write enable. All RAMs are clocked with the inverted clk.
        generate
                genvar ram_idx;
                for (ram_idx = 0; ram_idx < 2**CONV_BLOCK_ADDR_WIDTH; ram_idx = ram_idx + 1) begin : gen_ram
                        // coefficient RAM, Re part
                        single_port_ram #(.DATA_WIDTH (INP_SAMP_WIDTH), .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH))
                        ping_RAM_I
                        (
                                .clk    (~clk),
                                .addr   (ping_RAM_A[ram_idx]),
                                .we     (ping_RAM_W_I[ram_idx]),
                                .d_wr   (ping_RAM_D_I[ram_idx]),
                                .d_rd   (ping_RAM_Q_I[ram_idx])
                        );
                        // coefficient RAM, Im part
                        single_port_ram #(.DATA_WIDTH (INP_SAMP_WIDTH), .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH))
                        ping_RAM_Q
                        (
                                .clk    (~clk),
                                .addr   (ping_RAM_A[ram_idx]),
                                .we     (ping_RAM_W_Q[ram_idx]),
                                .d_wr   (ping_RAM_D_Q[ram_idx]),
                                .d_rd   (ping_RAM_Q_Q[ram_idx])
                        );
                        // sample RAM, Re part
                        single_port_ram #(.DATA_WIDTH (INP_SAMP_WIDTH), .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH))
                        samp_RAM_I
                        (
                                .clk    (~clk),
                                .addr   (samp_RAM_A[ram_idx]),
                                .we     (samp_RAM_W[ram_idx]),
                                .d_wr   (samp_RAM_D_I[ram_idx]),
                                .d_rd   (samp_RAM_Q_I[ram_idx])
                        );
                        // sample RAM, Im part
                        single_port_ram #(.DATA_WIDTH (INP_SAMP_WIDTH), .ADDR_WIDTH (CONV_MEM_BLOCK_ADDR_WIDTH))
                        samp_RAM_Q
                        (
                                .clk    (~clk),
                                .addr   (samp_RAM_A[ram_idx]),
                                .we     (samp_RAM_W[ram_idx]),
                                .d_wr   (samp_RAM_D_Q[ram_idx]),
                                .d_rd   (samp_RAM_Q_Q[ram_idx])
                        );
                end // for
        endgenerate
178
 
179
        // strobes for frame start and sample start
180
        // Rising-edge detectors for inp_ping_start and inp_clk.
        // *_catch holds the input value seen on the previous falling clk edge;
        // *_str is high for one clk period when the input is 1 and the previous value was 0.
        always @ (negedge clk or posedge reset) begin
                if (reset) begin
                        {inp_ping_start_catch, inp_ping_start_str} <= 2'b00;
                        {inp_clk_catch, inp_clk_str} <= 2'b00;
                end else begin
                        // frame start
                        inp_ping_start_str <= ~inp_ping_start_catch & inp_ping_start;
                        inp_ping_start_catch <= inp_ping_start;
                        // sample start
                        inp_clk_str <= ~inp_clk_catch & inp_clk;
                        inp_clk_catch <= inp_clk;
                end
        end //always
193
 
194
        // one-hot for ping_RAM block selecting for access from parallel interface
195
        // Combinational decoder: the block-number field of the parallel-bus address
        // (bits above the in-block address) is turned into a one-hot block select.
        always @* begin : bank_sel_decode
                IOB_ping_RAM_A_bank_sel = 0;    // default: no block selected
                IOB_ping_RAM_A_bank_sel[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]] = 1'b1;
        end //always
199
 
200
        // sample number "sample_counter", ping present signal "ping_to_store" and ping_RAM block number to store ping "ping_to_store_n"
201
        // Frame bookkeeping, updated on the falling clk edge:
        //   sample_counter      - index of the current sample inside the frame
        //   inp_ping_length_reg - ping length latched at frame start
        //   ping_to_store       - 1 from frame start until sample_counter reaches the ping length
        //   ping_to_store_n     - one-hot select of the coefficient RAM block that receives the sample
        // All registers use nonblocking assignments: ping_to_store_n is read by other
        // logic on the same clk edge (frame strobe, RAM write enables), so a blocking
        // assignment here would make the simulation result depend on process ordering.
        always @ (negedge clk)
        begin
                if (inp_ping_start_str) begin
                        // frame start: latch the ping length and restart counting from block 0
                        inp_ping_length_reg <= inp_ping_length;
                        sample_counter <= 0;
                        ping_to_store <= 1;
                        ping_to_store_n <= 1;
                end else begin
                        if (inp_clk_str) begin
                                sample_counter <= sample_counter + 1;
                                if (sample_counter[PING_ADDR_WIDTH - 1:0] == inp_ping_length_reg) begin  // ping ends, stop storing samples to coefficients RAM
                                        ping_to_store <= 0;
                                end
                                if (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1 : 0] == {CONV_MEM_BLOCK_ADDR_WIDTH{1'b1}}) begin        // last address of a block: move on to the next block
                                        ping_to_store_n <= ping_to_store_n << 1;
                                end
                        end
                end
        end
220
 
221
        // clock counter, counts clocks in frame, used to convolution calculating
222
        // Counts falling clk edges since the last input sample strobe.
        // The counter is CLK_TO_SAMP_ADDR_WIDTH bits wide and wraps around on overflow.
        always @ (negedge clk)
223
        begin
224
                clk_to_samp_counter <= inp_clk_str ? 0 : (clk_to_samp_counter + 1);     // restart on every sample strobe, otherwise count up
225
        end
226
 
227
        //      clk_to_samp_counter
228
        //      0                                                                                registers initialization
229
        //      1                                                                               store sample into RAM
230
        //      2                                                                               /summands calculation (II, IQ, QI, QQ)
231
        //      5                                                                               |       2**CONV_MEM_BLOCK_ADDR_WIDTH + 3 cycles    /summs in blocks calculation
232
        //      2**CONV_MEM_BLOCK_ADDR_WIDTH + 5                \                                                                                       |
233
        //      2**CONV_MEM_BLOCK_ADDR_WIDTH + 6                / summs of summs in blocks calculation          \
234
        //  2**CONV_MEM_BLOCK_ADDR_WIDTH + 6            |
235
        //                      + 2**CONV_BLOCK_ADDR_WIDTH              \
236
        //      2**CONV_MEM_BLOCK_ADDR_WIDTH + 7                output result, sample strobe and frame strobe
237
        //                      + 2**CONV_BLOCK_ADDR_WIDTH
238
        // Phase flags of the per-sample computation, decoded from clk_to_samp_counter.
        // Each flag is a register: it changes one clk period after the counter value
        // that sets or clears it.
        // proc_store_samp is assigned with nonblocking assignments only. It was mixed
        // blocking/nonblocking before, which races with the other negedge-clk processes
        // and the RAM write enables that read it, and is rejected by some synthesis tools.
        always @ (negedge clk)
        begin
                if (inp_clk_str) begin
                        // a new input sample arrived: drop every phase flag
                        proc_store_samp <= 0;
                        proc_count_blocks <= 0;
                        proc_count_blocks_acc <= 0;
                        proc_count_blocks_sum <= 0;
                end else begin
                        // sample store phase: high for the single period following counter value 0
                        proc_store_samp <= (clk_to_samp_counter == 0);
                        if (clk_to_samp_counter == 2) begin
                                proc_count_blocks <= 1;         // begin to calculate convolution in blocks
                        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 5) begin
                                proc_count_blocks <= 0;          // finish
                        end
                        if (clk_to_samp_counter == 5) begin
                                proc_count_blocks_acc <= 1;             // begin to accumulate sums in blocks
                        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7) begin
                                proc_count_blocks_acc <= 0;              // finish
                        end
                        if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 6) begin
                                proc_count_blocks_sum <= 1;     // begin to accumulate the sum of block sums
                        end else if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin  // continue for 2**CONV_BLOCK_ADDR_WIDTH times
                                proc_count_blocks_sum <= 0;      // finish
                        end
                end
        end
264
 
265
        assign out_samp_acc_Q_selected = out_samp_acc_Q[blocks_sum_counter];    // Im block accumulator currently addressed by blocks_sum_counter
266
        // coefficient address counter, sample address counter
        // While proc_store_samp is high both addresses are (re)loaded; afterwards they
        // advance by one on every falling clk edge for which proc_count_blocks is set.
        // NOTE(review): proc_store_samp is in the sensitivity list, so it acts as an
        // asynchronous load, and addr_echo is loaded with a non-constant value - confirm
        // that the target synthesis flow implements this as intended.
267
        always @ (negedge clk or posedge proc_store_samp)
268
        begin
269
                if (proc_store_samp) begin
270
                        addr_ping <= 0;                                                 // coefficients are always read from address 0 of each block
271
                        addr_echo <= sample_counter - (2**PING_ADDR_WIDTH - 1); // start 2**PING_ADDR_WIDTH - 1 samples behind the current one; the result is truncated to the width of addr_echo
272
                end else if (proc_count_blocks) begin
273
                        addr_ping <= addr_ping + 1;
274
                        addr_echo <= addr_echo + 1;
275
                end
276
        end
277
 
278
        // bidirectional bus for coefficient RAM
279
        // Read-back path of the coefficient RAM onto the bidirectional data bus.
        // The bus is driven only while both RD and CS are set, otherwise it is high-Z.
        // IOB_ping_RAM_IQ selects the Im (1) or Re (0) RAM; the upper address bits select the block.
        // The value driven is the RAM read output (ping_RAM_Q_*). Driving the bus from the
        // write-data nets (ping_RAM_D_*) would never return the stored coefficient, and since
        // those nets are themselves driven from IOB_ping_RAM_D it would close a combinational loop.
        // The in-block read address comes from IOB_ping_RAM_A only when CS is set and
        // IOB_ping_from_Rx is 0 (see the ping_RAM_A assignment in the mac_blocks generate).
        assign  IOB_ping_RAM_D = (IOB_ping_RAM_RD & IOB_ping_RAM_CS) ?  // Data bus, Z if read not selected
                (IOB_ping_RAM_IQ ?                                                                                      // if read, then I or Q
                                ping_RAM_Q_Q[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
                         : ping_RAM_Q_I[IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1 : CONV_MEM_BLOCK_ADDR_WIDTH]]
                ) : {INP_SAMP_WIDTH{1'bZ}};
284
        // For every MAC block: number of the sample RAM block it must read from.
        // NOTE(review): elements are 2**CONV_BLOCK_ADDR_WIDTH bits wide although the value is
        // masked to CONV_BLOCK_ADDR_WIDTH bits where it is computed - wider than needed, harmless.
        wire    [2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0];       // block number for coefficient
285
        reg     [2**CONV_BLOCK_ADDR_WIDTH - 1 : 0] block_num_buf_reg[2**CONV_BLOCK_ADDR_WIDTH - 1 : 0];   // delayed for using in convolution calculation pipe
286
        // convolution calculating blocks
287
        genvar mac_block;
288
        generate
289
                for (mac_block = 0; mac_block < 2**CONV_BLOCK_ADDR_WIDTH; mac_block = mac_block + 1)
290
                begin : mac_blocks
291
                        // RAM buses
292
                        // coefficient RAM buses
293
                        // Data bus: if IOB_ping_from_Rx = 0 - coefficient RAM loaded from parallel bus and CS set then here is data from parallel bus
294
                        //                                      else if flag ping is present is set, then here is input samples
295
                        // Parallel-bus access to the coefficient RAM: chip select set and
                        // coefficients are not being captured from the receiver.
                        wire iob_access = IOB_ping_RAM_CS & ~IOB_ping_from_Rx;
                        // Write request generated by capturing the ping from the input samples
                        // into this block.
                        wire rx_capture_we = IOB_ping_from_Rx & proc_store_samp & ping_to_store_n[mac_block];

                        // ---- coefficient RAM ----
                        // write data: parallel bus data, otherwise the input sample while the
                        // ping is present and zero afterwards
                        assign ping_RAM_D_I[mac_block] = iob_access ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_I : 0);
                        assign ping_RAM_D_Q[mac_block] = iob_access ? IOB_ping_RAM_D : (ping_to_store ? inp_samp_Q : 0);
                        // address: parallel bus address, otherwise the store address while a
                        // sample is being saved, otherwise the convolution read address
                        assign ping_RAM_A  [mac_block] = iob_access ? IOB_ping_RAM_A[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]
                                : (proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0] : addr_ping[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]);
                        // write enable: bus write to the selected block and I/Q half, or ping capture
                        assign ping_RAM_W_I[mac_block] = (iob_access & IOB_ping_RAM_WR & ~IOB_ping_RAM_IQ & IOB_ping_RAM_A_bank_sel[mac_block]) | rx_capture_we;
                        assign ping_RAM_W_Q[mac_block] = (iob_access & IOB_ping_RAM_WR &  IOB_ping_RAM_IQ & IOB_ping_RAM_A_bank_sel[mac_block]) | rx_capture_we;

                        // ---- sample RAM ----
                        // every block sees the current input sample on its write data bus
                        assign samp_RAM_D_I[mac_block] = inp_samp_I;
                        assign samp_RAM_D_Q[mac_block] = inp_samp_Q;
                        // address: store address while saving the new sample, read address otherwise
                        assign samp_RAM_A[mac_block] = proc_store_samp ? sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0]
                                : addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH - 1:0];
                        // only the block addressed by the upper bits of sample_counter is written
                        assign samp_RAM_W[mac_block] = proc_store_samp
                                & (sample_counter[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH] == mac_block);

                        // sample block to read for this MAC block: own number plus the block part
                        // of the read address, wrapped to the number of blocks
                        assign block_num_buf[mac_block] = (mac_block + addr_echo[CONV_MEM_BLOCK_ADDR_WIDTH + CONV_BLOCK_ADDR_WIDTH - 1:CONV_MEM_BLOCK_ADDR_WIDTH]) & {CONV_BLOCK_ADDR_WIDTH{1'b1}};
315
                        // Multiplier pipeline of one block:
                        //   stage 1 - RAM read data is registered into the multiplier_* operands
                        //   stage 2 - the four partial products II, QQ, QI, IQ are registered
                        // The partial products are combined by the accumulator process that follows.
                        // While proc_count_blocks is low the whole pipeline is held at zero.
                        // All four product registers are cleared: QI and IQ used to be left out, so
                        // their last value stayed in place and kept being added to the Im accumulator
                        // for as long as proc_count_blocks_acc remained set, while the Re accumulator
                        // received zeros.
                        // NOTE(review): proc_count_blocks is in the sensitivity list (asynchronous clear),
                        // and block_num_buf_reg is updated outside the if/else - confirm the synthesis
                        // tool accepts this template.
                        // NOTE(review): the products of two INP_SAMP_WIDTH operands are stored in
                        // OUT_SAMP_WIDTH-bit registers - confirm the truncation is intended.
                        always @ (negedge clk or negedge  proc_count_blocks) begin
                                block_num_buf_reg[mac_block] <= block_num_buf[mac_block];
                                // registers initialization if convolution not processed
                                if (~proc_count_blocks) begin
                                        multiplier_ping_I[mac_block] <= 0;
                                        multiplier_ping_Q[mac_block] <= 0;
                                        multiplier_echo_I[mac_block] <= 0;
                                        multiplier_echo_Q[mac_block] <= 0;
                                        samp_mult_II[mac_block] <= 0;
                                        samp_mult_QQ[mac_block] <= 0;
                                        samp_mult_QI[mac_block] <= 0;
                                        samp_mult_IQ[mac_block] <= 0;
                                end else begin
                                        // coefficients are read from the block's own RAM, no offset
                                        multiplier_ping_I[mac_block] <= ping_RAM_Q_I[mac_block];
                                        multiplier_ping_Q[mac_block] <= ping_RAM_Q_Q[mac_block];
                                        // samples are read from the RAM block selected by the delayed block offset
                                        multiplier_echo_I[mac_block] <= samp_RAM_Q_I[block_num_buf_reg[mac_block]];
                                        multiplier_echo_Q[mac_block] <= samp_RAM_Q_Q[block_num_buf_reg[mac_block]];
                                        // partial products, A = coefficient (ping), B = sample (echo):
                                        // II = Ai*Bi, QQ = Aq*Bq, QI = Aq*Bi, IQ = Ai*Bq
                                        samp_mult_II[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_I[mac_block];
                                        samp_mult_QQ[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_Q[mac_block];
                                        samp_mult_QI[mac_block] <= multiplier_ping_Q[mac_block] * multiplier_echo_I[mac_block];
                                        samp_mult_IQ[mac_block] <= multiplier_ping_I[mac_block] * multiplier_echo_Q[mac_block];
                                end
                        end // always
339
                        // Per-block accumulators. Cleared by the sample strobe; while
                        // proc_count_blocks_acc is set the partial products are added every clk:
                        //   Re += II + QQ
                        //   Im += IQ - QI
                        // (a real-valued FIR would use only II for Re and only QQ for Im)
                        always @ (negedge clk) begin
                                if (inp_clk_str) begin
                                        out_samp_acc_Q[mac_block] <= 0;
                                        out_samp_acc_I[mac_block] <= 0;
                                end else if (proc_count_blocks_acc) begin
                                        out_samp_acc_I[mac_block] <= out_samp_acc_I[mac_block] + (samp_mult_II[mac_block] + samp_mult_QQ[mac_block]);
                                        out_samp_acc_Q[mac_block] <= out_samp_acc_Q[mac_block] + (samp_mult_IQ[mac_block] - samp_mult_QI[mac_block]);
                                end
                        end
351
                end // for
352
        endgenerate
353
 
354
        // Final reduction: adds up the per-block accumulators one block per clk while
        // proc_count_blocks_sum is set, and copies the total into the output registers
        // when clk_to_samp_counter reaches the end-of-computation value.
        always @ (negedge clk)
        begin
                if (inp_clk_str) begin
                        // new input sample: restart the reduction
                        blocks_acc_I <= 0;
                        blocks_acc_Q <= 0;
                        blocks_sum_counter <= 0;
                end else begin
                        // convolution sample ready, move result to output register
                        if (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH) begin
                                out_samp_Q_reg <= blocks_acc_Q;
                                out_samp_I_reg <= blocks_acc_I;
                        end
                        // sum of block sums: take the next block on every clk
                        if (proc_count_blocks_sum) begin
                                blocks_acc_I <= blocks_acc_I + out_samp_acc_I[blocks_sum_counter];
                                blocks_acc_Q <= blocks_acc_Q + out_samp_acc_Q[blocks_sum_counter];
                                blocks_sum_counter <= blocks_sum_counter + 1;
                        end
                end //if
        end // always
372
 
373
        // sample strobe, frame strobe and |output|^2 for debug
374
        // Output strobes and debug magnitude.
        // The sample strobe is registered from the same counter value that loads the output
        // registers; the frame strobe additionally requires sample_counter == 0 and
        // ping_to_store_n[0] set.
        always @ (negedge clk) begin
                out_frame_strobe_reg <= ping_to_store_n[0] & (sample_counter == 0)
                        & (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH);
                out_samp_strobe_reg <= (clk_to_samp_counter == 2**CONV_MEM_BLOCK_ADDR_WIDTH + 7 + 2**CONV_BLOCK_ADDR_WIDTH);
                // I^2 + Q^2 of the current output sample, for debug
                out_samp_A_sq_reg <= out_samp_I_reg * out_samp_I_reg + out_samp_Q_reg * out_samp_Q_reg;
        end
380
 
381
        // Output ports are driven directly from their registers.
        assign out_samp_I = out_samp_I_reg;
        assign out_samp_Q = out_samp_Q_reg;
        assign out_samp_strobe = out_samp_strobe_reg;
        assign out_frame_strobe = out_frame_strobe_reg;
        // debug magnitude: OUT_SAMP_A_SQ_WIDTH bits starting at bit OUT_SAMP_A_SQ_OFFS
        assign out_samp_A_sq = out_samp_A_sq_reg[OUT_SAMP_A_SQ_OFFS +: OUT_SAMP_A_SQ_WIDTH];
386
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.