//--------------------------------------------------------------------------------------------------
// Design    : nova
// Author(s) : Ke Xu
// Email     : eexuke@yahoo.com
// File      : sum.v
// Generated : Oct 29, 2005
// Copyright (C) 2008 Ke Xu
//-------------------------------------------------------------------------------------------------
// Description
// Sum module for residual + prediction
// Including output transpose and Intra_mbAddrB_RAM write control
//-------------------------------------------------------------------------------------------------
|
13 |
|
|
|
14 |
|
|
// synopsys translate_off
|
15 |
|
|
`include "timescale.v"
|
16 |
|
|
// synopsys translate_on
|
17 |
|
|
`include "nova_defines.v"
|
18 |
|
|
|
19 |
|
|
module sum (
	// clocks, reset, parser state and residual summary flags
	clk,reset_n,slice_data_state,residual_state,TotalCoeff,curr_CBPLuma_IsZero,CodedBlockPatternChroma,
	curr_DC_IsZero,curr_DC_scaled,gclk_pred_output,gclk_blk4x4_sum,trigger_blk4x4_rec_sum,
	// residual samples from IQIT
	IQIT_output_0, IQIT_output_1, IQIT_output_2, IQIT_output_3,
	IQIT_output_4, IQIT_output_5, IQIT_output_6, IQIT_output_7,
	IQIT_output_8, IQIT_output_9, IQIT_output_10,IQIT_output_11,
	IQIT_output_12,IQIT_output_13,IQIT_output_14,IQIT_output_15,
	// intra prediction
	mb_type_general,Intra4x4_predmode,Intra16x16_predmode,Intra_chroma_predmode,
	Intra_pred_PE0_out,Intra_pred_PE1_out,Intra_pred_PE2_out,Intra_pred_PE3_out,blk4x4_intra_calculate_counter,
	// inter prediction and macroblock position
	Inter_pred_out0,Inter_pred_out1,Inter_pred_out2,Inter_pred_out3,blk4x4_inter_calculate_counter,Inter_chroma2x2_counter,
	Inter_blk4x4_pred_output_valid,mv_below8x8_curr,pos_FracL,mb_num_v,mb_num_h,LowerMB_IsSkip,

	// outputs
	end_of_one_blk4x4_sum,blk4x4_sum_counter,blk4x4_rec_counter,
	blk4x4_sum_PE0_out,blk4x4_sum_PE1_out,blk4x4_sum_PE2_out,blk4x4_sum_PE3_out,
	sum_right_column_reg,blk4x4_rec_counter_2_raster_order,
	blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2,
	blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6,
	blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10,
	blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14,
	Intra_mbAddrB_RAM_wr,Intra_mbAddrB_RAM_wr_addr,Intra_mbAddrB_RAM_din
	);

//------------------------------------------------------
// Inputs
//------------------------------------------------------
input clk;                          // clocks blk4x4_sum_counter and blk4x4_rec_counter
input reset_n;                      // active-low reset
input gclk_pred_output;             // clocks the prediction store registers
input gclk_blk4x4_sum;              // clocks sum_right_column_reg
input trigger_blk4x4_rec_sum;       // restarts blk4x4_sum_counter at 0

// Used to classify the residual of the current 4x4 block (all zero / DC only)
input [3:0] slice_data_state;
input [3:0] residual_state;
input [4:0] TotalCoeff;
input       curr_CBPLuma_IsZero;
input [1:0] CodedBlockPatternChroma;
input       curr_DC_IsZero;
input [8:0] curr_DC_scaled;         // residual value used for all samples of a DC-only block

//residual from IQIT
input [8:0] IQIT_output_0, IQIT_output_1, IQIT_output_2, IQIT_output_3;
input [8:0] IQIT_output_4, IQIT_output_5, IQIT_output_6, IQIT_output_7;
input [8:0] IQIT_output_8, IQIT_output_9, IQIT_output_10,IQIT_output_11;
input [8:0] IQIT_output_12,IQIT_output_13,IQIT_output_14,IQIT_output_15;

//Intra prediction output
input [3:0] mb_type_general;
input [3:0] Intra4x4_predmode;
input [1:0] Intra16x16_predmode;
input [1:0] Intra_chroma_predmode;
input [7:0] Intra_pred_PE0_out;
input [7:0] Intra_pred_PE1_out;
input [7:0] Intra_pred_PE2_out;
input [7:0] Intra_pred_PE3_out;
input [2:0] blk4x4_intra_calculate_counter;

//Inter prediction output
input [7:0] Inter_pred_out0;
input [7:0] Inter_pred_out1;
input [7:0] Inter_pred_out2;
input [7:0] Inter_pred_out3;
input [1:0] Inter_blk4x4_pred_output_valid;   // 2'b01 selects the luma store, 2'b10 the chroma store
input       mv_below8x8_curr;
input [3:0] pos_FracL;
input [3:0] blk4x4_inter_calculate_counter;
input [1:0] Inter_chroma2x2_counter;

// Macroblock position, used only by the Intra_mbAddrB_RAM write control
input [3:0] mb_num_h,mb_num_v;
input       LowerMB_IsSkip;         // when set, the Intra_mbAddrB_RAM write is suppressed

//------------------------------------------------------
// Outputs
//------------------------------------------------------
output        end_of_one_blk4x4_sum;
output [2:0]  blk4x4_sum_counter;
output [4:0]  blk4x4_rec_counter;
output [7:0]  blk4x4_sum_PE0_out,blk4x4_sum_PE1_out,blk4x4_sum_PE2_out,blk4x4_sum_PE3_out;
output [23:0] sum_right_column_reg;
output [4:0]  blk4x4_rec_counter_2_raster_order;
// Only twelve of the sixteen prediction store registers are ports;
// blk4x4_pred_output3/7/11/15 exist only as internal registers.
output [7:0]  blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2;
output [7:0]  blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6;
output [7:0]  blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10;
output [7:0]  blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14;
output        Intra_mbAddrB_RAM_wr;
output [6:0]  Intra_mbAddrB_RAM_wr_addr;
output [31:0] Intra_mbAddrB_RAM_din;

//------------------------------------------------------
// Storage for outputs driven from always blocks
//------------------------------------------------------
reg [2:0]  blk4x4_sum_counter;
reg [4:0]  blk4x4_rec_counter;
reg [4:0]  blk4x4_rec_counter_2_raster_order;
reg [23:0] sum_right_column_reg;

// Prediction store for one 4x4 block (sixteen 8-bit samples)
reg [7:0] blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2, blk4x4_pred_output3;
reg [7:0] blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6, blk4x4_pred_output7;
reg [7:0] blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10,blk4x4_pred_output11;
reg [7:0] blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14,blk4x4_pred_output15;
|
95 |
|
|
|
96 |
|
|
|
97 |
|
|
// Prediction store: captures the intra / inter prediction samples of the current
// 4x4 block into blk4x4_pred_output0 ~ 15.
// Priority: reset > intra (blk4x4_intra_calculate_counter != 0) > inter luma
// (Inter_blk4x4_pred_output_valid == 2'b01) > inter chroma (== 2'b10).
// Registers not written in a given cycle keep their value.
always @ (posedge gclk_pred_output or negedge reset_n)
	if (reset_n == 1'b0)
		{blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2, blk4x4_pred_output3,
		 blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6, blk4x4_pred_output7,
		 blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10,blk4x4_pred_output11,
		 blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14,blk4x4_pred_output15} <= 128'd0;
	else if (blk4x4_intra_calculate_counter != 0)
		begin
			//Intra4x4DC or chromaDC intra prediction:output valid only at cycle3 by PE0
			if ((mb_type_general[3:2] == 2'b11 && blk4x4_rec_counter < 16 && Intra4x4_predmode == `Intra4x4_DC) ||
				(mb_type_general[3] == 1'b1 && blk4x4_rec_counter > 15 && Intra_chroma_predmode == `Intra_chroma_DC))
				begin
					// the single PE0 value is copied into all sixteen positions
					if (blk4x4_intra_calculate_counter == 3'd3)
						{blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2, blk4x4_pred_output3,
						 blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6, blk4x4_pred_output7,
						 blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10,blk4x4_pred_output11,
						 blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14,blk4x4_pred_output15}
							<= {16{Intra_pred_PE0_out}};
				end
			//Intra16x16DC intra prediction:output valid only at cycle1 by PE0
			else if (mb_type_general[3:2] == 2'b10 && blk4x4_rec_counter < 16 && Intra16x16_predmode == `Intra16x16_DC)
				begin
					// captured only while blk4x4_rec_counter is 0; no update for the other blocks
					if (blk4x4_rec_counter == 0 && blk4x4_intra_calculate_counter == 3'd1)
						{blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output2, blk4x4_pred_output3,
						 blk4x4_pred_output4, blk4x4_pred_output5, blk4x4_pred_output6, blk4x4_pred_output7,
						 blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output10,blk4x4_pred_output11,
						 blk4x4_pred_output12,blk4x4_pred_output13,blk4x4_pred_output14,blk4x4_pred_output15}
							<= {16{Intra_pred_PE0_out}};
				end
			//Besides above DC intra prediction case,other intra prediction modes output valid from cycle4 ~ cycle1
			// each cycle PE0..PE3 deliver four samples that land in positions n, n+4, n+8, n+12
			else
				case (blk4x4_intra_calculate_counter)
					3'd4:{blk4x4_pred_output0, blk4x4_pred_output4, blk4x4_pred_output8, blk4x4_pred_output12}
							<= {Intra_pred_PE0_out, Intra_pred_PE1_out, Intra_pred_PE2_out, Intra_pred_PE3_out};
					3'd3:{blk4x4_pred_output1, blk4x4_pred_output5, blk4x4_pred_output9, blk4x4_pred_output13}
							<= {Intra_pred_PE0_out, Intra_pred_PE1_out, Intra_pred_PE2_out, Intra_pred_PE3_out};
					3'd2:{blk4x4_pred_output2, blk4x4_pred_output6, blk4x4_pred_output10,blk4x4_pred_output14}
							<= {Intra_pred_PE0_out, Intra_pred_PE1_out, Intra_pred_PE2_out, Intra_pred_PE3_out};
					3'd1:{blk4x4_pred_output3, blk4x4_pred_output7, blk4x4_pred_output11,blk4x4_pred_output15}
							<= {Intra_pred_PE0_out, Intra_pred_PE1_out, Intra_pred_PE2_out, Intra_pred_PE3_out};
				endcase
		end
	//Inter luma prediction output store
	else if (Inter_blk4x4_pred_output_valid == 2'b01)
		begin
			// for pos_i / pos_k the four result groups are taken at counter 7,5,3,1;
			// for every other fractional position at counter 4,3,2,1
			if (pos_FracL == `pos_i || pos_FracL == `pos_k)
				case (blk4x4_inter_calculate_counter)
					4'd7:{blk4x4_pred_output0, blk4x4_pred_output4, blk4x4_pred_output8, blk4x4_pred_output12}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd5:{blk4x4_pred_output1, blk4x4_pred_output5, blk4x4_pred_output9, blk4x4_pred_output13}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd3:{blk4x4_pred_output2, blk4x4_pred_output6, blk4x4_pred_output10,blk4x4_pred_output14}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd1:{blk4x4_pred_output3, blk4x4_pred_output7, blk4x4_pred_output11,blk4x4_pred_output15}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
				endcase
			else
				case (blk4x4_inter_calculate_counter)
					4'd4:{blk4x4_pred_output0, blk4x4_pred_output4, blk4x4_pred_output8, blk4x4_pred_output12}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd3:{blk4x4_pred_output1, blk4x4_pred_output5, blk4x4_pred_output9, blk4x4_pred_output13}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd2:{blk4x4_pred_output2, blk4x4_pred_output6, blk4x4_pred_output10,blk4x4_pred_output14}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd1:{blk4x4_pred_output3, blk4x4_pred_output7, blk4x4_pred_output11,blk4x4_pred_output15}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
				endcase
		end
	//Inter chroma prediction output store
	// each step fills one 2x2 quadrant of the block:
	// {0,1,4,5}, {2,3,6,7}, {8,9,12,13}, {10,11,14,15}
	else if (Inter_blk4x4_pred_output_valid == 2'b10)
		case (mv_below8x8_curr)
			1'b0: // quadrant selected by blk4x4_inter_calculate_counter
				case (blk4x4_inter_calculate_counter)
					4'd4:{blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output4, blk4x4_pred_output5}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd3:{blk4x4_pred_output2, blk4x4_pred_output3, blk4x4_pred_output6, blk4x4_pred_output7}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd2:{blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output12,blk4x4_pred_output13}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
					4'd1:{blk4x4_pred_output10,blk4x4_pred_output11,blk4x4_pred_output14,blk4x4_pred_output15}
							<= {Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3};
				endcase
			1'b1: // quadrant selected by Inter_chroma2x2_counter; zeros are stored while
			      // blk4x4_inter_calculate_counter is 0
				case (Inter_chroma2x2_counter)
					2'b11:{blk4x4_pred_output0, blk4x4_pred_output1, blk4x4_pred_output4, blk4x4_pred_output5}
							<= (blk4x4_inter_calculate_counter != 0)?
								{Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3}:32'd0;
					2'b10:{blk4x4_pred_output2, blk4x4_pred_output3, blk4x4_pred_output6, blk4x4_pred_output7}
							<= (blk4x4_inter_calculate_counter != 0)?
								{Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3}:32'd0;
					2'b01:{blk4x4_pred_output8, blk4x4_pred_output9, blk4x4_pred_output12,blk4x4_pred_output13}
							<= (blk4x4_inter_calculate_counter != 0)?
								{Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3}:32'd0;
					2'b00:{blk4x4_pred_output10,blk4x4_pred_output11,blk4x4_pred_output14,blk4x4_pred_output15}
							<= (blk4x4_inter_calculate_counter != 0)?
								{Inter_pred_out0, Inter_pred_out1, Inter_pred_out2, Inter_pred_out3}:32'd0;
				endcase
		endcase
|
221 |
|
|
|
222 |
|
|
//------------------------------------------------------
//blk4x4_sum_counter
//------------------------------------------------------
// Step counter of the reconstruction sum for one 4x4 block.
// 3'd4 is the idle value: it is loaded by the (synchronous) reset and held until
// trigger_blk4x4_rec_sum restarts the count at 0; the counter then steps 0,1,2,3,4.
always @ (posedge clk)
	begin
		if (!reset_n)
			blk4x4_sum_counter <= 3'b100;
		else
			begin
				if (trigger_blk4x4_rec_sum)
					blk4x4_sum_counter <= 3'b000;
				else if (blk4x4_sum_counter != 3'b100)
					blk4x4_sum_counter <= blk4x4_sum_counter + 3'd1;
			end
	end

// High during the last step (counter value 3) of a block
assign end_of_one_blk4x4_sum = (blk4x4_sum_counter == 3'd3);
|
234 |
|
|
//------------------------------------------------------
//blk4x4_rec_counter
//------------------------------------------------------
// Index of the 4x4 block being reconstructed. It advances on the last step of each
// block sum (blk4x4_sum_counter == 3) and wraps from 23 back to 0.
// Elsewhere in this module values below 16 are treated as luma, 16 ~ 23 as chroma.
always @ (posedge clk)
	if (!reset_n)
		blk4x4_rec_counter <= 5'd0;
	else if (blk4x4_sum_counter == 3'd3)
		begin
			if (blk4x4_rec_counter == 5'd23)
				blk4x4_rec_counter <= 5'd0;
			else
				blk4x4_rec_counter <= blk4x4_rec_counter + 5'd1;
		end
|
242 |
|
|
//------------------------------------------------------
//reconstruction sum
//------------------------------------------------------

//Note: res_blk4x4_IsAllZero has a higher priority than res_blk4x4_onlyDC wherever
//both are consumed, so the conditions that set res_blk4x4_onlyDC are deliberately
//NOT complete on their own. Taken together with the res_blk4x4_IsAllZero value
//assigned in the same branch, res_blk4x4_onlyDC is correct.

//res_blk4x4_IsAllZero depends on: curr_DC_IsZero, curr_CBPLuma_IsZero,
//TotalCoeff being zero, and CodedBlockPatternChroma being zero or one.

reg res_blk4x4_IsAllZero;	// residual of the current 4x4 block is entirely zero
reg res_blk4x4_onlyDC;		// residual of the current 4x4 block has a DC term only

// Combinational classification of the current residual block
always @ (slice_data_state or residual_state or TotalCoeff or curr_DC_IsZero
	or curr_CBPLuma_IsZero or CodedBlockPatternChroma)
	if (slice_data_state == `skip_run_duration)
		begin
			res_blk4x4_onlyDC    <= 1'b0;
			res_blk4x4_IsAllZero <= 1'b1;
		end
	else
		case (residual_state)
			`Intra16x16ACLevel_0_s:
				begin
					// only the DC term can be present in this state
					res_blk4x4_onlyDC    <= !curr_DC_IsZero;
					res_blk4x4_IsAllZero <= (curr_DC_IsZero == 1'b1);
				end
			`Intra16x16ACLevel_s,`ChromaACLevel_Cb_s,`ChromaACLevel_Cr_s:
				begin
					res_blk4x4_onlyDC    <= (TotalCoeff == 0);
					res_blk4x4_IsAllZero <= (TotalCoeff == 0 && curr_DC_IsZero);
				end
			`LumaLevel_0_s:
				begin
					res_blk4x4_onlyDC    <= 1'b0;
					res_blk4x4_IsAllZero <= 1'b1;
				end
			`LumaLevel_s:
				begin
					res_blk4x4_onlyDC    <= 1'b0;
					res_blk4x4_IsAllZero <= (TotalCoeff == 0 || curr_CBPLuma_IsZero);
				end
			`ChromaACLevel_0_s: //CodedBlockPatternChroma == 0 or 1
				begin
					if (CodedBlockPatternChroma == 0) //CodedBlockPatternChroma == 0
						begin
							res_blk4x4_onlyDC    <= 1'b0;
							res_blk4x4_IsAllZero <= 1'b1;
						end
					else //CodedBlockPatternChroma == 1
						begin
							res_blk4x4_onlyDC    <= !curr_DC_IsZero;
							res_blk4x4_IsAllZero <= (curr_DC_IsZero == 1'b1);
						end
				end
			default:
				begin
					res_blk4x4_onlyDC    <= 1'b0;
					res_blk4x4_IsAllZero <= 1'b0;
				end
		endcase
|
300 |
|
|
|
301 |
|
|
// Operands of the four adders: 'a' carries the 9-bit residual, 'b' the 8-bit
// prediction sample. Both are selected per step by the multiplexers further below.
reg [8:0] sum_PE0_a,sum_PE1_a,sum_PE2_a,sum_PE3_a;
reg [7:0] sum_PE0_b,sum_PE1_b,sum_PE2_b,sum_PE3_b;

//only one bypass signal for all sum_PE0 ~ sum_PE3
// The adders are bypassed (output = prediction) while the step counter is at its
// idle value 4 or when the residual block is entirely zero.
wire sum_PE_bypass;
assign sum_PE_bypass = (blk4x4_sum_counter == 3'd4) || res_blk4x4_IsAllZero;

// One adder per sample handled in a step
sum_PE sum_PE0 (
	.bypass(sum_PE_bypass),
	.a(sum_PE0_a),
	.b(sum_PE0_b),
	.c(blk4x4_sum_PE0_out)
	);
sum_PE sum_PE1 (
	.bypass(sum_PE_bypass),
	.a(sum_PE1_a),
	.b(sum_PE1_b),
	.c(blk4x4_sum_PE1_out)
	);
sum_PE sum_PE2 (
	.bypass(sum_PE_bypass),
	.a(sum_PE2_a),
	.b(sum_PE2_b),
	.c(blk4x4_sum_PE2_out)
	);
sum_PE sum_PE3 (
	.bypass(sum_PE_bypass),
	.a(sum_PE3_a),
	.b(sum_PE3_b),
	.c(blk4x4_sum_PE3_out)
	);
|
330 |
|
|
|
331 |
|
|
// only for statistical purpose
// synopsys translate_off
// Simulation-only tallies: how many 4x4 blocks were classified as all-zero and how
// many as DC-only. A block is counted when blk4x4_sum_counter changes to 2, and a
// block flagged all-zero is never also counted as DC-only.
integer number_of_IsAllZero;
integer number_of_onlyDC;
initial
	begin
		number_of_onlyDC    = 0;
		number_of_IsAllZero = 0;
	end
always @ (blk4x4_sum_counter)
	if (blk4x4_sum_counter == 3'd2)
		begin
			if (res_blk4x4_IsAllZero == 1'b1)
				number_of_IsAllZero <= number_of_IsAllZero + 1;
			else
				begin
					if (res_blk4x4_onlyDC == 1'b1)
						number_of_onlyDC <= number_of_onlyDC + 1;
				end
		end
// synopsys translate_on
|
347 |
|
|
|
348 |
|
|
// Residual operand select for sum_PE0 ~ sum_PE3.
//   all-zero block : zeros
//   DC-only block  : curr_DC_scaled on all four adders
//   otherwise      : IQIT_output_(4*step) ~ IQIT_output_(4*step+3), step = blk4x4_sum_counter
// Outside steps 0 ~ 3 the operands are zero.
always @ (blk4x4_sum_counter or res_blk4x4_IsAllZero or res_blk4x4_onlyDC or curr_DC_scaled or
	IQIT_output_0  or IQIT_output_1  or IQIT_output_2  or IQIT_output_3  or
	IQIT_output_4  or IQIT_output_5  or IQIT_output_6  or IQIT_output_7  or
	IQIT_output_8  or IQIT_output_9  or IQIT_output_10 or IQIT_output_11 or
	IQIT_output_12 or IQIT_output_13 or IQIT_output_14 or IQIT_output_15)
	if (res_blk4x4_IsAllZero)
		{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a} <= 36'd0;
	else if (res_blk4x4_onlyDC)
		{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a} <= {4{curr_DC_scaled}};
	else
		case (blk4x4_sum_counter)
			3'd0:{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a}
					<= {IQIT_output_0,  IQIT_output_1,  IQIT_output_2,  IQIT_output_3};
			3'd1:{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a}
					<= {IQIT_output_4,  IQIT_output_5,  IQIT_output_6,  IQIT_output_7};
			3'd2:{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a}
					<= {IQIT_output_8,  IQIT_output_9,  IQIT_output_10, IQIT_output_11};
			3'd3:{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a}
					<= {IQIT_output_12, IQIT_output_13, IQIT_output_14, IQIT_output_15};
			default:{sum_PE0_a, sum_PE1_a, sum_PE2_a, sum_PE3_a} <= 36'd0;
		endcase
|
370 |
|
|
// Prediction operand select for sum_PE0 ~ sum_PE3: step n (blk4x4_sum_counter)
// feeds blk4x4_pred_output(4n) ~ blk4x4_pred_output(4n+3) to the four adders.
// Outside steps 0 ~ 3 the operands are zero.
always @ (blk4x4_sum_counter or
	blk4x4_pred_output0  or blk4x4_pred_output1  or blk4x4_pred_output2  or blk4x4_pred_output3  or
	blk4x4_pred_output4  or blk4x4_pred_output5  or blk4x4_pred_output6  or blk4x4_pred_output7  or
	blk4x4_pred_output8  or blk4x4_pred_output9  or blk4x4_pred_output10 or blk4x4_pred_output11 or
	blk4x4_pred_output12 or blk4x4_pred_output13 or blk4x4_pred_output14 or blk4x4_pred_output15)
	case (blk4x4_sum_counter)
		3'd0:{sum_PE0_b, sum_PE1_b, sum_PE2_b, sum_PE3_b}
				<= {blk4x4_pred_output0,  blk4x4_pred_output1,  blk4x4_pred_output2,  blk4x4_pred_output3};
		3'd1:{sum_PE0_b, sum_PE1_b, sum_PE2_b, sum_PE3_b}
				<= {blk4x4_pred_output4,  blk4x4_pred_output5,  blk4x4_pred_output6,  blk4x4_pred_output7};
		3'd2:{sum_PE0_b, sum_PE1_b, sum_PE2_b, sum_PE3_b}
				<= {blk4x4_pred_output8,  blk4x4_pred_output9,  blk4x4_pred_output10, blk4x4_pred_output11};
		3'd3:{sum_PE0_b, sum_PE1_b, sum_PE2_b, sum_PE3_b}
				<= {blk4x4_pred_output12, blk4x4_pred_output13, blk4x4_pred_output14, blk4x4_pred_output15};
		default:{sum_PE0_b, sum_PE1_b, sum_PE2_b, sum_PE3_b} <= 32'd0;
	endcase
|
386 |
|
|
//----------------------------------------------------------------------
//sum right most column latch for Intra mbAddrA
//----------------------------------------------------------------------
//sum_right_column_reg:
// Keeps the sum_PE3 result of steps 0, 1 and 2 in bytes 0, 1 and 2 respectively.
// The step-3 result is not stored here; at any other counter value the register holds.
always @ (posedge gclk_blk4x4_sum or negedge reset_n)
	if (!reset_n)
		sum_right_column_reg <= 24'd0;
	else if (blk4x4_sum_counter == 3'd0)
		sum_right_column_reg[7:0]   <= blk4x4_sum_PE3_out;
	else if (blk4x4_sum_counter == 3'd1)
		sum_right_column_reg[15:8]  <= blk4x4_sum_PE3_out;
	else if (blk4x4_sum_counter == 3'd2)
		sum_right_column_reg[23:16] <= blk4x4_sum_PE3_out;
|
399 |
|
|
|
400 |
|
|
//blk4x4_rec_counter_2_raster_order:
//change from double-z order to raster order
// For indices 0 ~ 15 the conversion is a swap of index bits 1 and 2:
//   2<->4, 3<->5, 10<->12, 11<->13, every other index maps to itself.
// Indices 16 and above pass through unchanged.
always @ (blk4x4_rec_counter)
	if (blk4x4_rec_counter[4] == 1'b0)
		blk4x4_rec_counter_2_raster_order <= {blk4x4_rec_counter[4:3],
		                                      blk4x4_rec_counter[1],
		                                      blk4x4_rec_counter[2],
		                                      blk4x4_rec_counter[0]};
	else
		blk4x4_rec_counter_2_raster_order <= blk4x4_rec_counter;
|
414 |
|
|
//----------------------------------------------------------------------
//Intra_mbAddrB_RAM write control
//----------------------------------------------------------------------
// Block indices that trigger a write: 10,11,14,15 in the luma range (< 16) and
// 18,19,22,23 in the chroma range (the signal name marks them as bottom blocks).
wire rec_blk_is_luma_bottom   = (blk4x4_rec_counter == 5'd10) || (blk4x4_rec_counter == 5'd11) ||
                                (blk4x4_rec_counter == 5'd14) || (blk4x4_rec_counter == 5'd15);
wire rec_blk_is_chroma_bottom = (blk4x4_rec_counter == 5'd18) || (blk4x4_rec_counter == 5'd19) ||
                                (blk4x4_rec_counter == 5'd22) || (blk4x4_rec_counter == 5'd23);

wire Is_blk4x4_rec_bottom;
assign Is_blk4x4_rec_bottom = rec_blk_is_luma_bottom || rec_blk_is_chroma_bottom;

// Write on the last step (counter == 3) of such a block, which is when sum_PE0 ~ 3
// carry the samples selected for step 3. No write when mb_num_v is 8 or when
// LowerMB_IsSkip is set.
assign Intra_mbAddrB_RAM_wr = Is_blk4x4_rec_bottom && (blk4x4_sum_counter == 3'd3) &&
                              (mb_num_v != 4'd8) && !LowerMB_IsSkip;

// Write data: PE0 result in the least significant byte; forced to zero when not writing
assign Intra_mbAddrB_RAM_din = (Intra_mbAddrB_RAM_wr)?
		{blk4x4_sum_PE3_out,blk4x4_sum_PE2_out,blk4x4_sum_PE1_out,blk4x4_sum_PE0_out}:32'd0;
|
424 |
|
|
|
425 |
|
|
// Write address = base pointer + shift pointer + in-MB pointer

// base pointer, [43:0] luma, [65:44] Chroma Cb, [87:66] Chroma Cr
// blk4x4_rec_counter[4] == 0 selects the luma region; otherwise bit 2 selects
// between the Cb (0) and Cr (1) regions. Zero when not writing.
reg [6:0] Intra_mbAddrB_RAM_addr_bp;
always @ (Intra_mbAddrB_RAM_wr or blk4x4_rec_counter[4] or blk4x4_rec_counter[2])
	if (Intra_mbAddrB_RAM_wr)
		begin
			if (blk4x4_rec_counter[4] == 1'b0)
				Intra_mbAddrB_RAM_addr_bp <= 7'd0;
			else
				Intra_mbAddrB_RAM_addr_bp <= (blk4x4_rec_counter[2] == 1'b0)? 7'd44:7'd66;
		end
	else
		Intra_mbAddrB_RAM_addr_bp <= 7'd0;

// shift pointer,x2 for chroma,x4 for luma
// The x4 form is also what is produced while no write is active.
wire [5:0] Intra_mbAddrB_RAM_addr_sp;
assign Intra_mbAddrB_RAM_addr_sp = (!Intra_mbAddrB_RAM_wr || blk4x4_rec_counter[4] == 1'b0)?
		{mb_num_h,2'b00}:{1'b0,mb_num_h,1'b0};

// pointer for relative address of each 4x4 block inside a MB
// luma  : indices with low bits 010,011,110,111 map to 0,1,2,3, i.e. {bit2,bit0}
//         whenever bit1 is set; any other luma index gives 0
// chroma: bit0 of the index
reg [1:0] Intra_mbAddrB_RAM_addr_ip;
always @ (Intra_mbAddrB_RAM_wr or blk4x4_rec_counter[4] or blk4x4_rec_counter[2:0])
	if (Intra_mbAddrB_RAM_wr)
		begin
			if (blk4x4_rec_counter[4] == 1'b0)
				begin
					if (blk4x4_rec_counter[1])
						Intra_mbAddrB_RAM_addr_ip <= {blk4x4_rec_counter[2],blk4x4_rec_counter[0]};
					else
						Intra_mbAddrB_RAM_addr_ip <= 2'd0;
				end
			else
				Intra_mbAddrB_RAM_addr_ip <= {1'b0,blk4x4_rec_counter[0]};
		end
	else
		Intra_mbAddrB_RAM_addr_ip <= 2'd0;

assign Intra_mbAddrB_RAM_wr_addr = Intra_mbAddrB_RAM_addr_bp + Intra_mbAddrB_RAM_addr_sp + Intra_mbAddrB_RAM_addr_ip;
|
460 |
|
|
|
461 |
|
|
/*
|
462 |
|
|
// synopsys translate_off
|
463 |
|
|
integer tracefile;
|
464 |
|
|
initial
|
465 |
|
|
begin
|
466 |
|
|
tracefile = $fopen("nova_sum_output.log");
|
467 |
|
|
end
|
468 |
|
|
|
469 |
|
|
wire [6:0] mb_num;
|
470 |
|
|
assign mb_num = mb_num_v * 11 + mb_num_h;
|
471 |
|
|
|
472 |
|
|
wire [1:0] blk4x4_rec_counter_M4;
|
473 |
|
|
assign blk4x4_rec_counter_M4 = blk4x4_rec_counter[1:0];
|
474 |
|
|
|
475 |
|
|
reg [8:0] pic_num;
|
476 |
|
|
always @ (reset_n or mb_num)
|
477 |
|
|
if (reset_n == 1'b0)
|
478 |
|
|
pic_num <= 9'b111111111;
|
479 |
|
|
else if (mb_num == 0)
|
480 |
|
|
pic_num <= pic_num + 1;
|
481 |
|
|
|
482 |
|
|
always @ (posedge clk)
|
483 |
|
|
if (blk4x4_sum_counter == 0)
|
484 |
|
|
begin
|
485 |
|
|
$fdisplay (tracefile,"------------------------ Pic = %3d, MB = %3d -------------------------",pic_num,mb_num);
|
486 |
|
|
if (blk4x4_rec_counter < 16)
|
487 |
|
|
$fdisplay (tracefile," [Luma] blk4x4Idx = %2d",blk4x4_rec_counter);
|
488 |
|
|
else
|
489 |
|
|
$fdisplay (tracefile," [Chroma] blk4x4Idx = %2d",blk4x4_rec_counter_M4);
|
490 |
|
|
$fdisplay (tracefile," Sum output: %8d %8d %8d %8d",blk4x4_sum_PE0_out,blk4x4_sum_PE1_out,blk4x4_sum_PE2_out,blk4x4_sum_PE3_out);
|
491 |
|
|
end
|
492 |
|
|
else if (blk4x4_sum_counter != 3'd4)
|
493 |
|
|
$fdisplay (tracefile," %8d %8d %8d %8d",blk4x4_sum_PE0_out,blk4x4_sum_PE1_out,blk4x4_sum_PE2_out,blk4x4_sum_PE3_out);
|
494 |
|
|
// synopsys translate_on
|
495 |
|
|
*/
|
496 |
|
|
|
497 |
|
|
endmodule
|
498 |
|
|
|
499 |
|
|
// sum_PE: one reconstruction adder.
//   a      : 9-bit residual, sign-extended before the add (two's complement)
//   b      : 8-bit prediction sample, zero-extended before the add
//   bypass : when 1 the adder is held at zero and c = b
//   c      : a + b limited to the range 0 ~ 255
module sum_PE (a,b,bypass,c);
	input  [8:0] a;		//for residual from IQIT
	input  [7:0] b;		//for prediction from intra or inter
	input        bypass;
	output [7:0] c;

	// both operands widened to 10 bits so the sign and the overflow of the result are visible
	wire [9:0] residual_ext   = {a[8],a};
	wire [9:0] prediction_ext = {2'b00,b};

	// the add result is forced to zero while bypassed
	wire [9:0] sum_raw = (bypass)? 10'd0:(prediction_ext + residual_ext);

	// bit 9 set: negative result, limit to 0; otherwise bit 8 set: above 255, limit to 255
	wire [7:0] sum_clipped = (sum_raw[9] == 1'b1)? 8'd0:
	                         (sum_raw[8] == 1'b1)? 8'd255:sum_raw[7:0];

	assign c = (bypass)? b:sum_clipped;
endmodule
|
510 |
|
|
|