1 |
2 |
eejlny |
----------------------------------------------------------------------------
|
2 |
|
|
-- This file is a part of the LM VHDL IP LIBRARY
|
3 |
|
|
-- Copyright (C) 2009 Jose Nunez-Yanez
|
4 |
|
|
--
|
5 |
|
|
-- This program is free software; you can redistribute it and/or modify
|
6 |
|
|
-- it under the terms of the GNU General Public License as published by
|
7 |
|
|
-- the Free Software Foundation; either version 2 of the License, or
|
8 |
|
|
-- (at your option) any later version.
|
9 |
|
|
--
|
10 |
|
|
-- See the file COPYING for the full details of the license.
|
11 |
|
|
--
|
12 |
|
|
-- The license allows free and unlimited use of the library and tools for research and education purposes.
|
13 |
|
|
-- The full LM core supports many more advanced motion estimation features and it is available under a
|
14 |
|
|
-- low-cost commercial license. See the readme file to learn more or contact us at
|
15 |
|
|
-- eejlny@byacom.co.uk or www.byacom.co.uk
|
16 |
|
|
--------------------------------------
|
17 |
|
|
-- entity = me_top --
|
18 |
|
|
-- version = 1.0 --
|
19 |
|
|
-- last update = 16/08/09 --
|
20 |
|
|
-- author = Jose Nunez --
|
21 |
|
|
--------------------------------------
|
22 |
|
|
|
23 |
|
|
|
24 |
|
|
-- me top of the hierarchy
|
25 |
|
|
|
26 |
|
|
library IEEE;
|
27 |
|
|
use IEEE.std_logic_1164.all;
|
28 |
|
|
use IEEE.Numeric_STD.all;
|
29 |
|
|
use IEEE.std_logic_unsigned."+";
|
30 |
|
|
use IEEE.std_logic_unsigned."-";
|
31 |
|
|
use IEEE.std_logic_signed.">";
|
32 |
|
|
use IEEE.std_logic_signed."<";
|
33 |
|
|
use work.config.all;
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
-- me_top: top of the motion-estimation hierarchy.
-- The port list keeps its original order so positional associations stay valid.
entity me_top is
    port (
        -- clock and resets
        clk                    : in  std_logic;
        clear                  : in  std_logic;
        reset                  : in  std_logic;
        -- register file interface
        register_file_address  : in  std_logic_vector(4 downto 0);   -- 32 general purpose registers
        register_file_write    : in  std_logic;
        register_file_data_in  : in  std_logic_vector(31 downto 0);
        register_file_data_out : out std_logic_vector(31 downto 0);
        done_interrupt         : out std_logic;                      -- high when macroblock processing has completed
        -- debugging ports
        best_sad_debug         : out std_logic_vector(15 downto 0);
        best_mv_debug          : out std_logic_vector(15 downto 0);
        best_eu_debug          : out std_logic_vector(3 downto 0);
        partition_mode_debug   : out std_logic_vector(3 downto 0);
        qp_on_debug            : out std_logic;                      -- running qp
        dma_rm_re_debug        : in  std_logic;                      -- set to one to enable reading the reference area
        dma_rm_debug           : out std_logic_vector(63 downto 0);  -- reference area data out
        -- DMA interface
        dma_address            : in  std_logic_vector(10 downto 0);  -- next reference memory address
        dma_data_in            : in  std_logic_vector(63 downto 0);  -- pixel in for reference memory or macroblock memory
        dma_rm_we              : in  std_logic;                      -- enable writing to reference memory
        dma_cm_we              : in  std_logic;                      -- enable writing to current macroblock memory
        dma_pom_we             : in  std_logic;                      -- enable writing to point memory
        dma_prm_we             : in  std_logic;                      -- enable writing to program memory
        dma_residue_out        : out std_logic_vector(63 downto 0);  -- get residue from winner mv
        dma_re_re              : in  std_logic                       -- enable reading residue
    );
end me_top;
|
62 |
|
|
|
63 |
|
|
architecture struct of me_top is
|
64 |
|
|
|
65 |
|
|
-- mv_cost: calculates the motion-vector cost using Lagrangian optimization.
component mv_cost
    generic (
        pipelines : integer
    );
    port (
        clk             : in  std_logic;
        clear           : in  std_logic;
        reset           : in  std_logic;
        load            : in  std_logic;                      -- start
        mvp_x           : in  std_logic_vector(7 downto 0);   -- predicted mv, x
        mvp_y           : in  std_logic_vector(7 downto 0);   -- predicted mv, y
        mvx_c           : in  std_logic_vector(7 downto 0);   -- motion vector candidate, x
        mvy_c           : in  std_logic_vector(7 downto 0);   -- motion vector candidate, y
        rest_mvx_c      : in  rest_type_points;               -- remaining motion vector candidates, x
        rest_mvy_c      : in  rest_type_points;               -- remaining motion vector candidates, y
        quant_parameter : in  std_logic_vector(5 downto 0);   -- quantization parameter
        p_cost_mv       : out std_logic_vector(15 downto 0);
        rest_p_cost_mv  : out rest_type_displacement
    );
end component;
|
83 |
|
|
|
84 |
|
|
|
85 |
|
|
-- phy_address: component declaration; takes a motion vector, partition index and
-- line offset and outputs a 14-bit address on the port of the same name.
component phy_address
    port (
        clk             : in  std_logic;
        clear           : in  std_logic;
        reset           : in  std_logic;
        partition_count : in  std_logic_vector(3 downto 0);   -- identifies the active subpartition
        line_offset     : in  std_logic_vector(5 downto 0);   -- read multiple lines
        mvx             : in  std_logic_vector(7 downto 0);
        mvy             : in  std_logic_vector(7 downto 0);
        phy_address     : out std_logic_vector(13 downto 0)
    );
end component;
|
96 |
|
|
|
97 |
|
|
-- sad_selector: component declaration for the full-pel SAD/MV selector.
-- Inputs are one primary SAD/MV pair plus the "rest_" arrays; outputs are the
-- selected SAD, MV and the index of the best execution unit.
component sad_selector
    generic (
        integer_pipeline_count : integer
    );
    port (
        clk                : in  std_logic;
        reset              : in  std_logic;
        clear              : in  std_logic;
        calculate_sad_done : in  std_logic;
        active_pipelines   : in  std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
        update             : in  std_logic;                      -- completed program: reset the stored sad
        update_fp          : in  std_logic;                      -- end of iteration
        best_eu            : out std_logic_vector(3 downto 0);
        best_sad           : in  std_logic_vector(15 downto 0);
        best_mv            : in  std_logic_vector(15 downto 0);
        rest_best_sad      : in  rest_type_displacement;
        rest_best_mv       : in  rest_type_displacement;
        best_sad_out       : out std_logic_vector(15 downto 0);
        best_mv_out        : out std_logic_vector(15 downto 0)
    );
end component;
|
115 |
|
|
|
116 |
|
|
-- sad_selector_qp: component declaration for the quarter-pel (qp) SAD/MV selector.
-- Same port shape as sad_selector, sized by CFG_PIPELINE_COUNT_QP and the _qp array types.
component sad_selector_qp
    port (
        clk                : in  std_logic;
        reset              : in  std_logic;
        clear              : in  std_logic;
        calculate_sad_done : in  std_logic;
        active_pipelines   : in  std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
        update             : in  std_logic;                      -- completed fp part: set the stored sad
        update_qp          : in  std_logic;                      -- end of iteration
        best_eu            : out std_logic_vector(3 downto 0);
        best_sad           : in  std_logic_vector(15 downto 0);
        best_mv            : in  std_logic_vector(15 downto 0);
        rest_best_sad      : in  rest_type_displacement_qp;
        rest_best_mv       : in  rest_type_displacement_qp;
        best_sad_out       : out std_logic_vector(15 downto 0);
        best_mv_out        : out std_logic_vector(15 downto 0)
    );
end component;
|
133 |
|
|
|
134 |
|
|
-- distance_engine64: component declaration for the 64-bit distance engine.
-- Takes reference and current pixel words and reports best SAD / best MV.
component distance_engine64
    generic (
        qp_mode : std_logic
    );
    port (
        -- control
        clk                    : in  std_logic;
        clear                  : in  std_logic;
        reset                  : in  std_logic;
        enable                 : in  std_logic;
        update                 : in  std_logic;
        load_mv                : in  std_logic;
        mode_in                : in  mode_type;
        -- motion-vector cost and candidate
        mv_cost_on             : in  std_logic;
        mv_cost_in             : in  std_logic_vector(15 downto 0);
        candidate_mvx          : in  std_logic_vector(7 downto 0);
        candidate_mvy          : in  std_logic_vector(7 downto 0);
        -- pixel data
        reference_data_in      : in  std_logic_vector(63 downto 0);
        current_data_in        : in  std_logic_vector(63 downto 0);
        residue_out            : out std_logic_vector(63 downto 0);
        -- status and results
        enable_fifo            : out std_logic;
        reset_fifo             : out std_logic;
        winner1                : out std_logic;
        calculate_sad_done     : out std_logic;
        distance_engine_active : out std_logic;
        best_sad               : out std_logic_vector(15 downto 0);
        best_mv                : out std_logic_vector(15 downto 0)
    );
end component;
|
159 |
|
|
|
160 |
|
|
|
161 |
|
|
-- register_file: component declaration for the register file block.
-- Besides the addr/write/data access port it exposes the configuration values
-- (coordinates, predicted mv, quantization parameter, frame dimensions) and
-- receives the fp/qp results and completion flags.
component register_file
    generic (
        integer_pipeline_count : integer
    );
    port (
        clk               : in  std_logic;
        clear             : in  std_logic;
        reset             : in  std_logic;
        -- register access
        addr              : in  std_logic_vector(4 downto 0);
        write             : in  std_logic;
        data_in           : in  std_logic_vector(31 downto 0);
        data_out          : out std_logic_vector(31 downto 0);
        -- sequencing
        start             : out std_logic;
        all_done_qp       : in  std_logic;                      -- program completes
        all_done_fp       : in  std_logic;                      -- fp part completes
        mvc_done          : out std_logic;                      -- all motion vector candidates evaluated
        -- NOTE(review): left unconstrained as in the original; the width comes from
        -- the connected actual. Confirm against the register_file entity.
        mvc_to_do         : out std_logic_vector;
        instruction_zero  : in  std_logic;
        partition_done_fp : in  std_logic;                      -- fp partition terminates
        partition_done_qp : in  std_logic;                      -- qp partition terminates
        done_interrupt    : out std_logic;
        start_row         : out std_logic;
        update_fp         : in  std_logic;
        load_mv           : in  std_logic;                      -- force the mvc to move forward
        mode_out          : out mode_type;
        mv_cost_on        : out std_logic;                      -- activate the costing of mvs
        -- full-pel results / starting points
        best_sad_fp       : in  std_logic_vector(15 downto 0);
        best_mv_fp        : in  std_logic_vector(15 downto 0);
        first_mv_fp       : out std_logic_vector(15 downto 0);
        rest_first_mv_fp  : out rest_type_displacement;
        -- configuration outputs
        mbx_coordinate    : out std_logic_vector(7 downto 0);
        mby_coordinate    : out std_logic_vector(7 downto 0);
        mvp_x             : out std_logic_vector(7 downto 0);
        mvp_y             : out std_logic_vector(7 downto 0);
        quant_parameter   : out std_logic_vector(5 downto 0);
        frame_dimension_x : out std_logic_vector(7 downto 0);
        frame_dimension_y : out std_logic_vector(7 downto 0);
        -- quarter-pel results / starting point
        update_qp         : in  std_logic;
        partition_count   : in  std_logic_vector(3 downto 0);   -- identifies the active subpartition
        best_sad_qp       : in  std_logic_vector(15 downto 0);
        best_mv_qp        : in  std_logic_vector(15 downto 0);
        first_mv_qp       : out std_logic_vector(15 downto 0)
    );
end component;
|
202 |
|
|
|
203 |
|
|
|
204 |
|
|
|
205 |
|
|
-- fp_pipeline: component declaration for one full-pel (fp) pipeline.
component fp_pipeline
    port (
        clk                        : in  std_logic;
        clear                      : in  std_logic;
        reset                      : in  std_logic;
        next_point_displacement_fp : in  std_logic_vector(15 downto 0);  -- next point to be processed
        first_mv_fp                : in  std_logic_vector(15 downto 0);  -- first point to start the search from
        start                      : in  std_logic;
        start_mb                   : in  std_logic;                      -- once per macroblock
        mode_in                    : in  mode_type;
        partition_count            : in  std_logic_vector(3 downto 0);   -- identifies the active subpartition
        frame_dimension_x          : in  std_logic_vector(7 downto 0);   -- in mb
        frame_dimension_y          : in  std_logic_vector(7 downto 0);
        mbx_coordinate             : in  std_logic_vector(7 downto 0);   -- in mb
        mby_coordinate             : in  std_logic_vector(7 downto 0);
        candidate_mvx              : out std_logic_vector(7 downto 0);   -- port for Lagrangian optimization
        candidate_mvy              : out std_logic_vector(7 downto 0);
        mv_cost                    : in  std_logic_vector(15 downto 0);
        mv_cost_on                 : in  std_logic;                      -- enable mv cost
        all_done_fp                : in  std_logic;                      -- program completes
        calculate_sad_done         : out std_logic;
        best_sad_fp                : out std_logic_vector(15 downto 0);
        best_mv_fp                 : out std_logic_vector(15 downto 0);
        dma_address                : in  std_logic_vector(10 downto 0);  -- next reference memory address
        mb_data_in                 : in  std_logic_vector(63 downto 0);  -- pixel in for macroblock memory (this memory is shared among the pipelines)
        dma_data_in                : in  std_logic_vector(63 downto 0);  -- pixel in for reference memory
        dma_rm_we                  : in  std_logic;                      -- enable writing to reference memory
        -- disabled port, kept for reference:
        -- dma_cm_we : in std_logic; -- enable writing to current macroblock memory
        dma_residue_out            : out std_logic_vector(63 downto 0);  -- get residue from winner mv
        dma_re_re                  : in  std_logic                       -- enable reading residue
    );
end component;
|
236 |
|
|
|
237 |
|
|
|
238 |
|
|
-- program_memory: component declaration; single-address memory interface with
-- an 8-bit address and 20-bit data words.
component program_memory
    port (
        addr : in  std_logic_vector(7 downto 0);
        clk  : in  std_logic;
        din  : in  std_logic_vector(19 downto 0);
        dout : out std_logic_vector(19 downto 0);
        we   : in  std_logic
    );
end component;
|
246 |
|
|
|
247 |
|
|
-- reference_memory64_remap: this memory stores the 5x7 reference data
-- (1120 words of 64 bit). It also remaps the addresses.
component reference_memory64_remap
    port (
        addr_r             : in  std_logic_vector(10 downto 0);
        addr_w             : in  std_logic_vector(10 downto 0);
        enable_hp_inter    : in  std_logic;                      -- working in interpolation mode
        clk                : in  std_logic;
        start              : in  std_logic;
        next_configuration : in  std_logic;                      -- move to the next configuration
        start_row          : in  std_logic;
        reset              : in  std_logic;
        clear              : in  std_logic;
        din                : in  std_logic_vector(63 downto 0);
        dout               : out std_logic_vector(63 downto 0);
        dout2              : out std_logic_vector(63 downto 0);
        we                 : in  std_logic
    );
end component;
|
263 |
|
|
|
264 |
|
|
-- reference_memory64_remap_compact: this memory stores the 5x5 reference data
-- (800 words of 64 bit). It also remaps the addresses.
component reference_memory64_remap_compact
    port (
        addr               : in  std_logic_vector(9 downto 0);
        enable_hp_inter    : in  std_logic;                      -- working in interpolation mode
        clk                : in  std_logic;
        next_configuration : in  std_logic;                      -- move to the next configuration
        start_row          : in  std_logic;
        reset              : in  std_logic;
        clear              : in  std_logic;
        din                : in  std_logic_vector(63 downto 0);
        dout               : out std_logic_vector(63 downto 0);
        dout2              : out std_logic_vector(63 downto 0);  -- from the second read port
        we                 : in  std_logic
    );
end component;
|
278 |
|
|
|
279 |
|
|
-- concatenate64_qp: component declaration for the quarter-pel concatenate unit
-- (two 64-bit inputs, one 64-bit output plus valid flags).
component concatenate64_qp
    port (
        addr        : in  std_logic_vector(2 downto 0);
        clk         : in  std_logic;
        clear       : in  std_logic;
        reset       : in  std_logic;
        din         : in  std_logic_vector(63 downto 0);
        din2        : in  std_logic_vector(63 downto 0);
        dout        : out std_logic_vector(63 downto 0);
        enable      : in  std_logic;
        quick_valid : out std_logic;   -- same as valid but one cycle earlier
        valid       : out std_logic    -- indicates when 64 valid bits are in the output
    );
end component;
|
292 |
|
|
|
293 |
|
|
|
294 |
|
|
-- concatenate64: this unit makes sure that 16 valid pixels are assembled,
-- depending on the byte address.
component concatenate64
    port (
        addr            : in  std_logic_vector(2 downto 0);
        clk             : in  std_logic;
        clear           : in  std_logic;
        reset           : in  std_logic;
        din             : in  std_logic_vector(63 downto 0);
        din2            : in  std_logic_vector(63 downto 0);
        dout            : out std_logic_vector(63 downto 0);
        enable_hp_inter : in  std_logic;   -- working in interpolation mode
        enable          : in  std_logic;
        quick_valid     : out std_logic;   -- same as valid but one cycle earlier
        valid           : out std_logic    -- indicates when 16 valid bytes are in the output
    );
end component;
|
308 |
|
|
|
309 |
|
|
-- qp_interpolate_engine: component declaration for the quarter-pel interpolation engine.
component qp_interpolate_engine
    port (
        clk                        : in  std_logic;
        clear                      : in  std_logic;
        reset                      : in  std_logic;
        qp_mode                    : in  std_logic;                      -- in qp mode two lines must be written to interpolate
        enable_hp_inter            : in  std_logic;
        write_interpolate_register : in  std_logic;
        interpolate_in_pixels_a    : in  std_logic_vector(63 downto 0);
        interpolate_in_pixels_b    : in  std_logic_vector(63 downto 0);
        write_block1               : out std_logic;                      -- controls which of the two blocks is being read and written (interpolate and dist engine)
        rma_address                : out std_logic_vector(4 downto 0);   -- extracted reference pixels use this address
        rma_we                     : out std_logic;
        interpolate_out_pixels     : out std_logic_vector(63 downto 0)
    );
end component;
|
325 |
|
|
|
326 |
|
|
-- forward_engine: component declaration; 64-bit pixel word in, 64-bit pixel word out,
-- with the same write_block1 / rma_* outputs as qp_interpolate_engine.
component forward_engine
    port (
        clk                : in  std_logic;
        clear              : in  std_logic;
        reset              : in  std_logic;
        enable_hp_inter    : in  std_logic;                      -- when hp interpolation is being performed in the background
        write_register     : in  std_logic;
        mode_in            : in  mode_type;
        partition_count_in : in  std_logic_vector(3 downto 0);
        in_pixels          : in  std_logic_vector(63 downto 0);
        write_block1       : out std_logic;                      -- controls which of the two blocks is being read and written (interpolate and dist engine)
        rma_address        : out std_logic_vector(4 downto 0);   -- extracted reference pixels use this address
        rma_we             : out std_logic;
        out_pixels         : out std_logic_vector(63 downto 0)
    );
end component;
|
342 |
|
|
|
343 |
|
|
-- half pel interpolation engine
|
344 |
|
|
|
345 |
|
|
-- systolic_array_top: component declaration for the half-pel interpolation engine.
component systolic_array_top
    port (
        clear                   : in  std_logic;
        reset                   : in  std_logic;
        enable_interpolation    : in  std_logic;
        enable_estimation       : in  std_logic;
        clk                     : in  std_logic;
        data_in                 : in  std_logic_vector(63 downto 0);
        data_in_valid           : in  std_logic;
        data_request            : out std_logic;
        all_done                : out std_logic;
        next_point              : out std_logic;                      -- tells the main control unit that the next point is required
        shift_concatenate_valid : in  std_logic;                      -- need to know when 64 bits are valid
        -- memory interface for o, h, v and d memories
        candidate_mvx           : in  std_logic_vector(7 downto 0);
        candidate_mvy           : in  std_logic_vector(7 downto 0);
        hp_address_a            : out std_logic_vector(2 downto 0);
        hp_address_b            : out std_logic_vector(2 downto 0);
        data_out_a              : out std_logic_vector(63 downto 0);  -- interpolated data out
        data_out2_a             : out std_logic_vector(63 downto 0);  -- interpolated data out
        data_out_b              : out std_logic_vector(63 downto 0);  -- interpolated data out
        data_out2_b             : out std_logic_vector(63 downto 0);  -- interpolated data out
        data_out_valid          : out std_logic                       -- indicates 8 bytes of data valid
    );
end component;
|
371 |
|
|
|
372 |
|
|
|
373 |
|
|
|
374 |
|
|
-- current_macroblock_memory64: this memory stores the current macroblock (256 bytes).
component current_macroblock_memory64
    port (
        addr : in  std_logic_vector(4 downto 0);
        clk  : in  std_logic;
        din  : in  std_logic_vector(63 downto 0);
        dout : out std_logic_vector(63 downto 0);
        we   : in  std_logic
    );
end component;
|
382 |
|
|
|
383 |
|
|
|
384 |
|
|
-- point_memory: component declaration; writable memory with an 8-bit address
-- and 16-bit data words.
component point_memory
    port (
        addr : in  std_logic_vector(7 downto 0);
        clk  : in  std_logic;
        din  : in  std_logic_vector(15 downto 0);
        dout : out std_logic_vector(15 downto 0);
        we   : in  std_logic
    );
end component;
|
392 |
|
|
|
393 |
|
|
--component point_memory
|
394 |
|
|
-- port (
|
395 |
|
|
-- addr: in std_logic_vector(7 downto 0);
|
396 |
|
|
-- clk: in std_logic;
|
397 |
|
|
-- dout: out std_logic_vector(15 downto 0));
|
398 |
|
|
--end component;
|
399 |
|
|
|
400 |
|
|
|
401 |
|
|
-- me_control_unit: component declaration for the full-pel control unit.
-- Disabled ports from the original declaration are kept as comments.
component me_control_unit
    generic (
        integer_pipeline_count : integer
    );
    port (
        clk                      : in  std_logic;
        clear                    : in  std_logic;
        reset                    : in  std_logic;
        start                    : in  std_logic;
        range_ok                 : in  std_logic;                      -- keep track of the mv range
        best_sad_in              : in  std_logic_vector(15 downto 0);  -- to make SAD-based decisions
        mv_length_in             : in  std_logic_vector(15 downto 0);  -- to make LENGTH-based decisions
        mode_in                  : in  mode_type;
        mvc_done                 : in  std_logic;                      -- all motion vector candidates evaluated
        mvc_to_do                : in  std_logic_vector(3 downto 0);
        qp_on                    : in  std_logic;                      -- qp on
        partition_count_out      : out std_logic_vector(3 downto 0);   -- identifies the active subpartition
        start_pipelines          : out std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
        active_pipelines         : out std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);  -- so the sad selector ignores the non-active ones
        shift_concatenate_valid  : in  std_logic;                      -- valid output from the concatenate unit (64 bit ready)
        instruction_address      : out std_logic_vector(7 downto 0);   -- address to fetch next instruction
        instruction_opcode       : in  std_logic_vector(3 downto 0);   -- opcode
        point_count              : in  std_logic_vector(7 downto 0);   -- how many points to test
        point_address            : in  std_logic_vector(7 downto 0);   -- which is the first point to test
        best_eu                  : in  std_logic_vector(3 downto 0);
        calculate_sad_done       : in  std_logic;
        distance_engine_active   : in  std_logic;
        interpolation_done       : in  std_logic;                      -- interpolation completes
        interpolate_data_request : in  std_logic;                      -- interpolator requests data
        next_point               : out std_logic_vector(7 downto 0);   -- next point address to ROM
        line_offset              : out std_logic_vector(5 downto 0);   -- multiple line reading
        enable_concatenate_unit  : out std_logic;
        -- enable_dist_engine : out std_logic;
        write_register           : out std_logic;
        load_mv                  : out std_logic;
        update                   : out std_logic;
        instruction_zero         : out std_logic;
        all_done                 : out std_logic;                      -- program completes or qp mode started (fp finished)
        partition_done           : out std_logic;
        qpel_loc_x               : in  std_logic_vector(1 downto 0);   -- detect qp mode
        qpel_loc_y               : in  std_logic_vector(1 downto 0);
        start_qp                 : out std_logic;
        enable_hp_inter          : out std_logic;                      -- start the interpolation core
        -- write_block1 : out std_logic;
        -- next_rm_address_ready : in std_logic;
        next_rm_addresss         : in  std_logic_vector(13 downto 0);  -- physical address for reference (macroblock upper left corner)
        rm_address               : out std_logic_vector(13 downto 0)   -- reference memory write from address
        -- cm_address : out std_logic_vector(4 downto 0); -- address to extract 4x4 blocks from current macroblock
        -- rma_address : out std_logic_vector(4 downto 0); -- reference macroblock write to address
        -- rma_we : out std_logic
    );
end component;
|
450 |
|
|
|
451 |
|
|
-- me_control_unit_qp: component declaration for the quarter-pel (qp) control unit.
component me_control_unit_qp
    port (
        clk                     : in  std_logic;
        clear                   : in  std_logic;
        reset                   : in  std_logic;
        start                   : in  std_logic;                      -- start qp refinement
        next_point_inter        : in  std_logic;                      -- the interpolation unit requires the next point
        shift_concatenate_valid : in  std_logic;                      -- valid output from the concatenate unit
        qp_starting_address     : in  std_logic_vector(7 downto 0);   -- start fetching qp instructions from this point
        instruction_address     : out std_logic_vector(7 downto 0);   -- address to fetch next instruction
        point_count             : in  std_logic_vector(7 downto 0);   -- how many points to test
        point_address           : in  std_logic_vector(7 downto 0);   -- which is the first point to test
        calculate_sad_done      : in  std_logic;                      -- signals when the distance engine has finished
        instruction_opcode      : in  std_logic_vector(3 downto 0);   -- opcode
        best_eu                 : in  std_logic_vector(3 downto 0);   -- best execution unit
        next_point              : out std_logic_vector(7 downto 0);   -- next point address to ROM
        qp_mode                 : out std_logic;                      -- enable qp estimation
        qp_on                   : out std_logic;                      -- qp active
        load_mv                 : out std_logic;
        update                  : out std_logic;
        all_done                : out std_logic                       -- program completes
    );
end component;
|
473 |
|
|
|
474 |
|
|
-- range_checker: makes sure that MVs are not out of range.
component range_checker
    port (
        clk               : in  std_logic;
        clear             : in  std_logic;
        reset             : in  std_logic;
        candidate_mvx     : in  std_logic_vector(7 downto 0);
        candidate_mvy     : in  std_logic_vector(7 downto 0);
        frame_dimension_x : in  std_logic_vector(7 downto 0);   -- in mb
        frame_dimension_y : in  std_logic_vector(7 downto 0);
        mbx_coordinate    : in  std_logic_vector(7 downto 0);   -- in mb
        mby_coordinate    : in  std_logic_vector(7 downto 0);
        range_ok          : out std_logic
    );
end component;
|
488 |
|
|
|
489 |
|
|
|
490 |
|
|
-- Internal signals of architecture struct.
-- Same declarations, names and order as before; long name lists are wrapped for readability.

-- per-pipeline ("rest_") arrays, full-pel side
signal rest_next_point_fp              : rest_type_points;
signal rest_point_memory_address       : rest_type_points;
signal rest_next_point_displacement_fp : rest_type_displacement;
signal rest_start_pipeline,
       rest_calculate_sad_done         : std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
signal rest_best_sad_fp,
       rest_best_mv_fp,
       rest_first_mv_fp                : rest_type_displacement;

-- per-pipeline ("rest_") arrays, quarter-pel side
signal rest_next_point_qp               : rest_type_points_qp;
signal rest_point_memory_address_qp     : rest_type_points_qp;
signal rest_next_point_displacement_qqp : rest_type_displacement_qp;
signal rest_start_pipeline_qp,
       rest_calculate_sad_done_qp       : std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
signal rest_best_sad_qp,
       rest_best_mv_qp                  : rest_type_displacement_qp;

-- 8-bit values
signal mvp_x, mvp_y,
       frame_dimension_x, frame_dimension_y,
       mby_coordinate, mbx_coordinate,
       program_memory_address, program_memory_address_qp,
       instruction_address_fp, instruction_address_qp,
       point_count_fp, point_count_qp,
       point_address_fp, point_address_qp,
       candidate_mvx_fp, candidate_mvy_fp,
       candidate_mvx_qp, candidate_mvy_qp : std_logic_vector(7 downto 0);
signal quant_parameter, line_offset : std_logic_vector(5 downto 0);
signal partition_count, instruction_fp, instruction_qp : std_logic_vector(3 downto 0); -- op code
signal next_point_fp, point_memory_address,
       next_point_qp, point_memory_address_qp,
       candidate_mvx_int, candidate_mvy_int : std_logic_vector(7 downto 0);

-- single-bit controls
signal one_bit, zero_bit,
       distance_engine_active_fp, range_ok,
       quick_valid, quick_valid_qp,
       next_point_inter, enable_concatenate_unit,
       dma_cm_we_m1, dma_cm_we_m2, dma_cm_we_fp, dma_cm_we_qp,
       done_interrupt_int, mv_cost_on : std_logic;

-- 5-bit addresses
signal distance_engine_address_m2, distance_engine_address_m1, distance_engine_address_qp,
       cm_address_m1, cm_address_m2,
       address1_fp, address2_fp, address1_qp, address2_qp : std_logic_vector(4 downto 0);

-- 16-bit SAD / MV / cost values
signal zero,
       best_sad_out_qp, best_mv_out_qp,
       next_point_displacement_qp, next_point_displacement_fp,
       best_sad_fp, best_sad_qp, best_sad_qp_distance_engine,
       best_mv_fp, best_mv_qp,
       best_sad_out_fp, best_mv_out_fp,
       p_cost_mv, p_cost_mv_qp : std_logic_vector(15 downto 0);

-- 20-bit program memory words
signal point_count_position_qp, point_count_position_fp : std_logic_vector(19 downto 0);

signal mv_length_in,
       first_mv_fp, first_mv_qp,
       mv_displacement_fp, mv_displacement_qp : std_logic_vector(15 downto 0);

-- single-bit status and handshake signals
signal mvc_done, instruction_zero, qp_on,
       partition_done_fp, partition_done_qp,
       mux_control_write, mux_control_read,
       qp_mode, start_qp,
       qp_pixels_valid, hp_pixels_valid,
       hp_interpolation_done, data_request_hp_inter,
       all_done_fp, all_done_qp,
       start_row,
       reset_fifo_fp, reset_fifo_qp,
       reset_fifo1_qp, reset_fifo2_qp, reset_fifo1_fp, reset_fifo2_fp,
       fifo_enable_w1, fifo_enable_w2, fifo_enable_r1, fifo_enable_r2,
       enable_fifo_fp, enable_fifo_qp,
       winner1_fp, winner1_qp,
       start, write_register, write_interpolate_register,
       next_rm_address_ready,
       shift_concatenate_valid_qp, shift_concatenate_valid_fp,
       rma_we_qp, rma_we_fp,
       rma_we1_qp, rma_we2_qp, rma_we1_fp, rma_we2_fp,
       write_block1_qp,
       load_mv_fp, load_mv_qp,
       update_fp, update_qp,
       calculate_sad_done_qp, calculate_sad_done_fp,
       enable_hp_inter : std_logic;

-- reference memory addressing
signal rm_address_r, rm_address_w : std_logic_vector(10 downto 0);
signal rm_address_c : std_logic_vector(9 downto 0);
signal best_eu, best_eu_qp, mvc_to_do : std_logic_vector(3 downto 0);
signal next_rm_address, int_rm_address : std_logic_vector(13 downto 0);

-- 64-bit pixel data paths
signal current_pixels, current_pixels_fp, current_pixels_qp,
       current_pixels_m1, current_pixels_m2,
       reference_data_in1_fp, reference_data_in2_fp,
       reference_data_in1_qp, reference_data_in2_qp,
       reference_data_in_fp, reference_data_in_qp : std_logic_vector(63 downto 0);
signal reference_pixels_in, reference_pixels_in2,
       residue_out_fp, residue_out_qp,
       residue_out_1_2, residue_out_2_2,
       residue_out_1_1, residue_out_2_1 : std_logic_vector(63 downto 0);
signal out_pixels_fp, out_pixels_qp,
       qp_pixels_out_a, qp_pixels_out_b,
       hp_pixels_out_a, hp_pixels_out2_a,
       hp_pixels_out_b, hp_pixels_out2_b,
       reference_pixels_out_qp_a, reference_pixels_out_qp_b,
       reference_pixels_out_fp : std_logic_vector(63 downto 0); -- for the interpolate unit

signal rma_address_qp, rma_address_fp : std_logic_vector(4 downto 0);
signal hp_address_a, hp_address_b : std_logic_vector(2 downto 0);
--signal sad : std_logic_vector(15 downto 0);
signal qpel_loc_x, qpel_loc_y : std_logic_vector(1 downto 0);
signal point_memory_data_in : std_logic_vector(15 downto 0);
signal program_memory_data_in : std_logic_vector(19 downto 0);
signal partition_mode : mode_type;
signal active_pipelines : std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
signal active_pipelines_qp : std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
signal rest_mvx_c, rest_mvy_c : rest_type_points;
signal rest_p_cost_mv : rest_type_displacement;
|
530 |
|
|
|
531 |
|
|
begin
|
532 |
|
|
|
533 |
|
|
|
534 |
|
|
-- program memory for fp engine
|
535 |
|
|
-- Constant helper bits.
one_bit  <= '1';
zero_bit <= '0';

-- Debug tap: expose the internal qp_on flag.
qp_on_debug <= qp_on;

-- Program memory write data: low 20 bits of the DMA data word.
program_memory_data_in <= dma_data_in(19 downto 0);

-- Program memory address: the DMA address (low 8 bits) is used only while
-- dma_prm_we is '1'; in every other case the fp instruction address is used.
program_memory_address <= instruction_address_fp when dma_prm_we /= '1'
                          else dma_address(7 downto 0);
|
540 |
|
|
|
541 |
|
|
-- Encode the partition mode on the 4-bit debug port:
-- m16x16 -> "0000", m8x8 -> "0001", any other mode -> "0000".
mode_process : with partition_mode select
    partition_mode_debug <= "0000" when m16x16,
                            "0001" when m8x8,
                            "0000" when others;
|
554 |
|
|
|
555 |
|
|
|
556 |
|
|
-- Full-pel program memory: written through the DMA path (dma_prm_we) and read
-- as 20-bit words into point_count_position_fp.
program_memory1 : program_memory
    port map (
        clk  => clk,
        we   => dma_prm_we,
        addr => program_memory_address,
        din  => program_memory_data_in,
        dout => point_count_position_fp
    );
|
564 |
|
|
|
565 |
|
|
-- program memory for qp engine
|
566 |
|
|
|
567 |
|
|
-- Quarter-pel program memory, generated only when CFG_PIPELINE_COUNT_QP = 1.
program_memory2_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- The DMA address (low 8 bits) is used only while dma_prm_we is '1';
    -- in every other case the qp instruction address is used.
    program_memory_address_qp <= instruction_address_qp when dma_prm_we /= '1'
                                 else dma_address(7 downto 0);

    -- Same write data and write enable as the fp program memory;
    -- read data goes to point_count_position_qp.
    program_memory2 : program_memory
        port map (
            clk  => clk,
            we   => dma_prm_we,
            addr => program_memory_address_qp,
            din  => program_memory_data_in,
            dout => point_count_position_qp
        );

end generate;
|
581 |
|
|
|
582 |
|
|
-- Without a qp pipeline (CFG_PIPELINE_COUNT_QP = 0) there is no qp program
-- memory, so the 20-bit qp program word is tied to all zeros.
no_qpgen0 : if CFG_PIPELINE_COUNT_QP = 0 generate

    point_count_position_qp <= x"00000";

end generate;
|
587 |
|
|
|
588 |
|
|
-- Range checker instance: makes sure that MVs are not out of range.
-- It is fed the unclamped fp candidate vector and drives range_ok.
range_checker1 : range_checker
    port map (
        clk               => clk,
        clear             => clear,
        reset             => reset,
        -- candidate motion vector
        candidate_mvx     => candidate_mvx_fp,
        candidate_mvy     => candidate_mvy_fp,
        -- frame size and macroblock position
        frame_dimension_x => frame_dimension_x,
        frame_dimension_y => frame_dimension_y,
        mbx_coordinate    => mbx_coordinate,
        mby_coordinate    => mby_coordinate,
        -- result
        range_ok          => range_ok
    );
|
601 |
|
|
|
602 |
|
|
-- Produces next_rm_address from the clamped candidate vector
-- (candidate_mvx_int / candidate_mvy_int), the line offset and the
-- active partition.
phy_address1 : phy_address
    port map (
        clk             => clk,
        clear           => clear,
        reset           => reset,
        partition_count => partition_count,   -- identify the subpartition active
        line_offset     => line_offset,
        mvx             => candidate_mvx_int,
        mvy             => candidate_mvy_int,
        phy_address     => next_rm_address
    );
|
613 |
|
|
|
614 |
|
|
-- Split the 20-bit program words into their three fields:
--   (19 downto 16) opcode, (15 downto 8) point count, (7 downto 0) point address.
instruction_fp   <= point_count_position_fp(19 downto 16);
point_count_fp   <= point_count_position_fp(15 downto 8);
point_address_fp <= point_count_position_fp(7 downto 0);

instruction_qp   <= point_count_position_qp(19 downto 16);
point_count_qp   <= point_count_position_qp(15 downto 8);
point_address_qp <= point_count_position_qp(7 downto 0);

-- fp candidate vector = first mv + displacement, x in the upper byte and
-- y in the lower byte of the 16-bit words.
candidate_mvx_fp <= first_mv_fp(15 downto 8) + mv_displacement_fp(15 downto 8);
candidate_mvy_fp <= first_mv_fp(7 downto 0)  + mv_displacement_fp(7 downto 0);
|
622 |
|
|
|
623 |
|
|
-- Check that MVX is in the reference area: values above 47 are replaced by
-- x"2f" (47) and values below -48 by x"d0" (-48); anything else passes through.
in_range_x : process(candidate_mvx_fp)
begin
    -- in-range default, overridden below when a limit is exceeded
    candidate_mvx_int <= candidate_mvx_fp;
    if candidate_mvx_fp > 47 then
        candidate_mvx_int <= x"2f";
    elsif candidate_mvx_fp < -48 then
        candidate_mvx_int <= x"d0";
    end if;
end process;
|
634 |
|
|
|
635 |
|
|
-- Check that MVY is in the reference area: values above 31 are replaced by
-- x"1f" (31) and values below -32 by x"e0" (-32); anything else passes through.
in_range_y : process(candidate_mvy_fp)
begin
    -- in-range default, overridden below when a limit is exceeded
    candidate_mvy_int <= candidate_mvy_fp;
    if candidate_mvy_fp > 31 then
        candidate_mvy_int <= x"1f";
    elsif candidate_mvy_fp < -32 then
        candidate_mvy_int <= x"e0";
    end if;
end process;
|
646 |
|
|
|
647 |
|
|
|
648 |
|
|
-- Original author's note: this has to change, the first mv for qp should be
-- the winner from fp (the original comment reads "fo", presumably "fp").
candidate_mvx_qp <= first_mv_qp(15 downto 8) + mv_displacement_qp(15 downto 8);
candidate_mvy_qp <= first_mv_qp(7 downto 0)  + mv_displacement_qp(7 downto 0);

-- qpel_loc_x / qpel_loc_y: zero without a qp pipeline ...
no_qpgen11 : if CFG_PIPELINE_COUNT_QP = 0 generate
    qpel_loc_x <= (others => '0');
    qpel_loc_y <= (others => '0');
end generate;

-- ... otherwise the two least significant bits of the fp candidate vector.
qpgen11 : if CFG_PIPELINE_COUNT_QP = 1 generate
    qpel_loc_x <= candidate_mvx_fp(1 downto 0);
    qpel_loc_y <= candidate_mvy_fp(1 downto 0);
end generate;
|
659 |
|
|
|
660 |
|
|
-- fp point memory
|
661 |
|
|
|
662 |
|
|
--point_memory_fp : point_memory
|
663 |
|
|
-- port map(
|
664 |
|
|
-- addr =>next_point_fp,
|
665 |
|
|
-- clk =>clk,
|
666 |
|
|
-- dout => next_point_displacement_fp
|
667 |
|
|
--);
|
668 |
|
|
|
669 |
|
|
-- fp point memory: addressed by the DMA while dma_pom_we is '1', by
-- next_point_fp otherwise; write data is the low 16 bits of the DMA bus.
point_memory_address <= dma_address(7 downto 0) when dma_pom_we = '1'
                        else next_point_fp;
point_memory_data_in <= dma_data_in(15 downto 0);

point_memory_fp : point_memory
    port map (
        addr => point_memory_address,
        clk  => clk,
        din  => point_memory_data_in,
        dout => next_point_displacement_fp,
        we   => dma_pom_we
    );
|
680 |
|
|
|
681 |
|
|
|
682 |
|
|
-- Generate enough memories to hold the point memories for each additional
-- pipeline (indices 1 .. CFG_PIPELINE_COUNT-1).
generate_pipelines1 : for i in 1 to (CFG_PIPELINE_COUNT-1) generate
begin

    -- once mvc_done is set, pipeline i reads the point at offset +i
    rest_next_point_fp(i) <= next_point_fp when mvc_done = '0'
                             else next_point_fp + i;

    -- every copy is written in parallel from the DMA when dma_pom_we is '1'
    rest_point_memory_address(i) <= dma_address(7 downto 0) when dma_pom_we = '1'
                                    else rest_next_point_fp(i);

    rest_point_memory_fp : point_memory
        port map (
            addr => rest_point_memory_address(i),
            clk  => clk,
            din  => point_memory_data_in,
            dout => rest_next_point_displacement_fp(i),
            we   => dma_pom_we
        );

end generate;
|
697 |
|
|
|
698 |
|
|
|
699 |
|
|
-- Generate the additional integer pipelines (indices 1 .. CFG_PIPELINE_COUNT-1).
-- Each one gets its own slice of the rest_* arrays and shares the macroblock
-- data, DMA buses and frame/macroblock coordinates with the main pipeline.
generate_pipelines2 : for i in 1 to (CFG_PIPELINE_COUNT-1) generate
begin

    fp_pipelines1 : fp_pipeline
        port map (
            clk                        => clk,
            clear                      => clear,
            reset                      => reset,
            next_point_displacement_fp => rest_next_point_displacement_fp(i), -- next point to be processed
            first_mv_fp                => rest_first_mv_fp(i),                -- first point to start the search from
            start                      => rest_start_pipeline(i),             -- enable the pipeline by main me
            start_mb                   => start,                              -- once per macroblock
            mode_in                    => partition_mode,
            partition_count            => partition_count,                    -- identify the subpartition active
            frame_dimension_x          => frame_dimension_x,                  -- in mb
            frame_dimension_y          => frame_dimension_y,
            mbx_coordinate             => mbx_coordinate,                     -- in mb
            mby_coordinate             => mby_coordinate,
            candidate_mvx              => rest_mvx_c(i),                      -- port for Lagrangian optimization
            candidate_mvy              => rest_mvy_c(i),
            mv_cost                    => rest_p_cost_mv(i),
            mv_cost_on                 => mv_cost_on,                         -- enable mv cost
            all_done_fp                => all_done_fp,
            calculate_sad_done         => rest_calculate_sad_done(i),
            best_sad_fp                => rest_best_sad_fp(i),
            best_mv_fp                 => rest_best_mv_fp(i),
            dma_address                => dma_address,                        -- next reference memory address
            mb_data_in                 => current_pixels_fp,                  -- shared mb memory
            dma_data_in                => dma_data_in,                        -- pixel in for reference memory
            dma_rm_we                  => dma_rm_we,                          -- enable writing to reference memory
            --dma_cm_we                => dma_cm_we,                          -- enable writing to current macroblock memory
            dma_residue_out            => open,                               -- get residue from winner mv
            dma_re_re                  => dma_re_re                           -- enable reading residue
        );

end generate;
|
735 |
|
|
|
736 |
|
|
|
737 |
|
|
|
738 |
|
|
-- qp point memory (only built when CFG_PIPELINE_COUNT_QP = 1).
point_memory_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- DMA address while dma_pom_we is '1', next_point_qp otherwise
    point_memory_address_qp <= dma_address(7 downto 0) when dma_pom_we = '1'
                               else next_point_qp;

    point_memory_qp : point_memory
        port map (
            addr => point_memory_address_qp,
            din  => point_memory_data_in,
            clk  => clk,
            dout => next_point_displacement_qp,
            we   => dma_pom_we
        );

end generate;

-- Without a qp pipeline the qp displacement is tied to zero.
no_qpgen1 : if CFG_PIPELINE_COUNT_QP = 0 generate

    next_point_displacement_qp <= (others => '0');

end generate;
|
761 |
|
|
|
762 |
|
|
-- Displace the mv by 2 pixels to define the interpolation area when
-- interpolation is active: with enable_hp_inter set, the fp displacement is
-- forced to the constant x"14FC", i.e. (20,-4); otherwise it is the value
-- read from the point memory.
mv_displacement_fp <= next_point_displacement_fp when enable_hp_inter = '0'
                      else x"14FC";
mv_displacement_qp <= next_point_displacement_qp;

-- export the internal interrupt signal
done_interrupt <= done_interrupt_int;
|
767 |
|
|
|
768 |
|
|
-- Register file instance: connects the external register_file_* bus and
-- exchanges configuration, status and best-result signals with the rest of
-- the design.
register_file1 : register_file
    generic map (
        integer_pipeline_count => (CFG_PIPELINE_COUNT)
    )
    port map (
        clk               => clk,
        clear             => clear,
        reset             => reset,
        -- external register access
        addr              => register_file_address,
        write             => register_file_write,
        data_in           => register_file_data_in,
        data_out          => register_file_data_out,
        -- control / status
        start             => start,
        mode_out          => partition_mode,
        mv_cost_on        => mv_cost_on,          -- activate the costing of mvs
        all_done_fp       => all_done_fp,
        all_done_qp       => all_done_qp,
        mvc_to_do         => mvc_to_do,
        mvc_done          => mvc_done,            -- all motion vector candidates evaluated
        instruction_zero  => instruction_zero,
        partition_done_fp => partition_done_fp,
        partition_done_qp => partition_done_qp,
        done_interrupt    => done_interrupt_int,
        start_row         => start_row,
        load_mv           => load_mv_fp,          -- force the mvc to move forward
        -- fp results and start vectors
        update_fp         => update_fp,
        best_sad_fp       => best_sad_out_fp,
        best_mv_fp        => best_mv_out_fp,
        first_mv_fp       => first_mv_fp,
        rest_first_mv_fp  => rest_first_mv_fp,
        -- frame / macroblock parameters
        mbx_coordinate    => mbx_coordinate,
        mby_coordinate    => mby_coordinate,
        mvp_x             => mvp_x,
        mvp_y             => mvp_y,
        quant_parameter   => quant_parameter,
        frame_dimension_x => frame_dimension_x,
        frame_dimension_y => frame_dimension_y,
        partition_count   => partition_count,
        -- qp results and start vector
        update_qp         => update_qp,
        best_sad_qp       => best_sad_out_qp,
        best_mv_qp        => best_mv_out_qp,
        first_mv_qp       => first_mv_qp
    );
|
810 |
|
|
|
811 |
|
|
|
812 |
|
|
-- Large reference memory, selected with CFG_CM = 0
-- (original comment: "This memory stores the 7x5").
compact_memory0 : if CFG_CM = 0 generate

    reference_memory_large : reference_memory64_remap
        port map (
            addr_r             => rm_address_r,
            addr_w             => rm_address_w,
            enable_hp_inter    => enable_hp_inter,   -- working in interpolation mode
            clk                => clk,
            -- start is used to move between configurations; the trailing
            -- text of the original comment suggests all_done_fp was used here
            -- at some point ("move to the next configuration when program
            -- completes")
            next_configuration => start,
            start              => start,
            start_row          => start_row,
            reset              => reset,
            clear              => clear,
            din                => dma_data_in,
            dout               => reference_pixels_in,
            dout2              => reference_pixels_in2,
            we                 => dma_rm_we
        );

end generate;
|
832 |
|
|
|
833 |
|
|
-- Compact reference memory, selected with CFG_CM = 1.
compact_memory1 : if CFG_CM = 1 generate

    -- single address input: write address while dma_rm_we is '1',
    -- read address otherwise (low 10 bits of either)
    rm_address_c <= rm_address_w(9 downto 0) when dma_rm_we = '1'
                    else rm_address_r(9 downto 0);

    -- This memory stores the 5x5 reference data (800 words of 64 bit).
    -- It also remaps the addresses.
    reference_memory_compact : reference_memory64_remap_compact
        port map (
            addr               => rm_address_c,
            enable_hp_inter    => enable_hp_inter,      -- working in interpolation mode
            clk                => clk,
            next_configuration => start,                -- move to the next configuration
            start_row          => start_row,
            reset              => reset,
            clear              => clear,
            din                => dma_data_in,
            dout               => reference_pixels_in,
            dout2              => reference_pixels_in2, -- from the second read port
            we                 => dma_rm_we
        );

end generate;
|
853 |
|
|
|
854 |
|
|
|
855 |
|
|
-- When in qp mode it is the systolic array which decides when data is valid.
concatenate_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- this unit makes sure that 8 valid pixels are assembled depending on
    -- byte address (port a); its valid flags are the ones used downstream
    concatenate_qp_a : concatenate64_qp
        port map (
            addr        => hp_address_a,
            clk         => clk,
            clear       => clear,
            reset       => reset,
            din         => hp_pixels_out_a,
            din2        => hp_pixels_out2_a,
            dout        => reference_pixels_out_qp_a,
            enable      => hp_pixels_valid,
            quick_valid => quick_valid_qp,             -- as valid but one cycle earlier
            valid       => shift_concatenate_valid_qp  -- indicates when 64 valid bits are in the output
        );

    -- same unit for port b; its valid outputs are left unconnected
    concatenate_qp_b : concatenate64_qp
        port map (
            addr        => hp_address_b,
            clk         => clk,
            clear       => clear,
            reset       => reset,
            din         => hp_pixels_out_b,
            din2        => hp_pixels_out2_b,
            dout        => reference_pixels_out_qp_b,
            enable      => hp_pixels_valid,
            quick_valid => open,                       -- as valid but one cycle earlier
            valid       => open                        -- indicates when 64 valid bits are in the output
        );

end generate;

-- Without a qp pipeline the qp pixel outputs and valid flag are tied off.
no_qpgen2 : if CFG_PIPELINE_COUNT_QP = 0 generate

    reference_pixels_out_qp_a  <= (others => '0');
    reference_pixels_out_qp_b  <= (others => '0');
    shift_concatenate_valid_qp <= '0';

end generate;
|
898 |
|
|
|
899 |
|
|
-- fp path uses a different concatenate unit.
-- This unit makes sure that 8 valid pixels are assembled depending on byte
-- address; the byte address is the low three bits of int_rm_address.
concatenate_fp : concatenate64
    port map (
        addr            => int_rm_address(2 downto 0),
        clk             => clk,
        clear           => clear,
        reset           => reset,
        din             => reference_pixels_in,
        din2            => reference_pixels_in2,
        dout            => reference_pixels_out_fp,
        enable          => enable_concatenate_unit,
        enable_hp_inter => enable_hp_inter,            -- working in interpolation mode
        quick_valid     => quick_valid,                -- as valid but one cycle earlier
        valid           => shift_concatenate_valid_fp  -- indicates when 64 valid bits are in the output
    );
|
915 |
|
|
|
916 |
|
|
-- Half pel interpolation engine (only built when CFG_PIPELINE_COUNT_QP = 1).
interpolate2_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    interpolate2 : systolic_array_top
        port map (
            clear                   => clear,
            reset                   => reset,
            enable_interpolation    => enable_hp_inter,
            enable_estimation       => qp_mode,
            clk                     => clk,
            data_in                 => reference_pixels_out_fp,
            data_in_valid           => shift_concatenate_valid_fp,
            data_request            => data_request_hp_inter,
            all_done                => hp_interpolation_done,
            next_point              => next_point_inter,            -- tell the main control unit that the next point is required
            shift_concatenate_valid => shift_concatenate_valid_qp,  -- need to know when 64 bits are valid
            -- memory interface for o,h,v and d memories
            candidate_mvx           => candidate_mvx_qp,
            candidate_mvy           => candidate_mvy_qp,
            hp_address_a            => hp_address_a,
            hp_address_b            => hp_address_b,
            data_out_a              => hp_pixels_out_a,             -- interpolated data out port a
            data_out2_a             => hp_pixels_out2_a,            -- interpolated data out
            data_out_b              => hp_pixels_out_b,             -- interpolated data out port b
            data_out2_b             => hp_pixels_out2_b,            -- interpolated data out
            data_out_valid          => hp_pixels_valid              -- signal to indicate 8 bytes of data valid
        );

end generate;

-- Without a qp pipeline the interpolator outputs listed below are tied off.
no_qpgen3 : if CFG_PIPELINE_COUNT_QP = 0 generate

    data_request_hp_inter <= '0';
    hp_interpolation_done <= '0';
    next_point_inter      <= '0';
    hp_address_a          <= (others => '0');
    hp_address_b          <= (others => '0');
    hp_pixels_out_a       <= (others => '0');   -- interpolated data out
    hp_pixels_out_b       <= (others => '0');
    hp_pixels_valid       <= '0';

end generate;
|
960 |
|
|
|
961 |
|
|
-- Data for the qp engine always comes from the qp concatenate unit.
qp_pixels_valid <= shift_concatenate_valid_qp;
qp_pixels_out_a <= reference_pixels_out_qp_a;
qp_pixels_out_b <= reference_pixels_out_qp_b;
|
965 |
|
|
|
966 |
|
|
-- Forward engine for the fp path: takes the concatenated reference pixels
-- and produces out_pixels_fp together with rma_address_fp / rma_we_fp.
forward1 : forward_engine
    port map (
        clk                => clk,
        clear              => clear,
        reset              => reset,
        mode_in            => partition_mode,
        partition_count_in => partition_count,
        enable_hp_inter    => enable_hp_inter,             -- when hp interpolation is being performed in the background
        write_register     => shift_concatenate_valid_fp,
        in_pixels          => reference_pixels_out_fp,
        write_block1       => open,                        -- control which of the two blocks is being read and written (interpolate and dist engine)
        rma_address        => rma_address_fp,              -- extracted reference pixels use this address
        rma_we             => rma_we_fp,
        out_pixels         => out_pixels_fp
    );
|
981 |
|
|
|
982 |
|
|
-- qp interpolate engine (only built when CFG_PIPELINE_COUNT_QP = 1).
interpolate1_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    interpolate1 : qp_interpolate_engine
        port map (
            clk                        => clk,
            clear                      => clear,
            reset                      => reset,
            qp_mode                    => qp_mode,          -- in qp mode two lines must be written to interpolate
            enable_hp_inter            => enable_hp_inter,
            write_interpolate_register => qp_pixels_valid,
            interpolate_in_pixels_a    => qp_pixels_out_a,
            interpolate_in_pixels_b    => qp_pixels_out_b,
            write_block1               => open,             -- control which of the two blocks is being read and written (interpolate and dist engine)
            rma_address                => rma_address_qp,   -- extracted reference pixels use this address
            rma_we                     => rma_we_qp,
            interpolate_out_pixels     => out_pixels_qp
        );

end generate;

-- Without a qp pipeline the engine outputs are tied off.
no_qpgen4 : if CFG_PIPELINE_COUNT_QP = 0 generate

    write_block1_qp <= '0';               -- control which of the two blocks is being read and written (interpolate and dist engine)
    rma_address_qp  <= (others => '0');   -- extracted reference pixels use this address
    rma_we_qp       <= '0';
    out_pixels_qp   <= (others => '0');

end generate;
|
1010 |
|
|
|
1011 |
|
|
-- Reference data for the two distance engines.
reference_data_in_fp <= out_pixels_fp;
reference_data_in_qp <= out_pixels_qp;

-- Reference memory addressing: the read address is the word part of the
-- internal address (bits 13 downto 3) unless dma_rm_re_debug selects the
-- DMA address; the write address is always the DMA address.
rm_address_r <= int_rm_address(13 downto 3) when (dma_rm_re_debug = '0')
                else dma_address;
rm_address_w <= dma_address;

-- debug view of the first reference memory read port
dma_rm_debug <= reference_pixels_in;
|
1018 |
|
|
|
1019 |
|
|
-- These two memories will alternate between serving fp and qp.

-- write side: mux_control_write selects which memory receives dma_cm_we
dma_cm_we_m1 <= dma_cm_we when mux_control_write = '0' else '0';
dma_cm_we_m2 <= dma_cm_we when mux_control_write = '1' else '0';

-- read side: mux_control_read = '0' gives memory 1 to fp and memory 2 to qp,
-- any other value swaps them
current_pixels_fp <= current_pixels_m1 when mux_control_read = '0' else current_pixels_m2;
current_pixels_qp <= current_pixels_m1 when mux_control_read = '1' else current_pixels_m2;

distance_engine_address_m1 <= rma_address_fp when mux_control_read = '0' else rma_address_qp;
distance_engine_address_m2 <= rma_address_fp when mux_control_read = '1' else rma_address_qp;
|
1028 |
|
|
|
1029 |
|
|
-- Two memories so you can write one while you read the other. Dual port is
-- not enough since you cannot destroy the contents.

-- address muxes: engine address unless this memory is being written by the DMA
cm_address_m1 <= distance_engine_address_m1 when dma_cm_we_m1 = '0'
                 else dma_address(4 downto 0);
cm_address_m2 <= distance_engine_address_m2 when dma_cm_we_m2 = '0'
                 else dma_address(4 downto 0);

-- This memory stores the current macroblock (256 bytes (32 words x 64 bits))
current_macroblock_memory1 : current_macroblock_memory64
    port map (
        addr => cm_address_m1,
        clk  => clk,
        din  => dma_data_in,
        dout => current_pixels_m1,
        we   => dma_cm_we_m1
    );

-- This memory stores the current macroblock (256 bytes (32 words x 64 bits))
current_macroblock_memory2 : current_macroblock_memory64
    port map (
        addr => cm_address_m2,
        clk  => clk,
        din  => dma_data_in,
        dout => current_pixels_m2,
        we   => dma_cm_we_m2
    );
|
1052 |
|
|
|
1053 |
|
|
-- qp control unit (only built when CFG_PIPELINE_COUNT_QP = 1).
me_control_unit_qp1_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    me_control_unit_qp1 : me_control_unit_qp
        port map (
            clk                     => clk,
            clear                   => clear,
            reset                   => reset,
            start                   => start_qp,
            next_point_inter        => next_point_inter,        -- next point address to ROM
            shift_concatenate_valid => quick_valid_qp,
            qp_starting_address     => instruction_address_fp,
            instruction_address     => instruction_address_qp,
            point_count             => point_count_qp,
            point_address           => point_address_qp,
            calculate_sad_done      => calculate_sad_done_qp,
            instruction_opcode      => instruction_qp,
            best_eu                 => best_eu_qp,
            next_point              => next_point_qp,
            qp_mode                 => qp_mode,                 -- enable qp estimation
            qp_on                   => qp_on,                   -- qp active
            load_mv                 => load_mv_qp,
            update                  => update_qp,
            all_done                => all_done_qp              -- program completes
        );

end generate;

-- Without a qp pipeline the control outputs are tied off.
no_qpgen7 : if CFG_PIPELINE_COUNT_QP = 0 generate

    instruction_address_qp <= (others => '0');   -- address to fetch next instruction
    next_point_qp          <= (others => '0');
    qp_mode                <= '0';               -- enable qp estimation
    load_mv_qp             <= '0';
    update_qp              <= '0';
    all_done_qp            <= '0';               -- program completes
    qp_on                  <= '0';

end generate;
|
1090 |
|
|
|
1091 |
|
|
-- debug copy of the best execution unit id
best_eu_debug <= best_eu;

-- Main (fp) control unit.
me_control_unit1 : me_control_unit
    generic map (
        integer_pipeline_count => (CFG_PIPELINE_COUNT)
    )
    port map (
        clk                      => clk,
        clear                    => clear,
        reset                    => reset,
        start                    => start,
        range_ok                 => range_ok,
        mode_in                  => partition_mode,
        best_sad_in              => best_sad_out_fp,            -- to make SAD-based decisions
        mv_length_in             => mv_length_in,               -- to make LENGTH-based decisions
        qp_on                    => qp_on,                      -- qp on
        mvc_done                 => mvc_done,                   -- all motion vector candidates evaluated
        mvc_to_do                => mvc_to_do,
        partition_count_out      => partition_count,            -- identify the subpartition active
        start_pipelines          => rest_start_pipeline,
        active_pipelines         => active_pipelines,
        shift_concatenate_valid  => quick_valid,                -- valid output from the concatenate unit (64 bit ready)
        instruction_address      => instruction_address_fp,     -- address to fetch next instruction
        instruction_opcode       => instruction_fp,             -- the opcode
        point_count              => point_count_fp,             -- how many points to test
        point_address            => point_address_fp,           -- which is the first point to test
        calculate_sad_done       => calculate_sad_done_fp,
        distance_engine_active   => distance_engine_active_fp,
        interpolation_done       => hp_interpolation_done,      -- interpolation completes
        interpolate_data_request => data_request_hp_inter,      -- interpolator requests data
        line_offset              => line_offset,                -- read the different lines of the reference macroblock
        enable_concatenate_unit  => enable_concatenate_unit,
        -- enable_dist_engine    => enable_dist_engine,
        write_register           => write_register,
        load_mv                  => load_mv_fp,
        best_eu                  => best_eu,
        update                   => update_fp,
        instruction_zero         => instruction_zero,
        all_done                 => all_done_fp,
        partition_done           => partition_done_fp,
        qpel_loc_x               => qpel_loc_x,                 -- detect qp mode
        qpel_loc_y               => qpel_loc_y,
        next_point               => next_point_fp,
        start_qp                 => start_qp,
        enable_hp_inter          => enable_hp_inter,
        -- write_block1          => write_block1,               -- control which of the two blocks is being read and written (interpolate and dist engine)
        -- next_rm_address_ready => next_rm_address_ready,
        next_rm_addresss         => next_rm_address,            -- physical address for reference (macroblock upper left corner)
        rm_address               => int_rm_address              -- internal reference memory addresses
        -- rma_address           => rma_address,                -- extracted reference pixels use this address
        -- rma_we                => rma_we
    );
|
1142 |
|
|
|
1143 |
|
|
|
1144 |
|
|
-- Calculate the length of the motion vector: per-component difference
-- between the best fp vector and the predicted vector, x difference in the
-- upper half and y difference in the lower half of mv_length_in.
mv_length_in <= (best_mv_out_fp(15 downto 8) - mvp_x)
              & (best_mv_out_fp(7 downto 0)  - mvp_y);
|
1146 |
|
|
|
1147 |
|
|
|
1148 |
|
|
|
1149 |
|
|
-- mv cost unit for the qp path; needs both CFG_MV_COST = 1 and
-- CFG_PIPELINE_COUNT_QP = 1.
gen_mv_cost_qp : if (CFG_MV_COST = 1 and CFG_PIPELINE_COUNT_QP = 1) generate

    mv_cost_qp : mv_cost
        generic map (
            pipelines => CFG_PIPELINE_COUNT_QP
        )
        port map (
            clk             => clk,
            clear           => clear,
            reset           => reset,
            load            => load_mv_qp,         -- start calculation of mv costs for qp
            mvp_x           => mvp_x,              -- predicted mv x
            mvp_y           => mvp_y,              -- predicted mv y
            mvx_c           => candidate_mvx_qp,   -- motion vector candidate x
            mvy_c           => candidate_mvy_qp,   -- motion vector candidate y
            rest_mvx_c      => rest_mvx_c,         -- rest of motion vector candidates x
            rest_mvy_c      => rest_mvy_c,         -- rest of motion vector candidates y
            quant_parameter => quant_parameter,
            p_cost_mv       => p_cost_mv_qp,
            rest_p_cost_mv  => open
        );

end generate;

-- Otherwise the qp mv cost is zero.
no_gen_mv_cost_qp : if (CFG_MV_COST = 0 or CFG_PIPELINE_COUNT_QP = 0) generate
    p_cost_mv_qp <= (others => '0');
end generate;
|
1174 |
|
|
|
1175 |
|
|
-- mv cost unit for the fp path (CFG_MV_COST = 1). It costs the clamped
-- candidate vector of the main pipeline and the candidates of the
-- additional pipelines.
gen_mv_cost_fp : if CFG_MV_COST = 1 generate

    mv_cost_fp : mv_cost
        generic map (
            pipelines => CFG_PIPELINE_COUNT
        )
        port map (
            clk             => clk,
            clear           => clear,
            reset           => reset,
            load            => rest_start_pipeline(0),   -- start calculation of mv costs for fp
            mvp_x           => mvp_x,                    -- predicted mv x
            mvp_y           => mvp_y,                    -- predicted mv y
            mvx_c           => candidate_mvx_int,        -- motion vector candidate x
            mvy_c           => candidate_mvy_int,        -- motion vector candidate y
            rest_mvx_c      => rest_mvx_c,               -- rest of motion vector candidates x
            rest_mvy_c      => rest_mvy_c,               -- rest of motion vector candidates y
            quant_parameter => quant_parameter,
            p_cost_mv       => p_cost_mv,
            rest_p_cost_mv  => rest_p_cost_mv
        );

end generate;

-- Without mv costing the main pipeline cost is zero.
-- NOTE(review): rest_p_cost_mv gets no driver in this configuration although
-- the additional fp pipelines read it - confirm this is acceptable.
no_gen_mv_cost_fp : if CFG_MV_COST = 0 generate
    p_cost_mv <= (others => '0');
end generate;
|
1201 |
|
|
|
1202 |
|
|
-- fp distance engine
distance_engine_fp : distance_engine64
    generic map (
        qp_mode => '0'
    )
    port map (
        clk                    => clk,
        clear                  => clear,
        reset                  => reset,
        enable                 => rma_we_fp,                  -- calculate when new data available
        update                 => update_fp,                  -- instruction completes set the best sad register to FFFF
        load_mv                => load_mv_fp,
        mode_in                => partition_mode,
        mv_cost_on             => mv_cost_on,
        mv_cost_in             => p_cost_mv,
        candidate_mvx          => candidate_mvx_fp,
        candidate_mvy          => candidate_mvy_fp,
        reference_data_in      => reference_data_in_fp,
        current_data_in        => current_pixels_fp,
        residue_out            => residue_out_fp,
        enable_fifo            => enable_fifo_fp,
        reset_fifo             => reset_fifo_fp,
        winner1                => winner1_fp,
        calculate_sad_done     => calculate_sad_done_fp,
        distance_engine_active => distance_engine_active_fp,
        best_sad               => best_sad_fp,
        best_mv                => best_mv_fp
    );
|
1230 |
|
|
|
1231 |
|
|
-- mv/sad selector: combines the main pipeline result with the results of
-- the additional pipelines into best_sad_out_fp / best_mv_out_fp.
sad_selector_fp : sad_selector
    generic map (
        integer_pipeline_count => CFG_PIPELINE_COUNT
    )
    port map (
        clk                => clk,
        reset              => reset,
        clear              => clear,
        calculate_sad_done => calculate_sad_done_fp,
        update             => partition_done_fp,
        update_fp          => update_fp,             -- end of iteration
        best_eu            => best_eu,               -- id of best execution unit
        active_pipelines   => active_pipelines,
        best_sad           => best_sad_fp,
        best_mv            => best_mv_fp,
        rest_best_sad      => rest_best_sad_fp,
        rest_best_mv       => rest_best_mv_fp,
        best_sad_out       => best_sad_out_fp,
        best_mv_out        => best_mv_out_fp
    );

-- debugging ports: results of the main distance engine (before selection)
best_sad_debug <= best_sad_fp;
best_mv_debug  <= best_mv_fp;
|
1254 |
|
|
|
1255 |
|
|
-- qp distance engine and its sad selector
-- (only built when CFG_PIPELINE_COUNT_QP = 1).
pipeline_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- While all_done_fp is asserted the stored best fp sad is presented to
    -- the qp selector; otherwise the qp distance engine result is used.
    best_sad_qp <= best_sad_qp_distance_engine when all_done_fp = '0'
                   else best_sad_out_fp;

    sad_selector_qp1 : sad_selector_qp
        port map (
            clk                => clk,
            reset              => reset,
            clear              => clear,
            calculate_sad_done => calculate_sad_done_qp,
            active_pipelines   => active_pipelines_qp,
            update             => all_done_fp,            -- complete fp part set the stored sad
            update_qp          => update_qp,
            best_eu            => best_eu_qp,
            best_sad           => best_sad_qp,
            best_mv            => best_mv_qp,
            rest_best_sad      => rest_best_sad_qp,
            rest_best_mv       => rest_best_mv_qp,
            best_sad_out       => best_sad_out_qp,
            best_mv_out        => best_mv_out_qp
        );

    -- NOTE(review): the qp_mode generic is '0' here, the same value the fp
    -- engine uses - confirm this is intended for the qp instance.
    distance_engine_qp : distance_engine64
        generic map (
            qp_mode => '0'
        )
        port map (
            clk                    => clk,
            clear                  => clear,
            reset                  => reset,
            enable                 => rma_we_qp,
            update                 => update_qp,                    -- instruction completes set the best sad register to FFFF
            load_mv                => load_mv_qp,
            mode_in                => partition_mode,
            mv_cost_on             => mv_cost_on,
            mv_cost_in             => p_cost_mv_qp,
            candidate_mvx          => candidate_mvx_qp,
            candidate_mvy          => candidate_mvy_qp,
            reference_data_in      => reference_data_in_qp,
            current_data_in        => current_pixels_qp,
            residue_out            => residue_out_qp,
            enable_fifo            => enable_fifo_qp,
            reset_fifo             => reset_fifo_qp,
            winner1                => winner1_qp,
            calculate_sad_done     => calculate_sad_done_qp,
            distance_engine_active => open,
            best_sad               => best_sad_qp_distance_engine,
            best_mv                => best_mv_qp
        );

end generate;
|
1308 |
|
|
|
1309 |
|
|
|
1310 |
|
|
-- Tie-offs for the qp distance engine / sad selector outputs when no qp
-- pipeline is built (CFG_PIPELINE_COUNT_QP = 0).
no_qpgen8 : if CFG_PIPELINE_COUNT_QP = 0 generate

    distance_engine_address_qp <= (others => '0');
    residue_out_qp             <= (others => '0');
    enable_fifo_qp             <= '0';
    reset_fifo_qp              <= '0';
    winner1_qp                 <= '0';
    calculate_sad_done_qp      <= '0';
    best_sad_qp                <= (others => '0');
    best_mv_qp                 <= (others => '0');

    -- The register file reads best_sad_out_qp / best_mv_out_qp in every
    -- configuration, but their only driver (sad_selector_qp1) exists only
    -- when CFG_PIPELINE_COUNT_QP = 1. Tie them off here so they are not
    -- left undriven ('U') in this configuration.
    -- assumes both are std_logic_vector signals, like best_sad_qp /
    -- best_mv_qp - TODO confirm against the declarations
    best_sad_out_qp            <= (others => '0');
    best_mv_out_qp             <= (others => '0');

end generate;
|
1322 |
|
|
|
1323 |
|
|
-- next_rm_address_ready is tied permanently to '1'; the me_control_unit port
-- association that would use it is commented out in this file.
next_rm_address_ready <= '1';
|
1324 |
|
|
|
1325 |
|
|
-- Control the wiring of the memories.
-- Two toggle flags: mux_control_write flips on start, mux_control_read flips
-- on all_done_fp. clear resets both asynchronously, reset synchronously.
-- NOTE(review): start has priority over all_done_fp, so if both are high in
-- the same cycle only mux_control_write toggles - confirm they cannot coincide.
regs : process(clk, clear)
begin
    if clear = '1' then
        mux_control_write <= '0';
        mux_control_read  <= '0';
    elsif rising_edge(clk) then
        if reset = '1' then
            mux_control_write <= '0';
            mux_control_read  <= '0';
        else
            if start = '1' then
                mux_control_write <= not mux_control_write;
            elsif all_done_fp = '1' then
                mux_control_read <= not mux_control_read;
            end if;
        end if;
    end if;
end process regs;
|
1346 |
|
|
|
1347 |
|
|
|
1348 |
|
|
end;
|
1349 |
|
|
|
1350 |
|
|
|
1351 |
|
|
|
1352 |
|
|
|
1353 |
|
|
|
1354 |
|
|
|
1355 |
|
|
|
1356 |
|
|
|
1357 |
|
|
|
1358 |
|
|
|