-- OpenCores
-- URL https://opencores.org/ocsvn/motion_estimation_processor/motion_estimation_processor/trunk
--
-- Subversion Repositories motion_estimation_processor
--
-- [/] [motion_estimation_processor/] [trunk/] [src_me/] [me_top.vhd] - Rev 2
--
-- Compare with Previous | Blame | View Log

----------------------------------------------------------------------------
--  This file is a part of the LM VHDL IP LIBRARY
--  Copyright (C) 2009 Jose Nunez-Yanez
--
--  This program is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  See the file COPYING for the full details of the license.
--
--  The license allows free and unlimited use of the library and tools for research and education purposes. 
--  The full LM core supports many more advanced motion estimation features and it is available under a 
--  low-cost commercial license. See the readme file to learn more or contact us at 
--  eejlny@byacom.co.uk or www.byacom.co.uk
--------------------------------------
--  entity       = me_top	      --
--  version      = 1.0              --
--  last update  = 16/08/09         --
--  author       = Jose Nunez       --
--------------------------------------
 
 
-- me top of the hierarchy
 
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.Numeric_STD.all;
use IEEE.std_logic_unsigned."+";
use IEEE.std_logic_unsigned."-";
use IEEE.std_logic_signed.">";
use IEEE.std_logic_signed."<";
use work.config.all;
 
 
entity me_top is
	port (
		-- clock and initialisation inputs
		clk, clear, reset      : in  std_logic;

		-- register file access
		register_file_address  : in  std_logic_vector(4 downto 0);   -- 32 general purpose registers
		register_file_write    : in  std_logic;
		register_file_data_in  : in  std_logic_vector(31 downto 0);
		register_file_data_out : out std_logic_vector(31 downto 0);
		done_interrupt         : out std_logic;                      -- high when macroblock processing has completed

		-- debugging ports
		best_sad_debug         : out std_logic_vector(15 downto 0);
		best_mv_debug          : out std_logic_vector(15 downto 0);
		best_eu_debug          : out std_logic_vector(3 downto 0);
		partition_mode_debug   : out std_logic_vector(3 downto 0);
		qp_on_debug            : out std_logic;                      -- running qp
		dma_rm_re_debug        : in  std_logic;                      -- set to one to enable reading the reference area
		dma_rm_debug           : out std_logic_vector(63 downto 0);  -- reference area data out

		-- DMA side: one address/data pair shared by all memories, one write enable per memory
		dma_address            : in  std_logic_vector(10 downto 0);  -- next reference memory address
		dma_data_in            : in  std_logic_vector(63 downto 0);  -- pixel in for reference memory or macroblock memory
		dma_rm_we              : in  std_logic;                      -- enable writing to reference memory
		dma_cm_we              : in  std_logic;                      -- enable writing to current macroblock memory
		dma_pom_we             : in  std_logic;                      -- enable writing to point memory
		dma_prm_we             : in  std_logic;                      -- enable writing to program memory
		dma_residue_out        : out std_logic_vector(63 downto 0);  -- get residue from winner mv
		dma_re_re              : in  std_logic                       -- enable reading residue
	);
end me_top;
 
architecture struct of me_top is
 
component mv_cost -- calculate mv cost using Lagrangian optimization
	generic (
		pipelines : integer
	);
	port (
		clk, clear, reset      : in  std_logic;
		load                   : in  std_logic;                     -- start
		mvp_x, mvp_y           : in  std_logic_vector(7 downto 0);  -- predicted mv (x, y)
		mvx_c, mvy_c           : in  std_logic_vector(7 downto 0);  -- motion vector candidate (x, y)
		rest_mvx_c, rest_mvy_c : in  rest_type_points;              -- rest of motion vector candidates (x, y)
		quant_parameter        : in  std_logic_vector(5 downto 0);  -- quantization parameter
		p_cost_mv              : out std_logic_vector(15 downto 0);
		rest_p_cost_mv         : out rest_type_displacement
	);
end component;
 
 
component phy_address
	port (
		clk, clear, reset : in  std_logic;
		partition_count   : in  std_logic_vector(3 downto 0);  -- identify the subpartition active
		line_offset       : in  std_logic_vector(5 downto 0);  -- read multiple lines
		mvx, mvy          : in  std_logic_vector(7 downto 0);
		phy_address       : out std_logic_vector(13 downto 0)
	);
end component;
 
component sad_selector
	generic (
		integer_pipeline_count : integer
	);
	port (
		clk, reset, clear           : in  std_logic;
		calculate_sad_done          : in  std_logic;
		active_pipelines            : in  std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
		update                      : in  std_logic;  -- completed program reset the stored sad
		update_fp                   : in  std_logic;  -- end of iteration
		best_eu                     : out std_logic_vector(3 downto 0);
		best_sad, best_mv           : in  std_logic_vector(15 downto 0);
		rest_best_sad, rest_best_mv : in  rest_type_displacement;
		best_sad_out, best_mv_out   : out std_logic_vector(15 downto 0)
	);
end component;
 
component sad_selector_qp
	port (
		clk, reset, clear           : in  std_logic;
		calculate_sad_done          : in  std_logic;
		active_pipelines            : in  std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
		update                      : in  std_logic;  -- completed fp part set the stored sad
		update_qp                   : in  std_logic;  -- end of iteration
		best_eu                     : out std_logic_vector(3 downto 0);
		best_sad, best_mv           : in  std_logic_vector(15 downto 0);
		rest_best_sad, rest_best_mv : in  rest_type_displacement_qp;
		best_sad_out, best_mv_out   : out std_logic_vector(15 downto 0)
	);
end component;
 
component distance_engine64
	generic (
		qp_mode : std_logic
	);
	port (
		-- clocking and control
		clk, clear, reset              : in  std_logic;
		enable, update, load_mv        : in  std_logic;
		mode_in                        : in  mode_type;
		-- motion vector cost input
		mv_cost_on                     : in  std_logic;
		mv_cost_in                     : in  std_logic_vector(15 downto 0);
		candidate_mvx, candidate_mvy   : in  std_logic_vector(7 downto 0);
		-- 64-bit pixel words
		reference_data_in              : in  std_logic_vector(63 downto 0);
		current_data_in                : in  std_logic_vector(63 downto 0);
		residue_out                    : out std_logic_vector(63 downto 0);
		-- status and fifo control outputs
		enable_fifo, reset_fifo        : out std_logic;
		winner1                        : out std_logic;
		calculate_sad_done             : out std_logic;
		distance_engine_active         : out std_logic;
		-- results
		best_sad, best_mv              : out std_logic_vector(15 downto 0)
	);
end component;
 
 
component register_file
	generic (
		integer_pipeline_count : integer
	);
	port (
		clk, clear, reset    : in  std_logic;
		-- register access
		addr                 : in  std_logic_vector(4 downto 0);
		write                : in  std_logic;
		data_in              : in  std_logic_vector(31 downto 0);
		data_out             : out std_logic_vector(31 downto 0);
		-- sequencing
		start                : out std_logic;
		all_done_qp          : in  std_logic;  -- program completes
		all_done_fp          : in  std_logic;  -- fp part completes
		mvc_done             : out std_logic;  -- all motion vector candidates evaluated
		-- NOTE(review): left unconstrained here; the signal mapped to it in this
		-- file is 4 bits wide -- confirm against the register_file entity.
		mvc_to_do            : out std_logic_vector;
		instruction_zero     : in  std_logic;
		partition_done_fp    : in  std_logic;  -- fp partition terminates
		partition_done_qp    : in  std_logic;  -- qp partition terminates
		done_interrupt       : out std_logic;
		start_row            : out std_logic;
		update_fp            : in  std_logic;
		load_mv              : in  std_logic;  -- force the mvc to move forward
		mode_out             : out mode_type;
		mv_cost_on           : out std_logic;  -- activate the costing of mvs
		-- fp results in, fp starting vectors out
		best_sad_fp          : in  std_logic_vector(15 downto 0);
		best_mv_fp           : in  std_logic_vector(15 downto 0);
		first_mv_fp          : out std_logic_vector(15 downto 0);
		rest_first_mv_fp     : out rest_type_displacement;
		-- values driven out of the register file
		mbx_coordinate       : out std_logic_vector(7 downto 0);
		mby_coordinate       : out std_logic_vector(7 downto 0);
		mvp_x, mvp_y         : out std_logic_vector(7 downto 0);
		quant_parameter      : out std_logic_vector(5 downto 0);
		frame_dimension_x    : out std_logic_vector(7 downto 0);
		frame_dimension_y    : out std_logic_vector(7 downto 0);
		-- qp side
		update_qp            : in  std_logic;
		partition_count      : in  std_logic_vector(3 downto 0);  -- identify the subpartition active
		best_sad_qp          : in  std_logic_vector(15 downto 0);
		best_mv_qp           : in  std_logic_vector(15 downto 0);
		first_mv_qp          : out std_logic_vector(15 downto 0)
	);
end component;
 
 
 
component fp_pipeline
	port (
		clk, clear, reset          : in  std_logic;
		next_point_displacement_fp : in  std_logic_vector(15 downto 0);  -- next point to be processed
		first_mv_fp                : in  std_logic_vector(15 downto 0);  -- first point to start the search from
		start                      : in  std_logic;
		start_mb                   : in  std_logic;                      -- once per macroblock
		mode_in                    : in  mode_type;
		partition_count            : in  std_logic_vector(3 downto 0);   -- identify the subpartition active
		frame_dimension_x          : in  std_logic_vector(7 downto 0);   -- in mb
		frame_dimension_y          : in  std_logic_vector(7 downto 0);
		mbx_coordinate             : in  std_logic_vector(7 downto 0);   -- in mb
		mby_coordinate             : in  std_logic_vector(7 downto 0);
		-- port for Lagrangian optimization
		candidate_mvx              : out std_logic_vector(7 downto 0);
		candidate_mvy              : out std_logic_vector(7 downto 0);
		mv_cost                    : in  std_logic_vector(15 downto 0);
		mv_cost_on                 : in  std_logic;                      -- enable mv cost
		all_done_fp                : in  std_logic;                      -- program completes
		calculate_sad_done         : out std_logic;
		best_sad_fp                : out std_logic_vector(15 downto 0);
		best_mv_fp                 : out std_logic_vector(15 downto 0);
		dma_address                : in  std_logic_vector(10 downto 0);  -- next reference memory address
		mb_data_in                 : in  std_logic_vector(63 downto 0);  -- pixel in for macroblock memory (this memory is shared among the pipelines)
		dma_data_in                : in  std_logic_vector(63 downto 0);  -- pixel in for reference memory
		dma_rm_we                  : in  std_logic;                      -- enable writing to reference memory
		--dma_cm_we                : in  std_logic;                      -- enable writing to current macroblock memory
		dma_residue_out            : out std_logic_vector(63 downto 0);  -- get residue from winner mv
		dma_re_re                  : in  std_logic                       -- enable reading residue
	);
end component;
 
 
component program_memory -- 8-bit address, 20-bit data words, single write enable
	port (
		addr : in  std_logic_vector(7 downto 0);
		clk  : in  std_logic;
		din  : in  std_logic_vector(19 downto 0);
		dout : out std_logic_vector(19 downto 0);
		we   : in  std_logic
	);
end component;
 
component reference_memory64_remap
	-- This memory stores the 5x7 reference data (1120 words of 64 bit).
	-- It also remaps the addresses.
	port (
		addr_r, addr_w     : in  std_logic_vector(10 downto 0);
		enable_hp_inter    : in  std_logic;  -- working in interpolation mode
		clk                : in  std_logic;
		start              : in  std_logic;
		next_configuration : in  std_logic;  -- move to the next configuration
		start_row          : in  std_logic;
		reset, clear       : in  std_logic;
		din                : in  std_logic_vector(63 downto 0);
		dout, dout2        : out std_logic_vector(63 downto 0);
		we                 : in  std_logic
	);
end component;
 
component reference_memory64_remap_compact
	-- This memory stores the 5x5 reference data (800 words of 64 bit).
	-- It also remaps the addresses.
	port (
		addr               : in  std_logic_vector(9 downto 0);
		enable_hp_inter    : in  std_logic;  -- working in interpolation mode
		clk                : in  std_logic;
		next_configuration : in  std_logic;  -- move to the next configuration
		start_row          : in  std_logic;
		reset, clear       : in  std_logic;
		din                : in  std_logic_vector(63 downto 0);
		dout               : out std_logic_vector(63 downto 0);
		dout2              : out std_logic_vector(63 downto 0);  -- from the second read port
		we                 : in  std_logic
	);
end component;
 
component concatenate64_qp
	port (
		addr              : in  std_logic_vector(2 downto 0);
		clk, clear, reset : in  std_logic;
		din, din2         : in  std_logic_vector(63 downto 0);
		dout              : out std_logic_vector(63 downto 0);
		enable            : in  std_logic;
		quick_valid       : out std_logic;  -- as valid but one cycle earlier
		valid             : out std_logic   -- indicates when 64 valid bits are in the output
	);
end component;
 
 
component concatenate64
	-- This unit makes sure that 16 valid pixels are assembled depending on the byte address.
	-- NOTE(review): dout is 64 bits wide, yet the original comments speak of
	-- 16 pixels / 16 bytes -- confirm the intended wording.
	port (
		addr              : in  std_logic_vector(2 downto 0);
		clk, clear, reset : in  std_logic;
		din, din2         : in  std_logic_vector(63 downto 0);
		dout              : out std_logic_vector(63 downto 0);
		enable_hp_inter   : in  std_logic;  -- working in interpolation mode
		enable            : in  std_logic;
		quick_valid       : out std_logic;  -- as valid but one cycle earlier
		valid             : out std_logic   -- indicates when 16 valid bytes are in the output
	);
end component;
 
component qp_interpolate_engine
	port (
		clk, clear, reset          : in  std_logic;
		qp_mode                    : in  std_logic;  -- in qp mode two lines must be written to interpolate
		enable_hp_inter            : in  std_logic;
		write_interpolate_register : in  std_logic;
		interpolate_in_pixels_a    : in  std_logic_vector(63 downto 0);
		interpolate_in_pixels_b    : in  std_logic_vector(63 downto 0);
		write_block1               : out std_logic;                     -- control which of the two blocks is being read and written (interpolate and dist engine)
		rma_address                : out std_logic_vector(4 downto 0);  -- extracted reference pixels use this address
		rma_we                     : out std_logic;
		interpolate_out_pixels     : out std_logic_vector(63 downto 0)
	);
end component;
 
component forward_engine
	port (
		clk, clear, reset  : in  std_logic;
		enable_hp_inter    : in  std_logic;  -- when hp interpolation is being performed in the background
		write_register     : in  std_logic;
		mode_in            : in  mode_type;
		partition_count_in : in  std_logic_vector(3 downto 0);
		in_pixels          : in  std_logic_vector(63 downto 0);
		write_block1       : out std_logic;                     -- control which of the two blocks is being read and written (interpolate and dist engine)
		rma_address        : out std_logic_vector(4 downto 0);  -- extracted reference pixels use this address
		rma_we             : out std_logic;
		out_pixels         : out std_logic_vector(63 downto 0)
	);
end component;
 
-- half pel interpolation engine
 
component systolic_array_top
	port (
		clear, reset            : in  std_logic;
		enable_interpolation    : in  std_logic;
		enable_estimation       : in  std_logic;
		clk                     : in  std_logic;
		data_in                 : in  std_logic_vector(63 downto 0);
		data_in_valid           : in  std_logic;
		data_request            : out std_logic;
		all_done                : out std_logic;
		next_point              : out std_logic;  -- tell the main control unit that the next point is required
		shift_concatenate_valid : in  std_logic;  -- need to know when 64 bits are valid
		-- memory interface for o, h, v and d memories
		candidate_mvx           : in  std_logic_vector(7 downto 0);
		candidate_mvy           : in  std_logic_vector(7 downto 0);
		hp_address_a            : out std_logic_vector(2 downto 0);
		hp_address_b            : out std_logic_vector(2 downto 0);
		-- interpolated data out
		data_out_a              : out std_logic_vector(63 downto 0);
		data_out2_a             : out std_logic_vector(63 downto 0);
		data_out_b              : out std_logic_vector(63 downto 0);
		data_out2_b             : out std_logic_vector(63 downto 0);
		data_out_valid          : out std_logic   -- signal to indicate 8 bytes of data valid
	);
end component;
 
 
 
component current_macroblock_memory64
	-- This memory stores the current macroblock (256 bytes).
	port (
		addr : in  std_logic_vector(4 downto 0);
		clk  : in  std_logic;
		din  : in  std_logic_vector(63 downto 0);
		dout : out std_logic_vector(63 downto 0);
		we   : in  std_logic
	);
end component;
 
 
component point_memory -- 8-bit address, 16-bit data words, single write enable
	port (
		addr : in  std_logic_vector(7 downto 0);
		clk  : in  std_logic;
		din  : in  std_logic_vector(15 downto 0);
		dout : out std_logic_vector(15 downto 0);
		we   : in  std_logic
	);
end component;
 
--component point_memory
--	port (
--	addr: in std_logic_vector(7 downto 0);
--	clk: in std_logic;
--	dout: out std_logic_vector(15 downto 0));
--end component;
 
 
component me_control_unit
	generic (
		integer_pipeline_count : integer
	);
	port (
		clk, clear, reset        : in  std_logic;
		start                    : in  std_logic;
		range_ok                 : in  std_logic;                      -- keep track of the mv range
		best_sad_in              : in  std_logic_vector(15 downto 0);  -- to make SAD-based decisions
		mv_length_in             : in  std_logic_vector(15 downto 0);  -- to make LENGTH-based decisions
		mode_in                  : in  mode_type;
		mvc_done                 : in  std_logic;                      -- all motion vector candidates evaluated
		mvc_to_do                : in  std_logic_vector(3 downto 0);
		qp_on                    : in  std_logic;                      -- qp on
		partition_count_out      : out std_logic_vector(3 downto 0);   -- identify the subpartition active
		start_pipelines          : out std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
		active_pipelines         : out std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);  -- so sad selector ignores the non active ones
		shift_concatenate_valid  : in  std_logic;                      -- valid output from the concatenate unit (64 bit ready)
		instruction_address      : out std_logic_vector(7 downto 0);   -- address to fetch next instruction
		instruction_opcode       : in  std_logic_vector(3 downto 0);   -- opcode
		point_count              : in  std_logic_vector(7 downto 0);   -- how many points to test
		point_address            : in  std_logic_vector(7 downto 0);   -- which is the first point to test
		best_eu                  : in  std_logic_vector(3 downto 0);
		calculate_sad_done       : in  std_logic;
		distance_engine_active   : in  std_logic;
		interpolation_done       : in  std_logic;                      -- interpolation completes
		interpolate_data_request : in  std_logic;                      -- interpolator requests data
		next_point               : out std_logic_vector(7 downto 0);   -- next point address to ROM
		line_offset              : out std_logic_vector(5 downto 0);   -- multiple line reading
		enable_concatenate_unit  : out std_logic;
		--enable_dist_engine     : out std_logic;
		write_register           : out std_logic;
		load_mv                  : out std_logic;
		update                   : out std_logic;
		instruction_zero         : out std_logic;
		all_done                 : out std_logic;                      -- program completes or qp mode started (fp finished)
		partition_done           : out std_logic;
		qpel_loc_x               : in  std_logic_vector(1 downto 0);   -- detect qp mode
		qpel_loc_y               : in  std_logic_vector(1 downto 0);
		start_qp                 : out std_logic;
		enable_hp_inter          : out std_logic;                      -- start the interpolation core
		--write_block1           : out std_logic;
		--next_rm_address_ready  : in  std_logic;
		next_rm_addresss         : in  std_logic_vector(13 downto 0);  -- physical address for reference (macroblock upper left corner)
		rm_address               : out std_logic_vector(13 downto 0)   -- reference memory write from address
		--cm_address             : out std_logic_vector(4 downto 0);   -- address to extract 4x4 blocks from current macroblock
		--rma_address            : out std_logic_vector(4 downto 0);   -- reference macroblock write to address
		--rma_we                 : out std_logic
	);
end component;
 
component me_control_unit_qp
	port (
		clk, clear, reset       : in  std_logic;
		start                   : in  std_logic;                     -- start qp refinement
		next_point_inter        : in  std_logic;                     -- tell the main control unit that the next point is required by the interpolation unit
		shift_concatenate_valid : in  std_logic;                     -- valid output from the concatenate unit
		qp_starting_address     : in  std_logic_vector(7 downto 0);  -- start fetching qp instructions from this point
		instruction_address     : out std_logic_vector(7 downto 0);  -- address to fetch next instruction
		point_count             : in  std_logic_vector(7 downto 0);  -- how many points to test
		point_address           : in  std_logic_vector(7 downto 0);  -- which is the first point to test
		calculate_sad_done      : in  std_logic;                     -- signals when the distance engine has finished
		instruction_opcode      : in  std_logic_vector(3 downto 0);  -- opcode
		best_eu                 : in  std_logic_vector(3 downto 0);  -- best execution unit
		next_point              : out std_logic_vector(7 downto 0);  -- next point address to ROM
		qp_mode                 : out std_logic;                     -- enable qp estimation
		qp_on                   : out std_logic;                     -- qp active
		load_mv                 : out std_logic;
		update                  : out std_logic;
		all_done                : out std_logic                      -- program completes
	);
end component;
 
component range_checker -- make sure that MVs are not out of range
	port (
		clk, clear, reset            : in  std_logic;
		candidate_mvx, candidate_mvy : in  std_logic_vector(7 downto 0);
		frame_dimension_x            : in  std_logic_vector(7 downto 0);  -- in mb
		frame_dimension_y            : in  std_logic_vector(7 downto 0);
		mbx_coordinate               : in  std_logic_vector(7 downto 0);  -- in mb
		mby_coordinate               : in  std_logic_vector(7 downto 0);
		range_ok                     : out std_logic
	);
end component;
 
 
-- Internal signals of me_top.
-- Signals prefixed "rest_" are indexed per pipeline; the generate loops in this
-- architecture use indices 1 .. CFG_PIPELINE_COUNT-1 for the integer (fp) ones.
signal rest_next_point_fp : rest_type_points;
signal rest_point_memory_address : rest_type_points;
signal rest_next_point_displacement_fp : rest_type_displacement;
signal rest_start_pipeline,rest_calculate_sad_done : std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
signal rest_best_sad_fp,rest_best_mv_fp,rest_first_mv_fp: rest_type_displacement;
 
-- qp counterparts of the per-pipeline signals, sized by CFG_PIPELINE_COUNT_QP.
-- NOTE(review): "rest_next_point_displacement_qqp" is spelled with a double q; it is
-- not referenced in the visible part of the file -- check all uses before renaming.
signal rest_next_point_qp : rest_type_points_qp;
signal rest_point_memory_address_qp : rest_type_points_qp;
signal rest_next_point_displacement_qqp : rest_type_displacement_qp;
signal rest_start_pipeline_qp,rest_calculate_sad_done_qp : std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
signal rest_best_sad_qp,rest_best_mv_qp: rest_type_displacement_qp;
 
-- 8-bit values: register-file outputs, program/point memory addresses and fields, candidate MV components
signal mvp_x,mvp_y,frame_dimension_x,frame_dimension_y,mby_coordinate,mbx_coordinate,program_memory_address,program_memory_address_qp,instruction_address_fp,instruction_address_qp,point_count_fp,point_count_qp,point_address_fp,point_address_qp,candidate_mvx_fp,candidate_mvy_fp,candidate_mvx_qp,candidate_mvy_qp : std_logic_vector(7 downto 0);
signal quant_parameter,line_offset : std_logic_vector(5 downto 0);
signal partition_count,instruction_fp,instruction_qp : std_logic_vector(3 downto 0); --op code
-- candidate_mv*_int are the range-clamped copies of candidate_mv*_fp
signal next_point_fp,point_memory_address,next_point_qp,point_memory_address_qp,candidate_mvx_int,candidate_mvy_int : std_logic_vector(7 downto 0);
-- one_bit / zero_bit are tied to constant '1' / '0' in the architecture body
signal one_bit,zero_bit,distance_engine_active_fp,range_ok,quick_valid,quick_valid_qp,next_point_inter,enable_concatenate_unit,dma_cm_we_m1,dma_cm_we_m2,dma_cm_we_fp,dma_cm_we_qp,done_interrupt_int,mv_cost_on : std_logic;
signal distance_engine_address_m2,distance_engine_address_m1,distance_engine_address_qp,cm_address_m1,cm_address_m2,address1_fp,address2_fp,address1_qp,address2_qp: std_logic_vector(4 downto 0);
-- 16-bit SAD / motion vector / displacement / cost values
signal zero,best_sad_out_qp,best_mv_out_qp,next_point_displacement_qp,next_point_displacement_fp,best_sad_fp,best_sad_qp,best_sad_qp_distance_engine,best_mv_fp,best_mv_qp,best_sad_out_fp,best_mv_out_fp,p_cost_mv,p_cost_mv_qp : std_logic_vector(15 downto 0);
-- 20-bit program memory words: bits 19..16 opcode, 15..8 point count, 7..0 point address
signal point_count_position_qp,point_count_position_fp : std_logic_vector(19 downto 0);
-- first_mv_* and mv_displacement_* hold the x component in bits 15..8 and the y component in bits 7..0
signal mv_length_in,first_mv_fp,first_mv_qp,mv_displacement_fp,mv_displacement_qp : std_logic_vector(15 downto 0);
-- single-bit control and status signals
signal mvc_done,instruction_zero,qp_on,partition_done_fp,partition_done_qp,mux_control_write,mux_control_read,qp_mode,start_qp,qp_pixels_valid,hp_pixels_valid,hp_interpolation_done,data_request_hp_inter,all_done_fp,all_done_qp,start_row,reset_fifo_fp,reset_fifo_qp,reset_fifo1_qp,reset_fifo2_qp,reset_fifo1_fp,reset_fifo2_fp,fifo_enable_w1,fifo_enable_w2,fifo_enable_r1,fifo_enable_r2,enable_fifo_fp,enable_fifo_qp,winner1_fp,winner1_qp,start,write_register,write_interpolate_register,next_rm_address_ready,shift_concatenate_valid_qp,shift_concatenate_valid_fp,rma_we_qp,rma_we_fp,rma_we1_qp,rma_we2_qp,rma_we1_fp,rma_we2_fp,write_block1_qp,load_mv_fp,load_mv_qp,update_fp,update_qp,calculate_sad_done_qp,calculate_sad_done_fp,enable_hp_inter : std_logic;
-- reference memory addresses
signal rm_address_r,rm_address_w : std_logic_vector(10 downto 0);
signal rm_address_c : std_logic_vector(9 downto 0);
signal best_eu,best_eu_qp,mvc_to_do : std_logic_vector(3 downto 0);
signal next_rm_address,int_rm_address : std_logic_vector(13 downto 0);
-- 64-bit pixel data words
signal current_pixels,current_pixels_fp,current_pixels_qp,current_pixels_m1,current_pixels_m2,reference_data_in1_fp,reference_data_in2_fp,reference_data_in1_qp,reference_data_in2_qp,reference_data_in_fp,reference_data_in_qp : std_logic_vector(63 downto 0);
signal reference_pixels_in,reference_pixels_in2,residue_out_fp,residue_out_qp,residue_out_1_2,residue_out_2_2,residue_out_1_1,residue_out_2_1 : std_logic_vector(63 downto 0);
signal out_pixels_fp,out_pixels_qp,qp_pixels_out_a,qp_pixels_out_b,hp_pixels_out_a,hp_pixels_out2_a,hp_pixels_out_b,hp_pixels_out2_b,reference_pixels_out_qp_a,reference_pixels_out_qp_b,reference_pixels_out_fp : std_logic_vector(63 downto 0); --for the interpolate unit
signal rma_address_qp,rma_address_fp  : std_logic_vector(4 downto 0);
signal hp_address_a, hp_address_b : std_logic_vector(2 downto 0);
--signal sad : std_logic_vector(15 downto 0);
-- two least significant bits of the fp candidate MV components (tied to zero when no qp pipeline is configured)
signal qpel_loc_x,qpel_loc_y : std_logic_vector(1 downto 0);
-- DMA write data for the point memories (low 16 bits of dma_data_in) and program memories (low 20 bits)
signal point_memory_data_in : std_logic_vector(15 downto 0);
signal program_memory_data_in : std_logic_vector(19 downto 0);
signal partition_mode : mode_type;
signal active_pipelines : std_logic_vector(CFG_PIPELINE_COUNT-1 downto 0);
signal active_pipelines_qp : std_logic_vector(CFG_PIPELINE_COUNT_QP-1 downto 0);
signal rest_mvx_c,rest_mvy_c : rest_type_points;
signal rest_p_cost_mv : rest_type_displacement;
 
begin
 
 
-- constant helper levels
one_bit  <= '1';
zero_bit <= '0';

-- expose the qp-active flag on the debug port
qp_on_debug <= qp_on;

-- program memory for fp engine:
-- write data is the low 20 bits of the DMA word; the address comes from the
-- DMA bus while the program memory is being written, otherwise from the fp
-- instruction address
program_memory_data_in <= dma_data_in(19 downto 0);
program_memory_address <= instruction_address_fp when dma_prm_we /= '1'
                          else dma_address(7 downto 0);
 
-- Encode the partition mode for the debug port:
-- m8x8 gives "0001"; m16x16 and every other mode give "0000".
mode_process : with partition_mode select
	partition_mode_debug <= "0001" when m8x8,
	                        "0000" when others;
 
 
-- fp program memory; its output word is split into opcode / point count / point address
program_memory1 : program_memory
	port map (
		clk  => clk,
		we   => dma_prm_we,
		addr => program_memory_address,
		din  => program_memory_data_in,
		dout => point_count_position_fp
	);
 
-- program memory for qp engine (only built when one qp pipeline is configured);
-- it shares the write enable and write data with the fp program memory

program_memory2_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

	-- DMA address while writing, qp instruction address otherwise
	program_memory_address_qp <= instruction_address_qp when dma_prm_we /= '1'
	                             else dma_address(7 downto 0);

	program_memory2 : program_memory
		port map (
			clk  => clk,
			we   => dma_prm_we,
			addr => program_memory_address_qp,
			din  => program_memory_data_in,
			dout => point_count_position_qp
		);

end generate;
 
-- without a qp pipeline the qp instruction word is tied to all zeros
no_qpgen0 : if CFG_PIPELINE_COUNT_QP = 0 generate
	point_count_position_qp <= x"00000";
end generate;
 
-- make sure that MVs are not out of range (checks the unclamped fp candidate)
range_checker1 : range_checker
	port map (
		clk               => clk,
		clear             => clear,
		reset             => reset,
		mbx_coordinate    => mbx_coordinate,
		mby_coordinate    => mby_coordinate,
		frame_dimension_x => frame_dimension_x,
		frame_dimension_y => frame_dimension_y,
		candidate_mvx     => candidate_mvx_fp,
		candidate_mvy     => candidate_mvy_fp,
		range_ok          => range_ok
	);
 
-- turns the clamped candidate MV into the next reference memory address
phy_address1 : phy_address
	port map (
		clk             => clk,
		clear           => clear,
		reset           => reset,
		mvx             => candidate_mvx_int,
		mvy             => candidate_mvy_int,
		line_offset     => line_offset,
		partition_count => partition_count,  -- identify the subpartition active
		phy_address     => next_rm_address
	);
 
-- fp instruction word: [19:16] opcode, [15:8] point count, [7:0] point address
instruction_fp   <= point_count_position_fp(19 downto 16);
point_count_fp   <= point_count_position_fp(15 downto 8);
point_address_fp <= point_count_position_fp(7 downto 0);

-- qp instruction word, same layout
instruction_qp   <= point_count_position_qp(19 downto 16);
point_count_qp   <= point_count_position_qp(15 downto 8);
point_address_qp <= point_count_position_qp(7 downto 0);

-- fp candidate MV = starting MV + displacement, per component
-- (x in the upper byte, y in the lower byte; 8-bit sums)
candidate_mvx_fp <= mv_displacement_fp(15 downto 8) + first_mv_fp(15 downto 8);
candidate_mvy_fp <= mv_displacement_fp(7 downto 0) + first_mv_fp(7 downto 0);
 
-- check that MVX is in the reference area:
-- values above 47 are clamped to x"2f", values below -48 to x"d0"
in_range_x : process(candidate_mvx_fp)
	constant MVX_MAX : integer := 47;
	constant MVX_MIN : integer := -48;
begin
	-- default: pass the candidate through unchanged
	candidate_mvx_int <= candidate_mvx_fp;

	-- signed comparisons; a later assignment overrides the default
	if candidate_mvx_fp > MVX_MAX then
		candidate_mvx_int <= x"2f";
	elsif candidate_mvx_fp < MVX_MIN then
		candidate_mvx_int <= x"d0";
	end if;
end process;
 
-- check that MVY is in the reference area:
-- values above 31 are clamped to x"1f", values below -32 to x"e0"
in_range_y : process(candidate_mvy_fp)
	constant MVY_MAX : integer := 31;
	constant MVY_MIN : integer := -32;
begin
	-- default: pass the candidate through unchanged
	candidate_mvy_int <= candidate_mvy_fp;

	-- signed comparisons; a later assignment overrides the default
	if candidate_mvy_fp > MVY_MAX then
		candidate_mvy_int <= x"1f";
	elsif candidate_mvy_fp < MVY_MIN then
		candidate_mvy_int <= x"e0";
	end if;
end process;
 
 
-- this has to change: the first mv for qp should be the winner from fp
-- qp candidate MV = qp starting MV + qp displacement, per component
candidate_mvx_qp <= mv_displacement_qp(15 downto 8) + first_mv_qp(15 downto 8);
candidate_mvy_qp <= mv_displacement_qp(7 downto 0) + first_mv_qp(7 downto 0);

-- with one qp pipeline: the two least significant bits of the fp candidate
qpgen11 : if CFG_PIPELINE_COUNT_QP = 1 generate
	qpel_loc_x <= candidate_mvx_fp(1 downto 0);
	qpel_loc_y <= candidate_mvy_fp(1 downto 0);
end generate;

-- with no qp pipeline: tied to zero
no_qpgen11 : if CFG_PIPELINE_COUNT_QP = 0 generate
	qpel_loc_x <= "00";
	qpel_loc_y <= "00";
end generate;
 
-- fp point memory
 
--point_memory_fp : point_memory
--	port map(
--	addr =>next_point_fp,
--	clk =>clk,
--	dout => next_point_displacement_fp
--);
 
-- fp point memory: written over the DMA bus (low 16 bits of the DMA word),
-- otherwise read at the next point address
point_memory_data_in <= dma_data_in(15 downto 0);
point_memory_address <= next_point_fp when dma_pom_we /= '1'
                        else dma_address(7 downto 0);

point_memory_fp : point_memory
	port map (
		clk  => clk,
		we   => dma_pom_we,
		addr => point_memory_address,
		din  => point_memory_data_in,
		dout => next_point_displacement_fp
	);
 
 
-- generate enough memories to hold the point memories for each additional pipeline;
-- all of them are written together with the same DMA address and data

generate_pipelines1 : for pipe in 1 to CFG_PIPELINE_COUNT - 1 generate
begin

	-- once mvc_done is no longer '0' each pipeline reads its own point
	-- (base address + pipeline index); before that all read the base address
	rest_next_point_fp(pipe) <= next_point_fp + pipe when mvc_done /= '0'
	                            else next_point_fp;

	-- DMA address while the point memories are being written
	rest_point_memory_address(pipe) <= rest_next_point_fp(pipe) when dma_pom_we /= '1'
	                                   else dma_address(7 downto 0);

	rest_point_memory_fp : point_memory
		port map (
			clk  => clk,
			we   => dma_pom_we,
			addr => rest_point_memory_address(pipe),
			din  => point_memory_data_in,
			dout => rest_next_point_displacement_fp(pipe)
		);

end generate;
 
 
-- generate integer pipelines: one fp_pipeline per additional pipeline index

generate_pipelines2 : for pipe in 1 to CFG_PIPELINE_COUNT - 1 generate
begin

	fp_pipelines1 : fp_pipeline
		port map (
			clk                        => clk,
			clear                      => clear,
			reset                      => reset,
			-- per-pipeline search inputs
			next_point_displacement_fp => rest_next_point_displacement_fp(pipe),  -- next point to be processed
			first_mv_fp                => rest_first_mv_fp(pipe),                 -- first point to start the search from
			start                      => rest_start_pipeline(pipe),              -- enable the pipeline by main me
			-- inputs common to all pipelines
			start_mb                   => start,                                  -- once per macroblock
			mode_in                    => partition_mode,
			partition_count            => partition_count,                        -- identify the subpartition active
			frame_dimension_x          => frame_dimension_x,                      -- in mb
			frame_dimension_y          => frame_dimension_y,
			mbx_coordinate             => mbx_coordinate,                         -- in mb
			mby_coordinate             => mby_coordinate,
			-- port for Lagrangian optimization
			candidate_mvx              => rest_mvx_c(pipe),
			candidate_mvy              => rest_mvy_c(pipe),
			mv_cost                    => rest_p_cost_mv(pipe),
			mv_cost_on                 => mv_cost_on,                             -- enable mv cost
			all_done_fp                => all_done_fp,
			-- per-pipeline results
			calculate_sad_done         => rest_calculate_sad_done(pipe),
			best_sad_fp                => rest_best_sad_fp(pipe),
			best_mv_fp                 => rest_best_mv_fp(pipe),
			-- memory interfaces
			dma_address                => dma_address,                            -- next reference memory address
			mb_data_in                 => current_pixels_fp,                      -- shared mb memory
			dma_data_in                => dma_data_in,                            -- pixel in for reference memory
			dma_rm_we                  => dma_rm_we,                              -- enable writing to reference memory
			--dma_cm_we                => dma_cm_we,                              -- enable writing to current macroblock memory
			dma_residue_out            => open,                                   -- residue output left unconnected here
			dma_re_re                  => dma_re_re                               -- enable reading residue
		);

end generate;
 
 
 
-- qp point memory (only built when one qp pipeline is configured);
-- it shares the write enable and write data with the fp point memories
point_memory_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

	-- DMA address while writing, next qp point otherwise
	point_memory_address_qp <= next_point_qp when dma_pom_we /= '1'
	                           else dma_address(7 downto 0);

	point_memory_qp : point_memory
		port map (
			clk  => clk,
			we   => dma_pom_we,
			addr => point_memory_address_qp,
			din  => point_memory_data_in,
			dout => next_point_displacement_qp
		);

end generate;
 
no_qpgen1 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- No qp point memory is instantiated in this configuration, so its
    -- output is tied to all zeros.
    next_point_displacement_qp <= (others => '0');

end generate;
 
-- displace the mv by 2 pixels to define the interpolation area when interpolation active
 
-- While enable_hp_inter is anything other than '0' the fp displacement is
-- replaced by the constant x"14FC" (annotated "(20,-4)" in the original);
-- otherwise next_point_displacement_fp passes through unchanged.
mv_displacement_fp <= next_point_displacement_fp when enable_hp_inter = '0'
                      else x"14FC";

-- The qp displacement is forwarded as is.
mv_displacement_qp <= next_point_displacement_qp;

-- Export the internal copy of the done interrupt on the entity port.
done_interrupt <= done_interrupt_int;
 
register_file1 : register_file
    generic map (
        integer_pipeline_count => (CFG_PIPELINE_COUNT)
    )
    port map (
        -- clock and resets
        clk   => clk,
        clear => clear,
        reset => reset,

        -- external register access (entity ports)
        addr     => register_file_address,
        write    => register_file_write,
        data_in  => register_file_data_in,
        data_out => register_file_data_out,

        -- control and status
        start             => start,
        start_row         => start_row,
        mode_out          => partition_mode,
        mv_cost_on        => mv_cost_on,          -- activate the costing of mvs
        all_done_fp       => all_done_fp,
        all_done_qp       => all_done_qp,
        mvc_to_do         => mvc_to_do,
        mvc_done          => mvc_done,            -- all motion vector candidates evaluated
        instruction_zero  => instruction_zero,
        partition_done_fp => partition_done_fp,
        partition_done_qp => partition_done_qp,
        partition_count   => partition_count,
        done_interrupt    => done_interrupt_int,
        load_mv           => load_mv_fp,          -- force the mvc to move forward

        -- fp results and starting vectors
        update_fp        => update_fp,
        best_sad_fp      => best_sad_out_fp,
        best_mv_fp       => best_mv_out_fp,
        first_mv_fp      => first_mv_fp,
        rest_first_mv_fp => rest_first_mv_fp,

        -- qp results and starting vector
        update_qp   => update_qp,
        best_sad_qp => best_sad_out_qp,
        best_mv_qp  => best_mv_out_qp,
        first_mv_qp => first_mv_qp,

        -- macroblock position, frame size, predictor and quantizer values
        mbx_coordinate    => mbx_coordinate,
        mby_coordinate    => mby_coordinate,
        frame_dimension_x => frame_dimension_x,
        frame_dimension_y => frame_dimension_y,
        mvp_x             => mvp_x,
        mvp_y             => mvp_y,
        quant_parameter   => quant_parameter
    );
 
 
compact_memory0 : if CFG_CM = 0 generate

    -- Large reference memory with separate read and write addresses.
    -- (The original note reads "This memory stores the 7x5" and is left
    -- unfinished in the source.)
    reference_memory_large : reference_memory64_remap
        port map (
            -- clock and resets
            clk   => clk,
            reset => reset,
            clear => clear,

            -- configuration control
            enable_hp_inter    => enable_hp_inter,  -- working in interpolation mode
            next_configuration => start,            -- start is used to move between configurations
            start              => start,
            start_row          => start_row,

            -- write side (DMA)
            addr_w => rm_address_w,
            din    => dma_data_in,
            we     => dma_rm_we,

            -- read side
            addr_r => rm_address_r,
            dout   => reference_pixels_in,
            dout2  => reference_pixels_in2
        );

end generate;
 
compact_memory1 : if CFG_CM = 1 generate

    -- The compact memory has a single address input: the write address is
    -- selected while dma_rm_we is high, the read address otherwise. Only
    -- bits 9 downto 0 of either address are used.
    rm_address_c <= rm_address_w(9 downto 0) when dma_rm_we = '1'
                    else rm_address_r(9 downto 0);

    -- Compact reference memory. Per the original note it stores the 5x5
    -- reference data (800 words of 64 bit) and also remaps the addresses.
    reference_memory_compact : reference_memory64_remap_compact
        port map (
            -- clock and resets
            clk   => clk,
            reset => reset,
            clear => clear,

            -- configuration control
            enable_hp_inter    => enable_hp_inter,  -- working in interpolation mode
            next_configuration => start,            -- move to the next configuration
            start_row          => start_row,

            -- shared address, write data and write enable
            addr => rm_address_c,
            din  => dma_data_in,
            we   => dma_rm_we,

            -- read data
            dout  => reference_pixels_in,
            dout2 => reference_pixels_in2           -- from the second read port
        );

end generate;
 
 
--when qp mode it is the systolic array which decides when data is valid
 
concatenate_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- Two concatenate units, one per interpolator output port (a and b).
    -- Per the original note, each unit makes sure that 8 valid pixels are
    -- assembled depending on the byte address.

    -- Port a: the only unit whose valid / quick_valid flags are used.
    concatenate_qp_a : concatenate64_qp
        port map (
            clk         => clk,
            clear       => clear,
            reset       => reset,
            enable      => hp_pixels_valid,
            addr        => hp_address_a,
            din         => hp_pixels_out_a,
            din2        => hp_pixels_out2_a,
            dout        => reference_pixels_out_qp_a,
            quick_valid => quick_valid_qp,              -- same as valid but one cycle earlier
            valid       => shift_concatenate_valid_qp   -- 64 valid bits are present on the output
        );

    -- Port b: same wiring pattern, flag outputs left unconnected.
    concatenate_qp_b : concatenate64_qp
        port map (
            clk         => clk,
            clear       => clear,
            reset       => reset,
            enable      => hp_pixels_valid,
            addr        => hp_address_b,
            din         => hp_pixels_out_b,
            din2        => hp_pixels_out2_b,
            dout        => reference_pixels_out_qp_b,
            quick_valid => open,
            valid       => open
        );

end generate;
 
no_qpgen2 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- The qp concatenate units are absent in this configuration: tie their
    -- data outputs to zero and keep the valid flag permanently low.
    shift_concatenate_valid_qp <= '0';
    reference_pixels_out_qp_a  <= (others => '0');
    reference_pixels_out_qp_b  <= (others => '0');

end generate;
 
-- when no qp mode different concatenate unit
 
-- fp concatenate unit, present in every configuration. Per the original
-- note it makes sure that 8 valid pixels are assembled depending on the
-- byte address; here that address is the low three bits of int_rm_address.
concatenate_fp : concatenate64
    port map (
        clk             => clk,
        clear           => clear,
        reset           => reset,
        enable          => enable_concatenate_unit,
        enable_hp_inter => enable_hp_inter,             -- working in interpolation mode
        addr            => int_rm_address(2 downto 0),
        din             => reference_pixels_in,
        din2            => reference_pixels_in2,
        dout            => reference_pixels_out_fp,
        quick_valid     => quick_valid,                 -- same as valid but one cycle earlier
        valid           => shift_concatenate_valid_fp   -- 64 valid bits are present on the output
    );
 
-- half pel interpolation engine
 
interpolate2_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- Half pel interpolation engine. It is fed from the fp concatenate unit
    -- and delivers interpolated data on two ports (a and b).
    interpolate2 : systolic_array_top
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            -- mode selection
            enable_interpolation => enable_hp_inter,
            enable_estimation    => qp_mode,

            -- input stream and handshake
            data_in                 => reference_pixels_out_fp,
            data_in_valid           => shift_concatenate_valid_fp,
            data_request            => data_request_hp_inter,
            all_done                => hp_interpolation_done,
            next_point              => next_point_inter,            -- tells the main control unit that the next point is required
            shift_concatenate_valid => shift_concatenate_valid_qp,  -- need to know when 64 bits are valid

            -- candidate motion vector
            candidate_mvx => candidate_mvx_qp,
            candidate_mvy => candidate_mvy_qp,

            -- memory interface for the o, h, v and d memories
            hp_address_a   => hp_address_a,
            hp_address_b   => hp_address_b,
            data_out_a     => hp_pixels_out_a,                      -- interpolated data out, port a
            data_out2_a    => hp_pixels_out2_a,
            data_out_b     => hp_pixels_out_b,                      -- interpolated data out, port b
            data_out2_b    => hp_pixels_out2_b,
            data_out_valid => hp_pixels_valid                       -- 8 bytes of data are valid
        );

end generate;
 
no_qpgen3 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- Without the interpolation engine its handshake outputs stay inactive.
    data_request_hp_inter <= '0';
    hp_interpolation_done <= '0';
    next_point_inter      <= '0';
    hp_pixels_valid       <= '0';

    -- Address and data outputs are tied to zero.
    -- hp_pixels_out2_a / hp_pixels_out2_b are not assigned here; the only
    -- readers visible in this part of the file sit inside QP = 1 generates.
    hp_address_a    <= (others => '0');
    hp_address_b    <= (others => '0');
    hp_pixels_out_a <= (others => '0');
    hp_pixels_out_b <= (others => '0');

end generate;
 
-- data for qp engine always comes from the concatenate unit
-- The qp engine is always fed from the qp concatenate unit outputs:
-- the two pixel buses and their valid flag are simply renamed here.
qp_pixels_out_a <= reference_pixels_out_qp_a;
qp_pixels_out_b <= reference_pixels_out_qp_b;
qp_pixels_valid <= shift_concatenate_valid_qp;
 
-- Forward engine for the fp path: takes the pixels produced by the fp
-- concatenate unit and outputs pixels together with an address and a
-- write enable (rma_address_fp / rma_we_fp).
forward1 : forward_engine
    port map (
        -- clock and resets
        clk   => clk,
        clear => clear,
        reset => reset,

        -- mode information
        mode_in            => partition_mode,
        partition_count_in => partition_count,
        enable_hp_inter    => enable_hp_inter,              -- hp interpolation is being performed in the background

        -- input from the fp concatenate unit
        write_register => shift_concatenate_valid_fp,
        in_pixels      => reference_pixels_out_fp,

        -- outputs
        write_block1 => open,                               -- block selector output, not used at this level
        rma_address  => rma_address_fp,                     -- extracted reference pixels use this address
        rma_we       => rma_we_fp,
        out_pixels   => out_pixels_fp
    );
 
interpolate1_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- qp interpolate engine: consumes the two qp pixel streams (a and b)
    -- and outputs pixels together with an address and a write enable
    -- (rma_address_qp / rma_we_qp).
    interpolate1 : qp_interpolate_engine
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            -- mode selection
            qp_mode         => qp_mode,                     -- in qp mode two lines must be written to interpolate
            enable_hp_inter => enable_hp_inter,

            -- input streams
            write_interpolate_register => qp_pixels_valid,
            interpolate_in_pixels_a    => qp_pixels_out_a,
            interpolate_in_pixels_b    => qp_pixels_out_b,

            -- outputs
            write_block1           => open,                 -- block selector output, not used at this level
            rma_address            => rma_address_qp,       -- extracted reference pixels use this address
            rma_we                 => rma_we_qp,
            interpolate_out_pixels => out_pixels_qp
        );

end generate;
 
no_qpgen4 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- The qp interpolate engine is absent in this configuration: hold its
    -- single-bit outputs low and its buses at zero.
    rma_we_qp       <= '0';
    write_block1_qp <= '0';
    rma_address_qp  <= (others => '0');
    out_pixels_qp   <= (others => '0');

end generate;
 
-- Reference data for the two distance engines comes straight from the
-- forward engine (fp) and the interpolate engine (qp).
reference_data_in_fp <= out_pixels_fp;
reference_data_in_qp <= out_pixels_qp;

-- Reference memory addressing. Reads normally use bits 13 downto 3 of the
-- internal address; while dma_rm_re_debug is not '0' the DMA address is used
-- for reading instead. Writes always use the DMA address.
rm_address_w <= dma_address;
rm_address_r <= int_rm_address(13 downto 3) when dma_rm_re_debug = '0'
                else dma_address;

-- Debug tap on the first read port of the reference memory.
dma_rm_debug <= reference_pixels_in;
 
--These two memories will alternate if they are fp or qp
 
-- Write side: mux_control_write selects which current macroblock memory
-- receives dma_cm_we ('0' -> memory 1, '1' -> memory 2); the write enable of
-- the other memory is held low.
dma_cm_we_m1 <= dma_cm_we when mux_control_write = '0' else '0';
dma_cm_we_m2 <= dma_cm_we when mux_control_write = '1' else '0';

-- Read side: mux_control_read decides which memory serves the fp path and
-- which one serves the qp path. With '0' the fp path is attached to memory 1
-- and the qp path to memory 2; with '1' the assignment is swapped.
current_pixels_fp <= current_pixels_m1 when mux_control_read = '0'
                     else current_pixels_m2;
current_pixels_qp <= current_pixels_m1 when mux_control_read = '1'
                     else current_pixels_m2;

-- The read addresses follow the same selection as the read data above.
distance_engine_address_m1 <= rma_address_fp when mux_control_read = '0'
                              else rma_address_qp;
distance_engine_address_m2 <= rma_address_fp when mux_control_read = '1'
                              else rma_address_qp;
 
--two so you can write one while you read the other. Dual port is not enough since you cannot destroy the contents
 
-- Address selection for the two current macroblock memories: a memory whose
-- write enable is '0' is addressed by its distance engine address, otherwise
-- by the low five bits of the DMA address.
cm_address_m1 <= distance_engine_address_m1 when dma_cm_we_m1 = '0'
                 else dma_address(4 downto 0);
cm_address_m2 <= distance_engine_address_m2 when dma_cm_we_m2 = '0'
                 else dma_address(4 downto 0);

-- Current macroblock memory 1. Per the original note each of these memories
-- stores the current macroblock: 256 bytes (32 words x 64 bits).
current_macroblock_memory1 : current_macroblock_memory64
    port map (
        clk  => clk,
        we   => dma_cm_we_m1,
        addr => cm_address_m1,
        din  => dma_data_in,
        dout => current_pixels_m1
    );

-- Current macroblock memory 2, wired identically with the *_m2 signals.
current_macroblock_memory2 : current_macroblock_memory64
    port map (
        clk  => clk,
        we   => dma_cm_we_m2,
        addr => cm_address_m2,
        din  => dma_data_in,
        dout => current_pixels_m2
    );
 
me_control_unit_qp1_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- Control unit for the qp part. It is started by start_qp, which is
    -- produced by the main (fp) control unit.
    me_control_unit_qp1 : me_control_unit_qp
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            -- start-up and handshake
            start                   => start_qp,
            next_point_inter        => next_point_inter,
            shift_concatenate_valid => quick_valid_qp,
            calculate_sad_done      => calculate_sad_done_qp,
            best_eu                 => best_eu_qp,

            -- program fetch
            qp_starting_address => instruction_address_fp,
            instruction_address => instruction_address_qp,
            instruction_opcode  => instruction_qp,
            point_count         => point_count_qp,
            point_address       => point_address_qp,

            -- outputs
            next_point => next_point_qp,
            qp_mode    => qp_mode,              -- enable qp estimation
            qp_on      => qp_on,                -- qp active
            load_mv    => load_mv_qp,
            update     => update_qp,
            all_done   => all_done_qp           -- program completes
        );

end generate;
 
no_qpgen7 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- No qp control unit in this configuration: its flag outputs are held
    -- low and its address outputs are tied to zero.
    qp_on       <= '0';
    qp_mode     <= '0';                             -- enable qp estimation
    load_mv_qp  <= '0';
    update_qp   <= '0';
    all_done_qp <= '0';                             -- program completes

    next_point_qp          <= (others => '0');
    instruction_address_qp <= (others => '0');      -- address to fetch next instruction

end generate;
 
-- Debug tap on the best execution unit identifier.
best_eu_debug <= best_eu;

-- Main (fp) control unit.
-- Associations that are commented out in the original source and therefore
-- not connected: enable_dist_engine, write_block1, next_rm_address_ready,
-- rma_address and rma_we.
me_control_unit1 : me_control_unit
    generic map (
        integer_pipeline_count => (CFG_PIPELINE_COUNT)
    )
    port map (
        -- clock and resets
        clk   => clk,
        clear => clear,
        reset => reset,

        -- start-up and mode
        start    => start,
        range_ok => range_ok,
        mode_in  => partition_mode,
        qp_on    => qp_on,                                  -- qp on

        -- inputs used for decisions
        best_sad_in  => best_sad_out_fp,                    -- to make SAD-based decisions
        mv_length_in => mv_length_in,                       -- to make LENGTH-based decisions
        best_eu      => best_eu,

        -- motion vector candidates and partitions
        mvc_done            => mvc_done,                    -- all motion vector candidates evaluated
        mvc_to_do           => mvc_to_do,
        partition_count_out => partition_count,             -- identifies the active sub-partition
        partition_done      => partition_done_fp,

        -- pipeline control
        start_pipelines  => rest_start_pipeline,
        active_pipelines => active_pipelines,

        -- program fetch
        instruction_address => instruction_address_fp,      -- address to fetch next instruction
        instruction_opcode  => instruction_fp,              -- the opcode
        instruction_zero    => instruction_zero,
        point_count         => point_count_fp,              -- how many points to test
        point_address       => point_address_fp,            -- which is the first point to test
        next_point          => next_point_fp,

        -- concatenate unit and distance engine handshake
        shift_concatenate_valid => quick_valid,             -- valid output from the concatenate unit (64 bit ready)
        enable_concatenate_unit => enable_concatenate_unit,
        calculate_sad_done      => calculate_sad_done_fp,
        distance_engine_active  => distance_engine_active_fp,
        write_register          => write_register,
        load_mv                 => load_mv_fp,
        update                  => update_fp,
        all_done                => all_done_fp,

        -- interpolation and qp hand-over
        interpolation_done       => hp_interpolation_done,  -- interpolation completes
        interpolate_data_request => data_request_hp_inter,  -- interpolator requests data
        enable_hp_inter          => enable_hp_inter,
        qpel_loc_x               => qpel_loc_x,             -- detect qp mode
        qpel_loc_y               => qpel_loc_y,
        start_qp                 => start_qp,

        -- reference memory addressing
        line_offset      => line_offset,                    -- read the different lines of the reference macroblock
        next_rm_addresss => next_rm_address,                -- physical address for reference (macroblock upper left corner)
        rm_address       => int_rm_address                  -- internal reference memory addresses
    );
 
 
-- Value used by the control unit for its LENGTH-based decisions: the upper
-- byte of the best fp motion vector minus mvp_x, concatenated with the lower
-- byte minus mvp_y (i.e. the per-component difference to the predictor).
mv_length_in <= (best_mv_out_fp(15 downto 8) - mvp_x)
              & (best_mv_out_fp(7 downto 0) - mvp_y);
 
 
 
gen_mv_cost_qp : if (CFG_MV_COST = 1 and CFG_PIPELINE_COUNT_QP = 1) generate

    -- mv cost unit for the qp part; built only when both mv costing and the
    -- qp pipeline are configured.
    mv_cost_qp : mv_cost
        generic map (
            pipelines => CFG_PIPELINE_COUNT_QP
        )
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            load            => load_mv_qp,          -- start calculation of mv costs for qp
            quant_parameter => quant_parameter,

            -- predicted motion vector
            mvp_x => mvp_x,
            mvp_y => mvp_y,

            -- motion vector candidates
            mvx_c      => candidate_mvx_qp,
            mvy_c      => candidate_mvy_qp,
            rest_mvx_c => rest_mvx_c,               -- rest of motion vector candidates x
            rest_mvy_c => rest_mvy_c,               -- rest of motion vector candidates y

            -- resulting costs
            p_cost_mv      => p_cost_mv_qp,
            rest_p_cost_mv => open
        );

end generate;

no_gen_mv_cost_qp : if (CFG_MV_COST = 0 or CFG_PIPELINE_COUNT_QP = 0) generate

    -- No qp mv cost unit: the cost is a constant zero.
    p_cost_mv_qp <= (others => '0');

end generate;
 
gen_mv_cost_fp : if CFG_MV_COST = 1 generate

    -- mv cost unit for the fp part. It also delivers the costs for the
    -- additional pipelines through rest_p_cost_mv.
    mv_cost_fp : mv_cost
        generic map (
            pipelines => CFG_PIPELINE_COUNT
        )
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            load            => rest_start_pipeline(0),  -- start calculation of mv costs for fp
            quant_parameter => quant_parameter,

            -- predicted motion vector
            mvp_x => mvp_x,
            mvp_y => mvp_y,

            -- motion vector candidates
            mvx_c      => candidate_mvx_int,
            mvy_c      => candidate_mvy_int,
            rest_mvx_c => rest_mvx_c,                   -- rest of motion vector candidates x
            rest_mvy_c => rest_mvy_c,                   -- rest of motion vector candidates y

            -- resulting costs
            p_cost_mv      => p_cost_mv,
            rest_p_cost_mv => rest_p_cost_mv
        );

end generate;


no_gen_mv_cost_fp : if CFG_MV_COST = 0 generate

    -- mv costing not configured: the fp cost is a constant zero.
    p_cost_mv <= (others => '0');

end generate;
 
-- fp distance engine	
 
-- Distance engine of the fp path; present in every configuration.
distance_engine_fp : distance_engine64
    generic map (
        qp_mode => '0'
    )
    port map (
        -- clock and resets
        clk   => clk,
        clear => clear,
        reset => reset,

        -- control
        enable  => rma_we_fp,                       -- calculate when new data is available
        update  => update_fp,                       -- instruction completes: best sad register is set to FFFF
        load_mv => load_mv_fp,
        mode_in => partition_mode,

        -- motion vector cost
        mv_cost_on => mv_cost_on,
        mv_cost_in => p_cost_mv,

        -- candidate motion vector
        candidate_mvx => candidate_mvx_fp,
        candidate_mvy => candidate_mvy_fp,

        -- pixel data
        reference_data_in => reference_data_in_fp,
        current_data_in   => current_pixels_fp,

        -- residue and fifo control outputs
        residue_out => residue_out_fp,
        enable_fifo => enable_fifo_fp,
        reset_fifo  => reset_fifo_fp,
        winner1     => winner1_fp,

        -- status and results
        calculate_sad_done     => calculate_sad_done_fp,
        distance_engine_active => distance_engine_active_fp,
        best_sad               => best_sad_fp,
        best_mv                => best_mv_fp
    );
 
-- mv/sad selector
 
-- mv/sad selector for the fp part: receives the result of the main distance
-- engine (best_sad_fp / best_mv_fp) and of the additional pipelines
-- (rest_best_*), and outputs best_sad_out_fp / best_mv_out_fp together with
-- the identifier of the best execution unit.
sad_selector_fp : sad_selector
    generic map (
        integer_pipeline_count => CFG_PIPELINE_COUNT
    )
    port map (
        -- clock and resets
        clk   => clk,
        reset => reset,
        clear => clear,

        -- control
        calculate_sad_done => calculate_sad_done_fp,
        update             => partition_done_fp,
        update_fp          => update_fp,            -- end of iteration
        active_pipelines   => active_pipelines,

        -- candidates
        best_sad      => best_sad_fp,
        best_mv       => best_mv_fp,
        rest_best_sad => rest_best_sad_fp,
        rest_best_mv  => rest_best_mv_fp,

        -- selection result
        best_eu      => best_eu,                    -- id of best execution unit
        best_sad_out => best_sad_out_fp,
        best_mv_out  => best_mv_out_fp
    );

-- Debugging ports: they expose the main distance engine result, i.e. the
-- selector inputs, not the selector outputs.
best_sad_debug <= best_sad_fp;
best_mv_debug  <= best_mv_fp;
 
--qp distance engine
 
pipeline_qp_qp : if CFG_PIPELINE_COUNT_QP = 1 generate

    -- While all_done_fp is anything other than '0' the qp selector is fed
    -- with the final fp SAD (best_sad_out_fp) instead of the qp distance
    -- engine result, so the stored best fp sad is carried into the qp part.
    best_sad_qp <= best_sad_qp_distance_engine when all_done_fp = '0'
                   else best_sad_out_fp;

    -- qp distance engine
    -- NOTE(review): the generic qp_mode is '0' here, the same value the fp
    -- engine is given - confirm this is intended for the qp engine.
    distance_engine_qp : distance_engine64
        generic map (
            qp_mode => '0'
        )
        port map (
            -- clock and resets
            clk   => clk,
            clear => clear,
            reset => reset,

            -- control
            enable  => rma_we_qp,
            update  => update_qp,                   -- instruction completes: best sad register is set to FFFF
            load_mv => load_mv_qp,
            mode_in => partition_mode,

            -- motion vector cost
            mv_cost_on => mv_cost_on,
            mv_cost_in => p_cost_mv_qp,

            -- candidate motion vector
            candidate_mvx => candidate_mvx_qp,
            candidate_mvy => candidate_mvy_qp,

            -- pixel data
            reference_data_in => reference_data_in_qp,
            current_data_in   => current_pixels_qp,

            -- residue and fifo control outputs
            residue_out => residue_out_qp,
            enable_fifo => enable_fifo_qp,
            reset_fifo  => reset_fifo_qp,
            winner1     => winner1_qp,

            -- status and results
            calculate_sad_done     => calculate_sad_done_qp,
            distance_engine_active => open,
            best_sad               => best_sad_qp_distance_engine,
            best_mv                => best_mv_qp
        );

    -- mv/sad selector for the qp part
    sad_selector_qp1 : sad_selector_qp
        port map (
            -- clock and resets
            clk   => clk,
            reset => reset,
            clear => clear,

            -- control
            calculate_sad_done => calculate_sad_done_qp,
            active_pipelines   => active_pipelines_qp,
            update             => all_done_fp,      -- fp part complete: set the stored sad
            update_qp          => update_qp,

            -- candidates
            best_sad      => best_sad_qp,
            best_mv       => best_mv_qp,
            rest_best_sad => rest_best_sad_qp,
            rest_best_mv  => rest_best_mv_qp,

            -- selection result
            best_eu      => best_eu_qp,
            best_sad_out => best_sad_out_qp,
            best_mv_out  => best_mv_out_qp
        );

end generate;
 
 
no_qpgen8 : if CFG_PIPELINE_COUNT_QP = 0 generate

    -- Tie-offs for the configuration without a qp pipeline: neither the qp
    -- distance engine nor the qp sad selector is instantiated, so every
    -- signal they would drive gets a constant value here.

    -- qp distance engine outputs
    distance_engine_address_qp <= (others => '0');
    residue_out_qp             <= (others => '0');
    enable_fifo_qp             <= '0';
    reset_fifo_qp              <= '0';
    winner1_qp                 <= '0';
    calculate_sad_done_qp      <= '0';
    best_sad_qp                <= (others => '0');
    best_mv_qp                 <= (others => '0');

    -- qp sad selector outputs. register_file1 reads these two signals in
    -- every configuration, but their only driver in this part of the file is
    -- sad_selector_qp1 inside the QP = 1 generate; without these assignments
    -- the register file inputs would be left undriven in the no-qp build.
    best_sad_out_qp <= (others => '0');
    best_mv_out_qp  <= (others => '0');

end generate;
 
-- The ready flag is tied permanently high. (The association that would pass
-- it to me_control_unit1 is commented out in that instance's port map.)
next_rm_address_ready <= '1';
 
-- control the wiring of the memories
 
-- Selector registers for the two current macroblock memories.
--   clear : asynchronous, returns both selectors to '0'
--   reset : synchronous, returns both selectors to '0'
--   start : toggles the write selector
--   all_done_fp : toggles the read selector
-- NOTE(review): start has priority over all_done_fp, so the read selector is
-- not toggled in a cycle where both are high - confirm this cannot occur or
-- is intended.
regs : process (clk, clear)
begin

    if clear = '1' then
        mux_control_read  <= '0';
        mux_control_write <= '0';
    elsif rising_edge(clk) then
        if reset = '1' then
            mux_control_read  <= '0';
            mux_control_write <= '0';
        else
            if start = '1' then
                mux_control_write <= not mux_control_write;
            elsif all_done_fp = '1' then
                mux_control_read <= not mux_control_read;
            end if;
        end if;
    end if;

end process regs;
 
 
end;
 
 
 
 
 
 
 
 
 
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.