OpenCores
URL https://opencores.org/ocsvn/motion_estimation_processor/motion_estimation_processor/trunk

Subversion Repositories motion_estimation_processor

[/] [motion_estimation_processor/] [trunk/] [src_me/] [me_control_unit.vhd] - Rev 2

Compare with Previous | Blame | View Log

----------------------------------------------------------------------------
--  This file is a part of the LM VHDL IP LIBRARY
--  Copyright (C) 2009 Jose Nunez-Yanez
--
--  This program is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  See the file COPYING for the full details of the license.
--
--  The license allows free and unlimited use of the library and tools for research and education purposes. 
--  The full LM core supports many more advanced motion estimation features and it is available under a 
--  low-cost commercial license. See the readme file to learn more or contact us at 
--  eejlny@byacom.co.uk or www.byacom.co.uk
--------------------------------------
--  entity       = me_control_unit  --
--  version      = 1.0              --
--  last update  = 20/07/09         --
--  author       = Jose Nunez       --
--------------------------------------
 
 
-- main control unit for the motion estimation process
 
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_unsigned."+";
use IEEE.std_logic_unsigned."-";
use IEEE.std_logic_unsigned."=";
use IEEE.std_logic_unsigned.">";
use IEEE.std_logic_unsigned."<";
use IEEE.std_logic_unsigned.">=";
use IEEE.Numeric_STD.all;
use work.config.all;
 
 
entity me_control_unit is
generic ( integer_pipeline_count : integer := 1);
 port ( clk : in std_logic;
        clear : in std_logic;
        reset : in std_logic;
        start : in std_logic;
        range_ok : in std_logic; --keep track of the mv range
	  best_sad_in : in std_logic_vector(15 downto 0); -- to make SAD-based decisions
	  mv_length_in : in std_logic_vector(15 downto 0); -- to make LENGTH-based decisions
        mode_in : in mode_type;
 	     qp_on : in std_logic; -- qp on
 	  mvc_done : in std_logic; -- all motion vector candidates evaluated
	  mvc_to_do : in std_logic_vector(3 downto 0);
        partition_count_out : out std_logic_vector(3 downto 0); --identify the partition active
	  start_pipelines : out std_logic_vector((integer_pipeline_count-1) downto 0);
	  active_pipelines : out std_logic_vector((integer_pipeline_count-1) downto 0); -- so sad selector ignores the non active ones
        shift_concatenate_valid : in std_logic; -- valid output from the concantenate unit 
        interpolation_done : in std_logic; -- interpolation completes
        interpolate_data_request : in std_logic; -- interpolator requests data
        instruction_address : out std_logic_vector(7 downto 0); -- address to fetch next instruction
	  instruction_opcode : in std_logic_vector(3 downto 0); -- opcode
        point_count : in std_logic_vector(7 downto 0); -- how many points to test
        point_address : in std_logic_vector(7 downto 0); -- which is the first point to test
        calculate_sad_done : in std_logic; -- signals when the distance engine has finished
	  distance_engine_active : in std_logic; -- signals when distance engine is not running
        next_point : out std_logic_vector(7 downto 0); -- next point address to ROM
        line_offset : out std_logic_vector(5 downto 0); -- multiple line reading
	  enable_concatenate_unit : out std_logic;
	    -- enable_dist_engine : out std_logic;
        write_register : out std_logic;
	   best_eu : in std_logic_vector(3 downto 0); -- best execution unit
        load_mv : out std_logic;
        update : out std_logic;
	  instruction_zero : out std_logic; -- program completes after hitting instruction zero points
        all_done : out std_logic; -- fp part completes or program completes
	  partition_done : out std_logic;
        qpel_loc_x : in std_logic_vector(1 downto 0); -- detect qp mode
        qpel_loc_y : in std_logic_vector(1 downto 0);
        start_qp : out std_logic;
        enable_hp_inter : out std_logic; -- start the interpolation core
       --  write_block1 : out std_logic;
      --  next_rm_address_ready : in std_logic;
        next_rm_addresss : in std_logic_vector(13 downto 0); --physical address for reference (macroblock upper left corner 
        rm_address : out std_logic_vector(13 downto 0) -- reference memory read from address
	  --cm_address : out std_logic_vector(4 downto 0);  -- address to extract 4x4 blocks from current macroblock
	--  rma_address : out std_logic_vector(4 downto 0); -- reference macroblock write to address
  --   rma_we : out std_logic
      );
end;  
 
architecture struct of me_control_unit is
 
 
 
type state_type is (terminate,idle,fetch_instruction,access_point_memory,next_phy_address_ready,update_mv,wait_for_distance_engine,wait_for_distance_engine2,enable_concatenate_2,enable_concatenate,wait_for_phy_address,wait_for_phy_address2); -- me control unit states
 
type state_register_type is record
 
	state : state_type;
	partition_count : std_logic_vector(3 downto 0); -- to keep track of the sub-partitions
	best_eu : std_logic_vector(3 downto 0); -- for jump conditions
	  qp_mode : std_logic; -- processing a fraction instruction
      invalidate : std_logic; -- flag to invalidate instructions that follow jumps
	   enable_concatenate_unit : std_logic;
	   data_block : std_logic_vector(1 downto 0); --9 data blocks of 8 rows and 8 pixels for interpolation	 
	   enable_hp_inter : std_logic;
      instruction_address : std_logic_vector(7 downto 0); --counter keeps track of next instruction to execute
      instruction_opcode : std_logic_vector(3 downto 0);
	point_count : std_logic_vector(7 downto 0);  --register stores the number of points to test
	point_address : std_logic_vector(7 downto 0); -- register stores the first address of the points to test	
      points_tested : std_logic_vector(7 downto 0); -- counter that keeps track of the number of points tested
      phy_address : std_logic_vector(13 downto 0); -- address for the reference memory
      phy_address_ready : std_logic_vector(13 downto 0); -- stores a ready copy of the next reference memory address
      --rma_address : std_logic_vector(4 downto 0); -- address for the reference macroblock memory
      line_offset : std_logic_vector(5 downto 0); -- reading of multiple lines control
      line_count : std_logic_vector(4 downto 0); -- counter to keep track of the number of lines loaded in reference macroblock
      interpolation_done : std_logic; -- remember when interpolation has been done so not to do it again
	active_pipelines_r : std_logic_vector((integer_pipeline_count-1) downto 0); 
	condition_bit : std_logic; -- condition bit for jump instructions
  --    calculate_sad_done : std_logic;
      --write_block1 : std_logic; -- flag
end record;
 
signal r, r_in: state_register_type; -- state register
signal finish : std_logic;
 
 
begin
 
 
--rma_address <= (others => '0'); -- reference macroblock write to address
--cm_address <= (others => '0');  -- address to extract 4x4 blocks from current macroblock
all_done <= finish;
 
control: process(mvc_done,mvc_to_do,range_ok,instruction_opcode,best_eu,mode_in,r,qpel_loc_x,qpel_loc_y,interpolation_done,interpolate_data_request,start,next_rm_addresss,shift_concatenate_valid,calculate_sad_done,point_address,point_count)
 
variable v : state_register_type;
variable vfinish,vstart,vrma_we,vload_mv,vupdate,vstart_qp,vpartition_done,vinstruction_zero : std_logic; 
variable vwrite_register,venable_dist_engine : std_logic;
variable vrm_address : std_logic_vector(13 downto 0); -- generate rm address as soon as possible
variable vstart_pipelines : std_logic_vector((integer_pipeline_count-1) downto 0);
 
begin
 
--v.calculate_sad_done := calculate_sad_done;
vstart_qp := '0';
vpartition_done := '0';
vfinish := '0';
vinstruction_zero := '0'; --instruction zero points hit
v.qp_mode := r.qp_mode;
v.partition_count := r.partition_count;
v.data_block := r.data_block;
v.invalidate := r.invalidate;
v.best_eu := r.best_eu;
v.points_tested := r.points_tested;
v.instruction_address := r.instruction_address;
v.phy_address_ready := next_rm_addresss;
v.line_offset := r.line_offset;
v.point_count := r.point_count;
v.point_address := r.point_address;
v.phy_address := r.phy_address;
v.line_count := r.line_count;
v.enable_concatenate_unit := r.enable_concatenate_unit;
v.enable_hp_inter := r.enable_hp_inter;
v.instruction_opcode := (others =>'0');
--vnext_rm_address_ready := next_rm_address_ready;
v.state := r.state;
v.interpolation_done := r.interpolation_done;
vstart := start;
vload_mv := '0';
vrma_we := '0';
vupdate := '0';
vwrite_register := '0';
venable_dist_engine := '0';
vrm_address := r.phy_address;
vstart_pipelines :=  (others => '0'); -- all pipelines disable
v.active_pipelines_r := r.active_pipelines_r;
v.condition_bit := r.condition_bit;
 
-- std_logic_vector(to_unsigned(,integer_pipeline_count));
 
 
 
case v.state is
 
	when idle =>  -- first state, waiting for command register bit 31 to go high
		if (vstart = '1') then
			v.state := fetch_instruction;
			v.interpolation_done := '0';
			--if (mvc_done = '1') then  -- first evaluate all the mvcs
				v.instruction_address := v.instruction_address + x"01";
			--end if;
		end if;
 
	when fetch_instruction => -- execute instructions in the program firmware memory 
		--v.instruction_address := v.instruction_address + x"01";
		v.point_count := point_count;
      v.point_address := point_address;
		v.instruction_opcode := instruction_opcode;
	      case v.instruction_opcode is
			when "0000" => --full pel pattern instruction
				if (v.invalidate = '0') then
      				if (v.point_count = x"00") then -- finish => all instructions executed
						v.state := terminate;
 						v.instruction_address := (others => '0');
						v.condition_bit := '0'; -- reset condition bit
						--vfinish := '1'; -- clear the state when completing program
					else
						v.state := access_point_memory;	
					end if;		
				else
				    v.invalidate := '0'; -- clear flag
				    v.instruction_address := v.instruction_address + x"01";
				end if;
			when "0001" => -- fractional pel pattern instruction
				if CFG_PIPELINE_COUNT_QP = 1 then
					if (v.invalidate = '0') then
    						v.state := access_point_memory;	
				      		v.qp_mode := '1';	
					else
				    		v.invalidate := '0'; -- clear flag
				    		v.instruction_address := v.instruction_address + x"01";
					end if;
				end if;
			when "0010" => -- condional jump instruction (if best_eu == field A in instruction (point count) jumpp to point_address
			    if (v.best_eu = point_count(3 downto 0)) then
			        v.instruction_address := point_address;
			    	  v.invalidate := '1'; -- invalidate the next instruction so it does not execute
			     else
				  v.instruction_address := v.instruction_address + x"01";
			    	  v.invalidate := '0';
			     end if;
			when "0100" => -- conditional jump to label (if condition bit set jump to label)
			     if (v.condition_bit = '1') then
				  v.instruction_address := point_address;
			    	  v.invalidate := '1'; -- invalidate the next instruction so it does not execute
			     else
				  v.instruction_address := v.instruction_address + x"01";
			    	  v.invalidate := '0';
			     end if;
			     v.condition_bit := '0'; -- reset condition bit
			when "0101" => -- compare (if less than set condition bit)
				 if (point_count(7 downto 6) = "00") then -- reg field
				 	if (best_sad_in < point_count(5 downto 0) & point_address(7 downto 0)) then
				  		v.condition_bit := '1'; -- set condition bit
			     	end if;
				 else
				 	if (mv_length_in(14 downto 7) & mv_length_in(6 downto 0)) < (point_count(5 downto 0) & point_address(7 downto 0)) then
				  		v.condition_bit := '1'; -- set condition bit
			     	end if;
				 end if;
			     v.instruction_address := v.instruction_address + x"01";
			when "0110" => -- compare (if greater than set condition bit)
				 if (point_count(7 downto 6) = "00") then -- reg field
			     	if (best_sad_in > point_count(5 downto 0) & point_address(7 downto 0)) then
				  		v.condition_bit := '1'; -- set condition bit
			     	end if;
				 else
				 	if (mv_length_in(14 downto 7) & mv_length_in(6 downto 0)) > (point_count(5 downto 0) & point_address(7 downto 0)) then
				  		v.condition_bit := '1'; -- set condition bit
			     	end if;
				 end if;
			     v.instruction_address := v.instruction_address + x"01";
			when others => null;
			end case;
			-- unconditional jump
			-- conditional jump if condition bit
			-- compare instruction (less than)
	when access_point_memory => 
		v.enable_concatenate_unit := '0';
		 v.state := wait_for_phy_address;
	      if (r.qp_mode = '0') then   --only use slave fp pipelines if not qp mode
		v.active_pipelines_r(0) := '1';
		vstart_pipelines(0) := '1';
		for i in 1 to integer_pipeline_count-1 loop
						if (v.point_count-v.points_tested > i) then 
							vstart_pipelines(i) := '1';
							v.active_pipelines_r(i) := '1';
						else
							vstart_pipelines(i) := '0';
							v.active_pipelines_r(i) := '0';
                  			end if;
		end loop;
		end if;
	when wait_for_phy_address =>-- waiting for translation to finish
	  if (range_ok = '1')then
	  		v.state := wait_for_phy_address2;
			if CFG_PIPELINE_COUNT_QP = 1 then
	    		if (r.qp_mode = '1' and r.interpolation_done = '0') then -- jump if mvx or mvy qp instruction are qp fractional
		    		v.enable_hp_inter := '1';
		    		v.instruction_address := v.instruction_address + x"FF"; -- -1
		     		--v.qp_mode := '1';
		    		-- v.state := inter_wait_for_phy_address2;
		 		end if;
			end if;
	  else     --bypass point calculation if range is not good
			v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next point memory position
   	            v.points_tested := v.points_tested + std_logic_vector(to_unsigned(integer_pipeline_count,8)); -- new point ajusted depending of the number of integer pipelines
   	            if (v.points_tested >= r.point_count) then -- current instruction completes
   	                   v.state := wait_for_distance_engine2;
   	                   v.points_tested := (others => '0');
   	            else
   	                   v.state := access_point_memory;  
   	            end if; 
	  end if;
	when wait_for_phy_address2 => -- two cycles to get phy address
		 v.phy_address := next_rm_addresss; -- store phy address in register
		 v.line_offset := v.line_offset + "000001"; -- +1 to increase the y component
		 v.state := next_phy_address_ready;
		-- v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next 
 
 
 
    when next_phy_address_ready => -- start accessing reference memory
		v.enable_concatenate_unit := '0';
		v.phy_address := (v.phy_address(13 downto 3) + "00000000001") & next_rm_addresss(2 downto 0); -- plus 1 to read again part of the data (simple alignment)
        v.state := enable_concatenate;
 
		--v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next point memory position
		--vload_mv := '1'; -- load mv candidate in distance engine for future use
   	when enable_concatenate => 
   	        v.phy_address := v.phy_address + "00000000001000";
   	        v.enable_concatenate_unit := '1';
   	       -- if (shift_concatenate_valid = '1') then -- first part of line
   	           v.state := enable_concatenate_2;
   	           v.line_offset := v.line_offset + "000001"; -- +1 to increase the y componenent
			if (r.line_count = "01111") then
 
 
				v.line_offset := (others => '0');
			end if;
			if (r.line_count = "01110") then
				vload_mv := '1'; -- load mv candidate in distance engine for future use
				v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next 
			end if;
 
   	       -- end if;   
   	   when enable_concatenate_2 =>
		--	if (r.line_offset = "10000") then
		--		v.line_offset := (others => '0'); 
		--	end if;
   	    --    v.phy_address := (v.phy_address(13 downto 3) + "00000000010") & "000";
   	      --  v.enable_concatenate_unit := '1';
   	    --    if (shift_concatenate_valid = '1') then -- second part of line
			v.enable_concatenate_unit := '1';
   	            --v.state := enable_interpolate;
   	            v.line_count := v.line_count + "00001";
   	            --vrm_address := next_rm_addresss(12 downto 3) & "000"; -- set to "000" so the first new access does not interfere with the concatenation of the last bytes in the previous access        
   	            vrm_address := r.phy_address_ready;
   	            v.phy_address := next_rm_addresss + "00000000001000"; -- (plus 1 as well?) store phy address in register
		       --v.phy_address := (v.phy_address(11 downto 3) + "000000001") & next_rm_addresss(2 downto 0)
   	            v.state := enable_concatenate; -- next line  
   	      --  end if; 
	     	 if (r.line_count = "01110") then
				v.line_offset := (others => '0'); 
				--v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next 
			end if;
   	        if (r.line_count = "01111") then -- all the lines done
   	                 --v.enable_concatenate_unit := '0';
   	                --v.point_address := v.point_address + std_logic_vector(to_unsigned(integer_pipeline_count,8));  -- next point memory position
   	                v.points_tested := v.points_tested + std_logic_vector(to_unsigned(integer_pipeline_count,8)); -- new point ajusted depending of the number of integer pipelines
   	                v.line_count := (others=>'0');
		 	    v.phy_address := next_rm_addresss; -- store phy address in register
		          v.line_offset := v.line_offset + "000001"; -- +1 to increase the y component
	                if (v.points_tested >= r.point_count) then -- current instruction completes
   	                   v.state := wait_for_distance_engine;
   	                   v.points_tested := (others => '0');
				 v.line_offset := (others => '0'); 
				-- v.point_address := point_address; -- start accesing point memory earlier
   	                else
   	                   v.state := next_phy_address_ready; -- access should be ready;  
				 if (r.qp_mode = '0') then   --only use slave fp pipelines if not qp mode
					v.active_pipelines_r(0) := '1';
					vstart_pipelines(0) := '1';
					for i in 1 to integer_pipeline_count-1 loop
						if (v.point_count-v.points_tested > i) then
							vstart_pipelines(i) := '1';
							v.active_pipelines_r(i) := '1';
						else
							vstart_pipelines(i) := '0';
							v.active_pipelines_r(i) := '0';
                			end if;
					end loop;
				end if;
   	                end if;
   	              --  v.line_offset := (others => '0'); 
   	        end if;   
   	 when   wait_for_distance_engine=>
		v.enable_concatenate_unit := '0';
		if (calculate_sad_done= '1') then
	           v.state := update_mv;
		end if;  
	when wait_for_distance_engine2 =>
	     v.enable_concatenate_unit := '0';
	     if (distance_engine_active = '0') then
	          v.state := update_mv;
	     end if;
	when update_mv =>
		vupdate := '1';
	      v.best_eu := best_eu; --update the condition bit
		v.state := fetch_instruction;
		--if (mvc_done = '1') then  -- first evaluate all the mvcs
			v.instruction_address := v.instruction_address + x"01"; -- start reading the next instruction when arriving in fetch instruction state
  		--end if;
        when terminate =>
	   if (qp_on = '0') then -- qp must have finished
		    --  if (mode_in = m16x16) then
				vfinish := '1';
				vinstruction_zero := '1';
		      	v.state := idle;
		      	vpartition_done := '1';			
		end if;
	      --v.state := idle;
	  when others => null;
 
end case;
 
if (interpolation_done = '1') then
	v.interpolation_done := '1';
end if;
 
r_in <= v;
partition_count_out <= r.partition_count;
write_register <= vwrite_register;
load_mv <= vload_mv;
update <= vupdate;
rm_address <= vrm_address;
finish <= vfinish;
start_qp <= vstart_qp;
start_pipelines <= vstart_pipelines; --enable extra pipelines as required
partition_done <= vpartition_done;
instruction_zero <= vinstruction_zero;
 
end process control;
 
instruction_address <= r.instruction_address;
next_point <= r.point_address;
line_offset <= r.line_offset;
enable_hp_inter <= r.enable_hp_inter;
enable_concatenate_unit <= r_in.enable_concatenate_unit when r.enable_hp_inter = '0' else r.enable_concatenate_unit;
active_pipelines <= r.active_pipelines_r;
 
 
-- sequential part
 
regs: process (clk,clear)
 
begin
 
if (clear = '1') then
	r.partition_count <= (others => '0');
	r.state <= idle;
	r.qp_mode <= '0';
   r.instruction_address <= (others => '0');
   r.point_count <= (others => '0');
	r.point_address <= (others => '0');
	r.phy_address <= (others => '0');
	r.phy_address_ready <= (others => '0');
	r.line_count <= (others => '0');
   r.line_offset <= (others => '0');
   r.points_tested <= (others => '0');
	r.enable_hp_inter <= '0';
	r.data_block <= (others => '0');
	r.interpolation_done <= '0';
	r.invalidate <= '0';
	r.enable_concatenate_unit <= '0';
	 r.best_eu <= (others => '0');
	r.active_pipelines_r <= (others => '0');
	r.condition_bit <= '0';
  -- r.calculate_sad_done <= '0';
elsif rising_edge(clk) then
	if (reset = '1') then
	r.partition_count <= (others => '0');
		r.state <= idle;
		r.invalidate <= '0';
			r.qp_mode <= '0';
		r.instruction_address <= (others => '0');
   	  r.point_count <= (others => '0');
		r.point_address <= (others => '0');
	   r.phy_address <= (others => '0');
	  r.best_eu <= (others => '0');
	   r.phy_address_ready <= (others => '0');
	   r.line_count <= (others => '0');
	   r.line_offset <= (others => '0');
	   r.points_tested <= (others => '0');
	   r.enable_hp_inter <= '0';
	   r.data_block <= (others => '0');
	   r.interpolation_done <= '0';
	   r.enable_concatenate_unit <= '0';
	r.active_pipelines_r <= (others => '0');
	r.condition_bit <= '0';
	--   r.calculate_sad_done <= '0';
	else
		r <= r_in;
	end if;
end if;
 
end process regs;
 
end;
 
 
 
 
 
 
 
 
 
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.