OpenCores
URL https://opencores.org/ocsvn/bluespec-h264/bluespec-h264/trunk

Subversion Repositories bluespec-h264

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/trunk/build/Makefile
0,0 → 1,106
#=======================================================================
# 6.375 Makefile for bsc-compile
#-----------------------------------------------------------------------
# $Id: Makefile,v 1.1 2008-06-26 17:46:03 jamey.hicks Exp $
#
 
# 'default' is only an alias for 'all'.  It stays the first real target so a
# bare `make` picks it, and is declared phony so a stray file named 'default'
# cannot mask it.
.PHONY : default
default : all

# Root of the checkout, relative to this build directory.  No trailing slash:
# every use below is written $(basedir)/..., which would otherwise give '//'.
basedir = ..

#--------------------------------------------------------------------
# Sources
#--------------------------------------------------------------------

# Library components
# bsvclibsrcs is intentionally empty here; add library .bsv files (paths
# under $(bsvclibdir)) to have them copied in next to the project sources.

bsvclibdir = $(MIT6375_HOME)/install/bsvclib
bsvclibsrcs =

# Bluespec sources
# toplevel_module names both the module handed to `bsc -g` and the
# <name>.bsv / <name>.v files used by the build rules.

toplevel_module = mkTH

srcdir = $(basedir)/src
bsvsrcs = \
  $(srcdir)/BRAM.bsv \
  $(srcdir)/H264Types.bsv \
  $(srcdir)/ExpGolomb.bsv \
  $(srcdir)/CAVLC.bsv \
  $(srcdir)/IH264.bsv \
  $(srcdir)/IInputGen.bsv \
  $(srcdir)/INalUnwrap.bsv \
  $(srcdir)/IEntropyDec.bsv \
  $(srcdir)/ICalc_nC.bsv \
  $(srcdir)/IMemED.bsv \
  $(srcdir)/IInverseTrans.bsv \
  $(srcdir)/IPrediction.bsv \
  $(srcdir)/IInterpolator.bsv \
  $(srcdir)/IDeblockFilter.bsv \
  $(srcdir)/IBufferControl.bsv \
  $(srcdir)/IFrameBuffer.bsv \
  $(srcdir)/IFinalOutput.bsv \
  $(srcdir)/mkH264.bsv \
  $(srcdir)/mkInputGen.bsv \
  $(srcdir)/mkNalUnwrap.bsv \
  $(srcdir)/mkEntropyDec.bsv \
  $(srcdir)/mkCalc_nC.bsv \
  $(srcdir)/mkMemED.bsv \
  $(srcdir)/mkInverseTrans.bsv \
  $(srcdir)/mkPrediction.bsv \
  $(srcdir)/mkInterpolator.bsv \
  $(srcdir)/mkDeblockFilter.bsv \
  $(srcdir)/mkBufferControl.bsv \
  $(srcdir)/mkFrameBuffer.bsv \
  $(srcdir)/mkFinalOutput.bsv \
  $(srcdir)/mkTH.bsv
 
#--------------------------------------------------------------------
# Build rules
#--------------------------------------------------------------------
 
# Bluespec compiler executable.
BSC_COMP := bsc

# Flags handed to every bsc invocation.
# A more verbose alternative that has been used with this build:
#   -u -show-module-use -verilog -keep-fires -aggressive-conditions
#   -relax-method-earliness -relax-method-urgency -v
BSC_OPTS := \
  -u \
  -v \
  -verilog \
  -aggressive-conditions
 
# Copy over the bluespec source
 
# Static pattern rules: each bare file name in the build directory is a copy
# of the same-named file in the project source tree ...
$(notdir $(bsvsrcs)) : % : $(srcdir)/%
	cp $< $@

# ... or in the shared library directory.  The target list is empty while
# bsvclibsrcs is empty.
$(notdir $(bsvclibsrcs)) : % : $(bsvclibdir)/%
	cp $< $@
 
# Run the bluespec compiler
 
# Verilog file generated for the top-level module.
bsv_TH_vsrc = $(toplevel_module).v

# Compile the copied sources starting from the top-level package; bsc's
# standard output is captured in out.log.
# NOTE(review): bsv_lib_use is not assigned anywhere in this Makefile, so it
# expands to nothing here; it is kept in the target list in case it is
# supplied from the command line or environment.
$(bsv_TH_vsrc) $(bsv_lib_use) : $(notdir $(bsvsrcs) $(bsvclibsrcs))
	$(BSC_COMP) $(BSC_OPTS) -g $(toplevel_module) $(toplevel_module).bsv > out.log

# 'compile' is a command name, not a file: declare it phony so a file called
# 'compile' in the build directory cannot make it look up to date.
.PHONY : compile
compile : $(bsv_TH_vsrc)
 
# Create a schedule file
 
# Report file produced by re-running bsc with scheduling diagnostics enabled.
schedule_rpt = schedule.rpt

# Existing Verilog is removed first, then bsc is run with -show-schedule and
# -show-rule-rel for all rule pairs.  Both stdout and stderr go to the report.
# The redirection is spelled '> file 2>&1' because make runs recipes with
# /bin/sh by default, and the csh/bash form '>& file' is rejected by strict
# POSIX shells such as dash.
$(schedule_rpt) : $(notdir $(bsvsrcs) $(bsvclibsrcs))
	rm -rf *.v
	$(BSC_COMP) $(BSC_OPTS) -show-schedule -show-rule-rel \* \* -g $(toplevel_module) \
	  $(toplevel_module).bsv > $(schedule_rpt) 2>&1

# Files removed by 'clean'.  out.log is the log written by the compile rule;
# bsc.log is kept in the list for compatibility with the previous contents.
junk += $(notdir $(bsvsrcs)) $(notdir $(bsvclibsrcs)) \
  $(schedule_rpt) *.use *.bi *.bo *.v bsc.log out.log
 
#--------------------------------------------------------------------
# Default make target
#--------------------------------------------------------------------
 
# 'all' just runs the compile step; it is a command name, not a file.
.PHONY : all
all : compile
 
#--------------------------------------------------------------------
# Clean up
#--------------------------------------------------------------------
 
# Remove everything listed in $(junk) plus editor backup (*~) and autosave
# (#*) files.  Declared phony so a file named 'clean' cannot disable it.
.PHONY : clean
clean :
	rm -rf $(junk) *~ \#*
/trunk/src/mkInputGen_akiyo224nodeblock.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
module mkInputGen( IInputGen );

   // Highest address loaded from the hex image, and the index value that
   // means "every byte has been sent".
   Bit#(27) lastByteAddr = 130234;
   Bit#(27) endIndex     = lastByteAddr + 1;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("akiyo224x176_1-300_no_deblock.hex", 0, lastByteAddr);
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Stream the image one byte per firing, in address order.
   rule output_byte (index < endIndex);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", rfile.sub(index) );
      outfifo.enq(DataByte nextByte);
      index <= index + 1;
   endrule

   // index is not advanced here, so once the data is exhausted this rule
   // remains enabled and enqueues EndOfFile every time it fires.
   rule end_of_file (index == endIndex);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkInputGen_intersquid.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
module mkInputGen( IInputGen );

   // Highest address loaded from the hex image, and the index value that
   // means "every byte has been sent".
   Bit#(27) lastByteAddr = 4376240;
   Bit#(27) endIndex     = lastByteAddr + 1;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("inter_squid.hex", 0, lastByteAddr);
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Stream the image one byte per firing, in address order.
   rule output_byte (index < endIndex);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", rfile.sub(index) );
      outfifo.enq(DataByte nextByte);
      index <= index + 1;
   endrule

   // index is not advanced here, so once the data is exhausted this rule
   // remains enabled and enqueues EndOfFile every time it fires.
   rule end_of_file (index == endIndex);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkPrediction.bsv
0,0 → 1,2189
//**********************************************************************
// Prediction
//----------------------------------------------------------------------
//
//
 
package mkPrediction;
 
import H264Types::*;
 
import IPrediction::*;
import IInterpolator::*;
import mkInterpolator::*;
import FIFO::*;
import FIFOF::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Kind of macroblock queued for the output stage (element type of
// outstatefifo).  All tags are payload-free.  The declaration order of the
// tags determines the derived Bits encoding, so do not reorder them.
typedef union tagged
{
void Intra; //Intra non-4x4
void Intra4x4; // intra macroblock predicted as 4x4 blocks (enqueued for I_NxN)
void Inter; // inter macroblock, including skipped macroblocks
}
OutState deriving(Eq,Bits);
 
// State of the intra-prediction side of mkPrediction (type of the
// intrastate register).  All tags are payload-free.
typedef union tagged
{
void Start; //not working on anything in particular
void Intra16x16; // entered when an Intra_16x16 macroblock type arrives
void Intra4x4; // entered when an I_NxN macroblock type arrives
void IntraPCM; // NOTE(review): not entered in the visible code; I_PCM is reported as not implemented
}
IntraState deriving(Eq,Bits);
 
// State of the inter-prediction side of mkPrediction (type of the
// interstate register).  Apart from Start, each tag mirrors the macroblock
// type that selected it (P_L0_16x16, P_L0_L0_16x8, P_L0_L0_8x16, P_8x8,
// P_8x8ref0) or a skipped macroblock.  All tags are payload-free.
typedef union tagged
{
void Start; //not working on anything in particular
void InterP16x16;
void InterP16x8;
void InterP8x16;
void InterP8x8;
void InterP8x8ref0;
void InterPskip; // entered while an mb_skip_run is being consumed
}
InterState deriving(Eq,Bits);
 
// Per-block inter-prediction record kept for neighbouring blocks.
// Either the block has no motion data (NotInter) or it carries a reference
// index, a motion vector and a non-zero-coefficient flag (BlockMv).
// The BlockMv fields total 4+14+12+1 = 31 bits; packed values of this type
// are exchanged with memory as 32-bit words in mkPrediction.
typedef union tagged
{
Bit#(1) NotInter;//0 for not available, 1 for intra-coded
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv;
}
InterBlockMv deriving(Eq,Bits);
 
// What the output stage should emit next (element type of nextoutputfifo).
// All tags are payload-free.
typedef union tagged
{
void SkipMB; // enqueued for each macroblock of an mb_skip_run
void NonSkipMB; // enqueued for a non-skipped inter macroblock
void Intra4x4; // NOTE(review): only tested, never enqueued, in the visible part of the file
void Intra4x4PlusChroma; // NOTE(review): not referenced in the visible part of the file
}
NextOutput deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Pick one byte out of a 72-bit (nine byte) value.
//   idx 0..7  -> byte idx+1 (bits [8*idx+15 : 8*idx+8])
//   otherwise -> byte 0     (bits [7:0])
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx );
   // Start from the fallback byte; the case below overrides it for idx 0..7.
   Bit#(8) picked = valVector[7:0];
   case(idx)
      0: picked = valVector[15:8];
      1: picked = valVector[23:16];
      2: picked = valVector[31:24];
      3: picked = valVector[39:32];
      4: picked = valVector[47:40];
      5: picked = valVector[55:48];
      6: picked = valVector[63:56];
      7: picked = valVector[71:64];
   endcase
   return picked;
endfunction
 
// Pick one byte out of a 40-bit (five byte) value.
//   idx 0..3  -> byte idx+1 (bits [8*idx+15 : 8*idx+8])
//   otherwise -> byte 0     (bits [7:0])
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx );
   // Start from the fallback byte; the case below overrides it for idx 0..3.
   Bit#(8) picked = valVector[7:0];
   case(idx)
      0: picked = valVector[15:8];
      1: picked = valVector[23:16];
      2: picked = valVector[31:24];
      3: picked = valVector[39:32];
   endcase
   return picked;
endfunction
 
// Return byte number idx of a 32-bit word, byte 0 being bits [7:0].
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx );
   // The upper index bit chooses the 16-bit half, the lower bit the byte in it.
   Bit#(16) half = (idx[1] == 1) ? valVector[31:16] : valVector[15:0];
   return (idx[0] == 1) ? half[15:8] : half[7:0];
endfunction
 
// Return the low byte (idx 0) or the high byte (idx 1) of a 16-bit value.
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx );
   return (idx == 1) ? valVector[15:8] : valVector[7:0];
endfunction
 
// True when two 14-bit two's-complement values differ by at least 4.
// Both operands are sign-extended to 15 bits before comparing, so adding 4
// to the smaller one cannot wrap.
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 );
   Int#(15) a = unpack(signExtend(val1));
   Int#(15) b = unpack(signExtend(val2));
   Bool firstIsLarger = (a >= b);
   Int#(15) larger  = firstIsLarger ? a : b;
   Int#(15) smaller = firstIsLarger ? b : a;
   return (larger >= (smaller + 4));
endfunction
 
// True when two 12-bit two's-complement values differ by at least 4.
// Both operands are sign-extended to 13 bits before comparing, so adding 4
// to the smaller one cannot wrap.
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 );
   Int#(13) a = unpack(signExtend(val1));
   Int#(13) b = unpack(signExtend(val2));
   Bool firstIsLarger = (a >= b);
   Int#(13) larger  = firstIsLarger ? a : b;
   Int#(13) smaller = firstIsLarger ? b : a;
   return (larger >= (smaller + 4));
endfunction
 
 
//-----------------------------------------------------------
// Prediction Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkPrediction( IPrediction );
 
//Common state
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size);
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size);
FIFO#(EntropyDecOT) outfifo <- mkFIFO;
Reg#(Bool) passFlag <- mkReg(True);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);
 
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb
 
FIFOF#(OutState) outstatefifo <- mkFIFOF;
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF;
Reg#(Bit#(4)) outBlockNum <- mkReg(0);
Reg#(Bit#(4)) outPixelNum <- mkReg(0);
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size);
Reg#(Bit#(1)) outChromaFlag <- mkReg(0);
Reg#(Bool) outFirstQPFlag <- mkReg(False);
 
DoNotFire donotfire <- mkDoNotFire();
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU();
//Inter state
Interpolator interpolator <- mkInterpolator();
Reg#(InterState) interstate <- mkReg(Start);
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0);
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU();
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO;
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU();
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO;
Reg#(Bit#(3)) interReqCount <- mkReg(0);
Reg#(Bit#(3)) interRespCount <- mkReg(0);
 
Reg#(Bit#(1)) interStepCount <- mkReg(0);
Reg#(Bit#(2)) interMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interPassingCount <- mkReg(0);
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU();
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU();
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full();
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0);
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO;
Reg#(Bit#(5)) interNewestMv <- mkReg(0);
Reg#(Bit#(2)) interIPStepCount <- mkReg(0);
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0);
 
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0);
 
Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU();
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU();
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32);
Reg#(Bool) interBSoutput <- mkReg(True);
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8);
//Intra state
Reg#(IntraState) intrastate <- mkReg(Start);
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0);
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO;
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU;
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO;
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0);
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU();
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU();
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0);
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0);
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16);
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO;
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0);
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0);
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0);
 
Reg#(Bit#(3)) intraReqCount <- mkReg(0);
Reg#(Bit#(3)) intraRespCount <- mkReg(0);
Reg#(Bit#(4)) intraStepCount <- mkReg(0);
Reg#(Bit#(13)) intraSumA <- mkReg(0);
Reg#(Bit#(15)) intraSumB <- mkReg(0);
Reg#(Bit#(15)) intraSumC <- mkReg(0);
 
//-----------------------------------------------------------
// Rules
 
//////////////////////////////////////////////////////////////////////////////
// rule stateMonitor ( True );
// if(predictedfifo.notEmpty())
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);////////////////////
// endrule
//////////////////////////////////////////////////////////////////////////////
 
 
rule checkFIFO ( True );
$display( "Trace Prediction: checkFIFO %h", infifo_ITB.first() );
endrule
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) );
$display( "Trace Prediction: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display("ccl4newunit");
$display("ccl4rbspbyte %h", xdata);
end
tagged SPSpic_width_in_mbs .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
picWidth <= xdata;
interpolator.setPicWidth(xdata);
end
tagged SPSpic_height_in_map_units .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
picHeight <= xdata;
interpolator.setPicHeight(xdata);
end
tagged PPSconstrained_intra_pred_flag .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
ppsconstrained_intra_pred_flag <= xdata;
end
tagged SHfirst_mb_in_slice .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
firstMb <= xdata;
currMb <= xdata;
currMbHor <= xdata;
currMbVer <= 0;
intra4x4typeLeft <= replicate(15);
interTopLeftVal <= replicate(NotInter 0);
if(xdata==0)
interLeftVal <= replicate(NotInter 0);
outFirstQPFlag <= True;
end
tagged SDmb_skip_run .xdata : passFlag <= False;
tagged SDMmbtype .xdata : passFlag <= False;
tagged EndOfFile :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display( "INFO Prediction: EndOfFile reached" );
//$finish(0);////////////////////////////////
end
default:
begin
infifo.deq();
outfifo.enq(infifo.first());
end
endcase
endrule
 
 
rule inputing ( !passFlag );
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged SDmb_skip_run .xdata :
begin
if(interstate==Start && intrastate==Start)
begin
if(interPskipCount < xdata)
begin
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata);
outstatefifo.enq(Inter);
interstate <= InterPskip;
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interPskipCount <= interPskipCount+1;
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
interCurrMbDiff <= interCurrMbDiff+1;
nextoutputfifo.enq(SkipMB);
end
else
donotfire.doNotFire();
end
else
begin
$display( "Trace Prediction: passing no SDmb_skip_run");
interPskipCount <= 0;
infifo.deq();
end
end
else
donotfire.doNotFire();
end
tagged SDMmbtype .xdata :
begin
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start)
begin
infifo.deq();
$display( "INFO Prediction: SDMmbtype %0d", xdata);
if(mbPartPredMode(xdata,0)==Intra_16x16)
begin
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra);
intrastate <= Intra16x16;
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3})
intra16x16_pred_mode <= tempv1;
else
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" );
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_NxN)
begin
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra4x4);
intrastate <= Intra4x4;
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_PCM)
begin
$display( "ERROR Prediction: I_PCM not implemented yet");
$finish;////////////////////////////////////////////////////////////////////////////////////////
intra4x4typeLeft <= replicate(13);
intra4x4typeTop <= replicate(13);
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
begin
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
outstatefifo.enq(Inter);
case(xdata)
P_L0_16x16: interstate <= InterP16x16;
P_L0_L0_16x8: interstate <= InterP16x8;
P_L0_L0_8x16: interstate <= InterP8x16;
P_8x8: interstate <= InterP8x8;
P_8x8ref0: interstate <= InterP8x8ref0;
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype");
endcase
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);/////////////////////////////////////////////////////////////////////////////
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
nextoutputfifo.enq(NonSkipMB);
end
else
donotfire.doNotFire();
end
interCurrMbDiff <= interCurrMbDiff+1;
end
else
donotfire.doNotFire();
end
tagged SDMMrem_intra4x4_pred_mode .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
rem_intra4x4_pred_mode.enq(xdata);
end
tagged SDMMintra_chroma_pred_mode .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
intra_chroma_pred_mode.enq(xdata);
end
tagged SDMMref_idx_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
if(interstate==InterP16x16 || interPassingCount==1)
interPassingCount <= 0;
else
interPassingCount <= interPassingCount+1;
end
tagged SDMMmvd_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
tagged SDMSsub_mb_type .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata);
interPassingCount <= interPassingCount+1;
end
tagged SDMSref_idx_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
interPassingCount <= interPassingCount+1;
end
tagged SDMSmvd_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
default: passFlag <= True;
endcase
endrule
 
rule outputing ( currMbHor<zeroExtend(picWidth) );
Bit#(1) outputFlag = 0;
Vector#(4,Bit#(8)) outputVector = replicate(0);
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]};
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]};
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]};
Bit#(4) totalVer = {blockVer,pixelVer};
//$display( "Trace Prediction: outputing" );
if(outFirstQPFlag)
begin
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata)
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata);
end
else
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()");
end
else if(nextoutputfifo.first() == SkipMB)
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector);
end
end
else
begin
case ( infifo_ITB.first() ) matches
tagged IBTmb_qp .xdata :
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata);
end
tagged ITBresidual .xdata :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2);
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2);
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True);
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
Bit#(11) tempOutputValue = 0;
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]);
if(tempOutputValue[10]==1)
outputVector[ii] = 0;
else if(tempOutputValue[9:0] > 255)
outputVector[ii] = 255;
else
outputVector[ii] = tempOutputValue[7:0];
end
outfifo.enq(PBoutput outputVector);
infifo_ITB.deq();
predictedfifo.deq();
outputFlag = 1;
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector);
end
end
tagged ITBcoeffLevelZeros :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
if(outPixelNum == 12)
infifo_ITB.deq();
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector);
end
end
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" );
endcase
end
if(outputFlag == 1)
begin
$display("ccl4PBoutput %0d", outputVector[0]);
$display("ccl4PBoutput %0d", outputVector[1]);
$display("ccl4PBoutput %0d", outputVector[2]);
$display("ccl4PBoutput %0d", outputVector[3]);
 
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1)
begin
intraMemReqQ.enq(intraMemReqQdelay);
interMemReqQ.enq(interMemReqQdelay);
//$display( "TRACE Prediction: passing storing addr data");//////////////////
end
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))
begin
if(outChromaFlag==0)
begin
Bit#(32) intraLeftValNextTemp = intraLeftValNext;
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0))
begin
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor));
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]);
end
case(pixelVer)
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]};
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]};
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]};
3:
begin
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp});
intraLeftValNext <= zeroExtend(outputVector[3]);
if(outstatefifo.first()==Intra4x4)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13);
else
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14);
end
endcase
end
else
begin
if(outBlockNum[2]==0)
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]);
else
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]);
end
end
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)))
begin
if(outChromaFlag==0)
begin
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]});
if(outstatefifo.first()==Intra4x4)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13);
else
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14);
end
else
begin
if(outBlockNum[2]==0)
begin
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0;
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma0 <= intraTopValChroma0Next;
end
else
begin
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1;
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma1 <= intraTopValChroma1Next;
end
end
end
 
if(outChromaFlag==1 && outBlockNum==7)
begin
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor);
InterBlockMv outBlockMv = interOutBlockMvfifo.first();
if(outBlockMv matches tagged BlockMv .bdata)
begin
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)});
interOutBlockMvfifo.deq();
end
else if(pixelVer==3)
interOutBlockMvfifo.deq();
if(pixelVer==3 && picWidth>1)
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)};
else
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)});
if(pixelVer>0)
begin
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)]));
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)];
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)];
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]});
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore});
if(pixelVer==3)
begin
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3]));
intraTopValStore = intraTopVal[3];
intraTopValChroma0Store = intraTopValChroma0[3];
intraTopValChroma1Store = {outputVector[3],outputVector[2]};
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore};
end
end
end
outPixelNum <= outPixelNum+4;
if(outPixelNum == 12)
begin
if(outChromaFlag==0)
begin
outBlockNum <= outBlockNum+1;
if(outBlockNum == 15)
outChromaFlag <= 1;
if(nextoutputfifo.first() == Intra4x4)
nextoutputfifo.deq();
end
else
begin
if(outBlockNum == 7)
begin
outBlockNum <= 0;
outChromaFlag <= 0;
currMb <= currMb+1;
currMbHor <= currMbHor+1;
interCurrMbDiff <= interCurrMbDiff-1;
outstatefifo.deq;
intrastate <= Start;
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1)
interpolator.endOfFrame();
nextoutputfifo.deq();
end
else
outBlockNum <= outBlockNum+1;
end
end
end
endrule
 
 
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) );
Bit#(PicAreaSz) temp = zeroExtend(picWidth);
if((currMbHor >> 3) >= temp)
begin
currMbHor <= currMbHor - (temp << 3);
currMbVer <= currMbVer + 8;
end
else
begin
currMbHor <= currMbHor - temp;
currMbVer <= currMbVer + 1;
end
//$display( "Trace Prediction: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule
 
 
// inter prediction rules
 
// interSendReq: issues the neighbour-data load requests for inter prediction,
// one request per firing, sequenced by interReqCount:
//   1..4 : address {column, interReqCount-1}
//   5    : address {column+1, 0} if a column exists to the right, otherwise
//          {column-1, 3} if the top-left neighbour exists
//   6    : address {column-1, 3}, only when both of the above exist
// When nothing (more) has to be requested interReqCount is reset to 0.
// The macroblock handled here is offset from currMb/currMbHor by
// interCurrMbDiff-1.
// NOTE(review): the "no row above" test compares the macroblock index
// against picWidth without subtracting firstMb, unlike the top-left test
// below; interReceiveNoResp uses the same form, so the two stay paired.
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbIndex  = currMb+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbHorRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   // Wrap once if the offset pushed the column past the end of the row.
   Bit#(PicAreaSz) mbHor    = (mbHorRaw >= widthExt) ? (mbHorRaw-widthExt) : mbHorRaw;
   Bit#(PicWidthSz) mbCol   = truncate(mbHor);

   Bool rightExists   = ((mbHor+1) < widthExt);
   Bool topLeftExists = (mbHor>0 && (mbIndex-firstMb)>widthExt);

   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issue = False;
   if( !(mbIndex < widthExt) )
      begin
         if(interReqCount<5)
            begin
               Bit#(2) slot = truncate(interReqCount-1);
               reqAddr = {mbCol,slot};
               issue = True;
            end
         else if(interReqCount==5)
            begin
               if(rightExists)
                  begin
                     reqAddr = {(mbCol+1),2'b00};
                     issue = True;
                  end
               else if(topLeftExists)
                  begin
                     reqAddr = {(mbCol-1),2'b11};
                     issue = True;
                  end
            end
         else if(interReqCount==6 && rightExists && topLeftExists)
            begin
               reqAddr = {(mbCol-1),2'b11};
               issue = True;
            end
      end

   if(issue)
      begin
         interMemReqQ.enq(LoadReq reqAddr);
         interReqCount <= interReqCount+1;
         //$display( "TRACE Prediction: interSendReq addr %0d",reqAddr);
      end
   else
      interReqCount <= 0;
   $display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, reqAddr);
endrule
 
 
// interReceiveNoResp: response-side counterpart for the case in which the
// macroblock index (currMb+interCurrMbDiff-1) is below picWidth, i.e. the
// case where interSendReq issues no load requests.  It closes the response
// phase and starts both step counters without touching interMemRespQ.
// When the (wrapped) column is 0 the left and top-left neighbour vectors are
// reset to NotInter 0.
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbHorRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbHor    = (mbHorRaw >= widthExt) ? (mbHorRaw-widthExt) : mbHorRaw;

   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount);

   if(mbHor == 0)
      begin
         interTopLeftVal <= replicate(NotInter 0);
         interLeftVal <= replicate(NotInter 0);
      end
   interIPStepCount <= 1;
   interStepCount <= 1;
   interRespCount <= 0;
endrule
 
// interReceiveResp: consumes one LoadResp from interMemRespQ per firing and
// files the unpacked InterBlockMv according to interRespCount:
//   1..4 : interTopVal[interRespCount-1]
//   5    : interTopVal[4] when a column exists to the right, otherwise
//          interTopLeftVal[0]
//   6    : interTopLeftVal[0]
// After the last expected response the response counter is cleared and both
// step counters are started; if the (wrapped) column is 0 the left and
// top-left neighbour vectors are reset to NotInter 0.
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data);
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbIndex  = currMb+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbHorRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbHor    = (mbHorRaw >= widthExt) ? (mbHorRaw-widthExt) : mbHorRaw;

   Bool rightExists   = ((mbHor+1) < widthExt);
   Bool topLeftExists = (mbHor>0 && (mbIndex-firstMb)>widthExt);
   // States for which the first top response may already be the last one.
   Bool earlyStopState = (interstate==InterPskip || interstate==InterP16x16 || interstate==InterP16x8);

   InterBlockMv respMv = unpack(data);
   Vector#(5,InterBlockMv) topNext = interTopVal;
   Vector#(4,InterBlockMv) topLeftNext = interTopLeftVal;
   Bool lastResp = False;

   if(interRespCount<5)
      begin
         Bit#(2) slot = truncate(interRespCount-1);
         topNext[slot] = respMv;
         lastResp = (interRespCount==4 || (interRespCount==1 && earlyStopState))
                    && !(rightExists || topLeftExists);
      end
   else if(interRespCount==5 && rightExists)
      begin
         topNext[4] = respMv;
         lastResp = !topLeftExists;
      end
   else
      begin
         topLeftNext[0] = respMv;
         lastResp = True;
      end

   interMemRespQ.deq();
   //$display( "TRACE Prediction: interReceiveResp data %h",data);
   if(lastResp)
      begin
         interRespCount <= 0;
         interStepCount <= 1;
         interIPStepCount <= 1;
         if(mbHor == 0)
            begin
               interLeftVal <= replicate(NotInter 0);
               topLeftNext = replicate(NotInter 0);
            end
      end
   else
      interRespCount <= interRespCount+1;

   interTopVal <= topNext;
   interTopLeftVal <= topLeftNext;
   $display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data);
endrule
 
 
// interProcessStep: handles one position {interMbPartNum,interSubMbPartNum}
// per firing while interStepCount>0.  Each firing
//   - collects the neighbour entries A/B/C from interLeftVal, interTopVal and
//     interTopLeftVal,
//   - either computes a motion vector (calcmv) and stores it in interMvFile,
//     or copies the vector of the left/top neighbour,
//   - enqueues a (horizontal, vertical) pair into interBSfifo,
//   - updates the three neighbour registers and advances the part counters.
// interStepCount is cleared after position {3,3}.
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) );
// Index of the macroblock being processed (currMb offset by interCurrMbDiff-1).
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
// Bit 0 of the two part counters forms blockHor, bit 1 forms blockVer.
Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]};
Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]};
// partWidth is later added to blockHor to index interTopVal, so it is
// expressed in the same units as blockHor.
// NOTE(review): partHeight, numPart and numSubPart are assigned below but
// never read inside this rule.
Bit#(3) partWidth = 0;
Bit#(3) partHeight = 0;
Bit#(3) numPart = 1;
Bit#(3) numSubPart = 1;
Bit#(2) subMbType = 0;
// noBlockC: use interTopLeftVal instead of the interTopVal entry for C.
Bool noBlockC = False;
// calcmv: this position computes a new vector; otherwise one is copied.
Bool calcmv = False;
// leftmv: when copying, take the vector from A (left) instead of B (top).
Bool leftmv = False;
// Per-mode setup of partition size and of the calcmv/leftmv/noBlockC flags.
if(interstate==InterPskip || interstate==InterP16x16)
begin
partWidth = 4;
partHeight = 4;
numPart = 1;
calcmv = (interMbPartNum==0 && interSubMbPartNum==0);
leftmv = (blockHor>0);
end
else if(interstate==InterP16x8)
begin
partWidth = 4;
partHeight = 2;
numPart = 2;
if(interMbPartNum==2)
noBlockC = True;
calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0);
leftmv = (blockHor>0);
end
else if(interstate==InterP8x16)
begin
partWidth = 2;
partHeight = 4;
numPart = 2;
calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0);
leftmv = !(blockVer>0);
end
else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
begin
numPart = 4;
// The sub-macroblock type of the current part selects the sub-partition shape.
subMbType = interSubMbTypeVector[interMbPartNum];
numSubPart = numSubMbPart(subMbType);
case(subMbType)
0:
begin
partWidth = 2;
partHeight = 2;
if(interMbPartNum==3)
noBlockC = True;
calcmv = (interSubMbPartNum==0);
leftmv = (blockHor[0]>0);
end
1:
begin
partWidth = 2;
partHeight = 1;
if(interSubMbPartNum==2)
noBlockC = True;
calcmv = (interSubMbPartNum[0]==0);
leftmv = True;
end
2:
begin
partWidth = 1;
partHeight = 2;
calcmv = (interSubMbPartNum[1]==0);
leftmv = False;
end
3:
begin
partWidth = 1;
partHeight = 1;
if(interSubMbPartNum==3)
noBlockC = True;
calcmv = True;
end
endcase
end
else
$display( "ERROR Prediction: interProcessStep unexpected interstate");
// InterPskip and InterP8x8ref0 always use reference index 0.
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]);
// blockABC[0]=A (left), [1]=B (top), [2]=C (interTopVal at blockHor+partWidth,
// or interTopLeftVal as a fallback).
Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0);
// A is NotInter 0 for column 0 of the macroblock whose index equals firstMb.
if( currMbTemp-firstMb==0 && blockHor==0 )
blockABC[0] = (NotInter 0);
else
blockABC[0] = interLeftVal[blockVer];
// B is NotInter 0 for row 0 of macroblocks fewer than picWidth past firstMb.
if( currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0 )
blockABC[1] = (NotInter 0);
else
blockABC[1] = interTopVal[blockHor];
blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth];
if(noBlockC || blockABC[2]==(NotInter 0))
blockABC[2] = interTopLeftVal[blockVer];
Bit#(14) mvhorfinal = 0;
Bit#(12) mvverfinal = 0;
// Stays 0 (meaning "do not update interNewestMv") unless set below.
Bit#(5) interNewestMvNext = 0;
if(calcmv)//motion vector calculation
begin
Vector#(3,Int#(14)) mvhorABC = replicate(0);
Vector#(3,Int#(12)) mvverABC = replicate(0);
// Number of neighbours whose refIdx equals refIndex.
Bit#(2) validCount = 0;
Bit#(14) mvhorPred = 0;
Bit#(12) mvverPred = 0;
// Collect the neighbour vectors; neighbours that are not BlockMv count as 0.
for(Integer ii=0; ii<3; ii=ii+1)
begin
if(blockABC[ii] matches tagged BlockMv .xdata)
begin
mvhorABC[ii] = unpack(xdata.mvhor);
mvverABC[ii] = unpack(xdata.mvver);
if(xdata.refIdx == refIndex)
begin
validCount = validCount+1;
mvhorPred = xdata.mvhor;
mvverPred = xdata.mvver;
end
end
else
begin
mvhorABC[ii] = 0;
mvverABC[ii] = 0;
end
end
// Exactly one matching neighbour: its vector (set above) is the prediction.
// Otherwise use the component-wise median of A, B and C.
if(validCount != 1)//median
begin
if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2])
mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2])
mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
else
mvhorPred = pack(mvhorABC[0]);
if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2])
mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2])
mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
else
mvverPred = pack(mvverABC[0]);
end
if(interstate==InterPskip)
begin
// Pskip: the prediction is forced to zero when A or B is NotInter 0, or is
// a BlockMv with refIdx 0 and a zero vector.
for(Integer ii=0; ii<2; ii=ii+1)
begin
if(blockABC[ii] matches tagged BlockMv .xdata)
begin
if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0)
begin
mvhorPred = 0;
mvverPred = 0;
end
end
else if(blockABC[ii] matches tagged NotInter 0)
begin
mvhorPred = 0;
mvverPred = 0;
end
end
end
else if(interstate==InterP16x8 || interstate==InterP8x16)
begin
// Directional override: one specific neighbour (chosen by mode and part
// number) replaces the prediction when its refIdx equals refIndex.
InterBlockMv blockCheck;
if(interstate==InterP16x8)
begin
if(interMbPartNum==0)
blockCheck = blockABC[1];
else
blockCheck = blockABC[0];
end
else
begin
if(interMbPartNum==0)
blockCheck = blockABC[0];
else
blockCheck = blockABC[2];
end
if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex)
begin
mvhorPred = xdata.mvhor;
mvverPred = xdata.mvver;
end
end
mvhorfinal = mvhorPred;
mvverfinal = mvverPred;
// Except for Pskip, add the difference at the head of interMvDiff and
// consume that entry.
if(interstate!=InterPskip)
begin
mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred));
mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred));
interMvDiff.deq();
end
// Record the vector and remember that entries up to this index are valid.
interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal));
interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1;
$display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred);
end
else
begin
// Not a calcmv position: copy the vector from A (leftmv) or from B.
if(leftmv)
begin
if(blockABC[0] matches tagged BlockMv .xdata)
begin
mvhorfinal = unpack(xdata.mvhor);
mvverfinal = unpack(xdata.mvver);
end
else
$display( "ERROR Prediction: interProcessStep unexpected blockABC[0]");
end
else
begin
if(blockABC[1] matches tagged BlockMv .xdata)
begin
mvhorfinal = unpack(xdata.mvhor);
mvverfinal = unpack(xdata.mvver);
end
else
$display( "ERROR Prediction: interProcessStep unexpected blockABC[1]");
end
end
// Values enqueued into interBSfifo, derived from the left neighbour
// (tempBShor) and the top neighbour (tempBSver):
//   3 = neighbour is not a BlockMv, 2 = its nonZeroTransCoeff is 1,
//   1 = refIdx differs or absDiffGEFour14/absDiffGEFour12 reports True,
//   0 = otherwise.
// absDiffGEFour14/12 are defined outside this rule; presumably they test
// |a-b| >= 4 -- confirm against their definitions.
Bit#(2) tempBShor = 0;//bS calculation
Bit#(2) tempBSver = 0;
if(interLeftVal[blockVer] matches tagged BlockMv .xdata)
begin
if(xdata.nonZeroTransCoeff == 1)
tempBShor = 2;
else
begin
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
tempBShor = 1;
else
tempBShor = 0;
end
end
else
tempBShor = 3;
if(interTopVal[blockHor] matches tagged BlockMv .xdata)
begin
if(xdata.nonZeroTransCoeff == 1)
tempBSver = 2;
else
begin
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
tempBSver = 1;
else
tempBSver = 0;
end
end
else
tempBSver = 3;
interBSfifo.enq(tuple2(tempBShor,tempBSver));
// The current position becomes the left/top neighbour of later positions;
// the previous top entry of this column moves into interTopLeftVal.
Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val
Vector#(4,InterBlockMv) interLeftValNext = interLeftVal;
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
interTopLeftValNext[blockVer] = interTopVal[blockHor];
interTopVal <= interTopValNext;
interLeftVal <= interLeftValNext;
interTopLeftVal <= interTopLeftValNext;
// Positions with blockVer==3 are additionally pushed to interOutBlockMvfifo.
if(blockVer == 3)
interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
// Advance {interMbPartNum,interSubMbPartNum}; after {3,3} the step phase
// ends and interNewestMv is set to 16.
if(interSubMbPartNum == 3)//next step
begin
interSubMbPartNum <= 0;
if(interMbPartNum == 3)
begin
interMbPartNum <= 0;
interStepCount <= 0;
interNewestMvNext = 16;
end
else
interMbPartNum <= interMbPartNum+1;
end
else
interSubMbPartNum <= interSubMbPartNum+1;
// interNewestMv is only written when a new value was produced above.
if(interNewestMvNext > 0)
interNewestMv <= interNewestMvNext;
endrule
 
 
// interIPProcessStep: sends one request to the interpolator per firing for
// the partition addressed by {interIPMbPartNum,interIPSubMbPartNum}.
// interIPStepCount==1 produces IPLuma requests; any other non-zero value
// produces IPChroma requests with uv = interIPStepCount[0].
// The guard also requires interNewestMv to be greater than the partition
// index; interProcessStep sets interNewestMv to index+1 after writing
// interMvFile, so the vector read here has already been stored.
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) );
// Position of the macroblock being processed: currMbHor offset by
// interCurrMbDiff-1, wrapped into the next row when it passes picWidth.
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
Bit#(PicHeightSz) currMbVerTemp = currMbVer;
if( currMbHorTemp >= zeroExtend(picWidth) )
begin
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
currMbVerTemp = currMbVerTemp+1;
end
// Bit 0 of the two part counters forms blockHor, bit 1 forms blockVer.
Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]};
Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]};
// Number of parts / sub-parts to iterate over for the current mode.
Bit#(3) numPart = 1;
Bit#(3) numSubPart = 1;
Bit#(2) subMbType = 0;
if(interstate==InterPskip || interstate==InterP16x16)
numPart = 1;
else if(interstate==InterP16x8)
numPart = 2;
else if(interstate==InterP8x16)
numPart = 2;
else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
begin
numPart = 4;
subMbType = interSubMbTypeVector[interIPMbPartNum];
numSubPart = numSubMbPart(subMbType);
end
else
$display( "ERROR Prediction: interIPProcessStep unexpected interstate");
// InterPskip and InterP8x8ref0 always use reference index 0.
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]);
// Request coordinates: hor = {macroblock column, blockHor},
// ver = {macroblock row, blockVer, 2'b00}.
Bit#(PicWidthSz) currMbHorT = truncate(currMbHorTemp);
Bit#(TAdd#(PicWidthSz,2)) horTemp = {currMbHorT,blockHor};
Bit#(TAdd#(PicHeightSz,4)) verTemp = {currMbVerTemp,blockVer,2'b00};
// Block type derived from the mode (and, for the 8x8 modes, from subMbType).
IPBlockType btTemp = IP16x16;
if(interstate==InterPskip || interstate==InterP16x16)
btTemp = IP16x16;
else if(interstate==InterP16x8)
btTemp = IP16x8;
else if(interstate==InterP8x16)
btTemp = IP8x16;
else
begin
case(subMbType)
0: btTemp = IP8x8;
1: btTemp = IP8x4;
2: btTemp = IP4x8;
3: btTemp = IP4x4;
endcase
end
// Motion vector stored for this position by interProcessStep.
Bit#(14) mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
Bit#(12) mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
if(interIPStepCount == 1)
begin
if(!(interstate==InterP8x8 || interstate==InterP8x8ref0))
begin
// Luma step for the non-8x8 modes: the request is always issued as
// IP8x8 over four parts.  The part number is mapped back to the
// partition that owns it so that refIndex and the vector are read
// from that partition's first entry.
numPart = 4;
Bit#(2) interIPMbPartNumTemp = interIPMbPartNum;
if(btTemp==IP16x16)
interIPMbPartNumTemp = 0;
else if(btTemp==IP16x8 && interIPMbPartNumTemp[0]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-1;
else if(btTemp==IP8x16 && interIPMbPartNumTemp[1]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-2;
refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNumTemp]);
btTemp = IP8x8;
mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
// Chroma step: the vertical coordinate is verTemp shifted right by one.
interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
// Advance the counters.  Note that btTemp and numPart may have been
// overridden by the luma branch above, which changes the stepping.
if(interIPSubMbPartNum >= truncate(numSubPart-1))
begin
interIPSubMbPartNum <= 0;
if(interIPMbPartNum >= truncate(numPart-1))
begin
// All parts done for this step: move on to the next interIPStepCount.
interIPMbPartNum <= 0;
interIPStepCount <= interIPStepCount+1;
end
else
begin
// IP16x8 jumps from part 0 straight to part 2.
if(btTemp == IP16x8)
interIPMbPartNum <= 2;
else
interIPMbPartNum <= interIPMbPartNum+1;
end
end
else
begin
// subMbType 1 jumps from sub-part 0 straight to sub-part 2.
if(subMbType == 1)
interIPSubMbPartNum <= 2;
else
interIPSubMbPartNum <= interIPSubMbPartNum+1;
end
$display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp));
endrule
 
 
// interDone: returns interstate to Start once every inter-prediction phase
// counter (request, response, step and interpolation step) is back at 0.
rule interDone ( interstate!=Start
                 && interReqCount==0
                 && interRespCount==0
                 && interStepCount==0
                 && interIPStepCount==0 );
   interstate <= Start;
   //$display( "Trace Prediction: interDone %h", interstate);
endrule
 
// interOutputTransfer: moves the head element of the interpolator output
// into predictedfifo, one element per firing.
rule interOutputTransfer ( True );
   let interpolated = interpolator.first();
   interpolator.deq();
   predictedfifo.enq(interpolated);
   //$display( "Trace Prediction: interOutputTransfer %h", interstate);
endrule
 
 
 
// intra prediction rules
 
// intraSendReq: issues the neighbour-data load requests for intra
// prediction, one per firing, sequenced by intraReqCount:
//   1..4 : address {column, intraReqCount-1}
//   5    : address {column+1, 0} when a column exists to the right and the
//          macroblock is Intra4x4, otherwise {column-1, 3} if the top-left
//          neighbour exists
//   6    : address {column-1, 3}, only when both of the above apply
// No request is issued for macroblocks fewer than picWidth past firstMb.
// When nothing (more) has to be requested intraReqCount is reset to 0.
// The rule waits until nextoutputfifo is empty.
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() );
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bit#(PicWidthSz) mbCol   = truncate(currMbHor);

   Bool rightWanted   = ((currMbHor+1)<widthExt && intrastate==Intra4x4);
   Bool topLeftExists = (currMbHor>0 && (currMb-firstMb)>widthExt);

   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issue = False;
   if( !((currMb-firstMb) < widthExt) )
      begin
         if(intraReqCount<5)
            begin
               Bit#(2) slot = truncate(intraReqCount-1);
               reqAddr = {mbCol,slot};
               issue = True;
            end
         else if(intraReqCount==5)
            begin
               if(rightWanted)
                  begin
                     reqAddr = {(mbCol+1),2'b00};
                     issue = True;
                  end
               else if(topLeftExists)
                  begin
                     reqAddr = {(mbCol-1),2'b11};
                     issue = True;
                  end
            end
         else if(intraReqCount==6 && rightWanted && topLeftExists)
            begin
               reqAddr = {(mbCol-1),2'b11};
               issue = True;
            end
      end

   if(issue)
      begin
         intraMemReqQ.enq(LoadReq reqAddr);
         intraReqCount <= intraReqCount+1;
         //$display( "TRACE Prediction: intraSendReq addr %0d",reqAddr);
      end
   else
      intraReqCount <= 0;
   $display( "Trace Prediction: intraSendReq");
endrule
 
 
// intraReceiveNoResp: response-side counterpart for macroblocks fewer than
// picWidth past firstMb, for which intraSendReq issues no load requests.
// Marks every top 4x4 type as 15, ends the response phase, starts the intra
// step sequence at block 0 / pixel 0 and enqueues NotInter 1 into
// interOutBlockMvfifo.
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) );
   $display( "Trace Prediction: intraReceiveNoResp");
   interOutBlockMvfifo.enq(NotInter 1);
   intra4x4typeTop <= replicate(15);
   pixelNum <= 0;
   blockNum <= 0;
   intraStepCount <= 1;
   intraRespCount <= 0;
endrule
 
// intraReceiveResp: consumes one LoadResp from intraMemRespQ per firing and
// distributes its fields according to intraRespCount:
//   1..4 : data[67:64] -> intra4x4typeTop, data[31:0] -> intraTopVal,
//          data[47:32] / data[63:48] -> intraTopValChroma0 / Chroma1;
//          on response 4, intraTopVal[4] is additionally pre-filled with
//          four copies of data[31:24]
//   5    : top-right word into intraTopVal[4] (when a right column exists,
//          the macroblock is Intra4x4 and the type nibble marks it usable),
//          otherwise treated as the top-left corner response
//   6    : top-left corner: low byte of intraLeftVal[0] and entry 0 of both
//          chroma left vectors
// After the last expected response the intra step sequence is started and
// NotInter 1 is enqueued into interOutBlockMvfifo.
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data);
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bool rightWanted   = ((currMbHor+1)<widthExt && intrastate==Intra4x4);
   Bool topLeftExists = (currMbHor>0 && (currMb-firstMb)>widthExt);
   Bit#(4) nbType     = data[67:64];
   Bit#(8) cornerPix  = data[31:24];
   Bool lastResp = False;

   if(intraRespCount<5)
      begin
         Bit#(2) slot = truncate(intraRespCount-1);
         intra4x4typeTop <= update(intra4x4typeTop, slot, nbType);
         intraTopValChroma0 <= update(intraTopValChroma0, slot, data[47:32]);
         intraTopValChroma1 <= update(intraTopValChroma1, slot, data[63:48]);
         if(intraRespCount==4)
            begin
               Vector#(5,Bit#(32)) topNext = intraTopVal;
               topNext[3] = data[31:0];
               topNext[4] = {cornerPix,cornerPix,cornerPix,cornerPix};
               intraTopVal <= topNext;
               lastResp = !(rightWanted || topLeftExists);
            end
         else
            intraTopVal <= update(intraTopVal, intraRespCount-1, data[31:0]);
      end
   else if(intraRespCount==5 && rightWanted)
      begin
         Bool unusable = (nbType==15 || (nbType==14 && ppsconstrained_intra_pred_flag==1));
         if(!unusable)
            intraTopVal <= update(intraTopVal, 4, data[31:0]);
         lastResp = !topLeftExists;
      end
   else
      begin
         Bit#(40) leftRow0 = intraLeftVal[0];
         intraLeftVal <= update(intraLeftVal, 0, {leftRow0[39:8],cornerPix});
         intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]);
         intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]);
         lastResp = True;
      end

   intraMemRespQ.deq();
   //$display( "TRACE Prediction: intraReceiveResp data %h",data);
   if(lastResp)
      begin
         intraRespCount <= 0;
         intraStepCount <= 1;
         blockNum <= 0;
         pixelNum <= 0;
         interOutBlockMvfifo.enq(NotInter 1);
      end
   else
      intraRespCount <= intraRespCount+1;
   $display( "Trace Prediction: intraReceiveResp");
endrule
 
// intraPredTypeStep: first intra step (intraStepCount==1), run once
// nextoutputfifo is empty.
//  - For anything other than Intra4x4 it only announces NonSkipMB.
//  - For Intra4x4 it derives the prediction mode of block blockNum from the
//    top/left neighbour types and the head of rem_intra4x4_pred_mode, stores
//    it in cur_intra4x4_pred_mode and announces Intra4x4 (Intra4x4PlusChroma
//    for block 15).
// A neighbour type of 15, or of 14 while ppsconstrained_intra_pred_flag is 1,
// counts as unavailable.  At block 0 of column 0 the left type is taken as
// 15 and intra4x4typeLeft is reset to all 15.
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty());
   Bit#(2) blockHor = {blockNum[2],blockNum[0]};
   Bit#(2) blockVer = {blockNum[3],blockNum[1]};
   Bool rowStart    = (currMbHor==0 && blockNum==0);
   Bit#(4) topType  = select(intra4x4typeTop, blockHor);
   Bit#(4) leftType = rowStart ? 15 : select(intra4x4typeLeft, blockVer);
   if(rowStart)
      intra4x4typeLeft <= replicate(15);

   // Both outcomes below move on to the next step.
   intraStepCount <= intraStepCount+1;

   if(intrastate==Intra4x4)
      begin
         Bool topUsable  = !(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1));
         Bool leftUsable = !(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1));
         Bit#(4) remType = rem_intra4x4_pred_mode.first();
         rem_intra4x4_pred_mode.deq();

         // Predicted mode: 2 if either neighbour is unusable, otherwise the
         // smaller of the two neighbour modes (types above 8 count as 2).
         Bit#(4) predType = 2;
         if(topUsable && leftUsable)
            begin
               Bit#(4) topMode  = (topType>8) ? 2 : topType;
               Bit#(4) leftMode = (leftType>8) ? 2 : leftType;
               predType = (topMode > leftMode) ? leftMode : topMode;
            end

         // remType[3] set: use the prediction; otherwise remType selects one
         // of the remaining modes, skipping over predType.
         Bit#(4) curType = (remType[3] == 1) ? predType
                         : ((remType < predType) ? remType : remType+1);
         cur_intra4x4_pred_mode <= curType;

         nextoutputfifo.enq((blockNum == 15) ? Intra4x4PlusChroma : Intra4x4);
         $display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,topType,leftType,predType,remType,curType);
      end
   else
      nextoutputfifo.enq(NonSkipMB);
   //$display( "Trace Prediction: intraPredTypeStep");
endrule
 
 
rule intraProcessStep ( intraStepCount>1 );
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);////////////////////
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);/////////////////
Bit#(1) outFlag = 0;
Bit#(4) nextIntraStepCount = intraStepCount+1;
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Vector#(4,Bit#(8)) predVector = replicate(0);
 
Bit#(4) topType = select(intra4x4typeTop, blockHor);
Bit#(4) leftType = select(intra4x4typeLeft, blockVer);
Bit#(1) topAvailable;
Bit#(1) leftAvailable;
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1))
topAvailable = 0;
else
topAvailable = 1;
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1))
leftAvailable = 0;
else
leftAvailable = 1;
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0)
begin
intraChromaTopAvailable <= topAvailable;
intraChromaLeftAvailable <= leftAvailable;
end
if(intrastate==Intra4x4 && intraChromaFlag==0)
begin
if(intraStepCount==2)
begin
outFlag = 1;
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor));
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1);
Bit#(72) topValSet;
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3)
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]};
else
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]};
$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);////////////////////
Bit#(4) topSelect1 = 0;
Bit#(4) topSelect2 = 0;
Bit#(4) topSelect3 = 0;
Bit#(3) leftSelect1 = 0;
Bit#(3) leftSelect2 = 0;
Bit#(3) leftSelect3 = 0;
Bit#(10) tempVal1 = 0;
Bit#(10) tempVal2 = 0;
Bit#(10) tempVal3 = 0;
case(cur_intra4x4_pred_mode)
0://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
topSelect1 = fromInteger(pixelHor);
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1);
predVector[pixelHor] = topVal;
end
end
1://horizontal
begin
leftSelect1 = zeroExtend(pixelVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1);
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = leftVal;
end
2://dc
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2;
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2;
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum);
Bit#(8) topSum = tempTopSum[9:2];
Bit#(8) leftSum = tempLeftSum[9:2];
Bit#(8) totalSum = tempTotalSum[10:3];
if(topAvailable==1 && leftAvailable==1)
predVector[pixelHor] = totalSum;
else if(topAvailable==1)
predVector[pixelHor] = topSum;
else if(leftAvailable==1)
predVector[pixelHor] = leftSum;
else
predVector[pixelHor] = 8'b10000000;
end
end
3://diagonal down left
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) selectNum = fromInteger(pixelHor)+zeroExtend(pixelVer);
if(pixelHor==3 && pixelVer==3)
begin
topSelect1 = 6;
topSelect2 = 7;
topSelect3 = 7;
end
else
begin
topSelect1 = selectNum;
topSelect2 = selectNum+1;
topSelect3 = selectNum+2;
end
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
4://diagonal down right
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
if(fromInteger(pixelHor) > pixelVer)
begin
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer);
topSelect2 = topSelect3-1;
topSelect1 = topSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(fromInteger(pixelHor) < pixelVer)
begin
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor);
leftSelect2 = leftSelect3-1;
leftSelect1 = leftSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
5://vertical right
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelHor = fromInteger(pixelHor);
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer);
if(zVR<=6 && zVR>=0)
begin
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer>>1);
topSelect2 = topSelect3-1;
if(zVR==1 || zVR==3 || zVR==5)
topSelect1 = topSelect3-2;
else
topSelect1 = topSelect3;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(zVR==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
leftSelect1 = zeroExtend(pixelVer)-1;
leftSelect2 = leftSelect1-1;
leftSelect3 = leftSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
6://horizontal down
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHD = (tempPixelVer<<1)-fromInteger(pixelHor);
if(zHD<=6 && zHD>=0)
begin
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor/2);
leftSelect2 = leftSelect3-1;
if(zHD==1 || zHD==3 || zHD==5)
leftSelect1 = leftSelect3-2;
else
leftSelect1 = leftSelect3;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else if(zHD==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
topSelect1 = fromInteger(pixelHor)-1;
topSelect2 = topSelect1-1;
topSelect3 = topSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
7://vertical left
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
topSelect1 = fromInteger(pixelHor)+zeroExtend(pixelVer>>1);
topSelect2 = topSelect1+1;
if(pixelVer==1 || pixelVer==3)
topSelect3 = topSelect1+2;
else
topSelect3 = topSelect1;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
8://horizontal up
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHU = (tempPixelVer<<1)+fromInteger(pixelHor);
if(zHU<=4)
begin
leftSelect1 = zeroExtend(pixelVer)+fromInteger(pixelHor/2);
leftSelect2 = leftSelect1+1;
if(zHU==1 || zHU==3)
leftSelect3 = leftSelect1+2;
else
leftSelect3 = leftSelect1;
end
else
begin
if(zHU==5)
leftSelect1 = 2;
else
leftSelect1 = 3;
leftSelect2 = 3;
leftSelect3 = 3;
end
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode");
endcase
end
else
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount");
end
else if(intrastate==Intra16x16 && intraChromaFlag==0)
begin
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));/////////////////
case(intra16x16_pred_mode)
0://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(32) topValSet = select(intraTopVal,blockHor);
Bit#(8) topVal = select32to8(topValSet,fromInteger(pixelHor));
predVector[pixelHor] = topVal;
end
outFlag = 1;
end
1://horizontal
begin
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer));
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = leftVal;
outFlag = 1;
end
2://dc
begin
case(intraStepCount)
2:
begin
if(topAvailable == 1)
begin
Bit#(32) topValSet = select(intraTopVal,0);
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
else
begin
intraSumA <= 0;
nextIntraStepCount = 6;
end
end
3:
begin
Bit#(32) topValSet = select(intraTopVal,1);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
4:
begin
Bit#(32) topValSet = select(intraTopVal,2);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
5:
begin
Bit#(32) topValSet = select(intraTopVal,3);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8;
end
6:
begin
if(leftAvailable == 1)
begin
Bit#(40) leftValSet = select(intraLeftVal,0);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
else
nextIntraStepCount = 10;
end
7:
begin
Bit#(40) leftValSet = select(intraLeftVal,1);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
8:
begin
Bit#(40) leftValSet = select(intraLeftVal,2);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
9:
begin
Bit#(40) leftValSet = select(intraLeftVal,3);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8;
end
10:
begin
if(leftAvailable == 1 && topAvailable == 1)
intraSumA <= intraSumA >> 5;
else if(leftAvailable == 1 || topAvailable == 1)
intraSumA <= intraSumA >> 4;
else
intraSumA <= 128;
end
11:
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount");
endcase
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(32) topValSet = select(intraTopVal,3);
Bit#(8) topVal = select32to8(topValSet,3);
Bit#(40) leftValSet = select(intraLeftVal,3);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3);
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 11)
begin
Bit#(4) xyPlusOne = intraStepCount-2;
Bit#(4) xyPlusEight = intraStepCount+5;
Bit#(4) sixMinusXY = 9-intraStepCount;
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]);
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]);
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]);
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0]));
Bit#(32) topValSet2=0;
Bit#(8) topVal2;
Bit#(40) leftValSet2;
Bit#(8) leftVal2;
if(intraStepCount==10)
begin
leftValSet2 = select(intraLeftVal,0);
leftVal2 = intra4x4SelectLeft(leftValSet2,-1);
topVal2 = leftVal2;
end
else
begin
topValSet2 = select(intraTopVal,sixMinusXY[3:2]);
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]);
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]);
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0]));
end
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
end
else if(intraStepCount == 11)
begin
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32;
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[17:6]);
intraSumC <= signExtend(tempSumC[17:6]);
end
else if(intraStepCount == 12)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(5) positionHor = {1'b0,blockHor,fromInteger(pixelHor)};
Bit#(5) positionVer = {1'b0,blockVer,pixelVer};
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7);
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7);
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[15]==1)
predVector[pixelHor] = 0;
else if(tempTotal[14:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
end
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount");
end
endcase
end
else if(intraChromaFlag==1)
begin
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);///////////////////
Vector#(9,Bit#(8)) tempLeftVec;
Vector#(4,Bit#(16)) tempTopVec;
if(blockNum[2] == 0)
begin
tempLeftVec = intraLeftValChroma0;
tempTopVec = intraTopValChroma0;
end
else
begin
tempLeftVec = intraLeftValChroma1;
tempTopVec = intraTopValChroma1;
end
case(intra_chroma_pred_mode.first())
0://dc
begin
if(intraStepCount == 2)
begin
Bit#(1) useTop=0;
Bit#(1) useLeft=0;
if(blockNum[1:0] == 0 || blockNum[1:0] == 3)
begin
useTop = intraChromaTopAvailable;
useLeft = intraChromaLeftAvailable;
end
else if(blockNum[1:0] == 1)
begin
if(intraChromaTopAvailable == 1)
useTop = 1;
else if(intraChromaLeftAvailable == 1)
useLeft = 1;
end
else if(blockNum[1:0] == 2)
begin
if(intraChromaLeftAvailable == 1)
useLeft = 1;
else if(intraChromaTopAvailable == 1)
useTop = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum");
Bit#(10) topSum;
Bit#(10) leftSum;
Bit#(11) totalSum;
if(blockHor[0] == 0)
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2;
else
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2;
if(blockVer[0] == 0)
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2;
else
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2;
totalSum = zeroExtend(topSum) + zeroExtend(leftSum);
if(useTop==1 && useLeft==1)
intraSumA <= zeroExtend(totalSum[10:3]);
else if(useTop==1)
intraSumA <= zeroExtend(topSum[9:2]);
else if(useLeft==1)
intraSumA <= zeroExtend(leftSum[9:2]);
else
intraSumA <= zeroExtend(8'b10000000);
end
else if(intraStepCount == 3)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount");
end
1://horizontal
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1;
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx);
end
outFlag = 1;
end
2://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(2) pixelHorTemp = fromInteger(pixelHor);
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHorTemp[1]});
if(pixelHorTemp[0] == 0)
predVector[pixelHor] = tempTopVal[7:0];
else
predVector[pixelHor] = tempTopVal[15:8];
end
outFlag = 1;
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(16) topValSet = tempTopVec[3];
Bit#(8) topVal = topValSet[15:8];
Bit#(8) leftVal = tempLeftVec[8];
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 7)
begin
Bit#(3) xyPlusOne = truncate(intraStepCount)-2;
Bit#(3) xyPlusFour = truncate(intraStepCount)+1;
Bit#(4) twoMinusXY = 5-intraStepCount;
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]);
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]);
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1;
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1);
Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]);
Bit#(8) topVal2;
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1);
if(intraStepCount==6)
topVal2 = leftVal2;
else
topVal2 = select16to8(topValSet2,twoMinusXY[0]);
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH);
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);////////////////////
end
else if(intraStepCount == 7)
begin
Int#(15) tempDisplayH = unpack(intraSumB);
Int#(15) tempDisplayV = unpack(intraSumC);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);////////////////////
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32;
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[18:6]);
intraSumC <= signExtend(tempSumC[18:6]);
end
else if(intraStepCount == 8)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) positionHor = {1'b0,blockHor[0],fromInteger(pixelHor)};
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer};
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3);
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3);
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[16]==1)
predVector[pixelHor] = 0;
else if(tempTotal[15:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
end
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount");
end
endcase
end
else
$display( "ERROR Prediction: intraProcessStep unknown intrastate");
 
if(outFlag==1)
begin
predictedfifo.enq(predVector);
pixelNum <= pixelNum+4;
if(pixelNum == 12)
begin
if(intraChromaFlag==0)
begin
blockNum <= blockNum+1;
if(blockNum == 15)
begin
intraChromaFlag <= 1;
intraStepCount <= 2;
end
else if(intrastate==Intra4x4)
intraStepCount <= 1;
end
else
begin
if(blockNum == 7)
begin
blockNum <= 0;
intraChromaFlag <= 0;
intraStepCount <= 0;
intra_chroma_pred_mode.deq();
end
else
begin
blockNum <= blockNum+1;
if(intra_chroma_pred_mode.first()==0)
intraStepCount <= 2;
else if(blockNum==3)
intraStepCount <= 2;
end
end
end
end
else
intraStepCount <= nextIntraStepCount;
//$display( "Trace Prediction: intraProcessStep");
endrule
 
// Memory client used by intra prediction: requests are taken from
// intraMemReqQ and responses are delivered into intraMemRespQ.
interface Client mem_client_intra;
interface Get request = fifoToGet(intraMemReqQ);
interface Put response = fifoToPut(intraMemRespQ);
endinterface
// Memory client used by inter prediction: requests are taken from
// interMemReqQ and responses are delivered into interMemRespQ.
interface Client mem_client_inter;
interface Get request = fifoToGet(interMemReqQ);
interface Put response = fifoToPut(interMemRespQ);
endinterface
// Forwarded unchanged from interpolator.mem_client.
interface Client mem_client_buffer = interpolator.mem_client;

// Stream ports: each one is a direct Get/Put view of the named FIFO.
interface Put ioin = fifoToPut(infifo);
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB);
interface Get ioout = fifoToGet(outfifo);
 
endmodule
 
endpackage
/trunk/src/ICalc_nC.bsv
0,0 → 1,28
//**********************************************************************
// Interface for nC Calculator
//----------------------------------------------------------------------
//
//
//
 
package ICalc_nC;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Method/port contract of the nC calculator.
// NOTE(review): the semantics below are inferred from the method names and
// argument types only; confirm against the implementing module.
interface Calc_nC;
// Takes the picture width expressed in macroblocks.
method Action initialize_picWidth( Bit#(PicWidthSz) picWidthInMb );
// Takes the address of the first macroblock (presumably of a slice -- confirm).
method Action initialize( Bit#(PicAreaSz) firstMbAddr );
// Takes the address of the macroblock to work on next.
method Action loadMb( Bit#(PicAreaSz) mbAddr );
// Value methods returning a 5-bit result for a 4-bit luma / 3-bit chroma
// block number.
method Bit#(5) nCcalc_luma( Bit#(4) microBlockNum );
method Bit#(5) nCcalc_chroma( Bit#(3) microBlockNum );
// Action methods taking a 5-bit value for the given luma / chroma block.
// ("updataVal" is the spelling used by the interface; presumably "updateVal".)
method Action nNupdate_luma( Bit#(4) microBlockNum, Bit#(5) updataVal );
method Action nNupdate_chroma( Bit#(3) microBlockNum, Bit#(5) updataVal );
// Update hooks taking a skip-run length, and no argument, respectively
// (presumably for P-skip runs and I_PCM macroblocks -- confirm).
method Action nNupdate_pskip( Bit#(PicAreaSz) mb_skip_run );
method Action nNupdate_ipcm();
// Memory port: addresses are PicWidthSz+1 bits wide, data is 20 bits wide.
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_client;
endinterface

endpackage
 
/trunk/src/IH264.bsv
0,0 → 1,31
//**********************************************************************
// Interface for H264 Main Module
//----------------------------------------------------------------------
//
//
//
 
package IH264;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Top-level port list of the H.264 main module.
// NOTE(review): the ED / P / D suffixes presumably stand for the entropy
// decoder, prediction and deblocking filter; confirm in mkH264.
interface IH264;

// Interface for memory, input generator
// Input stream of InputGenOT items.
interface Put#(InputGenOT) ioin;
// Memory client with (PicWidthSz+1)-bit addresses and 20-bit data.
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_clientED;
// Memory clients with (PicWidthSz+2)-bit addresses; 68-bit and 32-bit data.
interface Client#(MemReq#(TAdd#(PicWidthSz,2),68),MemResp#(68)) mem_clientP_intra;
interface Client#(MemReq#(TAdd#(PicWidthSz,2),32),MemResp#(32)) mem_clientP_inter;
// Memory clients with PicWidthSz-bit addresses / 13-bit data and
// (PicWidthSz+5)-bit addresses / 32-bit data.
interface Client#(MemReq#(PicWidthSz,13),MemResp#(13)) mem_clientD_parameter;
interface Client#(MemReq#(TAdd#(PicWidthSz,5),32),MemResp#(32)) mem_clientD_data;
// Two frame-buffer load clients and one frame-buffer store stream.
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load1;
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load2;
interface Get#(FrameBufferStoreReq) buffer_client_store;
// Output stream of BufferControlOT items.
interface Get#(BufferControlOT) ioout;

endinterface

endpackage
 
/trunk/src/IInverseTrans.bsv
0,0 → 1,23
//**********************************************************************
// Interface for Inverse Quantizer and Inverse Transformer
//----------------------------------------------------------------------
//
//
//
 
package IInverseTrans;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Port list of the inverse quantizer / inverse transformer: one input stream
// and one output stream, no memory ports.
interface IInverseTrans;

// Interface for inter-module io
// Accepts EntropyDecOT_InverseTrans items.
interface Put#(EntropyDecOT_InverseTrans) ioin;
// Produces InverseTransOT items.
interface Get#(InverseTransOT) ioout;

endinterface

endpackage
 
/trunk/src/IFrameBuffer.bsv
0,0 → 1,23
//**********************************************************************
// Interface for Frame Buffer
//----------------------------------------------------------------------
//
//
//
 
package IFrameBuffer;

import H264Types::*;
import ClientServer::*;
import GetPut::*;

// Port list of the frame buffer: two independent load servers and one
// store sink.
interface IFrameBuffer;

// Interface from processor to cache
// NOTE(review): the comment above looks inherited from another design; the
// ports below are frame-buffer load/store ports.
// Load ports: take a FrameBufferLoadReq and answer with a FrameBufferLoadResp.
interface Server#(FrameBufferLoadReq,FrameBufferLoadResp) server_load1;
interface Server#(FrameBufferLoadReq,FrameBufferLoadResp) server_load2;
// Store port: accepts FrameBufferStoreReq items and produces no response.
interface Put#(FrameBufferStoreReq) server_store;

endinterface

endpackage
/trunk/src/mkInputGen_park.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;

import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;

import Connectable::*;
import GetPut::*;


// Input generator that replays a byte stream preloaded from a hex image.
// Bytes are emitted one per rule firing as DataByte items; once every byte
// has been sent, EndOfFile items are emitted.
module mkInputGen( IInputGen );

   // Highest address populated from the hex image.
   Integer lastByteAddr = 1023205;
   // First address past the image; reaching it marks the end of the stream.
   Integer endAddr = lastByteAddr + 1;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("720p50_parkrun_ter1-5.hex", 0, fromInteger(lastByteAddr));
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Stream the next byte of the image and advance the read pointer.
   rule output_byte (index < fromInteger(endAddr));
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      outfifo.enq(DataByte nextByte);
      index <= index+1;
   endrule

   // The read pointer is left untouched here, so this rule stays enabled and
   // keeps enqueuing EndOfFile whenever outfifo has room. The simulation is
   // not terminated from this module.
   rule end_of_file (index == fromInteger(endAddr));
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule


endpackage
/trunk/src/mkDeblockFilter.bsv
0,0 → 1,786
//**********************************************************************
// Deblocking Filter
//----------------------------------------------------------------------
//
//
 
package mkDeblockFilter;
 
import H264Types::*;
 
import IDeblockFilter::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
 
// Phase of the deblocking filter's per-macroblock processing. The rules of
// mkDeblockFilter are guarded on the current phase.
typedef union tagged
{
void Passing; //not working on anything in particular
void Initialize; // entered from Passing when a PBbS item reaches the head of the input FIFO
void Horizontal; // entered by the initialize rule
void Vertical; // entered by the horizontal rule after the last pixel row of the last block
void Cleanup; // entered once both the vertical pass and the outputting are finished
}
Process deriving(Eq,Bits);
 
 
 
//-----------------------------------------------------------
// Helper functions
 
 
// Unsigned absolute difference |in0 - in1| of two 8-bit values.
function Bit#(8) absdiff8(Bit#(8) in0, Bit#(8) in1);
   Bit#(8) magnitude;
   // Always subtract the smaller operand from the larger one so the
   // unsigned subtraction cannot wrap.
   if (in1 >= in0)
      magnitude = in1 - in0;
   else
      magnitude = in0 - in1;
   return magnitude;
endfunction
 
 
// Decides whether an edge sample line should be filtered.
//   in_pixels : four samples across the edge, packed as {q1, q0, p0, p1}
//               (p1 in bits [7:0], q1 in bits [31:24])
//   alpha     : threshold for the step across the edge (|p0 - q0|)
//   beta      : threshold for the step on each side (|p0 - p1|, |q0 - q1|)
// Returns True only when all three differences are strictly below their
// thresholds.
function Bool filter_test(Bit#(32) in_pixels, Bit#(8) alpha, Bit#(5) beta);
   Bit#(8) betaWide = zeroExtend(beta);
   Bit#(8) sampleP1 = in_pixels[7:0];
   Bit#(8) sampleP0 = in_pixels[15:8];
   Bit#(8) sampleQ0 = in_pixels[23:16];
   Bit#(8) sampleQ1 = in_pixels[31:24];
   Bool acrossEdgeOk = absdiff8(sampleP0,sampleQ0) < alpha;
   Bool pSideOk = absdiff8(sampleP0,sampleP1) < betaWide;
   Bool qSideOk = absdiff8(sampleQ0,sampleQ1) < betaWide;
   return (acrossEdgeOk && pSideOk && qSideOk);
endfunction
 
 
// Clamps a signed 9-bit value into the symmetric range [-bound, +bound] and
// returns it as a signed 6-bit value (two's complement, packed into Bit#(6)).
// bound is an unsigned 5-bit magnitude.
function Bit#(6) clip3symmetric9to6(Bit#(9) val, Bit#(5) bound);
   Int#(9) signedVal = unpack(val);
   Int#(6) upperLimit = unpack({1'b0,bound});
   Int#(6) lowerLimit = -upperLimit;
   Int#(6) clipped;
   if (signedVal < signExtend(lowerLimit))
      clipped = lowerLimit;
   else if (signedVal > signExtend(upperLimit))
      clipped = upperLimit;
   else
      // In range, so the value fits in 6 signed bits and truncation is lossless.
      clipped = truncate(signedVal);
   return pack(clipped);
endfunction
 
 
// Filters one line of eight samples straddling a block edge.
//   in_pixels   : {q3,q2,q1,q0,p0,p1,p2,p3}; p3 is bits [7:0], q3 is bits [63:56]
//   chroma_flag : True when the samples are chroma
//   bs          : boundary strength; 4 selects the strong filter, 1..3 the
//                 clipped filter, 0 passes the samples through unchanged
//   alpha, beta : edge thresholds
//   tc0_vector  : clipping bounds, indexed by bs-1 for bs in 1..3
// Returns the filtered samples packed in the same order as in_pixels.
// p3 and q3 are never modified.
// NOTE(review): the equations appear to follow the H.264 deblocking filter
// (ITU-T H.264 clause 8.7) -- confirm against the specification.
function Bit#(64) filter_input(Bit#(64) in_pixels, Bool chroma_flag, Bit#(3) bs, Bit#(8) alpha, Bit#(5) beta, Vector#(3,Bit#(5)) tc0_vector);
Bit#(8) p[4];
Bit#(8) q[4];
// Unpack: p[0]/q[0] are the samples adjacent to the edge.
p[3] = in_pixels[7:0];
p[2] = in_pixels[15:8];
p[1] = in_pixels[23:16];
p[0] = in_pixels[31:24];
q[0] = in_pixels[39:32];
q[1] = in_pixels[47:40];
q[2] = in_pixels[55:48];
q[3] = in_pixels[63:56];
Bit#(8) p_out[4];
Bit#(8) q_out[4];
// Per-side activity tests: |x2 - x0| < beta.
Bool a_p_test = absdiff8(p[2],p[0]) < zeroExtend(beta);
Bool a_q_test = absdiff8(q[2],q[0]) < zeroExtend(beta);
// 9-bit sum p0+q0, shared by both branches below.
Bit#(9) p0q0 = zeroExtend(p[0])+zeroExtend(q[0]);
if (bs == 4)
begin
// Strong filter. The three-sample variant is used on a side only for luma,
// when that side's activity test passes and |p0-q0| < (alpha>>2)+2.
Bool small_gap_test = absdiff8(p[0],q[0]) < (alpha >> 2)+2;
// 11-bit temporaries hold the unshifted weighted sums without overflow.
Bit#(11) p_outtemp[3];
Bit#(11) q_outtemp[3];
if (!chroma_flag && a_p_test && small_gap_test)
begin
// sum = p1 + p0 + q0
Bit#(11) sum = zeroExtend(p[1])+zeroExtend(p0q0);
p_outtemp[0] = (zeroExtend(p[2]) + (sum<<1) + zeroExtend(q[1]) + 4) >> 3;
p_outtemp[1] = (zeroExtend(p[2]) + sum + 2) >> 2;
p_outtemp[2] = (((zeroExtend(p[3])+zeroExtend(p[2]))<<1) + zeroExtend(p[2]) + sum + 4) >> 3;
end
else
begin
// Only p0 is modified: (2*p1 + p0 + q1 + 2) >> 2.
p_outtemp[0] = ((zeroExtend(p[1])<<1) + zeroExtend(p[0]) + zeroExtend(q[1]) + 2) >> 2;
p_outtemp[1] = zeroExtend(p[1]);
p_outtemp[2] = zeroExtend(p[2]);
end
if (!chroma_flag && a_q_test && small_gap_test)
begin
// sum = q1 + p0 + q0 (mirror image of the p side)
Bit#(11) sum = zeroExtend(q[1])+zeroExtend(p0q0);
q_outtemp[0] = (zeroExtend(p[1]) + (sum<<1) + zeroExtend(q[2]) + 4) >> 3;
q_outtemp[1] = (zeroExtend(q[2]) + sum + 2) >> 2;
q_outtemp[2] = (((zeroExtend(q[3])+zeroExtend(q[2]))<<1) + zeroExtend(q[2]) + sum + 4) >> 3;
end
else
begin
// Only q0 is modified: (2*q1 + q0 + p1 + 2) >> 2.
q_outtemp[0] = ((zeroExtend(q[1])<<1) + zeroExtend(q[0]) + zeroExtend(p[1]) + 2) >> 2;
q_outtemp[1] = zeroExtend(q[1]);
q_outtemp[2] = zeroExtend(q[2]);
end
// Results are stored back as 8-bit samples.
p_out[0] = truncate(p_outtemp[0]);
p_out[1] = truncate(p_outtemp[1]);
p_out[2] = truncate(p_outtemp[2]);
q_out[0] = truncate(q_outtemp[0]);
q_out[1] = truncate(q_outtemp[1]);
q_out[2] = truncate(q_outtemp[2]);
end
else if(bs > 0)
begin
// Clipped filter for bs in 1..3.
Bit#(5) t_c0 = tc0_vector[bs-1];
// Clipping bound for delta: chroma uses tc0+1; luma adds one for each
// side whose activity test passes.
Bit#(5) t_c = chroma_flag ? t_c0+1 : t_c0 + (a_p_test ? 1:0) + (a_q_test ? 1:0);
// delta = clip(-t_c, t_c, (4*(q0-p0) + (p1-q1) + 4) >> 3); the shift is
// done by taking bits [11:3] of the 12-bit two's-complement sum.
Bit#(12) deltatemp = (((zeroExtend(q[0])-zeroExtend(p[0]))<<2)+zeroExtend(p[1])-zeroExtend(q[1])+4);
Bit#(6) delta = clip3symmetric9to6(deltatemp[11:3],t_c);
// p0 + delta and q0 - delta, saturated to 0..255: bit 9 set means the
// result is negative, bit 8 set means it exceeds 255.
Bit#(10) p_out0temp = zeroExtend(p[0]) + signExtend(delta);
p_out[0] = (p_out0temp[9]==1 ? 0 : (p_out0temp[8]==1 ? 255 : p_out0temp[7:0]));
Bit#(10) q_out0temp = zeroExtend(q[0]) - signExtend(delta);
q_out[0] = (q_out0temp[9]==1 ? 0 : (q_out0temp[8]==1 ? 255 : q_out0temp[7:0]));
// Rounded average (p0 + q0 + 1) >> 1.
Bit#(9) p0q0PLUS1 = p0q0+1;
Bit#(8) p0q0_av = p0q0PLUS1[8:1];
if (!chroma_flag && a_p_test)
begin
// p1 += clip(-tc0, tc0, (p2 + avg - 2*p1) >> 1)
Bit#(10) p_out1temp = zeroExtend(p[2]) + zeroExtend(p0q0_av) - (zeroExtend(p[1])<<1);
p_out[1] = p[1]+signExtend(clip3symmetric9to6(p_out1temp[9:1],t_c0));
end
else
p_out[1] = p[1];
if (!chroma_flag && a_q_test)
begin
// q1 += clip(-tc0, tc0, (q2 + avg - 2*q1) >> 1)
Bit#(10) q_out1temp = zeroExtend(q[2]) + zeroExtend(p0q0_av) - (zeroExtend(q[1])<<1);
q_out[1] = q[1]+signExtend(clip3symmetric9to6(q_out1temp[9:1],t_c0));
end
else
q_out[1] = q[1];
p_out[2] = p[2];
q_out[2] = q[2];
end
else
begin
// bs == 0: no filtering, samples pass through.
p_out[0] = p[0];
q_out[0] = q[0];
p_out[1] = p[1];
q_out[1] = q[1];
p_out[2] = p[2];
q_out[2] = q[2];
end
p_out[3] = p[3];
q_out[3] = q[3];
// Re-pack in the input order.
return({q_out[3], q_out[2], q_out[1], q_out[0], p_out[0], p_out[1], p_out[2], p_out[3]});
endfunction
 
 
 
//-----------------------------------------------------------
// Deblocking Filter Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkDeblockFilter( IDeblockFilter );
 
// Input and output streams of the module.
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(deblockFilter_infifo_size);
FIFO#(DeblockFilterOT) outfifo <- mkFIFO();

// Request/response queues for the data memory (32-bit words) and the
// parameter memory (13-bit words).
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkFIFO;
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkFIFO;
FIFO#(MemResp#(32)) dataMemRespQ <- mkFIFO;
FIFO#(MemResp#(13)) parameterMemRespQ <- mkFIFO;

// Control state. dataReqCount / dataRespCount are 0 when idle and count
// 1..16 while the words of the top neighbour are requested / received.
Reg#(Process) process <- mkReg(Passing);
Reg#(Bit#(1)) chromaFlag <- mkReg(0);
Reg#(Bit#(5)) dataReqCount <- mkReg(0);
Reg#(Bit#(5)) dataRespCount <- mkReg(0);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);

// Which edge classes are filtered for the current macroblock; written by
// the initialize rule.
Reg#(Bool) filterTopMbEdgeFlag <- mkReg(False);
Reg#(Bool) filterLeftMbEdgeFlag <- mkReg(False);
Reg#(Bool) filterInternalEdgesFlag <- mkReg(False);

// Picture geometry and position of the current macroblock.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb

// Filter controls received from the input stream (see the passing rule).
Reg#(Bit#(2)) disable_deblocking_filter_idc <- mkReg(0);
Reg#(Bit#(5)) slice_alpha_c0_offset <- mkReg(0);
Reg#(Bit#(5)) slice_beta_offset <- mkReg(0);

// Luma (qpy) / chroma (qpc) quantisation parameters and intra flags of the
// current, left and top macroblocks.
Reg#(Bit#(6)) curr_qpy <- mkReg(0);
Reg#(Bit#(6)) left_qpy <- mkReg(0);
Reg#(Bit#(6)) top_qpy <- mkReg(0);
Reg#(Bit#(6)) curr_qpc <- mkReg(0);
Reg#(Bit#(6)) left_qpc <- mkReg(0);
Reg#(Bit#(6)) top_qpc <- mkReg(0);
Reg#(Bit#(1)) curr_intra <- mkReg(0);
Reg#(Bit#(1)) left_intra <- mkReg(0);
Reg#(Bit#(1)) top_intra <- mkReg(0);

// Thresholds and clipping bounds looked up from the tables below; one set
// for macroblock edges and one for internal edges.
Reg#(Bit#(8)) alphaMbEdge <- mkReg(0);
Reg#(Bit#(8)) alphaInternal <- mkReg(0);
Reg#(Bit#(5)) betaMbEdge <- mkReg(0);
Reg#(Bit#(5)) betaInternal <- mkReg(0);
Reg#(Vector#(3,Bit#(5))) tc0MbEdge <- mkRegU();
Reg#(Vector#(3,Bit#(5))) tc0Internal <- mkRegU();

// Lookup tables with 52 entries each; the rules clamp the index to 0..51
// before use. alpha_table and tc0_table are indexed by indexA, beta_table
// by indexB. Each tc0_table row holds three bounds (used as tc0_vector[bs-1]).
Bit#(8) alpha_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
80, 90,101,113,127,144,162,182,203,226,
255,255};
Bit#(5) beta_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
18, 18};
Bit#(5) tc0_table[52][3] = {{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
{ 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
{ 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
{ 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
{ 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
{ 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }};

// Pixel storage, one 32-bit word per row of four pixels:
//   workVector : current macroblock, addressed {blockHor,blockVer,pixelVer}
//   leftVector : 96 words; luma uses addresses with the top bit 0, chroma
//                uses addresses starting with 2'b10
//   topVector  : 16 words loaded from data memory by dataReceiveResp
Reg#(Vector#(64,Bit#(32))) workVector <- mkRegU();
Reg#(Vector#(96,Bit#(32))) leftVector <- mkRegU();
Reg#(Vector#(16,Bit#(32))) topVector <- mkRegU();

// Progress flags and block column/row counters of the vertical pass.
Reg#(Bool) startLastOutput <- mkReg(False);
Reg#(Bool) outputingFinished <- mkReg(False);
Reg#(Bit#(2)) colNum <- mkReg(0);
Reg#(Bit#(2)) rowNum <- mkReg(0);

// Boundary strengths per 4-bit block number, stored as a (bShor, bSver)
// pair by the horizontal rule.
RFile1#(Bit#(4),Tuple2#(Bit#(3),Bit#(3))) bSfile <- mkRFile1Full();
 
 
//-----------------------------------------------------------
// Rules
 
 
// Debug trace: prints the item at the head of infifo without dequeuing it.
// The rule has no explicit guard, so it can fire whenever infifo.first()
// is available.
rule checkFIFO ( True );
$display( "Trace DeblockFilter: checkFIFO %h", infifo.first() );
endrule
// Idle-phase handler: examines the head of infifo while no macroblock is
// being filtered. Items are either forwarded to outfifo (wrapped in EDOT),
// consumed to update local state, or both. A PBbS item is left in the FIFO
// and switches the module to the Initialize phase.
rule passing ( process matches Passing );
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
// Forwarded; also logged.
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display("ccl5newunit");
$display("ccl5rbspbyte %h", xdata);
end
tagged SPSpic_width_in_mbs .xdata :
begin
// Forwarded and latched as the picture width.
infifo.deq();
outfifo.enq(EDOT infifo.first());
picWidth <= xdata;
end
tagged SPSpic_height_in_map_units .xdata :
begin
// Forwarded and latched as the picture height.
infifo.deq();
outfifo.enq(EDOT infifo.first());
picHeight <= xdata;
end
tagged PPSdeblocking_filter_control_present_flag .xdata :
begin
// Consumed (not forwarded). When the flag is 0 the filter controls are
// reset to zero.
infifo.deq();
if (xdata == 0)
begin
disable_deblocking_filter_idc <= 0;
slice_alpha_c0_offset <= 0;
slice_beta_offset <= 0;
end
end
tagged SHfirst_mb_in_slice .xdata :
begin
// Forwarded; restarts the macroblock position. currMbHor is loaded with
// the raw address and reduced to a (hor, ver) pair by currMbHorUpdate.
infifo.deq();
outfifo.enq(EDOT infifo.first());
firstMb <= xdata;
currMb <= xdata;
currMbHor <= xdata;
currMbVer <= 0;
end
tagged SHdisable_deblocking_filter_idc .xdata :
begin
// Consumed (not forwarded).
infifo.deq();
disable_deblocking_filter_idc <= xdata;
end
tagged SHslice_alpha_c0_offset .xdata :
begin
// Consumed (not forwarded).
infifo.deq();
slice_alpha_c0_offset <= xdata;
end
tagged SHslice_beta_offset .xdata :
begin
// Consumed (not forwarded).
infifo.deq();
slice_beta_offset <= xdata;
end
tagged IBTmb_qp .xdata :
begin
// Consumed (not forwarded); latches the luma and chroma QP.
infifo.deq();
curr_qpy <= xdata.qpy;
curr_qpc <= xdata.qpc;
end
tagged PBbS .xdata :
begin
// Not dequeued here: the item stays at the head of infifo and is consumed
// by the horizontal rule once the phase has advanced.
process <= Initialize;
end
tagged PBoutput .xdata :
begin
// Unexpected in this phase; the item is not dequeued.
$display( "ERROR Deblocking Filter: passing PBoutput");
end
tagged EndOfFile :
begin
// Forwarded; also logged.
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display( "ccl5: EndOfFile reached");
//$finish(0);
end
default:
begin
// Everything else is forwarded unchanged.
infifo.deq();
outfifo.enq(EDOT infifo.first());
end
endcase
endrule
 
 
// Reduces currMbHor modulo the picture width by repeated subtraction,
// counting the removed rows in currMbVer. While currMbHor is at least eight
// widths it removes eight rows per firing, otherwise one row.
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) );
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   // currMbHor/8 >= width  <=>  at least eight whole rows can be removed.
   Bool removeEightRows = ((currMbHor >> 3) >= widthInMbs);
   Bit#(PicAreaSz) horStep = (removeEightRows ? (widthInMbs << 3) : widthInMbs);
   Bit#(PicHeightSz) verStep = (removeEightRows ? 8 : 1);
   currMbHor <= currMbHor - horStep;
   currMbVer <= currMbVer + verStep;
endrule
 
// Per-macroblock set-up, run once the macroblock position is normalised:
// decides which edge classes are filtered, arms the top-neighbour fetch
// counters, resets the block/pixel counters and looks up the thresholds for
// the left macroblock edge from the average of the current and left QP.
rule initialize ( process==Initialize && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: initialize %0d", currMb);
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   Bool filterDisabled = (disable_deblocking_filter_idc==1);
   Bool sliceBounded = (disable_deblocking_filter_idc==2);

   // Top edge is skipped when currMb lies in the first picture row, or (idc 2)
   // within one picture width of the first macroblock of the slice.
   Bool topUnavailable = (currMb<widthInMbs) || (sliceBounded && (currMb-firstMb<widthInMbs));
   // Left edge is skipped in the first column, or (idc 2) for the first
   // macroblock of the slice.
   Bool leftUnavailable = (currMbHor==0) || (sliceBounded && (currMb==firstMb));

   filterTopMbEdgeFlag <= !(topUnavailable || filterDisabled);
   filterLeftMbEdgeFlag <= !(leftUnavailable || filterDisabled);
   filterInternalEdgesFlag <= !filterDisabled;

   process <= Horizontal;
   dataReqCount <= 1;
   dataRespCount <= 1;
   blockNum <= 0;
   pixelNum <= 0;

   // Rounded average of the current and left QP for the active plane.
   Bit#(6) qpCurr;
   Bit#(6) qpLeft;
   if(chromaFlag==0)
      begin
         qpCurr = curr_qpy;
         qpLeft = left_qpy;
      end
   else
      begin
         qpCurr = curr_qpc;
         qpLeft = left_qpc;
      end
   Bit#(7) qpSum = zeroExtend(qpCurr)+zeroExtend(qpLeft)+1;
   Bit#(6) qpAvg = qpSum[6:1];

   // Add the signed slice offsets, then clamp the table indices to 0..51.
   Bit#(8) rawIdxA = zeroExtend(qpAvg)+signExtend(slice_alpha_c0_offset);
   Bit#(8) rawIdxB = zeroExtend(qpAvg)+signExtend(slice_beta_offset);
   Bit#(6) idxA = rawIdxA[5:0];
   if(rawIdxA[7]==1)
      idxA = 0;
   else if(rawIdxA[6:0]>51)
      idxA = 51;
   Bit#(6) idxB = rawIdxB[5:0];
   if(rawIdxB[7]==1)
      idxB = 0;
   else if(rawIdxB[6:0]>51)
      idxB = 51;

   alphaMbEdge <= alpha_table[idxA];
   betaMbEdge <= beta_table[idxB];
   Vector#(3,Bit#(5)) tc0Row = arrayToVector(tc0_table[idxA]);
   tc0MbEdge <= tc0Row;
endrule
 
 
// Issues the load requests for the top neighbour: one parameter-memory load
// on the first step and sixteen data-memory loads (one per step). When
// currMb lies in the first picture row no request is issued and the counter
// is cleared immediately.
rule dataSendReq ( dataReqCount>0 && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: dataSendReq %0d", dataReqCount);
   Bool inFirstMbRow = (currMb<zeroExtend(picWidth));
   if(!inFirstMbRow)
      begin
         Bit#(PicWidthSz) mbColumn = truncate(currMbHor);
         // Word index 0..15 within the macroblock column's line buffer.
         Bit#(4) wordIdx = truncate(dataReqCount-1);
         if(dataReqCount==1)
            parameterMemReqQ.enq(LoadReq mbColumn);
         let wordAddr = {mbColumn,chromaFlag,wordIdx};
         dataMemReqQ.enq(LoadReq wordAddr);
         dataReqCount <= (dataReqCount==16 ? 0 : dataReqCount+1);
      end
   else
      dataReqCount <= 0;
endrule
 
 
// Clears the response counter without waiting for memory when currMb is in
// the first picture row and within one picture width of firstMb; in that
// case dataSendReq issues no requests, so no responses will arrive.
rule dataReceiveNoResp ( dataRespCount>0 && currMb<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: dataReceiveNoResp");
dataRespCount <= 0;
endrule
 
// Collects the responses to the loads issued by dataSendReq. The first step
// also consumes the parameter word (top QP values and intra flag); every
// step stores one data word into topVector. The counter runs 1..16 and then
// returns to 0.
rule dataReceiveResp ( dataRespCount>0 && !(currMb<zeroExtend(picWidth)) && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: dataReceiveResp %0d", dataRespCount);
   Bool firstResp = (dataRespCount==1);
   Bool lastResp = (dataRespCount==16);
   Bit#(4) wordIdx = truncate(dataRespCount-1);

   if(firstResp)
      begin
         // Parameter word layout: [12] intra flag, [11:6] chroma QP, [5:0] luma QP.
         Bit#(13) mbParams = 0;
         if(parameterMemRespQ.first() matches tagged LoadResp .xdata)
            mbParams = xdata;
         parameterMemRespQ.deq();
         top_intra <= mbParams[12];
         top_qpc <= mbParams[11:6];
         top_qpy <= mbParams[5:0];
      end

   Vector#(16,Bit#(32)) updatedTop = topVector;
   if(dataMemRespQ.first() matches tagged LoadResp .xdata)
      updatedTop[wordIdx] = xdata;
   dataMemRespQ.deq();
   topVector <= updatedTop;

   dataRespCount <= (lastResp ? 0 : dataRespCount+1);
endrule
 
 
// Horizontal pass: consumes the macroblock's input items one at a time.
// PBbS items are stored in bSfile; each PBoutput item carries one row of
// four pixels, which is filtered against the four pixels to its left and
// written into workVector (and leftVector at the macroblock's left edge).
// After the last row of the last block the rule hands over to the Vertical
// phase and precomputes the thresholds for the top macroblock edge.
rule horizontal ( process==Horizontal && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: horizontal %0d %0d %0d", blockNum, pixelNum, infifo.first());
// blockNum interleaves the block coordinates: bits 2,0 are the column and
// bits 3,1 the row. pixelNum advances by 4 per row, so bits 3:2 are the row
// inside the block.
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Vector#(96,Bit#(32)) leftVectorNext = leftVector;
Vector#(64,Bit#(32)) workVectorNext = workVector;
// True for blocks in the leftmost block column (luma: blockHor==0;
// chroma: blockNum[0]==0).
Bool leftEdge = (blockNum[0]==0 && (blockNum[2]==0 || chromaFlag==1));
if(blockNum==0 && pixelNum==0)
begin
// First step of the macroblock: look up the thresholds for internal edges
// from the current QP alone, with the table indices clamped to 0..51.
Bit#(6) qpav = (chromaFlag==0 ? curr_qpy : curr_qpc);
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset);
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset);
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0]));
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0]));
alphaInternal <= alpha_table[indexA];
betaInternal <= beta_table[indexB];
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]);
tc0Internal <= tc0temp;
end
case (infifo.first()) matches
tagged PBbS .xdata :
begin
// Record the boundary strengths for block blockNum. blockNum and pixelNum
// are not advanced by this arm.
infifo.deq();
bSfile.upd(blockNum,tuple2(xdata.bShor,xdata.bSver));
end
tagged PBoutput .xdata :
begin
infifo.deq();
// addrq: where this row is stored in workVector.
// addrpLeft: matching row of the left neighbour in leftVector (luma: block
// column 3; chroma: region starting with 2'b10, odd block column).
// addrpCurr: matching row of the block to the left inside this macroblock.
Bit#(6) addrq = {blockHor,blockVer,pixelVer};
Bit#(7) addrpLeft = (chromaFlag==0 ? {3'b011,blockVer,pixelVer} : {2'b10,blockHor[1],1'b1,blockVer[0],pixelVer});
Bit#(6) addrpCurr = {(blockHor-1),blockVer,pixelVer};
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]};
Bit#(32) pixelp;
if(leftEdge)
pixelp = leftVector[addrpLeft];
else
pixelp = workVector[addrpCurr];
// Default result is the unfiltered samples; p occupies the low word.
Bit#(64) result = {pixelq,pixelp};
// filter_test sees the two samples on each side of the edge. The bS lookup
// uses blockNum for luma; for chroma it uses the index of the corresponding
// luma block {blockNum[1:0],pixelVer[1],1'b0}.
if(leftEdge && filterLeftMbEdgeFlag)
begin
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaMbEdge,betaMbEdge))
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaMbEdge,betaMbEdge,tc0MbEdge);
end
else if(!leftEdge && filterInternalEdgesFlag)
begin
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaInternal,betaInternal))
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaInternal,betaInternal,tc0Internal);
end
// Write both halves back to where they came from / belong.
if(leftEdge)
leftVectorNext[addrpLeft] = result[31:0];
else
workVectorNext[addrpCurr] = result[31:0];
workVectorNext[addrq] = result[63:32];
leftVector <= leftVectorNext;
workVector <= workVectorNext;
if(pixelNum==12 && (blockNum==15 || (blockNum==7 && chromaFlag==1)))
begin
// Last row of the last block (15 for luma, 7 for chroma): start the
// vertical pass. rowNum starts at 0 only when the top macroblock edge is
// to be filtered.
blockNum <= 0;
process <= Vertical;
startLastOutput <= False;
outputingFinished <= False;
colNum <= 0;
if(filterTopMbEdgeFlag)
rowNum <= 0;
else
rowNum <= 1;
// Thresholds for the top macroblock edge, from the rounded average of the
// current and top QP.
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc);
Bit#(6) top_qp = (chromaFlag==0 ? top_qpy : top_qpc);
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(top_qp)+1;
Bit#(6) qpav = qpavtemp[6:1];
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset);
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset);
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0]));
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0]));
alphaMbEdge <= alpha_table[indexA];
betaMbEdge <= beta_table[indexB];
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]);
tc0MbEdge <= tc0temp;
end
else if(pixelNum==12)
blockNum <= blockNum+1;
// 4-bit counter: 12+4 wraps back to 0 for the next block.
pixelNum <= pixelNum+4;
end
default: $display( "ERROR Deblocking Filter: horizontal non-PBoutput input");
endcase
endrule
 
 
// Vertical pass: one 4x4 block per firing, walking down each block column
// (rowNum) and then across columns (colNum). The block and the four rows
// above it are transposed into four 8-sample columns, each column is
// filtered, and the result is transposed back into workVector (and
// topVector when the rows above belong to the top neighbour). The rule
// waits for dataRespCount==0, i.e. until topVector has been loaded.
rule vertical ( process==Vertical && !startLastOutput && dataRespCount==0 && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: vertical %0d %0d", colNum, rowNum);
//$display( "TRACE Deblocking Filter: vertical topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]);
Bool topEdge = (rowNum==0);
Vector#(64,Bit#(32)) workVectorNext = workVector;
Vector#(16,Bit#(32)) topVectorNext = topVector;
Vector#(64,Bit#(32)) workV = workVector;
Vector#(4,Bit#(32)) tempV = replicate(0);
Vector#(4,Bit#(64)) resultV = replicate(0);
Bit#(8) alpha;
Bit#(5) beta;
Vector#(3,Bit#(5)) tc0;
// Block index in workVector's {column,row} ordering.
Bit#(4) crNum = {colNum,rowNum};
// tempV holds the four pixel rows directly above the current block.
if(topEdge)
begin
// Macroblock top edge: rows come from the top neighbour.
tempV[0] = topVector[{colNum,2'b00}];
tempV[1] = topVector[{colNum,2'b01}];
tempV[2] = topVector[{colNum,2'b10}];
tempV[3] = topVector[{colNum,2'b11}];
alpha = alphaMbEdge;
beta = betaMbEdge;
tc0 = tc0MbEdge;
end
else
begin
// Internal edge: rows come from the block above (crNum-1) in this macroblock.
tempV[0] = workV[{(crNum-1),2'b00}];
tempV[1] = workV[{(crNum-1),2'b01}];
tempV[2] = workV[{(crNum-1),2'b10}];
tempV[3] = workV[{(crNum-1),2'b11}];
alpha = alphaInternal;
beta = betaInternal;
tc0 = tc0Internal;
end
// Transpose: resultV[i] is pixel column i, with the four samples above the
// edge in the low word (p side) and the block's own samples in the high
// word (q side), matching filter_input's layout.
resultV[0] = {workV[{crNum,2'b11}][7:0],workV[{crNum,2'b10}][7:0],workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0],tempV[1][7:0],tempV[0][7:0]};
resultV[1] = {workV[{crNum,2'b11}][15:8],workV[{crNum,2'b10}][15:8],workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8],tempV[1][15:8],tempV[0][15:8]};
resultV[2] = {workV[{crNum,2'b11}][23:16],workV[{crNum,2'b10}][23:16],workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16],tempV[1][23:16],tempV[0][23:16]};
resultV[3] = {workV[{crNum,2'b11}][31:24],workV[{crNum,2'b10}][31:24],workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24],tempV[1][31:24],tempV[0][31:24]};
// Filter each column that passes filter_test. The bS index is the luma
// block number for luma; for chroma, columns 0-1 and 2-3 use different
// entries (low bits 2'b00 and 2'b01 respectively).
if(filter_test({workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0]},alpha,beta))
resultV[0] = filter_input(resultV[0],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8]},alpha,beta))
resultV[1] = filter_input(resultV[1],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16]},alpha,beta))
resultV[2] = filter_input(resultV[2],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24]},alpha,beta))
resultV[3] = filter_input(resultV[3],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0);
// Transpose back: the p side returns to where tempV was read from.
if(topEdge)
begin
topVectorNext[{colNum,2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]};
topVectorNext[{colNum,2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]};
topVectorNext[{colNum,2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]};
topVectorNext[{colNum,2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]};
end
else
begin
workVectorNext[{(crNum-1),2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]};
workVectorNext[{(crNum-1),2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]};
workVectorNext[{(crNum-1),2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]};
workVectorNext[{(crNum-1),2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]};
end
// The q side always returns to the current block.
workVectorNext[{crNum,2'b00}] = {resultV[3][39:32],resultV[2][39:32],resultV[1][39:32],resultV[0][39:32]};
workVectorNext[{crNum,2'b01}] = {resultV[3][47:40],resultV[2][47:40],resultV[1][47:40],resultV[0][47:40]};
workVectorNext[{crNum,2'b10}] = {resultV[3][55:48],resultV[2][55:48],resultV[1][55:48],resultV[0][55:48]};
workVectorNext[{crNum,2'b11}] = {resultV[3][63:56],resultV[2][63:56],resultV[1][63:56],resultV[0][63:56]};
if(topEdge)
topVector <= topVectorNext;
workVector <= workVectorNext;
// Advance: a column ends at row 3 (luma) or row 1 (chroma). After the last
// column the pass is complete and startLastOutput is raised; otherwise the
// next column starts at row 0 only if the top macroblock edge is filtered.
if(rowNum==3 || (chromaFlag==1 && rowNum==1))
begin
if(colNum==3)
startLastOutput <= True;
else
begin
if(filterTopMbEdgeFlag)
rowNum <= 0;
else
rowNum <= 1;
end
colNum <= colNum+1;
end
else
rowNum <= rowNum+1;
endrule
 
 
// Output stage, running alongside the vertical pass: steps through
// leftVector one word per firing. Words from all but the last block row are
// emitted to outfifo at the position of the macroblock to the left. Words
// of the last block row are held back until the vertical pass has finished
// (startLastOutput); they are then stored to data memory and, when a row
// above exists, the matching topVector word is emitted at the position of
// the macroblock above.
rule outputing ( process==Vertical && !outputingFinished && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: outputting %0d %0d", blockNum, pixelNum);
// In this rule pixelNum steps by 1: its low bits select the block column,
// its high bits the pixel row; blockNum counts block rows.
Bit#(2) blockHor = pixelNum[1:0];
Bit#(2) blockVer = blockNum[1:0];
Bit#(2) pixelVer = pixelNum[3:2];
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor);
Bool stalling = False;
if(currMb==0)
begin
// Macroblock 0: nothing is emitted; just wait for the vertical pass to end.
if(startLastOutput)
outputingFinished <= True;
end
else
begin
// Luma words live at addresses with the top bit 0, chroma words at
// addresses starting with 2'b10.
Bit#(7) leftAddr;
if(chromaFlag==0)
leftAddr = {1'b0,blockHor,blockVer,pixelVer};
else
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer};
Bit#(32) leftData = leftVector[leftAddr];
if(!(blockNum==3 || (blockNum==1 && chromaFlag==1)))
begin
// Not the last block row: emit at the left macroblock's coordinates,
// wrapping to the last column of the previous row when currMbHorT is 0.
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData});
end
else if(startLastOutput)
begin
// Last block row, vertical pass finished: store the word to data memory
// under the left macroblock's column.
Bit#(PicWidthSz) temp = ((currMbHor==0) ? (picWidth-1) : truncate(currMbHor-1));
dataMemReqQ.enq(StoreReq {addr:{temp,chromaFlag,blockHor,pixelVer},data:leftData});
if(currMbVer > 0)
begin
// Emit the top-neighbour word at the bottom block row of the macroblock
// above (row index currMbVer-1).
//$display( "TRACE Deblocking Filter: outputting last output %0d %0d %h", blockHor, pixelVer, topVector[{blockHor,pixelVer}]);
Bit#(32) topData = topVector[{blockHor,pixelVer}];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{currMbVer-1,2'b11,pixelVer},hor:{currMbHorT,blockHor},data:topData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer-1,1'b1,pixelVer},hor:{currMbHorT,blockHor[0]},data:topData});
end
end
else
// Last block row reached before the vertical pass finished: hold position.
stalling = True;
if(!stalling)
begin
if(pixelNum==15)
begin
if(blockNum==3 || (chromaFlag==1 && blockNum==1))
begin
// All block rows done. blockNum is left at the last block row (3 luma,
// 1 chroma) when this is the bottom macroblock row, otherwise reset to 0.
if(currMbVer==picHeight-1)
blockNum <= (chromaFlag==0 ? 3 : 1);
else
blockNum <= 0;
outputingFinished <= True;
end
else
blockNum <= blockNum+1;
end
// 4-bit counter: wraps from 15 back to 0 for the next block row.
pixelNum <= pixelNum+1;
end
end
endrule
 
 
// Hand-over from the vertical pass to the cleanup phase. Fires only when
// both handshake flags (startLastOutput and outputingFinished) are set, and
// clears them again as it switches the process state.
rule verticaltocleanup ( process==Vertical && startLastOutput && outputingFinished);
   outputingFinished <= False;
   startLastOutput   <= False;
   process           <= Cleanup;
endrule
 
 
// Cleanup phase after the vertical pass. Behaviour depends on blockNum:
//   blockNum == 0     : copy workVector into leftVector and move on to the
//                       next component / macroblock;
//   0 < blockNum < 8  : emit one block of leftVector words;
//   blockNum >= 8     : emit the workVector words with the current
//                       macroblock's coordinates.
rule cleanup ( process==Cleanup && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: cleanup %0d %0d", blockNum, pixelNum);
// Split the counters into the fields used to build addresses and coordinates.
Bit#(2) blockHor = pixelNum[1:0];
Bit#(2) blockVer = blockNum[1:0];
Bit#(2) pixelVer = pixelNum[3:2];
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor);
Vector#(96,Bit#(32)) leftVectorNext = leftVector;
if(blockNum==0)
begin
if(chromaFlag==0)
begin
// Luma finished: save all 64 work words into leftVector[0..63], then
// restart at Initialize with chromaFlag set.
for(Integer ii=0; ii<64; ii=ii+1)
leftVectorNext[fromInteger(ii)] = workVector[fromInteger(ii)];
chromaFlag <= 1;
process <= Initialize;
end
else
begin
// Chroma finished: save 32 work words into leftVector[64..95]. The
// source index inserts a zero bit below the top two address bits.
for(Integer ii=0; ii<32; ii=ii+1)
begin
Bit#(5) tempAddr = fromInteger(ii);
leftVectorNext[{2'b10,tempAddr}] = workVector[{tempAddr[4:3],1'b0,tempAddr[2:0]}];
end
chromaFlag <= 0;
process <= Passing;
// Store this macroblock's intra flag and QP values to parameter memory
// (indexed by column) and keep a copy in the left_* registers.
Bit#(PicWidthSz) temp = truncate(currMbHor);
parameterMemReqQ.enq(StoreReq {addr:temp,data:{curr_intra,curr_qpc,curr_qpy}});
left_intra <= curr_intra;
left_qpc <= curr_qpc;
left_qpy <= curr_qpy;
currMb <= currMb+1;
currMbHor <= currMbHor+1;
// Last macroblock of the picture: signal the end of the frame.
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1))
outfifo.enq(EndOfFrame);
end
leftVector <= leftVectorNext;
end
else if(blockNum < 8)
begin
// Emit one leftVector word, using the same addressing and coordinates as
// rule outputing (macroblock preceding the current one).
Bit#(7) leftAddr;
if(chromaFlag==0)
leftAddr = {1'b0,blockHor,blockVer,pixelVer};
else
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer};
Bit#(32) leftData = leftVector[leftAddr];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData});
if(pixelNum==15)
begin
// After the block: in the last column continue with the workVector
// phase (blockNum 8), otherwise return to the blockNum==0 branch.
if(currMbHor==zeroExtend(picWidth-1))
blockNum <= 8;
else
blockNum <= 0;
end
pixelNum <= pixelNum+1;
end
else
begin
// blockNum >= 8: emit workVector words tagged with the current macroblock's
// own coordinates; only blockNum[1:0] is used as the block index.
Bit#(6) currAddr = {blockHor,blockVer,pixelVer};
Bit#(32) currData = workVector[currAddr];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:currData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:currData});
if(pixelNum==15)
begin
// Block index 3 (luma) or 1 (chroma) is the last one; then fall back to
// the blockNum==0 branch.
if(blockNum[1:0]==3 || (blockNum[1:0]==1 && chromaFlag==1))
blockNum <= 0;
else
blockNum <= blockNum+1;
end
pixelNum <= pixelNum+1;
end
endrule
 
 
 
// Data memory client: requests are taken from dataMemReqQ and responses are
// delivered into dataMemRespQ.
interface Client mem_client_data;
interface Get request = fifoToGet(dataMemReqQ);
interface Put response = fifoToPut(dataMemRespQ);
endinterface
 
// Parameter memory client: requests are taken from parameterMemReqQ and
// responses are delivered into parameterMemRespQ.
interface Client mem_client_parameter;
interface Get request = fifoToGet(parameterMemReqQ);
interface Put response = fifoToPut(parameterMemRespQ);
endinterface
 
// Inter-module streaming ports: ioin feeds infifo, ioout drains outfifo.
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
endmodule
 
endpackage
/trunk/src/mkInputGen_nodeblock.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
// Test-bench input generator: replays the bytes of a hex image one per
// firing and, once every byte has been sent, keeps offering EndOfFile.
module mkInputGen( IInputGen );

   // Highest index loaded into the register file; indices 0..lastByte are
   // streamed out as data bytes.
   Bit#(27) lastByte = 7476;

   RegFile#(Bit#(27), Bit#(8)) rfile   <- mkRegFileLoad("foreman_qcif1-5_no_deblock.hex", 0, lastByte);
   FIFO#(InputGenOT)           outfifo <- mkFIFO;
   Reg#(Bit#(27))              index   <- mkReg(0);

   // Emit the byte at the current index and step to the next one.
   rule output_byte (index <= lastByte);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      outfifo.enq(DataByte nextByte);
      index <= index + 1;
   endrule

   // The index stays parked one past the last byte, so this rule remains
   // enabled after the data has run out.
   rule end_of_file (index == lastByte + 1);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkFinalOutput.bsv
0,0 → 1,44
//**********************************************************************
// final output implementation
//----------------------------------------------------------------------
//
//
 
package mkFinalOutput;
 
import H264Types::*;
import IFinalOutput::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
//-----------------------------------------------------------
// Final Output Module
//-----------------------------------------------------------
 
// Simulation sink: prints every YUV word it receives, one byte per line,
// and ends the simulation on the first item that is not a YUV word.
module mkFinalOutput( IFinalOutput );

   FIFO#(BufferControlOT) infifo <- mkFIFO;

   //-----------------------------------------------------------
   // Rules
   rule finalout (True);
      case (infifo.first()) matches
         tagged YUV .xdata :
            begin
               // Bytes are printed least-significant first.
               $display("ccl5finalout %h", xdata[7:0]);
               $display("ccl5finalout %h", xdata[15:8]);
               $display("ccl5finalout %h", xdata[23:16]);
               $display("ccl5finalout %h", xdata[31:24]);
               infifo.deq();
            end
         default :
            // Anything else terminates the run; the item is left in the FIFO.
            $finish(0);
      endcase
   endrule


   interface Put ioin = fifoToPut(infifo);

endmodule
 
endpackage
/trunk/src/IInputGen.bsv
0,0 → 1,21
//**********************************************************************
// Interface for input generator
//----------------------------------------------------------------------
//
//
//
 
package IInputGen;
 
import H264Types::*;
import GetPut::*;
 
// Interface of the input generator: a single Get port that yields
// InputGenOT items to the next module.
interface IInputGen;

// Interface for inter-module io
interface Get#(InputGenOT) ioout;

endinterface
 
endpackage
 
/trunk/src/mkInputGen_park20inter.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
// Test-bench input generator: replays the bytes of a hex image one per
// firing and, once every byte has been sent, keeps offering EndOfFile.
module mkInputGen( IInputGen );

   // Highest index loaded into the register file; indices 0..lastByte are
   // streamed out as data bytes.
   Bit#(27) lastByte = 2282510;

   RegFile#(Bit#(27), Bit#(8)) rfile   <- mkRegFileLoad("720p50_parkrun_ter1-20inter.hex", 0, lastByte);
   FIFO#(InputGenOT)           outfifo <- mkFIFO;
   Reg#(Bit#(27))              index   <- mkReg(0);

   // Emit the byte at the current index and step to the next one.
   rule output_byte (index <= lastByte);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      outfifo.enq(DataByte nextByte);
      index <= index + 1;
   endrule

   // The index stays parked one past the last byte, so this rule remains
   // enabled after the data has run out.
   rule end_of_file (index == lastByte + 1);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkEntropyDec.bsv
0,0 → 1,1656
//**********************************************************************
// Entropy Decoder implementation
//----------------------------------------------------------------------
//
//
 
package mkEntropyDec;
 
import H264Types::*;
import ExpGolomb::*;
import CAVLC::*;
import ICalc_nC::*;
import mkCalc_nC::*;
import IEntropyDec::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Parser state of the entropy decoder. States carrying a Bit#(5) payload use
// it as the step counter within that syntax structure.
typedef union tagged
{
void Start; //special state that initializes the process.
void NewUnit; //special state that checks the NAL unit type.
Bit#(5) CodedSlice; //coded slice NAL unit (nal_unit_type 1 or 5); payload = slice header step
void SEI; //SEI NAL unit (nal_unit_type 6)
Bit#(5) SPS; //sequence parameter set NAL unit (nal_unit_type 7); payload = step
Bit#(5) PPS; //picture parameter set NAL unit (nal_unit_type 8); payload = step
void AUD; //access unit delimiter NAL unit (nal_unit_type 9)
void EndSequence; //end of sequence NAL unit (nal_unit_type 10)
void EndStream; //end of stream NAL unit (nal_unit_type 11)
void Filler; //filler data NAL unit (nal_unit_type 12)

Bit#(5) SliceData; //decodes slice data (part of a CodedSlice NAL unit)
Bit#(5) MacroblockLayer; //decodes macroblock layer (part of a CodedSlice NAL unit)
Bit#(5) MbPrediction; //decodes macroblock prediction (part of a CodedSlice NAL unit)
Bit#(5) SubMbPrediction; //decodes sub-macroblock prediction (part of a CodedSlice NAL unit)
Bit#(5) ResidualBlock; //decodes residual block (part of a CodedSlice NAL unit)
}
State deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
// Converts the mb_type syntax element into the MbType representation.
//   in_mb_type    : mb_type value as read from the bitstream
//   in_slice_type : slice_type of the enclosing slice
// For slice_type 2 or 7 the value is first offset by 5, so that both cases
// share one numbering: 0..4 are the P types, 5 is I_NxN, 30 is I_PCM and
// every other value is decoded as an I_16x16 variant.
function MbType mbtype_convert( Bit#(5) in_mb_type, Bit#(4) in_slice_type );
   Bool    intraSlice = (in_slice_type == 2) || (in_slice_type == 7);
   Bit#(5) mbIndex    = intraSlice ? (in_mb_type + 5) : in_mb_type;
   MbType  result;

   if (mbIndex == 0)
      result = P_L0_16x16;
   else if (mbIndex == 1)
      result = P_L0_L0_16x8;
   else if (mbIndex == 2)
      result = P_L0_L0_8x16;
   else if (mbIndex == 3)
      result = P_8x8;
   else if (mbIndex == 4)
      result = P_8x8ref0;
   else if (mbIndex == 5)
      result = I_NxN;
   else if (mbIndex == 30)
      result = I_PCM;
   else
      begin
         // idx16 counts the I_16x16 variants from zero. The low two bits are
         // the prediction mode; entries 12 and up have the luma flag set.
         Bit#(5) idx16    = mbIndex - 6;
         Bit#(1) lumaFlag = (idx16 < 12) ? 0 : 1;
         // In the upper half bits [3:2] are one behind the chroma pattern;
         // the 2-bit add wraps, which brings them back into step.
         Bit#(2) chromaCode = idx16[3:2] + zeroExtend(lumaFlag);
         result = I_16x16{intra16x16PredMode:idx16[1:0], codedBlockPatternChroma:chromaCode, codedBlockPatternLuma:lumaFlag};
      end
   return result;
endfunction
 
 
 
//-----------------------------------------------------------
// Entropy Decoder Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkEntropyDec( IEntropyDec );
// Input items (NalUnwrapOT) and the two output streams; outfifo_ITB carries
// the EntropyDecOT_InverseTrans items.
FIFO#(NalUnwrapOT) infifo <- mkSizedFIFO(entropyDec_infifo_size);
FIFO#(EntropyDecOT) outfifo <- mkFIFO;
FIFO#(EntropyDecOT_InverseTrans) outfifo_ITB <- mkFIFO;
// Current parser state, plus the two fields captured from the first byte of
// each NAL unit (bits [6:5] and [4:0]) by rule newunit.
Reg#(State) state <- mkReg(Start);
Reg#(Bit#(2)) nalrefidc <- mkReg(0);
Reg#(Bit#(5)) nalunittype <- mkReg(0);
// Bit buffer read by the parser; bufcount is advanced by 8 for every byte
// that rule fillbuffer merges in.
Reg#(Buffer) buffer <- mkReg(0);
Reg#(Bufcount) bufcount <- mkReg(0);

//saved syntax elements
Reg#(Bit#(5)) spsseq_parameter_set_id <- mkReg(0);
Reg#(Bit#(5)) spslog2_max_frame_num <- mkReg(0);
Reg#(Bit#(5)) spslog2_max_pic_order_cnt_lsb <- mkReg(0);
Reg#(Bit#(2)) spspic_order_cnt_type <- mkReg(0);
Reg#(Bit#(1)) spsdelta_pic_order_always_zero_flag <- mkReg(0);
Reg#(Bit#(8)) spsnum_ref_frames_in_pic_order_cnt_cycle <- mkReg(0);
Reg#(Bit#(8)) ppspic_parameter_set_id <- mkReg(0);
Reg#(Bit#(1)) ppspic_order_present_flag <- mkReg(0);
Reg#(Bit#(1)) ppsdeblocking_filter_control_present_flag <- mkReg(0);
Reg#(Bit#(4)) shslice_type <- mkReg(0);
Reg#(Bit#(3)) shdmemory_management_control_operation <- mkReg(0);
Reg#(MbType) sdmmbtype <- mkReg(I_NxN);
Reg#(Bit#(4)) sdmcodedBlockPatternLuma <- mkReg(0);
Reg#(Bit#(2)) sdmcodedBlockPatternChroma <- mkReg(0);
Reg#(Bit#(5)) sdmrTotalCoeff <- mkReg(0);
Reg#(Bit#(2)) sdmrTrailingOnes <- mkReg(0);
//derived decoding variables for slice data
Reg#(Bit#(16)) tempreg <- mkReg(0);
Reg#(Bit#(5)) num_ref_idx_l0_active_minus1 <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbAddr <- mkReg(0);
Reg#(Bit#(3)) temp3bit0 <- mkReg(0);
Reg#(Bit#(3)) temp3bit1 <- mkReg(0);
Reg#(Bit#(3)) temp3bit2 <- mkReg(0);
Reg#(Bit#(3)) temp3bit3 <- mkReg(0);
Reg#(Bit#(5)) temp5bit <- mkReg(0);
Reg#(Bit#(5)) temp5bit2 <- mkReg(0);
Reg#(Bit#(5)) maxNumCoeff <- mkReg(0);
FIFO#(Bit#(13)) cavlcFIFO <- mkSizedFIFO(16);
Calc_nC calcnc <- mkCalc_nC();
Reg#(Bit#(1)) residualChroma <- mkReg(0);
Reg#(Bit#(5)) totalCoeff <- mkReg(0);
Reg#(Bit#(4)) zerosLeft <- mkReg(0);

//exp-golomb 32-bit version states
// Zero when idle; holds the expgolomb_numbits32 result between the two
// parser firings that decode one 32-bit exp-golomb value.
Reg#(Bufcount) egnumbits <- mkReg(0);

//extra-buffering states
// extrabuffer collects up to four bytes (count in extrabufcount) before
// they are merged into buffer. extraendnalflag is set by rule
// fillextrabuffer when a non-data item arrives with bytes still pending;
// endnalflag is set by rule fillbuffer when the next input item is NewUnit
// or EndOfFile.
Reg#(Bit#(32)) extrabuffer <- mkReg(0);
Reg#(Bit#(3)) extrabufcount <- mkReg(0);
Reg#(Bit#(1)) extraendnalflag <- mkReg(0);
Reg#(Bit#(1)) endnalflag <- mkReg(0);
 
//-----------------------------------------------------------
// Rules
 
// Idle state: discard input until a NewUnit marker arrives, then reset all
// buffering state and go on to inspect the NAL header. EndOfFile is passed
// on to outfifo.
rule startup (state matches Start);
   let token = infifo.first();
   if (token matches tagged NewUnit)
      begin
         infifo.deq();
         // Clear both buffering stages and their end-of-unit flags.
         extrabuffer     <= 0;
         extrabufcount   <= 0;
         extraendnalflag <= 0;
         buffer          <= 0;
         bufcount        <= 0;
         endnalflag      <= 0;
         state           <= NewUnit;
      end
   else if (token matches tagged RbspByte .ignored)
      // Data bytes seen while idle are dropped.
      infifo.deq();
   else if (token matches tagged EndOfFile)
      begin
         infifo.deq();
         outfifo.enq(EndOfFile);
         $display( "INFO EntropyDec: EndOfFile reached" );
      end
endrule
 
// Reads the first byte of a NAL unit, records nal_ref_idc (bits [6:5]) and
// nal_unit_type (bits [4:0]), selects the matching parser state and
// announces the unit on both output FIFOs. A NewUnit or EndOfFile item in
// this position is left in the FIFO and handled by returning to Start.
rule newunit (state matches NewUnit);
   let token = infifo.first();
   if (token matches tagged RbspByte .hdr)
      begin
         Bit#(5) unitType = hdr[4:0];
         infifo.deq();
         nalrefidc   <= hdr[6:5];
         nalunittype <= unitType;
         case (unitType)
            1 : state <= CodedSlice 0;
            5 : state <= CodedSlice 0;
            6 : state <= SEI;
            7 : state <= SPS 0;
            8 : state <= PPS 0;
            9 : state <= AUD;
            10: state <= EndSequence;
            11: state <= EndStream;
            12: state <= Filler;
            default:
               begin
                  // Unsupported unit type: report it and go back to idle.
                  $display( "ERROR EntropyDec: NAL Unit Type = %d", unitType );
                  state <= Start;
               end
         endcase
         $display("ccl2newunit");
         $display("ccl2rbspbyte %h", hdr);
         outfifo.enq(NewUnit hdr);
         outfifo_ITB.enq(NewUnit hdr);
      end
   else if (token matches tagged NewUnit)
      state <= Start;
   else if (token matches tagged EndOfFile)
      state <= Start;
endrule
 
 
// First buffering stage: gathers up to four data bytes into extrabuffer,
// most-significant byte first. If the next input item is not a data byte
// while bytes are pending, extraendnalflag is raised instead.
rule fillextrabuffer (state != Start
                      && state != NewUnit
                      && extrabufcount < 4
                      && extraendnalflag == 0);
   case (infifo.first()) matches
      tagged RbspByte .nextByte :
         begin
            // Place the byte in the lane selected by the current fill count.
            case ( extrabufcount )
               0: extrabuffer <= {nextByte, extrabuffer[23:0]};
               1: extrabuffer <= {extrabuffer[31:24],nextByte,extrabuffer[15:0]};
               2: extrabuffer <= {extrabuffer[31:16],nextByte,extrabuffer[7:0]};
               3: extrabuffer <= {extrabuffer[31:8],nextByte};
               default: $display( "ERROR EntropyDec: fillextrabuffer default case_" );
            endcase
            infifo.deq();
            extrabufcount <= extrabufcount + 1;
            //$display( "TRACE EntropyDec: fillextrabuffer RbspByte %h %h %h", nextByte, extrabufcount, extrabuffer);
         end
      default :
         begin
            // Not a data byte: the item stays in the FIFO. Only a partly
            // filled buffer needs the end-of-unit mark.
            if(extrabufcount != 0)
               extraendnalflag <= 1;
            //$display( "TRACE EntropyDec: fillextrabuffer else %h", extrabufcount);
         end
   endcase
endrule
 
// Second buffering stage: merges the gathered bytes into the main bit
// buffer directly below the bits already held, then empties the gathering
// stage. Fires only while at least 32 bits of the main buffer are free.
// NOTE(review): the original author marked this guard "predicate not sure".
rule fillbuffer (state != Start
                 && state != NewUnit
                 && bufcount<=truncate(buffersize-32)
                 && (extrabufcount == 4 || extraendnalflag == 1)
                 && endnalflag == 0);//predicate not sure
   // The next input item decides whether this was the unit's last refill.
   let  upcoming = infifo.first();
   Bool unitEnds = (upcoming == NewUnit) || (upcoming == EndOfFile);

   // Shift that lines the 32-bit word up under the bits already in buffer.
   Bufcount alignShift = truncate(buffersize)-bufcount-32;
   Buffer   widened    = zeroExtend(extrabuffer);
   Buffer   aligned    = widened << zeroExtend(alignShift);
   buffer <= (buffer | aligned);

   // Only whole bytes that were actually gathered count as valid bits.
   case ( extrabufcount )
      4: bufcount <= bufcount+32;
      3: bufcount <= bufcount+24;
      2: bufcount <= bufcount+16;
      1: bufcount <= bufcount+8;
      default: $display( "ERROR EntropyDec: fillbuffer default case" );
   endcase

   extrabufcount <= 0;
   extrabuffer   <= 0;
   if(unitEnds)
      endnalflag <= 1;
   //$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h %h %h %h %h %h %h", extrabufcount, bufcount, extrabuffer, widened, alignShift, aligned, buffer, (buffer | aligned));
endrule
 
 
rule parser (state != Start
&&& state != NewUnit
&&& (bufcount > truncate(buffersize-32) || endnalflag == 1));//predicate not sure
//$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h", bufcount, buffer );
Bufcount numbitsused = 0;
State nextstate = Start;
Int#(16) tempint = 0;
Int#(32) tempint32 = 0;
case ( state ) matches
tagged CodedSlice .step :
begin
case ( step )
0:
begin
$display( "ccl2SHfirst_mb_in_slice %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHfirst_mb_in_slice truncate(expgolomb_unsigned(buffer)));
currMbAddr <= truncate(expgolomb_unsigned(buffer));
calcnc.initialize(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 1;
end
1:
begin
$display( "ccl2SHslice_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHslice_type truncate(expgolomb_unsigned(buffer)));
shslice_type <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 2;
end
2:
begin
$display( "ccl2SHpic_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 3;
if(ppspic_parameter_set_id != truncate(expgolomb_unsigned(buffer))) $display( "ERROR EntropyDec: pic_parameter_set_id don't match" );
end
3:
begin
Bit#(16) tttt = buffer[buffersize-1:buffersize-16];
tttt = tttt >> 16 - zeroExtend(spslog2_max_frame_num);
$display( "ccl2SHframe_num %0d", tttt );
outfifo.enq(SHframe_num tttt);
numbitsused = zeroExtend(spslog2_max_frame_num);
nextstate = CodedSlice 4;
end
4:
begin
if(nalunittype == 5)
begin
$display( "ccl2SHidr_pic_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHidr_pic_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = CodedSlice 5;
end
5:
begin
if(spspic_order_cnt_type == 0)
begin
Bit#(16) tttt = buffer[buffersize-1:buffersize-16];
tttt = tttt >> 16 - zeroExtend(spslog2_max_pic_order_cnt_lsb);
$display( "ccl2SHpic_order_cnt_lsb %0d", tttt );
outfifo.enq(SHpic_order_cnt_lsb tttt);
numbitsused = zeroExtend(spslog2_max_pic_order_cnt_lsb);
nextstate = CodedSlice 6;
end
else
nextstate = CodedSlice 7;
end
6:
begin
if(ppspic_order_present_flag == 1)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 6;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt_bottom %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt_bottom truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 7;
end
end
else
nextstate = CodedSlice 7;
end
7:
begin
if(spspic_order_cnt_type == 1 && spsdelta_pic_order_always_zero_flag == 0)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 7;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt0 %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt0 truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 8;
end
end
else
nextstate = CodedSlice 9;
end
8:
begin
if(ppspic_order_present_flag == 1)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 8;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt1 %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt1 truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 9;
end
end
else
nextstate = CodedSlice 9;
end
9:
begin
if(shslice_type == 0 || shslice_type == 5)
begin
$display( "ccl2SHnum_ref_idx_active_override_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHnum_ref_idx_active_override_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 10;
else
nextstate = CodedSlice 11;
end
else
nextstate = CodedSlice 11;
end
10:
begin
$display( "ccl2SHnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SHnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1));
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 11;
end
11:
begin
if(shslice_type != 2 && shslice_type != 7)
begin
$display( "ccl2SHRref_pic_list_reordering_flag_l0 %0d", buffer[buffersize-1] );
outfifo.enq(SHRref_pic_list_reordering_flag_l0 buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 12;
else
nextstate = CodedSlice 15;
end
else
nextstate = CodedSlice 15;
end
12:
begin
$display( "ccl2SHRreordering_of_pic_nums_idc %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHRreordering_of_pic_nums_idc truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)==0 || expgolomb_unsigned(buffer)==1)
nextstate = CodedSlice 13;
else if(expgolomb_unsigned(buffer)==2)
nextstate = CodedSlice 14;
else
nextstate = CodedSlice 15;
end
13:
begin
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1;
$display( "ccl2SHRabs_diff_pic_num %0d", temp17 );
outfifo.enq(SHRabs_diff_pic_num temp17);
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 12;
end
14:
begin
$display( "ccl2SHRlong_term_pic_num %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHRlong_term_pic_num truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 12;
end
15:
begin
if(nalrefidc == 0)
nextstate = CodedSlice 23;
else
begin
if(nalunittype == 5)
begin
$display( "ccl2SHDno_output_of_prior_pics_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDno_output_of_prior_pics_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = CodedSlice 16;
end
else
nextstate = CodedSlice 17;
end
end
16:
begin
$display( "ccl2SHDlong_term_reference_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDlong_term_reference_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = CodedSlice 23;
end
17:
begin
$display( "ccl2SHDadaptive_ref_pic_marking_mode_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDadaptive_ref_pic_marking_mode_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 18;
else
nextstate = CodedSlice 23;
end
18:
begin
$display( "ccl2SHDmemory_management_control_operation %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDmemory_management_control_operation truncate(expgolomb_unsigned(buffer)));
shdmemory_management_control_operation <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)!=0)
nextstate = CodedSlice 19;
else
nextstate = CodedSlice 23;
end
19:
begin
if(shdmemory_management_control_operation==1 || shdmemory_management_control_operation==3)
begin
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1;
$display( "ccl2SHDdifference_of_pic_nums %0d", temp17 );
outfifo.enq(SHDdifference_of_pic_nums temp17);
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 20;
end
else
nextstate = CodedSlice 20;
end
20:
begin
if(shdmemory_management_control_operation==2)
begin
$display( "ccl2SHDlong_term_pic_num %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDlong_term_pic_num truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 21;
end
else
nextstate = CodedSlice 21;
end
21:
begin
if(shdmemory_management_control_operation==3 || shdmemory_management_control_operation==6)
begin
$display( "ccl2SHDlong_term_frame_idx %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDlong_term_frame_idx truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 22;
end
else
nextstate = CodedSlice 22;
end
22:
begin
if(shdmemory_management_control_operation==4)
begin
$display( "ccl2SHDmax_long_term_frame_idx_plus1 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDmax_long_term_frame_idx_plus1 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 18;
end
else
nextstate = CodedSlice 18;
end
23:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SHslice_qp_delta %0d", tempint );
outfifo_ITB.enq(SHslice_qp_delta truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 24;
end
24:
begin
if(ppsdeblocking_filter_control_present_flag==1)
begin
$display( "ccl2SHdisable_deblocking_filter_idc %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHdisable_deblocking_filter_idc truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)!=1)
nextstate = CodedSlice 25;
else
nextstate = CodedSlice 27;
end
else
nextstate = CodedSlice 27;
end
25:
begin
tempint = unpack(expgolomb_signed(buffer) << 1);
$display( "ccl2SHslice_alpha_c0_offset %0d", tempint );
outfifo.enq(SHslice_alpha_c0_offset truncate(expgolomb_signed(buffer) << 1));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 26;
end
26:
begin
tempint = unpack(expgolomb_signed(buffer) << 1);
$display( "ccl2SHslice_beta_offset %0d", tempint );
outfifo.enq(SHslice_beta_offset truncate(expgolomb_signed(buffer) << 1));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 27;
end
27:
begin
nextstate = SliceData 0;
end
default: $display( "ERROR EntropyDec: CodedSlice default step" );
endcase
end
tagged SEI .step :
begin
nextstate = Start;
$display( "INFO EntropyDec: SEI data thrown away" );
end
tagged SPS .step :
begin
case ( step )
0:
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "INFO EntropyDec: profile_idc = %d", outputdata );
outputdata = buffer[buffersize-9:buffersize-16];
$display( "INFO EntropyDec: constraint_set = %b", outputdata );
outputdata = buffer[buffersize-17:buffersize-24];
$display( "INFO EntropyDec: level_idc = %d", outputdata );
numbitsused = 24;
nextstate = SPS 1;
end
1:
begin
$display( "ccl2SPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer)));
spsseq_parameter_set_id <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 2;
end
2:
begin
$display( "ccl2SPSlog2_max_frame_num %0d", expgolomb_unsigned(buffer)+4 );
outfifo.enq(SPSlog2_max_frame_num truncate(expgolomb_unsigned(buffer)+4));
spslog2_max_frame_num <= truncate(expgolomb_unsigned(buffer)+4);
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 3;
end
3:
begin
let tttt = expgolomb_unsigned(buffer);
$display( "ccl2SPSpic_order_cnt_type %0d", tttt );
outfifo.enq(SPSpic_order_cnt_type truncate(tttt));
spspic_order_cnt_type <= truncate(tttt);
numbitsused = expgolomb_numbits(buffer);
if(tttt == 0)
nextstate = SPS 4;
else if(tttt == 1)
nextstate = SPS 5;
else
nextstate = SPS 10;
end
4:
begin
$display( "ccl2SPSlog2_max_pic_order_cnt_lsb %0d", expgolomb_unsigned(buffer)+4 );
outfifo.enq(SPSlog2_max_pic_order_cnt_lsb truncate(expgolomb_unsigned(buffer)+4));
spslog2_max_pic_order_cnt_lsb <= truncate(expgolomb_unsigned(buffer)+4);
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 10;
end
5:
begin
$display( "ccl2SPSdelta_pic_order_always_zero_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSdelta_pic_order_always_zero_flag buffer[buffersize-1]);
spsdelta_pic_order_always_zero_flag <= buffer[buffersize-1];
numbitsused = 1;
nextstate = SPS 6;
end
6:
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 6;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_non_ref_pic %0d", tempint32 );
outfifo.enq(SPSoffset_for_non_ref_pic truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = SPS 7;
end
end
7:
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 7;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_top_to_bottom_field %0d", tempint32 );
outfifo.enq(SPSoffset_for_top_to_bottom_field truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = SPS 8;
end
end
8:
begin
$display( "ccl2SPSnum_ref_frames_in_pic_order_cnt_cycle %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSnum_ref_frames_in_pic_order_cnt_cycle truncate(expgolomb_unsigned(buffer)));
spsnum_ref_frames_in_pic_order_cnt_cycle <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 9;
end
9:
begin
if(spsnum_ref_frames_in_pic_order_cnt_cycle == 0)
nextstate = SPS 10;
else
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 9;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_ref_frame %0d", tempint32 );
outfifo.enq(SPSoffset_for_ref_frame truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
spsnum_ref_frames_in_pic_order_cnt_cycle <= spsnum_ref_frames_in_pic_order_cnt_cycle - 1;
numbitsused = egnumbits;
nextstate = SPS 9;
end
end
end
10:
begin
$display( "ccl2SPSnum_ref_frames %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSnum_ref_frames truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 11;
end
11:
begin
$display( "ccl2SPSgaps_in_frame_num_allowed_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSgaps_in_frame_num_allowed_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = SPS 12;
end
12:
begin
$display( "ccl2SPSpic_width_in_mbs %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SPSpic_width_in_mbs truncate(expgolomb_unsigned(buffer)+1));
calcnc.initialize_picWidth(truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 13;
end
13:
begin
$display( "ccl2SPSpic_height_in_map_units %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SPSpic_height_in_map_units truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 14;
end
14:
begin
//SPSframe_mbs_only_flag = 1 for baseline
numbitsused = 1;
nextstate = SPS 15;
end
15:
begin
$display( "ccl2SPSdirect_8x8_inference_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSdirect_8x8_inference_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = SPS 16;
end
16:
begin
$display( "ccl2SPSframe_cropping_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSframe_cropping_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = SPS 17;
else
nextstate = SPS 21;
end
17:
begin
$display( "ccl2SPSframe_crop_left_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_left_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 18;
end
18:
begin
$display( "ccl2SPSframe_crop_right_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_right_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 19;
end
19:
begin
$display( "ccl2SPSframe_crop_top_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_top_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 20;
end
20:
begin
$display( "ccl2SPSframe_crop_bottom_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_bottom_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 21;
end
21:
begin
nextstate = Start;
$display( "INFO EntropyDec:VUI data thrown away" );
end
default: $display( "ERROR EntropyDec: SPS default step" );
endcase
end
tagged PPS .step :
begin
case ( step )
0:
begin
ppspic_parameter_set_id <= truncate(expgolomb_unsigned(buffer));
$display( "ccl2PPSpic_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
outfifo_ITB.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 1;
end
1:
begin
$display( "ccl2PPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(PPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 2;
if(spsseq_parameter_set_id != truncate(expgolomb_unsigned(buffer)))
$display( "ERROR EntropyDec: seq_parameter_set_id don't match" );
end
2:
begin
//PPSentropy_coding_mode_flag = 0 for baseline
numbitsused = 1;
nextstate = PPS 3;
end
3:
begin
ppspic_order_present_flag <= buffer[buffersize-1];
$display( "ccl2PPSpic_order_present_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSpic_order_present_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 4;
end
4:
begin
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 5;
if(expgolomb_unsigned(buffer)+1 != 1)
$display( "ERROR EntropyDec: PPSnum_slice_groups not equal to 1" );//=1 for main
end
5:
begin
$display( "ccl2PPSnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(PPSnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1));
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 6;
end
6:
begin
$display( "ccl2PPSnum_ref_idx_l1_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(PPSnum_ref_idx_l1_active truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 7;
end
7:
begin
//PPSweighted_pred_flag = 0 for baseline; PPSweighted_bipred_idc = 0 for baseline
numbitsused = 3;
nextstate = PPS 8;
end
8:
begin
$display( "ccl2PPSpic_init_qp %0d", expgolomb_signed(buffer)+26 );
outfifo_ITB.enq(PPSpic_init_qp truncate(expgolomb_signed(buffer)+26));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 9;
end
9:
begin
$display( "ccl2PPSpic_init_qs %0d", expgolomb_signed(buffer)+26 );
outfifo_ITB.enq(PPSpic_init_qs truncate(expgolomb_signed(buffer)+26));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 10;
end
10:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2PPSchroma_qp_index_offset %0d", tempint );
outfifo_ITB.enq(PPSchroma_qp_index_offset truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 11;
end
11:
begin
ppsdeblocking_filter_control_present_flag <= buffer[buffersize-1];
$display( "ccl2PPSdeblocking_filter_control_present_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSdeblocking_filter_control_present_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 12;
end
12:
begin
$display( "ccl2PPSconstrained_intra_pred_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSconstrained_intra_pred_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 13;
end
13:
begin
//PPSredundant_pic_cnt_present_flag = 0 for main
numbitsused = 1;
nextstate = PPS 14;
if(buffer[buffersize-1] != 0)
$display( "ERROR EntropyDec: PPSredundant_pic_cnt_present_flag not equal to 0" );//=0 for main
end
14:
begin
nextstate = Start;
end
default: $display( "ERROR EntropyDec: PPS default step" );
endcase
end
tagged AUD .step :
begin
outfifo.enq(AUDPrimaryPicType buffer[buffersize-1:buffersize-3]);
numbitsused = 3;
nextstate = Start;
end
tagged EndSequence :
begin
outfifo.enq(EndOfSequence);
nextstate = Start;
end
tagged EndStream :
begin
outfifo.enq(EndOfStream);
nextstate = Start;
end
tagged Filler :
begin
nextstate = Start;
end
tagged SliceData .step :
begin
case ( step )
0:
begin
if( shslice_type!=2 && shslice_type!=7 )
begin
$display( "ccl2SDmb_skip_run %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDmb_skip_run truncate(expgolomb_unsigned(buffer)));
tempreg <= truncate(expgolomb_unsigned(buffer));
calcnc.nNupdate_pskip( truncate(expgolomb_unsigned(buffer)) );
numbitsused = expgolomb_numbits(buffer);
nextstate = SliceData 1;
end
else
nextstate = SliceData 2;
end
1:
begin
if( tempreg>0 )
begin
currMbAddr <= currMbAddr+1;//only because input assumed to comform to both baseline and main
tempreg <= tempreg-1;
nextstate = SliceData 1;
end
else
begin
////$display( "ccl2SDcurrMbAddr %0d", currMbAddr );
////outfifo.enq(SDcurrMbAddr currMbAddr);
nextstate = SliceData 2;
end
end
2:
begin
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 )
begin
calcnc.loadMb(currMbAddr);
nextstate = MacroblockLayer 0;
end
else
nextstate = SliceData 3;
end
3:
begin
currMbAddr <= currMbAddr+1;//only because input assumed to comform to both baseline and main
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 )
nextstate = SliceData 0;
else
nextstate = Start;
end
default: $display( "ERROR EntropyDec: SliceData default step" );
endcase
end
tagged MacroblockLayer .step : //return to SliceData 3
begin
case ( step )
0:
begin
$display( "ccl2SDMmb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) );
outfifo_ITB.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) );
sdmmbtype <= mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type);
numbitsused = expgolomb_numbits(buffer);
if(mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) == I_PCM)
begin
calcnc.nNupdate_ipcm();
nextstate = MacroblockLayer 1;
end
else
nextstate = MacroblockLayer 4;
end
1:
begin
tempreg <= 256;
numbitsused = zeroExtend(bufcount[2:0]);
nextstate = MacroblockLayer 2;
end
2:
begin
if( tempreg>0 )
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "ccl2SDMpcm_sample_luma %0d", outputdata );
outfifo.enq(SDMpcm_sample_luma outputdata);
tempreg <= tempreg-1;
numbitsused = 8;
nextstate = MacroblockLayer 2;
end
else
begin
tempreg <= 128;
nextstate = MacroblockLayer 3;
end
end
3:
begin
if( tempreg>0 )
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "ccl2SDMpcm_sample_chroma %0d", outputdata );
outfifo.enq(SDMpcm_sample_chroma outputdata);
tempreg <= tempreg-1;
numbitsused = 8;
nextstate = MacroblockLayer 3;
end
else
nextstate = SliceData 3;
end
4:
begin
if(sdmmbtype != I_NxN
&&& mbPartPredMode(sdmmbtype,0) != Intra_16x16
&&& numMbPart(sdmmbtype) == 4)
nextstate = SubMbPrediction 0;
else
nextstate = MbPrediction 0;
end
5:
begin
if(mbPartPredMode(sdmmbtype,0) != Intra_16x16)
begin
$display( "ccl2SDMcoded_block_pattern %0d", expgolomb_coded_block_pattern(buffer,sdmmbtype) );
////outfifo.enq(SDMcoded_block_pattern expgolomb_coded_block_pattern(buffer,sdmmbtype));
sdmcodedBlockPatternLuma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[3:0];
sdmcodedBlockPatternChroma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[5:4];
numbitsused = expgolomb_numbits(buffer);
end
else
begin
if(sdmmbtype matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3})
begin
sdmcodedBlockPatternLuma <= {tempv3,tempv3,tempv3,tempv3};
sdmcodedBlockPatternChroma <= tempv2;
end
else
$display( "ERROR EntropyDec: MacroblockLayer 5 sdmmbtype not I_16x16" );
end
nextstate = MacroblockLayer 6;
end
6:
begin
if(sdmcodedBlockPatternLuma > 0
|| sdmcodedBlockPatternChroma > 0
|| mbPartPredMode(sdmmbtype,0) == Intra_16x16)
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMmb_qp_delta %0d", tempint );
outfifo_ITB.enq(SDMmb_qp_delta truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
residualChroma <= 0;
temp5bit <= 0;
maxNumCoeff <= 16;
nextstate = ResidualBlock 0;
end
default: $display( "ERROR EntropyDec: MacroblockLayer default step" );
endcase
end
tagged MbPrediction .step : //return to MacroblockLayer 5
begin
case ( step )
0:
begin
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16)
begin
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MacroblockLayer 5;
end
else if(mbPartPredMode(sdmmbtype,0) == Intra_4x4)
begin
temp5bit <= 16;
nextstate = MbPrediction 1;
end
else if(num_ref_idx_l0_active_minus1 > 0)
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 2;
end
else
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 3;
end
end
1:
begin
if(temp5bit == 0)
begin
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MacroblockLayer 5;
end
else
begin
////$display( "ccl2SDMMprev_intra4x4_pred_mode_flag %0d", buffer[buffersize-1] );
if(buffer[buffersize-1] == 0)
begin
Bit#(4) tttt = buffer[buffersize-1:buffersize-4];
$display( "ccl2SDMMrem_intra4x4_pred_mode %0d", tttt );
outfifo.enq(SDMMrem_intra4x4_pred_mode tttt);
numbitsused = 4;
end
else
begin
outfifo.enq(SDMMrem_intra4x4_pred_mode 4'b1000);
numbitsused = 1;
end
temp5bit <= temp5bit-1;
nextstate = MbPrediction 1;
end
end
2:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMMref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMMref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMMref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
if(temp3bit0 == 1)
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 3;
end
else
begin
temp3bit0 <= temp3bit0-1;
nextstate = MbPrediction 2;
end
end
3:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMMmvd_l0 %0d", tempint );
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MbPrediction 4;
end
4:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMMmvd_l0 %0d", tempint );
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit0 <= temp3bit0-1;
if(temp3bit0 == 1)
nextstate = MacroblockLayer 5;
else
nextstate = MbPrediction 3;
end
default: $display( "ERROR EntropyDec: MbPrediction default step" );
endcase
end
tagged SubMbPrediction .step : //return to MacroblockLayer 5
begin
case ( step )
0:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit0 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 1;
end
1:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit1 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 2;
end
2:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit2 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 3;
end
3:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit3 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(num_ref_idx_l0_active_minus1 > 0
&& sdmmbtype != P_8x8ref0)
nextstate = SubMbPrediction 4;
else
nextstate = SubMbPrediction 8;
end
4:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 5;
end
5:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 6;
end
6:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 7;
end
7:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 8;
end
8:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 9;
end
9:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit0 <= temp3bit0-1;
if(temp3bit0 == 1)
nextstate = SubMbPrediction 10;
else
nextstate = SubMbPrediction 8;
end
10:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 11;
end
11:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit1 <= temp3bit1-1;
if(temp3bit1 == 1)
nextstate = SubMbPrediction 12;
else
nextstate = SubMbPrediction 10;
end
12:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 13;
end
13:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit2 <= temp3bit2-1;
if(temp3bit2 == 1)
nextstate = SubMbPrediction 14;
else
nextstate = SubMbPrediction 12;
end
14:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 15;
end
15:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit3 <= temp3bit3-1;
if(temp3bit3 == 1)
nextstate = MacroblockLayer 5;
else
nextstate = SubMbPrediction 14;
end
default: $display( "ERROR EntropyDec: SubMbPrediction default step" );
endcase
end
tagged ResidualBlock .step : //if(residualChroma==0) return to Residual 1; else if(maxNumCoeff==4) return to Residual 3; else return to Residual 5
begin//don't modify maxNumCoeff, residualChroma, and increment temp5bit on return
case ( step )
0:
begin
cavlcFIFO.clear();
if(maxNumCoeff != 4)
begin
if(residualChroma == 0)
tempreg <= zeroExtend(calcnc.nCcalc_luma(truncate(temp5bit)));
else
tempreg <= zeroExtend(calcnc.nCcalc_chroma(truncate(temp5bit)));
end
else
tempreg <= zeroExtend(6'b111111);
if(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16)
nextstate = ResidualBlock 1;
else if(residualChroma==0 && (sdmcodedBlockPatternLuma & (1 << zeroExtend(temp5bit[3:2])))==0)
begin
calcnc.nNupdate_luma(truncate(temp5bit),0);
outfifo_ITB.enq(SDMRcoeffLevelZeros maxNumCoeff);
nextstate = ResidualBlock 5;
end
else if(residualChroma==1 && maxNumCoeff==4 && (sdmcodedBlockPatternChroma & 3)==0)
begin
outfifo_ITB.enq(SDMRcoeffLevelZeros 4);
nextstate = ResidualBlock 5;
end
else if(residualChroma==1 && maxNumCoeff!=4 && (sdmcodedBlockPatternChroma & 2)==0)
begin
calcnc.nNupdate_chroma(truncate(temp5bit),0);
outfifo_ITB.enq(SDMRcoeffLevelZeros 15);
nextstate = ResidualBlock 5;
end
else
nextstate = ResidualBlock 1;
//$display( "TRACE EntropyDec: ResidualBlock 0 temp5bit = %0d", temp5bit);
end
1:
begin
Bit#(2) trailingOnesTemp = 0;
Bit#(5) totalCoeffTemp = 0;
{trailingOnesTemp,totalCoeffTemp,numbitsused} = cavlc_coeff_token( buffer, truncate(tempreg) );
temp3bit0 <= zeroExtend(trailingOnesTemp);//trailingOnes
totalCoeff <= totalCoeffTemp;
if(residualChroma == 0 && !(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16))
calcnc.nNupdate_luma(truncate(temp5bit),totalCoeffTemp);
else if(residualChroma == 1 && maxNumCoeff != 4)
calcnc.nNupdate_chroma(truncate(temp5bit),totalCoeffTemp);
temp5bit2 <= 0;//i
tempreg <= 0;//levelCode temp
if(totalCoeffTemp > 10 && trailingOnesTemp < 3)
temp3bit1 <= 1;//suffixLength
else
temp3bit1 <= 0;//suffixLength
nextstate = ResidualBlock 2;
//$display( "TRACE EntropyDec: ResidualBlock 1 nC = %0d", tempreg);
$display( "ccl2SDMRtotal_coeff %0d", totalCoeffTemp );
$display( "ccl2SDMRtrailing_ones %0d", trailingOnesTemp );
end
2:
begin
if( totalCoeff != 0 )
begin
if(temp5bit2 < zeroExtend(temp3bit0))
begin
if(buffer[buffersize-1] == 1)
cavlcFIFO.enq(-1);
else
cavlcFIFO.enq(1);
numbitsused = 1;
end
else
begin
Bit#(32) buffertempshow = buffer[buffersize-1:buffersize-32];
Bit#(3) suffixLength = temp3bit1;
Bit#(4) levelSuffixSize = zeroExtend(suffixLength);
Bit#(4) level_prefix = cavlc_level_prefix( buffer );
Bit#(5) temp_level_prefix = zeroExtend(level_prefix);
Bit#(28) tempbuffer = buffer[buffersize-1:buffersize-28] << zeroExtend(temp_level_prefix+1);
Bit#(14) levelCode = zeroExtend(level_prefix) << zeroExtend(suffixLength);
if(level_prefix == 14 && suffixLength == 0)
levelSuffixSize = 4;
else if(level_prefix == 15)
levelSuffixSize = 12;
levelCode = levelCode + zeroExtend(tempbuffer[27:16] >> (12-zeroExtend(levelSuffixSize)));//level_suffix
if(level_prefix == 15 && suffixLength == 0)
levelCode = levelCode + 15;
if(temp5bit2 == zeroExtend(temp3bit0) && temp3bit0 < 3)
levelCode = levelCode + 2;
if(suffixLength == 0)
suffixLength = 1;
if( suffixLength < 6 && ((levelCode+2) >> 1) > (3 << zeroExtend(suffixLength-1)) )
suffixLength = suffixLength+1;
if(levelCode[0] == 0)
cavlcFIFO.enq(truncate((levelCode+2) >> 1));
else
cavlcFIFO.enq(truncate((~levelCode) >> 1));
if(levelCode[0] == 0)//////////////////////////////////////////////////
begin
tempint = signExtend(unpack((levelCode+2) >> 1));
//$display( "TRACE EntropyDec: temp level %0d", tempint );
end
else
begin
Bit#(13) tempinttemp = truncate((~levelCode) >> 1);
tempint = signExtend(unpack(tempinttemp));
//$display( "TRACE EntropyDec: temp level %0d", tempint );
end///////////////////////////////////////////////////////////////////////
temp3bit1 <= suffixLength;
numbitsused = zeroExtend(level_prefix)+1+zeroExtend(levelSuffixSize);
end
end
if( totalCoeff==0 || temp5bit2+1==totalCoeff )
begin
temp5bit2 <= 0;
zerosLeft <= 0;
if(totalCoeff < maxNumCoeff)
nextstate = ResidualBlock 3;
else
nextstate = ResidualBlock 5;
end
else
begin
temp5bit2 <= temp5bit2 + 1;
nextstate = ResidualBlock 2;
end
end
3:
begin
Bit#(4) tempZerosLeft;
if(totalCoeff > 0)
begin
{tempZerosLeft,numbitsused} = cavlc_total_zeros( buffer, truncate(totalCoeff), maxNumCoeff);
$display( "ccl2SDMRtotal_zeros %0d", tempZerosLeft );//////////////////////////////////////
end
else
tempZerosLeft = 0;
zerosLeft <= tempZerosLeft;
if(maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) > 0)
begin
$display( "ccl2SDMRcoeffLevelZeros %0d", maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) );
outfifo_ITB.enq(SDMRcoeffLevelZeros (maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft)));
end
nextstate = ResidualBlock 5;
end
5:
begin
if( totalCoeff > 0 )
begin
tempint = signExtend(unpack(cavlcFIFO.first()));
$display( "ccl2SDMRcoeffLevel %0d", tempint );
if( zerosLeft > 0 )
begin
Bit#(4) run_before = 0;
if( totalCoeff > 1 )
{run_before,numbitsused} = cavlc_run_before( buffer, zerosLeft);
else
run_before = zerosLeft;
zerosLeft <= zerosLeft - run_before;
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:zeroExtend(run_before)});
if( run_before > 0 )
$display( "ccl2SDMRcoeffLevelZeros %0d", run_before );
end
else
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:0});
cavlcFIFO.deq();
totalCoeff <= totalCoeff-1;
end
if( totalCoeff <= 1 )
begin
if(residualChroma==0)
begin
nextstate = ResidualBlock 0;
if(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16)
maxNumCoeff <= 15;
else if(temp5bit==15)
begin
temp5bit <= 0;
maxNumCoeff <= 4;
residualChroma <= 1;
end
else
temp5bit <= temp5bit+1;
end
else if(maxNumCoeff==4)
begin
nextstate = ResidualBlock 0;
if(temp5bit==1)
begin
temp5bit <= 0;
maxNumCoeff <= 15;
end
else
temp5bit <= temp5bit+1;
end
else
begin
if(temp5bit==7)
begin
temp5bit <= 0;
nextstate = SliceData 3;
end
else
begin
nextstate = ResidualBlock 0;
temp5bit <= temp5bit+1;
end
end
end
else
nextstate = ResidualBlock 5;
end
default: $display( "ERROR EntropyDec: ResidualBlock default step" );
endcase
end
endcase
if(numbitsused+1 > bufcount)
begin
$display( "ERROR EntropyDec: not enough bits in buffer" );
nextstate = Start;
end
buffer <= buffer << zeroExtend(numbitsused);
bufcount <= bufcount-numbitsused;
state <= nextstate;
endrule
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
interface Get ioout_InverseTrans = fifoToGet(outfifo_ITB);
 
interface mem_client = calcnc.mem_client;
endmodule
 
endpackage
/trunk/src/H264Types.bsv
0,0 → 1,415
//**********************************************************************
// H264 Types
//----------------------------------------------------------------------
//
//
//
 
 
package H264Types;
 
import Vector::*;
import RegFile::*;
 
// Picture dimensions expressed in macroblocks (MB). With the values below,
// PicAreaSz == PicWidthSz + PicHeightSz (7 + 7 = 14).
typedef 7 PicWidthSz;//number of bits to represent the horizontal position of a MB
typedef 7 PicHeightSz;//number of bits to represent the vertical position of a MB
typedef 14 PicAreaSz;//number of bits to represent the 2D position of a MB (max 16)
// Largest horizontal MB index representable in PicWidthSz bits (2^7 - 1 = 127).
Bit#(PicWidthSz) maxPicWidthInMB=127;//(2^PicWidthSz)-1
 
// 14'b10000000000000 is 8192 (2^13), i.e. half of the 2^PicAreaSz index range.
Bit#(PicAreaSz) maxPicAreaInMB=14'b10000000000000;
typedef 25 FrameBufferSz;//number of bits to address the frame buffer (5+PicAreaSz+6)
typedef 16 MaxRefFrames;//max number of frames in the frame buffer
// Value-level counterpart of MaxRefFrames; 5 bits because 16 does not fit in 4.
Bit#(5) maxRefFrames=16;//max number of frames in the frame buffer
// 25'b0110110000000000000000000 is 14155776 = (16+2) * 8192 * 96, which agrees
// with the formula in the trailing comment (1.5*64 = 96).
Bit#(FrameBufferSz) frameBufferSize=25'b0110110000000000000000000;//size of frame buffer ((maxRefFrames+2)*maxPicAreaInMB*1.5*64)
 
// FIFO sizing constants, named <module>_<fifo>_size.
// NOTE(review): the modules that consume these are outside this file;
// presumably each value is a depth in entries -- confirm at the use sites.
Integer entropyDec_infifo_size = 2;
Integer inverseTrans_infifo_size = 8;
Integer prediction_infifo_size = 4;
Integer prediction_infifo_ITB_size = 16;
Integer prediction_predictedfifo_size = 16;
Integer interpolator_reqfifoLoad_size = 4;
Integer interpolator_reqfifoWork_size = 8;
Integer interpolator_memRespQ_size = 4;
Integer deblockFilter_infifo_size = 32;
Integer bufferControl_infifo_size = 2;
 
 
//-----------------------------------------------------------
// 1 read port register file module
 
// Register-file interface with a single write method (upd) and a single
// read method (sub), parameterised on index type idx_t and data type d_t.
interface RFile1#(type idx_t, type d_t);
// Store x2 at index x1.
method Action upd(idx_t x1, d_t x2);
// Return the value stored at index x1.
method d_t sub(idx_t x1);
endinterface
 
// One-read-port register file built on the library mkRegFile, which is
// constructed with the index bounds lo and hi. Both methods simply
// forward to the underlying RegFile.
module mkRFile1#( idx_t lo, idx_t hi ) ( RFile1#(idx_t, d_t) )
   provisos (Bits#(idx_t, idx_sz), Bits#(d_t, data_sz));

   // Backing storage.
   RegFile#(idx_t, d_t) store <- mkRegFile(lo, hi);

   // Write value into the entry selected by addr.
   method Action upd( idx_t addr, d_t value );
      store.upd(addr, value);
   endmethod

   // Read the entry selected by addr.
   method d_t sub( idx_t addr );
      return store.sub(addr);
   endmethod

endmodule
// One-read-port register file built on the library mkRegFileFull, so no
// bounds are passed in; idx_t must therefore be Bounded. Both methods
// simply forward to the underlying RegFile.
module mkRFile1Full( RFile1#(idx_t, d_t) )
   provisos (Bits#(idx_t, idx_sz), Bits#(d_t, data_sz), Bounded#(idx_t));

   // Backing storage.
   RegFile#(idx_t, d_t) store <- mkRegFileFull();

   // Write value into the entry selected by addr.
   method Action upd( idx_t addr, d_t value );
      store.upd(addr, value);
   endmethod

   // Read the entry selected by addr.
   method d_t sub( idx_t addr );
      return store.sub(addr);
   endmethod

endmodule
 
 
//-----------------------------------------------------------
// Do not fire module
 
// Interface with a single argument-less Action method. The implementation
// in this file (mkDoNotFire) guards the method with a constant False.
interface DoNotFire;
method Action doNotFire();
endinterface
 
// Implementation of DoNotFire whose only method has a constant-false
// guard and an empty body.
module mkDoNotFire( DoNotFire );

   // Guard for doNotFire(); never true.
   Bool mayFire = False;

   method Action doNotFire() if (mayFire);
      noAction;
   endmethod

endmodule
 
 
// Macroblock type. Every member is a bare tag except I_16x16, which carries
// the fields that are packed into that macroblock type.
typedef union tagged
{
void P_L0_16x16;
void P_L0_L0_16x8;
void P_L0_L0_8x16;
void P_8x8;
void P_8x8ref0;
void I_NxN;
struct{
Bit#(2) intra16x16PredMode;
Bit#(2) codedBlockPatternChroma;
Bit#(1) codedBlockPatternLuma;// single bit; the entropy decoder replicates it into a 4-bit luma pattern
}I_16x16;
void I_PCM;
void P_Skip;
} MbType deriving(Eq,Bits);
 
 
// Prediction mode of one macroblock partition, as returned by
// mbPartPredMode. NA is what that function returns when the macroblock
// type has no mode for the requested partition (P_8x8, P_8x8ref0, I_PCM,
// or partition 1 of anything other than the 16x8 / 8x16 types).
typedef enum
{
Pred_L0,
Intra_4x4,
Intra_16x16,
NA
} MbPartPredModeType deriving(Eq,Bits);
 
 
// Bit-buffer types. buffersize (64) matches the width of Buffer; the two
// must be changed together. Bufcount is 7 bits wide, which is the minimum
// needed to represent the value 64.
// NOTE(review): presumably Bufcount holds the number of valid bits in a
// Buffer -- confirm against the declarations in the modules that use it.
typedef Bit#(64) Buffer;//not sure size
typedef Bit#(7) Bufcount;
Nat buffersize = 64;//not sure size
 
 
 
// Prediction mode of partition mbPartIdx (0 or 1) of a macroblock of type
// mbtype.
//   partition 1: Pred_L0 for the 16x8 / 8x16 types, NA otherwise.
//   partition 0: Pred_L0 for P_L0_16x16, the 16x8 / 8x16 types and P_Skip;
//                Intra_4x4 for I_NxN; NA for P_8x8, P_8x8ref0 and I_PCM;
//                Intra_16x16 for everything else.
function MbPartPredModeType mbPartPredMode( MbType mbtype, Bit#(1) mbPartIdx );
   // Classify the macroblock type once, then select on the partition index.
   Bool twoPartInter = (mbtype == P_L0_L0_16x8) || (mbtype == P_L0_L0_8x16);
   Bool onePartInter = (mbtype == P_L0_16x16) || (mbtype == P_Skip);
   Bool hasNoMode    = (mbtype == P_8x8) || (mbtype == P_8x8ref0) || (mbtype == I_PCM);

   // Fallback for partition 0 when none of the classes above applies.
   MbPartPredModeType mode = Intra_16x16;

   if (mbPartIdx == 1)
      mode = twoPartInter ? Pred_L0 : NA;
   else if (onePartInter || twoPartInter)
      mode = Pred_L0;
   else if (mbtype == I_NxN)
      mode = Intra_4x4;
   else if (hasNoMode)
      mode = NA;

   return mode;
endfunction
 
 
// Number of macroblock partitions for mbtype: 1 for P_L0_16x16 and P_Skip,
// 2 for the 16x8 and 8x16 types, 4 for P_8x8 and P_8x8ref0, and 0 for any
// other type.
function Bit#(3) numMbPart( MbType mbtype );
   Bool isSingle = (mbtype == P_L0_16x16)   || (mbtype == P_Skip);
   Bool isPair   = (mbtype == P_L0_L0_16x8) || (mbtype == P_L0_L0_8x16);
   Bool isQuad   = (mbtype == P_8x8)        || (mbtype == P_8x8ref0);

   Bit#(3) parts = 0;//should never happen
   if (isQuad)
      parts = 4;
   else if (isPair)
      parts = 2;
   else if (isSingle)
      parts = 1;

   return parts;
endfunction
 
 
// Number of sub-macroblock partitions for a 2-bit sub-macroblock type code:
// code 0 gives 1, codes 1 and 2 give 2, code 3 gives 4.
function Bit#(3) numSubMbPart( Bit#(2) submbtype );
   Bit#(3) count = 4;
   case (submbtype)
      0       : count = 1;
      1, 2    : count = 2;
      default : count = 4;
   endcase
   return count;
endfunction
 
 
//----------------------------------------------------------------------
// Inter-module FIFO types
//----------------------------------------------------------------------
 
 
// Token carrying either one byte of data or an end-of-file marker.
// NOTE(review): by its name this is presumably the output type of the
// input generator -- confirm in IInputGen / mkInputGen.
typedef union tagged
{
Bit#(8) DataByte;
void EndOfFile;
}
InputGenOT deriving(Eq,Bits);
 
 
// Token carrying a new-unit marker, one RBSP byte, or an end-of-file marker.
// NOTE(review): by its name this is presumably the output type of the NAL
// unwrapper -- confirm in INalUnwrap / mkNalUnwrap.
typedef union tagged
{
void NewUnit;
Bit#(8) RbspByte;
void EndOfFile;
}
NalUnwrapOT deriving(Eq,Bits);
 
 
// Tokens enqueued on the entropy decoder's main output FIFO (outfifo).
// Each member is one syntax element; the name prefix identifies the
// syntax structure it belongs to (SPS, PPS, SH, SD...), and the trailing
// comment gives its bitstream coding and value range.
// Members commented out with //////// are declared in
// EntropyDecOT_InverseTrans instead of here. Members commented out with
// //// are not declared in either type.
typedef union tagged
{
Bit#(8) NewUnit;
 
////Sequence Parameter Set
Bit#(5) SPSseq_parameter_set_id;//ue 0 to 31
Bit#(5) SPSlog2_max_frame_num;//ue+4 4 to 16
Bit#(2) SPSpic_order_cnt_type;//ue 0 to 2
Bit#(5) SPSlog2_max_pic_order_cnt_lsb;//ue+4 4 to 16
Bit#(1) SPSdelta_pic_order_always_zero_flag;//u(1)
Bit#(32) SPSoffset_for_non_ref_pic;//se -2^31 to 2^31-1
Bit#(32) SPSoffset_for_top_to_bottom_field;//se -2^31 to 2^31-1
Bit#(8) SPSnum_ref_frames_in_pic_order_cnt_cycle;//ue 0 to 255
Bit#(32) SPSoffset_for_ref_frame;//se -2^31 to 2^31-1
Bit#(5) SPSnum_ref_frames;//ue 0 to MaxDpbSize (depends on Level)
Bit#(1) SPSgaps_in_frame_num_allowed_flag;//u(1)
Bit#(PicWidthSz) SPSpic_width_in_mbs;//ue+1 1 to ?
Bit#(PicHeightSz) SPSpic_height_in_map_units;//ue+1 1 to ?
//// Bit#(1) SPSframe_mbs_only_flag//u(1) (=1 for baseline)
Bit#(1) SPSdirect_8x8_inference_flag;//u(1)
Bit#(1) SPSframe_cropping_flag;//u(1)
Bit#(16) SPSframe_crop_left_offset;//ue 0 to ?
Bit#(16) SPSframe_crop_right_offset;//ue 0 to ?
Bit#(16) SPSframe_crop_top_offset;//ue 0 to ?
Bit#(16) SPSframe_crop_bottom_offset;//ue 0 to ?
 
////Picture Parameter Set
Bit#(8) PPSpic_parameter_set_id;//ue 0 to 255
Bit#(5) PPSseq_parameter_set_id;//ue 0 to 31
//// Bit#(1) PPSentropy_coding_mode_flag//u(1) (=0 for baseline)
Bit#(1) PPSpic_order_present_flag;//u(1)
//// Bit#(4) PPSnum_slice_groups;//ue+1 1 to 8 (=1 for main)
////some info if PPSnum_slice_groups>1 (not in main)
Bit#(5) PPSnum_ref_idx_l0_active;//ue+1 1 to 32 (16 for frame mb)
Bit#(5) PPSnum_ref_idx_l1_active;//ue+1 1 to 32 (16 for frame mb)
//// Bit#(1) PPSweighted_pred_flag;//u(1) (=0 for baseline)
//// Bit#(2) PPSweighted_bipred_flag;//u(2) (=0 for baseline)
//////// Bit#(7) PPSpic_init_qp;//se+26 0 to 51
//////// Bit#(7) PPSpic_init_qs;//se+26 0 to 51
//////// Bit#(5) PPSchroma_qp_index_offset;//se -12 to 12
Bit#(1) PPSdeblocking_filter_control_present_flag;//u(1)
Bit#(1) PPSconstrained_intra_pred_flag;//u(1)
//// Bit#(1) PPSredundant_pic_cnt_present_flag;//u(1) (=0 for main)
 
////Slice Header
Bit#(PicAreaSz) SHfirst_mb_in_slice;//ue 0 to PicSizeInMbs-1
Bit#(4) SHslice_type;//ue 0 to 9
Bit#(8) SHpic_parameter_set_id;//ue 0 to 255
Bit#(16) SHframe_num;//u(log2_max_frame_num)
Bit#(16) SHidr_pic_id;//ue 0 to 65535
Bit#(16) SHpic_order_cnt_lsb;//u(log2_max_pic_order_cnt_lsb)
Bit#(32) SHdelta_pic_order_cnt_bottom;//se -2^31 to 2^31-1
Bit#(32) SHdelta_pic_order_cnt0;//se -2^31 to 2^31-1
Bit#(32) SHdelta_pic_order_cnt1;//se -2^31 to 2^31-1
Bit#(1) SHnum_ref_idx_active_override_flag;//u(1)
Bit#(5) SHnum_ref_idx_l0_active;//ue+1 1 to 32 (16 for frame mb)
////reference picture list reordering
Bit#(1) SHRref_pic_list_reordering_flag_l0;//u(1)
Bit#(2) SHRreordering_of_pic_nums_idc;//ue 0 to 3
Bit#(17) SHRabs_diff_pic_num;//ue 1 to MaxPicNum
Bit#(5) SHRlong_term_pic_num;//ue 0 to ?
////decoded reference picture marking
Bit#(1) SHDno_output_of_prior_pics_flag;//u(1)
Bit#(1) SHDlong_term_reference_flag;//u(1)
Bit#(1) SHDadaptive_ref_pic_marking_mode_flag;//u(1)
Bit#(3) SHDmemory_management_control_operation;//ue 0 to 6
Bit#(17) SHDdifference_of_pic_nums;//ue 1 to MaxPicNum
Bit#(5) SHDlong_term_pic_num;//ue 0 to 32 (16 for frame mb)
Bit#(5) SHDlong_term_frame_idx;//ue 0 to MaxLongTermFrameIdx
Bit#(5) SHDmax_long_term_frame_idx_plus1;//ue 0 to num_ref_frames (0 to 16)
////Slice Header (continued)
//////// Bit#(7) SHslice_qp_delta;//se -51 to 51
Bit#(2) SHdisable_deblocking_filter_idc;//ue 0 to 2
Bit#(5) SHslice_alpha_c0_offset;//se*2 -12 to 12
Bit#(5) SHslice_beta_offset;//se*2 -12 to 12
 
////Slice Data
Bit#(PicAreaSz) SDmb_skip_run;//ue 0 to PicSizeInMbs
//// Bit#(PicAreaSz) SDcurrMbAddr;//ue ->process-> 0 to PicSizeInMbs
////macroblock layer
MbType SDMmbtype;//ue ->process-> MbType
Bit#(8) SDMpcm_sample_luma;//u(8) 0 to 255 (the entropy decoder takes 8 raw bits from the buffer, not an Exp-Golomb code)
Bit#(8) SDMpcm_sample_chroma;//u(8) 0 to 255 (the entropy decoder takes 8 raw bits from the buffer, not an Exp-Golomb code)
//// Bit#(6) SDMcoded_block_pattern;//me
//////// Bit#(7) SDMmb_qp_delta;//se -26 to 25
////macroblock prediction
// Bit#(1) SDMMprev_intra4x4_pred_mode_flag;//u(1)
Bit#(4) SDMMrem_intra4x4_pred_mode;//(SDMMprev_intra4x4_pred_mode_flag ? 4'b1000 : {1'b0,u(3)})
Bit#(2) SDMMintra_chroma_pred_mode;//ue 0 to 3
Bit#(5) SDMMref_idx_l0;//te 0 to num_ref_idx_active_minus1
Bit#(16) SDMMmvd_l0;//se ? to ? (see Annex A)
////sub-macroblock prediction
Bit#(2) SDMSsub_mb_type;//ue 0 to 3
Bit#(5) SDMSref_idx_l0;//te 0 to num_ref_idx_active_minus1
Bit#(16) SDMSmvd_l0;//se ? to ? (see Annex A)
////residual data
//////// Bit#(13) SDMRcoeffLevel;//cavlc output in reverse order (high frequency first)
//////// Bit#(5) SDMRcoeffLevelZeros;//# of consecutive zeros (also used for ITBresidual)
 
////Prediction Block output
struct {Bit#(6) qpy; Bit#(6) qpc;} IBTmb_qp;//qp for luma and chroma for the current MB
struct {Bit#(3) bShor; Bit#(3) bSver;} PBbS;//
Vector#(4,Bit#(8)) PBoutput;//prediction+residual in regular h.264 order
 
//// various delimiters
Bit#(3) AUDPrimaryPicType;
void EndOfSequence;
void EndOfStream;
void EndOfFile;
}
EntropyDecOT deriving(Eq,Bits);
 
 
// Tokens enqueued on the entropy decoder's second output FIFO
// (outfifo_ITB, exposed as ioout_InverseTrans). It holds the quantiser
// parameters and residual coefficients that are commented out of
// EntropyDecOT, plus NewUnit, PPSpic_parameter_set_id and SDMmbtype,
// which are declared in both types.
typedef union tagged
{
Bit#(8) NewUnit;
 
////Picture Parameter Set
Bit#(8) PPSpic_parameter_set_id;//ue 0 to 255
Bit#(7) PPSpic_init_qp;//se+26 0 to 51
Bit#(7) PPSpic_init_qs;//se+26 0 to 51
Bit#(5) PPSchroma_qp_index_offset;//se -12 to 12
 
////Slice Header
Bit#(7) SHslice_qp_delta;//se -51 to 51
 
////macroblock layer
MbType SDMmbtype;//ue ->process-> MbType
Bit#(7) SDMmb_qp_delta;//se -26 to 25
////residual data (cavlc output in reverse order (high frequency first))
struct {Bit#(13) level; Bit#(5) zeros;} SDMRcoeffLevelPlusZeros;//one non-zero coeff level followed by # of consecutive zeros
Bit#(5) SDMRcoeffLevelZeros;//# of consecutive zeros
}
EntropyDecOT_InverseTrans deriving(Eq,Bits);
 
 
// Token carrying a zero-run marker, four 10-bit residual values, or the
// luma/chroma quantiser parameters of the current macroblock.
// NOTE(review): by its name this is presumably the output type of the
// inverse-transform module -- confirm in IInverseTrans / mkInverseTrans.
typedef union tagged
{
void ITBcoeffLevelZeros;//16 consecutive zeros
Vector#(4,Bit#(10)) ITBresidual;//residual data in regular h.264 order
struct {Bit#(6) qpy; Bit#(6) qpc;} IBTmb_qp;//qp for luma and chroma for the current MB
}
InverseTransOT deriving(Eq,Bits);
 
 
// Token carrying 32 bits of luma or chroma data tagged with a position, an
// end-of-frame marker, or a wrapped EntropyDecOT token (EDOT).
// The luma position is PicWidthSz+2 / PicHeightSz+4 bits wide; the chroma
// position is one bit narrower in each direction and adds a 1-bit uv
// selector.
// NOTE(review): the units of hor/ver and which value of uv selects which
// chroma plane are not shown here -- confirm in the deblocking filter.
typedef union tagged
{
struct {Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; Bit#(32) data;} DFBLuma;
struct {Bit#(1) uv; Bit#(TAdd#(PicWidthSz,1)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; Bit#(32) data;} DFBChroma;
void EndOfFrame;
EntropyDecOT EDOT;
}
DeblockFilterOT deriving(Eq,Bits);
 
 
// Tagged union named as the buffer-control output type: either one 32-bit
// YUV data word or a payload-free end-of-file marker.
typedef union tagged
{
Bit#(32) YUV;
void EndOfFile;
}
BufferControlOT deriving(Eq,Bits);
 
 
// Frame-buffer read-side request: either a load of the word at a
// FrameBufferSz-bit address, or a payload-free FBEndFrameSync marker.
typedef union tagged
{
Bit#(FrameBufferSz) FBLoadReq;
void FBEndFrameSync;
}
FrameBufferLoadReq deriving(Eq,Bits);
 
// Frame-buffer load response: a single-member tagged union wrapping one
// 32-bit data word.
typedef union tagged
{
Bit#(32) FBLoadResp;
}
FrameBufferLoadResp deriving(Eq,Bits);
 
// Frame-buffer write-side request: either a store of a 32-bit word to a
// FrameBufferSz-bit address, or a payload-free FBEndFrameSync marker
// (same tag name as in FrameBufferLoadReq).
typedef union tagged
{
struct { Bit#(FrameBufferSz) addr; Bit#(32) data; } FBStoreReq;
void FBEndFrameSync;
}
FrameBufferStoreReq deriving(Eq,Bits);
 
 
// Block-size selector carried in interpolation requests. The names read as
// WIDTHxHEIGHT in pixels (presumably the H.264 inter-prediction partition
// sizes -- confirm against the prediction module).
// deriving(Bits) encodes the labels in declaration order, starting at 0.
typedef enum
{
IP16x16,
IP16x8,
IP8x16,
IP8x8,
IP8x4,
IP4x8,
IP4x4
} IPBlockType deriving(Eq,Bits);
 
// Request type accepted by the interpolator (Interpolator.request).
// Both members carry a reference index, a block position (hor/ver), a motion
// vector (mvhor: 14 bits, mvver: 12 bits) and the block type; IPChroma adds
// a 1-bit uv plane selector. mkInterpolator uses the low 2 bits (luma) or
// low 3 bits (chroma) of each motion-vector component as the fractional
// part and the remaining upper bits as the integer part.
typedef union tagged
{
struct { Bit#(4) refIdx; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; Bit#(14) mvhor; Bit#(12) mvver; IPBlockType bt; } IPLuma;
struct { Bit#(4) refIdx; Bit#(1) uv; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; Bit#(14) mvhor; Bit#(12) mvver; IPBlockType bt; } IPChroma;
}
InterpolatorIT deriving(Eq,Bits);
 
// Memory request issued by the interpolator for reference-picture data.
//   IPLoadLuma / IPLoadChroma - fetch one word at (hor, ver) of reference
//       picture refIdx; horOutOfBounds is set by the requester when the
//       horizontal position had to be clamped to the picture edge.
//   IPLoadEndFrame - payload-free end-of-frame marker.
typedef union tagged
{
struct { Bit#(4) refIdx; Bit#(1) horOutOfBounds; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; } IPLoadLuma;
struct { Bit#(4) refIdx; Bit#(1) uv; Bit#(1) horOutOfBounds; Bit#(TAdd#(PicWidthSz,1)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; } IPLoadChroma;
void IPLoadEndFrame;
}
InterpolatorLoadReq deriving(Eq,Bits);
 
// Response to an InterpolatorLoadReq: a single-member tagged union wrapping
// one 32-bit word (mkInterpolator unpacks it as four bytes, least
// significant byte first).
typedef union tagged
{
Bit#(32) IPLoadResp;
}
InterpolatorLoadResp deriving(Eq,Bits);
 
 
// Generic memory request, parameterised by address width (addrSz) and data
// width (dataSz): LoadReq carries only an address, StoreReq carries an
// address together with the data to write.
typedef union tagged
{
Bit#(addrSz) LoadReq;
struct { Bit#(addrSz) addr; Bit#(dataSz) data; } StoreReq;
}
MemReq#( type addrSz, type dataSz )
deriving(Eq,Bits);
 
// Generic memory response, parameterised by data width (dataSz): a
// single-member tagged union wrapping the loaded data.
typedef union tagged
{
Bit#(dataSz) LoadResp;
}
MemResp#( type dataSz )
deriving(Eq,Bits);
 
 
 
endpackage
/trunk/src/IPrediction.bsv
0,0 → 1,29
//**********************************************************************
// Interface for Prediction
//----------------------------------------------------------------------
//
//
//
 
package IPrediction;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Port list of the prediction block. It only declares sub-interfaces; the
// behaviour is defined by whichever module implements this interface.
interface IPrediction;

// Interface for inter-module io
//   ioin              - accepts EntropyDecOT items
//   ioin_InverseTrans - accepts InverseTransOT items
//   ioout             - delivers EntropyDecOT items
interface Put#(EntropyDecOT) ioin;
interface Put#(InverseTransOT) ioin_InverseTrans;
interface Get#(EntropyDecOT) ioout;

// Interface for module to memory
//   mem_client_intra  - MemReq/MemResp client, (PicWidthSz+2)-bit address, 68-bit data
//   mem_client_inter  - MemReq/MemResp client, (PicWidthSz+2)-bit address, 32-bit data
//   mem_client_buffer - client issuing InterpolatorLoadReq and receiving InterpolatorLoadResp
interface Client#(MemReq#(TAdd#(PicWidthSz,2),68),MemResp#(68)) mem_client_intra;
interface Client#(MemReq#(TAdd#(PicWidthSz,2),32),MemResp#(32)) mem_client_inter;
interface Client#(InterpolatorLoadReq,InterpolatorLoadResp) mem_client_buffer;

endinterface

endpackage
 
/trunk/src/mkInterpolator.bsv
0,0 → 1,843
//**********************************************************************
// interpolator implementation
//----------------------------------------------------------------------
//
//
 
package mkInterpolator;
 
import H264Types::*;
import IInterpolator::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Work descriptor used internally by mkInterpolator (queued in reqfifoWork1,
// then held in reqregWork2). It keeps only what the processing rules need
// from an InterpolatorIT request: the fractional motion-vector bits
// (2 per axis for luma, 3 per axis for chroma), a 2-bit offset and the
// block type. The fields are filled in by the request method.
typedef union tagged
{
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma;
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
}
InterpolatorWT deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Saturate a 10-bit two's-complement value to the unsigned 8-bit range.
//   bit 9 set (negative)        -> 0
//   bit 8 set (value >= 256)    -> 255
//   otherwise                   -> low 8 bits unchanged
function Bit#(8) clip1y10to8( Bit#(10) innum );
   // Start from the pass-through value and override it for the two
   // out-of-range cases; the sign check takes priority.
   Bit#(8) clipped = truncate(innum);
   if(innum[9] == 1)
      clipped = 0;
   else if(innum[8] == 1)
      clipped = 255;
   return clipped;
endfunction
 
// Six-tap filter with weights (1, -5, 20, 20, -5, 1) applied to six unsigned
// 8-bit samples. The sum is formed in 15-bit wrap-around arithmetic and is
// returned unscaled and unrounded, so negative sums appear in
// two's-complement form.
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 );
   // Widen every tap to the accumulator width first.
   Bit#(15) tap0 = zeroExtend(in0);
   Bit#(15) tap1 = zeroExtend(in1);
   Bit#(15) tap2 = zeroExtend(in2);
   Bit#(15) tap3 = zeroExtend(in3);
   Bit#(15) tap4 = zeroExtend(in4);
   Bit#(15) tap5 = zeroExtend(in5);
   return tap0 - 5*tap1 + 20*tap2 + 20*tap3 - 5*tap4 + tap5;
endfunction
 
// Six-tap filter with weights (1, -5, 20, 20, -5, 1) applied to six signed
// 15-bit intermediate values (as produced by interpolate8to15). The sum is
// formed in 20 bits, rounded by adding 512, divided by 1024 (bits 19:10 are
// kept) and finally saturated to 0..255 with clip1y10to8.
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 );
   // Sign-extend every tap to the accumulator width first.
   Bit#(20) tap0 = signExtend(in0);
   Bit#(20) tap1 = signExtend(in1);
   Bit#(20) tap2 = signExtend(in2);
   Bit#(20) tap3 = signExtend(in3);
   Bit#(20) tap4 = signExtend(in4);
   Bit#(20) tap5 = signExtend(in5);
   Bit#(20) acc = tap0 - 5*tap1 + 20*tap2 + 20*tap3 - 5*tap4 + tap5 + 512;
   // Logical shift followed by truncation keeps accumulator bits 19:10,
   // so the accumulator sign ends up in bit 9 of the clip input.
   Bit#(10) scaled = truncate(acc>>10);
   return clip1y10to8(scaled);
endfunction
 
 
 
//-----------------------------------------------------------
// Interpolation Module
//-----------------------------------------------------------
 
 
// Interpolator module: state declarations.
// Data flow as implemented by the rules below:
//   request()      -> reqfifoLoad (address generation) and reqfifoWork1 (processing)
//   loadLuma/loadChroma -> memReqQ -> external memory -> memRespQ
//   work1*         : consumes memRespQ, writes workFile/storeFile half "workFileFlag"
//   switching*     : moves the finished request into reqregWork2 and toggles workFileFlag
//   work2*         : reads workFile/storeFile half "1-workFileFlag", writes resultFile
//   outputing      : streams resultFile entries flagged in resultReady into outfifo
(* synthesize *)
module mkInterpolator( Interpolator );
// Request queues and the request currently owned by the work2 stage.
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size);
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid);
// Output words exposed through first()/deq().
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
// Set by endOfFrame(), cleared when the IPLoadEndFrame request has been sent.
Reg#(Bool) endOfFrameFlag <- mkReg(False);
// Memory-side queues exposed through mem_client.
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);

// Picture dimensions used for address clamping; the load rules scale them
// by 16 (luma) or 8 (chroma), i.e. they are in macroblock units.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);

// Scratch storage shared by the two processing stages. The top address bit
// of workFile/storeFile selects the half: work1 writes half workFileFlag
// while work2 reads half 1-workFileFlag.
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full();
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full();
Reg#(Bit#(1)) workFileFlag <- mkReg(0);
// Final 8-bit results, one entry per 4-pixel row segment.
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full();

// Counters of the address-generation rules (loadLuma/loadChroma).
Reg#(Bit#(1)) loadStage <- mkReg(0);
Reg#(Bit#(2)) loadHorNum <- mkReg(0);
Reg#(Bit#(4)) loadVerNum <- mkReg(0);

// State of the first processing stage (work1Luma/work1Chroma).
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(1)) work1Stage <- mkReg(0);
Reg#(Bit#(2)) work1HorNum <- mkReg(0);
Reg#(Bit#(4)) work1VerNum <- mkReg(0);
// Sliding window of recently received bytes (no reset value).
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU;
Reg#(Bool) work1Done <- mkReg(False);

// State of the second processing stage (work2Luma/work2Chroma).
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0);
Reg#(Bit#(2)) work2HorNum <- mkReg(0);
Reg#(Bit#(4)) work2VerNum <- mkReg(0);
// Sliding windows of 8-bit and 15-bit samples (no reset value).
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU;
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU;
// One flag per resultFile entry; set by work2*, cleared by switching8x8.
Reg#(Vector#(16,Bit#(1))) resultReady <- mkReg(replicate(0));
Reg#(Bool) work2Done <- mkReg(False);
Reg#(Bool) work8x8Done <- mkReg(False);

// Read-out position of the outputing rule and its completion flag.
Reg#(Bit#(2)) outBlockNum <- mkReg(0);
Reg#(Bit#(2)) outPixelNum <- mkReg(0);
Reg#(Bool) outDone <- mkReg(False);
 
// Forward a pending end-of-frame notification to the memory side.
// Fires while endOfFrameFlag is set (which also blocks loadLuma/loadChroma):
// it enqueues a single IPLoadEndFrame request and clears the flag.
rule sendEndOfFrameReq( endOfFrameFlag );
   memReqQ.enq(tagged IPLoadEndFrame);
   endOfFrameFlag <= False;
endrule
// Address generator for luma requests.
// Each firing enqueues one IPLoadLuma word request for the IPLuma entry at
// the head of reqfifoLoad and advances loadHorNum/loadVerNum/loadStage; the
// entry is dequeued once its last word has been requested. The rule is held
// off while endOfFrameFlag is set.
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag );
// The low two bits of each motion-vector component are the fractional
// position; bits 3:2 of mvhor give the byte offset within a 4-pixel word.
Bit#(2) xfracl = reqdata.mvhor[1:0];
Bit#(2) yfracl = reqdata.mvver[1:0];
Bit#(2) offset = reqdata.mvhor[3:2];
// twoStage: both fractions are odd (1 or 3). In that case loadStage selects
// which direction is treated as interpolated during the current pass
// (stage 0: vertical, stage 1: horizontal).
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0);
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0);
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0);
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,2)) horAddr;
Bit#(TAdd#(PicHeightSz,4)) verAddr;
// Position before the motion vector is applied: block origin plus the
// current word/row counter (horizontal in pixels, vertical in rows), plus
// one extra pixel/row for the xfracl==3 / yfracl==3 two-stage cases.
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0);
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0);
// Integer part of the motion vector, moved back by 2 in a direction that is
// interpolated.
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0);
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0);
// Horizontal clamp: a position left of column 0 maps to word 0, a position
// at or beyond picWidth*16 maps to the last word of the row. Both cases set
// horOut, which is passed on as horOutOfBounds.
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(mvhortemp);
if(horTemp>=zeroExtend({picWidth,4'b0000}))
begin
horAddr = {picWidth-1,2'b11};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamp to rows 0 .. picHeight*16-1; no flag is produced for it.
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(mvvertemp);
if(verTemp>=zeroExtend({picHeight,4'b0000}))
verAddr = {picHeight-1,4'b1111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// Counter update. With verFirst the row counter is the inner loop (words
// are requested column by column); otherwise the word counter is the inner
// loop (row by row).
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3));
// Last counter values: block width/height in words/rows, plus the extra
// words/rows needed for interpolation or for an unaligned offset.
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1));
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
if(verFirst)
begin
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
if(loadHorNum < loadHorNumMax)
begin
if(loadStage == 1)
begin
// Second pass of a two-stage request: depending on the offset either jump
// straight to the last column or finish the request.
offset = offset + (xfracl==3 ? 1 : 0);
if(!(offset==1 || (xfracl==3 && offset==2)))
loadHorNum <= loadHorNumMax;
else
begin
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
else
loadHorNum <= loadHorNum+1;
end
else
begin
if(twoStage && loadStage==0)
begin
// End of the first pass of a two-stage request: start the second pass,
// choosing its first column from the offset.
offset = offset + (xfracl==3 ? 1 : 0);
if((xfracl==3 ? offset<3 : offset<2))
loadHorNum <= 0;
else
loadHorNum <= loadHorNumMax+1;
loadStage <= 1;
end
else
begin
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
end
end
else
begin
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
end
// Only a message is printed for block types larger than 8x8; the request is
// still processed with the 4-wide/4-high limits computed above.
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16)
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported");
$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
 
// Address generator for chroma requests.
// Each firing enqueues one IPLoadChroma word request for the IPChroma entry
// at the head of reqfifoLoad and advances loadHorNum (inner loop) and
// loadVerNum (outer loop); the entry is dequeued after the last word. The
// rule is held off while endOfFrameFlag is set.
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag );
// The low three bits of each motion-vector component are the fractional
// position. offset is the byte offset of the first needed sample inside a
// 4-sample word (integer motion-vector bits 4:3 plus 2 when hor is odd).
Bit#(3) xfracc = reqdata.mvhor[2:0];
Bit#(3) yfracc = reqdata.mvver[2:0];
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0};
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,1)) horAddr;
Bit#(TAdd#(PicHeightSz,3)) verAddr;
// Position before the motion vector is applied: block origin plus the
// current word/row counter.
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00});
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum);
// Horizontal clamp: left of column 0 maps to word 0, at or beyond
// picWidth*8 maps to the last word of the row. Both cases set horOut.
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]);
if(horTemp>=zeroExtend({picWidth,3'b000}))
begin
horAddr = {picWidth-1,1'b1};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamp to rows 0 .. picHeight*8-1; no flag is produced for it.
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
if(verTemp>=zeroExtend({picHeight,3'b000}))
verAddr = {picHeight-1,3'b111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// Last counter values. These expressions are the same as workHorNumMax and
// workVerNumMax in work1Chroma, so that rule consumes exactly the words
// requested here.
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
// First luma processing stage.
// Each firing takes one 32-bit word from memRespQ for the IPWLuma request at
// the head of reqfifoWork1 and, depending on the fractional position:
//   * xfracl==0, yfracl==0 or xfracl==2: realigns the bytes (xfracl==0) or
//     runs the horizontal six-tap filter, and writes 15-bit rows to workFile;
//   * otherwise, work1Stage==0: runs the vertical six-tap filter and writes
//     15-bit rows to workFile (for two-stage requests it also copies raw
//     bytes to storeFile);
//   * otherwise (second pass of a two-stage request): copies raw bytes to
//     storeFile.
// All writes go to the half selected by workFileFlag. work1Done is set when
// the last word of the request has been processed; the request itself is
// dequeued by the switching rules.
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
// twoStage: both fractions are odd (1 or 3).
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Unpack the word into four bytes, least significant byte first.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] );
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
if(xfracl==0 || yfracl==0 || xfracl==2)
begin
if(xfracl==0)//reorder
begin
// Build an aligned word from the previous word (kept in work1Vector8[0..3])
// and the current one, shifted by offset bytes.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && offset!=0)
tempResult8[ii] = work1Vector8[offsetplusii];
else
tempResult8[ii] = readdata[offsetplusii];
work1Vector8Next[ii] = readdata[ii];
end
// Scale by 32 so the value has the same weight as a six-tap filter output.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
else//horizontal interpolation
begin
// Slide the byte window left by one word and insert the new word at a
// position that compensates for the byte offset.
offset = offset-2;
for(Integer ii=0; ii<8; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work1Vector8Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]);
// Rounded, clipped half-sample value; for quarter positions it is averaged
// (with rounding) with the neighbouring integer sample.
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1);
end
end
// The first workHorNumOffset words of each row only fill the window; a
// result is written from then on.
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1));
if(work1HorNum >= workHorNumOffset)
begin
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset);
if(yfracl == 0)
begin
// No vertical filtering follows: store the final 8-bit value scaled by 32.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15);
end
// Row-major traversal: word counter is the inner loop.
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0);
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
work1Done <= True;
end
end
end
else if(work1Stage == 0)//vertical interpolation
begin
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
// Six-tap filter down a column: the five previous rows are held in
// work1Vector8 (four bytes per row), the current word is the sixth row.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]);
// Shift the row window up by one row and append the current word.
for(Integer ii=0; ii<16; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work1Vector8Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
Bit#(2) horAddr = work1HorNum;
Bit#(3) verAddr = truncate(work1VerNum-5);
// The first five rows of a column only fill the window.
if(work1VerNum > 4)
begin
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15);
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]);
end
if(twoStage)
begin
// Also keep the raw bytes of the rows needed later for the horizontal
// filter: the row index is shifted by 2 (3 when yfracl==3) and only rows
// 0..7 are stored.
Bit#(2) storeHorAddr = work1HorNum;
Bit#(4) storeVerAddr = work1VerNum;
if((xfracl==3 ? offset<3 : offset<2))
storeHorAddr = storeHorAddr+1;
if(yfracl==3)
storeVerAddr = storeVerAddr-3;
else
storeVerAddr = storeVerAddr-2;
if(storeVerAddr < 8)
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata);
end
// Column-major traversal: row counter is the inner loop.
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
if(twoStage)
begin
// Enter the second pass; its first column depends on the offset (same
// choice as in loadLuma).
work1Stage <= 1;
if((xfracl==3 ? offset<3 : offset<2))
work1HorNum <= 0;
else
work1HorNum <= workHorNumMax+1;
end
else
begin
work1HorNum <= 0;
work1Done <= True;
end
end
end
end
else//second stage of twoStage
begin
// Raw bytes of the additional column(s) go straight into storeFile.
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata);
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
offset = offset + (xfracl==3 ? 1 : 0);
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2)))
work1HorNum <= workHorNumMax;
else
begin
work1HorNum <= 0;
work1Stage <= 0;
work1Done <= True;
end
end
end
end
work1Vector8 <= work1Vector8Next;
$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage);
endrule
 
 
// Second luma processing stage.
// Works on the IPWLuma request held in reqregWork2, reading the half of
// workFile/storeFile that work1 is not writing (top address bit
// 1-workFileFlag). Each firing produces at most one 4-pixel result, writes
// it to resultFile and sets the matching resultReady bit:
//   * yfracl==0: the value from workFile is only scaled back to 8 bits;
//   * xfracl==0 or 2: vertical six-tap filter on the 15-bit rows;
//   * otherwise: horizontal six-tap filter, on 15-bit rows when yfracl==2,
//     or on raw bytes from storeFile averaged with the vertical result from
//     workFile when both fractions are odd.
// work2Done is set at the end of each request; work8x8Done is set when the
// last sub-block of an 8x8 partition has been processed.
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8;
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15;
Vector#(16,Bit#(1)) resultReadyNext = resultReady;
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) readdata = replicate(0);
if(yfracl==0)
begin
// Copy-through: work1 already stored the final sample scaled by 32, so bits
// 12:5 are the 8-bit result. The entry index interleaves the counters as
// {work2VerNum[1], work2HorNum, work2VerNum[0]}.
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]});
for(Integer ii=0; ii<4; ii=ii+1)
tempResult8[ii] = (readdata[ii])[12:5];
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8);
resultReadyNext[{work2VerNum[1],work2HorNum,work2VerNum[0]}] = 1;
work2HorNum <= work2HorNum+1;
if(work2HorNum == 3)
begin
if(work2VerNum == 3)
begin
work2VerNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
else
work2VerNum <= work2VerNum+1;
end
end
else if(xfracl==0 || xfracl==2)//vertical interpolation
begin
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]});
// Six-tap filter down a column of 15-bit values: five previous rows are in
// work2Vector15, the row just read is the sixth. For quarter positions the
// result is averaged (with rounding) with the nearer half/integer row.
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]);
if(yfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1);
else if(yfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1);
end
// Shift the row window up by one row and append the row just read.
for(Integer ii=0; ii<16; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work2Vector15Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = 1;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
// The first five rows of a column only fill the window.
if(work2VerNum > 4)
begin
Bit#(1) horAddr = truncate(work2HorNum);
Bit#(3) verAddr = truncate(work2VerNum-5);
// Place sub-blocks smaller than 8x8 at their position inside the 8x8 area.
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
resultReadyNext[{verAddr,horAddr}] = 1;
end
// Column-major traversal: row counter is the inner loop.
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
else//horizontal interpolation
begin
offset = offset-2;
if(yfracl == 2)
begin
// Horizontal six-tap filter over vertically filtered 15-bit values.
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
// Slide the window left by one word and insert the new word at a position
// that compensates for the byte offset.
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector15Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]);
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1);
end
end
else
begin
// Both fractions odd: horizontal six-tap filter over raw bytes from
// storeFile, then averaged with the vertically filtered value from workFile.
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector8Next[ii] = work2Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector8Next[tempIndex] = readdata8[ii];
end
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]);
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
end
// Realign the vertically filtered row: verOffset is the byte offset of the
// column that work1 filtered (one further right when xfracl==3).
Bit#(2) verOffset;
Vector#(4,Bit#(15)) verResult15 = replicate(0);
if(xfracl == 1)
verOffset = reqdata.offset;
else
verOffset = reqdata.offset+1;
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))});
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = verOffset+fromInteger(ii);
if(verOffset <= 3-fromInteger(ii) && verOffset!=0)
verResult15[ii] = work2Vector15[offsetplusii];
else
verResult15[ii] = readdata[offsetplusii];
work2Vector15Next[ii] = readdata[ii];
end
// Rounded average of the horizontal and vertical half-sample values.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5)));
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1);
end
end
// The first two words of each row only fill the window.
if(work2HorNum >= 2)
begin
Bit#(1) horAddr = truncate(work2HorNum-2);
Bit#(3) verAddr = truncate(work2VerNum);
// Place sub-blocks smaller than 8x8 at their position inside the 8x8 area.
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
resultReadyNext[{verAddr,horAddr}] = 1;
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext));
end
// Row-major traversal: word counter is the inner loop.
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
work2Vector8 <= work2Vector8Next;
work2Vector15 <= work2Vector15Next;
resultReady <= resultReadyNext;
$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset);
endrule
 
 
// First chroma processing stage.
// Each firing takes one 32-bit word from memRespQ for the IPWChroma request
// at the head of reqfifoWork1, realigns it by the byte offset, and computes
// four samples with the weighted 2x2 sum
//   ((8-x)(8-y)*A + x(8-y)*B + (8-x)y*C + xy*D + 32) >> 6
// where A,B come from the previous row and C,D from the current row
// (x = xfracc, y = yfracc). Finished words are written to storeFile in the
// half selected by workFileFlag.
// A chroma request that is not the last one of its group is dequeued here;
// for the last one work1Done is set instead and the switching rules dequeue it.
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done );
// Fractions are widened to 4 bits so that 8-xfracc / 8-yfracc can hold 8.
Bit#(4) xfracc = zeroExtend(reqdata.xFracC);
Bit#(4) yfracc = zeroExtend(reqdata.yFracC);
let offset = reqdata.offset;
let blockT = reqdata.bt;
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Unpack the word into four bytes, least significant byte first.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
Vector#(5,Bit#(8)) tempWork8 = replicate(0);
Vector#(5,Bit#(8)) tempPrev8 = replicate(0);
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Bool resultReadyFlag = False;
// tempWork8[0..4]: five aligned samples of the current row, taken from the
// previous word (work1Vector8[0..3]) and/or the current word.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0))
tempWork8[ii] = work1Vector8[offsetplusii];
else
tempWork8[ii] = readdata[offsetplusii];
work1Vector8Next[ii] = readdata[ii];
end
tempWork8[4] = readdata[offset];
// tempPrev8[0..4]: the same five columns of the previous row, remembered in
// work1Vector8[4..8] (or [9..13] for the second word of 16-wide blocks).
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2))
begin
for(Integer ii=0; ii<5; ii=ii+1)
begin
tempPrev8[ii] = work1Vector8[ii+9];
work1Vector8Next[ii+9] = tempWork8[ii];
end
end
else
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = work1Vector8[ii+4];
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))))
begin
for(Integer ii=0; ii<5; ii=ii+1)
work1Vector8Next[ii+4] = tempWork8[ii];
end
end
// Without a vertical fraction there is no previous row; use the current one
// (its weight factor yfracc is then zero anyway).
if(yfracc==0)
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = tempWork8[ii];
end
// Weighted 2x2 sum; the four weights add up to 64, hence +32 and >>6.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
tempResult8[ii] = truncate((tempVal+32)>>6);
end
// With a vertical fraction the first row of a block only primes tempPrev8.
if(work1VerNum > 0 || yfracc==0)
begin
if(blockT==IP4x8 || blockT==IP4x4)
begin
// These block types yield only two samples per row. The pair is parked in
// work1Vector8 from index 10 on (two entries per row) and the output word
// is assembled as {parked pair, new pair}; it is flagged ready only when
// work1SubMbPart[0]==1.
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1);
work1Vector8Next[tempIndex] = tempResult8[0];
work1Vector8Next[tempIndex+1] = tempResult8[1];
tempResult8[2] = tempResult8[0];
tempResult8[3] = tempResult8[1];
tempResult8[0] = work1Vector8[tempIndex];
tempResult8[1] = work1Vector8[tempIndex+1];
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1)
resultReadyFlag = True;
end
else
begin
if(work1HorNum>0 || (xfracc==0 && offset==0))
resultReadyFlag = True;
end
end
if(resultReadyFlag)
begin
// Destination inside the chroma area: counters minus the priming word/row,
// plus the position of the current partition (work1MbPart) and
// sub-partition (work1SubMbPart).
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1)));
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1));
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]);
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00}));
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0);
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8);
end
// Same limits as loadHorNumMax/loadVerNumMax in loadChroma.
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
// End of this request: advance the sub-partition and partition counters.
// Only after the last partition is work1Done raised (allDone).
Bool allDone = False;
work1VerNum <= 0;
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3))
work1SubMbPart <= work1SubMbPart+1;
else
begin
work1SubMbPart <= 0;
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3))
work1MbPart <= work1MbPart+1;
else
begin
work1MbPart <= 0;
work1Done <= True;
allDone = True;
end
end
// Intermediate requests are dropped here; the final one stays at the head
// of the FIFO until a switching rule hands it to the work2 stage.
if(!allDone)
reqfifoWork1.deq();
end
end
end
work1Vector8 <= work1Vector8Next;
$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset);
endrule
 
 
// Second chroma processing stage.
// Chroma samples are already final after work1Chroma, so this rule only
// copies one word per firing from storeFile (the half work1 is not writing)
// into resultFile and marks it ready. Sixteen entries are copied; the entry
// index interleaves the counters as {work2VerNum[1], work2HorNum,
// work2VerNum[0]}. After the last entry both work2Done and work8x8Done are set.
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done );
   Bit#(4) entryIdx = {work2VerNum[1],work2HorNum,work2VerNum[0]};
   Bool lastInRow = (work2HorNum == 3);
   Bool lastRow = (work2VerNum == 3);

   // Copy the word and flag it for the output rule.
   Vector#(4,Bit#(8)) chromaWord = storeFile.sub({(1-workFileFlag),1'b0,entryIdx});
   resultFile.upd(entryIdx,chromaWord);
   Vector#(16,Bit#(1)) resultReadyNext = resultReady;
   resultReadyNext[entryIdx] = 1;
   resultReady <= resultReadyNext;

   // work2HorNum is the inner counter and wraps from 3 to 0 by overflow.
   work2HorNum <= work2HorNum+1;
   if(lastInRow)
      begin
         if(lastRow)
            begin
               work2VerNum <= 0;
               work2Done <= True;
               work8x8Done <= True;
            end
         else
            work2VerNum <= work2VerNum+1;
      end
   $display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum);
endrule
 
 
// Output stage.
// Streams the sixteen resultFile entries into outfifo, one per firing, as
// soon as the entry addressed by {outBlockNum[1], outPixelNum,
// outBlockNum[0]} is flagged in resultReady. outPixelNum is the inner
// counter; after the last entry outDone is set, which stops the rule until
// switching8x8 clears it.
rule outputing( !outDone && resultReady[{outBlockNum[1],outPixelNum,outBlockNum[0]}]==1 );
   Bit#(4) entryIdx = {outBlockNum[1],outPixelNum,outBlockNum[0]};
   Bool lastPixel = (outPixelNum == 3);
   Bool lastBlock = (outBlockNum == 3);

   outfifo.enq(resultFile.sub(entryIdx));

   // Both counters wrap from 3 to 0 by overflow.
   outPixelNum <= outPixelNum+1;
   if(lastPixel)
      outBlockNum <= outBlockNum+1;
   if(lastPixel && lastBlock)
      outDone <= True;
   $display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum);
endrule
 
 
// Stage hand-over inside an 8x8 partition.
// Fires when work1 has finished its request and work2 is idle (either done
// or never started) while the current 8x8 partition is not yet complete.
// The finished request moves from reqfifoWork1 into reqregWork2, the
// workFile/storeFile halves are swapped, and both stages are released.
rule switching( work1Done && (work2Done || reqregWork2==Invalid) && !work8x8Done);
   let finishedReq = reqfifoWork1.first();
   reqfifoWork1.deq();
   reqregWork2 <= tagged Valid finishedReq;
   workFileFlag <= 1-workFileFlag;
   work1Done <= False;
   work2Done <= False;
   $display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
endrule
 
// Stage hand-over at the end of an 8x8 partition.
// Same hand-over as the switching rule, but it additionally waits until the
// output stage has drained the completed partition (outDone). It then
// clears the resultReady flags and the outDone/work8x8Done markers so the
// next partition can be produced.
rule switching8x8( work1Done && (work2Done || reqregWork2==Invalid) && work8x8Done && outDone);
   // Hand the finished work1 request to the work2 stage and swap halves.
   let finishedReq = reqfifoWork1.first();
   reqfifoWork1.deq();
   reqregWork2 <= tagged Valid finishedReq;
   workFileFlag <= 1-workFileFlag;
   work1Done <= False;
   work2Done <= False;

   // Start a fresh result/output cycle.
   resultReady <= replicate(0);
   work8x8Done <= False;
   outDone <= False;
   $display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum);
endrule
 
 
 
// Set the picture width used for horizontal address clamping in the load
// rules (which scale it by 16 for luma and 8 for chroma).
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth );
picWidth <= newPicWidth;
endmethod
// Set the picture height used for vertical address clamping in the load
// rules (which scale it by 16 for luma and 8 for chroma). The register
// resets to 0, so this must be called before the first request for the
// vertical clamp to be meaningful.
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight );
picHeight <= newPicHeight;
endmethod
// Accept one interpolation request.
// The full request is queued for address generation (reqfifoLoad) and a
// reduced work descriptor is queued for the processing stages
// (reqfifoWork1):
//   luma   - fractional bits mvhor[1:0]/mvver[1:0], offset mvhor[3:2]
//   chroma - fractional bits mvhor[2:0]/mvver[2:0], offset mvhor[4:3]
//            plus 2 when hor is odd
method Action request( InterpolatorIT inputdata );
   reqfifoLoad.enq(inputdata);
   case (inputdata) matches
      tagged IPLuma .indata :
         reqfifoWork1.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt});
      tagged IPChroma .indata :
         reqfifoWork1.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt});
   endcase
endmethod
 
// Return the oldest pending result word (four 8-bit samples) without
// removing it from outfifo.
method Vector#(4,Bit#(8)) first();
return outfifo.first();
endmethod
// Remove the oldest result word from outfifo.
method Action deq();
outfifo.deq();
endmethod
// Signal the end of the current frame. Sets endOfFrameFlag, which pauses
// loadLuma/loadChroma until sendEndOfFrameReq has issued IPLoadEndFrame.
method Action endOfFrame();
endOfFrameFlag <= True;
endmethod
// Memory port: requests are taken from memReqQ, responses are put into
// memRespQ (consumed by work1Luma/work1Chroma).
interface Client mem_client;
interface Get request = fifoToGet(memReqQ);
interface Put response = fifoToPut(memRespQ);
endinterface
 
 
endmodule
 
 
endpackage
/trunk/src/mkCalc_nC.bsv
0,0 → 1,313
//**********************************************************************
// nC Calculator implementation
//----------------------------------------------------------------------
//
//
 
package mkCalc_nC;
 
import H264Types::*;
import ICalc_nC::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
(* synthesize *)
module mkCalc_nC( Calc_nC );
 
// Picture width as set by initialize_picWidth (parameter name: picWidthInMb).
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
// Macroblock address given to initialize(); used by loadMb to decide whether
// left/top neighbour data exists.
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
// Address of the macroblock currently being processed.
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
// 1 while a multi-cycle operation started by loadMb, nNupdate_pskip or
// nNupdate_ipcm is still running; every interface method is guarded on
// waiting == 0.
Reg#(Bit#(1)) waiting <- mkReg(0);
// Number of load requests still to be issued by rule sendReq.
Reg#(Bit#(1)) reqCount <- mkReg(0);
// Number of load responses still expected by rule receiveResp.
Reg#(Bit#(2)) respCount <- mkReg(0);
// Set to 1 by nNupdate_ipcm; consumed by rule ipcmReq.
Reg#(Bit#(1)) ipcmCount <- mkReg(0);
// Number of stores rule pskipReq still has to issue.
Reg#(Bit#(PicAreaSz)) pskipCount <- mkReg(0);
// leftVal / topVal each pack four 5-bit fields (see the [4:0], [9:5],
// [14:10], [19:15] slices in nCcalc_luma / nNupdate_luma).
Reg#(Bit#(20)) leftVal <- mkReg(0);
Reg#(Bit#(20)) topVal <- mkReg(0);
// The chroma registers each pack two 5-bit fields; index 0/1 is selected by
// microBlockNum[2] in nCcalc_chroma / nNupdate_chroma.
Reg#(Bit#(10)) leftValChroma0 <- mkReg(0);
Reg#(Bit#(10)) topValChroma0 <- mkReg(0);
Reg#(Bit#(10)) leftValChroma1 <- mkReg(0);
Reg#(Bit#(10)) topValChroma1 <- mkReg(0);
// Queues behind the mem_client interface.
FIFO#(MemReq#(TAdd#(PicWidthSz,1),20)) memReqQ <- mkFIFO;
FIFO#(MemResp#(20)) memRespQ <- mkFIFO;
// Constants used as the top address bit of every memory request: bit0 is
// used for the entry holding topVal, bit1 for the entry holding
// {topValChroma1,topValChroma0}.
Bit#(1) bit1 = 1;
Bit#(1) bit0 = 0;
// Brings currMbHor back below picWidth. While currMbHor is not smaller than
// picWidth, each firing subtracts eight picture widths when at least that
// many fit, otherwise a single picture width.
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth);
   Bit#(PicAreaSz) step = ((currMbHor >> 3) >= widthExt) ? (widthExt << 3) : widthExt;
   currMbHor <= currMbHor - step;
endrule
 
// Issues the pending load request for the bit1-prefixed entry of the current
// column (the second of the two loads started by loadMb).
rule sendReq ( waiting == 1 && reqCount > 0 );
   Bit#(PicWidthSz) column = truncate(currMbHor);
   Bit#(TAdd#(PicWidthSz,1)) loadAddr = {bit1,column};
   reqCount <= reqCount-1;
   memReqQ.enq(LoadReq loadAddr );
endrule
 
// Consumes one load response. The first response (respCount == 2) becomes
// topVal; the following one is split into the two chroma top values and
// ends the waiting state.
rule receiveResp ( waiting == 1 &&& respCount > 0 &&& memRespQ.first() matches tagged LoadResp .loaded );
   memRespQ.deq();
   respCount <= respCount - 1;
   if( respCount != 2 )
      begin
         topValChroma0 <= loaded[9:0];
         topValChroma1 <= loaded[19:10];
         waiting <= 0;
      end
   else
      topVal <= loaded;
endrule
 
// Second step of nNupdate_ipcm: writes the fixed pattern to the bit1-prefixed
// entry of the current column, advances to the next macroblock and leaves
// the waiting state.
rule ipcmReq ( waiting == 1 && ipcmCount > 0 );
   Bit#(PicWidthSz) column = truncate(currMbHor);
   Bit#(TAdd#(PicWidthSz,1)) storeAddr = {bit1,column};
   memReqQ.enq(StoreReq {addr:storeAddr,data:20'b10000100001000010000} );
   currMbHor <= currMbHor+1;
   currMb <= currMb+1;
   waiting <= 0;
   ipcmCount <= 0;
endrule
 
// Issues the remaining all-zero stores requested by nNupdate_pskip, one per
// firing. The low bit of pskipCount selects which entry is written: 1 writes
// the bit1-prefixed entry and then advances to the next macroblock, 0 writes
// the bit0-prefixed entry. The waiting state ends with the last store.
rule pskipReq ( waiting == 1 && pskipCount > 0 && currMbHor<zeroExtend(picWidth) );
   Bit#(1) entrySel = pskipCount[0];
   Bit#(PicWidthSz) column = truncate(currMbHor);
   Bit#(TAdd#(PicWidthSz,1)) storeAddr = {entrySel,column};
   memReqQ.enq(StoreReq {addr:storeAddr,data:20'b00000000000000000000} );
   pskipCount <= pskipCount - 1;
   if(entrySel == 1)
      begin
         currMb <= currMb+1;
         currMbHor <= currMbHor+1;
         if(pskipCount == 1)
            waiting <= 0;
      end
endrule
 
// Stores a new picture width. Only callable while the module is idle
// (waiting == 0) and currMbHor is below the current picWidth.
method Action initialize_picWidth( Bit#(PicWidthSz) picWidthInMb ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   picWidth._write(picWidthInMb);
endmethod
// Restarts the position tracking at firstMbAddr and clears the left-neighbour
// values. currMbHor is loaded with the raw address; rule currMbHorUpdate
// subsequently reduces it below picWidth.
method Action initialize( Bit#(PicAreaSz) firstMbAddr ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   // Left-neighbour state
   leftValChroma1 <= 0;
   leftValChroma0 <= 0;
   leftVal <= 0;
   // Position state
   currMbHor <= firstMbAddr;
   currMb <= firstMbAddr;
   firstMb <= firstMbAddr;
endmethod
 
// Prepares the neighbour values for macroblock mbAddr, which must equal
// currMb (otherwise only an error message is printed).
//  - At horizontal position 0, or for the first macroblock, the left values
//    are set to all ones.
//  - If fewer than picWidth macroblocks have passed since firstMb, the top
//    values are set to all ones; otherwise they are loaded from memory: the
//    bit0-prefixed entry is requested here, the bit1-prefixed entry by rule
//    sendReq, and rule receiveResp stores the responses.
method Action loadMb( Bit#(PicAreaSz) mbAddr ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   Bool noLeftData = (currMbHor == 0 || currMb == firstMb);
   Bool noTopData = (currMb-firstMb < zeroExtend(picWidth));
   if( mbAddr == currMb )
      begin
         if( noLeftData )
            begin
               leftVal <= 20'b11111111111111111111;
               leftValChroma0 <= 10'b1111111111;
               leftValChroma1 <= 10'b1111111111;
            end
         if( !noTopData )
            begin
               Bit#(PicWidthSz) column = truncate(currMbHor);
               Bit#(TAdd#(PicWidthSz,1)) loadAddr = {bit0,column};
               memReqQ.enq(LoadReq loadAddr );
               respCount <= 2;
               reqCount <= 1;
               waiting <= 1;
            end
         else
            begin
               topVal <= 20'b11111111111111111111;
               topValChroma0 <= 10'b1111111111;
               topValChroma1 <= 10'b1111111111;
            end
      end
   else
      $display( "ERROR EntropyDec: mkCalc_nC loadMb wrong mbAddr" );
endmethod
 
// Returns the nC value for luma block microBlockNum from the stored left and
// top values. Bits [3] and [1] of microBlockNum pick the 5-bit field of
// leftVal, bits [2] and [0] pick the field of topVal. A field equal to 31
// (all ones) is treated as missing: with both present the result is the
// rounded-up average, with one present it is that field, with none it is 0.
method Bit#(5) nCcalc_luma( Bit#(4) microBlockNum ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   Bit#(2) leftSel = {microBlockNum[3],microBlockNum[1]};
   Bit#(2) topSel = {microBlockNum[2],microBlockNum[0]};
   Bit#(6) leftField = 0;
   Bit#(6) topField = 0;
   case (leftSel)
      2'b00: leftField = zeroExtend(leftVal[4:0]);
      2'b01: leftField = zeroExtend(leftVal[9:5]);
      2'b10: leftField = zeroExtend(leftVal[14:10]);
      default: leftField = zeroExtend(leftVal[19:15]);
   endcase
   case (topSel)
      2'b00: topField = zeroExtend(topVal[4:0]);
      2'b01: topField = zeroExtend(topVal[9:5]);
      2'b10: topField = zeroExtend(topVal[14:10]);
      default: topField = zeroExtend(topVal[19:15]);
   endcase
   Bool leftPresent = (leftField != 6'b011111);
   Bool topPresent = (topField != 6'b011111);
   Bit#(5) nC = 0;
   if(topPresent && leftPresent)
      nC = truncate((topField+leftField+1) >> 1);
   else if(leftPresent)
      nC = truncate(leftField);
   else if(topPresent)
      nC = truncate(topField);
   return nC;
endmethod
 
// Chroma counterpart of nCcalc_luma. microBlockNum[2] selects the Chroma0 or
// Chroma1 register pair, bit [1] the 5-bit field of the left value and bit
// [0] the field of the top value. Fields equal to 31 are treated as missing,
// with the same averaging / fallback rules as for luma.
method Bit#(5) nCcalc_chroma( Bit#(3) microBlockNum ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   Bit#(10) leftSrc = (microBlockNum[2]==0) ? leftValChroma0 : leftValChroma1;
   Bit#(10) topSrc = (microBlockNum[2]==0) ? topValChroma0 : topValChroma1;
   Bit#(6) leftField = 0;
   Bit#(6) topField = 0;
   if(microBlockNum[1]==0)
      leftField = zeroExtend(leftSrc[4:0]);
   else
      leftField = zeroExtend(leftSrc[9:5]);
   if(microBlockNum[0]==0)
      topField = zeroExtend(topSrc[4:0]);
   else
      topField = zeroExtend(topSrc[9:5]);
   Bool leftPresent = (leftField != 6'b011111);
   Bool topPresent = (topField != 6'b011111);
   Bit#(5) nC = 0;
   if(topPresent && leftPresent)
      nC = truncate((topField+leftField+1) >> 1);
   else if(leftPresent)
      nC = truncate(leftField);
   else if(topPresent)
      nC = truncate(topField);
   return nC;
endmethod
 
// Records totalCoeff for luma block microBlockNum: the 5-bit field selected
// by bits [3],[1] is replaced in leftVal and the field selected by bits
// [2],[0] is replaced in topVal. After block 15 the updated topVal is also
// stored to the bit0-prefixed memory entry of the current column.
method Action nNupdate_luma( Bit#(4) microBlockNum, Bit#(5) totalCoeff ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   Bit#(2) leftSel = {microBlockNum[3],microBlockNum[1]};
   Bit#(2) topSel = {microBlockNum[2],microBlockNum[0]};
   Bit#(20) leftValNext = leftVal;
   Bit#(20) topValNext = topVal;
   case (leftSel)
      2'b00: leftValNext = {leftVal[19:5] , totalCoeff};
      2'b01: leftValNext = {{leftVal[19:10] , totalCoeff} , leftVal[4:0]};
      2'b10: leftValNext = {{leftVal[19:15] , totalCoeff} , leftVal[9:0]};
      default: leftValNext = {totalCoeff , leftVal[14:0]};
   endcase
   case (topSel)
      2'b00: topValNext = {topVal[19:5] , totalCoeff};
      2'b01: topValNext = {{topVal[19:10] , totalCoeff} , topVal[4:0]};
      2'b10: topValNext = {{topVal[19:15] , totalCoeff} , topVal[9:0]};
      default: topValNext = {totalCoeff , topVal[14:0]};
   endcase
   leftVal <= leftValNext;
   topVal <= topValNext;
   if(microBlockNum == 15)
      begin
         Bit#(PicWidthSz) column = truncate(currMbHor);
         Bit#(TAdd#(PicWidthSz,1)) storeAddr = {bit0,column};
         memReqQ.enq(StoreReq {addr:storeAddr,data:topValNext} );
      end
   //$display( "TRACE nNupdate_luma old leftVal %b", leftVal );
   //$display( "TRACE nNupdate_luma old topVal %b", topVal );
   //$display( "TRACE nNupdate_luma microBlockNum %0d", microBlockNum );
   //$display( "TRACE nNupdate_luma totalCoeff %0d", totalCoeff );
endmethod
// Records totalCoeff for chroma block microBlockNum. microBlockNum[2] picks
// the Chroma0 or Chroma1 registers, bit [1] the field of the left value and
// bit [0] the field of the top value. After block 7 both top values are
// stored to the bit1-prefixed memory entry of the current column and the
// module advances to the next macroblock.
method Action nNupdate_chroma( Bit#(3) microBlockNum, Bit#(5) totalCoeff ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   Bool useSet0 = (microBlockNum[2]==0);
   Bit#(10) leftOld = useSet0 ? leftValChroma0 : leftValChroma1;
   Bit#(10) topOld = useSet0 ? topValChroma0 : topValChroma1;
   Bit#(10) leftNew = (microBlockNum[1]==0) ? {leftOld[9:5] , totalCoeff} : {totalCoeff , leftOld[4:0]};
   Bit#(10) topNew = (microBlockNum[0]==0) ? {topOld[9:5] , totalCoeff} : {totalCoeff , topOld[4:0]};
   Bit#(10) top0Next = useSet0 ? topNew : topValChroma0;
   Bit#(10) top1Next = useSet0 ? topValChroma1 : topNew;
   // Only the selected left register is written.
   if(useSet0)
      leftValChroma0 <= leftNew;
   else
      leftValChroma1 <= leftNew;
   topValChroma0 <= top0Next;
   topValChroma1 <= top1Next;
   if(microBlockNum == 7)
      begin
         Bit#(PicWidthSz) column = truncate(currMbHor);
         Bit#(TAdd#(PicWidthSz,1)) storeAddr = {bit1,column};
         memReqQ.enq(StoreReq {addr:storeAddr,data:{top1Next,top0Next}} );
         currMbHor <= currMbHor+1;
         currMb <= currMb+1;
      end
endmethod
 
// Starts the handling of a run of inmb_skip_run skipped macroblocks (nothing
// happens for a run of 0). The left values are cleared, the first all-zero
// store (bit0-prefixed entry of the current column) is issued here, and
// pskipCount is set to the 2*run-1 stores that rule pskipReq still has to
// issue.
method Action nNupdate_pskip( Bit#(PicAreaSz) inmb_skip_run ) if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE nNupdate_pskip mb_skip_run = %0d", inmb_skip_run );
   if(inmb_skip_run > 0)
      begin
         Bit#(PicWidthSz) column = truncate(currMbHor);
         Bit#(TAdd#(PicWidthSz,1)) storeAddr = {bit0,column};
         memReqQ.enq(StoreReq {addr:storeAddr,data:20'b00000000000000000000} );
         pskipCount <= (inmb_skip_run << 1)-1;
         waiting <= 1;
         leftValChroma1 <= 10'b0000000000;
         leftValChroma0 <= 10'b0000000000;
         leftVal <= 0;
      end
endmethod
// Handles an IPCM macroblock: every 5-bit field of the left values is set to
// 16 (binary 10000), the same pattern is stored to the bit0-prefixed entry of
// the current column, and rule ipcmReq is armed to write the bit1-prefixed
// entry and advance to the next macroblock.
method Action nNupdate_ipcm() if( waiting == 0 && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE nNupdate_ipcm");
   Bit#(PicWidthSz) column = truncate(currMbHor);
   Bit#(TAdd#(PicWidthSz,1)) storeAddr = {bit0,column};
   memReqQ.enq(StoreReq {addr:storeAddr,data:20'b10000100001000010000} );
   ipcmCount <= 1;
   waiting <= 1;
   leftValChroma1 <= 10'b1000010000;
   leftValChroma0 <= 10'b1000010000;
   leftVal <= 20'b10000100001000010000;
endmethod
 
// Memory client port: load/store requests are drained from memReqQ and load
// responses are pushed into memRespQ.
interface Client mem_client;
interface Get request = fifoToGet(memReqQ);
interface Put response = fifoToPut(memRespQ);
endinterface
 
 
endmodule
 
 
 
endpackage
/trunk/src/mkInterpolator_4stage_16ready.bsv
0,0 → 1,843
//**********************************************************************
// interpolator implementation
//----------------------------------------------------------------------
//
//
 
package mkInterpolator;
 
import H264Types::*;
import IInterpolator::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Work descriptor passed from the request method to the work rules.
// IPWLuma carries 2-bit x/y fractions, IPWChroma 3-bit x/y fractions; both
// carry a 2-bit offset (taken from motion-vector bits by the request method)
// and the block type.
typedef union tagged
{
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma;
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
}
InterpolatorWT deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Clips a 10-bit value to 8 bits: a set bit 9 yields 0, otherwise a set
// bit 8 yields 255, otherwise the low 8 bits are returned unchanged.
function Bit#(8) clip1y10to8( Bit#(10) innum );
   Bit#(8) clipped = truncate(innum);
   if(innum[9] == 1)
      clipped = 0;
   else if(innum[8] == 1)
      clipped = 255;
   return clipped;
endfunction
 
// Six-tap filter with weights (1, -5, 20, 20, -5, 1) applied to six 8-bit
// inputs; the sum is computed modulo 2^15 without rounding or scaling.
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 );
   Bit#(15) outerTaps = zeroExtend(in0) + zeroExtend(in5);
   Bit#(15) innerTaps = zeroExtend(in1) + zeroExtend(in4);
   Bit#(15) centreTaps = zeroExtend(in2) + zeroExtend(in3);
   return outerTaps - 5*innerTaps + 20*centreTaps;
endfunction
 
// Six-tap filter with weights (1, -5, 20, 20, -5, 1) applied to six
// sign-extended 15-bit inputs. The 20-bit sum is rounded by adding 512,
// shifted right by 10 and clipped to 8 bits with clip1y10to8.
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 );
   Bit#(20) outerTaps = signExtend(in0) + signExtend(in5);
   Bit#(20) innerTaps = signExtend(in1) + signExtend(in4);
   Bit#(20) centreTaps = signExtend(in2) + signExtend(in3);
   Bit#(20) rounded = outerTaps - 5*innerTaps + 20*centreTaps + 512;
   Bit#(10) scaled = truncate(rounded>>10);
   return clip1y10to8(scaled);
endfunction
 
 
 
//-----------------------------------------------------------
// Interpolation Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkInterpolator( Interpolator );
// Request queue consumed by the load rules (loadLuma / loadChroma).
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
// Work descriptors consumed by the work1 rules.
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size);
// Work descriptor currently processed by the work2 rules (Invalid when none).
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid);
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
// When set, rule sendEndOfFrameReq sends IPLoadEndFrame and the load rules
// are blocked.
Reg#(Bool) endOfFrameFlag <- mkReg(False);
// Memory request / response queues.
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);

// Picture dimensions used by the load rules to clamp addresses; there they
// are scaled by 16 (luma) or 8 (chroma) before being compared with sample
// positions.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);

// workFile holds 15-bit intermediate values and storeFile 8-bit reference
// data, both written by work1Luma and read by work2Luma. The top address bit
// is workFileFlag on the write side and 1-workFileFlag on the read side.
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full();
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full();
Reg#(Bit#(1)) workFileFlag <- mkReg(0);
// Final 8-bit results written by work2Luma.
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full();

// Progress counters of the load rules.
Reg#(Bit#(1)) loadStage <- mkReg(0);
Reg#(Bit#(2)) loadHorNum <- mkReg(0);
Reg#(Bit#(4)) loadVerNum <- mkReg(0);

// Progress counters and scratch data of the work1 rules.
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(1)) work1Stage <- mkReg(0);
Reg#(Bit#(2)) work1HorNum <- mkReg(0);
Reg#(Bit#(4)) work1VerNum <- mkReg(0);
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU;
Reg#(Bool) work1Done <- mkReg(False);

// Progress counters and scratch data of the work2 rules. resultReady has one
// bit per resultFile entry, set when that entry is written.
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0);
Reg#(Bit#(2)) work2HorNum <- mkReg(0);
Reg#(Bit#(4)) work2VerNum <- mkReg(0);
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU;
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU;
Reg#(Vector#(16,Bit#(1))) resultReady <- mkReg(replicate(0));
Reg#(Bool) work2Done <- mkReg(False);
Reg#(Bool) work8x8Done <- mkReg(False);

// Output-side progress state.
Reg#(Bit#(2)) outBlockNum <- mkReg(0);
Reg#(Bit#(2)) outPixelNum <- mkReg(0);
Reg#(Bool) outDone <- mkReg(False);
 
// Forwards a pending end-of-frame notification to the memory side as an
// IPLoadEndFrame request and clears the flag.
rule sendEndOfFrameReq( endOfFrameFlag );
   memReqQ.enq(IPLoadEndFrame);
   endOfFrameFlag <= False;
endrule
// Issues one luma memory read request per firing for the IPLuma request at
// the head of reqfifoLoad. loadHorNum / loadVerNum (and loadStage for
// two-pass blocks) step through all words of the block; the request is
// dequeued when the last word has been requested. Blocked while
// endOfFrameFlag is set.
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag );
// Low two bits of each motion-vector component; the upper bits are used
// below as the whole-sample displacement.
Bit#(2) xfracl = reqdata.mvhor[1:0];
Bit#(2) yfracl = reqdata.mvver[1:0];
Bit#(2) offset = reqdata.mvhor[3:2];
// Both fractions odd: the block is fetched in two passes selected by
// loadStage (pass 0 with verInter, pass 1 with horInter).
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
// horInter / verInter: whether this pass fetches the additional words used
// for filtering (2 more word columns resp. 5 more rows, see the ...NumMax
// values below).
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0);
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0);
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0);
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,2)) horAddr;
Bit#(TAdd#(PicHeightSz,4)) verAddr;
// Position of the requested word before the motion vector is applied:
// reqdata.hor and loadHorNum are both scaled by 4.
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0);
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0);
// Whole-sample part of the motion vector, moved back by 2 when the
// corresponding filter direction is active.
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0);
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0);
// Horizontal clamping: a position below 0 maps to word 0, a position at or
// beyond picWidth*16 maps to the last word of the row; both cases set horOut.
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(mvhortemp);
if(horTemp>=zeroExtend({picWidth,4'b0000}))
begin
horAddr = {picWidth-1,2'b11};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamping to the range [0, picHeight*16-1]; no flag is produced.
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(mvvertemp);
if(verTemp>=zeroExtend({picHeight,4'b0000}))
verAddr = {picHeight-1,4'b1111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// verFirst selects the scan order: loadVerNum advances fastest when true,
// loadHorNum advances fastest otherwise.
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3));
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1));
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
if(verFirst)
begin
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
if(loadHorNum < loadHorNumMax)
begin
if(loadStage == 1)
begin
// Second pass: depending on the (adjusted) offset either jump directly to
// the last word column or finish the request.
offset = offset + (xfracl==3 ? 1 : 0);
if(!(offset==1 || (xfracl==3 && offset==2)))
loadHorNum <= loadHorNumMax;
else
begin
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
else
loadHorNum <= loadHorNum+1;
end
else
begin
if(twoStage && loadStage==0)
begin
// End of the first pass: choose the starting word column of the second pass
// and switch loadStage.
offset = offset + (xfracl==3 ? 1 : 0);
if((xfracl==3 ? offset<3 : offset<2))
loadHorNum <= 0;
else
loadHorNum <= loadHorNumMax+1;
loadStage <= 1;
end
else
begin
// All words requested: reset the counters and drop the request.
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
end
end
else
begin
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
end
// Block types wider or taller than 8 are only reported, not handled.
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16)
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported");
$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
 
// Issues one chroma memory read request per firing for the IPChroma request
// at the head of reqfifoLoad. loadHorNum advances fastest, then loadVerNum;
// the request is dequeued after the last word. Blocked while endOfFrameFlag
// is set.
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag );
// Low three bits of each motion-vector component; bits [13:3] / [11:3] are
// used below as the whole-sample displacement.
Bit#(3) xfracc = reqdata.mvhor[2:0];
Bit#(3) yfracc = reqdata.mvver[2:0];
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0};
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,1)) horAddr;
Bit#(TAdd#(PicHeightSz,3)) verAddr;
// Position of the requested word before the motion vector is applied:
// reqdata.hor is scaled by 2 and loadHorNum by 4.
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00});
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum);
// Horizontal clamping: a position below 0 maps to word 0, a position at or
// beyond picWidth*8 maps to the last word of the row; both cases set horOut.
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]);
if(horTemp>=zeroExtend({picWidth,3'b000}))
begin
horAddr = {picWidth-1,1'b1};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamping to the range [0, picHeight*8-1]; no flag is produced.
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
if(verTemp>=zeroExtend({picHeight,3'b000}))
verAddr = {picHeight-1,3'b111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// Last counter values for this block type; one extra row is fetched when
// yfracc is non-zero, and an extra word column depending on xfracc / offset.
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
// All words requested: drop the request.
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
// First luma work stage. For the IPWLuma descriptor at the head of
// reqfifoWork1 it consumes one memory response per firing and, depending on
// the fractions, either reorders / horizontally filters the data or
// vertically filters it, writing 15-bit results to workFile (and, for the
// two-pass case, raw data to storeFile). Sets work1Done after the last word;
// the descriptor itself is not dequeued here.
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
// Same two-pass condition as in loadLuma: both fractions odd.
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Unpack the 32-bit response into four bytes, lowest byte first.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] );
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
// Case 1: data arrives row by row (horizontal counter advances fastest).
if(xfracl==0 || yfracl==0 || xfracl==2)
begin
if(xfracl==0)//reorder
begin
// Assemble four output bytes starting at byte position 'offset', taking the
// leading bytes from the previous word (work1Vector8) when offset != 0.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && offset!=0)
tempResult8[ii] = work1Vector8[offsetplusii];
else
tempResult8[ii] = readdata[offsetplusii];
work1Vector8Next[ii] = readdata[ii];
end
// Scale by 32 (5-bit left shift) to match the filter output format.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
else//horizontal interpolation
begin
offset = offset-2;
// Shift the sample window by one word and insert the new word at a position
// that depends on the offset.
for(Integer ii=0; ii<8; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work1Vector8Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]);
// Rounded, clipped 8-bit version of the filter output; for xfracl 1 or 3 it
// is additionally averaged with a neighbouring input sample.
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1);
end
end
// The first workHorNumOffset words of each row only fill the window; results
// are written from then on.
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1));
if(work1HorNum >= workHorNumOffset)
begin
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset);
if(yfracl == 0)
begin
// No vertical filtering follows: store the final 8-bit value scaled by 32.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15);
end
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0);
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
work1Done <= True;
end
end
end
// Case 2: data arrives column by column (vertical counter advances fastest).
else if(work1Stage == 0)//vertical interpolation
begin
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
// Filter over the five previously received rows kept in work1Vector8 plus
// the row just received, then shift the row history by one row.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]);
for(Integer ii=0; ii<16; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work1Vector8Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
Bit#(2) horAddr = work1HorNum;
Bit#(3) verAddr = truncate(work1VerNum-5);
// The first five rows of a column only fill the history; results are written
// from the sixth row on.
if(work1VerNum > 4)
begin
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15);
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]);
end
if(twoStage)
begin
// Two-pass case: also keep the raw rows in storeFile, at a row index shifted
// by 2 (or 3 when yfracl==3); only indices below 8 are stored.
Bit#(2) storeHorAddr = work1HorNum;
Bit#(4) storeVerAddr = work1VerNum;
if((xfracl==3 ? offset<3 : offset<2))
storeHorAddr = storeHorAddr+1;
if(yfracl==3)
storeVerAddr = storeVerAddr-3;
else
storeVerAddr = storeVerAddr-2;
if(storeVerAddr < 8)
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata);
end
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
if(twoStage)
begin
// Continue with the second pass; its starting column mirrors the choice made
// in loadLuma.
work1Stage <= 1;
if((xfracl==3 ? offset<3 : offset<2))
work1HorNum <= 0;
else
work1HorNum <= workHorNumMax+1;
end
else
begin
work1HorNum <= 0;
work1Done <= True;
end
end
end
end
else//second stage of twoStage
begin
// Second pass: the received words are stored unmodified in storeFile.
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata);
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
offset = offset + (xfracl==3 ? 1 : 0);
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2)))
work1HorNum <= workHorNumMax;
else
begin
work1HorNum <= 0;
work1Stage <= 0;
work1Done <= True;
end
end
end
end
work1Vector8 <= work1Vector8Next;
$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage);
endrule
 
 
// Second luma work stage. For the IPWLuma descriptor held in reqregWork2 it
// reads the data written by work1Luma (using the opposite workFileFlag
// value), applies the remaining filtering step and writes 8-bit results to
// resultFile, marking each written entry in resultReady. Sets work2Done when
// the (sub-)block is finished and work8x8Done when no further sub-block of
// the current block type remains.
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8;
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15;
Vector#(16,Bit#(1)) resultReadyNext = resultReady;
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) readdata = replicate(0);
// Case 1: no vertical fraction. The workFile entries already hold the final
// values scaled by 32, so bits [12:5] are copied to resultFile.
if(yfracl==0)
begin
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]});
for(Integer ii=0; ii<4; ii=ii+1)
tempResult8[ii] = (readdata[ii])[12:5];
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8);
resultReadyNext[{work2VerNum[1],work2HorNum,work2VerNum[0]}] = 1;
work2HorNum <= work2HorNum+1;
if(work2HorNum == 3)
begin
if(work2VerNum == 3)
begin
work2VerNum <= 0;
work2Done <= True;
// IP4x8 / IP8x4 are processed as two sub-blocks and IP4x4 as four; the other
// block types finish after one pass.
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
else
work2VerNum <= work2VerNum+1;
end
end
// Case 2: vertical filtering over the 15-bit rows stored in workFile.
else if(xfracl==0 || xfracl==2)//vertical interpolation
begin
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]});
for(Integer ii=0; ii<4; ii=ii+1)
begin
// Filter over the five rows kept in work2Vector15 plus the row just read;
// for yfracl 1 or 3 the result is averaged with one of the centre rows.
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]);
if(yfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1);
else if(yfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1);
end
// Shift the row history by one row.
for(Integer ii=0; ii<16; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work2Vector15Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = 1;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
// The first five rows of a column only fill the history.
if(work2VerNum > 4)
begin
Bit#(1) horAddr = truncate(work2HorNum);
Bit#(3) verAddr = truncate(work2VerNum-5);
// Place the result according to the sub-block currently being processed.
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
resultReadyNext[{verAddr,horAddr}] = 1;
end
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
// Case 3: horizontal filtering in this stage.
else//horizontal interpolation
begin
offset = offset-2;
if(yfracl == 2)
begin
// Horizontal filter over the 15-bit values produced by the vertical filter
// of the first stage.
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector15Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]);
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1);
end
end
else
begin
// Two-pass case: horizontally filter the raw 8-bit data from storeFile ...
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector8Next[ii] = work2Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector8Next[tempIndex] = readdata8[ii];
end
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]);
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
end
// ... then fetch the vertically filtered values from workFile, realign them
// by verOffset, and average both results.
Bit#(2) verOffset;
Vector#(4,Bit#(15)) verResult15 = replicate(0);
if(xfracl == 1)
verOffset = reqdata.offset;
else
verOffset = reqdata.offset+1;
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))});
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = verOffset+fromInteger(ii);
if(verOffset <= 3-fromInteger(ii) && verOffset!=0)
verResult15[ii] = work2Vector15[offsetplusii];
else
verResult15[ii] = readdata[offsetplusii];
work2Vector15Next[ii] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5)));
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1);
end
end
// The first two words of each row only fill the filter window.
if(work2HorNum >= 2)
begin
Bit#(1) horAddr = truncate(work2HorNum-2);
Bit#(3) verAddr = truncate(work2VerNum);
// Place the result according to the sub-block currently being processed.
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
resultReadyNext[{verAddr,horAddr}] = 1;
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext));
end
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
// Commit the locally updated scratch vectors and ready bits.
work2Vector8 <= work2Vector8Next;
work2Vector15 <= work2Vector15Next;
resultReady <= resultReadyNext;
$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset);
endrule
 
 
// Rule work1Chroma: first work stage for a chroma request. It fires while the head
// of reqfifoWork1 is an IPWChroma entry and work1Done is clear. When the head of
// memRespQ is an IPLoadResp word, that word is consumed, combined with bytes kept
// in work1Vector8 from earlier firings, and up to four result samples are written
// to storeFile. The counters work1HorNum / work1VerNum / work1SubMbPart /
// work1MbPart are then advanced.
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done );
// Fractional positions widened to 4 bits so that (8 - frac) can be formed below.
Bit#(4) xfracc = zeroExtend(reqdata.xFracC);
Bit#(4) yfracc = zeroExtend(reqdata.yFracC);
let offset = reqdata.offset;
let blockT = reqdata.bt;
// Next value of the 20-byte scratch vector; written back at the end of the rule.
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Split the 32-bit response into four bytes, byte 0 taken from bits 7:0.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
// tempWork8: five samples of the current row; tempPrev8: five samples of the previous row.
Vector#(5,Bit#(8)) tempWork8 = replicate(0);
Vector#(5,Bit#(8)) tempPrev8 = replicate(0);
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Bool resultReadyFlag = False;
// Assemble the current row starting at byte 'offset': each sample comes either from
// the word saved in work1Vector8[0..3] on an earlier firing or from the word just read.
// offsetplusii is 2 bits wide, so offset+ii wraps modulo 4.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0))
tempWork8[ii] = work1Vector8[offsetplusii];
else
tempWork8[ii] = readdata[offsetplusii];
// Keep the word just read in entries 0..3 for the next firing.
work1Vector8Next[ii] = readdata[ii];
end
tempWork8[4] = readdata[offset];
// Select the previous-row samples and save the current row for the next line.
// For 16-wide blocks, at the horizontal step tested below, entries 9..13 are used;
// otherwise entries 4..8 are used.
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2))
begin
for(Integer ii=0; ii<5; ii=ii+1)
begin
tempPrev8[ii] = work1Vector8[ii+9];
work1Vector8Next[ii+9] = tempWork8[ii];
end
end
else
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = work1Vector8[ii+4];
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))))
begin
for(Integer ii=0; ii<5; ii=ii+1)
work1Vector8Next[ii+4] = tempWork8[ii];
end
end
// With yfracc==0 the current row is also used as the previous row; the weights on
// tempWork8 below are then zero.
if(yfracc==0)
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = tempWork8[ii];
end
// Weighted sum of the 2x2 neighbourhood with weights (8-x)(8-y), x(8-y), (8-x)y, xy,
// then +32 and >>6 (round and divide by 64), truncated to 8 bits.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
tempResult8[ii] = truncate((tempVal+32)>>6);
end
// No result is flagged on row 0 when yfracc!=0.
if(work1VerNum > 0 || yfracc==0)
begin
if(blockT==IP4x8 || blockT==IP4x4)
begin
// 4-wide partitions: the two new samples are saved at entries tempIndex and
// tempIndex+1, and the output word is built from the pair previously saved
// there (positions 0,1) plus the two new samples (positions 2,3).
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1);
work1Vector8Next[tempIndex] = tempResult8[0];
work1Vector8Next[tempIndex+1] = tempResult8[1];
tempResult8[2] = tempResult8[0];
tempResult8[3] = tempResult8[1];
tempResult8[0] = work1Vector8[tempIndex];
tempResult8[1] = work1Vector8[tempIndex+1];
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1)
resultReadyFlag = True;
end
else
begin
if(work1HorNum>0 || (xfracc==0 && offset==0))
resultReadyFlag = True;
end
end
if(resultReadyFlag)
begin
// Build the storeFile address from the step counters and the partition indices;
// workFileFlag is placed in the top address bit.
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1)));
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1));
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]);
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00}));
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0);
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8);
end
// Last horizontal / vertical step index for this partition shape; one extra row is
// processed when yfracc!=0.
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
// Advance horizontal step, then vertical step, then sub-partition, then partition.
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
Bool allDone = False;
work1VerNum <= 0;
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3))
work1SubMbPart <= work1SubMbPart+1;
else
begin
work1SubMbPart <= 0;
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3))
work1MbPart <= work1MbPart+1;
else
begin
work1MbPart <= 0;
work1Done <= True;
allDone = True;
end
end
// The request is dequeued here only while more partitions remain. After the
// last one the head entry stays in place; rules switching / switching8x8 read
// and dequeue reqfifoWork1 once work1Done is set.
if(!allDone)
reqfifoWork1.deq();
end
end
end
work1Vector8 <= work1Vector8Next;
$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset);
endrule
 
 
// Rule work2Chroma: second work stage for a chroma request held in reqregWork2.
// Each firing copies one word from the storeFile bank not selected by workFileFlag
// into resultFile and marks that word ready. After the 4x4 address space has been
// walked, work2Done and work8x8Done are set.
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done );
   // Destination address in resultFile; the same bits address the source bank.
   Bit#(4) dstAddr = {work2VerNum[1],work2HorNum,work2VerNum[0]};
   Bool lastHor = (work2HorNum == 3);
   Bool lastVer = (work2VerNum == 3);

   resultFile.upd(dstAddr, storeFile.sub({(1-workFileFlag),1'b0,dstAddr}));
   resultReady <= update(resultReady, dstAddr, 1);

   // work2HorNum always increments; the vertical counter moves when it is at 3.
   work2HorNum <= work2HorNum+1;
   if(lastHor && lastVer)
      begin
         work2VerNum <= 0;
         work2Done <= True;
         work8x8Done <= True;
      end
   else if(lastHor)
      work2VerNum <= work2VerNum+1;

   $display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum);
endrule
 
 
// Rule outputing: streams finished words from resultFile to outfifo. It fires only
// when the word addressed by outBlockNum/outPixelNum is flagged in resultReady,
// and sets outDone after the last word of block 3 has been sent.
rule outputing( !outDone && resultReady[{outBlockNum[1],outPixelNum,outBlockNum[0]}]==1 );
   Bit#(4) rdAddr = {outBlockNum[1],outPixelNum,outBlockNum[0]};
   Bool lastPixel = (outPixelNum == 3);

   outfifo.enq(resultFile.sub(rdAddr));
   outPixelNum <= outPixelNum+1;
   if(lastPixel)
      outBlockNum <= outBlockNum+1;
   if(lastPixel && outBlockNum == 3)
      outDone <= True;

   $display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum);
endrule
 
 
// Rule switching: hands the request at the head of reqfifoWork1 over to the second
// work stage (reqregWork2) and flips workFileFlag. It fires when stage 1 is done,
// stage 2 is done or idle, and work8x8Done is not set.
rule switching( work1Done && (work2Done || !isValid(reqregWork2)) && !work8x8Done);
   let handedOver = reqfifoWork1.first();
   reqfifoWork1.deq();
   reqregWork2 <= tagged Valid handedOver;
   workFileFlag <= 1-workFileFlag;
   work1Done <= False;
   work2Done <= False;
   $display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
endrule
 
// Rule switching8x8: same hand-over as rule switching, but for the case where
// work8x8Done is set. It additionally waits for outDone, then clears the
// output-side flags and the resultReady vector.
rule switching8x8( work1Done && (work2Done || !isValid(reqregWork2)) && work8x8Done && outDone);
   let handedOver = reqfifoWork1.first();
   reqfifoWork1.deq();
   reqregWork2 <= tagged Valid handedOver;
   workFileFlag <= 1-workFileFlag;
   work1Done <= False;
   work2Done <= False;
   // Reset the output side.
   work8x8Done <= False;
   outDone <= False;
   resultReady <= replicate(0);
   $display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum);
endrule
 
 
 
// Stores the picture width given by the caller in the picWidth register.
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth );
   action
      picWidth <= newPicWidth;
   endaction
endmethod
// Stores the picture height given by the caller in the picHeight register.
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight );
   action
      picHeight <= newPicHeight;
   endaction
endmethod
// Accepts one interpolation request. The request is always queued in reqfifoLoad.
// For IPLuma and IPChroma requests a reduced work descriptor (fractional motion
// vector bits, byte offset and block type) is also queued in reqfifoWork1.
method Action request( InterpolatorIT inputdata );
   reqfifoLoad.enq(inputdata);
   case (inputdata) matches
      tagged IPLuma .indata :
         reqfifoWork1.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt});
      tagged IPChroma .indata :
         reqfifoWork1.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt});
      // Any other tag gets no work descriptor.
      default : noAction;
   endcase
endmethod
 
// Returns the word at the head of outfifo without removing it.
method Vector#(4,Bit#(8)) first() = outfifo.first();
// Removes the word at the head of outfifo.
method Action deq() = outfifo.deq();
// Sets endOfFrameFlag.
method Action endOfFrame();
   action
      endOfFrameFlag <= True;
   endaction
endmethod
// Memory client port: outgoing requests are taken from memReqQ and incoming
// responses are put into memRespQ.
interface Client mem_client;
interface Get request = fifoToGet(memReqQ);
interface Put response = fifoToPut(memRespQ);
endinterface
 
 
endmodule
 
 
endpackage
/trunk/src/mkFrameBuffer.bsv
0,0 → 1,113
//**********************************************************************
// Frame Buffer
//----------------------------------------------------------------------
//
//
//
 
package mkFrameBuffer;
 
import H264Types::*;
import IFrameBuffer::*;
import RegFile::*;
import GetPut::*;
import ClientServer::*;
import FIFO::*;
 
 
//-----------------------------------------------------------
// Register file module
//-----------------------------------------------------------
 
// Storage interface for the frame buffer: one write method and two read methods.
// Addresses are FrameBufferSz bits wide and data words are 32 bits wide.
interface FBRFile2;
// Writes 'data' at 'addr'.
method Action store( Bit#(FrameBufferSz) addr, Bit#(32) data );
// First read method: returns the word at 'addr'.
method Bit#(32) load1( Bit#(FrameBufferSz) addr );
// Second read method: returns the word at 'addr'.
method Bit#(32) load2( Bit#(FrameBufferSz) addr );
endinterface
 
// Implements FBRFile2 on top of a single RegFile indexed from 0 to frameBufferSize.
// Both load methods read the same register file.
module mkFBRFile2( FBRFile2 );

   RegFile#(Bit#(FrameBufferSz),Bit#(32)) mem <- mkRegFile(0,frameBufferSize);

   method Action   store( Bit#(FrameBufferSz) addr, Bit#(32) data ) = mem.upd( addr, data );
   method Bit#(32) load1( Bit#(FrameBufferSz) addr ) = mem.sub(addr);
   method Bit#(32) load2( Bit#(FrameBufferSz) addr ) = mem.sub(addr);

endmodule
 
 
//----------------------------------------------------------------------
// Main module
//----------------------------------------------------------------------
 
// Frame buffer with two load servers and one store port, backed by mkFBRFile2.
// Requests are buffered in FIFOs. Requests whose address is not below
// frameBufferSize are reported with $display and are not dequeued. The three
// request streams are re-aligned when each of them has an FBEndFrameSync at its head.
module mkFrameBuffer( IFrameBuffer );

   //-----------------------------------------------------------
   // State

   FBRFile2 rfile2 <- mkFBRFile2;
   FIFO#(FrameBufferLoadReq)   loadReqQ1  <- mkFIFO();
   FIFO#(FrameBufferLoadResp)  loadRespQ1 <- mkFIFO();
   FIFO#(FrameBufferLoadReq)   loadReqQ2  <- mkFIFO();
   FIFO#(FrameBufferLoadResp)  loadRespQ2 <- mkFIFO();
   FIFO#(FrameBufferStoreReq)  storeReqQ  <- mkFIFO();

   //-----------------------------------------------------------
   // Rules

   // Serve a load request on port 1.
   rule loading1 ( loadReqQ1.first() matches tagged FBLoadReq .reqAddr );
      if(reqAddr >= frameBufferSize)
         $display( "ERROR FrameBuffer: loading1 outside range" );
      else
         begin
            loadReqQ1.deq();
            loadRespQ1.enq( FBLoadResp rfile2.load1(reqAddr) );
         end
   endrule

   // Serve a load request on port 2.
   rule loading2 ( loadReqQ2.first() matches tagged FBLoadReq .reqAddr );
      if(reqAddr >= frameBufferSize)
         $display( "ERROR FrameBuffer: loading2 outside range" );
      else
         begin
            loadReqQ2.deq();
            loadRespQ2.enq( FBLoadResp rfile2.load2(reqAddr) );
         end
   endrule

   // Perform a store request.
   rule storing ( storeReqQ.first() matches tagged FBStoreReq { addr:.wrAddr,data:.wrData} );
      if(wrAddr >= frameBufferSize)
         $display( "ERROR FrameBuffer: storing outside range" );
      else
         begin
            storeReqQ.deq();
            rfile2.store(wrAddr,wrData);
         end
   endrule

   // Remove the end-of-frame markers only when all three queues have one at the head.
   rule syncing ( loadReqQ1.first() matches tagged FBEndFrameSync &&& loadReqQ2.first() matches tagged FBEndFrameSync &&& storeReqQ.first() matches tagged FBEndFrameSync);
      storeReqQ.deq();
      loadReqQ2.deq();
      loadReqQ1.deq();
   endrule

   //-----------------------------------------------------------
   // Interfaces

   interface Server server_load1;
      interface Put request = fifoToPut(loadReqQ1);
      interface Get response = fifoToGet(loadRespQ1);
   endinterface
   interface Server server_load2;
      interface Put request = fifoToPut(loadReqQ2);
      interface Get response = fifoToGet(loadRespQ2);
   endinterface
   interface Put server_store = fifoToPut(storeReqQ);

endmodule
 
endpackage
/trunk/src/mkPrediction_intra8.bsv
0,0 → 1,2144
//**********************************************************************
// Prediction
//----------------------------------------------------------------------
//
//
 
package mkPrediction;
 
import H264Types::*;
 
import IPrediction::*;
import IInterpolator::*;
import mkInterpolator::*;
import FIFO::*;
import FIFOF::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Prediction kind of a macroblock queued for output. Rule inputing enqueues one
// value into outstatefifo for each macroblock type it accepts.
typedef union tagged
{
void Intra; //Intra non-4x4
void Intra4x4; //enqueued for mbtype I_NxN
void Inter; //enqueued for skipped macroblocks and for inter mbtypes
}
OutState deriving(Eq,Bits);
 
// Value type of the intrastate register. Rule inputing moves it from Start to
// Intra16x16 or Intra4x4 when it accepts the corresponding macroblock type.
typedef union tagged
{
void Start; //not working on anything in particular
void Intra16x16;
void Intra4x4;
void IntraPCM;
}
IntraState deriving(Eq,Bits);
 
// Value type of the interstate register. Rule inputing selects the value from the
// macroblock type (P_L0_16x16, P_L0_L0_16x8, P_L0_L0_8x16, P_8x8, P_8x8ref0) or
// uses InterPskip for a macroblock of a skip run.
typedef union tagged
{
void Start; //not working on anything in particular
void InterP16x16;
void InterP16x8;
void InterP8x16;
void InterP8x8;
void InterP8x8ref0;
void InterPskip;
}
InterState deriving(Eq,Bits);
 
// Element type of the interTopVal / interLeftVal / interTopLeftVal vectors and of
// interOutBlockMvfifo: either a marker for a block without inter data, or a
// reference index with a motion vector and a non-zero-coefficient flag.
typedef union tagged
{
Bit#(1) NotInter;//0 for not available, 1 for intra-coded
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv;
}
InterBlockMv deriving(Eq,Bits);
 
// Element type of nextoutputfifo. Rule inputing enqueues SkipMB for each macroblock
// of a skip run and NonSkipMB for each accepted inter macroblock type; rule
// outputing tests the head entry against SkipMB.
typedef union tagged
{
void SkipMB;
void NonSkipMB;
void Intra4x4;
void Intra4x4PlusChroma;
}
NextOutput deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Selects one byte from a 72-bit vector of nine bytes (byte 0 in bits 7:0).
// idx 0..7 returns byte idx+1; any other idx returns byte 0.
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx );
   Vector#(9,Bit#(8)) lanes = unpack(valVector);
   // Map the index onto a lane number that is always within 0..8.
   Bit#(4) lane = (idx < 8) ? (idx + 1) : 0;
   return lanes[lane];
endfunction
 
// Selects one byte from a 40-bit vector of five bytes (byte 0 in bits 7:0).
// idx 0..3 returns byte idx+1; any other idx returns byte 0.
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx );
   Vector#(5,Bit#(8)) lanes = unpack(valVector);
   // Map the index onto a lane number that is always within 0..4.
   Bit#(3) lane = (idx < 4) ? (idx + 1) : 0;
   return lanes[lane];
endfunction
 
// Returns byte number idx of a 32-bit word, where byte 0 is bits 7:0.
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx );
   Vector#(4,Bit#(8)) lanes = unpack(valVector);
   return lanes[idx];
endfunction
 
// Returns byte number idx of a 16-bit word, where byte 0 is bits 7:0.
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx );
   Vector#(2,Bit#(8)) lanes = unpack(valVector);
   return lanes[idx];
endfunction
 
// True when the two 14-bit values, read as signed numbers, differ by at least 4.
// The comparison is done on 15-bit sign-extended copies so that adding 4 cannot overflow.
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 );
   Int#(15) a = unpack(signExtend(val1));
   Int#(15) b = unpack(signExtend(val2));
   Int#(15) larger  = (a >= b) ? a : b;
   Int#(15) smaller = (a >= b) ? b : a;
   return (larger >= (smaller + 4));
endfunction
 
// True when the two 12-bit values, read as signed numbers, differ by at least 4.
// The comparison is done on 13-bit sign-extended copies so that adding 4 cannot overflow.
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 );
   Int#(13) a = unpack(signExtend(val1));
   Int#(13) b = unpack(signExtend(val2));
   Int#(13) larger  = (a >= b) ? a : b;
   Int#(13) smaller = (a >= b) ? b : a;
   return (larger >= (smaller + 4));
endfunction
 
 
//-----------------------------------------------------------
// Prediction Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkPrediction( IPrediction );
 
//Common state
// infifo: input stream read by rules passing and inputing.
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size);
// infifo_ITB: input read by rule outputing (IBTmb_qp, ITBresidual, ITBcoeffLevelZeros entries).
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size);
// outfifo: output stream of this module.
FIFO#(EntropyDecOT) outfifo <- mkFIFO;
// passFlag: True while rule passing handles infifo, False while rule inputing does.
Reg#(Bool) passFlag <- mkReg(True);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);

// picWidth / picHeight are loaded from SPSpic_width_in_mbs / SPSpic_height_in_map_units in rule passing.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
// firstMb, currMb and currMbHor are loaded from SHfirst_mb_in_slice in rule passing.
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb

// outstatefifo / nextoutputfifo: one entry per macroblock, enqueued by rule inputing.
FIFOF#(OutState) outstatefifo <- mkFIFOF;
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF;
Reg#(Bit#(4)) outBlockNum <- mkReg(0);
Reg#(Bit#(4)) outPixelNum <- mkReg(0);
// predictedfifo: predicted samples, four per entry, consumed by rule outputing.
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size);
Reg#(Bit#(1)) outChromaFlag <- mkReg(0);
// outFirstQPFlag: set on SHfirst_mb_in_slice, cleared when rule outputing forwards an IBTmb_qp.
Reg#(Bool) outFirstQPFlag <- mkReg(False);

// donotfire.doNotFire() is called in the branches of rule inputing that must not proceed.
// NOTE(review): presumably its implicit condition is never true, so calling it blocks
// the rule; mkDoNotFire is defined elsewhere - confirm.
DoNotFire donotfire <- mkDoNotFire();
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU();
//Inter state
Interpolator interpolator <- mkInterpolator();
Reg#(InterState) interstate <- mkReg(Start);
// interPskipCount: number of macroblocks of the current SDmb_skip_run already started (rule inputing).
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0);
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU();
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO;
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU();
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO;
Reg#(Bit#(3)) interReqCount <- mkReg(0);
Reg#(Bit#(3)) interRespCount <- mkReg(0);

Reg#(Bit#(1)) interStepCount <- mkReg(0);
Reg#(Bit#(2)) interMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0);
// interPassingCount: position counter used by rule inputing while it collects
// ref_idx, mvd and sub_mb_type elements.
Reg#(Bit#(2)) interPassingCount <- mkReg(0);
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU();
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU();
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full();
// interMvDiffTemp holds the first mvd value of a pair until the second arrives;
// the pair is then enqueued into interMvDiff (rule inputing).
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0);
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO;
Reg#(Bit#(5)) interNewestMv <- mkReg(0);
Reg#(Bit#(2)) interIPStepCount <- mkReg(0);
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0);

// interCurrMbDiff: incremented by rule inputing for every macroblock it accepts and
// compared against picWidth-1 before a further inter macroblock is accepted.
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0);

Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU();
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU();
// interBSfifo: pairs read by rule outputing as (tempHorBS, tempVerBS).
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32);
Reg#(Bool) interBSoutput <- mkReg(True);
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8);
//Intra state
Reg#(IntraState) intrastate <- mkReg(Start);
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0);
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO;
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU;
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO;
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
// Loaded from PPSconstrained_intra_pred_flag in rule passing.
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0);
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU();
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU();
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0);
// Loaded from the intra16x16PredMode field of an I_16x16 mbtype in rule inputing.
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0);
// Filled by rule inputing from SDMMrem_intra4x4_pred_mode / SDMMintra_chroma_pred_mode.
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16);
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO;
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0);
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0);
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0);

Reg#(Bit#(3)) intraReqCount <- mkReg(0);
Reg#(Bit#(3)) intraRespCount <- mkReg(0);
Reg#(Bit#(4)) intraStepCount <- mkReg(0);
Reg#(Bit#(13)) intraSumA <- mkReg(0);
Reg#(Bit#(15)) intraSumB <- mkReg(0);
Reg#(Bit#(15)) intraSumC <- mkReg(0);
Reg#(Vector#(4,Bit#(8))) intraPredVector <- mkRegU();
 
//-----------------------------------------------------------
// Rules
 
//////////////////////////////////////////////////////////////////////////////
// rule stateMonitor ( True );
// if(predictedfifo.notEmpty())
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);////////////////////
// endrule
//////////////////////////////////////////////////////////////////////////////
// Rule passing: handles the head of infifo while passFlag is set, outstatefifo is
// empty and currMbHor is below picWidth. Most elements are dequeued and forwarded
// unchanged to outfifo; a few are also latched into local registers. On
// SDmb_skip_run or SDMmbtype the element is left in infifo and passFlag is
// cleared, so rule inputing handles it.
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) );
$display( "Trace Prediction: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display("ccl4newunit");
$display("ccl4rbspbyte %h", xdata);
end
tagged SPSpic_width_in_mbs .xdata :
begin
// Forward, and record the width both locally and in the interpolator.
infifo.deq();
outfifo.enq(infifo.first());
picWidth <= xdata;
interpolator.setPicWidth(xdata);
end
tagged SPSpic_height_in_map_units .xdata :
begin
// Forward, and record the height both locally and in the interpolator.
infifo.deq();
outfifo.enq(infifo.first());
picHeight <= xdata;
interpolator.setPicHeight(xdata);
end
tagged PPSconstrained_intra_pred_flag .xdata :
begin
// Consumed here; not forwarded to outfifo.
infifo.deq();
////outfifo.enq(infifo.first());
ppsconstrained_intra_pred_flag <= xdata;
end
tagged SHfirst_mb_in_slice .xdata :
begin
// Start of a slice: forward, and reset the macroblock position and neighbour state.
infifo.deq();
outfifo.enq(infifo.first());
firstMb <= xdata;
currMb <= xdata;
currMbHor <= xdata;
currMbVer <= 0;
intra4x4typeLeft <= replicate(15);
interTopLeftVal <= replicate(NotInter 0);
if(xdata==0)
interLeftVal <= replicate(NotInter 0);
outFirstQPFlag <= True;
end
// Macroblock-level elements are not consumed here; they are handed to rule inputing.
tagged SDmb_skip_run .xdata : passFlag <= False;
tagged SDMmbtype .xdata : passFlag <= False;
tagged EndOfFile :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display( "INFO Prediction: EndOfFile reached" );
//$finish(0);////////////////////////////////
end
default:
begin
// Everything else is forwarded unchanged.
infifo.deq();
outfifo.enq(infifo.first());
end
endcase
endrule
 
 
// Rule inputing: handles macroblock-level elements at the head of infifo while
// passFlag is clear. It starts inter / intra processing for each macroblock by
// setting interstate / intrastate and enqueueing into outstatefifo and
// nextoutputfifo, and it collects prediction modes, reference indices and motion
// vector differences into local state. Any other element sets passFlag again so
// that rule passing takes over. Branches that must wait call donotfire.doNotFire().
rule inputing ( !passFlag );
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged SDmb_skip_run .xdata :
begin
// A skip run of xdata macroblocks: one skipped macroblock is started per firing.
// The element stays in infifo until interPskipCount reaches xdata.
if(interstate==Start && intrastate==Start)
begin
if(interPskipCount < xdata)
begin
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata);
outstatefifo.enq(Inter);
interstate <= InterPskip;
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interPskipCount <= interPskipCount+1;
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
interCurrMbDiff <= interCurrMbDiff+1;
nextoutputfifo.enq(SkipMB);
end
else
donotfire.doNotFire();
end
else
begin
// All macroblocks of the run have been started: consume the element.
$display( "Trace Prediction: passing no SDmb_skip_run");
interPskipCount <= 0;
infifo.deq();
end
end
else
donotfire.doNotFire();
end
tagged SDMmbtype .xdata :
begin
// A coded macroblock: choose the prediction path from the macroblock type.
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start)
begin
infifo.deq();
$display( "INFO Prediction: SDMmbtype %0d", xdata);
if(mbPartPredMode(xdata,0)==Intra_16x16)
begin
// Intra 16x16: started only when outstatefifo is empty.
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra);
intrastate <= Intra16x16;
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3})
intra16x16_pred_mode <= tempv1;
else
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" );
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_NxN)
begin
// Intra 4x4: started only when outstatefifo is empty.
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra4x4);
intrastate <= Intra4x4;
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_PCM)
begin
// I_PCM is not supported: an error is reported and $finish is called.
$display( "ERROR Prediction: I_PCM not implemented yet");
$finish;////////////////////////////////////////////////////////////////////////////////////////
intra4x4typeLeft <= replicate(13);
intra4x4typeTop <= replicate(13);
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
begin
// Inter macroblock: may start while outstatefifo is non-empty, as long as
// interCurrMbDiff is below picWidth-1.
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
outstatefifo.enq(Inter);
case(xdata)
P_L0_16x16: interstate <= InterP16x16;
P_L0_L0_16x8: interstate <= InterP16x8;
P_L0_L0_8x16: interstate <= InterP8x16;
P_8x8: interstate <= InterP8x8;
P_8x8ref0: interstate <= InterP8x8ref0;
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype");
endcase
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);/////////////////////////////////////////////////////////////////////////////
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
nextoutputfifo.enq(NonSkipMB);
end
else
donotfire.doNotFire();
end
// Counted for every macroblock type accepted in this branch.
interCurrMbDiff <= interCurrMbDiff+1;
end
else
donotfire.doNotFire();
end
tagged SDMMrem_intra4x4_pred_mode .xdata :
begin
// Queue the intra 4x4 prediction mode.
infifo.deq();
////outfifo.enq(infifo.first());
rem_intra4x4_pred_mode.enq(xdata);
end
tagged SDMMintra_chroma_pred_mode .xdata :
begin
// Queue the intra chroma prediction mode.
infifo.deq();
////outfifo.enq(infifo.first());
intra_chroma_pred_mode.enq(xdata);
end
tagged SDMMref_idx_l0 .xdata :
begin
// Store a reference index at position interPassingCount. The counter returns to 0
// after one index for InterP16x16 and after the second index otherwise.
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
if(interstate==InterP16x16 || interPassingCount==1)
interPassingCount <= 0;
else
interPassingCount <= interPassingCount+1;
end
tagged SDMMmvd_l0 .xdata :
begin
// Motion vector differences arrive in pairs: the first value is held in
// interMvDiffTemp, and the second completes the pair and is enqueued with it.
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
tagged SDMSsub_mb_type .xdata :
begin
// Store a sub-macroblock type at position interPassingCount (2-bit counter).
infifo.deq();
////outfifo.enq(infifo.first());
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata);
interPassingCount <= interPassingCount+1;
end
tagged SDMSref_idx_l0 .xdata :
begin
// Store a sub-macroblock reference index at position interPassingCount.
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
interPassingCount <= interPassingCount+1;
end
tagged SDMSmvd_l0 .xdata :
begin
// Same pairing of two values as for SDMMmvd_l0.
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
// Anything else is left in infifo for rule passing.
default: passFlag <= True;
endcase
endrule
 
rule outputing ( currMbHor<zeroExtend(picWidth) );
Bit#(1) outputFlag = 0;
Vector#(4,Bit#(8)) outputVector = replicate(0);
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]};
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]};
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]};
Bit#(4) totalVer = {blockVer,pixelVer};
//$display( "Trace Prediction: outputing" );
if(outFirstQPFlag)
begin
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata)
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata);
end
else
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()");
end
else if(nextoutputfifo.first() == SkipMB)
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector);
end
end
else
begin
case ( infifo_ITB.first() ) matches
tagged IBTmb_qp .xdata :
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata);
end
tagged ITBresidual .xdata :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2);
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2);
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True);
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
Bit#(11) tempOutputValue = 0;
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]);
if(tempOutputValue[10]==1)
outputVector[ii] = 0;
else if(tempOutputValue[9:0] > 255)
outputVector[ii] = 255;
else
outputVector[ii] = tempOutputValue[7:0];
end
outfifo.enq(PBoutput outputVector);
infifo_ITB.deq();
predictedfifo.deq();
outputFlag = 1;
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector);
end
end
tagged ITBcoeffLevelZeros :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
if(outPixelNum == 12)
infifo_ITB.deq();
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector);
end
end
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" );
endcase
end
if(outputFlag == 1)
begin
$display("ccl4PBoutput %0d", outputVector[0]);
$display("ccl4PBoutput %0d", outputVector[1]);
$display("ccl4PBoutput %0d", outputVector[2]);
$display("ccl4PBoutput %0d", outputVector[3]);
 
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1)
begin
intraMemReqQ.enq(intraMemReqQdelay);
interMemReqQ.enq(interMemReqQdelay);
//$display( "TRACE Prediction: passing storing addr data");//////////////////
end
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))
begin
if(outChromaFlag==0)
begin
Bit#(32) intraLeftValNextTemp = intraLeftValNext;
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0))
begin
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor));
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]);
end
case(pixelVer)
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]};
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]};
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]};
3:
begin
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp});
intraLeftValNext <= zeroExtend(outputVector[3]);
if(outstatefifo.first()==Intra4x4)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13);
else
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14);
end
endcase
end
else
begin
if(outBlockNum[2]==0)
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]);
else
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]);
end
end
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)))
begin
if(outChromaFlag==0)
begin
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]});
if(outstatefifo.first()==Intra4x4)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13);
else
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14);
end
else
begin
if(outBlockNum[2]==0)
begin
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0;
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma0 <= intraTopValChroma0Next;
end
else
begin
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1;
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma1 <= intraTopValChroma1Next;
end
end
end
 
if(outChromaFlag==1 && outBlockNum==7)
begin
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor);
InterBlockMv outBlockMv = interOutBlockMvfifo.first();
if(outBlockMv matches tagged BlockMv .bdata)
begin
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)});
interOutBlockMvfifo.deq();
end
else if(pixelVer==3)
interOutBlockMvfifo.deq();
if(pixelVer==3 && picWidth>1)
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)};
else
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)});
if(pixelVer>0)
begin
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)]));
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)];
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)];
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]});
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore});
if(pixelVer==3)
begin
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3]));
intraTopValStore = intraTopVal[3];
intraTopValChroma0Store = intraTopValChroma0[3];
intraTopValChroma1Store = {outputVector[3],outputVector[2]};
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore};
end
end
end
outPixelNum <= outPixelNum+4;
if(outPixelNum == 12)
begin
if(outChromaFlag==0)
begin
outBlockNum <= outBlockNum+1;
if(outBlockNum == 15)
outChromaFlag <= 1;
if(nextoutputfifo.first() == Intra4x4)
nextoutputfifo.deq();
end
else
begin
if(outBlockNum == 7)
begin
outBlockNum <= 0;
outChromaFlag <= 0;
currMb <= currMb+1;
currMbHor <= currMbHor+1;
interCurrMbDiff <= interCurrMbDiff-1;
outstatefifo.deq;
intrastate <= Start;
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1)
interpolator.endOfFrame();
nextoutputfifo.deq();
end
else
outBlockNum <= outBlockNum+1;
end
end
end
endrule
 
 
// Brings the macroblock column counter back below picWidth and advances the
// row counter to match.  The rule fires only while currMbHor is at or beyond
// picWidth.  When currMbHor is at least eight picture widths too large, eight
// rows are folded in one firing; otherwise a single row is folded.
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   // (currMbHor >> 3) >= width  <=>  currMbHor holds at least 8 full rows.
   Bool foldEightRows = ((currMbHor >> 3) >= widthInMbs);
   if(foldEightRows)
      begin
         currMbVer <= currMbVer + 8;
         currMbHor <= currMbHor - (widthInMbs << 3);
      end
   else
      begin
         currMbVer <= currMbVer + 1;
         currMbHor <= currMbHor - widthInMbs;
      end
   //$display( "Trace Prediction: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule
 
 
// inter prediction rules
 
// Issues at most one load request on interMemReqQ per firing, for the
// macroblock that lies interCurrMbDiff-1 positions ahead of currMb.
// interReqCount selects the request:
//   1..4 : address {column, interReqCount-1}
//   5    : address {column+1, 0} if the next column is inside the row,
//          otherwise {column-1, 3} if the previous column is usable
//   6    : address {column-1, 3}, only when request 5 went to column+1 and
//          the previous column is usable too
// When no request applies (or the look-ahead macroblock address is below
// picWidth) interReqCount is cleared, which disables the rule.
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbAddr = currMb+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbHor = currMbHor+zeroExtend(interCurrMbDiff)-1;
   // The look-ahead column may run past the end of the row; wrap it once.
   if( mbHor >= widthInMbs )
      mbHor = mbHor-widthInMbs;
   Bit#(PicWidthSz) mbCol = truncate(mbHor);
   Bool nextColInRow = ((mbHor+1) < widthInMbs);
   Bool prevColUsable = (mbHor>0 && (mbAddr-firstMb)>widthInMbs);
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issue = False;
   if( !(mbAddr < widthInMbs) )
      begin
         if(interReqCount<5)
            begin
               Bit#(2) slot = truncate(interReqCount-1);
               reqAddr = {mbCol,slot};
               issue = True;
            end
         else if(interReqCount==5 && nextColInRow)
            begin
               reqAddr = {(mbCol+1),2'b00};
               issue = True;
            end
         else if(prevColUsable && (interReqCount==5 || (interReqCount==6 && nextColInRow)))
            begin
               reqAddr = {(mbCol-1),2'b11};
               issue = True;
            end
      end
   if(issue)
      begin
         interMemReqQ.enq(LoadReq reqAddr);
         interReqCount <= interReqCount+1;
         //$display( "TRACE Prediction: interSendReq addr %0d",reqAddr);///////////////////////
      end
   else
      interReqCount <= 0;
   $display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, reqAddr);
endrule
 
 
// Response-phase shortcut for a look-ahead macroblock whose address
// (currMb + interCurrMbDiff - 1) is below picWidth: no response is dequeued.
// The rule ends the response phase and arms both step counters.  If the
// look-ahead macroblock sits in column 0, the left and top-left neighbour
// vectors are reset to NotInter 0.
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbHorRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   // Wrap the look-ahead column once if it ran past the end of the row.
   Bit#(PicAreaSz) mbHor = ((mbHorRaw >= widthInMbs) ? (mbHorRaw-widthInMbs) : mbHorRaw);
   if(mbHor == 0)
      begin
         interTopLeftVal <= replicate(NotInter 0);
         interLeftVal <= replicate(NotInter 0);
      end
   interIPStepCount <= 1;
   interStepCount <= 1;
   interRespCount <= 0;
   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount);
endrule
 
// Consumes one LoadResp from interMemRespQ per firing and stores it, unpacked
// as an InterBlockMv, into the neighbour vectors:
//   interRespCount 1..4 -> interTopVal[interRespCount-1]
//   interRespCount 5    -> interTopVal[4] when the next column is inside the
//                          row, otherwise interTopLeftVal[0]
//   interRespCount 6    -> interTopLeftVal[0]
// When the last expected response has been taken, interRespCount is cleared
// and interStepCount / interIPStepCount are set to 1.
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data);
// Column and address of the look-ahead macroblock (interCurrMbDiff-1 ahead
// of currMb); the column is wrapped once at the end of the row.
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
if( currMbHorTemp >= zeroExtend(picWidth) )
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
Bool noMoreResp = False;
Bit#(2) temp2bit = 0;
InterBlockMv unpackedData = unpack(data);
Vector#(5,InterBlockMv) interTopValNext = interTopVal;
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
if(interRespCount<5)
begin
temp2bit = truncate(interRespCount-1);
interTopValNext[temp2bit] = unpackedData;
// Stop after response 4 -- or already after response 1 for Pskip / P16x16 /
// P16x8 -- when neither the next-column nor the previous-column entry is
// expected.
// NOTE(review): interSendReq issues requests 1..4 regardless of interstate;
// confirm that stopping at interRespCount==1 cannot leave responses 2..4
// unconsumed in interMemRespQ.
if((interRespCount==4 || (interRespCount==1 && (interstate==InterPskip || interstate==InterP16x16 || interstate==InterP16x8)))
&& (!((currMbHorTemp+1)<zeroExtend(picWidth)) && !(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth))))
noMoreResp = True;
end
else if(interRespCount==5)
begin
if((currMbHorTemp+1)<zeroExtend(picWidth))
begin
// Fifth response belongs to the next column; a sixth follows only if the
// previous-column condition holds.
interTopValNext[4] = unpackedData;
if(!(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)))
noMoreResp = True;
end
else
begin
// No next column in this row: the fifth response is the previous-column entry.
interTopLeftValNext[0] = unpackedData;
noMoreResp = True;
end
end
else
begin
// interRespCount==6 (the rule guard limits it to <7): previous-column entry.
interTopLeftValNext[0] = unpackedData;
noMoreResp = True;
end
interMemRespQ.deq();
//$display( "TRACE Prediction: interReceiveResp data %h",data);///////////////////////
if(!noMoreResp)
interRespCount <= interRespCount+1;
else
begin
interRespCount <= 0;
interStepCount <= 1;
interIPStepCount <= 1;
// In column 0 the left and top-left neighbour vectors are reset to
// NotInter 0; this overrides any interTopLeftValNext[0] assignment above.
if(currMbHorTemp == 0)
begin
interLeftVal <= replicate(NotInter 0);
interTopLeftValNext = replicate(NotInter 0);
end
end
interTopVal <= interTopValNext;
interTopLeftVal <= interTopLeftValNext;
$display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data);
endrule
 
 
// Derives the motion vector and the two boundary-strength inputs for one
// 4x4 block position per firing.  The position is
// {interMbPartNum,interSubMbPartNum}; the rule walks all 16 positions and then
// clears interStepCount.
//
// For each position the rule either
//   * computes a new motion vector (calcmv): predictor from the neighbours
//     blockABC[0..2], plus the difference taken from interMvDiff unless the
//     state is InterPskip, and records it in interMvFile; or
//   * copies the vector from blockABC[0] (leftmv) or blockABC[1].
// It then enqueues a (horizontal, vertical) pair on interBSfifo and updates
// interLeftVal / interTopVal / interTopLeftVal with the new vector.
//
// Review cleanup: the locals partHeight, numPart and numSubPart were assigned
// on every path but never read in this rule and have been removed.
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) );
   Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
   Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]};
   Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]};
   Bit#(3) partWidth = 0;   // partition width in 4x4 blocks; offsets the blockABC[2] lookup
   Bit#(2) subMbType = 0;
   Bool noBlockC = False;   // force blockABC[2] to come from interTopLeftVal
   Bool calcmv = False;     // this position starts a partition: compute a new vector
   Bool leftmv = False;     // when copying: take blockABC[0] (True) or blockABC[1] (False)
   if(interstate==InterPskip || interstate==InterP16x16)
      begin
         partWidth = 4;
         calcmv = (interMbPartNum==0 && interSubMbPartNum==0);
         leftmv = (blockHor>0);
      end
   else if(interstate==InterP16x8)
      begin
         partWidth = 4;
         if(interMbPartNum==2)
            noBlockC = True;
         calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0);
         leftmv = (blockHor>0);
      end
   else if(interstate==InterP8x16)
      begin
         partWidth = 2;
         calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0);
         leftmv = !(blockVer>0);
      end
   else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
      begin
         subMbType = interSubMbTypeVector[interMbPartNum];
         case(subMbType)
            0:
               begin
                  partWidth = 2;
                  if(interMbPartNum==3)
                     noBlockC = True;
                  calcmv = (interSubMbPartNum==0);
                  leftmv = (blockHor[0]>0);
               end
            1:
               begin
                  partWidth = 2;
                  if(interSubMbPartNum==2)
                     noBlockC = True;
                  calcmv = (interSubMbPartNum[0]==0);
                  leftmv = True;
               end
            2:
               begin
                  partWidth = 1;
                  calcmv = (interSubMbPartNum[1]==0);
                  leftmv = False;
               end
            3:
               begin
                  partWidth = 1;
                  if(interSubMbPartNum==3)
                     noBlockC = True;
                  calcmv = True;
               end
         endcase
      end
   else
      $display( "ERROR Prediction: interProcessStep unexpected interstate");
   Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]);
   // Neighbour candidates: [0] from interLeftVal, [1] from interTopVal,
   // [2] from interTopVal at blockHor+partWidth, falling back to
   // interTopLeftVal when noBlockC is set or that entry is NotInter 0.
   Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0);
   if( currMbTemp-firstMb==0 && blockHor==0 )
      blockABC[0] = (NotInter 0);
   else
      blockABC[0] = interLeftVal[blockVer];
   if( currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0 )
      blockABC[1] = (NotInter 0);
   else
      blockABC[1] = interTopVal[blockHor];
   blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth];
   if(noBlockC || blockABC[2]==(NotInter 0))
      blockABC[2] = interTopLeftVal[blockVer];
   Bit#(14) mvhorfinal = 0;
   Bit#(12) mvverfinal = 0;
   Bit#(5) interNewestMvNext = 0;
   if(calcmv)//motion vector calculation
      begin
         Vector#(3,Int#(14)) mvhorABC = replicate(0);
         Vector#(3,Int#(12)) mvverABC = replicate(0);
         Bit#(2) validCount = 0;
         Bit#(14) mvhorPred = 0;
         Bit#(12) mvverPred = 0;
         // Collect the candidate vectors; count those whose refIdx matches
         // and remember the last matching one.
         for(Integer ii=0; ii<3; ii=ii+1)
            begin
               if(blockABC[ii] matches tagged BlockMv .xdata)
                  begin
                     mvhorABC[ii] = unpack(xdata.mvhor);
                     mvverABC[ii] = unpack(xdata.mvver);
                     if(xdata.refIdx == refIndex)
                        begin
                           validCount = validCount+1;
                           mvhorPred = xdata.mvhor;
                           mvverPred = xdata.mvver;
                        end
                  end
               else
                  begin
                     mvhorABC[ii] = 0;
                     mvverABC[ii] = 0;
                  end
            end
         // Exactly one matching candidate: use it as is.  Otherwise take the
         // component-wise median of the three candidates.
         if(validCount != 1)//median
            begin
               if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2])
                  mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
               else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2])
                  mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
               else
                  mvhorPred = pack(mvhorABC[0]);
               if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2])
                  mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
               else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2])
                  mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
               else
                  mvverPred = pack(mvverABC[0]);
            end
         if(interstate==InterPskip)
            begin
               // Pskip: the predictor is forced to zero when candidate 0 or 1
               // is NotInter 0, or is a BlockMv with refIdx 0 and a zero vector.
               for(Integer ii=0; ii<2; ii=ii+1)
                  begin
                     if(blockABC[ii] matches tagged BlockMv .xdata)
                        begin
                           if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0)
                              begin
                                 mvhorPred = 0;
                                 mvverPred = 0;
                              end
                        end
                     else if(blockABC[ii] matches tagged NotInter 0)
                        begin
                           mvhorPred = 0;
                           mvverPred = 0;
                        end
                  end
            end
         else if(interstate==InterP16x8 || interstate==InterP8x16)
            begin
               // 16x8 / 8x16: one specific candidate, chosen by partition,
               // overrides the predictor when its refIdx matches.
               InterBlockMv blockCheck;
               if(interstate==InterP16x8)
                  begin
                     if(interMbPartNum==0)
                        blockCheck = blockABC[1];
                     else
                        blockCheck = blockABC[0];
                  end
               else
                  begin
                     if(interMbPartNum==0)
                        blockCheck = blockABC[0];
                     else
                        blockCheck = blockABC[2];
                  end
               if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex)
                  begin
                     mvhorPred = xdata.mvhor;
                     mvverPred = xdata.mvver;
                  end
            end
         mvhorfinal = mvhorPred;
         mvverfinal = mvverPred;
         // Every state except Pskip adds the difference from interMvDiff.
         if(interstate!=InterPskip)
            begin
               mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred));
               mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred));
               interMvDiff.deq();
            end
         interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal));
         // interIPProcessStep waits for interNewestMv to exceed the index it reads.
         interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1;
         $display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred);
      end
   else
      begin
         // Position inside an already-computed partition: copy the vector
         // from the left or the top candidate.
         if(leftmv)
            begin
               if(blockABC[0] matches tagged BlockMv .xdata)
                  begin
                     mvhorfinal = unpack(xdata.mvhor);
                     mvverfinal = unpack(xdata.mvver);
                  end
               else
                  $display( "ERROR Prediction: interProcessStep unexpected blockABC[0]");
            end
         else
            begin
               if(blockABC[1] matches tagged BlockMv .xdata)
                  begin
                     mvhorfinal = unpack(xdata.mvhor);
                     mvverfinal = unpack(xdata.mvver);
                  end
               else
                  $display( "ERROR Prediction: interProcessStep unexpected blockABC[1]");
            end
      end
   // Per-edge code sent to the output stage via interBSfifo:
   //   3 = neighbour is not a BlockMv, 2 = neighbour has nonZeroTransCoeff set,
   //   1 = refIdx differs or a vector component differs per absDiffGEFour*,
   //   0 = otherwise.
   Bit#(2) tempBShor = 0;//bS calculation
   Bit#(2) tempBSver = 0;
   if(interLeftVal[blockVer] matches tagged BlockMv .xdata)
      begin
         if(xdata.nonZeroTransCoeff == 1)
            tempBShor = 2;
         else
            begin
               if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
                  tempBShor = 1;
               else
                  tempBShor = 0;
            end
      end
   else
      tempBShor = 3;
   if(interTopVal[blockHor] matches tagged BlockMv .xdata)
      begin
         if(xdata.nonZeroTransCoeff == 1)
            tempBSver = 2;
         else
            begin
               if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
                  tempBSver = 1;
               else
                  tempBSver = 0;
            end
      end
   else
      tempBSver = 3;
   interBSfifo.enq(tuple2(tempBShor,tempBSver));
   // This block becomes the left/top neighbour of later positions; the old
   // top entry is kept as the top-left neighbour for this block row.
   Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val
   Vector#(4,InterBlockMv) interLeftValNext = interLeftVal;
   Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
   interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
   interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
   interTopLeftValNext[blockVer] = interTopVal[blockHor];
   interTopVal <= interTopValNext;
   interLeftVal <= interLeftValNext;
   interTopLeftVal <= interTopLeftValNext;
   // Blocks of the bottom block row are also queued on interOutBlockMvfifo.
   if(blockVer == 3)
      interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
   if(interSubMbPartNum == 3)//next step
      begin
         interSubMbPartNum <= 0;
         if(interMbPartNum == 3)
            begin
               // Last position: stop this rule and mark all 16 vectors as ready.
               interMbPartNum <= 0;
               interStepCount <= 0;
               interNewestMvNext = 16;
            end
         else
            interMbPartNum <= interMbPartNum+1;
      end
   else
      interSubMbPartNum <= interSubMbPartNum+1;
   if(interNewestMvNext > 0)
      interNewestMv <= interNewestMvNext;
endrule
 
 
// Sends one request to the interpolator per firing.  interIPStepCount==1
// produces IPLuma requests; any other non-zero value produces IPChroma
// requests with uv taken from bit 0 of interIPStepCount.  The rule walks the
// partitions through interIPMbPartNum / interIPSubMbPartNum and increments
// interIPStepCount once the last partition of a pass has been requested.
// The guard on interNewestMv holds the rule back until interProcessStep has
// written the interMvFile entry for the current partition index.
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) );
// Column/row of the look-ahead macroblock (interCurrMbDiff-1 ahead of the
// current one); wrapping past the end of the row moves to the next row.
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
Bit#(PicHeightSz) currMbVerTemp = currMbVer;
if( currMbHorTemp >= zeroExtend(picWidth) )
begin
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
currMbVerTemp = currMbVerTemp+1;
end
Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]};
Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]};
// Number of partitions / sub-partitions to step through for this state.
Bit#(3) numPart = 1;
Bit#(3) numSubPart = 1;
Bit#(2) subMbType = 0;
if(interstate==InterPskip || interstate==InterP16x16)
numPart = 1;
else if(interstate==InterP16x8)
numPart = 2;
else if(interstate==InterP8x16)
numPart = 2;
else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
begin
numPart = 4;
subMbType = interSubMbTypeVector[interIPMbPartNum];
numSubPart = numSubMbPart(subMbType);
end
else
$display( "ERROR Prediction: interIPProcessStep unexpected interstate");
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]);
Bit#(PicWidthSz) currMbHorT = truncate(currMbHorTemp);
// Request position: horizontal is {mb column, block column}; vertical is
// {mb row, block row, 2'b00}.
Bit#(TAdd#(PicWidthSz,2)) horTemp = {currMbHorT,blockHor};
Bit#(TAdd#(PicHeightSz,4)) verTemp = {currMbVerTemp,blockVer,2'b00};
IPBlockType btTemp = IP16x16;
if(interstate==InterPskip || interstate==InterP16x16)
btTemp = IP16x16;
else if(interstate==InterP16x8)
btTemp = IP16x8;
else if(interstate==InterP8x16)
btTemp = IP8x16;
else
begin
case(subMbType)
0: btTemp = IP8x8;
1: btTemp = IP8x4;
2: btTemp = IP4x8;
3: btTemp = IP4x4;
endcase
end
Bit#(14) mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
Bit#(12) mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
if(interIPStepCount == 1)
begin
if(!(interstate==InterP8x8 || interstate==InterP8x8ref0))
begin
// Luma pass for 16x16 / 16x8 / 8x16 (and Pskip): the macroblock is requested
// as four IP8x8 blocks.  interIPMbPartNumTemp maps the current 8x8 index back
// to the partition that holds its reference index and motion vector.
numPart = 4;
Bit#(2) interIPMbPartNumTemp = interIPMbPartNum;
if(btTemp==IP16x16)
interIPMbPartNumTemp = 0;
else if(btTemp==IP16x8 && interIPMbPartNumTemp[0]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-1;
else if(btTemp==IP8x16 && interIPMbPartNumTemp[1]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-2;
refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNumTemp]);
btTemp = IP8x8;
mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
// Chroma pass: same horizontal position, vertical position halved.
interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
// Advance to the next sub-partition / partition; after the last one, start
// the next pass by incrementing interIPStepCount.
if(interIPSubMbPartNum >= truncate(numSubPart-1))
begin
interIPSubMbPartNum <= 0;
if(interIPMbPartNum >= truncate(numPart-1))
begin
interIPMbPartNum <= 0;
interIPStepCount <= interIPStepCount+1;
end
else
begin
// For IP16x8 the second partition is index 2, so index 1 is skipped.
// (In the luma pass btTemp was overwritten with IP8x8 above, so this only
// applies to chroma passes.)
if(btTemp == IP16x8)
interIPMbPartNum <= 2;
else
interIPMbPartNum <= interIPMbPartNum+1;
end
end
else
begin
// Sub-macroblock type 1 has its second sub-partition at index 2.
if(subMbType == 1)
interIPSubMbPartNum <= 2;
else
interIPSubMbPartNum <= interIPSubMbPartNum+1;
end
$display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp));
endrule
 
 
// Returns the inter-prediction state to Start once all four inter phase
// counters (request, response, step, interpolation step) are back at zero.
rule interDone ( interReqCount==0 && interRespCount==0 && interStepCount==0 && interIPStepCount==0 && interstate!=Start );
   interstate <= Start;
   //$display( "Trace Prediction: interDone %h", interstate);
endrule
 
// Moves one interpolator result into predictedfifo per firing.  The rule has
// no explicit condition of its own; it can fire only when the interpolator
// has an output and predictedfifo can accept it.
rule interOutputTransfer ( True );
   let interpolated = interpolator.first();
   interpolator.deq();
   predictedfifo.enq(interpolated);
   //$display( "Trace Prediction: interOutputTransfer %h", interstate);
endrule
 
 
 
// intra prediction rules
 
// Issues at most one load request on intraMemReqQ per firing for the current
// macroblock.  intraReqCount selects the request:
//   1..4 : address {column, intraReqCount-1}
//   5    : address {column+1, 0} if the next column is inside the row and
//          the state is Intra4x4, otherwise {column-1, 3} if the previous
//          column is usable
//   6    : address {column-1, 3}, only when request 5 went to column+1 and
//          the previous column is usable too
// When no request applies (or currMb-firstMb is below picWidth)
// intraReqCount is cleared, which disables the rule.
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() );
   Bit#(PicAreaSz) widthInMbs = zeroExtend(picWidth);
   Bit#(PicWidthSz) mbCol = truncate(currMbHor);
   Bool wantNextCol = ((currMbHor+1)<widthInMbs && intrastate==Intra4x4);
   Bool prevColUsable = (currMbHor>0 && (currMb-firstMb)>widthInMbs);
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issue = False;
   if( !((currMb-firstMb) < widthInMbs) )
      begin
         if(intraReqCount<5)
            begin
               Bit#(2) slot = truncate(intraReqCount-1);
               reqAddr = {mbCol,slot};
               issue = True;
            end
         else if(intraReqCount==5 && wantNextCol)
            begin
               reqAddr = {(mbCol+1),2'b00};
               issue = True;
            end
         else if(prevColUsable && (intraReqCount==5 || (intraReqCount==6 && wantNextCol)))
            begin
               reqAddr = {(mbCol-1),2'b11};
               issue = True;
            end
      end
   if(issue)
      begin
         intraMemReqQ.enq(LoadReq reqAddr);
         intraReqCount <= intraReqCount+1;
         //$display( "TRACE Prediction: intraSendReq addr %0d",reqAddr);///////////////////////
      end
   else
      intraReqCount <= 0;
   $display( "Trace Prediction: intraSendReq");
endrule
 
 
// Response-phase shortcut for macroblocks with currMb-firstMb below picWidth:
// no response is dequeued.  Every intra4x4typeTop entry is set to 15, the
// block/pixel counters are reset, the intra step phase is started, and a
// NotInter 1 entry is queued on interOutBlockMvfifo.
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) );
   // Counters: leave the response phase, enter step 1 at block 0 / pixel 0.
   intraRespCount <= 0;
   intraStepCount <= 1;
   blockNum <= 0;
   pixelNum <= 0;
   // Neighbour state handed on to later stages.
   intra4x4typeTop <= replicate(15);
   interOutBlockMvfifo.enq(NotInter 1);
   $display( "Trace Prediction: intraReceiveNoResp");
endrule
 
// Consumes one LoadResp from intraMemRespQ per firing and distributes its
// fields into the intra neighbour registers.  Fields used from the response:
//   data[67:64] prediction-type code, data[63:48] chroma1, data[47:32] chroma0,
//   data[31:0] luma.
// intraRespCount selects the destination:
//   1..4 -> entry intraRespCount-1 of intra4x4typeTop / intraTopVal /
//           intraTopValChroma0 / intraTopValChroma1
//   5    -> intraTopVal[4] when the next column is inside the row and the
//           state is Intra4x4, otherwise the corner bytes of the left values
//   6    -> the corner bytes of the left values
// After the last expected response the rule clears intraRespCount, starts
// the intra step phase and queues NotInter 1 on interOutBlockMvfifo.
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data);
Bit#(1) noMoreResp = 0;
Bit#(2) temp2bit = 0;
if(intraRespCount<5)
begin
temp2bit = truncate(intraRespCount-1);
intra4x4typeTop <= update(intra4x4typeTop, temp2bit, data[67:64]);
if(intraRespCount==4)
begin
// Fourth response: besides entry 3, pre-fill entry 4 with the top byte of
// this response replicated four times.  A fifth response may overwrite
// entry 4 later.
Vector#(5,Bit#(32)) intraTopValTemp = intraTopVal;
intraTopValTemp[3] = data[31:0];
intraTopValTemp[4] = {data[31:24],data[31:24],data[31:24],data[31:24]};
intraTopVal <= intraTopValTemp;
// Stop here when neither a next-column nor a previous-column response is expected.
if(!((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) && !(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)))
noMoreResp = 1;
end
else
intraTopVal <= update(intraTopVal, intraRespCount-1, data[31:0]);
intraTopValChroma0 <= update(intraTopValChroma0, temp2bit, data[47:32]);
intraTopValChroma1 <= update(intraTopValChroma1, temp2bit, data[63:48]);
end
else if(intraRespCount==5)
begin
if((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4)
begin
// Next-column response: used for intraTopVal[4] unless its type code is 15,
// or 14 while ppsconstrained_intra_pred_flag is set.
if(!(data[67:64]==15 || (data[67:64]==14 && ppsconstrained_intra_pred_flag==1)))
intraTopVal <= update(intraTopVal, 4, data[31:0]);
if(!(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)))
noMoreResp = 1;
end
else
begin
// Previous-column response: its top luma byte replaces the low byte of
// intraLeftVal[0]; the top chroma bytes go to entry 0 of the chroma left values.
Bit#(40) temp2 = intraLeftVal[0];
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]});
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]);
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]);
noMoreResp = 1;
end
end
else
begin
// intraRespCount==6 (the rule guard limits it to <7): previous-column
// response, handled exactly as in the branch above.
Bit#(40) temp2 = intraLeftVal[0];
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]});
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]);
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]);
noMoreResp = 1;
end
intraMemRespQ.deq();
//$display( "TRACE Prediction: intraReceiveResp data %h",data);///////////////////////
if(noMoreResp == 0)
intraRespCount <= intraRespCount+1;
else
begin
intraRespCount <= 0;
intraStepCount <= 1;
blockNum <= 0;
pixelNum <= 0;
interOutBlockMvfifo.enq(NotInter 1);
end
$display( "Trace Prediction: intraReceiveResp");
endrule
 
// First intra step for a block (intraStepCount==1).  It announces the block
// on nextoutputfifo and moves on to step 2.
//   * States other than Intra4x4: enqueue NonSkipMB, nothing else to decide.
//   * Intra4x4: take one entry from rem_intra4x4_pred_mode and derive the
//     block's prediction mode from it and from the top/left neighbour type
//     codes, store it in cur_intra4x4_pred_mode, and enqueue Intra4x4
//     (Intra4x4PlusChroma for block 15).
// Neighbour type codes: 15, or 14 while ppsconstrained_intra_pred_flag is
// set, count as unavailable; any code above 8 is treated as mode 2.
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty());
   Bit#(2) blockHor = {blockNum[2],blockNum[0]};
   Bit#(2) blockVer = {blockNum[3],blockNum[1]};
   Bit#(4) topType = select(intra4x4typeTop, blockHor);
   // Block 0 of a macroblock in column 0 has no left neighbour: report 15 and
   // reset the whole left-type vector.
   Bool atRowStart = (currMbHor==0 && blockNum==0);
   Bit#(4) leftType = (atRowStart ? 15 : select(intra4x4typeLeft, blockVer));
   if(atRowStart)
      intra4x4typeLeft <= replicate(15);
   intraStepCount <= intraStepCount+1;
   if(intrastate!=Intra4x4)
      nextoutputfifo.enq(NonSkipMB);
   else
      begin
         Bool constrained = (ppsconstrained_intra_pred_flag==1);
         Bool topAvailable = !(topType==15 || (topType==14 && constrained));
         Bool leftAvailable = !(leftType==15 || (leftType==14 && constrained));
         Bit#(4) remType = rem_intra4x4_pred_mode.first();
         rem_intra4x4_pred_mode.deq();
         // Predicted mode: 2 when either neighbour is unavailable, otherwise
         // the smaller of the two neighbour modes (codes above 8 count as 2).
         Bit#(4) topMode = ((topType>8) ? 2 : topType);
         Bit#(4) leftMode = ((leftType>8) ? 2 : leftType);
         Bit#(4) predType = 2;
         if(topAvailable && leftAvailable)
            predType = ((topMode > leftMode) ? leftMode : topMode);
         // Bit 3 of the queued value selects the predicted mode; otherwise the
         // value names a mode, stepping over the predicted one.
         Bit#(4) curType = remType+1;
         if(remType[3] == 1)
            curType = predType;
         else if(remType < predType)
            curType = remType;
         cur_intra4x4_pred_mode <= curType;
         nextoutputfifo.enq((blockNum == 15) ? Intra4x4PlusChroma : Intra4x4);
         $display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,topType,leftType,predType,remType,curType);//////////////////
      end
   //$display( "Trace Prediction: intraPredTypeStep");
endrule
 
 
rule intraProcessStep ( intraStepCount>1 );
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);////////////////////
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);/////////////////
Bit#(1) outFlag = 0;
Bit#(4) nextIntraStepCount = intraStepCount+1;
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelHor = {pixelNum[1],pixelNum[0]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Vector#(4,Bit#(8)) predVector = intraPredVector;
 
Bit#(4) topType = select(intra4x4typeTop, blockHor);
Bit#(4) leftType = select(intra4x4typeLeft, blockVer);
Bit#(1) topAvailable;
Bit#(1) leftAvailable;
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1))
topAvailable = 0;
else
topAvailable = 1;
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1))
leftAvailable = 0;
else
leftAvailable = 1;
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0)
begin
intraChromaTopAvailable <= topAvailable;
intraChromaLeftAvailable <= leftAvailable;
end
if(intrastate==Intra4x4 && intraChromaFlag==0)
begin
if(intraStepCount==2)
begin
outFlag = 1;
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor));
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1);
Bit#(72) topValSet;
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3)
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]};
else
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]};
//$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);////////////////////
Bit#(4) topSelect1 = 0;
Bit#(4) topSelect2 = 0;
Bit#(4) topSelect3 = 0;
Bit#(3) leftSelect1 = 0;
Bit#(3) leftSelect2 = 0;
Bit#(3) leftSelect3 = 0;
Bit#(10) tempVal1 = 0;
Bit#(10) tempVal2 = 0;
Bit#(10) tempVal3 = 0;
case(cur_intra4x4_pred_mode)
0://vertical
begin
topSelect1 = zeroExtend(pixelHor);
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1);
predVector[pixelHor] = topVal;
end
1://horizontal
begin
leftSelect1 = zeroExtend(pixelVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1);
predVector[pixelHor] = leftVal;
end
2://dc
begin
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2;
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2;
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum);
Bit#(8) topSum = tempTopSum[9:2];
Bit#(8) leftSum = tempLeftSum[9:2];
Bit#(8) totalSum = tempTotalSum[10:3];
if(topAvailable==1 && leftAvailable==1)
predVector[pixelHor] = totalSum;
else if(topAvailable==1)
predVector[pixelHor] = topSum;
else if(leftAvailable==1)
predVector[pixelHor] = leftSum;
else
predVector[pixelHor] = 8'b10000000;
end
3://diagonal down left
begin
Bit#(4) selectNum = zeroExtend(pixelHor)+zeroExtend(pixelVer);
if(pixelHor==3 && pixelVer==3)
begin
topSelect1 = 6;
topSelect2 = 7;
topSelect3 = 7;
end
else
begin
topSelect1 = selectNum;
topSelect2 = selectNum+1;
topSelect3 = selectNum+2;
end
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
4://diagonal down right
begin
if(pixelHor > pixelVer)
begin
topSelect3 = zeroExtend(pixelHor)-zeroExtend(pixelVer);
topSelect2 = topSelect3-1;
topSelect1 = topSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(pixelHor < pixelVer)
begin
leftSelect3 = zeroExtend(pixelVer)-zeroExtend(pixelHor);
leftSelect2 = leftSelect3-1;
leftSelect1 = leftSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
5://vertical right
begin
Bit#(4) tempPixelHor = zeroExtend(pixelHor);
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer);
if(zVR<=6 && zVR>=0)
begin
topSelect3 = zeroExtend(pixelHor)-zeroExtend(pixelVer>>1);
topSelect2 = topSelect3-1;
if(zVR==1 || zVR==3 || zVR==5)
topSelect1 = topSelect3-2;
else
topSelect1 = topSelect3;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(zVR==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
leftSelect1 = zeroExtend(pixelVer)-1;
leftSelect2 = leftSelect1-1;
leftSelect3 = leftSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
6://horizontal down
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHD = (tempPixelVer<<1)-zeroExtend(pixelHor);
if(zHD<=6 && zHD>=0)
begin
leftSelect3 = zeroExtend(pixelVer)-zeroExtend(pixelHor>>1);
leftSelect2 = leftSelect3-1;
if(zHD==1 || zHD==3 || zHD==5)
leftSelect1 = leftSelect3-2;
else
leftSelect1 = leftSelect3;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else if(zHD==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
topSelect1 = zeroExtend(pixelHor)-1;
topSelect2 = topSelect1-1;
topSelect3 = topSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
7://vertical left
begin
topSelect1 = zeroExtend(pixelHor)+zeroExtend(pixelVer>>1);
topSelect2 = topSelect1+1;
if(pixelVer==1 || pixelVer==3)
topSelect3 = topSelect1+2;
else
topSelect3 = topSelect1;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
8://horizontal up
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHU = (tempPixelVer<<1)+zeroExtend(pixelHor);
if(zHU<=4)
begin
leftSelect1 = zeroExtend(pixelVer)+zeroExtend(pixelHor>>1);
leftSelect2 = leftSelect1+1;
if(zHU==1 || zHU==3)
leftSelect3 = leftSelect1+2;
else
leftSelect3 = leftSelect1;
end
else
begin
if(zHU==5)
leftSelect1 = 2;
else
leftSelect1 = 3;
leftSelect2 = 3;
leftSelect3 = 3;
end
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode");
endcase
end
else
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount");
end
else if(intrastate==Intra16x16 && intraChromaFlag==0)
begin
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));/////////////////
case(intra16x16_pred_mode)
0://vertical
begin
Bit#(32) topValSet = select(intraTopVal,blockHor);
Bit#(8) topVal = select32to8(topValSet,pixelHor);
predVector[pixelHor] = topVal;
outFlag = 1;
end
1://horizontal
begin
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer));
predVector[pixelHor] = leftVal;
outFlag = 1;
end
2://dc
begin
case(intraStepCount)
2:
begin
if(topAvailable == 1)
begin
Bit#(32) topValSet = select(intraTopVal,0);
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
else
begin
intraSumA <= 0;
nextIntraStepCount = 6;
end
end
3:
begin
Bit#(32) topValSet = select(intraTopVal,1);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
4:
begin
Bit#(32) topValSet = select(intraTopVal,2);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
5:
begin
Bit#(32) topValSet = select(intraTopVal,3);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8;
end
6:
begin
if(leftAvailable == 1)
begin
Bit#(40) leftValSet = select(intraLeftVal,0);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
else
nextIntraStepCount = 10;
end
7:
begin
Bit#(40) leftValSet = select(intraLeftVal,1);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
8:
begin
Bit#(40) leftValSet = select(intraLeftVal,2);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
9:
begin
Bit#(40) leftValSet = select(intraLeftVal,3);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8;
end
10:
begin
if(leftAvailable == 1 && topAvailable == 1)
intraSumA <= intraSumA >> 5;
else if(leftAvailable == 1 || topAvailable == 1)
intraSumA <= intraSumA >> 4;
else
intraSumA <= 128;
end
11:
begin
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount");
endcase
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(32) topValSet = select(intraTopVal,3);
Bit#(8) topVal = select32to8(topValSet,3);
Bit#(40) leftValSet = select(intraLeftVal,3);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3);
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 11)
begin
Bit#(4) xyPlusOne = intraStepCount-2;
Bit#(4) xyPlusEight = intraStepCount+5;
Bit#(4) sixMinusXY = 9-intraStepCount;
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]);
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]);
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]);
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0]));
Bit#(32) topValSet2=0;
Bit#(8) topVal2;
Bit#(40) leftValSet2;
Bit#(8) leftVal2;
if(intraStepCount==10)
begin
leftValSet2 = select(intraLeftVal,0);
leftVal2 = intra4x4SelectLeft(leftValSet2,-1);
topVal2 = leftVal2;
end
else
begin
topValSet2 = select(intraTopVal,sixMinusXY[3:2]);
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]);
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]);
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0]));
end
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
end
else if(intraStepCount == 11)
begin
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32;
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[17:6]);
intraSumC <= signExtend(tempSumC[17:6]);
end
else if(intraStepCount == 12)
begin
Bit#(5) positionHor = {1'b0,blockHor,pixelHor};
Bit#(5) positionVer = {1'b0,blockVer,pixelVer};
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7);
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7);
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[15]==1)
predVector[pixelHor] = 0;
else if(tempTotal[14:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount");
end
endcase
end
else if(intraChromaFlag==1)
begin
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);///////////////////
Vector#(9,Bit#(8)) tempLeftVec;
Vector#(4,Bit#(16)) tempTopVec;
if(blockNum[2] == 0)
begin
tempLeftVec = intraLeftValChroma0;
tempTopVec = intraTopValChroma0;
end
else
begin
tempLeftVec = intraLeftValChroma1;
tempTopVec = intraTopValChroma1;
end
case(intra_chroma_pred_mode.first())
0://dc
begin
if(intraStepCount == 2)
begin
Bit#(1) useTop=0;
Bit#(1) useLeft=0;
if(blockNum[1:0] == 0 || blockNum[1:0] == 3)
begin
useTop = intraChromaTopAvailable;
useLeft = intraChromaLeftAvailable;
end
else if(blockNum[1:0] == 1)
begin
if(intraChromaTopAvailable == 1)
useTop = 1;
else if(intraChromaLeftAvailable == 1)
useLeft = 1;
end
else if(blockNum[1:0] == 2)
begin
if(intraChromaLeftAvailable == 1)
useLeft = 1;
else if(intraChromaTopAvailable == 1)
useTop = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum");
Bit#(10) topSum;
Bit#(10) leftSum;
Bit#(11) totalSum;
if(blockHor[0] == 0)
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2;
else
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2;
if(blockVer[0] == 0)
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2;
else
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2;
totalSum = zeroExtend(topSum) + zeroExtend(leftSum);
if(useTop==1 && useLeft==1)
intraSumA <= zeroExtend(totalSum[10:3]);
else if(useTop==1)
intraSumA <= zeroExtend(topSum[9:2]);
else if(useLeft==1)
intraSumA <= zeroExtend(leftSum[9:2]);
else
intraSumA <= zeroExtend(8'b10000000);
end
else if(intraStepCount == 3)
begin
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount");
end
1://horizontal
begin
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1;
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx);
outFlag = 1;
end
2://vertical
begin
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHor[1]});
if(pixelHor[0] == 0)
predVector[pixelHor] = tempTopVal[7:0];
else
predVector[pixelHor] = tempTopVal[15:8];
outFlag = 1;
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(16) topValSet = tempTopVec[3];
Bit#(8) topVal = topValSet[15:8];
Bit#(8) leftVal = tempLeftVec[8];
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 7)
begin
Bit#(3) xyPlusOne = truncate(intraStepCount)-2;
Bit#(3) xyPlusFour = truncate(intraStepCount)+1;
Bit#(4) twoMinusXY = 5-intraStepCount;
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]);
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]);
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1;
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1);
Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]);
Bit#(8) topVal2;
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1);
if(intraStepCount==6)
topVal2 = leftVal2;
else
topVal2 = select16to8(topValSet2,twoMinusXY[0]);
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH);
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);////////////////////
end
else if(intraStepCount == 7)
begin
Int#(15) tempDisplayH = unpack(intraSumB);
Int#(15) tempDisplayV = unpack(intraSumC);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);////////////////////
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32;
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[18:6]);
intraSumC <= signExtend(tempSumC[18:6]);
end
else if(intraStepCount == 8)
begin
Bit#(4) positionHor = {1'b0,blockHor[0],pixelHor};
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer};
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3);
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3);
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[16]==1)
predVector[pixelHor] = 0;
else if(tempTotal[15:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount");
end
endcase
end
else
$display( "ERROR Prediction: intraProcessStep unknown intrastate");
 
intraPredVector <= predVector;
if(pixelHor == 3)
predictedfifo.enq(predVector);
if(outFlag==1)
begin
pixelNum <= pixelNum+1;
if(pixelNum == 15)
begin
if(intraChromaFlag==0)
begin
blockNum <= blockNum+1;
if(blockNum == 15)
begin
intraChromaFlag <= 1;
intraStepCount <= 2;
end
else if(intrastate==Intra4x4)
intraStepCount <= 1;
end
else
begin
if(blockNum == 7)
begin
blockNum <= 0;
intraChromaFlag <= 0;
intraStepCount <= 0;
intra_chroma_pred_mode.deq();
end
else
begin
blockNum <= blockNum+1;
if(intra_chroma_pred_mode.first()==0)
intraStepCount <= 2;
else if(blockNum==3)
intraStepCount <= 2;
end
end
end
end
else
intraStepCount <= nextIntraStepCount;
//$display( "Trace Prediction: intraProcessStep");
endrule
// Memory client backed by intraMemReqQ / intraMemRespQ: the Get side hands out
// queued requests, the Put side accepts responses into the response queue.
interface Client mem_client_intra;
interface Get request = fifoToGet(intraMemReqQ);
interface Put response = fifoToPut(intraMemRespQ);
endinterface
// Same arrangement for the interMemReqQ / interMemRespQ pair.
interface Client mem_client_inter;
interface Get request = fifoToGet(interMemReqQ);
interface Put response = fifoToPut(interMemRespQ);
endinterface
// The interpolator submodule's own memory client is exported unchanged.
interface Client mem_client_buffer = interpolator.mem_client;
 
// Data-path ports: two input queues (infifo, infifo_ITB) and one output queue.
interface Put ioin = fifoToPut(infifo);
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB);
interface Get ioout = fifoToGet(outfifo);
 
endmodule
 
endpackage
/trunk/src/mkInputGen_akiyo.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
module mkInputGen( IInputGen );

   // The stimulus image occupies register-file addresses 0 .. lastAddr;
   // fileLength is therefore the first index past the data.
   Bit#(27) lastAddr   = 4867;
   Bit#(27) fileLength = lastAddr + 1;

   RegFile#(Bit#(27), Bit#(8)) rfile   <- mkRegFileLoad("akiyo_qcif1-15.hex", 0, lastAddr);
   FIFO#(InputGenOT)           outfifo <- mkFIFO;
   Reg#(Bit#(27))              index   <- mkReg(0);

   // Stream the file one byte per firing until every byte has been sent.
   rule output_byte (index < fileLength);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      index <= index + 1;
      outfifo.enq(tagged DataByte nextByte);
   endrule

   // Once the data is exhausted, keep offering the end-of-file marker
   // (index is no longer advanced, so this rule stays enabled).
   rule end_of_file (index == fileLength);
      //$finish(0);
      outfifo.enq(tagged EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkInputGen_intra.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
module mkInputGen( IInputGen );

   // The stimulus image occupies register-file addresses 0 .. lastAddr;
   // fileLength is therefore the first index past the data.
   Bit#(27) lastAddr   = 22038;
   Bit#(27) fileLength = lastAddr + 1;

   RegFile#(Bit#(27), Bit#(8)) rfile   <- mkRegFileLoad("intraforeman_qcif1-5.hex", 0, lastAddr);
   FIFO#(InputGenOT)           outfifo <- mkFIFO;
   Reg#(Bit#(27))              index   <- mkReg(0);

   // Stream the file one byte per firing until every byte has been sent.
   rule output_byte (index < fileLength);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      index <= index + 1;
      outfifo.enq(tagged DataByte nextByte);
   endrule

   // Once the data is exhausted, keep offering the end-of-file marker
   // (index is no longer advanced, so this rule stays enabled).
   rule end_of_file (index == fileLength);
      //$finish(0);
      outfifo.enq(tagged EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkMemED_bram.bsv
0,0 → 1,57
//**********************************************************************
// Memory for Entropy Decoder
//----------------------------------------------------------------------
//
//
//
 
package mkMemED;
 
import H264Types::*;
import IMemED::*;
import GetPut::*;
import ClientServer::*;
import FIFO::*;
import BRAM::*;
 
 
//----------------------------------------------------------------------
// Main module
//----------------------------------------------------------------------
 
module mkMemED(IMemED#(index_size,data_size))
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen),
             Bits#(MemResp#(data_size),mRespLen));

   //-----------------------------------------------------------
   // State: the storage array plus request / response queues

   BRAM#(Bit#(index_size),Bit#(data_size)) bramfile <- mkBRAM_Full();
   FIFO#(MemReq#(index_size,data_size))    reqQ     <- mkFIFO();
   FIFO#(MemResp#(data_size))              respQ    <- mkFIFO();

   //-----------------------------------------------------------
   // Rules

   // A store at the head of the queue is written straight into the array.
   rule storing ( reqQ.first() matches tagged StoreReq { addr: .wrAddr, data: .wrData } );
      reqQ.deq();
      bramfile.write(wrAddr, wrData);
   endrule

   // A load at the head of the queue starts a read; the data is collected
   // later by readresp.
   rule reading ( reqQ.first() matches tagged LoadReq .rdAddr );
      reqQ.deq();
      bramfile.read_req(rdAddr);
   endrule

   // Forward each completed read to the response queue.
   rule readresp;
      let rdData <- bramfile.read_resp();
      respQ.enq( tagged LoadResp rdData );
   endrule

   //-----------------------------------------------------------
   // Interface

   interface Server mem_server;
      interface Put request  = fifoToPut(reqQ);
      interface Get response = fifoToGet(respQ);
   endinterface

endmodule
 
endpackage
/trunk/src/mkInterpolator_3stage.bsv
0,0 → 1,726
//**********************************************************************
// interpolator implementation
//----------------------------------------------------------------------
//
//
 
package mkInterpolator;
 
import H264Types::*;
import IInterpolator::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Work-stage request: the element type of reqfifoWork, matched by the
// workLuma (IPWLuma) and workChroma (IPWChroma) rules.
//   xFracL/yFracL, xFracC/yFracC : fractional position selectors (2 bits for
//                                   luma, 3 bits for chroma)
//   offset                       : sample offset used when realigning fetched words
//   bt                           : block shape being processed
// NOTE(review): the producer of these requests is outside this view; field
// meanings above are read off how the work rules use them.
typedef union tagged
{
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma;
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
}
InterpolatorWT deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Saturate a 10-bit two's-complement value to the unsigned 8-bit range:
// bit 9 set (negative) clamps to 0, otherwise bit 8 set (>= 256) clamps to
// 255, otherwise the low byte passes through unchanged.
function Bit#(8) clip1y10to8( Bit#(10) innum );
   Bool belowRange = (innum[9] == 1);
   Bool aboveRange = (innum[8] == 1);
   Bit#(8) lowByte = innum[7:0];
   return (belowRange ? 8'd0 : (aboveRange ? 8'd255 : lowByte));
endfunction
 
// Six-tap filter with coefficients (1, -5, 20, 20, -5, 1) over unsigned 8-bit
// samples. Inputs are zero-extended and the sum is formed modulo 2^15, so a
// negative result appears in two's-complement form in the 15-bit output.
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 );
   Bit#(15) tap0 = zeroExtend(in0);
   Bit#(15) tap1 = zeroExtend(in1);
   Bit#(15) tap2 = zeroExtend(in2);
   Bit#(15) tap3 = zeroExtend(in3);
   Bit#(15) tap4 = zeroExtend(in4);
   Bit#(15) tap5 = zeroExtend(in5);
   // The kernel is symmetric, so pair the taps that share a coefficient.
   Bit#(15) outerPair  = tap0 + tap5;
   Bit#(15) middlePair = tap1 + tap4;
   Bit#(15) centrePair = tap2 + tap3;
   return outerPair - 5*middlePair + 20*centrePair;
endfunction
 
// Second-pass six-tap filter, coefficients (1, -5, 20, 20, -5, 1), over signed
// 15-bit intermediates. The sum is rounded by adding 512, scaled down by 2^10
// (bits [19:10] of the 20-bit sum) and saturated to 8 bits by clip1y10to8.
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 );
   Bit#(20) tap0 = signExtend(in0);
   Bit#(20) tap1 = signExtend(in1);
   Bit#(20) tap2 = signExtend(in2);
   Bit#(20) tap3 = signExtend(in3);
   Bit#(20) tap4 = signExtend(in4);
   Bit#(20) tap5 = signExtend(in5);
   Bit#(20) roundedSum = tap0 - 5*tap1 + 20*tap2 + 20*tap3 - 5*tap4 + tap5 + 512;
   Bit#(10) scaled = roundedSum[19:10];
   return clip1y10to8(scaled);
endfunction
 
 
 
//-----------------------------------------------------------
// Interpolation Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkInterpolator( Interpolator );
// Request queues: reqfifoLoad feeds the address-generating rules
// (loadLuma / loadChroma), reqfifoWork feeds the filtering rules
// (workLuma / workChroma).
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
FIFO#(InterpolatorWT) reqfifoWork <- mkSizedFIFO(interpolator_reqfifoWork_size);
// NOTE(review): presumably the result stream to the client; its users are outside this view.
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
// While set, the load rules are held off and sendEndOfFrameReq issues IPLoadEndFrame.
Reg#(Bool) endOfFrameFlag <- mkReg(False);
// Memory requests issued by the load rules; responses are consumed by the work rules.
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);
 
// Picture dimensions used by the load rules to clamp addresses. They are
// concatenated with 4 (luma) or 3 (chroma) zero bits before being compared
// with sample positions — presumably macroblock counts; confirm against the writer.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
 
// Scratch storage written by the work rules: workFile holds rows of four
// 15-bit intermediates, resultFile holds rows of four finished 8-bit samples.
RFile1#(Bit#(5),Vector#(4,Bit#(15))) workFile <- mkRFile1Full();
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full();
 
// Load-side progress: pass number (for two-pass luma requests) and the
// horizontal / vertical fetch counters.
Reg#(Bit#(1)) loadStage <- mkReg(0);
Reg#(Bit#(2)) loadHorNum <- mkReg(0);
Reg#(Bit#(4)) loadVerNum <- mkReg(0);
 
// Work-side progress and sliding sample windows. workVector8/workVector15
// and resultReady have no reset value (mkRegU).
Reg#(Bit#(1)) workStage <- mkReg(0);
Reg#(Bit#(2)) workMbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(2)) workSubMbPart <- mkReg(0);
Reg#(Bit#(2)) workHorNum <- mkReg(0);
Reg#(Bit#(4)) workVerNum <- mkReg(0);
Reg#(Vector#(20,Bit#(8))) workVector8 <- mkRegU;
Reg#(Vector#(20,Bit#(15))) workVector15 <- mkRegU;
// One flag per group of four result rows; set by the work rules when the
// last row of a group has been written to resultFile.
Reg#(Vector#(4,Bit#(1))) resultReady <- mkRegU;
// Set by the work rules after the last sub-partition; the work rules are
// guarded by !workDone.
Reg#(Bool) workDone <- mkReg(False);
 
// NOTE(review): output-side counters; the rule that uses them is outside this view.
Reg#(Bit#(2)) outBlockNum <- mkReg(0);
Reg#(Bit#(2)) outPixelNum <- mkReg(0);
Reg#(Bool) outDone <- mkReg(False);
 
 
// When an end-of-frame is pending, forward a single IPLoadEndFrame marker to
// the memory request queue and clear the pending flag in the same firing.
rule sendEndOfFrameReq (endOfFrameFlag == True);
   memReqQ.enq(tagged IPLoadEndFrame);
   endOfFrameFlag <= False;
endrule
// Address generator for luma fetches. For the IPLuma request at the head of
// reqfifoLoad this rule issues one IPLoadLuma memory request per firing,
// stepping loadHorNum / loadVerNum over the region to fetch, and dequeues the
// request after the final fetch. It is held off while endOfFrameFlag is set.
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag );
// Low two bits of each motion-vector component
// (presumably the quarter-sample fraction — confirm against H264Types).
Bit#(2) xfracl = reqdata.mvhor[1:0];
Bit#(2) yfracl = reqdata.mvver[1:0];
// Both fractions odd: the region is fetched in two passes selected by loadStage.
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
// horInter / verInter select whether this pass needs the extra samples around
// the block: the start position is moved back by 2 and the counter limits
// below grow by 2 words (horizontal) or 5 rows (vertical).
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0);
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0);
Bit#(2) offset = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0);
// horOut is passed on as horOutOfBounds when the horizontal address was clamped.
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,2)) horAddr;
Bit#(TAdd#(PicHeightSz,4)) verAddr;
// Unclamped position before the motion vector is applied: reqdata.hor and
// loadHorNum are scaled by 4 here, reqdata.ver and loadVerNum are used as-is.
// One extra sample is added in the pass / fraction combinations shown.
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0);
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0);
// Integer part of the motion vector, sign-extended by one bit, minus the
// 2-sample lead-in when this pass interpolates in that direction.
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0);
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0);
// Horizontal clamp: a negative displacement larger than the position pins the
// address to word 0; a position at or beyond picWidth*16 pins it to the last
// word. Both cases raise horOut.
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(mvhortemp);
if(horTemp>=zeroExtend({picWidth,4'b0000}))
begin
horAddr = {picWidth-1,2'b11};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamp: same scheme against picHeight*16, without an out-of-bounds flag.
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(mvvertemp);
if(verTemp>=zeroExtend({picHeight,4'b0000}))
verAddr = {picHeight-1,4'b1111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// verFirst chooses the scan order: rows innermost (column by column) when
// True, words innermost (row by row) otherwise.
Bool verFirst = (twoStage&&loadStage==0) || (yfracl==2&&(xfracl==1||xfracl==3));
// Last counter values for this pass: base block extent plus the extra
// words / rows required by the interpolation or by a non-zero offset.
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset==0 ? 0 : 1));
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
if(verFirst)
begin
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
// End of a column-major scan: a two-pass request moves on to pass 1,
// anything else is complete.
if(twoStage)
loadStage <= 1;
else
reqfifoLoad.deq();
end
end
end
else
begin
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
// End of a row-major scan: the request is complete; loadStage returns to 0.
loadVerNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
end
// Only block shapes up to 8x8 are handled by the counters above.
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16)
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported");
//$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
 
// Address generator for chroma fetches. For the IPChroma request at the head
// of reqfifoLoad this rule issues one IPLoadChroma memory request per firing,
// scanning word by word within a row and then row by row, and dequeues the
// request after the final fetch. It is held off while endOfFrameFlag is set.
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag );
// Low three bits of each motion-vector component
// (presumably the eighth-sample fraction — confirm against H264Types).
Bit#(3) xfracc = reqdata.mvhor[2:0];
Bit#(3) yfracc = reqdata.mvver[2:0];
// Offset within a fetched word: next two motion-vector bits plus 2 when
// reqdata.hor is odd.
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0};
// horOut is passed on as horOutOfBounds when the horizontal address was clamped.
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,1)) horAddr;
Bit#(TAdd#(PicHeightSz,3)) verAddr;
// Unclamped position before the motion vector is applied: reqdata.hor is
// scaled by 2 and loadHorNum by 4; reqdata.ver and loadVerNum are used as-is.
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00});
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum);
// Horizontal clamp: a negative displacement larger than the position pins the
// address to word 0; a position at or beyond picWidth*8 pins it to the last
// word. Both cases raise horOut.
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]);
if(horTemp>=zeroExtend({picWidth,3'b000}))
begin
horAddr = {picWidth-1,1'b1};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical clamp: same scheme against picHeight*8, without an out-of-bounds flag.
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
if(verTemp>=zeroExtend({picHeight,3'b000}))
verAddr = {picHeight-1,3'b111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// Last counter values: the number of words per row depends on block shape,
// offset and whether a horizontal fraction is present; one extra row is
// fetched when a vertical fraction is present.
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
// Last word of the last row: the request is complete.
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
//$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
// Luma filtering rule. Consumes the IPWLuma request at the head of reqfifoWork
// together with memory responses from memRespQ, and produces rows of four
// 8-bit samples in resultFile (setting resultReady once the last row of a
// four-row group is written).
//
// workStage selects the pass:
//   stage 0 - consumes memory responses. Depending on the fractions it either
//             reorders / horizontally filters a row (results go to resultFile
//             when yfracl==0, otherwise to workFile as 15-bit intermediates),
//             or vertically filters a column into workFile.
//   stage 1 - second pass over workFile (vertical or horizontal); in the
//             case where both fractions are odd it also consumes a second
//             set of memory responses and averages the two filtered values.
//
// workHorNum / workVerNum count words and rows within the pass;
// workSubMbPart counts sub-partitions for 4x8 / 8x4 / 4x4 blocks. workDone is
// raised after the last sub-partition and blocks this rule until it is
// cleared elsewhere.
rule workLuma ( reqfifoWork.first() matches tagged IPWLuma .reqdata &&& !workDone );
   let xfracl = reqdata.xFracL;
   let yfracl = reqdata.yFracL;
   let offset = reqdata.offset;
   let blockT = reqdata.bt;
   // "Next" copies are modified below and written back once at the end.
   Vector#(20,Bit#(8)) workVector8Next = workVector8;
   Vector#(20,Bit#(15)) workVector15Next = workVector15;
   Vector#(4,Bit#(1)) resultReadyNext = resultReady;
   if(workStage == 0)
      begin
         if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
            begin
               memRespQ.deq();
               // Unpack the 32-bit response into four samples, lowest byte first.
               Vector#(4,Bit#(8)) readdata = replicate(0);
               readdata[0] = tempreaddata[7:0];
               readdata[1] = tempreaddata[15:8];
               readdata[2] = tempreaddata[23:16];
               readdata[3] = tempreaddata[31:24];
               //$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] );
               Vector#(4,Bit#(8)) tempResult8 = replicate(0);
               Vector#(4,Bit#(15)) tempResult15 = replicate(0);
               if(xfracl==0 || yfracl==0 || xfracl==2)
                  begin
                     if(xfracl==0)//reorder
                        begin
                           // No horizontal filtering: realign by 'offset', taking the
                           // leading samples from the previous word (kept in
                           // workVector8[0..3]) and the rest from the current word.
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 Bit#(2) offsetplusii = offset+fromInteger(ii);
                                 if(offset <= 3-fromInteger(ii) && offset!=0)
                                    tempResult8[ii] = workVector8[offsetplusii];
                                 else
                                    tempResult8[ii] = readdata[offsetplusii];
                                 workVector8Next[ii] = readdata[ii];
                              end
                           // Scale by 32 so the values match the filtered intermediates.
                           for(Integer ii=0; ii<4; ii=ii+1)
                              tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
                        end
                     else//horizontal interpolation
                        begin
                           offset = offset-2;
                           // Slide the window left by one word and insert the new
                           // word at a position that depends on 'offset'.
                           for(Integer ii=0; ii<8; ii=ii+1)
                              workVector8Next[ii] = workVector8[ii+4];
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                                 workVector8Next[tempIndex] = readdata[ii];
                              end
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]);
                                 tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
                                 // Fractions 1 and 3: average with the neighbouring
                                 // unfiltered sample on the corresponding side.
                                 if(xfracl == 1)
                                    tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+2]} + 1) >> 1);
                                 else if(xfracl == 3)
                                    tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+3]} + 1) >> 1);
                              end
                        end
                     // The first workHorNumOffset words of each row only prime the
                     // window; output starts after them.
                     Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1));
                     if(workHorNum >= workHorNumOffset)
                        begin
                           Bit#(1) horAddr = truncate(workHorNum-workHorNumOffset);
                           if(yfracl == 0)//write to resultFile
                              begin
                                 Bit#(3) verAddr = truncate(workVerNum);
                                 // Place the sub-partition inside the 8x8 result area.
                                 horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                                 verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                                 resultFile.upd({verAddr,horAddr},tempResult8);
                                 if(verAddr[1:0] == 3)
                                    resultReadyNext[{verAddr[2],horAddr}] = 1;
                              end
                           else//write to workFile
                              workFile.upd({workVerNum,horAddr},tempResult15);
                        end
                     Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset;
                     Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0);
                     // Row-major counters: words within a row, then rows.
                     if(workHorNum < workHorNumMax)
                        workHorNum <= workHorNum+1;
                     else
                        begin
                           workHorNum <= 0;
                           if(workVerNum < workVerNumMax)
                              workVerNum <= workVerNum+1;
                           else
                              begin
                                 workVerNum <= 0;
                                 if(yfracl!=0)
                                    workStage <= 1;
                                 else
                                    begin
                                       // Single-pass request finished: advance to the
                                       // next sub-partition or flag completion.
                                       if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                                          workSubMbPart <= workSubMbPart+1;
                                       else
                                          begin
                                             workSubMbPart <= 0;
                                             workDone <= True;
                                          end
                                       reqfifoWork.deq();
                                    end
                              end
                        end
                  end
               else//vertical interpolation
                  begin
                     offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
                     // workVector8 holds the five previous rows of this column; the
                     // current word supplies the sixth tap.
                     for(Integer ii=0; ii<4; ii=ii+1)
                        tempResult15[ii] = interpolate8to15(workVector8[ii],workVector8[ii+4],workVector8[ii+8],workVector8[ii+12],workVector8[ii+16],readdata[ii]);
                     for(Integer ii=0; ii<16; ii=ii+1)
                        workVector8Next[ii] = workVector8[ii+4];
                     for(Integer ii=0; ii<4; ii=ii+1)
                        workVector8Next[ii+16] = readdata[ii];
                     Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
                     Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
                     Bit#(2) horAddr = workHorNum;
                     Bit#(3) verAddr = truncate(workVerNum-5);
                     // The first five rows only prime the window.
                     if(workVerNum > 4)
                        begin
                           workFile.upd({verAddr,horAddr},tempResult15);
                           //$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]);
                        end
                     // Column-major counters: rows within a column, then columns.
                     if(workVerNum < workVerNumMax)
                        workVerNum <= workVerNum+1;
                     else
                        begin
                           workVerNum <= 0;
                           if(workHorNum < workHorNumMax)
                              workHorNum <= workHorNum+1;
                           else
                              begin
                                 workHorNum <= 0;
                                 workStage <= 1;
                              end
                        end
                  end
            end
      end
   else
      begin
         Vector#(4,Bit#(8)) tempResult8 = replicate(0);
         Vector#(4,Bit#(15)) readdata = replicate(0);
         // Stage 1 is only entered when a vertical fraction is present. The
         // register tested here is workStage (this is the workStage != 0 branch).
         if(yfracl==0)
            $display( "ERROR Interpolation: workLuma workStage==1 and yfracl==0");
         if(xfracl==0 || xfracl==2)//vertical interpolation
            begin
               readdata = workFile.sub({workVerNum,workHorNum[0]});
               for(Integer ii=0; ii<4; ii=ii+1)
                  begin
                     tempResult8[ii] = interpolate15to8(workVector15[ii],workVector15[ii+4],workVector15[ii+8],workVector15[ii+12],workVector15[ii+16],readdata[ii]);
                     // Fractions 1 and 3: average with the neighbouring stage-0
                     // value (rounded back to 8 bits) on the corresponding side.
                     if(yfracl == 1)
                        tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+8]+16)>>5))} + 1) >> 1);
                     else if(yfracl == 3)
                        tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+12]+16)>>5))} + 1) >> 1);
                  end
               for(Integer ii=0; ii<16; ii=ii+1)
                  workVector15Next[ii] = workVector15[ii+4];
               for(Integer ii=0; ii<4; ii=ii+1)
                  workVector15Next[ii+16] = readdata[ii];
               Bit#(2) workHorNumMax = 1;
               Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
               // The first five rows only prime the window.
               if(workVerNum > 4)
                  begin
                     Bit#(1) horAddr = truncate(workHorNum);
                     Bit#(3) verAddr = truncate(workVerNum-5);
                     horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                     verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                     resultFile.upd({verAddr,horAddr},tempResult8);
                     if(verAddr[1:0] == 3)
                        resultReadyNext[{verAddr[2],horAddr}] = 1;
                  end
               if(workVerNum < workVerNumMax)
                  workVerNum <= workVerNum+1;
               else
                  begin
                     workVerNum <= 0;
                     if(workHorNum < workHorNumMax)
                        workHorNum <= workHorNum+1;
                     else
                        begin
                           workHorNum <= 0;
                           workStage <= 0;
                           if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                              workSubMbPart <= workSubMbPart+1;
                           else
                              begin
                                 workSubMbPart <= 0;
                                 workDone <= True;
                              end
                           reqfifoWork.deq();
                        end
                  end
            end
         else//horizontal interpolation
            begin
               offset = offset-2;
               if(yfracl == 2)
                  begin
                     // Horizontal filter over the vertically filtered 15-bit
                     // intermediates stored in workFile during stage 0.
                     readdata = workFile.sub({workVerNum[2:0],workHorNum});
                     for(Integer ii=0; ii<8; ii=ii+1)
                        workVector15Next[ii] = workVector15[ii+4];
                     for(Integer ii=0; ii<4; ii=ii+1)
                        begin
                           Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                           workVector15Next[tempIndex] = readdata[ii];
                        end
                     for(Integer ii=0; ii<4; ii=ii+1)
                        begin
                           tempResult8[ii] = interpolate15to8(workVector15Next[ii],workVector15Next[ii+1],workVector15Next[ii+2],workVector15Next[ii+3],workVector15Next[ii+4],workVector15Next[ii+5]);
                           if(xfracl == 1)
                              tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+2]+16)>>5))} + 1) >> 1);
                           else if(xfracl == 3)
                              tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+3]+16)>>5))} + 1) >> 1);
                        end
                  end
               else
                  begin
                     // Both fractions odd: horizontally filter a fresh row from
                     // memory, then average it with the matching vertically
                     // filtered value read back from workFile.
                     if(memRespQ.first() matches tagged IPLoadResp .tempreaddata8)
                        begin
                           memRespQ.deq();
                           Vector#(4,Bit#(8)) readdata8 = replicate(0);
                           readdata8[0] = tempreaddata8[7:0];
                           readdata8[1] = tempreaddata8[15:8];
                           readdata8[2] = tempreaddata8[23:16];
                           readdata8[3] = tempreaddata8[31:24];
                           for(Integer ii=0; ii<8; ii=ii+1)
                              workVector8Next[ii] = workVector8[ii+4];
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                                 workVector8Next[tempIndex] = readdata8[ii];
                              end
                           Vector#(4,Bit#(15)) tempResult15 = replicate(0);
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]);
                                 tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
                              end
                           // Realign the stage-0 column results with the row just
                           // filtered; the alignment differs by one for xfracl==3.
                           Bit#(2) verOffset;
                           Vector#(4,Bit#(15)) verResult15 = replicate(0);
                           if(xfracl == 1)
                              verOffset = reqdata.offset;
                           else
                              verOffset = reqdata.offset+1;
                           readdata = workFile.sub({workVerNum[2:0],(workHorNum-2+(verOffset==0?0:1))});
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 Bit#(2) offsetplusii = verOffset+fromInteger(ii);
                                 if(verOffset <= 3-fromInteger(ii) && verOffset!=0)
                                    verResult15[ii] = workVector15[offsetplusii];
                                 else
                                    verResult15[ii] = readdata[offsetplusii];
                                 workVector15Next[ii] = readdata[ii];
                              end
                           for(Integer ii=0; ii<4; ii=ii+1)
                              begin
                                 Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5)));
                                 tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1);
                              end
                        end
                  end
               // The first two words of each row only prime the window.
               if(workHorNum >= 2)
                  begin
                     Bit#(1) horAddr = truncate(workHorNum-2);
                     Bit#(3) verAddr = truncate(workVerNum);
                     horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                     verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                     resultFile.upd({verAddr,horAddr},tempResult8);
                     if(verAddr[1:0] == 3)
                        resultReadyNext[{verAddr[2],horAddr}] = 1;
                     //$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext));
                  end
               Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
               Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
               if(workHorNum < workHorNumMax)
                  workHorNum <= workHorNum+1;
               else
                  begin
                     workHorNum <= 0;
                     if(workVerNum < workVerNumMax)
                        workVerNum <= workVerNum+1;
                     else
                        begin
                           workVerNum <= 0;
                           workStage <= 0;
                           if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                              workSubMbPart <= workSubMbPart+1;
                           else
                              begin
                                 workSubMbPart <= 0;
                                 workDone <= True;
                              end
                           reqfifoWork.deq();
                        end
                  end
            end
      end
   // Commit the window and ready-flag updates computed above.
   workVector8 <= workVector8Next;
   workVector15 <= workVector15Next;
   resultReady <= resultReadyNext;
   //$display( "Trace interpolator: workLuma %h %h %h %h %h %h", xfracl, yfracl, workHorNum, workVerNum, offset, workStage);
endrule
 
// Chroma interpolation worker. Fires while the head of reqfifoWork is an
// IPWChroma request and workDone is clear. Each firing that sees an
// IPLoadResp consumes it, blends it with previously buffered samples using
// the four weights derived from xFracC/yFracC, and (when a full row is
// available) writes four result pixels into resultFile.
rule workChroma ( reqfifoWork.first() matches tagged IPWChroma .reqdata &&& !workDone );
// Fractions are widened to 4 bits so that (8-xfracc)/(8-yfracc) can hold the value 8.
Bit#(4) xfracc = zeroExtend(reqdata.xFracC);
Bit#(4) yfracc = zeroExtend(reqdata.yFracC);
let offset = reqdata.offset;
let blockT = reqdata.bt;
// Next-state copies; both are written back unconditionally at the end of the rule.
Vector#(20,Bit#(8)) workVector8Next = workVector8;
Vector#(4,Bit#(1)) resultReadyNext = resultReady;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Split the 32-bit response into four bytes, least-significant byte in lane 0.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
// tempWork8 holds the five samples assembled in this firing; tempPrev8 holds
// the five samples buffered earlier (presumably the row above -- confirm).
Vector#(5,Bit#(8)) tempWork8 = replicate(0);
Vector#(5,Bit#(8)) tempPrev8 = replicate(0);
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Bool resultReadyFlag = False;
// Align the samples by offset: each lane comes either from the word saved on
// the previous firing (workVector8[0..3]) or from the word just read. The new
// word is always saved in workVector8Next[0..3] for the next firing.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0))
tempWork8[ii] = workVector8[offsetplusii];
else
tempWork8[ii] = readdata[offsetplusii];
workVector8Next[ii] = readdata[ii];
end
// Fifth sample always comes from the freshly read word.
tempWork8[4] = readdata[offset];
// Fetch the previously buffered row and store the current one. 16-wide blocks
// use slots 9..13 for the word at the selected workHorNum; every other case
// uses slots 4..8.
if((blockT==IP16x8 || blockT==IP16x16) && workHorNum==(xfracc==0&&offset==0 ? 1 : 2))
begin
for(Integer ii=0; ii<5; ii=ii+1)
begin
tempPrev8[ii] = workVector8[ii+9];
workVector8Next[ii+9] = tempWork8[ii];
end
end
else
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = workVector8[ii+4];
if(workHorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))))
begin
for(Integer ii=0; ii<5; ii=ii+1)
workVector8Next[ii+4] = tempWork8[ii];
end
end
// With yfracc==0 the tempWork8 weights below are zero, so the current row is
// substituted for the previous one and no earlier row is needed.
if(yfracc==0)
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = tempWork8[ii];
end
// Weighted sum of a 2x2 neighbourhood. The four weights always add up to 64,
// so the 14-bit accumulator cannot overflow; +32 then >>6 rounds to nearest.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
tempResult8[ii] = truncate((tempVal+32)>>6);
end
// A result row exists once a previous row is available (workVerNum>0) or is
// not needed (yfracc==0).
if(workVerNum > 0 || yfracc==0)
begin
if(blockT==IP4x8 || blockT==IP4x4)
begin
// Only tempResult8[0..1] are used for these block types: stash the pair in
// workVector8Next[10+2*workVerNum ..] and combine it with the pair stashed
// earlier at the same slot to form a full 4-pixel word.
Bit#(5) tempIndex = 10 + zeroExtend(workVerNum<<1);
workVector8Next[tempIndex] = tempResult8[0];
workVector8Next[tempIndex+1] = tempResult8[1];
tempResult8[2] = tempResult8[0];
tempResult8[3] = tempResult8[1];
tempResult8[0] = workVector8[tempIndex];
tempResult8[1] = workVector8[tempIndex+1];
if((workHorNum>0 || offset[1]==0) && workSubMbPart[0]==1)
resultReadyFlag = True;
end
else
begin
if(workHorNum>0 || (xfracc==0 && offset==0))
resultReadyFlag = True;
end
end
if(resultReadyFlag)
begin
// Build the resultFile address from the loop counters plus the offsets of
// the current macroblock / sub-macroblock partition.
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? workHorNum : workHorNum-1)));
Bit#(3) verAddr = truncate((yfracc==0 ? workVerNum : workVerNum-1));
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : workMbPart[0]);
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {workMbPart[0],2'b00} : {workMbPart[1],2'b00}));
verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 2 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
// Writing row 3 of a group of four rows marks that group as ready for the
// outputing rule.
if(verAddr[1:0] == 3)
resultReadyNext[{verAddr[2],horAddr}] = 1;
end
// Loop bounds for the horizontal (words per row) and vertical (rows) counters.
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(workHorNum < workHorNumMax)
workHorNum <= workHorNum+1;
else
begin
workHorNum <= 0;
if(workVerNum < workVerNumMax)
workVerNum <= workVerNum+1;
else
begin
// Request finished: advance the sub-partition / partition counters, flag
// workDone after the last partition, and retire the request.
workVerNum <= 0;
if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
workSubMbPart <= workSubMbPart+1;
else
begin
workSubMbPart <= 0;
if(((blockT==IP16x8 || blockT==IP8x16) && workMbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && workMbPart<3))
workMbPart <= workMbPart+1;
else
begin
workMbPart <= 0;
workDone <= True;
end
end
reqfifoWork.deq();
end
end
end
workVector8 <= workVector8Next;
resultReady <= resultReadyNext;
//$display( "Trace interpolator: workChroma %h %h %h %h %h", xfracc, yfracc, workHorNum, workVerNum, offset);
endrule
 
 
// Drains resultFile into outfifo one 4-pixel word per firing, as long as the
// block currently being read (outBlockNum) has been marked ready. After four
// words the next block is selected; after the fourth block outDone is set.
rule outputing( !outDone && resultReady[outBlockNum]==1 );
   let readAddr = {outBlockNum[1],outPixelNum,outBlockNum[0]};
   Bool lastWordOfBlock = (outPixelNum == 3);
   Bool lastBlock = (outBlockNum == 3);

   outfifo.enq(resultFile.sub(readAddr));
   outPixelNum <= outPixelNum+1;

   if(lastWordOfBlock)
      outBlockNum <= outBlockNum+1;
   if(lastWordOfBlock && lastBlock)
      outDone <= True;
   //$display( "Trace interpolator: outputing %h %h %h %h %h %h", outBlockNum, outPixelNum, tempVector[3], tempVector[2], tempVector[1], tempVector[0]);
endrule
 
 
// Once both the worker side (workDone) and the output side (outDone) have
// finished, clear every ready flag and both done flags so that the work and
// output rules can fire again.
rule switching( outDone && workDone );
   resultReady <= replicate(0);
   workDone <= False;
   outDone <= False;
   //$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
endrule
 
// Stores the supplied picture width in the picWidth register.
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth );
picWidth <= newPicWidth;
endmethod
// Stores the supplied picture height in the picHeight register.
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight );
picHeight <= newPicHeight;
endmethod
// Accepts one interpolation request. The full request always goes to the
// load queue; a reduced work descriptor (fractional motion-vector bits,
// byte offset and block type) is queued for the matching work rule.
method Action request( InterpolatorIT inputdata );
   reqfifoLoad.enq(inputdata);
   case (inputdata) matches
      tagged IPLuma .indata :
         // Luma: low two motion-vector bits are the fraction, next two the offset.
         reqfifoWork.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt});
      tagged IPChroma .indata :
         // Chroma: low three motion-vector bits are the fraction; the offset also
         // accounts for the low bit of the horizontal block position.
         reqfifoWork.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt});
   endcase
endmethod
 
// Returns the 4-pixel word at the head of outfifo without removing it.
method Vector#(4,Bit#(8)) first();
return outfifo.first();
endmethod
// Removes the word at the head of outfifo.
method Action deq();
outfifo.deq();
endmethod
// Sets endOfFrameFlag; the logic that reacts to the flag is outside this excerpt.
method Action endOfFrame();
endOfFrameFlag <= True;
endmethod
// Memory client port: requests are taken from memReqQ and responses are
// delivered into memRespQ (the queue the work rules read).
interface Client mem_client;
interface Get request = fifoToGet(memReqQ);
interface Put response = fifoToPut(memRespQ);
endinterface
 
 
endmodule
 
 
endpackage
/trunk/src/INalUnwrap.bsv
0,0 → 1,22
//**********************************************************************
// Interface for NAL unwrapper
//----------------------------------------------------------------------
//
//
//
 
package INalUnwrap;
 
import H264Types::*;
import GetPut::*;
 
// Port list of the NAL unwrapper: one Put for incoming InputGenOT items and
// one Get for outgoing NalUnwrapOT items.
interface INalUnwrap;

// Interface for inter-module io
// ioin accepts InputGenOT values.
interface Put#(InputGenOT) ioin;
// ioout delivers NalUnwrapOT values.
interface Get#(NalUnwrapOT) ioout;

endinterface
 
endpackage
 
/trunk/src/BRAM.bsv
0,0 → 1,214
import FIFO::*;
 
//One RAM.
// Split-transaction RAM interface with a single read port: a read is issued
// with read_req and its data collected later with read_resp.
interface BRAM#(type idx_type, type data_type);

// Issue a read of location idx.
method Action read_req(idx_type idx);

// Collect the data for a previously issued read.
method ActionValue#(data_type) read_resp();

// Write data to location idx.
method Action write(idx_type idx, data_type data);
endinterface
 
 
//Two RAMs.
// RAM interface with two independent read ports (request/response pairs
// 1 and 2) and one shared write method.
interface BRAM_2#(type idx_type, type data_type);

// Issue a read on port 1 / port 2.
method Action read_req1(idx_type idx);
method Action read_req2(idx_type idx);

// Collect the data for a read previously issued on port 1 / port 2.
method ActionValue#(data_type) read_resp1();
method ActionValue#(data_type) read_resp2();

// Write data to location idx.
method Action write(idx_type idx, data_type data);
endinterface
 
//Three RAMs.
// RAM interface with three independent read ports (request/response pairs
// 1, 2 and 3) and one shared write method.
interface BRAM_3#(type idx_type, type data_type);

// Issue a read on port 1 / 2 / 3.
method Action read_req1(idx_type idx);
method Action read_req2(idx_type idx);
method Action read_req3(idx_type idx);

// Collect the data for a read previously issued on port 1 / 2 / 3.
method ActionValue#(data_type) read_resp1();
method ActionValue#(data_type) read_resp2();
method ActionValue#(data_type) read_resp3();

// Write data to location idx.
method Action write(idx_type idx, data_type data);
endinterface
 
 
// Generic single-read-port RAM covering the index range passed as low..high.
// The implementation is chosen at elaboration time: when the data type
// occupies zero bits the storage-free mkBRAM_Zero is used, otherwise the
// imported Verilog module wrapped by mkBRAM_NonZero.
module mkBRAM#(Integer low, Integer high)
//interface:
(BRAM#(idx_type, data_type))
provisos
(Bits#(idx_type, idx),
Bits#(data_type, data),
Literal#(idx_type));
// valueof(data) is the bit width of data_type.
BRAM#(idx_type, data_type) m <- (valueof(data) == 0) ?
mkBRAM_Zero() :
mkBRAM_NonZero(low, high);

return m;
endmodule
 
// Wrapper around the Verilog module named BRAM. Address/data widths and the
// low/high index bounds are forwarded as Verilog parameters.
import "BVI" BRAM = module mkBRAM_NonZero#(Integer low, Integer high)
//interface:
(BRAM#(idx_type, data_type))
provisos
(Bits#(idx_type, idx),
Bits#(data_type, data),
Literal#(idx_type));

default_clock clk(CLK);

parameter addr_width = valueof(idx);
parameter data_width = valueof(data);
parameter lo = low;
parameter hi = high;

// Method-to-port bindings. read_resp and read_req carry ready signals from
// the Verilog side; write has an enable only.
method DOUT read_resp() ready(DOUT_RDY) enable(DOUT_EN);
method read_req(RD_ADDR) ready(RD_RDY) enable(RD_EN);
method write(WR_ADDR, WR_VAL) enable(WR_EN);

// Scheduling annotations: the three methods are conflict-free (CF) with one
// another, and each method conflicts (C) with itself.
schedule read_req CF (read_resp, write);
schedule read_resp CF (read_req, write);
schedule write CF (read_req, read_resp);
schedule read_req C read_req;
schedule read_resp C read_resp;
schedule write C write;

endmodule
 
// Storage-free stand-in for a RAM whose data type has zero bits. Writes are
// discarded; each read request queues a don't-care value that the matching
// read_resp later removes and returns, so requests and responses still pair
// up one-to-one.
module mkBRAM_Zero
  //interface:
              (BRAM#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   // One entry per outstanding read request.
   FIFO#(data_type) q <- mkFIFO();

   // Nothing to store.
   method Action write(idx_type i, data_type d);
      noAction;
   endmethod

   // Record an outstanding read; the value is irrelevant.
   method Action read_req(idx_type i);
      q.enq(?);
   endmethod

   // Complete the oldest outstanding read.
   method ActionValue#(data_type) read_resp();
      let oldest = q.first();
      q.deq();
      return oldest;
   endmethod

endmodule
 
// Single-read-port RAM spanning every index representable by idx_type,
// i.e. indices 0 .. 2^idx - 1.
module mkBRAM_Full
  //interface:
              (BRAM#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   // Highest index for an idx-bit address.
   Integer topIndex = valueof(TExp#(idx)) - 1;

   BRAM#(idx_type, data_type) br <- mkBRAM(0, topIndex);

   return br;

endmodule
// Two-read-port RAM built from two identical single-port RAMs. Every write
// goes to both copies; read port N is served by copy N.
module mkBRAM_2#(Integer low, Integer high)
  //interface:
              (BRAM_2#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   BRAM#(idx_type, data_type) br1 <- mkBRAM(low, high);
   BRAM#(idx_type, data_type) br2 <- mkBRAM(low, high);

   // Read port 1 -> copy 1.
   method Action read_req1(idx_type addr);
      br1.read_req(addr);
   endmethod

   method ActionValue#(data_type) read_resp1();
      let value <- br1.read_resp();
      return value;
   endmethod

   // Read port 2 -> copy 2.
   method Action read_req2(idx_type addr);
      br2.read_req(addr);
   endmethod

   method ActionValue#(data_type) read_resp2();
      let value <- br2.read_resp();
      return value;
   endmethod

   // Keep both copies identical.
   method Action write(idx_type idx, data_type data);
      br1.write(idx, data);
      br2.write(idx, data);
   endmethod
endmodule
 
// Two-read-port RAM spanning every index representable by idx_type,
// i.e. indices 0 .. 2^idx - 1.
module mkBRAM_2_Full
  //interface:
              (BRAM_2#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   // Highest index for an idx-bit address.
   Integer topIndex = valueof(TExp#(idx)) - 1;

   BRAM_2#(idx_type, data_type) br <- mkBRAM_2(0, topIndex);

   return br;

endmodule
 
// Three-read-port RAM built from three identical single-port RAMs. Every
// write goes to all copies; read port N is served by copy N.
module mkBRAM_3#(Integer low, Integer high)
  //interface:
              (BRAM_3#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   BRAM#(idx_type, data_type) br1 <- mkBRAM(low, high);
   BRAM#(idx_type, data_type) br2 <- mkBRAM(low, high);
   BRAM#(idx_type, data_type) br3 <- mkBRAM(low, high);

   // Read port 1 -> copy 1.
   method Action read_req1(idx_type addr);
      br1.read_req(addr);
   endmethod

   method ActionValue#(data_type) read_resp1();
      let value <- br1.read_resp();
      return value;
   endmethod

   // Read port 2 -> copy 2.
   method Action read_req2(idx_type addr);
      br2.read_req(addr);
   endmethod

   method ActionValue#(data_type) read_resp2();
      let value <- br2.read_resp();
      return value;
   endmethod

   // Read port 3 -> copy 3.
   method Action read_req3(idx_type addr);
      br3.read_req(addr);
   endmethod

   method ActionValue#(data_type) read_resp3();
      let value <- br3.read_resp();
      return value;
   endmethod

   // Keep all copies identical.
   method Action write(idx_type idx, data_type data);
      br1.write(idx, data);
      br2.write(idx, data);
      br3.write(idx, data);
   endmethod
endmodule
 
 
// Three-read-port RAM spanning every index representable by idx_type,
// i.e. indices 0 .. 2^idx - 1.
module mkBRAM_3_Full
  //interface:
              (BRAM_3#(idx_type, data_type))
  provisos
          (Bits#(idx_type, idx),
           Bits#(data_type, data),
           Literal#(idx_type));

   // Highest index for an idx-bit address.
   Integer topIndex = valueof(TExp#(idx)) - 1;

   BRAM_3#(idx_type, data_type) br <- mkBRAM_3(0, topIndex);

   return br;

endmodule
 
/trunk/src/mkDeblockFilter_orig.bsv
0,0 → 1,780
//**********************************************************************
// Deblocking Filter
//----------------------------------------------------------------------
//
//
 
package mkDeblockFilter;
 
import H264Types::*;
 
import IDeblockFilter::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
 
// State of the deblocking filter's control flow; held in the `process`
// register and tested in the rule guards.
typedef union tagged
{
void Passing; //not working on anything in particular
void Initialize; // guard of rule initialize, which then moves to Horizontal
void Horizontal; // guard of rule horizontal
void Vertical; // guard of rules vertical and outputing
void Cleanup; // entered from Vertical by rule verticaltocleanup
}
Process deriving(Eq,Bits);
 
 
 
//-----------------------------------------------------------
// Helper functions
 
 
// Absolute difference |in0 - in1| of two unsigned 8-bit values. The smaller
// operand is always subtracted from the larger, so the result never wraps.
function Bit#(8) absdiff8(Bit#(8) in0, Bit#(8) in1);
   Bit#(8) diff;
   if (in1 >= in0)
      diff = in1 - in0;
   else
      diff = in0 - in1;
   return diff;
endfunction
 
 
// Decides whether an edge should be filtered. in_pixels packs the four
// samples nearest the edge as {q1,q0,p0,p1} (p1 in the lowest byte). The
// edge passes when |p0-q0| < alpha and both |p0-p1| and |q0-q1| are < beta.
function Bool filter_test(Bit#(32) in_pixels, Bit#(8) alpha, Bit#(5) beta);
   Bit#(8) betaWide = zeroExtend(beta);

   Bit#(8) p1 = in_pixels[7:0];
   Bit#(8) p0 = in_pixels[15:8];
   Bit#(8) q0 = in_pixels[23:16];
   Bit#(8) q1 = in_pixels[31:24];

   Bool acrossEdgeOk = absdiff8(p0,q0) < alpha;
   Bool pSideOk = absdiff8(p0,p1) < betaWide;
   Bool qSideOk = absdiff8(q0,q1) < betaWide;

   return (acrossEdgeOk && pSideOk && qSideOk);
endfunction
 
 
// Clamps a signed 9-bit value to the symmetric range [-bound, +bound] and
// returns it as a signed 6-bit value (two's complement in a Bit#(6)).
function Bit#(6) clip3symmetric9to6(Bit#(9) val, Bit#(5) bound);
   Int#(9) signedVal = unpack(val);
   // bound is unsigned; prepend a zero so it stays positive as a signed 6-bit value.
   Int#(6) upper = unpack({1'b0,bound});
   Int#(6) lower = -upper;

   Int#(6) clipped;
   if (signedVal < signExtend(lower))
      clipped = lower;
   else if (signedVal > signExtend(upper))
      clipped = upper;
   else
      clipped = truncate(signedVal);

   return pack(clipped);
endfunction
 
 
// Filters the eight samples that straddle one edge.
//   in_pixels  : {q3,q2,q1,q0,p0,p1,p2,p3}, p3 in the lowest byte
//   chroma_flag: True selects the chroma variant (only p0/q0 are modified)
//   bs         : boundary strength; 0 = pass through, 1..3 = clipped filter,
//                4 = strong filter
//   alpha,beta : thresholds
//   tc0_vector : clipping values indexed by bs-1 (used for bs 1..3)
// Returns the filtered samples in the same byte layout. p3 and q3 are never
// changed. NOTE(review): the arithmetic appears to follow the H.264
// deblocking filter equations -- confirm against the standard.
function Bit#(64) filter_input(Bit#(64) in_pixels, Bool chroma_flag, Bit#(3) bs, Bit#(8) alpha, Bit#(5) beta, Vector#(3,Bit#(5)) tc0_vector);
Bit#(8) p[4];
Bit#(8) q[4];
p[3] = in_pixels[7:0];
p[2] = in_pixels[15:8];
p[1] = in_pixels[23:16];
p[0] = in_pixels[31:24];
q[0] = in_pixels[39:32];
q[1] = in_pixels[47:40];
q[2] = in_pixels[55:48];
q[3] = in_pixels[63:56];
Bit#(8) p_out[4];
Bit#(8) q_out[4];
// Per-side smoothness tests, and the 9-bit sum p0+q0 shared by several formulas.
Bool a_p_test = absdiff8(p[2],p[0]) < zeroExtend(beta);
Bool a_q_test = absdiff8(q[2],q[0]) < zeroExtend(beta);
Bit#(9) p0q0 = zeroExtend(p[0])+zeroExtend(q[0]);
if (bs == 4)
begin
// Strong filter. The three-sample variant is used per side only for luma
// when that side is smooth and the step across the edge is small.
Bool small_gap_test = absdiff8(p[0],q[0]) < (alpha >> 2)+2;
Bit#(11) p_outtemp[3];
Bit#(11) q_outtemp[3];
if (!chroma_flag && a_p_test && small_gap_test)
begin
// sum = p1 + p0 + q0
Bit#(11) sum = zeroExtend(p[1])+zeroExtend(p0q0);
p_outtemp[0] = (zeroExtend(p[2]) + (sum<<1) + zeroExtend(q[1]) + 4) >> 3;
p_outtemp[1] = (zeroExtend(p[2]) + sum + 2) >> 2;
p_outtemp[2] = (((zeroExtend(p[3])+zeroExtend(p[2]))<<1) + zeroExtend(p[2]) + sum + 4) >> 3;
end
else
begin
// Only p0 is modified: (2*p1 + p0 + q1 + 2) >> 2.
p_outtemp[0] = ((zeroExtend(p[1])<<1) + zeroExtend(p[0]) + zeroExtend(q[1]) + 2) >> 2;
p_outtemp[1] = zeroExtend(p[1]);
p_outtemp[2] = zeroExtend(p[2]);
end
if (!chroma_flag && a_q_test && small_gap_test)
begin
// sum = q1 + p0 + q0 (mirror image of the p side)
Bit#(11) sum = zeroExtend(q[1])+zeroExtend(p0q0);
q_outtemp[0] = (zeroExtend(p[1]) + (sum<<1) + zeroExtend(q[2]) + 4) >> 3;
q_outtemp[1] = (zeroExtend(q[2]) + sum + 2) >> 2;
q_outtemp[2] = (((zeroExtend(q[3])+zeroExtend(q[2]))<<1) + zeroExtend(q[2]) + sum + 4) >> 3;
end
else
begin
// Only q0 is modified: (2*q1 + q0 + p1 + 2) >> 2.
q_outtemp[0] = ((zeroExtend(q[1])<<1) + zeroExtend(q[0]) + zeroExtend(p[1]) + 2) >> 2;
q_outtemp[1] = zeroExtend(q[1]);
q_outtemp[2] = zeroExtend(q[2]);
end
p_out[0] = truncate(p_outtemp[0]);
p_out[1] = truncate(p_outtemp[1]);
p_out[2] = truncate(p_outtemp[2]);
q_out[0] = truncate(q_outtemp[0]);
q_out[1] = truncate(q_outtemp[1]);
q_out[2] = truncate(q_outtemp[2]);
end
else if(bs > 0)
begin
// Normal filter (bs 1..3): a correction delta clipped to +/- t_c is added to
// p0 and subtracted from q0, with the results saturated to 0..255.
Bit#(5) t_c0 = tc0_vector[bs-1];
Bit#(5) t_c = chroma_flag ? t_c0+1 : t_c0 + (a_p_test ? 1:0) + (a_q_test ? 1:0);
// 12-bit two's-complement arithmetic; bits [11:3] are the value >> 3.
Bit#(12) deltatemp = (((zeroExtend(q[0])-zeroExtend(p[0]))<<2)+zeroExtend(p[1])-zeroExtend(q[1])+4);
Bit#(6) delta = clip3symmetric9to6(deltatemp[11:3],t_c);
Bit#(10) p_out0temp = zeroExtend(p[0]) + signExtend(delta);
// Bit 9 set means the sum went negative (clamp to 0); bit 8 set means it
// exceeded 255 (clamp to 255).
p_out[0] = (p_out0temp[9]==1 ? 0 : (p_out0temp[8]==1 ? 255 : p_out0temp[7:0]));
Bit#(10) q_out0temp = zeroExtend(q[0]) - signExtend(delta);
q_out[0] = (q_out0temp[9]==1 ? 0 : (q_out0temp[8]==1 ? 255 : q_out0temp[7:0]));
// Rounded average (p0 + q0 + 1) >> 1.
Bit#(9) p0q0PLUS1 = p0q0+1;
Bit#(8) p0q0_av = p0q0PLUS1[8:1];
// Luma only: p1 / q1 receive a correction clipped to +/- t_c0 when their
// side is smooth.
if (!chroma_flag && a_p_test)
begin
Bit#(10) p_out1temp = zeroExtend(p[2]) + zeroExtend(p0q0_av) - (zeroExtend(p[1])<<1);
p_out[1] = p[1]+signExtend(clip3symmetric9to6(p_out1temp[9:1],t_c0));
end
else
p_out[1] = p[1];
if (!chroma_flag && a_q_test)
begin
Bit#(10) q_out1temp = zeroExtend(q[2]) + zeroExtend(p0q0_av) - (zeroExtend(q[1])<<1);
q_out[1] = q[1]+signExtend(clip3symmetric9to6(q_out1temp[9:1],t_c0));
end
else
q_out[1] = q[1];
p_out[2] = p[2];
q_out[2] = q[2];
end
else
begin
// bs == 0: no filtering.
p_out[0] = p[0];
q_out[0] = q[0];
p_out[1] = p[1];
q_out[1] = q[1];
p_out[2] = p[2];
q_out[2] = q[2];
end
p_out[3] = p[3];
q_out[3] = q[3];
return({q_out[3], q_out[2], q_out[1], q_out[0], p_out[0], p_out[1], p_out[2], p_out[3]});
endfunction
 
 
 
//-----------------------------------------------------------
// Deblocking Filter Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkDeblockFilter( IDeblockFilter );
 
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(deblockFilter_infifo_size);
FIFO#(DeblockFilterOT) outfifo <- mkFIFO();
 
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkFIFO;
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkFIFO;
FIFO#(MemResp#(32)) dataMemRespQ <- mkFIFO;
FIFO#(MemResp#(13)) parameterMemRespQ <- mkFIFO;
 
Reg#(Process) process <- mkReg(Passing);
Reg#(Bit#(1)) chromaFlag <- mkReg(0);
Reg#(Bit#(5)) dataReqCount <- mkReg(0);
Reg#(Bit#(5)) dataRespCount <- mkReg(0);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);
 
Reg#(Bool) filterTopMbEdgeFlag <- mkReg(False);
Reg#(Bool) filterLeftMbEdgeFlag <- mkReg(False);
Reg#(Bool) filterInternalEdgesFlag <- mkReg(False);
 
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb
 
Reg#(Bit#(2)) disable_deblocking_filter_idc <- mkReg(0);
Reg#(Bit#(5)) slice_alpha_c0_offset <- mkReg(0);
Reg#(Bit#(5)) slice_beta_offset <- mkReg(0);
 
Reg#(Bit#(6)) curr_qpy <- mkReg(0);
Reg#(Bit#(6)) left_qpy <- mkReg(0);
Reg#(Bit#(6)) top_qpy <- mkReg(0);
Reg#(Bit#(6)) curr_qpc <- mkReg(0);
Reg#(Bit#(6)) left_qpc <- mkReg(0);
Reg#(Bit#(6)) top_qpc <- mkReg(0);
Reg#(Bit#(1)) curr_intra <- mkReg(0);
Reg#(Bit#(1)) left_intra <- mkReg(0);
Reg#(Bit#(1)) top_intra <- mkReg(0);
 
Reg#(Bit#(8)) alphaMbEdge <- mkReg(0);
Reg#(Bit#(8)) alphaInternal <- mkReg(0);
Reg#(Bit#(5)) betaMbEdge <- mkReg(0);
Reg#(Bit#(5)) betaInternal <- mkReg(0);
Reg#(Vector#(3,Bit#(5))) tc0MbEdge <- mkRegU();
Reg#(Vector#(3,Bit#(5))) tc0Internal <- mkRegU();
 
Bit#(8) alpha_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
80, 90,101,113,127,144,162,182,203,226,
255,255};
Bit#(5) beta_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
18, 18};
Bit#(5) tc0_table[52][3] = {{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
{ 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
{ 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
{ 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
{ 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
{ 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }};
 
Reg#(Vector#(64,Bit#(32))) workVector <- mkRegU();
Reg#(Vector#(96,Bit#(32))) leftVector <- mkRegU();
Reg#(Vector#(16,Bit#(32))) topVector <- mkRegU();
 
Reg#(Bool) startLastOutput <- mkReg(False);
Reg#(Bool) outputingFinished <- mkReg(False);
Reg#(Bit#(2)) colNum <- mkReg(0);
Reg#(Bit#(2)) rowNum <- mkReg(0);
 
RFile1#(Bit#(4),Tuple2#(Bit#(3),Bit#(3))) bSfile <- mkRFile1Full();
 
 
//-----------------------------------------------------------
// Rules
// Pass-through state. Inspects the item at the head of infifo: most items are
// forwarded to outfifo wrapped in EDOT, picture/slice/QP parameters are
// captured in registers (the deblocking-control items are consumed without
// being forwarded), and a PBbS item switches the filter to Initialize while
// staying at the head of infifo.
rule passing ( process matches Passing );
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display("ccl5newunit");
$display("ccl5rbspbyte %h", xdata);
end
tagged SPSpic_width_in_mbs .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
picWidth <= xdata;
end
tagged SPSpic_height_in_map_units .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
picHeight <= xdata;
end
tagged PPSdeblocking_filter_control_present_flag .xdata :
begin
infifo.deq();
// A flag value of 0 resets the three slice-level deblocking parameters.
if (xdata == 0)
begin
disable_deblocking_filter_idc <= 0;
slice_alpha_c0_offset <= 0;
slice_beta_offset <= 0;
end
end
tagged SHfirst_mb_in_slice .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
// currMbHor starts as the raw macroblock number; rule currMbHorUpdate then
// reduces it into a (currMbHor, currMbVer) pair.
firstMb <= xdata;
currMb <= xdata;
currMbHor <= xdata;
currMbVer <= 0;
end
tagged SHdisable_deblocking_filter_idc .xdata :
begin
infifo.deq();
disable_deblocking_filter_idc <= xdata;
end
tagged SHslice_alpha_c0_offset .xdata :
begin
infifo.deq();
slice_alpha_c0_offset <= xdata;
end
tagged SHslice_beta_offset .xdata :
begin
infifo.deq();
slice_beta_offset <= xdata;
end
tagged IBTmb_qp .xdata :
begin
infifo.deq();
curr_qpy <= xdata.qpy;
curr_qpc <= xdata.qpc;
end
tagged PBbS .xdata :
begin
// Not dequeued here: rule horizontal consumes the PBbS items.
process <= Initialize;
end
tagged PBoutput .xdata :
begin
// Reported as an error; the item is not dequeued, so it stays at the head.
$display( "ERROR Deblocking Filter: passing PBoutput");
end
tagged EndOfFile :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display( "ccl5: EndOfFile reached");
//$finish(0);
end
default:
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
end
endcase
endrule
 
 
// Normalises the macroblock position by repeated subtraction: while currMbHor
// is not below the picture width, move whole rows from currMbHor into
// currMbVer. Eight rows are moved per firing when at least eight remain,
// otherwise one.
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) );
   Bit#(PicAreaSz) widthInMb = zeroExtend(picWidth);
   Bool eightRowsFit = ((currMbHor >> 3) >= widthInMb);

   currMbHor <= currMbHor - (eightRowsFit ? (widthInMb << 3) : widthInMb);
   currMbVer <= currMbVer + (eightRowsFit ? 8 : 1);
endrule
 
// Per-macroblock set-up, run once the macroblock position is normalised
// (currMbHor < picWidth). Starts the top-data load counters, decides which
// edges are to be filtered, resets the block/pixel counters and loads the
// alpha/beta/tc0 thresholds used for the macroblock edge during the
// horizontal pass. Then moves to the Horizontal state.
rule initialize ( process==Initialize && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: initialize %0d", currMb);
process <= Horizontal;
// A count of 1 enables the dataSendReq and dataReceive* rules.
dataReqCount <= 1;
dataRespCount <= 1;
// The top edge is skipped when currMb < picWidth, when filtering is disabled
// (idc==1), or when idc==2 and currMb-firstMb < picWidth.
filterTopMbEdgeFlag <= !(currMb<zeroExtend(picWidth) || disable_deblocking_filter_idc==1 || (disable_deblocking_filter_idc==2 && currMb-firstMb<zeroExtend(picWidth)));
// The left edge is skipped at column 0, when filtering is disabled, or when
// idc==2 and this is the first macroblock of the slice.
filterLeftMbEdgeFlag <= !(currMbHor==0 || disable_deblocking_filter_idc==1 || (disable_deblocking_filter_idc==2 && currMb==firstMb));
filterInternalEdgesFlag <= !(disable_deblocking_filter_idc==1);
blockNum <= 0;
pixelNum <= 0;
// Rounded average of the current and left macroblock QP (luma or chroma).
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc);
Bit#(6) left_qp = (chromaFlag==0 ? left_qpy : left_qpc);
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(left_qp)+1;
Bit#(6) qpav = qpavtemp[6:1];
// Add the signed slice offsets, then clamp to 0..51: bit 7 set means the sum
// is negative.
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset);
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset);
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0]));
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0]));
alphaMbEdge <= alpha_table[indexA];
betaMbEdge <= beta_table[indexB];
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]);
tc0MbEdge <= tc0temp;
endrule
 
 
// Issues the memory loads for the data above the current macroblock: one
// parameter load (with the first request) and sixteen 32-bit data loads,
// one per firing. When currMb < picWidth no loads are issued and the
// counter is simply cleared.
rule dataSendReq ( dataReqCount>0 && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: dataSendReq %0d", dataReqCount);
Bit#(PicWidthSz) temp = truncate(currMbHor);
if(currMb<zeroExtend(picWidth))
dataReqCount <= 0;
else
begin
if(dataReqCount==1)
parameterMemReqQ.enq(LoadReq temp);
// Data address = {macroblock column, chromaFlag, word index 0..15}.
Bit#(4) temp2 = truncate(dataReqCount-1);
let temp3 = {temp,chromaFlag,temp2};
dataMemReqQ.enq(LoadReq temp3);
if(dataReqCount==16)
dataReqCount <= 0;
else
dataReqCount <= dataReqCount+1;
end
endrule
 
 
// Counterpart of dataSendReq for the case where no loads were issued
// (currMb < picWidth): no responses will arrive, so the response counter
// is cleared directly.
rule dataReceiveNoResp ( dataRespCount>0 && currMb<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: dataReceiveNoResp");
dataRespCount <= 0;
endrule
 
// Collects the responses to the loads issued by dataSendReq. One data word is
// stored into topVector per firing (sixteen in total); the first firing also
// takes the parameter response and unpacks the top neighbour's QP values and
// intra flag.
rule dataReceiveResp ( dataRespCount>0 && !(currMb<zeroExtend(picWidth)) && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: dataReceiveResp %0d", dataRespCount);
Bit#(4) temp = truncate(dataRespCount-1);
Vector#(16,Bit#(32)) topVectorNext = topVector;
if(dataRespCount==1)
begin
// Parameter word layout: [5:0] luma QP, [11:6] chroma QP, [12] intra flag.
Bit#(13) tempParameters=0;
if(parameterMemRespQ.first() matches tagged LoadResp .xdata)
tempParameters = xdata;
top_qpy <= tempParameters[5:0];
top_qpc <= tempParameters[11:6];
top_intra <= tempParameters[12];
parameterMemRespQ.deq();
end
if(dataRespCount==16)
dataRespCount <= 0;
else
dataRespCount <= dataRespCount+1;
if(dataMemRespQ.first() matches tagged LoadResp .xdata)
topVectorNext[temp] = xdata;
dataMemRespQ.deq();
topVector <= topVectorNext;
//$display( "TRACE Deblocking Filter: dataReceiveResp topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]);
endrule
 
 
// Horizontal pass. PBbS items are recorded in bSfile; each PBoutput item (one
// row of four pixels) is filtered against the four pixels to its left, taken
// from leftVector on the macroblock's left edge and from workVector
// otherwise, and both halves are written back. After the last row of the last
// block the rule loads the thresholds for the top macroblock edge and moves
// to the Vertical state.
rule horizontal ( process==Horizontal && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: horizontal %0d %0d %0d", blockNum, pixelNum, infifo.first());
// blockNum interleaves the block coordinates: bits 2,0 = column, bits 3,1 = row.
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Vector#(96,Bit#(32)) leftVectorNext = leftVector;
Vector#(64,Bit#(32)) workVectorNext = workVector;
Bool leftEdge = (blockNum[0]==0 && (blockNum[2]==0 || chromaFlag==1));
if(blockNum==0 && pixelNum==0)
begin
// At the start of a macroblock, load the thresholds for internal edges from
// the current QP alone (offsets added, index clamped to 0..51).
Bit#(6) qpav = (chromaFlag==0 ? curr_qpy : curr_qpc);
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset);
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset);
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0]));
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0]));
alphaInternal <= alpha_table[indexA];
betaInternal <= beta_table[indexB];
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]);
tc0Internal <= tc0temp;
end
case (infifo.first()) matches
tagged PBbS .xdata :
begin
infifo.deq();
bSfile.upd(blockNum,tuple2(xdata.bShor,xdata.bSver));
end
tagged PBoutput .xdata :
begin
infifo.deq();
// addrq: where the incoming row is stored; addrpLeft / addrpCurr: where the
// row to its left lives (leftVector or workVector).
Bit#(6) addrq = {blockHor,blockVer,pixelVer};
Bit#(7) addrpLeft = (chromaFlag==0 ? {3'b011,blockVer,pixelVer} : {2'b10,blockHor[1],1'b1,blockVer[0],pixelVer});
Bit#(6) addrpCurr = {(blockHor-1),blockVer,pixelVer};
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]};
Bit#(32) pixelp;
if(leftEdge)
pixelp = leftVector[addrpLeft];
else
pixelp = workVector[addrpCurr];
// Unfiltered default; replaced only when the edge is enabled and passes
// filter_test on the four samples nearest the edge.
Bit#(64) result = {pixelq,pixelp};
if(leftEdge && filterLeftMbEdgeFlag)
begin
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaMbEdge,betaMbEdge))
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaMbEdge,betaMbEdge,tc0MbEdge);
end
else if(!leftEdge && filterInternalEdgesFlag)
begin
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaInternal,betaInternal))
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaInternal,betaInternal,tc0Internal);
end
if(leftEdge)
leftVectorNext[addrpLeft] = result[31:0];
else
workVectorNext[addrpCurr] = result[31:0];
workVectorNext[addrq] = result[63:32];
leftVector <= leftVectorNext;
workVector <= workVectorNext;
// Last row (pixelNum==12) of the last block: 15 for luma, 7 for chroma.
if(pixelNum==12 && (blockNum==15 || (blockNum==7 && chromaFlag==1)))
begin
blockNum <= 0;
process <= Vertical;
startLastOutput <= False;
outputingFinished <= False;
colNum <= 0;
// Row 0 is the top macroblock edge; start at row 1 when it is not filtered.
if(filterTopMbEdgeFlag)
rowNum <= 0;
else
rowNum <= 1;
// Reload the macroblock-edge thresholds from the average of the current and
// top macroblock QP.
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc);
Bit#(6) top_qp = (chromaFlag==0 ? top_qpy : top_qpc);
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(top_qp)+1;
Bit#(6) qpav = qpavtemp[6:1];
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset);
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset);
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0]));
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0]));
alphaMbEdge <= alpha_table[indexA];
betaMbEdge <= beta_table[indexB];
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]);
tc0MbEdge <= tc0temp;
end
else if(pixelNum==12)
blockNum <= blockNum+1;
// pixelNum is 4 bits wide, so 12+4 wraps back to 0 for the next block.
pixelNum <= pixelNum+4;
end
//default: $display( "ERROR Deblocking Filter: horizontal non-PBoutput input");
endcase
endrule
 
 
// Vertical pass. For the block at column colNum / row rowNum, gathers the
// four rows of the block above (topVector for row 0, workVector otherwise)
// and the four rows of the block itself, filters each of the four byte
// lanes as an 8-sample column, and scatters the results back. Waits until
// the top data has been received (dataRespCount==0).
rule vertical ( process==Vertical && !startLastOutput && dataRespCount==0 && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: vertical %0d %0d", colNum, rowNum);
//$display( "TRACE Deblocking Filter: vertical topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]);
Bool topEdge = (rowNum==0);
Vector#(64,Bit#(32)) workVectorNext = workVector;
Vector#(16,Bit#(32)) topVectorNext = topVector;
Vector#(64,Bit#(32)) workV = workVector;
Vector#(4,Bit#(32)) tempV = replicate(0);
Vector#(4,Bit#(64)) resultV = replicate(0);
Bit#(8) alpha;
Bit#(5) beta;
Vector#(3,Bit#(5)) tc0;
// workVector block index; crNum-1 addresses the block one row up in the same column.
Bit#(4) crNum = {colNum,rowNum};
if(topEdge)
begin
// Macroblock top edge: neighbour rows come from topVector, with the
// macroblock-edge thresholds.
tempV[0] = topVector[{colNum,2'b00}];
tempV[1] = topVector[{colNum,2'b01}];
tempV[2] = topVector[{colNum,2'b10}];
tempV[3] = topVector[{colNum,2'b11}];
alpha = alphaMbEdge;
beta = betaMbEdge;
tc0 = tc0MbEdge;
end
else
begin
// Internal edge: neighbour rows come from the block above in workVector.
tempV[0] = workV[{(crNum-1),2'b00}];
tempV[1] = workV[{(crNum-1),2'b01}];
tempV[2] = workV[{(crNum-1),2'b10}];
tempV[3] = workV[{(crNum-1),2'b11}];
alpha = alphaInternal;
beta = betaInternal;
tc0 = tc0Internal;
end
// Transpose: resultV[k] is byte lane k of the eight rows, neighbour rows in
// the low 32 bits and this block's rows in the high 32 bits.
resultV[0] = {workV[{crNum,2'b11}][7:0],workV[{crNum,2'b10}][7:0],workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0],tempV[1][7:0],tempV[0][7:0]};
resultV[1] = {workV[{crNum,2'b11}][15:8],workV[{crNum,2'b10}][15:8],workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8],tempV[1][15:8],tempV[0][15:8]};
resultV[2] = {workV[{crNum,2'b11}][23:16],workV[{crNum,2'b10}][23:16],workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16],tempV[1][23:16],tempV[0][23:16]};
resultV[3] = {workV[{crNum,2'b11}][31:24],workV[{crNum,2'b10}][31:24],workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24],tempV[1][31:24],tempV[0][31:24]};
// Filter each lane that passes filter_test. For chroma the bS entry index
// ends in 2'b00 for lanes 0-1 and 2'b01 for lanes 2-3.
if(filter_test({workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0]},alpha,beta))
resultV[0] = filter_input(resultV[0],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8]},alpha,beta))
resultV[1] = filter_input(resultV[1],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16]},alpha,beta))
resultV[2] = filter_input(resultV[2],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0);
if(filter_test({workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24]},alpha,beta))
resultV[3] = filter_input(resultV[3],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0);
// Transpose back: the low 32 bits of every lane go to the neighbour rows,
// the high 32 bits to this block's rows.
if(topEdge)
begin
topVectorNext[{colNum,2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]};
topVectorNext[{colNum,2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]};
topVectorNext[{colNum,2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]};
topVectorNext[{colNum,2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]};
end
else
begin
workVectorNext[{(crNum-1),2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]};
workVectorNext[{(crNum-1),2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]};
workVectorNext[{(crNum-1),2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]};
workVectorNext[{(crNum-1),2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]};
end
workVectorNext[{crNum,2'b00}] = {resultV[3][39:32],resultV[2][39:32],resultV[1][39:32],resultV[0][39:32]};
workVectorNext[{crNum,2'b01}] = {resultV[3][47:40],resultV[2][47:40],resultV[1][47:40],resultV[0][47:40]};
workVectorNext[{crNum,2'b10}] = {resultV[3][55:48],resultV[2][55:48],resultV[1][55:48],resultV[0][55:48]};
workVectorNext[{crNum,2'b11}] = {resultV[3][63:56],resultV[2][63:56],resultV[1][63:56],resultV[0][63:56]};
if(topEdge)
topVector <= topVectorNext;
workVector <= workVectorNext;
// Advance down the column (rows 0..3 for luma, 0..1 for chroma), then on to
// the next column; after the last column signal startLastOutput.
if(rowNum==3 || (chromaFlag==1 && rowNum==1))
begin
if(colNum==3)
startLastOutput <= True;
else
begin
if(filterTopMbEdgeFlag)
rowNum <= 0;
else
rowNum <= 1;
end
colNum <= colNum+1;
end
else
rowNum <= rowNum+1;
endrule
 
 
// Output stage, active during the Vertical state. Streams words from
// leftVector to outfifo, addressed to the macroblock to the left of the
// current one. For the last block row (blockNum 3, or 1 for chroma) it waits
// for startLastOutput, then stores the word to data memory and, when
// currMbVer > 0, also emits the corresponding word of topVector.
rule outputing ( process==Vertical && !outputingFinished && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: outputting %0d %0d", blockNum, pixelNum);
Bit#(2) blockHor = pixelNum[1:0];
Bit#(2) blockVer = blockNum[1:0];
Bit#(2) pixelVer = pixelNum[3:2];
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor);
Bool stalling = False;
if(currMb==0)
begin
// Macroblock 0 produces no output here; just finish once the vertical pass
// is done.
if(startLastOutput)
outputingFinished <= True;
end
else
begin
Bit#(7) leftAddr;
if(chromaFlag==0)
leftAddr = {1'b0,blockHor,blockVer,pixelVer};
else
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer};
Bit#(32) leftData = leftVector[leftAddr];
if(!(blockNum==3 || (blockNum==1 && chromaFlag==1)))
begin
// Output coordinates are those of the previous macroblock; at column 0 that
// is the last macroblock of the row above.
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData});
end
else if(startLastOutput)
begin
// Last block row: stored to data memory rather than sent to outfifo.
Bit#(PicWidthSz) temp = ((currMbHor==0) ? (picWidth-1) : truncate(currMbHor-1));
dataMemReqQ.enq(StoreReq {addr:{temp,chromaFlag,blockHor,pixelVer},data:leftData});
if(currMbVer > 0)
begin
//$display( "TRACE Deblocking Filter: outputting last output %0d %0d %h", blockHor, pixelVer, topVector[{blockHor,pixelVer}]);
// topVector data is emitted at the bottom rows of the macroblock above.
Bit#(32) topData = topVector[{blockHor,pixelVer}];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{currMbVer-1,2'b11,pixelVer},hor:{currMbHorT,blockHor},data:topData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer-1,1'b1,pixelVer},hor:{currMbHorT,blockHor[0]},data:topData});
end
end
else
// Last block row, but the vertical pass has not finished yet: hold the counters.
stalling = True;
if(!stalling)
begin
if(pixelNum==15)
begin
if(blockNum==3 || (chromaFlag==1 && blockNum==1))
begin
// In the last macroblock row blockNum is left at the last block row;
// otherwise it restarts at 0.
if(currMbVer==picHeight-1)
blockNum <= (chromaFlag==0 ? 3 : 1);
else
blockNum <= 0;
outputingFinished <= True;
end
else
blockNum <= blockNum+1;
end
pixelNum <= pixelNum+1;
end
end
endrule
 
 
// Rule "verticaltocleanup": once the Vertical stage has both requested its last
// output (startLastOutput) and seen it completed (outputingFinished), move the
// state machine to Cleanup and clear the two handshake flags.
rule verticaltocleanup ( outputingFinished && startLastOutput && process==Vertical );
   // The three register writes are independent of one another.
   outputingFinished <= False;
   startLastOutput   <= False;
   process           <= Cleanup;
endrule
 
 
// Rule "cleanup": fires while process==Cleanup and currMbHor is inside the picture
// width.  Behaviour is selected by blockNum:
//   * blockNum==0     : copy workVector into leftVector and leave the Cleanup state
//                       (to Initialize after luma, to Passing after chroma);
//   * 0 < blockNum < 8: emit one leftVector word per firing on outfifo;
//   * blockNum >= 8   : emit one workVector word per firing on outfifo.
rule cleanup ( process==Cleanup && currMbHor<zeroExtend(picWidth) );
//$display( "TRACE Deblocking Filter: cleanup %0d %0d", blockNum, pixelNum);
// Split the two counters into the index fields used for addressing below.
Bit#(2) blockHor = pixelNum[1:0];
Bit#(2) blockVer = blockNum[1:0];
Bit#(2) pixelVer = pixelNum[3:2];
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor);
// Working copy of leftVector; only written back in the blockNum==0 branch.
Vector#(96,Bit#(32)) leftVectorNext = leftVector;
if(blockNum==0)
begin
if(chromaFlag==0)
begin
// Luma finished: entries 0..63 of workVector become entries 0..63 of
// leftVector, then the chroma pass starts from the Initialize state.
for(Integer ii=0; ii<64; ii=ii+1)
leftVectorNext[fromInteger(ii)] = workVector[fromInteger(ii)];
chromaFlag <= 1;
process <= Initialize;
end
else
begin
// Chroma finished: 32 workVector entries (index {ii[4:3],0,ii[2:0]}) are copied
// into leftVector entries 64..95 (index {2'b10,ii}).
for(Integer ii=0; ii<32; ii=ii+1)
begin
Bit#(5) tempAddr = fromInteger(ii);
leftVectorNext[{2'b10,tempAddr}] = workVector[{tempAddr[4:3],1'b0,tempAddr[2:0]}];
end
chromaFlag <= 0;
process <= Passing;
// Store this macroblock's {curr_intra,curr_qpc,curr_qpy} in parameter memory at
// address currMbHor, and keep the same values as the left_* registers.
Bit#(PicWidthSz) temp = truncate(currMbHor);
parameterMemReqQ.enq(StoreReq {addr:temp,data:{curr_intra,curr_qpc,curr_qpy}});
left_intra <= curr_intra;
left_qpc <= curr_qpc;
left_qpy <= curr_qpy;
// Advance to the next macroblock.
currMb <= currMb+1;
currMbHor <= currMbHor+1;
// Last column of the last row: signal the end of the frame downstream.
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1))
outfifo.enq(EndOfFrame);
end
leftVector <= leftVectorNext;
end
else if(blockNum < 8)
begin
// Emit a leftVector word; addressing and coordinates are the same as in the
// "outputing" rule (left macroblock, wrapping to the previous row at column 0).
Bit#(7) leftAddr;
if(chromaFlag==0)
leftAddr = {1'b0,blockHor,blockVer,pixelVer};
else
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer};
Bit#(32) leftData = leftVector[leftAddr];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData});
if(pixelNum==15)
begin
// After the 16th word: in the last column continue with the workVector
// branch (blockNum 8), otherwise go to the blockNum==0 copy step.
if(currMbHor==zeroExtend(picWidth-1))
blockNum <= 8;
else
blockNum <= 0;
end
pixelNum <= pixelNum+1;
end
else
begin
// blockNum >= 8: emit a workVector word tagged with the current macroblock's
// own coordinates (currMbVer / currMbHorT).
Bit#(6) currAddr = {blockHor,blockVer,pixelVer};
Bit#(32) currData = workVector[currAddr];
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:currData});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:currData});
if(pixelNum==15)
begin
// After the last block (low bits 3 for luma, 1 for chroma) return to
// blockNum 0; otherwise move to the next block.
if(blockNum[1:0]==3 || (blockNum[1:0]==1 && chromaFlag==1))
blockNum <= 0;
else
blockNum <= blockNum+1;
end
pixelNum <= pixelNum+1;
end
endrule
 
 
 
// Data-memory client port: requests are drained from dataMemReqQ (Get side) and
// responses are pushed into dataMemRespQ (Put side).
interface Client mem_client_data;
interface Get request = fifoToGet(dataMemReqQ);
interface Put response = fifoToPut(dataMemRespQ);
endinterface
 
// Parameter-memory client port: requests are drained from parameterMemReqQ (Get
// side) and responses are pushed into parameterMemRespQ (Put side).
interface Client mem_client_parameter;
interface Get request = fifoToGet(parameterMemReqQ);
interface Put response = fifoToPut(parameterMemRespQ);
endinterface
 
// Inter-module stream ports: ioin feeds infifo, ioout drains outfifo.
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
endmodule
 
endpackage
/trunk/src/IBufferControl.bsv
0,0 → 1,31
//**********************************************************************
// Interface for Buffer Controller
//----------------------------------------------------------------------
//
//
//
 
package IBufferControl;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Port list of the buffer controller.  Only declarations live here; the behaviour
// is supplied by whichever module provides this interface.
interface IBufferControl;

// Interface for inter-module io
// ioin accepts DeblockFilterOT items; ioout delivers BufferControlOT items.
interface Put#(DeblockFilterOT) ioin;
interface Get#(BufferControlOT) ioout;

// Interface for module to frame buffer
// Two load clients (FrameBufferLoadReq out, FrameBufferLoadResp back) and one
// store port that only issues FrameBufferStoreReq requests (no response path).
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load1;
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load2;
interface Get#(FrameBufferStoreReq) buffer_client_store;

// Interface for module to interpolation
// Server side: receives InterpolatorLoadReq and answers with InterpolatorLoadResp.
interface Server#(InterpolatorLoadReq,InterpolatorLoadResp) inter_server;

endinterface

endpackage
 
/trunk/src/IEntropyDec.bsv
0,0 → 1,27
//**********************************************************************
// Interface for Entropy Decoder
//----------------------------------------------------------------------
//
//
//
 
package IEntropyDec;

import H264Types::*;
import GetPut::*;
import ClientServer::*;

// Port list of the entropy decoder.  Only declarations live here; the behaviour
// is supplied by whichever module provides this interface.
interface IEntropyDec;

// Interface for inter-module io
// ioin accepts NalUnwrapOT items.  There are two output streams: ioout carries
// EntropyDecOT items and ioout_InverseTrans carries EntropyDecOT_InverseTrans items.
interface Put#(NalUnwrapOT) ioin;
interface Get#(EntropyDecOT) ioout;
interface Get#(EntropyDecOT_InverseTrans) ioout_InverseTrans;

// Interface for module to memory
// NOTE(review): MemReq/MemResp are declared elsewhere; the parameters presumably
// mean a (PicWidthSz+1)-bit address and 20-bit data words - confirm in H264Types.
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_client;

endinterface

endpackage
 
/trunk/src/CAVLC.bsv
0,0 → 1,651
//**********************************************************************
// CAVLC codes
//----------------------------------------------------------------------
//
//
//
 
package CAVLC;
 
import H264Types::*;
 
//-----------------------------------------------------------
// Helper functions
 
 
// cavlc_coeff_token: table lookup that matches the most significant bits of
// inbuffer against one of five variable-length code tables, chosen by nC.
//   inbuffer : bit buffer; only its top bits (buffersize-1 downwards) are examined.
//   nC       : 6-bit table selector, tested from the top bit down:
//                bit 5 set            -> 8-bit window table
//                bit 4 or 3 set       -> 6-bit fixed-length table (nC in 8..31)
//                bit 2 set            -> 10-bit window table      (nC in 4..7)
//                bit 1 set            -> 14-bit window table      (nC in 2..3)
//                otherwise            -> 16-bit window table      (nC in 0..1)
// Returns a 3-tuple.  The third element is always the width in bits of the
// pattern that matched.  NOTE(review): the first two elements are presumably
// TrailingOnes and TotalCoeff, and the tables appear to follow H.264 Table 9-5 -
// confirm against the standard.
// If no pattern matches, tuple3(0,0,100) is returned; 100 is larger than any code
// length in the tables, so it presumably acts as an error marker - confirm with
// the caller.
(* noinline *)
function Tuple3#(Bit#(2),Bit#(5),Bufcount) cavlc_coeff_token( Buffer inbuffer, Bit#(6) nC );
// Table selected when bit 5 of nC is set.
// NOTE(review): presumably the encoding of nC = -1 (chroma DC) - confirm.
if(nC[5] == 1)
begin
Bit#(8) buffertemp = inbuffer[buffersize-1:buffersize-8];
if(buffertemp[7:6] == 2'b01) return tuple3(0,0,2);
else if(buffertemp[7:2] == 6'b000111) return tuple3(0,1,6);
else if(buffertemp[7:7] == 1'b1) return tuple3(1,1,1);
else if(buffertemp[7:2] == 6'b000100) return tuple3(0,2,6);
else if(buffertemp[7:2] == 6'b000110) return tuple3(1,2,6);
else if(buffertemp[7:5] == 3'b001) return tuple3(2,2,3);
else if(buffertemp[7:2] == 6'b000011) return tuple3(0,3,6);
else if(buffertemp[7:1] == 7'b0000011) return tuple3(1,3,7);
else if(buffertemp[7:1] == 7'b0000010) return tuple3(2,3,7);
else if(buffertemp[7:2] == 6'b000101) return tuple3(3,3,6);
else if(buffertemp[7:2] == 6'b000010) return tuple3(0,4,6);
else if(buffertemp[7:0] == 8'b00000011) return tuple3(1,4,8);
else if(buffertemp[7:0] == 8'b00000010) return tuple3(2,4,8);
else if(buffertemp[7:1] == 7'b0000000) return tuple3(3,4,7);
else return tuple3(0,0,100);
end
// Table selected when nC is in 8..31: every code is exactly 6 bits long.
// The patterns 000010 and 000111 have no entry and fall through to the 100 marker.
else if(nC[4] == 1 || nC[3] == 1)
begin
Bit#(6) buffertemp = inbuffer[buffersize-1:buffersize-6];
if(buffertemp[5:0] == 6'b000011) return tuple3(0,0,6);
else if(buffertemp[5:0] == 6'b000000) return tuple3(0,1,6);
else if(buffertemp[5:0] == 6'b000001) return tuple3(1,1,6);
else if(buffertemp[5:0] == 6'b000100) return tuple3(0,2,6);
else if(buffertemp[5:0] == 6'b000101) return tuple3(1,2,6);
else if(buffertemp[5:0] == 6'b000110) return tuple3(2,2,6);
else if(buffertemp[5:0] == 6'b001000) return tuple3(0,3,6);
else if(buffertemp[5:0] == 6'b001001) return tuple3(1,3,6);
else if(buffertemp[5:0] == 6'b001010) return tuple3(2,3,6);
else if(buffertemp[5:0] == 6'b001011) return tuple3(3,3,6);
else if(buffertemp[5:0] == 6'b001100) return tuple3(0,4,6);
else if(buffertemp[5:0] == 6'b001101) return tuple3(1,4,6);
else if(buffertemp[5:0] == 6'b001110) return tuple3(2,4,6);
else if(buffertemp[5:0] == 6'b001111) return tuple3(3,4,6);
else if(buffertemp[5:0] == 6'b010000) return tuple3(0,5,6);
else if(buffertemp[5:0] == 6'b010001) return tuple3(1,5,6);
else if(buffertemp[5:0] == 6'b010010) return tuple3(2,5,6);
else if(buffertemp[5:0] == 6'b010011) return tuple3(3,5,6);
else if(buffertemp[5:0] == 6'b010100) return tuple3(0,6,6);
else if(buffertemp[5:0] == 6'b010101) return tuple3(1,6,6);
else if(buffertemp[5:0] == 6'b010110) return tuple3(2,6,6);
else if(buffertemp[5:0] == 6'b010111) return tuple3(3,6,6);
else if(buffertemp[5:0] == 6'b011000) return tuple3(0,7,6);
else if(buffertemp[5:0] == 6'b011001) return tuple3(1,7,6);
else if(buffertemp[5:0] == 6'b011010) return tuple3(2,7,6);
else if(buffertemp[5:0] == 6'b011011) return tuple3(3,7,6);
else if(buffertemp[5:0] == 6'b011100) return tuple3(0,8,6);
else if(buffertemp[5:0] == 6'b011101) return tuple3(1,8,6);
else if(buffertemp[5:0] == 6'b011110) return tuple3(2,8,6);
else if(buffertemp[5:0] == 6'b011111) return tuple3(3,8,6);
else if(buffertemp[5:0] == 6'b100000) return tuple3(0,9,6);
else if(buffertemp[5:0] == 6'b100001) return tuple3(1,9,6);
else if(buffertemp[5:0] == 6'b100010) return tuple3(2,9,6);
else if(buffertemp[5:0] == 6'b100011) return tuple3(3,9,6);
else if(buffertemp[5:0] == 6'b100100) return tuple3(0,10,6);
else if(buffertemp[5:0] == 6'b100101) return tuple3(1,10,6);
else if(buffertemp[5:0] == 6'b100110) return tuple3(2,10,6);
else if(buffertemp[5:0] == 6'b100111) return tuple3(3,10,6);
else if(buffertemp[5:0] == 6'b101000) return tuple3(0,11,6);
else if(buffertemp[5:0] == 6'b101001) return tuple3(1,11,6);
else if(buffertemp[5:0] == 6'b101010) return tuple3(2,11,6);
else if(buffertemp[5:0] == 6'b101011) return tuple3(3,11,6);
else if(buffertemp[5:0] == 6'b101100) return tuple3(0,12,6);
else if(buffertemp[5:0] == 6'b101101) return tuple3(1,12,6);
else if(buffertemp[5:0] == 6'b101110) return tuple3(2,12,6);
else if(buffertemp[5:0] == 6'b101111) return tuple3(3,12,6);
else if(buffertemp[5:0] == 6'b110000) return tuple3(0,13,6);
else if(buffertemp[5:0] == 6'b110001) return tuple3(1,13,6);
else if(buffertemp[5:0] == 6'b110010) return tuple3(2,13,6);
else if(buffertemp[5:0] == 6'b110011) return tuple3(3,13,6);
else if(buffertemp[5:0] == 6'b110100) return tuple3(0,14,6);
else if(buffertemp[5:0] == 6'b110101) return tuple3(1,14,6);
else if(buffertemp[5:0] == 6'b110110) return tuple3(2,14,6);
else if(buffertemp[5:0] == 6'b110111) return tuple3(3,14,6);
else if(buffertemp[5:0] == 6'b111000) return tuple3(0,15,6);
else if(buffertemp[5:0] == 6'b111001) return tuple3(1,15,6);
else if(buffertemp[5:0] == 6'b111010) return tuple3(2,15,6);
else if(buffertemp[5:0] == 6'b111011) return tuple3(3,15,6);
else if(buffertemp[5:0] == 6'b111100) return tuple3(0,16,6);
else if(buffertemp[5:0] == 6'b111101) return tuple3(1,16,6);
else if(buffertemp[5:0] == 6'b111110) return tuple3(2,16,6);
else if(buffertemp[5:0] == 6'b111111) return tuple3(3,16,6);
else return tuple3(0,0,100);
end
// Table selected when nC is in 4..7: codes of 4 to 10 bits.
else if(nC[2] == 1)
begin
Bit#(10) buffertemp = inbuffer[buffersize-1:buffersize-10];
if(buffertemp[9:6] == 4'b1111) return tuple3(0,0,4);
else if(buffertemp[9:4] == 6'b001111) return tuple3(0,1,6);
else if(buffertemp[9:6] == 4'b1110) return tuple3(1,1,4);
else if(buffertemp[9:4] == 6'b001011) return tuple3(0,2,6);
else if(buffertemp[9:5] == 5'b01111) return tuple3(1,2,5);
else if(buffertemp[9:6] == 4'b1101) return tuple3(2,2,4);
else if(buffertemp[9:4] == 6'b001000) return tuple3(0,3,6);
else if(buffertemp[9:5] == 5'b01100) return tuple3(1,3,5);
else if(buffertemp[9:5] == 5'b01110) return tuple3(2,3,5);
else if(buffertemp[9:6] == 4'b1100) return tuple3(3,3,4);
else if(buffertemp[9:3] == 7'b0001111) return tuple3(0,4,7);
else if(buffertemp[9:5] == 5'b01010) return tuple3(1,4,5);
else if(buffertemp[9:5] == 5'b01011) return tuple3(2,4,5);
else if(buffertemp[9:6] == 4'b1011) return tuple3(3,4,4);
else if(buffertemp[9:3] == 7'b0001011) return tuple3(0,5,7);
else if(buffertemp[9:5] == 5'b01000) return tuple3(1,5,5);
else if(buffertemp[9:5] == 5'b01001) return tuple3(2,5,5);
else if(buffertemp[9:6] == 4'b1010) return tuple3(3,5,4);
else if(buffertemp[9:3] == 7'b0001001) return tuple3(0,6,7);
else if(buffertemp[9:4] == 6'b001110) return tuple3(1,6,6);
else if(buffertemp[9:4] == 6'b001101) return tuple3(2,6,6);
else if(buffertemp[9:6] == 4'b1001) return tuple3(3,6,4);
else if(buffertemp[9:3] == 7'b0001000) return tuple3(0,7,7);
else if(buffertemp[9:4] == 6'b001010) return tuple3(1,7,6);
else if(buffertemp[9:4] == 6'b001001) return tuple3(2,7,6);
else if(buffertemp[9:6] == 4'b1000) return tuple3(3,7,4);
else if(buffertemp[9:2] == 8'b00001111) return tuple3(0,8,8);
else if(buffertemp[9:3] == 7'b0001110) return tuple3(1,8,7);
else if(buffertemp[9:3] == 7'b0001101) return tuple3(2,8,7);
else if(buffertemp[9:5] == 5'b01101) return tuple3(3,8,5);
else if(buffertemp[9:2] == 8'b00001011) return tuple3(0,9,8);
else if(buffertemp[9:2] == 8'b00001110) return tuple3(1,9,8);
else if(buffertemp[9:3] == 7'b0001010) return tuple3(2,9,7);
else if(buffertemp[9:4] == 6'b001100) return tuple3(3,9,6);
else if(buffertemp[9:1] == 9'b000001111) return tuple3(0,10,9);
else if(buffertemp[9:2] == 8'b00001010) return tuple3(1,10,8);
else if(buffertemp[9:2] == 8'b00001101) return tuple3(2,10,8);
else if(buffertemp[9:3] == 7'b0001100) return tuple3(3,10,7);
else if(buffertemp[9:1] == 9'b000001011) return tuple3(0,11,9);
else if(buffertemp[9:1] == 9'b000001110) return tuple3(1,11,9);
else if(buffertemp[9:2] == 8'b00001001) return tuple3(2,11,8);
else if(buffertemp[9:2] == 8'b00001100) return tuple3(3,11,8);
else if(buffertemp[9:1] == 9'b000001000) return tuple3(0,12,9);
else if(buffertemp[9:1] == 9'b000001010) return tuple3(1,12,9);
else if(buffertemp[9:1] == 9'b000001101) return tuple3(2,12,9);
else if(buffertemp[9:2] == 8'b00001000) return tuple3(3,12,8);
else if(buffertemp[9:0] == 10'b0000001101) return tuple3(0,13,10);
else if(buffertemp[9:1] == 9'b000000111) return tuple3(1,13,9);
else if(buffertemp[9:1] == 9'b000001001) return tuple3(2,13,9);
else if(buffertemp[9:1] == 9'b000001100) return tuple3(3,13,9);
else if(buffertemp[9:0] == 10'b0000001001) return tuple3(0,14,10);
else if(buffertemp[9:0] == 10'b0000001100) return tuple3(1,14,10);
else if(buffertemp[9:0] == 10'b0000001011) return tuple3(2,14,10);
else if(buffertemp[9:0] == 10'b0000001010) return tuple3(3,14,10);
else if(buffertemp[9:0] == 10'b0000000101) return tuple3(0,15,10);
else if(buffertemp[9:0] == 10'b0000001000) return tuple3(1,15,10);
else if(buffertemp[9:0] == 10'b0000000111) return tuple3(2,15,10);
else if(buffertemp[9:0] == 10'b0000000110) return tuple3(3,15,10);
else if(buffertemp[9:0] == 10'b0000000001) return tuple3(0,16,10);
else if(buffertemp[9:0] == 10'b0000000100) return tuple3(1,16,10);
else if(buffertemp[9:0] == 10'b0000000011) return tuple3(2,16,10);
else if(buffertemp[9:0] == 10'b0000000010) return tuple3(3,16,10);
else return tuple3(0,0,100);
end
// Table selected when nC is in 2..3: codes of 2 to 14 bits.
else if(nC[1] == 1)
begin
Bit#(14) buffertemp = inbuffer[buffersize-1:buffersize-14];
if(buffertemp[13:12] == 2'b11) return tuple3(0,0,2);
else if(buffertemp[13:8] == 6'b001011) return tuple3(0,1,6);
else if(buffertemp[13:12] == 2'b10) return tuple3(1,1,2);
else if(buffertemp[13:8] == 6'b000111) return tuple3(0,2,6);
else if(buffertemp[13:9] == 5'b00111) return tuple3(1,2,5);
else if(buffertemp[13:11] == 3'b011) return tuple3(2,2,3);
else if(buffertemp[13:7] == 7'b0000111) return tuple3(0,3,7);
else if(buffertemp[13:8] == 6'b001010) return tuple3(1,3,6);
else if(buffertemp[13:8] == 6'b001001) return tuple3(2,3,6);
else if(buffertemp[13:10] == 4'b0101) return tuple3(3,3,4);
else if(buffertemp[13:6] == 8'b00000111) return tuple3(0,4,8);
else if(buffertemp[13:8] == 6'b000110) return tuple3(1,4,6);
else if(buffertemp[13:8] == 6'b000101) return tuple3(2,4,6);
else if(buffertemp[13:10] == 4'b0100) return tuple3(3,4,4);
else if(buffertemp[13:6] == 8'b00000100) return tuple3(0,5,8);
else if(buffertemp[13:7] == 7'b0000110) return tuple3(1,5,7);
else if(buffertemp[13:7] == 7'b0000101) return tuple3(2,5,7);
else if(buffertemp[13:9] == 5'b00110) return tuple3(3,5,5);
else if(buffertemp[13:5] == 9'b000000111) return tuple3(0,6,9);
else if(buffertemp[13:6] == 8'b00000110) return tuple3(1,6,8);
else if(buffertemp[13:6] == 8'b00000101) return tuple3(2,6,8);
else if(buffertemp[13:8] == 6'b001000) return tuple3(3,6,6);
else if(buffertemp[13:3] == 11'b00000001111) return tuple3(0,7,11);
else if(buffertemp[13:5] == 9'b000000110) return tuple3(1,7,9);
else if(buffertemp[13:5] == 9'b000000101) return tuple3(2,7,9);
else if(buffertemp[13:8] == 6'b000100) return tuple3(3,7,6);
else if(buffertemp[13:3] == 11'b00000001011) return tuple3(0,8,11);
else if(buffertemp[13:3] == 11'b00000001110) return tuple3(1,8,11);
else if(buffertemp[13:3] == 11'b00000001101) return tuple3(2,8,11);
else if(buffertemp[13:7] == 7'b0000100) return tuple3(3,8,7);
else if(buffertemp[13:2] == 12'b000000001111) return tuple3(0,9,12);
else if(buffertemp[13:3] == 11'b00000001010) return tuple3(1,9,11);
else if(buffertemp[13:3] == 11'b00000001001) return tuple3(2,9,11);
else if(buffertemp[13:5] == 9'b000000100) return tuple3(3,9,9);
else if(buffertemp[13:2] == 12'b000000001011) return tuple3(0,10,12);
else if(buffertemp[13:2] == 12'b000000001110) return tuple3(1,10,12);
else if(buffertemp[13:2] == 12'b000000001101) return tuple3(2,10,12);
else if(buffertemp[13:3] == 11'b00000001100) return tuple3(3,10,11);
else if(buffertemp[13:2] == 12'b000000001000) return tuple3(0,11,12);
else if(buffertemp[13:2] == 12'b000000001010) return tuple3(1,11,12);
else if(buffertemp[13:2] == 12'b000000001001) return tuple3(2,11,12);
else if(buffertemp[13:3] == 11'b00000001000) return tuple3(3,11,11);
else if(buffertemp[13:1] == 13'b0000000001111) return tuple3(0,12,13);
else if(buffertemp[13:1] == 13'b0000000001110) return tuple3(1,12,13);
else if(buffertemp[13:1] == 13'b0000000001101) return tuple3(2,12,13);
else if(buffertemp[13:2] == 12'b000000001100) return tuple3(3,12,12);
else if(buffertemp[13:1] == 13'b0000000001011) return tuple3(0,13,13);
else if(buffertemp[13:1] == 13'b0000000001010) return tuple3(1,13,13);
else if(buffertemp[13:1] == 13'b0000000001001) return tuple3(2,13,13);
else if(buffertemp[13:1] == 13'b0000000001100) return tuple3(3,13,13);
else if(buffertemp[13:1] == 13'b0000000000111) return tuple3(0,14,13);
else if(buffertemp[13:0] == 14'b00000000001011) return tuple3(1,14,14);
else if(buffertemp[13:1] == 13'b0000000000110) return tuple3(2,14,13);
else if(buffertemp[13:1] == 13'b0000000001000) return tuple3(3,14,13);
else if(buffertemp[13:0] == 14'b00000000001001) return tuple3(0,15,14);
else if(buffertemp[13:0] == 14'b00000000001000) return tuple3(1,15,14);
else if(buffertemp[13:0] == 14'b00000000001010) return tuple3(2,15,14);
else if(buffertemp[13:1] == 13'b0000000000001) return tuple3(3,15,13);
else if(buffertemp[13:0] == 14'b00000000000111) return tuple3(0,16,14);
else if(buffertemp[13:0] == 14'b00000000000110) return tuple3(1,16,14);
else if(buffertemp[13:0] == 14'b00000000000101) return tuple3(2,16,14);
else if(buffertemp[13:0] == 14'b00000000000100) return tuple3(3,16,14);
else return tuple3(0,0,100);
end
// Table selected when nC is 0 or 1: codes of 1 to 16 bits.
else
begin
Bit#(16) buffertemp = inbuffer[buffersize-1:buffersize-16];
if(buffertemp[15:15] == 1'b1) return tuple3(0,0,1);
else if(buffertemp[15:10] == 6'b000101) return tuple3(0,1,6);
else if(buffertemp[15:14] == 2'b01) return tuple3(1,1,2);
else if(buffertemp[15:8] == 8'b00000111) return tuple3(0,2,8);
else if(buffertemp[15:10] == 6'b000100) return tuple3(1,2,6);
else if(buffertemp[15:13] == 3'b001) return tuple3(2,2,3);
else if(buffertemp[15:7] == 9'b000000111) return tuple3(0,3,9);
else if(buffertemp[15:8] == 8'b00000110) return tuple3(1,3,8);
else if(buffertemp[15:9] == 7'b0000101) return tuple3(2,3,7);
else if(buffertemp[15:11] == 5'b00011) return tuple3(3,3,5);
else if(buffertemp[15:6] == 10'b0000000111) return tuple3(0,4,10);
else if(buffertemp[15:7] == 9'b000000110) return tuple3(1,4,9);
else if(buffertemp[15:8] == 8'b00000101) return tuple3(2,4,8);
else if(buffertemp[15:10] == 6'b000011) return tuple3(3,4,6);
else if(buffertemp[15:5] == 11'b00000000111) return tuple3(0,5,11);
else if(buffertemp[15:6] == 10'b0000000110) return tuple3(1,5,10);
else if(buffertemp[15:7] == 9'b000000101) return tuple3(2,5,9);
else if(buffertemp[15:9] == 7'b0000100) return tuple3(3,5,7);
else if(buffertemp[15:3] == 13'b0000000001111) return tuple3(0,6,13);
else if(buffertemp[15:5] == 11'b00000000110) return tuple3(1,6,11);
else if(buffertemp[15:6] == 10'b0000000101) return tuple3(2,6,10);
else if(buffertemp[15:8] == 8'b00000100) return tuple3(3,6,8);
else if(buffertemp[15:3] == 13'b0000000001011) return tuple3(0,7,13);
else if(buffertemp[15:3] == 13'b0000000001110) return tuple3(1,7,13);
else if(buffertemp[15:5] == 11'b00000000101) return tuple3(2,7,11);
else if(buffertemp[15:7] == 9'b000000100) return tuple3(3,7,9);
else if(buffertemp[15:3] == 13'b0000000001000) return tuple3(0,8,13);
else if(buffertemp[15:3] == 13'b0000000001010) return tuple3(1,8,13);
else if(buffertemp[15:3] == 13'b0000000001101) return tuple3(2,8,13);
else if(buffertemp[15:6] == 10'b0000000100) return tuple3(3,8,10);
else if(buffertemp[15:2] == 14'b00000000001111) return tuple3(0,9,14);
else if(buffertemp[15:2] == 14'b00000000001110) return tuple3(1,9,14);
else if(buffertemp[15:3] == 13'b0000000001001) return tuple3(2,9,13);
else if(buffertemp[15:5] == 11'b00000000100) return tuple3(3,9,11);
else if(buffertemp[15:2] == 14'b00000000001011) return tuple3(0,10,14);
else if(buffertemp[15:2] == 14'b00000000001010) return tuple3(1,10,14);
else if(buffertemp[15:2] == 14'b00000000001101) return tuple3(2,10,14);
else if(buffertemp[15:3] == 13'b0000000001100) return tuple3(3,10,13);
else if(buffertemp[15:1] == 15'b000000000001111) return tuple3(0,11,15);
else if(buffertemp[15:1] == 15'b000000000001110) return tuple3(1,11,15);
else if(buffertemp[15:2] == 14'b00000000001001) return tuple3(2,11,14);
else if(buffertemp[15:2] == 14'b00000000001100) return tuple3(3,11,14);
else if(buffertemp[15:1] == 15'b000000000001011) return tuple3(0,12,15);
else if(buffertemp[15:1] == 15'b000000000001010) return tuple3(1,12,15);
else if(buffertemp[15:1] == 15'b000000000001101) return tuple3(2,12,15);
else if(buffertemp[15:2] == 14'b00000000001000) return tuple3(3,12,14);
else if(buffertemp[15:0] == 16'b0000000000001111) return tuple3(0,13,16);
else if(buffertemp[15:1] == 15'b000000000000001) return tuple3(1,13,15);
else if(buffertemp[15:1] == 15'b000000000001001) return tuple3(2,13,15);
else if(buffertemp[15:1] == 15'b000000000001100) return tuple3(3,13,15);
else if(buffertemp[15:0] == 16'b0000000000001011) return tuple3(0,14,16);
else if(buffertemp[15:0] == 16'b0000000000001110) return tuple3(1,14,16);
else if(buffertemp[15:0] == 16'b0000000000001101) return tuple3(2,14,16);
else if(buffertemp[15:1] == 15'b000000000001000) return tuple3(3,14,15);
else if(buffertemp[15:0] == 16'b0000000000000111) return tuple3(0,15,16);
else if(buffertemp[15:0] == 16'b0000000000001010) return tuple3(1,15,16);
else if(buffertemp[15:0] == 16'b0000000000001001) return tuple3(2,15,16);
else if(buffertemp[15:0] == 16'b0000000000001100) return tuple3(3,15,16);
else if(buffertemp[15:0] == 16'b0000000000000100) return tuple3(0,16,16);
else if(buffertemp[15:0] == 16'b0000000000000110) return tuple3(1,16,16);
else if(buffertemp[15:0] == 16'b0000000000000101) return tuple3(2,16,16);
else if(buffertemp[15:0] == 16'b0000000000001000) return tuple3(3,16,16);
else return tuple3(0,0,100);
end
endfunction
// cavlc_level_prefix: counts the zero bits that precede the first 1 bit at the
// top of inbuffer, looking at the 15 most significant bits only.
//   inbuffer : bit buffer, examined from bit buffersize-1 downwards.
// Returns the number of leading zeros (0..14), or 15 when none of the 15
// examined bits is set.
(* noinline *)
function Bit#(4) cavlc_level_prefix( Buffer inbuffer );
   Bit#(4) zeroRun = 15;
   Bool oneSeen = False;
   // Walk from the most significant bit downwards and latch the first hit only.
   for(Integer pos=1; pos<=15; pos=pos+1)
      begin
         if(!oneSeen && inbuffer[buffersize-fromInteger(pos)]==1'b1)
            begin
               zeroRun = fromInteger(pos)-1;
               oneSeen = True;
            end
      end
   return zeroRun;
endfunction
 
// cavlc_total_zeros: table lookup that matches the most significant bits of
// inbuffer against a variable-length code table chosen by inTotalCoeff.
//   inbuffer      : bit buffer; only its top bits (buffersize-1 downwards) are examined.
//   inTotalCoeff  : selects the table; 1..3 are handled when inMaxNumCoeff==4,
//                   1..15 otherwise.  Any other value returns tuple2(0,100).
//   inMaxNumCoeff : 4 selects the small tables, every other value the large ones.
// Returns (decoded value, bit count).  For every explicit comparison the bit
// count equals the width of the matched pattern; the final "else" of each table
// returns a fixed count without examining further bits.
// NOTE(review): the tables appear to follow H.264 Tables 9-7 (4x4 blocks) and
// 9-9 (chroma DC, inMaxNumCoeff==4) - confirm against the standard.  The bit
// count 100 is presumably an error marker - confirm with the caller.
(* noinline *)
function Tuple2#(Bit#(4),Bufcount) cavlc_total_zeros( Buffer inbuffer, Bit#(4) inTotalCoeff, Bit#(5) inMaxNumCoeff);
// Small tables: at most 3 bits are examined.
if(inMaxNumCoeff==4)
begin
Bit#(3) buffertemp3 = inbuffer[buffersize-1:buffersize-3];
Bit#(2) buffertemp2 = inbuffer[buffersize-1:buffersize-2];
case ( inTotalCoeff )
1:
begin
if(inbuffer[buffersize-1] == 1)
return tuple2(0,1);
else if(buffertemp2 == 2'b01)
return tuple2(1,2);
else if(buffertemp3 == 3'b001)
return tuple2(2,3);
else
return tuple2(3,3);
end
2:
begin
if(inbuffer[buffersize-1] == 1)
return tuple2(0,1);
else if(buffertemp2 == 2'b01)
return tuple2(1,2);
else
return tuple2(2,2);
end
3:
begin
if(inbuffer[buffersize-1] == 1)
return tuple2(0,1);
else
return tuple2(1,1);
end
default: return tuple2(0,100);
endcase
end
// Large tables: a 6-bit window is enough for every table except inTotalCoeff==1,
// which declares its own 10-bit window.
else
begin
Bit#(6) buffertemp = inbuffer[buffersize-1:buffersize-6];
case ( inTotalCoeff )
1:
begin
Bit#(10) buffertemp2 = inbuffer[buffersize-1:buffersize-10];
if(buffertemp2[9:9] == 1'b1) return tuple2(0,1);
else if(buffertemp2[9:7] == 3'b011) return tuple2(1,3);
else if(buffertemp2[9:7] == 3'b010) return tuple2(2,3);
else if(buffertemp2[9:6] == 4'b0011) return tuple2(3,4);
else if(buffertemp2[9:6] == 4'b0010) return tuple2(4,4);
else if(buffertemp2[9:5] == 5'b00011) return tuple2(5,5);
else if(buffertemp2[9:5] == 5'b00010) return tuple2(6,5);
else if(buffertemp2[9:4] == 6'b000011) return tuple2(7,6);
else if(buffertemp2[9:4] == 6'b000010) return tuple2(8,6);
else if(buffertemp2[9:3] == 7'b0000011) return tuple2(9,7);
else if(buffertemp2[9:3] == 7'b0000010) return tuple2(10,7);
else if(buffertemp2[9:2] == 8'b00000011) return tuple2(11,8);
else if(buffertemp2[9:2] == 8'b00000010) return tuple2(12,8);
else if(buffertemp2[9:1] == 9'b000000011) return tuple2(13,9);
else if(buffertemp2[9:1] == 9'b000000010) return tuple2(14,9);
else return tuple2(15,9);
end
2:
begin
if(buffertemp[5:3] == 3'b111) return tuple2(0,3);
else if(buffertemp[5:3] == 3'b110) return tuple2(1,3);
else if(buffertemp[5:3] == 3'b101) return tuple2(2,3);
else if(buffertemp[5:3] == 3'b100) return tuple2(3,3);
else if(buffertemp[5:3] == 3'b011) return tuple2(4,3);
else if(buffertemp[5:2] == 4'b0101) return tuple2(5,4);
else if(buffertemp[5:2] == 4'b0100) return tuple2(6,4);
else if(buffertemp[5:2] == 4'b0011) return tuple2(7,4);
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4);
else if(buffertemp[5:1] == 5'b00011) return tuple2(9,5);
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5);
else if(buffertemp[5:0] == 6'b000011) return tuple2(11,6);
else if(buffertemp[5:0] == 6'b000010) return tuple2(12,6);
else if(buffertemp[5:0] == 6'b000001) return tuple2(13,6);
else return tuple2(14,6);
end
3:
begin
if(buffertemp[5:2] == 4'b0101) return tuple2(0,4);
else if(buffertemp[5:3] == 3'b111) return tuple2(1,3);
else if(buffertemp[5:3] == 3'b110) return tuple2(2,3);
else if(buffertemp[5:3] == 3'b101) return tuple2(3,3);
else if(buffertemp[5:2] == 4'b0100) return tuple2(4,4);
else if(buffertemp[5:2] == 4'b0011) return tuple2(5,4);
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3);
else if(buffertemp[5:3] == 3'b011) return tuple2(7,3);
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4);
else if(buffertemp[5:1] == 5'b00011) return tuple2(9,5);
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5);
else if(buffertemp[5:0] == 6'b000001) return tuple2(11,6);
else if(buffertemp[5:1] == 5'b00001) return tuple2(12,5);
else return tuple2(13,6);
end
4:
begin
if(buffertemp[5:1] == 5'b00011) return tuple2(0,5);
else if(buffertemp[5:3] == 3'b111) return tuple2(1,3);
else if(buffertemp[5:2] == 4'b0101) return tuple2(2,4);
else if(buffertemp[5:2] == 4'b0100) return tuple2(3,4);
else if(buffertemp[5:3] == 3'b110) return tuple2(4,3);
else if(buffertemp[5:3] == 3'b101) return tuple2(5,3);
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3);
else if(buffertemp[5:2] == 4'b0011) return tuple2(7,4);
else if(buffertemp[5:3] == 3'b011) return tuple2(8,3);
else if(buffertemp[5:2] == 4'b0010) return tuple2(9,4);
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5);
else if(buffertemp[5:1] == 5'b00001) return tuple2(11,5);
else return tuple2(12,5);
end
5:
begin
if(buffertemp[5:2] == 4'b0101) return tuple2(0,4);
else if(buffertemp[5:2] == 4'b0100) return tuple2(1,4);
else if(buffertemp[5:2] == 4'b0011) return tuple2(2,4);
else if(buffertemp[5:3] == 3'b111) return tuple2(3,3);
else if(buffertemp[5:3] == 3'b110) return tuple2(4,3);
else if(buffertemp[5:3] == 3'b101) return tuple2(5,3);
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3);
else if(buffertemp[5:3] == 3'b011) return tuple2(7,3);
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4);
else if(buffertemp[5:1] == 5'b00001) return tuple2(9,5);
else if(buffertemp[5:2] == 4'b0001) return tuple2(10,4);
else return tuple2(11,5);
end
6:
begin
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6);
else if(buffertemp[5:1] == 5'b00001) return tuple2(1,5);
else if(buffertemp[5:3] == 3'b111) return tuple2(2,3);
else if(buffertemp[5:3] == 3'b110) return tuple2(3,3);
else if(buffertemp[5:3] == 3'b101) return tuple2(4,3);
else if(buffertemp[5:3] == 3'b100) return tuple2(5,3);
else if(buffertemp[5:3] == 3'b011) return tuple2(6,3);
else if(buffertemp[5:3] == 3'b010) return tuple2(7,3);
else if(buffertemp[5:2] == 4'b0001) return tuple2(8,4);
else if(buffertemp[5:3] == 3'b001) return tuple2(9,3);
else return tuple2(10,6);
end
7:
begin
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6);
else if(buffertemp[5:1] == 5'b00001) return tuple2(1,5);
else if(buffertemp[5:3] == 3'b101) return tuple2(2,3);
else if(buffertemp[5:3] == 3'b100) return tuple2(3,3);
else if(buffertemp[5:3] == 3'b011) return tuple2(4,3);
else if(buffertemp[5:4] == 2'b11) return tuple2(5,2);
else if(buffertemp[5:3] == 3'b010) return tuple2(6,3);
else if(buffertemp[5:2] == 4'b0001) return tuple2(7,4);
else if(buffertemp[5:3] == 3'b001) return tuple2(8,3);
else return tuple2(9,6);
end
8:
begin
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6);
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4);
else if(buffertemp[5:1] == 5'b00001) return tuple2(2,5);
else if(buffertemp[5:3] == 3'b011) return tuple2(3,3);
else if(buffertemp[5:4] == 2'b11) return tuple2(4,2);
else if(buffertemp[5:4] == 2'b10) return tuple2(5,2);
else if(buffertemp[5:3] == 3'b010) return tuple2(6,3);
else if(buffertemp[5:3] == 3'b001) return tuple2(7,3);
else return tuple2(8,6);
end
9:
begin
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6);
else if(buffertemp[5:0] == 6'b000000) return tuple2(1,6);
else if(buffertemp[5:2] == 4'b0001) return tuple2(2,4);
else if(buffertemp[5:4] == 2'b11) return tuple2(3,2);
else if(buffertemp[5:4] == 2'b10) return tuple2(4,2);
else if(buffertemp[5:3] == 3'b001) return tuple2(5,3);
else if(buffertemp[5:4] == 2'b01) return tuple2(6,2);
else return tuple2(7,5);
end
10:
begin
if(buffertemp[5:1] == 5'b00001) return tuple2(0,5);
else if(buffertemp[5:1] == 5'b00000) return tuple2(1,5);
else if(buffertemp[5:3] == 3'b001) return tuple2(2,3);
else if(buffertemp[5:4] == 2'b11) return tuple2(3,2);
else if(buffertemp[5:4] == 2'b10) return tuple2(4,2);
else if(buffertemp[5:4] == 2'b01) return tuple2(5,2);
else return tuple2(6,4);
end
11:
begin
if(buffertemp[5:2] == 4'b0000) return tuple2(0,4);
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4);
else if(buffertemp[5:3] == 3'b001) return tuple2(2,3);
else if(buffertemp[5:3] == 3'b010) return tuple2(3,3);
else if(buffertemp[5:5] == 1'b1) return tuple2(4,1);
else return tuple2(5,3);
end
12:
begin
if(buffertemp[5:2] == 4'b0000) return tuple2(0,4);
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4);
else if(buffertemp[5:4] == 2'b01) return tuple2(2,2);
else if(buffertemp[5:5] == 1'b1) return tuple2(3,1);
else return tuple2(4,3);
end
13:
begin
if(buffertemp[5:3] == 3'b000) return tuple2(0,3);
else if(buffertemp[5:3] == 3'b001) return tuple2(1,3);
else if(buffertemp[5:5] == 1'b1) return tuple2(2,1);
else return tuple2(3,2);
end
14:
begin
if(buffertemp[5:4] == 2'b00) return tuple2(0,2);
else if(buffertemp[5:4] == 2'b01) return tuple2(1,2);
else return tuple2(2,1);
end
15:
begin
if(buffertemp[5:5] == 1'b0) return tuple2(0,1);
else return tuple2(1,1);
end
default: return tuple2(0,100);
endcase
end
endfunction
 
// cavlc_run_before: decodes one variable-length code from the top of inbuffer,
// using the code table selected by inZerosLeft.
//   inbuffer    : bit buffer, examined from bit buffersize-1 downwards.
//   inZerosLeft : table selector; 1..6 each have a dedicated table, every
//                 larger value shares the last one, and 0 yields (0,100).
// Returns (decoded value, number of bits the code occupies).
(* noinline *)
function Tuple2#(Bit#(4),Bufcount) cavlc_run_before( Buffer inbuffer, Bit#(4) inZerosLeft);
   // Leading bit, leading two bits and leading three bits of the buffer.
   Bool topBitSet = (inbuffer[buffersize-1] == 1);
   Bit#(2) lead2 = inbuffer[buffersize-1:buffersize-2];
   Bit#(3) lead3 = inbuffer[buffersize-1:buffersize-3];

   // Result fields, filled in by exactly one arm below and returned at the end.
   Bit#(4) runValue = 0;
   Bufcount bitsUsed = 100;

   case ( inZerosLeft )
      0:
         begin
            runValue = 0;
            bitsUsed = 100;
         end
      1:
         begin
            bitsUsed = 1;
            if(topBitSet)
               runValue = 0;
            else
               runValue = 1;
         end
      2:
         begin
            if(topBitSet)
               begin
                  runValue = 0;
                  bitsUsed = 1;
               end
            else
               begin
                  bitsUsed = 2;
                  if(lead2 == 2'b01)
                     runValue = 1;
                  else
                     runValue = 2;
               end
         end
      3:
         begin
            bitsUsed = 2;
            if(lead2 == 2'b11)
               runValue = 0;
            else if(lead2 == 2'b10)
               runValue = 1;
            else if(lead2 == 2'b01)
               runValue = 2;
            else
               runValue = 3;
         end
      4:
         begin
            if(lead2 == 2'b11)
               begin
                  runValue = 0;
                  bitsUsed = 2;
               end
            else if(lead2 == 2'b10)
               begin
                  runValue = 1;
                  bitsUsed = 2;
               end
            else if(lead2 == 2'b01)
               begin
                  runValue = 2;
                  bitsUsed = 2;
               end
            else
               begin
                  bitsUsed = 3;
                  if(lead3 == 3'b001)
                     runValue = 3;
                  else
                     runValue = 4;
               end
         end
      5:
         begin
            if(lead2 == 2'b11)
               begin
                  runValue = 0;
                  bitsUsed = 2;
               end
            else if(lead2 == 2'b10)
               begin
                  runValue = 1;
                  bitsUsed = 2;
               end
            else
               begin
                  bitsUsed = 3;
                  if(lead3 == 3'b011)
                     runValue = 2;
                  else if(lead3 == 3'b010)
                     runValue = 3;
                  else if(lead3 == 3'b001)
                     runValue = 4;
                  else
                     runValue = 5;
               end
         end
      6:
         begin
            if(lead2 == 2'b11)
               begin
                  runValue = 0;
                  bitsUsed = 2;
               end
            else
               begin
                  bitsUsed = 3;
                  if(lead3 == 3'b000)
                     runValue = 1;
                  else if(lead3 == 3'b001)
                     runValue = 2;
                  else if(lead3 == 3'b011)
                     runValue = 3;
                  else if(lead3 == 3'b010)
                     runValue = 4;
                  else if(lead3 == 3'b101)
                     runValue = 5;
                  else
                     runValue = 6;
               end
         end
      default:
         begin
            if(lead3 != 3'b000)
               begin
                  // Three-bit codes 111 down to 001 map to the values 0 up to 6.
                  runValue = zeroExtend(3'b111 - lead3);
                  bitsUsed = 3;
               end
            else
               begin
                  // Longer codes are a run of zeros closed by a single 1.  Look for
                  // the first 1 among bit positions 4..10 (counted from the top);
                  // when none is found the result is (14,11).
                  runValue = 14;
                  bitsUsed = 11;
                  Bool oneSeen = False;
                  for(Integer pos=4; pos<=10; pos=pos+1)
                     begin
                        if(!oneSeen && inbuffer[buffersize-fromInteger(pos)]==1'b1)
                           begin
                              runValue = fromInteger(pos)+3;
                              bitsUsed = fromInteger(pos);
                              oneSeen = True;
                           end
                     end
               end
         end
   endcase
   return tuple2(runValue,bitsUsed);
endfunction
 
 
 
endpackage
/trunk/src/mkPrediction_intra32.bsv
0,0 → 1,2184
//**********************************************************************
// Prediction
//----------------------------------------------------------------------
//
//
 
package mkPrediction;
 
import H264Types::*;
 
import IPrediction::*;
import IInterpolator::*;
import mkInterpolator::*;
import FIFO::*;
import FIFOF::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Kind of macroblock whose output is in flight. Rule inputing enqueues one
// value into outstatefifo per macroblock it starts, and rule outputing reads
// the head while emitting that macroblock and dequeues it when the
// macroblock is finished.
typedef union tagged
{
void Intra; //Intra non-4x4
void Intra4x4;
void Inter;
}
OutState deriving(Eq,Bits);
 
// Value type of the intrastate register: which kind of intra macroblock, if
// any, is currently being worked on. Rule inputing only starts a new
// macroblock while the register holds Start, and rule outputing sets it back
// to Start when a macroblock has been completely emitted.
// NOTE(review): in the part of the module visible here IntraPCM is never
// assigned (I_PCM is reported as not implemented) -- confirm before relying on it.
typedef union tagged
{
void Start; //not working on anything in particular
void Intra16x16;
void Intra4x4;
void IntraPCM;
}
IntraState deriving(Eq,Bits);
 
// Value type of the interstate register: which inter macroblock type, if any,
// is currently being worked on. Rule inputing selects the value from the
// macroblock type (or InterPskip for a skipped macroblock) and only does so
// while the register holds Start.
typedef union tagged
{
void Start; //not working on anything in particular
void InterP16x16;
void InterP16x8;
void InterP8x16;
void InterP8x8;
void InterP8x8ref0;
void InterPskip;
}
InterState deriving(Eq,Bits);
 
// Per-block motion information kept for neighbouring blocks.
//   NotInter : the block carries no motion vector; the payload bit tells why.
//   BlockMv  : reference index, horizontal/vertical motion vector and a flag
//              for non-zero transform coefficients.
// The largest member is 4+14+12+1 = 31 bits; together with the one-bit tag
// the packed value is 32 bits wide, which is the data width used by
// interMemReqQ / interMemRespQ where these values are stored and reloaded
// with pack/unpack.
typedef union tagged
{
Bit#(1) NotInter;//0 for not available, 1 for intra-coded
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv;
}
InterBlockMv deriving(Eq,Bits);
 
// Element type of nextoutputfifo, which tells rule outputing what kind of
// output unit comes next. Rule inputing enqueues SkipMB / NonSkipMB for inter
// macroblocks; rule outputing treats SkipMB specially (no residual is read)
// and dequeues Intra4x4 entries after each luma block.
// NOTE(review): the producers of Intra4x4 and Intra4x4PlusChroma are not in
// the part of the module visible here -- confirm their exact meaning there.
typedef union tagged
{
void SkipMB;
void NonSkipMB;
void Intra4x4;
void Intra4x4PlusChroma;
}
NextOutput deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Picks one byte out of a 9-byte vector.
// idx 0..7 select bytes 1..8 (bits [15:8] .. [71:64]); every other idx value
// selects byte 0 (bits [7:0]).
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx );
   // Byte slot to read: idx+1 for the eight regular entries, 0 otherwise.
   Bit#(4) slot = (idx <= 7) ? (idx + 1) : 0;
   // slot*8 expressed as a bit offset.
   Bit#(7) bitOffset = {slot, 3'b000};
   return truncate(valVector >> bitOffset);
endfunction
 
// Picks one byte out of a 5-byte vector.
// idx 0..3 select bytes 1..4 (bits [15:8] .. [39:32]); every other idx value
// selects byte 0 (bits [7:0]).
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx );
   // Byte slot to read: idx+1 for the four regular entries, 0 otherwise.
   Bit#(3) slot = (idx <= 3) ? (idx + 1) : 0;
   // slot*8 expressed as a bit offset.
   Bit#(6) bitOffset = {slot, 3'b000};
   return truncate(valVector >> bitOffset);
endfunction
 
// Returns byte number idx of a 32-bit word, byte 0 being bits [7:0].
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx );
   // idx*8 expressed as a bit offset.
   Bit#(5) bitOffset = {idx, 3'b000};
   return truncate(valVector >> bitOffset);
endfunction
 
// Returns the low byte of a 16-bit word for idx==0 and the high byte for idx==1.
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx );
   Bit#(8) lowByte  = valVector[7:0];
   Bit#(8) highByte = valVector[15:8];
   return (idx == 1) ? highByte : lowByte;
endfunction
 
// True when the two 14-bit values, read as signed numbers, differ by 4 or more.
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 );
   // One extra bit so that neither the subtraction nor its result can overflow.
   Int#(15) lhs = unpack(signExtend(val1));
   Int#(15) rhs = unpack(signExtend(val2));
   // Non-negative distance between the two operands.
   Int#(15) distance = (lhs >= rhs) ? (lhs - rhs) : (rhs - lhs);
   return (distance >= 4);
endfunction
 
// True when the two 12-bit values, read as signed numbers, differ by 4 or more.
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 );
   // One extra bit so that neither the subtraction nor its result can overflow.
   Int#(13) lhs = unpack(signExtend(val1));
   Int#(13) rhs = unpack(signExtend(val2));
   // Non-negative distance between the two operands.
   Int#(13) distance = (lhs >= rhs) ? (lhs - rhs) : (rhs - lhs);
   return (distance >= 4);
endfunction
 
 
//-----------------------------------------------------------
// Prediction Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkPrediction( IPrediction );
 
//Common state
// infifo / infifo_ITB are the two input streams read by the rules below;
// outfifo is the single output stream they write.
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size);
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size);
FIFO#(EntropyDecOT) outfifo <- mkFIFO;
// passFlag selects which rule consumes infifo: rule passing while True,
// rule inputing while False.
Reg#(Bool) passFlag <- mkReg(True);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);

// Picture dimensions, latched from the SPS entries in rule passing.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
// firstMb / currMb: first macroblock index of the slice and the macroblock
// currently being output (currMb is incremented in rule outputing).
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
// currMbHor may temporarily be >= picWidth; rule currMbHorUpdate then
// reduces it and advances currMbVer. Most rules are gated on
// currMbHor < picWidth.
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb

// Output-side bookkeeping used by rule outputing.
FIFOF#(OutState) outstatefifo <- mkFIFOF;
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF;
// outBlockNum / outPixelNum: position inside the macroblock being output;
// outPixelNum advances by 4 per output vector.
Reg#(Bit#(4)) outBlockNum <- mkReg(0);
Reg#(Bit#(4)) outPixelNum <- mkReg(0);
// Predicted samples, four at a time, waiting to be combined with the residual.
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size);
// 0 while luma blocks are being output, 1 while chroma blocks are.
Reg#(Bit#(1)) outChromaFlag <- mkReg(0);
// Set when a slice starts; rule outputing then expects an IBTmb_qp entry first.
Reg#(Bool) outFirstQPFlag <- mkReg(False);

// NOTE(review): doNotFire() is called in branches where the enclosing rule
// must not proceed; presumably the method is never ready -- confirm against
// the definition of mkDoNotFire.
DoNotFire donotfire <- mkDoNotFire();
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU();
//Inter state
Interpolator interpolator <- mkInterpolator();
Reg#(InterState) interstate <- mkReg(Start);
// Number of skipped macroblocks already started for the current SDmb_skip_run.
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0);
// Motion information of neighbouring blocks (top row incl. one extra entry,
// left column, top-left).
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU();
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU();
// Request/response queues for the stored inter row data (32-bit words).
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO;
// Holds one store request that rule outputing issues later, at the start of
// the following macroblock.
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU();
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO;
// Non-zero while load requests / responses for the current macroblock are
// outstanding (see rules interSendReq, interReceiveResp, interReceiveNoResp).
Reg#(Bit#(3)) interReqCount <- mkReg(0);
Reg#(Bit#(3)) interRespCount <- mkReg(0);

Reg#(Bit#(1)) interStepCount <- mkReg(0);
Reg#(Bit#(2)) interMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0);
// Counts entries of a multi-part syntax element while rule inputing collects them.
Reg#(Bit#(2)) interPassingCount <- mkReg(0);
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU();
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU();
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full();
// First half of a motion vector difference pair; the pair is enqueued into
// interMvDiff when the second half arrives.
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0);
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO;
Reg#(Bit#(5)) interNewestMv <- mkReg(0);
Reg#(Bit#(2)) interIPStepCount <- mkReg(0);
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0);
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0);

// Incremented when rule inputing accepts a macroblock and decremented when
// rule outputing finishes one.
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0);

Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU();
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU();
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32);
// Alternates in rule outputing between emitting a PBbS entry and emitting
// the pixel data of a block.
Reg#(Bool) interBSoutput <- mkReg(True);
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8);
//Intra state
Reg#(IntraState) intrastate <- mkReg(Start);
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0);
// Request/response queues for the stored intra row data (68-bit words:
// 4-bit type, 16+16 bits chroma, 32 bits luma -- see the store in rule outputing).
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO;
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU;
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO;
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0);
// Reconstructed neighbour samples kept for intra prediction (left column and
// top row, luma and the two chroma planes).
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU();
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU();
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU();
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU();
// Left-column samples of the block being output, assembled row by row.
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0);
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0);
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16);
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO;
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0);
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0);
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0);

Reg#(Bit#(3)) intraReqCount <- mkReg(0);
Reg#(Bit#(3)) intraRespCount <- mkReg(0);
Reg#(Bit#(4)) intraStepCount <- mkReg(0);
Reg#(Bit#(13)) intraSumA <- mkReg(0);
Reg#(Bit#(15)) intraSumB <- mkReg(0);
Reg#(Bit#(15)) intraSumC <- mkReg(0);
 
//-----------------------------------------------------------
// Rules
 
//////////////////////////////////////////////////////////////////////////////
// rule stateMonitor ( True );
// if(predictedfifo.notEmpty())
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);////////////////////
// if(infifo.first() matches tagged ITBresidual .xdata)
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);////////////////////
// endrule
//////////////////////////////////////////////////////////////////////////////
// Handles the head of infifo while no macroblock data is being collected:
// enabled when passFlag is set, outstatefifo is empty and currMbHor is inside
// the picture width. Most entries are copied unchanged to outfifo; a few are
// additionally latched into local state, and the first macroblock-level entry
// hands control to rule inputing.
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) );
$display( "Trace Prediction: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display("ccl4newunit");
$display("ccl4rbspbyte %h", xdata);
end
// Picture width: remembered locally and passed to the interpolator.
tagged SPSpic_width_in_mbs .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
picWidth <= xdata;
interpolator.setPicWidth(xdata);
end
// Picture height: remembered locally and passed to the interpolator.
tagged SPSpic_height_in_map_units .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
picHeight <= xdata;
interpolator.setPicHeight(xdata);
end
// Consumed here and not forwarded to outfifo.
tagged PPSconstrained_intra_pred_flag .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
ppsconstrained_intra_pred_flag <= xdata;
end
// Start of a slice: reset the macroblock position and neighbour state.
// currMbHor receives the raw macroblock index with currMbVer = 0; rule
// currMbHorUpdate then converts that into a column/row pair.
tagged SHfirst_mb_in_slice .xdata :
begin
infifo.deq();
outfifo.enq(infifo.first());
firstMb <= xdata;
currMb <= xdata;
currMbHor <= xdata;
currMbVer <= 0;
intra4x4typeLeft <= replicate(15);
interTopLeftVal <= replicate(NotInter 0);
if(xdata==0)
interLeftVal <= replicate(NotInter 0);
outFirstQPFlag <= True;
end
// Macroblock-level entries are left in infifo; clearing passFlag lets
// rule inputing process them.
tagged SDmb_skip_run .xdata : passFlag <= False;
tagged SDMmbtype .xdata : passFlag <= False;
tagged EndOfFile :
begin
infifo.deq();
outfifo.enq(infifo.first());
$display( "INFO Prediction: EndOfFile reached" );
//$finish(0);////////////////////////////////
end
// Everything else is forwarded unchanged.
default:
begin
infifo.deq();
outfifo.enq(infifo.first());
end
endcase
endrule
 
 
// Consumes macroblock-level entries from infifo while passFlag is clear.
// Starts a new macroblock (skip run or explicit mb type) only when both
// interstate and intrastate are Start, collects the per-macroblock syntax
// elements into local registers/FIFOs, and gives control back to rule passing
// on any entry it does not recognise.
// NOTE(review): the branches calling donotfire.doNotFire() are the cases in
// which the rule is meant to wait; this presumably relies on doNotFire never
// being ready -- confirm against mkDoNotFire and the compiler options used.
rule inputing ( !passFlag );
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
// Run of skipped macroblocks: each firing starts one skipped macroblock
// until interPskipCount reaches the run length, then the entry is dequeued.
tagged SDmb_skip_run .xdata :
begin
if(interstate==Start && intrastate==Start)
begin
if(interPskipCount < xdata)
begin
// A further inter macroblock may be started while earlier ones are
// still being output, as long as fewer than picWidth-1 are in flight.
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata);
outstatefifo.enq(Inter);
interstate <= InterPskip;
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interPskipCount <= interPskipCount+1;
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
interCurrMbDiff <= interCurrMbDiff+1;
nextoutputfifo.enq(SkipMB);
end
else
donotfire.doNotFire();
end
else
begin
$display( "Trace Prediction: passing no SDmb_skip_run");
interPskipCount <= 0;
infifo.deq();
end
end
else
donotfire.doNotFire();
end
// Explicit macroblock type: select intra 16x16, intra 4x4, I_PCM (not
// implemented) or one of the inter types.
tagged SDMmbtype .xdata :
begin
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start)
begin
infifo.deq();
$display( "INFO Prediction: SDMmbtype %0d", xdata);
if(mbPartPredMode(xdata,0)==Intra_16x16)
begin
// Intra macroblocks are only started once outstatefifo is empty.
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra);
intrastate <= Intra16x16;
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3})
intra16x16_pred_mode <= tempv1;
else
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" );
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_NxN)
begin
if(!outstatefifo.notEmpty())
begin
outstatefifo.enq(Intra4x4);
intrastate <= Intra4x4;
intraReqCount <= 1;
intraRespCount <= 1;
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
donotfire.doNotFire();
end
else if(xdata==I_PCM)
begin
// Unsupported: the simulation is stopped here.
$display( "ERROR Prediction: I_PCM not implemented yet");
$finish;////////////////////////////////////////////////////////////////////////////////////////
intra4x4typeLeft <= replicate(13);
intra4x4typeTop <= replicate(13);
interTopLeftVal <= replicate(NotInter 1);
interLeftVal <= replicate(NotInter 1);
interTopVal <= replicate(NotInter 1);
end
else
begin
// Inter macroblock: same in-flight limit as for skipped macroblocks.
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1)
begin
outstatefifo.enq(Inter);
case(xdata)
P_L0_16x16: interstate <= InterP16x16;
P_L0_L0_16x8: interstate <= InterP16x8;
P_L0_L0_8x16: interstate <= InterP8x16;
P_8x8: interstate <= InterP8x8;
P_8x8ref0: interstate <= InterP8x8ref0;
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype");
endcase
interReqCount <= 1;
interRespCount <= 1;
intra4x4typeLeft <= replicate(14);/////////////////////////////////////////////////////////////////////////////
intra4x4typeTop <= replicate(14);
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0));
interTopVal <= replicate(NotInter 0);
interNewestMv <= 0;
interRefIdxVector <= replicate(0);
nextoutputfifo.enq(NonSkipMB);
end
else
donotfire.doNotFire();
end
// Counted for every accepted macroblock type, intra or inter.
interCurrMbDiff <= interCurrMbDiff+1;
end
else
donotfire.doNotFire();
end
// The following entries are consumed here and not forwarded to outfifo.
tagged SDMMrem_intra4x4_pred_mode .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
rem_intra4x4_pred_mode.enq(xdata);
end
tagged SDMMintra_chroma_pred_mode .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
intra_chroma_pred_mode.enq(xdata);
end
// Reference indices: a single entry for 16x16, otherwise two entries;
// interPassingCount is the write position.
tagged SDMMref_idx_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
if(interstate==InterP16x16 || interPassingCount==1)
interPassingCount <= 0;
else
interPassingCount <= interPassingCount+1;
end
// Motion vector differences arrive as two consecutive entries; the first
// is parked in interMvDiffTemp and the pair is enqueued with the second.
tagged SDMMmvd_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
// Sub-macroblock types / reference indices: four entries each; the 2-bit
// interPassingCount wraps back to 0 after the fourth.
tagged SDMSsub_mb_type .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata);
interPassingCount <= interPassingCount+1;
end
tagged SDMSref_idx_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]);
interPassingCount <= interPassingCount+1;
end
// Same pairing scheme as SDMMmvd_l0.
tagged SDMSmvd_l0 .xdata :
begin
infifo.deq();
////outfifo.enq(infifo.first());
if(interPassingCount==1)
begin
Bit#(13) interMvDiffTemp2 = truncate(xdata);
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2));
interPassingCount <= 0;
end
else
begin
interMvDiffTemp <= truncate(xdata);
interPassingCount <= interPassingCount+1;
end
end
// Anything else is left in infifo for rule passing.
default: passFlag <= True;
endcase
endrule
 
// Produces the output stream for the macroblock at the head of outstatefifo.
// Per firing it emits at most one entry into outfifo: an IBTmb_qp entry, a
// PBbS entry (emitted once before the first pixel row of each luma block), or
// one PBoutput vector of four reconstructed samples (prediction from
// predictedfifo plus, where present, the residual from infifo_ITB).
// After every PBoutput it also updates the saved left/top neighbour samples,
// issues the row-store requests for the finished macroblock, and advances
// the pixel / block / macroblock counters.
rule outputing ( currMbHor<zeroExtend(picWidth) );
// outputFlag is set to 1 by the branches that emitted a PBoutput vector.
Bit#(1) outputFlag = 0;
Vector#(4,Bit#(8)) outputVector = replicate(0);
// Column / row of the current 4x4 block inside the macroblock, and the
// row of the current output vector inside that block.
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]};
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]};
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]};
Bit#(4) totalVer = {blockVer,pixelVer};
//$display( "Trace Prediction: outputing" );
// First entry after a slice start must be the QP pair.
if(outFirstQPFlag)
begin
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata)
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata);
end
else
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()");
end
// Skipped macroblock: no residual is read from infifo_ITB.
else if(nextoutputfifo.first() == SkipMB)
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
// Emit the PBbS entry for this luma block. A stored value of 3 maps to
// 4; otherwise 2 is used when the neighbouring block had non-zero
// coefficients, else the stored value itself.
interBSoutput <= False;
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
// Output is the prediction unchanged.
interBSoutput <= True;
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector);
end
end
else
begin
case ( infifo_ITB.first() ) matches
tagged IBTmb_qp .xdata :
begin
infifo_ITB.deq();
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc});
outFirstQPFlag <= False;
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata);
end
// Block with residual data.
tagged ITBresidual .xdata :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
// Non-inter macroblocks use 4 on the macroblock edge and 3 inside.
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2);
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2);
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True);
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
// Add residual and prediction in 11 bits, then clamp to 0..255:
// bit 10 set means the sum is negative.
Bit#(11) tempOutputValue = 0;
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]);
if(tempOutputValue[10]==1)
outputVector[ii] = 0;
else if(tempOutputValue[9:0] > 255)
outputVector[ii] = 255;
else
outputVector[ii] = tempOutputValue[7:0];
end
outfifo.enq(PBoutput outputVector);
infifo_ITB.deq();
predictedfifo.deq();
outputFlag = 1;
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector);
end
end
// Block without residual: one ITBcoeffLevelZeros entry covers the whole
// block and is dequeued with its last row (outPixelNum == 12).
tagged ITBcoeffLevelZeros :
begin
if(interBSoutput && outChromaFlag==0 && outPixelNum==0)
begin
interBSoutput <= False;
if(outstatefifo.first() != Inter)
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)});
else
begin
interBSfifo.deq();
Bit#(2) tempHorBS = tpl_1(interBSfifo.first());
Bit#(2) tempVerBS = tpl_2(interBSfifo.first());
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS)));
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS)));
outfifo.enq(PBbS {bShor:horBS,bSver:verBS});
end
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False);
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False);
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer);
end
else
begin
interBSoutput <= True;
if(outPixelNum == 12)
infifo_ITB.deq();
outputVector = predictedfifo.first();
outfifo.enq(PBoutput outputVector);
outputFlag = 1;
predictedfifo.deq();
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector);
end
end
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" );
endcase
end
// Everything below runs only when a PBoutput vector was emitted above.
if(outputFlag == 1)
begin
$display("ccl4PBoutput %0d", outputVector[0]);
$display("ccl4PBoutput %0d", outputVector[1]);
$display("ccl4PBoutput %0d", outputVector[2]);
$display("ccl4PBoutput %0d", outputVector[3]);

// At the first row of a macroblock (other than the first of the slice)
// issue the store requests that were parked in the *delay registers at
// the end of the previous macroblock.
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1)
begin
intraMemReqQ.enq(intraMemReqQdelay);
interMemReqQ.enq(interMemReqQdelay);
//$display( "TRACE Prediction: passing storing addr data");//////////////////
end
// Save the right-most sample of this row as "left neighbour" data: for
// the last block column, or for every luma block of an Intra4x4 macroblock.
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))
begin
if(outChromaFlag==0)
begin
Bit#(32) intraLeftValNextTemp = intraLeftValNext;
// On the first row, seed the lowest byte from the top row's last sample.
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0))
begin
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor));
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]);
end
case(pixelVer)
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]};
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]};
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]};
3:
begin
// Last row of the block: commit the assembled column and record the
// block type (prediction mode, 13 = intra non-4x4, 14 = inter).
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp});
intraLeftValNext <= zeroExtend(outputVector[3]);
if(outstatefifo.first()==Intra4x4)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13);
else
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14);
end
endcase
end
else
begin
// Chroma: outBlockNum[2] selects which of the two chroma planes.
if(outBlockNum[2]==0)
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]);
else
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]);
end
end
// Save the last row of a block as "top neighbour" data: for the last
// block row, or for every luma block of an Intra4x4 macroblock.
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)))
begin
if(outChromaFlag==0)
begin
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]});
if(outstatefifo.first()==Intra4x4)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode);
else if(outstatefifo.first()==Intra)
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13);
else
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14);
end
else
begin
// Each chroma block row of four samples fills two 16-bit entries.
if(outBlockNum[2]==0)
begin
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0;
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma0 <= intraTopValChroma0Next;
end
else
begin
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1;
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]};
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]};
intraTopValChroma1 <= intraTopValChroma1Next;
end
end
end

// During the last chroma block (four firings, pixelVer 0..3) write the
// macroblock's bottom-row data back to the row memories; pixelVer doubles
// as the sub-address.
if(outChromaFlag==1 && outBlockNum==7)
begin
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor);
InterBlockMv outBlockMv = interOutBlockMvfifo.first();
// For real motion entries the coefficient flag is refreshed and one entry
// is consumed per row; a NotInter entry is shared by all four rows and
// consumed on the last one.
if(outBlockMv matches tagged BlockMv .bdata)
begin
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)});
interOutBlockMvfifo.deq();
end
else if(pixelVer==3)
interOutBlockMvfifo.deq();
// The last store is parked in the delay register (issued at the start of
// the next macroblock) unless the picture is one macroblock wide.
if(pixelVer==3 && picWidth>1)
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)};
else
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)});
// Intra row data is stored one step behind (entry pixelVer-1); on the
// last row the final entry is additionally parked in intraMemReqQdelay.
if(pixelVer>0)
begin
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)]));
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)];
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)];
// On the last row the chroma-1 entries are taken from the vector just
// produced, since the register is only being written in this firing.
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]});
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore});
if(pixelVer==3)
begin
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3]));
intraTopValStore = intraTopVal[3];
intraTopValChroma0Store = intraTopValChroma0[3];
intraTopValChroma1Store = {outputVector[3],outputVector[2]};
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore};
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore};
end
end
end
// Advance the counters: 4 rows per block, 16 luma blocks (0..15) followed
// by 8 chroma blocks (0..7) per macroblock.
outPixelNum <= outPixelNum+4;
if(outPixelNum == 12)
begin
if(outChromaFlag==0)
begin
outBlockNum <= outBlockNum+1;
if(outBlockNum == 15)
outChromaFlag <= 1;
if(nextoutputfifo.first() == Intra4x4)
nextoutputfifo.deq();
end
else
begin
if(outBlockNum == 7)
begin
// Macroblock finished: move to the next one and release its queue entries.
outBlockNum <= 0;
outChromaFlag <= 0;
currMb <= currMb+1;
currMbHor <= currMbHor+1;
interCurrMbDiff <= interCurrMbDiff-1;
outstatefifo.deq;
intrastate <= Start;
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1)
interpolator.endOfFrame();
nextoutputfifo.deq();
end
else
outBlockNum <= outBlockNum+1;
end
end
end
endrule
 
 
// Brings currMbHor back below picWidth. Each firing removes either eight
// picture widths (advancing currMbVer by 8) or a single one (advancing
// currMbVer by 1); the rule keeps firing until currMbHor < picWidth.
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) );
   Bit#(PicAreaSz) widthMb = zeroExtend(picWidth);
   // Take the big step when at least eight full rows are still contained.
   Bool eightRows = ((currMbHor >> 3) >= widthMb);
   Bit#(PicAreaSz) horStep = eightRows ? (widthMb << 3) : widthMb;
   currMbHor <= currMbHor - horStep;
   if(eightRows)
      currMbVer <= currMbVer + 8;
   else
      currMbVer <= currMbVer + 1;
   //$display( "Trace Prediction: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule
 
 
// inter prediction rules
 
// Issues the load requests for the stored motion data needed by the
// macroblock that rule inputing accepted last (currMb + interCurrMbDiff - 1).
// interReqCount numbers the requests, starting at 1:
//   1..4 : the four entries of the same column,
//   5    : first entry of the next column when there is one, otherwise the
//          last entry of the previous column when that is usable,
//   6    : last entry of the previous column when both exist.
// When no (further) request applies, interReqCount is reset to 0, which
// disables the rule.
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthMb = zeroExtend(picWidth);
   Bit#(PicAreaSz) mbIndex = currMb+zeroExtend(interCurrMbDiff)-1;
   // Column of that macroblock, wrapped once into the next row if necessary.
   Bit#(PicAreaSz) mbColRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbCol = (mbColRaw >= widthMb) ? (mbColRaw-widthMb) : mbColRaw;
   Bit#(PicWidthSz) colAddr = truncate(mbCol);

   // Availability of the neighbouring data.
   Bool hasRowAbove = !(mbIndex < widthMb);
   Bool hasNextCol  = ((mbCol+1) < widthMb);
   Bool hasPrevCol  = (mbCol>0 && mbIndex-firstMb>widthMb);

   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool sendReq = False;
   if(hasRowAbove)
      begin
	 if(interReqCount<5)
	    begin
	       Bit#(2) entry = truncate(interReqCount-1);
	       reqAddr = {colAddr,entry};
	       sendReq = True;
	    end
	 else if(interReqCount==5 && hasNextCol)
	    begin
	       reqAddr = {(colAddr+1),2'b00};
	       sendReq = True;
	    end
	 else if((interReqCount==5 && hasPrevCol) || (interReqCount==6 && hasNextCol && hasPrevCol))
	    begin
	       // Reached for count 5 only when there is no next column.
	       reqAddr = {(colAddr-1),2'b11};
	       sendReq = True;
	    end
      end

   if(sendReq)
      begin
	 interMemReqQ.enq(LoadReq reqAddr);
	 interReqCount <= interReqCount+1;
	 //$display( "TRACE Prediction: interSendReq addr %0d",reqAddr);///////////////////////
      end
   else
      interReqCount <= 0;
   $display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, reqAddr);
endrule
 
 
// Counterpart of interReceiveResp for macroblocks whose index is below
// picWidth: ends the response phase immediately and starts the processing
// steps. When the macroblock is in column 0, the left and top-left
// neighbour entries are marked NotInter 0.
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthMb = zeroExtend(picWidth);
   // Column of the macroblock accepted last, wrapped once if necessary.
   Bit#(PicAreaSz) mbColRaw = currMbHor+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) mbCol = (mbColRaw >= widthMb) ? (mbColRaw-widthMb) : mbColRaw;
   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount);
   if(mbCol == 0)
      begin
	 interTopLeftVal <= replicate(NotInter 0);
	 interLeftVal <= replicate(NotInter 0);
      end
   interIPStepCount <= 1;
   interStepCount <= 1;
   interRespCount <= 0;
endrule
 
// Consumes one load response per firing for the macroblock accepted last and
// files it into interTopVal / interTopLeftVal. interRespCount numbers the
// expected responses (1..6), mirroring the requests issued by rule
// interSendReq. After the last expected response the counter is reset and
// the processing steps are started (interStepCount / interIPStepCount = 1).
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data);
// Column and index of the macroblock accepted last; the column is wrapped
// once if it runs past the picture width.
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
if( currMbHorTemp >= zeroExtend(picWidth) )
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
Bool noMoreResp = False;
Bit#(2) temp2bit = 0;
InterBlockMv unpackedData = unpack(data);
Vector#(5,InterBlockMv) interTopValNext = interTopVal;
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
// Responses 1..4: entries of the same column.
if(interRespCount<5)
begin
temp2bit = truncate(interRespCount-1);
interTopValNext[temp2bit] = unpackedData;
// Stop after this response when neither a next-column nor a
// previous-column response is expected. For the listed states this test
// is also made on the first response.
// NOTE(review): interSendReq as visible here always issues requests 1..4
// when it issues any; confirm that stopping after response 1 cannot leave
// responses 2..4 queued.
if((interRespCount==4 || (interRespCount==1 && (interstate==InterPskip || interstate==InterP16x16 || interstate==InterP16x8)))
&& (!((currMbHorTemp+1)<zeroExtend(picWidth)) && !(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth))))
noMoreResp = True;
end
// Response 5: next column if it exists, otherwise the previous column.
else if(interRespCount==5)
begin
if((currMbHorTemp+1)<zeroExtend(picWidth))
begin
interTopValNext[4] = unpackedData;
if(!(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)))
noMoreResp = True;
end
else
begin
interTopLeftValNext[0] = unpackedData;
noMoreResp = True;
end
end
// Response 6: previous column.
else
begin
interTopLeftValNext[0] = unpackedData;
noMoreResp = True;
end
interMemRespQ.deq();
//$display( "TRACE Prediction: interReceiveResp data %h",data);///////////////////////
if(!noMoreResp)
interRespCount <= interRespCount+1;
else
begin
interRespCount <= 0;
interStepCount <= 1;
interIPStepCount <= 1;
// In column 0 the left and top-left entries are marked NotInter 0;
// this overrides any top-left value written above.
if(currMbHorTemp == 0)
begin
interLeftVal <= replicate(NotInter 0);
interTopLeftValNext = replicate(NotInter 0);
end
end
interTopVal <= interTopValNext;
interTopLeftVal <= interTopLeftValNext;
$display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data);
endrule
 
 
// Motion-vector derivation step for inter-predicted macroblocks.
// Each firing handles the 4x4 block addressed by {interMbPartNum,
// interSubMbPartNum}: it either predicts a new motion vector (calcmv) or
// copies one from the left/top neighbour entry, enqueues a (horizontal,
// vertical) bS pair to interBSfifo, refreshes the interLeftVal / interTopVal /
// interTopLeftVal neighbour vectors, and advances the block counters. After
// block {3,3} interStepCount is cleared, which disables this rule.
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) );
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
// Column / row of the current 4x4 block inside the macroblock, built from the
// low / high bits of the partition and sub-partition counters.
Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]};
Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]};
// partWidth is used below to index the third neighbour candidate (blockABC[2]).
// NOTE(review): partHeight, numPart and numSubPart are assigned in this rule
// but never read afterwards.
Bit#(3) partWidth = 0;
Bit#(3) partHeight = 0;
Bit#(3) numPart = 1;
Bit#(3) numSubPart = 1;
Bit#(2) subMbType = 0;
// noBlockC: force the third candidate to come from interTopLeftVal.
// calcmv:   predict a new MV for this block (otherwise copy a neighbour's).
// leftmv:   when copying, take the MV from blockABC[0] (True) or blockABC[1].
Bool noBlockC = False;
Bool calcmv = False;
Bool leftmv = False;
if(interstate==InterPskip || interstate==InterP16x16)
begin
partWidth = 4;
partHeight = 4;
numPart = 1;
calcmv = (interMbPartNum==0 && interSubMbPartNum==0);
leftmv = (blockHor>0);
end
else if(interstate==InterP16x8)
begin
partWidth = 4;
partHeight = 2;
numPart = 2;
if(interMbPartNum==2)
noBlockC = True;
calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0);
leftmv = (blockHor>0);
end
else if(interstate==InterP8x16)
begin
partWidth = 2;
partHeight = 4;
numPart = 2;
calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0);
leftmv = !(blockVer>0);
end
else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
begin
numPart = 4;
subMbType = interSubMbTypeVector[interMbPartNum];
numSubPart = numSubMbPart(subMbType);
// Per-sub-macroblock settings, selected by subMbType (0..3).
case(subMbType)
0:
begin
partWidth = 2;
partHeight = 2;
if(interMbPartNum==3)
noBlockC = True;
calcmv = (interSubMbPartNum==0);
leftmv = (blockHor[0]>0);
end
1:
begin
partWidth = 2;
partHeight = 1;
if(interSubMbPartNum==2)
noBlockC = True;
calcmv = (interSubMbPartNum[0]==0);
leftmv = True;
end
2:
begin
partWidth = 1;
partHeight = 2;
calcmv = (interSubMbPartNum[1]==0);
leftmv = False;
end
3:
begin
partWidth = 1;
partHeight = 1;
if(interSubMbPartNum==3)
noBlockC = True;
calcmv = True;
end
endcase
end
else
$display( "ERROR Prediction: interProcessStep unexpected interstate");
// Reference index is forced to 0 for InterPskip and InterP8x8ref0.
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]);
// Neighbour candidates: [0] from interLeftVal, [1] from interTopVal,
// [2] from interTopVal at column blockHor+partWidth, falling back to
// interTopLeftVal when noBlockC is set or that entry is NotInter 0.
Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0);
if( currMbTemp-firstMb==0 && blockHor==0 )
blockABC[0] = (NotInter 0);
else
blockABC[0] = interLeftVal[blockVer];
if( currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0 )
blockABC[1] = (NotInter 0);
else
blockABC[1] = interTopVal[blockHor];
blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth];
if(noBlockC || blockABC[2]==(NotInter 0))
blockABC[2] = interTopLeftVal[blockVer];
Bit#(14) mvhorfinal = 0;
Bit#(12) mvverfinal = 0;
// Stays 0 unless a new MV is written this firing; 0 means "do not update
// interNewestMv" (see the end of the rule).
Bit#(5) interNewestMvNext = 0;
if(calcmv)//motion vector calculation
begin
Vector#(3,Int#(14)) mvhorABC = replicate(0);
Vector#(3,Int#(12)) mvverABC = replicate(0);
Bit#(2) validCount = 0;
Bit#(14) mvhorPred = 0;
Bit#(12) mvverPred = 0;
// Collect the candidates' MVs (0 for non-BlockMv entries) and count how many
// share refIndex; mvhorPred/mvverPred keep the last matching candidate's MV.
for(Integer ii=0; ii<3; ii=ii+1)
begin
if(blockABC[ii] matches tagged BlockMv .xdata)
begin
mvhorABC[ii] = unpack(xdata.mvhor);
mvverABC[ii] = unpack(xdata.mvver);
if(xdata.refIdx == refIndex)
begin
validCount = validCount+1;
mvhorPred = xdata.mvhor;
mvverPred = xdata.mvver;
end
end
else
begin
mvhorABC[ii] = 0;
mvverABC[ii] = 0;
end
end
// Unless exactly one candidate matched refIndex, use the signed
// component-wise median of the three candidates.
if(validCount != 1)//median
begin
if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2])
mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2])
mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]);
else
mvhorPred = pack(mvhorABC[0]);
if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2])
mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2])
mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]);
else
mvverPred = pack(mvverABC[0]);
end
// InterPskip: the prediction is forced to zero when either of the first two
// candidates is NotInter 0, or is a BlockMv with refIdx 0 and a zero MV.
if(interstate==InterPskip)
begin
for(Integer ii=0; ii<2; ii=ii+1)
begin
if(blockABC[ii] matches tagged BlockMv .xdata)
begin
if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0)
begin
mvhorPred = 0;
mvverPred = 0;
end
end
else if(blockABC[ii] matches tagged NotInter 0)
begin
mvhorPred = 0;
mvverPred = 0;
end
end
end
// 16x8 / 8x16: one specific candidate (chosen by partition number) overrides
// the prediction when it is a BlockMv with the same refIndex.
else if(interstate==InterP16x8 || interstate==InterP8x16)
begin
InterBlockMv blockCheck;
if(interstate==InterP16x8)
begin
if(interMbPartNum==0)
blockCheck = blockABC[1];
else
blockCheck = blockABC[0];
end
else
begin
if(interMbPartNum==0)
blockCheck = blockABC[0];
else
blockCheck = blockABC[2];
end
if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex)
begin
mvhorPred = xdata.mvhor;
mvverPred = xdata.mvver;
end
end
mvhorfinal = mvhorPred;
mvverfinal = mvverPred;
// Except for InterPskip, add the next entry of interMvDiff to the prediction
// and consume it.
if(interstate!=InterPskip)
begin
mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred));
mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred));
interMvDiff.deq();
end
// Store the new MV and record (index+1) as the newest written entry.
interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal));
interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1;
$display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred);
end
else
begin
// No new MV for this block: copy it from the left (blockABC[0]) or top
// (blockABC[1]) candidate; that candidate is expected to be a BlockMv.
if(leftmv)
begin
if(blockABC[0] matches tagged BlockMv .xdata)
begin
mvhorfinal = unpack(xdata.mvhor);
mvverfinal = unpack(xdata.mvver);
end
else
$display( "ERROR Prediction: interProcessStep unexpected blockABC[0]");
end
else
begin
if(blockABC[1] matches tagged BlockMv .xdata)
begin
mvhorfinal = unpack(xdata.mvhor);
mvverfinal = unpack(xdata.mvver);
end
else
$display( "ERROR Prediction: interProcessStep unexpected blockABC[1]");
end
end
// bS values against the stored left / top entries:
//   3 when the entry is not a BlockMv, 2 when its nonZeroTransCoeff is 1,
//   1 when refIdx differs or absDiffGEFour14/12 reports a difference,
//   0 otherwise.
Bit#(2) tempBShor = 0;//bS calculation
Bit#(2) tempBSver = 0;
if(interLeftVal[blockVer] matches tagged BlockMv .xdata)
begin
if(xdata.nonZeroTransCoeff == 1)
tempBShor = 2;
else
begin
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
tempBShor = 1;
else
tempBShor = 0;
end
end
else
tempBShor = 3;
if(interTopVal[blockHor] matches tagged BlockMv .xdata)
begin
if(xdata.nonZeroTransCoeff == 1)
tempBSver = 2;
else
begin
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver))
tempBSver = 1;
else
tempBSver = 0;
end
end
else
tempBSver = 3;
interBSfifo.enq(tuple2(tempBShor,tempBSver));
// The current block becomes the left neighbour for its row and the top
// neighbour for its column; the previous top entry of this column is kept as
// the top-left entry for this row.
Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val
Vector#(4,InterBlockMv) interLeftValNext = interLeftVal;
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
interTopLeftValNext[blockVer] = interTopVal[blockHor];
interTopVal <= interTopValNext;
interLeftVal <= interLeftValNext;
interTopLeftVal <= interTopLeftValNext;
// Blocks in the bottom row (blockVer == 3) are also sent out on
// interOutBlockMvfifo.
if(blockVer == 3)
interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0});
// Advance {interMbPartNum, interSubMbPartNum} through all 16 positions.
if(interSubMbPartNum == 3)//next step
begin
interSubMbPartNum <= 0;
if(interMbPartNum == 3)
begin
// Last block: stop this rule and mark every MV entry as written.
interMbPartNum <= 0;
interStepCount <= 0;
interNewestMvNext = 16;
end
else
interMbPartNum <= interMbPartNum+1;
end
else
interSubMbPartNum <= interSubMbPartNum+1;
if(interNewestMvNext > 0)
interNewestMv <= interNewestMvNext;
endrule
 
 
// Issues one interpolation request per firing for the block addressed by
// {interIPMbPartNum, interIPSubMbPartNum}. interIPStepCount == 1 produces
// IPLuma requests; any other non-zero value produces IPChroma requests with
// uv = interIPStepCount[0]. The guard only lets the rule fire once
// interNewestMv is greater than the current block index.
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) );
// Macroblock position being processed; when the horizontal position wraps
// past picWidth the vertical position is advanced by one.
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
Bit#(PicHeightSz) currMbVerTemp = currMbVer;
if( currMbHorTemp >= zeroExtend(picWidth) )
begin
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
currMbVerTemp = currMbVerTemp+1;
end
// Column / row of the current 4x4 block inside the macroblock.
Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]};
Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]};
// Number of partitions / sub-partitions to iterate over; used by the counter
// update at the end of the rule (numPart may be overridden in the luma step).
Bit#(3) numPart = 1;
Bit#(3) numSubPart = 1;
Bit#(2) subMbType = 0;
if(interstate==InterPskip || interstate==InterP16x16)
numPart = 1;
else if(interstate==InterP16x8)
numPart = 2;
else if(interstate==InterP8x16)
numPart = 2;
else if(interstate==InterP8x8 || interstate==InterP8x8ref0)
begin
numPart = 4;
subMbType = interSubMbTypeVector[interIPMbPartNum];
numSubPart = numSubMbPart(subMbType);
end
else
$display( "ERROR Prediction: interIPProcessStep unexpected interstate");
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]);
// Request coordinates: hor = {macroblock column, block column},
// ver = {macroblock row, block row, 2'b00}.
Bit#(PicWidthSz) currMbHorT = truncate(currMbHorTemp);
Bit#(TAdd#(PicWidthSz,2)) horTemp = {currMbHorT,blockHor};
Bit#(TAdd#(PicHeightSz,4)) verTemp = {currMbVerTemp,blockVer,2'b00};
// Block type derived from interstate, or from subMbType for the 8x8 states.
IPBlockType btTemp = IP16x16;
if(interstate==InterPskip || interstate==InterP16x16)
btTemp = IP16x16;
else if(interstate==InterP16x8)
btTemp = IP16x8;
else if(interstate==InterP8x16)
btTemp = IP8x16;
else
begin
case(subMbType)
0: btTemp = IP8x8;
1: btTemp = IP8x4;
2: btTemp = IP4x8;
3: btTemp = IP4x4;
endcase
end
Bit#(14) mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
Bit#(12) mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}));
if(interIPStepCount == 1)
begin
if(!(interstate==InterP8x8 || interstate==InterP8x8ref0))
begin
// Luma step for the non-8x8 states: the macroblock is always requested as
// four IP8x8 blocks (numPart and btTemp are overridden here, which also
// affects the counter update below). The MV / refIdx index is remapped to
// 0 for 16x16, to the even index for 16x8, and to index-2 for the lower
// half of 8x16.
numPart = 4;
Bit#(2) interIPMbPartNumTemp = interIPMbPartNum;
if(btTemp==IP16x16)
interIPMbPartNumTemp = 0;
else if(btTemp==IP16x8 && interIPMbPartNumTemp[0]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-1;
else if(btTemp==IP8x16 && interIPMbPartNumTemp[1]==1)
interIPMbPartNumTemp = interIPMbPartNumTemp-2;
refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNumTemp]);
btTemp = IP8x8;
mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNumTemp,2'b00}));
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
end
else
// Chroma step: the vertical coordinate is halved and the plane is selected
// by the low bit of interIPStepCount.
interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp});
// Advance the sub-partition / partition counters; once the last partition is
// done, move on to the next interIPStepCount value.
if(interIPSubMbPartNum >= truncate(numSubPart-1))
begin
interIPSubMbPartNum <= 0;
if(interIPMbPartNum >= truncate(numPart-1))
begin
interIPMbPartNum <= 0;
interIPStepCount <= interIPStepCount+1;
end
else
begin
// For IP16x8 the second partition is at index 2 (btTemp can only still be
// IP16x8 here outside the luma step, where it was overridden to IP8x8).
if(btTemp == IP16x8)
interIPMbPartNum <= 2;
else
interIPMbPartNum <= interIPMbPartNum+1;
end
end
else
begin
// subMbType 1 steps from sub-partition 0 directly to 2.
if(subMbType == 1)
interIPSubMbPartNum <= 2;
else
interIPSubMbPartNum <= interIPSubMbPartNum+1;
end
$display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp));
endrule
 
 
// Returns interstate to Start once all four inter-phase counters (request,
// response, MV step, interpolation step) are zero.
rule interDone ( interstate!=Start
                 && interReqCount==0
                 && interRespCount==0
                 && interStepCount==0
                 && interIPStepCount==0 );
   interstate <= Start;
endrule
 
// Forwards one interpolator result per firing to predictedfifo.
rule interOutputTransfer ( True );
   let interpolated = interpolator.first();
   interpolator.deq();
   predictedfifo.enq(interpolated);
endrule
 
 
 
// intra prediction rules
 
// Issues the neighbour LoadReq sequence for an intra macroblock, one request
// per firing, driven by intraReqCount:
//   1..4 -> the four block columns {macroblock column, 0..3}
//   5    -> column 0 of the macroblock to the right (Intra4x4 only, when one
//           exists); otherwise column 3 of the macroblock to the left
//   6    -> column 3 of the macroblock to the left, only when request 5 went
//           to the right
// When nothing (more) has to be requested, intraReqCount is cleared instead.
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() );
   Bit#(PicAreaSz) rowWidth = zeroExtend(picWidth);
   Bit#(PicWidthSz) mbCol = truncate(currMbHor);
   Bit#(2) blkCol = truncate(intraReqCount-1);
   // Fewer than picWidth macroblocks since firstMb: no requests at all.
   Bool withinFirstRow = (currMb-firstMb) < rowWidth;
   Bool needTopRight = ((currMbHor+1) < rowWidth) && intrastate==Intra4x4;
   Bool needTopLeft = (currMbHor>0) && ((currMb-firstMb) > rowWidth);
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issueReq = True;
   if(withinFirstRow)
      issueReq = False;
   else if(intraReqCount<5)
      reqAddr = {mbCol,blkCol};
   else if(intraReqCount==5 && needTopRight)
      reqAddr = {(mbCol+1),2'b00};
   else if((intraReqCount==5 || (intraReqCount==6 && needTopRight)) && needTopLeft)
      reqAddr = {(mbCol-1),2'b11};
   else
      issueReq = False;
   if(issueReq)
      begin
         //$display( "TRACE Prediction: intraSendReq addr %0d",reqAddr);
         intraReqCount <= intraReqCount+1;
         intraMemReqQ.enq(LoadReq reqAddr);
      end
   else
      intraReqCount <= 0;
   $display( "Trace Prediction: intraSendReq");
endrule
 
 
// Ends the intra response phase without consuming any memory response. Fires
// when a response phase is pending and fewer than picWidth macroblocks have
// passed since firstMb. All four intra4x4typeTop entries are set to 15, the
// block / pixel counters are reset, intraStepCount is set to 1, and a
// NotInter 1 marker is pushed to interOutBlockMvfifo.
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) );
   $display( "Trace Prediction: intraReceiveNoResp");
   interOutBlockMvfifo.enq(NotInter 1);
   pixelNum <= 0;
   blockNum <= 0;
   intraStepCount <= 1;
   intraRespCount <= 0;
   intra4x4typeTop <= replicate(15);
endrule
 
// Consumes one LoadResp from intraMemRespQ per firing and distributes its
// fields to the intra neighbour state:
//   responses 1..4 -> intra4x4typeTop / intraTopVal / top chroma, slot 0..3
//                     (response 4 also pre-fills intraTopVal[4] by repeating
//                     data[31:24] four times)
//   response  5    -> intraTopVal[4] when a right neighbour is expected,
//                     otherwise the corner bytes of the left vectors
//   response  6    -> the corner bytes of the left vectors
// After the last expected response the block / pixel counters are reset,
// intraStepCount is set to 1 and NotInter 1 is pushed to interOutBlockMvfifo.
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data);
   Bit#(PicAreaSz) rowWidth = zeroExtend(picWidth);
   // Further responses still outstanding after the four top ones.
   Bool expectTopRight = ((currMbHor+1) < rowWidth) && intrastate==Intra4x4;
   Bool expectTopLeft = (currMbHor>0) && ((currMb-firstMb) > rowWidth);
   Bool lastResp = False;
   if(intraRespCount<5)
      begin
         Bit#(2) slot = truncate(intraRespCount-1);
         intra4x4typeTop <= update(intra4x4typeTop, slot, data[67:64]);
         intraTopValChroma0 <= update(intraTopValChroma0, slot, data[47:32]);
         intraTopValChroma1 <= update(intraTopValChroma1, slot, data[63:48]);
         if(intraRespCount==4)
            begin
               Vector#(5,Bit#(32)) topValNext = intraTopVal;
               topValNext[3] = data[31:0];
               topValNext[4] = {data[31:24],data[31:24],data[31:24],data[31:24]};
               intraTopVal <= topValNext;
               lastResp = !expectTopRight && !expectTopLeft;
            end
         else
            intraTopVal <= update(intraTopVal, intraRespCount-1, data[31:0]);
      end
   else if(intraRespCount==5 && expectTopRight)
      begin
         // Type 15, or type 14 with ppsconstrained_intra_pred_flag set: keep
         // the value pre-filled by response 4.
         Bool topRightUnusable = (data[67:64]==15 || (data[67:64]==14 && ppsconstrained_intra_pred_flag==1));
         if(!topRightUnusable)
            intraTopVal <= update(intraTopVal, 4, data[31:0]);
         lastResp = !expectTopLeft;
      end
   else
      begin
         // Response 5 without a right neighbour, or response 6: only the
         // lowest byte of intraLeftVal[0] and entry 0 of the left chroma
         // vectors are replaced.
         Bit#(40) leftRow0 = intraLeftVal[0];
         intraLeftVal <= update(intraLeftVal, 0, {leftRow0[39:8],data[31:24]});
         intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]);
         intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]);
         lastResp = True;
      end
   intraMemRespQ.deq();
   //$display( "TRACE Prediction: intraReceiveResp data %h",data);
   if(lastResp)
      begin
         interOutBlockMvfifo.enq(NotInter 1);
         pixelNum <= 0;
         blockNum <= 0;
         intraStepCount <= 1;
         intraRespCount <= 0;
      end
   else
      intraRespCount <= intraRespCount+1;
   $display( "Trace Prediction: intraReceiveResp");
endrule
 
// First intra step (intraStepCount==1) for the current block: announces the
// upcoming output on nextoutputfifo and, for Intra4x4, derives the block's
// prediction mode from the top / left neighbour types and the next entry of
// rem_intra4x4_pred_mode. Always advances intraStepCount.
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty());
   // Column / row of the current 4x4 block, de-interleaved from blockNum.
   Bit#(2) blkCol = {blockNum[2],blockNum[0]};
   Bit#(2) blkRow = {blockNum[3],blockNum[1]};
   // For block 0 at horizontal position 0 the left types are all reset to 15.
   Bool resetLeft = (currMbHor==0 && blockNum==0);
   Bit#(4) aboveType = select(intra4x4typeTop, blkCol);
   Bit#(4) besideType = resetLeft ? 15 : select(intra4x4typeLeft, blkRow);
   if(resetLeft)
      intra4x4typeLeft <= replicate(15);
   intraStepCount <= intraStepCount+1;
   if(intrastate!=Intra4x4)
      nextoutputfifo.enq(NonSkipMB);
   else
      begin
         // A neighbour is unusable when its type is 15, or 14 while
         // ppsconstrained_intra_pred_flag is set.
         Bool aboveUsable = !(aboveType==15 || (aboveType==14 && ppsconstrained_intra_pred_flag==1));
         Bool besideUsable = !(besideType==15 || (besideType==14 && ppsconstrained_intra_pred_flag==1));
         // Types above 8 are treated as mode 2.
         Bit#(4) aboveMode = (aboveType>8) ? 2 : aboveType;
         Bit#(4) besideMode = (besideType>8) ? 2 : besideType;
         // Predicted mode: 2 if either neighbour is unusable, else the
         // smaller of the two neighbour modes.
         Bit#(4) predType = 2;
         if(aboveUsable && besideUsable)
            predType = (aboveMode > besideMode) ? besideMode : aboveMode;
         Bit#(4) remType = rem_intra4x4_pred_mode.first();
         rem_intra4x4_pred_mode.deq();
         // Bit 3 of remType selects the predicted mode; otherwise remType
         // is used, skipping over the predicted mode.
         Bit#(4) curType = predType;
         if(remType[3] != 1)
            curType = (remType < predType) ? remType : (remType+1);
         cur_intra4x4_pred_mode <= curType;
         if(blockNum == 15)
            nextoutputfifo.enq(Intra4x4PlusChroma);
         else
            nextoutputfifo.enq(Intra4x4);
         $display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,aboveType,besideType,predType,remType,curType);
      end
   //$display( "Trace Prediction: intraPredTypeStep");
endrule
 
 
rule intraProcessStep ( intraStepCount>1 );
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);////////////////////
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);/////////////////
Bit#(1) outFlag = 0;
Bit#(4) nextIntraStepCount = intraStepCount+1;
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Vector#(4,Bit#(8)) predVector = replicate(0);
 
Bit#(4) topType = select(intra4x4typeTop, blockHor);
Bit#(4) leftType = select(intra4x4typeLeft, blockVer);
Bit#(1) topAvailable;
Bit#(1) leftAvailable;
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1))
topAvailable = 0;
else
topAvailable = 1;
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1))
leftAvailable = 0;
else
leftAvailable = 1;
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0)
begin
intraChromaTopAvailable <= topAvailable;
intraChromaLeftAvailable <= leftAvailable;
end
if(intrastate==Intra4x4 && intraChromaFlag==0)
begin
if(intraStepCount==2)
begin
outFlag = 1;
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor));
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1);
Bit#(72) topValSet;
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3)
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]};
else
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]};
$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);////////////////////
Bit#(4) topSelect1 = 0;
Bit#(4) topSelect2 = 0;
Bit#(4) topSelect3 = 0;
Bit#(3) leftSelect1 = 0;
Bit#(3) leftSelect2 = 0;
Bit#(3) leftSelect3 = 0;
Bit#(10) tempVal1 = 0;
Bit#(10) tempVal2 = 0;
Bit#(10) tempVal3 = 0;
case(cur_intra4x4_pred_mode)
0://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
topSelect1 = fromInteger(pixelHor);
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1);
predVector[pixelHor] = topVal;
end
end
1://horizontal
begin
leftSelect1 = zeroExtend(pixelVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1);
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = leftVal;
end
2://dc
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2;
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2;
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum);
Bit#(8) topSum = tempTopSum[9:2];
Bit#(8) leftSum = tempLeftSum[9:2];
Bit#(8) totalSum = tempTotalSum[10:3];
if(topAvailable==1 && leftAvailable==1)
predVector[pixelHor] = totalSum;
else if(topAvailable==1)
predVector[pixelHor] = topSum;
else if(leftAvailable==1)
predVector[pixelHor] = leftSum;
else
predVector[pixelHor] = 8'b10000000;
end
end
3://diagonal down left
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) selectNum = fromInteger(pixelHor)+zeroExtend(pixelVer);
if(pixelHor==3 && pixelVer==3)
begin
topSelect1 = 6;
topSelect2 = 7;
topSelect3 = 7;
end
else
begin
topSelect1 = selectNum;
topSelect2 = selectNum+1;
topSelect3 = selectNum+2;
end
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
4://diagonal down right
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
if(fromInteger(pixelHor) > pixelVer)
begin
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer);
topSelect2 = topSelect3-1;
topSelect1 = topSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(fromInteger(pixelHor) < pixelVer)
begin
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor);
leftSelect2 = leftSelect3-1;
leftSelect1 = leftSelect3-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
5://vertical right
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelHor = fromInteger(pixelHor);
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer);
if(zVR<=6 && zVR>=0)
begin
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer>>1);
topSelect2 = topSelect3-1;
if(zVR==1 || zVR==3 || zVR==5)
topSelect1 = topSelect3-2;
else
topSelect1 = topSelect3;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
else if(zVR==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
leftSelect1 = zeroExtend(pixelVer)-1;
leftSelect2 = leftSelect1-1;
leftSelect3 = leftSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
6://horizontal down
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHD = (tempPixelVer<<1)-fromInteger(pixelHor);
if(zHD<=6 && zHD>=0)
begin
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor/2);
leftSelect2 = leftSelect3-1;
if(zHD==1 || zHD==3 || zHD==5)
leftSelect1 = leftSelect3-2;
else
leftSelect1 = leftSelect3;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
end
else if(zHD==-1)
begin
leftSelect1 = 0;
leftSelect2 = -1;
topSelect1 = 0;
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
end
else
begin
topSelect1 = fromInteger(pixelHor)-1;
topSelect2 = topSelect1-1;
topSelect3 = topSelect1-2;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
end
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
7://vertical left
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
topSelect1 = fromInteger(pixelHor)+zeroExtend(pixelVer>>1);
topSelect2 = topSelect1+1;
if(pixelVer==1 || pixelVer==3)
topSelect3 = topSelect1+2;
else
topSelect3 = topSelect1;
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1));
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2));
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
8://horizontal up
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempPixelVer = zeroExtend(pixelVer);
Bit#(4) zHU = (tempPixelVer<<1)+fromInteger(pixelHor);
if(zHU<=4)
begin
leftSelect1 = zeroExtend(pixelVer)+fromInteger(pixelHor/2);
leftSelect2 = leftSelect1+1;
if(zHU==1 || zHU==3)
leftSelect3 = leftSelect1+2;
else
leftSelect3 = leftSelect1;
end
else
begin
if(zHU==5)
leftSelect1 = 2;
else
leftSelect1 = 3;
leftSelect2 = 3;
leftSelect3 = 3;
end
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1));
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2));
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3));
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2;
predVector[pixelHor] = predVal[9:2];
end
end
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode");
endcase
end
else
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount");
end
else if(intrastate==Intra16x16 && intraChromaFlag==0)
begin
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));/////////////////
case(intra16x16_pred_mode)
0://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(32) topValSet = select(intraTopVal,blockHor);
Bit#(8) topVal = select32to8(topValSet,fromInteger(pixelHor));
predVector[pixelHor] = topVal;
end
outFlag = 1;
end
1://horizontal
begin
Bit#(40) leftValSet = select(intraLeftVal,blockVer);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer));
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = leftVal;
outFlag = 1;
end
2://dc
begin
case(intraStepCount)
2:
begin
if(topAvailable == 1)
begin
Bit#(32) topValSet = select(intraTopVal,0);
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
else
begin
intraSumA <= 0;
nextIntraStepCount = 6;
end
end
3:
begin
Bit#(32) topValSet = select(intraTopVal,1);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
4:
begin
Bit#(32) topValSet = select(intraTopVal,2);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]);
end
5:
begin
Bit#(32) topValSet = select(intraTopVal,3);
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8;
end
6:
begin
if(leftAvailable == 1)
begin
Bit#(40) leftValSet = select(intraLeftVal,0);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
else
nextIntraStepCount = 10;
end
7:
begin
Bit#(40) leftValSet = select(intraLeftVal,1);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
8:
begin
Bit#(40) leftValSet = select(intraLeftVal,2);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]);
end
9:
begin
Bit#(40) leftValSet = select(intraLeftVal,3);
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8;
end
10:
begin
if(leftAvailable == 1 && topAvailable == 1)
intraSumA <= intraSumA >> 5;
else if(leftAvailable == 1 || topAvailable == 1)
intraSumA <= intraSumA >> 4;
else
intraSumA <= 128;
end
11:
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount");
endcase
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(32) topValSet = select(intraTopVal,3);
Bit#(8) topVal = select32to8(topValSet,3);
Bit#(40) leftValSet = select(intraLeftVal,3);
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3);
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 11)
begin
Bit#(4) xyPlusOne = intraStepCount-2;
Bit#(4) xyPlusEight = intraStepCount+5;
Bit#(4) sixMinusXY = 9-intraStepCount;
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]);
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]);
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]);
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0]));
Bit#(32) topValSet2=0;
Bit#(8) topVal2;
Bit#(40) leftValSet2;
Bit#(8) leftVal2;
if(intraStepCount==10)
begin
leftValSet2 = select(intraLeftVal,0);
leftVal2 = intra4x4SelectLeft(leftValSet2,-1);
topVal2 = leftVal2;
end
else
begin
topValSet2 = select(intraTopVal,sixMinusXY[3:2]);
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]);
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]);
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0]));
end
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
end
else if(intraStepCount == 11)
begin
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32;
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[17:6]);
intraSumC <= signExtend(tempSumC[17:6]);
end
else if(intraStepCount == 12)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(5) positionHor = {1'b0,blockHor,fromInteger(pixelHor)};
Bit#(5) positionVer = {1'b0,blockVer,pixelVer};
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7);
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7);
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[15]==1)
predVector[pixelHor] = 0;
else if(tempTotal[14:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
end
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount");
end
endcase
end
else if(intraChromaFlag==1)
begin
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);///////////////////
Vector#(9,Bit#(8)) tempLeftVec;
Vector#(4,Bit#(16)) tempTopVec;
if(blockNum[2] == 0)
begin
tempLeftVec = intraLeftValChroma0;
tempTopVec = intraTopValChroma0;
end
else
begin
tempLeftVec = intraLeftValChroma1;
tempTopVec = intraTopValChroma1;
end
case(intra_chroma_pred_mode.first())
0://dc
begin
if(intraStepCount == 2)
begin
Bit#(1) useTop=0;
Bit#(1) useLeft=0;
if(blockNum[1:0] == 0 || blockNum[1:0] == 3)
begin
useTop = intraChromaTopAvailable;
useLeft = intraChromaLeftAvailable;
end
else if(blockNum[1:0] == 1)
begin
if(intraChromaTopAvailable == 1)
useTop = 1;
else if(intraChromaLeftAvailable == 1)
useLeft = 1;
end
else if(blockNum[1:0] == 2)
begin
if(intraChromaLeftAvailable == 1)
useLeft = 1;
else if(intraChromaTopAvailable == 1)
useTop = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum");
Bit#(10) topSum;
Bit#(10) leftSum;
Bit#(11) totalSum;
if(blockHor[0] == 0)
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2;
else
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2;
if(blockVer[0] == 0)
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2;
else
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2;
totalSum = zeroExtend(topSum) + zeroExtend(leftSum);
if(useTop==1 && useLeft==1)
intraSumA <= zeroExtend(totalSum[10:3]);
else if(useTop==1)
intraSumA <= zeroExtend(topSum[9:2]);
else if(useLeft==1)
intraSumA <= zeroExtend(leftSum[9:2]);
else
intraSumA <= zeroExtend(8'b10000000);
end
else if(intraStepCount == 3)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
predVector[pixelHor] = intraSumA[7:0];
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount");
end
1://horizontal
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1;
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx);
end
outFlag = 1;
end
2://vertical
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(2) pixelHorTemp = fromInteger(pixelHor);
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHorTemp[1]});
if(pixelHorTemp[0] == 0)
predVector[pixelHor] = tempTopVal[7:0];
else
predVector[pixelHor] = tempTopVal[15:8];
end
outFlag = 1;
end
3://plane
begin
if(intraStepCount == 2)
begin
Bit#(16) topValSet = tempTopVec[3];
Bit#(8) topVal = topValSet[15:8];
Bit#(8) leftVal = tempLeftVec[8];
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal);
intraSumA <= tempVal << 4;
intraSumB <= 0;
intraSumC <= 0;
end
else if(intraStepCount < 7)
begin
Bit#(3) xyPlusOne = truncate(intraStepCount)-2;
Bit#(3) xyPlusFour = truncate(intraStepCount)+1;
Bit#(4) twoMinusXY = 5-intraStepCount;
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]);
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]);
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1;
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1);
Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]);
Bit#(8) topVal2;
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1);
if(intraStepCount==6)
topVal2 = leftVal2;
else
topVal2 = select16to8(topValSet2,twoMinusXY[0]);
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2);
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2);
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH);
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV);
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH);
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);////////////////////
end
else if(intraStepCount == 7)
begin
Int#(15) tempDisplayH = unpack(intraSumB);
Int#(15) tempDisplayV = unpack(intraSumC);
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);////////////////////
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32;
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32;
intraSumB <= signExtend(tempSumB[18:6]);
intraSumC <= signExtend(tempSumC[18:6]);
end
else if(intraStepCount == 8)
begin
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1)
begin
Bit#(4) positionHor = {1'b0,blockHor[0],fromInteger(pixelHor)};
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer};
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3);
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3);
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16;
if(tempTotal[16]==1)
predVector[pixelHor] = 0;
else if(tempTotal[15:5] > 255)
predVector[pixelHor] = 255;
else
predVector[pixelHor] = tempTotal[12:5];
end
outFlag = 1;
end
else
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount");
end
endcase
end
else
$display( "ERROR Prediction: intraProcessStep unknown intrastate");
 
if(outFlag==1)
begin
predictedfifo.enq(predVector);
pixelNum <= pixelNum+4;
if(pixelNum == 12)
begin
if(intraChromaFlag==0)
begin
blockNum <= blockNum+1;
if(blockNum == 15)
begin
intraChromaFlag <= 1;
intraStepCount <= 2;
end
else if(intrastate==Intra4x4)
intraStepCount <= 1;
end
else
begin
if(blockNum == 7)
begin
blockNum <= 0;
intraChromaFlag <= 0;
intraStepCount <= 0;
intra_chroma_pred_mode.deq();
end
else
begin
blockNum <= blockNum+1;
if(intra_chroma_pred_mode.first()==0)
intraStepCount <= 2;
else if(blockNum==3)
intraStepCount <= 2;
end
end
end
end
else
intraStepCount <= nextIntraStepCount;
//$display( "Trace Prediction: intraProcessStep");
endrule
 
interface Client mem_client_intra;
interface Get request = fifoToGet(intraMemReqQ);
interface Put response = fifoToPut(intraMemRespQ);
endinterface
interface Client mem_client_inter;
interface Get request = fifoToGet(interMemReqQ);
interface Put response = fifoToPut(interMemRespQ);
endinterface
interface Client mem_client_buffer = interpolator.mem_client;
 
interface Put ioin = fifoToPut(infifo);
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB);
interface Get ioout = fifoToGet(outfifo);
 
endmodule
 
endpackage
/trunk/src/IInterpolator.bsv
0,0 → 1,26
//**********************************************************************
// Interface for interpolator
//----------------------------------------------------------------------
//
//
//
 
package IInterpolator;
 
import H264Types::*;
import GetPut::*;
import Vector::*;
import ClientServer::*;
 
// Interface of the interpolator block: configuration of the picture
// dimensions, a request port, a first()/deq() result port, an end-of-frame
// notification and a memory client port.
// NOTE(review): units of the picture dimensions and the exact ordering
// contract between request() and first()/deq() are defined by the
// implementing module, which is not part of this file -- confirm there.
interface Interpolator;
// Supplies a new picture-width value.
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth );
// Supplies a new picture-height value.
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight );
// Submits one InterpolatorIT work item.
method Action request( InterpolatorIT inputdata );
// Returns a vector of four 8-bit values; presumably the head of the result
// stream that deq() then discards -- confirm against the implementation.
method Vector#(4,Bit#(8)) first();
// Action paired with first().
method Action deq();
// Notifies the interpolator that the end of a frame has been reached.
method Action endOfFrame();
// Memory client: emits InterpolatorLoadReq and accepts InterpolatorLoadResp.
interface Client#(InterpolatorLoadReq,InterpolatorLoadResp) mem_client;
endinterface
 
endpackage
 
/trunk/src/ExpGolomb.bsv
0,0 → 1,197
//**********************************************************************
// Exp-Golomb codes
//----------------------------------------------------------------------
//
//
//
 
package ExpGolomb;
 
import H264Types::*;
 
 
//-----------------------------------------------------------
// Helper functions
(* noinline *)
function Bufcount expgolomb_numbits32( Buffer inbuffer );
   // Looks at the 33 bits inbuffer[buffersize-1] down to
   // inbuffer[buffersize-33] and returns the 1-based distance (counted from
   // index buffersize-1) of the first bit that is set.
   // Returns 100 when none of those 33 bits is set.
   Bufcount position = 100;
   Bool     located  = False;
   for(Integer depth=1; depth<=33; depth=depth+1)
      begin
         if(!located && inbuffer[buffersize-fromInteger(depth)]==1'b1)
            begin
               position = fromInteger(depth);
               located  = True;
            end
      end
   return position;
endfunction
 
(* noinline *)
function Bit#(33) expgolomb_codenum32( Buffer inbuffer, Bufcount egnumbits );
   // Takes the 33-bit window inbuffer[buffersize-1:buffersize-33], keeps its
   // upper egnumbits bits (right-aligned) and subtracts one to obtain the
   // exp-golomb codenum.
   Bufcount dropBits = 33-egnumbits;
   Bit#(33) window   = inbuffer[buffersize-1:buffersize-33];
   Bit#(33) codeword = window >> zeroExtend(dropBits);
   return codeword-1;
endfunction
 
(* noinline *)
function Bit#(32) expgolomb_unsigned32( Buffer inbuffer, Bufcount egnumbits );
   // Unsigned exp-golomb value: the codenum computed by
   // expgolomb_codenum32, with its top bit dropped to fit 32 bits.
   Bit#(33) wideValue = expgolomb_codenum32( inbuffer, egnumbits );
   Bit#(32) narrowValue = truncate(wideValue);
   return narrowValue;
endfunction
 
(* noinline *)
function Bit#(32) expgolomb_signed32( Buffer inbuffer, Bufcount egnumbits );
   // Signed exp-golomb value derived from the codenum:
   //   magnitude = (codenum+1)/2
   //   odd codenum  -> +magnitude
   //   even codenum -> -magnitude (two's complement)
   Bit#(33) code      = expgolomb_codenum32( inbuffer, egnumbits );
   Bit#(33) magnitude = (code+1) >> 1;
   Bit#(33) signedValue;
   if(code[0]==1)
      signedValue = magnitude;
   else
      signedValue = (~magnitude)+1;
   return truncate(signedValue);
endfunction
 
 
 
(* noinline *)
function Bufcount expgolomb_numbits( Buffer inbuffer );
   // Finds the first set bit among inbuffer[buffersize-1] down to
   // inbuffer[buffersize-17]. If it is the k-th bit examined (k starting at
   // 1), the result is 2*k-1, i.e. the total length of the exp-golomb code.
   // Returns 100 when none of those 17 bits is set.
   Bufcount codeLength = 100;
   Bool     located    = False;
   for(Integer depth=1; depth<=17; depth=depth+1)
      begin
         if(!located && inbuffer[buffersize-fromInteger(depth)]==1'b1)
            begin
               codeLength = fromInteger((depth*2)-1);
               located    = True;
            end
      end
   return codeLength;
endfunction
 
(* noinline *)
function Bit#(17) expgolomb_codenum( Buffer inbuffer );
   // Computes the exp-golomb codenum of the code at the top of inbuffer.
   // prefixLen is half of the code length (rounded down). The 33-bit window
   // is shifted left by prefixLen, its upper 17 bits are taken, and the
   // upper prefixLen+1 of those are right-aligned; the codenum is that
   // value minus one.
   Bufcount prefixLen = expgolomb_numbits( inbuffer ) >> 1;
   Bit#(33) window    = inbuffer[buffersize-1:buffersize-33];
   Bit#(33) aligned   = window << zeroExtend(prefixLen);
   Bit#(17) upper     = aligned[32:16];
   Bufcount dropBits  = 16-prefixLen;
   Bit#(17) codeword  = upper >> zeroExtend(dropBits);
   return codeword-1;
endfunction
(* noinline *)
function Bit#(16) expgolomb_unsigned( Buffer inbuffer );
   // Unsigned exp-golomb value: the codenum computed by expgolomb_codenum,
   // with its top bit dropped to fit 16 bits.
   Bit#(17) wideValue = expgolomb_codenum( inbuffer );
   Bit#(16) narrowValue = truncate(wideValue);
   return narrowValue;
endfunction
 
(* noinline *)
function Bit#(16) expgolomb_signed( Buffer inbuffer );
   // Signed exp-golomb value derived from the codenum:
   //   magnitude = (codenum+1)/2
   //   odd codenum  -> +magnitude
   //   even codenum -> -magnitude (two's complement)
   Bit#(17) code      = expgolomb_codenum( inbuffer );
   Bit#(17) magnitude = (code+1) >> 1;
   Bit#(17) signedValue;
   if(code[0]==1)
      signedValue = magnitude;
   else
      signedValue = (~magnitude)+1;
   return truncate(signedValue);
endfunction
 
// Maps the exp-golomb codenum at the top of inbuffer to a 6-bit
// coded_block_pattern value through one of two 48-entry lookup tables.
//   inbuffer : bit buffer holding the exp-golomb code.
//   mbtype   : macroblock type; the first table is used when
//              mbPartPredMode(mbtype,0) is Intra_4x4, the second otherwise.
// NOTE(review): the table contents appear to follow the H.264
// coded_block_pattern mapping table -- verify against the standard before
// editing any entry.
// NOTE(review): codenum is 6 bits wide but only 0..47 have a branch; no
// return value is specified here for 48..63.
(* noinline *)
function Bit#(6) expgolomb_coded_block_pattern( Buffer inbuffer, MbType mbtype );//codenum to coded_block_pattern lookup
// Only the low 6 bits of the 17-bit codenum are used for the lookup.
Bit#(6) codenum = truncate(expgolomb_codenum( inbuffer ));
if(mbPartPredMode(mbtype,0) == Intra_4x4)
begin
// Table used for the Intra_4x4 prediction mode.
case(codenum)
0: return 47;
1: return 31;
2: return 15;
3: return 0;
4: return 23;
5: return 27;
6: return 29;
7: return 30;
8: return 7;
9: return 11;
10: return 13;
11: return 14;
12: return 39;
13: return 43;
14: return 45;
15: return 46;
16: return 16;
17: return 3;
18: return 5;
19: return 10;
20: return 12;
21: return 19;
22: return 21;
23: return 26;
24: return 28;
25: return 35;
26: return 37;
27: return 42;
28: return 44;
29: return 1;
30: return 2;
31: return 4;
32: return 8;
33: return 17;
34: return 18;
35: return 20;
36: return 24;
37: return 6;
38: return 9;
39: return 22;
40: return 25;
41: return 32;
42: return 33;
43: return 34;
44: return 36;
45: return 40;
46: return 38;
47: return 41;
endcase
end
else
begin
// Table used for every other prediction mode.
case(codenum)
0: return 0;
1: return 16;
2: return 1;
3: return 2;
4: return 4;
5: return 8;
6: return 32;
7: return 3;
8: return 5;
9: return 10;
10: return 12;
11: return 15;
12: return 47;
13: return 7;
14: return 11;
15: return 13;
16: return 14;
17: return 6;
18: return 9;
19: return 31;
20: return 35;
21: return 37;
22: return 42;
23: return 44;
24: return 33;
25: return 34;
26: return 36;
27: return 40;
28: return 39;
29: return 43;
30: return 45;
31: return 46;
32: return 17;
33: return 18;
34: return 20;
35: return 24;
36: return 19;
37: return 21;
38: return 26;
39: return 28;
40: return 23;
41: return 27;
42: return 29;
43: return 30;
44: return 22;
45: return 25;
46: return 38;
47: return 41;
endcase
end
endfunction
 
 
 
endpackage
/trunk/src/mkNalUnwrap.bsv
0,0 → 1,149
//**********************************************************************
// NAL unit unwrapper implementation
//----------------------------------------------------------------------
//
//
 
package mkNalUnwrap;
 
import H264Types::*;
import INalUnwrap::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
 
//-----------------------------------------------------------
// NAL Unwrapper Module
//-----------------------------------------------------------
 
// NAL unwrapper.
//
// Consumes a stream of InputGenOT items (DataByte / EndOfFile) and produces
// NalUnwrapOT items (NewUnit / RbspByte / EndOfFile).
//
// The three most recent input bytes are kept in a window:
//   bufferc = oldest, bufferb = middle, buffera = newest
// and bufcount tells how many window positions are filled. Once the window
// is full, the byte at the head of infifo (dbyte) is examined together with
// the window:
//   * 00 00 01, or 00 00 00 followed by dbyte==01 : a new unit starts; the
//     window and any held-back zero bytes are discarded and NewUnit is sent.
//   * 00 00 03 followed by dbyte<4                : the 03 byte is dropped
//     and the two zero bytes are added to zerocount.
//   * otherwise                                   : the window advances by
//     one byte.
// Zero bytes leaving the window are not sent immediately; they are counted
// in zerocount and only emitted once a non-zero byte reaches the oldest
// window position, so zero bytes directly in front of a new unit are dropped.
module mkNalUnwrap( INalUnwrap );

   FIFO#(InputGenOT)  infifo    <- mkFIFO;
   FIFO#(NalUnwrapOT) outfifo   <- mkFIFO;
   Reg#(Bit#(8))      buffera   <- mkReg(0);   // newest byte in the window
   Reg#(Bit#(8))      bufferb   <- mkReg(0);
   Reg#(Bit#(8))      bufferc   <- mkReg(0);   // oldest byte in the window
   Reg#(Bit#(2))      bufcount  <- mkReg(0);   // number of valid window bytes
   Reg#(Bit#(27))     zerocount <- mkReg(0);   // zero bytes held back

   //-----------------------------------------------------------
   // Rules

   // Shift input bytes into the window until it holds three bytes.
   rule fillbuffer (bufcount<3
                    &&& infifo.first() matches tagged DataByte .dbyte);
      bufferc <= bufferb;
      bufferb <= buffera;
      buffera <= dbyte;
      bufcount <= bufcount+1;
      infifo.deq();
   endrule

   // Window holds 00 00 01, or 00 00 00 with the next byte being 01.
   rule newnalunit (bufcount==3
                    &&& infifo.first() matches tagged DataByte .dbyte
                    &&& ((bufferc==0 && bufferb==0 && buffera==1)
                         || (bufferc==0 && bufferb==0 && buffera==0 && dbyte==1)));
      zerocount <= 0;
      if(bufferc==0 && bufferb==0 && buffera==1)
         bufcount <= 0;
      else
         begin
            // the 01 byte is still in infifo: consume it as well
            bufcount <= 0;
            infifo.deq();
         end
      outfifo.enq(NewUnit);
      $display("ccl1newunit");
   endrule

   // Window holds 00 00 03 and the next byte is below 4: drop the 03 and
   // remember the two zero bytes. dbyte stays in infifo and is picked up
   // again by fillbuffer.
   rule remove3byte (bufcount==3
                     &&& infifo.first() matches tagged DataByte .dbyte
                     &&& (bufferc==0 && bufferb==0 && buffera==3 && dbyte<4));
      zerocount <= zerocount+2;
      bufcount <= 0;
   endrule

   // Any other full-window situation.
   rule normalop (bufcount==3
                  &&& infifo.first() matches tagged DataByte .dbyte
                  &&& !(bufferc==0 && bufferb==0 && buffera==3 && dbyte<4)
                  &&& !((bufferc==0 && bufferb==0 && buffera==1)
                        || (bufferc==0 && bufferb==0 && buffera==0 && dbyte==1)));
      if(bufferc==0)
         begin
            // hold the zero byte back instead of emitting it
            zerocount <= zerocount+1;
            bufferc <= bufferb;
            bufferb <= buffera;
            buffera <= dbyte;
            infifo.deq();
         end
      else if(zerocount==0)
         begin
            outfifo.enq(RbspByte bufferc);
            $display("ccl1rbspbyte %h", bufferc);
            bufferc <= bufferb;
            bufferb <= buffera;
            buffera <= dbyte;
            infifo.deq();
         end
      else
         begin
            // flush one held-back zero byte; the window does not move
            zerocount <= zerocount-1;
            outfifo.enq(RbspByte 0);
            $display("ccl1rbspbyte 00");
         end
   endrule

   // End of input: drain the window one byte per firing, then forward
   // EndOfFile once the window is empty.
   rule endfileop(infifo.first() matches tagged EndOfFile);
      case ( bufcount )
         3:
            begin
               if(bufferc==0 && bufferb==0 && buffera<4)
                  begin
                     // whole window is discarded together with held zeros
                     bufcount <= 0;
                     zerocount <= 0;
                  end
               else if(zerocount==0)
                  begin
                     bufcount <= 2;
                     outfifo.enq(RbspByte bufferc);
                     $display("ccl1rbspbyte %h", bufferc);
                  end
               else
                  begin
                     zerocount <= zerocount-1;
                     outfifo.enq(RbspByte 0);
                     $display("ccl1rbspbyte 00");
                  end
            end
         2:
            begin
               bufcount <= 1;
               // Emit bufferb unless both remaining bytes are zero. The
               // trace line is printed only when a byte is really emitted.
               if(!(bufferb==0 && buffera==0))
                  begin
                     outfifo.enq(RbspByte bufferb);
                     $display("ccl1rbspbyte %h", bufferb);
                  end
            end
         1:
            begin
               bufcount <= 0;
               // Emit the last byte unless it is zero. The trace line is
               // printed only when a byte is really emitted.
               if(!(buffera==0))
                  begin
                     outfifo.enq(RbspByte buffera);
                     $display("ccl1rbspbyte %h", buffera);
                  end
            end
         0:
            begin
               infifo.deq();
               outfifo.enq(EndOfFile);
               $display("EndOfFile reached (NalUnwrap)");
            end
      endcase
   endrule

   interface Put ioin = fifoToPut(infifo);
   interface Get ioout = fifoToGet(outfifo);
endmodule
 
endpackage
/trunk/src/IMemED.bsv
0,0 → 1,21
//**********************************************************************
// Interface for Memory for Entropy Decoding
//----------------------------------------------------------------------
//
//
//
 
package IMemED;
 
import H264Types::*;
import ClientServer::*;
import GetPut::*;
 
// Memory interface used by entropy decoding, parameterised by the index
// type (index_size) and the data type (data_size) of the memory.
interface IMemED #(type index_size, type data_size);

// Interface from processor to cache:
// accepts MemReq#(index_size,data_size) requests and returns
// MemResp#(data_size) responses.
interface Server#(MemReq#(index_size,data_size),MemResp#(data_size)) mem_server;

endinterface
 
endpackage
/trunk/src/mkBufferControl.bsv
0,0 → 1,970
//**********************************************************************
// Buffer Controller
//----------------------------------------------------------------------
//
//
 
package mkBufferControl;
 
import H264Types::*;
 
import IBufferControl::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// State tag with four values: Idle, Y, U and V.
// NOTE(review): presumably Y/U/V name the colour plane currently being
// output by the buffer controller -- confirm against the rules that use it.
// The order of the tags determines the derived bit encoding.
typedef union tagged
{
void Idle; //not working on anything in particular
void Y;
void U;
void V;
}
Outprocess deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Short term pic list submodule
//-----------------------------------------------------------
 
// Internal state of mkShortTermPicList. Every interface method except
// deq() and resultSlot() is only enabled in Idle; the other states select
// the multi-cycle rule that is running.
// The order of the tags determines the derived bit encoding.
typedef union tagged
{
void Idle; //not working on anything in particular
void Remove;       // rule removing: looking for the entry, no result returned
void RemoveOutput; // rule removing: looking for the entry, its slot is returned
void RemoveFound;  // rule removing: entry found, compacting the list
void InsertGap;    // rule insertingGap: writing a run of entries
void Search;       // rule searching: looking up one frame number
void ListAll;      // rule listingAll: streaming out every slot
}
ShortTermPicListState deriving(Eq,Bits);
 
// List of short-term pictures, each stored as a (frame number, slot) pair.
// Results of search(), listAll() and remove(..,True) are delivered through
// resultSlot()/deq().
interface ShortTermPicList;
// Empties the list.
method Action clear();
// Appends (frameNum, slot); the picture count only grows while it is below
// maxAllowed.
method Action insert( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed );
// Appends a run of entries ending with (frameNum, slot), preceded by up to
// `gap` entries with consecutive frame numbers (modulo
// 2^log2_max_frame_num); the run length and the picture count are both
// capped at maxAllowed.
method Action insert_gap( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed, Bit#(16) gap, Bit#(5) log2_max_frame_num );
// Removes the entry with the given frame number; when removeOutputFlag is
// True its slot is also delivered through resultSlot().
method Action remove( Bit#(16) frameNum, Bool removeOutputFlag );
// Looks up frameNum and delivers its slot through resultSlot().
method Action search( Bit#(16) frameNum );
// Delivers every stored slot through resultSlot(), followed by Invalid.
method Action listAll();
// Discards the current head of the result queue.
method Action deq();
// Head of the result queue.
method Maybe#(Bit#(5)) resultSlot();
// Number of pictures currently in the list.
method Bit#(5) numPics();
endinterface
 
// Short-term picture list.
//
// The list is a circular buffer of (frame number, slot) pairs held in rfile
// at indices 0..maxRefFrames-1. nextPic is the index the next entry is
// written to and picCount is the number of valid entries, so the newest
// entry is at shortTermPicListPrev(nextPic).
//
// clear() and insert() complete in one cycle. insert_gap(), remove(),
// search() and listAll() only set up tempPic/tempCount/tempNum and change
// `state`; the matching rule then performs the work over several cycles and
// returns to Idle. All of these methods are only enabled while state is Idle.
module mkShortTermPicList( ShortTermPicList );
   // Index after addrFunc in the circular buffer.
   function Bit#(5) shortTermPicListNext( Bit#(5) addrFunc );
      if(addrFunc<maxRefFrames-1)
         return addrFunc+1;
      else
         return 0;
   endfunction
   // Index before addrFunc in the circular buffer.
   function Bit#(5) shortTermPicListPrev( Bit#(5) addrFunc );
      if(addrFunc==0)
         return maxRefFrames-1;
      else
         return addrFunc-1;
   endfunction

   RFile1#(Bit#(5),Tuple2#(Bit#(16),Bit#(5))) rfile <- mkRFile1(0,maxRefFrames-1);
   Reg#(ShortTermPicListState) state      <- mkReg(Idle);
   Reg#(Bit#(5))               log2_mfn   <- mkReg(0);  // log2_max_frame_num captured by insert_gap
   Reg#(Bit#(5))               nextPic    <- mkReg(0);  // next write index
   Reg#(Bit#(5))               picCount   <- mkReg(0);  // number of valid entries
   Reg#(Bit#(5))               tempPic    <- mkReg(0);  // walk index (slot of the real picture during InsertGap)
   Reg#(Bit#(5))               tempCount  <- mkReg(0);  // walk counter
   Reg#(Bit#(16))              tempNum    <- mkReg(0);  // frame number being searched for / written
   FIFO#(Maybe#(Bit#(5)))      returnList <- mkFIFO();

   // Removal: walk from the oldest entry until the frame number matches,
   // then (RemoveFound) move each later entry down by one position.
   rule removing ( state==Remove || state==RemoveOutput || state==RemoveFound );
      if(state!=RemoveFound)
         begin
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic);
            if(tpl_1(temp)==tempNum)
               begin
                  state <= RemoveFound;
                  if(state==RemoveOutput)
                     returnList.enq(Valid tpl_2(temp));
               end
            if(tempCount>=picCount)
               $display( "ERROR BufferControl: ShortTermPicList removing not found");
         end
      else
         begin
            Bit#(5) tempPrev = shortTermPicListPrev(tempPic);
            rfile.upd(tempPrev,rfile.sub(tempPic));
            if(tempCount==picCount)
               begin
                  // walked past the newest entry: the list is one shorter
                  picCount <= picCount-1;
                  nextPic <= tempPrev;
                  state <= Idle;
               end
         end
      tempCount <= tempCount+1;
      tempPic <= shortTermPicListNext(tempPic);
   endrule

   // Gap insertion: writes tempCount entries with consecutive frame numbers
   // starting at tempNum. All but the last get slot value 31; the last one
   // gets the slot saved in tempPic.
   rule insertingGap ( state matches tagged InsertGap );
      if(tempCount>0)
         begin
            if(tempCount>1)
               rfile.upd(nextPic,tuple2(tempNum,31));
            else
               rfile.upd(nextPic,tuple2(tempNum,tempPic));
            nextPic <= shortTermPicListNext(nextPic);
         end
      else
         state <= Idle;
      // advance the frame number modulo 2^log2_mfn
      Bit#(17) tempOne = 1;
      Bit#(17) maxPicNum = tempOne << log2_mfn;
      if(zeroExtend(tempNum) == maxPicNum-1)
         tempNum <= 0;
      else
         tempNum <= tempNum+1;
      tempCount <= tempCount-1;
   endrule

   // Search: walk from the newest entry backwards until tempNum is found.
   rule searching ( state matches tagged Search );
      if(tempCount<picCount)
         begin
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic);
            if(tpl_1(temp)==tempNum)
               begin
                  returnList.enq(Valid tpl_2(temp));
                  state <= Idle;
               end
            tempPic <= shortTermPicListPrev(tempPic);
            tempCount <= tempCount+1;
         end
      else
         $display( "ERROR BufferControl: ShortTermPicList searching not found");
   endrule

   // List all: emit every slot from newest to oldest, then Invalid.
   rule listingAll ( state matches tagged ListAll );
      if(tempCount<picCount)
         begin
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic);
            returnList.enq(Valid tpl_2(temp));
            tempPic <= shortTermPicListPrev(tempPic);
            tempCount <= tempCount+1;
         end
      else
         begin
            returnList.enq(Invalid);
            state <= Idle;
         end
   endrule

   method Action clear() if(state matches tagged Idle);
      picCount <= 0;
      nextPic <= 0;
   endmethod

   method Action insert( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed ) if(state matches tagged Idle);
      rfile.upd(nextPic,tuple2(frameNum,slot));
      nextPic <= shortTermPicListNext(nextPic);
      if(maxAllowed>picCount)
         picCount <= picCount+1;
   endmethod

   method Action insert_gap( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed, Bit#(16) gap, Bit#(5) log2_max_frame_num ) if(state matches tagged Idle);
      state <= InsertGap;
      log2_mfn <= log2_max_frame_num;
      // new picture count, capped at maxAllowed
      if(zeroExtend(picCount)+gap+1 >= zeroExtend(maxAllowed))
         picCount <= maxAllowed;
      else
         picCount <= truncate(zeroExtend(picCount)+gap+1);
      // number of entries to write (gap entries plus the real picture)
      Bit#(5) temp;
      if(gap+1 >= zeroExtend(maxAllowed))
         temp = maxAllowed;
      else
         temp = truncate(gap+1);
      tempCount <= temp;
      // Frame number of the first entry: frameNum+1-temp modulo maxPicNum.
      // No wrap is needed when frameNum+1 >= temp; in particular equality
      // must give 0, not maxPicNum, which would be outside 0..maxPicNum-1
      // and would never wrap in insertingGap.
      Bit#(17) tempOne = 1;
      Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
      Bit#(17) tempFrameNum = zeroExtend(frameNum);
      if(tempFrameNum+1 >= zeroExtend(temp))
         tempNum <= truncate(tempFrameNum+1-zeroExtend(temp));
      else
         tempNum <= truncate(maxPicNum+tempFrameNum+1-zeroExtend(temp));
      tempPic <= slot;
   endmethod

   method Action remove( Bit#(16) frameNum, Bool removeOutputFlag ) if(state matches tagged Idle);
      if(removeOutputFlag)
         state <= RemoveOutput;
      else
         state <= Remove;
      tempCount <= 0;
      // index of the oldest entry: nextPic-picCount modulo maxRefFrames
      Bit#(5) temp = (maxRefFrames-picCount)+nextPic;
      if(temp>maxRefFrames-1)
         tempPic <= temp-maxRefFrames;
      else
         tempPic <= temp;
      tempNum <= frameNum;
   endmethod

   method Action search( Bit#(16) frameNum ) if(state matches tagged Idle);
      state <= Search;
      tempCount <= 0;
      tempPic <= shortTermPicListPrev(nextPic);
      tempNum <= frameNum;
   endmethod

   method Action listAll() if(state matches tagged Idle);
      state <= ListAll;
      tempCount <= 0;
      tempPic <= shortTermPicListPrev(nextPic);
   endmethod

   method Action deq();
      returnList.deq();
   endmethod

   method Maybe#(Bit#(5)) resultSlot();
      return returnList.first();
   endmethod

   method Bit#(5) numPics() if(state matches tagged Idle);
      return picCount;
   endmethod
endmodule
 
//-----------------------------------------------------------
// Long term pic list submodule
//-----------------------------------------------------------
 
// Internal state of mkLongTermPicList. Every interface method except
// deq() and resultSlot() is only enabled in Idle.
// The order of the tags determines the derived bit encoding.
typedef union tagged
{
void Idle; //not working on anything in particular
void Clear;   // rule clearing: invalidating entries from tempPic upwards
void ListAll; // rule listingAll: streaming out every valid slot
}
LongTermPicListState deriving(Eq,Bits);
 
// Table of long-term pictures indexed by a 5-bit frame number, each entry
// optionally holding a slot. Results of search() and listAll() are
// delivered through resultSlot()/deq().
interface LongTermPicList;
// Invalidates all entries below maxRefFrames.
method Action clear();
// Stores slot at index frameNum (overwriting any previous entry).
method Action insert( Bit#(5) frameNum, Bit#(5) slot );
// Invalidates the entry at index frameNum.
method Action remove( Bit#(5) frameNum );
// Invalidates the entries from index maxAllowed up to maxRefFrames-1.
method Action maxIndexPlus1( Bit#(5) maxAllowed );
// Delivers the entry at index frameNum (Valid slot or Invalid).
method Action search( Bit#(5) frameNum );
// Delivers every valid slot in ascending index order, followed by Invalid.
method Action listAll();
// Discards the current head of the result queue.
method Action deq();
// Head of the result queue.
method Maybe#(Bit#(5)) resultSlot();
// Number of valid entries.
method Bit#(5) numPics();
endinterface
 
// Long-term picture list.
//
// rfile maps a 5-bit long-term frame index to an optional slot number and
// picCount tracks how many entries are valid. insert(), remove() and
// search() complete in one cycle; clear(), maxIndexPlus1() and listAll()
// start a multi-cycle walk over the indices tempPic..maxRefFrames-1 that is
// carried out by the rules below.
module mkLongTermPicList( LongTermPicList );
   RFile1#(Bit#(5),Maybe#(Bit#(5))) rfile      <- mkRFile1Full();
   Reg#(LongTermPicListState)       state      <- mkReg(Idle);
   Reg#(Bit#(5))                    picCount   <- mkReg(0);
   Reg#(Bit#(5))                    tempPic    <- mkReg(0);
   FIFO#(Maybe#(Bit#(5)))           returnList <- mkFIFO();

   // Invalidate one entry per cycle, starting at tempPic, and keep picCount
   // in step with the number of valid entries removed.
   rule clearing ( state == Clear );
      if(tempPic >= maxRefFrames)
         state <= Idle;
      else
         begin
            Maybe#(Bit#(5)) entry = rfile.sub(tempPic);
            if(isValid(entry) && picCount != 0)
               picCount <= picCount-1;
            rfile.upd(tempPic,Invalid);
            tempPic <= tempPic+1;
         end
   endrule

   // Visit one entry per cycle, forwarding the valid ones; an Invalid item
   // marks the end of the listing.
   rule listingAll ( state == ListAll );
      if(tempPic >= maxRefFrames)
         begin
            returnList.enq(Invalid);
            state <= Idle;
         end
      else
         begin
            Maybe#(Bit#(5)) entry = rfile.sub(tempPic);
            if(isValid(entry))
               returnList.enq(entry);
            tempPic <= tempPic+1;
         end
   endrule

   // Start clearing from index 0.
   method Action clear() if(state == Idle);
      tempPic <= 0;
      state <= Clear;
   endmethod

   // Store slot at frameNum; the count only grows for a previously empty entry.
   method Action insert( Bit#(5) frameNum, Bit#(5) slot ) if(state == Idle);
      if(!isValid(rfile.sub(frameNum)))
         picCount <= picCount+1;
      rfile.upd(frameNum,tagged Valid slot);
   endmethod

   // Invalidate the entry at frameNum; report an error if it was empty.
   method Action remove( Bit#(5) frameNum ) if(state == Idle);
      if(isValid(rfile.sub(frameNum)))
         picCount <= picCount-1;
      else
         $display( "ERROR BufferControl: LongTermPicList removing not found");
      rfile.upd(frameNum,Invalid);
   endmethod

   // Start clearing from the given index upwards.
   method Action maxIndexPlus1( Bit#(5) index ) if(state == Idle);
      tempPic <= index;
      state <= Clear;
   endmethod

   // Return the entry at frameNum as is.
   method Action search( Bit#(5) frameNum ) if(state == Idle);
      Maybe#(Bit#(5)) entry = rfile.sub(frameNum);
      returnList.enq(entry);
   endmethod

   // Start listing from index 0.
   method Action listAll() if(state == Idle);
      tempPic <= 0;
      state <= ListAll;
   endmethod

   method Action deq();
      returnList.deq();
   endmethod

   method Maybe#(Bit#(5)) resultSlot();
      return returnList.first();
   endmethod

   method Bit#(5) numPics() if(state == Idle);
      return picCount;
   endmethod
endmodule
 
 
//-----------------------------------------------------------
// Free slot module
//-----------------------------------------------------------
 
// Bookkeeping of which frame-buffer slots are free.
interface FreeSlots;
// Marks every slot as free.
method Action init();
// Marks `slot` as free.
method Action add( Bit#(5) slot );
// Marks `slot` as not free; the value 31 is ignored.
method Action remove( Bit#(5) slot );
// Returns the lowest-numbered free slot other than `exception`,
// or 31 when there is none.
method Bit#(5) first( Bit#(5) exception );
endinterface
 
// Free-slot tracker: one bit per slot for 18 slots, 0 = free, 1 = not free.
// The register has no reset value, so init() has to be called before the
// contents are meaningful.
module mkFreeSlots( FreeSlots );
   Reg#(Vector#(18,Bit#(1))) slots <- mkRegU();

   // Mark all 18 slots as free.
   method Action init();
      Vector#(18,Bit#(1)) allFree = replicate(0);
      slots <= allFree;
   endmethod

   // Mark `slot` as free; indices of maxRefFrames+2 and above are reported.
   method Action add( Bit#(5) slot );
      if(slot >= maxRefFrames+2)
         $display( "ERROR BufferControl: FreeSlots add out of bounds");
      Vector#(18,Bit#(1)) updated = slots;
      updated[slot] = 0;
      slots <= updated;
   endmethod

   // Mark `slot` as not free. The value 31 is silently ignored; other
   // indices of maxRefFrames+2 and above are reported.
   method Action remove( Bit#(5) slot );
      if(slot != 31)
         begin
            if(slot >= maxRefFrames+2)
               $display( "ERROR BufferControl: FreeSlots remove out of bounds");
            Vector#(18,Bit#(1)) updated = slots;
            updated[slot] = 1;
            slots <= updated;
         end
   endmethod

   // Lowest-numbered free slot that differs from `exception`; 31 if none.
   method Bit#(5) first( Bit#(5) exception );
      Bit#(5) candidate = 31;
      Bool    found     = False;
      for(Integer idx=0; idx<18; idx=idx+1)
         begin
            if(!found && slots[fromInteger(idx)]==1'b0 && fromInteger(idx)!=exception)
               begin
                  candidate = fromInteger(idx);
                  found     = True;
               end
         end
      return candidate;
   endmethod
endmodule
 
 
//-----------------------------------------------------------
// Helper functions
 
 
 
//-----------------------------------------------------------
// Buffer Controller Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkBufferControl( IBufferControl );
 
FIFO#(DeblockFilterOT) infifo <- mkSizedFIFO(bufferControl_infifo_size);
FIFO#(BufferControlOT) outfifo <- mkFIFO();
 
FIFO#(FrameBufferLoadReq) loadReqQ1 <- mkFIFO();
FIFO#(FrameBufferLoadResp) loadRespQ1 <- mkFIFO();
FIFO#(FrameBufferLoadReq) loadReqQ2 <- mkFIFO();
FIFO#(FrameBufferLoadResp) loadRespQ2 <- mkFIFO();
FIFO#(FrameBufferStoreReq) storeReqQ <- mkFIFO();
 
FIFO#(InterpolatorLoadReq) inLoadReqQ <- mkFIFO();
FIFO#(InterpolatorLoadResp) inLoadRespQ <- mkFIFO();
FIFO#(Bit#(2)) inLoadOutOfBounds <- mkSizedFIFO(64);
 
Reg#(Bit#(5)) log2_max_frame_num <- mkReg(0);
Reg#(Bit#(5)) num_ref_frames <- mkReg(0);
Reg#(Bit#(1)) gaps_in_frame_num_allowed_flag <- mkReg(0);
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
Reg#(Bit#(PicAreaSz)) frameinmb <- mkReg(0);
 
Reg#(Bit#(5)) ppsnum_ref_idx_l0_active <- mkReg(0);
Reg#(Bit#(16)) frame_num <- mkReg(0);
Reg#(Bit#(16)) prevRefFrameNum <- mkReg(0);
Reg#(Bit#(5)) num_ref_idx_l0_active <- mkReg(0);
Reg#(Bit#(2)) reordering_of_pic_nums_idc <- mkReg(0);
Reg#(Bit#(16)) picNumLXPred <- mkReg(0);
Reg#(Bit#(3)) memory_management_control_operation <- mkReg(0);
 
Reg#(Bool) newInputFrame <- mkReg(True);
Reg#(Bool) noMoreInput <- mkReg(False);
Reg#(Bool) inputframedone <- mkReg(False);
Reg#(Outprocess) outprocess <- mkReg(Idle);
Reg#(Bool) outputframedone <- mkReg(True);
 
Reg#(Bit#(5)) inSlot <- mkReg(0);
Reg#(Bit#(FrameBufferSz)) inAddrBase <- mkReg(0);
Reg#(Bit#(5)) outSlot <- mkReg(31);
Reg#(Bit#(FrameBufferSz)) outAddrBase <- mkReg(0);
Reg#(Bit#(TAdd#(PicAreaSz,7))) outReqCount <- mkReg(0);
Reg#(Bit#(TAdd#(PicAreaSz,7))) outRespCount <- mkReg(0);
FreeSlots freeSlots <- mkFreeSlots();//may include outSlot (have to make sure it's not used)
ShortTermPicList shortTermPicList <- mkShortTermPicList();
LongTermPicList longTermPicList <- mkLongTermPicList();
RFile1#(Bit#(5),Bit#(5)) refPicList <- mkRFile1(0,maxRefFrames-1);
Reg#(Bit#(5)) refPicListCount <- mkReg(0);
Reg#(Bool) initRefPicList <- mkReg(False);
Reg#(Bool) reorderRefPicList <- mkReg(False);
Reg#(Bit#(5)) refIdx <- mkReg(0);
Reg#(Bit#(5)) tempSlot <- mkReg(0);
Reg#(Bit#(5)) tempSlot2 <- mkReg(0);
Reg#(Bit#(2)) adjustFreeSlots <- mkReg(0);
 
Reg#(Bool) refPicListDone <- mkReg(False);
Reg#(Bool) lockInterLoads <- mkReg(True);
DoNotFire donotfire <- mkDoNotFire();
 
//-----------------------------------------------------------
// Rules
rule inputing ( !noMoreInput && !inputframedone );
//$display( "Trace Buffer Control: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged EDOT .indata :
begin
case (indata) matches
tagged SPSlog2_max_frame_num .xdata :
begin
if(adjustFreeSlots == 0)
begin
infifo.deq();
log2_max_frame_num <= xdata;
freeSlots.init();
shortTermPicList.clear();
longTermPicList.clear();
end
else
donotfire.doNotFire();
end
tagged SPSnum_ref_frames .xdata :
begin
infifo.deq();
num_ref_frames <= xdata;
end
tagged SPSgaps_in_frame_num_allowed_flag .xdata :
begin
infifo.deq();
gaps_in_frame_num_allowed_flag <= xdata;
end
tagged SPSpic_width_in_mbs .xdata :
begin
infifo.deq();
picWidth <= xdata;
end
tagged SPSpic_height_in_map_units .xdata :
begin
infifo.deq();
picHeight <= xdata;
frameinmb <= zeroExtend(picWidth)*zeroExtend(xdata);
end
tagged PPSnum_ref_idx_l0_active .xdata :
begin
infifo.deq();
ppsnum_ref_idx_l0_active <= xdata;
end
tagged SHfirst_mb_in_slice .xdata :
begin
if(adjustFreeSlots == 0)
begin
infifo.deq();
newInputFrame <= False;
shortTermPicList.listAll();
longTermPicList.listAll();
initRefPicList <= True;
refPicListCount <= 0;
if(newInputFrame)
begin
inSlot <= freeSlots.first(outSlot);
inAddrBase <= (zeroExtend(freeSlots.first(outSlot))*zeroExtend(frameinmb)*3)<<5;
end
$display( "Trace BufferControl: passing SHfirst_mb_in_slice %h %h %0d", freeSlots.first(outSlot), outSlot, (newInputFrame ? 1 : 0));
end
else
donotfire.doNotFire();
end
tagged SHframe_num .xdata :
begin
infifo.deq();
frame_num <= xdata;
picNumLXPred <= frame_num;
end
tagged SHnum_ref_idx_active_override_flag .xdata :
begin
infifo.deq();
num_ref_idx_l0_active <= ppsnum_ref_idx_l0_active;
end
tagged SHnum_ref_idx_l0_active .xdata :
begin
infifo.deq();
num_ref_idx_l0_active <= xdata;
end
tagged SHRref_pic_list_reordering_flag_l0 .xdata :
begin
if(!initRefPicList)
begin
infifo.deq();
if(xdata==0)
refPicListDone <= True;
end
else
donotfire.doNotFire();
refIdx <= 0;
end
tagged SHRreordering_of_pic_nums_idc .xdata :
begin
if(!reorderRefPicList)
begin
infifo.deq();
reordering_of_pic_nums_idc <= xdata;
if(xdata==3)
refPicListDone <= True;
end
else
donotfire.doNotFire();
end
tagged SHRabs_diff_pic_num .xdata :
begin
if(!reorderRefPicList)
begin
infifo.deq();
Bit#(16) picNumLXNoWrap;
Bit#(17) tempOne = 1;
Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
if(reordering_of_pic_nums_idc==0)
begin
if(picNumLXPred < truncate(xdata))
picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)-xdata+maxPicNum);
else
picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)-xdata);
end
else
begin
if(zeroExtend(picNumLXPred)+xdata >= maxPicNum)
picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)+xdata-maxPicNum);
else
picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)+xdata);
end
picNumLXPred <= picNumLXNoWrap;
shortTermPicList.search(picNumLXNoWrap);
reorderRefPicList <= True;
refPicListCount <= 0;
end
else
donotfire.doNotFire();
end
tagged SHRlong_term_pic_num .xdata :
begin
if(!reorderRefPicList)
begin
infifo.deq();
longTermPicList.search(xdata);
reorderRefPicList <= True;
refPicListCount <= 0;
end
else
donotfire.doNotFire();
end
tagged SHDlong_term_reference_flag .xdata :
begin
infifo.deq();
if(xdata==0)
shortTermPicList.insert(frame_num,inSlot,num_ref_frames);
else
longTermPicList.insert(0,inSlot);
adjustFreeSlots <= 1;
end
tagged SHDadaptive_ref_pic_marking_mode_flag .xdata :
begin
infifo.deq();
Bit#(17) tempFrameNum = zeroExtend(frame_num);
Bit#(17) tempOne = 1;
Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
Bit#(16) tempGap = 0;
if(frame_num < prevRefFrameNum)
tempFrameNum = tempFrameNum + maxPicNum;
if(tempFrameNum-zeroExtend(prevRefFrameNum) > 1)
tempGap = truncate(tempFrameNum-zeroExtend(prevRefFrameNum)-1);
if(xdata==0)
begin
if(tempGap==0)
shortTermPicList.insert(frame_num,inSlot,(num_ref_frames-longTermPicList.numPics()));
else
shortTermPicList.insert_gap(frame_num,inSlot,(num_ref_frames-longTermPicList.numPics()),tempGap,log2_max_frame_num);
adjustFreeSlots <= 1;
end
prevRefFrameNum <= frame_num;
end
tagged SHDmemory_management_control_operation .xdata :
begin
infifo.deq();
memory_management_control_operation <= xdata;
if(xdata==0)
adjustFreeSlots <= 1;
else if(xdata==5)
begin
shortTermPicList.clear();
longTermPicList.clear();
end
end
tagged SHDdifference_of_pic_nums .xdata :
begin
infifo.deq();
Bit#(16) picNumXNoWrap;
Bit#(17) tempOne = 1;
Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
if(frame_num < truncate(xdata))
picNumXNoWrap = truncate(zeroExtend(frame_num)-xdata+maxPicNum);
else
picNumXNoWrap = truncate(zeroExtend(frame_num)-xdata);
if(memory_management_control_operation == 1)
shortTermPicList.remove(picNumXNoWrap,False);
else
shortTermPicList.remove(picNumXNoWrap,True);
end
tagged SHDlong_term_pic_num .xdata :
begin
infifo.deq();
longTermPicList.remove(xdata);
end
tagged SHDlong_term_frame_idx .xdata :
begin
infifo.deq();
if(memory_management_control_operation == 3)
begin
if(shortTermPicList.resultSlot() matches tagged Valid .validdata)
longTermPicList.insert(xdata,validdata);
else
$display( "ERROR BufferControl: SHDlong_term_frame_idx Invalid output from shortTermPicList");
shortTermPicList.deq();
end
else
longTermPicList.insert(xdata,inSlot);
end
tagged SHDmax_long_term_frame_idx_plus1 .xdata :
begin
infifo.deq();
longTermPicList.maxIndexPlus1(xdata);
end
tagged EndOfFile :
begin
infifo.deq();
$display( "INFO Buffer Control: EndOfFile reached");
noMoreInput <= True;
//$finish(0);
//outfifo.enq(EndOfFile);
end
default: infifo.deq();
endcase
end
tagged DFBLuma .indata :
begin
infifo.deq();
//$display( "TRACE Buffer Control: input Luma %0d %h %h", indata.mb, indata.pixel, indata.data);
Bit#(TAdd#(PicAreaSz,6)) addr = {(zeroExtend(indata.ver)*zeroExtend(picWidth)),2'b00}+zeroExtend(indata.hor);
storeReqQ.enq(FBStoreReq {addr:inAddrBase+zeroExtend(addr),data:indata.data});
end
tagged DFBChroma .indata :
begin
infifo.deq();
Bit#(TAdd#(PicAreaSz,4)) addr = {(zeroExtend(indata.ver)*zeroExtend(picWidth)),1'b0}+zeroExtend(indata.hor);
Bit#(TAdd#(PicAreaSz,6)) chromaOffset = {frameinmb,6'b000000};
Bit#(TAdd#(PicAreaSz,4)) vOffset = 0;
if(indata.uv == 1)
vOffset = {frameinmb,4'b0000};
storeReqQ.enq(FBStoreReq {addr:(inAddrBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(addr)),data:indata.data});
//$display( "TRACE Buffer Control: input Chroma %0d %0h %h %h %h %h", indata.uv, indata.ver, indata.hor, indata.data, addr, (inAddrBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(addr)));
end
tagged EndOfFrame :
begin
infifo.deq();
$display( "INFO Buffer Control: EndOfFrame reached");
inputframedone <= True;
newInputFrame <= True;
refPicListDone <= False;
end
default: infifo.deq();
endcase
endrule
 
rule initingRefPicList ( initRefPicList );
   // Fills refPicList one entry per firing while initRefPicList is set.
   // Slots reported Valid by the short-term list are consumed first, then
   // those of the long-term list; when neither reports a Valid slot the
   // rule dequeues both lists once more and ends the initialisation.
   let nextCount = refPicListCount+1;
   if(shortTermPicList.resultSlot() matches tagged Valid .shortSlot)
      begin
         refPicList.upd(refPicListCount,shortSlot);
         refPicListCount <= nextCount;
         shortTermPicList.deq();
         $display( "Trace BufferControl: initingRefPicList shortTermPicList %h", shortSlot);
      end
   else if(longTermPicList.resultSlot() matches tagged Valid .longSlot)
      begin
         refPicList.upd(refPicListCount,longSlot);
         refPicListCount <= nextCount;
         longTermPicList.deq();
         $display( "Trace BufferControl: initingRefPicList longTermPicList %h", longSlot);
      end
   else
      begin
         // Nothing left in either list: reset the counter and stop.
         initRefPicList <= False;
         refPicListCount <= 0;
         shortTermPicList.deq();
         longTermPicList.deq();
         $display( "Trace BufferControl: initingRefPicList end");
      end
endrule
 
// Applies one reordering step to refPicList while reorderRefPicList is set.
// Phase 1 (a picture list reports a Valid slot): the slot is written at
// position refIdx, the entry previously stored there is saved in tempSlot,
// the new slot is remembered in tempSlot2, and refPicListCount is set to
// refIdx+1.
// Phase 2 (no Valid slot pending): the saved entry is pushed down the list
// one position per firing until refPicListCount reaches
// num_ref_idx_l0_active or the displaced entry equals the inserted slot
// (tempSlot == tempSlot2); then the rule clears reorderRefPicList, resets
// refPicListCount and advances refIdx.
rule reorderingRefPicList ( reorderRefPicList );
$display( "Trace BufferControl: reorderingRefPicList");
if(shortTermPicList.resultSlot() matches tagged Valid .xdata)//////////////////////////////////////////////////////////////////////////////////////////
begin
shortTermPicList.deq();
// Save the entry being overwritten so phase 2 can shift it down.
tempSlot <= refPicList.sub(refIdx);
refPicList.upd(refIdx,xdata);
refPicListCount <= refIdx+1;
tempSlot2 <= xdata;
end
else if(longTermPicList.resultSlot() matches tagged Valid .xdata)/////////////////////////////////////////////////////////////////////////////////////may get stuck?
begin
longTermPicList.deq();
// Same insertion as the short-term case, fed from the long-term list.
tempSlot <= refPicList.sub(refIdx);
refPicList.upd(refIdx,xdata);
refPicListCount <= refIdx+1;
tempSlot2 <= xdata;
end
else
begin
if(refPicListCount<num_ref_idx_l0_active && tempSlot!=tempSlot2)
begin
// Shift step: store the displaced entry here and pick up the one it displaces.
tempSlot <= refPicList.sub(refPicListCount);
refPicList.upd(refPicListCount,tempSlot);
refPicListCount <= refPicListCount+1;
end
else
begin
// Shifting finished for this reordering command.
reorderRefPicList <= False;
refPicListCount <= 0;
refIdx <= refIdx+1;
end
end
endrule
 
rule adjustingFreeSlots ( adjustFreeSlots != 0 );
   // Rebuilds the free-slot set in two phases selected by adjustFreeSlots:
   //   1 : ask both picture lists to enumerate their entries and
   //       re-initialise freeSlots, then move to phase 2;
   //   otherwise : remove every slot the lists report as Valid from
   //       freeSlots (short-term first), and return to 0 when neither
   //       list reports a Valid slot.
   if(adjustFreeSlots != 1)
      begin
         if(shortTermPicList.resultSlot() matches tagged Valid .usedShort)
            begin
               freeSlots.remove(usedShort);
               shortTermPicList.deq();
               $display( "Trace BufferControl: adjustingFreeSlots shortTermPicList %h", usedShort);
            end
         else if(longTermPicList.resultSlot() matches tagged Valid .usedLong)
            begin
               freeSlots.remove(usedLong);
               longTermPicList.deq();
               $display( "Trace BufferControl: adjustingFreeSlots longTermPicList %h", usedLong);
            end
         else
            begin
               adjustFreeSlots <= 0;
               shortTermPicList.deq();
               longTermPicList.deq();
               $display( "Trace BufferControl: adjustingFreeSlots end");
            end
      end
   else
      begin
         freeSlots.init();
         shortTermPicList.listAll();
         longTermPicList.listAll();
         adjustFreeSlots <= 2;
         $display( "Trace BufferControl: adjustingFreeSlots begin");
      end
endrule
 
rule outputingReq ( outprocess != Idle );
   // Issues one frame-buffer load request per firing for the frame being
   // output, walking outReqCount through the Y, U and V regions in turn.
   // The phase advances when the request just issued is the last one of
   // the current region:
   //   Y ends at frameinmb*64 - 1
   //   U ends at frameinmb*64 + frameinmb*16 - 1
   //   V ends at frameinmb*64 + frameinmb*32 - 1
   //$display( "TRACE BufferControl: outputingReq V %h %h %h", outAddrBase, outReqCount, (outAddrBase+zeroExtend(outReqCount)));
   loadReqQ1.enq(FBLoadReq (outAddrBase+zeroExtend(outReqCount)));
   outReqCount <= outReqCount+1;
   case (outprocess)
      Y:
         begin
            if(outReqCount == {1'b0,frameinmb,6'b000000}-1)
               outprocess <= U;
         end
      U:
         begin
            if(outReqCount == {1'b0,frameinmb,6'b000000}+{3'b000,frameinmb,4'b0000}-1)
               outprocess <= V;
         end
      default:
         begin
            if(outReqCount == {1'b0,frameinmb,6'b000000}+{2'b00,frameinmb,5'b00000}-1)
               outprocess <= Idle;
         end
   endcase
endrule
 
rule outputingResp ( !outputframedone );
   // Forwards each frame-buffer load response to the output FIFO as a YUV
   // word and counts it; the response with index
   // frameinmb*64 + frameinmb*32 - 1 marks the frame as fully output.
   if(loadRespQ1.first() matches tagged FBLoadResp .word)
      begin
         outfifo.enq(YUV word);
         loadRespQ1.deq();
         outRespCount <= outRespCount+1;
         if(outRespCount == {1'b0,frameinmb,6'b000000}+{2'b00,frameinmb,5'b00000}-1)
            outputframedone <= True;
      end
endrule
 
 
rule goToNextFrame ( outputframedone && inputframedone && inLoadReqQ.first()==IPLoadEndFrame );
   // Frame hand-over: fires once the previous frame is fully output, the
   // current frame is fully stored, and the inter-prediction client has
   // sent its end-of-frame marker.

   // Consume the marker and place a sync token on every frame-buffer port.
   inLoadReqQ.deq();
   storeReqQ.enq(FBEndFrameSync);
   loadReqQ1.enq(FBEndFrameSync);
   loadReqQ2.enq(FBEndFrameSync);

   // The frame that was just written becomes the frame to output.
   outSlot <= inSlot;
   outAddrBase <= inAddrBase;
   outReqCount <= 0;
   outRespCount <= 0;
   outprocess <= Y;

   // Re-arm the per-frame flags; inter loads stay blocked until
   // unlockInterLoads sees refPicListDone.
   outputframedone <= False;
   inputframedone <= False;
   lockInterLoads <= True;
endrule
 
 
// Releases the lock taken by goToNextFrame once refPicListDone is set, so
// that interLumaReq / interChromaReq may serve reference loads again.
rule unlockInterLoads ( lockInterLoads && refPicListDone );
lockInterLoads <= False;
endrule
 
 
// Emits EndOfFile on the output FIFO once the last frame has been output
// and the input side has reported end of file (noMoreInput).
// NOTE(review): this rule does not clear either flag, so as written it can
// fire repeatedly whenever outfifo has room - presumably the consumer
// stops on the first EndOfFile; confirm.
rule theEndOfFile ( outputframedone && noMoreInput );
outfifo.enq(EndOfFile);
endrule
 
 
rule interLumaReq ( inLoadReqQ.first() matches tagged IPLoadLuma .reqdata &&& !lockInterLoads );
   // Translates a luma reference-load request into a frame-buffer load on
   // port 2.  The word address is
   //    slot*frameinmb*96  +  ver*picWidth*4 + hor
   // where slot is looked up in refPicList by the request's refIdx.

   // Offset of the requested word inside the luma plane.
   Bit#(TAdd#(PicAreaSz,6)) wordOffset = {(zeroExtend(reqdata.ver)*zeroExtend(picWidth)),2'b00}+zeroExtend(reqdata.hor);

   // Base address of the reference frame held in the selected slot.
   Bit#(5) refSlot = refPicList.sub(zeroExtend(reqdata.refIdx));
   Bit#(FrameBufferSz) frameBase = (zeroExtend(refSlot)*zeroExtend(frameinmb)*3)<<5;

   loadReqQ2.enq(FBLoadReq (frameBase+zeroExtend(wordOffset)));

   // Remember the out-of-bounds flag plus which side (hor==0 or not) so
   // that interResp can patch the returned word.
   inLoadOutOfBounds.enq({reqdata.horOutOfBounds,(reqdata.hor!=0 ? 1 : 0)});
   inLoadReqQ.deq();
   //$display( "Trace BufferControl: interLumaReq %h %h %h %h %h", reqdata.refIdx, refSlot, frameBase, wordOffset, frameBase+zeroExtend(wordOffset));
endrule
 
 
rule interChromaReq ( inLoadReqQ.first() matches tagged IPLoadChroma .reqdata &&& !lockInterLoads );
   // Translates a chroma reference-load request into a frame-buffer load
   // on port 2.  The word address is
   //    slot*frameinmb*96 + frameinmb*64 (+ frameinmb*16 when uv==1)
   //                      + ver*picWidth*2 + hor

   // Offset of the requested word inside the selected chroma plane.
   Bit#(TAdd#(PicAreaSz,6)) wordOffset = {(zeroExtend(reqdata.ver)*zeroExtend(picWidth)),1'b0}+zeroExtend(reqdata.hor);

   // Plane offsets: chroma starts after the luma plane; the uv==1 plane
   // follows the uv==0 plane.
   Bit#(TAdd#(PicAreaSz,6)) chromaOffset = {frameinmb,6'b000000};
   Bit#(TAdd#(PicAreaSz,4)) vOffset = (reqdata.uv == 1) ? {frameinmb,4'b0000} : 0;

   // Base address of the reference frame held in the selected slot.
   Bit#(5) refSlot = refPicList.sub(zeroExtend(reqdata.refIdx));
   Bit#(FrameBufferSz) frameBase = (zeroExtend(refSlot)*zeroExtend(frameinmb)*3)<<5;

   loadReqQ2.enq(FBLoadReq (frameBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(wordOffset)));

   // Remember the out-of-bounds flag plus which side (hor==0 or not) so
   // that interResp can patch the returned word.
   inLoadOutOfBounds.enq({reqdata.horOutOfBounds,(reqdata.hor!=0 ? 1 : 0)});
   inLoadReqQ.deq();
   //$display( "Trace BufferControl: interChromaReq %h %h %h %h %h", reqdata.refIdx, refSlot, frameBase, wordOffset, frameBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(wordOffset));
endrule
 
rule interResp ( loadRespQ2.first() matches tagged FBLoadResp .data );
   // Returns a reference-load response to the inter-prediction client.
   // The tag queued by interLumaReq / interChromaReq selects the fix-up:
   //   2'b10 : replicate byte data[7:0] across the word
   //   2'b11 : replicate byte data[31:24] across the word
   //   other : pass the word through unchanged
   let boundsTag = inLoadOutOfBounds.first();
   let respWord = data;
   if(boundsTag == 2'b10)
      respWord = {data[7:0],data[7:0],data[7:0],data[7:0]};
   else if(boundsTag == 2'b11)
      respWord = {data[31:24],data[31:24],data[31:24],data[31:24]};
   inLoadRespQ.enq(IPLoadResp respWord);
   inLoadOutOfBounds.deq();
   loadRespQ2.deq();
   //$display( "Trace BufferControl: interResp %h %h", inLoadOutOfBounds.first(), data);
endrule
 
 
// Module interface: every port is a thin Get/Put wrapper around one of the
// internal FIFOs.

// Decoded data in (infifo), output stream out (outfifo).
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
// Frame-buffer load port 1: used by outputingReq / outputingResp.
interface Client buffer_client_load1;
interface Get request = fifoToGet(loadReqQ1);
interface Put response = fifoToPut(loadRespQ1);
endinterface
// Frame-buffer load port 2: used by the inter-prediction load rules.
interface Client buffer_client_load2;
interface Get request = fifoToGet(loadReqQ2);
interface Put response = fifoToPut(loadRespQ2);
endinterface
// Frame-buffer store port.
interface Get buffer_client_store = fifoToGet(storeReqQ);
// Server side of the inter-prediction reference-load channel.
interface Server inter_server;
interface Put request = fifoToPut(inLoadReqQ);
interface Get response = fifoToGet(inLoadRespQ);
endinterface
 
endmodule
 
endpackage
/trunk/src/mkTH.bsv
0,0 → 1,68
//**********************************************************************
// H264 Test Bench
//----------------------------------------------------------------------
//
//
 
package mkTH;
 
import H264Types::*;
import IMemED::*;
import IFrameBuffer::*;
import IInputGen::*;
import IFinalOutput::*;
import IH264::*;
import mkMemED::*;
import mkFrameBuffer::*;
import mkInputGen::*;
import mkFinalOutput::*;
import mkH264::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
(* synthesize *)
module mkTH( Empty );

   // Simulation stops with an error once cyclecount exceeds this value.
   Bit#(32) timeoutCycles = 60000000;

   // ---- Design under test and its environment ----
   IInputGen                        inputgen       <- mkInputGen();
   IH264                            h264           <- mkH264();
   IMemED#(TAdd#(PicWidthSz,1),20)  memED          <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,2),68)  memP_intra     <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,2),32)  memP_inter     <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,5),32)  memD_data      <- mkMemED();
   IMemED#(PicWidthSz,13)           memD_parameter <- mkMemED();
   IFrameBuffer                     framebuffer    <- mkFrameBuffer();
   IFinalOutput                     finaloutput    <- mkFinalOutput();

   // ---- Free-running cycle counter ----
   Reg#(Bit#(32)) cyclecount <- mkReg(0);

   // Prints the counter every 32 cycles and aborts the run on time-out.
   rule countCycles ( True );
      Bit#(5) lowBits = cyclecount[4:0];
      if(lowBits==0)
         $display( "CCLCycleCount %0d", cyclecount );
      if(cyclecount > timeoutCycles)
         begin
            $display( "ERROR mkTH: time out" );
            $finish(0);
         end
      cyclecount <= cyclecount+1;
   endrule

   // ---- Wiring ----
   // Stream in / stream out
   mkConnection( inputgen.ioout, h264.ioin );
   // Scratch memories of the individual decoder stages
   mkConnection( h264.mem_clientED, memED.mem_server );
   mkConnection( h264.mem_clientP_intra, memP_intra.mem_server );
   mkConnection( h264.mem_clientP_inter, memP_inter.mem_server );
   mkConnection( h264.mem_clientD_data, memD_data.mem_server );
   mkConnection( h264.mem_clientD_parameter, memD_parameter.mem_server );
   // Frame buffer ports
   mkConnection( h264.buffer_client_load1, framebuffer.server_load1 );
   mkConnection( h264.buffer_client_load2, framebuffer.server_load2 );
   mkConnection( h264.buffer_client_store, framebuffer.server_store );
   // Decoded output
   mkConnection( h264.ioout, finaloutput.ioin );
endmodule
 
endpackage
/trunk/src/mkInputGen_x264.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
module mkInputGen( IInputGen );

   // Number of bytes read from the hex image; the register file is loaded
   // for indices 0 .. streamBytes-1.
   Bit#(27) streamBytes = 7408;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("x264foreman_qcif1-5.hex", 0, streamBytes-1);
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Streams the file out one byte per firing.
   rule output_byte (index < streamBytes);
      //$display( "ccl0inputbyte %x", rfile.sub(index) );
      index <= index+1;
      outfifo.enq(DataByte rfile.sub(index));
   endrule

   // After the last byte, keeps offering EndOfFile to the consumer.
   rule end_of_file (index == streamBytes);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkH264.bsv
0,0 → 1,68
//**********************************************************************
// H264 Main Module
//----------------------------------------------------------------------
//
//
 
package mkH264;
 
import H264Types::*;
import IH264::*;
import INalUnwrap::*;
import IEntropyDec::*;
import IInverseTrans::*;
import IPrediction::*;
import IDeblockFilter::*;
import IBufferControl::*;
import mkNalUnwrap::*;
import mkEntropyDec::*;
import mkInverseTrans::*;
import mkPrediction::*;
import mkDeblockFilter::*;
import mkBufferControl::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
(* synthesize *)
module mkH264( IH264 );

   // ---- Decoder stages ----
   INalUnwrap      nalunwrap     <- mkNalUnwrap();
   IEntropyDec     entropydec    <- mkEntropyDec();
   IInverseTrans   inversetrans  <- mkInverseTrans();
   IPrediction     prediction    <- mkPrediction();
   IDeblockFilter  deblockfilter <- mkDeblockFilter();
   IBufferControl  buffercontrol <- mkBufferControl();

   // ---- Stage-to-stage connections ----
   // Main stream: NAL unwrap -> entropy decode -> prediction -> deblock -> buffer control
   mkConnection( nalunwrap.ioout, entropydec.ioin );
   mkConnection( entropydec.ioout, prediction.ioin );
   mkConnection( prediction.ioout, deblockfilter.ioin );
   mkConnection( deblockfilter.ioout, buffercontrol.ioin );
   // Residual path: entropy decode -> inverse transform -> prediction
   mkConnection( entropydec.ioout_InverseTrans, inversetrans.ioin );
   mkConnection( inversetrans.ioout, prediction.ioin_InverseTrans );
   // Reference-picture loads issued by prediction, served by buffer control
   mkConnection( prediction.mem_client_buffer, buffercontrol.inter_server );

   // ---- External interface ----
   // Byte stream in, decoded output out
   interface ioin  = nalunwrap.ioin;
   interface ioout = buffercontrol.ioout;
   // Per-stage memories
   interface mem_clientED          = entropydec.mem_client;
   interface mem_clientP_intra     = prediction.mem_client_intra;
   interface mem_clientP_inter     = prediction.mem_client_inter;
   interface mem_clientD_data      = deblockfilter.mem_client_data;
   interface mem_clientD_parameter = deblockfilter.mem_client_parameter;
   // Frame buffer ports
   interface buffer_client_load1 = buffercontrol.buffer_client_load1;
   interface buffer_client_load2 = buffercontrol.buffer_client_load2;
   interface buffer_client_store = buffercontrol.buffer_client_store;
endmodule
 
endpackage
/trunk/src/mkInverseTrans.bsv
0,0 → 1,702
//**********************************************************************
// Inverse Quantizer and Inverse Transformer implementation
//----------------------------------------------------------------------
//
//
 
package mkInverseTrans;
 
import H264Types::*;
 
import IInverseTrans::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Kind of coefficient block mkInverseTrans is currently handling.
// SDMmbtype selects Intra16x16DC or Regular4x4; the rules then step
// Intra16x16DC -> Intra16x16 -> ChromaDC -> Chroma, or
// Regular4x4 -> ChromaDC -> Chroma.
typedef union tagged
{
void Start; //not working on anything in particular
void Intra16x16DC;
void Intra16x16;
void ChromaDC;
void Chroma;
void Regular4x4;
}
State deriving(Eq,Bits);
 
// Activity of the first pipeline stage of mkInverseTrans; each value
// enables the rule of the corresponding name (passing, loadingDC, scaling,
// transformingDC, scalingDC).
typedef union tagged
{
void Passing; //not working on anything in particular
void LoadingDC;
void Scaling; //does not include scaling for DC (just loading in that case)
void TransformingDC;
void ScalingDC;
}
Process deriving(Eq,Bits);
 
// Contents of a pipeline register between stages of mkInverseTrans:
// empty (Invalid), a block known to be all zero (Zeros), or the sixteen
// 16-bit values of a 4x4 block (Values).
typedef union tagged
{
void Invalid;
void Zeros;
Vector#(16,Bit#(16)) Values;
}
PipeType deriving(Eq,Bits);
 
//-----------------------------------------------------------
// Helper functions
 
function Bit#(6) qpi_to_qpc( Bit#(6) qpi );
   // Chroma QP lookup: indices 30..51 are compressed onto 29..39 as listed
   // below; every other index maps to itself.
   case ( qpi )
      30:             return 29;
      31:             return 30;
      32:             return 31;
      33, 34:         return 32;
      35:             return 33;
      36, 37:         return 34;
      38, 39:         return 35;
      40, 41:         return 36;
      42, 43, 44:     return 37;
      45, 46, 47:     return 38;
      48, 49, 50, 51: return 39;
      default:        return qpi;
   endcase
endfunction
 
 
function Bit#(4) reverseInverseZigZagScan( Bit#(4) idx );
   // Maps the arrival index of a coefficient (0 = first one received) to
   // its position in the 16-entry work vector.  All sixteen inputs are
   // listed, so the initial value of rasterPos is never returned.
   Bit#(4) rasterPos = 0;
   case ( idx )
      0:  rasterPos = 15;
      1:  rasterPos = 14;
      2:  rasterPos = 11;
      3:  rasterPos = 7;
      4:  rasterPos = 10;
      5:  rasterPos = 13;
      6:  rasterPos = 12;
      7:  rasterPos = 9;
      8:  rasterPos = 6;
      9:  rasterPos = 3;
      10: rasterPos = 2;
      11: rasterPos = 5;
      12: rasterPos = 8;
      13: rasterPos = 4;
      14: rasterPos = 1;
      15: rasterPos = 0;
   endcase
   return rasterPos;
endfunction
 
 
function Tuple2#(Bit#(4),Bit#(3)) qpdivmod6( Bit#(6) qp );
   // Returns (qp / 6, qp % 6) by restoring division: for quotient bits
   // 3..0 the multiples 48, 24, 12 and 6 are subtracted whenever they fit,
   // and the matching quotient bit is set.
   Bit#(6) remainder = qp;
   Bit#(4) quotient = 0;
   for(Integer bitPos=3; bitPos>=0; bitPos=bitPos-1)
      begin
         Bit#(6) multiple = 6'b000110 << fromInteger(bitPos);   // 6 * 2^bitPos
         if(remainder >= multiple)
            begin
               remainder = remainder - multiple;
               quotient = quotient | (4'b0001 << fromInteger(bitPos));
            end
      end
   return tuple2(quotient,truncate(remainder));
endfunction
 
 
function Vector#(4,Bit#(16)) dcTransFunc( Bit#(16) in0, Bit#(16) in1, Bit#(16) in2, Bit#(16) in3 );
   // One 4-point butterfly of the DC transform (sums and differences only):
   //   out0 = in0 + in1 + in2 + in3
   //   out1 = in0 + in1 - in2 - in3
   //   out2 = in0 - in1 - in2 + in3
   //   out3 = in0 - in1 + in2 - in3
   // Arithmetic wraps modulo 2^16, so pairing the operands first gives the
   // same results as the flat expressions above.
   Bit#(16) sumLow   = in0 + in1;
   Bit#(16) sumHigh  = in2 + in3;
   Bit#(16) diffLow  = in0 - in1;
   Bit#(16) diffHigh = in2 - in3;
   Vector#(4,Bit#(16)) outVec = replicate(0);
   outVec[0] = sumLow + sumHigh;
   outVec[1] = sumLow - sumHigh;
   outVec[2] = diffLow - diffHigh;
   outVec[3] = diffLow + diffHigh;
   return outVec;
endfunction
 
 
function Vector#(4,Bit#(16)) transFunc( Bit#(16) in0, Bit#(16) in1, Bit#(16) in2, Bit#(16) in3 );
   // One 4-point butterfly of the inverse residual transform.
   // Even part uses in0/in2; odd part uses in1/in3 with an arithmetic
   // (sign-preserving) halving of one operand.
   Bit#(16) halfIn1  = signedShiftRight(in1,1);
   Bit#(16) halfIn3  = signedShiftRight(in3,1);
   Bit#(16) evenSum  = in0 + in2;
   Bit#(16) evenDiff = in0 - in2;
   Bit#(16) oddDiff  = halfIn1 - in3;
   Bit#(16) oddSum   = in1 + halfIn3;
   Vector#(4,Bit#(16)) outVec = replicate(0);
   outVec[3] = evenSum  - oddSum;
   outVec[2] = evenDiff - oddDiff;
   outVec[1] = evenDiff + oddDiff;
   outVec[0] = evenSum  + oddSum;
   return outVec;
endfunction
 
 
//-----------------------------------------------------------
// Inverse Quantizer and Inverse Transformer Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkInverseTrans( IInverseTrans );
 
// Input from the entropy decoder and output towards prediction.
FIFO#(EntropyDecOT_InverseTrans) infifo <- mkSizedFIFO(inverseTrans_infifo_size);
FIFO#(InverseTransOT) outfifo <- mkFIFO;
// Index of the 4x4 block being processed within the current group.
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);//also used as a regular counter during inverse transformation
// Block kind and first-stage activity; see the State and Process typedefs.
Reg#(State) state <- mkReg(Start);
Reg#(Process) process <- mkReg(Passing);

// Quantisation parameters captured from the stream by rule passing.
Reg#(Bit#(5)) chroma_qp_index_offset <- mkReg(0);
Reg#(Bit#(6)) ppspic_init_qp <- mkReg(0);
Reg#(Bit#(6)) slice_qp <- mkReg(0);
Reg#(Bit#(6)) qpy <- mkReg(0);//Calculating it requires 8 bits, but value only 0 to 51
Reg#(Bit#(6)) qpc <- mkReg(0);
// Remainder and quotient of qpy / qpc divided by 6 (see qpdivmod6).
Reg#(Bit#(3)) qpymod6 <- mkReg(0);
Reg#(Bit#(3)) qpcmod6 <- mkReg(0);
Reg#(Bit#(4)) qpydiv6 <- mkReg(0);
Reg#(Bit#(4)) qpcdiv6 <- mkReg(0);

// storeVector: scaled DC values written by scalingDC and read back per block.
// workVector : stage-1 working block (loading / DC transform / scaling).
Reg#(Vector#(16,Bit#(16))) storeVector <- mkRegU();
Reg#(Vector#(16,Bit#(16))) workVector <- mkRegU();
// Stage-2 (transforming) and stage-3 (outputing) pipeline registers.
Reg#(PipeType) work2Vector <- mkReg(Invalid);
Reg#(PipeType) work3Vector <- mkReg(Invalid);
// Set by rule scaling when the finished block is all zero.
Reg#(Bool) stage1Zeros <- mkReg(False);
// Per-stage completion flags, cleared by rule switching.
Reg#(Bool) stage1Done <- mkReg(False);
Reg#(Bool) stage2Done <- mkReg(False);
Reg#(Bool) stage3Done <- mkReg(False);
// Step counters of the transforming (8 steps) and outputing (4 steps) rules.
Reg#(Bit#(3)) stage2Step <- mkReg(0);
Reg#(Bit#(2)) stage3Step <- mkReg(0);
 
 
//-----------------------------------------------------------
// Rules
 
// Idle-state handler of the first stage.  Runs only when stage 2 is empty
// and stage 3 is empty or finished.  It consumes header / parameter
// syntax elements from infifo and tracks the quantisation parameters; the
// first coefficient element of a macroblock is NOT dequeued here - it only
// switches 'process' so that loadingDC or scaling picks it up.
rule passing (process==Passing && work2Vector==Invalid && (stage3Done || work3Vector==Invalid) );
//$display( "Trace Inverse Trans: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
$display("ccl3newunit");
$display("ccl3rbspbyte %h", xdata);
end
tagged SDMmbtype .xdata :
begin
infifo.deq();
$display( "INFO InverseTrans: SDMmbtype %0d", xdata);
// Intra 16x16 macroblocks start with a DC block; all others with plain 4x4 blocks.
if(mbPartPredMode(xdata,0) == Intra_16x16)
state <= Intra16x16DC;
else
state <= Regular4x4;
end
tagged PPSpic_init_qp .xdata :
begin
infifo.deq();
ppspic_init_qp <= truncate(xdata);
end
tagged SHslice_qp_delta .xdata :
begin
infifo.deq();
slice_qp <= ppspic_init_qp+truncate(xdata);
Bit#(6) qpynext = ppspic_init_qp+truncate(xdata);
qpy <= qpynext;
// qpitemp = qpynext + chroma_qp_index_offset, biased by +12 so the clamp
// below can be done on a non-negative value (result clamped to 0..51).
Bit#(7) qpitemp = zeroExtend(chroma_qp_index_offset+12) + zeroExtend(qpynext);
Bit#(6) qpi;
if(qpitemp < 12)
qpi = 0;
else if(qpitemp > 63)
qpi = 51;
else
qpi = truncate(qpitemp-12);
qpc <= qpi_to_qpc(qpi);
outfifo.enq(IBTmb_qp {qpy:qpynext,qpc:qpi_to_qpc(qpi)});
// NOTE(review): qpydiv6 / qpymod6 are not refreshed here, only in the
// SDMmb_qp_delta branch below - presumably a SDMmb_qp_delta always
// precedes coefficient data; confirm.
end
tagged SDMmb_qp_delta .xdata :
begin
infifo.deq();
// qpytemp = qpy + delta + 52; the chain below reduces it modulo 52.
Bit#(8) qpytemp = zeroExtend(qpy) + zeroExtend(xdata+52);
Bit#(6) qpynext;
if(qpytemp >= 104)
qpynext = truncate(qpytemp - 104);
else if(qpytemp >= 52)
qpynext = truncate(qpytemp - 52);
else
qpynext = truncate(qpytemp);
qpy <= qpynext;
//$display( "TRACE InverseTrans: qpy %0d", qpynext );
//$display( "TRACE InverseTrans: qpy %0d", qpynext );
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpynext);
qpydiv6 <= tpl_1(temptuple);
qpymod6 <= tpl_2(temptuple);
//$display( "TRACE InverseTrans: qpydiv6 %0d", tpl_1(temptuple) );
//$display( "TRACE InverseTrans: qpymod6 %0d", tpl_2(temptuple) );

// Same biased clamp to 0..51 as in the SHslice_qp_delta branch.
Bit#(7) qpitemp = zeroExtend(chroma_qp_index_offset+12) + zeroExtend(qpynext);
Bit#(6) qpi;
if(qpitemp < 12)
qpi = 0;
else if(qpitemp > 63)
qpi = 51;
else
qpi = truncate(qpitemp-12);
qpc <= qpi_to_qpc(qpi);
outfifo.enq(IBTmb_qp {qpy:qpynext,qpc:qpi_to_qpc(qpi)});
end
tagged PPSchroma_qp_index_offset .xdata :
begin
infifo.deq();
chroma_qp_index_offset <= xdata;
end
tagged SDMRcoeffLevelPlusZeros .xdata :
begin
// First coefficient element of a macroblock: left in infifo on purpose.
blockNum <= 0;
pixelNum <= 0;
if(state == Intra16x16DC)
begin
$display( "INFO InverseTrans: 16x16 MB" );
process <= LoadingDC;
end
else
begin
$display( "INFO InverseTrans: Non-16x16 MB" );
process <= Scaling;
end
workVector <= replicate(0);
// Latch the chroma quotient / remainder for the blocks that follow.
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpc);
qpcdiv6 <= tpl_1(temptuple);
qpcmod6 <= tpl_2(temptuple);
end
tagged SDMRcoeffLevelZeros .xdata :
begin
// Identical set-up to the SDMRcoeffLevelPlusZeros branch above.
blockNum <= 0;
pixelNum <= 0;
if(state == Intra16x16DC)
begin
$display( "INFO InverseTrans: 16x16 MB" );
process <= LoadingDC;
end
else
begin
$display( "INFO InverseTrans: Non-16x16 MB" );
process <= Scaling;
end
workVector <= replicate(0);
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpc);
qpcdiv6 <= tpl_1(temptuple);
qpcmod6 <= tpl_2(temptuple);
end
// Every other element is irrelevant to this module and is dropped.
default: infifo.deq();
endcase
endrule
 
 
rule loadingDC (process matches LoadingDC);
   // Collects the DC coefficients of an Intra16x16DC block (16 entries) or
   // a ChromaDC block (8 entries) into workVector.  pixelNum counts the
   // positions consumed so far; when the block is complete the rule hands
   // over to transformingDC.  Any element other than a coefficient element
   // returns the stage to Passing without dequeuing it.
   Bool chromaDC = (state==ChromaDC);

   case (infifo.first()) matches
      tagged SDMRcoeffLevelZeros .zeroRun :
         begin
            // A run of zero coefficients: only the position advances.
            let nextPos = zeroExtend(pixelNum)+zeroRun;
            infifo.deq();
            pixelNum <= pixelNum+truncate(zeroRun);
            if((chromaDC && nextPos==8) || nextPos==16)
               process <= TransformingDC;
            else if((chromaDC && nextPos>8) || nextPos>16)
               $display( "ERROR InverseTrans: loadingDC index overflow" );
         end
      tagged SDMRcoeffLevelPlusZeros .coeff :
         begin
            // One coefficient followed by coeff.zeros zero coefficients.
            let nextPos = zeroExtend(pixelNum)+1+coeff.zeros;
            Bit#(16) level = signExtend(coeff.level);

            // Destination: chroma DC uses two reversed groups of four
            // (entries 3..0 and 7..4); luma DC uses the reverse zig-zag order.
            Bit#(4) destIdx = reverseInverseZigZagScan(pixelNum);
            if(chromaDC)
               destIdx = (pixelNum<4) ? 3-pixelNum : 11-pixelNum;

            infifo.deq();
            workVector <= update(workVector, destIdx, level);
            pixelNum <= pixelNum+1+truncate(coeff.zeros);
            if((chromaDC && nextPos==8) || nextPos==16)
               process <= TransformingDC;
            else if((chromaDC && nextPos>8) || nextPos>16)
               $display( "ERROR InverseTrans: loadingDC index overflow" );
         end
      default: process <= Passing;
   endcase
endrule
 
 
rule transformingDC (process matches TransformingDC);
   // Applies the DC transform to workVector in place, one butterfly group
   // per firing, using pixelNum as the step counter:
   //   ChromaDC     : steps 8..11 - two passes over entries 0..3, then two
   //                  passes over entries 4..7 (one 2x2 transform each);
   //   Intra16x16DC : steps 0..3 transform the four columns, steps 4..7
   //                  the four rows of the 4x4 block.
   // After the last step pixelNum is reset and the stage moves to ScalingDC.
   // (An unused local copy of storeVector was removed from this rule.)
   Vector#(16,Bit#(16)) workVectorTemp = workVector;
   Vector#(16,Bit#(16)) workVectorNew = workVector;

   if(state == ChromaDC)
      begin
         case ( pixelNum )
            8:
               begin
                  // First pass on entries 0..3: combine entries two apart.
                  workVectorNew[0] = workVectorTemp[0] + workVectorTemp[2];
                  workVectorNew[1] = workVectorTemp[1] + workVectorTemp[3];
                  workVectorNew[2] = workVectorTemp[0] - workVectorTemp[2];
                  workVectorNew[3] = workVectorTemp[1] - workVectorTemp[3];
                  pixelNum <= pixelNum+1;
               end
            9:
               begin
                  // Second pass on entries 0..3: combine neighbouring entries.
                  workVectorNew[0] = workVectorTemp[0] + workVectorTemp[1];
                  workVectorNew[1] = workVectorTemp[0] - workVectorTemp[1];
                  workVectorNew[2] = workVectorTemp[2] + workVectorTemp[3];
                  workVectorNew[3] = workVectorTemp[2] - workVectorTemp[3];
                  pixelNum <= pixelNum+1;
               end
            10:
               begin
                  // First pass on entries 4..7.
                  workVectorNew[4] = workVectorTemp[4] + workVectorTemp[6];
                  workVectorNew[5] = workVectorTemp[5] + workVectorTemp[7];
                  workVectorNew[6] = workVectorTemp[4] - workVectorTemp[6];
                  workVectorNew[7] = workVectorTemp[5] - workVectorTemp[7];
                  pixelNum <= pixelNum+1;
               end
            11:
               begin
                  // Second pass on entries 4..7; transform complete.
                  workVectorNew[4] = workVectorTemp[4] + workVectorTemp[5];
                  workVectorNew[5] = workVectorTemp[4] - workVectorTemp[5];
                  workVectorNew[6] = workVectorTemp[6] + workVectorTemp[7];
                  workVectorNew[7] = workVectorTemp[6] - workVectorTemp[7];
                  pixelNum <= 0;
                  process <= ScalingDC;
               end
            default:
               $display( "ERROR InverseTrans: transformingDC ChromaDC unexpected pixelNum" );
         endcase
         workVector <= workVectorNew;
      end
   else if(state == Intra16x16DC)
      begin
         Vector#(4,Bit#(16)) resultVector = replicate(0);
         if(pixelNum < 4)
            begin
               // Column pass: entries tempIndex, +4, +8, +12.
               Bit#(4) tempIndex = zeroExtend(pixelNum[1:0]);
               resultVector = dcTransFunc( workVectorTemp[tempIndex], workVectorTemp[tempIndex+4], workVectorTemp[tempIndex+8], workVectorTemp[tempIndex+12] );
               for(Integer ii=0; ii<4; ii=ii+1)
                  workVectorNew[tempIndex+fromInteger(ii*4)] = resultVector[ii];
            end
         else if(pixelNum < 8)
            begin
               // Row pass: entries tempIndex .. tempIndex+3.
               Bit#(4) tempIndex = {pixelNum[1:0],2'b00};
               resultVector = dcTransFunc( workVectorTemp[tempIndex], workVectorTemp[tempIndex+1], workVectorTemp[tempIndex+2], workVectorTemp[tempIndex+3] );
               for(Integer ii=0; ii<4; ii=ii+1)
                  workVectorNew[tempIndex+fromInteger(ii)] = resultVector[ii];
            end
         else
            $display( "ERROR InverseTrans: transforming Intra16x16DC unexpected pixelNum" );
         workVector <= workVectorNew;
         if(pixelNum == 7)
            begin
               pixelNum <= 0;
               process <= ScalingDC;
            end
         else
            pixelNum <= pixelNum+1;
      end
   else
      $display( "ERROR InverseTrans: transformingDC unexpected state" );
endrule
 
// Scales the transformed DC values one entry per firing (index pixelNum)
// and stores them in storeVector, from where rule switching later places
// them in entry 0 of the matching AC block.  After the last entry (7 for
// ChromaDC, 15 otherwise) the state advances to Chroma / Intra16x16 and
// the stage continues with Scaling.
rule scalingDC (process matches ScalingDC);
Bit#(6) qp;
Bit#(4) qpdiv6;
Bit#(3) qpmod6;
Bit#(6) workOne = 1;
Bit#(16) workValue;
Bit#(22) storeValueTemp;
Bit#(16) storeValue;
Vector#(16,Bit#(16)) workVectorTemp = workVector;
Vector#(16,Bit#(16)) storeVectorTemp = storeVector;

// Select the chroma or luma quantisation parameter set.
if(state==ChromaDC)
begin
qp = qpc;
qpdiv6 = qpcdiv6;
qpmod6 = qpcmod6;
end
else
begin
qp = qpy;
qpdiv6 = qpydiv6;
qpmod6 = qpymod6;
end
workValue = select(workVectorTemp, pixelNum);
// Scale factor selected by qp mod 6.
Bit#(5) levelScaleValue=0;
case(qpmod6)
0: levelScaleValue = 10;
1: levelScaleValue = 11;
2: levelScaleValue = 13;
3: levelScaleValue = 14;
4: levelScaleValue = 16;
5: levelScaleValue = 18;
default: $display( "ERROR InverseTrans: scalingDC levelScaleGen case default" );
endcase
// 22-bit product: unsigned scale factor times sign-extended coefficient.
storeValueTemp = zeroExtend(levelScaleValue)*signExtend(workValue);
if(state==ChromaDC)
// Chroma DC: (product << qp/6) >> 1.
storeValue = truncate( (storeValueTemp << zeroExtend(qpdiv6)) >> 1 );
else
begin
if(qp >= 36)
// Luma DC, qp >= 36: product << (qp/6 - 2).
storeValue = truncate( storeValueTemp << zeroExtend(qpdiv6 - 2) );
else
// Luma DC, qp < 36: ((product << 4) + (1 << (5 - qp/6))) >> (6 - qp/6),
// i.e. a right shift with the rounding term added first.
storeValue = truncate( ((storeValueTemp << 4) + zeroExtend(workOne << zeroExtend(5-qpdiv6))) >> zeroExtend(6 - qpdiv6) );
end
storeVector <= update(storeVectorTemp, pixelNum, storeValue);
if((state==ChromaDC && pixelNum==7) || pixelNum==15)
begin
// Last DC entry done: continue with the AC blocks of this group.
blockNum <= 0;
pixelNum <= 0;
workVector <= replicate(0);
if(state==ChromaDC)
state <= Chroma;
else
state <= Intra16x16;
process <= Scaling;
end
else if((state==ChromaDC && pixelNum>7) || pixelNum>15)
$display( "ERROR InverseTrans: scalingDC index overflow" );
else
pixelNum <= pixelNum+1;
endrule
 
 
// Advances the three-stage pipeline.  Fires when stage 1 is done and
// stage 2 is empty, or when stage 2 is done and stage 3 is empty or done.
//  - stage 2 -> 3: work2Vector is copied to work3Vector.
//  - stage 1 -> 2: workVector (with the stored DC value patched into
//    entry 0 for Intra16x16 / Chroma blocks) is moved into work2Vector,
//    or the Zeros marker if the block was all zero; blockNum, state and
//    process are advanced for the next block.
// Both moves may happen in the same firing.
rule switching ( (stage1Done && work2Vector==Invalid) || (stage2Done && (stage3Done || work3Vector==Invalid)) );
Bool switch2to3 = False;
if(stage2Done && (stage3Done || work3Vector==Invalid))
begin
switch2to3 = True;
work3Vector <= work2Vector;
stage3Done <= False;
end
// Stage 1 may advance if stage 2 is empty or is being emptied right now.
if(stage1Done && (switch2to3 || work2Vector==Invalid))
begin
Vector#(16,Bit#(16)) workVectorTemp = workVector;
// Insert the separately transformed DC value; the Intra16x16 index swaps
// blockNum bits 1 and 2.
if(state==Intra16x16)
workVectorTemp[0] = storeVector[{blockNum[3],blockNum[1],blockNum[2],blockNum[0]}];
else if(state==Chroma)
workVectorTemp[0] = storeVector[blockNum];
if(stage1Zeros)
work2Vector <= Zeros;
else
work2Vector <= (Values workVectorTemp);
stage1Zeros <= False;
stage1Done <= False;
workVector <= replicate(0);
if(state==Chroma)
begin
// Eight chroma blocks (0..7); after the last one return to Passing.
if(blockNum<7)
blockNum <= blockNum+1;
else if (blockNum==7)
begin
blockNum <= 0;
process <= Passing;
end
else
$display( "ERROR InverseTrans: switching chroma unexpected blockNum" );
end
else
begin
// Sixteen luma blocks (blockNum wraps to 0 after 15); then load chroma DC.
blockNum <= blockNum+1;
if(blockNum==15)
begin
state <= ChromaDC;
process <= LoadingDC;
end
else
process <= Scaling;
end
end
else //switch2to3==True
work2Vector <= Invalid;
stage2Done <= False;
endrule
 
 
rule scaling (process==Scaling && !stage1Done );
   // Stage 1 for regular (AC) blocks: reads coefficient elements from
   // infifo, scales each non-zero level and writes it into workVector at
   // its reverse zig-zag position.  pixelNum counts consumed positions; a
   // block holds 16 of them, or 15 for Chroma / Intra16x16 blocks, whose
   // entry 0 is filled in later by rule switching.  A completed block sets
   // stage1Done.  Any element other than a coefficient element returns the
   // stage to Passing without dequeuing it.
   // (The dead local 'qp', assigned but never read, was removed.)
   Vector#(16,Bit#(16)) workVectorTemp = workVector;
   Vector#(16,Bit#(16)) storeVectorTemp = storeVector;

   case (infifo.first()) matches
      tagged SDMRcoeffLevelZeros .xdata :
         begin
            infifo.deq();
            if(zeroExtend(pixelNum)+xdata==16 || (zeroExtend(pixelNum)+xdata==15 && (state==Chroma || state==Intra16x16)))
               begin
                  // Block complete.  It is flagged all-zero only if this
                  // single run covered the whole block and, for blocks with
                  // a separate DC value, that DC value is zero too.
                  Bit#(16) prevValue0=0;
                  if(state==Intra16x16)
                     prevValue0 = select(storeVectorTemp, {blockNum[3],blockNum[1],blockNum[2],blockNum[0]});
                  else if(state==Chroma)
                     prevValue0 = select(storeVectorTemp, blockNum);
                  if(xdata==16 || (xdata==15 && (state==Chroma || state==Intra16x16) && prevValue0==0))
                     stage1Zeros <= True;
                  stage1Done <= True;
                  pixelNum <= 0;
               end
            else if(zeroExtend(pixelNum)+xdata>16 || (zeroExtend(pixelNum)+xdata>15 && (state==Chroma || state==Intra16x16)))
               $display( "ERROR InverseTrans: scaling index overflow" );
            else
               pixelNum <= pixelNum+truncate(xdata);
            //$display( "TRACE InverseTrans: coeff zeros %0d", xdata );
         end
      tagged SDMRcoeffLevelPlusZeros .xdata :
         begin
            infifo.deq();
            // Select the chroma or luma quotient / remainder of qp by 6.
            Bit#(4) qpdiv6;
            Bit#(3) qpmod6;
            if(state==Chroma)
               begin
                  qpdiv6 = qpcdiv6;
                  qpmod6 = qpcmod6;
               end
            else
               begin
                  qpdiv6 = qpydiv6;
                  qpmod6 = qpymod6;
               end
            // The scale factor depends on qp mod 6 and on which of three
            // position classes the (reverse-order) index pixelNum falls in.
            Bit#(5) levelScaleValue=0;
            if(pixelNum==15 || pixelNum==12 || pixelNum==10 || pixelNum==4)
               begin
                  case(qpmod6)
                     0: levelScaleValue = 10;
                     1: levelScaleValue = 11;
                     2: levelScaleValue = 13;
                     3: levelScaleValue = 14;
                     4: levelScaleValue = 16;
                     5: levelScaleValue = 18;
                     default: $display( "ERROR InverseTrans: levelScaleGen case default" );
                  endcase
               end
            else if(pixelNum==11 || pixelNum==5 || pixelNum==3 || pixelNum==0)
               begin
                  case(qpmod6)
                     0: levelScaleValue = 16;
                     1: levelScaleValue = 18;
                     2: levelScaleValue = 20;
                     3: levelScaleValue = 23;
                     4: levelScaleValue = 25;
                     5: levelScaleValue = 29;
                     default: $display( "ERROR InverseTrans: levelScaleGen case default" );
                  endcase
               end
            else
               begin
                  case(qpmod6)
                     0: levelScaleValue = 13;
                     1: levelScaleValue = 14;
                     2: levelScaleValue = 16;
                     3: levelScaleValue = 18;
                     4: levelScaleValue = 20;
                     5: levelScaleValue = 23;
                     default: $display( "ERROR InverseTrans: levelScaleGen case default" );
                  endcase
               end
            // Scaled coefficient = (scale factor * level) << (qp / 6).
            Bit#(16) workValueTemp = zeroExtend(levelScaleValue)*signExtend(xdata.level);
            Bit#(16) workValue;
            workValue = workValueTemp << zeroExtend(qpdiv6);
            workVector <= update(workVectorTemp, reverseInverseZigZagScan(pixelNum), workValue);
            if(zeroExtend(pixelNum)+1+xdata.zeros==16 || (zeroExtend(pixelNum)+1+xdata.zeros==15 && (state==Chroma || state==Intra16x16)))
               begin
                  stage1Done <= True;
                  pixelNum <= 0;
               end
            else if(zeroExtend(pixelNum)+1+xdata.zeros>16 || (zeroExtend(pixelNum)+1+xdata.zeros>15 && (state==Chroma || state==Intra16x16)))
               $display( "ERROR InverseTrans: scaling index overflow" );
            else
               pixelNum <= pixelNum+1+truncate(xdata.zeros);
         end
      default: process <= Passing;
   endcase
endrule
 
 
rule transforming ( work2Vector!=Invalid && !stage2Done );
   // Stage 2: inverse transform of the block held in work2Vector, one
   // 4-point butterfly (transFunc) per firing.  Steps 0..3 process the
   // four rows, steps 4..7 the four columns; stage2Step wraps back to 0
   // after step 7.  A Zeros block needs no work and is done immediately.
   if(work2Vector matches tagged Values .coeffs)
      begin
         Vector#(16,Bit#(16)) nextCoeffs = coeffs;
         Bit#(2) lane = stage2Step[1:0];
         if(stage2Step >= 4)
            begin
               // Column pass: entries lane, lane+4, lane+8, lane+12.
               Bit#(4) colBase = zeroExtend(lane);
               Vector#(4,Bit#(16)) colResult = transFunc( coeffs[colBase], coeffs[colBase+4], coeffs[colBase+8], coeffs[colBase+12] );
               for(Integer k=0; k<4; k=k+1)
                  nextCoeffs[colBase+fromInteger(k*4)] = colResult[k];
            end
         else
            begin
               // Row pass: entries 4*lane .. 4*lane+3.
               Bit#(4) rowBase = {lane,2'b00};
               Vector#(4,Bit#(16)) rowResult = transFunc( coeffs[rowBase], coeffs[rowBase+1], coeffs[rowBase+2], coeffs[rowBase+3] );
               for(Integer k=0; k<4; k=k+1)
                  nextCoeffs[rowBase+fromInteger(k)] = rowResult[k];
            end
         work2Vector <= (Values nextCoeffs);
         stage2Step <= stage2Step+1;
         if(stage2Step == 7)
            stage2Done <= True;
      end
   else //All Zeros
      stage2Done <= True;
endrule
 
 
rule outputing ( work3Vector!=Invalid && !stage3Done );
   // Stage 3: emits the transformed block held in work3Vector.  A Values
   // block is sent as four ITBresidual rows, one per firing, each sample
   // computed as (value + 32) >> 6 and kept to 10 bits; stage3Step wraps
   // back to 0 after the fourth row.  A Zeros block is reported with a
   // single ITBcoeffLevelZeros.
   if(work3Vector matches tagged Values .samples)
      begin
         Vector#(4,Bit#(10)) rowOut = replicate(0);
         for(Integer col=0; col<4; col=col+1)
            rowOut[col] = truncate((samples[{stage3Step,2'b00}+fromInteger(col)]+32) >> 6);
         outfifo.enq(ITBresidual rowOut);
         // Trace each sample of the row as a signed 10-bit number.
         for(Integer col=0; col<4; col=col+1)
            begin
               Int#(10) signedSample = unpack(rowOut[col]);
               $display("ccl3IBTresidual %0d", signedSample);
            end
         stage3Step <= stage3Step+1;
         if(stage3Step == 3)
            stage3Done <= True;
      end
   else
      begin
         stage3Done <= True;
         outfifo.enq(ITBcoeffLevelZeros);
      end
endrule
 
 
 
// Module interface: syntax elements in via infifo, scaled / transformed
// residual data (and IBTmb_qp updates) out via outfifo.
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
 
endmodule
 
endpackage
/trunk/src/mkDeblockFilter_dummy.bsv
0,0 → 1,192
//**********************************************************************
// Deblocking Filter
//----------------------------------------------------------------------
//
//
 
package mkDeblockFilter;
 
import H264Types::*;
 
import IDeblockFilter::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
 
 
 
//-----------------------------------------------------------
// Helper functions
 
 
 
 
//-----------------------------------------------------------
// Deblocking Filter Module
//-----------------------------------------------------------
 
 
// Pass-through stand-in for the deblocking filter.
// Every token taken from infifo is forwarded to outfifo: most are wrapped
// unchanged in EDOT, while PBoutput pixel words are re-tagged as DFBLuma or
// DFBChroma together with a position built from the local counters. The
// pixel data itself is not modified.
// No rule in this module enqueues to or dequeues from the four memory FIFOs;
// they are only exposed through the two Client interfaces.
(* synthesize *)
module mkDeblockFilter( IDeblockFilter );

FIFO#(EntropyDecOT) infifo <- mkFIFO();
FIFO#(DeblockFilterOT) outfifo <- mkFIFO();

// Memory request/response queues (unused by the rules below).
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkSizedFIFO(1);
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkSizedFIFO(1);
FIFO#(MemResp#(32)) dataMemRespQ <- mkSizedFIFO(1);
FIFO#(MemResp#(13)) parameterMemRespQ <- mkSizedFIFO(1);

// chromaFlag: 0 selects the DFBLuma output form, 1 the DFBChroma form.
// blockNum: block counter inside the current macroblock (wraps after 15 for
// luma and after 7 for chroma). pixelNum: advances 0,4,8,12 within a block.
Reg#(Bit#(1)) chromaFlag <- mkReg(0);
Reg#(Bit#(4)) blockNum <- mkReg(0);
Reg#(Bit#(4)) pixelNum <- mkReg(0);

// Picture dimensions (updated from SPS tokens) and current macroblock position.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb

// Not referenced by any rule in this module.
Vector#(3,Reg#(Bit#(8))) tempinput <- replicateM(mkRegU);

// Set when the last macroblock of the frame has been passed; cleared by
// outputEndOfFrame after it emits the EndOfFrame token.
Reg#(Bool) endOfFrame <- mkReg(False);


//-----------------------------------------------------------
// Rules
// Forwards one input token per firing. Only enabled while currMbHor is a
// valid column (< picWidth) and no EndOfFrame token is pending.
rule passing (currMbHor<zeroExtend(picWidth) && !endOfFrame);
//$display( "Trace Deblocking Filter: passing infifo packed %h", pack(infifo.first()));
case (infifo.first()) matches
tagged NewUnit . xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display("ccl5newunit");
$display("ccl5rbspbyte %h", xdata);
end
tagged SPSpic_width_in_mbs .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
picWidth <= xdata;
end
tagged SPSpic_height_in_map_units .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
picHeight <= xdata;
end
tagged SHfirst_mb_in_slice .xdata :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
firstMb <= xdata;
currMb <= xdata;
// currMbHor temporarily holds the raw macroblock index; currMbHorUpdate
// then reduces it below picWidth while incrementing currMbVer.
currMbHor <= xdata;
currMbVer <= 0;
end
tagged PBoutput .xdata :
begin
infifo.deq();
// De-interleave the block and pixel counters into horizontal/vertical parts.
Bit#(2) blockHor = {blockNum[2],blockNum[0]};
Bit#(2) blockVer = {blockNum[3],blockNum[1]};
Bit#(2) pixelHor = {pixelNum[1],pixelNum[0]};
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]};
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor);
// Pack the four samples into one 32-bit word, element 0 in the low byte.
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]};
if(chromaFlag==0)
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:pixelq});
else
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:pixelq});
if(pixelNum == 12)
begin
// Last word of this block: move on to the next block.
pixelNum <= 0;
if(blockNum == 15)
begin
// Block 15 done: switch to the chroma blocks.
blockNum <= 0;
chromaFlag <= 1;
end
else if(blockNum==7 && chromaFlag==1)
begin
// Chroma block 7 done: the macroblock is complete.
blockNum <= 0;
chromaFlag <= 0;
currMb <= currMb+1;
currMbHor <= currMbHor+1;
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1))
endOfFrame <= True;
end
else
blockNum <= blockNum+1;
end
else
pixelNum <= pixelNum+4;
//$display( "Trace Deblocking Filter: passing PBoutput %h %h %h %h", blockNum, pixelNum, pixelHor, xdata);
end
tagged EndOfFile :
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
$display( "ccl5: EndOfFile reached");
//$finish(0);
end
default:
begin
infifo.deq();
outfifo.enq(EDOT infifo.first());
end
endcase
endrule


// Brings currMbHor back below picWidth. Each firing subtracts either eight
// picture widths (adding 8 to currMbVer) or one width (adding 1), so a large
// starting index is reduced over several cycles.
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) && !endOfFrame);
Bit#(PicAreaSz) temp = zeroExtend(picWidth);
if((currMbHor >> 3) >= temp)
begin
currMbHor <= currMbHor - (temp << 3);
currMbVer <= currMbVer + 8;
end
else
begin
currMbHor <= currMbHor - temp;
currMbVer <= currMbVer + 1;
end
//$display( "Trace Deblocking Filter: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule


// Emits the EndOfFrame token and clears the pending flag, which re-enables
// the two rules above.
rule outputEndOfFrame(endOfFrame);
outfifo.enq(EndOfFrame);
endOfFrame <= False;
//$display( "Trace Deblocking Filter: outputEndOfFrame %h", pack(infifo.first()));
endrule
// Memory client ports, backed by the FIFOs declared above.
interface Client mem_client_data;
interface Get request = fifoToGet(dataMemReqQ);
interface Put response = fifoToPut(dataMemRespQ);
endinterface

interface Client mem_client_parameter;
interface Get request = fifoToGet(parameterMemReqQ);
interface Put response = fifoToPut(parameterMemRespQ);
endinterface

// Streaming input and output.
interface Put ioin = fifoToPut(infifo);
interface Get ioout = fifoToGet(outfifo);
endmodule
 
endpackage
/trunk/src/mkMemED_regfile.bsv
0,0 → 1,52
//**********************************************************************
// Memory for Entropy Decoder
//----------------------------------------------------------------------
//
//
//
 
package mkMemED;
 
import H264Types::*;
import IMemED::*;
import RegFile::*;
import GetPut::*;
import ClientServer::*;
import FIFO::*;
 
 
//----------------------------------------------------------------------
// Main module
//----------------------------------------------------------------------
 
// Register-file backed memory behind a request/response Server interface.
// A StoreReq writes the register file and produces no response; a LoadReq
// reads the register file and returns the value as a LoadResp.
module mkMemED(IMemED#(index_size,data_size))
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen),
             Bits#(MemResp#(data_size),mRespLen));

   //-----------------------------------------------------------
   // State

   RegFile#(Bit#(index_size),Bit#(data_size)) rfile <- mkRegFileFull();
   FIFO#(MemReq#(index_size,data_size)) reqQ <- mkFIFO();
   FIFO#(MemResp#(data_size)) respQ <- mkFIFO();

   // Head of the request queue is a store: consume it and write the data.
   rule storing ( reqQ.first() matches tagged StoreReq .storeOp );
      reqQ.deq();
      rfile.upd(storeOp.addr, storeOp.data);
   endrule

   // Head of the request queue is a load: consume it and queue the value read.
   rule reading ( reqQ.first() matches tagged LoadReq .loadAddr );
      reqQ.deq();
      let loadedWord = rfile.sub(loadAddr);
      respQ.enq( tagged LoadResp loadedWord );
   endrule

   interface Server mem_server;
      interface Put request = fifoToPut(reqQ);
      interface Get response = fifoToGet(respQ);
   endinterface

endmodule
 
endpackage
/trunk/src/mkInputGen_news.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
// Test-bench stimulus source: streams the contents of a hex file out of
// ioout one byte per token, followed by EndOfFile.
module mkInputGen( IInputGen );

   // Number of entries loaded from the hex file (addresses 0 .. streamLength-1).
   Bit#(27) streamLength = 17226;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("news_cif1-5.hex", 0, streamLength-1);
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Emit the byte at the current address and step to the next one.
   rule output_byte (index < streamLength);
      let nextByte = rfile.sub(index);
      outfifo.enq(tagged DataByte nextByte);
      index <= index+1;
   endrule

   // All bytes sent. index is not advanced here, so this rule remains
   // enabled and enqueues EndOfFile again whenever outfifo has room.
   rule end_of_file (index == streamLength);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkMemED.bsv
0,0 → 1,52
//**********************************************************************
// Memory for Entropy Decoder
//----------------------------------------------------------------------
//
//
//
 
package mkMemED;
 
import H264Types::*;
import IMemED::*;
import RegFile::*;
import GetPut::*;
import ClientServer::*;
import FIFO::*;
 
 
//----------------------------------------------------------------------
// Main module
//----------------------------------------------------------------------
 
// Register-file backed memory behind a request/response Server interface.
// A StoreReq writes the register file and produces no response; a LoadReq
// reads the register file and returns the value as a LoadResp.
module mkMemED(IMemED#(index_size,data_size))
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen),
             Bits#(MemResp#(data_size),mRespLen));

   //-----------------------------------------------------------
   // State

   RegFile#(Bit#(index_size),Bit#(data_size)) rfile <- mkRegFileFull();
   FIFO#(MemReq#(index_size,data_size)) reqQ <- mkFIFO();
   FIFO#(MemResp#(data_size)) respQ <- mkFIFO();

   // Head of the request queue is a store: consume it and write the data.
   rule storing ( reqQ.first() matches tagged StoreReq .storeOp );
      reqQ.deq();
      rfile.upd(storeOp.addr, storeOp.data);
   endrule

   // Head of the request queue is a load: consume it and queue the value read.
   rule reading ( reqQ.first() matches tagged LoadReq .loadAddr );
      reqQ.deq();
      let loadedWord = rfile.sub(loadAddr);
      respQ.enq( tagged LoadResp loadedWord );
   endrule

   interface Server mem_server;
      interface Put request = fifoToPut(reqQ);
      interface Get response = fifoToGet(respQ);
   endinterface

endmodule
 
endpackage
/trunk/src/mkInputGen.bsv
0,0 → 1,41
//**********************************************************************
// Input Generator implementation
//----------------------------------------------------------------------
//
//
 
package mkInputGen;
 
import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
 
 
// Test-bench stimulus source: streams the contents of a hex file out of
// ioout one byte per token, followed by EndOfFile.
module mkInputGen( IInputGen );

   // Number of entries loaded from the hex file (addresses 0 .. streamLength-1).
   Bit#(27) streamLength = 2282511;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("720p50_parkrun_ter1-20inter.hex", 0, streamLength-1);
   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27)) index <- mkReg(0);

   // Emit the byte at the current address and step to the next one.
   rule output_byte (index < streamLength);
      let nextByte = rfile.sub(index);
      outfifo.enq(tagged DataByte nextByte);
      index <= index+1;
   endrule

   // All bytes sent. index is not advanced here, so this rule remains
   // enabled and enqueues EndOfFile again whenever outfifo has room.
   rule end_of_file (index == streamLength);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);
endmodule
 
 
endpackage
/trunk/src/mkInterpolator_4stage.bsv
0,0 → 1,844
//**********************************************************************
// interpolator implementation
//----------------------------------------------------------------------
//
//
 
package mkInterpolator;
 
import H264Types::*;
import IInterpolator::*;
import FIFO::*;
import Vector::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Work descriptor handed from the load side to the work1/work2 rules.
// IPWLuma carries 2-bit horizontal/vertical fractions, IPWChroma 3-bit ones;
// both carry a 2-bit sample offset and the block type of the request.
typedef union tagged
{
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma;
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
}
InterpolatorWT deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
 
// Saturates a 10-bit value to 8 bits. If bit 9 is set the result is 0;
// otherwise if bit 8 is set the result is 255; otherwise the low 8 bits
// are returned unchanged.
function Bit#(8) clip1y10to8( Bit#(10) innum );
   Bit#(8) clipped = truncate(innum);
   case (innum[9:8])
      2'b00 : clipped = truncate(innum);
      2'b01 : clipped = 255;
      default : clipped = 0;
   endcase
   return clipped;
endfunction
 
// Six-tap filter with weights (1,-5,20,20,-5,1) over six 8-bit inputs.
// Inputs are zero-extended and the sum is formed in 15-bit wrap-around
// arithmetic; no rounding or scaling is applied here.
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 );
   Bit#(15) outerPair  = zeroExtend(in0) + zeroExtend(in5);
   Bit#(15) innerPair  = zeroExtend(in1) + zeroExtend(in4);
   Bit#(15) centrePair = zeroExtend(in2) + zeroExtend(in3);
   return outerPair - 5*innerPair + 20*centrePair;
endfunction
 
// Six-tap filter with weights (1,-5,20,20,-5,1) over six 15-bit inputs.
// Inputs are sign-extended to 20 bits; 512 is added before the sum is
// shifted right by 10, and the resulting 10-bit value is saturated to
// 8 bits by clip1y10to8.
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 );
   Bit#(20) outerPair  = signExtend(in0) + signExtend(in5);
   Bit#(20) innerPair  = signExtend(in1) + signExtend(in4);
   Bit#(20) centrePair = signExtend(in2) + signExtend(in3);
   Bit#(20) rounded = outerPair - 5*innerPair + 20*centrePair + 512;
   return clip1y10to8(truncate(rounded>>10));
endfunction
 
 
 
//-----------------------------------------------------------
// Interpolation Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkInterpolator( Interpolator );
// Request and data queues.
// reqfifoLoad feeds the loadLuma/loadChroma rules; reqfifoWork1 feeds the
// work1 rules and is dequeued into reqregWork2 by the switching rules.
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size);
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid);
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
// While set, the load rules are blocked and sendEndOfFrameReq fires.
Reg#(Bool) endOfFrameFlag <- mkReg(False);
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);

// Picture dimensions used for address clamping in the load rules.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB);
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);

// Intermediate storage. In workFile and storeFile the top address bit selects
// a half: the work1 rules write the half given by workFileFlag and the work2
// rules read the half given by 1-workFileFlag. resultFile is written by the
// work2 rules and read by the outputing rule.
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full();
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full();
Reg#(Bit#(1)) workFileFlag <- mkReg(0);
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full();

// Counters for the load rules.
Reg#(Bit#(1)) loadStage <- mkReg(0);
Reg#(Bit#(2)) loadHorNum <- mkReg(0);
Reg#(Bit#(4)) loadVerNum <- mkReg(0);

// State for the work1 rules.
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma
Reg#(Bit#(1)) work1Stage <- mkReg(0);
Reg#(Bit#(2)) work1HorNum <- mkReg(0);
Reg#(Bit#(4)) work1VerNum <- mkReg(0);
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU;
Reg#(Bool) work1Done <- mkReg(False);

// State for the work2 rules. resultReady is indexed by outBlockNum in the
// outputing rule and cleared by switching8x8.
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0);
Reg#(Bit#(2)) work2HorNum <- mkReg(0);
Reg#(Bit#(4)) work2VerNum <- mkReg(0);
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU;
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU;
Reg#(Vector#(4,Bit#(1))) resultReady <- mkRegU;
Reg#(Bool) work2Done <- mkReg(False);
Reg#(Bool) work8x8Done <- mkReg(False);

// Counters for the outputing rule.
Reg#(Bit#(2)) outBlockNum <- mkReg(0);
Reg#(Bit#(2)) outPixelNum <- mkReg(0);
Reg#(Bool) outDone <- mkReg(False);
 
 
// While endOfFrameFlag is raised, send one IPLoadEndFrame request to memory
// and lower the flag again.
rule sendEndOfFrameReq( endOfFrameFlag );
   memReqQ.enq(IPLoadEndFrame);
   endOfFrameFlag <= False;
endrule
// Issues the memory reads for the luma request at the head of reqfifoLoad.
// Each firing enqueues exactly one IPLoadLuma request on memReqQ and advances
// the (loadHorNum, loadVerNum, loadStage) counters; the request is dequeued
// once its last word has been asked for. Blocked while endOfFrameFlag is set.
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag );
// Low two bits of each motion-vector component are the fractional part;
// bits [3:2] of the horizontal component give the offset within a word.
Bit#(2) xfracl = reqdata.mvhor[1:0];
Bit#(2) yfracl = reqdata.mvver[1:0];
Bit#(2) offset = reqdata.mvhor[3:2];
// twoStage: both fractions are odd, so the data is fetched in two passes
// (loadStage 0 with vertical margin, loadStage 1 with horizontal margin).
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0);
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0);
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0);
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,2)) horAddr;
Bit#(TAdd#(PicHeightSz,4)) verAddr;
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0);
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0);
// Integer part of the motion vector, moved back by 2 when the corresponding
// direction is being interpolated.
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0);
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0);
// Horizontal address: clamp to 0 or to the last word of the row and flag
// the clamp in horOut; otherwise convert the sample position to a word address.
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(mvhortemp);
if(horTemp>=zeroExtend({picWidth,4'b0000}))
begin
horAddr = {picWidth-1,2'b11};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical address: clamp to the first or last row (no out-of-bounds flag).
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(mvvertemp);
if(verTemp>=zeroExtend({picHeight,4'b0000}))
verAddr = {picHeight-1,4'b1111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// verFirst selects the traversal order: vertical counter innermost when
// True, horizontal counter innermost otherwise.
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3));
// Last counter values: block size plus extra words/rows needed as margin
// (2 extra words horizontally, 5 extra rows vertically when interpolating).
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1));
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
if(verFirst)
begin
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
if(loadHorNum < loadHorNumMax)
begin
if(loadStage == 1)
begin
// Second pass: either jump straight to the last column or finish the request.
offset = offset + (xfracl==3 ? 1 : 0);
if(!(offset==1 || (xfracl==3 && offset==2)))
loadHorNum <= loadHorNumMax;
else
begin
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
else
loadHorNum <= loadHorNum+1;
end
else
begin
if(twoStage && loadStage==0)
begin
// First pass finished: choose the starting column of the second pass.
offset = offset + (xfracl==3 ? 1 : 0);
if((xfracl==3 ? offset<3 : offset<2))
loadHorNum <= 0;
else
loadHorNum <= loadHorNumMax+1;
loadStage <= 1;
end
else
begin
loadHorNum <= 0;
loadStage <= 0;
reqfifoLoad.deq();
end
end
end
end
else
begin
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
end
// Block types larger than 8x8 are only reported; the request is still processed.
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16)
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported");
//$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
 
// Issues the memory reads for the chroma request at the head of reqfifoLoad.
// Each firing enqueues one IPLoadChroma request on memReqQ and advances
// loadHorNum (inner counter) and loadVerNum (outer counter); the request is
// dequeued after the last word. Blocked while endOfFrameFlag is set.
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag );
// Low three bits of each motion-vector component are the fractional part.
Bit#(3) xfracc = reqdata.mvhor[2:0];
Bit#(3) yfracc = reqdata.mvver[2:0];
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0};
Bit#(1) horOut = 0;
Bit#(TAdd#(PicWidthSz,1)) horAddr;
Bit#(TAdd#(PicHeightSz,3)) verAddr;
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00});
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum);
// Horizontal address: clamp to 0 or to the last word of the row and flag
// the clamp in horOut; otherwise convert the sample position to a word address.
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp)
begin
horAddr = 0;
horOut = 1;
end
else
begin
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]);
if(horTemp>=zeroExtend({picWidth,3'b000}))
begin
horAddr = {picWidth-1,1'b1};
horOut = 1;
end
else
horAddr = truncate(horTemp>>2);
end
// Vertical address: clamp to the first or last row (no out-of-bounds flag).
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
verAddr = 0;
else
begin
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
if(verTemp>=zeroExtend({picHeight,3'b000}))
verAddr = {picHeight-1,3'b111};
else
verAddr = truncate(verTemp);
end
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
// Last counter values for this block type; one extra word or row is fetched
// when the fraction/offset requires a neighbouring sample. The same
// expressions are used by work1Chroma so that responses match requests.
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(loadHorNum < loadHorNumMax)
loadHorNum <= loadHorNum+1;
else
begin
loadHorNum <= 0;
if(loadVerNum < loadVerNumMax)
loadVerNum <= loadVerNum+1;
else
begin
loadVerNum <= 0;
reqfifoLoad.deq();
end
end
//$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
endrule
 
// First luma processing pass. When the head of memRespQ is an IPLoadResp,
// one 32-bit response word is consumed per firing, unpacked into four bytes
// and processed according to the fractions of the request at the head of
// reqfifoWork1:
//   - xfracl==0, yfracl==0 or xfracl==2: realign (xfracl==0) or filter
//     horizontally, writing 15-bit results to workFile;
//   - otherwise, work1Stage==0: filter vertically into workFile and, for
//     twoStage requests, also copy the raw bytes to storeFile;
//   - otherwise (second stage of twoStage): copy the raw bytes to storeFile.
// work1Done is set after the last word; the request itself stays in
// reqfifoWork1 until a switching rule dequeues it.
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Unpack the response word; element 0 is the low byte.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] );
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
if(xfracl==0 || yfracl==0 || xfracl==2)
begin
if(xfracl==0)//reorder
begin
// Build an output word that starts 'offset' samples into the data, taking
// the leading samples from the previously read word kept in work1Vector8[0..3].
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && offset!=0)
tempResult8[ii] = work1Vector8[offsetplusii];
else
tempResult8[ii] = readdata[offsetplusii];
work1Vector8Next[ii] = readdata[ii];
end
// Scale by 32 so the value has the same weight as a six-tap filter output.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
else//horizontal interpolation
begin
offset = offset-2;
// Shift the sample window left by one word and insert the new word at
// position 8-offset.
for(Integer ii=0; ii<8; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work1Vector8Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]);
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
// For odd fractions, average the filtered value with the neighbouring
// input sample (rounding up).
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1);
end
end
// The first workHorNumOffset words of each row only fill the window and
// produce no output.
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1));
if(work1HorNum >= workHorNumOffset)
begin
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset);
if(yfracl == 0)
begin
// No vertical pass follows: store the 8-bit result scaled by 32.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
end
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15);
end
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0);
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
work1Done <= True;
end
end
end
else if(work1Stage == 0)//vertical interpolation
begin
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
// work1Vector8 holds the five previous rows of this column (4 bytes each);
// the new row supplies the sixth filter tap.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]);
for(Integer ii=0; ii<16; ii=ii+1)
work1Vector8Next[ii] = work1Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work1Vector8Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
Bit#(2) horAddr = work1HorNum;
Bit#(3) verAddr = truncate(work1VerNum-5);
// The first five rows only fill the window; results start at row 5.
if(work1VerNum > 4)
begin
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15);
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]);
end
if(twoStage)
begin
// Also keep the raw bytes for the later horizontal pass, skipping the
// margin rows (storeVerAddr wraps to >= 8 for rows outside the block).
Bit#(2) storeHorAddr = work1HorNum;
Bit#(4) storeVerAddr = work1VerNum;
if((xfracl==3 ? offset<3 : offset<2))
storeHorAddr = storeHorAddr+1;
if(yfracl==3)
storeVerAddr = storeVerAddr-3;
else
storeVerAddr = storeVerAddr-2;
if(storeVerAddr < 8)
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata);
end
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
if(twoStage)
begin
// Move to the second stage; the starting column mirrors the choice made
// in loadLuma when it switches loadStage.
work1Stage <= 1;
if((xfracl==3 ? offset<3 : offset<2))
work1HorNum <= 0;
else
work1HorNum <= workHorNumMax+1;
end
else
begin
work1HorNum <= 0;
work1Done <= True;
end
end
end
end
else//second stage of twoStage
begin
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata);
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
work1VerNum <= 0;
offset = offset + (xfracl==3 ? 1 : 0);
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2)))
work1HorNum <= workHorNumMax;
else
begin
work1HorNum <= 0;
work1Stage <= 0;
work1Done <= True;
end
end
end
end
work1Vector8 <= work1Vector8Next;
//$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage);
endrule
 
 
// Second luma processing pass for the request held in reqregWork2. Reads the
// half of workFile/storeFile selected by 1-workFileFlag (the half not being
// written by the work1 rules), produces final 8-bit samples in resultFile
// and sets the matching resultReady flag once a block's last row is written.
//   - yfracl==0: copy the stored values, rescaled to 8 bits;
//   - xfracl==0 or xfracl==2: vertical six-tap filter over workFile rows;
//   - otherwise: horizontal filter, over workFile (yfracl==2) or over the raw
//     bytes in storeFile averaged with the vertical result from workFile.
// Sets work2Done when the current part is finished and work8x8Done when the
// last sub-partition of the request is finished.
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done );
let xfracl = reqdata.xFracL;
let yfracl = reqdata.yFracL;
let offset = reqdata.offset;
let blockT = reqdata.bt;
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8;
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15;
Vector#(4,Bit#(1)) resultReadyNext = resultReady;
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Vector#(4,Bit#(15)) readdata = replicate(0);
if(yfracl==0)
begin
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]});
// Bits [12:5] undo the scaling by 32 applied in work1Luma.
for(Integer ii=0; ii<4; ii=ii+1)
tempResult8[ii] = (readdata[ii])[12:5];
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8);
work2HorNum <= work2HorNum+1;
if(work2HorNum == 3)
begin
resultReadyNext[(work2VerNum[1:0])] = 1;
if(work2VerNum == 3)
begin
work2VerNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
else
work2VerNum <= work2VerNum+1;
end
end
else if(xfracl==0 || xfracl==2)//vertical interpolation
begin
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]});
// work2Vector15 holds the five previous rows of this column; the row just
// read supplies the sixth filter tap.
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]);
// For odd fractions, average with the neighbouring row (rescaled to 8 bits).
if(yfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1);
else if(yfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1);
end
for(Integer ii=0; ii<16; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
work2Vector15Next[ii+16] = readdata[ii];
Bit#(2) workHorNumMax = 1;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
// The first five rows only fill the window; results start at row 5.
if(work2VerNum > 4)
begin
Bit#(1) horAddr = truncate(work2HorNum);
Bit#(3) verAddr = truncate(work2VerNum-5);
// Place the result according to the sub-partition being processed.
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
if(verAddr[1:0] == 3)
resultReadyNext[{verAddr[2],horAddr}] = 1;
end
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
else//horizontal interpolation
begin
offset = offset-2;
if(yfracl == 2)
begin
// Horizontal filter over the 15-bit vertical results stored in workFile.
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector15Next[ii] = work2Vector15[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector15Next[tempIndex] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]);
if(xfracl == 1)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1);
else if(xfracl == 3)
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1);
end
end
else
begin
// Both fractions odd: horizontal filter over the raw bytes from storeFile,
// then average with the realigned vertical result read from workFile.
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum});
for(Integer ii=0; ii<8; ii=ii+1)
work2Vector8Next[ii] = work2Vector8[ii+4];
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
work2Vector8Next[tempIndex] = readdata8[ii];
end
Vector#(4,Bit#(15)) tempResult15 = replicate(0);
for(Integer ii=0; ii<4; ii=ii+1)
begin
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]);
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
end
Bit#(2) verOffset;
Vector#(4,Bit#(15)) verResult15 = replicate(0);
if(xfracl == 1)
verOffset = reqdata.offset;
else
verOffset = reqdata.offset+1;
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))});
// Realign the vertical result by verOffset samples, using the previous word
// kept in work2Vector15[0..3] for the leading samples.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = verOffset+fromInteger(ii);
if(verOffset <= 3-fromInteger(ii) && verOffset!=0)
verResult15[ii] = work2Vector15[offsetplusii];
else
verResult15[ii] = readdata[offsetplusii];
work2Vector15Next[ii] = readdata[ii];
end
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5)));
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1);
end
end
// The first two words of each row only fill the window and produce no output.
if(work2HorNum >= 2)
begin
Bit#(1) horAddr = truncate(work2HorNum-2);
Bit#(3) verAddr = truncate(work2VerNum);
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0);
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0);
resultFile.upd({verAddr,horAddr},tempResult8);
if(verAddr[1:0] == 3)
resultReadyNext[{verAddr[2],horAddr}] = 1;
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext));
end
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
if(work2HorNum < workHorNumMax)
work2HorNum <= work2HorNum+1;
else
begin
work2HorNum <= 0;
if(work2VerNum < workVerNumMax)
work2VerNum <= work2VerNum+1;
else
begin
work2VerNum <= 0;
work2Done <= True;
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3))
work2SubMbPart <= work2SubMbPart+1;
else
begin
work2SubMbPart <= 0;
work8x8Done <= True;
end
end
end
end
work2Vector8 <= work2Vector8Next;
work2Vector15 <= work2Vector15Next;
resultReady <= resultReadyNext;
//$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset);
endrule
 
 
// Chroma processing pass. When the head of memRespQ is an IPLoadResp, one
// response word is consumed per firing. The word is realigned by 'offset',
// combined with the previous row kept in work1Vector8 and weighted with
// (8-xfracc)/xfracc horizontally and (8-yfracc)/yfracc vertically; results
// are written to storeFile in the half selected by workFileFlag.
// Requests for intermediate partitions are dequeued here; the request for
// the last partition is left in reqfifoWork1 with work1Done set, so that a
// switching rule dequeues it.
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done );
// Fractions widened to 4 bits so that 8-xfracc and 8-yfracc can represent 8.
Bit#(4) xfracc = zeroExtend(reqdata.xFracC);
Bit#(4) yfracc = zeroExtend(reqdata.yFracC);
let offset = reqdata.offset;
let blockT = reqdata.bt;
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
begin
memRespQ.deq();
// Unpack the response word; element 0 is the low byte.
Vector#(4,Bit#(8)) readdata = replicate(0);
readdata[0] = tempreaddata[7:0];
readdata[1] = tempreaddata[15:8];
readdata[2] = tempreaddata[23:16];
readdata[3] = tempreaddata[31:24];
Vector#(5,Bit#(8)) tempWork8 = replicate(0);
Vector#(5,Bit#(8)) tempPrev8 = replicate(0);
Vector#(4,Bit#(8)) tempResult8 = replicate(0);
Bool resultReadyFlag = False;
// tempWork8: five realigned samples of the current row (four outputs need
// five inputs). Leading samples come from the previous word where needed.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(2) offsetplusii = offset+fromInteger(ii);
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0))
tempWork8[ii] = work1Vector8[offsetplusii];
else
tempWork8[ii] = readdata[offsetplusii];
work1Vector8Next[ii] = readdata[ii];
end
tempWork8[4] = readdata[offset];
// tempPrev8: the same five positions from the previous row. Rows are kept
// in work1Vector8[4..8], or in [9..13] for the second output word of
// 16-wide blocks.
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2))
begin
for(Integer ii=0; ii<5; ii=ii+1)
begin
tempPrev8[ii] = work1Vector8[ii+9];
work1Vector8Next[ii+9] = tempWork8[ii];
end
end
else
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = work1Vector8[ii+4];
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))))
begin
for(Integer ii=0; ii<5; ii=ii+1)
work1Vector8Next[ii+4] = tempWork8[ii];
end
end
// With no vertical fraction only the current row is read, so use it as
// the "previous" row as well.
if(yfracc==0)
begin
for(Integer ii=0; ii<5; ii=ii+1)
tempPrev8[ii] = tempWork8[ii];
end
// Weighted sum of the four neighbouring samples; the weights total 64, and
// the sum is rounded with +32 before the shift by 6.
for(Integer ii=0; ii<4; ii=ii+1)
begin
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
tempResult8[ii] = truncate((tempVal+32)>>6);
end
if(work1VerNum > 0 || yfracc==0)
begin
if(blockT==IP4x8 || blockT==IP4x4)
begin
// Narrow partitions yield two samples per row: pair them with the two
// samples saved in work1Vector8[10+2*row ..] by an earlier sub-partition,
// and save the new pair in the same slots.
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1);
work1Vector8Next[tempIndex] = tempResult8[0];
work1Vector8Next[tempIndex+1] = tempResult8[1];
tempResult8[2] = tempResult8[0];
tempResult8[3] = tempResult8[1];
tempResult8[0] = work1Vector8[tempIndex];
tempResult8[1] = work1Vector8[tempIndex+1];
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1)
resultReadyFlag = True;
end
else
begin
if(work1HorNum>0 || (xfracc==0 && offset==0))
resultReadyFlag = True;
end
end
if(resultReadyFlag)
begin
// Destination address within the chroma block, offset by the partition and
// sub-partition currently being processed.
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1)));
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1));
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]);
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00}));
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0);
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8);
end
// Same limit expressions as in loadChroma, so one response is consumed for
// every request issued there.
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
if(work1HorNum < workHorNumMax)
work1HorNum <= work1HorNum+1;
else
begin
work1HorNum <= 0;
if(work1VerNum < workVerNumMax)
work1VerNum <= work1VerNum+1;
else
begin
// Partition finished: advance the sub-partition / partition counters.
Bool allDone = False;
work1VerNum <= 0;
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3))
work1SubMbPart <= work1SubMbPart+1;
else
begin
work1SubMbPart <= 0;
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3))
work1MbPart <= work1MbPart+1;
else
begin
work1MbPart <= 0;
work1Done <= True;
allDone = True;
end
end
// Intermediate partitions are dequeued here; the final one is dequeued by
// the switching rules once work1Done is seen.
if(!allDone)
reqfifoWork1.deq();
end
end
end
work1Vector8 <= work1Vector8Next;
//$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset);
endrule
 
 
// Second work stage for chroma requests.  Each firing copies one entry from
// the storeFile bank opposite to workFileFlag into resultFile, then advances
// the (work2HorNum, work2VerNum) position.  When work2HorNum reaches 3 the
// resultReady bit selected by work2VerNum is set; when work2VerNum is also 3
// the stage marks itself finished through work2Done and work8x8Done.
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done );
   let rowFinished   = (work2HorNum == 3);
   let blockFinished = rowFinished && (work2VerNum == 3);

   // Transfer one entry from the store file to the result file.
   resultFile.upd( {work2VerNum[1],work2HorNum,work2VerNum[0]},
                   storeFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]}) );

   // Publish the ready bit for the row that has just been completed.
   Vector#(4,Bit#(1)) readyMask = resultReady;
   if (rowFinished)
      readyMask[work2VerNum] = 1;
   resultReady <= readyMask;

   // Advance the position counters.
   work2HorNum <= work2HorNum+1;
   if (blockFinished)
      begin
         work2VerNum <= 0;
         work2Done   <= True;
         work8x8Done <= True;
      end
   else if (rowFinished)
      work2VerNum <= work2VerNum+1;
   //$display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum);
endrule
 
 
// Output stage.  While the resultReady bit selected by outBlockNum is set,
// one resultFile entry per firing is enqueued into outfifo.  outPixelNum
// advances every firing; outBlockNum advances when outPixelNum is 3, and
// outDone is raised when that happens with outBlockNum equal to 3.
rule outputing( !outDone && resultReady[outBlockNum]==1 );
   let lastPixel = (outPixelNum == 3);

   outfifo.enq( resultFile.sub({outBlockNum[1],outPixelNum,outBlockNum[0]}) );
   outPixelNum <= outPixelNum+1;

   if (lastPixel)
      outBlockNum <= outBlockNum+1;
   if (lastPixel && outBlockNum == 3)
      outDone <= True;
   //$display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum);
endrule
 
 
// Hand-over between the two work stages while work8x8Done is still clear.
// Fires once work1 is done and work2 is either done or has no request
// loaded.  The head of reqfifoWork1 becomes the new work2 request, the
// store-file bank flag is flipped and both done flags are cleared.
rule switching( !work8x8Done && work1Done && (reqregWork2==Invalid || work2Done) );
   let handedOver = reqfifoWork1.first();

   reqfifoWork1.deq();
   reqregWork2  <= tagged Valid handedOver;
   workFileFlag <= 1-workFileFlag;
   work2Done    <= False;
   work1Done    <= False;
   //$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
endrule
 
// Hand-over between the two work stages after work8x8Done has been raised.
// In addition to the conditions of the plain hand-over this waits for
// outDone, and besides loading the next work2 request and flipping the
// store-file bank flag it also clears outDone, work8x8Done and every
// resultReady bit.
rule switching8x8( work8x8Done && outDone && work1Done && (reqregWork2==Invalid || work2Done) );
   let handedOver = reqfifoWork1.first();

   // Load the next request into the second stage.
   reqfifoWork1.deq();
   reqregWork2  <= tagged Valid handedOver;
   workFileFlag <= 1-workFileFlag;
   work2Done    <= False;
   work1Done    <= False;

   // Reset the output bookkeeping.
   resultReady  <= replicate(0);
   work8x8Done  <= False;
   outDone      <= False;
   //$display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum);
endrule
 
// Stores the supplied value in picWidth.
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth );
   picWidth <= newPicWidth;
endmethod: setPicWidth
// Stores the supplied value in picHeight.
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight );
   picHeight <= newPicHeight;
endmethod: setPicHeight
// Accepts one interpolation request.  The request is always queued on
// reqfifoLoad; for IPLuma and IPChroma requests a work descriptor is also
// queued on reqfifoWork1:
//   IPLuma   : xFracL/yFracL = low two bits of mvhor/mvver,
//              offset = mvhor[3:2]
//   IPChroma : xFracC/yFracC = low three bits of mvhor/mvver,
//              offset = mvhor[4:3] + {hor[0],0}
// Any other request tag queues nothing on reqfifoWork1.
method Action request( InterpolatorIT inputdata );
   reqfifoLoad.enq(inputdata);
   case (inputdata) matches
      tagged IPLuma .indata :
         reqfifoWork1.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt});
      tagged IPChroma .indata :
         reqfifoWork1.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt});
      default :
         noAction;
   endcase
endmethod: request
 
// Head of the output queue: the oldest vector enqueued into outfifo that
// has not been dequeued yet.
method Vector#(4,Bit#(8)) first() = outfifo.first();
// Removes the head of the output queue (outfifo).
method Action deq() = outfifo.deq();
// Raises endOfFrameFlag; the consumer of that flag lies outside this method.
method Action endOfFrame();
   endOfFrameFlag <= True;
endmethod: endOfFrame
// Memory client sub-interface.  Requests are taken by the outside world from
// memReqQ (exposed as a Get) and responses are delivered into memRespQ
// (exposed as a Put).
interface Client mem_client;
interface Get request = fifoToGet(memReqQ);
interface Put response = fifoToPut(memRespQ);
endinterface
 
 
endmodule
 
 
endpackage
/trunk/src/IDeblockFilter.bsv
0,0 → 1,27
//**********************************************************************
// Interface for Deblocking Filter
//----------------------------------------------------------------------
//
//
//
 
package IDeblockFilter;
 
import H264Types::*;
import GetPut::*;
import ClientServer::*;
 
// Port list of the deblocking filter: one input stream, one output stream
// and two memory client ports.
interface IDeblockFilter;

   // Stream ports towards the neighbouring modules:
   // EntropyDecOT items in, DeblockFilterOT items out.
   interface Put#(EntropyDecOT)    ioin;
   interface Get#(DeblockFilterOT) ioout;

   // Memory ports.  The MemReq parameters are TAdd#(PicWidthSz,5)/32 for the
   // data port and PicWidthSz/13 for the parameter port; the responses carry
   // 32 and 13 bits respectively.
   // NOTE(review): presumably MemReq#(addrSize,dataSize) -- confirm in H264Types.
   interface Client#(MemReq#(TAdd#(PicWidthSz,5),32),MemResp#(32)) mem_client_data;
   interface Client#(MemReq#(PicWidthSz,13),MemResp#(13))          mem_client_parameter;

endinterface: IDeblockFilter
 
endpackage
 
/trunk/src/IFinalOutput.bsv
0,0 → 1,22
//**********************************************************************
// Interface for Final Output
//----------------------------------------------------------------------
//
//
//
 
package IFinalOutput;
 
import H264Types::*;
import GetPut::*;
import ClientServer::*;
 
// Port list of the final output stage: a single input stream and no outputs
// at the interface level.
interface IFinalOutput;

   // Stream port fed by the producer of BufferControlOT items.
   interface Put#(BufferControlOT) ioin;

endinterface: IFinalOutput
 
endpackage
 
/trunk/src/mkEntropyDec_orig.bsv
0,0 → 1,1699
//**********************************************************************
// Entropy Decoder implementation
//----------------------------------------------------------------------
//
//
 
package mkEntropyDec;
 
import H264Types::*;
import ExpGolomb::*;
import CAVLC::*;
import ICalc_nC::*;
import mkCalc_nC::*;
import IEntropyDec::*;
import FIFO::*;
 
import Connectable::*;
import GetPut::*;
import ClientServer::*;
 
 
//-----------------------------------------------------------
// Local Datatypes
//-----------------------------------------------------------
 
// Control state of the entropy decoder.  The first group of tags covers
// start-up and the NAL unit kinds selected by the NAL header byte; the second
// group covers the nested syntax structures inside a coded slice.  Tags that
// carry a Bit#(5) use it as the step counter within that state (the parser
// rule dispatches on it with "case ( step )").
// Eq and Bits are derived, so keep the member list and its order stable.
typedef union tagged
{
void Start; //special state that initializes the process.
void NewUnit; //special state that checks the NAL unit type.
Bit#(5) CodedSlice; //coded slice NAL unit (nal_unit_type 1 or 5); payload = step
void SEI; //SEI NAL unit (nal_unit_type 6)
Bit#(5) SPS; //sequence parameter set NAL unit (nal_unit_type 7); payload = step
Bit#(5) PPS; //picture parameter set NAL unit (nal_unit_type 8); payload = step
void AUD; //access unit delimiter NAL unit (nal_unit_type 9)
void EndSequence; //end-of-sequence NAL unit (nal_unit_type 10)
void EndStream; //end-of-stream NAL unit (nal_unit_type 11)
void Filler; //filler data NAL unit (nal_unit_type 12)

Bit#(5) SliceData; //decodes slice data (part of a CodedSlice NAL unit)
Bit#(5) MacroblockLayer; //decodes macroblock layer (part of a CodedSlice NAL unit)
Bit#(5) MbPrediction; //decodes macroblock prediction (part of a CodedSlice NAL unit)
Bit#(5) SubMbPrediction; //decodes sub-macroblock prediction (part of a CodedSlice NAL unit)
Bit#(5) Residual; //decodes residual (part of a CodedSlice NAL unit)
Bit#(5) ResidualBlock; //decodes residual block (part of a CodedSlice NAL unit)
}
State deriving(Eq,Bits);
 
 
//-----------------------------------------------------------
// Helper functions
// Converts the mb_type syntax element into an MbType value.
//
//   in_mb_type    : mb_type as read from the bitstream
//   in_slice_type : slice_type of the enclosing slice; 2 and 7 are treated
//                   as I slices, for which mb_type is shifted up by 5 so
//                   that both slice kinds share one numbering
//
// After the shift: 0..4 are the P macroblock types, 5 is I_NxN, 30 is I_PCM
// and every other code is decoded as I_16x16.  For I_16x16 the code minus 6
// is split into the prediction mode (bits [1:0]), the chroma coded block
// pattern (bits [3:2], plus one modulo 4 for codes of 12 and above) and the
// luma coded block pattern flag (set for codes of 12 and above).
function MbType mbtype_convert( Bit#(5) in_mb_type, Bit#(4) in_slice_type );//converts mb_type syntax element to MbType type
   Bool    intraSlice = (in_slice_type == 2 || in_slice_type == 7);
   Bit#(5) mbCode     = (intraSlice ? (in_mb_type+5) : in_mb_type);

   // I_16x16 fields; only used when none of the fixed codes below matches.
   Bit#(5) idx16      = mbCode-6;
   Bool    lumaCoded  = !(idx16 < 12);
   Bit#(2) predMode   = idx16[1:0];
   Bit#(2) chromaBits = idx16[3:2];
   Bit#(2) chromaCbp  = (lumaCoded ? (chromaBits+1) : chromaBits);
   Bit#(1) lumaCbp    = (lumaCoded ? 1 : 0);

   MbType converted = I_16x16{intra16x16PredMode:predMode, codedBlockPatternChroma:chromaCbp, codedBlockPatternLuma:lumaCbp};
   case ( mbCode )
      0:  converted = P_L0_16x16;
      1:  converted = P_L0_L0_16x8;
      2:  converted = P_L0_L0_8x16;
      3:  converted = P_8x8;
      4:  converted = P_8x8ref0;
      5:  converted = I_NxN;
      30: converted = I_PCM;
   endcase
   return converted;
endfunction
 
 
 
//-----------------------------------------------------------
// Entropy Decoder Module
//-----------------------------------------------------------
 
 
(* synthesize *)
module mkEntropyDec( IEntropyDec );
// ---- Stream endpoints and top-level control ----
// infifo      : incoming NalUnwrapOT items (NewUnit / RbspByte / EndOfFile).
// outfifo     : main decoded-syntax output stream.
// outfifo_ITB : second output stream of type EntropyDecOT_InverseTrans.
// state       : current parser state, reset to Start.
// nalrefidc / nalunittype : bits [6:5] / [4:0] of the NAL header byte,
//               latched by rule newunit.
// buffer / bufcount : bit buffer consumed by the parser from its top bit
//               (buffer[buffersize-1]) and the number of bits held in it;
//               rule fillbuffer adds 8 to bufcount per byte merged.
FIFO#(NalUnwrapOT) infifo <- mkSizedFIFO(entropyDec_infifo_size);
FIFO#(EntropyDecOT) outfifo <- mkFIFO;
FIFO#(EntropyDecOT_InverseTrans) outfifo_ITB <- mkFIFO;
Reg#(State) state <- mkReg(Start);
Reg#(Bit#(2)) nalrefidc <- mkReg(0);
Reg#(Bit#(5)) nalunittype <- mkReg(0);
Reg#(Buffer) buffer <- mkReg(0);
Reg#(Bufcount) bufcount <- mkReg(0);

//saved syntax elements
// Name prefixes give the syntax structure the element comes from:
// sps* = sequence parameter set, pps* = picture parameter set,
// sh* = slice header, sdm* = slice data / macroblock layer.
// spsnum_ref_frames_in_pic_order_cnt_cycle is also counted down while the
// per-frame offsets are parsed (SPS step 9).
Reg#(Bit#(5)) spsseq_parameter_set_id <- mkReg(0);
Reg#(Bit#(5)) spslog2_max_frame_num <- mkReg(0);
Reg#(Bit#(5)) spslog2_max_pic_order_cnt_lsb <- mkReg(0);
Reg#(Bit#(2)) spspic_order_cnt_type <- mkReg(0);
Reg#(Bit#(1)) spsdelta_pic_order_always_zero_flag <- mkReg(0);
Reg#(Bit#(8)) spsnum_ref_frames_in_pic_order_cnt_cycle <- mkReg(0);
Reg#(Bit#(8)) ppspic_parameter_set_id <- mkReg(0);
Reg#(Bit#(1)) ppspic_order_present_flag <- mkReg(0);
Reg#(Bit#(1)) ppsdeblocking_filter_control_present_flag <- mkReg(0);
Reg#(Bit#(4)) shslice_type <- mkReg(0);
Reg#(Bit#(3)) shdmemory_management_control_operation <- mkReg(0);
Reg#(MbType) sdmmbtype <- mkReg(I_NxN);
Reg#(Bit#(4)) sdmcodedBlockPatternLuma <- mkReg(0);
Reg#(Bit#(2)) sdmcodedBlockPatternChroma <- mkReg(0);
Reg#(Bit#(5)) sdmrTotalCoeff <- mkReg(0);
Reg#(Bit#(2)) sdmrTrailingOnes <- mkReg(0);
//derived decoding variables for slice data
// tempreg is a general-purpose down counter: it holds the remaining
// mb_skip_run in SliceData and the remaining PCM sample count (256, then
// 128) in MacroblockLayer.
// currMbAddr is the address of the macroblock being decoded; it is loaded
// from first_mb_in_slice and incremented per skipped or finished macroblock.
// NOTE(review): the temp3bit*/temp5bit* scratch registers, maxNumCoeff,
// cavlcFIFO, residualChroma, totalCoeff and zerosLeft are presumably used by
// the residual/CAVLC states -- confirm against the parser rule.
Reg#(Bit#(16)) tempreg <- mkReg(0);
Reg#(Bit#(5)) num_ref_idx_l0_active_minus1 <- mkReg(0);
Reg#(Bit#(PicAreaSz)) currMbAddr <- mkReg(0);
Reg#(Bit#(3)) temp3bit0 <- mkReg(0);
Reg#(Bit#(3)) temp3bit1 <- mkReg(0);
Reg#(Bit#(3)) temp3bit2 <- mkReg(0);
Reg#(Bit#(3)) temp3bit3 <- mkReg(0);
Reg#(Bit#(5)) temp5bit <- mkReg(0);
Reg#(Bit#(5)) temp5bit2 <- mkReg(0);
Reg#(Bit#(5)) maxNumCoeff <- mkReg(0);
FIFO#(Bit#(13)) cavlcFIFO <- mkSizedFIFO(16);
Calc_nC calcnc <- mkCalc_nC();
Reg#(Bit#(1)) residualChroma <- mkReg(0);
Reg#(Bit#(5)) totalCoeff <- mkReg(0);
Reg#(Bit#(4)) zerosLeft <- mkReg(0);

//exp-golomb 32-bit version states
// A 32-bit exp-Golomb value is decoded in two parser firings: the first one
// stores expgolomb_numbits32(buffer) here, the second one uses it to decode
// the value and then clears it.  Zero means "no decode in progress".
Reg#(Bufcount) egnumbits <- mkReg(0);

//extra-buffering states
// extrabuffer     : 32-bit staging word filled one byte per firing by rule
//                   fillextrabuffer, most significant byte first.
// extrabufcount   : number of bytes currently staged.
// extraendnalflag : set when a non-RbspByte item shows up while the staging
//                   word is partly filled, so the partial word gets flushed.
// endnalflag      : set by rule fillbuffer when the next input item is
//                   NewUnit or EndOfFile, i.e. no more bytes for this unit.
Reg#(Bit#(32)) extrabuffer <- mkReg(0);
Reg#(Bit#(3)) extrabufcount <- mkReg(0);
Reg#(Bit#(1)) extraendnalflag <- mkReg(0);
Reg#(Bit#(1)) endnalflag <- mkReg(0);
 
//-----------------------------------------------------------
// Rules
 
// Start state: consume input items until a NewUnit marker arrives.
//   NewUnit   -> clear all buffering state and move on to NewUnit
//   RbspByte  -> dropped (a byte outside of any unit being decoded)
//   EndOfFile -> forwarded on outfifo; the state is left unchanged
rule startup (state matches Start);
   let head = infifo.first();

   if (head matches tagged NewUnit)
      begin
         // Fresh unit: reset both buffering stages and their end flags.
         extraendnalflag <= 0;
         endnalflag      <= 0;
         extrabufcount   <= 0;
         extrabuffer     <= 0;
         bufcount        <= 0;
         buffer          <= 0;
         state           <= NewUnit;
         infifo.deq();
      end
   else if (head matches tagged RbspByte .*)
      infifo.deq();
   else if (head matches tagged EndOfFile)
      begin
         outfifo.enq(EndOfFile);
         infifo.deq();
         $display( "INFO EntropyDec: EndOfFile reached" );
      end
endrule
 
// NewUnit state: interpret the NAL header byte.
// Bits [6:5] and [4:0] of the byte are latched into nalrefidc and
// nalunittype, the byte is forwarded on both output FIFOs, and the parser
// state is chosen from the unit type.  An unsupported type is reported and
// sends the decoder back to Start.  A NewUnit or EndOfFile item in place of
// the header byte also returns to Start without being consumed.
rule newunit (state matches NewUnit);
   case (infifo.first()) matches
      tagged RbspByte .rdata :
         begin
            Bit#(5) unitType = rdata[4:0];
            State   followUp = Start;

            case (unitType)
               1 : followUp = CodedSlice 0;
               5 : followUp = CodedSlice 0;
               6 : followUp = SEI;
               7 : followUp = SPS 0;
               8 : followUp = PPS 0;
               9 : followUp = AUD;
               10: followUp = EndSequence;
               11: followUp = EndStream;
               12: followUp = Filler;
               default:
                  begin
                     $display( "ERROR EntropyDec: NAL Unit Type = %d", rdata[4:0] );
                     followUp = Start;
                  end
            endcase
            $display("ccl2newunit");
            $display("ccl2rbspbyte %h", rdata);

            state       <= followUp;
            nalunittype <= rdata[4:0];
            nalrefidc   <= rdata[6:5];
            outfifo_ITB.enq(NewUnit rdata);
            outfifo.enq(NewUnit rdata);
            infifo.deq();
         end
      tagged EndOfFile : state <= Start;
      tagged NewUnit   : state <= Start;
   endcase
endrule
 
 
// First buffering stage: gather up to four RBSP bytes into extrabuffer.
// Byte k (k = extrabufcount) is placed in byte lane k counted from the most
// significant end.  If the next input item is not an RBSP byte while some
// bytes are already staged, extraendnalflag is raised so that the partial
// word can be flushed.
rule fillextrabuffer (state != Start
                      && state != NewUnit
                      && extrabufcount < 4
                      && extraendnalflag == 0);
   if (infifo.first() matches tagged RbspByte .dbyte)
      begin
         Bit#(32) merged    = extrabuffer;
         Bool     laneKnown = True;

         case ( extrabufcount )
            0: merged = {dbyte, extrabuffer[23:0]};
            1: merged = {extrabuffer[31:24],dbyte,extrabuffer[15:0]};
            2: merged = {extrabuffer[31:16],dbyte,extrabuffer[7:0]};
            3: merged = {extrabuffer[31:8],dbyte};
            default: laneKnown = False;
         endcase

         if (laneKnown)
            extrabuffer <= merged;
         else
            $display( "ERROR EntropyDec: fillextrabuffer default case_" );

         infifo.deq();
         extrabufcount <= extrabufcount + 1;
         //$display( "TRACE EntropyDec: fillextrabuffer RbspByte %h %h %h", dbyte, extrabufcount, extrabuffer);
      end
   else if (extrabufcount != 0)
      extraendnalflag <= 1;
endrule
 
// Second buffering stage: merge the staged word into the main bit buffer.
// Fires when the main buffer has room for 32 more bits and the staging word
// is either full or has been closed by extraendnalflag.  The staged word is
// shifted so that it lands directly below the bits already held, bufcount
// grows by 8 per staged byte, and the staging registers are cleared.
// endnalflag is raised when the next input item is NewUnit or EndOfFile.
// (Original author's remark on the guard: "predicate not sure".)
rule fillbuffer (state != Start
                 && state != NewUnit
                 && bufcount<=truncate(buffersize-32)
                 && (extrabufcount == 4 || extraendnalflag == 1)
                 && endnalflag == 0);
   Bufcount shiftAmount = truncate(buffersize)-bufcount-32;
   Buffer   stagedBits  = zeroExtend(extrabuffer);

   buffer <= (buffer | (stagedBits << zeroExtend(shiftAmount)));

   if (extrabufcount == 4)
      bufcount <= bufcount+32;
   else if (extrabufcount == 3)
      bufcount <= bufcount+24;
   else if (extrabufcount == 2)
      bufcount <= bufcount+16;
   else if (extrabufcount == 1)
      bufcount <= bufcount+8;
   else
      $display( "ERROR EntropyDec: fillbuffer default case" );

   if(infifo.first()==NewUnit || infifo.first()==EndOfFile)
      endnalflag <= 1;

   // The staging word has been consumed.
   extrabufcount <= 0;
   extrabuffer   <= 0;
endrule
 
 
rule parser (state != Start
&&& state != NewUnit
&&& (bufcount > truncate(buffersize-32) || endnalflag == 1));//predicate not sure
//$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h", bufcount, buffer );
Bufcount numbitsused = 0;
State nextstate = Start;
Int#(16) tempint = 0;
Int#(32) tempint32 = 0;
case ( state ) matches
tagged CodedSlice .step :
begin
case ( step )
0:
begin
$display( "ccl2SHfirst_mb_in_slice %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHfirst_mb_in_slice truncate(expgolomb_unsigned(buffer)));
currMbAddr <= truncate(expgolomb_unsigned(buffer));
calcnc.initialize(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 1;
end
1:
begin
$display( "ccl2SHslice_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHslice_type truncate(expgolomb_unsigned(buffer)));
shslice_type <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 2;
end
2:
begin
$display( "ccl2SHpic_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 3;
if(ppspic_parameter_set_id != truncate(expgolomb_unsigned(buffer))) $display( "ERROR EntropyDec: pic_parameter_set_id don't match" );
end
3:
begin
Bit#(16) tttt = buffer[buffersize-1:buffersize-16];
tttt = tttt >> 16 - zeroExtend(spslog2_max_frame_num);
$display( "ccl2SHframe_num %0d", tttt );
outfifo.enq(SHframe_num tttt);
numbitsused = zeroExtend(spslog2_max_frame_num);
nextstate = CodedSlice 4;
end
4:
begin
if(nalunittype == 5)
begin
$display( "ccl2SHidr_pic_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHidr_pic_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = CodedSlice 5;
end
5:
begin
if(spspic_order_cnt_type == 0)
begin
Bit#(16) tttt = buffer[buffersize-1:buffersize-16];
tttt = tttt >> 16 - zeroExtend(spslog2_max_pic_order_cnt_lsb);
$display( "ccl2SHpic_order_cnt_lsb %0d", tttt );
outfifo.enq(SHpic_order_cnt_lsb tttt);
numbitsused = zeroExtend(spslog2_max_pic_order_cnt_lsb);
nextstate = CodedSlice 6;
end
else
nextstate = CodedSlice 7;
end
6:
begin
if(ppspic_order_present_flag == 1)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 6;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt_bottom %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt_bottom truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 7;
end
end
else
nextstate = CodedSlice 7;
end
7:
begin
if(spspic_order_cnt_type == 1 && spsdelta_pic_order_always_zero_flag == 0)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 7;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt0 %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt0 truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 8;
end
end
else
nextstate = CodedSlice 9;
end
8:
begin
if(ppspic_order_present_flag == 1)
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = CodedSlice 8;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SHdelta_pic_order_cnt1 %0d", tempint32 );
outfifo.enq(SHdelta_pic_order_cnt1 truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = CodedSlice 9;
end
end
else
nextstate = CodedSlice 9;
end
9:
begin
if(shslice_type == 0 || shslice_type == 5)
begin
$display( "ccl2SHnum_ref_idx_active_override_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHnum_ref_idx_active_override_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 10;
else
nextstate = CodedSlice 11;
end
else
nextstate = CodedSlice 11;
end
10:
begin
$display( "ccl2SHnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SHnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1));
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 11;
end
11:
begin
if(shslice_type != 2 && shslice_type != 7)
begin
$display( "ccl2SHRref_pic_list_reordering_flag_l0 %0d", buffer[buffersize-1] );
outfifo.enq(SHRref_pic_list_reordering_flag_l0 buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 12;
else
nextstate = CodedSlice 15;
end
else
nextstate = CodedSlice 15;
end
12:
begin
$display( "ccl2SHRreordering_of_pic_nums_idc %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHRreordering_of_pic_nums_idc truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)==0 || expgolomb_unsigned(buffer)==1)
nextstate = CodedSlice 13;
else if(expgolomb_unsigned(buffer)==2)
nextstate = CodedSlice 14;
else
nextstate = CodedSlice 15;
end
13:
begin
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1;
$display( "ccl2SHRabs_diff_pic_num %0d", temp17 );
outfifo.enq(SHRabs_diff_pic_num temp17);
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 12;
end
14:
begin
$display( "ccl2SHRlong_term_pic_num %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHRlong_term_pic_num truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 12;
end
15:
begin
if(nalrefidc == 0)
nextstate = CodedSlice 23;
else
begin
if(nalunittype == 5)
begin
$display( "ccl2SHDno_output_of_prior_pics_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDno_output_of_prior_pics_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = CodedSlice 16;
end
else
nextstate = CodedSlice 17;
end
end
16:
begin
$display( "ccl2SHDlong_term_reference_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDlong_term_reference_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = CodedSlice 23;
end
17:
begin
$display( "ccl2SHDadaptive_ref_pic_marking_mode_flag %0d", buffer[buffersize-1] );
outfifo.enq(SHDadaptive_ref_pic_marking_mode_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = CodedSlice 18;
else
nextstate = CodedSlice 23;
end
18:
begin
$display( "ccl2SHDmemory_management_control_operation %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDmemory_management_control_operation truncate(expgolomb_unsigned(buffer)));
shdmemory_management_control_operation <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)!=0)
nextstate = CodedSlice 19;
else
nextstate = CodedSlice 23;
end
19:
begin
if(shdmemory_management_control_operation==1 || shdmemory_management_control_operation==3)
begin
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1;
$display( "ccl2SHDdifference_of_pic_nums %0d", temp17 );
outfifo.enq(SHDdifference_of_pic_nums temp17);
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 20;
end
else
nextstate = CodedSlice 20;
end
20:
begin
if(shdmemory_management_control_operation==2)
begin
$display( "ccl2SHDlong_term_pic_num %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDlong_term_pic_num truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 21;
end
else
nextstate = CodedSlice 21;
end
21:
begin
if(shdmemory_management_control_operation==3 || shdmemory_management_control_operation==6)
begin
$display( "ccl2SHDlong_term_frame_idx %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDlong_term_frame_idx truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 22;
end
else
nextstate = CodedSlice 22;
end
22:
begin
if(shdmemory_management_control_operation==4)
begin
$display( "ccl2SHDmax_long_term_frame_idx_plus1 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHDmax_long_term_frame_idx_plus1 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 18;
end
else
nextstate = CodedSlice 18;
end
23:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SHslice_qp_delta %0d", tempint );
outfifo_ITB.enq(SHslice_qp_delta truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 24;
end
24:
begin
if(ppsdeblocking_filter_control_present_flag==1)
begin
$display( "ccl2SHdisable_deblocking_filter_idc %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SHdisable_deblocking_filter_idc truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(expgolomb_unsigned(buffer)!=1)
nextstate = CodedSlice 25;
else
nextstate = CodedSlice 27;
end
else
nextstate = CodedSlice 27;
end
25:
begin
tempint = unpack(expgolomb_signed(buffer) << 1);
$display( "ccl2SHslice_alpha_c0_offset %0d", tempint );
outfifo.enq(SHslice_alpha_c0_offset truncate(expgolomb_signed(buffer) << 1));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 26;
end
26:
begin
tempint = unpack(expgolomb_signed(buffer) << 1);
$display( "ccl2SHslice_beta_offset %0d", tempint );
outfifo.enq(SHslice_beta_offset truncate(expgolomb_signed(buffer) << 1));
numbitsused = expgolomb_numbits(buffer);
nextstate = CodedSlice 27;
end
27:
begin
nextstate = SliceData 0;
end
default: $display( "ERROR EntropyDec: CodedSlice default step" );
endcase
end
tagged SEI .step :
begin
nextstate = Start;
$display( "INFO EntropyDec: SEI data thrown away" );
end
tagged SPS .step :
begin
case ( step )
0:
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "INFO EntropyDec: profile_idc = %d", outputdata );
outputdata = buffer[buffersize-9:buffersize-16];
$display( "INFO EntropyDec: constraint_set = %b", outputdata );
outputdata = buffer[buffersize-17:buffersize-24];
$display( "INFO EntropyDec: level_idc = %d", outputdata );
numbitsused = 24;
nextstate = SPS 1;
end
1:
begin
$display( "ccl2SPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer)));
spsseq_parameter_set_id <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 2;
end
2:
begin
$display( "ccl2SPSlog2_max_frame_num %0d", expgolomb_unsigned(buffer)+4 );
outfifo.enq(SPSlog2_max_frame_num truncate(expgolomb_unsigned(buffer)+4));
spslog2_max_frame_num <= truncate(expgolomb_unsigned(buffer)+4);
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 3;
end
3:
begin
let tttt = expgolomb_unsigned(buffer);
$display( "ccl2SPSpic_order_cnt_type %0d", tttt );
outfifo.enq(SPSpic_order_cnt_type truncate(tttt));
spspic_order_cnt_type <= truncate(tttt);
numbitsused = expgolomb_numbits(buffer);
if(tttt == 0)
nextstate = SPS 4;
else if(tttt == 1)
nextstate = SPS 5;
else
nextstate = SPS 10;
end
4:
begin
$display( "ccl2SPSlog2_max_pic_order_cnt_lsb %0d", expgolomb_unsigned(buffer)+4 );
outfifo.enq(SPSlog2_max_pic_order_cnt_lsb truncate(expgolomb_unsigned(buffer)+4));
spslog2_max_pic_order_cnt_lsb <= truncate(expgolomb_unsigned(buffer)+4);
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 10;
end
5:
begin
$display( "ccl2SPSdelta_pic_order_always_zero_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSdelta_pic_order_always_zero_flag buffer[buffersize-1]);
spsdelta_pic_order_always_zero_flag <= buffer[buffersize-1];
numbitsused = 1;
nextstate = SPS 6;
end
6:
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 6;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_non_ref_pic %0d", tempint32 );
outfifo.enq(SPSoffset_for_non_ref_pic truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = SPS 7;
end
end
7:
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 7;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_top_to_bottom_field %0d", tempint32 );
outfifo.enq(SPSoffset_for_top_to_bottom_field truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
numbitsused = egnumbits;
nextstate = SPS 8;
end
end
8:
begin
$display( "ccl2SPSnum_ref_frames_in_pic_order_cnt_cycle %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSnum_ref_frames_in_pic_order_cnt_cycle truncate(expgolomb_unsigned(buffer)));
spsnum_ref_frames_in_pic_order_cnt_cycle <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 9;
end
9:
begin
if(spsnum_ref_frames_in_pic_order_cnt_cycle == 0)
nextstate = SPS 10;
else
begin
if(egnumbits == 0)
begin
Bufcount tempbufcount = expgolomb_numbits32(buffer);
egnumbits <= tempbufcount;
numbitsused = tempbufcount-1;
nextstate = SPS 9;
end
else
begin
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits));
$display( "ccl2SPSoffset_for_ref_frame %0d", tempint32 );
outfifo.enq(SPSoffset_for_ref_frame truncate(expgolomb_signed32(buffer,egnumbits)));
egnumbits <= 0;
spsnum_ref_frames_in_pic_order_cnt_cycle <= spsnum_ref_frames_in_pic_order_cnt_cycle - 1;
numbitsused = egnumbits;
nextstate = SPS 9;
end
end
end
10:
begin
$display( "ccl2SPSnum_ref_frames %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSnum_ref_frames truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 11;
end
11:
begin
$display( "ccl2SPSgaps_in_frame_num_allowed_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSgaps_in_frame_num_allowed_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = SPS 12;
end
12:
begin
$display( "ccl2SPSpic_width_in_mbs %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SPSpic_width_in_mbs truncate(expgolomb_unsigned(buffer)+1));
calcnc.initialize_picWidth(truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 13;
end
13:
begin
$display( "ccl2SPSpic_height_in_map_units %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(SPSpic_height_in_map_units truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 14;
end
14:
begin
//SPSframe_mbs_only_flag = 1 for baseline
numbitsused = 1;
nextstate = SPS 15;
end
15:
begin
$display( "ccl2SPSdirect_8x8_inference_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSdirect_8x8_inference_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = SPS 16;
end
16:
begin
$display( "ccl2SPSframe_cropping_flag %0d", buffer[buffersize-1] );
outfifo.enq(SPSframe_cropping_flag buffer[buffersize-1]);
numbitsused = 1;
if(buffer[buffersize-1] == 1)
nextstate = SPS 17;
else
nextstate = SPS 21;
end
17:
begin
$display( "ccl2SPSframe_crop_left_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_left_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 18;
end
18:
begin
$display( "ccl2SPSframe_crop_right_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_right_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 19;
end
19:
begin
$display( "ccl2SPSframe_crop_top_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_top_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 20;
end
20:
begin
$display( "ccl2SPSframe_crop_bottom_offset %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SPSframe_crop_bottom_offset truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SPS 21;
end
21:
begin
nextstate = Start;
$display( "INFO EntropyDec:VUI data thrown away" );
end
default: $display( "ERROR EntropyDec: SPS default step" );
endcase
end
tagged PPS .step :
begin
case ( step )
0:
begin
ppspic_parameter_set_id <= truncate(expgolomb_unsigned(buffer));
$display( "ccl2PPSpic_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
outfifo_ITB.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 1;
end
1:
begin
$display( "ccl2PPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) );
outfifo.enq(PPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 2;
if(spsseq_parameter_set_id != truncate(expgolomb_unsigned(buffer)))
$display( "ERROR EntropyDec: seq_parameter_set_id don't match" );
end
2:
begin
//PPSentropy_coding_mode_flag = 0 for baseline
numbitsused = 1;
nextstate = PPS 3;
end
3:
begin
ppspic_order_present_flag <= buffer[buffersize-1];
$display( "ccl2PPSpic_order_present_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSpic_order_present_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 4;
end
4:
begin
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 5;
if(expgolomb_unsigned(buffer)+1 != 1)
$display( "ERROR EntropyDec: PPSnum_slice_groups not equal to 1" );//=1 for main
end
5:
begin
$display( "ccl2PPSnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(PPSnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1));
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 6;
end
6:
begin
$display( "ccl2PPSnum_ref_idx_l1_active %0d", expgolomb_unsigned(buffer)+1 );
outfifo.enq(PPSnum_ref_idx_l1_active truncate(expgolomb_unsigned(buffer)+1));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 7;
end
7:
begin
//PPSweighted_pred_flag = 0 for baseline; PPSweighted_bipred_idc = 0 for baseline
numbitsused = 3;
nextstate = PPS 8;
end
8:
begin
$display( "ccl2PPSpic_init_qp %0d", expgolomb_signed(buffer)+26 );
outfifo_ITB.enq(PPSpic_init_qp truncate(expgolomb_signed(buffer)+26));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 9;
end
9:
begin
$display( "ccl2PPSpic_init_qs %0d", expgolomb_signed(buffer)+26 );
outfifo_ITB.enq(PPSpic_init_qs truncate(expgolomb_signed(buffer)+26));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 10;
end
10:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2PPSchroma_qp_index_offset %0d", tempint );
outfifo_ITB.enq(PPSchroma_qp_index_offset truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = PPS 11;
end
11:
begin
ppsdeblocking_filter_control_present_flag <= buffer[buffersize-1];
$display( "ccl2PPSdeblocking_filter_control_present_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSdeblocking_filter_control_present_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 12;
end
12:
begin
$display( "ccl2PPSconstrained_intra_pred_flag %0d", buffer[buffersize-1] );
outfifo.enq(PPSconstrained_intra_pred_flag buffer[buffersize-1]);
numbitsused = 1;
nextstate = PPS 13;
end
13:
begin
//PPSredundant_pic_cnt_present_flag = 0 for main
numbitsused = 1;
nextstate = PPS 14;
if(buffer[buffersize-1] != 0)
$display( "ERROR EntropyDec: PPSredundant_pic_cnt_present_flag not equal to 0" );//=0 for main
end
14:
begin
nextstate = Start;
end
default: $display( "ERROR EntropyDec: PPS default step" );
endcase
end
tagged AUD .step :
begin
outfifo.enq(AUDPrimaryPicType buffer[buffersize-1:buffersize-3]);
numbitsused = 3;
nextstate = Start;
end
tagged EndSequence :
begin
outfifo.enq(EndOfSequence);
nextstate = Start;
end
tagged EndStream :
begin
outfifo.enq(EndOfStream);
nextstate = Start;
end
tagged Filler :
begin
nextstate = Start;
end
tagged SliceData .step :
begin
case ( step )
0:
begin
if( shslice_type!=2 && shslice_type!=7 )
begin
$display( "ccl2SDmb_skip_run %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDmb_skip_run truncate(expgolomb_unsigned(buffer)));
tempreg <= truncate(expgolomb_unsigned(buffer));
calcnc.nNupdate_pskip( truncate(expgolomb_unsigned(buffer)) );
numbitsused = expgolomb_numbits(buffer);
nextstate = SliceData 1;
end
else
nextstate = SliceData 2;
end
1:
begin
if( tempreg>0 )
begin
currMbAddr <= currMbAddr+1;//only because input assumed to comform to both baseline and main
tempreg <= tempreg-1;
nextstate = SliceData 1;
end
else
begin
////$display( "ccl2SDcurrMbAddr %0d", currMbAddr );
////outfifo.enq(SDcurrMbAddr currMbAddr);
nextstate = SliceData 2;
end
end
2:
begin
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 )
begin
calcnc.loadMb(currMbAddr);
nextstate = MacroblockLayer 0;
end
else
nextstate = SliceData 3;
end
3:
begin
currMbAddr <= currMbAddr+1;//only because input assumed to conform to both baseline and main
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 )
nextstate = SliceData 0;
else
nextstate = Start;
end
default: $display( "ERROR EntropyDec: SliceData default step" );
endcase
end
tagged MacroblockLayer .step : //return to SliceData 3
begin
case ( step )
0:
begin
$display( "ccl2SDMmb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) );
outfifo_ITB.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) );
sdmmbtype <= mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type);
numbitsused = expgolomb_numbits(buffer);
if(mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) == I_PCM)
begin
calcnc.nNupdate_ipcm();
nextstate = MacroblockLayer 1;
end
else
nextstate = MacroblockLayer 4;
end
1:
begin
tempreg <= 256;
numbitsused = zeroExtend(bufcount[2:0]);
nextstate = MacroblockLayer 2;
end
2:
begin
if( tempreg>0 )
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "ccl2SDMpcm_sample_luma %0d", outputdata );
outfifo.enq(SDMpcm_sample_luma outputdata);
tempreg <= tempreg-1;
numbitsused = 8;
nextstate = MacroblockLayer 2;
end
else
begin
tempreg <= 128;
nextstate = MacroblockLayer 3;
end
end
3:
begin
if( tempreg>0 )
begin
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8];
$display( "ccl2SDMpcm_sample_chroma %0d", outputdata );
outfifo.enq(SDMpcm_sample_chroma outputdata);
tempreg <= tempreg-1;
numbitsused = 8;
nextstate = MacroblockLayer 3;
end
else
nextstate = SliceData 3;
end
4:
begin
if(sdmmbtype != I_NxN
&&& mbPartPredMode(sdmmbtype,0) != Intra_16x16
&&& numMbPart(sdmmbtype) == 4)
nextstate = SubMbPrediction 0;
else
nextstate = MbPrediction 0;
end
5:
begin
if(mbPartPredMode(sdmmbtype,0) != Intra_16x16)
begin
$display( "ccl2SDMcoded_block_pattern %0d", expgolomb_coded_block_pattern(buffer,sdmmbtype) );
////outfifo.enq(SDMcoded_block_pattern expgolomb_coded_block_pattern(buffer,sdmmbtype));
sdmcodedBlockPatternLuma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[3:0];
sdmcodedBlockPatternChroma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[5:4];
numbitsused = expgolomb_numbits(buffer);
end
else
begin
if(sdmmbtype matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3})
begin
sdmcodedBlockPatternLuma <= {tempv3,tempv3,tempv3,tempv3};
sdmcodedBlockPatternChroma <= tempv2;
end
else
$display( "ERROR EntropyDec: MacroblockLayer 5 sdmmbtype not I_16x16" );
end
nextstate = MacroblockLayer 6;
end
6:
begin
if(sdmcodedBlockPatternLuma > 0
|| sdmcodedBlockPatternChroma > 0
|| mbPartPredMode(sdmmbtype,0) == Intra_16x16)
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMmb_qp_delta %0d", tempint );
outfifo_ITB.enq(SDMmb_qp_delta truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = Residual 0;
end
else
nextstate = Residual 0;
end
default: $display( "ERROR EntropyDec: MacroblockLayer default step" );
endcase
end
tagged MbPrediction .step : //return to MacroblockLayer 5
begin
case ( step )
0:
begin
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16)
begin
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MacroblockLayer 5;
end
else if(mbPartPredMode(sdmmbtype,0) == Intra_4x4)
begin
temp5bit <= 16;
nextstate = MbPrediction 1;
end
else if(num_ref_idx_l0_active_minus1 > 0)
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 2;
end
else
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 3;
end
end
1:
begin
if(temp5bit == 0)
begin
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MacroblockLayer 5;
end
else
begin
////$display( "ccl2SDMMprev_intra4x4_pred_mode_flag %0d", buffer[buffersize-1] );
if(buffer[buffersize-1] == 0)
begin
Bit#(4) tttt = buffer[buffersize-1:buffersize-4];
$display( "ccl2SDMMrem_intra4x4_pred_mode %0d", tttt );
outfifo.enq(SDMMrem_intra4x4_pred_mode tttt);
numbitsused = 4;
end
else
begin
outfifo.enq(SDMMrem_intra4x4_pred_mode 4'b1000);
numbitsused = 1;
end
temp5bit <= temp5bit-1;
nextstate = MbPrediction 1;
end
end
2:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMMref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMMref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMMref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMMref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
if(temp3bit0 == 1)
begin
temp3bit0 <= numMbPart(sdmmbtype);
nextstate = MbPrediction 3;
end
else
begin
temp3bit0 <= temp3bit0-1;
nextstate = MbPrediction 2;
end
end
3:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMMmvd_l0 %0d", tempint );
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = MbPrediction 4;
end
4:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMMmvd_l0 %0d", tempint );
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit0 <= temp3bit0-1;
if(temp3bit0 == 1)
nextstate = MacroblockLayer 5;
else
nextstate = MbPrediction 3;
end
default: $display( "ERROR EntropyDec: MbPrediction default step" );
endcase
end
tagged SubMbPrediction .step : //return to MacroblockLayer 5
begin
case ( step )
0:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit0 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 1;
end
1:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit1 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 2;
end
2:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit2 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 3;
end
3:
begin
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer)));
temp3bit3 <= numSubMbPart(truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
if(num_ref_idx_l0_active_minus1 > 0
&& sdmmbtype != P_8x8ref0)
nextstate = SubMbPrediction 4;
else
nextstate = SubMbPrediction 8;
end
4:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 5;
end
5:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 6;
end
6:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 7;
end
7:
begin
if(num_ref_idx_l0_active_minus1 == 1)
begin
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] );
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1]));
numbitsused = 1;
end
else
begin
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) );
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer)));
numbitsused = expgolomb_numbits(buffer);
end
nextstate = SubMbPrediction 8;
end
8:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 9;
end
9:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit0 <= temp3bit0-1;
if(temp3bit0 == 1)
nextstate = SubMbPrediction 10;
else
nextstate = SubMbPrediction 8;
end
10:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 11;
end
11:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit1 <= temp3bit1-1;
if(temp3bit1 == 1)
nextstate = SubMbPrediction 12;
else
nextstate = SubMbPrediction 10;
end
12:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 13;
end
13:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit2 <= temp3bit2-1;
if(temp3bit2 == 1)
nextstate = SubMbPrediction 14;
else
nextstate = SubMbPrediction 12;
end
14:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
nextstate = SubMbPrediction 15;
end
15:
begin
tempint = unpack(expgolomb_signed(buffer));
$display( "ccl2SDMSmvd_l0 %0d", tempint );
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer)));
numbitsused = expgolomb_numbits(buffer);
temp3bit3 <= temp3bit3-1;
if(temp3bit3 == 1)
nextstate = MacroblockLayer 5;
else
nextstate = SubMbPrediction 14;
end
default: $display( "ERROR EntropyDec: SubMbPrediction default step" );
endcase
end
tagged Residual .step : //return to SliceData 3
begin
case ( step )
0:
begin
residualChroma <= 0;
temp5bit <= 0;
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16)
begin
maxNumCoeff <= 16;
nextstate = ResidualBlock 0;
end
else
nextstate = Residual 1;
//$display( "TRACE EntropyDec: Residual 0" );
end
1:
begin
if(temp5bit == 16)
begin
residualChroma <= 1;
temp5bit <= 0;
nextstate = Residual 3;
end
else
begin
Bit#(5) tempMaxNumCoeff = 0;
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16)
tempMaxNumCoeff = 15;
else
tempMaxNumCoeff = 16;
maxNumCoeff <= tempMaxNumCoeff;
if((sdmcodedBlockPatternLuma & (1 << zeroExtend(temp5bit[3:2]))) == 0)
begin
calcnc.nNupdate_luma(truncate(temp5bit),0);
////$display( "ccl2SDMRcoeffLevelZeros %0d", tempMaxNumCoeff );
outfifo_ITB.enq(SDMRcoeffLevelZeros tempMaxNumCoeff);
temp5bit <= temp5bit+1;
nextstate = Residual 1;
end
else
nextstate = ResidualBlock 0;
end
//$display( "TRACE EntropyDec: Residual 1" );
end
3:
begin
if(temp5bit == 2)
begin
temp5bit <= 0;
nextstate = Residual 5;
end
else
begin
maxNumCoeff <= 4;
if((sdmcodedBlockPatternChroma & 3) == 0)
begin
////$display( "ccl2SDMRcoeffLevelZeros %0d", 4 );
outfifo_ITB.enq(SDMRcoeffLevelZeros 4);
temp5bit <= temp5bit+1;
nextstate = Residual 3;
end
else
nextstate = ResidualBlock 0;
end
//$display( "TRACE EntropyDec: Residual 3" );
end
5:
begin
if(temp5bit == 8)
begin
temp5bit <= 0;
nextstate = SliceData 3;
end
else
begin
maxNumCoeff <= 15;
if((sdmcodedBlockPatternChroma & 2) == 0)
begin
calcnc.nNupdate_chroma(truncate(temp5bit),0);
////$display( "ccl2SDMRcoeffLevelZeros %0d", 15 );
outfifo_ITB.enq(SDMRcoeffLevelZeros 15);
temp5bit <= temp5bit+1;
nextstate = Residual 5;
end
else
nextstate = ResidualBlock 0;
end
//$display( "TRACE EntropyDec: Residual 5" );
end
default: $display( "ERROR EntropyDec: Residual default step" );
endcase
end
tagged ResidualBlock .step : //if(residualChroma==0) return to Residual 1; else if(maxNumCoeff==4) return to Residual 3; else return to Residual 5
begin//don't modify maxNumCoeff, residualChroma, and increment temp5bit on return
case ( step )
0:
begin
cavlcFIFO.clear();
if(maxNumCoeff != 4)
begin
if(residualChroma == 0)
tempreg <= zeroExtend(calcnc.nCcalc_luma(truncate(temp5bit)));
else
tempreg <= zeroExtend(calcnc.nCcalc_chroma(truncate(temp5bit)));
end
else
tempreg <= zeroExtend(6'b111111);
nextstate = ResidualBlock 1;
//$display( "TRACE EntropyDec: ResidualBlock 0 temp5bit = %0d", temp5bit);
end
1:
begin
Bit#(2) trailingOnesTemp = 0;
Bit#(5) totalCoeffTemp = 0;
{trailingOnesTemp,totalCoeffTemp,numbitsused} = cavlc_coeff_token( buffer, truncate(tempreg) );
temp3bit0 <= zeroExtend(trailingOnesTemp);//trailingOnes
totalCoeff <= totalCoeffTemp;
if(residualChroma == 0 && !(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16))
calcnc.nNupdate_luma(truncate(temp5bit),totalCoeffTemp);
else if(residualChroma == 1 && maxNumCoeff != 4)
calcnc.nNupdate_chroma(truncate(temp5bit),totalCoeffTemp);
temp5bit2 <= 0;//i
tempreg <= 0;//levelCode temp
if(totalCoeffTemp > 10 && trailingOnesTemp < 3)
temp3bit1 <= 1;//suffixLength
else
temp3bit1 <= 0;//suffixLength
nextstate = ResidualBlock 2;
//$display( "TRACE EntropyDec: ResidualBlock 1 nC = %0d", tempreg);
$display( "ccl2SDMRtotal_coeff %0d", totalCoeffTemp );
$display( "ccl2SDMRtrailing_ones %0d", trailingOnesTemp );
end
2:
begin
if( totalCoeff != 0 )
begin
if(temp5bit2 < zeroExtend(temp3bit0))
begin
if(buffer[buffersize-1] == 1)
cavlcFIFO.enq(-1);
else
cavlcFIFO.enq(1);
numbitsused = 1;
end
else
begin
Bit#(32) buffertempshow = buffer[buffersize-1:buffersize-32];
Bit#(3) suffixLength = temp3bit1;
Bit#(4) levelSuffixSize = zeroExtend(suffixLength);
Bit#(4) level_prefix = cavlc_level_prefix( buffer );
Bit#(5) temp_level_prefix = zeroExtend(level_prefix);
Bit#(28) tempbuffer = buffer[buffersize-1:buffersize-28] << zeroExtend(temp_level_prefix+1);
Bit#(14) levelCode = zeroExtend(level_prefix) << zeroExtend(suffixLength);
if(level_prefix == 14 && suffixLength == 0)
levelSuffixSize = 4;
else if(level_prefix == 15)
levelSuffixSize = 12;
levelCode = levelCode + zeroExtend(tempbuffer[27:16] >> (12-zeroExtend(levelSuffixSize)));//level_suffix
if(level_prefix == 15 && suffixLength == 0)
levelCode = levelCode + 15;
if(temp5bit2 == zeroExtend(temp3bit0) && temp3bit0 < 3)
levelCode = levelCode + 2;
if(suffixLength == 0)
suffixLength = 1;
if( suffixLength < 6 && ((levelCode+2) >> 1) > (3 << zeroExtend(suffixLength-1)) )
suffixLength = suffixLength+1;
if(levelCode[0] == 0)
cavlcFIFO.enq(truncate((levelCode+2) >> 1));
else
cavlcFIFO.enq(truncate((~levelCode) >> 1));
if(levelCode[0] == 0)//////////////////////////////////////////////////
begin
tempint = signExtend(unpack((levelCode+2) >> 1));
//$display( "TRACE EntropyDec: temp level %0d", tempint );
end
else
begin
Bit#(13) tempinttemp = truncate((~levelCode) >> 1);
tempint = signExtend(unpack(tempinttemp));
//$display( "TRACE EntropyDec: temp level %0d", tempint );
end///////////////////////////////////////////////////////////////////////
temp3bit1 <= suffixLength;
numbitsused = zeroExtend(level_prefix)+1+zeroExtend(levelSuffixSize);
end
end
if( totalCoeff==0 || temp5bit2+1==totalCoeff )
begin
temp5bit2 <= 0;
zerosLeft <= 0;
if(totalCoeff < maxNumCoeff)
nextstate = ResidualBlock 3;
else
nextstate = ResidualBlock 5;
end
else
begin
temp5bit2 <= temp5bit2 + 1;
nextstate = ResidualBlock 2;
end
end
3:
begin
Bit#(4) tempZerosLeft;
if(totalCoeff > 0)
begin
{tempZerosLeft,numbitsused} = cavlc_total_zeros( buffer, truncate(totalCoeff), maxNumCoeff);
$display( "ccl2SDMRtotal_zeros %0d", tempZerosLeft );//////////////////////////////////////
end
else
tempZerosLeft = 0;
zerosLeft <= tempZerosLeft;
if(maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) > 0)
begin
$display( "ccl2SDMRcoeffLevelZeros %0d", maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) );
outfifo_ITB.enq(SDMRcoeffLevelZeros (maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft)));
end
nextstate = ResidualBlock 5;
end
5:
begin
if( totalCoeff > 0 )
begin
tempint = signExtend(unpack(cavlcFIFO.first()));
$display( "ccl2SDMRcoeffLevel %0d", tempint );
if( zerosLeft > 0 )
begin
Bit#(4) run_before = 0;
if( totalCoeff > 1 )
{run_before,numbitsused} = cavlc_run_before( buffer, zerosLeft);
else
run_before = zerosLeft;
zerosLeft <= zerosLeft - run_before;
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:zeroExtend(run_before)});
if( run_before > 0 )
$display( "ccl2SDMRcoeffLevelZeros %0d", run_before );
end
else
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:0});
cavlcFIFO.deq();
totalCoeff <= totalCoeff-1;
end
if( totalCoeff <= 1 )
begin
if(!(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16))
temp5bit <= temp5bit+1;
if(residualChroma==0)
nextstate = Residual 1;
else if(maxNumCoeff==4)
nextstate = Residual 3;
else
nextstate = Residual 5;
end
else
nextstate = ResidualBlock 5;
end
default: $display( "ERROR EntropyDec: ResidualBlock default step" );
endcase
end
endcase
if(numbitsused+1 > bufcount)
begin
$display( "ERROR EntropyDec: not enough bits in buffer" );
nextstate = Start;
end
buffer <= buffer << zeroExtend(numbitsused);
bufcount <= bufcount-numbitsused;
state <= nextstate;
endrule
// External stream ports of this module; each one is a thin adapter around an internal FIFO.
// ioin: Put side of infifo.
// NOTE(review): presumably carries the incoming bitstream data -- confirm against the code that drains infifo.
interface Put ioin = fifoToPut(infifo);
// ioout: Get side of outfifo, which the parsing rule above fills with decoded syntax
// elements (e.g. AUDPrimaryPicType, EndOfSequence, EndOfStream, SDmb_skip_run, SDMmbtype).
interface Get ioout = fifoToGet(outfifo);
// ioout_InverseTrans: Get side of outfifo_ITB, which the parsing rule above fills with
// PPSchroma_qp_index_offset, SDMmbtype, SDMmb_qp_delta and the residual entries
// SDMRcoeffLevelZeros / SDMRcoeffLevelPlusZeros.
interface Get ioout_InverseTrans = fifoToGet(outfifo_ITB);
 
// Forward the mem_client sub-interface of the calcnc submodule unchanged.
// calcnc is the unit the parsing rule drives through loadMb, nNupdate_* and nCcalc_*.
interface mem_client = calcnc.mem_client;
endmodule
 
endpackage

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.