URL
https://opencores.org/ocsvn/bluespec-h264/bluespec-h264/trunk
Subversion Repositories bluespec-h264
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 1 to Rev 2
- ↔ Reverse comparison
Rev 1 → Rev 2
/trunk/build/Makefile
0,0 → 1,106
#=======================================================================
# 6.375 Makefile for bsc-compile
#-----------------------------------------------------------------------
# $Id: Makefile,v 1.1 2008-06-26 17:46:03 jamey.hicks Exp $
#
# Copies the Bluespec sources into the build directory and runs the
# Bluespec compiler (bsc) on the top-level module to produce Verilog.
#
#   make / make all / make compile   generate $(toplevel_module).v
#   make schedule.rpt                recompile with schedule reporting
#   make clean                       remove copied and generated files
#

# None of these names is a file; declaring them phony keeps a stray
# file called "all" or "clean" from silently disabling the rule.
.PHONY : default all compile clean

default : all

basedir = ../

#--------------------------------------------------------------------
# Sources
#--------------------------------------------------------------------

# Library components (none are listed at present)

bsvclibdir  = $(MIT6375_HOME)/install/bsvclib
bsvclibsrcs =

# Bluespec sources

toplevel_module = mkTH

srcdir  = $(basedir)/src
bsvsrcs = \
  $(srcdir)/BRAM.bsv \
  $(srcdir)/H264Types.bsv \
  $(srcdir)/ExpGolomb.bsv \
  $(srcdir)/CAVLC.bsv \
  $(srcdir)/IH264.bsv \
  $(srcdir)/IInputGen.bsv \
  $(srcdir)/INalUnwrap.bsv \
  $(srcdir)/IEntropyDec.bsv \
  $(srcdir)/ICalc_nC.bsv \
  $(srcdir)/IMemED.bsv \
  $(srcdir)/IInverseTrans.bsv \
  $(srcdir)/IPrediction.bsv \
  $(srcdir)/IInterpolator.bsv \
  $(srcdir)/IDeblockFilter.bsv \
  $(srcdir)/IBufferControl.bsv \
  $(srcdir)/IFrameBuffer.bsv \
  $(srcdir)/IFinalOutput.bsv \
  $(srcdir)/mkH264.bsv \
  $(srcdir)/mkInputGen.bsv \
  $(srcdir)/mkNalUnwrap.bsv \
  $(srcdir)/mkEntropyDec.bsv \
  $(srcdir)/mkCalc_nC.bsv \
  $(srcdir)/mkMemED.bsv \
  $(srcdir)/mkInverseTrans.bsv \
  $(srcdir)/mkPrediction.bsv \
  $(srcdir)/mkInterpolator.bsv \
  $(srcdir)/mkDeblockFilter.bsv \
  $(srcdir)/mkBufferControl.bsv \
  $(srcdir)/mkFrameBuffer.bsv \
  $(srcdir)/mkFinalOutput.bsv \
  $(srcdir)/mkTH.bsv

#--------------------------------------------------------------------
# Build rules
#--------------------------------------------------------------------

BSC_COMP = bsc
#BSC_OPTS = -u -show-module-use -verilog -keep-fires -aggressive-conditions \
#           -relax-method-earliness -relax-method-urgency -v

BSC_OPTS = -u -v -verilog -aggressive-conditions

# Copy over the bluespec source

$(notdir $(bsvsrcs)) : % : $(srcdir)/%
	cp $< .

# With bsvclibsrcs empty this rule has no targets and make ignores it.
$(notdir $(bsvclibsrcs)) : % : $(bsvclibdir)/%
	cp $< .

# Run the bluespec compiler (its standard output is captured in out.log)

bsv_TH_vsrc = $(toplevel_module).v
# bsv_lib_use is not set in this file and normally expands to nothing.
$(bsv_TH_vsrc) $(bsv_lib_use) : $(notdir $(bsvsrcs) $(bsvclibsrcs))
	$(BSC_COMP) $(BSC_OPTS) -g $(toplevel_module) $(toplevel_module).bsv > out.log

compile : $(bsv_TH_vsrc)

# Create a schedule file

schedule_rpt = schedule.rpt
$(schedule_rpt) : $(notdir $(bsvsrcs) $(bsvclibsrcs))
	rm -rf *.v
	$(BSC_COMP) $(BSC_OPTS) -show-schedule -show-rule-rel \* \* -g $(toplevel_module) \
	  $(toplevel_module).bsv > $@ 2>&1

# "> file 2>&1" is the portable spelling of the csh/bash-only ">& file";
# make runs recipes with /bin/sh, which need not understand the latter.

# out.log is what the compile rule writes; bsc.log is kept in the list
# for logs left behind by other flows.
junk += $(notdir $(bsvsrcs)) $(notdir $(bsvclibsrcs)) \
        $(schedule_rpt) *.use *.bi *.bo *.v out.log bsc.log

#--------------------------------------------------------------------
# Default make target
#--------------------------------------------------------------------

all : compile

#--------------------------------------------------------------------
# Clean up
#--------------------------------------------------------------------

clean :
	rm -rf $(junk) *~ \#*
/trunk/src/mkInputGen_akiyo224nodeblock.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen;

import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;

import Connectable::*;
import GetPut::*;


// Input generator for the akiyo 224x176 (no deblocking) stream: loads the
// hex image into a register file and hands its bytes out one at a time,
// followed by EndOfFile.
module mkInputGen( IInputGen );

   // Number of bytes taken from the hex image (addresses 0 .. numBytes-1).
   Bit#(27) numBytes = 130235;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("akiyo224x176_1-300_no_deblock.hex", 0, numBytes-1);

   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27))    index   <- mkReg(0);

   // Send the byte at the current address and step to the next one.
   rule output_byte (index < numBytes);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      index <= index + 1;
      outfifo.enq(DataByte nextByte);
   endrule

   // Every byte has been sent. index is not advanced here, so EndOfFile
   // is enqueued again whenever outfifo has room.
   rule end_of_file (index == numBytes);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);

endmodule


endpackage
/trunk/src/mkInputGen_intersquid.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen;

import H264Types::*;
import IInputGen::*;
import RegFile::*;
import FIFO::*;

import Connectable::*;
import GetPut::*;


// Input generator for the inter_squid stream: loads the hex image into a
// register file and hands its bytes out one at a time, followed by
// EndOfFile.
module mkInputGen( IInputGen );

   // Number of bytes taken from the hex image (addresses 0 .. numBytes-1).
   Bit#(27) numBytes = 4376241;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("inter_squid.hex", 0, numBytes-1);

   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27))    index   <- mkReg(0);

   // Send the byte at the current address and step to the next one.
   rule output_byte (index < numBytes);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      index <= index + 1;
      outfifo.enq(DataByte nextByte);
   endrule

   // Every byte has been sent. index is not advanced here, so EndOfFile
   // is enqueued again whenever outfifo has room.
   rule end_of_file (index == numBytes);
      //$finish(0);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);

endmodule


endpackage
/trunk/src/mkPrediction.bsv
0,0 → 1,2189
//********************************************************************** |
// Prediction |
//---------------------------------------------------------------------- |
// |
// |
|
package mkPrediction; |
|
import H264Types::*; |
|
import IPrediction::*; |
import IInterpolator::*; |
import mkInterpolator::*; |
import FIFO::*; |
import FIFOF::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Prediction kind of a macroblock queued for the output stage. Values of
// this type are carried by outstatefifo: the mb-type decoding enqueues one
// per macroblock and the output rule dequeues it when the macroblock is done.
typedef union tagged |
{ |
void Intra; //Intra non-4x4 |
void Intra4x4; |
void Inter; |
} |
OutState deriving(Eq,Bits); |
|
// State held in the intrastate register. It is Start when idle, is set to
// Intra16x16 or Intra4x4 when such a macroblock type arrives, and returns
// to Start when the output rule finishes the macroblock.
// NOTE(review): no assignment of IntraPCM is visible in this part of the
// file — confirm whether it is used.
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void Intra16x16; |
void Intra4x4; |
void IntraPCM; |
} |
IntraState deriving(Eq,Bits); |
|
// State held in the interstate register. Apart from Start, each tag names
// the inter macroblock type being processed; it is chosen from the decoded
// mb type (P_L0_16x16, P_L0_L0_16x8, ...) or set to InterPskip for a
// macroblock taken from a skip run.
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void InterP16x16; |
void InterP16x8; |
void InterP8x16; |
void InterP8x8; |
void InterP8x8ref0; |
void InterPskip; |
} |
InterState deriving(Eq,Bits); |
|
// Per-block inter-prediction information kept for neighbouring blocks:
// either a marker that the block carries no motion vector (NotInter) or its
// reference index, motion vector and non-zero-coefficient flag (BlockMv).
// The BlockMv payload is 4+14+12+1 = 31 bits; values are packed into the
// 32-bit data field of the inter memory store requests.
typedef union tagged |
{ |
Bit#(1) NotInter;//0 for not available, 1 for intra-coded |
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv; |
} |
InterBlockMv deriving(Eq,Bits); |
|
// Element type of nextoutputfifo, which tells the output rule what kind of
// data comes next. SkipMB and NonSkipMB are enqueued when an inter
// macroblock is started; the output rule also tests the head for Intra4x4.
// NOTE(review): the producers of Intra4x4 and Intra4x4PlusChroma are not
// visible in this part of the file.
typedef union tagged |
{ |
void SkipMB; |
void NonSkipMB; |
void Intra4x4; |
void Intra4x4PlusChroma; |
} |
NextOutput deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// Picks one byte out of a 72-bit word: idx 0..7 selects bytes 1..8
// (bits [15:8] up to [71:64]); any other idx selects byte 0 (bits [7:0]).
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx );
   // Nine byte lanes; lane 0 is the least-significant byte.
   Vector#(9,Bit#(8)) byteLanes = unpack(valVector);
   Bit#(4) lane = (idx < 8) ? (idx + 1) : 0;
   return byteLanes[lane];
endfunction
|
// Picks one byte out of a 40-bit word: idx 0..3 selects bytes 1..4
// (bits [15:8] up to [39:32]); any other idx selects byte 0 (bits [7:0]).
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx );
   // Five byte lanes; lane 0 is the least-significant byte.
   Vector#(5,Bit#(8)) byteLanes = unpack(valVector);
   Bit#(3) lane = (idx < 4) ? (idx + 1) : 0;
   return byteLanes[lane];
endfunction
|
// Returns byte number idx of a 32-bit word; byte 0 is bits [7:0].
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx );
   Vector#(4,Bit#(8)) byteLanes = unpack(valVector);
   return byteLanes[idx];
endfunction
|
// Returns the low byte (idx 0) or the high byte (idx 1) of a 16-bit word.
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx );
   Vector#(2,Bit#(8)) byteLanes = unpack(valVector);
   return byteLanes[idx];
endfunction
|
// True when the two 14-bit two's-complement values differ by 4 or more.
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 );
   // One extra bit is enough to hold the signed difference of two
   // 14-bit values without overflow.
   Int#(15) lhs = unpack(signExtend(val1));
   Int#(15) rhs = unpack(signExtend(val2));
   Int#(15) delta = lhs - rhs;
   return (delta >= 4) || (delta <= -4);
endfunction
|
// True when the two 12-bit two's-complement values differ by 4 or more.
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 );
   // One extra bit is enough to hold the signed difference of two
   // 12-bit values without overflow.
   Int#(13) lhs = unpack(signExtend(val1));
   Int#(13) rhs = unpack(signExtend(val2));
   Int#(13) delta = lhs - rhs;
   return (delta >= 4) || (delta <= -4);
endfunction
|
|
//----------------------------------------------------------- |
// Prediction Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkPrediction( IPrediction ); |
|
//Common state |
// infifo carries the entropy-decoder stream consumed by rules passing and
// inputing; infifo_ITB carries the inverse-transform stream consumed by rule
// outputing; outfifo is the single output stream of this module.
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size); |
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size); |
FIFO#(EntropyDecOT) outfifo <- mkFIFO; |
// True: rule passing handles the head of infifo. False: rule inputing does.
Reg#(Bool) passFlag <- mkReg(True); |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |
|
// Picture dimensions, loaded from the SPSpic_width_in_mbs and
// SPSpic_height_in_map_units elements.
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
// firstMb/currMb/currMbHor are all loaded from SHfirst_mb_in_slice; rule
// currMbHorUpdate then folds currMbHor below picWidth while counting
// currMbVer upwards.
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |
|
// Output-side bookkeeping: one outstatefifo entry per macroblock in flight,
// the position being emitted (outBlockNum/outPixelNum/outChromaFlag), and
// the predicted samples waiting to be combined with the residual.
FIFOF#(OutState) outstatefifo <- mkFIFOF; |
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF; |
Reg#(Bit#(4)) outBlockNum <- mkReg(0); |
Reg#(Bit#(4)) outPixelNum <- mkReg(0); |
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size); |
Reg#(Bit#(1)) outChromaFlag <- mkReg(0); |
// Set at the start of a slice; while set, rule outputing first expects an
// IBTmb_qp element on infifo_ITB.
Reg#(Bool) outFirstQPFlag <- mkReg(False); |
|
// NOTE(review): DoNotFire is defined outside this file section; calling
// doNotFire() presumably keeps the calling rule from firing — confirm.
DoNotFire donotfire <- mkDoNotFire(); |
|
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU(); |
|
//Inter state |
Interpolator interpolator <- mkInterpolator(); |
Reg#(InterState) interstate <- mkReg(Start); |
// Number of macroblocks already started from the current skip run.
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0); |
// Motion information of neighbouring blocks (top, left, top-left).
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU(); |
// Request/response queues towards the inter neighbour memory. The delay
// register holds a store request that rule outputing enqueues later.
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO; |
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU(); |
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO; |
// 1-based index of the next load request / response; 0 means idle.
Reg#(Bit#(3)) interReqCount <- mkReg(0); |
Reg#(Bit#(3)) interRespCount <- mkReg(0); |
|
Reg#(Bit#(1)) interStepCount <- mkReg(0); |
Reg#(Bit#(2)) interMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0); |
// Position counter used by rule inputing while it collects ref_idx,
// sub_mb_type and mvd elements.
Reg#(Bit#(2)) interPassingCount <- mkReg(0); |
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU(); |
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU(); |
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full(); |
// Holds the first value of a motion-vector-difference pair until the second
// arrives; the completed pair is enqueued to interMvDiff.
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0); |
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO; |
Reg#(Bit#(5)) interNewestMv <- mkReg(0); |
|
Reg#(Bit#(2)) interIPStepCount <- mkReg(0); |
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0); |
|
// Incremented when rule inputing accepts a macroblock and decremented when
// rule outputing completes one.
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0); |
|
// Whether the neighbouring blocks had residual data (set on ITBresidual,
// cleared on ITBcoeffLevelZeros / skip); used when deriving bS values.
Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU(); |
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU(); |
// Pairs of preliminary (horizontal, vertical) bS values read by rule outputing.
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32); |
// True when the next output for a luma block should be its bS element.
Reg#(Bool) interBSoutput <- mkReg(True); |
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8); |
|
|
//Intra state |
Reg#(IntraState) intrastate <- mkReg(Start); |
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0); |
// Request/response queues towards the intra neighbour memory. Each 68-bit
// word is built in rule outputing from a 4-bit prediction type, two 16-bit
// chroma values and a 32-bit luma value.
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO; |
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU; |
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO; |
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
// Copy of the PPSconstrained_intra_pred_flag stream element.
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0); |
// Reconstructed samples of neighbouring blocks, updated by rule outputing.
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU(); |
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU(); |
// Accumulates the right-most output sample of each row until a full
// left-neighbour column can be written to intraLeftVal.
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0); |
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0); |
// Prediction-mode elements queued by rule inputing.
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16); |
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO; |
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0); |
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0); |
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0); |
|
// 1-based request / response counters, set to 1 when an intra macroblock
// is started.
Reg#(Bit#(3)) intraReqCount <- mkReg(0); |
Reg#(Bit#(3)) intraRespCount <- mkReg(0); |
Reg#(Bit#(4)) intraStepCount <- mkReg(0); |
Reg#(Bit#(13)) intraSumA <- mkReg(0); |
Reg#(Bit#(15)) intraSumB <- mkReg(0); |
Reg#(Bit#(15)) intraSumC <- mkReg(0); |
|
|
|
//----------------------------------------------------------- |
// Rules |
|
////////////////////////////////////////////////////////////////////////////// |
// rule stateMonitor ( True ); |
// if(predictedfifo.notEmpty()) |
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);//////////////////// |
// endrule |
////////////////////////////////////////////////////////////////////////////// |
|
|
// Debug trace: prints the element at the head of infifo_ITB whenever the
// FIFO holds one.
rule checkFIFO;
   let itbHead = infifo_ITB.first();
   $display( "Trace Prediction: checkFIFO %h", itbHead );
endrule
|
// Handles the stream elements that need no macroblock-level processing.
// It runs only while passFlag is set, no macroblock is pending in
// outstatefifo, and currMbHor lies below picWidth. Most elements are
// forwarded to outfifo unchanged; a few also update local registers. The
// first macroblock-level element (skip run or mb type) is left in infifo
// and passFlag is cleared so that rule inputing takes over.
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) );
   let head = infifo.first();
   $display( "Trace Prediction: passing infifo packed %h", pack(head));
   case (head) matches
      tagged NewUnit .xdata :
         begin
            $display("ccl4newunit");
            $display("ccl4rbspbyte %h", xdata);
            outfifo.enq(head);
            infifo.deq();
         end
      tagged SPSpic_width_in_mbs .xdata :
         begin
            picWidth <= xdata;
            interpolator.setPicWidth(xdata);
            outfifo.enq(head);
            infifo.deq();
         end
      tagged SPSpic_height_in_map_units .xdata :
         begin
            picHeight <= xdata;
            interpolator.setPicHeight(xdata);
            outfifo.enq(head);
            infifo.deq();
         end
      tagged PPSconstrained_intra_pred_flag .xdata :
         begin
            // Consumed here; this element is not forwarded to outfifo.
            ppsconstrained_intra_pred_flag <= xdata;
            infifo.deq();
         end
      tagged SHfirst_mb_in_slice .xdata :
         begin
            // New slice: restart the macroblock position and mark the
            // left / top-left neighbours as unavailable.
            firstMb         <= xdata;
            currMb          <= xdata;
            currMbHor       <= xdata;
            currMbVer       <= 0;
            outFirstQPFlag  <= True;
            intra4x4typeLeft <= replicate(15);
            interTopLeftVal  <= replicate(NotInter 0);
            if(xdata==0)
               interLeftVal <= replicate(NotInter 0);
            outfifo.enq(head);
            infifo.deq();
         end
      // Macroblock-level data: leave it queued and hand over to inputing.
      tagged SDmb_skip_run .xdata : passFlag <= False;
      tagged SDMmbtype .xdata : passFlag <= False;
      tagged EndOfFile :
         begin
            outfifo.enq(head);
            infifo.deq();
            $display( "INFO Prediction: EndOfFile reached" );
            //$finish(0);
         end
      default:
         begin
            outfifo.enq(head);
            infifo.deq();
         end
   endcase
endrule
|
|
// Consumes macroblock-level syntax elements from infifo while passFlag is
// False. A skip run or mb type starts a new macroblock (intra or inter
// state, neighbour bookkeeping, an outstatefifo entry); the remaining
// elements are collected into registers and FIFOs for the prediction
// logic. Any element not handled here sets passFlag again so that rule
// passing resumes.
// NOTE(review): donotfire.doNotFire() is defined outside this section;
// it presumably prevents the rule from firing on that path — confirm.
rule inputing ( !passFlag ); |
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first())); |
case (infifo.first()) matches |
tagged SDmb_skip_run .xdata : |
begin |
// The element stays at the head of infifo while one skipped macroblock
// is started per firing; it is dequeued once interPskipCount reaches
// xdata.
if(interstate==Start && intrastate==Start) |
begin |
if(interPskipCount < xdata) |
begin |
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata); |
outstatefifo.enq(Inter); |
interstate <= InterPskip; |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14); |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interPskipCount <= interPskipCount+1; |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
interCurrMbDiff <= interCurrMbDiff+1; |
nextoutputfifo.enq(SkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
else |
begin |
$display( "Trace Prediction: passing no SDmb_skip_run"); |
interPskipCount <= 0; |
infifo.deq(); |
end |
end |
else |
donotfire.doNotFire(); |
end |
tagged SDMmbtype .xdata : |
begin |
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start) |
begin |
infifo.deq(); |
$display( "INFO Prediction: SDMmbtype %0d", xdata); |
// Intra 16x16: requires an empty outstatefifo; records the prediction
// mode and marks the inter neighbour values as intra-coded.
if(mbPartPredMode(xdata,0)==Intra_16x16) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra); |
intrastate <= Intra16x16; |
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3}) |
intra16x16_pred_mode <= tempv1; |
else |
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" ); |
intraReqCount <= 1; |
intraRespCount <= 1; |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
// Intra 4x4: same bookkeeping as above with the Intra4x4 states.
else if(xdata==I_NxN) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra4x4); |
intrastate <= Intra4x4; |
intraReqCount <= 1; |
intraRespCount <= 1; |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
// I_PCM is reported as unimplemented and ends the simulation.
else if(xdata==I_PCM) |
begin |
$display( "ERROR Prediction: I_PCM not implemented yet"); |
$finish;//////////////////////////////////////////////////////////////////////////////////////// |
intra4x4typeLeft <= replicate(13); |
intra4x4typeTop <= replicate(13); |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
// Every other mb type is treated as inter; the inter state is chosen
// from the mb type and the intra neighbour types are marked 14
// (inter-MB).
else |
begin |
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
outstatefifo.enq(Inter); |
case(xdata) |
P_L0_16x16: interstate <= InterP16x16; |
P_L0_L0_16x8: interstate <= InterP16x8; |
P_L0_L0_8x16: interstate <= InterP8x16; |
P_8x8: interstate <= InterP8x8; |
P_8x8ref0: interstate <= InterP8x8ref0; |
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype"); |
endcase |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14);///////////////////////////////////////////////////////////////////////////// |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
nextoutputfifo.enq(NonSkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
// Counted for every accepted mb type, intra as well as inter.
interCurrMbDiff <= interCurrMbDiff+1; |
end |
else |
donotfire.doNotFire(); |
end |
tagged SDMMrem_intra4x4_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
rem_intra4x4_pred_mode.enq(xdata); |
end |
tagged SDMMintra_chroma_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
intra_chroma_pred_mode.enq(xdata); |
end |
tagged SDMMref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// Stored at position interPassingCount; the counter wraps after one
// entry for 16x16 and after two entries otherwise.
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
if(interstate==InterP16x16 || interPassingCount==1) |
interPassingCount <= 0; |
else |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMMmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// Values arrive in pairs: the first is parked in interMvDiffTemp, the
// second completes the tuple enqueued to interMvDiff.
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
tagged SDMSsub_mb_type .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// interPassingCount is 2 bits wide, so it returns to 0 after four entries.
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata); |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMSref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMSmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// Same pairing scheme as for SDMMmvd_l0.
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
// Anything else is not macroblock data: give control back to rule passing.
default: passFlag <= True; |
endcase |
endrule |
|
|
// Output stage. Each firing emits one element on outfifo: a QP element, a
// boundary-strength (PBbS) element, or four reconstructed samples
// (PBoutput) formed from predictedfifo and, when present, the residual
// from infifo_ITB. After a sample output it also updates the
// neighbour-sample registers, issues the neighbour-memory store requests
// and advances the outPixelNum / outBlockNum / outChromaFlag position.
rule outputing ( currMbHor<zeroExtend(picWidth) ); |
// outputFlag is set to 1 only on paths that emit samples this firing.
Bit#(1) outputFlag = 0; |
Vector#(4,Bit#(8)) outputVector = replicate(0); |
// Block and row coordinates derived from the interleaved bits of the
// block / pixel counters.
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]}; |
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]}; |
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]}; |
Bit#(4) totalVer = {blockVer,pixelVer}; |
//$display( "Trace Prediction: outputing" ); |
// First element after a slice start must be the QP element.
if(outFirstQPFlag) |
begin |
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata) |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
else |
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()"); |
end |
// Skipped macroblock: no residual is read; predicted samples pass through.
else if(nextoutputfifo.first() == SkipMB) |
begin |
// At the first row of each luma block the bS element is sent first;
// interBSoutput then forces the next firing onto the sample path.
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector); |
end |
end |
else |
begin |
case ( infifo_ITB.first() ) matches |
tagged IBTmb_qp .xdata : |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
tagged ITBresidual .xdata : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
// Non-inter macroblocks use fixed bS values: 4 on the block
// row/column 0 edge, 3 elsewhere.
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True); |
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
// Residual plus prediction in 11 bits, clipped to 0..255: bit 10
// set means the sum is negative, bits 9:0 above 255 means overflow.
Bit#(11) tempOutputValue = 0; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]); |
if(tempOutputValue[10]==1) |
outputVector[ii] = 0; |
else if(tempOutputValue[9:0] > 255) |
outputVector[ii] = 255; |
else |
outputVector[ii] = tempOutputValue[7:0]; |
end |
outfifo.enq(PBoutput outputVector); |
infifo_ITB.deq(); |
predictedfifo.deq(); |
outputFlag = 1; |
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector); |
end |
end |
tagged ITBcoeffLevelZeros : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
// One ITBcoeffLevelZeros element covers the whole block, so it is
// dequeued only on the block's last row (outPixelNum 12).
if(outPixelNum == 12) |
infifo_ITB.deq(); |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector); |
end |
end |
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" ); |
endcase |
end |
|
// Everything below runs only when four samples were emitted this firing.
if(outputFlag == 1) |
begin |
$display("ccl4PBoutput %0d", outputVector[0]); |
$display("ccl4PBoutput %0d", outputVector[1]); |
$display("ccl4PBoutput %0d", outputVector[2]); |
$display("ccl4PBoutput %0d", outputVector[3]); |
|
// Release the store requests parked in the delay registers when the
// first luma row of a macroblock other than firstMb is output.
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1) |
begin |
intraMemReqQ.enq(intraMemReqQdelay); |
interMemReqQ.enq(interMemReqQdelay); |
//$display( "TRACE Prediction: passing storing addr data");////////////////// |
end |
|
// Left-neighbour update: record the right-most sample of this row.
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)) |
begin |
if(outChromaFlag==0) |
begin |
Bit#(32) intraLeftValNextTemp = intraLeftValNext; |
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0)) |
begin |
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor)); |
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]); |
end |
case(pixelVer) |
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]}; |
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]}; |
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]}; |
3: |
begin |
// Last row of the block: commit the 40-bit column and record the
// prediction type (13 = intra non-4x4, 14 = inter).
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp}); |
intraLeftValNext <= zeroExtend(outputVector[3]); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13); |
else |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14); |
end |
endcase |
end |
else |
begin |
// outBlockNum[2] selects which of the two chroma register sets is
// written.
if(outBlockNum[2]==0) |
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]); |
else |
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]); |
end |
end |
|
// Top-neighbour update: record the last row of the block.
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))) |
begin |
if(outChromaFlag==0) |
begin |
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]}); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13); |
else |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14); |
end |
else |
begin |
if(outBlockNum[2]==0) |
begin |
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0; |
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma0 <= intraTopValChroma0Next; |
end |
else |
begin |
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1; |
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma1 <= intraTopValChroma1Next; |
end |
end |
end |
|
// During the last chroma block (outBlockNum 7) the four rows are used to
// write this macroblock's four neighbour-memory entries; pixelVer is the
// 2-bit entry index within the macroblock column.
if(outChromaFlag==1 && outBlockNum==7) |
begin |
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor); |
InterBlockMv outBlockMv = interOutBlockMvfifo.first(); |
if(outBlockMv matches tagged BlockMv .bdata) |
begin |
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)}); |
interOutBlockMvfifo.deq(); |
end |
else if(pixelVer==3) |
interOutBlockMvfifo.deq(); |
// The final entry is parked in the delay register (when picWidth>1)
// and is enqueued while a later macroblock is being output.
if(pixelVer==3 && picWidth>1) |
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}; |
else |
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}); |
if(pixelVer>0) |
begin |
// Intra entries are written one row late (entry pixelVer-1); on the
// last row entry 3 is built as well and parked in the delay register.
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)])); |
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)]; |
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)]; |
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]}); |
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore}); |
if(pixelVer==3) |
begin |
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3])); |
intraTopValStore = intraTopVal[3]; |
intraTopValChroma0Store = intraTopValChroma0[3]; |
intraTopValChroma1Store = {outputVector[3],outputVector[2]}; |
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore}; |
end |
end |
end |
// Advance the position: four samples per firing, 16 luma blocks
// (0..15) followed by 8 chroma blocks (0..7) per macroblock.
outPixelNum <= outPixelNum+4; |
if(outPixelNum == 12) |
begin |
if(outChromaFlag==0) |
begin |
outBlockNum <= outBlockNum+1; |
if(outBlockNum == 15) |
outChromaFlag <= 1; |
if(nextoutputfifo.first() == Intra4x4) |
nextoutputfifo.deq(); |
end |
else |
begin |
if(outBlockNum == 7) |
begin |
// Macroblock finished: move to the next one and release its
// outstatefifo / nextoutputfifo entries.
outBlockNum <= 0; |
outChromaFlag <= 0; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
interCurrMbDiff <= interCurrMbDiff-1; |
outstatefifo.deq; |
intrastate <= Start; |
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1) |
interpolator.endOfFrame(); |
nextoutputfifo.deq(); |
end |
else |
outBlockNum <= outBlockNum+1; |
end |
end |
end |
endrule |
|
|
// Folds currMbHor back below picWidth. While currMbHor is at least eight
// picture widths, a firing removes eight widths and adds 8 to currMbVer;
// otherwise it removes one width and adds 1.
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) );
   Bit#(PicAreaSz)   widthInMb = zeroExtend(picWidth);
   Bool              eightRows = (currMbHor >> 3) >= widthInMb;
   Bit#(PicAreaSz)   horStep   = eightRows ? (widthInMb << 3) : widthInMb;
   Bit#(PicHeightSz) verStep   = eightRows ? 8 : 1;
   currMbHor <= currMbHor - horStep;
   currMbVer <= currMbVer + verStep;
   //$display( "Trace Prediction: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule
|
|
// inter prediction rules |
|
// Issues the load requests for one inter macroblock, one request per firing, |
// while interReqCount>0.  The responses are consumed by interReceiveResp, |
// which stores them into interTopVal / interTopLeftVal. |
// |
// The request address is {macroblock column, 2-bit entry index}: |
//   interReqCount 1..4 : entries 0..3 of the column of the macroblock being |
//                        processed (currMbHor+interCurrMbDiff-1, wrapped once |
//                        by picWidth); |
//   interReqCount 5    : entry 0 of the next column when one exists, otherwise |
//                        entry 3 of the previous column when that is usable; |
//   interReqCount 6    : entry 3 of the previous column, only when both the |
//                        next-column and previous-column conditions hold. |
// The sequence ends by resetting interReqCount to 0. |
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) ); |
// Position of the macroblock being processed, derived from the output-side |
// counters plus interCurrMbDiff. |
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1; |
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
// Single wrap-around of the column into the next row. |
if( currMbHorTemp >= zeroExtend(picWidth) ) |
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth); |
Bit#(PicWidthSz) temp2 = truncate(currMbHorTemp); |
// Request address; stays 0 when no request is issued. |
Bit#(TAdd#(PicWidthSz,2)) temp = 0; |
Bool noMoreReq = False; |
// No loads at all while currMbTemp is below picWidth (presumably the first |
// macroblock row, which has nothing above it -- confirm).  Note that this test |
// does not subtract firstMb, unlike the previous-column tests below. |
if( currMbTemp < zeroExtend(picWidth) ) |
noMoreReq = True; |
else |
begin |
if(interReqCount<5) |
begin |
// Requests 1..4 map to entry indices 0..3 of the current column. |
Bit#(2) temp3 = truncate(interReqCount-1); |
temp = {temp2,temp3}; |
end |
else if(interReqCount==5) |
begin |
// Fifth request: next column entry 0 has priority over previous column entry 3. |
if((currMbHorTemp+1)<zeroExtend(picWidth)) |
temp = {(temp2+1),2'b00}; |
else if(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)) |
temp = {(temp2-1),2'b11}; |
else |
noMoreReq = True; |
end |
else if(interReqCount==6) |
begin |
// Sixth request exists only when the fifth one was the next-column load and |
// the previous-column entry is also wanted. |
if((currMbHorTemp+1)<zeroExtend(picWidth) && currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)) |
temp = {(temp2-1),2'b11}; |
else |
noMoreReq = True; |
end |
else |
noMoreReq = True; |
end |
if(!noMoreReq) |
begin |
interMemReqQ.enq(LoadReq temp); |
interReqCount <= interReqCount+1; |
//$display( "TRACE Prediction: interSendReq addr %0d",temp);/////////////////////// |
end |
else |
interReqCount <= 0; |
// Prints the value interReqCount had when the rule fired (register writes take |
// effect at the end of the rule). |
$display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, temp); |
endrule |
|
|
// Counterpart of interReceiveResp for the case in which interSendReq issues no |
// loads (currMb+interCurrMbDiff-1 below picWidth): closes the response phase |
// immediately and starts the motion-vector and interpolation step counters. |
// In column 0 the left and top-left neighbour vectors are cleared. |
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) ); |
   Bit#(PicAreaSz) widthExt = zeroExtend(picWidth); |
   Bit#(PicAreaSz) mbHorRaw = currMbHor+zeroExtend(interCurrMbDiff)-1; |
   // Column of the macroblock being processed, wrapped once by the picture width. |
   Bit#(PicAreaSz) mbHor = ((mbHorRaw >= widthExt) ? (mbHorRaw-widthExt) : mbHorRaw); |
   if(mbHor == 0) |
      begin |
         interTopLeftVal <= replicate(NotInter 0); |
         interLeftVal <= replicate(NotInter 0); |
      end |
   // Hand over to interProcessStep / interIPProcessStep. |
   interIPStepCount <= 1; |
   interStepCount <= 1; |
   interRespCount <= 0; |
   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount); |
endrule |
|
|
// Consumes one LoadResp from interMemRespQ per firing and files it according |
// to interRespCount, mirroring the request order of interSendReq: |
//   1..4 : interTopVal[0..3]; |
//   5    : interTopVal[4] when a next column exists, otherwise interTopLeftVal[0]; |
//   6    : interTopLeftVal[0]. |
// When the last expected response has arrived the rule resets interRespCount |
// and starts interStepCount / interIPStepCount at 1. |
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data); |
// Same position arithmetic as interSendReq. |
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1; |
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
if( currMbHorTemp >= zeroExtend(picWidth) ) |
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth); |
Bool noMoreResp = False; |
Bit#(2) temp2bit = 0; |
InterBlockMv unpackedData = unpack(data); |
// Working copies; written back to the registers at the end of the rule. |
Vector#(5,InterBlockMv) interTopValNext = interTopVal; |
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal; |
if(interRespCount<5) |
begin |
temp2bit = truncate(interRespCount-1); |
interTopValNext[temp2bit] = unpackedData; |
// The sequence ends here when neither the next-column nor the previous-column |
// load would follow. |
// NOTE(review): for InterPskip/InterP16x16/InterP16x8 this can happen already |
// at interRespCount==1, although interSendReq issues a load for every count |
// 1..4 whenever it issues any.  Confirm that the remaining responses are |
// drained elsewhere or that this early exit is intended. |
if((interRespCount==4 || (interRespCount==1 && (interstate==InterPskip || interstate==InterP16x16 || interstate==InterP16x8))) |
&& (!((currMbHorTemp+1)<zeroExtend(picWidth)) && !(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)))) |
noMoreResp = True; |
end |
else if(interRespCount==5) |
begin |
if((currMbHorTemp+1)<zeroExtend(picWidth)) |
begin |
// Fifth response is the next-column entry. |
interTopValNext[4] = unpackedData; |
if(!(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth))) |
noMoreResp = True; |
end |
else |
begin |
// No next column: the fifth response is the previous-column entry. |
interTopLeftValNext[0] = unpackedData; |
noMoreResp = True; |
end |
end |
else |
begin |
// Sixth response is always the previous-column entry and always the last. |
interTopLeftValNext[0] = unpackedData; |
noMoreResp = True; |
end |
interMemRespQ.deq(); |
//$display( "TRACE Prediction: interReceiveResp data %h",data);/////////////////////// |
if(!noMoreResp) |
interRespCount <= interRespCount+1; |
else |
begin |
interRespCount <= 0; |
interStepCount <= 1; |
interIPStepCount <= 1; |
// In column 0 the left and top-left neighbour vectors are cleared; the |
// top-left clear overrides any entry stored above in this same firing. |
if(currMbHorTemp == 0) |
begin |
interLeftVal <= replicate(NotInter 0); |
interTopLeftValNext = replicate(NotInter 0); |
end |
end |
interTopVal <= interTopValNext; |
interTopLeftVal <= interTopLeftValNext; |
$display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data); |
endrule |
|
|
// Processes one 4x4 block of the current inter macroblock per firing. |
// The block is addressed by (interMbPartNum, interSubMbPartNum); both counters |
// always run 0..3, so the rule fires 16 times per macroblock regardless of the |
// partition shape.  Each firing: |
//   1. classifies the block for the current interstate (is a new motion vector |
//      calculated here, or is it copied from the left or top neighbour); |
//   2. gathers the three neighbour candidates blockABC; |
//   3. either predicts a motion vector, adds the decoded difference and writes |
//      the result into interMvFile, or copies the neighbour's vector; |
//   4. derives two 2-bit values from the left and top neighbours and enqueues |
//      them on interBSfifo; |
//   5. updates interLeftVal / interTopVal / interTopLeftVal and advances the |
//      counters. |
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) ); |
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
// Block coordinates inside the macroblock: bit 0 of each counter forms the |
// horizontal index, bit 1 the vertical index. |
Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]}; |
Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]}; |
// partWidth is used below as an offset into interTopVal (apparently the |
// partition width in 4x4-block units -- confirm).  partHeight, numPart and |
// numSubPart are assigned but never read in this rule. |
Bit#(3) partWidth = 0; |
Bit#(3) partHeight = 0; |
Bit#(3) numPart = 1; |
Bit#(3) numSubPart = 1; |
Bit#(2) subMbType = 0; |
// noBlockC: do not use interTopVal[blockHor+partWidth] as third candidate. |
Bool noBlockC = False; |
// calcmv: this block starts a partition, so a new vector is calculated. |
Bool calcmv = False; |
// leftmv: when no vector is calculated, copy from the left (True) or top (False). |
Bool leftmv = False; |
if(interstate==InterPskip || interstate==InterP16x16) |
begin |
partWidth = 4; |
partHeight = 4; |
numPart = 1; |
calcmv = (interMbPartNum==0 && interSubMbPartNum==0); |
leftmv = (blockHor>0); |
end |
else if(interstate==InterP16x8) |
begin |
partWidth = 4; |
partHeight = 2; |
numPart = 2; |
if(interMbPartNum==2) |
noBlockC = True; |
calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0); |
leftmv = (blockHor>0); |
end |
else if(interstate==InterP8x16) |
begin |
partWidth = 2; |
partHeight = 4; |
numPart = 2; |
calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0); |
leftmv = !(blockVer>0); |
end |
else if(interstate==InterP8x8 || interstate==InterP8x8ref0) |
begin |
numPart = 4; |
subMbType = interSubMbTypeVector[interMbPartNum]; |
numSubPart = numSubMbPart(subMbType); |
case(subMbType) |
0: |
begin |
partWidth = 2; |
partHeight = 2; |
if(interMbPartNum==3) |
noBlockC = True; |
calcmv = (interSubMbPartNum==0); |
leftmv = (blockHor[0]>0); |
end |
1: |
begin |
partWidth = 2; |
partHeight = 1; |
if(interSubMbPartNum==2) |
noBlockC = True; |
calcmv = (interSubMbPartNum[0]==0); |
leftmv = True; |
end |
2: |
begin |
partWidth = 1; |
partHeight = 2; |
calcmv = (interSubMbPartNum[1]==0); |
leftmv = False; |
end |
3: |
begin |
// Every block gets its own vector, so leftmv is never consulted here. |
partWidth = 1; |
partHeight = 1; |
if(interSubMbPartNum==3) |
noBlockC = True; |
calcmv = True; |
end |
endcase |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected interstate"); |
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]); |
// Neighbour candidates: [0] from interLeftVal, [1] from interTopVal, [2] from |
// interTopVal at offset partWidth, falling back to interTopLeftVal. |
Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0); |
// The left candidate is suppressed for the first macroblock after firstMb. |
if( currMbTemp-firstMb==0 && blockHor==0 ) |
blockABC[0] = (NotInter 0); |
else |
blockABC[0] = interLeftVal[blockVer]; |
// The top candidate is suppressed for the top block row of macroblocks that |
// are less than picWidth after firstMb. |
if( currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0 ) |
blockABC[1] = (NotInter 0); |
else |
blockABC[1] = interTopVal[blockHor]; |
blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth]; |
if(noBlockC || blockABC[2]==(NotInter 0)) |
blockABC[2] = interTopLeftVal[blockVer]; |
Bit#(14) mvhorfinal = 0; |
Bit#(12) mvverfinal = 0; |
// Non-zero only when interNewestMv has to be updated at the end of the rule. |
Bit#(5) interNewestMvNext = 0; |
if(calcmv)//motion vector calculation |
begin |
Vector#(3,Int#(14)) mvhorABC = replicate(0); |
Vector#(3,Int#(12)) mvverABC = replicate(0); |
// Number of candidates whose refIdx equals refIndex. |
Bit#(2) validCount = 0; |
Bit#(14) mvhorPred = 0; |
Bit#(12) mvverPred = 0; |
for(Integer ii=0; ii<3; ii=ii+1) |
begin |
if(blockABC[ii] matches tagged BlockMv .xdata) |
begin |
mvhorABC[ii] = unpack(xdata.mvhor); |
mvverABC[ii] = unpack(xdata.mvver); |
if(xdata.refIdx == refIndex) |
begin |
validCount = validCount+1; |
mvhorPred = xdata.mvhor; |
mvverPred = xdata.mvver; |
end |
end |
else |
begin |
// Candidates that are not BlockMv count as zero vectors. |
mvhorABC[ii] = 0; |
mvverABC[ii] = 0; |
end |
end |
// Exactly one matching candidate: its vector (set in the loop above) is the |
// prediction.  Otherwise take the component-wise signed median of the three. |
if(validCount != 1)//median |
begin |
// If [0] is the strict maximum the median is the larger of [1],[2]; if it is |
// the strict minimum the median is the smaller of [1],[2]; otherwise [0]. |
if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2]) |
mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2]) |
mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
else |
mvhorPred = pack(mvhorABC[0]); |
if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2]) |
mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2]) |
mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
else |
mvverPred = pack(mvverABC[0]); |
end |
if(interstate==InterPskip) |
begin |
// Skip macroblocks: the prediction is forced to zero when the left or top |
// candidate is NotInter 0, or is a BlockMv with refIdx 0 and a zero vector. |
for(Integer ii=0; ii<2; ii=ii+1) |
begin |
if(blockABC[ii] matches tagged BlockMv .xdata) |
begin |
if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0) |
begin |
mvhorPred = 0; |
mvverPred = 0; |
end |
end |
else if(blockABC[ii] matches tagged NotInter 0) |
begin |
mvhorPred = 0; |
mvverPred = 0; |
end |
end |
end |
else if(interstate==InterP16x8 || interstate==InterP8x16) |
begin |
// Directional override: one designated candidate replaces the prediction |
// when it is a BlockMv with matching refIdx. |
//   16x8: partition 0 -> [1], other partition -> [0] |
//   8x16: partition 0 -> [0], other partition -> [2] |
InterBlockMv blockCheck; |
if(interstate==InterP16x8) |
begin |
if(interMbPartNum==0) |
blockCheck = blockABC[1]; |
else |
blockCheck = blockABC[0]; |
end |
else |
begin |
if(interMbPartNum==0) |
blockCheck = blockABC[0]; |
else |
blockCheck = blockABC[2]; |
end |
if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex) |
begin |
mvhorPred = xdata.mvhor; |
mvverPred = xdata.mvver; |
end |
end |
mvhorfinal = mvhorPred; |
mvverfinal = mvverPred; |
// Everything except a skip macroblock adds the difference taken from |
// interMvDiff to the prediction. |
if(interstate!=InterPskip) |
begin |
mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred)); |
mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred)); |
interMvDiff.deq(); |
end |
// Store the vector and record index+1 as the newest valid entry; |
// interIPProcessStep's guard compares interNewestMv against the index it needs. |
interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal)); |
interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1; |
$display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred); |
end |
else |
begin |
// Block inside an already processed partition: copy the vector of the left |
// or top candidate.  Nothing is written to interMvFile in this case. |
if(leftmv) |
begin |
if(blockABC[0] matches tagged BlockMv .xdata) |
begin |
mvhorfinal = unpack(xdata.mvhor); |
mvverfinal = unpack(xdata.mvver); |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected blockABC[0]"); |
end |
else |
begin |
if(blockABC[1] matches tagged BlockMv .xdata) |
begin |
mvhorfinal = unpack(xdata.mvhor); |
mvverfinal = unpack(xdata.mvver); |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected blockABC[1]"); |
end |
end |
// Per-edge value against the left (tempBShor) and top (tempBSver) neighbour: |
//   3 : neighbour is not a BlockMv |
//   2 : neighbour has nonZeroTransCoeff set |
//   1 : different refIdx, or absDiffGEFour14/12 reports a vector difference |
//   0 : otherwise |
// These read interLeftVal / interTopVal directly, not the blockABC candidates. |
Bit#(2) tempBShor = 0;//bS calculation |
Bit#(2) tempBSver = 0; |
if(interLeftVal[blockVer] matches tagged BlockMv .xdata) |
begin |
if(xdata.nonZeroTransCoeff == 1) |
tempBShor = 2; |
else |
begin |
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
tempBShor = 1; |
else |
tempBShor = 0; |
end |
end |
else |
tempBShor = 3; |
if(interTopVal[blockHor] matches tagged BlockMv .xdata) |
begin |
if(xdata.nonZeroTransCoeff == 1) |
tempBSver = 2; |
else |
begin |
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
tempBSver = 1; |
else |
tempBSver = 0; |
end |
end |
else |
tempBSver = 3; |
interBSfifo.enq(tuple2(tempBShor,tempBSver)); |
// The current block becomes the left neighbour of its row and the top |
// neighbour of its column; the old top entry becomes the top-left entry. |
Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val |
Vector#(4,InterBlockMv) interLeftValNext = interLeftVal; |
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal; |
interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
interTopLeftValNext[blockVer] = interTopVal[blockHor]; |
interTopVal <= interTopValNext; |
interLeftVal <= interLeftValNext; |
interTopLeftVal <= interTopLeftValNext; |
// Blocks of the bottom row are also sent out on interOutBlockMvfifo. |
if(blockVer == 3) |
interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
if(interSubMbPartNum == 3)//next step |
begin |
interSubMbPartNum <= 0; |
if(interMbPartNum == 3) |
begin |
// Last block: stop this rule and mark all 16 interMvFile entries as usable. |
interMbPartNum <= 0; |
interStepCount <= 0; |
interNewestMvNext = 16; |
end |
else |
interMbPartNum <= interMbPartNum+1; |
end |
else |
interSubMbPartNum <= interSubMbPartNum+1; |
if(interNewestMvNext > 0) |
interNewestMv <= interNewestMvNext; |
endrule |
|
|
// Sends one request per firing to the interpolator for the current inter |
// macroblock.  interIPStepCount==1 produces IPLuma requests; any other |
// non-zero value produces IPChroma requests with uv = interIPStepCount[0]. |
// The guard term on interNewestMv holds the rule back until interProcessStep |
// has advanced interNewestMv past the entry addressed by |
// {interIPMbPartNum,interIPSubMbPartNum}. |
// How interIPStepCount returns to 0 is not visible in this rule (it is only |
// incremented here) -- presumably by wrap-around; confirm against its declaration. |
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) ); |
// Position of the macroblock being processed; unlike the other inter rules |
// the wrap-around also bumps the row. |
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1; |
Bit#(PicHeightSz) currMbVerTemp = currMbVer; |
if( currMbHorTemp >= zeroExtend(picWidth) ) |
begin |
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth); |
currMbVerTemp = currMbVerTemp+1; |
end |
// Block coordinates inside the macroblock, built like in interProcessStep. |
Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]}; |
Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]}; |
// Number of partitions / sub-partitions to step through for this macroblock type. |
Bit#(3) numPart = 1; |
Bit#(3) numSubPart = 1; |
Bit#(2) subMbType = 0; |
if(interstate==InterPskip || interstate==InterP16x16) |
numPart = 1; |
else if(interstate==InterP16x8) |
numPart = 2; |
else if(interstate==InterP8x16) |
numPart = 2; |
else if(interstate==InterP8x8 || interstate==InterP8x8ref0) |
begin |
numPart = 4; |
subMbType = interSubMbTypeVector[interIPMbPartNum]; |
numSubPart = numSubMbPart(subMbType); |
end |
else |
$display( "ERROR Prediction: interIPProcessStep unexpected interstate"); |
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]); |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHorTemp); |
// Request coordinates: horTemp is {macroblock column, block column}; |
// verTemp is {macroblock row, block row, 2'b00}. |
Bit#(TAdd#(PicWidthSz,2)) horTemp = {currMbHorT,blockHor}; |
Bit#(TAdd#(PicHeightSz,4)) verTemp = {currMbVerTemp,blockVer,2'b00}; |
// Block type passed to the interpolator. |
IPBlockType btTemp = IP16x16; |
if(interstate==InterPskip || interstate==InterP16x16) |
btTemp = IP16x16; |
else if(interstate==InterP16x8) |
btTemp = IP16x8; |
else if(interstate==InterP8x16) |
btTemp = IP8x16; |
else |
begin |
case(subMbType) |
0: btTemp = IP8x8; |
1: btTemp = IP8x4; |
2: btTemp = IP4x8; |
3: btTemp = IP4x4; |
endcase |
end |
Bit#(14) mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum})); |
Bit#(12) mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum})); |
if(interIPStepCount == 1) |
begin |
if(!(interstate==InterP8x8 || interstate==InterP8x8ref0)) |
begin |
// Luma of 16x16 / 16x8 / 8x16 macroblocks is requested as four IP8x8 |
// blocks.  Reference index and vector are taken from the entry at which the |
// enclosing partition starts (quadrant 0 for 16x16, the left quadrant of the |
// row for 16x8, the top quadrant of the column for 8x16). |
numPart = 4; |
Bit#(2) interIPMbPartNumTemp = interIPMbPartNum; |
if(btTemp==IP16x16) |
interIPMbPartNumTemp = 0; |
else if(btTemp==IP16x8 && interIPMbPartNumTemp[0]==1) |
interIPMbPartNumTemp = interIPMbPartNumTemp-1; |
else if(btTemp==IP8x16 && interIPMbPartNumTemp[1]==1) |
interIPMbPartNumTemp = interIPMbPartNumTemp-2; |
// The InterP8x8ref0 term can never be true inside this branch; only the |
// InterPskip test matters here. |
refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNumTemp]); |
btTemp = IP8x8; |
mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNumTemp,2'b00})); |
mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNumTemp,2'b00})); |
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
end |
else |
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
end |
else |
// Chroma pass: vertical coordinate is halved, block type is the real partition type. |
interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
// Advance to the next sub-partition / partition; after the last partition |
// move on to the next pass by incrementing interIPStepCount. |
if(interIPSubMbPartNum >= truncate(numSubPart-1)) |
begin |
interIPSubMbPartNum <= 0; |
if(interIPMbPartNum >= truncate(numPart-1)) |
begin |
interIPMbPartNum <= 0; |
interIPStepCount <= interIPStepCount+1; |
end |
else |
begin |
// btTemp is IP16x8 only in the chroma passes (the luma pass forces IP8x8 |
// above); the second 16x8 partition starts at quadrant 2. |
if(btTemp == IP16x8) |
interIPMbPartNum <= 2; |
else |
interIPMbPartNum <= interIPMbPartNum+1; |
end |
end |
else |
begin |
// Sub-macroblock type 1 (mapped to IP8x4 above): its second sub-partition |
// is addressed as index 2. |
if(subMbType == 1) |
interIPSubMbPartNum <= 2; |
else |
interIPSubMbPartNum <= interIPSubMbPartNum+1; |
end |
$display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp)); |
endrule |
|
|
// Returns interstate to Start once all four inter phases are idle: requests |
// (interReqCount), responses (interRespCount), motion-vector steps |
// (interStepCount) and interpolator requests (interIPStepCount) are all 0. |
rule interDone ( interstate!=Start && interReqCount==0 && interRespCount==0 && interStepCount==0 && interIPStepCount==0 ); |
interstate <= Start; |
// NOTE(review): the disabled trace below carries the interOutputTransfer |
// message and prints interOutputCount, which is not used anywhere in the |
// visible rules; fix both before re-enabling it. |
//$display( "Trace Prediction: interOutputTransfer %h %h", interstate, interOutputCount); |
endrule |
|
|
// Moves one result per firing from the interpolator's output to |
// predictedfifo.  The rule has no explicit condition of its own; it can only |
// fire when the methods it calls are ready. |
rule interOutputTransfer ( True ); |
   let interpolated = interpolator.first(); |
   interpolator.deq(); |
   predictedfifo.enq(interpolated); |
   //$display( "Trace Prediction: interOutputTransfer %h %h", interstate, interOutputCount); |
endrule |
|
|
|
// intra prediction rules |
|
// Issues the load requests for one intra macroblock, one request per firing, |
// while intraReqCount>0 and nextoutputfifo is empty.  The responses are |
// consumed by intraReceiveResp. |
// |
// The request address is {macroblock column, 2-bit entry index}: |
//   intraReqCount 1..4 : entries 0..3 of the current column (currMbHor); |
//   intraReqCount 5    : entry 0 of the next column (Intra4x4 only, when a |
//                        next column exists), otherwise entry 3 of the |
//                        previous column when that is usable; |
//   intraReqCount 6    : entry 3 of the previous column, only when both of |
//                        the above conditions hold. |
// The sequence ends by resetting intraReqCount to 0. |
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() ); |
Bit#(PicWidthSz) temp2 = truncate(currMbHor); |
// Request address; stays 0 when no request is issued. |
Bit#(TAdd#(PicWidthSz,2)) temp = 0; |
Bit#(1) noMoreReq = 0; |
// No loads at all for macroblocks less than picWidth after firstMb |
// (presumably the first macroblock row of the slice -- confirm). |
if( currMb-firstMb < zeroExtend(picWidth) ) |
noMoreReq = 1; |
else |
begin |
if(intraReqCount<5) |
begin |
// Requests 1..4 map to entry indices 0..3 of the current column. |
Bit#(2) temp3 = truncate(intraReqCount-1); |
temp = {temp2,temp3}; |
end |
else if(intraReqCount==5) |
begin |
// Fifth request: next column entry 0 has priority over previous column entry 3. |
if((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) |
temp = {(temp2+1),2'b00}; |
else if(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)) |
temp = {(temp2-1),2'b11}; |
else |
noMoreReq = 1; |
end |
else if(intraReqCount==6) |
begin |
// Sixth request exists only when the fifth one was the next-column load and |
// the previous-column entry is also wanted. |
if((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4 && currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)) |
temp = {(temp2-1),2'b11}; |
else |
noMoreReq = 1; |
end |
else |
noMoreReq = 1; |
end |
if(noMoreReq == 0) |
begin |
intraMemReqQ.enq(LoadReq temp); |
intraReqCount <= intraReqCount+1; |
//$display( "TRACE Prediction: intraSendReq addr %0d",temp);/////////////////////// |
end |
else |
intraReqCount <= 0; |
$display( "Trace Prediction: intraSendReq"); |
endrule |
|
|
// Counterpart of intraReceiveResp for macroblocks less than picWidth after |
// firstMb, for which intraSendReq issues no loads: marks every top |
// prediction type as 15, closes the response phase and starts the intra step |
// sequence at block 0 / pixel 0.  A NotInter 1 entry is pushed on |
// interOutBlockMvfifo for the macroblock. |
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) ); |
   $display( "Trace Prediction: intraReceiveNoResp"); |
   // Nothing was loaded, so no top neighbour type is available. |
   intra4x4typeTop <= replicate(15); |
   interOutBlockMvfifo.enq(NotInter 1); |
   // Restart the per-macroblock position counters. |
   pixelNum <= 0; |
   blockNum <= 0; |
   // Leave the response phase and enter the step phase. |
   intraStepCount <= 1; |
   intraRespCount <= 0; |
endrule |
|
|
// Consumes one LoadResp from intraMemRespQ per firing and files it according |
// to intraRespCount, mirroring the request order of intraSendReq. |
// Layout of the 68-bit data word as used here (and as assembled into |
// intraStore by the output rule): |
//   [67:64] intra 4x4 prediction type |
//   [63:48] chroma 1 samples |
//   [47:32] chroma 0 samples |
//   [31:0]  luma samples |
// When the last expected response has arrived the rule resets intraRespCount, |
// starts intraStepCount at 1, clears blockNum / pixelNum and pushes NotInter 1 |
// on interOutBlockMvfifo. |
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data); |
Bit#(1) noMoreResp = 0; |
Bit#(2) temp2bit = 0; |
if(intraRespCount<5) |
begin |
// Responses 1..4: entries 0..3 of the current column. |
temp2bit = truncate(intraRespCount-1); |
intra4x4typeTop <= update(intra4x4typeTop, temp2bit, data[67:64]); |
if(intraRespCount==4) |
begin |
// Entry 3 also seeds entry 4 with its top byte replicated four times; a |
// later fifth response may overwrite entry 4. |
Vector#(5,Bit#(32)) intraTopValTemp = intraTopVal; |
intraTopValTemp[3] = data[31:0]; |
intraTopValTemp[4] = {data[31:24],data[31:24],data[31:24],data[31:24]}; |
intraTopVal <= intraTopValTemp; |
// Last response when neither the next-column nor the previous-column load follows. |
if(!((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) && !(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth))) |
noMoreResp = 1; |
end |
else |
intraTopVal <= update(intraTopVal, intraRespCount-1, data[31:0]); |
// Chroma samples are stored for all four entries. |
intraTopValChroma0 <= update(intraTopValChroma0, temp2bit, data[47:32]); |
intraTopValChroma1 <= update(intraTopValChroma1, temp2bit, data[63:48]); |
end |
else if(intraRespCount==5) |
begin |
if((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) |
begin |
// Next-column entry: used for intraTopVal[4] unless its type is 15, or 14 |
// with ppsconstrained_intra_pred_flag set; in those cases entry 4 is left |
// as set at response 4. |
if(!(data[67:64]==15 || (data[67:64]==14 && ppsconstrained_intra_pred_flag==1))) |
intraTopVal <= update(intraTopVal, 4, data[31:0]); |
if(!(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth))) |
noMoreResp = 1; |
end |
else |
begin |
// Previous-column entry: its top byte of each component becomes the low |
// byte of intraLeftVal[0] and entry 0 of the chroma left values. |
Bit#(40) temp2 = intraLeftVal[0]; |
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]}); |
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]); |
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]); |
noMoreResp = 1; |
end |
end |
else |
begin |
// Sixth response is always the previous-column entry and always the last. |
Bit#(40) temp2 = intraLeftVal[0]; |
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]}); |
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]); |
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]); |
noMoreResp = 1; |
end |
intraMemRespQ.deq(); |
//$display( "TRACE Prediction: intraReceiveResp data %h",data);/////////////////////// |
if(noMoreResp == 0) |
intraRespCount <= intraRespCount+1; |
else |
begin |
intraRespCount <= 0; |
intraStepCount <= 1; |
blockNum <= 0; |
pixelNum <= 0; |
interOutBlockMvfifo.enq(NotInter 1); |
end |
$display( "Trace Prediction: intraReceiveResp"); |
endrule |
|
|
// First intra step for a block (intraStepCount==1, nextoutputfifo empty). |
// For macroblocks that are not Intra4x4 it only advances intraStepCount and |
// enqueues NonSkipMB.  For Intra4x4 it derives the prediction mode of block |
// blockNum into cur_intra4x4_pred_mode from the top and left neighbour types |
// and one entry of rem_intra4x4_pred_mode, then enqueues Intra4x4 (or |
// Intra4x4PlusChroma for block 15). |
// Neighbour type codes as used here: 15 is always unavailable; 14 is |
// unavailable when ppsconstrained_intra_pred_flag is set (the output rule |
// stores 14 for Inter macroblocks and 13 for other non-Intra4x4 ones). |
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty()); |
// Block coordinates inside the macroblock, de-interleaved from blockNum. |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(4) topType = select(intra4x4typeTop, blockHor); |
Bit#(4) leftType; |
if(currMbHor!=0 || blockNum!=0) |
leftType = select(intra4x4typeLeft, blockVer); |
else |
begin |
// First block of a macroblock in column 0: there is no left neighbour, so |
// the whole left type vector is reset as well. |
leftType = 15; |
intra4x4typeLeft <= replicate(15); |
end |
if(intrastate!=Intra4x4) |
begin |
intraStepCount <= intraStepCount+1; |
nextoutputfifo.enq(NonSkipMB); |
end |
else |
begin |
Bit#(1) topAvailable; |
Bit#(1) leftAvailable; |
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)) |
topAvailable = 0; |
else |
topAvailable = 1; |
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)) |
leftAvailable = 0; |
else |
leftAvailable = 1; |
Bit#(4) predType = 0; |
Bit#(4) remType = rem_intra4x4_pred_mode.first(); |
Bit#(4) curType = 0; |
rem_intra4x4_pred_mode.deq(); |
// Predicted mode: 2 when either neighbour is unavailable, otherwise the |
// smaller of the two neighbour modes, where any type above 8 counts as 2. |
if(topAvailable==0 || leftAvailable==0) |
predType = 2; |
else |
begin |
Bit#(4) topType2 = topType; |
Bit#(4) leftType2 = leftType; |
if(topType>8) |
topType2 = 2; |
if(leftType>8) |
leftType2 = 2; |
if(topType2 > leftType2) |
predType = leftType2; |
else |
predType = topType2; |
end |
// remType[3]==1 selects the predicted mode; otherwise remType names one of |
// the remaining modes, skipping over the predicted one. |
if(remType[3] == 1) |
curType = predType; |
else if(remType < predType) |
curType = remType; |
else |
curType = remType+1; |
cur_intra4x4_pred_mode <= curType; |
intraStepCount <= intraStepCount+1; |
if(blockNum == 15) |
nextoutputfifo.enq(Intra4x4PlusChroma); |
else |
nextoutputfifo.enq(Intra4x4); |
$display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,topType,leftType,predType,remType,curType);////////////////// |
end |
//$display( "Trace Prediction: intraPredTypeStep"); |
endrule |
|
|
rule intraProcessStep ( intraStepCount>1 ); |
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);//////////////////// |
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);///////////////// |
Bit#(1) outFlag = 0; |
Bit#(4) nextIntraStepCount = intraStepCount+1; |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Vector#(4,Bit#(8)) predVector = replicate(0); |
|
Bit#(4) topType = select(intra4x4typeTop, blockHor); |
Bit#(4) leftType = select(intra4x4typeLeft, blockVer); |
Bit#(1) topAvailable; |
Bit#(1) leftAvailable; |
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)) |
topAvailable = 0; |
else |
topAvailable = 1; |
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)) |
leftAvailable = 0; |
else |
leftAvailable = 1; |
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0) |
begin |
intraChromaTopAvailable <= topAvailable; |
intraChromaLeftAvailable <= leftAvailable; |
end |
if(intrastate==Intra4x4 && intraChromaFlag==0) |
begin |
if(intraStepCount==2) |
begin |
outFlag = 1; |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor)); |
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1); |
Bit#(72) topValSet; |
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3) |
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]}; |
else |
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]}; |
$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);//////////////////// |
Bit#(4) topSelect1 = 0; |
Bit#(4) topSelect2 = 0; |
Bit#(4) topSelect3 = 0; |
Bit#(3) leftSelect1 = 0; |
Bit#(3) leftSelect2 = 0; |
Bit#(3) leftSelect3 = 0; |
Bit#(10) tempVal1 = 0; |
Bit#(10) tempVal2 = 0; |
Bit#(10) tempVal3 = 0; |
case(cur_intra4x4_pred_mode) |
0://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
topSelect1 = fromInteger(pixelHor); |
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1); |
predVector[pixelHor] = topVal; |
end |
end |
1://horizontal |
begin |
leftSelect1 = zeroExtend(pixelVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1); |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = leftVal; |
end |
2://dc |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2; |
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2; |
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum); |
Bit#(8) topSum = tempTopSum[9:2]; |
Bit#(8) leftSum = tempLeftSum[9:2]; |
Bit#(8) totalSum = tempTotalSum[10:3]; |
if(topAvailable==1 && leftAvailable==1) |
predVector[pixelHor] = totalSum; |
else if(topAvailable==1) |
predVector[pixelHor] = topSum; |
else if(leftAvailable==1) |
predVector[pixelHor] = leftSum; |
else |
predVector[pixelHor] = 8'b10000000; |
end |
end |
3://diagonal down left |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) selectNum = fromInteger(pixelHor)+zeroExtend(pixelVer); |
if(pixelHor==3 && pixelVer==3) |
begin |
topSelect1 = 6; |
topSelect2 = 7; |
topSelect3 = 7; |
end |
else |
begin |
topSelect1 = selectNum; |
topSelect2 = selectNum+1; |
topSelect3 = selectNum+2; |
end |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
4://diagonal down right |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
if(fromInteger(pixelHor) > pixelVer) |
begin |
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer); |
topSelect2 = topSelect3-1; |
topSelect1 = topSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(fromInteger(pixelHor) < pixelVer) |
begin |
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor); |
leftSelect2 = leftSelect3-1; |
leftSelect1 = leftSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
5://vertical right |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelHor = fromInteger(pixelHor); |
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer); |
if(zVR<=6 && zVR>=0) |
begin |
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer>>1); |
topSelect2 = topSelect3-1; |
if(zVR==1 || zVR==3 || zVR==5) |
topSelect1 = topSelect3-2; |
else |
topSelect1 = topSelect3; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(zVR==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
leftSelect1 = zeroExtend(pixelVer)-1; |
leftSelect2 = leftSelect1-1; |
leftSelect3 = leftSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
6://horizontal down |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHD = (tempPixelVer<<1)-fromInteger(pixelHor); |
if(zHD<=6 && zHD>=0) |
begin |
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor/2); |
leftSelect2 = leftSelect3-1; |
if(zHD==1 || zHD==3 || zHD==5) |
leftSelect1 = leftSelect3-2; |
else |
leftSelect1 = leftSelect3; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else if(zHD==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
topSelect1 = fromInteger(pixelHor)-1; |
topSelect2 = topSelect1-1; |
topSelect3 = topSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
7://vertical left |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
topSelect1 = fromInteger(pixelHor)+zeroExtend(pixelVer>>1); |
topSelect2 = topSelect1+1; |
if(pixelVer==1 || pixelVer==3) |
topSelect3 = topSelect1+2; |
else |
topSelect3 = topSelect1; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
8://horizontal up |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHU = (tempPixelVer<<1)+fromInteger(pixelHor); |
if(zHU<=4) |
begin |
leftSelect1 = zeroExtend(pixelVer)+fromInteger(pixelHor/2); |
leftSelect2 = leftSelect1+1; |
if(zHU==1 || zHU==3) |
leftSelect3 = leftSelect1+2; |
else |
leftSelect3 = leftSelect1; |
end |
else |
begin |
if(zHU==5) |
leftSelect1 = 2; |
else |
leftSelect1 = 3; |
leftSelect2 = 3; |
leftSelect3 = 3; |
end |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode"); |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount"); |
end |
else if(intrastate==Intra16x16 && intraChromaFlag==0) |
begin |
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));///////////////// |
case(intra16x16_pred_mode) |
0://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(32) topValSet = select(intraTopVal,blockHor); |
Bit#(8) topVal = select32to8(topValSet,fromInteger(pixelHor)); |
predVector[pixelHor] = topVal; |
end |
outFlag = 1; |
end |
1://horizontal |
begin |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer)); |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = leftVal; |
outFlag = 1; |
end |
2://dc |
begin |
case(intraStepCount) |
2: |
begin |
if(topAvailable == 1) |
begin |
Bit#(32) topValSet = select(intraTopVal,0); |
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
else |
begin |
intraSumA <= 0; |
nextIntraStepCount = 6; |
end |
end |
3: |
begin |
Bit#(32) topValSet = select(intraTopVal,1); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
4: |
begin |
Bit#(32) topValSet = select(intraTopVal,2); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
5: |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8; |
end |
6: |
begin |
if(leftAvailable == 1) |
begin |
Bit#(40) leftValSet = select(intraLeftVal,0); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
else |
nextIntraStepCount = 10; |
end |
7: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,1); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
8: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,2); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
9: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,3); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8; |
end |
10: |
begin |
if(leftAvailable == 1 && topAvailable == 1) |
intraSumA <= intraSumA >> 5; |
else if(leftAvailable == 1 || topAvailable == 1) |
intraSumA <= intraSumA >> 4; |
else |
intraSumA <= 128; |
end |
11: |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount"); |
endcase |
end |
3://plane |
begin |
if(intraStepCount == 2) |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
Bit#(8) topVal = select32to8(topValSet,3); |
Bit#(40) leftValSet = select(intraLeftVal,3); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3); |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 11) |
begin |
Bit#(4) xyPlusOne = intraStepCount-2; |
Bit#(4) xyPlusEight = intraStepCount+5; |
Bit#(4) sixMinusXY = 9-intraStepCount; |
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]); |
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]); |
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]); |
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0])); |
Bit#(32) topValSet2=0; |
Bit#(8) topVal2; |
Bit#(40) leftValSet2; |
Bit#(8) leftVal2; |
if(intraStepCount==10) |
begin |
leftValSet2 = select(intraLeftVal,0); |
leftVal2 = intra4x4SelectLeft(leftValSet2,-1); |
topVal2 = leftVal2; |
end |
else |
begin |
topValSet2 = select(intraTopVal,sixMinusXY[3:2]); |
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]); |
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]); |
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0])); |
end |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
end |
else if(intraStepCount == 11) |
begin |
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32; |
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[17:6]); |
intraSumC <= signExtend(tempSumC[17:6]); |
end |
else if(intraStepCount == 12) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(5) positionHor = {1'b0,blockHor,fromInteger(pixelHor)}; |
Bit#(5) positionVer = {1'b0,blockVer,pixelVer}; |
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7); |
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7); |
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
if(tempTotal[15]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[14:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
end |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount"); |
end |
endcase |
end |
else if(intraChromaFlag==1) |
begin |
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);/////////////////// |
Vector#(9,Bit#(8)) tempLeftVec; |
Vector#(4,Bit#(16)) tempTopVec; |
if(blockNum[2] == 0) |
begin |
tempLeftVec = intraLeftValChroma0; |
tempTopVec = intraTopValChroma0; |
end |
else |
begin |
tempLeftVec = intraLeftValChroma1; |
tempTopVec = intraTopValChroma1; |
end |
case(intra_chroma_pred_mode.first()) |
0://dc |
begin |
if(intraStepCount == 2) |
begin |
Bit#(1) useTop=0; |
Bit#(1) useLeft=0; |
if(blockNum[1:0] == 0 || blockNum[1:0] == 3) |
begin |
useTop = intraChromaTopAvailable; |
useLeft = intraChromaLeftAvailable; |
end |
else if(blockNum[1:0] == 1) |
begin |
if(intraChromaTopAvailable == 1) |
useTop = 1; |
else if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
end |
else if(blockNum[1:0] == 2) |
begin |
if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
else if(intraChromaTopAvailable == 1) |
useTop = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum"); |
Bit#(10) topSum; |
Bit#(10) leftSum; |
Bit#(11) totalSum; |
if(blockHor[0] == 0) |
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2; |
else |
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2; |
if(blockVer[0] == 0) |
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2; |
else |
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2; |
totalSum = zeroExtend(topSum) + zeroExtend(leftSum); |
if(useTop==1 && useLeft==1) |
intraSumA <= zeroExtend(totalSum[10:3]); |
else if(useTop==1) |
intraSumA <= zeroExtend(topSum[9:2]); |
else if(useLeft==1) |
intraSumA <= zeroExtend(leftSum[9:2]); |
else |
intraSumA <= zeroExtend(8'b10000000); |
end |
else if(intraStepCount == 3) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount"); |
end |
1://horizontal |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1; |
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx); |
end |
outFlag = 1; |
end |
2://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(2) pixelHorTemp = fromInteger(pixelHor); |
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHorTemp[1]}); |
if(pixelHorTemp[0] == 0) |
predVector[pixelHor] = tempTopVal[7:0]; |
else |
predVector[pixelHor] = tempTopVal[15:8]; |
end |
outFlag = 1; |
end |
3://plane |
begin |
if(intraStepCount == 2) |
begin |
Bit#(16) topValSet = tempTopVec[3]; |
Bit#(8) topVal = topValSet[15:8]; |
Bit#(8) leftVal = tempLeftVec[8]; |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 7) |
begin |
Bit#(3) xyPlusOne = truncate(intraStepCount)-2; |
Bit#(3) xyPlusFour = truncate(intraStepCount)+1; |
Bit#(4) twoMinusXY = 5-intraStepCount; |
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]); |
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]); |
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1; |
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1); |
|
Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]); |
Bit#(8) topVal2; |
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1); |
if(intraStepCount==6) |
topVal2 = leftVal2; |
else |
topVal2 = select16to8(topValSet2,twoMinusXY[0]); |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH); |
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
end |
else if(intraStepCount == 7) |
begin |
Int#(15) tempDisplayH = unpack(intraSumB); |
Int#(15) tempDisplayV = unpack(intraSumC); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32; |
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[18:6]); |
intraSumC <= signExtend(tempSumC[18:6]); |
end |
else if(intraStepCount == 8) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) positionHor = {1'b0,blockHor[0],fromInteger(pixelHor)}; |
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer}; |
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3); |
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3); |
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
if(tempTotal[16]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[15:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
end |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount"); |
end |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep unknown intrastate"); |
|
if(outFlag==1) |
begin |
predictedfifo.enq(predVector); |
pixelNum <= pixelNum+4; |
if(pixelNum == 12) |
begin |
if(intraChromaFlag==0) |
begin |
blockNum <= blockNum+1; |
if(blockNum == 15) |
begin |
intraChromaFlag <= 1; |
intraStepCount <= 2; |
end |
else if(intrastate==Intra4x4) |
intraStepCount <= 1; |
end |
else |
begin |
if(blockNum == 7) |
begin |
blockNum <= 0; |
intraChromaFlag <= 0; |
intraStepCount <= 0; |
intra_chroma_pred_mode.deq(); |
end |
else |
begin |
blockNum <= blockNum+1; |
if(intra_chroma_pred_mode.first()==0) |
intraStepCount <= 2; |
else if(blockNum==3) |
intraStepCount <= 2; |
end |
end |
end |
end |
else |
intraStepCount <= nextIntraStepCount; |
//$display( "Trace Prediction: intraProcessStep"); |
endrule |
|
|
|
// Memory port mem_client_intra: requests are taken from intraMemReqQ and |
// responses are delivered into intraMemRespQ. |
interface Client mem_client_intra; |
interface Get request = fifoToGet(intraMemReqQ); |
interface Put response = fifoToPut(intraMemRespQ); |
endinterface |
// Memory port mem_client_inter: same wiring, using interMemReqQ / interMemRespQ. |
interface Client mem_client_inter; |
interface Get request = fifoToGet(interMemReqQ); |
interface Put response = fifoToPut(interMemRespQ); |
endinterface |
// Not implemented locally: forwarded unchanged from `interpolator.mem_client`. |
interface Client mem_client_buffer = interpolator.mem_client; |
|
// Stream ports: the two inputs fill infifo and infifo_ITB, the output drains outfifo. |
interface Put ioin = fifoToPut(infifo); |
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB); |
interface Get ioout = fifoToGet(outfifo); |
|
|
endmodule |
|
endpackage |
/trunk/src/ICalc_nC.bsv
0,0 → 1,28
//********************************************************************** |
// Interface for nC Calculator |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package ICalc_nC; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Method and port set of the nC calculator. |
// The two nCcalc_* methods are value methods returning a 5-bit result; every |
// other method is an Action. |
// NOTE(review): the per-argument notes below are inferred from the argument |
// names only - confirm against the implementing module. |
interface Calc_nC; |
// Picture width, in macroblocks (per the argument name). |
method Action initialize_picWidth( Bit#(PicWidthSz) picWidthInMb ); |
// Takes the address of the first macroblock (per the argument name). |
method Action initialize( Bit#(PicAreaSz) firstMbAddr ); |
// Takes a macroblock address. |
method Action loadMb( Bit#(PicAreaSz) mbAddr ); |
// Value lookups: 4-bit block number for luma, 3-bit for chroma. |
method Bit#(5) nCcalc_luma( Bit#(4) microBlockNum ); |
method Bit#(5) nCcalc_chroma( Bit#(3) microBlockNum ); |
// Updates taking a block number and a 5-bit value. |
method Action nNupdate_luma( Bit#(4) microBlockNum, Bit#(5) updataVal ); |
method Action nNupdate_chroma( Bit#(3) microBlockNum, Bit#(5) updataVal ); |
// Updates for skipped macroblocks (argument: mb_skip_run) and, presumably, for |
// I_PCM macroblocks - TODO confirm the exact effect in the implementation. |
method Action nNupdate_pskip( Bit#(PicAreaSz) mb_skip_run ); |
method Action nNupdate_ipcm(); |
// Memory client; MemReq/MemResp are declared in H264Types. The parameters are |
// presumably (address width, data width), i.e. PicWidthSz+1 address bits and |
// 20 data bits - TODO confirm. |
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_client; |
endinterface |
|
endpackage |
|
/trunk/src/IH264.bsv
0,0 → 1,31
//********************************************************************** |
// Interface for H264 Main Module |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IH264; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Top-level port list of the H264 main module: one input stream, five memory |
// clients, three frame-buffer ports and one output stream. |
// NOTE(review): the ED / P / D suffixes presumably stand for entropy decoder, |
// prediction and deblocking filter - confirm against mkH264. |
interface IH264; |
|
// Interface for memory, input generator |
// Input stream of InputGenOT items. |
interface Put#(InputGenOT) ioin; |
// Memory clients. The request/response types of mem_clientD_parameter and |
// mem_clientD_data match the parameter and data FIFOs declared in mkDeblockFilter. |
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_clientED; |
interface Client#(MemReq#(TAdd#(PicWidthSz,2),68),MemResp#(68)) mem_clientP_intra; |
interface Client#(MemReq#(TAdd#(PicWidthSz,2),32),MemResp#(32)) mem_clientP_inter; |
interface Client#(MemReq#(PicWidthSz,13),MemResp#(13)) mem_clientD_parameter; |
interface Client#(MemReq#(TAdd#(PicWidthSz,5),32),MemResp#(32)) mem_clientD_data; |
// Frame buffer: two load clients and one store stream. |
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load1; |
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load2; |
interface Get#(FrameBufferStoreReq) buffer_client_store; |
// Output stream of BufferControlOT items. |
interface Get#(BufferControlOT) ioout; |
|
endinterface |
|
endpackage |
|
/trunk/src/IInverseTrans.bsv
0,0 → 1,23
//********************************************************************** |
// Interface for Inverse Quantizer and Inverse Transformer |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IInverseTrans; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Stream interface of the inverse quantizer / inverse transformer: it accepts |
// EntropyDecOT_InverseTrans items and produces InverseTransOT items. |
interface IInverseTrans; |
|
// Interface for inter-module io |
interface Put#(EntropyDecOT_InverseTrans) ioin; |
interface Get#(InverseTransOT) ioout; |
|
endinterface |
|
endpackage |
|
/trunk/src/IFrameBuffer.bsv
0,0 → 1,23
//********************************************************************** |
// Interface for Frame Buffer |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IFrameBuffer; |
|
import H264Types::*; |
import ClientServer::*; |
import GetPut::*; |
|
// Server side of the frame buffer: two load servers with identical |
// request/response types, plus a Put port that accepts store requests. |
interface IFrameBuffer; |
|
// Interface from processor to cache |
interface Server#(FrameBufferLoadReq,FrameBufferLoadResp) server_load1; |
interface Server#(FrameBufferLoadReq,FrameBufferLoadResp) server_load2; |
// Store requests are one-way: this port has no response channel. |
interface Put#(FrameBufferStoreReq) server_store; |
|
endinterface |
|
endpackage |
/trunk/src/mkInputGen_park.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// Input generator: replays the bytes of a hex image, one DataByte per rule
// firing, then keeps offering EndOfFile once the last byte has been sent.
module mkInputGen( IInputGen );

   // Number of bytes replayed from the image; valid addresses are 0 .. streamBytes-1.
   Integer streamBytes = 1023206;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("720p50_parkrun_ter1-5.hex", 0, fromInteger(streamBytes-1));

   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27))    index   <- mkReg(0);

   // Emit the byte at the current address and advance to the next one.
   rule output_byte (index < fromInteger(streamBytes));
      Bit#(8) currentByte = rfile.sub(index);
      outfifo.enq(DataByte currentByte);
      index <= index+1;
   endrule

   // index is not advanced here, so this rule stays enabled and enqueues
   // EndOfFile every time outfifo has room.
   rule end_of_file (index == fromInteger(streamBytes));
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);

endmodule
|
|
endpackage |
/trunk/src/mkDeblockFilter.bsv
0,0 → 1,786
//********************************************************************** |
// Deblocking Filter |
//---------------------------------------------------------------------- |
// |
// |
|
package mkDeblockFilter; |
|
import H264Types::*; |
|
import IDeblockFilter::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
|
// Phase of the deblocking filter, held in the `process` register of |
// mkDeblockFilter. Every member carries no payload (void). |
typedef union tagged |
{ |
void Passing; //not working on anything in particular |
void Initialize; // rule `initialize` fires in this phase and moves on to Horizontal |
void Horizontal; // rule `horizontal` fires in this phase |
void Vertical; // NOTE(review): presumably the second filtering pass - confirm |
void Cleanup; // NOTE(review): presumably the write-back phase - confirm |
} |
Process deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
|
// Unsigned absolute difference |in0 - in1| of two 8-bit values.
function Bit#(8) absdiff8(Bit#(8) in0, Bit#(8) in1);
   Bit#(8) magnitude;
   // Always subtract the smaller operand from the larger one so the
   // unsigned subtraction cannot wrap.
   if (in0 > in1)
      magnitude = in0 - in1;
   else
      magnitude = in1 - in0;
   return magnitude;
endfunction
|
|
// Decides whether the samples around an edge pass the alpha/beta thresholds.
// in_pixels packs p1, p0, q0, q1 from the least to the most significant byte.
// Returns True only when |p0-q0| < alpha, |p0-p1| < beta and |q0-q1| < beta.
function Bool filter_test(Bit#(32) in_pixels, Bit#(8) alpha, Bit#(5) beta);
   Bit#(8) sampleP1 = in_pixels[7:0];
   Bit#(8) sampleP0 = in_pixels[15:8];
   Bit#(8) sampleQ0 = in_pixels[23:16];
   Bit#(8) sampleQ1 = in_pixels[31:24];

   // beta is 5 bits wide; widen it once for the 8-bit comparisons.
   Bit#(8) betaWide = zeroExtend(beta);

   Bool edgeStepSmall = absdiff8(sampleP0,sampleQ0) < alpha;
   Bool pSideFlat     = absdiff8(sampleP0,sampleP1) < betaWide;
   Bool qSideFlat     = absdiff8(sampleQ0,sampleQ1) < betaWide;

   return (edgeStepSmall && pSideFlat && qSideFlat);
endfunction
|
|
// Clamps a signed 9-bit value to the symmetric range [-bound, +bound] and
// returns it as a signed 6-bit value.
// val   - two's-complement 9-bit input
// bound - unsigned 5-bit limit (0..31), so both limits fit in 6 signed bits
function Bit#(6) clip3symmetric9to6(Bit#(9) val, Bit#(5) bound);
   Int#(9) signedVal  = unpack(val);
   Int#(6) upperLimit = unpack({1'b0,bound});
   Int#(6) lowerLimit = -upperLimit;
   Int#(6) clamped;
   if (signedVal < signExtend(lowerLimit))
      clamped = lowerLimit;
   else if (signedVal > signExtend(upperLimit))
      clamped = upperLimit;
   else
      // Within range, so dropping the three upper bits loses nothing.
      clamped = truncate(signedVal);
   return pack(clamped);
endfunction
|
|
// Filters one line of eight samples that straddles a block edge. |
// in_pixels  - p3,p2,p1,p0,q0,q1,q2,q3 packed from the least to the most significant byte |
// chroma_flag - when True only p0 and q0 can change (every wider branch requires !chroma_flag) |
// bs          - 0: samples returned unchanged; 4: strong filter; other non-zero: clipped-delta filter |
// alpha       - used here only for the bs==4 "small gap" test |
// beta        - threshold for the a_p / a_q side-activity tests |
// tc0_vector  - clipping limits, indexed by bs-1 in the clipped-delta branch |
// Returns the filtered samples in the same packing as in_pixels; p3 and q3 always pass through. |
// This function does not apply the filter_test() alpha/beta check itself. |
// NOTE(review): the arithmetic appears to follow the edge filter of ITU-T H.264 |
// clause 8.7.2 - confirm against the specification. |
// NOTE(review): bs is 3 bits wide, so 5..7 would index tc0_vector (3 entries) out of |
// range; presumably those values never occur - confirm with the producer of bS. |
function Bit#(64) filter_input(Bit#(64) in_pixels, Bool chroma_flag, Bit#(3) bs, Bit#(8) alpha, Bit#(5) beta, Vector#(3,Bit#(5)) tc0_vector); |
Bit#(8) p[4]; |
Bit#(8) q[4]; |
// Unpack: p[0] and q[0] are the two samples in the middle of the 64-bit word. |
p[3] = in_pixels[7:0]; |
p[2] = in_pixels[15:8]; |
p[1] = in_pixels[23:16]; |
p[0] = in_pixels[31:24]; |
q[0] = in_pixels[39:32]; |
q[1] = in_pixels[47:40]; |
q[2] = in_pixels[55:48]; |
q[3] = in_pixels[63:56]; |
Bit#(8) p_out[4]; |
Bit#(8) q_out[4]; |
// Side-activity tests: |p2-p0| < beta and |q2-q0| < beta. |
Bool a_p_test = absdiff8(p[2],p[0]) < zeroExtend(beta); |
Bool a_q_test = absdiff8(q[2],q[0]) < zeroExtend(beta); |
// 9-bit sum p0+q0, shared by both filter branches. |
Bit#(9) p0q0 = zeroExtend(p[0])+zeroExtend(q[0]); |
if (bs == 4) |
begin |
// Strong filter. The three-sample update on a side is used only for luma when |
// that side's activity test passes and |p0-q0| < (alpha>>2)+2. |
Bool small_gap_test = absdiff8(p[0],q[0]) < (alpha >> 2)+2; |
// 11-bit temporaries hold the weighted sums before the >>2 / >>3 normalisation. |
Bit#(11) p_outtemp[3]; |
Bit#(11) q_outtemp[3]; |
if (!chroma_flag && a_p_test && small_gap_test) |
begin |
// sum = p1 + p0 + q0 |
Bit#(11) sum = zeroExtend(p[1])+zeroExtend(p0q0); |
p_outtemp[0] = (zeroExtend(p[2]) + (sum<<1) + zeroExtend(q[1]) + 4) >> 3; |
p_outtemp[1] = (zeroExtend(p[2]) + sum + 2) >> 2; |
p_outtemp[2] = (((zeroExtend(p[3])+zeroExtend(p[2]))<<1) + zeroExtend(p[2]) + sum + 4) >> 3; |
end |
else |
begin |
// Only p0 changes: (2*p1 + p0 + q1 + 2) >> 2. |
p_outtemp[0] = ((zeroExtend(p[1])<<1) + zeroExtend(p[0]) + zeroExtend(q[1]) + 2) >> 2; |
p_outtemp[1] = zeroExtend(p[1]); |
p_outtemp[2] = zeroExtend(p[2]); |
end |
if (!chroma_flag && a_q_test && small_gap_test) |
begin |
// Mirror image of the p side: sum = q1 + p0 + q0. |
Bit#(11) sum = zeroExtend(q[1])+zeroExtend(p0q0); |
q_outtemp[0] = (zeroExtend(p[1]) + (sum<<1) + zeroExtend(q[2]) + 4) >> 3; |
q_outtemp[1] = (zeroExtend(q[2]) + sum + 2) >> 2; |
q_outtemp[2] = (((zeroExtend(q[3])+zeroExtend(q[2]))<<1) + zeroExtend(q[2]) + sum + 4) >> 3; |
end |
else |
begin |
// Only q0 changes: (2*q1 + q0 + p1 + 2) >> 2. |
q_outtemp[0] = ((zeroExtend(q[1])<<1) + zeroExtend(q[0]) + zeroExtend(p[1]) + 2) >> 2; |
q_outtemp[1] = zeroExtend(q[1]); |
q_outtemp[2] = zeroExtend(q[2]); |
end |
// Keep the low 8 bits of each normalised result. |
p_out[0] = truncate(p_outtemp[0]); |
p_out[1] = truncate(p_outtemp[1]); |
p_out[2] = truncate(p_outtemp[2]); |
q_out[0] = truncate(q_outtemp[0]); |
q_out[1] = truncate(q_outtemp[1]); |
q_out[2] = truncate(q_outtemp[2]); |
end |
else if(bs > 0) |
begin |
// Clipped-delta filter. t_c0 comes from the table; t_c widens the limit by 1 for |
// chroma, or by one for each passing side-activity test for luma. |
Bit#(5) t_c0 = tc0_vector[bs-1]; |
Bit#(5) t_c = chroma_flag ? t_c0+1 : t_c0 + (a_p_test ? 1:0) + (a_q_test ? 1:0); |
// deltatemp = 4*(q0-p0) + (p1-q1) + 4 in 12-bit two's complement; taking bits [11:3] |
// divides by 8 and keeps the sign bit. |
Bit#(12) deltatemp = (((zeroExtend(q[0])-zeroExtend(p[0]))<<2)+zeroExtend(p[1])-zeroExtend(q[1])+4); |
Bit#(6) delta = clip3symmetric9to6(deltatemp[11:3],t_c); |
|
// p0 + delta and q0 - delta, saturated to 0..255: bit 9 set means the result is |
// negative, bit 8 set means it exceeds 255. |
Bit#(10) p_out0temp = zeroExtend(p[0]) + signExtend(delta); |
p_out[0] = (p_out0temp[9]==1 ? 0 : (p_out0temp[8]==1 ? 255 : p_out0temp[7:0])); |
Bit#(10) q_out0temp = zeroExtend(q[0]) - signExtend(delta); |
q_out[0] = (q_out0temp[9]==1 ? 0 : (q_out0temp[8]==1 ? 255 : q_out0temp[7:0])); |
|
// Rounded average (p0 + q0 + 1) >> 1. |
Bit#(9) p0q0PLUS1 = p0q0+1; |
Bit#(8) p0q0_av = p0q0PLUS1[8:1]; |
if (!chroma_flag && a_p_test) |
begin |
// p1 += clip(-t_c0, t_c0, (p2 + avg - 2*p1) >> 1); bits [9:1] perform the halving. |
Bit#(10) p_out1temp = zeroExtend(p[2]) + zeroExtend(p0q0_av) - (zeroExtend(p[1])<<1); |
p_out[1] = p[1]+signExtend(clip3symmetric9to6(p_out1temp[9:1],t_c0)); |
end |
else |
p_out[1] = p[1]; |
|
if (!chroma_flag && a_q_test) |
begin |
// Same correction for q1. |
Bit#(10) q_out1temp = zeroExtend(q[2]) + zeroExtend(p0q0_av) - (zeroExtend(q[1])<<1); |
q_out[1] = q[1]+signExtend(clip3symmetric9to6(q_out1temp[9:1],t_c0)); |
end |
else |
q_out[1] = q[1]; |
|
// p2 and q2 are never modified in this branch. |
p_out[2] = p[2]; |
q_out[2] = q[2]; |
end |
else |
begin |
// bs == 0: no filtering. |
p_out[0] = p[0]; |
q_out[0] = q[0]; |
p_out[1] = p[1]; |
q_out[1] = q[1]; |
p_out[2] = p[2]; |
q_out[2] = q[2]; |
end |
// The outermost samples pass through in every branch. |
p_out[3] = p[3]; |
q_out[3] = q[3]; |
// Repack in the input byte order (q3 most significant, p3 least significant). |
return({q_out[3], q_out[2], q_out[1], q_out[0], p_out[0], p_out[1], p_out[2], p_out[3]}); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Deblocking Filter Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkDeblockFilter( IDeblockFilter ); |
|
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(deblockFilter_infifo_size); |
FIFO#(DeblockFilterOT) outfifo <- mkFIFO(); |
|
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkFIFO; |
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkFIFO; |
FIFO#(MemResp#(32)) dataMemRespQ <- mkFIFO; |
FIFO#(MemResp#(13)) parameterMemRespQ <- mkFIFO; |
|
Reg#(Process) process <- mkReg(Passing); |
Reg#(Bit#(1)) chromaFlag <- mkReg(0); |
Reg#(Bit#(5)) dataReqCount <- mkReg(0); |
Reg#(Bit#(5)) dataRespCount <- mkReg(0); |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |
|
Reg#(Bool) filterTopMbEdgeFlag <- mkReg(False); |
Reg#(Bool) filterLeftMbEdgeFlag <- mkReg(False); |
Reg#(Bool) filterInternalEdgesFlag <- mkReg(False); |
|
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |
|
Reg#(Bit#(2)) disable_deblocking_filter_idc <- mkReg(0); |
Reg#(Bit#(5)) slice_alpha_c0_offset <- mkReg(0); |
Reg#(Bit#(5)) slice_beta_offset <- mkReg(0); |
|
Reg#(Bit#(6)) curr_qpy <- mkReg(0); |
Reg#(Bit#(6)) left_qpy <- mkReg(0); |
Reg#(Bit#(6)) top_qpy <- mkReg(0); |
Reg#(Bit#(6)) curr_qpc <- mkReg(0); |
Reg#(Bit#(6)) left_qpc <- mkReg(0); |
Reg#(Bit#(6)) top_qpc <- mkReg(0); |
Reg#(Bit#(1)) curr_intra <- mkReg(0); |
Reg#(Bit#(1)) left_intra <- mkReg(0); |
Reg#(Bit#(1)) top_intra <- mkReg(0); |
|
Reg#(Bit#(8)) alphaMbEdge <- mkReg(0); |
Reg#(Bit#(8)) alphaInternal <- mkReg(0); |
Reg#(Bit#(5)) betaMbEdge <- mkReg(0); |
Reg#(Bit#(5)) betaInternal <- mkReg(0); |
Reg#(Vector#(3,Bit#(5))) tc0MbEdge <- mkRegU(); |
Reg#(Vector#(3,Bit#(5))) tc0Internal <- mkRegU(); |
|
Bit#(8) alpha_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 4, 4, 5, 6, |
7, 8, 9, 10, 12, 13, 15, 17, 20, 22, |
25, 28, 32, 36, 40, 45, 50, 56, 63, 71, |
80, 90,101,113,127,144,162,182,203,226, |
255,255}; |
Bit#(5) beta_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 2, 2, 2, 3, |
3, 3, 3, 4, 4, 4, 6, 6, 7, 7, |
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, |
13, 13, 14, 14, 15, 15, 16, 16, 17, 17, |
18, 18}; |
Bit#(5) tc0_table[52][3] = {{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, |
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, |
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 }, |
{ 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 }, |
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 }, |
{ 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 }, |
{ 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 }, |
{ 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 }, |
{ 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }}; |
|
Reg#(Vector#(64,Bit#(32))) workVector <- mkRegU(); |
Reg#(Vector#(96,Bit#(32))) leftVector <- mkRegU(); |
Reg#(Vector#(16,Bit#(32))) topVector <- mkRegU(); |
|
Reg#(Bool) startLastOutput <- mkReg(False); |
Reg#(Bool) outputingFinished <- mkReg(False); |
Reg#(Bit#(2)) colNum <- mkReg(0); |
Reg#(Bit#(2)) rowNum <- mkReg(0); |
|
RFile1#(Bit#(4),Tuple2#(Bit#(3),Bit#(3))) bSfile <- mkRFile1Full(); |
|
|
//----------------------------------------------------------- |
// Rules |
|
|
// Debug trace: prints the item at the head of infifo. The explicit guard is |
// True, so the rule is enabled whenever infifo.first() is available; it does |
// not dequeue anything. |
// NOTE(review): most other trace displays in this module are commented out - |
// confirm that this one is meant to stay active. |
rule checkFIFO ( True ); |
$display( "Trace DeblockFilter: checkFIFO %h", infifo.first() ); |
endrule |
|
|
// Idle-phase dispatcher. While `process` is Passing it inspects the item at |
// the head of infifo and either forwards it to outfifo (wrapped in EDOT), |
// consumes it to update local registers, or hands over to the Initialize phase. |
rule passing ( process matches Passing ); |
case (infifo.first()) matches |
// Forwarded unchanged, with two debug prints. |
tagged NewUnit . xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display("ccl5newunit"); |
$display("ccl5rbspbyte %h", xdata); |
end |
// Picture dimensions: forwarded and also latched locally. |
tagged SPSpic_width_in_mbs .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picWidth <= xdata; |
end |
tagged SPSpic_height_in_map_units .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picHeight <= xdata; |
end |
// Consumed here (not forwarded). When the flag is 0 the three deblocking |
// control registers are reset to 0. |
tagged PPSdeblocking_filter_control_present_flag .xdata : |
begin |
infifo.deq(); |
if (xdata == 0) |
begin |
disable_deblocking_filter_idc <= 0; |
slice_alpha_c0_offset <= 0; |
slice_beta_offset <= 0; |
end |
end |
// Start of a slice: forwarded, and the macroblock position is restarted. |
// currMbHor is loaded with the raw address and currMbVer with 0; rule |
// currMbHorUpdate then reduces currMbHor below picWidth while counting rows. |
tagged SHfirst_mb_in_slice .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
firstMb <= xdata; |
currMb <= xdata; |
currMbHor <= xdata; |
currMbVer <= 0; |
end |
// Slice-level deblocking controls: consumed here, not forwarded. |
tagged SHdisable_deblocking_filter_idc .xdata : |
begin |
infifo.deq(); |
disable_deblocking_filter_idc <= xdata; |
end |
tagged SHslice_alpha_c0_offset .xdata : |
begin |
infifo.deq(); |
slice_alpha_c0_offset <= xdata; |
end |
tagged SHslice_beta_offset .xdata : |
begin |
infifo.deq(); |
slice_beta_offset <= xdata; |
end |
// Quantisation parameters of the current macroblock: consumed, not forwarded. |
tagged IBTmb_qp .xdata : |
begin |
infifo.deq(); |
curr_qpy <= xdata.qpy; |
curr_qpc <= xdata.qpc; |
end |
// Hands over to the Initialize phase. The PBbS item is deliberately left in |
// infifo (no deq here); rule `horizontal` has a PBbS branch that dequeues it. |
tagged PBbS .xdata : |
begin |
process <= Initialize; |
end |
// Unexpected in this phase: reported, and the item is not dequeued. |
tagged PBoutput .xdata : |
begin |
$display( "ERROR Deblocking Filter: passing PBoutput"); |
end |
tagged EndOfFile : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display( "ccl5: EndOfFile reached"); |
//$finish(0); |
end |
// Every other item is forwarded unchanged. |
default: |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
end |
endcase |
endrule |
|
|
// Reduces currMbHor below the picture width by repeated subtraction, counting
// the removed rows in currMbVer. Fires only while currMbHor >= picWidth.
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) );
   Bit#(PicAreaSz) rowLength = zeroExtend(picWidth);

   // Remove eight rows in one firing when at least eight still fit,
   // otherwise remove a single row.
   Bool eightRowsFit = (currMbHor >> 3) >= rowLength;
   Bit#(PicAreaSz)   horStep = (eightRowsFit ? (rowLength << 3) : rowLength);
   Bit#(PicHeightSz) verStep = (eightRowsFit ? 8 : 1);

   currMbHor <= currMbHor - horStep;
   currMbVer <= currMbVer + verStep;
endrule
|
|
// Start of a macroblock pass: resets the counters, decides which edges are
// filtered, and looks up the alpha / beta / tc0 values used on the macroblock
// edge from the average of the current and left QP. Moves on to Horizontal.
rule initialize ( process==Initialize && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: initialize %0d", currMb);
   Bit#(PicAreaSz) mbPerRow = zeroExtend(picWidth);

   // Edge enables, derived from disable_deblocking_filter_idc (1 and 2 are the
   // values tested below).
   Bool filterOff  = (disable_deblocking_filter_idc==1);
   Bool sliceLocal = (disable_deblocking_filter_idc==2);
   Bool skipTopEdge  = currMb<mbPerRow || filterOff || (sliceLocal && currMb-firstMb<mbPerRow);
   Bool skipLeftEdge = currMbHor==0 || filterOff || (sliceLocal && currMb==firstMb);

   // Rounded average of the current and left QP for the plane selected by chromaFlag.
   Bool lumaPass = (chromaFlag==0);
   Bit#(6) qpCurr = (lumaPass ? curr_qpy : curr_qpc);
   Bit#(6) qpLeft = (lumaPass ? left_qpy : left_qpc);
   Bit#(7) qpSum  = zeroExtend(qpCurr)+zeroExtend(qpLeft)+1;
   Bit#(6) qpAvg  = qpSum[6:1];

   // Add the signed slice offsets, then clamp each table index to 0..51.
   Bit#(8) rawIndexA = zeroExtend(qpAvg)+signExtend(slice_alpha_c0_offset);
   Bit#(8) rawIndexB = zeroExtend(qpAvg)+signExtend(slice_beta_offset);
   Bit#(6) indexA;
   if(rawIndexA[7]==1)
      indexA = 0;
   else if(rawIndexA[6:0]>51)
      indexA = 51;
   else
      indexA = rawIndexA[5:0];
   Bit#(6) indexB;
   if(rawIndexB[7]==1)
      indexB = 0;
   else if(rawIndexB[6:0]>51)
      indexB = 51;
   else
      indexB = rawIndexB[5:0];
   Vector#(3,Bit#(5)) tc0Row = arrayToVector(tc0_table[indexA]);

   process <= Horizontal;
   dataReqCount <= 1;
   dataRespCount <= 1;
   blockNum <= 0;
   pixelNum <= 0;
   filterTopMbEdgeFlag <= !skipTopEdge;
   filterLeftMbEdgeFlag <= !skipLeftEdge;
   filterInternalEdgesFlag <= !filterOff;
   alphaMbEdge <= alpha_table[indexA];
   betaMbEdge <= beta_table[indexB];
   tc0MbEdge <= tc0Row;
endrule
|
|
// Issues the load requests for the row above the current macroblock: one
// parameter load on the first step plus sixteen data loads, one per firing.
// When currMb < picWidth no request is issued and the counter is cleared.
rule dataSendReq ( dataReqCount>0 && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: dataSendReq %0d", dataReqCount);
   Bit#(PicWidthSz) mbColumn = truncate(currMbHor);
   Bool noRowAbove = currMb<zeroExtend(picWidth);

   if(!noRowAbove)
      begin
         if(dataReqCount==1)
            parameterMemReqQ.enq(LoadReq mbColumn);
         // Data address = {macroblock column, chroma flag, word index 0..15}.
         Bit#(4) wordIndex = truncate(dataReqCount-1);
         let dataAddr = {mbColumn,chromaFlag,wordIndex};
         dataMemReqQ.enq(LoadReq dataAddr);
      end

   // The counter runs 1..16 and then returns to 0 (idle).
   if(noRowAbove || dataReqCount==16)
      dataReqCount <= 0;
   else
      dataReqCount <= dataReqCount+1;
endrule
|
|
// Clears the pending response counter without reading any memory response. |
// Its guard requires currMb < picWidth, the same condition under which |
// dataSendReq clears its request counter without issuing a load. |
// NOTE(review): the extra term currMb-firstMb < picWidth has no counterpart in |
// dataSendReq or dataReceiveResp. If it could ever be false while |
// currMb < picWidth, neither response rule would fire and dataRespCount would |
// stay non-zero - confirm that the term is redundant. |
rule dataReceiveNoResp ( dataRespCount>0 && currMb<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: dataReceiveNoResp"); |
dataRespCount <= 0; |
endrule |
|
|
// Collects the responses to the loads issued by dataSendReq: the parameter
// word on the first step (top QPs and intra flag) and one 32-bit data word
// per step, stored into topVector at index dataRespCount-1.
rule dataReceiveResp ( dataRespCount>0 && !(currMb<zeroExtend(picWidth)) && currMbHor<zeroExtend(picWidth) );
   //$display( "TRACE Deblocking Filter: dataReceiveResp %0d", dataRespCount);
   if(dataRespCount==1)
      begin
         // Parameter word layout: [5:0] luma QP, [11:6] chroma QP, [12] intra flag.
         // Any response other than LoadResp leaves the word at 0.
         Bit#(13) topParams = 0;
         if(parameterMemRespQ.first() matches tagged LoadResp .paramWord)
            topParams = paramWord;
         parameterMemRespQ.deq();
         top_qpy <= topParams[5:0];
         top_qpc <= topParams[11:6];
         top_intra <= topParams[12];
      end

   // Write the returned word into its slot; other slots keep their old value.
   Bit#(4) slot = truncate(dataRespCount-1);
   Vector#(16,Bit#(32)) updatedTop = topVector;
   if(dataMemRespQ.first() matches tagged LoadResp .dataWord)
      updatedTop[slot] = dataWord;
   dataMemRespQ.deq();
   topVector <= updatedTop;

   // The counter runs 1..16 and then returns to 0 (idle).
   if(dataRespCount==16)
      dataRespCount <= 0;
   else
      dataRespCount <= dataRespCount+1;
endrule
|
|
// Horizontal pass (process==Horizontal). Handles one infifo entry per firing: |
//  - PBbS entries store their (bShor,bSver) pair in bSfile at index blockNum. |
//  - PBoutput entries carry four values that are packed into one 32-bit word |
//    (pixelq). That word is paired with the word to its left (pixelp), run |
//    through filter_test/filter_input when the relevant edge flag is set, and |
//    both halves are written back. |
// After the last word of the last block the rule switches process to Vertical. |
rule horizontal ( process==Horizontal && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: horizontal %0d %0d %0d", blockNum, pixelNum, infifo.first()); |
// blockNum bits {2,0} give the horizontal block index, bits {3,1} the vertical one. |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Vector#(96,Bit#(32)) leftVectorNext = leftVector; |
Vector#(64,Bit#(32)) workVectorNext = workVector; |
// leftEdge: the neighbour word comes from leftVector instead of workVector. |
Bool leftEdge = (blockNum[0]==0 && (blockNum[2]==0 || chromaFlag==1)); |
// At the very first word, derive the internal-edge parameters from the current |
// qp plus the slice offsets; the table index is clamped to the range 0..51. |
if(blockNum==0 && pixelNum==0) |
begin |
Bit#(6) qpav = (chromaFlag==0 ? curr_qpy : curr_qpc); |
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset); |
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset); |
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0])); |
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0])); |
alphaInternal <= alpha_table[indexA]; |
betaInternal <= beta_table[indexB]; |
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]); |
tc0Internal <= tc0temp; |
end |
case (infifo.first()) matches |
tagged PBbS .xdata : |
begin |
infifo.deq(); |
bSfile.upd(blockNum,tuple2(xdata.bShor,xdata.bSver)); |
end |
tagged PBoutput .xdata : |
begin |
infifo.deq(); |
// addrq: where the incoming word lands in workVector. |
// addrpLeft / addrpCurr: where its left neighbour lives in leftVector / workVector. |
Bit#(6) addrq = {blockHor,blockVer,pixelVer}; |
Bit#(7) addrpLeft = (chromaFlag==0 ? {3'b011,blockVer,pixelVer} : {2'b10,blockHor[1],1'b1,blockVer[0],pixelVer}); |
Bit#(6) addrpCurr = {(blockHor-1),blockVer,pixelVer}; |
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]}; |
Bit#(32) pixelp; |
if(leftEdge) |
pixelp = leftVector[addrpLeft]; |
else |
pixelp = workVector[addrpCurr]; |
// Unfiltered default: q word in the upper half, p word in the lower half. |
Bit#(64) result = {pixelq,pixelp}; |
// The macroblock-edge parameters read here are the register values from before |
// this firing; this rule only writes them in its final step below. |
if(leftEdge && filterLeftMbEdgeFlag) |
begin |
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaMbEdge,betaMbEdge)) |
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaMbEdge,betaMbEdge,tc0MbEdge); |
end |
else if(!leftEdge && filterInternalEdgesFlag) |
begin |
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaInternal,betaInternal)) |
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaInternal,betaInternal,tc0Internal); |
end |
if(leftEdge) |
leftVectorNext[addrpLeft] = result[31:0]; |
else |
workVectorNext[addrpCurr] = result[31:0]; |
workVectorNext[addrq] = result[63:32]; |
leftVector <= leftVectorNext; |
workVector <= workVectorNext; |
// Last word of the last block (block 15, or block 7 when chromaFlag==1): |
// hand over to the Vertical pass and precompute the macroblock-edge |
// parameters from the rounded average of the current and top qp. |
if(pixelNum==12 && (blockNum==15 || (blockNum==7 && chromaFlag==1))) |
begin |
blockNum <= 0; |
process <= Vertical; |
startLastOutput <= False; |
outputingFinished <= False; |
colNum <= 0; |
// Row 0 (the top macroblock edge) is skipped unless filterTopMbEdgeFlag is set. |
if(filterTopMbEdgeFlag) |
rowNum <= 0; |
else |
rowNum <= 1; |
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc); |
Bit#(6) top_qp = (chromaFlag==0 ? top_qpy : top_qpc); |
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(top_qp)+1; |
Bit#(6) qpav = qpavtemp[6:1]; |
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset); |
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset); |
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0])); |
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0])); |
alphaMbEdge <= alpha_table[indexA]; |
betaMbEdge <= beta_table[indexB]; |
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]); |
tc0MbEdge <= tc0temp; |
end |
else if(pixelNum==12) |
blockNum <= blockNum+1; |
// pixelNum advances by 4 per word (values 0,4,8,12 are the ones tested above). |
pixelNum <= pixelNum+4; |
end |
// Any other tag is only reported; the entry is not dequeued by this rule. |
default: $display( "ERROR Deblocking Filter: horizontal non-PBoutput input"); |
endcase |
endrule |
|
|
// Vertical pass. Each firing handles the block position (colNum,rowNum): |
//  1. read the four words of the current block from workVector and the four |
//     words of the block above it (topVector when rowNum==0, otherwise the |
//     previous workVector block); |
//  2. regroup them byte-wise into four 64-bit columns (resultV), with the |
//     current block in the upper 32 bits and the block above in the lower 32; |
//  3. replace each column by filter_input's result where filter_test passes; |
//  4. regroup the bytes back into words and write them to their source vectors. |
// The rule waits for dataRespCount==0, i.e. until topVector has been loaded. |
rule vertical ( process==Vertical && !startLastOutput && dataRespCount==0 && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: vertical %0d %0d", colNum, rowNum); |
//$display( "TRACE Deblocking Filter: vertical topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]); |
Bool topEdge = (rowNum==0); |
Vector#(64,Bit#(32)) workVectorNext = workVector; |
Vector#(16,Bit#(32)) topVectorNext = topVector; |
Vector#(64,Bit#(32)) workV = workVector; |
Vector#(4,Bit#(32)) tempV = replicate(0); |
Vector#(4,Bit#(64)) resultV = replicate(0); |
Bit#(8) alpha; |
Bit#(5) beta; |
Vector#(3,Bit#(5)) tc0; |
// crNum = {colNum,rowNum} is the block index into workVector; crNum-1 is the block above. |
Bit#(4) crNum = {colNum,rowNum}; |
// The top edge uses the macroblock-edge parameters, every other row the internal ones. |
if(topEdge) |
begin |
tempV[0] = topVector[{colNum,2'b00}]; |
tempV[1] = topVector[{colNum,2'b01}]; |
tempV[2] = topVector[{colNum,2'b10}]; |
tempV[3] = topVector[{colNum,2'b11}]; |
alpha = alphaMbEdge; |
beta = betaMbEdge; |
tc0 = tc0MbEdge; |
end |
else |
begin |
tempV[0] = workV[{(crNum-1),2'b00}]; |
tempV[1] = workV[{(crNum-1),2'b01}]; |
tempV[2] = workV[{(crNum-1),2'b10}]; |
tempV[3] = workV[{(crNum-1),2'b11}]; |
alpha = alphaInternal; |
beta = betaInternal; |
tc0 = tc0Internal; |
end |
// Byte transposition: resultV[k] collects byte k of each of the eight words. |
resultV[0] = {workV[{crNum,2'b11}][7:0],workV[{crNum,2'b10}][7:0],workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0],tempV[1][7:0],tempV[0][7:0]}; |
resultV[1] = {workV[{crNum,2'b11}][15:8],workV[{crNum,2'b10}][15:8],workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8],tempV[1][15:8],tempV[0][15:8]}; |
resultV[2] = {workV[{crNum,2'b11}][23:16],workV[{crNum,2'b10}][23:16],workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16],tempV[1][23:16],tempV[0][23:16]}; |
resultV[3] = {workV[{crNum,2'b11}][31:24],workV[{crNum,2'b10}][31:24],workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24],tempV[1][31:24],tempV[0][31:24]}; |
// Each column is tested on the two bytes either side of the edge. With |
// chromaFlag==1, columns 0-1 and columns 2-3 read different bSfile entries |
// (low index bits 2'b00 vs 2'b01). |
if(filter_test({workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0]},alpha,beta)) |
resultV[0] = filter_input(resultV[0],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8]},alpha,beta)) |
resultV[1] = filter_input(resultV[1],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16]},alpha,beta)) |
resultV[2] = filter_input(resultV[2],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24]},alpha,beta)) |
resultV[3] = filter_input(resultV[3],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0); |
// Transpose back: the lower 32 bits of each column return to the block above ... |
if(topEdge) |
begin |
topVectorNext[{colNum,2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]}; |
topVectorNext[{colNum,2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]}; |
topVectorNext[{colNum,2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]}; |
topVectorNext[{colNum,2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]}; |
end |
else |
begin |
workVectorNext[{(crNum-1),2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]}; |
workVectorNext[{(crNum-1),2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]}; |
workVectorNext[{(crNum-1),2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]}; |
workVectorNext[{(crNum-1),2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]}; |
end |
// ... and the upper 32 bits return to the current block. |
workVectorNext[{crNum,2'b00}] = {resultV[3][39:32],resultV[2][39:32],resultV[1][39:32],resultV[0][39:32]}; |
workVectorNext[{crNum,2'b01}] = {resultV[3][47:40],resultV[2][47:40],resultV[1][47:40],resultV[0][47:40]}; |
workVectorNext[{crNum,2'b10}] = {resultV[3][55:48],resultV[2][55:48],resultV[1][55:48],resultV[0][55:48]}; |
workVectorNext[{crNum,2'b11}] = {resultV[3][63:56],resultV[2][63:56],resultV[1][63:56],resultV[0][63:56]}; |
if(topEdge) |
topVector <= topVectorNext; |
workVector <= workVectorNext; |
// End of a column (row 3, or row 1 when chromaFlag==1): move to the next column, |
// restarting at row 0 or 1 depending on filterTopMbEdgeFlag. After column 3 |
// startLastOutput is raised, which disables this rule through its guard. |
if(rowNum==3 || (chromaFlag==1 && rowNum==1)) |
begin |
if(colNum==3) |
startLastOutput <= True; |
else |
begin |
if(filterTopMbEdgeFlag) |
rowNum <= 0; |
else |
rowNum <= 1; |
end |
colNum <= colNum+1; |
end |
else |
rowNum <= rowNum+1; |
endrule |
|
|
// Output rule that is enabled during the Vertical pass (same process value, |
// separate guard). It walks leftVector with blockNum/pixelNum and |
//  - for every block row except the last, emits the word to outfifo using |
//    coordinates one macroblock to the left of the current one (wrapping to |
//    the last column of the previous row when currMbHorT==0); |
//  - for the last block row, waits until startLastOutput is set, then stores |
//    the word to data memory and, when currMbVer>0, also emits the matching |
//    topVector word with vertical coordinate currMbVer-1. |
// For currMb==0 nothing is emitted; the rule only marks itself finished. |
rule outputing ( process==Vertical && !outputingFinished && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: outputting %0d %0d", blockNum, pixelNum); |
// In this rule pixelNum[1:0] selects the horizontal block, blockNum[1:0] the |
// vertical block and pixelNum[3:2] the row within the block. |
Bit#(2) blockHor = pixelNum[1:0]; |
Bit#(2) blockVer = blockNum[1:0]; |
Bit#(2) pixelVer = pixelNum[3:2]; |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor); |
Bool stalling = False; |
if(currMb==0) |
begin |
if(startLastOutput) |
outputingFinished <= True; |
end |
else |
begin |
// leftVector address: entries with a leading 1'b0 are used when chromaFlag==0, |
// entries with a leading 2'b10 when chromaFlag==1. |
Bit#(7) leftAddr; |
if(chromaFlag==0) |
leftAddr = {1'b0,blockHor,blockVer,pixelVer}; |
else |
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer}; |
Bit#(32) leftData = leftVector[leftAddr]; |
// Not the last block row (row 3, or row 1 when chromaFlag==1): emit immediately. |
if(!(blockNum==3 || (blockNum==1 && chromaFlag==1))) |
begin |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData}); |
end |
// Last block row, once the vertical rule has raised startLastOutput. |
else if(startLastOutput) |
begin |
// Store address uses the column of the macroblock to the left (with wrap-around). |
Bit#(PicWidthSz) temp = ((currMbHor==0) ? (picWidth-1) : truncate(currMbHor-1)); |
dataMemReqQ.enq(StoreReq {addr:{temp,chromaFlag,blockHor,pixelVer},data:leftData}); |
if(currMbVer > 0) |
begin |
//$display( "TRACE Deblocking Filter: outputting last output %0d %0d %h", blockHor, pixelVer, topVector[{blockHor,pixelVer}]); |
Bit#(32) topData = topVector[{blockHor,pixelVer}]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{currMbVer-1,2'b11,pixelVer},hor:{currMbHorT,blockHor},data:topData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer-1,1'b1,pixelVer},hor:{currMbHorT,blockHor[0]},data:topData}); |
end |
end |
// Last block row reached before startLastOutput: hold the counters. |
else |
stalling = True; |
if(!stalling) |
begin |
if(pixelNum==15) |
begin |
if(blockNum==3 || (chromaFlag==1 && blockNum==1)) |
begin |
// On the bottom macroblock row blockNum is left at its last-row value |
// (non-zero) rather than 0; the cleanup rule branches on blockNum. |
if(currMbVer==picHeight-1) |
blockNum <= (chromaFlag==0 ? 3 : 1); |
else |
blockNum <= 0; |
outputingFinished <= True; |
end |
else |
blockNum <= blockNum+1; |
end |
pixelNum <= pixelNum+1; |
end |
end |
endrule |
|
|
rule verticaltocleanup ( process==Vertical && startLastOutput && outputingFinished); |
  // Both halves of the Vertical phase are done (the vertical rule raised |
  // startLastOutput and the outputing rule raised outputingFinished): |
  // clear the two handshake flags and move on to Cleanup. |
  outputingFinished <= False; |
  startLastOutput <= False; |
  process <= Cleanup; |
endrule |
|
|
// Cleanup phase; the action taken depends on blockNum: |
//  - blockNum==0 : copy workVector into leftVector, toggle chromaFlag and |
//                  leave Cleanup (to Initialize after the chromaFlag==0 pass, |
//                  to Passing after the chromaFlag==1 pass). The chromaFlag==1 |
//                  case also stores the current parameters, advances the |
//                  macroblock counters and emits EndOfFrame after the last one. |
//  - 0<blockNum<8: emit leftVector words to outfifo (left-neighbour coordinates). |
//  - blockNum>=8 : emit workVector words to outfifo (current coordinates). |
rule cleanup ( process==Cleanup && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: cleanup %0d %0d", blockNum, pixelNum); |
Bit#(2) blockHor = pixelNum[1:0]; |
Bit#(2) blockVer = blockNum[1:0]; |
Bit#(2) pixelVer = pixelNum[3:2]; |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor); |
Vector#(96,Bit#(32)) leftVectorNext = leftVector; |
if(blockNum==0) |
begin |
if(chromaFlag==0) |
begin |
// All 64 workVector words go to leftVector entries 0..63. |
for(Integer ii=0; ii<64; ii=ii+1) |
leftVectorNext[fromInteger(ii)] = workVector[fromInteger(ii)]; |
chromaFlag <= 1; |
process <= Initialize; |
end |
else |
begin |
// 32 workVector words (address bit 3 forced to 0) go to leftVector entries 64..95. |
for(Integer ii=0; ii<32; ii=ii+1) |
begin |
Bit#(5) tempAddr = fromInteger(ii); |
leftVectorNext[{2'b10,tempAddr}] = workVector[{tempAddr[4:3],1'b0,tempAddr[2:0]}]; |
end |
chromaFlag <= 0; |
process <= Passing; |
// Save this macroblock's {intra,qpc,qpy} both to parameter memory (indexed |
// by column) and to the left_* registers. |
Bit#(PicWidthSz) temp = truncate(currMbHor); |
parameterMemReqQ.enq(StoreReq {addr:temp,data:{curr_intra,curr_qpc,curr_qpy}}); |
left_intra <= curr_intra; |
left_qpc <= curr_qpc; |
left_qpy <= curr_qpy; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
// Last macroblock of the last row: signal end of frame. |
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1)) |
outfifo.enq(EndOfFrame); |
end |
leftVector <= leftVectorNext; |
end |
else if(blockNum < 8) |
begin |
// Emit a leftVector word using the coordinates of the macroblock to the left |
// (wrapping to the last column of the previous row when currMbHorT==0). |
Bit#(7) leftAddr; |
if(chromaFlag==0) |
leftAddr = {1'b0,blockHor,blockVer,pixelVer}; |
else |
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer}; |
Bit#(32) leftData = leftVector[leftAddr]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData}); |
// After the 16th word: in the last column continue with the workVector |
// flush (blockNum 8), otherwise go straight to the blockNum==0 step. |
if(pixelNum==15) |
begin |
if(currMbHor==zeroExtend(picWidth-1)) |
blockNum <= 8; |
else |
blockNum <= 0; |
end |
pixelNum <= pixelNum+1; |
end |
else |
begin |
// Emit a workVector word using the current macroblock's coordinates. |
Bit#(6) currAddr = {blockHor,blockVer,pixelVer}; |
Bit#(32) currData = workVector[currAddr]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:currData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:currData}); |
// After the last block row (3, or 1 when chromaFlag==1) return to the blockNum==0 step. |
if(pixelNum==15) |
begin |
if(blockNum[1:0]==3 || (blockNum[1:0]==1 && chromaFlag==1)) |
blockNum <= 0; |
else |
blockNum <= blockNum+1; |
end |
pixelNum <= pixelNum+1; |
end |
endrule |
|
|
|
|
|
|
// Data-memory client port: requests are drained from dataMemReqQ and |
// responses are delivered into dataMemRespQ. |
interface Client mem_client_data; |
  interface Put response = fifoToPut(dataMemRespQ); |
  interface Get request = fifoToGet(dataMemReqQ); |
endinterface |
|
// Parameter-memory client port: requests are drained from parameterMemReqQ |
// and responses are delivered into parameterMemRespQ. |
interface Client mem_client_parameter; |
  interface Put response = fifoToPut(parameterMemRespQ); |
  interface Get request = fifoToGet(parameterMemReqQ); |
endinterface |
|
// Inter-module stream ports: ioout drains outfifo, ioin fills infifo. |
interface Get ioout = fifoToGet(outfifo); |
interface Put ioin = fifoToPut(infifo); |
|
endmodule |
|
endpackage |
/trunk/src/mkInputGen_nodeblock.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// Test-bench input generator: streams the bytes of a hex image through ioout |
// as DataByte entries, one per rule firing, then keeps offering EndOfFile. |
module mkInputGen( IInputGen ); |
|
  // Highest index loaded from the hex image; the stream therefore holds |
  // lastByteIndex+1 bytes. |
  Bit#(27) lastByteIndex = 7476; |
|
  RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("foreman_qcif1-5_no_deblock.hex", 0, lastByteIndex); |
|
  Reg#(Bit#(27)) index <- mkReg(0); |
  FIFO#(InputGenOT) outfifo <- mkFIFO; |
|
  // Emit the next byte while any remain. |
  rule output_byte (index <= lastByteIndex); |
    //$display( "ccl0inputbyte %x", rfile.sub(index) ); |
    index <= index+1; |
    outfifo.enq(DataByte rfile.sub(index)); |
  endrule |
|
  // Past the last byte: index is no longer advanced, so EndOfFile is enqueued |
  // every time outfifo has room. |
  rule end_of_file (index == lastByteIndex+1); |
    //$finish(0); |
    outfifo.enq(EndOfFile); |
  endrule |
|
  interface Get ioout = fifoToGet(outfifo); |
|
endmodule |
|
|
endpackage
/trunk/src/mkFinalOutput.bsv
0,0 → 1,44
//********************************************************************** |
// final output implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkFinalOutput; |
|
import H264Types::*; |
import IFinalOutput::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
//----------------------------------------------------------- |
// Final Output Module |
// |
// Prints every YUV word received on ioin as four "ccl5finalout" lines |
// (lowest byte first) and ends the simulation on the first entry that is |
// not a YUV word. |
//----------------------------------------------------------- |
|
module mkFinalOutput( IFinalOutput ); |
|
  FIFO#(BufferControlOT) infifo <- mkFIFO; |
|
  //----------------------------------------------------------- |
  // Rules |
  rule finalout (True); |
    case (infifo.first()) matches |
      tagged YUV .word : |
        begin |
          $display("ccl5finalout %h", word[7:0]); |
          $display("ccl5finalout %h", word[15:8]); |
          $display("ccl5finalout %h", word[23:16]); |
          $display("ccl5finalout %h", word[31:24]); |
          infifo.deq(); |
        end |
      // Anything else terminates the run; the entry is left in the queue. |
      default : $finish(0); |
    endcase |
  endrule |
|
|
  interface Put ioin = fifoToPut(infifo); |
|
endmodule |
|
endpackage
/trunk/src/IInputGen.bsv
0,0 → 1,21
//********************************************************************** |
// Interface for input generator |
//---------------------------------------------------------------------- |
// |
// |
// |
|
// Package declaring the interface that input-generator modules provide. |
package IInputGen; |
|
import H264Types::*; |
import GetPut::*; |
|
// A single Get port delivering InputGenOT entries (InputGenOT comes from an |
// imported package; the generators in this code base enqueue DataByte and |
// EndOfFile values on it). |
interface IInputGen; |
|
// Interface for inter-module io |
interface Get#(InputGenOT) ioout; |
|
endinterface |
|
endpackage
|
/trunk/src/mkInputGen_park20inter.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// Test-bench input generator: streams the bytes of a hex image through ioout |
// as DataByte entries, one per rule firing, then keeps offering EndOfFile. |
module mkInputGen( IInputGen ); |
|
  // Highest index loaded from the hex image; the stream therefore holds |
  // lastByteIndex+1 bytes. |
  Bit#(27) lastByteIndex = 2282510; |
|
  RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("720p50_parkrun_ter1-20inter.hex", 0, lastByteIndex); |
|
  Reg#(Bit#(27)) index <- mkReg(0); |
  FIFO#(InputGenOT) outfifo <- mkFIFO; |
|
  // Emit the next byte while any remain. |
  rule output_byte (index <= lastByteIndex); |
    //$display( "ccl0inputbyte %x", rfile.sub(index) ); |
    index <= index+1; |
    outfifo.enq(DataByte rfile.sub(index)); |
  endrule |
|
  // Past the last byte: index is no longer advanced, so EndOfFile is enqueued |
  // every time outfifo has room. |
  rule end_of_file (index == lastByteIndex+1); |
    //$finish(0); |
    outfifo.enq(EndOfFile); |
  endrule |
|
  interface Get ioout = fifoToGet(outfifo); |
|
endmodule |
|
|
endpackage
/trunk/src/mkEntropyDec.bsv
0,0 → 1,1656
//********************************************************************** |
// Entropy Decoder implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkEntropyDec; |
|
import H264Types::*; |
import ExpGolomb::*; |
import CAVLC::*; |
import ICalc_nC::*; |
import mkCalc_nC::*; |
import IEntropyDec::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Parser state of the entropy decoder. Start and NewUnit are handled by |
// dedicated rules; every other state is consumed by the parser rule. |
// For CodedSlice the Bit#(5) payload is the step number within that syntax |
// structure (the parser dispatches on it and advances it); the other Bit#(5) |
// payloads presumably play the same role - confirm against the parser rule. |
// NOTE: member order determines the derived Bits encoding; do not reorder. |
typedef union tagged |
{ |
void Start; //special state that initializes the process. |
void NewUnit; //special state that checks the NAL unit type. |
|
Bit#(5) CodedSlice; //decodes a type of NAL unit |
void SEI; //decodes a type of NAL unit |
Bit#(5) SPS; //decodes a type of NAL unit |
Bit#(5) PPS; //decodes a type of NAL unit |
void AUD; //decodes a type of NAL unit |
void EndSequence; //decodes a type of NAL unit |
void EndStream; //decodes a type of NAL unit |
void Filler; //decodes a type of NAL unit |
|
Bit#(5) SliceData; //decodes slice data (part of a CodedSlice NAL unit) |
Bit#(5) MacroblockLayer; //decodes macroblock layer (part of a CodedSlice NAL unit) |
Bit#(5) MbPrediction; //decodes macroblock prediction (part of a CodedSlice NAL unit) |
Bit#(5) SubMbPrediction; //decodes sub-macroblock prediction (part of a CodedSlice NAL unit) |
Bit#(5) ResidualBlock; //decodes residual block (part of a CodedSlice NAL unit) |
} |
State deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
// Converts the mb_type syntax element to the MbType type. |
// For slice types 2 and 7 (I slices) the element is first offset by 5 so that |
// one numbering covers both cases: 0..4 are the P types, 5 is I_NxN, 30 is |
// I_PCM and every other value is decoded as an I_16x16 variant. |
function MbType mbtype_convert( Bit#(5) in_mb_type, Bit#(4) in_slice_type ); |
  Bool iSlice = (in_slice_type == 2 || in_slice_type == 7); |
  Bit#(5) mbIndex = (iSlice ? in_mb_type+5 : in_mb_type); |
  case ( mbIndex ) |
    0: return P_L0_16x16; |
    1: return P_L0_L0_16x8; |
    2: return P_L0_L0_8x16; |
    3: return P_8x8; |
    4: return P_8x8ref0; |
    5: return I_NxN; |
    30: return I_PCM; |
    default: |
      begin |
        // Index within the I_16x16 family; values 12 and above are the |
        // variants with codedBlockPatternLuma set. |
        Bit#(5) i16Index = mbIndex-6; |
        Bool upperHalf = !(i16Index < 12); |
        Bit#(2) predMode = i16Index[1:0]; |
        // The 2-bit "+1" deliberately wraps, so bits [3:2] values 3,0,1 |
        // (indices 12..23) yield chroma patterns 0,1,2. |
        Bit#(2) cbpChroma = (upperHalf ? i16Index[3:2]+1 : i16Index[3:2]); |
        Bit#(1) cbpLuma = (upperHalf ? 1 : 0); |
        return I_16x16{intra16x16PredMode:predMode, codedBlockPatternChroma:cbpChroma, codedBlockPatternLuma:cbpLuma}; |
      end |
  endcase |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Entropy Decoder Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkEntropyDec( IEntropyDec ); |
|
// Stream queues. infifo is consumed by the startup, newunit and |
// fillextrabuffer rules; the newunit rule enqueues a NewUnit entry on both |
// outfifo and outfifo_ITB. |
FIFO#(NalUnwrapOT) infifo <- mkSizedFIFO(entropyDec_infifo_size); |
FIFO#(EntropyDecOT) outfifo <- mkFIFO; |
FIFO#(EntropyDecOT_InverseTrans) outfifo_ITB <- mkFIFO; |
// Current parser state, and the two header fields the newunit rule extracts |
// from the first byte of a unit (bits [6:5] and [4:0]). |
Reg#(State) state <- mkReg(Start); |
Reg#(Bit#(2)) nalrefidc <- mkReg(0); |
Reg#(Bit#(5)) nalunittype <- mkReg(0); |
// Main bit buffer and the number of bits it holds; the fillbuffer rule adds |
// to it from extrabuffer and the parser rule reads from it. |
Reg#(Buffer) buffer <- mkReg(0); |
Reg#(Bufcount) bufcount <- mkReg(0); |
|
//saved syntax elements |
Reg#(Bit#(5)) spsseq_parameter_set_id <- mkReg(0); |
Reg#(Bit#(5)) spslog2_max_frame_num <- mkReg(0); |
Reg#(Bit#(5)) spslog2_max_pic_order_cnt_lsb <- mkReg(0); |
Reg#(Bit#(2)) spspic_order_cnt_type <- mkReg(0); |
Reg#(Bit#(1)) spsdelta_pic_order_always_zero_flag <- mkReg(0); |
Reg#(Bit#(8)) spsnum_ref_frames_in_pic_order_cnt_cycle <- mkReg(0); |
Reg#(Bit#(8)) ppspic_parameter_set_id <- mkReg(0); |
Reg#(Bit#(1)) ppspic_order_present_flag <- mkReg(0); |
Reg#(Bit#(1)) ppsdeblocking_filter_control_present_flag <- mkReg(0); |
Reg#(Bit#(4)) shslice_type <- mkReg(0); |
Reg#(Bit#(3)) shdmemory_management_control_operation <- mkReg(0); |
Reg#(MbType) sdmmbtype <- mkReg(I_NxN); |
Reg#(Bit#(4)) sdmcodedBlockPatternLuma <- mkReg(0); |
Reg#(Bit#(2)) sdmcodedBlockPatternChroma <- mkReg(0); |
Reg#(Bit#(5)) sdmrTotalCoeff <- mkReg(0); |
Reg#(Bit#(2)) sdmrTrailingOnes <- mkReg(0); |
|
//derived decoding variables for slice data |
Reg#(Bit#(16)) tempreg <- mkReg(0); |
Reg#(Bit#(5)) num_ref_idx_l0_active_minus1 <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbAddr <- mkReg(0); |
Reg#(Bit#(3)) temp3bit0 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit1 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit2 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit3 <- mkReg(0); |
Reg#(Bit#(5)) temp5bit <- mkReg(0); |
Reg#(Bit#(5)) temp5bit2 <- mkReg(0); |
Reg#(Bit#(5)) maxNumCoeff <- mkReg(0); |
FIFO#(Bit#(13)) cavlcFIFO <- mkSizedFIFO(16); |
Calc_nC calcnc <- mkCalc_nC(); |
Reg#(Bit#(1)) residualChroma <- mkReg(0); |
Reg#(Bit#(5)) totalCoeff <- mkReg(0); |
Reg#(Bit#(4)) zerosLeft <- mkReg(0); |
|
//exp-golomb 32-bit version states |
// egnumbits is 0 until the first of the two parser steps for a 32-bit |
// exp-golomb value records the code length; the second step clears it again. |
Reg#(Bufcount) egnumbits <- mkReg(0); |
|
//extra-buffering states |
// extrabuffer collects up to four bytes (extrabufcount of them) before the |
// fillbuffer rule moves them into buffer. extraendnalflag is set by |
// fillextrabuffer when a non-RbspByte entry is reached with a partly filled |
// word; endnalflag is set by fillbuffer when the next infifo entry is |
// NewUnit or EndOfFile. |
Reg#(Bit#(32)) extrabuffer <- mkReg(0); |
Reg#(Bit#(3)) extrabufcount <- mkReg(0); |
Reg#(Bit#(1)) extraendnalflag <- mkReg(0); |
Reg#(Bit#(1)) endnalflag <- mkReg(0); |
|
|
//----------------------------------------------------------- |
// Rules |
|
rule startup (state matches Start); |
  // Idle state: discard input until a NewUnit marker arrives, forwarding |
  // every EndOfFile marker seen on the way. |
  case (infifo.first()) matches |
    tagged NewUnit : |
      begin |
        // Clear all buffering state before the new unit is parsed. |
        endnalflag <= 0; |
        extraendnalflag <= 0; |
        extrabufcount <= 0; |
        extrabuffer <= 0; |
        bufcount <= 0; |
        buffer <= 0; |
        state <= NewUnit; |
        infifo.deq(); |
      end |
    // Payload bytes outside a unit are dropped. |
    tagged RbspByte .skipped : |
      infifo.deq(); |
    tagged EndOfFile : |
      begin |
        $display( "INFO EntropyDec: EndOfFile reached" ); |
        outfifo.enq(EndOfFile); |
        infifo.deq(); |
      end |
  endcase |
endrule |
|
|
rule newunit (state matches NewUnit); |
  // Reads the first byte of a unit: bits [6:5] are saved in nalrefidc, bits |
  // [4:0] in nalunittype, and the latter selects the parser state. The byte |
  // is forwarded as a NewUnit entry on both output queues. A NewUnit or |
  // EndOfFile marker at this point is left in infifo and the rule falls back |
  // to Start. |
  case (infifo.first()) matches |
    tagged RbspByte .rdata : |
      begin |
        Bit#(5) unitType = rdata[4:0]; |
        // Unknown unit types are reported and leave the state at Start. |
        State following = Start; |
        case (unitType) |
          1 : following = CodedSlice 0; |
          5 : following = CodedSlice 0; |
          6 : following = SEI; |
          7 : following = SPS 0; |
          8 : following = PPS 0; |
          9 : following = AUD; |
          10: following = EndSequence; |
          11: following = EndStream; |
          12: following = Filler; |
          default: $display( "ERROR EntropyDec: NAL Unit Type = %d", unitType ); |
        endcase |
        state <= following; |
        nalunittype <= unitType; |
        nalrefidc <= rdata[6:5]; |
        $display("ccl2newunit"); |
        $display("ccl2rbspbyte %h", rdata); |
        outfifo.enq(NewUnit rdata); |
        outfifo_ITB.enq(NewUnit rdata); |
        infifo.deq(); |
      end |
    tagged NewUnit : state <= Start; |
    tagged EndOfFile : state <= Start; |
  endcase |
endrule |
|
|
rule fillextrabuffer (state != Start |
                      && state != NewUnit |
                      && extrabufcount < 4 |
                      && extraendnalflag == 0); |
  // Collects payload bytes into the 32-bit staging word, most significant |
  // byte first, one byte per firing. When the next infifo entry is not a |
  // payload byte and the word is partly filled, extraendnalflag is raised so |
  // the partial word can still be handed over; the entry stays in infifo. |
  case (infifo.first()) matches |
    tagged RbspByte .dbyte : |
      begin |
        case ( extrabufcount ) |
          0: extrabuffer <= {dbyte, extrabuffer[23:0]}; |
          1: extrabuffer <= {extrabuffer[31:24],dbyte,extrabuffer[15:0]}; |
          2: extrabuffer <= {extrabuffer[31:16],dbyte,extrabuffer[7:0]}; |
          3: extrabuffer <= {extrabuffer[31:8],dbyte}; |
          default: $display( "ERROR EntropyDec: fillextrabuffer default case_" ); |
        endcase |
        infifo.deq(); |
        extrabufcount <= extrabufcount + 1; |
        //$display( "TRACE EntropyDec: fillextrabuffer RbspByte %h %h %h", dbyte, extrabufcount, extrabuffer); |
      end |
    default : |
      begin |
        if(extrabufcount != 0) |
          extraendnalflag <= 1; |
        //$display( "TRACE EntropyDec: fillextrabuffer else %h", extrabufcount); |
      end |
  endcase |
endrule |
|
|
rule fillbuffer (state != Start |
                 && state != NewUnit |
                 && bufcount<=truncate(buffersize-32) |
                 && (extrabufcount == 4 || extraendnalflag == 1) |
                 && endnalflag == 0);//predicate not sure |
  // Moves the staging word (full, or partial once extraendnalflag is set) |
  // into the main bit buffer directly below the bits already held, and |
  // raises endnalflag when the next infifo entry ends the current unit. |
  // NOTE(review): the original author marked this guard "predicate not sure". |
  Bufcount shiftAmt = truncate(buffersize)-bufcount-32; |
  Buffer staged = zeroExtend(extrabuffer); |
  buffer <= (buffer | (staged << zeroExtend(shiftAmt))); |
  // bufcount grows by 8 bits per staged byte. |
  case ( extrabufcount ) |
    1: bufcount <= bufcount+8; |
    2: bufcount <= bufcount+16; |
    3: bufcount <= bufcount+24; |
    4: bufcount <= bufcount+32; |
    default: $display( "ERROR EntropyDec: fillbuffer default case" ); |
  endcase |
  extrabufcount <= 0; |
  extrabuffer <= 0; |
  let upcoming = infifo.first(); |
  if(upcoming==EndOfFile || upcoming==NewUnit) |
    endnalflag <= 1; |
  //$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h %h %h %h %h %h %h", extrabufcount, bufcount, extrabuffer, temp, temp2, (temp << zeroExtend(temp2)), buffer, (buffer | (temp << zeroExtend(temp2)))); |
endrule |
|
|
rule parser (state != Start |
&&& state != NewUnit |
&&& (bufcount > truncate(buffersize-32) || endnalflag == 1));//predicate not sure |
//$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h", bufcount, buffer ); |
|
Bufcount numbitsused = 0; |
State nextstate = Start; |
Int#(16) tempint = 0; |
Int#(32) tempint32 = 0; |
|
case ( state ) matches |
tagged CodedSlice .step : |
begin |
case ( step ) |
0: |
begin |
$display( "ccl2SHfirst_mb_in_slice %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHfirst_mb_in_slice truncate(expgolomb_unsigned(buffer))); |
currMbAddr <= truncate(expgolomb_unsigned(buffer)); |
calcnc.initialize(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 1; |
end |
1: |
begin |
$display( "ccl2SHslice_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHslice_type truncate(expgolomb_unsigned(buffer))); |
shslice_type <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 2; |
end |
2: |
begin |
$display( "ccl2SHpic_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 3; |
if(ppspic_parameter_set_id != truncate(expgolomb_unsigned(buffer))) $display( "ERROR EntropyDec: pic_parameter_set_id don't match" ); |
end |
3: |
begin |
Bit#(16) tttt = buffer[buffersize-1:buffersize-16]; |
tttt = tttt >> 16 - zeroExtend(spslog2_max_frame_num); |
$display( "ccl2SHframe_num %0d", tttt ); |
outfifo.enq(SHframe_num tttt); |
numbitsused = zeroExtend(spslog2_max_frame_num); |
nextstate = CodedSlice 4; |
end |
4: |
begin |
if(nalunittype == 5) |
begin |
$display( "ccl2SHidr_pic_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHidr_pic_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = CodedSlice 5; |
end |
5: |
begin |
if(spspic_order_cnt_type == 0) |
begin |
Bit#(16) tttt = buffer[buffersize-1:buffersize-16]; |
tttt = tttt >> 16 - zeroExtend(spslog2_max_pic_order_cnt_lsb); |
$display( "ccl2SHpic_order_cnt_lsb %0d", tttt ); |
outfifo.enq(SHpic_order_cnt_lsb tttt); |
numbitsused = zeroExtend(spslog2_max_pic_order_cnt_lsb); |
nextstate = CodedSlice 6; |
end |
else |
nextstate = CodedSlice 7; |
end |
6: |
begin |
if(ppspic_order_present_flag == 1) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 6; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt_bottom %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt_bottom truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 7; |
end |
end |
else |
nextstate = CodedSlice 7; |
end |
7: |
begin |
if(spspic_order_cnt_type == 1 && spsdelta_pic_order_always_zero_flag == 0) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 7; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt0 %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt0 truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 8; |
end |
end |
else |
nextstate = CodedSlice 9; |
end |
8: |
begin |
if(ppspic_order_present_flag == 1) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 8; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt1 %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt1 truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 9; |
end |
end |
else |
nextstate = CodedSlice 9; |
end |
9: |
begin |
if(shslice_type == 0 || shslice_type == 5) |
begin |
$display( "ccl2SHnum_ref_idx_active_override_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHnum_ref_idx_active_override_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 10; |
else |
nextstate = CodedSlice 11; |
end |
else |
nextstate = CodedSlice 11; |
end |
10: |
begin |
$display( "ccl2SHnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SHnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1)); |
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 11; |
end |
11: |
begin |
if(shslice_type != 2 && shslice_type != 7) |
begin |
$display( "ccl2SHRref_pic_list_reordering_flag_l0 %0d", buffer[buffersize-1] ); |
outfifo.enq(SHRref_pic_list_reordering_flag_l0 buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 12; |
else |
nextstate = CodedSlice 15; |
end |
else |
nextstate = CodedSlice 15; |
end |
12: |
begin |
$display( "ccl2SHRreordering_of_pic_nums_idc %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHRreordering_of_pic_nums_idc truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)==0 || expgolomb_unsigned(buffer)==1) |
nextstate = CodedSlice 13; |
else if(expgolomb_unsigned(buffer)==2) |
nextstate = CodedSlice 14; |
else |
nextstate = CodedSlice 15; |
end |
13: |
begin |
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1; |
$display( "ccl2SHRabs_diff_pic_num %0d", temp17 ); |
outfifo.enq(SHRabs_diff_pic_num temp17); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 12; |
end |
14: |
begin |
$display( "ccl2SHRlong_term_pic_num %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHRlong_term_pic_num truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 12; |
end |
15: |
begin |
if(nalrefidc == 0) |
nextstate = CodedSlice 23; |
else |
begin |
if(nalunittype == 5) |
begin |
$display( "ccl2SHDno_output_of_prior_pics_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDno_output_of_prior_pics_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = CodedSlice 16; |
end |
else |
nextstate = CodedSlice 17; |
end |
end |
16: |
begin |
$display( "ccl2SHDlong_term_reference_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDlong_term_reference_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = CodedSlice 23; |
end |
17: |
begin |
$display( "ccl2SHDadaptive_ref_pic_marking_mode_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDadaptive_ref_pic_marking_mode_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 18; |
else |
nextstate = CodedSlice 23; |
end |
18: |
begin |
$display( "ccl2SHDmemory_management_control_operation %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDmemory_management_control_operation truncate(expgolomb_unsigned(buffer))); |
shdmemory_management_control_operation <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)!=0) |
nextstate = CodedSlice 19; |
else |
nextstate = CodedSlice 23; |
end |
19: |
begin |
if(shdmemory_management_control_operation==1 || shdmemory_management_control_operation==3) |
begin |
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1; |
$display( "ccl2SHDdifference_of_pic_nums %0d", temp17 ); |
outfifo.enq(SHDdifference_of_pic_nums temp17); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 20; |
end |
else |
nextstate = CodedSlice 20; |
end |
20: |
begin |
if(shdmemory_management_control_operation==2) |
begin |
$display( "ccl2SHDlong_term_pic_num %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDlong_term_pic_num truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 21; |
end |
else |
nextstate = CodedSlice 21; |
end |
21: |
begin |
if(shdmemory_management_control_operation==3 || shdmemory_management_control_operation==6) |
begin |
$display( "ccl2SHDlong_term_frame_idx %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDlong_term_frame_idx truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 22; |
end |
else |
nextstate = CodedSlice 22; |
end |
22: |
begin |
if(shdmemory_management_control_operation==4) |
begin |
$display( "ccl2SHDmax_long_term_frame_idx_plus1 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDmax_long_term_frame_idx_plus1 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 18; |
end |
else |
nextstate = CodedSlice 18; |
end |
23: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SHslice_qp_delta %0d", tempint ); |
outfifo_ITB.enq(SHslice_qp_delta truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 24; |
end |
24: |
begin |
if(ppsdeblocking_filter_control_present_flag==1) |
begin |
$display( "ccl2SHdisable_deblocking_filter_idc %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHdisable_deblocking_filter_idc truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)!=1) |
nextstate = CodedSlice 25; |
else |
nextstate = CodedSlice 27; |
end |
else |
nextstate = CodedSlice 27; |
end |
25: |
begin |
tempint = unpack(expgolomb_signed(buffer) << 1); |
$display( "ccl2SHslice_alpha_c0_offset %0d", tempint ); |
outfifo.enq(SHslice_alpha_c0_offset truncate(expgolomb_signed(buffer) << 1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 26; |
end |
26: |
begin |
tempint = unpack(expgolomb_signed(buffer) << 1); |
$display( "ccl2SHslice_beta_offset %0d", tempint ); |
outfifo.enq(SHslice_beta_offset truncate(expgolomb_signed(buffer) << 1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 27; |
end |
27: |
begin |
nextstate = SliceData 0; |
end |
default: $display( "ERROR EntropyDec: CodedSlice default step" ); |
endcase |
end |
// SEI arm: nothing is read from buffer; the parser goes back to Start and logs that the data was dropped. |
// The bound step value is not used. |
tagged SEI .step : |
begin |
nextstate = Start; |
$display( "INFO EntropyDec: SEI data thrown away" ); |
end |
// SPS arm: walks the sequence parameter set one field per step. A step typically logs the field with |
// $display, forwards it on outfifo, sets numbitsused to the number of bits taken from the head of buffer |
// and selects the following step through nextstate. |
// NOTE(review): numbitsused and nextstate are presumably applied after this case statement; that code is not visible here. |
tagged SPS .step : |
begin |
case ( step ) |
// Step 0: the three leading bytes are only logged (profile_idc, constraint_set, level_idc); 24 bits are consumed. |
0: |
begin |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "INFO EntropyDec: profile_idc = %d", outputdata ); |
outputdata = buffer[buffersize-9:buffersize-16]; |
$display( "INFO EntropyDec: constraint_set = %b", outputdata ); |
outputdata = buffer[buffersize-17:buffersize-24]; |
$display( "INFO EntropyDec: level_idc = %d", outputdata ); |
numbitsused = 24; |
nextstate = SPS 1; |
end |
// Step 1: seq_parameter_set_id is forwarded and latched in spsseq_parameter_set_id (compared again in PPS step 1). |
1: |
begin |
$display( "ccl2SPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
spsseq_parameter_set_id <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 2; |
end |
// Step 2: log2_max_frame_num is the decoded value plus 4; forwarded and latched. |
2: |
begin |
$display( "ccl2SPSlog2_max_frame_num %0d", expgolomb_unsigned(buffer)+4 ); |
outfifo.enq(SPSlog2_max_frame_num truncate(expgolomb_unsigned(buffer)+4)); |
spslog2_max_frame_num <= truncate(expgolomb_unsigned(buffer)+4); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 3; |
end |
// Step 3: pic_order_cnt_type selects the branch: 0 -> step 4, 1 -> step 5, anything else -> step 10. |
3: |
begin |
let tttt = expgolomb_unsigned(buffer); |
$display( "ccl2SPSpic_order_cnt_type %0d", tttt ); |
outfifo.enq(SPSpic_order_cnt_type truncate(tttt)); |
spspic_order_cnt_type <= truncate(tttt); |
numbitsused = expgolomb_numbits(buffer); |
if(tttt == 0) |
nextstate = SPS 4; |
else if(tttt == 1) |
nextstate = SPS 5; |
else |
nextstate = SPS 10; |
end |
// Step 4 (pic_order_cnt_type == 0): log2_max_pic_order_cnt_lsb is the decoded value plus 4. |
4: |
begin |
$display( "ccl2SPSlog2_max_pic_order_cnt_lsb %0d", expgolomb_unsigned(buffer)+4 ); |
outfifo.enq(SPSlog2_max_pic_order_cnt_lsb truncate(expgolomb_unsigned(buffer)+4)); |
spslog2_max_pic_order_cnt_lsb <= truncate(expgolomb_unsigned(buffer)+4); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 10; |
end |
// Step 5 (pic_order_cnt_type == 1): one-bit delta_pic_order_always_zero_flag, forwarded and latched. |
5: |
begin |
$display( "ccl2SPSdelta_pic_order_always_zero_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSdelta_pic_order_always_zero_flag buffer[buffersize-1]); |
spsdelta_pic_order_always_zero_flag <= buffer[buffersize-1]; |
numbitsused = 1; |
nextstate = SPS 6; |
end |
// Step 6: offset_for_non_ref_pic, decoded in two passes through this step. |
// Pass 1 (egnumbits == 0): expgolomb_numbits32 yields a bit count that is stored in egnumbits; count-1 bits are consumed. |
// Pass 2: expgolomb_signed32 decodes with the stored count, the value is forwarded, egnumbits is cleared |
// and egnumbits bits are consumed. |
// NOTE(review): pass 1 presumably strips the prefix of the code word - confirm against expgolomb_numbits32. |
6: |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 6; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_non_ref_pic %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_non_ref_pic truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = SPS 7; |
end |
end |
// Step 7: offset_for_top_to_bottom_field, same two-pass scheme as step 6. |
7: |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 7; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_top_to_bottom_field %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_top_to_bottom_field truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = SPS 8; |
end |
end |
// Step 8: num_ref_frames_in_pic_order_cnt_cycle is forwarded and latched; step 9 uses the register as a countdown. |
8: |
begin |
$display( "ccl2SPSnum_ref_frames_in_pic_order_cnt_cycle %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSnum_ref_frames_in_pic_order_cnt_cycle truncate(expgolomb_unsigned(buffer))); |
spsnum_ref_frames_in_pic_order_cnt_cycle <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 9; |
end |
// Step 9: loops until the countdown register reaches 0, emitting one offset_for_ref_frame per iteration |
// with the same two-pass scheme as step 6; the register is decremented on the second pass. |
9: |
begin |
if(spsnum_ref_frames_in_pic_order_cnt_cycle == 0) |
nextstate = SPS 10; |
else |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 9; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_ref_frame %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_ref_frame truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
spsnum_ref_frames_in_pic_order_cnt_cycle <= spsnum_ref_frames_in_pic_order_cnt_cycle - 1; |
numbitsused = egnumbits; |
nextstate = SPS 9; |
end |
end |
end |
// Step 10: num_ref_frames, forwarded only. |
10: |
begin |
$display( "ccl2SPSnum_ref_frames %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSnum_ref_frames truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 11; |
end |
// Step 11: one-bit gaps_in_frame_num_allowed_flag. |
11: |
begin |
$display( "ccl2SPSgaps_in_frame_num_allowed_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSgaps_in_frame_num_allowed_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = SPS 12; |
end |
// Step 12: pic_width_in_mbs is the decoded value plus 1; it is forwarded and also handed to calcnc. |
12: |
begin |
$display( "ccl2SPSpic_width_in_mbs %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SPSpic_width_in_mbs truncate(expgolomb_unsigned(buffer)+1)); |
calcnc.initialize_picWidth(truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 13; |
end |
// Step 13: pic_height_in_map_units is the decoded value plus 1. |
13: |
begin |
$display( "ccl2SPSpic_height_in_map_units %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SPSpic_height_in_map_units truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 14; |
end |
// Step 14: frame_mbs_only_flag is consumed but not forwarded. The following steps assume it is 1 (the very next |
// bit is read as direct_8x8_inference_flag), so any other value is now reported with an ERROR line, as the |
// PPS arm does for its unsupported field values, instead of being skipped silently. |
14: |
begin |
//SPSframe_mbs_only_flag = 1 for baseline |
numbitsused = 1; |
nextstate = SPS 15; |
if(buffer[buffersize-1] != 1) |
$display( "ERROR EntropyDec: SPSframe_mbs_only_flag not equal to 1" );//=1 for baseline |
end |
// Step 15: one-bit direct_8x8_inference_flag. |
15: |
begin |
$display( "ccl2SPSdirect_8x8_inference_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSdirect_8x8_inference_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = SPS 16; |
end |
// Step 16: one-bit frame_cropping_flag; when set the four crop offsets (steps 17-20) follow, otherwise jump to step 21. |
16: |
begin |
$display( "ccl2SPSframe_cropping_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSframe_cropping_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = SPS 17; |
else |
nextstate = SPS 21; |
end |
// Steps 17-20: left, right, top and bottom crop offsets, each forwarded unchanged. |
17: |
begin |
$display( "ccl2SPSframe_crop_left_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_left_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 18; |
end |
18: |
begin |
$display( "ccl2SPSframe_crop_right_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_right_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 19; |
end |
19: |
begin |
$display( "ccl2SPSframe_crop_top_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_top_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 20; |
end |
20: |
begin |
$display( "ccl2SPSframe_crop_bottom_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_bottom_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 21; |
end |
// Step 21: nothing further is parsed; the parser returns to Start and logs that the remaining data is dropped. |
21: |
begin |
nextstate = Start; |
$display( "INFO EntropyDec:VUI data thrown away" ); |
end |
default: $display( "ERROR EntropyDec: SPS default step" ); |
endcase |
end |
// PPS arm: walks the picture parameter set one field per step. A step logs the field with $display, |
// forwards it on outfifo and/or outfifo_ITB, sets numbitsused to the bits taken from the head of buffer |
// and selects the following step through nextstate. Fields that are skipped on the assumption of a |
// fixed value are checked and reported with an ERROR line when the assumption does not hold. |
// NOTE(review): numbitsused and nextstate are presumably applied after this case statement; that code is not visible here. |
tagged PPS .step : |
begin |
case ( step ) |
// Step 0: pic_parameter_set_id is latched and sent on both output FIFOs. |
0: |
begin |
ppspic_parameter_set_id <= truncate(expgolomb_unsigned(buffer)); |
$display( "ccl2PPSpic_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
outfifo_ITB.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 1; |
end |
// Step 1: seq_parameter_set_id is forwarded; a mismatch with the latched spsseq_parameter_set_id is only reported. |
1: |
begin |
$display( "ccl2PPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(PPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 2; |
if(spsseq_parameter_set_id != truncate(expgolomb_unsigned(buffer))) |
$display( "ERROR EntropyDec: seq_parameter_set_id don't match" ); |
end |
// Step 2: entropy_coding_mode_flag is consumed but not forwarded. It is assumed to be 0; a set bit is now |
// reported, matching the checks in steps 4 and 13, rather than being ignored. |
2: |
begin |
//PPSentropy_coding_mode_flag = 0 for baseline |
numbitsused = 1; |
nextstate = PPS 3; |
if(buffer[buffersize-1] != 0) |
$display( "ERROR EntropyDec: PPSentropy_coding_mode_flag not equal to 0" );//=0 for baseline |
end |
// Step 3: one-bit pic_order_present_flag, latched and forwarded. |
3: |
begin |
ppspic_order_present_flag <= buffer[buffersize-1]; |
$display( "ccl2PPSpic_order_present_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSpic_order_present_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 4; |
end |
// Step 4: the slice-group count (decoded value plus 1) must be 1; any other value is reported. Nothing is forwarded. |
4: |
begin |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 5; |
if(expgolomb_unsigned(buffer)+1 != 1) |
$display( "ERROR EntropyDec: PPSnum_slice_groups not equal to 1" );//=1 for main |
end |
// Step 5: num_ref_idx_l0_active is forwarded as decoded value plus 1; the raw value is latched in num_ref_idx_l0_active_minus1. |
5: |
begin |
$display( "ccl2PPSnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(PPSnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1)); |
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 6; |
end |
// Step 6: num_ref_idx_l1_active is forwarded as decoded value plus 1. |
6: |
begin |
$display( "ccl2PPSnum_ref_idx_l1_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(PPSnum_ref_idx_l1_active truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 7; |
end |
// Step 7: three bits (weighted_pred_flag followed by weighted_bipred_idc) are consumed but not forwarded. |
// They are assumed to be 0; a non-zero value is now reported, matching the checks in steps 4 and 13. |
7: |
begin |
//PPSweighted_pred_flag = 0 for baseline; PPSweighted_bipred_idc = 0 for baseline |
Bit#(3) weightedbits = buffer[buffersize-1:buffersize-3]; |
numbitsused = 3; |
nextstate = PPS 8; |
if(weightedbits != 0) |
$display( "ERROR EntropyDec: PPSweighted_pred_flag or PPSweighted_bipred_idc not equal to 0" );//=0 for baseline |
end |
// Step 8: pic_init_qp is the signed decoded value plus 26, sent on outfifo_ITB. |
8: |
begin |
$display( "ccl2PPSpic_init_qp %0d", expgolomb_signed(buffer)+26 ); |
outfifo_ITB.enq(PPSpic_init_qp truncate(expgolomb_signed(buffer)+26)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 9; |
end |
// Step 9: pic_init_qs is the signed decoded value plus 26, sent on outfifo_ITB. |
9: |
begin |
$display( "ccl2PPSpic_init_qs %0d", expgolomb_signed(buffer)+26 ); |
outfifo_ITB.enq(PPSpic_init_qs truncate(expgolomb_signed(buffer)+26)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 10; |
end |
// Step 10: chroma_qp_index_offset, signed, sent on outfifo_ITB; tempint is only used for the log line. |
10: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2PPSchroma_qp_index_offset %0d", tempint ); |
outfifo_ITB.enq(PPSchroma_qp_index_offset truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 11; |
end |
// Step 11: one-bit deblocking_filter_control_present_flag, latched and forwarded. |
11: |
begin |
ppsdeblocking_filter_control_present_flag <= buffer[buffersize-1]; |
$display( "ccl2PPSdeblocking_filter_control_present_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSdeblocking_filter_control_present_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 12; |
end |
// Step 12: one-bit constrained_intra_pred_flag. |
12: |
begin |
$display( "ccl2PPSconstrained_intra_pred_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSconstrained_intra_pred_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 13; |
end |
// Step 13: redundant_pic_cnt_present_flag must be 0; a set bit is reported. Nothing is forwarded. |
13: |
begin |
//PPSredundant_pic_cnt_present_flag = 0 for main |
numbitsused = 1; |
nextstate = PPS 14; |
if(buffer[buffersize-1] != 0) |
$display( "ERROR EntropyDec: PPSredundant_pic_cnt_present_flag not equal to 0" );//=0 for main |
end |
// Step 14: end of the parameter set; return to Start without consuming anything. |
14: |
begin |
nextstate = Start; |
end |
default: $display( "ERROR EntropyDec: PPS default step" ); |
endcase |
end |
// AUD arm: forwards the three bits at the head of buffer as AUDPrimaryPicType, consumes them and |
// returns to Start. The bound step value is not used. |
tagged AUD .step : |
begin |
outfifo.enq(AUDPrimaryPicType buffer[buffersize-1:buffersize-3]); |
numbitsused = 3; |
nextstate = Start; |
end |
// EndSequence arm: emits an EndOfSequence marker on outfifo and returns to Start; no bits are read here. |
tagged EndSequence : |
begin |
outfifo.enq(EndOfSequence); |
nextstate = Start; |
end |
// EndStream arm: emits an EndOfStream marker on outfifo and returns to Start; no bits are read here. |
tagged EndStream : |
begin |
outfifo.enq(EndOfStream); |
nextstate = Start; |
end |
// Filler arm: nothing is read or emitted; the parser simply returns to Start. |
tagged Filler : |
begin |
nextstate = Start; |
end |
// SliceData arm: per-macroblock loop of a slice. Handles the skip run, decides whether another |
// macroblock follows, hands over to MacroblockLayer 0 and is re-entered at step 3 when that returns. |
tagged SliceData .step : |
begin |
case ( step ) |
// Step 0: unless shslice_type is 2 or 7, mb_skip_run is decoded, forwarded, stored in tempreg and |
// passed to calcnc; for slice types 2 and 7 nothing is read and control goes straight to step 2. |
0: |
begin |
if( shslice_type!=2 && shslice_type!=7 ) |
begin |
$display( "ccl2SDmb_skip_run %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDmb_skip_run truncate(expgolomb_unsigned(buffer))); |
tempreg <= truncate(expgolomb_unsigned(buffer)); |
calcnc.nNupdate_pskip( truncate(expgolomb_unsigned(buffer)) ); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SliceData 1; |
end |
else |
nextstate = SliceData 2; |
end |
// Step 1: counts tempreg down to 0, advancing currMbAddr by one per pass, then moves on to step 2. |
1: |
begin |
if( tempreg>0 ) |
begin |
currMbAddr <= currMbAddr+1;//only because input assumed to conform to both baseline and main |
tempreg <= tempreg-1; |
nextstate = SliceData 1; |
end |
else |
begin |
////$display( "ccl2SDcurrMbAddr %0d", currMbAddr ); |
////outfifo.enq(SDcurrMbAddr currMbAddr); |
nextstate = SliceData 2; |
end |
end |
// Step 2: if more than 8 bits are buffered, or the head bit is not 1, or any bit after the head bit is set, |
// the macroblock at currMbAddr is loaded into calcnc and MacroblockLayer 0 starts; otherwise go to step 3. |
// NOTE(review): the condition looks like a "more data before the trailing stop bit" test - confirm. |
2: |
begin |
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 ) |
begin |
calcnc.loadMb(currMbAddr); |
nextstate = MacroblockLayer 0; |
end |
else |
nextstate = SliceData 3; |
end |
// Step 3: advances currMbAddr, then uses the same test as step 2 to choose between another pass |
// from step 0 and returning to Start. |
3: |
begin |
currMbAddr <= currMbAddr+1;//only because input assumed to conform to both baseline and main |
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 ) |
nextstate = SliceData 0; |
else |
nextstate = Start; |
end |
default: $display( "ERROR EntropyDec: SliceData default step" ); |
endcase |
end |
// MacroblockLayer arm: parses one macroblock header. Steps 0-4 read mb_type and either stream raw PCM |
// samples or branch to MbPrediction / SubMbPrediction; those arms come back at step 5, and step 6 |
// hands over to ResidualBlock 0. |
tagged MacroblockLayer .step : //return to SliceData 3 |
begin |
case ( step ) |
// Step 0: mb_type is decoded and converted with mbtype_convert (which also takes shslice_type); the result |
// is sent on both FIFOs and latched in sdmmbtype. I_PCM goes to step 1 after notifying calcnc, anything else to step 4. |
0: |
begin |
$display( "ccl2SDMmb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) ); |
outfifo_ITB.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) ); |
sdmmbtype <= mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type); |
numbitsused = expgolomb_numbits(buffer); |
if(mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) == I_PCM) |
begin |
calcnc.nNupdate_ipcm(); |
nextstate = MacroblockLayer 1; |
end |
else |
nextstate = MacroblockLayer 4; |
end |
// Step 1 (I_PCM): consumes bufcount[2:0] bits and loads tempreg with 256 for the loop in step 2. |
// NOTE(review): the consumed bits are presumably the padding up to the next byte boundary - confirm. |
1: |
begin |
tempreg <= 256; |
numbitsused = zeroExtend(bufcount[2:0]); |
nextstate = MacroblockLayer 2; |
end |
// Step 2: emits one 8-bit pcm_sample_luma per pass while tempreg > 0, then reloads tempreg with 128 for step 3. |
2: |
begin |
if( tempreg>0 ) |
begin |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "ccl2SDMpcm_sample_luma %0d", outputdata ); |
outfifo.enq(SDMpcm_sample_luma outputdata); |
tempreg <= tempreg-1; |
numbitsused = 8; |
nextstate = MacroblockLayer 2; |
end |
else |
begin |
tempreg <= 128; |
nextstate = MacroblockLayer 3; |
end |
end |
// Step 3: emits one 8-bit pcm_sample_chroma per pass while tempreg > 0, then returns to SliceData 3. |
3: |
begin |
if( tempreg>0 ) |
begin |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "ccl2SDMpcm_sample_chroma %0d", outputdata ); |
outfifo.enq(SDMpcm_sample_chroma outputdata); |
tempreg <= tempreg-1; |
numbitsused = 8; |
nextstate = MacroblockLayer 3; |
end |
else |
nextstate = SliceData 3; |
end |
// Step 4: macroblocks that are not I_NxN, not Intra_16x16 and have four partitions use SubMbPrediction; |
// every other type uses MbPrediction. No bits are consumed. |
4: |
begin |
if(sdmmbtype != I_NxN |
&&& mbPartPredMode(sdmmbtype,0) != Intra_16x16 |
&&& numMbPart(sdmmbtype) == 4) |
nextstate = SubMbPrediction 0; |
else |
nextstate = MbPrediction 0; |
end |
// Step 5: sets the luma/chroma coded-block-pattern registers. For non-Intra_16x16 macroblocks they are decoded |
// from the bitstream; for I_16x16 they are taken from the fields of sdmmbtype (the luma value replicated four times). |
5: |
begin |
if(mbPartPredMode(sdmmbtype,0) != Intra_16x16) |
begin |
$display( "ccl2SDMcoded_block_pattern %0d", expgolomb_coded_block_pattern(buffer,sdmmbtype) ); |
////outfifo.enq(SDMcoded_block_pattern expgolomb_coded_block_pattern(buffer,sdmmbtype)); |
sdmcodedBlockPatternLuma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[3:0]; |
sdmcodedBlockPatternChroma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[5:4]; |
numbitsused = expgolomb_numbits(buffer); |
end |
else |
begin |
if(sdmmbtype matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3}) |
begin |
sdmcodedBlockPatternLuma <= {tempv3,tempv3,tempv3,tempv3}; |
sdmcodedBlockPatternChroma <= tempv2; |
end |
else |
$display( "ERROR EntropyDec: MacroblockLayer 5 sdmmbtype not I_16x16" ); |
end |
nextstate = MacroblockLayer 6; |
end |
// Step 6: mb_qp_delta is read only when some coded-block-pattern bit is set or the macroblock is Intra_16x16. |
// The residual bookkeeping registers are then reset and ResidualBlock 0 starts. |
6: |
begin |
if(sdmcodedBlockPatternLuma > 0 |
|| sdmcodedBlockPatternChroma > 0 |
|| mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMmb_qp_delta %0d", tempint ); |
outfifo_ITB.enq(SDMmb_qp_delta truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
residualChroma <= 0; |
temp5bit <= 0; |
maxNumCoeff <= 16; |
nextstate = ResidualBlock 0; |
end |
default: $display( "ERROR EntropyDec: MacroblockLayer default step" ); |
endcase |
end |
// MbPrediction arm: prediction syntax for macroblocks that are not split into sub-macroblocks. |
// Intra types read their prediction modes; the remaining types read reference indices (step 2) and |
// motion-vector differences (steps 3-4). Every path ends at MacroblockLayer 5. |
tagged MbPrediction .step : //return to MacroblockLayer 5 |
begin |
case ( step ) |
// Step 0: dispatch. Intra_16x16 reads intra_chroma_pred_mode right away; Intra_4x4 loads temp5bit with 16 |
// for step 1; otherwise temp3bit0 gets the partition count and step 2 is used only when |
// num_ref_idx_l0_active_minus1 > 0 (else step 3). |
0: |
begin |
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
begin |
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MacroblockLayer 5; |
end |
else if(mbPartPredMode(sdmmbtype,0) == Intra_4x4) |
begin |
temp5bit <= 16; |
nextstate = MbPrediction 1; |
end |
else if(num_ref_idx_l0_active_minus1 > 0) |
begin |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 2; |
end |
else |
begin |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 3; |
end |
end |
// Step 1: temp5bit counts the remaining entries. While it is non-zero one entry is emitted per pass: |
// head bit 0 -> the top four bits are forwarded as SDMMrem_intra4x4_pred_mode and 4 bits are consumed; |
// head bit 1 -> the constant 4'b1000 is forwarded and 1 bit is consumed. |
// At zero, intra_chroma_pred_mode is read and control returns to MacroblockLayer 5. |
1: |
begin |
if(temp5bit == 0) |
begin |
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MacroblockLayer 5; |
end |
else |
begin |
////$display( "ccl2SDMMprev_intra4x4_pred_mode_flag %0d", buffer[buffersize-1] ); |
if(buffer[buffersize-1] == 0) |
begin |
Bit#(4) tttt = buffer[buffersize-1:buffersize-4]; |
$display( "ccl2SDMMrem_intra4x4_pred_mode %0d", tttt ); |
outfifo.enq(SDMMrem_intra4x4_pred_mode tttt); |
numbitsused = 4; |
end |
else |
begin |
outfifo.enq(SDMMrem_intra4x4_pred_mode 4'b1000); |
numbitsused = 1; |
end |
temp5bit <= temp5bit-1; |
nextstate = MbPrediction 1; |
end |
end |
// Step 2: one ref_idx_l0 per pass. When num_ref_idx_l0_active_minus1 is 1 the index is a single inverted bit; |
// otherwise it is decoded with expgolomb_unsigned. temp3bit0 counts the partitions and is reloaded with the |
// partition count before moving to step 3. |
2: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMMref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMMref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMMref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
if(temp3bit0 == 1) |
begin |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 3; |
end |
else |
begin |
temp3bit0 <= temp3bit0-1; |
nextstate = MbPrediction 2; |
end |
end |
// Step 3: first mvd_l0 value of a partition; always followed by step 4. |
3: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMMmvd_l0 %0d", tempint ); |
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MbPrediction 4; |
end |
// Step 4: second mvd_l0 value of the partition; decrements temp3bit0 and either loops back to step 3 |
// or, on the last partition, returns to MacroblockLayer 5. |
4: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMMmvd_l0 %0d", tempint ); |
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit0 <= temp3bit0-1; |
if(temp3bit0 == 1) |
nextstate = MacroblockLayer 5; |
else |
nextstate = MbPrediction 3; |
end |
default: $display( "ERROR EntropyDec: MbPrediction default step" ); |
endcase |
end |
// SubMbPrediction arm: prediction syntax for a macroblock split into four sub-macroblocks. |
// Steps 0-3 read the four sub_mb_type values, steps 4-7 the four reference indices (optional), |
// and steps 8-15 the motion-vector differences, two steps per sub-macroblock. Ends at MacroblockLayer 5. |
tagged SubMbPrediction .step : //return to MacroblockLayer 5 |
begin |
case ( step ) |
// Steps 0-3: one sub_mb_type each; numSubMbPart of the value is stored in temp3bit0..temp3bit3 |
// and later used as the loop count for that sub-macroblock's motion vectors. |
0: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit0 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 1; |
end |
1: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit1 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 2; |
end |
2: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit2 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 3; |
end |
// Step 3 also decides whether reference indices follow: they are read (step 4) only when |
// num_ref_idx_l0_active_minus1 > 0 and the macroblock type is not P_8x8ref0; otherwise jump to step 8. |
3: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit3 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
if(num_ref_idx_l0_active_minus1 > 0 |
&& sdmmbtype != P_8x8ref0) |
nextstate = SubMbPrediction 4; |
else |
nextstate = SubMbPrediction 8; |
end |
// Steps 4-7: one ref_idx_l0 each. When num_ref_idx_l0_active_minus1 is 1 the index is a single |
// inverted bit; otherwise it is decoded with expgolomb_unsigned. |
4: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 5; |
end |
5: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 6; |
end |
6: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 7; |
end |
7: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 8; |
end |
// Steps 8-9: mvd_l0 pair for the first sub-macroblock. Step 9 decrements temp3bit0 and loops back to |
// step 8 until the last partition, then moves to step 10. |
8: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 9; |
end |
9: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit0 <= temp3bit0-1; |
if(temp3bit0 == 1) |
nextstate = SubMbPrediction 10; |
else |
nextstate = SubMbPrediction 8; |
end |
// Steps 10-11: same pair for the second sub-macroblock, counted by temp3bit1. |
10: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 11; |
end |
11: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit1 <= temp3bit1-1; |
if(temp3bit1 == 1) |
nextstate = SubMbPrediction 12; |
else |
nextstate = SubMbPrediction 10; |
end |
// Steps 12-13: same pair for the third sub-macroblock, counted by temp3bit2. |
12: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 13; |
end |
13: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit2 <= temp3bit2-1; |
if(temp3bit2 == 1) |
nextstate = SubMbPrediction 14; |
else |
nextstate = SubMbPrediction 12; |
end |
// Steps 14-15: same pair for the fourth sub-macroblock, counted by temp3bit3; the last pass returns to MacroblockLayer 5. |
14: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 15; |
end |
15: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit3 <= temp3bit3-1; |
if(temp3bit3 == 1) |
nextstate = MacroblockLayer 5; |
else |
nextstate = SubMbPrediction 14; |
end |
default: $display( "ERROR EntropyDec: SubMbPrediction default step" ); |
endcase |
end |
tagged ResidualBlock .step : //if(residualChroma==0) return to Residual 1; else if(maxNumCoeff==4) return to Residual 3; else return to Residual 5 |
begin//don't modify maxNumCoeff, residualChroma, and increment temp5bit on return |
case ( step ) |
0: |
begin |
cavlcFIFO.clear(); |
if(maxNumCoeff != 4) |
begin |
if(residualChroma == 0) |
tempreg <= zeroExtend(calcnc.nCcalc_luma(truncate(temp5bit))); |
else |
tempreg <= zeroExtend(calcnc.nCcalc_chroma(truncate(temp5bit))); |
end |
else |
tempreg <= zeroExtend(6'b111111); |
if(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16) |
nextstate = ResidualBlock 1; |
else if(residualChroma==0 && (sdmcodedBlockPatternLuma & (1 << zeroExtend(temp5bit[3:2])))==0) |
begin |
calcnc.nNupdate_luma(truncate(temp5bit),0); |
outfifo_ITB.enq(SDMRcoeffLevelZeros maxNumCoeff); |
nextstate = ResidualBlock 5; |
end |
else if(residualChroma==1 && maxNumCoeff==4 && (sdmcodedBlockPatternChroma & 3)==0) |
begin |
outfifo_ITB.enq(SDMRcoeffLevelZeros 4); |
nextstate = ResidualBlock 5; |
end |
else if(residualChroma==1 && maxNumCoeff!=4 && (sdmcodedBlockPatternChroma & 2)==0) |
begin |
calcnc.nNupdate_chroma(truncate(temp5bit),0); |
outfifo_ITB.enq(SDMRcoeffLevelZeros 15); |
nextstate = ResidualBlock 5; |
end |
else |
nextstate = ResidualBlock 1; |
//$display( "TRACE EntropyDec: ResidualBlock 0 temp5bit = %0d", temp5bit); |
end |
1: |
begin |
Bit#(2) trailingOnesTemp = 0; |
Bit#(5) totalCoeffTemp = 0; |
{trailingOnesTemp,totalCoeffTemp,numbitsused} = cavlc_coeff_token( buffer, truncate(tempreg) ); |
temp3bit0 <= zeroExtend(trailingOnesTemp);//trailingOnes |
totalCoeff <= totalCoeffTemp; |
if(residualChroma == 0 && !(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16)) |
calcnc.nNupdate_luma(truncate(temp5bit),totalCoeffTemp); |
else if(residualChroma == 1 && maxNumCoeff != 4) |
calcnc.nNupdate_chroma(truncate(temp5bit),totalCoeffTemp); |
temp5bit2 <= 0;//i |
tempreg <= 0;//levelCode temp |
if(totalCoeffTemp > 10 && trailingOnesTemp < 3) |
temp3bit1 <= 1;//suffixLength |
else |
temp3bit1 <= 0;//suffixLength |
nextstate = ResidualBlock 2; |
//$display( "TRACE EntropyDec: ResidualBlock 1 nC = %0d", tempreg); |
$display( "ccl2SDMRtotal_coeff %0d", totalCoeffTemp ); |
$display( "ccl2SDMRtrailing_ones %0d", trailingOnesTemp ); |
end |
2: |
begin |
if( totalCoeff != 0 ) |
begin |
if(temp5bit2 < zeroExtend(temp3bit0)) |
begin |
if(buffer[buffersize-1] == 1) |
cavlcFIFO.enq(-1); |
else |
cavlcFIFO.enq(1); |
numbitsused = 1; |
end |
else |
begin |
Bit#(32) buffertempshow = buffer[buffersize-1:buffersize-32]; |
Bit#(3) suffixLength = temp3bit1; |
Bit#(4) levelSuffixSize = zeroExtend(suffixLength); |
Bit#(4) level_prefix = cavlc_level_prefix( buffer ); |
Bit#(5) temp_level_prefix = zeroExtend(level_prefix); |
Bit#(28) tempbuffer = buffer[buffersize-1:buffersize-28] << zeroExtend(temp_level_prefix+1); |
Bit#(14) levelCode = zeroExtend(level_prefix) << zeroExtend(suffixLength); |
if(level_prefix == 14 && suffixLength == 0) |
levelSuffixSize = 4; |
else if(level_prefix == 15) |
levelSuffixSize = 12; |
levelCode = levelCode + zeroExtend(tempbuffer[27:16] >> (12-zeroExtend(levelSuffixSize)));//level_suffix |
if(level_prefix == 15 && suffixLength == 0) |
levelCode = levelCode + 15; |
if(temp5bit2 == zeroExtend(temp3bit0) && temp3bit0 < 3) |
levelCode = levelCode + 2; |
if(suffixLength == 0) |
suffixLength = 1; |
if( suffixLength < 6 && ((levelCode+2) >> 1) > (3 << zeroExtend(suffixLength-1)) ) |
suffixLength = suffixLength+1; |
if(levelCode[0] == 0) |
cavlcFIFO.enq(truncate((levelCode+2) >> 1)); |
else |
cavlcFIFO.enq(truncate((~levelCode) >> 1)); |
if(levelCode[0] == 0)////////////////////////////////////////////////// |
begin |
tempint = signExtend(unpack((levelCode+2) >> 1)); |
//$display( "TRACE EntropyDec: temp level %0d", tempint ); |
end |
else |
begin |
Bit#(13) tempinttemp = truncate((~levelCode) >> 1); |
tempint = signExtend(unpack(tempinttemp)); |
//$display( "TRACE EntropyDec: temp level %0d", tempint ); |
end/////////////////////////////////////////////////////////////////////// |
temp3bit1 <= suffixLength; |
numbitsused = zeroExtend(level_prefix)+1+zeroExtend(levelSuffixSize); |
end |
end |
if( totalCoeff==0 || temp5bit2+1==totalCoeff ) |
begin |
temp5bit2 <= 0; |
zerosLeft <= 0; |
if(totalCoeff < maxNumCoeff) |
nextstate = ResidualBlock 3; |
else |
nextstate = ResidualBlock 5; |
end |
else |
begin |
temp5bit2 <= temp5bit2 + 1; |
nextstate = ResidualBlock 2; |
end |
end |
3: |
begin |
Bit#(4) tempZerosLeft; |
if(totalCoeff > 0) |
begin |
{tempZerosLeft,numbitsused} = cavlc_total_zeros( buffer, truncate(totalCoeff), maxNumCoeff); |
$display( "ccl2SDMRtotal_zeros %0d", tempZerosLeft );////////////////////////////////////// |
end |
else |
tempZerosLeft = 0; |
zerosLeft <= tempZerosLeft; |
if(maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) > 0) |
begin |
$display( "ccl2SDMRcoeffLevelZeros %0d", maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) ); |
outfifo_ITB.enq(SDMRcoeffLevelZeros (maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft))); |
end |
nextstate = ResidualBlock 5; |
end |
5: |
begin |
if( totalCoeff > 0 ) |
begin |
tempint = signExtend(unpack(cavlcFIFO.first())); |
$display( "ccl2SDMRcoeffLevel %0d", tempint ); |
if( zerosLeft > 0 ) |
begin |
Bit#(4) run_before = 0; |
if( totalCoeff > 1 ) |
{run_before,numbitsused} = cavlc_run_before( buffer, zerosLeft); |
else |
run_before = zerosLeft; |
zerosLeft <= zerosLeft - run_before; |
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:zeroExtend(run_before)}); |
if( run_before > 0 ) |
$display( "ccl2SDMRcoeffLevelZeros %0d", run_before ); |
end |
else |
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:0}); |
cavlcFIFO.deq(); |
totalCoeff <= totalCoeff-1; |
end |
if( totalCoeff <= 1 ) |
begin |
if(residualChroma==0) |
begin |
nextstate = ResidualBlock 0; |
if(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16) |
maxNumCoeff <= 15; |
else if(temp5bit==15) |
begin |
temp5bit <= 0; |
maxNumCoeff <= 4; |
residualChroma <= 1; |
end |
else |
temp5bit <= temp5bit+1; |
end |
else if(maxNumCoeff==4) |
begin |
nextstate = ResidualBlock 0; |
if(temp5bit==1) |
begin |
temp5bit <= 0; |
maxNumCoeff <= 15; |
end |
else |
temp5bit <= temp5bit+1; |
end |
else |
begin |
if(temp5bit==7) |
begin |
temp5bit <= 0; |
nextstate = SliceData 3; |
end |
else |
begin |
nextstate = ResidualBlock 0; |
temp5bit <= temp5bit+1; |
end |
end |
end |
else |
nextstate = ResidualBlock 5; |
end |
default: $display( "ERROR EntropyDec: ResidualBlock default step" ); |
endcase |
end |
endcase |
|
if(numbitsused+1 > bufcount) |
begin |
$display( "ERROR EntropyDec: not enough bits in buffer" ); |
nextstate = Start; |
end |
buffer <= buffer << zeroExtend(numbitsused); |
bufcount <= bufcount-numbitsused; |
state <= nextstate; |
|
endrule |
|
|
interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |
interface Get ioout_InverseTrans = fifoToGet(outfifo_ITB); |
|
interface mem_client = calcnc.mem_client; |
|
endmodule |
|
endpackage |
/trunk/src/H264Types.bsv
0,0 → 1,415
//********************************************************************** |
// H264 Types |
//---------------------------------------------------------------------- |
// |
// |
// |
|
|
package H264Types; |
|
import Vector::*; |
import RegFile::*; |
|
// Bit widths of the macroblock (MB) position counters. |
typedef 7 PicWidthSz; // bits for the horizontal position of a MB |
typedef 7 PicHeightSz; // bits for the vertical position of a MB |
typedef 14 PicAreaSz; // bits for the 2D position of a MB (max 16) |
|
// Largest picture width in MBs: (2^PicWidthSz)-1. |
Bit#(PicWidthSz) maxPicWidthInMB = 127; |
// Largest picture area in MBs: 2^13 = 8192. |
Bit#(PicAreaSz) maxPicAreaInMB = 14'h2000; |
|
// Frame buffer sizing. |
typedef 25 FrameBufferSz; // bits to address the frame buffer (5+PicAreaSz+6) |
typedef 16 MaxRefFrames; // max number of frames in the frame buffer (as a type) |
Bit#(5) maxRefFrames = 5'd16; // max number of frames in the frame buffer (as a value) |
// Size of the frame buffer: (maxRefFrames+2)*maxPicAreaInMB*1.5*64 = 14155776. |
Bit#(FrameBufferSz) frameBufferSize = 25'hD80000; |
|
// FIFO depths used by the individual modules. |
Integer entropyDec_infifo_size        = 2; |
Integer inverseTrans_infifo_size      = 8; |
Integer prediction_infifo_size        = 4; |
Integer prediction_infifo_ITB_size    = 16; |
Integer prediction_predictedfifo_size = 16; |
Integer interpolator_reqfifoLoad_size = 4; |
Integer interpolator_reqfifoWork_size = 8; |
Integer interpolator_memRespQ_size    = 4; |
Integer deblockFilter_infifo_size     = 32; |
Integer bufferControl_infifo_size     = 2; |
|
|
//----------------------------------------------------------- |
// 1 read port register file module |
|
// Register-file interface with one write method (upd) and one read method (sub). |
// idx_t is the index type, d_t the type of the stored entries. |
interface RFile1#(type idx_t, type d_t); |
// Store x2 at index x1. |
method Action upd(idx_t x1, d_t x2); |
// Return the entry stored at index x1. |
method d_t sub(idx_t x1); |
endinterface |
|
// RFile1 implementation built on a library RegFile created with mkRegFile(lo,hi). |
// Both interface methods forward directly to the wrapped register file. |
module mkRFile1#( idx_t lo, idx_t hi ) ( RFile1#(idx_t, d_t) ) |
   provisos ( Bits#(idx_t, si), Bits#(d_t, sa) ); |
|
   RegFile#(idx_t, d_t) store <- mkRegFile( lo, hi ); |
|
   // Write one entry. |
   method Action upd( idx_t addr, d_t value ); |
      store.upd( addr, value ); |
   endmethod |
|
   // Read one entry. |
   method d_t sub( idx_t addr ); |
      return store.sub( addr ); |
   endmethod |
endmodule |
|
// RFile1 implementation built on mkRegFileFull, i.e. without explicit lo/hi |
// bounds; this is why idx_t additionally needs the Bounded proviso. |
module mkRFile1Full( RFile1#(idx_t, d_t) ) |
   provisos ( Bits#(idx_t, si), Bits#(d_t, sa), Bounded#(idx_t) ); |
|
   RegFile#(idx_t, d_t) store <- mkRegFileFull(); |
|
   // Write one entry. |
   method Action upd( idx_t addr, d_t value ); |
      store.upd( addr, value ); |
   endmethod |
|
   // Read one entry. |
   method d_t sub( idx_t addr ); |
      return store.sub( addr ); |
   endmethod |
endmodule |
|
|
//----------------------------------------------------------- |
// Do not fire module |
|
// Interface with a single Action method; see mkDoNotFire for its behavior. |
interface DoNotFire; |
method Action doNotFire(); |
endinterface |
|
// Implementation whose doNotFire method is guarded by the constant False, |
// so the method is never ready; its body is noAction. |
// NOTE(review): presumably called from a rule to keep that rule from firing - confirm at the call sites. |
module mkDoNotFire( DoNotFire ); |
method Action doNotFire() if(False); |
noAction; |
endmethod |
endmodule |
|
|
// Macroblock type. Only the I_16x16 variant carries a payload (prediction |
// mode plus chroma/luma coded-block-pattern fields); every other variant is void. |
// Consumed by mbPartPredMode and numMbPart in this package. |
typedef union tagged |
{ |
void P_L0_16x16; |
void P_L0_L0_16x8; |
void P_L0_L0_8x16; |
void P_8x8; |
void P_8x8ref0; |
void I_NxN; |
struct{ |
Bit#(2) intra16x16PredMode; |
Bit#(2) codedBlockPatternChroma; |
Bit#(1) codedBlockPatternLuma; |
}I_16x16; |
void I_PCM; |
void P_Skip; |
} MbType deriving(Eq,Bits); |
|
|
// Prediction mode of a macroblock partition, as returned by mbPartPredMode. |
// NA is returned for the type/partition combinations that have no mode there. |
typedef enum |
{ |
Pred_L0, |
Intra_4x4, |
Intra_16x16, |
NA |
} MbPartPredModeType deriving(Eq,Bits); |
|
|
// Bit buffer type, a 7-bit counter type, and the buffer width as a value. |
// buffersize must equal the width of Buffer (both are 64 here). |
// NOTE(review): Bufcount presumably counts the valid bits held in a Buffer - confirm against the users. |
typedef Bit#(64) Buffer;//not sure size |
typedef Bit#(7) Bufcount; |
Nat buffersize = 64;//not sure size |
|
|
|
// Returns the prediction mode of partition mbPartIdx of a macroblock of type mbtype. |
//   partition 1: Pred_L0 for the two-partition P types, NA otherwise. |
//   partition 0: Pred_L0 for the one/two-partition P types and P_Skip, |
//                Intra_4x4 for I_NxN, NA for P_8x8 / P_8x8ref0 / I_PCM, |
//                Intra_16x16 for everything else (i.e. I_16x16). |
function MbPartPredModeType mbPartPredMode( MbType mbtype, Bit#(1) mbPartIdx ); |
   Bool twoPartInter = (mbtype == P_L0_L0_16x8) || (mbtype == P_L0_L0_8x16); |
   Bool onePartInter = (mbtype == P_L0_16x16) || (mbtype == P_Skip); |
   Bool noMode       = (mbtype == P_8x8) || (mbtype == P_8x8ref0) || (mbtype == I_PCM); |
|
   // Fallback covers the only remaining type for partition 0. |
   MbPartPredModeType mode = Intra_16x16; |
   if(mbPartIdx == 1) |
      mode = (twoPartInter ? Pred_L0 : NA); |
   else if(onePartInter || twoPartInter) |
      mode = Pred_L0; |
   else if(mbtype == I_NxN) |
      mode = Intra_4x4; |
   else if(noMode) |
      mode = NA; |
   return mode; |
endfunction |
|
|
// Number of partitions of a macroblock of type mbtype: 1, 2 or 4 for the |
// P types and P_Skip, 0 for every other type. |
function Bit#(3) numMbPart( MbType mbtype ); |
   case (mbtype) matches |
      tagged P_L0_16x16   : return 1; |
      tagged P_Skip       : return 1; |
      tagged P_L0_L0_16x8 : return 2; |
      tagged P_L0_L0_8x16 : return 2; |
      tagged P_8x8        : return 4; |
      tagged P_8x8ref0    : return 4; |
      default             : return 0;//should never happen |
   endcase |
endfunction |
|
|
// Number of partitions of a sub-macroblock of type submbtype: |
// type 0 -> 1, types 1 and 2 -> 2, type 3 -> 4. |
function Bit#(3) numSubMbPart( Bit#(2) submbtype ); |
   case (submbtype) |
      0       : return 1; |
      1, 2    : return 2; |
      default : return 4; |
   endcase |
endfunction |
|
|
//---------------------------------------------------------------------- |
// Inter-module FIFO types |
//---------------------------------------------------------------------- |
|
|
// Token that is either one data byte or an end-of-file marker. |
// NOTE(review): by its name, the output type of the input generator - confirm in mkInputGen. |
typedef union tagged |
{ |
Bit#(8) DataByte; |
void EndOfFile; |
} |
InputGenOT deriving(Eq,Bits); |
|
|
// Token that is a new-unit marker, one RBSP byte, or an end-of-file marker. |
// NOTE(review): by its name, the output type of the NAL unwrapper - confirm in mkNalUnwrap. |
typedef union tagged |
{ |
void NewUnit; |
Bit#(8) RbspByte; |
void EndOfFile; |
} |
NalUnwrapOT deriving(Eq,Bits); |
|
|
// One token per decoded syntax element. The variants are grouped by the |
// section comments below (sequence/picture parameter set, slice header, |
// slice data, prediction block output, delimiters). The trailing comment on |
// each variant gives the bitstream coding (ue/se/te/u(n)) and value range. |
// Lines that start with //// are variants that are commented out here; several |
// of them (the QP-related ones) appear in EntropyDecOT_InverseTrans instead. |
// This type is also carried by IPrediction.ioin/ioout and by DeblockFilterOT.EDOT. |
typedef union tagged |
{ |
Bit#(8) NewUnit; |
|
////Sequence Parameter Set |
Bit#(5) SPSseq_parameter_set_id;//ue 0 to 31 |
Bit#(5) SPSlog2_max_frame_num;//ue+4 4 to 16 |
Bit#(2) SPSpic_order_cnt_type;//ue 0 to 2 |
Bit#(5) SPSlog2_max_pic_order_cnt_lsb;//ue+4 4 to 16 |
Bit#(1) SPSdelta_pic_order_always_zero_flag;//u(1) |
Bit#(32) SPSoffset_for_non_ref_pic;//se -2^31 to 2^31-1 |
Bit#(32) SPSoffset_for_top_to_bottom_field;//se -2^31 to 2^31-1 |
Bit#(8) SPSnum_ref_frames_in_pic_order_cnt_cycle;//ue 0 to 255 |
Bit#(32) SPSoffset_for_ref_frame;//se -2^31 to 2^31-1 |
Bit#(5) SPSnum_ref_frames;//ue 0 to MaxDpbSize (depends on Level) |
Bit#(1) SPSgaps_in_frame_num_allowed_flag;//u(1) |
Bit#(PicWidthSz) SPSpic_width_in_mbs;//ue+1 1 to ? |
Bit#(PicHeightSz) SPSpic_height_in_map_units;//ue+1 1 to ? |
//// Bit#(1) SPSframe_mbs_only_flag//u(1) (=1 for baseline) |
Bit#(1) SPSdirect_8x8_inference_flag;//u(1) |
Bit#(1) SPSframe_cropping_flag;//u(1) |
Bit#(16) SPSframe_crop_left_offset;//ue 0 to ? |
Bit#(16) SPSframe_crop_right_offset;//ue 0 to ? |
Bit#(16) SPSframe_crop_top_offset;//ue 0 to ? |
Bit#(16) SPSframe_crop_bottom_offset;//ue 0 to ? |
|
////Picture Parameter Set |
Bit#(8) PPSpic_parameter_set_id;//ue 0 to 255 |
Bit#(5) PPSseq_parameter_set_id;//ue 0 to 31 |
//// Bit#(1) PPSentropy_coding_mode_flag//u(1) (=0 for baseline) |
Bit#(1) PPSpic_order_present_flag;//u(1) |
//// Bit#(4) PPSnum_slice_groups;//ue+1 1 to 8 (=1 for main) |
////some info if PPSnum_slice_groups>1 (not in main) |
Bit#(5) PPSnum_ref_idx_l0_active;//ue+1 1 to 32 (16 for frame mb) |
Bit#(5) PPSnum_ref_idx_l1_active;//ue+1 1 to 32 (16 for frame mb) |
//// Bit#(1) PPSweighted_pred_flag;//u(1) (=0 for baseline) |
//// Bit#(2) PPSweighted_bipred_flag;//u(2) (=0 for baseline) |
//////// Bit#(7) PPSpic_init_qp;//se+26 0 to 51 |
//////// Bit#(7) PPSpic_init_qs;//se+26 0 to 51 |
//////// Bit#(5) PPSchroma_qp_index_offset;//se -12 to 12 |
Bit#(1) PPSdeblocking_filter_control_present_flag;//u(1) |
Bit#(1) PPSconstrained_intra_pred_flag;//u(1) |
//// Bit#(1) PPSredundant_pic_cnt_present_flag;//u(1) (=0 for main) |
|
////Slice Header |
Bit#(PicAreaSz) SHfirst_mb_in_slice;//ue 0 to PicSizeInMbs-1 |
Bit#(4) SHslice_type;//ue 0 to 9 |
Bit#(8) SHpic_parameter_set_id;//ue 0 to 255 |
Bit#(16) SHframe_num;//u(log2_max_frame_num) |
Bit#(16) SHidr_pic_id;//ue 0 to 65535 |
Bit#(16) SHpic_order_cnt_lsb;//u(log2_max_pic_order_cnt_lsb) |
Bit#(32) SHdelta_pic_order_cnt_bottom;//se -2^31 to 2^31-1 |
Bit#(32) SHdelta_pic_order_cnt0;//se -2^31 to 2^31-1 |
Bit#(32) SHdelta_pic_order_cnt1;//se -2^31 to 2^31-1 |
Bit#(1) SHnum_ref_idx_active_override_flag;//u(1) |
Bit#(5) SHnum_ref_idx_l0_active;//ue+1 1 to 32 (16 for frame mb) |
////reference picture list reordering |
Bit#(1) SHRref_pic_list_reordering_flag_l0;//u(1) |
Bit#(2) SHRreordering_of_pic_nums_idc;//ue 0 to 3 |
Bit#(17) SHRabs_diff_pic_num;//ue 1 to MaxPicNum |
Bit#(5) SHRlong_term_pic_num;//ue 0 to ? |
////decoded reference picture marking |
Bit#(1) SHDno_output_of_prior_pics_flag;//u(1) |
Bit#(1) SHDlong_term_reference_flag;//u(1) |
Bit#(1) SHDadaptive_ref_pic_marking_mode_flag;//u(1) |
Bit#(3) SHDmemory_management_control_operation;//ue 0 to 6 |
Bit#(17) SHDdifference_of_pic_nums;//ue 1 to MaxPicNum |
Bit#(5) SHDlong_term_pic_num;//ue 0 to 32 (16 for frame mb) |
Bit#(5) SHDlong_term_frame_idx;//ue 0 to MaxLongTermFrameIdx |
Bit#(5) SHDmax_long_term_frame_idx_plus1;//ue 0 to num_ref_frames (0 to 16) |
////Slice Header (continued) |
//////// Bit#(7) SHslice_qp_delta;//se -51 to 51 |
Bit#(2) SHdisable_deblocking_filter_idc;//ue 0 to 2 |
Bit#(5) SHslice_alpha_c0_offset;//se*2 -12 to 12 |
Bit#(5) SHslice_beta_offset;//se*2 -12 to 12 |
|
////Slice Data |
Bit#(PicAreaSz) SDmb_skip_run;//ue 0 to PicSizeInMbs |
//// Bit#(PicAreaSz) SDcurrMbAddr;//ue ->process-> 0 to PicSizeInMbs |
////macroblock layer |
MbType SDMmbtype;//ue ->process-> MbType |
Bit#(8) SDMpcm_sample_luma;//ue 0 to 255 |
Bit#(8) SDMpcm_sample_chroma;//ue 0 to 255 |
//// Bit#(6) SDMcoded_block_pattern;//me |
//////// Bit#(7) SDMmb_qp_delta;//se -26 to 25 |
////macroblock prediction |
// Bit#(1) SDMMprev_intra4x4_pred_mode_flag;//u(1) |
Bit#(4) SDMMrem_intra4x4_pred_mode;//(SDMMprev_intra4x4_pred_mode_flag ? 4'b1000 : {1'b0,u(3)}) |
Bit#(2) SDMMintra_chroma_pred_mode;//ue 0 to 3 |
Bit#(5) SDMMref_idx_l0;//te 0 to num_ref_idx_active_minus1 |
Bit#(16) SDMMmvd_l0;//se ? to ? (see Annex A) |
////sub-macroblock prediction |
Bit#(2) SDMSsub_mb_type;//ue 0 to 3 |
Bit#(5) SDMSref_idx_l0;//te 0 to num_ref_idx_active_minus1 |
Bit#(16) SDMSmvd_l0;//se ? to ? (see Annex A) |
////residual data |
//////// Bit#(13) SDMRcoeffLevel;//cavlc output in reverse order (high frequency first) |
//////// Bit#(5) SDMRcoeffLevelZeros;//# of consecutive zeros (also used for ITBresidual) |
|
////Prediction Block output |
struct {Bit#(6) qpy; Bit#(6) qpc;} IBTmb_qp;//qp for luma and chroma for the current MB |
struct {Bit#(3) bShor; Bit#(3) bSver;} PBbS;// |
Vector#(4,Bit#(8)) PBoutput;//prediction+residual in regular h.264 order |
|
//// various delimiters |
Bit#(3) AUDPrimaryPicType; |
void EndOfSequence; |
void EndOfStream; |
void EndOfFile; |
} |
EntropyDecOT deriving(Eq,Bits); |
|
|
// Token type holding the QP-related parameters, the macroblock type and the |
// residual coefficient data (levels and zero runs). The QP-related variants |
// here are the ones that are commented out in EntropyDecOT. |
// NOTE(review): by its name, the entropy decoder's output towards the inverse transform - confirm in mkEntropyDec. |
typedef union tagged |
{ |
Bit#(8) NewUnit; |
|
////Picture Parameter Set |
Bit#(8) PPSpic_parameter_set_id;//ue 0 to 255 |
Bit#(7) PPSpic_init_qp;//se+26 0 to 51 |
Bit#(7) PPSpic_init_qs;//se+26 0 to 51 |
Bit#(5) PPSchroma_qp_index_offset;//se -12 to 12 |
|
////Slice Header |
Bit#(7) SHslice_qp_delta;//se -51 to 51 |
|
////macroblock layer |
MbType SDMmbtype;//ue ->process-> MbType |
Bit#(7) SDMmb_qp_delta;//se -26 to 25 |
////residual data (cavlc output in reverse order (high frequency first)) |
struct {Bit#(13) level; Bit#(5) zeros;} SDMRcoeffLevelPlusZeros;//one non-zero coeff level followed by # of consecutive zeros |
Bit#(5) SDMRcoeffLevelZeros;//# of consecutive zeros |
} |
EntropyDecOT_InverseTrans deriving(Eq,Bits); |
|
|
// Token that is an all-zero marker, four 10-bit residual values, or the |
// luma/chroma QP pair of the current macroblock. |
// This is the element type of IPrediction.ioin_InverseTrans. |
typedef union tagged |
{ |
void ITBcoeffLevelZeros;//16 consecutive zeros |
Vector#(4,Bit#(10)) ITBresidual;//residual data in regular h.264 order |
struct {Bit#(6) qpy; Bit#(6) qpc;} IBTmb_qp;//qp for luma and chroma for the current MB |
} |
InverseTransOT deriving(Eq,Bits); |
|
|
// Token that is 32 bits of luma data at (hor,ver), 32 bits of chroma data at |
// (uv,hor,ver), an end-of-frame marker, or an embedded EntropyDecOT token. |
// NOTE(review): by its name, the output type of the deblocking filter; uv presumably selects the chroma plane - confirm. |
typedef union tagged |
{ |
struct {Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; Bit#(32) data;} DFBLuma; |
struct {Bit#(1) uv; Bit#(TAdd#(PicWidthSz,1)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; Bit#(32) data;} DFBChroma; |
void EndOfFrame; |
EntropyDecOT EDOT; |
} |
DeblockFilterOT deriving(Eq,Bits); |
|
|
// Token that is either 32 bits of YUV data or an end-of-file marker. |
// NOTE(review): by its name, the output type of the buffer controller - confirm in mkBufferControl. |
typedef union tagged |
{ |
Bit#(32) YUV; |
void EndOfFile; |
} |
BufferControlOT deriving(Eq,Bits); |
|
|
// Frame buffer load request: an address of FrameBufferSz bits, or an |
// end-of-frame synchronisation marker. |
typedef union tagged |
{ |
Bit#(FrameBufferSz) FBLoadReq; |
void FBEndFrameSync; |
} |
FrameBufferLoadReq deriving(Eq,Bits); |
|
// Frame buffer load response: 32 bits of data. |
typedef union tagged |
{ |
Bit#(32) FBLoadResp; |
} |
FrameBufferLoadResp deriving(Eq,Bits); |
|
// Frame buffer store request: an address plus 32 bits of data, or an |
// end-of-frame synchronisation marker. |
typedef union tagged |
{ |
struct { Bit#(FrameBufferSz) addr; Bit#(32) data; } FBStoreReq; |
void FBEndFrameSync; |
} |
FrameBufferStoreReq deriving(Eq,Bits); |
|
|
// Block sizes an interpolation request can refer to. |
typedef enum |
{ |
IP16x16, |
IP16x8, |
IP8x16, |
IP8x8, |
IP8x4, |
IP4x8, |
IP4x4 |
} IPBlockType deriving(Eq,Bits); |
|
// Interpolation request for luma or chroma: reference index, block position |
// (hor,ver), motion vector (mvhor,mvver) and block size; chroma adds uv. |
// mkInterpolator's load rules treat the low 2 (luma) or 3 (chroma) bits of |
// each motion-vector component as its fractional part. |
typedef union tagged |
{ |
struct { Bit#(4) refIdx; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; Bit#(14) mvhor; Bit#(12) mvver; IPBlockType bt; } IPLuma; |
struct { Bit#(4) refIdx; Bit#(1) uv; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; Bit#(14) mvhor; Bit#(12) mvver; IPBlockType bt; } IPChroma; |
} |
InterpolatorIT deriving(Eq,Bits); |
|
// Memory load request issued by the interpolator: reference index and |
// word address (hor,ver) for luma or chroma, or an end-of-frame marker. |
// horOutOfBounds is set by the load rules when the horizontal address was clamped. |
typedef union tagged |
{ |
struct { Bit#(4) refIdx; Bit#(1) horOutOfBounds; Bit#(TAdd#(PicWidthSz,2)) hor; Bit#(TAdd#(PicHeightSz,4)) ver; } IPLoadLuma; |
struct { Bit#(4) refIdx; Bit#(1) uv; Bit#(1) horOutOfBounds; Bit#(TAdd#(PicWidthSz,1)) hor; Bit#(TAdd#(PicHeightSz,3)) ver; } IPLoadChroma; |
void IPLoadEndFrame; |
} |
InterpolatorLoadReq deriving(Eq,Bits); |
|
// Memory load response for the interpolator: 32 bits, unpacked by |
// mkInterpolator into four bytes with the lowest byte first. |
typedef union tagged |
{ |
Bit#(32) IPLoadResp; |
} |
InterpolatorLoadResp deriving(Eq,Bits); |
|
|
// Generic memory request, parameterised by address and data width: |
// a load from an address, or a store of data to an address. |
typedef union tagged |
{ |
Bit#(addrSz) LoadReq; |
struct { Bit#(addrSz) addr; Bit#(dataSz) data; } StoreReq; |
} |
MemReq#( type addrSz, type dataSz ) |
deriving(Eq,Bits); |
|
// Generic memory response carrying the loaded data. |
typedef union tagged |
{ |
Bit#(dataSz) LoadResp; |
} |
MemResp#( type dataSz ) |
deriving(Eq,Bits); |
|
|
|
endpackage |
/trunk/src/IPrediction.bsv
0,0 → 1,29
//********************************************************************** |
// Interface for Prediction |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IPrediction; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Interface of the prediction module: two input ports and one output port |
// for the inter-module token streams, plus three memory client ports. |
interface IPrediction; |
|
// Interface for inter-module io |
// ioin / ioout carry EntropyDecOT tokens; ioin_InverseTrans carries InverseTransOT tokens. |
interface Put#(EntropyDecOT) ioin; |
interface Put#(InverseTransOT) ioin_InverseTrans; |
interface Get#(EntropyDecOT) ioout; |
|
// Interface for module to memory |
// mem_client_intra: 68-bit data, mem_client_inter: 32-bit data, both with |
// (PicWidthSz+2)-bit addresses; mem_client_buffer uses the interpolator load types. |
interface Client#(MemReq#(TAdd#(PicWidthSz,2),68),MemResp#(68)) mem_client_intra; |
interface Client#(MemReq#(TAdd#(PicWidthSz,2),32),MemResp#(32)) mem_client_inter; |
interface Client#(InterpolatorLoadReq,InterpolatorLoadResp) mem_client_buffer; |
|
endinterface |
|
endpackage |
|
/trunk/src/mkInterpolator.bsv
0,0 → 1,843
//********************************************************************** |
// interpolator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInterpolator; |
|
import H264Types::*; |
import IInterpolator::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Work request passed from the load stage to the work rules: the fractional |
// motion-vector parts (2 bits each for luma, 3 bits each for chroma), a |
// 2-bit offset and the block size. |
typedef union tagged |
{ |
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma; |
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma; |
} |
InterpolatorWT deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// Clips a 10-bit value, read as two's complement, to the range 0..255: |
// bit 9 set (negative) -> 0, otherwise bit 8 set (>= 256) -> 255, |
// otherwise the low 8 bits are returned unchanged. |
function Bit#(8) clip1y10to8( Bit#(10) innum ); |
   Bit#(8) clipped = truncate(innum); |
   if(innum[9] == 1) |
      clipped = 0; |
   else if(innum[8] == 1) |
      clipped = 255; |
   return clipped; |
endfunction |
|
// Six-tap filter with coefficients (1,-5,20,20,-5,1) over six unsigned 8-bit |
// samples. The unscaled sum is returned as a 15-bit two's-complement value |
// (all arithmetic is modulo 2^15). |
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 ); |
   // The filter is symmetric, so taps with equal coefficients are summed first. |
   Bit#(15) outerTaps  = zeroExtend(in0) + zeroExtend(in5); |
   Bit#(15) innerTaps  = zeroExtend(in1) + zeroExtend(in4); |
   Bit#(15) centerTaps = zeroExtend(in2) + zeroExtend(in3); |
   return outerTaps - 5*innerTaps + 20*centerTaps; |
endfunction |
|
// Six-tap filter with coefficients (1,-5,20,20,-5,1) over six signed 15-bit |
// intermediate values. The sum is rounded (+512), scaled down by 2^10 and |
// clipped to 0..255 with clip1y10to8. |
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 ); |
   // The filter is symmetric, so taps with equal coefficients are summed first. |
   Bit#(20) outerTaps  = signExtend(in0) + signExtend(in5); |
   Bit#(20) innerTaps  = signExtend(in1) + signExtend(in4); |
   Bit#(20) centerTaps = signExtend(in2) + signExtend(in3); |
   Bit#(20) rounded    = outerTaps - 5*innerTaps + 20*centerTaps + 512; |
   // Bits [19:10] of the rounded sum; bit 19 becomes the sign bit seen by the clip. |
   Bit#(10) scaled     = truncate(rounded >> 10); |
   return clip1y10to8(scaled); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Interpolation Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkInterpolator( Interpolator ); |
|
// Request and response queues. reqfifoLoad is read by the load rules, |
// reqfifoWork1 by the work1 rules and reqregWork2 by the work2 rules. |
// memReqQ receives the load requests; memRespQ is consumed by the work1 rules. |
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size); |
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size); |
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid); |
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO; |
Reg#(Bool) endOfFrameFlag <- mkReg(False); |
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO; |
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size); |
|
// Picture dimensions used by the load rules to clamp addresses |
// (presumably in macroblocks - confirm where they are written). |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
|
// Scratch storage. workFile and storeFile are indexed with workFileFlag as |
// the top address bit when written; work2 reads workFile with the flag inverted. |
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full(); |
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full(); |
Reg#(Bit#(1)) workFileFlag <- mkReg(0); |
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full(); |
|
// Counters of the load rules (loadLuma / loadChroma). |
Reg#(Bit#(1)) loadStage <- mkReg(0); |
Reg#(Bit#(2)) loadHorNum <- mkReg(0); |
Reg#(Bit#(4)) loadVerNum <- mkReg(0); |
|
// State of the work1 rules. |
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(1)) work1Stage <- mkReg(0); |
Reg#(Bit#(2)) work1HorNum <- mkReg(0); |
Reg#(Bit#(4)) work1VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU; |
Reg#(Bool) work1Done <- mkReg(False); |
|
// State of the work2 rules. |
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0); |
Reg#(Bit#(2)) work2HorNum <- mkReg(0); |
Reg#(Bit#(4)) work2VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU; |
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU; |
Reg#(Vector#(16,Bit#(1))) resultReady <- mkReg(replicate(0)); |
Reg#(Bool) work2Done <- mkReg(False); |
Reg#(Bool) work8x8Done <- mkReg(False); |
|
// Output-side counters (their users are outside this part of the file). |
Reg#(Bit#(2)) outBlockNum <- mkReg(0); |
Reg#(Bit#(2)) outPixelNum <- mkReg(0); |
Reg#(Bool) outDone <- mkReg(False); |
|
|
// While endOfFrameFlag is set, forward one IPLoadEndFrame marker to the |
// memory request queue and clear the flag. The load rules are blocked |
// while the flag is set. |
rule sendEndOfFrameReq( endOfFrameFlag ); |
   memReqQ.enq(IPLoadEndFrame); |
   endOfFrameFlag <= False; |
endrule |
|
|
// Generates the memory load requests for the luma request (tagged IPLuma) at |
// the head of reqfifoLoad. Every firing enqueues exactly one IPLoadLuma |
// request into memReqQ and advances the loadHorNum/loadVerNum/loadStage |
// counters; the request is dequeued after its last load has been issued. |
// The rule is blocked while endOfFrameFlag is set. |
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag ); |
// Low 2 bits of each motion-vector component are its fractional part; the |
// next 2 bits of mvhor give the pixel offset within a 4-pixel word. |
Bit#(2) xfracl = reqdata.mvhor[1:0]; |
Bit#(2) yfracl = reqdata.mvver[1:0]; |
Bit#(2) offset = reqdata.mvhor[3:2]; |
// twoStage: both fractions are odd (1 or 3). Loads are then issued in two |
// passes selected by loadStage: pass 0 for vertical, pass 1 for horizontal interpolation. |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0); |
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0); |
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0); |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,2)) horAddr; |
Bit#(TAdd#(PicHeightSz,4)) verAddr; |
// Position of the current load before the motion vector is applied: |
// block position plus loop counter, plus 1 in the passes noted in the conditions. |
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0); |
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0); |
// Integer part of the motion vector, moved back by 2 when interpolating in |
// that direction (the six-tap filter needs two extra samples before the block). |
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0); |
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0); |
// Horizontal address: clamp to 0 when a negative displacement exceeds the |
// position, clamp to the last word of the row when at or beyond picWidth*16; |
// horOut flags either clamp. |
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(mvhortemp); |
if(horTemp>=zeroExtend({picWidth,4'b0000})) |
begin |
horAddr = {picWidth-1,2'b11}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical address: same clamping against 0 and picHeight*16, without a flag. |
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(mvvertemp); |
if(verTemp>=zeroExtend({picHeight,4'b0000})) |
verAddr = {picHeight-1,4'b1111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Counter update. verFirst makes loadVerNum the inner loop (column by column); |
// otherwise loadHorNum is the inner loop (row by row). |
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3)); |
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1)); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0); |
if(verFirst) |
begin |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
if(loadHorNum < loadHorNumMax) |
begin |
if(loadStage == 1) |
begin |
// Second pass: either jump straight to the last column or finish the request, |
// depending on the (adjusted) offset. |
offset = offset + (xfracl==3 ? 1 : 0); |
if(!(offset==1 || (xfracl==3 && offset==2))) |
loadHorNum <= loadHorNumMax; |
else |
begin |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
else |
loadHorNum <= loadHorNum+1; |
end |
else |
begin |
if(twoStage && loadStage==0) |
begin |
// End of the first pass: choose the starting column of the second pass and switch to it. |
offset = offset + (xfracl==3 ? 1 : 0); |
if((xfracl==3 ? offset<3 : offset<2)) |
loadHorNum <= 0; |
else |
loadHorNum <= loadHorNumMax+1; |
loadStage <= 1; |
end |
else |
begin |
// Last load of the request. |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
end |
else |
begin |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
// Blocks larger than 8x8 are only reported; the rule still runs for them. |
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16) |
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported"); |
$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Generates the memory load requests for the chroma request (tagged IPChroma) |
// at the head of reqfifoLoad. Every firing enqueues exactly one IPLoadChroma |
// request into memReqQ and advances loadHorNum (inner loop) and loadVerNum |
// (outer loop); the request is dequeued after its last load has been issued. |
// The rule is blocked while endOfFrameFlag is set. |
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag ); |
// Low 3 bits of each motion-vector component are its fractional part. |
Bit#(3) xfracc = reqdata.mvhor[2:0]; |
Bit#(3) yfracc = reqdata.mvver[2:0]; |
// Offset within a 4-pixel word: next 2 bits of mvhor plus 2 when reqdata.hor is odd. |
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0}; |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,1)) horAddr; |
Bit#(TAdd#(PicHeightSz,3)) verAddr; |
// Position of the current load before the motion vector is applied. |
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00}); |
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum); |
// Horizontal address: clamp to 0 when a negative displacement exceeds the |
// position, clamp to the last word of the row when at or beyond picWidth*8; |
// horOut flags either clamp. |
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]); |
if(horTemp>=zeroExtend({picWidth,3'b000})) |
begin |
horAddr = {picWidth-1,1'b1}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical address: same clamping against 0 and picHeight*8, without a flag. |
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(reqdata.mvver[11:3]); |
if(verTemp>=zeroExtend({picHeight,3'b000})) |
verAddr = {picHeight-1,3'b111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Loop bounds depend on the block size, the word offset and on whether a |
// fractional part is non-zero (which adds one extra word / one extra row). |
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
// Last load of the request. |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// First processing stage for the luma work request (tagged IPWLuma) at the |
// head of reqfifoWork1. Each firing consumes one word from memRespQ and, |
// depending on the fractional motion-vector parts, does one of: |
//   - xfracl==0, yfracl==0 or xfracl==2: reorder or horizontally interpolate, writing workFile |
//   - otherwise, work1Stage==0: vertically interpolate, writing workFile (and storeFile when twoStage) |
//   - otherwise (second pass of twoStage): copy the raw word into storeFile. |
// It then advances work1HorNum/work1VerNum/work1Stage and sets work1Done |
// after the last word. The rule is blocked while work1Done is set. |
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
// twoStage: both fractions are odd (1 or 3), same condition as in loadLuma. |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Unpack the 32-bit response into four bytes, lowest byte first. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] ); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
if(xfracl==0 || yfracl==0 || xfracl==2) |
begin |
if(xfracl==0)//reorder |
begin |
// Assemble four output pixels from the previous word (kept in |
// work1Vector8[0..3]) and the current word, shifted by offset. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && offset!=0) |
tempResult8[ii] = work1Vector8[offsetplusii]; |
else |
tempResult8[ii] = readdata[offsetplusii]; |
work1Vector8Next[ii] = readdata[ii]; |
end |
// Pixels are stored scaled by 32 (shifted left by 5) in the 15-bit work format. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
else//horizontal interpolation |
begin |
offset = offset-2; |
// Slide the sample window left by one word and append the new word at 8-offset. |
for(Integer ii=0; ii<8; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work1Vector8Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]); |
// Half-sample value: round, scale down by 32 and clip to 8 bits. |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
// Quarter-sample positions: rounded average with the neighbouring full sample. |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1); |
end |
end |
// The first workHorNumOffset words of each row only fill the window and |
// produce no output. |
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1)); |
if(work1HorNum >= workHorNumOffset) |
begin |
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset); |
if(yfracl == 0) |
begin |
// No vertical pass follows, so store the final 8-bit result in the scaled work format. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0); |
// Row-by-row traversal: horizontal counter is the inner loop. |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
work1Done <= True; |
end |
end |
end |
else if(work1Stage == 0)//vertical interpolation |
begin |
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0); |
// Filter down the column: work1Vector8 holds the five previous rows |
// (four pixels each); the current word is the sixth row. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]); |
// Shift the row history up by one row and append the current row. |
for(Integer ii=0; ii<16; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work1Vector8Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1)); |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
Bit#(2) horAddr = work1HorNum; |
Bit#(3) verAddr = truncate(work1VerNum-5); |
// The first five rows of a column only fill the history; output starts at row 5. |
if(work1VerNum > 4) |
begin |
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15); |
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]); |
end |
if(twoStage) |
begin |
// Also keep the raw rows that lie inside the block in storeFile; |
// rows whose adjusted index falls outside 0..7 are skipped. |
Bit#(2) storeHorAddr = work1HorNum; |
Bit#(4) storeVerAddr = work1VerNum; |
if((xfracl==3 ? offset<3 : offset<2)) |
storeHorAddr = storeHorAddr+1; |
if(yfracl==3) |
storeVerAddr = storeVerAddr-3; |
else |
storeVerAddr = storeVerAddr-2; |
if(storeVerAddr < 8) |
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata); |
end |
// Column-by-column traversal: vertical counter is the inner loop. |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
if(twoStage) |
begin |
// Switch to the second pass; its starting column mirrors the choice made in loadLuma. |
work1Stage <= 1; |
if((xfracl==3 ? offset<3 : offset<2)) |
work1HorNum <= 0; |
else |
work1HorNum <= workHorNumMax+1; |
end |
else |
begin |
work1HorNum <= 0; |
work1Done <= True; |
end |
end |
end |
end |
else//second stage of twoStage |
begin |
// Raw words are copied to storeFile without filtering. |
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata); |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
offset = offset + (xfracl==3 ? 1 : 0); |
// Either jump to the last column or finish, mirroring the second pass of loadLuma. |
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2))) |
work1HorNum <= workHorNumMax; |
else |
begin |
work1HorNum <= 0; |
work1Stage <= 0; |
work1Done <= True; |
end |
end |
end |
end |
work1Vector8 <= work1Vector8Next; |
$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage); |
endrule |
|
|
// Second-stage luma worker. |
// Fires while reqregWork2 holds a Valid IPWLuma request and neither work2Done nor |
// work8x8Done is set. It reads the buffer half addressed with (1-workFileFlag), |
// turns the data into rows of four 8-bit samples, writes completed rows to |
// resultFile and marks the written entry in resultReady. |
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
// *Next copies are modified below and committed to the registers at the end of the rule. |
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8; |
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15; |
Vector#(16,Bit#(1)) resultReadyNext = resultReady; |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) readdata = replicate(0); |
// Case 1: no vertical fraction. The 15-bit workFile entries are only narrowed |
// (bits [12:5]) and copied to resultFile, one entry per firing. |
if(yfracl==0) |
begin |
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult8[ii] = (readdata[ii])[12:5]; |
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8); |
resultReadyNext[{work2VerNum[1],work2HorNum,work2VerNum[0]}] = 1; |
// NOTE(review): no explicit reset of work2HorNum at 3 - relies on the counter |
// wrapping to 0 (presumably a 2-bit register; confirm against its declaration). |
work2HorNum <= work2HorNum+1; |
if(work2HorNum == 3) |
begin |
if(work2VerNum == 3) |
begin |
work2VerNum <= 0; |
work2Done <= True; |
// More sub-partitions to go for IP4x8/IP8x4 (2 parts) and IP4x4 (4 parts); |
// otherwise the whole 8x8 is finished. |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
else |
work2VerNum <= work2VerNum+1; |
end |
end |
// Case 2: vertical 6-tap filter. work2Vector15 holds the five previous rows |
// (row r, sample ii at index ii+4*r); readdata supplies the sixth row. |
else if(xfracl==0 || xfracl==2)//vertical interpolation |
begin |
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]); |
// yfracl 1 / 3: average (rounding up) with the rounded, clipped sample |
// taken from window row 2 / row 3 respectively. |
if(yfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1); |
else if(yfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1); |
end |
// Slide the window by one row and append the row just read. |
for(Integer ii=0; ii<16; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work2Vector15Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = 1; |
// Block height (8 or 4 rows) plus the 5 extra rows needed to prime the filter. |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
// The first five rows only fill the window; results start at work2VerNum==5. |
if(work2VerNum > 4) |
begin |
Bit#(1) horAddr = truncate(work2HorNum); |
Bit#(3) verAddr = truncate(work2VerNum-5); |
// Place the sub-partition inside the 8x8: second column for IP4x8 part 1 and odd |
// IP4x4 parts, four rows down for IP8x4 part 1 and IP4x4 parts 2/3. |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
resultReadyNext[{verAddr,horAddr}] = 1; |
end |
// Column-major traversal: rows are the inner counter, columns the outer one. |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
// Case 3: horizontal 6-tap filter (xfracl is 1 or 3 and yfracl is not 0). |
else//horizontal interpolation |
begin |
// Local copy only; reqdata.offset itself is still used unmodified further down. |
offset = offset-2; |
// yfracl==2: the filter input is the 15-bit data read from workFile. |
if(yfracl == 2) |
begin |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
// Shift the sample window left by one word (4 samples) ... |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
// ... and insert the new word at an offset-dependent position. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector15Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]); |
// xfracl 1 / 3: average with the rounded, clipped window sample ii+2 / ii+3. |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1); |
end |
end |
// Remaining combination (yfracl is 1 or 3): horizontal filter on the 8-bit data |
// from storeFile, then averaged with a realigned value read from workFile. |
else |
begin |
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector8Next[ii] = work2Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector8Next[tempIndex] = readdata8[ii]; |
end |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
end |
// Alignment of the workFile data: the request offset, plus one when xfracl==3. |
Bit#(2) verOffset; |
Vector#(4,Bit#(15)) verResult15 = replicate(0); |
if(xfracl == 1) |
verOffset = reqdata.offset; |
else |
verOffset = reqdata.offset+1; |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))}); |
// Realign: each output sample comes either from the previously read word |
// (kept in work2Vector15[0..3]) or from the word just read. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = verOffset+fromInteger(ii); |
if(verOffset <= 3-fromInteger(ii) && verOffset!=0) |
verResult15[ii] = work2Vector15[offsetplusii]; |
else |
verResult15[ii] = readdata[offsetplusii]; |
work2Vector15Next[ii] = readdata[ii]; |
end |
// Final sample = rounded average of the horizontal and the workFile value. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5))); |
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1); |
end |
end |
// The first two words of each row only prime the window; output starts at word 2. |
if(work2HorNum >= 2) |
begin |
Bit#(1) horAddr = truncate(work2HorNum-2); |
Bit#(3) verAddr = truncate(work2VerNum); |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
resultReadyNext[{verAddr,horAddr}] = 1; |
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext)); |
end |
// Block width in words (1 or 2) plus the 2 priming words. |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
// Row-major traversal: words are the inner counter, rows the outer one. |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
// Commit the (possibly updated) window and ready-flag copies. |
work2Vector8 <= work2Vector8Next; |
work2Vector15 <= work2Vector15Next; |
resultReady <= resultReadyNext; |
$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset); |
endrule |
|
|
// First-stage chroma worker. |
// Fires while the head of reqfifoWork1 is an IPWChroma request and work1Done is clear. |
// Each firing that finds an IPLoadResp at the head of memRespQ consumes that 32-bit |
// word (four 8-bit samples), applies the weighted 2x2 chroma interpolation and, once a |
// result row is complete, writes it to storeFile in the half selected by workFileFlag. |
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done ); |
// Fractions widened to 4 bits so that (8-frac) can be represented. |
Bit#(4) xfracc = zeroExtend(reqdata.xFracC); |
Bit#(4) yfracc = zeroExtend(reqdata.yFracC); |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Unpack the response word; sample 0 is the least-significant byte. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
// tempWork8: the current row (5 samples), tempPrev8: the row above it. |
Vector#(5,Bit#(8)) tempWork8 = replicate(0); |
Vector#(5,Bit#(8)) tempPrev8 = replicate(0); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Bool resultReadyFlag = False; |
// Realign by 'offset': each sample is taken either from the previously read word |
// (kept in work1Vector8[0..3]) or from the word just read. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0)) |
tempWork8[ii] = work1Vector8[offsetplusii]; |
else |
tempWork8[ii] = readdata[offsetplusii]; |
work1Vector8Next[ii] = readdata[ii]; |
end |
// Fifth sample: right-hand neighbour of output sample 3. |
tempWork8[4] = readdata[offset]; |
// Previous-row storage: work1Vector8[9..13] for the second result word of 16-wide |
// blocks, work1Vector8[4..8] otherwise. |
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2)) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
begin |
tempPrev8[ii] = work1Vector8[ii+9]; |
work1Vector8Next[ii+9] = tempWork8[ii]; |
end |
end |
else |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = work1Vector8[ii+4]; |
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3)))) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
work1Vector8Next[ii+4] = tempWork8[ii]; |
end |
end |
// Without a vertical fraction the current row also serves as the "previous" row. |
if(yfracc==0) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = tempWork8[ii]; |
end |
// Weighted sum of the 2x2 neighbourhood: |
// ((8-x)(8-y)*A + x(8-y)*B + (8-x)y*C + xy*D + 32) >> 6 |
// with A,B from the previous row and C,D from the current row. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]); |
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]); |
tempResult8[ii] = truncate((tempVal+32)>>6); |
end |
// With a vertical fraction the first row only primes tempPrev8, so no output for it. |
if(work1VerNum > 0 || yfracc==0) |
begin |
if(blockT==IP4x8 || blockT==IP4x4) |
begin |
// 4-wide luma partitions yield two samples per row. They are parked in |
// work1Vector8[10+2*row ..] and combined with the pair parked there earlier, |
// so a full 4-sample row is emitted only for odd work1SubMbPart values. |
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1); |
work1Vector8Next[tempIndex] = tempResult8[0]; |
work1Vector8Next[tempIndex+1] = tempResult8[1]; |
tempResult8[2] = tempResult8[0]; |
tempResult8[3] = tempResult8[1]; |
tempResult8[0] = work1Vector8[tempIndex]; |
tempResult8[1] = work1Vector8[tempIndex+1]; |
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1) |
resultReadyFlag = True; |
end |
else |
begin |
if(work1HorNum>0 || (xfracc==0 && offset==0)) |
resultReadyFlag = True; |
end |
end |
if(resultReadyFlag) |
begin |
// Destination inside the chroma block: base position from the counters, then |
// shifted by the macroblock partition and sub-macroblock partition indices. |
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1))); |
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1)); |
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]); |
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00})); |
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0); |
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8); |
end |
// Last word index per row and last row index for this partition; these are the |
// same expressions loadChroma uses for loadHorNumMax / loadVerNumMax. |
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
// Row-major traversal: words are the inner counter, rows the outer one. |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
// Partition finished: step the sub-partition counter, then the partition counter; |
// when both are exhausted the whole chroma job is done. |
Bool allDone = False; |
work1VerNum <= 0; |
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3)) |
work1SubMbPart <= work1SubMbPart+1; |
else |
begin |
work1SubMbPart <= 0; |
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3)) |
work1MbPart <= work1MbPart+1; |
else |
begin |
work1MbPart <= 0; |
work1Done <= True; |
allDone = True; |
end |
end |
// Intermediate partitions pop their request here. The final one stays at the |
// head of reqfifoWork1; the switching/switching8x8 rules read and dequeue it. |
if(!allDone) |
reqfifoWork1.deq(); |
end |
end |
end |
work1Vector8 <= work1Vector8Next; |
$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset); |
endrule |
|
|
// Second-stage chroma worker: copies one 4-sample row per firing from the storeFile |
// half addressed with (1-workFileFlag) into resultFile and flags it in resultReady. |
// After the 16th row it raises work2Done and work8x8Done. |
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done ); |
   // Result slot for this firing; the same four bits also index the source row. |
   let slot = {work2VerNum[1],work2HorNum,work2VerNum[0]}; |
   let row = storeFile.sub({(1-workFileFlag),1'b0,slot}); |
   resultFile.upd(slot,row); |
   Vector#(16,Bit#(1)) readyNext = resultReady; |
   readyNext[slot] = 1; |
   resultReady <= readyNext; |
   // work2HorNum is incremented unconditionally, exactly as before. |
   work2HorNum <= work2HorNum+1; |
   Bool lastWord = (work2HorNum == 3); |
   Bool lastRow = (work2VerNum == 3); |
   if(lastWord) |
      begin |
         if(!lastRow) |
            work2VerNum <= work2VerNum+1; |
         else |
            begin |
               work2Done <= True; |
               work8x8Done <= True; |
               work2VerNum <= 0; |
            end |
      end |
   $display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum); |
endrule |
|
|
// Output stage: streams resultFile rows to outfifo in slot order, waiting for each |
// slot's resultReady bit. Sets outDone once the last row of block 3 has been sent. |
rule outputing( !outDone && resultReady[{outBlockNum[1],outPixelNum,outBlockNum[0]}]==1 ); |
   let slot = {outBlockNum[1],outPixelNum,outBlockNum[0]}; |
   let row = resultFile.sub(slot); |
   outfifo.enq(row); |
   Bool lastRowOfBlock = (outPixelNum == 3); |
   if(lastRowOfBlock) |
      outBlockNum <= outBlockNum+1; |
   if(lastRowOfBlock && outBlockNum == 3) |
      outDone <= True; |
   outPixelNum <= outPixelNum+1; |
   $display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Hand-over between the two work stages inside an 8x8: once stage 1 is done and |
// stage 2 is done or idle, the head request moves from reqfifoWork1 into reqregWork2 |
// and workFileFlag is toggled so that the stages swap buffer halves. |
rule switching( work1Done && (work2Done || !isValid(reqregWork2)) && !work8x8Done); |
   let nextReq = reqfifoWork1.first(); |
   reqfifoWork1.deq(); |
   reqregWork2 <= tagged Valid nextReq; |
   workFileFlag <= 1-workFileFlag; |
   work2Done <= False; |
   work1Done <= False; |
   $display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Hand-over at an 8x8 boundary: same as 'switching', but it additionally waits for |
// outDone and then clears the per-8x8 state (outDone, work8x8Done, resultReady). |
rule switching8x8( work1Done && (work2Done || !isValid(reqregWork2)) && work8x8Done && outDone); |
   let nextReq = reqfifoWork1.first(); |
   reqfifoWork1.deq(); |
   reqregWork2 <= tagged Valid nextReq; |
   workFileFlag <= 1-workFileFlag; |
   // restart both work stages |
   work2Done <= False; |
   work1Done <= False; |
   // start a fresh 8x8 result block |
   resultReady <= replicate(0); |
   work8x8Done <= False; |
   outDone <= False; |
   $display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
|
// Stores the new picture width in the picWidth register. |
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth ) = picWidth._write(newPicWidth); |
|
// Stores the new picture height in the picHeight register. |
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight ) = picHeight._write(newPicHeight); |
|
// Accepts one interpolation request. The full request goes to reqfifoLoad; a condensed |
// work descriptor (fractional motion-vector bits, word offset, block type) goes to |
// reqfifoWork1 when the request is tagged IPLuma or IPChroma. |
method Action request( InterpolatorIT inputdata ); |
   case (inputdata) matches |
      tagged IPLuma .luma : |
         reqfifoWork1.enq(IPWLuma {xFracL:luma.mvhor[1:0],yFracL:luma.mvver[1:0],offset:luma.mvhor[3:2],bt:luma.bt}); |
      tagged IPChroma .chroma : |
         begin |
            // word offset: mvhor bits [4:3] plus 2 when hor is odd |
            Bit#(2) wordOffset = chroma.mvhor[4:3]+{chroma.hor[0],1'b0}; |
            reqfifoWork1.enq(IPWChroma {xFracC:chroma.mvhor[2:0],yFracC:chroma.mvver[2:0],offset:wordOffset,bt:chroma.bt}); |
         end |
      default : noAction; |
   endcase |
   reqfifoLoad.enq(inputdata); |
endmethod |
|
// Returns the row of four 8-bit samples at the head of outfifo. |
method Vector#(4,Bit#(8)) first() = outfifo.first(); |
|
// Removes the head entry of outfifo. |
method Action deq() = outfifo.deq(); |
|
// Raises endOfFrameFlag. |
method Action endOfFrame() = endOfFrameFlag._write(True); |
|
// Memory port: load requests leave through memReqQ, responses arrive in memRespQ. |
interface Client mem_client; |
interface Get request = fifoToGet(memReqQ); |
interface Put response = fifoToPut(memRespQ); |
endinterface |
|
|
endmodule |
|
|
endpackage |
/trunk/src/mkCalc_nC.bsv
0,0 → 1,313
//********************************************************************** |
// nC Calculator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkCalc_nC; |
|
import H264Types::*; |
import ICalc_nC::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
// nC calculator. |
// Keeps the coefficient counts of the left and top neighbouring 4x4 blocks: four 5-bit |
// slots for luma (leftVal/topVal) and two 5-bit slots per chroma plane. The all-ones |
// slot value 5'b11111 marks an unavailable neighbour. The top values of a whole |
// macroblock row live in an external memory reached through mem_client, addressed by |
// {plane, macroblock column} with plane 0 = luma word and plane 1 = chroma word. |
// Every interface method is ready only when no memory sequence is pending |
// (waiting == 0) and currMbHor has been reduced below picWidth. |
(* synthesize *) |
module mkCalc_nC( Calc_nC ); |
| 
   //--------------------------- state --------------------------- |
   Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
   Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
   Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
   Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
   Reg#(Bit#(1)) waiting <- mkReg(0);      // 1 while a multi-cycle memory sequence runs |
   Reg#(Bit#(1)) reqCount <- mkReg(0);     // load requests still to be sent by sendReq |
   Reg#(Bit#(2)) respCount <- mkReg(0);    // load responses still expected |
   Reg#(Bit#(1)) ipcmCount <- mkReg(0);    // 1 while the I_PCM chroma store is pending |
   Reg#(Bit#(PicAreaSz)) pskipCount <- mkReg(0); // stores still to issue for a skip run |
   Reg#(Bit#(20)) leftVal <- mkReg(0); |
   Reg#(Bit#(20)) topVal <- mkReg(0); |
   Reg#(Bit#(10)) leftValChroma0 <- mkReg(0); |
   Reg#(Bit#(10)) topValChroma0 <- mkReg(0); |
   Reg#(Bit#(10)) leftValChroma1 <- mkReg(0); |
   Reg#(Bit#(10)) topValChroma1 <- mkReg(0); |
   FIFO#(MemReq#(TAdd#(PicWidthSz,1),20)) memReqQ <- mkFIFO; |
   FIFO#(MemResp#(20)) memRespQ <- mkFIFO; |
| 
   //------------------------- constants ------------------------- |
   Bit#(1) lumaPlane = 0;                             // address MSB of the luma top word |
   Bit#(1) chromaPlane = 1;                           // address MSB of the chroma top word |
   Bit#(5) ncUnavail = 5'b11111;                      // slot value: neighbour unavailable |
   Bit#(20) lumaUnavail = 20'b11111111111111111111;   // four unavailable luma slots |
   Bit#(10) chromaUnavail = 10'b1111111111;           // two unavailable chroma slots |
   Bit#(20) lumaIpcm = 20'b10000100001000010000;      // four slots of 16 (I_PCM) |
   Bit#(10) chromaIpcm = 10'b1000010000;              // two slots of 16 (I_PCM) |
| 
   //----------------------- shared guards ----------------------- |
   // currMbHor is a valid column (the currMbHorUpdate rule has nothing left to do) |
   Bool inRow = currMbHor<zeroExtend(picWidth); |
   // common ready condition of all interface methods |
   Bool idle = (waiting == 0 && inRow); |
| 
   //-------------------------- helpers -------------------------- |
   // Memory address of the current macroblock column in the given plane. |
   function Bit#(TAdd#(PicWidthSz,1)) lineAddr( Bit#(1) plane ); |
      Bit#(PicWidthSz) col = truncate(currMbHor); |
      return {plane,col}; |
   endfunction |
| 
   // Reads 5-bit slot 'sel' (0 = bits [4:0] ... 3 = bits [19:15]) of a luma word. |
   function Bit#(5) getSlot20( Bit#(20) v, Bit#(2) sel ); |
      Bit#(5) r = v[19:15]; |
      case (sel) |
         2'b00: r = v[4:0]; |
         2'b01: r = v[9:5]; |
         2'b10: r = v[14:10]; |
         default: r = v[19:15]; |
      endcase |
      return r; |
   endfunction |
| 
   // Returns luma word 'v' with 5-bit slot 'sel' replaced by 'x'. |
   function Bit#(20) setSlot20( Bit#(20) v, Bit#(2) sel, Bit#(5) x ); |
      Bit#(20) r = {x , v[14:0]}; |
      case (sel) |
         2'b00: r = {v[19:5] , x}; |
         2'b01: r = {{v[19:10] , x} , v[4:0]}; |
         2'b10: r = {{v[19:15] , x} , v[9:0]}; |
         default: r = {x , v[14:0]}; |
      endcase |
      return r; |
   endfunction |
| 
   // Reads 5-bit slot 'sel' (0 = low, 1 = high) of a chroma word. |
   function Bit#(5) getSlot10( Bit#(10) v, Bit#(1) sel ); |
      return (sel==0 ? v[4:0] : v[9:5]); |
   endfunction |
| 
   // Returns chroma word 'v' with 5-bit slot 'sel' replaced by 'x'. |
   function Bit#(10) setSlot10( Bit#(10) v, Bit#(1) sel, Bit#(5) x ); |
      return (sel==0 ? {v[9:5] , x} : {x , v[4:0]}); |
   endfunction |
| 
   // nC from the two neighbour slots: rounded-up average when both are available, |
   // the available one when only one is, 0 when neither is. |
   function Bit#(5) combineNC( Bit#(5) left, Bit#(5) top ); |
      Bit#(6) left6 = zeroExtend(left); |
      Bit#(6) top6 = zeroExtend(top); |
      Bit#(5) nc = 0; |
      if(top!=ncUnavail && left!=ncUnavail) |
         nc = truncate((top6+left6+1) >> 1); |
      else if(left!=ncUnavail) |
         nc = left; |
      else if(top!=ncUnavail) |
         nc = top; |
      return nc; |
   endfunction |
| 
   //--------------------------- rules --------------------------- |
   // Reduces currMbHor modulo picWidth by repeated subtraction (8*picWidth at a time |
   // while that still fits); interface methods stay blocked until it is done. |
   rule currMbHorUpdate( !inRow ); |
      Bit#(PicAreaSz) width = zeroExtend(picWidth); |
      if((currMbHor >> 3) >= width) |
         currMbHor <= currMbHor - (width << 3); |
      else |
         currMbHor <= currMbHor - width; |
   endrule |
| 
   // Sends the second load of a loadMb sequence: the chroma top word. |
   rule sendReq ( waiting == 1 && reqCount > 0 ); |
      let addr = lineAddr(chromaPlane); |
      memReqQ.enq(LoadReq addr ); |
      reqCount <= reqCount-1; |
   endrule |
| 
   // Takes the load responses in order: first the luma top word (respCount == 2), |
   // then the chroma top word, which also ends the waiting state. |
   rule receiveResp ( waiting == 1 &&& respCount > 0 &&& memRespQ.first() matches tagged LoadResp .data ); |
      if( respCount == 2 ) |
         topVal <= data; |
      else |
         begin |
            topValChroma0 <= data[9:0]; |
            topValChroma1 <= data[19:10]; |
            waiting <= 0; |
         end |
      memRespQ.deq(); |
      respCount <= respCount - 1; |
   endrule |
| 
   // Second half of nNupdate_ipcm: stores the chroma top word and moves on to the |
   // next macroblock. |
   rule ipcmReq ( waiting == 1 && ipcmCount > 0 ); |
      memReqQ.enq(StoreReq {addr:lineAddr(chromaPlane),data:{chromaIpcm,chromaIpcm}} ); |
      currMb <= currMb+1; |
      currMbHor <= currMbHor+1; |
      ipcmCount <= 0; |
      waiting <= 0; |
   endrule |
| 
   // Continues a skip run started by nNupdate_pskip, one zero store per firing. |
   // Odd pskipCount: chroma word of the current macroblock, then advance to the next |
   // one (and finish when the count reaches 1). Even pskipCount: luma word. |
   rule pskipReq ( waiting == 1 && pskipCount > 0 && inRow ); |
      Bool chromaStep = (pskipCount[0] == 1); |
      memReqQ.enq(StoreReq {addr:lineAddr(chromaStep ? chromaPlane : lumaPlane),data:20'b00000000000000000000} ); |
      if(chromaStep) |
         begin |
            currMb <= currMb+1; |
            currMbHor <= currMbHor+1; |
            if(pskipCount == 1) |
               waiting <= 0; |
         end |
      pskipCount <= pskipCount - 1; |
   endrule |
| 
   //-------------------------- methods -------------------------- |
   // Sets the picture width in macroblocks. |
   method Action initialize_picWidth( Bit#(PicWidthSz) picWidthInMb ) if( idle ); |
      picWidth <= picWidthInMb; |
   endmethod |
| 
   // Starts at macroblock firstMbAddr and clears the left-neighbour values. |
   // currMbHor is loaded with the raw address; currMbHorUpdate reduces it afterwards. |
   method Action initialize( Bit#(PicAreaSz) firstMbAddr ) if( idle ); |
      firstMb <= firstMbAddr; |
      currMb <= firstMbAddr; |
      currMbHor <= firstMbAddr; |
      leftVal <= 0; |
      leftValChroma0 <= 0; |
      leftValChroma1 <= 0; |
   endmethod |
| 
   // Prepares the neighbour values for macroblock mbAddr, which must equal currMb |
   // (otherwise only an error is printed). The left neighbour is marked unavailable in |
   // column 0 or at firstMb. The top neighbour is marked unavailable within the first |
   // picWidth macroblocks after firstMb; otherwise it is loaded from memory (luma word |
   // requested here, chroma word by sendReq). |
   method Action loadMb( Bit#(PicAreaSz) mbAddr ) if( idle ); |
      if( mbAddr != currMb ) |
         $display( "ERROR EntropyDec: mkCalc_nC loadMb wrong mbAddr" ); |
      else |
         begin |
            if( currMbHor == 0 || currMb == firstMb) |
               begin |
                  leftVal <= lumaUnavail; |
                  leftValChroma0 <= chromaUnavail; |
                  leftValChroma1 <= chromaUnavail; |
               end |
            if( currMb-firstMb < zeroExtend(picWidth) ) |
               begin |
                  topVal <= lumaUnavail; |
                  topValChroma0 <= chromaUnavail; |
                  topValChroma1 <= chromaUnavail; |
               end |
            else |
               begin |
                  waiting <= 1; |
                  reqCount <= 1; |
                  respCount <= 2; |
                  let addr = lineAddr(lumaPlane); |
                  memReqQ.enq(LoadReq addr ); |
                  //$display( "ERROR EntropyDec: mkCalc_nC loadMb incomplete" ); |
               end |
         end |
   endmethod |
| 
   // nC for luma 4x4 block microBlockNum: the left slot is chosen by bits {3,1}, the |
   // top slot by bits {2,0} of the block number. |
   method Bit#(5) nCcalc_luma( Bit#(4) microBlockNum ) if( idle ); |
      Bit#(5) left = getSlot20(leftVal, {microBlockNum[3],microBlockNum[1]}); |
      Bit#(5) top = getSlot20(topVal, {microBlockNum[2],microBlockNum[0]}); |
      return combineNC(left, top); |
   endmethod |
| 
   // nC for chroma block microBlockNum: bit 2 selects the plane, bit 1 the left slot, |
   // bit 0 the top slot. |
   method Bit#(5) nCcalc_chroma( Bit#(3) microBlockNum ) if( idle ); |
      Bool plane0 = (microBlockNum[2]==0); |
      Bit#(5) left = getSlot10((plane0 ? leftValChroma0 : leftValChroma1), microBlockNum[1]); |
      Bit#(5) top = getSlot10((plane0 ? topValChroma0 : topValChroma1), microBlockNum[0]); |
      return combineNC(left, top); |
   endmethod |
| 
   // Records totalCoeff of luma block microBlockNum in the matching left and top slots. |
   // After block 15 the updated top word is stored to memory. |
   method Action nNupdate_luma( Bit#(4) microBlockNum, Bit#(5) totalCoeff ) if( idle ); |
      Bit#(20) topNext = setSlot20(topVal, {microBlockNum[2],microBlockNum[0]}, totalCoeff); |
      leftVal <= setSlot20(leftVal, {microBlockNum[3],microBlockNum[1]}, totalCoeff); |
      topVal <= topNext; |
      if(microBlockNum == 15) |
         memReqQ.enq(StoreReq {addr:lineAddr(lumaPlane),data:topNext} ); |
      //$display( "TRACE nNupdate_luma old leftVal %b", leftVal ); |
      //$display( "TRACE nNupdate_luma old topVal %b", topVal ); |
      //$display( "TRACE nNupdate_luma microBlockNum %0d", microBlockNum ); |
      //$display( "TRACE nNupdate_luma totalCoeff %0d", totalCoeff ); |
   endmethod |
| 
   // Records totalCoeff of chroma block microBlockNum. After block 7 the combined |
   // chroma top word is stored and the module advances to the next macroblock. |
   method Action nNupdate_chroma( Bit#(3) microBlockNum, Bit#(5) totalCoeff ) if( idle ); |
      Bit#(10) top0Next = topValChroma0; |
      Bit#(10) top1Next = topValChroma1; |
      if(microBlockNum[2]==0) |
         begin |
            leftValChroma0 <= setSlot10(leftValChroma0, microBlockNum[1], totalCoeff); |
            top0Next = setSlot10(topValChroma0, microBlockNum[0], totalCoeff); |
         end |
      else |
         begin |
            leftValChroma1 <= setSlot10(leftValChroma1, microBlockNum[1], totalCoeff); |
            top1Next = setSlot10(topValChroma1, microBlockNum[0], totalCoeff); |
         end |
      // both top registers are written on every call, as before |
      topValChroma0 <= top0Next; |
      topValChroma1 <= top1Next; |
      if(microBlockNum == 7) |
         begin |
            currMb <= currMb+1; |
            currMbHor <= currMbHor+1; |
            memReqQ.enq(StoreReq {addr:lineAddr(chromaPlane),data:{top1Next,top0Next}} ); |
         end |
   endmethod |
| 
   // Starts a run of inmb_skip_run skipped macroblocks: zeroes the left values, issues |
   // the first luma store here and leaves the remaining 2*run-1 stores to pskipReq. |
   // A run length of 0 does nothing. |
   method Action nNupdate_pskip( Bit#(PicAreaSz) inmb_skip_run ) if( idle ); |
      //$display( "TRACE nNupdate_pskip mb_skip_run = %0d", inmb_skip_run ); |
      if(inmb_skip_run > 0) |
         begin |
            waiting <= 1; |
            pskipCount <= (inmb_skip_run << 1)-1; |
            memReqQ.enq(StoreReq {addr:lineAddr(lumaPlane),data:20'b00000000000000000000} ); |
            leftVal <= 0; |
            leftValChroma0 <= 10'b0000000000; |
            leftValChroma1 <= 10'b0000000000; |
         end |
   endmethod |
| 
   // I_PCM macroblock: every slot becomes 16. The luma top word is stored here, the |
   // chroma top word by ipcmReq. |
   method Action nNupdate_ipcm() if( idle ); |
      leftVal <= lumaIpcm; |
      leftValChroma0 <= chromaIpcm; |
      leftValChroma1 <= chromaIpcm; |
      //$display( "TRACE nNupdate_ipcm"); |
      waiting <= 1; |
      ipcmCount <= 1; |
      memReqQ.enq(StoreReq {addr:lineAddr(lumaPlane),data:lumaIpcm} ); |
   endmethod |
| 
   // Memory port: requests leave through memReqQ, responses arrive in memRespQ. |
   interface Client mem_client; |
      interface Get request = fifoToGet(memReqQ); |
      interface Put response = fifoToPut(memRespQ); |
   endinterface |
| 
| 
endmodule |
|
|
|
endpackage |
/trunk/src/mkInterpolator_4stage_16ready.bsv
0,0 → 1,843
//********************************************************************** |
// interpolator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInterpolator; |
|
import H264Types::*; |
import IInterpolator::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Work descriptor: the element type of reqfifoWork1 and the payload of reqregWork2. |
// IPWLuma : 2-bit fractions (xFracL, yFracL), 2-bit offset, block type. |
// IPWChroma: 3-bit fractions (xFracC, yFracC), 2-bit offset, block type. |
// NOTE(review): the fractions/offset are presumably the same low motion-vector bits |
// that loadLuma/loadChroma extract (mvhor[1:0], mvhor[3:2], ...); confirm against the |
// request method, which is outside this chunk. |
typedef union tagged |
{ |
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma; |
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma; |
} |
InterpolatorWT deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// Clips a 10-bit two's-complement value to the unsigned 8-bit range: |
// negative (bit 9 set) -> 0, above 255 (bit 8 set) -> 255, otherwise the low 8 bits. |
function Bit#(8) clip1y10to8( Bit#(10) innum ); |
   Bool isNegative = (innum[9] == 1); |
   Bool isTooLarge = (innum[8] == 1); |
   Bit#(8) lowByte = truncate(innum); |
   return (isNegative ? 0 : (isTooLarge ? 255 : lowByte)); |
endfunction |
|
// 6-tap filter (1, -5, 20, 20, -5, 1) over six 8-bit samples. The result is the |
// unscaled 15-bit sum (two's complement, arithmetic modulo 2^15). |
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 ); |
   Bit#(15) outerPair = zeroExtend(in0) + zeroExtend(in5); |
   Bit#(15) middlePair = zeroExtend(in1) + zeroExtend(in4); |
   Bit#(15) innerPair = zeroExtend(in2) + zeroExtend(in3); |
   return outerPair - 5*middlePair + 20*innerPair; |
endfunction |
|
// 6-tap filter (1, -5, 20, 20, -5, 1) over six signed 15-bit intermediates, rounded |
// (+512), scaled down by 1024 and clipped to 0..255. |
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 ); |
   Bit#(20) outerPair = signExtend(in0) + signExtend(in5); |
   Bit#(20) middlePair = signExtend(in1) + signExtend(in4); |
   Bit#(20) innerPair = signExtend(in2) + signExtend(in3); |
   Bit#(20) rounded = outerPair - 5*middlePair + 20*innerPair + 512; |
   // bits [19:10] are exactly truncate(rounded>>10) |
   Bit#(10) scaled = rounded[19:10]; |
   return clip1y10to8(scaled); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Interpolation Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkInterpolator( Interpolator ); |
|
// Request path: reqfifoLoad feeds the load rules (loadLuma/loadChroma), reqfifoWork1 |
// feeds the first work stage, reqregWork2 holds an optional work request (Invalid at reset). |
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size); |
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size); |
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid); |
// Output rows of four 8-bit samples. |
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO; |
// While set, the load rules are held off and sendEndOfFrameReq emits IPLoadEndFrame. |
Reg#(Bool) endOfFrameFlag <- mkReg(False); |
// Memory request / response queues. |
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO; |
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size); |
| 
// Picture dimensions used by the load rules to clamp addresses; they are compared |
// against pixel positions as {picWidth,4'b0000} / {picHeight,4'b0000} for luma. |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
| 
// Intermediate buffers with 6-bit addresses: workFile holds rows of four 15-bit values, |
// storeFile rows of four 8-bit values. resultFile (4-bit address) holds 8-bit result rows. |
// NOTE(review): workFileFlag presumably selects which buffer half each stage uses; |
// its uses are outside this chunk - confirm. |
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full(); |
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full(); |
Reg#(Bit#(1)) workFileFlag <- mkReg(0); |
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full(); |
| 
// Load-rule progress: pass of a two-stage luma request, word index, row index. |
Reg#(Bit#(1)) loadStage <- mkReg(0); |
Reg#(Bit#(2)) loadHorNum <- mkReg(0); |
Reg#(Bit#(4)) loadVerNum <- mkReg(0); |
| 
// First work stage: partition counters, pass/word/row counters, an 8-bit sample |
// window (no reset value) and its done flag. |
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(1)) work1Stage <- mkReg(0); |
Reg#(Bit#(2)) work1HorNum <- mkReg(0); |
Reg#(Bit#(4)) work1VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU; |
Reg#(Bool) work1Done <- mkReg(False); |
| 
// Second work stage: counters, 8-bit and 15-bit sample windows (no reset value), |
// one ready bit per resultFile entry, and the stage / 8x8 done flags. |
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0); |
Reg#(Bit#(2)) work2HorNum <- mkReg(0); |
Reg#(Bit#(4)) work2VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU; |
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU; |
Reg#(Vector#(16,Bit#(1))) resultReady <- mkReg(replicate(0)); |
Reg#(Bool) work2Done <- mkReg(False); |
Reg#(Bool) work8x8Done <- mkReg(False); |
| 
// Output progress: block index, row index within the block, and the done flag. |
Reg#(Bit#(2)) outBlockNum <- mkReg(0); |
Reg#(Bit#(2)) outPixelNum <- mkReg(0); |
Reg#(Bool) outDone <- mkReg(False); |
|
|
// Forwards a pending end-of-frame notification to the memory request queue and |
// clears the flag, which re-enables the load rules. |
rule sendEndOfFrameReq( endOfFrameFlag ); |
   memReqQ.enq(IPLoadEndFrame); |
   endOfFrameFlag <= False; |
endrule |
|
|
// Luma load-address generator. |
// For the IPLuma request at the head of reqfifoLoad (held off while endOfFrameFlag is |
// set) it issues one IPLoadLuma memory request per firing. loadHorNum/loadVerNum walk |
// the words and rows of the reference area; the request is dequeued after the last one. |
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag ); |
// Quarter-sample fractions and the word-internal pixel offset of the motion vector. |
Bit#(2) xfracl = reqdata.mvhor[1:0]; |
Bit#(2) yfracl = reqdata.mvver[1:0]; |
Bit#(2) offset = reqdata.mvhor[3:2]; |
// Both fractions odd: the area is fetched in two passes selected by loadStage |
// (pass 0 with vertical margins, pass 1 with horizontal margins). |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0); |
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0); |
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0); |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,2)) horAddr; |
Bit#(TAdd#(PicHeightSz,4)) verAddr; |
// Pixel position before the motion vector is applied: block position plus the word / |
// row counter, shifted by one pixel / row for fraction 3 in the matching pass. |
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0); |
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0); |
// Integer part of the motion vector, moved back by 2 in a direction that is being |
// interpolated (presumably the leading samples needed by the 6-tap filter). |
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0); |
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0); |
// Horizontal clamp: left of the picture -> word 0, right of it -> last word; both |
// cases raise horOut. Otherwise the pixel position is converted to a word address. |
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(mvhortemp); |
if(horTemp>=zeroExtend({picWidth,4'b0000})) |
begin |
horAddr = {picWidth-1,2'b11}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp to the first / last picture row (no out-of-bounds flag). |
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(mvvertemp); |
if(verTemp>=zeroExtend({picHeight,4'b0000})) |
verAddr = {picHeight-1,4'b1111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// verFirst: walk rows inside columns; otherwise words inside rows. |
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3)); |
// Last word index: block width in words, plus 2 when interpolating horizontally, |
// or plus 1 when only an unaligned offset spills into the next word. |
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1)); |
// Last row index: block height, plus 5 when interpolating vertically. |
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0); |
if(verFirst) |
begin |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
if(loadHorNum < loadHorNumMax) |
begin |
// Pass 1 of a two-stage request loads only selected columns: it either jumps to |
// the last column or finishes, depending on the (adjusted) offset. |
if(loadStage == 1) |
begin |
offset = offset + (xfracl==3 ? 1 : 0); |
if(!(offset==1 || (xfracl==3 && offset==2))) |
loadHorNum <= loadHorNumMax; |
else |
begin |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
else |
loadHorNum <= loadHorNum+1; |
end |
else |
begin |
// End of pass 0 of a two-stage request: switch to pass 1 and choose its start |
// column (0, or one past the pass-0 maximum). |
if(twoStage && loadStage==0) |
begin |
offset = offset + (xfracl==3 ? 1 : 0); |
if((xfracl==3 ? offset<3 : offset<2)) |
loadHorNum <= 0; |
else |
loadHorNum <= loadHorNumMax+1; |
loadStage <= 1; |
end |
else |
begin |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
end |
else |
begin |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
// Partitions larger than 8x8 are only reported here; the rule does not block them. |
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16) |
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported"); |
$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Chroma load-address generator. |
// For the IPChroma request at the head of reqfifoLoad (held off while endOfFrameFlag |
// is set) it issues one IPLoadChroma memory request per firing, walking words |
// (loadHorNum) inside rows (loadVerNum), and dequeues the request after the last one. |
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag ); |
// Eighth-sample fractions; the word-internal offset also includes 2 when hor is odd. |
Bit#(3) xfracc = reqdata.mvhor[2:0]; |
Bit#(3) yfracc = reqdata.mvver[2:0]; |
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0}; |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,1)) horAddr; |
Bit#(TAdd#(PicHeightSz,3)) verAddr; |
// Pixel position before the motion vector is applied. |
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00}); |
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum); |
// Horizontal clamp: left of the picture -> word 0, right of it -> last word; both |
// cases raise horOut. Otherwise the pixel position is converted to a word address. |
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]); |
if(horTemp>=zeroExtend({picWidth,3'b000})) |
begin |
horAddr = {picWidth-1,1'b1}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp to the first / last chroma row (no out-of-bounds flag). |
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(reqdata.mvver[11:3]); |
if(verTemp>=zeroExtend({picHeight,3'b000})) |
verAddr = {picHeight-1,3'b111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Last word index per row: depends on the block width and on whether the offset / |
// horizontal fraction make the row spill into one more word. |
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
// Last row index: chroma block height (8, 4 or 2 rows), plus one extra row when there |
// is a vertical fraction. |
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Rule work1Luma: first processing stage for a luma request. |
// Fires while the head of reqfifoWork1 is an IPWLuma request and work1Done is clear. |
// When the head of memRespQ is an IPLoadResp it consumes that response (four bytes) and, |
// depending on the fractional position (xfracl,yfracl), either reorders the bytes, filters |
// them horizontally, or filters them vertically. 15-bit intermediates go to workFile and, |
// in the two-stage case, raw bytes go to storeFile. work1Done is set when the block is done. |
// work1Vector8 is written back and the trace line printed on every firing. |
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
// Both fractions odd: the block needs the extra pass handled by work1Stage==1 below. |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Split the 32-bit response into four bytes, byte 0 from the least-significant bits. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] ); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
// Case A: no vertical filtering is done in this rule; rows are processed left to right. |
if(xfracl==0 || yfracl==0 || xfracl==2) |
begin |
if(xfracl==0)//reorder |
begin |
// Align to the byte offset: each output byte comes either from the previously |
// stored word (work1Vector8) or from the word just read. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && offset!=0) |
tempResult8[ii] = work1Vector8[offsetplusii]; |
else |
tempResult8[ii] = readdata[offsetplusii]; |
work1Vector8Next[ii] = readdata[ii]; |
end |
// Widen to 15 bits with five zero bits below the sample value. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
else//horizontal interpolation |
begin |
offset = offset-2; |
// Slide the window left by four bytes, then insert the new word at 8-offset. |
for(Integer ii=0; ii<8; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work1Vector8Next[tempIndex] = readdata[ii]; |
end |
// Each output uses six consecutive window bytes; for xfracl 1 or 3 the rounded |
// result is then averaged with the neighbouring byte at ii+2 or ii+3. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1); |
end |
end |
// The first workHorNumOffset words of each row only prime the window; no output yet. |
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1)); |
if(work1HorNum >= workHorNumOffset) |
begin |
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset); |
// With no vertical fraction the 8-bit result is final; store it in the widened form. |
if(yfracl == 0) |
begin |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15); |
end |
// Counters: horizontal is the inner loop here. Five extra rows are read when yfracl!=0. |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0); |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
work1Done <= True; |
end |
end |
end |
// Case B: columns are processed top to bottom (vertical counter is the inner loop). |
else if(work1Stage == 0)//vertical interpolation |
begin |
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0); |
// work1Vector8 holds the five previous rows (4 bytes each); the new word is the sixth. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]); |
// Shift the row window up by one row and append the new row. |
for(Integer ii=0; ii<16; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work1Vector8Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1)); |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
Bit#(2) horAddr = work1HorNum; |
Bit#(3) verAddr = truncate(work1VerNum-5); |
// Rows 0..4 only fill the window; results are written from the sixth row onward. |
if(work1VerNum > 4) |
begin |
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15); |
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]); |
end |
// Two-stage case: also keep the raw bytes in storeFile, at a row index shifted by |
// 2 or 3 (depending on yfracl); rows whose shifted index is not below 8 are dropped. |
if(twoStage) |
begin |
Bit#(2) storeHorAddr = work1HorNum; |
Bit#(4) storeVerAddr = work1VerNum; |
if((xfracl==3 ? offset<3 : offset<2)) |
storeHorAddr = storeHorAddr+1; |
if(yfracl==3) |
storeVerAddr = storeVerAddr-3; |
else |
storeVerAddr = storeVerAddr-2; |
if(storeVerAddr < 8) |
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata); |
end |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
// Last column done: either move on to the second stage or finish. |
if(twoStage) |
begin |
work1Stage <= 1; |
if((xfracl==3 ? offset<3 : offset<2)) |
work1HorNum <= 0; |
else |
work1HorNum <= workHorNumMax+1; |
end |
else |
begin |
work1HorNum <= 0; |
work1Done <= True; |
end |
end |
end |
end |
// Case C: work1Stage==1, extra columns of raw bytes copied straight into storeFile. |
else//second stage of twoStage |
begin |
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata); |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
offset = offset + (xfracl==3 ? 1 : 0); |
// Either jump to the last column or return to stage 0 and finish. |
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2))) |
work1HorNum <= workHorNumMax; |
else |
begin |
work1HorNum <= 0; |
work1Stage <= 0; |
work1Done <= True; |
end |
end |
end |
end |
work1Vector8 <= work1Vector8Next; |
$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage); |
endrule |
|
|
// Rule work2Luma: second processing stage for a luma request held in reqregWork2. |
// Fires while reqregWork2 is Valid with an IPWLuma payload and neither work2Done nor |
// work8x8Done is set. It reads the half of workFile/storeFile selected by 1-workFileFlag |
// (rule work1Luma writes using workFileFlag), produces final 8-bit samples into resultFile |
// and marks the written entries in resultReady. When the (sub-)partition is finished it sets |
// work2Done and either advances work2SubMbPart or sets work8x8Done. |
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8; |
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15; |
Vector#(16,Bit#(1)) resultReadyNext = resultReady; |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) readdata = replicate(0); |
// Case A: no vertical fraction. The stored value already holds the sample in bits [12:5]; |
// copy all 16 entries across (work2HorNum is the inner counter, work2VerNum the outer). |
if(yfracl==0) |
begin |
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult8[ii] = (readdata[ii])[12:5]; |
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8); |
resultReadyNext[{work2VerNum[1],work2HorNum,work2VerNum[0]}] = 1; |
work2HorNum <= work2HorNum+1; |
if(work2HorNum == 3) |
begin |
if(work2VerNum == 3) |
begin |
work2VerNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
else |
work2VerNum <= work2VerNum+1; |
end |
end |
// Case B: vertical filtering of the 15-bit intermediates, one column at a time. |
else if(xfracl==0 || xfracl==2)//vertical interpolation |
begin |
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]}); |
// work2Vector15 holds the five previous rows; the row just read is the sixth input. |
// For yfracl 1 or 3 the result is averaged with the rounded row at ii+8 or ii+12. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]); |
if(yfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1); |
else if(yfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1); |
end |
// Shift the row window up by one row and append the new row. |
for(Integer ii=0; ii<16; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work2Vector15Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = 1; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
// Rows 0..4 only fill the window; results are written from the sixth row onward. |
if(work2VerNum > 4) |
begin |
Bit#(1) horAddr = truncate(work2HorNum); |
Bit#(3) verAddr = truncate(work2VerNum-5); |
// Place the result according to which sub-partition is being processed. |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
resultReadyNext[{verAddr,horAddr}] = 1; |
end |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
// Case C: xfracl is 1 or 3 with yfracl non-zero; filtering runs along each row. |
else//horizontal interpolation |
begin |
offset = offset-2; |
// yfracl==2: filter the 15-bit intermediates from workFile horizontally. |
if(yfracl == 2) |
begin |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
// Slide the window left by four entries, then insert the new word at 8-offset. |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector15Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]); |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1); |
end |
end |
// Otherwise (yfracl 1 or 3): filter the raw bytes from storeFile horizontally and |
// average the result with the realigned, rounded value read from workFile. |
else |
begin |
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector8Next[ii] = work2Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector8Next[tempIndex] = readdata8[ii]; |
end |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
end |
// Byte offset of the workFile data: the request offset, plus one when xfracl is 3. |
Bit#(2) verOffset; |
Vector#(4,Bit#(15)) verResult15 = replicate(0); |
if(xfracl == 1) |
verOffset = reqdata.offset; |
else |
verOffset = reqdata.offset+1; |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))}); |
// Realign: each entry comes either from the previous word (work2Vector15) or the new one. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = verOffset+fromInteger(ii); |
if(verOffset <= 3-fromInteger(ii) && verOffset!=0) |
verResult15[ii] = work2Vector15[offsetplusii]; |
else |
verResult15[ii] = readdata[offsetplusii]; |
work2Vector15Next[ii] = readdata[ii]; |
end |
// Rounded average of the two 8-bit values, computed in 9 bits. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5))); |
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1); |
end |
end |
// The first two words of each row only prime the window; output starts at work2HorNum 2. |
if(work2HorNum >= 2) |
begin |
Bit#(1) horAddr = truncate(work2HorNum-2); |
Bit#(3) verAddr = truncate(work2VerNum); |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
resultReadyNext[{verAddr,horAddr}] = 1; |
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext)); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
work2Vector8 <= work2Vector8Next; |
work2Vector15 <= work2Vector15Next; |
resultReady <= resultReadyNext; |
$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset); |
endrule |
|
|
// Rule work1Chroma: first processing stage for a chroma request. |
// Fires while the head of reqfifoWork1 is an IPWChroma request and work1Done is clear. |
// When the head of memRespQ is an IPLoadResp it consumes that response (four bytes), |
// realigns it by the byte offset, combines it with the previous row using the weights |
// (8-xfracc), xfracc, (8-yfracc), yfracc, and writes finished words to storeFile. |
// The same request is processed repeatedly while work1SubMbPart / work1MbPart advance. |
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done ); |
// Fractions are widened to 4 bits so that 8-xfracc and 8-yfracc do not wrap. |
Bit#(4) xfracc = zeroExtend(reqdata.xFracC); |
Bit#(4) yfracc = zeroExtend(reqdata.yFracC); |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Split the 32-bit response into four bytes, byte 0 from the least-significant bits. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
// tempWork8: current row (five samples); tempPrev8: the row above it. |
Vector#(5,Bit#(8)) tempWork8 = replicate(0); |
Vector#(5,Bit#(8)) tempPrev8 = replicate(0); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Bool resultReadyFlag = False; |
// Realign by offset: each sample comes from the previously read word (work1Vector8[0..3]) |
// or from the word just read; the new word is saved for the next firing. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0)) |
tempWork8[ii] = work1Vector8[offsetplusii]; |
else |
tempWork8[ii] = readdata[offsetplusii]; |
work1Vector8Next[ii] = readdata[ii]; |
end |
tempWork8[4] = readdata[offset]; |
// Previous-row storage: entries 9..13 are used for the second word of 16-wide blocks, |
// entries 4..8 otherwise. |
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2)) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
begin |
tempPrev8[ii] = work1Vector8[ii+9]; |
work1Vector8Next[ii+9] = tempWork8[ii]; |
end |
end |
else |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = work1Vector8[ii+4]; |
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3)))) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
work1Vector8Next[ii+4] = tempWork8[ii]; |
end |
end |
// With no vertical fraction there is no row above to blend with; use the current row. |
if(yfracc==0) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = tempWork8[ii]; |
end |
// Weighted sum of the four neighbouring samples, rounded: (sum + 32) >> 6. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]); |
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]); |
tempResult8[ii] = truncate((tempVal+32)>>6); |
end |
// When yfracc!=0 the first row only primes tempPrev8, so no result exists at work1VerNum 0. |
if(work1VerNum > 0 || yfracc==0) |
begin |
if(blockT==IP4x8 || blockT==IP4x4) |
begin |
// Narrow blocks yield two samples per row: park them in work1Vector8 (index 10 + 2*row) |
// and pair them with the two parked there earlier to form one 4-sample word. |
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1); |
work1Vector8Next[tempIndex] = tempResult8[0]; |
work1Vector8Next[tempIndex+1] = tempResult8[1]; |
tempResult8[2] = tempResult8[0]; |
tempResult8[3] = tempResult8[1]; |
tempResult8[0] = work1Vector8[tempIndex]; |
tempResult8[1] = work1Vector8[tempIndex+1]; |
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1) |
resultReadyFlag = True; |
end |
else |
begin |
if(work1HorNum>0 || (xfracc==0 && offset==0)) |
resultReadyFlag = True; |
end |
end |
if(resultReadyFlag) |
begin |
// Destination address inside storeFile, offset by the partition being processed. |
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1))); |
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1)); |
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]); |
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00})); |
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0); |
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8); |
end |
// Same counter limits as loadHorNumMax / loadVerNumMax in rule loadChroma. |
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
// One request finished: advance the sub-partition, then the partition counter. |
Bool allDone = False; |
work1VerNum <= 0; |
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3)) |
work1SubMbPart <= work1SubMbPart+1; |
else |
begin |
work1SubMbPart <= 0; |
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3)) |
work1MbPart <= work1MbPart+1; |
else |
begin |
work1MbPart <= 0; |
work1Done <= True; |
allDone = True; |
end |
end |
// The final request is left at the head of the FIFO: rules switching / switching8x8 |
// read reqfifoWork1.first() and dequeue it themselves. |
if(!allDone) |
reqfifoWork1.deq(); |
end |
end |
end |
work1Vector8 <= work1Vector8Next; |
$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset); |
endrule |
|
|
// Rule work2Chroma: second stage for chroma. Copies one word per firing from the |
// storeFile half selected by 1-workFileFlag into resultFile and marks it ready. |
// After the sixteenth word both work2Done and work8x8Done are raised. |
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done ); |
   // Entry index shared by resultFile, resultReady and (with a prefix) storeFile. |
   Bit#(4) slot = {work2VerNum[1],work2HorNum,work2VerNum[0]}; |
   Bool lastColumn = (work2HorNum == 3); |
   Bool lastRow = (work2VerNum == 3); |
| 
   Vector#(16,Bit#(1)) readyFlags = resultReady; |
   readyFlags[slot] = 1; |
   resultReady <= readyFlags; |
   resultFile.upd(slot, storeFile.sub({(1-workFileFlag),1'b0,slot})); |
| 
   // work2HorNum always increments; the vertical counter steps at the end of each row. |
   work2HorNum <= work2HorNum+1; |
   if(lastColumn && lastRow) |
      begin |
         work2VerNum <= 0; |
         work2Done <= True; |
         work8x8Done <= True; |
      end |
   else if(lastColumn) |
      work2VerNum <= work2VerNum+1; |
| 
   $display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum); |
endrule |
|
|
// Rule outputing: streams finished words from resultFile to outfifo in order, |
// waiting until the matching resultReady flag is set. outDone is raised with the last word. |
rule outputing( !outDone && resultReady[{outBlockNum[1],outPixelNum,outBlockNum[0]}]==1 ); |
   Bit#(4) slot = {outBlockNum[1],outPixelNum,outBlockNum[0]}; |
   Bool lastPixel = (outPixelNum == 3); |
| 
   outfifo.enq(resultFile.sub(slot)); |
   outPixelNum <= outPixelNum+1; |
   if(lastPixel) |
      outBlockNum <= outBlockNum+1; |
   if(lastPixel && outBlockNum == 3) |
      outDone <= True; |
| 
   $display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Rule switching: hands the request that stage 1 has finished over to stage 2. |
// Fires when stage 1 is done, stage 2 is done or idle, and no 8x8 block is pending output. |
rule switching( work1Done && (work2Done || !isValid(reqregWork2)) && !work8x8Done); |
   // Move the head request into the stage-2 register and retire it from the FIFO. |
   reqregWork2 <= tagged Valid (reqfifoWork1.first()); |
   reqfifoWork1.deq(); |
   // Swap the halves of the intermediate files and restart both stages. |
   workFileFlag <= 1-workFileFlag; |
   work1Done <= False; |
   work2Done <= False; |
   $display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Rule switching8x8: like rule switching, but for the case where an 8x8 block has been |
// completed. It additionally waits for outDone and clears the output bookkeeping. |
rule switching8x8( work1Done && (work2Done || !isValid(reqregWork2)) && work8x8Done && outDone); |
   // Move the head request into the stage-2 register and retire it from the FIFO. |
   reqregWork2 <= tagged Valid (reqfifoWork1.first()); |
   reqfifoWork1.deq(); |
   // Swap the halves of the intermediate files and restart both stages. |
   workFileFlag <= 1-workFileFlag; |
   work1Done <= False; |
   work2Done <= False; |
   // Reset the per-8x8 output state. |
   work8x8Done <= False; |
   outDone <= False; |
   resultReady <= replicate(0); |
   $display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
|
// Stores the picture width used by the load rules when clamping horizontal addresses. |
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth ); |
   picWidth._write(newPicWidth); |
endmethod |
|
// Stores the picture height used by the load rules when clamping vertical addresses. |
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight ); |
   picHeight._write(newPicHeight); |
endmethod |
|
// Accepts one interpolation request. The full request goes to the load stage |
// (reqfifoLoad); a reduced work descriptor goes to the first work stage (reqfifoWork1). |
method Action request( InterpolatorIT inputdata ); |
   reqfifoLoad.enq(inputdata); |
   case (inputdata) matches |
      tagged IPLuma .indata : |
         // Luma: two-bit fractions and the next two bits of mvhor as byte offset. |
         reqfifoWork1.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt}); |
      tagged IPChroma .indata : |
         // Chroma: three-bit fractions; the offset also depends on the low bit of hor. |
         reqfifoWork1.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt}); |
      default : |
         noAction; |
   endcase |
endmethod |
|
// Oldest word (four 8-bit samples) waiting in the output FIFO. |
method Vector#(4,Bit#(8)) first() = outfifo.first(); |
|
// Discards the oldest word of the output FIFO. |
method Action deq() = outfifo.deq(); |
|
// Raises endOfFrameFlag; rule loadChroma is guarded by this flag being clear. |
method Action endOfFrame(); |
   endOfFrameFlag._write(True); |
endmethod |
|
// Memory client port: load requests are taken from memReqQ (filled by the load rules) |
// and responses are delivered into memRespQ (drained by the work1 rules). |
interface Client mem_client; |
interface Get request = fifoToGet(memReqQ); |
interface Put response = fifoToPut(memRespQ); |
endinterface |
|
|
endmodule |
|
|
endpackage |
/trunk/src/mkFrameBuffer.bsv
0,0 → 1,113
//********************************************************************** |
// Frame Buffer |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package mkFrameBuffer; |
|
import H264Types::*; |
import IFrameBuffer::*; |
import RegFile::*; |
import GetPut::*; |
import ClientServer::*; |
import FIFO::*; |
|
|
//----------------------------------------------------------- |
// Register file module |
//----------------------------------------------------------- |
|
// Storage interface with one write method and two read methods, all addressed by a |
// Bit#(FrameBufferSz) index and carrying 32-bit words. |
interface FBRFile2; |
method Action store( Bit#(FrameBufferSz) addr, Bit#(32) data ); |
method Bit#(32) load1( Bit#(FrameBufferSz) addr ); |
method Bit#(32) load2( Bit#(FrameBufferSz) addr ); |
endinterface |
|
// Implements FBRFile2 on top of a single RegFile; both load methods read the same array. |
module mkFBRFile2( FBRFile2 ); |
|
   // Index range passed to mkRegFile is 0 .. frameBufferSize. |
   RegFile#(Bit#(FrameBufferSz),Bit#(32)) rfile <- mkRegFile(0,frameBufferSize); |
|
   // Write one word. |
   method Action store( Bit#(FrameBufferSz) addr, Bit#(32) data ) = rfile.upd( addr, data ); |
|
   // First read port. |
   method Bit#(32) load1( Bit#(FrameBufferSz) addr ) = rfile.sub(addr); |
|
   // Second read port. |
   method Bit#(32) load2( Bit#(FrameBufferSz) addr ) = rfile.sub(addr); |
|
endmodule |
|
|
//---------------------------------------------------------------------- |
// Main module |
//---------------------------------------------------------------------- |
|
// Frame buffer: a register-file memory behind two load servers and one store port. |
// Every port is decoupled from the memory by a FIFO. A request whose address is not |
// below frameBufferSize is reported with $display and left at the head of its queue. |
module mkFrameBuffer( IFrameBuffer ); |
|
   //----------------------------------------------------------- |
   // State |
|
   FBRFile2 rfile2 <- mkFBRFile2; |
|
   FIFO#(FrameBufferLoadReq) loadReqQ1 <- mkFIFO(); |
   FIFO#(FrameBufferLoadResp) loadRespQ1 <- mkFIFO(); |
   FIFO#(FrameBufferLoadReq) loadReqQ2 <- mkFIFO(); |
   FIFO#(FrameBufferLoadResp) loadRespQ2 <- mkFIFO(); |
   FIFO#(FrameBufferStoreReq) storeReqQ <- mkFIFO(); |
|
   // Serve a load on port 1. |
   rule loading1 ( loadReqQ1.first() matches tagged FBLoadReq .addrt ); |
      if(addrt>=frameBufferSize) |
         $display( "ERROR FrameBuffer: loading1 outside range" ); |
      else |
         begin |
            let word = rfile2.load1(addrt); |
            loadRespQ1.enq( tagged FBLoadResp word ); |
            loadReqQ1.deq(); |
         end |
   endrule |
|
   // Serve a load on port 2. |
   rule loading2 ( loadReqQ2.first() matches tagged FBLoadReq .addrt ); |
      if(addrt>=frameBufferSize) |
         $display( "ERROR FrameBuffer: loading2 outside range" ); |
      else |
         begin |
            let word = rfile2.load2(addrt); |
            loadRespQ2.enq( tagged FBLoadResp word ); |
            loadReqQ2.deq(); |
         end |
   endrule |
|
   // Serve a store. |
   rule storing ( storeReqQ.first() matches tagged FBStoreReq { addr:.addrt,data:.datat} ); |
      if(addrt>=frameBufferSize) |
         $display( "ERROR FrameBuffer: storing outside range" ); |
      else |
         begin |
            rfile2.store(addrt,datat); |
            storeReqQ.deq(); |
         end |
   endrule |
|
   // End-of-frame barrier: retire the sync markers only when all three queues show one. |
   rule syncing ( loadReqQ1.first() matches tagged FBEndFrameSync &&& loadReqQ2.first() matches tagged FBEndFrameSync &&& storeReqQ.first() matches tagged FBEndFrameSync); |
      storeReqQ.deq(); |
      loadReqQ2.deq(); |
      loadReqQ1.deq(); |
   endrule |
|
|
   interface Server server_load1; |
      interface Put request = fifoToPut(loadReqQ1); |
      interface Get response = fifoToGet(loadRespQ1); |
   endinterface |
   interface Server server_load2; |
      interface Put request = fifoToPut(loadReqQ2); |
      interface Get response = fifoToGet(loadRespQ2); |
   endinterface |
   interface Put server_store = fifoToPut(storeReqQ); |
|
endmodule |
|
endpackage |
/trunk/src/mkPrediction_intra8.bsv
0,0 → 1,2144
//********************************************************************** |
// Prediction |
//---------------------------------------------------------------------- |
// |
// |
|
package mkPrediction; |
|
import H264Types::*; |
|
import IPrediction::*; |
import IInterpolator::*; |
import mkInterpolator::*; |
import FIFO::*; |
import FIFOF::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Kind of macroblock queued for output; this is the element type of outstatefifo in mkPrediction. |
typedef union tagged |
{ |
void Intra; //Intra non-4x4 |
void Intra4x4; |
void Inter; |
} |
OutState deriving(Eq,Bits); |
|
// State of the intra-prediction side; held in register intrastate (reset value Start). |
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void Intra16x16; |
void Intra4x4; |
void IntraPCM; |
} |
IntraState deriving(Eq,Bits); |
|
// State of the inter-prediction side; held in register interstate (reset value Start). |
// The non-Start tags name the inter macroblock kind being processed. |
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void InterP16x16; |
void InterP16x8; |
void InterP8x16; |
void InterP8x8; |
void InterP8x8ref0; |
void InterPskip; |
} |
InterState deriving(Eq,Bits); |
|
// Motion information kept for a neighbouring block (element type of interTopVal, |
// interLeftVal and interTopLeftVal). Either the block has no motion vector (NotInter) |
// or it carries a reference index, a 14-bit horizontal and 12-bit vertical motion |
// vector component, and a nonZeroTransCoeff bit. |
typedef union tagged |
{ |
Bit#(1) NotInter;//0 for not available, 1 for intra-coded |
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv; |
} |
InterBlockMv deriving(Eq,Bits); |
|
// Tag describing the next item to be output; element type of nextoutputfifo in mkPrediction. |
typedef union tagged |
{ |
void SkipMB; |
void NonSkipMB; |
void Intra4x4; |
void Intra4x4PlusChroma; |
} |
NextOutput deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// Selects one byte out of a packed vector of nine bytes. |
// idx 0..7 returns bytes 1..8 (bits [15:8] .. [71:64]); any other idx returns byte 0. |
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx ); |
   // Byte number to extract; out-of-range indices fall back to byte 0. |
   Bit#(4) byteSel = (idx < 8) ? (idx + 1) : 0; |
   // Multiply by eight to obtain the bit position of that byte. |
   Bit#(7) shiftAmt = {byteSel, 3'b000}; |
   Bit#(72) shifted = valVector >> shiftAmt; |
   return shifted[7:0]; |
endfunction |
|
// Selects one byte out of a packed vector of five bytes. |
// idx 0..3 returns bytes 1..4 (bits [15:8] .. [39:32]); any other idx returns byte 0. |
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx ); |
   // Byte number to extract; out-of-range indices fall back to byte 0. |
   Bit#(3) byteSel = (idx < 4) ? (idx + 1) : 0; |
   // Multiply by eight to obtain the bit position of that byte. |
   Bit#(6) shiftAmt = {byteSel, 3'b000}; |
   Bit#(40) shifted = valVector >> shiftAmt; |
   return shifted[7:0]; |
endfunction |
|
// Returns byte number idx of a 32-bit word, byte 0 being the least-significant one. |
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx ); |
   // Bit position of the requested byte (idx * 8). |
   Bit#(5) shiftAmt = {idx, 3'b000}; |
   Bit#(32) shifted = valVector >> shiftAmt; |
   return shifted[7:0]; |
endfunction |
|
// Returns the low byte of a 16-bit value when idx is 0 and the high byte when idx is 1. |
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx ); |
   return (idx == 1) ? valVector[15:8] : valVector[7:0]; |
endfunction |
|
// True when the two 14-bit values, read as signed numbers, differ by four or more. |
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 ); |
   // One extra bit so that the difference of any two 14-bit signed values fits. |
   Int#(15) lhs = unpack(signExtend(val1)); |
   Int#(15) rhs = unpack(signExtend(val2)); |
   Int#(15) gap = (lhs >= rhs) ? (lhs - rhs) : (rhs - lhs); |
   return (gap >= 4); |
endfunction |
|
// True when the two 12-bit values, read as signed numbers, differ by four or more. |
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 ); |
   // One extra bit so that the difference of any two 12-bit signed values fits. |
   Int#(13) lhs = unpack(signExtend(val1)); |
   Int#(13) rhs = unpack(signExtend(val2)); |
   Int#(13) gap = (lhs >= rhs) ? (lhs - rhs) : (rhs - lhs); |
   return (gap >= 4); |
endfunction |
|
|
//----------------------------------------------------------- |
// Prediction Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkPrediction( IPrediction ); |
|
//Common state |
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size); |
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size); |
FIFO#(EntropyDecOT) outfifo <- mkFIFO; |
Reg#(Bool) passFlag <- mkReg(True); |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |
|
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |
|
FIFOF#(OutState) outstatefifo <- mkFIFOF; |
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF; |
Reg#(Bit#(4)) outBlockNum <- mkReg(0); |
Reg#(Bit#(4)) outPixelNum <- mkReg(0); |
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size); |
Reg#(Bit#(1)) outChromaFlag <- mkReg(0); |
Reg#(Bool) outFirstQPFlag <- mkReg(False); |
|
DoNotFire donotfire <- mkDoNotFire(); |
|
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU(); |
|
//Inter state |
Interpolator interpolator <- mkInterpolator(); |
Reg#(InterState) interstate <- mkReg(Start); |
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0); |
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU(); |
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO; |
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU(); |
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO; |
Reg#(Bit#(3)) interReqCount <- mkReg(0); |
Reg#(Bit#(3)) interRespCount <- mkReg(0); |
|
Reg#(Bit#(1)) interStepCount <- mkReg(0); |
Reg#(Bit#(2)) interMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interPassingCount <- mkReg(0); |
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU(); |
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU(); |
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full(); |
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0); |
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO; |
Reg#(Bit#(5)) interNewestMv <- mkReg(0); |
|
Reg#(Bit#(2)) interIPStepCount <- mkReg(0); |
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0); |
|
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0); |
|
Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU(); |
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU(); |
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32); |
Reg#(Bool) interBSoutput <- mkReg(True); |
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8); |
|
|
//Intra state |
Reg#(IntraState) intrastate <- mkReg(Start); |
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0); |
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO; |
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU; |
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO; |
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0); |
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU(); |
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU(); |
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0); |
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0); |
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16); |
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO; |
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0); |
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0); |
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0); |
|
Reg#(Bit#(3)) intraReqCount <- mkReg(0); |
Reg#(Bit#(3)) intraRespCount <- mkReg(0); |
Reg#(Bit#(4)) intraStepCount <- mkReg(0); |
Reg#(Bit#(13)) intraSumA <- mkReg(0); |
Reg#(Bit#(15)) intraSumB <- mkReg(0); |
Reg#(Bit#(15)) intraSumC <- mkReg(0); |
|
Reg#(Vector#(4,Bit#(8))) intraPredVector <- mkRegU(); |
|
|
//----------------------------------------------------------- |
// Rules |
|
////////////////////////////////////////////////////////////////////////////// |
// rule stateMonitor ( True ); |
// if(predictedfifo.notEmpty()) |
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);//////////////////// |
// endrule |
////////////////////////////////////////////////////////////////////////////// |
|
// Rule passing: pass-through mode. While passFlag is set and no macroblock is
// pending in outstatefifo, tokens from infifo are forwarded to outfifo, and the
// sequence / picture / slice parameters this module needs are latched on the way.
// Seeing a macroblock-level token (SDmb_skip_run or SDMmbtype) leaves the token
// in infifo and clears passFlag so that rule inputing takes over.
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) );
   let token = infifo.first();
   $display( "Trace Prediction: passing infifo packed %h", pack(token));
   if(token matches tagged SDmb_skip_run .skipRun)
      passFlag <= False;
   else if(token matches tagged SDMmbtype .mbType)
      passFlag <= False;
   else if(token matches tagged PPSconstrained_intra_pred_flag .flagVal)
      begin
         // consumed here, not forwarded downstream
         ppsconstrained_intra_pred_flag <= flagVal;
         infifo.deq();
      end
   else
      begin
         // every remaining token is forwarded unchanged
         outfifo.enq(token);
         infifo.deq();
         case (token) matches
            tagged NewUnit .rbspByte :
               begin
                  $display("ccl4newunit");
                  $display("ccl4rbspbyte %h", rbspByte);
               end
            tagged SPSpic_width_in_mbs .widthMbs :
               begin
                  interpolator.setPicWidth(widthMbs);
                  picWidth <= widthMbs;
               end
            tagged SPSpic_height_in_map_units .heightMbs :
               begin
                  interpolator.setPicHeight(heightMbs);
                  picHeight <= heightMbs;
               end
            tagged SHfirst_mb_in_slice .sliceStart :
               begin
                  // new slice: reset macroblock position and neighbour state
                  outFirstQPFlag <= True;
                  firstMb <= sliceStart;
                  currMb <= sliceStart;
                  currMbHor <= sliceStart;   // normalised to (hor,ver) by currMbHorUpdate
                  currMbVer <= 0;
                  intra4x4typeLeft <= replicate(15);
                  interTopLeftVal <= replicate(NotInter 0);
                  if(sliceStart==0)
                     interLeftVal <= replicate(NotInter 0);
               end
            tagged EndOfFile :
               begin
                  $display( "INFO Prediction: EndOfFile reached" );
               end
            default: noAction;
         endcase
      end
endrule
|
|
// Rule inputing: macroblock-level input handling, active while passFlag is clear.
// Consumes macroblock / sub-macroblock syntax elements from infifo and starts the
// intra or inter prediction state machines:
//  - SDmb_skip_run : starts one P-skip macroblock per firing until interPskipCount
//                    reaches the run length, then dequeues the token.
//  - SDMmbtype     : selects Intra16x16 / Intra4x4 / inter partitioning and primes
//                    the request/response counters.
//  - SDMM* / SDMS* : buffers prediction modes, reference indices, sub-MB types and
//                    motion-vector differences for the prediction rules.
//  - anything else : sets passFlag again, handing the token back to rule passing.
// donotfire.doNotFire() is called on paths where the macroblock cannot be started
// yet (NOTE(review): presumably a method whose implicit condition is never true,
// so the whole rule is blocked on those paths - confirm in the donotfire module).
rule inputing ( !passFlag ); |
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first())); |
case (infifo.first()) matches |
tagged SDmb_skip_run .xdata : |
begin |
// a new macroblock is only started once both state machines are idle
if(interstate==Start && intrastate==Start) |
begin |
if(interPskipCount < xdata) |
begin |
// start the next skipped macroblock of the run
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata); |
outstatefifo.enq(Inter); |
interstate <= InterPskip; |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14); |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interPskipCount <= interPskipCount+1; |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
interCurrMbDiff <= interCurrMbDiff+1; |
nextoutputfifo.enq(SkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
else |
begin |
// whole run has been issued: reset the counter and consume the token
$display( "Trace Prediction: passing no SDmb_skip_run"); |
interPskipCount <= 0; |
infifo.deq(); |
end |
end |
else |
donotfire.doNotFire(); |
end |
tagged SDMmbtype .xdata : |
begin |
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start) |
begin |
// the deq is unconditional on this path; the branches below call
// donotfire.doNotFire() when the macroblock cannot be accepted yet
infifo.deq(); |
$display( "INFO Prediction: SDMmbtype %0d", xdata); |
if(mbPartPredMode(xdata,0)==Intra_16x16) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra); |
intrastate <= Intra16x16; |
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3}) |
intra16x16_pred_mode <= tempv1; |
else |
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" ); |
intraReqCount <= 1; |
intraRespCount <= 1; |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
else if(xdata==I_NxN) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra4x4); |
intrastate <= Intra4x4; |
intraReqCount <= 1; |
intraRespCount <= 1; |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
else if(xdata==I_PCM) |
begin |
// I_PCM is unsupported: simulation is terminated
$display( "ERROR Prediction: I_PCM not implemented yet"); |
$finish;//////////////////////////////////////////////////////////////////////////////////////// |
intra4x4typeLeft <= replicate(13); |
intra4x4typeTop <= replicate(13); |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
begin |
// remaining macroblock types are inter (P) types
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
outstatefifo.enq(Inter); |
case(xdata) |
P_L0_16x16: interstate <= InterP16x16; |
P_L0_L0_16x8: interstate <= InterP16x8; |
P_L0_L0_8x16: interstate <= InterP8x16; |
P_8x8: interstate <= InterP8x8; |
P_8x8ref0: interstate <= InterP8x8ref0; |
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype"); |
endcase |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14);///////////////////////////////////////////////////////////////////////////// |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
nextoutputfifo.enq(NonSkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
// counted for intra and inter macroblocks alike; rule outputing decrements it
// again when a macroblock has been completely output
interCurrMbDiff <= interCurrMbDiff+1; |
end |
else |
donotfire.doNotFire(); |
end |
tagged SDMMrem_intra4x4_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
rem_intra4x4_pred_mode.enq(xdata); |
end |
tagged SDMMintra_chroma_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
intra_chroma_pred_mode.enq(xdata); |
end |
tagged SDMMref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// interPassingCount selects the partition slot; it wraps after one index for
// InterP16x16 and after two otherwise
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
if(interstate==InterP16x16 || interPassingCount==1) |
interPassingCount <= 0; |
else |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMMmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// mvd components arrive as two consecutive tokens; the first is parked in
// interMvDiffTemp and the pair is enqueued when the second arrives
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
tagged SDMSsub_mb_type .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// NOTE(review): no explicit reset here - presumably interPassingCount is a
// 2-bit counter that wraps after four sub-macroblock entries; confirm its width
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata); |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMSref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMSmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
// same two-token pairing as SDMMmvd_l0 above
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
// any other token is not macroblock data: return to pass-through mode without
// consuming it
default: passFlag <= True; |
endcase |
endrule |
|
|
// Rule outputing: produces the module's macroblock output stream on outfifo.
// Each firing emits exactly one of:
//  - an IBTmb_qp record forwarded from infifo_ITB,
//  - a PBbS boundary-strength record for the current luma 4x4 block (only when
//    interBSoutput is set, on luma, at pixel row 0 of the block), or
//  - a PBoutput row of four reconstructed pixels (prediction from predictedfifo,
//    plus the residual from infifo_ITB where one is present, clipped to 0..255).
// After a pixel row has been emitted (outputFlag==1) the rule also updates the
// intra left/top neighbour buffers, issues the neighbour-memory store requests at
// the last chroma block, and advances outPixelNum / outBlockNum / outChromaFlag
// and the macroblock position.
rule outputing ( currMbHor<zeroExtend(picWidth) ); |
Bit#(1) outputFlag = 0; |
Vector#(4,Bit#(8)) outputVector = replicate(0); |
// outBlockNum / outPixelNum are split into horizontal and vertical 2-bit positions
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]}; |
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]}; |
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]}; |
Bit#(4) totalVer = {blockVer,pixelVer}; |
//$display( "Trace Prediction: outputing" ); |
if(outFirstQPFlag) |
begin |
// first record after a slice header must be the QP
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata) |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
else |
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()"); |
end |
else if(nextoutputfifo.first() == SkipMB) |
begin |
// skipped macroblock: no residual, infifo_ITB is not consulted
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
// pre-value 3 maps to strength 4; otherwise strength 2 if the neighbouring
// block had non-zero coefficients, else the pre-value itself
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector); |
end |
end |
else |
begin |
case ( infifo_ITB.first() ) matches |
tagged IBTmb_qp .xdata : |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
tagged ITBresidual .xdata : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
// intra macroblocks: strength 4 on the macroblock edge, 3 inside
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
// inter block with residual: strength 2 unless the pre-value is 3
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True); |
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
Bit#(11) tempOutputValue = 0; |
// reconstruct four pixels: prediction + residual, clipped to 0..255
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]); |
// bit 10 set means the sum is negative
if(tempOutputValue[10]==1) |
outputVector[ii] = 0; |
else if(tempOutputValue[9:0] > 255) |
outputVector[ii] = 255; |
else |
outputVector[ii] = tempOutputValue[7:0]; |
end |
outfifo.enq(PBoutput outputVector); |
infifo_ITB.deq(); |
predictedfifo.deq(); |
outputFlag = 1; |
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector); |
end |
end |
tagged ITBcoeffLevelZeros : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
// one ITBcoeffLevelZeros token covers all four rows of the block, so it is
// only dequeued on the last row
if(outPixelNum == 12) |
infifo_ITB.deq(); |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector); |
end |
end |
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" ); |
endcase |
end |

// ---- bookkeeping after a pixel row has been emitted ----
if(outputFlag == 1) |
begin |
$display("ccl4PBoutput %0d", outputVector[0]); |
$display("ccl4PBoutput %0d", outputVector[1]); |
$display("ccl4PBoutput %0d", outputVector[2]); |
$display("ccl4PBoutput %0d", outputVector[3]); |

// first row of a new macroblock: release the store requests that were parked in
// intraMemReqQdelay / interMemReqQdelay at the end of the previous macroblock
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1) |
begin |
intraMemReqQ.enq(intraMemReqQdelay); |
interMemReqQ.enq(interMemReqQdelay); |
//$display( "TRACE Prediction: passing storing addr data");////////////////// |
end |

// update the left-neighbour buffers with the right-most pixel of this row
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)) |
begin |
if(outChromaFlag==0) |
begin |
Bit#(32) intraLeftValNextTemp = intraLeftValNext; |
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0)) |
begin |
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor)); |
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]); |
end |
case(pixelVer) |
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]}; |
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]}; |
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]}; |
3: |
begin |
// last row of the block: commit the accumulated column and its type tag
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp}); |
intraLeftValNext <= zeroExtend(outputVector[3]); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13); |
else |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14); |
end |
endcase |
end |
else |
begin |
// chroma: outBlockNum[2] selects between the two chroma buffers
if(outBlockNum[2]==0) |
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]); |
else |
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]); |
end |
end |

// update the top-neighbour buffers with the bottom row of the block
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))) |
begin |
if(outChromaFlag==0) |
begin |
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]}); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13); |
else |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14); |
end |
else |
begin |
if(outBlockNum[2]==0) |
begin |
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0; |
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma0 <= intraTopValChroma0Next; |
end |
else |
begin |
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1; |
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma1 <= intraTopValChroma1Next; |
end |
end |
end |

// last chroma block of the macroblock: write this macroblock's bottom-row state to
// the neighbour memories, one address per pixel row (pixelVer is used as the
// low address bits)
if(outChromaFlag==1 && outBlockNum==7) |
begin |
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor); |
InterBlockMv outBlockMv = interOutBlockMvfifo.first(); |
if(outBlockMv matches tagged BlockMv .bdata) |
begin |
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)}); |
interOutBlockMvfifo.deq(); |
end |
else if(pixelVer==3) |
interOutBlockMvfifo.deq(); |
// the store for the last address is parked and released at the start of the next
// macroblock (see the picWidth>1 branch above)
if(pixelVer==3 && picWidth>1) |
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}; |
else |
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}); |
if(pixelVer>0) |
begin |
// intra store for address pixelVer-1; intraTopValChroma1 entry 3 is only being
// written in this same firing, so its value is taken from outputVector directly
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)])); |
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)]; |
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)]; |
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]}); |
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore}); |
if(pixelVer==3) |
begin |
// NOTE(review): this parked request is only released by the picWidth>1 branch
// above; confirm how the picWidth==1 case stores address 3.
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3])); |
intraTopValStore = intraTopVal[3]; |
intraTopValChroma0Store = intraTopValChroma0[3]; |
intraTopValChroma1Store = {outputVector[3],outputVector[2]}; |
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore}; |
end |
end |
end |
// advance to the next pixel row / block / macroblock
outPixelNum <= outPixelNum+4; |
if(outPixelNum == 12) |
begin |
if(outChromaFlag==0) |
begin |
outBlockNum <= outBlockNum+1; |
if(outBlockNum == 15) |
outChromaFlag <= 1; |
if(nextoutputfifo.first() == Intra4x4) |
nextoutputfifo.deq(); |
end |
else |
begin |
if(outBlockNum == 7) |
begin |
// macroblock complete
outBlockNum <= 0; |
outChromaFlag <= 0; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
interCurrMbDiff <= interCurrMbDiff-1; |
outstatefifo.deq; |
intrastate <= Start; |
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1) |
interpolator.endOfFrame(); |
nextoutputfifo.deq(); |
end |
else |
outBlockNum <= outBlockNum+1; |
end |
end |
end |
endrule |
|
|
// Rule currMbHorUpdate: folds an out-of-range horizontal macroblock position back
// into (currMbHor, currMbVer) form. Each firing removes either eight picture
// widths (when at least that many remain) or a single width, and advances the row
// counter by the same number of rows. All other position-dependent rules are
// guarded by currMbHor < picWidth, so they wait until this rule has finished.
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) );
   Bit#(PicAreaSz) widthMbs = zeroExtend(picWidth);
   Bool eightRowsAtOnce = ((currMbHor >> 3) >= widthMbs);
   if(eightRowsAtOnce)
      begin
         currMbVer <= currMbVer + 8;
         currMbHor <= currMbHor - (widthMbs << 3);
      end
   else
      begin
         currMbVer <= currMbVer + 1;
         currMbHor <= currMbHor - widthMbs;
      end
   //$display( "Trace Prediction: currMbHorUpdate %h %h", currMbHor, currMbVer);
endrule
|
|
// inter prediction rules |
|
// Rule interSendReq: issues the neighbour-memory load requests for the inter
// macroblock currently being predicted (the one interCurrMbDiff-1 macroblocks
// ahead of the output position). One request is sent per firing:
//   interReqCount 1..4 : the four entries of this macroblock column,
//   interReqCount 5    : entry 0 of the column to the right if one exists,
//                        otherwise entry 3 of the column to the left,
//   interReqCount 6    : entry 3 of the column to the left (when both exist).
// No request is sent at all while the macroblock index is below picWidth; the
// counter is cleared as soon as nothing further has to be requested.
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthMbs = zeroExtend(picWidth);
   Bit#(PicAreaSz) predMb = currMb+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) predMbHor = currMbHor+zeroExtend(interCurrMbDiff)-1;
   if( predMbHor >= widthMbs )
      predMbHor = predMbHor-widthMbs;   // prediction has wrapped into the next row
   Bit#(PicWidthSz) mbCol = truncate(predMbHor);
   Bool rightColExists = ((predMbHor+1) < widthMbs);
   Bool leftColUsable = (predMbHor>0 && predMb-firstMb>widthMbs);
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0;
   Bool issue = False;
   if( !(predMb < widthMbs) )
      begin
         if(interReqCount<5)
            begin
               Bit#(2) slot = truncate(interReqCount-1);
               reqAddr = {mbCol,slot};
               issue = True;
            end
         else if(interReqCount==5)
            begin
               if(rightColExists)
                  begin
                     reqAddr = {(mbCol+1),2'b00};
                     issue = True;
                  end
               else if(leftColUsable)
                  begin
                     reqAddr = {(mbCol-1),2'b11};
                     issue = True;
                  end
            end
         else if(interReqCount==6 && rightColExists && leftColUsable)
            begin
               reqAddr = {(mbCol-1),2'b11};
               issue = True;
            end
      end
   if(issue)
      begin
         interMemReqQ.enq(LoadReq reqAddr);
         interReqCount <= interReqCount+1;
         //$display( "TRACE Prediction: interSendReq addr %0d",reqAddr);///////////////////////
      end
   else
      interReqCount <= 0;
   $display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, reqAddr);
endrule
|
|
// Rule interReceiveNoResp: counterpart of interSendReq for macroblocks whose index
// is below picWidth, for which no load requests are issued. It skips the response
// phase and starts the motion-vector and interpolation steps directly. At
// horizontal position 0 the left and top-left neighbour vectors are reset to
// NotInter 0.
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) );
   Bit#(PicAreaSz) widthMbs = zeroExtend(picWidth);
   Bit#(PicAreaSz) predMbHor = currMbHor+zeroExtend(interCurrMbDiff)-1;
   if( predMbHor >= widthMbs )
      predMbHor = predMbHor-widthMbs;
   Bool atFirstColumn = (predMbHor == 0);
   if(atFirstColumn)
      begin
         interTopLeftVal <= replicate(NotInter 0);
         interLeftVal <= replicate(NotInter 0);
      end
   interIPStepCount <= 1;
   interStepCount <= 1;
   interRespCount <= 0;
   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount);
endrule
|
|
// Rule interReceiveResp: consumes the load responses requested by interSendReq
// and fills the top / top-left neighbour motion-vector state for the inter
// macroblock being predicted. One response is consumed per firing:
//   interRespCount 1..4 : stored in interTopVal[0..3],
//   interRespCount 5    : stored in interTopVal[4] if a column exists to the
//                         right, otherwise in interTopLeftVal[0],
//   interRespCount 6    : stored in interTopLeftVal[0].
// After the last expected response the rule clears interRespCount and starts the
// motion-vector (interStepCount) and interpolation (interIPStepCount) steps.
//
// FIX: the original also ended the response phase after the FIRST response for
// InterPskip / InterP16x16 / InterP16x8 when neither a right nor a usable left
// column exists. interSendReq, however, always issues all four requests for
// interReqCount 1..4 regardless of interstate, so three responses were left in
// interMemRespQ and would have been consumed as the next macroblock's data.
// The response phase now always drains the four column responses.
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data);
   Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1;
   Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1;
   if( currMbHorTemp >= zeroExtend(picWidth) )
      currMbHorTemp = currMbHorTemp-zeroExtend(picWidth);
   // same availability tests as interSendReq uses for requests 5 and 6
   Bool rightColExists = ((currMbHorTemp+1)<zeroExtend(picWidth));
   Bool leftColUsable = (currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth));
   Bool noMoreResp = False;
   Bit#(2) temp2bit = 0;
   InterBlockMv unpackedData = unpack(data);
   Vector#(5,InterBlockMv) interTopValNext = interTopVal;
   Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal;
   if(interRespCount<5)
      begin
         temp2bit = truncate(interRespCount-1);
         interTopValNext[temp2bit] = unpackedData;
         // responses 1..4 are always requested, so only the fourth can be the last
         if(interRespCount==4 && !rightColExists && !leftColUsable)
            noMoreResp = True;
      end
   else if(interRespCount==5)
      begin
         if(rightColExists)
            begin
               interTopValNext[4] = unpackedData;
               if(!leftColUsable)
                  noMoreResp = True;
            end
         else
            begin
               interTopLeftValNext[0] = unpackedData;
               noMoreResp = True;
            end
      end
   else
      begin
         interTopLeftValNext[0] = unpackedData;
         noMoreResp = True;
      end
   interMemRespQ.deq();
   //$display( "TRACE Prediction: interReceiveResp data %h",data);///////////////////////
   if(!noMoreResp)
      interRespCount <= interRespCount+1;
   else
      begin
         interRespCount <= 0;
         interStepCount <= 1;
         interIPStepCount <= 1;
         // at horizontal position 0 there is no left neighbour
         if(currMbHorTemp == 0)
            begin
               interLeftVal <= replicate(NotInter 0);
               interTopLeftValNext = replicate(NotInter 0);
            end
      end
   interTopVal <= interTopValNext;
   interTopLeftVal <= interTopLeftValNext;
   $display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data);
endrule
|
|
// Rule interProcessStep: motion-vector derivation for one 4x4 block per firing.
// The sixteen 4x4 blocks of the macroblock are visited in the order given by
// interMbPartNum (outer) and interSubMbPartNum (inner). For each block the rule
//  1. derives the partition geometry from interstate / sub-macroblock type,
//  2. gathers the neighbours A (left), B (top) and C (top-right, falling back to
//     top-left) from interLeftVal / interTopVal / interTopLeftVal,
//  3. for the first block of a partition (calcmv) predicts the motion vector
//     (single matching reference, median, or the directional / P-skip special
//     cases) and adds the decoded difference from interMvDiff; for other blocks
//     copies the vector from the left or top block of the same partition,
//  4. computes the boundary-strength pre-values against the left and top
//     neighbours and enqueues them on interBSfifo,
//  5. updates the neighbour vectors and advances the block counters.
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) ); |
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]}; |
Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]}; |
// partWidth / partHeight are in units of 4x4 blocks
Bit#(3) partWidth = 0; |
Bit#(3) partHeight = 0; |
Bit#(3) numPart = 1; |
Bit#(3) numSubPart = 1; |
Bit#(2) subMbType = 0; |
// noBlockC: neighbour C is not taken from the top row for this partition
Bool noBlockC = False; |
// calcmv: this block is the first of its partition, so a vector is predicted here
Bool calcmv = False; |
// leftmv: when not calcmv, copy the vector from the left (True) or top (False) block
Bool leftmv = False; |
if(interstate==InterPskip || interstate==InterP16x16) |
begin |
partWidth = 4; |
partHeight = 4; |
numPart = 1; |
calcmv = (interMbPartNum==0 && interSubMbPartNum==0); |
leftmv = (blockHor>0); |
end |
else if(interstate==InterP16x8) |
begin |
partWidth = 4; |
partHeight = 2; |
numPart = 2; |
if(interMbPartNum==2) |
noBlockC = True; |
calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0); |
leftmv = (blockHor>0); |
end |
else if(interstate==InterP8x16) |
begin |
partWidth = 2; |
partHeight = 4; |
numPart = 2; |
calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0); |
// copy from the left on the top block row, from the top on all lower rows
leftmv = !(blockVer>0); |
end |
else if(interstate==InterP8x8 || interstate==InterP8x8ref0) |
begin |
numPart = 4; |
subMbType = interSubMbTypeVector[interMbPartNum]; |
numSubPart = numSubMbPart(subMbType); |
// the assignments below imply subMbType 0..3 = 8x8, 8x4, 4x8, 4x4 sub-partitions
case(subMbType) |
0: |
begin |
partWidth = 2; |
partHeight = 2; |
if(interMbPartNum==3) |
noBlockC = True; |
calcmv = (interSubMbPartNum==0); |
leftmv = (blockHor[0]>0); |
end |
1: |
begin |
partWidth = 2; |
partHeight = 1; |
if(interSubMbPartNum==2) |
noBlockC = True; |
calcmv = (interSubMbPartNum[0]==0); |
leftmv = True; |
end |
2: |
begin |
partWidth = 1; |
partHeight = 2; |
calcmv = (interSubMbPartNum[1]==0); |
leftmv = False; |
end |
3: |
begin |
partWidth = 1; |
partHeight = 1; |
if(interSubMbPartNum==3) |
noBlockC = True; |
calcmv = True; |
end |
endcase |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected interstate"); |
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]); |
// blockABC[0] = A (left), [1] = B (top), [2] = C (top-right or top-left)
Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0); |
// the first macroblock of the slice has no left neighbour
if( currMbTemp-firstMb==0 && blockHor==0 ) |
blockABC[0] = (NotInter 0); |
else |
blockABC[0] = interLeftVal[blockVer]; |
// macroblocks less than one picture width into the slice have no top neighbour
if( currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0 ) |
blockABC[1] = (NotInter 0); |
else |
blockABC[1] = interTopVal[blockHor]; |
blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth]; |
if(noBlockC || blockABC[2]==(NotInter 0)) |
blockABC[2] = interTopLeftVal[blockVer]; |
Bit#(14) mvhorfinal = 0; |
Bit#(12) mvverfinal = 0; |
Bit#(5) interNewestMvNext = 0; |
if(calcmv)//motion vector calculation |
begin |
Vector#(3,Int#(14)) mvhorABC = replicate(0); |
Vector#(3,Int#(12)) mvverABC = replicate(0); |
// validCount: number of neighbours whose reference index equals refIndex
Bit#(2) validCount = 0; |
Bit#(14) mvhorPred = 0; |
Bit#(12) mvverPred = 0; |
for(Integer ii=0; ii<3; ii=ii+1) |
begin |
if(blockABC[ii] matches tagged BlockMv .xdata) |
begin |
mvhorABC[ii] = unpack(xdata.mvhor); |
mvverABC[ii] = unpack(xdata.mvver); |
if(xdata.refIdx == refIndex) |
begin |
validCount = validCount+1; |
mvhorPred = xdata.mvhor; |
mvverPred = xdata.mvver; |
end |
end |
else |
begin |
mvhorABC[ii] = 0; |
mvverABC[ii] = 0; |
end |
end |
// exactly one matching neighbour: its vector is the prediction; otherwise use the
// component-wise median of A, B and C
if(validCount != 1)//median |
begin |
if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2]) |
mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2]) |
mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
else |
mvhorPred = pack(mvhorABC[0]); |
if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2]) |
mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2]) |
mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
else |
mvverPred = pack(mvverABC[0]); |
end |
if(interstate==InterPskip) |
begin |
// P-skip: the vector is forced to zero when A or B is NotInter 0 or is a
// zero vector with reference index 0
for(Integer ii=0; ii<2; ii=ii+1) |
begin |
if(blockABC[ii] matches tagged BlockMv .xdata) |
begin |
if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0) |
begin |
mvhorPred = 0; |
mvverPred = 0; |
end |
end |
else if(blockABC[ii] matches tagged NotInter 0) |
begin |
mvhorPred = 0; |
mvverPred = 0; |
end |
end |
end |
else if(interstate==InterP16x8 || interstate==InterP8x16) |
begin |
// directional prediction: one designated neighbour overrides the prediction
// when its reference index matches
InterBlockMv blockCheck; |
if(interstate==InterP16x8) |
begin |
if(interMbPartNum==0) |
blockCheck = blockABC[1]; |
else |
blockCheck = blockABC[0]; |
end |
else |
begin |
if(interMbPartNum==0) |
blockCheck = blockABC[0]; |
else |
blockCheck = blockABC[2]; |
end |
if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex) |
begin |
mvhorPred = xdata.mvhor; |
mvverPred = xdata.mvver; |
end |
end |
mvhorfinal = mvhorPred; |
mvverfinal = mvverPred; |
// all modes except P-skip add the decoded motion-vector difference
if(interstate!=InterPskip) |
begin |
mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred)); |
mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred)); |
interMvDiff.deq(); |
end |
interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal)); |
// tells interIPProcessStep (via interNewestMv) how many vectors are available
interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1; |
$display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred); |
end |
else |
begin |
// not the first block of its partition: copy the vector from the left or top block
if(leftmv) |
begin |
if(blockABC[0] matches tagged BlockMv .xdata) |
begin |
mvhorfinal = unpack(xdata.mvhor); |
mvverfinal = unpack(xdata.mvver); |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected blockABC[0]"); |
end |
else |
begin |
if(blockABC[1] matches tagged BlockMv .xdata) |
begin |
mvhorfinal = unpack(xdata.mvhor); |
mvverfinal = unpack(xdata.mvver); |
end |
else |
$display( "ERROR Prediction: interProcessStep unexpected blockABC[1]"); |
end |
end |
// pre-values: 3 = neighbour is not an inter block, 2 = neighbour has non-zero
// coefficients, 1 = different reference or vector difference flagged by
// absDiffGEFour14 / absDiffGEFour12, 0 = none of these
Bit#(2) tempBShor = 0;//bS calculation |
Bit#(2) tempBSver = 0; |
if(interLeftVal[blockVer] matches tagged BlockMv .xdata) |
begin |
if(xdata.nonZeroTransCoeff == 1) |
tempBShor = 2; |
else |
begin |
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
tempBShor = 1; |
else |
tempBShor = 0; |
end |
end |
else |
tempBShor = 3; |
if(interTopVal[blockHor] matches tagged BlockMv .xdata) |
begin |
if(xdata.nonZeroTransCoeff == 1) |
tempBSver = 2; |
else |
begin |
if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
tempBSver = 1; |
else |
tempBSver = 0; |
end |
end |
else |
tempBSver = 3; |
interBSfifo.enq(tuple2(tempBShor,tempBSver)); |
Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val |
Vector#(4,InterBlockMv) interLeftValNext = interLeftVal; |
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal; |
interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
// the previous top value becomes the top-left neighbour of the next block in this row
interTopLeftValNext[blockVer] = interTopVal[blockHor]; |
interTopVal <= interTopValNext; |
interLeftVal <= interLeftValNext; |
interTopLeftVal <= interTopLeftValNext; |
// bottom block row: queue the vector for the neighbour-memory store done in rule outputing
if(blockVer == 3) |
interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
if(interSubMbPartNum == 3)//next step |
begin |
interSubMbPartNum <= 0; |
if(interMbPartNum == 3) |
begin |
// all sixteen blocks done: stop this rule and mark every vector as available
interMbPartNum <= 0; |
interStepCount <= 0; |
interNewestMvNext = 16; |
end |
else |
interMbPartNum <= interMbPartNum+1; |
end |
else |
interSubMbPartNum <= interSubMbPartNum+1; |
if(interNewestMvNext > 0) |
interNewestMv <= interNewestMvNext; |
endrule |
|
|
// Rule interIPProcessStep: issues one request to the interpolator per firing for
// the inter macroblock being predicted. It only fires once interProcessStep has
// produced the motion vector it needs (interNewestMv is compared against the
// current partition / sub-partition index).
//  - interIPStepCount == 1 : luma requests (IPLuma). For every state other than
//    InterP8x8 / InterP8x8ref0 the macroblock is requested as four IP8x8 blocks,
//    each using the vector and reference index of the partition covering it.
//  - interIPStepCount  > 1 : chroma requests (IPChroma), with uv taken from
//    interIPStepCount[0] and the native partition shape in bt.
// interIPStepCount is advanced after the last partition of each pass.
// NOTE(review): no explicit reset to 0 is visible here; presumably the counter
// width makes it wrap to 0 after the last chroma pass - confirm its declaration.
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) ); |
// position of the macroblock being predicted, wrapped into the next row if needed
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1; |
Bit#(PicHeightSz) currMbVerTemp = currMbVer; |
if( currMbHorTemp >= zeroExtend(picWidth) ) |
begin |
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth); |
currMbVerTemp = currMbVerTemp+1; |
end |
Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]}; |
Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]}; |
Bit#(3) numPart = 1; |
Bit#(3) numSubPart = 1; |
Bit#(2) subMbType = 0; |
if(interstate==InterPskip || interstate==InterP16x16) |
numPart = 1; |
else if(interstate==InterP16x8) |
numPart = 2; |
else if(interstate==InterP8x16) |
numPart = 2; |
else if(interstate==InterP8x8 || interstate==InterP8x8ref0) |
begin |
numPart = 4; |
subMbType = interSubMbTypeVector[interIPMbPartNum]; |
numSubPart = numSubMbPart(subMbType); |
end |
else |
$display( "ERROR Prediction: interIPProcessStep unexpected interstate"); |
Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]); |
// block coordinates: horTemp in 4-pixel units, verTemp in pixel units (two zero bits appended)
Bit#(PicWidthSz) currMbHorT = truncate(currMbHorTemp); |
Bit#(TAdd#(PicWidthSz,2)) horTemp = {currMbHorT,blockHor}; |
Bit#(TAdd#(PicHeightSz,4)) verTemp = {currMbVerTemp,blockVer,2'b00}; |
IPBlockType btTemp = IP16x16; |
if(interstate==InterPskip || interstate==InterP16x16) |
btTemp = IP16x16; |
else if(interstate==InterP16x8) |
btTemp = IP16x8; |
else if(interstate==InterP8x16) |
btTemp = IP8x16; |
else |
begin |
case(subMbType) |
0: btTemp = IP8x8; |
1: btTemp = IP8x4; |
2: btTemp = IP4x8; |
3: btTemp = IP4x4; |
endcase |
end |
Bit#(14) mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum})); |
Bit#(12) mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum})); |
if(interIPStepCount == 1) |
begin |
if(!(interstate==InterP8x8 || interstate==InterP8x8ref0)) |
begin |
// luma of 16x16 / 16x8 / 8x16 macroblocks is requested as four 8x8 blocks;
// interIPMbPartNumTemp is the index of the partition that owns this 8x8 block
numPart = 4; |
Bit#(2) interIPMbPartNumTemp = interIPMbPartNum; |
if(btTemp==IP16x16) |
interIPMbPartNumTemp = 0; |
else if(btTemp==IP16x8 && interIPMbPartNumTemp[0]==1) |
interIPMbPartNumTemp = interIPMbPartNumTemp-1; |
else if(btTemp==IP8x16 && interIPMbPartNumTemp[1]==1) |
interIPMbPartNumTemp = interIPMbPartNumTemp-2; |
refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNumTemp]); |
btTemp = IP8x8; |
mvhorTemp = tpl_1(interMvFile.sub({interIPMbPartNumTemp,2'b00})); |
mvverTemp = tpl_2(interMvFile.sub({interIPMbPartNumTemp,2'b00})); |
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
end |
else |
interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
end |
else |
interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
// advance to the next sub-partition / partition / pass
if(interIPSubMbPartNum >= truncate(numSubPart-1)) |
begin |
interIPSubMbPartNum <= 0; |
if(interIPMbPartNum >= truncate(numPart-1)) |
begin |
interIPMbPartNum <= 0; |
interIPStepCount <= interIPStepCount+1; |
end |
else |
begin |
// the second 16x8 partition starts at partition index 2
if(btTemp == IP16x8) |
interIPMbPartNum <= 2; |
else |
interIPMbPartNum <= interIPMbPartNum+1; |
end |
end |
else |
begin |
// sub-macroblock type 1 (IP8x4): the second sub-partition starts at index 2
if(subMbType == 1) |
interIPSubMbPartNum <= 2; |
else |
interIPSubMbPartNum <= interIPSubMbPartNum+1; |
end |
$display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp)); |
endrule |
|
|
// Rule interDone: returns the inter state machine to Start once all four inter
// phases (load requests, load responses, motion-vector steps, interpolation
// steps) are idle, which allows rule inputing to accept the next macroblock.
rule interDone ( interReqCount==0 && interRespCount==0 && interStepCount==0 && interIPStepCount==0 && interstate!=Start );
   interstate <= Start;
   //$display( "Trace Prediction: interDone %h", interstate);
endrule
|
|
// interOutputTransfer: moves the value at the head of the interpolator output into |
// predictedfifo, one element per firing. |
rule interOutputTransfer ( True ); |
   let interpolated = interpolator.first(); |
   interpolator.deq(); |
   predictedfifo.enq(interpolated); |
   //$display( "Trace Prediction: interOutputTransfer %h %h", interstate, interOutputCount); |
endrule |
|
|
|
// intra prediction rules |
|
// intraSendReq: issues at most one LoadReq per firing on intraMemReqQ and advances |
// intraReqCount; when no further request applies, intraReqCount is cleared to 0. |
//   - nothing is requested while currMb-firstMb < picWidth |
//   - counts 1..4 request addresses {column, count-1} |
//   - count 5 requests {column+1, 0} if the top-right condition holds, otherwise |
//     {column-1, 3} if the top-left condition holds |
//   - count 6 requests {column-1, 3} only when both conditions hold |
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() ); |
   Bit#(PicWidthSz) mbCol = truncate(currMbHor); |
   Bool inFirstRow   = (currMb-firstMb < zeroExtend(picWidth)); |
   Bool needTopRight = ((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4); |
   Bool needTopLeft  = (currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)); |
   Bool sendReq = False; |
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0; |
   if(!inFirstRow) |
      begin |
         if(intraReqCount<5) |
            begin |
               Bit#(2) blkIdx = truncate(intraReqCount-1); |
               reqAddr = {mbCol,blkIdx}; |
               sendReq = True; |
            end |
         else if(intraReqCount==5) |
            begin |
               if(needTopRight) |
                  begin |
                     reqAddr = {(mbCol+1),2'b00}; |
                     sendReq = True; |
                  end |
               else if(needTopLeft) |
                  begin |
                     reqAddr = {(mbCol-1),2'b11}; |
                     sendReq = True; |
                  end |
            end |
         else if(intraReqCount==6 && needTopRight && needTopLeft) |
            begin |
               reqAddr = {(mbCol-1),2'b11}; |
               sendReq = True; |
            end |
      end |
   if(sendReq) |
      begin |
         intraMemReqQ.enq(LoadReq reqAddr); |
         intraReqCount <= intraReqCount+1; |
         //$display( "TRACE Prediction: intraSendReq addr %0d",reqAddr); |
      end |
   else |
      intraReqCount <= 0; |
   $display( "Trace Prediction: intraSendReq"); |
endrule |
|
|
// intraReceiveNoResp: fires when a response phase is pending (intraRespCount>0) but |
// currMb-firstMb < picWidth, the same condition under which intraSendReq sends nothing. |
// It sets every intra4x4typeTop entry to 15, clears the response/block/pixel counters, |
// moves intraStepCount to 1 and enqueues NotInter 1 on interOutBlockMvfifo. |
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) ); |
   interOutBlockMvfifo.enq(NotInter 1); |
   pixelNum <= 0; |
   blockNum <= 0; |
   intraStepCount <= 1; |
   intraRespCount <= 0; |
   intra4x4typeTop <= replicate(15); |
   $display( "Trace Prediction: intraReceiveNoResp"); |
endrule |
|
|
// intraReceiveResp: consumes one LoadResp from intraMemRespQ per firing while |
// intraRespCount is in 1..6, mirroring the request sequence of intraSendReq. |
// Fields read from the response word: data[31:0], data[47:32], data[63:48], data[67:64]. |
// When the last expected response has been handled (noMoreResp==1) the rule clears |
// intraRespCount, sets intraStepCount to 1, clears blockNum/pixelNum and enqueues |
// NotInter 1 on interOutBlockMvfifo. |
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data); |
Bit#(1) noMoreResp = 0; |
Bit#(2) temp2bit = 0; |
// Responses 1..4: slot (intraRespCount-1) of the top type / luma / chroma vectors. |
if(intraRespCount<5) |
begin |
temp2bit = truncate(intraRespCount-1); |
intra4x4typeTop <= update(intra4x4typeTop, temp2bit, data[67:64]); |
if(intraRespCount==4) |
begin |
// Slot 3 gets data[31:0]; slot 4 is pre-filled with data[31:24] replicated four times. |
Vector#(5,Bit#(32)) intraTopValTemp = intraTopVal; |
intraTopValTemp[3] = data[31:0]; |
intraTopValTemp[4] = {data[31:24],data[31:24],data[31:24],data[31:24]}; |
intraTopVal <= intraTopValTemp; |
// Neither of the two extra requests (counts 5 and 6 in intraSendReq) applies: finished. |
if(!((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) && !(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth))) |
noMoreResp = 1; |
end |
else |
intraTopVal <= update(intraTopVal, intraRespCount-1, data[31:0]); |
intraTopValChroma0 <= update(intraTopValChroma0, temp2bit, data[47:32]); |
intraTopValChroma1 <= update(intraTopValChroma1, temp2bit, data[63:48]); |
end |
else if(intraRespCount==5) |
begin |
if((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4) |
begin |
// Slot 4 is overwritten with data[31:0] unless the type field is 15, or 14 with |
// ppsconstrained_intra_pred_flag set (the same test the step rules use for "unavailable"). |
if(!(data[67:64]==15 || (data[67:64]==14 && ppsconstrained_intra_pred_flag==1))) |
intraTopVal <= update(intraTopVal, 4, data[31:0]); |
if(!(currMbHor>0 && currMb-firstMb>zeroExtend(picWidth))) |
noMoreResp = 1; |
end |
else |
begin |
// Low byte of intraLeftVal[0] and element 0 of the chroma left vectors are replaced |
// from this response. NOTE(review): presumably the top-left corner samples - confirm. |
Bit#(40) temp2 = intraLeftVal[0]; |
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]}); |
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]); |
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]); |
noMoreResp = 1; |
end |
end |
else |
begin |
// intraRespCount==6: same update as the else-branch of count 5 above. |
Bit#(40) temp2 = intraLeftVal[0]; |
intraLeftVal <= update(intraLeftVal, 0, {temp2[39:8],data[31:24]}); |
intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]); |
intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]); |
noMoreResp = 1; |
end |
intraMemRespQ.deq(); |
//$display( "TRACE Prediction: intraReceiveResp data %h",data);/////////////////////// |
if(noMoreResp == 0) |
intraRespCount <= intraRespCount+1; |
else |
begin |
intraRespCount <= 0; |
intraStepCount <= 1; |
blockNum <= 0; |
pixelNum <= 0; |
interOutBlockMvfifo.enq(NotInter 1); |
end |
$display( "Trace Prediction: intraReceiveResp"); |
endrule |
|
|
// intraPredTypeStep: runs at intraStepCount==1 while nextoutputfifo is empty and always |
// advances intraStepCount by one. |
//   - If intrastate is not Intra4x4 it only enqueues NonSkipMB. |
//   - Otherwise it dequeues one entry of rem_intra4x4_pred_mode, derives the 4x4 mode for |
//     block blockNum from the top/left neighbour types, stores it in |
//     cur_intra4x4_pred_mode and enqueues Intra4x4 (Intra4x4PlusChroma for block 15). |
// A neighbour type of 15, or 14 with ppsconstrained_intra_pred_flag==1, counts as |
// unavailable; neighbour types above 8 are treated as mode 2. |
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty()); |
   Bit#(2) blkCol = {blockNum[2],blockNum[0]}; |
   Bit#(2) blkRow = {blockNum[3],blockNum[1]}; |
   // First block of a macroblock with currMbHor==0: the left types are reset to 15. |
   Bool resetLeftTypes = (currMbHor==0 && blockNum==0); |
   Bit#(4) topType = select(intra4x4typeTop, blkCol); |
   Bit#(4) leftType = 15; |
   if(resetLeftTypes) |
      intra4x4typeLeft <= replicate(15); |
   else |
      leftType = select(intra4x4typeLeft, blkRow); |
   intraStepCount <= intraStepCount+1; |
   if(intrastate==Intra4x4) |
      begin |
         Bool topUsable  = !(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)); |
         Bool leftUsable = !(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)); |
         Bit#(4) remType = rem_intra4x4_pred_mode.first(); |
         rem_intra4x4_pred_mode.deq(); |
         // Predicted mode: 2 if either neighbour is unusable, else the smaller neighbour mode. |
         Bit#(4) topMode  = (topType>8) ? 2 : topType; |
         Bit#(4) leftMode = (leftType>8) ? 2 : leftType; |
         Bit#(4) predType = 2; |
         if(topUsable && leftUsable) |
            predType = (topMode > leftMode) ? leftMode : topMode; |
         // remType[3] set: take the predicted mode; otherwise remType skips over predType. |
         Bit#(4) curType = remType+1; |
         if(remType[3] == 1) |
            curType = predType; |
         else if(remType < predType) |
            curType = remType; |
         cur_intra4x4_pred_mode <= curType; |
         if(blockNum == 15) |
            nextoutputfifo.enq(Intra4x4PlusChroma); |
         else |
            nextoutputfifo.enq(Intra4x4); |
         $display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,topType,leftType,predType,remType,curType); |
      end |
   else |
      nextoutputfifo.enq(NonSkipMB); |
   //$display( "Trace Prediction: intraPredTypeStep"); |
endrule |
|
|
// intraProcessStep: fires whenever intraStepCount>1. Each firing either produces one |
// predicted sample (outFlag==1) for position (blockNum, pixelNum), or performs one |
// accumulation step of a multi-cycle mode (16x16 DC/plane, chroma DC/plane). |
// The produced sample is written to predVector[pixelHor]; predVector is stored back into |
// intraPredVector every firing and is enqueued on predictedfifo whenever pixelHor==3. |
// Branch selection: intrastate (Intra4x4 / Intra16x16) while intraChromaFlag==0, and the |
// chroma branch while intraChromaFlag==1. |
rule intraProcessStep ( intraStepCount>1 ); |
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);//////////////////// |
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);///////////////// |
Bit#(1) outFlag = 0; |
Bit#(4) nextIntraStepCount = intraStepCount+1; |
// blockNum bits {2,0} give the horizontal and bits {3,1} the vertical block index; |
// pixelNum is split as {vertical[3:2], horizontal[1:0]}. |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelHor = {pixelNum[1],pixelNum[0]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Vector#(4,Bit#(8)) predVector = intraPredVector; |

// Neighbour availability: type 15, or type 14 with ppsconstrained_intra_pred_flag==1, is unavailable. |
Bit#(4) topType = select(intra4x4typeTop, blockHor); |
Bit#(4) leftType = select(intra4x4typeLeft, blockVer); |
Bit#(1) topAvailable; |
Bit#(1) leftAvailable; |
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)) |
topAvailable = 0; |
else |
topAvailable = 1; |
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)) |
leftAvailable = 0; |
else |
leftAvailable = 1; |
// Availability seen at the very first luma sample is latched for later use by the chroma branch. |
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0) |
begin |
intraChromaTopAvailable <= topAvailable; |
intraChromaLeftAvailable <= leftAvailable; |
end |
// ---------------- Intra 4x4 luma: one sample per firing, only at intraStepCount==2 ---------------- |
if(intrastate==Intra4x4 && intraChromaFlag==0) |
begin |
if(intraStepCount==2) |
begin |
outFlag = 1; |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor)); |
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1); |
Bit#(72) topValSet; |
// For these block positions the top-right word is replaced by topMidValSet[31:24] repeated. |
// NOTE(review): presumably the blocks whose top-right neighbour is not yet decoded - confirm. |
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3) |
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]}; |
else |
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]}; |
//$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);//////////////////// |
// Directional modes below pick three neighbour samples and output (v1 + 2*v2 + v3 + 2)>>2. |
// Selecting the same sample for v1 and v3 turns that into the 2-tap form (v1 + v2 + 1)>>1. |
Bit#(4) topSelect1 = 0; |
Bit#(4) topSelect2 = 0; |
Bit#(4) topSelect3 = 0; |
Bit#(3) leftSelect1 = 0; |
Bit#(3) leftSelect2 = 0; |
Bit#(3) leftSelect3 = 0; |
Bit#(10) tempVal1 = 0; |
Bit#(10) tempVal2 = 0; |
Bit#(10) tempVal3 = 0; |
case(cur_intra4x4_pred_mode) |
0://vertical |
begin |
topSelect1 = zeroExtend(pixelHor); |
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1); |
predVector[pixelHor] = topVal; |
end |
1://horizontal |
begin |
leftSelect1 = zeroExtend(pixelVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1); |
predVector[pixelHor] = leftVal; |
end |
2://dc |
begin |
// Each partial sum carries its own +2 rounding term, so the combined sum carries +4 before >>3. |
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2; |
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2; |
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum); |
Bit#(8) topSum = tempTopSum[9:2]; |
Bit#(8) leftSum = tempLeftSum[9:2]; |
Bit#(8) totalSum = tempTotalSum[10:3]; |
if(topAvailable==1 && leftAvailable==1) |
predVector[pixelHor] = totalSum; |
else if(topAvailable==1) |
predVector[pixelHor] = topSum; |
else if(leftAvailable==1) |
predVector[pixelHor] = leftSum; |
else |
predVector[pixelHor] = 8'b10000000; |
end |
3://diagonal down left |
begin |
Bit#(4) selectNum = zeroExtend(pixelHor)+zeroExtend(pixelVer); |
if(pixelHor==3 && pixelVer==3) |
begin |
topSelect1 = 6; |
topSelect2 = 7; |
topSelect3 = 7; |
end |
else |
begin |
topSelect1 = selectNum; |
topSelect2 = selectNum+1; |
topSelect3 = selectNum+2; |
end |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
4://diagonal down right |
begin |
if(pixelHor > pixelVer) |
begin |
topSelect3 = zeroExtend(pixelHor)-zeroExtend(pixelVer); |
topSelect2 = topSelect3-1; |
topSelect1 = topSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(pixelHor < pixelVer) |
begin |
leftSelect3 = zeroExtend(pixelVer)-zeroExtend(pixelHor); |
leftSelect2 = leftSelect3-1; |
leftSelect1 = leftSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else |
begin |
// Select index -1 is passed to the helper here. NOTE(review): presumably the corner sample - confirm in intra4x4SelectLeft. |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
5://vertical right |
begin |
Bit#(4) tempPixelHor = zeroExtend(pixelHor); |
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer); |
// NOTE(review): zVR is an unsigned Bit#(4), so zVR>=0 is always true; negative results |
// wrap to 13..15 and are excluded by zVR<=6 (zVR==-1 below compares against 15). |
if(zVR<=6 && zVR>=0) |
begin |
topSelect3 = zeroExtend(pixelHor)-zeroExtend(pixelVer>>1); |
topSelect2 = topSelect3-1; |
if(zVR==1 || zVR==3 || zVR==5) |
topSelect1 = topSelect3-2; |
else |
topSelect1 = topSelect3; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(zVR==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
leftSelect1 = zeroExtend(pixelVer)-1; |
leftSelect2 = leftSelect1-1; |
leftSelect3 = leftSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
6://horizontal down |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHD = (tempPixelVer<<1)-zeroExtend(pixelHor); |
// NOTE(review): as with zVR above, zHD>=0 is always true for an unsigned Bit#(4). |
if(zHD<=6 && zHD>=0) |
begin |
leftSelect3 = zeroExtend(pixelVer)-zeroExtend(pixelHor>>1); |
leftSelect2 = leftSelect3-1; |
if(zHD==1 || zHD==3 || zHD==5) |
leftSelect1 = leftSelect3-2; |
else |
leftSelect1 = leftSelect3; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else if(zHD==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
topSelect1 = zeroExtend(pixelHor)-1; |
topSelect2 = topSelect1-1; |
topSelect3 = topSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
7://vertical left |
begin |
topSelect1 = zeroExtend(pixelHor)+zeroExtend(pixelVer>>1); |
topSelect2 = topSelect1+1; |
if(pixelVer==1 || pixelVer==3) |
topSelect3 = topSelect1+2; |
else |
topSelect3 = topSelect1; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
8://horizontal up |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHU = (tempPixelVer<<1)+zeroExtend(pixelHor); |
if(zHU<=4) |
begin |
leftSelect1 = zeroExtend(pixelVer)+zeroExtend(pixelHor>>1); |
leftSelect2 = leftSelect1+1; |
if(zHU==1 || zHU==3) |
leftSelect3 = leftSelect1+2; |
else |
leftSelect3 = leftSelect1; |
end |
else |
begin |
// zHU==5 gives (l2 + 3*l3 + 2)>>2; zHU>5 selects l3 three times, i.e. the result is l3 itself. |
if(zHU==5) |
leftSelect1 = 2; |
else |
leftSelect1 = 3; |
leftSelect2 = 3; |
leftSelect3 = 3; |
end |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode"); |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount"); |
end |
// ---------------- Intra 16x16 luma ---------------- |
else if(intrastate==Intra16x16 && intraChromaFlag==0) |
begin |
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));///////////////// |
case(intra16x16_pred_mode) |
0://vertical |
begin |
Bit#(32) topValSet = select(intraTopVal,blockHor); |
Bit#(8) topVal = select32to8(topValSet,pixelHor); |
predVector[pixelHor] = topVal; |
outFlag = 1; |
end |
1://horizontal |
begin |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer)); |
predVector[pixelHor] = leftVal; |
outFlag = 1; |
end |
2://dc |
begin |
// Steps 2..5 accumulate the four top words (+8 at step 5), steps 6..9 the four left |
// words (+8 at step 9), step 10 normalises, step 11 outputs intraSumA[7:0]. |
// An unavailable side is skipped by jumping nextIntraStepCount to 6 or 10. |
case(intraStepCount) |
2: |
begin |
if(topAvailable == 1) |
begin |
Bit#(32) topValSet = select(intraTopVal,0); |
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
else |
begin |
intraSumA <= 0; |
nextIntraStepCount = 6; |
end |
end |
3: |
begin |
Bit#(32) topValSet = select(intraTopVal,1); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
4: |
begin |
Bit#(32) topValSet = select(intraTopVal,2); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
5: |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8; |
end |
6: |
begin |
if(leftAvailable == 1) |
begin |
Bit#(40) leftValSet = select(intraLeftVal,0); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
else |
nextIntraStepCount = 10; |
end |
7: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,1); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
8: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,2); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
9: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,3); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8; |
end |
10: |
begin |
// Both sides summed: 32 samples (+16) -> >>5; one side: 16 samples (+8) -> >>4; none: 128. |
if(leftAvailable == 1 && topAvailable == 1) |
intraSumA <= intraSumA >> 5; |
else if(leftAvailable == 1 || topAvailable == 1) |
intraSumA <= intraSumA >> 4; |
else |
intraSumA <= 128; |
end |
11: |
begin |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount"); |
endcase |
end |
3://plane |
begin |
// Step 2: intraSumA = 16*(top[3] sample 3 + left[3] sample 3), intraSumB/C cleared. |
// Steps 3..10: intraSumB/C accumulate k*(difference of two samples), k = intraStepCount-2. |
// Step 11: intraSumB/C become (5*sum + 32)>>6. Step 12: per-sample output with clipping. |
if(intraStepCount == 2) |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
Bit#(8) topVal = select32to8(topValSet,3); |
Bit#(40) leftValSet = select(intraLeftVal,3); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3); |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 11) |
begin |
Bit#(4) xyPlusOne = intraStepCount-2; |
Bit#(4) xyPlusEight = intraStepCount+5; |
Bit#(4) sixMinusXY = 9-intraStepCount; |
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]); |
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]); |
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]); |
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0])); |
Bit#(32) topValSet2=0; |
Bit#(8) topVal2; |
Bit#(40) leftValSet2; |
Bit#(8) leftVal2; |
// At step 10 sixMinusXY wraps to -1: both differences use the sample selected with index -1 from intraLeftVal[0]. |
if(intraStepCount==10) |
begin |
leftValSet2 = select(intraLeftVal,0); |
leftVal2 = intra4x4SelectLeft(leftValSet2,-1); |
topVal2 = leftVal2; |
end |
else |
begin |
topValSet2 = select(intraTopVal,sixMinusXY[3:2]); |
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]); |
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]); |
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0])); |
end |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
end |
else if(intraStepCount == 11) |
begin |
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32; |
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[17:6]); |
intraSumC <= signExtend(tempSumC[17:6]); |
end |
else if(intraStepCount == 12) |
begin |
Bit#(5) positionHor = {1'b0,blockHor,pixelHor}; |
Bit#(5) positionVer = {1'b0,blockVer,pixelVer}; |
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7); |
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7); |
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
// Clip (tempTotal>>5) to 0..255: a set sign bit gives 0, anything above 255 gives 255. |
if(tempTotal[15]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[14:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount"); |
end |
endcase |
end |
// ---------------- Chroma (intraChromaFlag==1); blockNum[2] selects the Chroma0 or Chroma1 neighbour set ---------------- |
else if(intraChromaFlag==1) |
begin |
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);/////////////////// |
Vector#(9,Bit#(8)) tempLeftVec; |
Vector#(4,Bit#(16)) tempTopVec; |
if(blockNum[2] == 0) |
begin |
tempLeftVec = intraLeftValChroma0; |
tempTopVec = intraTopValChroma0; |
end |
else |
begin |
tempLeftVec = intraLeftValChroma1; |
tempTopVec = intraTopValChroma1; |
end |
case(intra_chroma_pred_mode.first()) |
0://dc |
begin |
// Step 2 computes the DC value of the current 4x4 block into intraSumA; step 3 outputs it. |
if(intraStepCount == 2) |
begin |
Bit#(1) useTop=0; |
Bit#(1) useLeft=0; |
// Blocks 0 and 3 use whatever is available; block 1 prefers top, block 2 prefers left. |
if(blockNum[1:0] == 0 || blockNum[1:0] == 3) |
begin |
useTop = intraChromaTopAvailable; |
useLeft = intraChromaLeftAvailable; |
end |
else if(blockNum[1:0] == 1) |
begin |
if(intraChromaTopAvailable == 1) |
useTop = 1; |
else if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
end |
else if(blockNum[1:0] == 2) |
begin |
if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
else if(intraChromaTopAvailable == 1) |
useTop = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum"); |
Bit#(10) topSum; |
Bit#(10) leftSum; |
Bit#(11) totalSum; |
if(blockHor[0] == 0) |
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2; |
else |
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2; |
// The left sums read tempLeftVec[1..8]; element 0 is not part of either sum. |
if(blockVer[0] == 0) |
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2; |
else |
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2; |
totalSum = zeroExtend(topSum) + zeroExtend(leftSum); |
if(useTop==1 && useLeft==1) |
intraSumA <= zeroExtend(totalSum[10:3]); |
else if(useTop==1) |
intraSumA <= zeroExtend(topSum[9:2]); |
else if(useLeft==1) |
intraSumA <= zeroExtend(leftSum[9:2]); |
else |
intraSumA <= zeroExtend(8'b10000000); |
end |
else if(intraStepCount == 3) |
begin |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount"); |
end |
1://horizontal |
begin |
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1; |
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx); |
outFlag = 1; |
end |
2://vertical |
begin |
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHor[1]}); |
if(pixelHor[0] == 0) |
predVector[pixelHor] = tempTopVal[7:0]; |
else |
predVector[pixelHor] = tempTopVal[15:8]; |
outFlag = 1; |
end |
3://plane |
begin |
// Same scheme as the 16x16 plane mode with an 8x8 geometry: accumulate over steps 3..6, |
// scale with (34*sum + 32)>>6 at step 7, output with clipping at step 8. |
if(intraStepCount == 2) |
begin |
Bit#(16) topValSet = tempTopVec[3]; |
Bit#(8) topVal = topValSet[15:8]; |
Bit#(8) leftVal = tempLeftVec[8]; |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 7) |
begin |
Bit#(3) xyPlusOne = truncate(intraStepCount)-2; |
Bit#(3) xyPlusFour = truncate(intraStepCount)+1; |
Bit#(4) twoMinusXY = 5-intraStepCount; |
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]); |
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]); |
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1; |
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1); |

Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]); |
Bit#(8) topVal2; |
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1); |
// At step 6 twoMinusXY+1 is 0, so leftVal2 is tempLeftVec[0] and is used for both differences. |
if(intraStepCount==6) |
topVal2 = leftVal2; |
else |
topVal2 = select16to8(topValSet2,twoMinusXY[0]); |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
// tempDisplayH/V only feed the commented-out trace below. |
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH); |
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
end |
else if(intraStepCount == 7) |
begin |
Int#(15) tempDisplayH = unpack(intraSumB); |
Int#(15) tempDisplayV = unpack(intraSumC); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32; |
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[18:6]); |
intraSumC <= signExtend(tempSumC[18:6]); |
end |
else if(intraStepCount == 8) |
begin |
Bit#(4) positionHor = {1'b0,blockHor[0],pixelHor}; |
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer}; |
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3); |
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3); |
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
if(tempTotal[16]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[15:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount"); |
end |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep unknown intrastate"); |

// Commit the working vector; a full row of four samples is emitted whenever pixelHor==3. |
intraPredVector <= predVector; |
if(pixelHor == 3) |
predictedfifo.enq(predVector); |

// Sequencing: a produced sample advances pixelNum and leaves intraStepCount unchanged unless |
// a block boundary (pixelNum==15) rewrites it; otherwise the step counter moves to nextIntraStepCount. |
if(outFlag==1) |
begin |
pixelNum <= pixelNum+1; |
if(pixelNum == 15) |
begin |
if(intraChromaFlag==0) |
begin |
blockNum <= blockNum+1; |
// After luma block 15 switch to chroma; in Intra4x4 every other block returns to step 1 (intraPredTypeStep). |
if(blockNum == 15) |
begin |
intraChromaFlag <= 1; |
intraStepCount <= 2; |
end |
else if(intrastate==Intra4x4) |
intraStepCount <= 1; |
end |
else |
begin |
// Chroma block 7 is the last one: clear the state and drop the consumed chroma mode. |
if(blockNum == 7) |
begin |
blockNum <= 0; |
intraChromaFlag <= 0; |
intraStepCount <= 0; |
intra_chroma_pred_mode.deq(); |
end |
else |
begin |
blockNum <= blockNum+1; |
// Mode 0 (dc) restarts at step 2 for every block; other modes restart only after block 3. |
if(intra_chroma_pred_mode.first()==0) |
intraStepCount <= 2; |
else if(blockNum==3) |
intraStepCount <= 2; |
end |
end |
end |
end |
else |
intraStepCount <= nextIntraStepCount; |
//$display( "Trace Prediction: intraProcessStep"); |
endrule |
|
|
// Memory client for the intra rules: requests are taken from intraMemReqQ, responses are put into intraMemRespQ. |
interface Client mem_client_intra; |
interface Get request = fifoToGet(intraMemReqQ); |
interface Put response = fifoToPut(intraMemRespQ); |
endinterface |
// Memory client for the inter rules: requests are taken from interMemReqQ, responses are put into interMemRespQ. |
interface Client mem_client_inter; |
interface Get request = fifoToGet(interMemReqQ); |
interface Put response = fifoToPut(interMemRespQ); |
endinterface |
// The interpolator submodule's own memory client is exported unchanged. |
interface Client mem_client_buffer = interpolator.mem_client; |

// Module-level data ports, each a thin Get/Put wrapper around the named FIFO. |
interface Put ioin = fifoToPut(infifo); |
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB); |
interface Get ioout = fifoToGet(outfifo); |
|
|
endmodule |
|
endpackage |
/trunk/src/mkInputGen_akiyo.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// mkInputGen (akiyo variant): streams the bytes loaded from "akiyo_qcif1-15.hex" |
// (addresses 0..4867) into outfifo as DataByte items, one per firing of output_byte. |
// Once index reaches 4868, end_of_file enqueues EndOfFile; index is not changed by that |
// rule, so EndOfFile keeps being enqueued whenever outfifo accepts it. |
module mkInputGen( IInputGen ); |

   // Number of bytes in the hex image; the last valid address is numBytes-1. |
   Bit#(27) numBytes = 4868; |

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("akiyo_qcif1-15.hex", 0, numBytes-1); |

   FIFO#(InputGenOT) outfifo <- mkFIFO; |
   Reg#(Bit#(27)) index <- mkReg(0); |

   rule output_byte (index < numBytes); |
      let nextByte = rfile.sub(index); |
      //$display( "ccl0inputbyte %x", nextByte ); |
      index <= index+1; |
      outfifo.enq(tagged DataByte nextByte); |
   endrule |

   rule end_of_file (index == numBytes); |
      //$finish(0); |
      outfifo.enq(EndOfFile); |
   endrule |

   interface Get ioout = fifoToGet(outfifo); |

endmodule |
|
|
endpackage |
/trunk/src/mkInputGen_intra.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// mkInputGen (intra variant): streams the bytes loaded from "intraforeman_qcif1-5.hex" |
// (addresses 0..22038) into outfifo as DataByte items, one per firing of output_byte. |
// Once index reaches 22039, end_of_file enqueues EndOfFile; index is not changed by that |
// rule, so EndOfFile keeps being enqueued whenever outfifo accepts it. |
module mkInputGen( IInputGen ); |

   // Number of bytes in the hex image; the last valid address is numBytes-1. |
   Bit#(27) numBytes = 22039; |

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("intraforeman_qcif1-5.hex", 0, numBytes-1); |

   FIFO#(InputGenOT) outfifo <- mkFIFO; |
   Reg#(Bit#(27)) index <- mkReg(0); |

   rule output_byte (index < numBytes); |
      let nextByte = rfile.sub(index); |
      //$display( "ccl0inputbyte %x", nextByte ); |
      index <= index+1; |
      outfifo.enq(tagged DataByte nextByte); |
   endrule |

   rule end_of_file (index == numBytes); |
      //$finish(0); |
      outfifo.enq(EndOfFile); |
   endrule |

   interface Get ioout = fifoToGet(outfifo); |

endmodule |
|
|
endpackage |
/trunk/src/mkMemED_bram.bsv
0,0 → 1,57
//********************************************************************** |
// Memory for Entropy Decoder |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package mkMemED; |
|
import H264Types::*; |
import IMemED::*; |
import GetPut::*; |
import ClientServer::*; |
import FIFO::*; |
import BRAM::*; |
|
|
//---------------------------------------------------------------------- |
// Main module |
//---------------------------------------------------------------------- |
|
// mkMemED: request/response memory built on a BRAM. |
// Requests arrive through reqQ: a StoreReq writes the BRAM, a LoadReq starts a BRAM read. |
// Every value returned by the BRAM read port is wrapped as LoadResp and queued in respQ. |
module mkMemED(IMemED#(index_size,data_size)) |
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen), |
             Bits#(MemResp#(data_size),mRespLen)); |

   //----------------------------------------------------------- |
   // State |

   BRAM#(Bit#(index_size),Bit#(data_size)) bramfile <- mkBRAM_Full(); |

   FIFO#(MemReq#(index_size,data_size)) reqQ <- mkFIFO(); |
   FIFO#(MemResp#(data_size)) respQ <- mkFIFO(); |

   //----------------------------------------------------------- |
   // Rules |

   // Head of reqQ is a store: perform the write and retire the request. |
   rule storing ( reqQ.first() matches tagged StoreReq { addr:.wrAddr, data:.wrData } ); |
      reqQ.deq(); |
      bramfile.write(wrAddr,wrData); |
   endrule |

   // Head of reqQ is a load: launch the read and retire the request. |
   rule reading ( reqQ.first() matches tagged LoadReq .rdAddr ); |
      reqQ.deq(); |
      bramfile.read_req(rdAddr); |
   endrule |

   // Collect read data from the BRAM and hand it out as a response. |
   rule readresp ( True ); |
      let rdData <- bramfile.read_resp(); |
      respQ.enq( tagged LoadResp rdData ); |
   endrule |

   //----------------------------------------------------------- |
   // Interface |

   interface Server mem_server; |
      interface Put request = fifoToPut(reqQ); |
      interface Get response = fifoToGet(respQ); |
   endinterface |


endmodule |
|
endpackage |
/trunk/src/mkInterpolator_3stage.bsv
0,0 → 1,726
//********************************************************************** |
// interpolator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInterpolator; |
|
import H264Types::*; |
import IInterpolator::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// InterpolatorWT: tagged union with one luma (IPWLuma) and one chroma (IPWChroma) variant. |
// The luma variant carries 2-bit xFracL/yFracL, the chroma variant 3-bit xFracC/yFracC; |
// both carry a 2-bit offset and an IPBlockType. |
// NOTE(review): presumably the fractional sample position and block shape of one work item - confirm against the rules that consume it. |
typedef union tagged |
{ |
struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma; |
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma; |
} |
InterpolatorWT deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// clip1y10to8: saturates a 10-bit value to 8 bits. |
//   bit 9 set           -> 0   (checked first) |
//   bit 8 set, bit 9 clear -> 255 |
//   otherwise           -> the low 8 bits unchanged |
function Bit#(8) clip1y10to8( Bit#(10) innum ); |
   Bit#(8) clipped = innum[7:0]; |
   case (innum[9:8]) |
      2'b00:   clipped = innum[7:0]; |
      2'b01:   clipped = 255; |
      default: clipped = 0; |
   endcase |
   return clipped; |
endfunction |
|
// Six-tap filter with weights (1,-5,20,20,-5,1) over six unsigned 8-bit samples. |
// The sum is formed in 15-bit wrap-around arithmetic, so a negative result |
// appears in two's-complement form. |
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 ); |
   // The weights are symmetric, so taps sharing a weight are added first. |
   Bit#(15) outerTaps  = zeroExtend(in0) + zeroExtend(in5); |
   Bit#(15) innerTaps  = zeroExtend(in1) + zeroExtend(in4); |
   Bit#(15) centerTaps = zeroExtend(in2) + zeroExtend(in3); |
   return outerTaps - 5*innerTaps + 20*centerTaps; |
endfunction |
|
// Second six-tap pass, weights (1,-5,20,20,-5,1), over six signed 15-bit intermediates. |
// The sum is formed in 20 bits, 512 is added for rounding, bits [19:10] are taken |
// (a shift right by 10) and the result is clipped to 0..255 by clip1y10to8. |
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 ); |
   // The weights are symmetric, so taps sharing a weight are added first. |
   Bit#(20) outerTaps  = signExtend(in0) + signExtend(in5); |
   Bit#(20) innerTaps  = signExtend(in1) + signExtend(in4); |
   Bit#(20) centerTaps = signExtend(in2) + signExtend(in3); |
   Bit#(20) rounded    = outerTaps - 5*innerTaps + 20*centerTaps + 512; |
   Bit#(10) scaled     = rounded[19:10]; |
   return clip1y10to8(scaled); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Interpolation Module |
//----------------------------------------------------------- |
|
|
// Interpolator module. Requests arrive through the request method, the load rules turn |
// them into reference-memory load requests (memReqQ), the work rules filter the returned |
// words (memRespQ) into resultFile, and the outputing rule streams rows of four 8-bit |
// pixels through outfifo. |
(* synthesize *) |
module mkInterpolator( Interpolator ); |
|
// Each request is queued twice: in full for the load rules, and in reduced form |
// (InterpolatorWT) for the work rules. |
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size); |
FIFO#(InterpolatorWT) reqfifoWork <- mkSizedFIFO(interpolator_reqfifoWork_size); |
// Result rows read by the first/deq methods. |
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO; |
// Set by endOfFrame(), cleared by sendEndOfFrameReq; blocks the load rules while set. |
Reg#(Bool) endOfFrameFlag <- mkReg(False); |
// Memory client queues exposed through mem_client. |
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO; |
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size); |
|
// Picture dimensions used for address clamping in the load rules (set via setPicWidth/setPicHeight). |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
|
// workFile: 32 entries of four 15-bit intermediates passed from workLuma pass 0 to pass 1. |
// resultFile: 16 entries of four 8-bit finished pixels read by the outputing rule. |
RFile1#(Bit#(5),Vector#(4,Bit#(15))) workFile <- mkRFile1Full(); |
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full(); |
|
// Load-side progress: pass number (loadLuma two-stage case) and word/row counters. |
Reg#(Bit#(1)) loadStage <- mkReg(0); |
Reg#(Bit#(2)) loadHorNum <- mkReg(0); |
Reg#(Bit#(4)) loadVerNum <- mkReg(0); |
|
// Work-side progress: pass number, partition counters, word/row counters, |
// shift registers for 8-bit and 15-bit samples, per-4x4-block ready flags, and a |
// flag raised when the work rules have finished the current set of partitions. |
Reg#(Bit#(1)) workStage <- mkReg(0); |
Reg#(Bit#(2)) workMbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(2)) workSubMbPart <- mkReg(0); |
Reg#(Bit#(2)) workHorNum <- mkReg(0); |
Reg#(Bit#(4)) workVerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) workVector8 <- mkRegU; |
Reg#(Vector#(20,Bit#(15))) workVector15 <- mkRegU; |
Reg#(Vector#(4,Bit#(1))) resultReady <- mkRegU; |
Reg#(Bool) workDone <- mkReg(False); |
|
// Output-side progress: current 4x4 block, current row within it, and completion flag. |
Reg#(Bit#(2)) outBlockNum <- mkReg(0); |
Reg#(Bit#(2)) outPixelNum <- mkReg(0); |
Reg#(Bool) outDone <- mkReg(False); |
|
|
// When an end of frame has been signalled, pass it on to the memory side as an |
// IPLoadEndFrame request and clear the flag so the load rules can run again. |
rule sendEndOfFrameReq( endOfFrameFlag == True ); |
   memReqQ.enq(tagged IPLoadEndFrame); |
   endOfFrameFlag <= False; |
endrule |
|
|
// Issues one luma load request per firing for the IPLuma request at the head of |
// reqfifoLoad (blocked while an end-of-frame is pending). The address is derived from |
// the block position, the loadHorNum/loadVerNum counters and the integer part of the |
// motion vector, and is clamped to the picture. The request is dequeued once all words |
// of all passes have been requested. |
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag ); |
// Fractional parts of the motion vector (two low bits of each component). |
Bit#(2) xfracl = reqdata.mvhor[1:0]; |
Bit#(2) yfracl = reqdata.mvver[1:0]; |
// Both fractions odd: data is loaded in two passes, selected by loadStage |
// (pass 0 with the vertical margin, pass 1 with the horizontal margin). |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0); |
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0); |
Bit#(2) offset = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0); |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,2)) horAddr; |
Bit#(TAdd#(PicHeightSz,4)) verAddr; |
// Position before the motion vector is applied; the trailing "+1" terms shift by one |
// sample for the fraction-3 cases of the two-pass mode. |
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0); |
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0); |
// Integer part of the motion vector, moved back by 2 when this pass interpolates in |
// that direction (presumably the leading samples needed by the six-tap filter). |
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0); |
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0); |
// Horizontal clamp: left of the picture -> word 0, right of it -> last word; |
// horOut flags either case in the request. |
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(mvhortemp); |
if(horTemp>=zeroExtend({picWidth,4'b0000})) |
begin |
horAddr = {picWidth-1,2'b11}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp: above the picture -> row 0, below it -> last row. |
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(mvvertemp); |
if(verTemp>=zeroExtend({picHeight,4'b0000})) |
verAddr = {picHeight-1,4'b1111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Traversal order and counter limits. verFirst walks down a column before moving to the |
// next word; otherwise a row is completed before moving to the next row. |
Bool verFirst = (twoStage&&loadStage==0) || (yfracl==2&&(xfracl==1||xfracl==3)); |
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset==0 ? 0 : 1)); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0); |
if(verFirst) |
begin |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
// End of a column-first pass: either continue with pass 1 or retire the request. |
if(twoStage) |
loadStage <= 1; |
else |
reqfifoLoad.deq(); |
end |
end |
end |
else |
begin |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
// End of a row-first pass: always the last pass, so reset the stage and retire the request. |
loadVerNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
// Partitions wider or taller than 8 samples are only reported, not handled, by this rule. |
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16) |
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported"); |
//$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Issues one chroma load request per firing for the IPChroma request at the head of |
// reqfifoLoad (blocked while an end-of-frame is pending). Words are requested row by |
// row; the request is dequeued after the last word of the last row. |
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag ); |
// Fractional parts of the motion vector (three low bits of each component). |
Bit#(3) xfracc = reqdata.mvhor[2:0]; |
Bit#(3) yfracc = reqdata.mvver[2:0]; |
// Sample offset inside the loaded word; the same expression is used by the request method. |
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0}; |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,1)) horAddr; |
Bit#(TAdd#(PicHeightSz,3)) verAddr; |
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00}); |
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum); |
// Horizontal clamp: left of the picture -> word 0, right of it -> last word; |
// horOut flags either case in the request. |
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]); |
if(horTemp>=zeroExtend({picWidth,3'b000})) |
begin |
horAddr = {picWidth-1,1'b1}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp: above the picture -> row 0, below it -> last row. |
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(reqdata.mvver[11:3]); |
if(verTemp>=zeroExtend({picHeight,3'b000})) |
verAddr = {picHeight-1,3'b111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Counter limits; workChroma computes the same two limits for the consuming side. |
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
//$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Luma worker for the IPWLuma descriptor at the head of reqfifoWork (stalled while workDone). |
// workStage selects the pass: |
//   0 - consume one memory response word per firing and either write finished pixels to |
//       resultFile (yfracl == 0) or 15-bit intermediates to workFile (yfracl != 0); |
//   1 - finish requests with yfracl != 0 using the intermediates in workFile. |
// Finished rows go to resultFile; resultReady gets a bit set when the last row of a 4x4 |
// block has been written. The shift registers and flags are committed at the end of the rule. |
rule workLuma ( reqfifoWork.first() matches tagged IPWLuma .reqdata &&& !workDone ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) workVector8Next = workVector8; |
Vector#(20,Bit#(15)) workVector15Next = workVector15; |
Vector#(4,Bit#(1)) resultReadyNext = resultReady; |
if(workStage == 0) |
begin |
// Pass 0: each firing consumes one 32-bit response, unpacked into four 8-bit samples. |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] ); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
// Row-by-row traversal: plain copy (xfracl == 0) or horizontal filtering. |
if(xfracl==0 || yfracl==0 || xfracl==2) |
begin |
if(xfracl==0)//reorder |
begin |
// Realign four samples across the previous word (kept in workVector8[0..3]) and the new word. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && offset!=0) |
tempResult8[ii] = workVector8[offsetplusii]; |
else |
tempResult8[ii] = readdata[offsetplusii]; |
workVector8Next[ii] = readdata[ii]; |
end |
// Scale by 32 so the value matches the range of the six-tap output kept in workFile. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
else//horizontal interpolation |
begin |
// Shift the sample window left by one word, insert the new word at the aligned |
// position, then filter four neighbouring six-sample windows. |
offset = offset-2; |
for(Integer ii=0; ii<8; ii=ii+1) |
workVector8Next[ii] = workVector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
workVector8Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
// Fractions 1 and 3: rounded average with the left or right centre sample. |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+2]} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+3]} + 1) >> 1); |
end |
end |
// Number of leading words per row that only fill workVector8 and produce no output. |
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1)); |
if(workHorNum >= workHorNumOffset) |
begin |
Bit#(1) horAddr = truncate(workHorNum-workHorNumOffset); |
if(yfracl == 0)//write to resultFile |
begin |
// Place the row according to the sub-partition currently being processed. |
Bit#(3) verAddr = truncate(workVerNum); |
horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
end |
else//write to workFile |
workFile.upd({workVerNum,horAddr},tempResult15); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0); |
if(workHorNum < workHorNumMax) |
workHorNum <= workHorNum+1; |
else |
begin |
workHorNum <= 0; |
if(workVerNum < workVerNumMax) |
workVerNum <= workVerNum+1; |
else |
begin |
workVerNum <= 0; |
// Last word of the pass: go on to pass 1, or finish this sub-partition. |
if(yfracl!=0) |
workStage <= 1; |
else |
begin |
if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3)) |
workSubMbPart <= workSubMbPart+1; |
else |
begin |
workSubMbPart <= 0; |
workDone <= True; |
end |
reqfifoWork.deq(); |
end |
end |
end |
end |
else//vertical interpolation |
begin |
// Column-by-column traversal: workVector8 holds the five previous words of this |
// column, the new word supplies the sixth filter input. |
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0); |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = interpolate8to15(workVector8[ii],workVector8[ii+4],workVector8[ii+8],workVector8[ii+12],workVector8[ii+16],readdata[ii]); |
for(Integer ii=0; ii<16; ii=ii+1) |
workVector8Next[ii] = workVector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
workVector8Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1)); |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
Bit#(2) horAddr = workHorNum; |
Bit#(3) verAddr = truncate(workVerNum-5); |
// The first five words of a column only fill the window; results start with the sixth. |
if(workVerNum > 4) |
begin |
workFile.upd({verAddr,horAddr},tempResult15); |
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]); |
end |
if(workVerNum < workVerNumMax) |
workVerNum <= workVerNum+1; |
else |
begin |
workVerNum <= 0; |
if(workHorNum < workHorNumMax) |
workHorNum <= workHorNum+1; |
else |
begin |
workHorNum <= 0; |
workStage <= 1; |
end |
end |
end |
end |
end |
else |
begin |
// Pass 1: second filtering step for requests with yfracl != 0. |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) readdata = replicate(0); |
// NOTE(review): the message below says "loadStage==1", but this branch is selected by |
// workStage != 0; confirm which name the message is meant to report. |
if(yfracl==0) |
$display( "ERROR Interpolation: workLuma loadStage==1 and yfracl==0"); |
if(xfracl==0 || xfracl==2)//vertical interpolation |
begin |
// Vertical filter over the 15-bit rows that pass 0 stored in workFile, column by column. |
readdata = workFile.sub({workVerNum,workHorNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(workVector15[ii],workVector15[ii+4],workVector15[ii+8],workVector15[ii+12],workVector15[ii+16],readdata[ii]); |
// Fractions 1 and 3: rounded average with the upper or lower centre row (rescaled to 8 bits). |
if(yfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+8]+16)>>5))} + 1) >> 1); |
else if(yfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+12]+16)>>5))} + 1) >> 1); |
end |
for(Integer ii=0; ii<16; ii=ii+1) |
workVector15Next[ii] = workVector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
workVector15Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = 1; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
// The first five rows of a column only fill the window; results start with the sixth. |
if(workVerNum > 4) |
begin |
Bit#(1) horAddr = truncate(workHorNum); |
Bit#(3) verAddr = truncate(workVerNum-5); |
horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
end |
if(workVerNum < workVerNumMax) |
workVerNum <= workVerNum+1; |
else |
begin |
workVerNum <= 0; |
if(workHorNum < workHorNumMax) |
workHorNum <= workHorNum+1; |
else |
begin |
// Pass 1 complete: return to pass 0 and finish this sub-partition. |
workHorNum <= 0; |
workStage <= 0; |
if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3)) |
workSubMbPart <= workSubMbPart+1; |
else |
begin |
workSubMbPart <= 0; |
workDone <= True; |
end |
reqfifoWork.deq(); |
end |
end |
end |
else//horizontal interpolation |
begin |
offset = offset-2; |
if(yfracl == 2) |
begin |
// Horizontal filter over the vertically filtered 15-bit values stored by pass 0. |
readdata = workFile.sub({workVerNum[2:0],workHorNum}); |
for(Integer ii=0; ii<8; ii=ii+1) |
workVector15Next[ii] = workVector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
workVector15Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(workVector15Next[ii],workVector15Next[ii+1],workVector15Next[ii+2],workVector15Next[ii+3],workVector15Next[ii+4],workVector15Next[ii+5]); |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+2]+16)>>5))} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+3]+16)>>5))} + 1) >> 1); |
end |
end |
else |
begin |
// Both fractions odd: filter freshly loaded samples horizontally, then average the |
// result with the vertically filtered value that pass 0 left in workFile. |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata8) |
begin |
memRespQ.deq(); |
Vector#(4,Bit#(8)) readdata8 = replicate(0); |
readdata8[0] = tempreaddata8[7:0]; |
readdata8[1] = tempreaddata8[15:8]; |
readdata8[2] = tempreaddata8[23:16]; |
readdata8[3] = tempreaddata8[31:24]; |
for(Integer ii=0; ii<8; ii=ii+1) |
workVector8Next[ii] = workVector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
workVector8Next[tempIndex] = readdata8[ii]; |
end |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
end |
// Realign the stored vertical results in the same way as the xfracl == 0 reorder of pass 0. |
Bit#(2) verOffset; |
Vector#(4,Bit#(15)) verResult15 = replicate(0); |
if(xfracl == 1) |
verOffset = reqdata.offset; |
else |
verOffset = reqdata.offset+1; |
readdata = workFile.sub({workVerNum[2:0],(workHorNum-2+(verOffset==0?0:1))}); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = verOffset+fromInteger(ii); |
if(verOffset <= 3-fromInteger(ii) && verOffset!=0) |
verResult15[ii] = workVector15[offsetplusii]; |
else |
verResult15[ii] = readdata[offsetplusii]; |
workVector15Next[ii] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5))); |
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1); |
end |
end |
end |
// The first two words of each row only fill the window; results start with the third. |
if(workHorNum >= 2) |
begin |
Bit#(1) horAddr = truncate(workHorNum-2); |
Bit#(3) verAddr = truncate(workVerNum); |
horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext)); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(workHorNum < workHorNumMax) |
workHorNum <= workHorNum+1; |
else |
begin |
workHorNum <= 0; |
if(workVerNum < workVerNumMax) |
workVerNum <= workVerNum+1; |
else |
begin |
// Pass 1 complete: return to pass 0 and finish this sub-partition. |
workVerNum <= 0; |
workStage <= 0; |
if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3)) |
workSubMbPart <= workSubMbPart+1; |
else |
begin |
workSubMbPart <= 0; |
workDone <= True; |
end |
reqfifoWork.deq(); |
end |
end |
end |
end |
// Commit the shift registers and ready flags computed above. |
workVector8 <= workVector8Next; |
workVector15 <= workVector15Next; |
resultReady <= resultReadyNext; |
//$display( "Trace interpolator: workLuma %h %h %h %h %h %h", xfracl, yfracl, workHorNum, workVerNum, offset, workStage); |
endrule |
|
|
// Chroma worker for the IPWChroma descriptor at the head of reqfifoWork (stalled while |
// workDone). Each firing consumes one memory response word, forms four output samples as a |
// weighted sum of a 2x2 neighbourhood (weights from xfracc / yfracc, sum of weights 64) and, |
// once a full row is available, writes it to resultFile. |
rule workChroma ( reqfifoWork.first() matches tagged IPWChroma .reqdata &&& !workDone ); |
// Fractions widened to 4 bits so that (8 - frac) can be formed without overflow. |
Bit#(4) xfracc = zeroExtend(reqdata.xFracC); |
Bit#(4) yfracc = zeroExtend(reqdata.yFracC); |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) workVector8Next = workVector8; |
Vector#(4,Bit#(1)) resultReadyNext = resultReady; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Unpack the 32-bit response into four 8-bit samples. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
// tempWork8: five aligned samples of the current row; tempPrev8: the same for the row above. |
Vector#(5,Bit#(8)) tempWork8 = replicate(0); |
Vector#(5,Bit#(8)) tempPrev8 = replicate(0); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Bool resultReadyFlag = False; |
// Realign across the previous word (kept in workVector8[0..3]) and the new word. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0)) |
tempWork8[ii] = workVector8[offsetplusii]; |
else |
tempWork8[ii] = readdata[offsetplusii]; |
workVector8Next[ii] = readdata[ii]; |
end |
tempWork8[4] = readdata[offset]; |
// Fetch the previous row from workVector8 and store the current one for the next row. |
// 16-wide partitions keep a second row slot at indices 9..13 for their second word. |
if((blockT==IP16x8 || blockT==IP16x16) && workHorNum==(xfracc==0&&offset==0 ? 1 : 2)) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
begin |
tempPrev8[ii] = workVector8[ii+9]; |
workVector8Next[ii+9] = tempWork8[ii]; |
end |
end |
else |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = workVector8[ii+4]; |
if(workHorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3)))) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
workVector8Next[ii+4] = tempWork8[ii]; |
end |
end |
// No vertical fraction: the current row stands in for the previous row. |
if(yfracc==0) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = tempWork8[ii]; |
end |
// Weighted sum of the 2x2 neighbourhood, rounded (+32) and divided by 64. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]); |
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]); |
tempResult8[ii] = truncate((tempVal+32)>>6); |
end |
// With a vertical fraction the first row only primes the previous-row storage. |
if(workVerNum > 0 || yfracc==0) |
begin |
if(blockT==IP4x8 || blockT==IP4x4) |
begin |
// Narrow partitions yield two samples per row: park them in workVector8 and pair them |
// with the two samples parked earlier at the same index. |
Bit#(5) tempIndex = 10 + zeroExtend(workVerNum<<1); |
workVector8Next[tempIndex] = tempResult8[0]; |
workVector8Next[tempIndex+1] = tempResult8[1]; |
tempResult8[2] = tempResult8[0]; |
tempResult8[3] = tempResult8[1]; |
tempResult8[0] = workVector8[tempIndex]; |
tempResult8[1] = workVector8[tempIndex+1]; |
if((workHorNum>0 || offset[1]==0) && workSubMbPart[0]==1) |
resultReadyFlag = True; |
end |
else |
begin |
if(workHorNum>0 || (xfracc==0 && offset==0)) |
resultReadyFlag = True; |
end |
end |
if(resultReadyFlag) |
begin |
// Row address inside resultFile, adjusted for the partition currently being processed. |
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? workHorNum : workHorNum-1))); |
Bit#(3) verAddr = truncate((yfracc==0 ? workVerNum : workVerNum-1)); |
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : workMbPart[0]); |
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {workMbPart[0],2'b00} : {workMbPart[1],2'b00})); |
verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 2 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
end |
// Counter limits; loadChroma computes the same two limits for the requesting side. |
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(workHorNum < workHorNumMax) |
workHorNum <= workHorNum+1; |
else |
begin |
workHorNum <= 0; |
if(workVerNum < workVerNumMax) |
workVerNum <= workVerNum+1; |
else |
begin |
workVerNum <= 0; |
// Request finished: advance the sub-partition / partition counters, raise workDone |
// after the last partition, and retire the descriptor. |
if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3)) |
workSubMbPart <= workSubMbPart+1; |
else |
begin |
workSubMbPart <= 0; |
if(((blockT==IP16x8 || blockT==IP8x16) && workMbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && workMbPart<3)) |
workMbPart <= workMbPart+1; |
else |
begin |
workMbPart <= 0; |
workDone <= True; |
end |
end |
reqfifoWork.deq(); |
end |
end |
end |
// Commit the sample storage and ready flags computed above. |
workVector8 <= workVector8Next; |
resultReady <= resultReadyNext; |
//$display( "Trace interpolator: workChroma %h %h %h %h %h", xfracc, yfracc, workHorNum, workVerNum, offset); |
endrule |
|
|
// Streams finished rows out of resultFile, one row of four pixels per firing. |
// A block is read only after its resultReady flag is set; blocks are visited in order |
// 0..3 and outDone is raised together with the last row of block 3. |
rule outputing( !outDone && resultReady[outBlockNum]==1 ); |
   Bit#(4) rowAddr    = {outBlockNum[1],outPixelNum,outBlockNum[0]}; |
   Bool    lastRow    = (outPixelNum == 3); |
   Bool    finalBlock = (outBlockNum == 3); |
|
   outfifo.enq(resultFile.sub(rowAddr)); |
   outPixelNum <= outPixelNum+1; |
   if(lastRow) |
      outBlockNum <= outBlockNum+1; |
   if(lastRow && finalBlock) |
      outDone <= True; |
   //$display( "Trace interpolator: outputing %h %h %h %h %h %h", outBlockNum, outPixelNum, tempVector[3], tempVector[2], tempVector[1], tempVector[0]); |
endrule |
|
|
// Once both the work side and the output side have finished, clear the per-block |
// ready flags and both completion flags so the next set of results can be processed. |
rule switching( outDone && workDone ); |
   resultReady <= replicate(0); |
   workDone    <= False; |
   outDone     <= False; |
   //$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Stores the picture width used by the load rules for horizontal address clamping. |
// The load rules scale it by 16 (luma) or 8 (chroma) to obtain a width in samples. |
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth ); |
picWidth <= newPicWidth; |
endmethod |
|
// Stores the picture height used by the load rules for vertical address clamping. |
// The load rules scale it by 16 (luma) or 8 (chroma) to obtain a height in samples. |
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight ); |
picHeight <= newPicHeight; |
endmethod |
|
// Accepts one interpolation request. The full request is queued for the load rules and a |
// reduced descriptor (fractions, word offset, block type) is queued for the work rules. |
method Action request( InterpolatorIT inputdata ); |
   reqfifoLoad.enq(inputdata); |
   case (inputdata) matches |
      tagged IPLuma .luma : |
         reqfifoWork.enq(IPWLuma {xFracL:luma.mvhor[1:0],yFracL:luma.mvver[1:0],offset:luma.mvhor[3:2],bt:luma.bt}); |
      tagged IPChroma .chroma : |
         reqfifoWork.enq(IPWChroma {xFracC:chroma.mvhor[2:0],yFracC:chroma.mvver[2:0],offset:chroma.mvhor[4:3]+{chroma.hor[0],1'b0},bt:chroma.bt}); |
   endcase |
endmethod |
|
// Oldest finished row of four pixels (head of outfifo). |
method Vector#(4,Bit#(8)) first(); |
return outfifo.first(); |
endmethod |
|
// Removes the row returned by first(). |
method Action deq(); |
outfifo.deq(); |
endmethod |
|
// Signals the end of a frame; sendEndOfFrameReq forwards it to the memory side and |
// the load rules are held off until it has done so. |
method Action endOfFrame(); |
endOfFrameFlag <= True; |
endmethod |
|
// Memory client: load requests leave through memReqQ, loaded words arrive through memRespQ. |
interface Client mem_client; |
interface Get request = fifoToGet(memReqQ); |
interface Put response = fifoToPut(memRespQ); |
endinterface |
|
|
endmodule |
|
|
endpackage |
/trunk/src/INalUnwrap.bsv
0,0 → 1,22
//********************************************************************** |
// Interface for NAL unwrapper |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package INalUnwrap; |
|
import H264Types::*; |
import GetPut::*; |
|
interface INalUnwrap; |
|
// Interface for inter-module io |
interface Put#(InputGenOT) ioin; |
interface Get#(NalUnwrapOT) ioout; |
|
endinterface |
|
endpackage |
|
/trunk/src/BRAM.bsv
0,0 → 1,214
import FIFO::*; |
|
//One RAM. |
// Split-transaction memory interface: a read is started with read_req and its data is |
// collected later with read_resp; write stores a value at an index. |
interface BRAM#(type idx_type, type data_type); |
|
// Start a read of the entry at idx. |
method Action read_req(idx_type idx); |
|
// Collect the data of a previously requested read. |
method ActionValue#(data_type) read_resp(); |
|
// Store data at idx. |
method Action write(idx_type idx, data_type data); |
|
endinterface |
|
|
//Two RAMs. |
// Two independent read channels (request/response pairs 1 and 2) and one write method. |
// mkBRAM_2 implements this with two memories that both receive every write. |
interface BRAM_2#(type idx_type, type data_type); |
|
// Start a read on channel 1 / channel 2. |
method Action read_req1(idx_type idx); |
method Action read_req2(idx_type idx); |
|
// Collect the data of a previously requested read on channel 1 / channel 2. |
method ActionValue#(data_type) read_resp1(); |
method ActionValue#(data_type) read_resp2(); |
|
// Store data at idx. |
method Action write(idx_type idx, data_type data); |
|
endinterface |
|
//Three RAMs. |
// Three independent read channels (request/response pairs 1 to 3) and one write method. |
// mkBRAM_3 implements this with three memories that all receive every write. |
interface BRAM_3#(type idx_type, type data_type); |
|
// Start a read on channel 1 / 2 / 3. |
method Action read_req1(idx_type idx); |
method Action read_req2(idx_type idx); |
method Action read_req3(idx_type idx); |
|
// Collect the data of a previously requested read on channel 1 / 2 / 3. |
method ActionValue#(data_type) read_resp1(); |
method ActionValue#(data_type) read_resp2(); |
method ActionValue#(data_type) read_resp3(); |
|
// Store data at idx. |
method Action write(idx_type idx, data_type data); |
|
endinterface |
|
|
// BRAM covering the index range low..high. |
// When the data type packs to zero bits the stub mkBRAM_Zero is instantiated; |
// otherwise the imported Verilog memory mkBRAM_NonZero is used. |
module mkBRAM#(Integer low, Integer high) |
   //interface: |
   (BRAM#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   // The choice is made during static elaboration, so only one of the two is instantiated. |
   BRAM#(idx_type, data_type) m; |
   if (valueof(data) == 0) |
      m <- mkBRAM_Zero(); |
   else |
      m <- mkBRAM_NonZero(low, high); |
|
   return m; |
endmodule |
|
// Wrapper around the Verilog module named BRAM, presented through the BRAM interface. |
// low/high are passed to the Verilog module as the parameters lo/hi. |
import "BVI" BRAM = module mkBRAM_NonZero#(Integer low, Integer high) |
//interface: |
(BRAM#(idx_type, data_type)) |
provisos |
(Bits#(idx_type, idx), |
Bits#(data_type, data), |
Literal#(idx_type)); |
|
// The module clock drives the Verilog port CLK. |
default_clock clk(CLK); |
|
// Verilog parameters: bit widths of index and data, and the index range. |
parameter addr_width = valueof(idx); |
parameter data_width = valueof(data); |
parameter lo = low; |
parameter hi = high; |
|
// Method-to-port mapping: data/address ports, with ready and enable signals where declared. |
method DOUT read_resp() ready(DOUT_RDY) enable(DOUT_EN); |
|
method read_req(RD_ADDR) ready(RD_RDY) enable(RD_EN); |
method write(WR_ADDR, WR_VAL) enable(WR_EN); |
|
// Different methods are conflict-free with one another ... |
schedule read_req CF (read_resp, write); |
schedule read_resp CF (read_req, write); |
schedule write CF (read_req, read_resp); |
|
// ... while each method conflicts with itself (at most one call per cycle). |
schedule read_req C read_req; |
schedule read_resp C read_resp; |
schedule write C write; |
|
endmodule |
|
// Stand-in BRAM for data types that occupy zero bits. |
// Nothing is stored: writes are ignored, and each read request queues a don't-care |
// value that the matching read_resp later removes and returns. |
module mkBRAM_Zero |
   //interface: |
   (BRAM#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   // One queue entry per outstanding read. |
   FIFO#(data_type) q <- mkFIFO; |
|
   method Action read_req(idx_type i); |
      q.enq(?); |
   endmethod |
|
   method ActionValue#(data_type) read_resp(); |
      let pending = q.first(); |
      q.deq(); |
      return pending; |
   endmethod |
|
   method Action write(idx_type i, data_type d); |
      noAction; |
   endmethod |
|
endmodule |
|
// BRAM spanning every index representable in idx_type: 0 .. 2^idx - 1. |
module mkBRAM_Full |
   //interface: |
   (BRAM#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   Integer lastIndex = valueof(TExp#(idx)) - 1; |
|
   BRAM#(idx_type, data_type) br <- mkBRAM(0, lastIndex); |
|
   return br; |
|
endmodule |
// Memory with two read channels, built from two BRAMs over the range low..high. |
// Every write goes to both copies; read channel N is served by copy N. |
module mkBRAM_2#(Integer low, Integer high) |
   //interface: |
   (BRAM_2#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   BRAM#(idx_type, data_type) br1 <- mkBRAM(low, high); |
   BRAM#(idx_type, data_type) br2 <- mkBRAM(low, high); |
|
   method Action read_req1(idx_type i); |
      br1.read_req(i); |
   endmethod |
|
   method Action read_req2(idx_type i); |
      br2.read_req(i); |
   endmethod |
|
   method ActionValue#(data_type) read_resp1(); |
      let d <- br1.read_resp(); |
      return d; |
   endmethod |
|
   method ActionValue#(data_type) read_resp2(); |
      let d <- br2.read_resp(); |
      return d; |
   endmethod |
|
   // Keep both copies identical. |
   method Action write(idx_type idx, data_type data); |
      br1.write(idx, data); |
      br2.write(idx, data); |
   endmethod |
|
endmodule |
|
// Two-read-channel memory spanning every index representable in idx_type: 0 .. 2^idx - 1. |
module mkBRAM_2_Full |
   //interface: |
   (BRAM_2#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   Integer lastIndex = valueof(TExp#(idx)) - 1; |
|
   BRAM_2#(idx_type, data_type) br <- mkBRAM_2(0, lastIndex); |
|
   return br; |
|
endmodule |
|
// Memory with three read channels, built from three BRAMs over the range low..high. |
// Every write goes to all three copies; read channel N is served by copy N. |
module mkBRAM_3#(Integer low, Integer high) |
   //interface: |
   (BRAM_3#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   BRAM#(idx_type, data_type) br1 <- mkBRAM(low, high); |
   BRAM#(idx_type, data_type) br2 <- mkBRAM(low, high); |
   BRAM#(idx_type, data_type) br3 <- mkBRAM(low, high); |
|
   method Action read_req1(idx_type i); |
      br1.read_req(i); |
   endmethod |
|
   method Action read_req2(idx_type i); |
      br2.read_req(i); |
   endmethod |
|
   method Action read_req3(idx_type i); |
      br3.read_req(i); |
   endmethod |
|
   method ActionValue#(data_type) read_resp1(); |
      let d <- br1.read_resp(); |
      return d; |
   endmethod |
|
   method ActionValue#(data_type) read_resp2(); |
      let d <- br2.read_resp(); |
      return d; |
   endmethod |
|
   method ActionValue#(data_type) read_resp3(); |
      let d <- br3.read_resp(); |
      return d; |
   endmethod |
|
   // Keep all three copies identical. |
   method Action write(idx_type idx, data_type data); |
      br1.write(idx, data); |
      br2.write(idx, data); |
      br3.write(idx, data); |
   endmethod |
|
endmodule |
|
|
// Three-read-channel memory spanning every index representable in idx_type: 0 .. 2^idx - 1. |
module mkBRAM_3_Full |
   //interface: |
   (BRAM_3#(idx_type, data_type)) |
   provisos (Bits#(idx_type, idx), |
             Bits#(data_type, data), |
             Literal#(idx_type)); |
|
   Integer lastIndex = valueof(TExp#(idx)) - 1; |
|
   BRAM_3#(idx_type, data_type) br <- mkBRAM_3(0, lastIndex); |
|
   return br; |
|
endmodule |
|
/trunk/src/mkDeblockFilter_orig.bsv
0,0 → 1,780
//********************************************************************** |
// Deblocking Filter |
//---------------------------------------------------------------------- |
// |
// |
|
package mkDeblockFilter; |
|
import H264Types::*; |
|
import IDeblockFilter::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
|
// Processing state of the deblocking filter. All members carry no data, so the type |
// behaves like an enumeration. The meaning of each state beyond its name is defined by |
// the module that uses it (not visible in this part of the file). |
typedef union tagged |
{ |
void Passing; //not working on anything in particular |
void Initialize; |
void Horizontal; |
void Vertical; |
void Cleanup; |
} |
Process deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
|
// Absolute difference |in0 - in1| of two unsigned 8-bit values. |
function Bit#(8) absdiff8(Bit#(8) in0, Bit#(8) in1); |
   Bit#(8) distance; |
   if (in0 > in1) |
      distance = in0 - in1; |
   else |
      distance = in1 - in0; |
   return distance; |
endfunction |
|
|
// Decides from four samples across an edge whether the edge passes the threshold test. |
// in_pixels packs p1, p0, q0, q1 from the least to the most significant byte. |
// True when |p0-q0| < alpha, |p0-p1| < beta and |q0-q1| < beta. |
function Bool filter_test(Bit#(32) in_pixels, Bit#(8) alpha, Bit#(5) beta); |
   Bit#(8) sampleP1 = in_pixels[7:0]; |
   Bit#(8) sampleP0 = in_pixels[15:8]; |
   Bit#(8) sampleQ0 = in_pixels[23:16]; |
   Bit#(8) sampleQ1 = in_pixels[31:24]; |
   Bit#(8) betaWide = zeroExtend(beta); |
   Bool acrossEdgeOk = absdiff8(sampleP0,sampleQ0) < alpha; |
   Bool pSideOk      = absdiff8(sampleP0,sampleP1) < betaWide; |
   Bool qSideOk      = absdiff8(sampleQ0,sampleQ1) < betaWide; |
   return (acrossEdgeOk && pSideOk && qSideOk); |
endfunction |
|
|
// Clips a signed 9-bit value to the symmetric range -bound .. +bound and returns it |
// as a signed 6-bit value (two's complement). |
function Bit#(6) clip3symmetric9to6(Bit#(9) val, Bit#(5) bound); |
   Int#(9) signedVal  = unpack(val); |
   Int#(6) upperLimit = unpack({1'b0,bound}); |
   Int#(6) lowerLimit = -upperLimit; |
   Int#(6) clipped; |
   if (signedVal < signExtend(lowerLimit)) |
      clipped = lowerLimit; |
   else if (signedVal > signExtend(upperLimit)) |
      clipped = upperLimit; |
   else |
      clipped = truncate(signedVal); |
   return pack(clipped); |
endfunction |
|
|
// Filters one line of eight samples across an edge. |
// |
// in_pixels  : from least- to most-significant byte p3, p2, p1, p0, q0, q1, |
//              q2, q3 (p0/q0 are adjacent to the edge). |
// chroma_flag: True selects the chroma variants (only p0/q0 are modified). |
// bs         : boundary strength. 4 selects the averaging filters, a |
//              non-zero value other than 4 selects the clipped-delta filter |
//              with tc0_vector[bs-1] as clipping base, 0 passes through. |
// alpha,beta : thresholds; beta is zero-extended to 8 bits before use. |
// tc0_vector : clipping bases, indexed by bs-1. |
// |
// Returns the eight samples in the same byte layout as in_pixels. p3 and q3 |
// are never modified. |
function Bit#(64) filter_input(Bit#(64) in_pixels, Bool chroma_flag, Bit#(3) bs, Bit#(8) alpha, Bit#(5) beta, Vector#(3,Bit#(5)) tc0_vector); |
   Bit#(8) p3 = in_pixels[7:0]; |
   Bit#(8) p2 = in_pixels[15:8]; |
   Bit#(8) p1 = in_pixels[23:16]; |
   Bit#(8) p0 = in_pixels[31:24]; |
   Bit#(8) q0 = in_pixels[39:32]; |
   Bit#(8) q1 = in_pixels[47:40]; |
   Bit#(8) q2 = in_pixels[55:48]; |
   Bit#(8) q3 = in_pixels[63:56]; |
|
   // Outputs default to pass-through; the branches below override them. |
   Bit#(8) np0 = p0; |
   Bit#(8) np1 = p1; |
   Bit#(8) np2 = p2; |
   Bit#(8) nq0 = q0; |
   Bit#(8) nq1 = q1; |
   Bit#(8) nq2 = q2; |
|
   Bool isLuma = !chroma_flag; |
   Bit#(8) betaWide = zeroExtend(beta); |
   Bool pSideFlat = absdiff8(p2,p0) < betaWide; |
   Bool qSideFlat = absdiff8(q2,q0) < betaWide; |
   Bit#(9) edgeSum = zeroExtend(p0)+zeroExtend(q0); |
|
   if (bs == 4) |
   begin |
      Bool tightStep = absdiff8(p0,q0) < (alpha >> 2)+2; |
|
      // p side: 3-tap result by default, replaced by the longer filters |
      // for luma when the p side is flat and the step is small. |
      Bit#(11) wp0 = ((zeroExtend(p1)<<1) + zeroExtend(p0) + zeroExtend(q1) + 2) >> 2; |
      Bit#(11) wp1 = zeroExtend(p1); |
      Bit#(11) wp2 = zeroExtend(p2); |
      if (isLuma && pSideFlat && tightStep) |
      begin |
         Bit#(11) accP = zeroExtend(p1)+zeroExtend(edgeSum); |
         wp0 = (zeroExtend(p2) + (accP<<1) + zeroExtend(q1) + 4) >> 3; |
         wp1 = (zeroExtend(p2) + accP + 2) >> 2; |
         wp2 = (((zeroExtend(p3)+zeroExtend(p2))<<1) + zeroExtend(p2) + accP + 4) >> 3; |
      end |
|
      // q side: mirror image of the p side. |
      Bit#(11) wq0 = ((zeroExtend(q1)<<1) + zeroExtend(q0) + zeroExtend(p1) + 2) >> 2; |
      Bit#(11) wq1 = zeroExtend(q1); |
      Bit#(11) wq2 = zeroExtend(q2); |
      if (isLuma && qSideFlat && tightStep) |
      begin |
         Bit#(11) accQ = zeroExtend(q1)+zeroExtend(edgeSum); |
         wq0 = (zeroExtend(p1) + (accQ<<1) + zeroExtend(q2) + 4) >> 3; |
         wq1 = (zeroExtend(q2) + accQ + 2) >> 2; |
         wq2 = (((zeroExtend(q3)+zeroExtend(q2))<<1) + zeroExtend(q2) + accQ + 4) >> 3; |
      end |
|
      np0 = truncate(wp0); |
      np1 = truncate(wp1); |
      np2 = truncate(wp2); |
      nq0 = truncate(wq0); |
      nq1 = truncate(wq1); |
      nq2 = truncate(wq2); |
   end |
   else if (bs > 0) |
   begin |
      Bit#(5) tcBase = tc0_vector[bs-1]; |
      // Luma widens the clip range by one per flat side; chroma always by one. |
      Bit#(5) tcLuma = tcBase + (pSideFlat ? 1:0) + (qSideFlat ? 1:0); |
      Bit#(5) tc = chroma_flag ? tcBase+1 : tcLuma; |
      Bit#(12) rawDelta = (((zeroExtend(q0)-zeroExtend(p0))<<2)+zeroExtend(p1)-zeroExtend(q1)+4); |
      Bit#(6) delta = clip3symmetric9to6(rawDelta[11:3],tc); |
|
      // p0 + delta and q0 - delta, saturated to 0..255: bit 9 set means |
      // the sum went negative, bit 8 set means it exceeded 255. |
      Bit#(10) p0Adj = zeroExtend(p0) + signExtend(delta); |
      if (p0Adj[9]==1) |
         np0 = 0; |
      else if (p0Adj[8]==1) |
         np0 = 255; |
      else |
         np0 = p0Adj[7:0]; |
      Bit#(10) q0Adj = zeroExtend(q0) - signExtend(delta); |
      if (q0Adj[9]==1) |
         nq0 = 0; |
      else if (q0Adj[8]==1) |
         nq0 = 255; |
      else |
         nq0 = q0Adj[7:0]; |
|
      // Rounded average of p0 and q0, shared by the p1/q1 corrections. |
      Bit#(9) edgeSumRounded = edgeSum+1; |
      Bit#(8) edgeAvg = edgeSumRounded[8:1]; |
      if (isLuma && pSideFlat) |
      begin |
         Bit#(10) p1Raw = zeroExtend(p2) + zeroExtend(edgeAvg) - (zeroExtend(p1)<<1); |
         np1 = p1+signExtend(clip3symmetric9to6(p1Raw[9:1],tcBase)); |
      end |
      if (isLuma && qSideFlat) |
      begin |
         Bit#(10) q1Raw = zeroExtend(q2) + zeroExtend(edgeAvg) - (zeroExtend(q1)<<1); |
         nq1 = q1+signExtend(clip3symmetric9to6(q1Raw[9:1],tcBase)); |
      end |
   end |
|
   return({q3, nq2, nq1, nq0, np0, np1, np2, p3}); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Deblocking Filter Module |
//----------------------------------------------------------- |
|
|
// Deblocking filter stage. Tokens arrive on infifo (exposed as ioin) and |
// leave on outfifo (exposed as ioout); two memory clients (data and |
// parameter) are exposed through the request/response FIFOs below. |
// The rules further down drive the `process` state machine once per plane |
// (chromaFlag 0, then 1) of each macroblock. |
(* synthesize *) |
module mkDeblockFilter( IDeblockFilter ); |
|
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(deblockFilter_infifo_size); |
FIFO#(DeblockFilterOT) outfifo <- mkFIFO(); |
|
// Request/response queues behind mem_client_data and mem_client_parameter. |
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkFIFO; |
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkFIFO; |
FIFO#(MemResp#(32)) dataMemRespQ <- mkFIFO; |
FIFO#(MemResp#(13)) parameterMemRespQ <- mkFIFO; |
|
// State machine phase and progress counters. |
Reg#(Process) process <- mkReg(Passing); |
Reg#(Bit#(1)) chromaFlag <- mkReg(0); // 0: luma plane, 1: chroma plane (selects qpy vs qpc) |
Reg#(Bit#(5)) dataReqCount <- mkReg(0); // 0 = idle, 1..16 = next data load request to issue |
Reg#(Bit#(5)) dataRespCount <- mkReg(0); // 0 = idle, 1..16 = next data load response expected |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |
|
// Per-macroblock edge enables, computed in rule initialize. |
Reg#(Bool) filterTopMbEdgeFlag <- mkReg(False); |
Reg#(Bool) filterLeftMbEdgeFlag <- mkReg(False); |
Reg#(Bool) filterInternalEdgesFlag <- mkReg(False); |
|
// Picture geometry and current macroblock position. |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |
|
// Filter control values captured from the input stream in rule passing. |
Reg#(Bit#(2)) disable_deblocking_filter_idc <- mkReg(0); |
Reg#(Bit#(5)) slice_alpha_c0_offset <- mkReg(0); |
Reg#(Bit#(5)) slice_beta_offset <- mkReg(0); |
|
// Quantiser parameters / intra bits for the current, left and top macroblock. |
Reg#(Bit#(6)) curr_qpy <- mkReg(0); |
Reg#(Bit#(6)) left_qpy <- mkReg(0); |
Reg#(Bit#(6)) top_qpy <- mkReg(0); |
Reg#(Bit#(6)) curr_qpc <- mkReg(0); |
Reg#(Bit#(6)) left_qpc <- mkReg(0); |
Reg#(Bit#(6)) top_qpc <- mkReg(0); |
Reg#(Bit#(1)) curr_intra <- mkReg(0); // NOTE(review): never written by any rule in this module |
Reg#(Bit#(1)) left_intra <- mkReg(0); |
Reg#(Bit#(1)) top_intra <- mkReg(0); |
|
// Thresholds for macroblock-edge and internal-edge filtering. |
Reg#(Bit#(8)) alphaMbEdge <- mkReg(0); |
Reg#(Bit#(8)) alphaInternal <- mkReg(0); |
Reg#(Bit#(5)) betaMbEdge <- mkReg(0); |
Reg#(Bit#(5)) betaInternal <- mkReg(0); |
Reg#(Vector#(3,Bit#(5))) tc0MbEdge <- mkRegU(); |
Reg#(Vector#(3,Bit#(5))) tc0Internal <- mkRegU(); |
|
// Lookup tables with 52 entries each; the rules index them with indexA / |
// indexB values clamped to 0..51. |
Bit#(8) alpha_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 4, 4, 5, 6, |
7, 8, 9, 10, 12, 13, 15, 17, 20, 22, |
25, 28, 32, 36, 40, 45, 50, 56, 63, 71, |
80, 90,101,113,127,144,162,182,203,226, |
255,255}; |
Bit#(5) beta_table[52] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 2, 2, 2, 3, |
3, 3, 3, 4, 4, 4, 6, 6, 7, 7, |
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, |
13, 13, 14, 14, 15, 15, 16, 16, 17, 17, |
18, 18}; |
// Each tc0_table row holds the three clipping bases that filter_input |
// selects with bs-1. |
Bit#(5) tc0_table[52][3] = {{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, |
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, |
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 }, |
{ 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 }, |
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 }, |
{ 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 }, |
{ 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 }, |
{ 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 }, |
{ 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }}; |
|
// Pixel buffers, one 32-bit word per four samples. workVector holds the |
// plane being filtered, leftVector the words kept from the previously |
// processed macroblock (luma at addresses 0..63, chroma under the 2'b10 |
// prefix), topVector the words loaded through dataMemRespQ. |
Reg#(Vector#(64,Bit#(32))) workVector <- mkRegU(); |
Reg#(Vector#(96,Bit#(32))) leftVector <- mkRegU(); |
Reg#(Vector#(16,Bit#(32))) topVector <- mkRegU(); |
|
// Hand-shake between rule vertical and rule outputing, plus the |
// column / row position of the vertical pass. |
Reg#(Bool) startLastOutput <- mkReg(False); |
Reg#(Bool) outputingFinished <- mkReg(False); |
Reg#(Bit#(2)) colNum <- mkReg(0); |
Reg#(Bit#(2)) rowNum <- mkReg(0); |
|
// Boundary strengths per block: tuple of (bShor, bSver) written in rule horizontal. |
RFile1#(Bit#(4),Tuple2#(Bit#(3),Bit#(3))) bSfile <- mkRFile1Full(); |
|
|
//----------------------------------------------------------- |
// Rules |
|
// Idle phase: inspects the token at the head of infifo. Tokens that carry |
// picture geometry or slice position are recorded and forwarded to |
// outfifo; deblocking control tokens and IBTmb_qp are recorded and consumed |
// without forwarding; any other token is forwarded unchanged. A PBbS token |
// is left in infifo and switches the machine to Initialize. |
rule passing ( process matches Passing ); |
case (infifo.first()) matches |
tagged NewUnit . xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display("ccl5newunit"); |
$display("ccl5rbspbyte %h", xdata); |
end |
tagged SPSpic_width_in_mbs .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picWidth <= xdata; |
end |
tagged SPSpic_height_in_map_units .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picHeight <= xdata; |
end |
tagged PPSdeblocking_filter_control_present_flag .xdata : |
begin |
infifo.deq(); |
// Flag cleared: reset the slice-level controls to zero. |
if (xdata == 0) |
begin |
disable_deblocking_filter_idc <= 0; |
slice_alpha_c0_offset <= 0; |
slice_beta_offset <= 0; |
end |
end |
tagged SHfirst_mb_in_slice .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
firstMb <= xdata; |
currMb <= xdata; |
// currMbHor temporarily holds the raw macroblock address; rule |
// currMbHorUpdate reduces it to (column, row) form. |
currMbHor <= xdata; |
currMbVer <= 0; |
end |
tagged SHdisable_deblocking_filter_idc .xdata : |
begin |
infifo.deq(); |
disable_deblocking_filter_idc <= xdata; |
end |
tagged SHslice_alpha_c0_offset .xdata : |
begin |
infifo.deq(); |
slice_alpha_c0_offset <= xdata; |
end |
tagged SHslice_beta_offset .xdata : |
begin |
infifo.deq(); |
slice_beta_offset <= xdata; |
end |
tagged IBTmb_qp .xdata : |
begin |
infifo.deq(); |
curr_qpy <= xdata.qpy; |
curr_qpc <= xdata.qpc; |
end |
tagged PBbS .xdata : |
begin |
// Not dequeued here: rule horizontal consumes the PBbS tokens. |
process <= Initialize; |
end |
tagged PBoutput .xdata : |
begin |
// Reported only; the token is not dequeued in this branch. |
$display( "ERROR Deblocking Filter: passing PBoutput"); |
end |
tagged EndOfFile : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display( "ccl5: EndOfFile reached"); |
//$finish(0); |
end |
default: |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
end |
endcase |
endrule |
|
|
// Normalises the macroblock position: while currMbHor is not a valid |
// column (>= picWidth), subtract whole rows from it and add them to |
// currMbVer. Eight rows are removed per firing when at least eight remain, |
// otherwise one. |
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) ); |
   Bit#(PicAreaSz) rowLen = zeroExtend(picWidth); |
   Bool eightRowsLeft = (currMbHor >> 3) >= rowLen; |
   currMbHor <= currMbHor - (eightRowsLeft ? (rowLen << 3) : rowLen); |
   currMbVer <= currMbVer + (eightRowsLeft ? 8 : 1); |
endrule |
|
|
// Prepares one plane of the current macroblock: starts the data load |
// request/response counters, derives the three edge-enable flags from the |
// macroblock position and disable_deblocking_filter_idc, resets the block / |
// pixel counters, and looks up the macroblock-edge thresholds from the |
// rounded average of the current and left quantiser parameters. |
rule initialize ( process==Initialize && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: initialize %0d", currMb); |
process <= Horizontal; |
dataReqCount <= 1; |
dataRespCount <= 1; |
// Top edge is skipped in the first macroblock row, when filtering is |
// disabled (idc==1), or (idc==2) when less than one row past firstMb. |
filterTopMbEdgeFlag <= !(currMb<zeroExtend(picWidth) || disable_deblocking_filter_idc==1 || (disable_deblocking_filter_idc==2 && currMb-firstMb<zeroExtend(picWidth))); |
// Left edge is skipped in column 0, when idc==1, or (idc==2) for firstMb itself. |
filterLeftMbEdgeFlag <= !(currMbHor==0 || disable_deblocking_filter_idc==1 || (disable_deblocking_filter_idc==2 && currMb==firstMb)); |
filterInternalEdgesFlag <= !(disable_deblocking_filter_idc==1); |
blockNum <= 0; |
pixelNum <= 0; |
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc); |
Bit#(6) left_qp = (chromaFlag==0 ? left_qpy : left_qpc); |
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(left_qp)+1; |
Bit#(6) qpav = qpavtemp[6:1]; |
// Offsets are sign-extended; the sums are then clamped to 0..51. |
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset); |
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset); |
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0])); |
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0])); |
alphaMbEdge <= alpha_table[indexA]; |
betaMbEdge <= beta_table[indexB]; |
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]); |
tc0MbEdge <= tc0temp; |
endrule |
|
|
// Issues the load requests for the current macroblock column. In the first |
// macroblock row (currMb < picWidth) nothing is requested and the counter is |
// simply cleared. Otherwise one parameter load is sent together with the |
// first of sixteen data loads, whose addresses are |
// {column, chromaFlag, dataReqCount-1}. |
rule dataSendReq ( dataReqCount>0 && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: dataSendReq %0d", dataReqCount); |
Bit#(PicWidthSz) temp = truncate(currMbHor); |
if(currMb<zeroExtend(picWidth)) |
dataReqCount <= 0; |
else |
begin |
if(dataReqCount==1) |
parameterMemReqQ.enq(LoadReq temp); |
Bit#(4) temp2 = truncate(dataReqCount-1); |
let temp3 = {temp,chromaFlag,temp2}; |
dataMemReqQ.enq(LoadReq temp3); |
if(dataReqCount==16) |
dataReqCount <= 0; |
else |
dataReqCount <= dataReqCount+1; |
end |
endrule |
|
|
// First macroblock row: rule dataSendReq issues no loads there, so no |
// responses will arrive. Clear the response counter so the rules that |
// wait for dataRespCount==0 can proceed. |
rule dataReceiveNoResp ( currMb<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) && dataRespCount!=0 ); |
   //$display( "TRACE Deblocking Filter: dataReceiveNoResp"); |
   dataRespCount <= 0; |
endrule |
|
|
// Collects the load responses for macroblocks below the first row. The |
// first firing also takes the parameter response and unpacks it into |
// top_qpy / top_qpc / top_intra; every firing stores one data word into |
// topVector at index dataRespCount-1. The counter returns to 0 after the |
// sixteenth word. |
rule dataReceiveResp ( dataRespCount>0 && !(currMb<zeroExtend(picWidth)) && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: dataReceiveResp %0d", dataRespCount); |
Bit#(4) temp = truncate(dataRespCount-1); |
Vector#(16,Bit#(32)) topVectorNext = topVector; |
if(dataRespCount==1) |
begin |
// A response that is not a LoadResp leaves the parameters at 0. |
Bit#(13) tempParameters=0; |
if(parameterMemRespQ.first() matches tagged LoadResp .xdata) |
tempParameters = xdata; |
// Layout matches the StoreReq in rule cleanup: {intra, qpc, qpy}. |
top_qpy <= tempParameters[5:0]; |
top_qpc <= tempParameters[11:6]; |
top_intra <= tempParameters[12]; |
parameterMemRespQ.deq(); |
end |
if(dataRespCount==16) |
dataRespCount <= 0; |
else |
dataRespCount <= dataRespCount+1; |
if(dataMemRespQ.first() matches tagged LoadResp .xdata) |
topVectorNext[temp] = xdata; |
dataMemRespQ.deq(); |
topVector <= topVectorNext; |
//$display( "TRACE Deblocking Filter: dataReceiveResp topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]); |
endrule |
|
|
// First filtering pass. Consumes the PBbS tokens (stored into bSfile) and |
// the PBoutput tokens (four samples each). Every PBoutput word is paired |
// with the word to its left -- taken from leftVector on the macroblock's |
// left edge, otherwise from workVector -- run through filter_test / |
// filter_input when the matching enable flag is set, and written back. |
// After the last word of the plane the rule switches to Vertical and |
// recomputes the macroblock-edge thresholds from the top neighbour's qp. |
rule horizontal ( process==Horizontal && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: horizontal %0d %0d %0d", blockNum, pixelNum, infifo.first()); |
// blockNum interleaves the block coordinates: bits 2,0 = column, bits 3,1 = row. |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Vector#(96,Bit#(32)) leftVectorNext = leftVector; |
Vector#(64,Bit#(32)) workVectorNext = workVector; |
Bool leftEdge = (blockNum[0]==0 && (blockNum[2]==0 || chromaFlag==1)); |
// At the very start of the plane, load the internal-edge thresholds |
// from the current macroblock's own qp. |
if(blockNum==0 && pixelNum==0) |
begin |
Bit#(6) qpav = (chromaFlag==0 ? curr_qpy : curr_qpc); |
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset); |
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset); |
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0])); |
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0])); |
alphaInternal <= alpha_table[indexA]; |
betaInternal <= beta_table[indexB]; |
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]); |
tc0Internal <= tc0temp; |
end |
case (infifo.first()) matches |
tagged PBbS .xdata : |
begin |
infifo.deq(); |
bSfile.upd(blockNum,tuple2(xdata.bShor,xdata.bSver)); |
end |
tagged PBoutput .xdata : |
begin |
infifo.deq(); |
// addrq: where the incoming word goes; addrpLeft / addrpCurr: where |
// its left neighbour lives in leftVector / workVector. |
Bit#(6) addrq = {blockHor,blockVer,pixelVer}; |
Bit#(7) addrpLeft = (chromaFlag==0 ? {3'b011,blockVer,pixelVer} : {2'b10,blockHor[1],1'b1,blockVer[0],pixelVer}); |
Bit#(6) addrpCurr = {(blockHor-1),blockVer,pixelVer}; |
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]}; |
Bit#(32) pixelp; |
if(leftEdge) |
pixelp = leftVector[addrpLeft]; |
else |
pixelp = workVector[addrpCurr]; |
// Unfiltered default: neighbour word in the low half, new word in the high half. |
Bit#(64) result = {pixelq,pixelp}; |
if(leftEdge && filterLeftMbEdgeFlag) |
begin |
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaMbEdge,betaMbEdge)) |
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaMbEdge,betaMbEdge,tc0MbEdge); |
end |
else if(!leftEdge && filterInternalEdgesFlag) |
begin |
if(filter_test({pixelq[15:0],pixelp[31:16]},alphaInternal,betaInternal)) |
result = filter_input({pixelq,pixelp},chromaFlag==1,tpl_1(bSfile.sub((chromaFlag==0?blockNum:{blockNum[1:0],pixelVer[1],1'b0}))),alphaInternal,betaInternal,tc0Internal); |
end |
if(leftEdge) |
leftVectorNext[addrpLeft] = result[31:0]; |
else |
workVectorNext[addrpCurr] = result[31:0]; |
workVectorNext[addrq] = result[63:32]; |
leftVector <= leftVectorNext; |
workVector <= workVectorNext; |
// Last word of the plane: block 15 for luma, block 7 for chroma. |
if(pixelNum==12 && (blockNum==15 || (blockNum==7 && chromaFlag==1))) |
begin |
blockNum <= 0; |
process <= Vertical; |
startLastOutput <= False; |
outputingFinished <= False; |
colNum <= 0; |
// Start at row 0 only when the top macroblock edge is to be filtered. |
if(filterTopMbEdgeFlag) |
rowNum <= 0; |
else |
rowNum <= 1; |
Bit#(6) curr_qp = (chromaFlag==0 ? curr_qpy : curr_qpc); |
Bit#(6) top_qp = (chromaFlag==0 ? top_qpy : top_qpc); |
Bit#(7) qpavtemp = zeroExtend(curr_qp)+zeroExtend(top_qp)+1; |
Bit#(6) qpav = qpavtemp[6:1]; |
Bit#(8) indexAtemp = zeroExtend(qpav)+signExtend(slice_alpha_c0_offset); |
Bit#(8) indexBtemp = zeroExtend(qpav)+signExtend(slice_beta_offset); |
Bit#(6) indexA = (indexAtemp[7]==1 ? 0 : (indexAtemp[6:0]>51 ? 51 : indexAtemp[5:0])); |
Bit#(6) indexB = (indexBtemp[7]==1 ? 0 : (indexBtemp[6:0]>51 ? 51 : indexBtemp[5:0])); |
alphaMbEdge <= alpha_table[indexA]; |
betaMbEdge <= beta_table[indexB]; |
Vector#(3,Bit#(5)) tc0temp = arrayToVector(tc0_table[indexA]); |
tc0MbEdge <= tc0temp; |
end |
else if(pixelNum==12) |
blockNum <= blockNum+1; |
// pixelNum steps 0,4,8,12 and wraps back to 0 (4-bit register). |
pixelNum <= pixelNum+4; |
end |
//default: $display( "ERROR Deblocking Filter: horizontal non-PBoutput input"); |
endcase |
endrule |
|
|
// Second filtering pass, one 4x4 block edge per firing. The four words |
// above the edge (from topVector at row 0, otherwise from the previous |
// block in workVector) and the four words of block {colNum,rowNum} are |
// transposed into four 8-sample lines, each line is filtered with |
// filter_test / filter_input, and the results are transposed back. |
// Waits for dataRespCount==0, i.e. until topVector has been loaded. |
rule vertical ( process==Vertical && !startLastOutput && dataRespCount==0 && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: vertical %0d %0d", colNum, rowNum); |
//$display( "TRACE Deblocking Filter: vertical topVector %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h %h", topVector[0], topVector[1], topVector[2], topVector[3], topVector[4], topVector[5], topVector[6], topVector[7], topVector[8], topVector[9], topVector[10], topVector[11], topVector[12], topVector[13], topVector[14], topVector[15]); |
Bool topEdge = (rowNum==0); |
Vector#(64,Bit#(32)) workVectorNext = workVector; |
Vector#(16,Bit#(32)) topVectorNext = topVector; |
Vector#(64,Bit#(32)) workV = workVector; |
Vector#(4,Bit#(32)) tempV = replicate(0); |
Vector#(4,Bit#(64)) resultV = replicate(0); |
Bit#(8) alpha; |
Bit#(5) beta; |
Vector#(3,Bit#(5)) tc0; |
Bit#(4) crNum = {colNum,rowNum}; |
// Fetch the four words above the edge and pick the matching thresholds. |
if(topEdge) |
begin |
tempV[0] = topVector[{colNum,2'b00}]; |
tempV[1] = topVector[{colNum,2'b01}]; |
tempV[2] = topVector[{colNum,2'b10}]; |
tempV[3] = topVector[{colNum,2'b11}]; |
alpha = alphaMbEdge; |
beta = betaMbEdge; |
tc0 = tc0MbEdge; |
end |
else |
begin |
tempV[0] = workV[{(crNum-1),2'b00}]; |
tempV[1] = workV[{(crNum-1),2'b01}]; |
tempV[2] = workV[{(crNum-1),2'b10}]; |
tempV[3] = workV[{(crNum-1),2'b11}]; |
alpha = alphaInternal; |
beta = betaInternal; |
tc0 = tc0Internal; |
end |
// Transpose: resultV[k] collects byte k of all eight words (upper block in |
// the low 32 bits, current block in the high 32 bits). |
resultV[0] = {workV[{crNum,2'b11}][7:0],workV[{crNum,2'b10}][7:0],workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0],tempV[1][7:0],tempV[0][7:0]}; |
resultV[1] = {workV[{crNum,2'b11}][15:8],workV[{crNum,2'b10}][15:8],workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8],tempV[1][15:8],tempV[0][15:8]}; |
resultV[2] = {workV[{crNum,2'b11}][23:16],workV[{crNum,2'b10}][23:16],workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16],tempV[1][23:16],tempV[0][23:16]}; |
resultV[3] = {workV[{crNum,2'b11}][31:24],workV[{crNum,2'b10}][31:24],workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24],tempV[1][31:24],tempV[0][31:24]}; |
// Filter each line. For chroma, lines 0-1 and lines 2-3 read different |
// bSfile entries (low index bits 2'b00 vs 2'b01). |
if(filter_test({workV[{crNum,2'b01}][7:0],workV[{crNum,2'b00}][7:0],tempV[3][7:0],tempV[2][7:0]},alpha,beta)) |
resultV[0] = filter_input(resultV[0],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][15:8],workV[{crNum,2'b00}][15:8],tempV[3][15:8],tempV[2][15:8]},alpha,beta)) |
resultV[1] = filter_input(resultV[1],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b00}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][23:16],workV[{crNum,2'b00}][23:16],tempV[3][23:16],tempV[2][23:16]},alpha,beta)) |
resultV[2] = filter_input(resultV[2],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0); |
if(filter_test({workV[{crNum,2'b01}][31:24],workV[{crNum,2'b00}][31:24],tempV[3][31:24],tempV[2][31:24]},alpha,beta)) |
resultV[3] = filter_input(resultV[3],chromaFlag==1,tpl_2(bSfile.sub((chromaFlag==0?{rowNum[1],colNum[1],rowNum[0],colNum[0]}:{rowNum[0],colNum[0],2'b01}))),alpha,beta,tc0); |
// Transpose back: low halves return to the upper block, high halves to |
// the current block. |
if(topEdge) |
begin |
topVectorNext[{colNum,2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]}; |
topVectorNext[{colNum,2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]}; |
topVectorNext[{colNum,2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]}; |
topVectorNext[{colNum,2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]}; |
end |
else |
begin |
workVectorNext[{(crNum-1),2'b00}] = {resultV[3][7:0],resultV[2][7:0],resultV[1][7:0],resultV[0][7:0]}; |
workVectorNext[{(crNum-1),2'b01}] = {resultV[3][15:8],resultV[2][15:8],resultV[1][15:8],resultV[0][15:8]}; |
workVectorNext[{(crNum-1),2'b10}] = {resultV[3][23:16],resultV[2][23:16],resultV[1][23:16],resultV[0][23:16]}; |
workVectorNext[{(crNum-1),2'b11}] = {resultV[3][31:24],resultV[2][31:24],resultV[1][31:24],resultV[0][31:24]}; |
end |
workVectorNext[{crNum,2'b00}] = {resultV[3][39:32],resultV[2][39:32],resultV[1][39:32],resultV[0][39:32]}; |
workVectorNext[{crNum,2'b01}] = {resultV[3][47:40],resultV[2][47:40],resultV[1][47:40],resultV[0][47:40]}; |
workVectorNext[{crNum,2'b10}] = {resultV[3][55:48],resultV[2][55:48],resultV[1][55:48],resultV[0][55:48]}; |
workVectorNext[{crNum,2'b11}] = {resultV[3][63:56],resultV[2][63:56],resultV[1][63:56],resultV[0][63:56]}; |
if(topEdge) |
topVector <= topVectorNext; |
workVector <= workVectorNext; |
// Advance: rows run to 3 for luma and to 1 for chroma; after the last |
// row of column 3 the pass is complete and startLastOutput is raised. |
if(rowNum==3 || (chromaFlag==1 && rowNum==1)) |
begin |
if(colNum==3) |
startLastOutput <= True; |
else |
begin |
if(filterTopMbEdgeFlag) |
rowNum <= 0; |
else |
rowNum <= 1; |
end |
colNum <= colNum+1; |
end |
else |
rowNum <= rowNum+1; |
endrule |
|
|
// Runs alongside rule vertical and drains leftVector. All block rows |
// except the last are sent straight to outfifo, addressed to the |
// macroblock before the current one (wrapping to the end of the previous |
// row in column 0). The last block row is held back until startLastOutput: |
// it is then stored to data memory, and the matching topVector word is |
// sent to outfifo for the macroblock row above (when there is one). |
// For macroblock 0 nothing is output; the rule just waits for |
// startLastOutput and marks itself finished. |
rule outputing ( process==Vertical && !outputingFinished && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: outputting %0d %0d", blockNum, pixelNum); |
Bit#(2) blockHor = pixelNum[1:0]; |
Bit#(2) blockVer = blockNum[1:0]; |
Bit#(2) pixelVer = pixelNum[3:2]; |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor); |
Bool stalling = False; |
if(currMb==0) |
begin |
if(startLastOutput) |
outputingFinished <= True; |
end |
else |
begin |
Bit#(7) leftAddr; |
if(chromaFlag==0) |
leftAddr = {1'b0,blockHor,blockVer,pixelVer}; |
else |
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer}; |
Bit#(32) leftData = leftVector[leftAddr]; |
// Not the last block row (3 for luma, 1 for chroma): emit immediately. |
if(!(blockNum==3 || (blockNum==1 && chromaFlag==1))) |
begin |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData}); |
end |
else if(startLastOutput) |
begin |
// Last block row: save it to data memory under the previous column. |
Bit#(PicWidthSz) temp = ((currMbHor==0) ? (picWidth-1) : truncate(currMbHor-1)); |
dataMemReqQ.enq(StoreReq {addr:{temp,chromaFlag,blockHor,pixelVer},data:leftData}); |
if(currMbVer > 0) |
begin |
//$display( "TRACE Deblocking Filter: outputting last output %0d %0d %h", blockHor, pixelVer, topVector[{blockHor,pixelVer}]); |
Bit#(32) topData = topVector[{blockHor,pixelVer}]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{currMbVer-1,2'b11,pixelVer},hor:{currMbHorT,blockHor},data:topData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer-1,1'b1,pixelVer},hor:{currMbHorT,blockHor[0]},data:topData}); |
end |
end |
else |
// Last block row reached before the vertical pass finished: wait. |
stalling = True; |
if(!stalling) |
begin |
if(pixelNum==15) |
begin |
if(blockNum==3 || (chromaFlag==1 && blockNum==1)) |
begin |
// In the bottom macroblock row, leave blockNum non-zero so rule |
// cleanup also emits the last block row of leftVector. |
if(currMbVer==picHeight-1) |
blockNum <= (chromaFlag==0 ? 3 : 1); |
else |
blockNum <= 0; |
outputingFinished <= True; |
end |
else |
blockNum <= blockNum+1; |
end |
pixelNum <= pixelNum+1; |
end |
end |
endrule |
|
|
// Leaves the Vertical phase once both the filtering pass (startLastOutput) |
// and the concurrent output rule (outputingFinished) are done, and re-arms |
// both hand-shake flags. |
rule verticaltocleanup ( startLastOutput && outputingFinished && process==Vertical ); |
   outputingFinished <= False; |
   startLastOutput <= False; |
   process <= Cleanup; |
endrule |
|
|
// Final phase for one plane, selected by blockNum: |
//   blockNum == 0 : copy workVector into leftVector. After luma, switch to |
//                   the chroma plane and go back to Initialize. After |
//                   chroma, store the macroblock's parameters, advance to |
//                   the next macroblock and return to Passing (emitting |
//                   EndOfFrame after the last macroblock of the picture). |
//   0 < blockNum<8: emit one word of leftVector per firing. |
//   blockNum >= 8 : emit one word of workVector (the current macroblock) |
//                   per firing. |
rule cleanup ( process==Cleanup && currMbHor<zeroExtend(picWidth) ); |
//$display( "TRACE Deblocking Filter: cleanup %0d %0d", blockNum, pixelNum); |
Bit#(2) blockHor = pixelNum[1:0]; |
Bit#(2) blockVer = blockNum[1:0]; |
Bit#(2) pixelVer = pixelNum[3:2]; |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor); |
Vector#(96,Bit#(32)) leftVectorNext = leftVector; |
if(blockNum==0) |
begin |
if(chromaFlag==0) |
begin |
// Luma: workVector occupies leftVector[0..63] one-to-one. |
for(Integer ii=0; ii<64; ii=ii+1) |
leftVectorNext[fromInteger(ii)] = workVector[fromInteger(ii)]; |
chromaFlag <= 1; |
process <= Initialize; |
end |
else |
begin |
// Chroma: 32 words go to the 2'b10-prefixed region of leftVector. |
for(Integer ii=0; ii<32; ii=ii+1) |
begin |
Bit#(5) tempAddr = fromInteger(ii); |
leftVectorNext[{2'b10,tempAddr}] = workVector[{tempAddr[4:3],1'b0,tempAddr[2:0]}]; |
end |
chromaFlag <= 0; |
process <= Passing; |
Bit#(PicWidthSz) temp = truncate(currMbHor); |
// Saved for the macroblock below; read back in rule dataReceiveResp. |
parameterMemReqQ.enq(StoreReq {addr:temp,data:{curr_intra,curr_qpc,curr_qpy}}); |
left_intra <= curr_intra; |
left_qpc <= curr_qpc; |
left_qpy <= curr_qpy; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1)) |
outfifo.enq(EndOfFrame); |
end |
leftVector <= leftVectorNext; |
end |
else if(blockNum < 8) |
begin |
Bit#(7) leftAddr; |
if(chromaFlag==0) |
leftAddr = {1'b0,blockHor,blockVer,pixelVer}; |
else |
leftAddr = {2'b10,blockHor,blockVer[0],pixelVer}; |
Bit#(32) leftData = leftVector[leftAddr]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer,pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor},data:leftData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{(currMbHorT==0 ? currMbVer-1 : currMbVer),blockVer[0],pixelVer},hor:{(currMbHorT==0 ? picWidth-1 : currMbHorT-1),blockHor[0]},data:leftData}); |
if(pixelNum==15) |
begin |
// In the last column, continue with the current macroblock's own |
// words (blockNum 8 and up); otherwise finish via blockNum 0. |
if(currMbHor==zeroExtend(picWidth-1)) |
blockNum <= 8; |
else |
blockNum <= 0; |
end |
pixelNum <= pixelNum+1; |
end |
else |
begin |
Bit#(6) currAddr = {blockHor,blockVer,pixelVer}; |
Bit#(32) currData = workVector[currAddr]; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:currData}); |
else |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:currData}); |
if(pixelNum==15) |
begin |
// Done after block row 3 (luma) or 1 (chroma); then fall to blockNum 0. |
if(blockNum[1:0]==3 || (blockNum[1:0]==1 && chromaFlag==1)) |
blockNum <= 0; |
else |
blockNum <= blockNum+1; |
end |
pixelNum <= pixelNum+1; |
end |
endrule |
|
|
|
|
|
|
// Data memory client: requests come from dataMemReqQ, responses are |
// delivered into dataMemRespQ. |
interface Client mem_client_data; |
interface Get request = fifoToGet(dataMemReqQ); |
interface Put response = fifoToPut(dataMemRespQ); |
endinterface |
|
// Parameter memory client: requests come from parameterMemReqQ, responses |
// are delivered into parameterMemRespQ. |
interface Client mem_client_parameter; |
interface Get request = fifoToGet(parameterMemReqQ); |
interface Put response = fifoToPut(parameterMemRespQ); |
endinterface |
|
// Stream ports: ioin feeds infifo, ioout drains outfifo. |
interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |
|
endmodule |
|
endpackage |
/trunk/src/IBufferControl.bsv
0,0 → 1,31
//********************************************************************** |
// Interface for Buffer Controller |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IBufferControl; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Port list of the buffer controller: one stream in, one stream out, |
// three frame-buffer ports and one interpolator-facing server port. |
interface IBufferControl; |
|
// Interface for inter-module io |
// ioin accepts DeblockFilterOT tokens; ioout yields BufferControlOT tokens. |
interface Put#(DeblockFilterOT) ioin; |
interface Get#(BufferControlOT) ioout; |
|
// Interface for module to frame buffer |
// Two independent load clients plus a store-request output. |
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load1; |
interface Client#(FrameBufferLoadReq,FrameBufferLoadResp) buffer_client_load2; |
interface Get#(FrameBufferStoreReq) buffer_client_store; |
|
// Interface for module to interpolation |
// Accepts InterpolatorLoadReq requests and returns InterpolatorLoadResp responses. |
interface Server#(InterpolatorLoadReq,InterpolatorLoadResp) inter_server; |
|
endinterface |
|
endpackage |
|
/trunk/src/IEntropyDec.bsv
0,0 → 1,27
//********************************************************************** |
// Interface for Entropy Decoder |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IEntropyDec; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Port list of the entropy decoder: one stream in, two streams out and |
// one memory client. |
interface IEntropyDec; |
|
// Interface for inter-module io |
// ioin accepts NalUnwrapOT tokens; ioout yields EntropyDecOT tokens and |
// ioout_InverseTrans yields EntropyDecOT_InverseTrans tokens. |
interface Put#(NalUnwrapOT) ioin; |
interface Get#(EntropyDecOT) ioout; |
interface Get#(EntropyDecOT_InverseTrans) ioout_InverseTrans; |
|
// Interface for module to memory |
// Address width is PicWidthSz+1 bits, data width 20 bits. |
interface Client#(MemReq#(TAdd#(PicWidthSz,1),20),MemResp#(20)) mem_client; |
|
endinterface |
|
endpackage |
|
/trunk/src/CAVLC.bsv
0,0 → 1,651
//********************************************************************** |
// CAVLC codes |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package CAVLC; |
|
import H264Types::*; |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
|
// Match one variable-length codeword against the most-significant bits of |
// inbuffer. Which code table is used depends on nC, tested from the top bit |
// down: nC[5] set, else nC[4] or nC[3] set, else nC[2] set, else nC[1] set, |
// else the last table. |
// Returns tuple3(a, b, len). len always equals the width of the bit pattern |
// that matched, i.e. the number of buffer bits the codeword occupies. |
// NOTE(review): a and b are presumably TrailingOnes and TotalCoeff of the |
// H.264 coeff_token syntax element, and nC[5] set presumably encodes |
// nC == -1 (chroma DC) -- confirm against the caller. |
// When no pattern matches, tuple3(0,0,100) is returned; 100 is larger than |
// any real codeword length here and presumably serves as an error marker. |
(* noinline *) |
function Tuple3#(Bit#(2),Bit#(5),Bufcount) cavlc_coeff_token( Buffer inbuffer, Bit#(6) nC ); |
if(nC[5] == 1) |
begin |
// Table selected by nC[5]: codewords are 1 to 8 bits, matched on the top 8 bits. |
Bit#(8) buffertemp = inbuffer[buffersize-1:buffersize-8]; |
if(buffertemp[7:6] == 2'b01) return tuple3(0,0,2); |
else if(buffertemp[7:2] == 6'b000111) return tuple3(0,1,6); |
else if(buffertemp[7:7] == 1'b1) return tuple3(1,1,1); |
else if(buffertemp[7:2] == 6'b000100) return tuple3(0,2,6); |
else if(buffertemp[7:2] == 6'b000110) return tuple3(1,2,6); |
else if(buffertemp[7:5] == 3'b001) return tuple3(2,2,3); |
else if(buffertemp[7:2] == 6'b000011) return tuple3(0,3,6); |
else if(buffertemp[7:1] == 7'b0000011) return tuple3(1,3,7); |
else if(buffertemp[7:1] == 7'b0000010) return tuple3(2,3,7); |
else if(buffertemp[7:2] == 6'b000101) return tuple3(3,3,6); |
else if(buffertemp[7:2] == 6'b000010) return tuple3(0,4,6); |
else if(buffertemp[7:0] == 8'b00000011) return tuple3(1,4,8); |
else if(buffertemp[7:0] == 8'b00000010) return tuple3(2,4,8); |
else if(buffertemp[7:1] == 7'b0000000) return tuple3(3,4,7); |
else return tuple3(0,0,100); |
end |
else if(nC[4] == 1 || nC[3] == 1) |
begin |
// Table selected by nC[4] or nC[3]: every codeword is exactly 6 bits. |
// The patterns 000010 and 000111 are not listed and fall to the final else. |
Bit#(6) buffertemp = inbuffer[buffersize-1:buffersize-6]; |
if(buffertemp[5:0] == 6'b000011) return tuple3(0,0,6); |
else if(buffertemp[5:0] == 6'b000000) return tuple3(0,1,6); |
else if(buffertemp[5:0] == 6'b000001) return tuple3(1,1,6); |
else if(buffertemp[5:0] == 6'b000100) return tuple3(0,2,6); |
else if(buffertemp[5:0] == 6'b000101) return tuple3(1,2,6); |
else if(buffertemp[5:0] == 6'b000110) return tuple3(2,2,6); |
else if(buffertemp[5:0] == 6'b001000) return tuple3(0,3,6); |
else if(buffertemp[5:0] == 6'b001001) return tuple3(1,3,6); |
else if(buffertemp[5:0] == 6'b001010) return tuple3(2,3,6); |
else if(buffertemp[5:0] == 6'b001011) return tuple3(3,3,6); |
else if(buffertemp[5:0] == 6'b001100) return tuple3(0,4,6); |
else if(buffertemp[5:0] == 6'b001101) return tuple3(1,4,6); |
else if(buffertemp[5:0] == 6'b001110) return tuple3(2,4,6); |
else if(buffertemp[5:0] == 6'b001111) return tuple3(3,4,6); |
else if(buffertemp[5:0] == 6'b010000) return tuple3(0,5,6); |
else if(buffertemp[5:0] == 6'b010001) return tuple3(1,5,6); |
else if(buffertemp[5:0] == 6'b010010) return tuple3(2,5,6); |
else if(buffertemp[5:0] == 6'b010011) return tuple3(3,5,6); |
else if(buffertemp[5:0] == 6'b010100) return tuple3(0,6,6); |
else if(buffertemp[5:0] == 6'b010101) return tuple3(1,6,6); |
else if(buffertemp[5:0] == 6'b010110) return tuple3(2,6,6); |
else if(buffertemp[5:0] == 6'b010111) return tuple3(3,6,6); |
else if(buffertemp[5:0] == 6'b011000) return tuple3(0,7,6); |
else if(buffertemp[5:0] == 6'b011001) return tuple3(1,7,6); |
else if(buffertemp[5:0] == 6'b011010) return tuple3(2,7,6); |
else if(buffertemp[5:0] == 6'b011011) return tuple3(3,7,6); |
else if(buffertemp[5:0] == 6'b011100) return tuple3(0,8,6); |
else if(buffertemp[5:0] == 6'b011101) return tuple3(1,8,6); |
else if(buffertemp[5:0] == 6'b011110) return tuple3(2,8,6); |
else if(buffertemp[5:0] == 6'b011111) return tuple3(3,8,6); |
else if(buffertemp[5:0] == 6'b100000) return tuple3(0,9,6); |
else if(buffertemp[5:0] == 6'b100001) return tuple3(1,9,6); |
else if(buffertemp[5:0] == 6'b100010) return tuple3(2,9,6); |
else if(buffertemp[5:0] == 6'b100011) return tuple3(3,9,6); |
else if(buffertemp[5:0] == 6'b100100) return tuple3(0,10,6); |
else if(buffertemp[5:0] == 6'b100101) return tuple3(1,10,6); |
else if(buffertemp[5:0] == 6'b100110) return tuple3(2,10,6); |
else if(buffertemp[5:0] == 6'b100111) return tuple3(3,10,6); |
else if(buffertemp[5:0] == 6'b101000) return tuple3(0,11,6); |
else if(buffertemp[5:0] == 6'b101001) return tuple3(1,11,6); |
else if(buffertemp[5:0] == 6'b101010) return tuple3(2,11,6); |
else if(buffertemp[5:0] == 6'b101011) return tuple3(3,11,6); |
else if(buffertemp[5:0] == 6'b101100) return tuple3(0,12,6); |
else if(buffertemp[5:0] == 6'b101101) return tuple3(1,12,6); |
else if(buffertemp[5:0] == 6'b101110) return tuple3(2,12,6); |
else if(buffertemp[5:0] == 6'b101111) return tuple3(3,12,6); |
else if(buffertemp[5:0] == 6'b110000) return tuple3(0,13,6); |
else if(buffertemp[5:0] == 6'b110001) return tuple3(1,13,6); |
else if(buffertemp[5:0] == 6'b110010) return tuple3(2,13,6); |
else if(buffertemp[5:0] == 6'b110011) return tuple3(3,13,6); |
else if(buffertemp[5:0] == 6'b110100) return tuple3(0,14,6); |
else if(buffertemp[5:0] == 6'b110101) return tuple3(1,14,6); |
else if(buffertemp[5:0] == 6'b110110) return tuple3(2,14,6); |
else if(buffertemp[5:0] == 6'b110111) return tuple3(3,14,6); |
else if(buffertemp[5:0] == 6'b111000) return tuple3(0,15,6); |
else if(buffertemp[5:0] == 6'b111001) return tuple3(1,15,6); |
else if(buffertemp[5:0] == 6'b111010) return tuple3(2,15,6); |
else if(buffertemp[5:0] == 6'b111011) return tuple3(3,15,6); |
else if(buffertemp[5:0] == 6'b111100) return tuple3(0,16,6); |
else if(buffertemp[5:0] == 6'b111101) return tuple3(1,16,6); |
else if(buffertemp[5:0] == 6'b111110) return tuple3(2,16,6); |
else if(buffertemp[5:0] == 6'b111111) return tuple3(3,16,6); |
else return tuple3(0,0,100); |
end |
else if(nC[2] == 1) |
begin |
// Table selected by nC[2]: codewords are 4 to 10 bits, matched on the top 10 bits. |
Bit#(10) buffertemp = inbuffer[buffersize-1:buffersize-10]; |
if(buffertemp[9:6] == 4'b1111) return tuple3(0,0,4); |
else if(buffertemp[9:4] == 6'b001111) return tuple3(0,1,6); |
else if(buffertemp[9:6] == 4'b1110) return tuple3(1,1,4); |
else if(buffertemp[9:4] == 6'b001011) return tuple3(0,2,6); |
else if(buffertemp[9:5] == 5'b01111) return tuple3(1,2,5); |
else if(buffertemp[9:6] == 4'b1101) return tuple3(2,2,4); |
else if(buffertemp[9:4] == 6'b001000) return tuple3(0,3,6); |
else if(buffertemp[9:5] == 5'b01100) return tuple3(1,3,5); |
else if(buffertemp[9:5] == 5'b01110) return tuple3(2,3,5); |
else if(buffertemp[9:6] == 4'b1100) return tuple3(3,3,4); |
else if(buffertemp[9:3] == 7'b0001111) return tuple3(0,4,7); |
else if(buffertemp[9:5] == 5'b01010) return tuple3(1,4,5); |
else if(buffertemp[9:5] == 5'b01011) return tuple3(2,4,5); |
else if(buffertemp[9:6] == 4'b1011) return tuple3(3,4,4); |
else if(buffertemp[9:3] == 7'b0001011) return tuple3(0,5,7); |
else if(buffertemp[9:5] == 5'b01000) return tuple3(1,5,5); |
else if(buffertemp[9:5] == 5'b01001) return tuple3(2,5,5); |
else if(buffertemp[9:6] == 4'b1010) return tuple3(3,5,4); |
else if(buffertemp[9:3] == 7'b0001001) return tuple3(0,6,7); |
else if(buffertemp[9:4] == 6'b001110) return tuple3(1,6,6); |
else if(buffertemp[9:4] == 6'b001101) return tuple3(2,6,6); |
else if(buffertemp[9:6] == 4'b1001) return tuple3(3,6,4); |
else if(buffertemp[9:3] == 7'b0001000) return tuple3(0,7,7); |
else if(buffertemp[9:4] == 6'b001010) return tuple3(1,7,6); |
else if(buffertemp[9:4] == 6'b001001) return tuple3(2,7,6); |
else if(buffertemp[9:6] == 4'b1000) return tuple3(3,7,4); |
else if(buffertemp[9:2] == 8'b00001111) return tuple3(0,8,8); |
else if(buffertemp[9:3] == 7'b0001110) return tuple3(1,8,7); |
else if(buffertemp[9:3] == 7'b0001101) return tuple3(2,8,7); |
else if(buffertemp[9:5] == 5'b01101) return tuple3(3,8,5); |
else if(buffertemp[9:2] == 8'b00001011) return tuple3(0,9,8); |
else if(buffertemp[9:2] == 8'b00001110) return tuple3(1,9,8); |
else if(buffertemp[9:3] == 7'b0001010) return tuple3(2,9,7); |
else if(buffertemp[9:4] == 6'b001100) return tuple3(3,9,6); |
else if(buffertemp[9:1] == 9'b000001111) return tuple3(0,10,9); |
else if(buffertemp[9:2] == 8'b00001010) return tuple3(1,10,8); |
else if(buffertemp[9:2] == 8'b00001101) return tuple3(2,10,8); |
else if(buffertemp[9:3] == 7'b0001100) return tuple3(3,10,7); |
else if(buffertemp[9:1] == 9'b000001011) return tuple3(0,11,9); |
else if(buffertemp[9:1] == 9'b000001110) return tuple3(1,11,9); |
else if(buffertemp[9:2] == 8'b00001001) return tuple3(2,11,8); |
else if(buffertemp[9:2] == 8'b00001100) return tuple3(3,11,8); |
else if(buffertemp[9:1] == 9'b000001000) return tuple3(0,12,9); |
else if(buffertemp[9:1] == 9'b000001010) return tuple3(1,12,9); |
else if(buffertemp[9:1] == 9'b000001101) return tuple3(2,12,9); |
else if(buffertemp[9:2] == 8'b00001000) return tuple3(3,12,8); |
else if(buffertemp[9:0] == 10'b0000001101) return tuple3(0,13,10); |
else if(buffertemp[9:1] == 9'b000000111) return tuple3(1,13,9); |
else if(buffertemp[9:1] == 9'b000001001) return tuple3(2,13,9); |
else if(buffertemp[9:1] == 9'b000001100) return tuple3(3,13,9); |
else if(buffertemp[9:0] == 10'b0000001001) return tuple3(0,14,10); |
else if(buffertemp[9:0] == 10'b0000001100) return tuple3(1,14,10); |
else if(buffertemp[9:0] == 10'b0000001011) return tuple3(2,14,10); |
else if(buffertemp[9:0] == 10'b0000001010) return tuple3(3,14,10); |
else if(buffertemp[9:0] == 10'b0000000101) return tuple3(0,15,10); |
else if(buffertemp[9:0] == 10'b0000001000) return tuple3(1,15,10); |
else if(buffertemp[9:0] == 10'b0000000111) return tuple3(2,15,10); |
else if(buffertemp[9:0] == 10'b0000000110) return tuple3(3,15,10); |
else if(buffertemp[9:0] == 10'b0000000001) return tuple3(0,16,10); |
else if(buffertemp[9:0] == 10'b0000000100) return tuple3(1,16,10); |
else if(buffertemp[9:0] == 10'b0000000011) return tuple3(2,16,10); |
else if(buffertemp[9:0] == 10'b0000000010) return tuple3(3,16,10); |
else return tuple3(0,0,100); |
end |
else if(nC[1] == 1) |
begin |
// Table selected by nC[1]: codewords are 2 to 14 bits, matched on the top 14 bits. |
Bit#(14) buffertemp = inbuffer[buffersize-1:buffersize-14]; |
if(buffertemp[13:12] == 2'b11) return tuple3(0,0,2); |
else if(buffertemp[13:8] == 6'b001011) return tuple3(0,1,6); |
else if(buffertemp[13:12] == 2'b10) return tuple3(1,1,2); |
else if(buffertemp[13:8] == 6'b000111) return tuple3(0,2,6); |
else if(buffertemp[13:9] == 5'b00111) return tuple3(1,2,5); |
else if(buffertemp[13:11] == 3'b011) return tuple3(2,2,3); |
else if(buffertemp[13:7] == 7'b0000111) return tuple3(0,3,7); |
else if(buffertemp[13:8] == 6'b001010) return tuple3(1,3,6); |
else if(buffertemp[13:8] == 6'b001001) return tuple3(2,3,6); |
else if(buffertemp[13:10] == 4'b0101) return tuple3(3,3,4); |
else if(buffertemp[13:6] == 8'b00000111) return tuple3(0,4,8); |
else if(buffertemp[13:8] == 6'b000110) return tuple3(1,4,6); |
else if(buffertemp[13:8] == 6'b000101) return tuple3(2,4,6); |
else if(buffertemp[13:10] == 4'b0100) return tuple3(3,4,4); |
else if(buffertemp[13:6] == 8'b00000100) return tuple3(0,5,8); |
else if(buffertemp[13:7] == 7'b0000110) return tuple3(1,5,7); |
else if(buffertemp[13:7] == 7'b0000101) return tuple3(2,5,7); |
else if(buffertemp[13:9] == 5'b00110) return tuple3(3,5,5); |
else if(buffertemp[13:5] == 9'b000000111) return tuple3(0,6,9); |
else if(buffertemp[13:6] == 8'b00000110) return tuple3(1,6,8); |
else if(buffertemp[13:6] == 8'b00000101) return tuple3(2,6,8); |
else if(buffertemp[13:8] == 6'b001000) return tuple3(3,6,6); |
else if(buffertemp[13:3] == 11'b00000001111) return tuple3(0,7,11); |
else if(buffertemp[13:5] == 9'b000000110) return tuple3(1,7,9); |
else if(buffertemp[13:5] == 9'b000000101) return tuple3(2,7,9); |
else if(buffertemp[13:8] == 6'b000100) return tuple3(3,7,6); |
else if(buffertemp[13:3] == 11'b00000001011) return tuple3(0,8,11); |
else if(buffertemp[13:3] == 11'b00000001110) return tuple3(1,8,11); |
else if(buffertemp[13:3] == 11'b00000001101) return tuple3(2,8,11); |
else if(buffertemp[13:7] == 7'b0000100) return tuple3(3,8,7); |
else if(buffertemp[13:2] == 12'b000000001111) return tuple3(0,9,12); |
else if(buffertemp[13:3] == 11'b00000001010) return tuple3(1,9,11); |
else if(buffertemp[13:3] == 11'b00000001001) return tuple3(2,9,11); |
else if(buffertemp[13:5] == 9'b000000100) return tuple3(3,9,9); |
else if(buffertemp[13:2] == 12'b000000001011) return tuple3(0,10,12); |
else if(buffertemp[13:2] == 12'b000000001110) return tuple3(1,10,12); |
else if(buffertemp[13:2] == 12'b000000001101) return tuple3(2,10,12); |
else if(buffertemp[13:3] == 11'b00000001100) return tuple3(3,10,11); |
else if(buffertemp[13:2] == 12'b000000001000) return tuple3(0,11,12); |
else if(buffertemp[13:2] == 12'b000000001010) return tuple3(1,11,12); |
else if(buffertemp[13:2] == 12'b000000001001) return tuple3(2,11,12); |
else if(buffertemp[13:3] == 11'b00000001000) return tuple3(3,11,11); |
else if(buffertemp[13:1] == 13'b0000000001111) return tuple3(0,12,13); |
else if(buffertemp[13:1] == 13'b0000000001110) return tuple3(1,12,13); |
else if(buffertemp[13:1] == 13'b0000000001101) return tuple3(2,12,13); |
else if(buffertemp[13:2] == 12'b000000001100) return tuple3(3,12,12); |
else if(buffertemp[13:1] == 13'b0000000001011) return tuple3(0,13,13); |
else if(buffertemp[13:1] == 13'b0000000001010) return tuple3(1,13,13); |
else if(buffertemp[13:1] == 13'b0000000001001) return tuple3(2,13,13); |
else if(buffertemp[13:1] == 13'b0000000001100) return tuple3(3,13,13); |
else if(buffertemp[13:1] == 13'b0000000000111) return tuple3(0,14,13); |
else if(buffertemp[13:0] == 14'b00000000001011) return tuple3(1,14,14); |
else if(buffertemp[13:1] == 13'b0000000000110) return tuple3(2,14,13); |
else if(buffertemp[13:1] == 13'b0000000001000) return tuple3(3,14,13); |
else if(buffertemp[13:0] == 14'b00000000001001) return tuple3(0,15,14); |
else if(buffertemp[13:0] == 14'b00000000001000) return tuple3(1,15,14); |
else if(buffertemp[13:0] == 14'b00000000001010) return tuple3(2,15,14); |
else if(buffertemp[13:1] == 13'b0000000000001) return tuple3(3,15,13); |
else if(buffertemp[13:0] == 14'b00000000000111) return tuple3(0,16,14); |
else if(buffertemp[13:0] == 14'b00000000000110) return tuple3(1,16,14); |
else if(buffertemp[13:0] == 14'b00000000000101) return tuple3(2,16,14); |
else if(buffertemp[13:0] == 14'b00000000000100) return tuple3(3,16,14); |
else return tuple3(0,0,100); |
end |
else |
begin |
// Remaining case (nC[5:1] all zero): codewords are 1 to 16 bits, matched on the top 16 bits. |
Bit#(16) buffertemp = inbuffer[buffersize-1:buffersize-16]; |
if(buffertemp[15:15] == 1'b1) return tuple3(0,0,1); |
else if(buffertemp[15:10] == 6'b000101) return tuple3(0,1,6); |
else if(buffertemp[15:14] == 2'b01) return tuple3(1,1,2); |
else if(buffertemp[15:8] == 8'b00000111) return tuple3(0,2,8); |
else if(buffertemp[15:10] == 6'b000100) return tuple3(1,2,6); |
else if(buffertemp[15:13] == 3'b001) return tuple3(2,2,3); |
else if(buffertemp[15:7] == 9'b000000111) return tuple3(0,3,9); |
else if(buffertemp[15:8] == 8'b00000110) return tuple3(1,3,8); |
else if(buffertemp[15:9] == 7'b0000101) return tuple3(2,3,7); |
else if(buffertemp[15:11] == 5'b00011) return tuple3(3,3,5); |
else if(buffertemp[15:6] == 10'b0000000111) return tuple3(0,4,10); |
else if(buffertemp[15:7] == 9'b000000110) return tuple3(1,4,9); |
else if(buffertemp[15:8] == 8'b00000101) return tuple3(2,4,8); |
else if(buffertemp[15:10] == 6'b000011) return tuple3(3,4,6); |
else if(buffertemp[15:5] == 11'b00000000111) return tuple3(0,5,11); |
else if(buffertemp[15:6] == 10'b0000000110) return tuple3(1,5,10); |
else if(buffertemp[15:7] == 9'b000000101) return tuple3(2,5,9); |
else if(buffertemp[15:9] == 7'b0000100) return tuple3(3,5,7); |
else if(buffertemp[15:3] == 13'b0000000001111) return tuple3(0,6,13); |
else if(buffertemp[15:5] == 11'b00000000110) return tuple3(1,6,11); |
else if(buffertemp[15:6] == 10'b0000000101) return tuple3(2,6,10); |
else if(buffertemp[15:8] == 8'b00000100) return tuple3(3,6,8); |
else if(buffertemp[15:3] == 13'b0000000001011) return tuple3(0,7,13); |
else if(buffertemp[15:3] == 13'b0000000001110) return tuple3(1,7,13); |
else if(buffertemp[15:5] == 11'b00000000101) return tuple3(2,7,11); |
else if(buffertemp[15:7] == 9'b000000100) return tuple3(3,7,9); |
else if(buffertemp[15:3] == 13'b0000000001000) return tuple3(0,8,13); |
else if(buffertemp[15:3] == 13'b0000000001010) return tuple3(1,8,13); |
else if(buffertemp[15:3] == 13'b0000000001101) return tuple3(2,8,13); |
else if(buffertemp[15:6] == 10'b0000000100) return tuple3(3,8,10); |
else if(buffertemp[15:2] == 14'b00000000001111) return tuple3(0,9,14); |
else if(buffertemp[15:2] == 14'b00000000001110) return tuple3(1,9,14); |
else if(buffertemp[15:3] == 13'b0000000001001) return tuple3(2,9,13); |
else if(buffertemp[15:5] == 11'b00000000100) return tuple3(3,9,11); |
else if(buffertemp[15:2] == 14'b00000000001011) return tuple3(0,10,14); |
else if(buffertemp[15:2] == 14'b00000000001010) return tuple3(1,10,14); |
else if(buffertemp[15:2] == 14'b00000000001101) return tuple3(2,10,14); |
else if(buffertemp[15:3] == 13'b0000000001100) return tuple3(3,10,13); |
else if(buffertemp[15:1] == 15'b000000000001111) return tuple3(0,11,15); |
else if(buffertemp[15:1] == 15'b000000000001110) return tuple3(1,11,15); |
else if(buffertemp[15:2] == 14'b00000000001001) return tuple3(2,11,14); |
else if(buffertemp[15:2] == 14'b00000000001100) return tuple3(3,11,14); |
else if(buffertemp[15:1] == 15'b000000000001011) return tuple3(0,12,15); |
else if(buffertemp[15:1] == 15'b000000000001010) return tuple3(1,12,15); |
else if(buffertemp[15:1] == 15'b000000000001101) return tuple3(2,12,15); |
else if(buffertemp[15:2] == 14'b00000000001000) return tuple3(3,12,14); |
else if(buffertemp[15:0] == 16'b0000000000001111) return tuple3(0,13,16); |
else if(buffertemp[15:1] == 15'b000000000000001) return tuple3(1,13,15); |
else if(buffertemp[15:1] == 15'b000000000001001) return tuple3(2,13,15); |
else if(buffertemp[15:1] == 15'b000000000001100) return tuple3(3,13,15); |
else if(buffertemp[15:0] == 16'b0000000000001011) return tuple3(0,14,16); |
else if(buffertemp[15:0] == 16'b0000000000001110) return tuple3(1,14,16); |
else if(buffertemp[15:0] == 16'b0000000000001101) return tuple3(2,14,16); |
else if(buffertemp[15:1] == 15'b000000000001000) return tuple3(3,14,15); |
else if(buffertemp[15:0] == 16'b0000000000000111) return tuple3(0,15,16); |
else if(buffertemp[15:0] == 16'b0000000000001010) return tuple3(1,15,16); |
else if(buffertemp[15:0] == 16'b0000000000001001) return tuple3(2,15,16); |
else if(buffertemp[15:0] == 16'b0000000000001100) return tuple3(3,15,16); |
else if(buffertemp[15:0] == 16'b0000000000000100) return tuple3(0,16,16); |
else if(buffertemp[15:0] == 16'b0000000000000110) return tuple3(1,16,16); |
else if(buffertemp[15:0] == 16'b0000000000000101) return tuple3(2,16,16); |
else if(buffertemp[15:0] == 16'b0000000000001000) return tuple3(3,16,16); |
else return tuple3(0,0,100); |
end |
endfunction |
|
// Count the zero bits that precede the first 1 bit at the top of inbuffer.
// Only the 15 most-significant bits (positions buffersize-1 down to
// buffersize-15) are examined; the result is 0..14 when a 1 is found among
// them and 15 when all of them are zero.
(* noinline *)
function Bit#(4) cavlc_level_prefix( Buffer inbuffer );
   Bit#(4) zeroRun = 15;      // answer when no 1 bit is seen
   Bool    oneSeen = False;   // latches at the first (most-significant) 1
   for(Integer pos=1; pos<=15; pos=pos+1)
      begin
         if(!oneSeen && inbuffer[buffersize-fromInteger(pos)]==1'b1)
            begin
               zeroRun = fromInteger(pos-1);
               oneSeen = True;
            end
      end
   return zeroRun;
endfunction
|
// Match one variable-length codeword at the most-significant bits of |
// inbuffer, using a table chosen by inMaxNumCoeff and inTotalCoeff. |
//  - inMaxNumCoeff == 4: short tables, defined for inTotalCoeff 1..3. |
//  - any other inMaxNumCoeff: tables defined for inTotalCoeff 1..15. |
// Returns tuple2(value, len) where len is the width of the matched codeword |
// (the last entry of each table is the fall-through else). |
// For an inTotalCoeff without a table, tuple2(0,100) is returned; 100 is |
// presumably an error marker for the caller -- confirm. |
// NOTE(review): value is presumably the H.264 total_zeros syntax element. |
(* noinline *) |
function Tuple2#(Bit#(4),Bufcount) cavlc_total_zeros( Buffer inbuffer, Bit#(4) inTotalCoeff, Bit#(5) inMaxNumCoeff); |
if(inMaxNumCoeff==4) |
begin |
// Short tables: codewords are at most 3 bits. |
Bit#(3) buffertemp3 = inbuffer[buffersize-1:buffersize-3]; |
Bit#(2) buffertemp2 = inbuffer[buffersize-1:buffersize-2]; |
case ( inTotalCoeff ) |
1: |
begin |
if(inbuffer[buffersize-1] == 1) |
return tuple2(0,1); |
else if(buffertemp2 == 2'b01) |
return tuple2(1,2); |
else if(buffertemp3 == 3'b001) |
return tuple2(2,3); |
else |
return tuple2(3,3); |
end |
2: |
begin |
if(inbuffer[buffersize-1] == 1) |
return tuple2(0,1); |
else if(buffertemp2 == 2'b01) |
return tuple2(1,2); |
else |
return tuple2(2,2); |
end |
3: |
begin |
if(inbuffer[buffersize-1] == 1) |
return tuple2(0,1); |
else |
return tuple2(1,1); |
end |
default: return tuple2(0,100); |
endcase |
end |
else |
begin |
// Long tables: top 6 bits suffice except for inTotalCoeff == 1, which |
// needs up to 9 bits and therefore takes its own 10-bit window. |
Bit#(6) buffertemp = inbuffer[buffersize-1:buffersize-6]; |
case ( inTotalCoeff ) |
1: |
begin |
Bit#(10) buffertemp2 = inbuffer[buffersize-1:buffersize-10]; |
if(buffertemp2[9:9] == 1'b1) return tuple2(0,1); |
else if(buffertemp2[9:7] == 3'b011) return tuple2(1,3); |
else if(buffertemp2[9:7] == 3'b010) return tuple2(2,3); |
else if(buffertemp2[9:6] == 4'b0011) return tuple2(3,4); |
else if(buffertemp2[9:6] == 4'b0010) return tuple2(4,4); |
else if(buffertemp2[9:5] == 5'b00011) return tuple2(5,5); |
else if(buffertemp2[9:5] == 5'b00010) return tuple2(6,5); |
else if(buffertemp2[9:4] == 6'b000011) return tuple2(7,6); |
else if(buffertemp2[9:4] == 6'b000010) return tuple2(8,6); |
else if(buffertemp2[9:3] == 7'b0000011) return tuple2(9,7); |
else if(buffertemp2[9:3] == 7'b0000010) return tuple2(10,7); |
else if(buffertemp2[9:2] == 8'b00000011) return tuple2(11,8); |
else if(buffertemp2[9:2] == 8'b00000010) return tuple2(12,8); |
else if(buffertemp2[9:1] == 9'b000000011) return tuple2(13,9); |
else if(buffertemp2[9:1] == 9'b000000010) return tuple2(14,9); |
else return tuple2(15,9); |
end |
2: |
begin |
if(buffertemp[5:3] == 3'b111) return tuple2(0,3); |
else if(buffertemp[5:3] == 3'b110) return tuple2(1,3); |
else if(buffertemp[5:3] == 3'b101) return tuple2(2,3); |
else if(buffertemp[5:3] == 3'b100) return tuple2(3,3); |
else if(buffertemp[5:3] == 3'b011) return tuple2(4,3); |
else if(buffertemp[5:2] == 4'b0101) return tuple2(5,4); |
else if(buffertemp[5:2] == 4'b0100) return tuple2(6,4); |
else if(buffertemp[5:2] == 4'b0011) return tuple2(7,4); |
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4); |
else if(buffertemp[5:1] == 5'b00011) return tuple2(9,5); |
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5); |
else if(buffertemp[5:0] == 6'b000011) return tuple2(11,6); |
else if(buffertemp[5:0] == 6'b000010) return tuple2(12,6); |
else if(buffertemp[5:0] == 6'b000001) return tuple2(13,6); |
else return tuple2(14,6); |
end |
3: |
begin |
if(buffertemp[5:2] == 4'b0101) return tuple2(0,4); |
else if(buffertemp[5:3] == 3'b111) return tuple2(1,3); |
else if(buffertemp[5:3] == 3'b110) return tuple2(2,3); |
else if(buffertemp[5:3] == 3'b101) return tuple2(3,3); |
else if(buffertemp[5:2] == 4'b0100) return tuple2(4,4); |
else if(buffertemp[5:2] == 4'b0011) return tuple2(5,4); |
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3); |
else if(buffertemp[5:3] == 3'b011) return tuple2(7,3); |
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4); |
else if(buffertemp[5:1] == 5'b00011) return tuple2(9,5); |
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5); |
else if(buffertemp[5:0] == 6'b000001) return tuple2(11,6); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(12,5); |
else return tuple2(13,6); |
end |
4: |
begin |
if(buffertemp[5:1] == 5'b00011) return tuple2(0,5); |
else if(buffertemp[5:3] == 3'b111) return tuple2(1,3); |
else if(buffertemp[5:2] == 4'b0101) return tuple2(2,4); |
else if(buffertemp[5:2] == 4'b0100) return tuple2(3,4); |
else if(buffertemp[5:3] == 3'b110) return tuple2(4,3); |
else if(buffertemp[5:3] == 3'b101) return tuple2(5,3); |
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3); |
else if(buffertemp[5:2] == 4'b0011) return tuple2(7,4); |
else if(buffertemp[5:3] == 3'b011) return tuple2(8,3); |
else if(buffertemp[5:2] == 4'b0010) return tuple2(9,4); |
else if(buffertemp[5:1] == 5'b00010) return tuple2(10,5); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(11,5); |
else return tuple2(12,5); |
end |
5: |
begin |
if(buffertemp[5:2] == 4'b0101) return tuple2(0,4); |
else if(buffertemp[5:2] == 4'b0100) return tuple2(1,4); |
else if(buffertemp[5:2] == 4'b0011) return tuple2(2,4); |
else if(buffertemp[5:3] == 3'b111) return tuple2(3,3); |
else if(buffertemp[5:3] == 3'b110) return tuple2(4,3); |
else if(buffertemp[5:3] == 3'b101) return tuple2(5,3); |
else if(buffertemp[5:3] == 3'b100) return tuple2(6,3); |
else if(buffertemp[5:3] == 3'b011) return tuple2(7,3); |
else if(buffertemp[5:2] == 4'b0010) return tuple2(8,4); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(9,5); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(10,4); |
else return tuple2(11,5); |
end |
6: |
begin |
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(1,5); |
else if(buffertemp[5:3] == 3'b111) return tuple2(2,3); |
else if(buffertemp[5:3] == 3'b110) return tuple2(3,3); |
else if(buffertemp[5:3] == 3'b101) return tuple2(4,3); |
else if(buffertemp[5:3] == 3'b100) return tuple2(5,3); |
else if(buffertemp[5:3] == 3'b011) return tuple2(6,3); |
else if(buffertemp[5:3] == 3'b010) return tuple2(7,3); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(8,4); |
else if(buffertemp[5:3] == 3'b001) return tuple2(9,3); |
else return tuple2(10,6); |
end |
7: |
begin |
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(1,5); |
else if(buffertemp[5:3] == 3'b101) return tuple2(2,3); |
else if(buffertemp[5:3] == 3'b100) return tuple2(3,3); |
else if(buffertemp[5:3] == 3'b011) return tuple2(4,3); |
else if(buffertemp[5:4] == 2'b11) return tuple2(5,2); |
else if(buffertemp[5:3] == 3'b010) return tuple2(6,3); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(7,4); |
else if(buffertemp[5:3] == 3'b001) return tuple2(8,3); |
else return tuple2(9,6); |
end |
8: |
begin |
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4); |
else if(buffertemp[5:1] == 5'b00001) return tuple2(2,5); |
else if(buffertemp[5:3] == 3'b011) return tuple2(3,3); |
else if(buffertemp[5:4] == 2'b11) return tuple2(4,2); |
else if(buffertemp[5:4] == 2'b10) return tuple2(5,2); |
else if(buffertemp[5:3] == 3'b010) return tuple2(6,3); |
else if(buffertemp[5:3] == 3'b001) return tuple2(7,3); |
else return tuple2(8,6); |
end |
9: |
begin |
if(buffertemp[5:0] == 6'b000001) return tuple2(0,6); |
else if(buffertemp[5:0] == 6'b000000) return tuple2(1,6); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(2,4); |
else if(buffertemp[5:4] == 2'b11) return tuple2(3,2); |
else if(buffertemp[5:4] == 2'b10) return tuple2(4,2); |
else if(buffertemp[5:3] == 3'b001) return tuple2(5,3); |
else if(buffertemp[5:4] == 2'b01) return tuple2(6,2); |
else return tuple2(7,5); |
end |
10: |
begin |
if(buffertemp[5:1] == 5'b00001) return tuple2(0,5); |
else if(buffertemp[5:1] == 5'b00000) return tuple2(1,5); |
else if(buffertemp[5:3] == 3'b001) return tuple2(2,3); |
else if(buffertemp[5:4] == 2'b11) return tuple2(3,2); |
else if(buffertemp[5:4] == 2'b10) return tuple2(4,2); |
else if(buffertemp[5:4] == 2'b01) return tuple2(5,2); |
else return tuple2(6,4); |
end |
11: |
begin |
if(buffertemp[5:2] == 4'b0000) return tuple2(0,4); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4); |
else if(buffertemp[5:3] == 3'b001) return tuple2(2,3); |
else if(buffertemp[5:3] == 3'b010) return tuple2(3,3); |
else if(buffertemp[5:5] == 1'b1) return tuple2(4,1); |
else return tuple2(5,3); |
end |
12: |
begin |
if(buffertemp[5:2] == 4'b0000) return tuple2(0,4); |
else if(buffertemp[5:2] == 4'b0001) return tuple2(1,4); |
else if(buffertemp[5:4] == 2'b01) return tuple2(2,2); |
else if(buffertemp[5:5] == 1'b1) return tuple2(3,1); |
else return tuple2(4,3); |
end |
13: |
begin |
if(buffertemp[5:3] == 3'b000) return tuple2(0,3); |
else if(buffertemp[5:3] == 3'b001) return tuple2(1,3); |
else if(buffertemp[5:5] == 1'b1) return tuple2(2,1); |
else return tuple2(3,2); |
end |
14: |
begin |
if(buffertemp[5:4] == 2'b00) return tuple2(0,2); |
else if(buffertemp[5:4] == 2'b01) return tuple2(1,2); |
else return tuple2(2,1); |
end |
15: |
begin |
if(buffertemp[5:5] == 1'b0) return tuple2(0,1); |
else return tuple2(1,1); |
end |
default: return tuple2(0,100); |
endcase |
end |
endfunction |
|
// Match one variable-length codeword at the most-significant bits of
// inbuffer, using the table selected by inZerosLeft (1..6 have their own
// table, every value above 6 shares one).
// Returns tuple2(value, len) where len is the width of the matched codeword.
// inZerosLeft == 0 has no table and yields tuple2(0,100).
(* noinline *)
function Tuple2#(Bit#(4),Bufcount) cavlc_run_before( Buffer inbuffer, Bit#(4) inZerosLeft);
   // Leading 1, 2 and 3 bits of the buffer.
   Bit#(1) lead1 = inbuffer[buffersize-1];
   Bit#(2) lead2 = inbuffer[buffersize-1:buffersize-2];
   Bit#(3) lead3 = inbuffer[buffersize-1:buffersize-3];

   Tuple2#(Bit#(4),Bufcount) decoded = tuple2(0,100);

   case ( inZerosLeft )
      0: decoded = tuple2(0,100);
      1: decoded = (lead1 == 1) ? tuple2(0,1) : tuple2(1,1);
      2:
         case ( lead2 )
            2'b00:   decoded = tuple2(2,2);
            2'b01:   decoded = tuple2(1,2);
            default: decoded = tuple2(0,1);   // leading bit is 1
         endcase
      3:
         case ( lead2 )
            2'b11:   decoded = tuple2(0,2);
            2'b10:   decoded = tuple2(1,2);
            2'b01:   decoded = tuple2(2,2);
            default: decoded = tuple2(3,2);
         endcase
      4:
         case ( lead2 )
            2'b11:   decoded = tuple2(0,2);
            2'b10:   decoded = tuple2(1,2);
            2'b01:   decoded = tuple2(2,2);
            // leading bits are 00: the third bit decides
            default: decoded = (lead3 == 3'b001) ? tuple2(3,3) : tuple2(4,3);
         endcase
      5:
         case ( lead3 )
            3'b011:  decoded = tuple2(2,3);
            3'b010:  decoded = tuple2(3,3);
            3'b001:  decoded = tuple2(4,3);
            3'b000:  decoded = tuple2(5,3);
            // leading bit is 1: two-bit codewords 11 and 10
            default: decoded = (lead2 == 2'b11) ? tuple2(0,2) : tuple2(1,2);
         endcase
      6:
         case ( lead3 )
            3'b000:  decoded = tuple2(1,3);
            3'b001:  decoded = tuple2(2,3);
            3'b011:  decoded = tuple2(3,3);
            3'b010:  decoded = tuple2(4,3);
            3'b101:  decoded = tuple2(5,3);
            3'b100:  decoded = tuple2(6,3);
            default: decoded = tuple2(0,2);   // leading bits are 11
         endcase
      default:
         begin
            if(lead3 != 3'b000)
               begin
                  // Three-bit codewords 111..001 map to 0..6.
                  Bit#(4) shortRun = zeroExtend(3'b111 - lead3);
                  decoded = tuple2(shortRun,3);
               end
            else
               begin
                  // Unary tail: the first 1 at bit position pos (4..10,
                  // counted from the top) gives value pos+3 and length pos;
                  // with no 1 in that range the result is (14,11).
                  Bit#(4)  runLen  = 14;
                  Bufcount codeLen = 11;
                  Bool     oneSeen = False;
                  for(Integer pos=4; pos<=10; pos=pos+1)
                     begin
                        if(!oneSeen && inbuffer[buffersize-fromInteger(pos)]==1'b1)
                           begin
                              runLen  = fromInteger(pos+3);
                              codeLen = fromInteger(pos);
                              oneSeen = True;
                           end
                     end
                  decoded = tuple2(runLen,codeLen);
               end
         end
   endcase

   return decoded;
endfunction
|
|
|
endpackage |
/trunk/src/mkPrediction_intra32.bsv
0,0 → 1,2184
//********************************************************************** |
// Prediction |
//---------------------------------------------------------------------- |
// |
// |
|
package mkPrediction; |
|
import H264Types::*; |
|
import IPrediction::*; |
import IInterpolator::*; |
import mkInterpolator::*; |
import FIFO::*; |
import FIFOF::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Tag-only union with three alternatives (no payload). It is the element |
// type of outstatefifo in mkPrediction. |
typedef union tagged |
{ |
void Intra; //Intra non-4x4 |
void Intra4x4; |
void Inter; |
} |
OutState deriving(Eq,Bits); |
|
// Tag-only union with four alternatives (no payload); Start is the idle |
// value. NOTE(review): presumably the state of the intra-prediction |
// control logic -- its register is not visible in this part of the file. |
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void Intra16x16; |
void Intra4x4; |
void IntraPCM; |
} |
IntraState deriving(Eq,Bits); |
|
// Tag-only union with seven alternatives (no payload); Start is the idle |
// value. It is the type of the interstate register in mkPrediction, which |
// resets to Start. |
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void InterP16x16; |
void InterP16x8; |
void InterP8x16; |
void InterP8x8; |
void InterP8x8ref0; |
void InterPskip; |
} |
InterState deriving(Eq,Bits); |
|
// Two-alternative union describing one block: |
//  - NotInter carries a single flag bit (meaning given on its line); |
//  - BlockMv carries refIdx (4 bits), mvhor (14 bits), mvver (12 bits) and |
//    nonZeroTransCoeff (1 bit), i.e. 31 payload bits in total. |
// NOTE(review): mvhor/mvver are presumably the horizontal and vertical |
// motion-vector components -- confirm against the code that fills them. |
typedef union tagged |
{ |
Bit#(1) NotInter;//0 for not available, 1 for intra-coded |
struct {Bit#(4) refIdx; Bit#(14) mvhor; Bit#(12) mvver; Bit#(1) nonZeroTransCoeff;} BlockMv; |
} |
InterBlockMv deriving(Eq,Bits); |
|
// Tag-only union with four alternatives (no payload). It is the element |
// type of nextoutputfifo in mkPrediction. |
typedef union tagged |
{ |
void SkipMB; |
void NonSkipMB; |
void Intra4x4; |
void Intra4x4PlusChroma; |
} |
NextOutput deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// Pick one 8-bit lane out of a 72-bit (nine-lane) vector.
// idx 0..7 select lanes 1..8, i.e. bits [15:8] up to [71:64]; any other idx
// selects lane 0, bits [7:0].
function Bit#(8) intra4x4SelectTop( Bit#(72) valVector, Bit#(4) idx );
   Bit#(4)  lane    = (idx <= 7) ? (idx + 1) : 0;
   Bit#(7)  offset  = {lane, 3'b000};        // lane * 8, at most 64
   Bit#(72) shifted = valVector >> offset;   // chosen lane now sits in [7:0]
   return truncate(shifted);
endfunction
|
// Pick one 8-bit lane out of a 40-bit (five-lane) vector.
// idx 0..3 select lanes 1..4, i.e. bits [15:8] up to [39:32]; idx 4..7
// select lane 0, bits [7:0].
function Bit#(8) intra4x4SelectLeft( Bit#(40) valVector, Bit#(3) idx );
   Bit#(8) picked = valVector[7:0];   // result for idx 4..7
   if(idx == 0)
      picked = valVector[15:8];
   else if(idx == 1)
      picked = valVector[23:16];
   else if(idx == 2)
      picked = valVector[31:24];
   else if(idx == 3)
      picked = valVector[39:32];
   return picked;
endfunction
|
// Return byte number idx of a 32-bit word, byte 0 being bits [7:0] and
// byte 3 being bits [31:24].
function Bit#(8) select32to8( Bit#(32) valVector, Bit#(2) idx );
   Bit#(5)  offset  = {idx, 3'b000};         // idx * 8
   Bit#(32) shifted = valVector >> offset;
   return truncate(shifted);
endfunction
|
// Return the low byte (idx == 0) or the high byte (idx == 1) of a 16-bit word.
function Bit#(8) select16to8( Bit#(16) valVector, Bit#(1) idx );
   return (idx == 1'b1) ? valVector[15:8] : valVector[7:0];
endfunction
|
// True when two 14-bit two's-complement values differ by at least 4. |
// Both operands are sign-extended by one bit so that adding 4 cannot overflow. |
function Bool absDiffGEFour14( Bit#(14) val1, Bit#(14) val2 ); |
   Int#(15) a = unpack(signExtend(val1)); |
   Int#(15) b = unpack(signExtend(val2)); |
   Bool aIsLarger = (a >= b); |
   Int#(15) larger = aIsLarger ? a : b; |
   Int#(15) smaller = aIsLarger ? b : a; |
   return (larger >= (smaller + 4)); |
endfunction |
|
// True when two 12-bit two's-complement values differ by at least 4. |
// Both operands are sign-extended by one bit so that adding 4 cannot overflow. |
function Bool absDiffGEFour12( Bit#(12) val1, Bit#(12) val2 ); |
   Int#(13) a = unpack(signExtend(val1)); |
   Int#(13) b = unpack(signExtend(val2)); |
   Bool aIsLarger = (a >= b); |
   Int#(13) larger = aIsLarger ? a : b; |
   Int#(13) smaller = aIsLarger ? b : a; |
   return (larger >= (smaller + 4)); |
endfunction |
|
|
//----------------------------------------------------------- |
// Prediction Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkPrediction( IPrediction ); |
|
//Common state |
// infifo: incoming EntropyDecOT items, consumed by rules passing and inputing. |
// infifo_ITB: incoming InverseTransOT items (mb_qp, residuals), consumed by rule outputing. |
// outfifo: output stream, written by rules passing and outputing. |
FIFO#(EntropyDecOT) infifo <- mkSizedFIFO(prediction_infifo_size); |
FIFO#(InverseTransOT) infifo_ITB <- mkSizedFIFO(prediction_infifo_ITB_size); |
FIFO#(EntropyDecOT) outfifo <- mkFIFO; |
// True: rule passing owns infifo; False: rule inputing owns it. |
Reg#(Bool) passFlag <- mkReg(True); |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |
|
// Picture size, loaded from SPSpic_width_in_mbs / SPSpic_height_in_map_units. |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
// firstMb/currMb are loaded from SHfirst_mb_in_slice; currMb is incremented |
// by rule outputing after each completed macroblock. |
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
// currMbHor is PicAreaSz wide because it is first loaded with a macroblock |
// index and may be >= picWidth; rule currMbHorUpdate then reduces it and |
// advances currMbVer. Most rules are guarded by currMbHor < picWidth. |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |
|
// One entry per macroblock in flight between rule inputing (enq) and rule outputing (deq). |
FIFOF#(OutState) outstatefifo <- mkFIFOF; |
FIFOF#(NextOutput) nextoutputfifo <- mkFIFOF; |
// Output position: 4x4 block number, pixel number within the block (advanced |
// by 4 per output row) and a flag that is 1 while chroma blocks are output. |
Reg#(Bit#(4)) outBlockNum <- mkReg(0); |
Reg#(Bit#(4)) outPixelNum <- mkReg(0); |
// Predicted rows of four pixels, consumed by rule outputing. |
FIFO#(Vector#(4,Bit#(8))) predictedfifo <- mkSizedFIFO(prediction_predictedfifo_size); |
Reg#(Bit#(1)) outChromaFlag <- mkReg(0); |
// Set at the start of a slice; makes rule outputing forward the first IBTmb_qp before anything else. |
Reg#(Bool) outFirstQPFlag <- mkReg(False); |
|
// Called in branches that must not execute; presumably doNotFire() has an |
// implicit condition that is never true, so the calling rule cannot fire in |
// that case — TODO confirm against mkDoNotFire. |
DoNotFire donotfire <- mkDoNotFire(); |
|
//Reg#(Vector#(16,Bit#(8))) workVector <- mkRegU(); |
|
//Inter state |
Interpolator interpolator <- mkInterpolator(); |
Reg#(InterState) interstate <- mkReg(Start); |
// Number of macroblocks already started out of the current mb_skip_run. |
Reg#(Bit#(PicAreaSz)) interPskipCount <- mkReg(0); |
// Motion information of neighbouring blocks: entries 0..3 of interTopVal are |
// loaded from memory for the macroblock above, entry 4 for the one above-right. |
Reg#(Vector#(5,InterBlockMv)) interTopVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interLeftVal <- mkRegU(); |
Reg#(Vector#(4,InterBlockMv)) interTopLeftVal <- mkRegU(); |
// Memory port for neighbour motion data. Addresses are {macroblock column, |
// 2-bit block index}; data words are packed InterBlockMv values. |
FIFO#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQ <- mkFIFO; |
// Store request held back by rule outputing and issued when the output of the next macroblock starts. |
Reg#(MemReq#(TAdd#(PicWidthSz,2),32)) interMemReqQdelay <- mkRegU(); |
FIFO#(MemResp#(32)) interMemRespQ <- mkFIFO; |
// Progress of the neighbour load sequence; 0 means idle, rule inputing starts both at 1. |
Reg#(Bit#(3)) interReqCount <- mkReg(0); |
Reg#(Bit#(3)) interRespCount <- mkReg(0); |
|
Reg#(Bit#(1)) interStepCount <- mkReg(0); |
Reg#(Bit#(2)) interMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interSubMbPartNum <- mkReg(0); |
// Index of the next ref_idx / sub_mb_type / mvd component expected by rule inputing. |
Reg#(Bit#(2)) interPassingCount <- mkReg(0); |
Reg#(Vector#(4,Bit#(4))) interRefIdxVector <- mkRegU(); |
Reg#(Vector#(4,Bit#(2))) interSubMbTypeVector <- mkRegU(); |
RFile1#(Bit#(4),Tuple2#(Bit#(14),Bit#(12))) interMvFile <- mkRFile1Full(); |
// First mvd component of a pair, kept until the second one arrives; the |
// completed pair is then enqueued into interMvDiff. |
Reg#(Bit#(15)) interMvDiffTemp <- mkReg(0); |
FIFO#(Tuple2#(Bit#(15),Bit#(13))) interMvDiff <- mkFIFO; |
Reg#(Bit#(5)) interNewestMv <- mkReg(0); |
|
Reg#(Bit#(2)) interIPStepCount <- mkReg(0); |
Reg#(Bit#(2)) interIPMbPartNum <- mkReg(0); |
Reg#(Bit#(2)) interIPSubMbPartNum <- mkReg(0); |
|
// Incremented by rule inputing for every macroblock it accepts and |
// decremented by rule outputing for every macroblock it completes. |
Reg#(Bit#(PicWidthSz)) interCurrMbDiff <- mkReg(0); |
|
// Per-column / per-row flags recording whether the last block output there |
// carried a residual; used by rule outputing when deriving boundary strengths. |
Reg#(Vector#(4,Bool)) interTopNonZeroTransCoeff <- mkRegU(); |
Reg#(Vector#(4,Bool)) interLeftNonZeroTransCoeff <- mkRegU(); |
// (horizontal, vertical) boundary-strength hints per block; rule outputing maps the value 3 to bS 4. |
FIFO#(Tuple2#(Bit#(2),Bit#(2))) interBSfifo <- mkSizedFIFO(32); |
// True when the next output for a luma block row 0 is the bS item rather than pixel data. |
Reg#(Bool) interBSoutput <- mkReg(True); |
// Motion information written back to memory by rule outputing at the end of a macroblock. |
FIFO#(InterBlockMv) interOutBlockMvfifo <- mkSizedFIFO(8); |
|
|
//Intra state |
Reg#(IntraState) intrastate <- mkReg(Start); |
Reg#(Bit#(1)) intraChromaFlag <- mkReg(0); |
// Memory port for the intra neighbour row. Addresses are {macroblock column, |
// 2-bit block index}; rule outputing stores 68-bit words laid out as |
// {4-bit intra4x4 type, 16-bit chroma1, 16-bit chroma0, 32-bit luma}. |
FIFO#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQ <- mkFIFO; |
// Store request held back by rule outputing and issued when the output of the next macroblock starts. |
Reg#(MemReq#(TAdd#(PicWidthSz,2),68)) intraMemReqQdelay <- mkRegU; |
FIFO#(MemResp#(68)) intraMemRespQ <- mkFIFO; |
Reg#(Vector#(4,Bit#(4))) intra4x4typeLeft <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
Reg#(Vector#(4,Bit#(4))) intra4x4typeTop <- mkRegU();//15=unavailable, 14=inter-MB, 13=intra-non-4x4 |
// Copy of PPSconstrained_intra_pred_flag, captured by rule passing. |
Reg#(Bit#(1)) ppsconstrained_intra_pred_flag <- mkReg(0); |
// Reconstructed neighbour samples, refreshed by rule outputing as pixels are output. |
Reg#(Vector#(4,Bit#(40))) intraLeftVal <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma0 <- mkRegU(); |
Reg#(Vector#(9,Bit#(8))) intraLeftValChroma1 <- mkRegU(); |
Reg#(Vector#(5,Bit#(32))) intraTopVal <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma0 <- mkRegU(); |
Reg#(Vector#(4,Bit#(16))) intraTopValChroma1 <- mkRegU(); |
// Accumulates the right-most output pixel of each row until a block's left-neighbour column is complete. |
Reg#(Bit#(32)) intraLeftValNext <- mkReg(0); |
Reg#(Bit#(2)) intra16x16_pred_mode <- mkReg(0); |
// Prediction-mode syntax elements queued by rule inputing. |
FIFO#(Bit#(4)) rem_intra4x4_pred_mode <- mkSizedFIFO(16); |
FIFO#(Bit#(2)) intra_chroma_pred_mode <- mkFIFO; |
Reg#(Bit#(4)) cur_intra4x4_pred_mode <- mkReg(0); |
Reg#(Bit#(1)) intraChromaTopAvailable <- mkReg(0); |
Reg#(Bit#(1)) intraChromaLeftAvailable <- mkReg(0); |
|
// Progress of the intra neighbour load sequence; rule inputing starts both at 1. |
Reg#(Bit#(3)) intraReqCount <- mkReg(0); |
Reg#(Bit#(3)) intraRespCount <- mkReg(0); |
Reg#(Bit#(4)) intraStepCount <- mkReg(0); |
Reg#(Bit#(13)) intraSumA <- mkReg(0); |
Reg#(Bit#(15)) intraSumB <- mkReg(0); |
Reg#(Bit#(15)) intraSumC <- mkReg(0); |
|
|
|
//----------------------------------------------------------- |
// Rules |
|
////////////////////////////////////////////////////////////////////////////// |
// rule stateMonitor ( True ); |
// if(predictedfifo.notEmpty()) |
// $display( "TRACE Prediction: stateMonitor predictedfifo.first() %0d", predictedfifo.first());//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor infifo.first() %0d", xdata);//////////////////// |
// if(infifo.first() matches tagged ITBresidual .xdata) |
// $display( "TRACE Prediction: stateMonitor outBlockNum outPixelNum outChromaFlag %0d %0d", outBlockNum, outPixelNum, outChromaFlag);//////////////////// |
// endrule |
////////////////////////////////////////////////////////////////////////////// |
|
// Handles infifo while no macroblock is in flight (passFlag set, outstatefifo |
// empty). Most items are dequeued and copied unchanged to outfifo; a few are |
// also latched locally. Two flags describe what happens to the item: |
//   consume - dequeue it from infifo |
//   forward - copy it to outfifo |
// SDmb_skip_run and SDMmbtype are left in infifo and hand control to rule inputing. |
rule passing ( passFlag && !outstatefifo.notEmpty() && currMbHor<zeroExtend(picWidth) ); |
   let inItem = infifo.first(); |
   Bool consume = True; |
   Bool forward = True; |
   $display( "Trace Prediction: passing infifo packed %h", pack(inItem)); |
   case (inItem) matches |
      tagged NewUnit .xdata : |
         begin |
            $display("ccl4newunit"); |
            $display("ccl4rbspbyte %h", xdata); |
         end |
      tagged SPSpic_width_in_mbs .xdata : |
         begin |
            picWidth <= xdata; |
            interpolator.setPicWidth(xdata); |
         end |
      tagged SPSpic_height_in_map_units .xdata : |
         begin |
            picHeight <= xdata; |
            interpolator.setPicHeight(xdata); |
         end |
      tagged PPSconstrained_intra_pred_flag .xdata : |
         begin |
            // Kept locally only; this item is not passed downstream. |
            forward = False; |
            ppsconstrained_intra_pred_flag <= xdata; |
         end |
      tagged SHfirst_mb_in_slice .xdata : |
         begin |
            // Start of a slice: reset the macroblock position and neighbour information. |
            // currMbHor receives the macroblock index; rule currMbHorUpdate reduces it. |
            firstMb <= xdata; |
            currMb <= xdata; |
            currMbHor <= xdata; |
            currMbVer <= 0; |
            outFirstQPFlag <= True; |
            intra4x4typeLeft <= replicate(15); |
            interTopLeftVal <= replicate(NotInter 0); |
            if(xdata==0) |
               interLeftVal <= replicate(NotInter 0); |
         end |
      tagged SDmb_skip_run .xdata : |
         begin |
            consume = False; |
            forward = False; |
            passFlag <= False; |
         end |
      tagged SDMmbtype .xdata : |
         begin |
            consume = False; |
            forward = False; |
            passFlag <= False; |
         end |
      tagged EndOfFile : |
         $display( "INFO Prediction: EndOfFile reached" ); |
      default: |
         noAction; |
   endcase |
   if(consume) |
      infifo.deq(); |
   if(forward) |
      outfifo.enq(inItem); |
endrule |
|
|
// Handles macroblock-level items from infifo while passFlag is False. |
// SDmb_skip_run and SDMmbtype start a macroblock: its kind is enqueued on |
// outstatefifo (and nextoutputfifo for inter macroblocks) and the neighbour |
// load sequence is started by setting the request/response counters to 1. |
// The SDMM*/SDMS* items are captured into local registers/FIFOs and are not |
// forwarded to outfifo. Any other item sets passFlag again, without being |
// dequeued, so that rule passing deals with it. |
// Branches that call donotfire.doNotFire() are presumably never executed: the |
// call is expected to keep the whole rule from firing — TODO confirm. |
rule inputing ( !passFlag ); |
$display( "Trace Prediction: inputing infifo packed %h", pack(infifo.first())); |
case (infifo.first()) matches |
// xdata is the run length. One skipped macroblock is started per firing; the |
// item is dequeued only after interPskipCount has reached xdata. |
tagged SDmb_skip_run .xdata : |
begin |
if(interstate==Start && intrastate==Start) |
begin |
if(interPskipCount < xdata) |
begin |
// A further macroblock may start while an earlier one is still being output, |
// as long as fewer than picWidth-1 macroblocks are in flight. |
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
$display( "Trace Prediction: passing SDmb_skip_run %0d", xdata); |
outstatefifo.enq(Inter); |
interstate <= InterPskip; |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14); |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interPskipCount <= interPskipCount+1; |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
interCurrMbDiff <= interCurrMbDiff+1; |
nextoutputfifo.enq(SkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
else |
begin |
$display( "Trace Prediction: passing no SDmb_skip_run"); |
interPskipCount <= 0; |
infifo.deq(); |
end |
end |
else |
donotfire.doNotFire(); |
end |
// Macroblock type: selects intra 16x16, intra 4x4 (I_NxN), I_PCM (not |
// implemented) or one of the inter partitionings. |
tagged SDMmbtype .xdata : |
begin |
if(interstate==Start && intrastate==Start)//not necessary (just need to keep inter from feeding predictedfifo or change intra state until intrastate==Start) |
begin |
infifo.deq(); |
$display( "INFO Prediction: SDMmbtype %0d", xdata); |
if(mbPartPredMode(xdata,0)==Intra_16x16) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra); |
intrastate <= Intra16x16; |
if(xdata matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3}) |
intra16x16_pred_mode <= tempv1; |
else |
$display( "ERROR Prediction: MacroblockLayer 5 sdmmbtype not I_16x16" ); |
intraReqCount <= 1; |
intraRespCount <= 1; |
// Neighbouring inter predictors see this macroblock as intra-coded. |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
else if(xdata==I_NxN) |
begin |
if(!outstatefifo.notEmpty()) |
begin |
outstatefifo.enq(Intra4x4); |
intrastate <= Intra4x4; |
intraReqCount <= 1; |
intraRespCount <= 1; |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
donotfire.doNotFire(); |
end |
else if(xdata==I_PCM) |
begin |
$display( "ERROR Prediction: I_PCM not implemented yet"); |
$finish;//////////////////////////////////////////////////////////////////////////////////////// |
intra4x4typeLeft <= replicate(13); |
intra4x4typeTop <= replicate(13); |
interTopLeftVal <= replicate(NotInter 1); |
interLeftVal <= replicate(NotInter 1); |
interTopVal <= replicate(NotInter 1); |
end |
else |
begin |
// Inter macroblock: same in-flight limit as for skipped macroblocks. |
if(!outstatefifo.notEmpty() || interCurrMbDiff<picWidth-1) |
begin |
outstatefifo.enq(Inter); |
case(xdata) |
P_L0_16x16: interstate <= InterP16x16; |
P_L0_L0_16x8: interstate <= InterP16x8; |
P_L0_L0_8x16: interstate <= InterP8x16; |
P_8x8: interstate <= InterP8x8; |
P_8x8ref0: interstate <= InterP8x8ref0; |
default: $display( "ERROR Prediction: passing SDMmbtype inter prediction unknown mbtype"); |
endcase |
interReqCount <= 1; |
interRespCount <= 1; |
intra4x4typeLeft <= replicate(14);///////////////////////////////////////////////////////////////////////////// |
intra4x4typeTop <= replicate(14); |
interTopLeftVal <= update(interTopLeftVal , 0, (NotInter 0)); |
interTopVal <= replicate(NotInter 0); |
interNewestMv <= 0; |
interRefIdxVector <= replicate(0); |
nextoutputfifo.enq(NonSkipMB); |
end |
else |
donotfire.doNotFire(); |
end |
// Counted for every accepted macroblock type, intra as well as inter. |
interCurrMbDiff <= interCurrMbDiff+1; |
end |
else |
donotfire.doNotFire(); |
end |
tagged SDMMrem_intra4x4_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
rem_intra4x4_pred_mode.enq(xdata); |
end |
tagged SDMMintra_chroma_pred_mode .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
intra_chroma_pred_mode.enq(xdata); |
end |
// Reference indices of the macroblock partitions: a single one for 16x16, |
// otherwise two (the counter wraps after index 1). |
tagged SDMMref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
if(interstate==InterP16x16 || interPassingCount==1) |
interPassingCount <= 0; |
else |
interPassingCount <= interPassingCount+1; |
end |
// Motion vector differences arrive as two consecutive items; the first is |
// parked in interMvDiffTemp and the pair is enqueued when the second arrives. |
tagged SDMMmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
// Sub-macroblock types and reference indices: four of each; the 2-bit |
// counter wraps back to 0 after the fourth. |
tagged SDMSsub_mb_type .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
interSubMbTypeVector <= update(interSubMbTypeVector,interPassingCount,xdata); |
interPassingCount <= interPassingCount+1; |
end |
tagged SDMSref_idx_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
interRefIdxVector <= update(interRefIdxVector,interPassingCount,xdata[3:0]); |
interPassingCount <= interPassingCount+1; |
end |
// Same pairing scheme as SDMMmvd_l0. |
tagged SDMSmvd_l0 .xdata : |
begin |
infifo.deq(); |
////outfifo.enq(infifo.first()); |
if(interPassingCount==1) |
begin |
Bit#(13) interMvDiffTemp2 = truncate(xdata); |
interMvDiff.enq(tuple2(interMvDiffTemp,interMvDiffTemp2)); |
interPassingCount <= 0; |
end |
else |
begin |
interMvDiffTemp <= truncate(xdata); |
interPassingCount <= interPassingCount+1; |
end |
end |
// Anything else is not handled here: give infifo back to rule passing. |
default: passFlag <= True; |
endcase |
endrule |
|
|
// Produces the output stream for the macroblock at the head of outstatefifo. |
// Per firing it emits one item on outfifo: an IBTmb_qp, a boundary-strength |
// item (PBbS) or one row of four reconstructed pixels (PBoutput). |
// After a pixel row has been output (outputFlag == 1) it also |
//   - issues the store requests that were held back from the previous macroblock, |
//   - refreshes the left/top neighbour registers used by intra prediction, |
//   - writes the bottom row information of the macroblock to memory, |
//   - advances the output position and, at the end of the macroblock, the |
//     macroblock counters. |
rule outputing ( currMbHor<zeroExtend(picWidth) ); |
Bit#(1) outputFlag = 0; |
Vector#(4,Bit#(8)) outputVector = replicate(0); |
// Position of the current 4x4 block inside the macroblock, decoded from the |
// interleaved bits of outBlockNum, and the pixel row inside the block. |
Bit#(2) blockHor = {outBlockNum[2],outBlockNum[0]}; |
Bit#(2) blockVer = {outBlockNum[3],outBlockNum[1]}; |
Bit#(2) pixelVer = {outPixelNum[3],outPixelNum[2]}; |
Bit#(4) totalVer = {blockVer,pixelVer}; |
//$display( "Trace Prediction: outputing" ); |
// First item after a slice start: forward the quantisation parameters. |
if(outFirstQPFlag) |
begin |
if(infifo_ITB.first() matches tagged IBTmb_qp .xdata) |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing outFirstQP %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
else |
$display( "ERROR Prediction: outputing unexpected infifo_ITB.first()"); |
end |
// Skipped macroblock: no residual is read from infifo_ITB; the predicted |
// pixels are output unchanged. |
else if(nextoutputfifo.first() == SkipMB) |
begin |
// At the first row of every luma block a boundary-strength item is output |
// before the pixel data; interBSoutput alternates between the two. |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
// A hint of 3 becomes bS 4; otherwise bS is 2 when the neighbouring block had |
// non-zero coefficients, else the hint itself. |
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing SkipMB bS %h %h %h %h", outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing SkipMB out %h %h %h", outBlockNum, outPixelNum, outputVector); |
end |
end |
else |
begin |
case ( infifo_ITB.first() ) matches |
tagged IBTmb_qp .xdata : |
begin |
infifo_ITB.deq(); |
outfifo.enq(IBTmb_qp {qpy:xdata.qpy,qpc:xdata.qpc}); |
outFirstQPFlag <= False; |
$display( "Trace Prediction: outputing ITBmb_qp %h %h %h", outBlockNum, outPixelNum, xdata); |
end |
// One row of four residual values for the current block. |
tagged ITBresidual .xdata : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
// Intra macroblocks: bS 4 on the macroblock edge, 3 inside. |
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
// The block has a residual, so bS is at least 2. |
Bit#(3) horBS = (tempHorBS==3 ? 4 : 2); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : 2); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, True); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, True); |
$display( "Trace Prediction: outputing ITBresidual bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
Bit#(11) tempOutputValue = 0; |
// Reconstruct each pixel as residual + prediction and clip to 0..255: |
// bit 10 set means the sum is negative. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempOutputValue = signExtend(xdata[ii]) + zeroExtend((predictedfifo.first())[ii]); |
if(tempOutputValue[10]==1) |
outputVector[ii] = 0; |
else if(tempOutputValue[9:0] > 255) |
outputVector[ii] = 255; |
else |
outputVector[ii] = tempOutputValue[7:0]; |
end |
outfifo.enq(PBoutput outputVector); |
infifo_ITB.deq(); |
predictedfifo.deq(); |
outputFlag = 1; |
$display( "Trace Prediction: outputing ITBresidual out %h %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), xdata, outputVector); |
end |
end |
// A block without residual: a single item covers all four rows of the block, |
// so it is dequeued only with the last row (outPixelNum == 12). |
tagged ITBcoeffLevelZeros : |
begin |
if(interBSoutput && outChromaFlag==0 && outPixelNum==0) |
begin |
interBSoutput <= False; |
if(outstatefifo.first() != Inter) |
outfifo.enq(PBbS {bShor:(blockHor==0 ? 4 : 3),bSver:(blockVer==0 ? 4 : 3)}); |
else |
begin |
interBSfifo.deq(); |
Bit#(2) tempHorBS = tpl_1(interBSfifo.first()); |
Bit#(2) tempVerBS = tpl_2(interBSfifo.first()); |
Bit#(3) horBS = (tempHorBS==3 ? 4 : (interLeftNonZeroTransCoeff[blockVer] ? 2 : zeroExtend(tempHorBS))); |
Bit#(3) verBS = (tempVerBS==3 ? 4 : (interTopNonZeroTransCoeff[blockHor]&&blockVer!=0 ? 2 : zeroExtend(tempVerBS))); |
outfifo.enq(PBbS {bShor:horBS,bSver:verBS}); |
end |
interLeftNonZeroTransCoeff <= update(interLeftNonZeroTransCoeff, blockVer, False); |
interTopNonZeroTransCoeff <= update(interTopNonZeroTransCoeff, blockHor, False); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros bS %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, currMbHor, currMbVer); |
end |
else |
begin |
interBSoutput <= True; |
if(outPixelNum == 12) |
infifo_ITB.deq(); |
outputVector = predictedfifo.first(); |
outfifo.enq(PBoutput outputVector); |
outputFlag = 1; |
predictedfifo.deq(); |
$display( "Trace Prediction: outputing ITBcoeffLevelZeros out %h %h %h %h %h", outChromaFlag, outBlockNum, outPixelNum, predictedfifo.first(), outputVector); |
end |
end |
default: $display( "ERROR Prediction: outputing unknown infifo_ITB input" ); |
endcase |
end |
|
// Everything below runs only when a pixel row was output in this firing. |
if(outputFlag == 1) |
begin |
$display("ccl4PBoutput %0d", outputVector[0]); |
$display("ccl4PBoutput %0d", outputVector[1]); |
$display("ccl4PBoutput %0d", outputVector[2]); |
$display("ccl4PBoutput %0d", outputVector[3]); |
|
// First row of a macroblock (not the first of the slice): issue the store |
// requests that were held back at the end of the previous macroblock. |
if(outBlockNum==0 && pixelVer==0 && outChromaFlag==0 && currMb!=firstMb && picWidth>1) |
begin |
intraMemReqQ.enq(intraMemReqQdelay); |
interMemReqQ.enq(interMemReqQdelay); |
//$display( "TRACE Prediction: passing storing addr data");////////////////// |
end |
|
// Right-most block column (or any block of an Intra4x4 macroblock): record |
// the right-most pixel of the row as left-neighbour data. |
if(blockHor==3 || (blockHor[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0)) |
begin |
if(outChromaFlag==0) |
begin |
Bit#(32) intraLeftValNextTemp = intraLeftValNext; |
// At the top of the column, seed byte 0 with the last pixel of the top-neighbour row. |
if(totalVer==0 || (outstatefifo.first()==Intra4x4 && pixelVer==0)) |
begin |
Bit#(32) tempValSet = select(intraTopVal,zeroExtend(blockHor)); |
intraLeftValNextTemp = zeroExtend(tempValSet[31:24]); |
end |
case(pixelVer) |
0:intraLeftValNext <= {intraLeftValNextTemp[31:16],outputVector[3],intraLeftValNextTemp[7:0]}; |
1:intraLeftValNext <= {intraLeftValNextTemp[31:24],outputVector[3],intraLeftValNextTemp[15:0]}; |
2:intraLeftValNext <= {outputVector[3],intraLeftValNextTemp[23:0]}; |
3: |
begin |
// Last row of the block: commit the 5-byte column and the block's type code. |
intraLeftVal <= update(intraLeftVal,blockVer,{outputVector[3],intraLeftValNextTemp}); |
intraLeftValNext <= zeroExtend(outputVector[3]); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,13); |
else |
intra4x4typeLeft <= update(intra4x4typeLeft,blockVer,14); |
end |
endcase |
end |
else |
begin |
// Chroma: outBlockNum[2] selects which of the two chroma registers is updated. |
if(outBlockNum[2]==0) |
intraLeftValChroma0 <= update(intraLeftValChroma0,totalVer+1,outputVector[3]); |
else |
intraLeftValChroma1 <= update(intraLeftValChroma1,totalVer+1,outputVector[3]); |
end |
end |
|
// Bottom row of the bottom-most block row (or of any block of an Intra4x4 |
// macroblock): record the row as top-neighbour data. |
if(pixelVer==3 && (blockVer==3 || (blockVer[0]==1 && outChromaFlag==1) || (outstatefifo.first()==Intra4x4 && outChromaFlag==0))) |
begin |
if(outChromaFlag==0) |
begin |
intraTopVal <= update(intraTopVal,zeroExtend(blockHor),{outputVector[3],outputVector[2],outputVector[1],outputVector[0]}); |
if(outstatefifo.first()==Intra4x4) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,cur_intra4x4_pred_mode); |
else if(outstatefifo.first()==Intra) |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,13); |
else |
intra4x4typeTop <= update(intra4x4typeTop,blockHor,14); |
end |
else |
begin |
if(outBlockNum[2]==0) |
begin |
Vector#(4,Bit#(16)) intraTopValChroma0Next = intraTopValChroma0; |
intraTopValChroma0Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma0Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma0 <= intraTopValChroma0Next; |
end |
else |
begin |
Vector#(4,Bit#(16)) intraTopValChroma1Next = intraTopValChroma1; |
intraTopValChroma1Next[{blockHor[0],1'b0}] = {outputVector[1],outputVector[0]}; |
intraTopValChroma1Next[{blockHor[0],1'b1}] = {outputVector[3],outputVector[2]}; |
intraTopValChroma1 <= intraTopValChroma1Next; |
end |
end |
end |
|
// Last chroma block of the macroblock: its four pixel rows are used as four |
// time slots (pixelVer 0..3) for writing the macroblock's bottom-row |
// information to the inter and intra memories. |
if(outChromaFlag==1 && outBlockNum==7) |
begin |
Bit#(PicWidthSz) tempStoreAddr = truncate(currMbHor); |
InterBlockMv outBlockMv = interOutBlockMvfifo.first(); |
if(outBlockMv matches tagged BlockMv .bdata) |
begin |
outBlockMv = (BlockMv {refIdx:bdata.refIdx,mvhor:bdata.mvhor,mvver:bdata.mvver,nonZeroTransCoeff:(interTopNonZeroTransCoeff[pixelVer]?1:0)}); |
interOutBlockMvfifo.deq(); |
end |
else if(pixelVer==3) |
interOutBlockMvfifo.deq(); |
// The last store is held back in the delay register (issued when the output |
// of the next macroblock starts) unless the picture is one macroblock wide. |
if(pixelVer==3 && picWidth>1) |
interMemReqQdelay <= StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}; |
else |
interMemReqQ.enq(StoreReq {addr:{tempStoreAddr,pixelVer},data:pack(outBlockMv)}); |
// Intra stores run one slot behind: slot n writes entry n-1, and the last |
// slot additionally prepares entry 3 in the delay register. |
if(pixelVer>0) |
begin |
Bit#(4) intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[(pixelVer-1)])); |
Bit#(32) intraTopValStore = intraTopVal[(pixelVer-1)]; |
Bit#(16) intraTopValChroma0Store = intraTopValChroma0[(pixelVer-1)]; |
Bit#(16) intraTopValChroma1Store = (pixelVer<3 ? intraTopValChroma1[(pixelVer-1)] : {outputVector[1],outputVector[0]}); |
Bit#(68) intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQ.enq(StoreReq {addr:{tempStoreAddr,(pixelVer-1)},data:intraStore}); |
if(pixelVer==3) |
begin |
intra4x4typeTopStore = ((outstatefifo.first()==Inter) ? 14 : ((outstatefifo.first()!=Intra4x4) ? 13: intra4x4typeTop[3])); |
intraTopValStore = intraTopVal[3]; |
intraTopValChroma0Store = intraTopValChroma0[3]; |
intraTopValChroma1Store = {outputVector[3],outputVector[2]}; |
intraStore = {intra4x4typeTopStore,intraTopValChroma1Store,intraTopValChroma0Store,intraTopValStore}; |
intraMemReqQdelay <= StoreReq {addr:{tempStoreAddr,2'b11},data:intraStore}; |
end |
end |
end |
// Advance the output position: four pixels per row, four rows per block, |
// 16 luma blocks followed by 8 chroma blocks per macroblock. |
outPixelNum <= outPixelNum+4; |
if(outPixelNum == 12) |
begin |
if(outChromaFlag==0) |
begin |
outBlockNum <= outBlockNum+1; |
if(outBlockNum == 15) |
outChromaFlag <= 1; |
if(nextoutputfifo.first() == Intra4x4) |
nextoutputfifo.deq(); |
end |
else |
begin |
if(outBlockNum == 7) |
begin |
// Macroblock finished: move on to the next one. |
outBlockNum <= 0; |
outChromaFlag <= 0; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
interCurrMbDiff <= interCurrMbDiff-1; |
outstatefifo.deq; |
intrastate <= Start; |
if(truncate(currMbHor)==picWidth-1 && currMbVer==picHeight-1) |
interpolator.endOfFrame(); |
nextoutputfifo.deq(); |
end |
else |
outBlockNum <= outBlockNum+1; |
end |
end |
end |
endrule |
|
|
// Brings currMbHor back into the range [0, picWidth) by repeatedly trading |
// picture widths for rows of currMbVer. When at least eight widths fit, eight |
// rows are converted in one firing; otherwise a single row is converted. |
rule currMbHorUpdate( currMbHor >= zeroExtend(picWidth) ); |
   Bit#(PicAreaSz) widthInMb = zeroExtend(picWidth); |
   Bool eightRows = ((currMbHor >> 3) >= widthInMb); |
   Bit#(PicAreaSz) horStep = eightRows ? (widthInMb << 3) : widthInMb; |
   Bit#(PicHeightSz) verStep = eightRows ? 8 : 1; |
   currMbVer <= currMbVer + verStep; |
   currMbHor <= currMbHor - horStep; |
endrule |
|
|
// inter prediction rules |
|
// Issues the load requests for the motion information of the neighbouring |
// macroblocks, one request per firing, driven by interReqCount: |
//   1..4 : entries 0..3 of the macroblock above |
//   5    : entry 0 of the macroblock above-right if there is one, |
//          otherwise entry 3 of the macroblock above-left if there is one |
//   6    : entry 3 of the macroblock above-left, when both neighbours exist |
// The sequence ends (interReqCount <= 0) as soon as nothing is left to |
// request; a macroblock whose index is below picWidth requests nothing. |
rule interSendReq ( interReqCount>0 && currMbHor<zeroExtend(picWidth) ); |
   Bit#(PicAreaSz) widthInMb = zeroExtend(picWidth); |
   // Index and column of the macroblock being fetched for; it runs |
   // interCurrMbDiff-1 macroblocks ahead of the one being output. |
   Bit#(PicAreaSz) mbIndex = currMb+zeroExtend(interCurrMbDiff)-1; |
   Bit#(PicAreaSz) mbColumn = currMbHor+zeroExtend(interCurrMbDiff)-1; |
   if( mbColumn >= widthInMb ) |
      mbColumn = mbColumn-widthInMb; |
   Bit#(PicWidthSz) column = truncate(mbColumn); |
   Bool hasAboveRight = ((mbColumn+1) < widthInMb); |
   Bool hasAboveLeft = (mbColumn>0 && mbIndex-firstMb>widthInMb); |
   Bit#(TAdd#(PicWidthSz,2)) loadAddr = 0; |
   Bool sendLoad = False; |
   if( !(mbIndex < widthInMb) ) |
      begin |
         if(interReqCount<5) |
            begin |
               Bit#(2) entry = truncate(interReqCount-1); |
               loadAddr = {column,entry}; |
               sendLoad = True; |
            end |
         else if(interReqCount==5 && hasAboveRight) |
            begin |
               loadAddr = {(column+1),2'b00}; |
               sendLoad = True; |
            end |
         else if(interReqCount==5 && hasAboveLeft) |
            begin |
               loadAddr = {(column-1),2'b11}; |
               sendLoad = True; |
            end |
         else if(interReqCount==6 && hasAboveRight && hasAboveLeft) |
            begin |
               loadAddr = {(column-1),2'b11}; |
               sendLoad = True; |
            end |
      end |
   if(sendLoad) |
      begin |
         interMemReqQ.enq(LoadReq loadAddr); |
         interReqCount <= interReqCount+1; |
      end |
   else |
      interReqCount <= 0; |
   $display( "Trace Prediction: interSendReq %h %h %h", interstate, interReqCount, loadAddr); |
endrule |
|
|
// Counterpart of interReceiveResp for a macroblock whose index is below |
// picWidth: no neighbour data is awaited from memory, so the response phase |
// is closed immediately and the processing steps are started. For a |
// macroblock in column 0 the left and top-left neighbours are marked as not |
// available. |
rule interReceiveNoResp ( interRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb+zeroExtend(interCurrMbDiff)-1<zeroExtend(picWidth) ); |
   Bit#(PicAreaSz) widthInMb = zeroExtend(picWidth); |
   Bit#(PicAreaSz) rawColumn = currMbHor+zeroExtend(interCurrMbDiff)-1; |
   Bit#(PicAreaSz) mbColumn = (rawColumn >= widthInMb) ? (rawColumn-widthInMb) : rawColumn; |
   if(mbColumn == 0) |
      begin |
         interTopLeftVal <= replicate(NotInter 0); |
         interLeftVal <= replicate(NotInter 0); |
      end |
   interIPStepCount <= 1; |
   interStepCount <= 1; |
   interRespCount <= 0; |
   $display( "Trace Prediction: interReceiveNoResp %h %h", interstate, interRespCount); |
endrule |
|
|
// Collects the load responses for the neighbouring macroblocks' motion |
// information, one response per firing, driven by interRespCount: |
//   1..4 : stored into interTopVal[0..3] |
//   5    : stored into interTopVal[4] when an above-right macroblock exists, |
//          otherwise into interTopLeftVal[0] |
//   6    : stored into interTopLeftVal[0] |
// After the last expected response the counter is cleared and the processing |
// steps are started (interStepCount / interIPStepCount <= 1). |
// NOTE(review): when neither an above-right nor an above-left neighbour |
// exists and interstate is InterPskip, InterP16x16 or InterP16x8, this rule |
// finishes after the first response, whereas rule interSendReq issues its |
// first four load requests without looking at interstate. That would leave |
// three responses queued in interMemRespQ — confirm whether this case can |
// occur and which of the two rules reflects the intended behaviour. |
rule interReceiveResp ( interRespCount>0 && interRespCount<7 && currMbHor<zeroExtend(picWidth) &&& interMemRespQ.first() matches tagged LoadResp .data); |
// Column and index of the macroblock being fetched for (same computation as in interSendReq). |
Bit#(PicAreaSz) currMbHorTemp = currMbHor+zeroExtend(interCurrMbDiff)-1; |
Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
if( currMbHorTemp >= zeroExtend(picWidth) ) |
currMbHorTemp = currMbHorTemp-zeroExtend(picWidth); |
Bool noMoreResp = False; |
Bit#(2) temp2bit = 0; |
InterBlockMv unpackedData = unpack(data); |
Vector#(5,InterBlockMv) interTopValNext = interTopVal; |
Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal; |
if(interRespCount<5) |
begin |
temp2bit = truncate(interRespCount-1); |
interTopValNext[temp2bit] = unpackedData; |
// Stop here when neither an above-right nor an above-left response will follow. |
if((interRespCount==4 || (interRespCount==1 && (interstate==InterPskip || interstate==InterP16x16 || interstate==InterP16x8))) |
&& (!((currMbHorTemp+1)<zeroExtend(picWidth)) && !(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth)))) |
noMoreResp = True; |
end |
else if(interRespCount==5) |
begin |
if((currMbHorTemp+1)<zeroExtend(picWidth)) |
begin |
interTopValNext[4] = unpackedData; |
if(!(currMbHorTemp>0 && currMbTemp-firstMb>zeroExtend(picWidth))) |
noMoreResp = True; |
end |
else |
begin |
interTopLeftValNext[0] = unpackedData; |
noMoreResp = True; |
end |
end |
else |
begin |
interTopLeftValNext[0] = unpackedData; |
noMoreResp = True; |
end |
interMemRespQ.deq(); |
//$display( "TRACE Prediction: interReceiveResp data %h",data);/////////////////////// |
if(!noMoreResp) |
interRespCount <= interRespCount+1; |
else |
begin |
interRespCount <= 0; |
interStepCount <= 1; |
interIPStepCount <= 1; |
// Column 0 has no left neighbours: mark them as not available. This also |
// overrides any top-left value stored above. |
if(currMbHorTemp == 0) |
begin |
interLeftVal <= replicate(NotInter 0); |
interTopLeftValNext = replicate(NotInter 0); |
end |
end |
interTopVal <= interTopValNext; |
interTopLeftVal <= interTopLeftValNext; |
$display( "Trace Prediction: interReceiveResp %h %h %h", interstate, interRespCount, data); |
endrule |
|
|
rule interProcessStep ( interStepCount>0 && currMbHor<zeroExtend(picWidth) ); |
   // Processes one 4x4 luma block of the current inter macroblock per firing, |
   // visiting {interMbPartNum,interSubMbPartNum} = 0..15 in order. For each block it |
   //   1. determines whether the block starts a (sub-)partition (calcmv) or simply |
   //      inherits the MV of its left/top neighbour inside the same partition, |
   //   2. for partition starts, predicts the MV from neighbours A (left), B (top) |
   //      and C (top-right, falling back to D = top-left) and adds the MV difference, |
   //   3. enqueues the two boundary-strength values for the block, |
   //   4. updates the left/top/top-left neighbour stores for the following blocks. |
   // NOTE(review): NotInter 0 is treated below as "neighbour not available"; the |
   // intra rules enqueue NotInter 1 instead - confirm against the type definition. |
   Bit#(PicAreaSz) currMbTemp = currMb+zeroExtend(interCurrMbDiff)-1; |
   Bit#(2) blockHor = {interMbPartNum[0],interSubMbPartNum[0]}; |
   Bit#(2) blockVer = {interMbPartNum[1],interSubMbPartNum[1]}; |
   Bit#(3) partWidth = 0;   // partition width in 4x4 blocks |
   Bit#(2) subMbType = 0; |
   Bool noBlockC = False;   // True when neighbour C must not be used (D is used instead) |
   Bool calcmv = False;     // True when this block is the first block of its partition |
   Bool leftmv = False;     // for inherited MVs: copy from the left (True) or top (False) block |
   if(interstate==InterPskip || interstate==InterP16x16) |
      begin |
         partWidth = 4; |
         calcmv = (interMbPartNum==0 && interSubMbPartNum==0); |
         leftmv = (blockHor>0); |
      end |
   else if(interstate==InterP16x8) |
      begin |
         partWidth = 4; |
         if(interMbPartNum==2) |
            noBlockC = True; |
         calcmv = (interMbPartNum[0]==0 && interSubMbPartNum==0); |
         leftmv = (blockHor>0); |
      end |
   else if(interstate==InterP8x16) |
      begin |
         partWidth = 2; |
         calcmv = (interMbPartNum[1]==0 && interSubMbPartNum==0); |
         // Only row 0 copies from the left; lower rows copy from the block above so |
         // that the right-hand partition never inherits from the left-hand one. |
         leftmv = !(blockVer>0); |
      end |
   else if(interstate==InterP8x8 || interstate==InterP8x8ref0) |
      begin |
         subMbType = interSubMbTypeVector[interMbPartNum]; |
         case(subMbType) |
            0: |
               begin |
                  partWidth = 2; |
                  if(interMbPartNum==3) |
                     noBlockC = True; |
                  calcmv = (interSubMbPartNum==0); |
                  leftmv = (blockHor[0]>0); |
               end |
            1: |
               begin |
                  partWidth = 2; |
                  if(interSubMbPartNum==2) |
                     noBlockC = True; |
                  calcmv = (interSubMbPartNum[0]==0); |
                  leftmv = True; |
               end |
            2: |
               begin |
                  partWidth = 1; |
                  calcmv = (interSubMbPartNum[1]==0); |
                  leftmv = False; |
               end |
            3: |
               begin |
                  partWidth = 1; |
                  if(interSubMbPartNum==3) |
                     noBlockC = True; |
                  calcmv = True; |
               end |
         endcase |
      end |
   else |
      $display( "ERROR Prediction: interProcessStep unexpected interstate"); |
   // FIX: when the partition's right edge is the macroblock's right edge and the |
   // block is not in row 0, neighbour C lies inside the right-hand macroblock, which |
   // has not been decoded yet. interTopVal[4] still holds the top-right macroblock's |
   // MV in that case, so it must not be taken as C (H.264 neighbour availability). |
   // The explicit cases above covered only some of these positions. |
   if(blockVer!=0 && ({1'b0,blockHor}+partWidth)==4) |
      noBlockC = True; |
   Bit#(4) refIndex = ((interstate==InterPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interMbPartNum]); |
   // Neighbours A (left), B (top), C (top-right or, as a fallback, top-left). |
   Vector#(3,InterBlockMv) blockABC = replicate(NotInter 0); |
   if( !(currMbTemp-firstMb==0 && blockHor==0) ) |
      blockABC[0] = interLeftVal[blockVer]; |
   if( !(currMbTemp-firstMb<zeroExtend(picWidth) && blockVer==0) ) |
      blockABC[1] = interTopVal[blockHor]; |
   blockABC[2] = interTopVal[{1'b0,blockHor}+partWidth]; |
   if(noBlockC || blockABC[2]==(NotInter 0)) |
      blockABC[2] = interTopLeftVal[blockVer]; |
   Bit#(14) mvhorfinal = 0; |
   Bit#(12) mvverfinal = 0; |
   Bit#(5) interNewestMvNext = 0; |
   if(calcmv)//motion vector calculation |
      begin |
         Vector#(3,Int#(14)) mvhorABC = replicate(0); |
         Vector#(3,Int#(12)) mvverABC = replicate(0); |
         Bit#(2) validCount = 0;   // number of neighbours using the same reference index |
         Bit#(14) mvhorPred = 0; |
         Bit#(12) mvverPred = 0; |
         for(Integer ii=0; ii<3; ii=ii+1) |
            begin |
               if(blockABC[ii] matches tagged BlockMv .xdata) |
                  begin |
                     mvhorABC[ii] = unpack(xdata.mvhor); |
                     mvverABC[ii] = unpack(xdata.mvver); |
                     if(xdata.refIdx == refIndex) |
                        begin |
                           validCount = validCount+1; |
                           mvhorPred = xdata.mvhor; |
                           mvverPred = xdata.mvver; |
                        end |
                  end |
               else |
                  begin |
                     mvhorABC[ii] = 0; |
                     mvverABC[ii] = 0; |
                  end |
            end |
         // FIX: H.264 8.4.1.3.1 - when B and C are both unavailable they take over A's |
         // motion data, so the median below yields mvA instead of 0. If A carries no MV |
         // its entries are already 0 and nothing changes. |
         if(blockABC[1]==(NotInter 0) && blockABC[2]==(NotInter 0)) |
            begin |
               mvhorABC[1] = mvhorABC[0]; |
               mvhorABC[2] = mvhorABC[0]; |
               mvverABC[1] = mvverABC[0]; |
               mvverABC[2] = mvverABC[0]; |
            end |
         if(validCount != 1)//median |
            begin |
               if(mvhorABC[0]>mvhorABC[1] && mvhorABC[0]>mvhorABC[2]) |
                  mvhorPred = pack((mvhorABC[1]>mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
               else if(mvhorABC[0]<mvhorABC[1] && mvhorABC[0]<mvhorABC[2]) |
                  mvhorPred = pack((mvhorABC[1]<mvhorABC[2]) ? mvhorABC[1] : mvhorABC[2]); |
               else |
                  mvhorPred = pack(mvhorABC[0]); |
               if(mvverABC[0]>mvverABC[1] && mvverABC[0]>mvverABC[2]) |
                  mvverPred = pack((mvverABC[1]>mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
               else if(mvverABC[0]<mvverABC[1] && mvverABC[0]<mvverABC[2]) |
                  mvverPred = pack((mvverABC[1]<mvverABC[2]) ? mvverABC[1] : mvverABC[2]); |
               else |
                  mvverPred = pack(mvverABC[0]); |
            end |
         if(interstate==InterPskip) |
            begin |
               // P-skip: the prediction is forced to zero when A or B is NotInter 0, or |
               // when A or B has refIdx 0 together with a zero motion vector. |
               for(Integer ii=0; ii<2; ii=ii+1) |
                  begin |
                     if(blockABC[ii] matches tagged BlockMv .xdata) |
                        begin |
                           if(xdata.refIdx==0 && xdata.mvhor==0 && xdata.mvver==0) |
                              begin |
                                 mvhorPred = 0; |
                                 mvverPred = 0; |
                              end |
                        end |
                     else if(blockABC[ii] matches tagged NotInter 0) |
                        begin |
                           mvhorPred = 0; |
                           mvverPred = 0; |
                        end |
                  end |
            end |
         else if(interstate==InterP16x8 || interstate==InterP8x16) |
            begin |
               // Directional prediction: one preferred neighbour is used directly when |
               // its reference index matches; otherwise the value computed above is kept. |
               InterBlockMv blockCheck; |
               if(interstate==InterP16x8) |
                  begin |
                     if(interMbPartNum==0) |
                        blockCheck = blockABC[1]; |
                     else |
                        blockCheck = blockABC[0]; |
                  end |
               else |
                  begin |
                     if(interMbPartNum==0) |
                        blockCheck = blockABC[0]; |
                     else |
                        blockCheck = blockABC[2]; |
                  end |
               if(blockCheck matches tagged BlockMv .xdata &&& xdata.refIdx==refIndex) |
                  begin |
                     mvhorPred = xdata.mvhor; |
                     mvverPred = xdata.mvver; |
                  end |
            end |
         mvhorfinal = mvhorPred; |
         mvverfinal = mvverPred; |
         if(interstate!=InterPskip) |
            begin |
               // Non-skip partitions add the decoded MV difference to the prediction. |
               mvhorfinal = truncate(tpl_1(interMvDiff.first()) + signExtend(mvhorPred)); |
               mvverfinal = truncate(tpl_2(interMvDiff.first()) + signExtend(mvverPred)); |
               interMvDiff.deq(); |
            end |
         interMvFile.upd({interMbPartNum,interSubMbPartNum},tuple2(mvhorfinal,mvverfinal)); |
         interNewestMvNext = zeroExtend({interMbPartNum,interSubMbPartNum})+1; |
         $display( "Trace Prediction: interProcessStep %h %h %h %h %h %h %h %h %h", interstate, interStepCount, interMbPartNum, interSubMbPartNum, pack(blockABC[0]), pack(blockABC[1]), pack(blockABC[2]), mvhorPred, mvverPred); |
      end |
   else |
      begin |
         // Block inside an already-started partition: inherit the neighbour's MV. |
         if(leftmv) |
            begin |
               if(blockABC[0] matches tagged BlockMv .xdata) |
                  begin |
                     mvhorfinal = unpack(xdata.mvhor); |
                     mvverfinal = unpack(xdata.mvver); |
                  end |
               else |
                  $display( "ERROR Prediction: interProcessStep unexpected blockABC[0]"); |
            end |
         else |
            begin |
               if(blockABC[1] matches tagged BlockMv .xdata) |
                  begin |
                     mvhorfinal = unpack(xdata.mvhor); |
                     mvverfinal = unpack(xdata.mvver); |
                  end |
               else |
                  $display( "ERROR Prediction: interProcessStep unexpected blockABC[1]"); |
            end |
      end |
   Bit#(2) tempBShor = 0;//bS calculation |
   Bit#(2) tempBSver = 0; |
   if(interLeftVal[blockVer] matches tagged BlockMv .xdata) |
      begin |
         if(xdata.nonZeroTransCoeff == 1) |
            tempBShor = 2; |
         else |
            begin |
               if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
                  tempBShor = 1; |
               else |
                  tempBShor = 0; |
            end |
      end |
   else |
      tempBShor = 3; |
   if(interTopVal[blockHor] matches tagged BlockMv .xdata) |
      begin |
         if(xdata.nonZeroTransCoeff == 1) |
            tempBSver = 2; |
         else |
            begin |
               if(xdata.refIdx!=refIndex || absDiffGEFour14(mvhorfinal,xdata.mvhor) || absDiffGEFour12(mvverfinal,xdata.mvver)) |
                  tempBSver = 1; |
               else |
                  tempBSver = 0; |
            end |
      end |
   else |
      tempBSver = 3; |
   interBSfifo.enq(tuple2(tempBShor,tempBSver)); |
   Vector#(5,InterBlockMv) interTopValNext = interTopVal;//update inter*Val |
   Vector#(4,InterBlockMv) interLeftValNext = interLeftVal; |
   Vector#(4,InterBlockMv) interTopLeftValNext = interTopLeftVal; |
   interLeftValNext[blockVer] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
   interTopValNext[blockHor] = (BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
   // The old top value of this column becomes the top-left neighbour of the next block in this row. |
   interTopLeftValNext[blockVer] = interTopVal[blockHor]; |
   interTopVal <= interTopValNext; |
   interLeftVal <= interLeftValNext; |
   interTopLeftVal <= interTopLeftValNext; |
   // Bottom-row blocks are also sent out through interOutBlockMvfifo. |
   if(blockVer == 3) |
      interOutBlockMvfifo.enq(BlockMv {refIdx:refIndex,mvhor:mvhorfinal,mvver:mvverfinal,nonZeroTransCoeff:0}); |
   if(interSubMbPartNum == 3)//next step |
      begin |
         interSubMbPartNum <= 0; |
         if(interMbPartNum == 3) |
            begin |
               interMbPartNum <= 0; |
               interStepCount <= 0; |
               interNewestMvNext = 16; |
            end |
         else |
            interMbPartNum <= interMbPartNum+1; |
      end |
   else |
      interSubMbPartNum <= interSubMbPartNum+1; |
   // interNewestMv gates interIPProcessStep: it only advances when a new MV was stored. |
   if(interNewestMvNext > 0) |
      interNewestMv <= interNewestMvNext; |
endrule |
|
|
rule interIPProcessStep ( interIPStepCount>0 && currMbHor<zeroExtend(picWidth) && interNewestMv>zeroExtend({interIPMbPartNum,interIPSubMbPartNum}) ); |
   // Issues one interpolation request per firing. Pass 1 (interIPStepCount==1) |
   // requests luma; later passes request chroma with uv = interIPStepCount[0]. |
   // The guard holds the rule back until the MV for the current block index has |
   // been produced (interNewestMv is advanced by interProcessStep). |
   Bit#(PicAreaSz) mbColumn = currMbHor+zeroExtend(interCurrMbDiff)-1; |
   Bit#(PicHeightSz) mbRow = currMbVer; |
   if( mbColumn >= zeroExtend(picWidth) ) |
      begin |
         // Wrapped past the end of the row: first column(s) of the next MB row. |
         mbColumn = mbColumn-zeroExtend(picWidth); |
         mbRow = mbRow+1; |
      end |
   Bit#(2) blockHor = {interIPMbPartNum[0],interIPSubMbPartNum[0]}; |
   Bit#(2) blockVer = {interIPMbPartNum[1],interIPSubMbPartNum[1]}; |
   Bool isPskip = (interstate==InterPskip); |
   Bool isSubPartitioned = (interstate==InterP8x8 || interstate==InterP8x8ref0); |
   Bool isLumaPass = (interIPStepCount == 1); |
   // Partition counts and block type implied by the macroblock type. |
   Bit#(3) numPart = 1; |
   Bit#(3) numSubPart = 1; |
   Bit#(2) subMbType = 0; |
   IPBlockType btTemp = IP16x16; |
   if(isPskip || interstate==InterP16x16) |
      begin |
         numPart = 1; |
         btTemp = IP16x16; |
      end |
   else if(interstate==InterP16x8) |
      begin |
         numPart = 2; |
         btTemp = IP16x8; |
      end |
   else if(interstate==InterP8x16) |
      begin |
         numPart = 2; |
         btTemp = IP8x16; |
      end |
   else |
      begin |
         if(isSubPartitioned) |
            begin |
               numPart = 4; |
               subMbType = interSubMbTypeVector[interIPMbPartNum]; |
               numSubPart = numSubMbPart(subMbType); |
            end |
         else |
            $display( "ERROR Prediction: interIPProcessStep unexpected interstate"); |
         case(subMbType) |
            0: btTemp = IP8x8; |
            1: btTemp = IP8x4; |
            2: btTemp = IP4x8; |
            3: btTemp = IP4x4; |
         endcase |
      end |
   Bit#(4) refIndex = ((isPskip||interstate==InterP8x8ref0) ? 0 : interRefIdxVector[interIPMbPartNum]); |
   // Block position: horizontal in 4x4-block units, vertical in pixel rows. |
   Bit#(PicWidthSz) mbColumnT = truncate(mbColumn); |
   Bit#(TAdd#(PicWidthSz,2)) horTemp = {mbColumnT,blockHor}; |
   Bit#(TAdd#(PicHeightSz,4)) verTemp = {mbRow,blockVer,2'b00}; |
   let mvEntry = interMvFile.sub({interIPMbPartNum,interIPSubMbPartNum}); |
   if(isLumaPass && !isSubPartitioned) |
      begin |
         // Luma of 16x16/16x8/8x16 macroblocks is requested as four 8x8 blocks; each |
         // one takes the reference index and MV of the partition that contains it. |
         numPart = 4; |
         Bit#(2) ownerPart = interIPMbPartNum; |
         if(btTemp==IP16x16) |
            ownerPart = 0; |
         else if(btTemp==IP16x8 && ownerPart[0]==1) |
            ownerPart = ownerPart-1; |
         else if(btTemp==IP8x16 && ownerPart[1]==1) |
            ownerPart = ownerPart-2; |
         refIndex = (isPskip ? 0 : interRefIdxVector[ownerPart]); |
         btTemp = IP8x8; |
         mvEntry = interMvFile.sub({ownerPart,2'b00}); |
      end |
   Bit#(14) mvhorTemp = tpl_1(mvEntry); |
   Bit#(12) mvverTemp = tpl_2(mvEntry); |
   if(isLumaPass) |
      interpolator.request(IPLuma {refIdx:refIndex,hor:horTemp,ver:verTemp,mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
   else |
      interpolator.request(IPChroma {refIdx:refIndex,uv:interIPStepCount[0],hor:horTemp,ver:truncate(verTemp>>1),mvhor:mvhorTemp,mvver:mvverTemp,bt:btTemp}); |
   // Advance to the next (sub-)partition, or to the next pass after the last one. |
   Bool lastSubPart = (interIPSubMbPartNum >= truncate(numSubPart-1)); |
   Bool lastPart = (interIPMbPartNum >= truncate(numPart-1)); |
   if(!lastSubPart) |
      // 8x4 sub-partitions are at sub-block indices 0 and 2. |
      interIPSubMbPartNum <= ((subMbType == 1) ? 2 : interIPSubMbPartNum+1); |
   else |
      begin |
         interIPSubMbPartNum <= 0; |
         if(lastPart) |
            begin |
               interIPMbPartNum <= 0; |
               interIPStepCount <= interIPStepCount+1; |
            end |
         else |
            // 16x8 partitions are at partition indices 0 and 2. |
            interIPMbPartNum <= ((btTemp == IP16x8) ? 2 : interIPMbPartNum+1); |
      end |
   $display( "Trace Prediction: interIPProcessStep %h %h %h %h %h %h %h %h %h %h", interstate, interIPStepCount, interIPMbPartNum, interIPSubMbPartNum, refIndex, horTemp, verTemp, mvhorTemp, mvverTemp, pack(btTemp)); |
endrule |
|
|
rule interDone ( interstate!=Start |
                 && interReqCount==0 |
                 && interRespCount==0 |
                 && interStepCount==0 |
                 && interIPStepCount==0 ); |
   // All four inter-prediction counters are idle: return interstate to Start. |
   interstate <= Start; |
endrule |
|
|
rule interOutputTransfer ( True ); |
   // Moves one interpolator result per firing into predictedfifo. |
   let interpolated = interpolator.first(); |
   interpolator.deq(); |
   predictedfifo.enq(interpolated); |
endrule |
|
|
|
// intra prediction rules |
|
rule intraSendReq ( intraReqCount>0 && currMbHor<zeroExtend(picWidth) && !nextoutputfifo.notEmpty() ); |
   // Issues the neighbour load requests for an intra macroblock, one per firing: |
   //   counts 1..4 : the four blocks {column, 0..3} of the current MB column, |
   //   count 5     : block {column+1, 0} (Intra4x4 with a column to the right), |
   //                 otherwise block {column-1, 3} when a top-left MB exists, |
   //   count 6     : block {column-1, 3} when both of the above apply. |
   // No request is sent while the MB is within the first picWidth MBs after firstMb. |
   // intraReqCount is cleared as soon as there is nothing further to request. |
   Bit#(PicWidthSz) mbColumn = truncate(currMbHor); |
   Bool firstRowOfSlice = (currMb-firstMb < zeroExtend(picWidth)); |
   Bool needTopRight = ((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4); |
   Bool needTopLeft = (currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)); |
   Bit#(TAdd#(PicWidthSz,2)) topLeftAddr = {(mbColumn-1),2'b11}; |
   Bit#(TAdd#(PicWidthSz,2)) reqAddr = 0; |
   Bool sendReq = False; |
   if(!firstRowOfSlice) |
      begin |
         if(intraReqCount<5) |
            begin |
               Bit#(2) topBlock = truncate(intraReqCount-1); |
               reqAddr = {mbColumn,topBlock}; |
               sendReq = True; |
            end |
         else if(intraReqCount==5) |
            begin |
               if(needTopRight) |
                  begin |
                     reqAddr = {(mbColumn+1),2'b00}; |
                     sendReq = True; |
                  end |
               else if(needTopLeft) |
                  begin |
                     reqAddr = topLeftAddr; |
                     sendReq = True; |
                  end |
            end |
         else if(intraReqCount==6 && needTopRight && needTopLeft) |
            begin |
               reqAddr = topLeftAddr; |
               sendReq = True; |
            end |
      end |
   if(sendReq) |
      begin |
         intraMemReqQ.enq(LoadReq reqAddr); |
         intraReqCount <= intraReqCount+1; |
      end |
   else |
      intraReqCount <= 0; |
   $display( "Trace Prediction: intraSendReq"); |
endrule |
|
|
rule intraReceiveNoResp ( intraRespCount>0 && currMbHor<zeroExtend(picWidth) && currMb-firstMb<zeroExtend(picWidth) ); |
   // Intra macroblock within the first picWidth MBs after firstMb: no neighbour |
   // response is consumed. All top prediction types are set to 15, which the |
   // prediction rules treat as "not available", and the block/pixel counters are |
   // reset before intra processing starts at step 1. |
   $display( "Trace Prediction: intraReceiveNoResp"); |
   intraRespCount <= 0; |
   intraStepCount <= 1; |
   blockNum <= 0; |
   pixelNum <= 0; |
   intra4x4typeTop <= replicate(15); |
   // An intra macroblock is reported as NotInter 1 on the block-MV output. |
   interOutBlockMvfifo.enq(NotInter 1); |
endrule |
|
|
rule intraReceiveResp ( intraRespCount>0 && intraRespCount<7 && currMbHor<zeroExtend(picWidth) &&& intraMemRespQ.first() matches tagged LoadResp .data); |
   // Consumes one neighbour load response per firing. Fields read from data: |
   //   [31:0] luma pixels, [47:32] and [63:48] chroma pixels, [67:64] prediction type. |
   // Responses 1..4 fill the top row; a later response is either the top-right |
   // block (Intra4x4 with a column to the right) or the top-left corner pixels. |
   Bool needTopRight = ((currMbHor+1)<zeroExtend(picWidth) && intrastate==Intra4x4); |
   Bool needTopLeft = (currMbHor>0 && currMb-firstMb>zeroExtend(picWidth)); |
   Bool lastResp = False; |
   if(intraRespCount<5) |
      begin |
         Bit#(2) slot = truncate(intraRespCount-1); |
         Vector#(5,Bit#(32)) topNext = intraTopVal; |
         topNext[slot] = data[31:0]; |
         if(intraRespCount==4) |
            begin |
               // Default for the top-right slot: the last top pixel repeated four times. |
               topNext[4] = {data[31:24],data[31:24],data[31:24],data[31:24]}; |
               lastResp = !(needTopRight || needTopLeft); |
            end |
         intraTopVal <= topNext; |
         intra4x4typeTop <= update(intra4x4typeTop, slot, data[67:64]); |
         intraTopValChroma0 <= update(intraTopValChroma0, slot, data[47:32]); |
         intraTopValChroma1 <= update(intraTopValChroma1, slot, data[63:48]); |
      end |
   else if(intraRespCount==5 && needTopRight) |
      begin |
         // Top-right block: its pixels are kept unless its type is 15, or 14 with |
         // constrained intra prediction enabled. |
         Bool topRightUnusable = (data[67:64]==15 || (data[67:64]==14 && ppsconstrained_intra_pred_flag==1)); |
         if(!topRightUnusable) |
            intraTopVal <= update(intraTopVal, 4, data[31:0]); |
         lastResp = !needTopLeft; |
      end |
   else |
      begin |
         // Top-left corner: replaces the low byte of left-value entry 0 and entry 0 |
         // of both chroma left vectors. |
         Bit#(40) leftEntry = intraLeftVal[0]; |
         intraLeftVal <= update(intraLeftVal, 0, {leftEntry[39:8],data[31:24]}); |
         intraLeftValChroma0 <= update(intraLeftValChroma0, 0, data[47:40]); |
         intraLeftValChroma1 <= update(intraLeftValChroma1, 0, data[63:56]); |
         lastResp = True; |
      end |
   intraMemRespQ.deq(); |
   if(lastResp) |
      begin |
         intraRespCount <= 0; |
         intraStepCount <= 1; |
         blockNum <= 0; |
         pixelNum <= 0; |
         interOutBlockMvfifo.enq(NotInter 1); |
      end |
   else |
      intraRespCount <= intraRespCount+1; |
   $display( "Trace Prediction: intraReceiveResp"); |
endrule |
|
|
rule intraPredTypeStep ( intraStepCount==1 && !nextoutputfifo.notEmpty()); |
   // First step for every intra block: announces the block kind on nextoutputfifo |
   // and, for Intra4x4, derives the block's prediction mode from the modes of the |
   // top and left neighbours and the decoded rem_intra4x4_pred_mode entry. |
   Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
   Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
   Bit#(4) topType = select(intra4x4typeTop, blockHor); |
   Bit#(4) leftType = 15; |
   if(currMbHor==0 && blockNum==0) |
      // First block of an MB in column 0: all left types are reset to 15. |
      intra4x4typeLeft <= replicate(15); |
   else |
      leftType = select(intra4x4typeLeft, blockVer); |
   intraStepCount <= intraStepCount+1; |
   if(intrastate==Intra4x4) |
      begin |
         // Type 15, or type 14 under constrained intra prediction, counts as unavailable. |
         Bool topOk = !(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)); |
         Bool leftOk = !(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)); |
         // Neighbour types above 8 are replaced by mode 2. |
         Bit#(4) topMode = (topType>8) ? 2 : topType; |
         Bit#(4) leftMode = (leftType>8) ? 2 : leftType; |
         // Predicted mode: 2 if either neighbour is unavailable, else the smaller mode. |
         Bit#(4) predType = (!topOk || !leftOk) ? 2 : ((topMode > leftMode) ? leftMode : topMode); |
         Bit#(4) remType = rem_intra4x4_pred_mode.first(); |
         rem_intra4x4_pred_mode.deq(); |
         // Bit 3 set selects the predicted mode; otherwise the low value is a |
         // remaining mode that skips over the predicted one. |
         Bit#(4) curType = (remType[3] == 1) ? predType : ((remType < predType) ? remType : remType+1); |
         cur_intra4x4_pred_mode <= curType; |
         if(blockNum == 15) |
            nextoutputfifo.enq(Intra4x4PlusChroma); |
         else |
            nextoutputfifo.enq(Intra4x4); |
         $display( "TRACE Prediction: intraPredTypeStep currMbHor currMbVer blockNum topType leftType predType remType curType %0d %0d %0d %0d %0d %0d %0d %0d",currMbHor,currMbVer,blockNum,topType,leftType,predType,remType,curType); |
      end |
   else |
      nextoutputfifo.enq(NonSkipMB); |
endrule |
|
|
rule intraProcessStep ( intraStepCount>1 ); |
$display( "TRACE Prediction: intraProcessStep %0d %0d", blockNum, pixelNum);//////////////////// |
//$display( "TRACE Prediction: intraProcessStep intraTopVal %h %h %h %h %h",intraTopVal[4],intraTopVal[3],intraTopVal[2],intraTopVal[1],intraTopVal[0]);///////////////// |
Bit#(1) outFlag = 0; |
Bit#(4) nextIntraStepCount = intraStepCount+1; |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Vector#(4,Bit#(8)) predVector = replicate(0); |
|
Bit#(4) topType = select(intra4x4typeTop, blockHor); |
Bit#(4) leftType = select(intra4x4typeLeft, blockVer); |
Bit#(1) topAvailable; |
Bit#(1) leftAvailable; |
if(topType==15 || (topType==14 && ppsconstrained_intra_pred_flag==1)) |
topAvailable = 0; |
else |
topAvailable = 1; |
if(leftType==15 || (leftType==14 && ppsconstrained_intra_pred_flag==1)) |
leftAvailable = 0; |
else |
leftAvailable = 1; |
if(blockNum==0 && pixelNum==0 && intraChromaFlag==0) |
begin |
intraChromaTopAvailable <= topAvailable; |
intraChromaLeftAvailable <= leftAvailable; |
end |
if(intrastate==Intra4x4 && intraChromaFlag==0) |
begin |
if(intraStepCount==2) |
begin |
outFlag = 1; |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(32) topMidValSet = select(intraTopVal,zeroExtend(blockHor)); |
Bit#(32) topRightValSet = select(intraTopVal,{1'b0,blockHor}+1); |
Bit#(72) topValSet; |
if((blockNum[3:2]==3 && blockNum[0]==1) || blockNum[1:0]==3) |
topValSet = {topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet[31:24],topMidValSet,leftValSet[7:0]}; |
else |
topValSet = {topRightValSet,topMidValSet,leftValSet[7:0]}; |
$display( "TRACE Prediction: intraProcessStep intra4x4 %0d %0d %h %h", cur_intra4x4_pred_mode, blockNum, leftValSet, topValSet);//////////////////// |
Bit#(4) topSelect1 = 0; |
Bit#(4) topSelect2 = 0; |
Bit#(4) topSelect3 = 0; |
Bit#(3) leftSelect1 = 0; |
Bit#(3) leftSelect2 = 0; |
Bit#(3) leftSelect3 = 0; |
Bit#(10) tempVal1 = 0; |
Bit#(10) tempVal2 = 0; |
Bit#(10) tempVal3 = 0; |
case(cur_intra4x4_pred_mode) |
0://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
topSelect1 = fromInteger(pixelHor); |
Bit#(8) topVal = intra4x4SelectTop(topValSet,topSelect1); |
predVector[pixelHor] = topVal; |
end |
end |
1://horizontal |
begin |
leftSelect1 = zeroExtend(pixelVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,leftSelect1); |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = leftVal; |
end |
2://dc |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(10) tempTopSum = zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+zeroExtend(topValSet[39:32]) + 2; |
Bit#(10) tempLeftSum = zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]) + 2; |
Bit#(11) tempTotalSum = zeroExtend(tempTopSum)+zeroExtend(tempLeftSum); |
Bit#(8) topSum = tempTopSum[9:2]; |
Bit#(8) leftSum = tempLeftSum[9:2]; |
Bit#(8) totalSum = tempTotalSum[10:3]; |
if(topAvailable==1 && leftAvailable==1) |
predVector[pixelHor] = totalSum; |
else if(topAvailable==1) |
predVector[pixelHor] = topSum; |
else if(leftAvailable==1) |
predVector[pixelHor] = leftSum; |
else |
predVector[pixelHor] = 8'b10000000; |
end |
end |
3://diagonal down left |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) selectNum = fromInteger(pixelHor)+zeroExtend(pixelVer); |
if(pixelHor==3 && pixelVer==3) |
begin |
topSelect1 = 6; |
topSelect2 = 7; |
topSelect3 = 7; |
end |
else |
begin |
topSelect1 = selectNum; |
topSelect2 = selectNum+1; |
topSelect3 = selectNum+2; |
end |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
4://diagonal down right |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
if(fromInteger(pixelHor) > pixelVer) |
begin |
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer); |
topSelect2 = topSelect3-1; |
topSelect1 = topSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(fromInteger(pixelHor) < pixelVer) |
begin |
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor); |
leftSelect2 = leftSelect3-1; |
leftSelect1 = leftSelect3-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
5://vertical right |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelHor = fromInteger(pixelHor); |
Bit#(4) zVR = (tempPixelHor<<1)-zeroExtend(pixelVer); |
if(zVR<=6 && zVR>=0) |
begin |
topSelect3 = fromInteger(pixelHor)-zeroExtend(pixelVer>>1); |
topSelect2 = topSelect3-1; |
if(zVR==1 || zVR==3 || zVR==5) |
topSelect1 = topSelect3-2; |
else |
topSelect1 = topSelect3; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
else if(zVR==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
leftSelect1 = zeroExtend(pixelVer)-1; |
leftSelect2 = leftSelect1-1; |
leftSelect3 = leftSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
6://horizontal down |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHD = (tempPixelVer<<1)-fromInteger(pixelHor); |
if(zHD<=6 && zHD>=0) |
begin |
leftSelect3 = zeroExtend(pixelVer)-fromInteger(pixelHor/2); |
leftSelect2 = leftSelect3-1; |
if(zHD==1 || zHD==3 || zHD==5) |
leftSelect1 = leftSelect3-2; |
else |
leftSelect1 = leftSelect3; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
end |
else if(zHD==-1) |
begin |
leftSelect1 = 0; |
leftSelect2 = -1; |
topSelect1 = 0; |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
end |
else |
begin |
topSelect1 = fromInteger(pixelHor)-1; |
topSelect2 = topSelect1-1; |
topSelect3 = topSelect1-2; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
end |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
7://vertical left |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
topSelect1 = fromInteger(pixelHor)+zeroExtend(pixelVer>>1); |
topSelect2 = topSelect1+1; |
if(pixelVer==1 || pixelVer==3) |
topSelect3 = topSelect1+2; |
else |
topSelect3 = topSelect1; |
tempVal1 = zeroExtend(intra4x4SelectTop(topValSet,topSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectTop(topValSet,topSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectTop(topValSet,topSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
8://horizontal up |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempPixelVer = zeroExtend(pixelVer); |
Bit#(4) zHU = (tempPixelVer<<1)+fromInteger(pixelHor); |
if(zHU<=4) |
begin |
leftSelect1 = zeroExtend(pixelVer)+fromInteger(pixelHor/2); |
leftSelect2 = leftSelect1+1; |
if(zHU==1 || zHU==3) |
leftSelect3 = leftSelect1+2; |
else |
leftSelect3 = leftSelect1; |
end |
else |
begin |
if(zHU==5) |
leftSelect1 = 2; |
else |
leftSelect1 = 3; |
leftSelect2 = 3; |
leftSelect3 = 3; |
end |
tempVal1 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect1)); |
tempVal2 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect2)); |
tempVal3 = zeroExtend(intra4x4SelectLeft(leftValSet,leftSelect3)); |
Bit#(10) predVal = tempVal1 + (tempVal2<<1) + tempVal3 + 2; |
predVector[pixelHor] = predVal[9:2]; |
end |
end |
default: $display( "ERROR Prediction: intraProcessStep intra4x4 unknown cur_intra4x4_pred_mode"); |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra4x4 unknown intraStepCount"); |
end |
else if(intrastate==Intra16x16 && intraChromaFlag==0) |
begin |
//$display( "TRACE Prediction: intraProcessStep intra16x16 %0d %0d %0d %h", intra16x16_pred_mode, currMb, blockNum, select(intraTopVal,blockHor));///////////////// |
case(intra16x16_pred_mode) |
0://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(32) topValSet = select(intraTopVal,blockHor); |
Bit#(8) topVal = select32to8(topValSet,fromInteger(pixelHor)); |
predVector[pixelHor] = topVal; |
end |
outFlag = 1; |
end |
1://horizontal |
begin |
Bit#(40) leftValSet = select(intraLeftVal,blockVer); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,zeroExtend(pixelVer)); |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = leftVal; |
outFlag = 1; |
end |
2://dc |
begin |
case(intraStepCount) |
2: |
begin |
if(topAvailable == 1) |
begin |
Bit#(32) topValSet = select(intraTopVal,0); |
intraSumA <= zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
else |
begin |
intraSumA <= 0; |
nextIntraStepCount = 6; |
end |
end |
3: |
begin |
Bit#(32) topValSet = select(intraTopVal,1); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
4: |
begin |
Bit#(32) topValSet = select(intraTopVal,2); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24]); |
end |
5: |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
intraSumA <= intraSumA+zeroExtend(topValSet[7:0])+zeroExtend(topValSet[15:8])+zeroExtend(topValSet[23:16])+zeroExtend(topValSet[31:24])+8; |
end |
6: |
begin |
if(leftAvailable == 1) |
begin |
Bit#(40) leftValSet = select(intraLeftVal,0); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
else |
nextIntraStepCount = 10; |
end |
7: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,1); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
8: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,2); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32]); |
end |
9: |
begin |
Bit#(40) leftValSet = select(intraLeftVal,3); |
intraSumA <= intraSumA+zeroExtend(leftValSet[15:8])+zeroExtend(leftValSet[23:16])+zeroExtend(leftValSet[31:24])+zeroExtend(leftValSet[39:32])+8; |
end |
10: |
begin |
if(leftAvailable == 1 && topAvailable == 1) |
intraSumA <= intraSumA >> 5; |
else if(leftAvailable == 1 || topAvailable == 1) |
intraSumA <= intraSumA >> 4; |
else |
intraSumA <= 128; |
end |
11: |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
default: $display( "ERROR Prediction: intraProcessStep intra16x16 DC unknown intraStepCount"); |
endcase |
end |
3://plane |
begin |
if(intraStepCount == 2) |
begin |
Bit#(32) topValSet = select(intraTopVal,3); |
Bit#(8) topVal = select32to8(topValSet,3); |
Bit#(40) leftValSet = select(intraLeftVal,3); |
Bit#(8) leftVal = intra4x4SelectLeft(leftValSet,3); |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 11) |
begin |
Bit#(4) xyPlusOne = intraStepCount-2; |
Bit#(4) xyPlusEight = intraStepCount+5; |
Bit#(4) sixMinusXY = 9-intraStepCount; |
Bit#(32) topValSet1 = select(intraTopVal,xyPlusEight[3:2]); |
Bit#(8) topVal1 = select32to8(topValSet1,xyPlusEight[1:0]); |
Bit#(40) leftValSet1 = select(intraLeftVal,xyPlusEight[3:2]); |
Bit#(8) leftVal1 = intra4x4SelectLeft(leftValSet1,zeroExtend(xyPlusEight[1:0])); |
Bit#(32) topValSet2=0; |
Bit#(8) topVal2; |
Bit#(40) leftValSet2; |
Bit#(8) leftVal2; |
if(intraStepCount==10) |
begin |
leftValSet2 = select(intraLeftVal,0); |
leftVal2 = intra4x4SelectLeft(leftValSet2,-1); |
topVal2 = leftVal2; |
end |
else |
begin |
topValSet2 = select(intraTopVal,sixMinusXY[3:2]); |
topVal2 = select32to8(topValSet2,sixMinusXY[1:0]); |
leftValSet2 = select(intraLeftVal,sixMinusXY[3:2]); |
leftVal2 = intra4x4SelectLeft(leftValSet2,zeroExtend(sixMinusXY[1:0])); |
end |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
end |
else if(intraStepCount == 11) |
begin |
Bit#(18) tempSumB = (5*signExtend(intraSumB)) + 32; |
Bit#(18) tempSumC = (5*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[17:6]); |
intraSumC <= signExtend(tempSumC[17:6]); |
end |
else if(intraStepCount == 12) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(5) positionHor = {1'b0,blockHor,fromInteger(pixelHor)}; |
Bit#(5) positionVer = {1'b0,blockVer,pixelVer}; |
Bit#(16) tempProductB = signExtend(intraSumB) * signExtend(positionHor-7); |
Bit#(16) tempProductC = signExtend(intraSumC) * signExtend(positionVer-7); |
Bit#(16) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
if(tempTotal[15]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[14:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
end |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intra16x16 plane unknown intraStepCount"); |
end |
endcase |
end |
else if(intraChromaFlag==1) |
begin |
//$display( "TRACE Prediction: intraProcessStep intraChroma %0d %0d %0d %0d %0d %0d %h %h %h %h %h %h %h %h",intra_chroma_pred_mode.first(),intraChromaTopAvailable,intraChromaLeftAvailable,currMb,blockNum,pixelNum,pack(intraLeftValChroma0),pack(intraTopValChroma0),pack(intraLeftValChroma1),pack(intraTopValChroma1),intraLeftValChroma0[0],intraTopValChroma0[3][15:8],intraLeftValChroma1[0],intraTopValChroma1[3][15:8]);/////////////////// |
Vector#(9,Bit#(8)) tempLeftVec; |
Vector#(4,Bit#(16)) tempTopVec; |
if(blockNum[2] == 0) |
begin |
tempLeftVec = intraLeftValChroma0; |
tempTopVec = intraTopValChroma0; |
end |
else |
begin |
tempLeftVec = intraLeftValChroma1; |
tempTopVec = intraTopValChroma1; |
end |
case(intra_chroma_pred_mode.first()) |
0://dc |
begin |
if(intraStepCount == 2) |
begin |
Bit#(1) useTop=0; |
Bit#(1) useLeft=0; |
if(blockNum[1:0] == 0 || blockNum[1:0] == 3) |
begin |
useTop = intraChromaTopAvailable; |
useLeft = intraChromaLeftAvailable; |
end |
else if(blockNum[1:0] == 1) |
begin |
if(intraChromaTopAvailable == 1) |
useTop = 1; |
else if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
end |
else if(blockNum[1:0] == 2) |
begin |
if(intraChromaLeftAvailable == 1) |
useLeft = 1; |
else if(intraChromaTopAvailable == 1) |
useTop = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown blockNum"); |
Bit#(10) topSum; |
Bit#(10) leftSum; |
Bit#(11) totalSum; |
if(blockHor[0] == 0) |
topSum = zeroExtend(tempTopVec[0][15:8])+zeroExtend(tempTopVec[0][7:0])+zeroExtend(tempTopVec[1][15:8])+zeroExtend(tempTopVec[1][7:0])+2; |
else |
topSum = zeroExtend(tempTopVec[2][15:8])+zeroExtend(tempTopVec[2][7:0])+zeroExtend(tempTopVec[3][15:8])+zeroExtend(tempTopVec[3][7:0])+2; |
if(blockVer[0] == 0) |
leftSum = zeroExtend(tempLeftVec[1])+zeroExtend(tempLeftVec[2])+zeroExtend(tempLeftVec[3])+zeroExtend(tempLeftVec[4])+2; |
else |
leftSum = zeroExtend(tempLeftVec[5])+zeroExtend(tempLeftVec[6])+zeroExtend(tempLeftVec[7])+zeroExtend(tempLeftVec[8])+2; |
totalSum = zeroExtend(topSum) + zeroExtend(leftSum); |
if(useTop==1 && useLeft==1) |
intraSumA <= zeroExtend(totalSum[10:3]); |
else if(useTop==1) |
intraSumA <= zeroExtend(topSum[9:2]); |
else if(useLeft==1) |
intraSumA <= zeroExtend(leftSum[9:2]); |
else |
intraSumA <= zeroExtend(8'b10000000); |
end |
else if(intraStepCount == 3) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
predVector[pixelHor] = intraSumA[7:0]; |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma dc unknown intraStepCount"); |
end |
1://horizontal |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) tempLeftIdx = {1'b0,blockVer[0],pixelVer} + 1; |
predVector[pixelHor] = select(tempLeftVec,tempLeftIdx); |
end |
outFlag = 1; |
end |
2://vertical |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(2) pixelHorTemp = fromInteger(pixelHor); |
Bit#(16) tempTopVal = select(tempTopVec,{blockHor[0],pixelHorTemp[1]}); |
if(pixelHorTemp[0] == 0) |
predVector[pixelHor] = tempTopVal[7:0]; |
else |
predVector[pixelHor] = tempTopVal[15:8]; |
end |
outFlag = 1; |
end |
3://plane |
begin |
if(intraStepCount == 2) |
begin |
Bit#(16) topValSet = tempTopVec[3]; |
Bit#(8) topVal = topValSet[15:8]; |
Bit#(8) leftVal = tempLeftVec[8]; |
Bit#(13) tempVal = zeroExtend(topVal) + zeroExtend(leftVal); |
intraSumA <= tempVal << 4; |
intraSumB <= 0; |
intraSumC <= 0; |
end |
else if(intraStepCount < 7) |
begin |
Bit#(3) xyPlusOne = truncate(intraStepCount)-2; |
Bit#(3) xyPlusFour = truncate(intraStepCount)+1; |
Bit#(4) twoMinusXY = 5-intraStepCount; |
Bit#(16) topValSet1 = select(tempTopVec,xyPlusFour[2:1]); |
Bit#(8) topVal1 = select16to8(topValSet1,xyPlusFour[0]); |
Bit#(4) tempLeftIdx1 = {1'b0,xyPlusFour} + 1; |
Bit#(8) leftVal1 = select(tempLeftVec,tempLeftIdx1); |
|
Bit#(16) topValSet2 = select(tempTopVec,twoMinusXY[2:1]); |
Bit#(8) topVal2; |
Bit#(8) leftVal2 = select(tempLeftVec,twoMinusXY+1); |
if(intraStepCount==6) |
topVal2 = leftVal2; |
else |
topVal2 = select16to8(topValSet2,twoMinusXY[0]); |
Bit#(15) diffH = zeroExtend(topVal1) - zeroExtend(topVal2); |
Bit#(15) diffV = zeroExtend(leftVal1) - zeroExtend(leftVal2); |
intraSumB <= intraSumB + (zeroExtend(xyPlusOne) * diffH); |
intraSumC <= intraSumC + (zeroExtend(xyPlusOne) * diffV); |
Int#(15) tempDisplayH = unpack(zeroExtend(xyPlusOne) * diffH); |
Int#(15) tempDisplayV = unpack(zeroExtend(xyPlusOne) * diffV); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane partH partV %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
end |
else if(intraStepCount == 7) |
begin |
Int#(15) tempDisplayH = unpack(intraSumB); |
Int#(15) tempDisplayV = unpack(intraSumC); |
//$display( "TRACE Prediction: intraProcessStep intraChroma plane H V %0d %0d",tempDisplayH,tempDisplayV);//////////////////// |
Bit#(19) tempSumB = (34*signExtend(intraSumB)) + 32; |
Bit#(19) tempSumC = (34*signExtend(intraSumC)) + 32; |
intraSumB <= signExtend(tempSumB[18:6]); |
intraSumC <= signExtend(tempSumC[18:6]); |
end |
else if(intraStepCount == 8) |
begin |
for(Integer pixelHor=0; pixelHor<4; pixelHor=pixelHor+1) |
begin |
Bit#(4) positionHor = {1'b0,blockHor[0],fromInteger(pixelHor)}; |
Bit#(4) positionVer = {1'b0,blockVer[0],pixelVer}; |
Bit#(17) tempProductB = signExtend(intraSumB) * signExtend(positionHor-3); |
Bit#(17) tempProductC = signExtend(intraSumC) * signExtend(positionVer-3); |
Bit#(17) tempTotal = tempProductB + tempProductC + zeroExtend(intraSumA) + 16; |
if(tempTotal[16]==1) |
predVector[pixelHor] = 0; |
else if(tempTotal[15:5] > 255) |
predVector[pixelHor] = 255; |
else |
predVector[pixelHor] = tempTotal[12:5]; |
end |
outFlag = 1; |
end |
else |
$display( "ERROR Prediction: intraProcessStep intraChroma plane unknown intraStepCount"); |
end |
endcase |
end |
else |
$display( "ERROR Prediction: intraProcessStep unknown intrastate"); |
|
if(outFlag==1) |
begin |
predictedfifo.enq(predVector); |
pixelNum <= pixelNum+4; |
if(pixelNum == 12) |
begin |
if(intraChromaFlag==0) |
begin |
blockNum <= blockNum+1; |
if(blockNum == 15) |
begin |
intraChromaFlag <= 1; |
intraStepCount <= 2; |
end |
else if(intrastate==Intra4x4) |
intraStepCount <= 1; |
end |
else |
begin |
if(blockNum == 7) |
begin |
blockNum <= 0; |
intraChromaFlag <= 0; |
intraStepCount <= 0; |
intra_chroma_pred_mode.deq(); |
end |
else |
begin |
blockNum <= blockNum+1; |
if(intra_chroma_pred_mode.first()==0) |
intraStepCount <= 2; |
else if(blockNum==3) |
intraStepCount <= 2; |
end |
end |
end |
end |
else |
intraStepCount <= nextIntraStepCount; |
//$display( "Trace Prediction: intraProcessStep"); |
endrule |
|
|
|
// Client sub-interface mem_client_intra: `request` is a Get view of |
// intraMemReqQ and `response` is a Put view of intraMemRespQ. |
interface Client mem_client_intra; |
interface Get request = fifoToGet(intraMemReqQ); |
interface Put response = fifoToPut(intraMemRespQ); |
endinterface |
// Client sub-interface mem_client_inter: `request` is a Get view of |
// interMemReqQ and `response` is a Put view of interMemRespQ. |
interface Client mem_client_inter; |
interface Get request = fifoToGet(interMemReqQ); |
interface Put response = fifoToPut(interMemRespQ); |
endinterface |
// mem_client_buffer is forwarded unchanged from interpolator.mem_client. |
interface Client mem_client_buffer = interpolator.mem_client; |
|
// Stream ports: ioin and ioin_InverseTrans are Put views of infifo and |
// infifo_ITB; ioout is a Get view of outfifo. |
interface Put ioin = fifoToPut(infifo); |
interface Put ioin_InverseTrans = fifoToPut(infifo_ITB); |
interface Get ioout = fifoToGet(outfifo); |
|
|
endmodule |
|
endpackage |
/trunk/src/IInterpolator.bsv
0,0 → 1,26
//********************************************************************** |
// Interface for interpolator |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IInterpolator; |
|
import H264Types::*; |
import GetPut::*; |
import Vector::*; |
import ClientServer::*; |
|
// Interface of the interpolator block. |
// NOTE(review): the comments below are derived from the signatures only; |
// the implementing module is not visible from this file. |
interface Interpolator; |
// Supplies a new picture width value of type Bit#(PicWidthSz). |
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth ); |
// Supplies a new picture height value of type Bit#(PicHeightSz). |
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight ); |
// Submits one InterpolatorIT work item. |
method Action request( InterpolatorIT inputdata ); |
// Value method returning a vector of four 8-bit values; presumably the |
// head of an output queue that deq() advances -- confirm in the module. |
method Vector#(4,Bit#(8)) first(); |
method Action deq(); |
// Signals the end of a frame to the interpolator. |
method Action endOfFrame(); |
// Memory port: issues InterpolatorLoadReq and accepts InterpolatorLoadResp. |
interface Client#(InterpolatorLoadReq,InterpolatorLoadResp) mem_client; |
endinterface |
|
endpackage |
|
/trunk/src/ExpGolomb.bsv
0,0 → 1,197
//********************************************************************** |
// Exp-Golomb codes |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package ExpGolomb; |
|
import H264Types::*; |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
// Scans the 33 most-significant bits of inbuffer, starting at the MSB, and |
// returns the 1-based position of the first set bit (number of leading |
// zeros plus one). Returns 100 when none of those 33 bits is set. |
// NOTE(review): the original comment called this the "number of bits |
// consumed by exp-golomb code"; how callers use the value is not visible here. |
(* noinline *) |
function Bufcount expgolomb_numbits32( Buffer inbuffer ); |
   Bufcount firstOnePos = 100; |
   Bool seenOne = False; |
   for(Integer pos=1; pos<=33; pos=pos+1) |
      begin |
         // Only the set bit closest to the MSB is recorded. |
         if(!seenOne && inbuffer[buffersize-fromInteger(pos)]==1'b1) |
            begin |
               firstOnePos = fromInteger(pos); |
               seenOne = True; |
            end |
      end |
   return firstOnePos; |
endfunction |
|
// Exp-Golomb codeNum from a 33-bit window: takes the top egnumbits bits of |
// the 33 most-significant bits of inbuffer and subtracts one. |
(* noinline *) |
function Bit#(33) expgolomb_codenum32( Buffer inbuffer, Bufcount egnumbits ); |
   // Low-order bits of the window that do not belong to the code. |
   Bufcount dropBits = 33-egnumbits; |
   Bit#(33) window = inbuffer[buffersize-1:buffersize-33]; |
   Bit#(33) codeBits = window >> zeroExtend(dropBits); |
   return codeBits-1; |
endfunction |
|
// Unsigned Exp-Golomb value: the low 32 bits of expgolomb_codenum32. |
(* noinline *) |
function Bit#(32) expgolomb_unsigned32( Buffer inbuffer, Bufcount egnumbits ); |
   Bit#(33) fullCodeNum = expgolomb_codenum32( inbuffer, egnumbits ); |
   Bit#(32) result = truncate(fullCodeNum); |
   return result; |
endfunction |
|
// Signed Exp-Golomb value: magnitude is (codeNum+1)>>1; the result is |
// positive when codeNum is odd and two's-complement negated when it is even. |
// The low 32 bits are returned. |
(* noinline *) |
function Bit#(32) expgolomb_signed32( Buffer inbuffer, Bufcount egnumbits ); |
   Bit#(33) codenum = expgolomb_codenum32( inbuffer, egnumbits ); |
   Bit#(33) magnitude = (codenum+1) >> 1; |
   Bit#(33) signedValue; |
   if(codenum[0]==1) |
      signedValue = magnitude; |
   else |
      signedValue = (~magnitude)+1; |
   return truncate(signedValue); |
endfunction |
|
|
|
// Length in bits of the Exp-Golomb code at the top of inbuffer: finds the |
// first set bit within the 17 most-significant bits (1-based position p) |
// and returns 2*p-1. Returns 100 when none of those 17 bits is set. |
(* noinline *) |
function Bufcount expgolomb_numbits( Buffer inbuffer ); |
   Bufcount codeLen = 100; |
   Bool seenOne = False; |
   for(Integer pos=1; pos<=17; pos=pos+1) |
      begin |
         // Only the set bit closest to the MSB determines the length. |
         if(!seenOne && inbuffer[buffersize-fromInteger(pos)]==1'b1) |
            begin |
               codeLen = (fromInteger(pos)*2)-1; |
               seenOne = True; |
            end |
      end |
   return codeLen; |
endfunction |
|
// Exp-Golomb codeNum (up to 17 bits) of the code at the top of inbuffer. |
// The leading zeros are shifted out of a 33-bit window, the remaining |
// (leading zeros + 1) bits are right-aligned, and one is subtracted. |
(* noinline *) |
function Bit#(17) expgolomb_codenum( Buffer inbuffer ); |
   // expgolomb_numbits yields 2*p-1, so halving gives p-1 leading zeros. |
   Bufcount leadingZeros = expgolomb_numbits( inbuffer ) >> 1; |
   Bit#(33) window = inbuffer[buffersize-1:buffersize-33]; |
   Bit#(33) aligned = window << zeroExtend(leadingZeros); |
   Bit#(17) topBits = aligned[32:16]; |
   Bufcount dropBits = 17-leadingZeros-1; |
   return (topBits >> zeroExtend(dropBits))-1; |
endfunction |
|
// Unsigned Exp-Golomb value: the low 16 bits of expgolomb_codenum. |
(* noinline *) |
function Bit#(16) expgolomb_unsigned( Buffer inbuffer ); |
   Bit#(17) fullCodeNum = expgolomb_codenum( inbuffer ); |
   Bit#(16) result = truncate(fullCodeNum); |
   return result; |
endfunction |
|
// Signed Exp-Golomb value: magnitude is (codeNum+1)>>1; the result is |
// positive when codeNum is odd and two's-complement negated when it is even. |
// The low 16 bits are returned. |
(* noinline *) |
function Bit#(16) expgolomb_signed( Buffer inbuffer ); |
   Bit#(17) codenum = expgolomb_codenum( inbuffer ); |
   Bit#(17) magnitude = (codenum+1) >> 1; |
   Bit#(17) signedValue; |
   if(codenum[0]==1) |
      signedValue = magnitude; |
   else |
      signedValue = (~magnitude)+1; |
   return truncate(signedValue); |
endfunction |
|
// Decodes a mapped Exp-Golomb value into a 6-bit coded_block_pattern. |
// The low 6 bits of expgolomb_codenum(inbuffer) index one of two lookup |
// tables: the first when mbPartPredMode(mbtype,0) is Intra_4x4, the second |
// otherwise. Neither case statement has an arm for codenum 48..63. |
// NOTE(review): the table values look like the H.264 coded_block_pattern |
// mapping for Intra_4x4 versus Inter macroblocks -- confirm against the spec. |
(* noinline *) |
function Bit#(6) expgolomb_coded_block_pattern( Buffer inbuffer, MbType mbtype );//mapped exp-golomb code (coded_block_pattern) lookup |
Bit#(6) codenum = truncate(expgolomb_codenum( inbuffer )); |
if(mbPartPredMode(mbtype,0) == Intra_4x4) |
begin |
// Table used for the Intra_4x4 prediction mode. |
case(codenum) |
0: return 47; |
1: return 31; |
2: return 15; |
3: return 0; |
4: return 23; |
5: return 27; |
6: return 29; |
7: return 30; |
8: return 7; |
9: return 11; |
10: return 13; |
11: return 14; |
12: return 39; |
13: return 43; |
14: return 45; |
15: return 46; |
16: return 16; |
17: return 3; |
18: return 5; |
19: return 10; |
20: return 12; |
21: return 19; |
22: return 21; |
23: return 26; |
24: return 28; |
25: return 35; |
26: return 37; |
27: return 42; |
28: return 44; |
29: return 1; |
30: return 2; |
31: return 4; |
32: return 8; |
33: return 17; |
34: return 18; |
35: return 20; |
36: return 24; |
37: return 6; |
38: return 9; |
39: return 22; |
40: return 25; |
41: return 32; |
42: return 33; |
43: return 34; |
44: return 36; |
45: return 40; |
46: return 38; |
47: return 41; |
endcase |
end |
else |
begin |
// Table used for every other prediction mode. |
case(codenum) |
0: return 0; |
1: return 16; |
2: return 1; |
3: return 2; |
4: return 4; |
5: return 8; |
6: return 32; |
7: return 3; |
8: return 5; |
9: return 10; |
10: return 12; |
11: return 15; |
12: return 47; |
13: return 7; |
14: return 11; |
15: return 13; |
16: return 14; |
17: return 6; |
18: return 9; |
19: return 31; |
20: return 35; |
21: return 37; |
22: return 42; |
23: return 44; |
24: return 33; |
25: return 34; |
26: return 36; |
27: return 40; |
28: return 39; |
29: return 43; |
30: return 45; |
31: return 46; |
32: return 17; |
33: return 18; |
34: return 20; |
35: return 24; |
36: return 19; |
37: return 21; |
38: return 26; |
39: return 28; |
40: return 23; |
41: return 27; |
42: return 29; |
43: return 30; |
44: return 22; |
45: return 25; |
46: return 38; |
47: return 41; |
endcase |
end |
endfunction |
|
|
|
endpackage |
/trunk/src/mkNalUnwrap.bsv
0,0 → 1,149
//********************************************************************** |
// NAL unit unwrapper implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkNalUnwrap; |
|
import H264Types::*; |
import INalUnwrap::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
|
//----------------------------------------------------------- |
// NAL Unwrapper Module |
//----------------------------------------------------------- |
|
// Converts the items arriving on ioin into NalUnwrapOT items on ioout. |
// Up to three input bytes are held in buffera (newest), bufferb and bufferc |
// (oldest); bufcount says how many of them are valid. Once the window is |
// full the rules recognise: |
//   * 00 00 01, or 00 00 00 followed by 01: emit NewUnit and restart; |
//   * 00 00 03 followed by a byte < 4: discard the window and keep two |
//     pending zero bytes (the 03 is dropped); |
//   * anything else: shift one byte through the window. |
// Zero bytes leaving the window are counted in zerocount instead of being |
// emitted; they are written out as RbspByte 0 once a non-zero byte reaches |
// the oldest position, and discarded when a NewUnit marker is found. |
// NOTE(review): the patterns match the H.264 byte-stream start code and |
// emulation-prevention byte; confirm against the specification. |
module mkNalUnwrap( INalUnwrap ); |
|
   FIFO#(InputGenOT) infifo <- mkFIFO; |
   FIFO#(NalUnwrapOT) outfifo <- mkFIFO; |
   // Three-byte window: buffera is the newest byte, bufferc the oldest. |
   Reg#(Bit#(8)) buffera <- mkReg(0); |
   Reg#(Bit#(8)) bufferb <- mkReg(0); |
   Reg#(Bit#(8)) bufferc <- mkReg(0); |
   // Number of valid bytes in the window (0..3). |
   Reg#(Bit#(2)) bufcount <- mkReg(0); |
   // Zero bytes that left the window but have not been emitted yet. |
   Reg#(Bit#(27)) zerocount <- mkReg(0); |
|
|
   //----------------------------------------------------------- |
   // Rules |
|
   // Shift data bytes into the window until it holds three bytes. |
   rule fillbuffer (bufcount<3 |
                    &&& infifo.first() matches tagged DataByte .dbyte); |
      bufferc <= bufferb; |
      bufferb <= buffera; |
      buffera <= dbyte; |
      bufcount <= bufcount+1; |
      infifo.deq(); |
   endrule |
|
   // Start-of-unit marker found: drop the window and any pending zeros. |
   rule newnalunit (bufcount==3 |
                    &&& infifo.first() matches tagged DataByte .dbyte |
                    &&& ((bufferc==0 && bufferb==0 && buffera==1) |
                         || (bufferc==0 && bufferb==0 && buffera==0 && dbyte==1))); |
      zerocount <= 0; |
      bufcount <= 0; |
      // In the 00 00 00 01 form the 01 is still at the head of infifo and |
      // must be consumed as well. |
      if(!(bufferc==0 && bufferb==0 && buffera==1)) |
         infifo.deq(); |
      outfifo.enq(NewUnit); |
      $display("ccl1newunit"); |
   endrule |
|
   // 00 00 03 followed by a byte < 4: keep the two zeros as pending zeros, |
   // drop the 03. The following byte stays in infifo for fillbuffer. |
   rule remove3byte (bufcount==3 |
                     &&& infifo.first() matches tagged DataByte .dbyte |
                     &&& (bufferc==0 && bufferb==0 && buffera==3 && dbyte<4)); |
      zerocount <= zerocount+2; |
      bufcount <= 0; |
   endrule |
|
   // Ordinary payload: neither of the special patterns above matches. |
   rule normalop (bufcount==3 |
                  &&& infifo.first() matches tagged DataByte .dbyte |
                  &&& !(bufferc==0 && bufferb==0 && buffera==3 && dbyte<4) |
                  &&& !((bufferc==0 && bufferb==0 && buffera==1) |
                        || (bufferc==0 && bufferb==0 && buffera==0 && dbyte==1))); |
      if(bufferc==0) |
         begin |
            // Oldest byte is zero: count it instead of emitting it. |
            zerocount <= zerocount+1; |
            bufferc <= bufferb; |
            bufferb <= buffera; |
            buffera <= dbyte; |
            infifo.deq(); |
         end |
      else if(zerocount==0) |
         begin |
            outfifo.enq(RbspByte bufferc); |
            $display("ccl1rbspbyte %h", bufferc); |
            bufferc <= bufferb; |
            bufferb <= buffera; |
            buffera <= dbyte; |
            infifo.deq(); |
         end |
      else |
         begin |
            // Flush one pending zero per firing before the non-zero byte. |
            zerocount <= zerocount-1; |
            outfifo.enq(RbspByte 0); |
            $display("ccl1rbspbyte 00"); |
         end |
   endrule |
|
   // End of input: drain the window one step per firing, then forward |
   // EndOfFile once the window is empty. |
   rule endfileop(infifo.first() matches tagged EndOfFile); |
      case ( bufcount ) |
         3: |
            begin |
               if(bufferc==0 && bufferb==0 && buffera<4) |
                  begin |
                     // Window holds 00 00 0x: discard it and the pending zeros. |
                     bufcount <= 0; |
                     zerocount <= 0; |
                  end |
               else if(zerocount==0) |
                  begin |
                     bufcount <= 2; |
                     outfifo.enq(RbspByte bufferc); |
                     $display("ccl1rbspbyte %h", bufferc); |
                  end |
               else |
                  begin |
                     zerocount <= zerocount-1; |
                     outfifo.enq(RbspByte 0); |
                     $display("ccl1rbspbyte 00"); |
                  end |
            end |
         2: |
            begin |
               bufcount <= 1; |
               // The trace line is printed only when the byte is really |
               // emitted, so the log matches the contents of outfifo. |
               if(!(bufferb==0 && buffera==0)) |
                  begin |
                     outfifo.enq(RbspByte bufferb); |
                     $display("ccl1rbspbyte %h", bufferb); |
                  end |
            end |
         1: |
            begin |
               bufcount <= 0; |
               // Same as above: a suppressed zero byte is not traced. |
               if(!(buffera==0)) |
                  begin |
                     outfifo.enq(RbspByte buffera); |
                     $display("ccl1rbspbyte %h", buffera); |
                  end |
            end |
         0: |
            begin |
               infifo.deq(); |
               outfifo.enq(EndOfFile); |
               $display("EndOfFile reached (NalUnwrap)"); |
            end |
      endcase |
|
   endrule |
|
|
   interface Put ioin = fifoToPut(infifo); |
   interface Get ioout = fifoToGet(outfifo); |
|
endmodule |
|
endpackage |
/trunk/src/IMemED.bsv
0,0 → 1,21
//********************************************************************** |
// Interface for Memory for Entropy Decoding |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IMemED; |
|
import H264Types::*; |
import ClientServer::*; |
import GetPut::*; |
|
interface IMemED #(type index_size, type data_size); |
|
// Interface from processor to cache |
interface Server#(MemReq#(index_size,data_size),MemResp#(data_size)) mem_server; |
|
endinterface |
|
endpackage |
/trunk/src/mkBufferControl.bsv
0,0 → 1,970
//********************************************************************** |
// Buffer Controller |
//---------------------------------------------------------------------- |
// |
// |
|
package mkBufferControl; |
|
import H264Types::*; |
|
import IBufferControl::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Tag type of the `outprocess` register in mkBufferControl (reset to Idle). |
// NOTE(review): Y, U and V presumably name the picture plane currently |
// being output -- confirm against the rules that use outprocess. |
typedef union tagged |
{ |
void Idle; //not working on anything in particular |
void Y; |
void U; |
void V; |
} |
Outprocess deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Short term pic list submodule |
//----------------------------------------------------------- |
|
// Operation currently running inside mkShortTermPicList. |
typedef union tagged |
{ |
void Idle; //not working on anything in particular |
// Looking for the entry to remove (started by remove with flag False). |
void Remove; |
// As Remove, but the removed entry's slot is also pushed to the result queue. |
void RemoveOutput; |
// Entry located: the following entries are being moved down by one. |
void RemoveFound; |
// Writing the run of entries requested by insert_gap. |
void InsertGap; |
// Looking up one frame number (started by search). |
void Search; |
// Pushing every entry's slot to the result queue (started by listAll). |
void ListAll; |
} |
ShortTermPicListState deriving(Eq,Bits); |
|
// Interface of the short-term reference picture list implemented by |
// mkShortTermPicList. Query results are queued internally; they are read |
// with resultSlot() and consumed with deq(). |
interface ShortTermPicList; |
// Empties the list. |
method Action clear(); |
// Appends (frameNum, slot); the entry count grows only while it is below maxAllowed. |
method Action insert( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed ); |
// Appends min(gap+1, maxAllowed) consecutive frame numbers ending at |
// frameNum, wrapping modulo 2^log2_max_frame_num. Only the last entry |
// receives `slot`; the earlier ones are stored with slot value 31. |
method Action insert_gap( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed, Bit#(16) gap, Bit#(5) log2_max_frame_num ); |
// Removes the entry whose frame number equals frameNum. When |
// removeOutputFlag is True the removed entry's slot is queued as a result. |
method Action remove( Bit#(16) frameNum, Bool removeOutputFlag ); |
// Looks up frameNum, newest entry first, and queues its slot as a result. |
method Action search( Bit#(16) frameNum ); |
// Queues the slot of every entry, newest first, followed by Invalid. |
method Action listAll(); |
// Discards the oldest queued result. |
method Action deq(); |
// Oldest queued result. |
method Maybe#(Bit#(5)) resultSlot(); |
// Current number of entries. |
method Bit#(5) numPics(); |
endinterface |
|
// Short-term reference picture list kept as a circular buffer in rfile. |
// Each entry is a (frame number, slot) pair. nextPic is the index written |
// by the next insertion and picCount the number of live entries, so the |
// live entries occupy the picCount positions immediately before nextPic. |
// Every method except deq and resultSlot is guarded on state being Idle. |
// remove, insert_gap, search and listAll only start an operation; the rules |
// below carry it out over several cycles. Results go through returnList. |
module mkShortTermPicList( ShortTermPicList ); |
   // Index following addrFunc in the circular buffer (wraps to 0). |
   function Bit#(5) shortTermPicListNext( Bit#(5) addrFunc ); |
      if(addrFunc<maxRefFrames-1) |
         return addrFunc+1; |
      else |
         return 0; |
   endfunction |
   // Index preceding addrFunc in the circular buffer (wraps to maxRefFrames-1). |
   function Bit#(5) shortTermPicListPrev( Bit#(5) addrFunc ); |
      if(addrFunc==0) |
         return maxRefFrames-1; |
      else |
         return addrFunc-1; |
   endfunction |
|
   RFile1#(Bit#(5),Tuple2#(Bit#(16),Bit#(5))) rfile <- mkRFile1(0,maxRefFrames-1); |
   Reg#(ShortTermPicListState) state <- mkReg(Idle); |
   Reg#(Bit#(5)) log2_mfn <- mkReg(0);   // log2_max_frame_num latched by insert_gap |
   Reg#(Bit#(5)) nextPic <- mkReg(0);    // next index to be written |
   Reg#(Bit#(5)) picCount <- mkReg(0);   // number of live entries |
   Reg#(Bit#(5)) tempPic <- mkReg(0);    // scan index (insert_gap: slot of the last entry) |
   Reg#(Bit#(5)) tempCount <- mkReg(0);  // scan counter (insert_gap: entries left to write) |
   Reg#(Bit#(16)) tempNum <- mkReg(0);   // frame number searched for / written next |
   FIFO#(Maybe#(Bit#(5))) returnList <- mkFIFO(); |
|
   // Remove: scan from the oldest entry towards the newest until the frame |
   // number matches, then (RemoveFound) copy each later entry one position |
   // down and finally shrink the list. |
   // NOTE: if no entry matches, only the ERROR message is printed; state is |
   // not changed by this rule in that case. |
   rule removing ( state==Remove || state==RemoveOutput || state==RemoveFound ); |
      if(state!=RemoveFound) |
         begin |
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic); |
            if(tpl_1(temp)==tempNum) |
               begin |
                  state <= RemoveFound; |
                  if(state==RemoveOutput) |
                     returnList.enq(Valid tpl_2(temp)); |
               end |
            if(tempCount>=picCount) |
               $display( "ERROR BufferControl: ShortTermPicList removing not found"); |
         end |
      else |
         begin |
            Bit#(5) tempPrev = shortTermPicListPrev(tempPic); |
            rfile.upd(tempPrev,rfile.sub(tempPic)); |
            if(tempCount==picCount) |
               begin |
                  // Scan index has reached nextPic: the list is compacted. |
                  picCount <= picCount-1; |
                  nextPic <= tempPrev; |
                  state <= Idle; |
               end |
         end |
      tempCount <= tempCount+1; |
      tempPic <= shortTermPicListNext(tempPic); |
   endrule |
|
   // Writes one entry per firing while tempCount>0. All but the last entry |
   // get slot 31; the last one gets the slot saved in tempPic. tempNum |
   // advances modulo 2^log2_mfn. Returns to Idle once tempCount reaches 0. |
   rule insertingGap ( state matches tagged InsertGap ); |
      if(tempCount>0) |
         begin |
            if(tempCount>1) |
               rfile.upd(nextPic,tuple2(tempNum,31)); |
            else |
               rfile.upd(nextPic,tuple2(tempNum,tempPic)); |
            nextPic <= shortTermPicListNext(nextPic); |
         end |
      else |
         state <= Idle; |
      Bit#(17) tempOne = 1; |
      Bit#(17) maxPicNum = tempOne << log2_mfn; |
      if(zeroExtend(tempNum) == maxPicNum-1) |
         tempNum <= 0; |
      else |
         tempNum <= tempNum+1; |
      tempCount <= tempCount-1; |
   endrule |
|
   // Scans from the newest entry backwards; queues the slot of the first |
   // entry whose frame number equals tempNum. |
   // NOTE: when all picCount entries were scanned without a match only the |
   // ERROR message is printed; state is not changed. |
   rule searching ( state matches tagged Search ); |
      if(tempCount<picCount) |
         begin |
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic); |
            if(tpl_1(temp)==tempNum) |
               begin |
                  returnList.enq(Valid tpl_2(temp)); |
                  state <= Idle; |
               end |
            tempPic <= shortTermPicListPrev(tempPic); |
            tempCount <= tempCount+1; |
         end |
      else |
         $display( "ERROR BufferControl: ShortTermPicList searching not found"); |
   endrule |
|
   // Queues every entry's slot, newest first, then Invalid as terminator. |
   rule listingAll ( state matches tagged ListAll ); |
      if(tempCount<picCount) |
         begin |
            Tuple2#(Bit#(16),Bit#(5)) temp = rfile.sub(tempPic); |
            returnList.enq(Valid tpl_2(temp)); |
            tempPic <= shortTermPicListPrev(tempPic); |
            tempCount <= tempCount+1; |
         end |
      else |
         begin |
            returnList.enq(Invalid); |
            state <= Idle; |
         end |
   endrule |
|
   method Action clear() if(state matches tagged Idle); |
      picCount <= 0; |
      nextPic <= 0; |
   endmethod |
|
   // Single-cycle append. When picCount has already reached maxAllowed the |
   // count is left unchanged, so the oldest entry falls out of the list. |
   method Action insert( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed ) if(state matches tagged Idle); |
      rfile.upd(nextPic,tuple2(frameNum,slot)); |
      nextPic <= shortTermPicListNext(nextPic); |
      if(maxAllowed>picCount) |
         picCount <= picCount+1; |
   endmethod |
|
   // Starts insertion of temp = min(gap+1, maxAllowed) entries with frame |
   // numbers frameNum-temp+1 .. frameNum (modulo 2^log2_max_frame_num). |
   method Action insert_gap( Bit#(16) frameNum, Bit#(5) slot, Bit#(5) maxAllowed, Bit#(16) gap, Bit#(5) log2_max_frame_num ) if(state matches tagged Idle); |
      state <= InsertGap; |
      log2_mfn <= log2_max_frame_num; |
      if(zeroExtend(picCount)+gap+1 >= zeroExtend(maxAllowed)) |
         picCount <= maxAllowed; |
      else |
         picCount <= truncate(zeroExtend(picCount)+gap+1); |
      Bit#(5) temp; |
      if(gap+1 >= zeroExtend(maxAllowed)) |
         temp = maxAllowed; |
      else |
         temp = truncate(gap+1); |
      tempCount <= temp; |
      Bit#(17) tempOne = 1; |
      Bit#(17) maxPicNum = tempOne << log2_max_frame_num; |
      Bit#(17) tempFrameNum = zeroExtend(frameNum); |
      // First frame number of the run is frameNum+1-temp. Wrap-around is |
      // needed only when that value would be negative. The comparison must |
      // be >=: with frameNum+1 == temp the run starts at frame number 0, and |
      // taking the wrap branch would yield maxPicNum, which is out of range |
      // whenever log2_max_frame_num < 16. |
      if(tempFrameNum+1 >= zeroExtend(temp)) |
         tempNum <= truncate(tempFrameNum+1-zeroExtend(temp)); |
      else |
         tempNum <= truncate(maxPicNum+tempFrameNum+1-zeroExtend(temp)); |
      tempPic <= slot; |
   endmethod |
|
   // Starts a removal; the scan begins at the oldest live entry, which is |
   // (nextPic - picCount) modulo maxRefFrames. |
   method Action remove( Bit#(16) frameNum, Bool removeOutputFlag ) if(state matches tagged Idle); |
      if(removeOutputFlag) |
         state <= RemoveOutput; |
      else |
         state <= Remove; |
      tempCount <= 0; |
      Bit#(5) temp = (maxRefFrames-picCount)+nextPic; |
      if(temp>maxRefFrames-1) |
         tempPic <= temp-maxRefFrames; |
      else |
         tempPic <= temp; |
      tempNum <= frameNum; |
   endmethod |
|
   // Starts a search at the newest entry. |
   method Action search( Bit#(16) frameNum ) if(state matches tagged Idle); |
      state <= Search; |
      tempCount <= 0; |
      tempPic <= shortTermPicListPrev(nextPic); |
      tempNum <= frameNum; |
   endmethod |
|
   // Starts a listing at the newest entry. |
   method Action listAll() if(state matches tagged Idle); |
      state <= ListAll; |
      tempCount <= 0; |
      tempPic <= shortTermPicListPrev(nextPic); |
   endmethod |
|
   method Action deq(); |
      returnList.deq(); |
   endmethod |
|
   method Maybe#(Bit#(5)) resultSlot(); |
      return returnList.first(); |
   endmethod |
|
   method Bit#(5) numPics() if(state matches tagged Idle); |
      return picCount; |
   endmethod |
endmodule |
|
//----------------------------------------------------------- |
// Long term pic list submodule |
//----------------------------------------------------------- |
|
// Operation currently running inside mkLongTermPicList. |
typedef union tagged |
{ |
void Idle; //not working on anything in particular |
// Invalidating entries from tempPic upward (started by clear or maxIndexPlus1). |
void Clear; |
// Pushing every valid entry to the result queue (started by listAll). |
void ListAll; |
} |
LongTermPicListState deriving(Eq,Bits); |
|
// Interface of the long-term reference picture list implemented by |
// mkLongTermPicList, a table indexed directly by the 5-bit frameNum argument. |
// Query results are read with resultSlot() and consumed with deq(). |
interface LongTermPicList; |
// Invalidates every entry with index below maxRefFrames. |
method Action clear(); |
// Stores `slot` at index frameNum (counted as new if the entry was Invalid). |
method Action insert( Bit#(5) frameNum, Bit#(5) slot ); |
// Invalidates the entry at index frameNum. |
method Action remove( Bit#(5) frameNum ); |
// Invalidates every entry with index >= the argument (and below maxRefFrames). |
method Action maxIndexPlus1( Bit#(5) maxAllowed ); |
// Queues the entry at index frameNum (Valid slot, or Invalid if unused). |
method Action search( Bit#(5) frameNum ); |
// Queues every valid entry in increasing index order, followed by Invalid. |
method Action listAll(); |
// Discards the oldest queued result. |
method Action deq(); |
// Oldest queued result. |
method Maybe#(Bit#(5)) resultSlot(); |
// Current number of valid entries. |
method Bit#(5) numPics(); |
endinterface |
|
// Long-term reference picture list: a table of Maybe#(slot) indexed by the |
// 5-bit frameNum argument, plus a count of valid entries. clear, |
// maxIndexPlus1 and listAll start a multi-cycle sweep carried out by the |
// rules; all methods except deq and resultSlot require state to be Idle. |
module mkLongTermPicList( LongTermPicList ); |
   // RegFile#(Bit#(5),Maybe#(Bit#(5))) rfile <- mkRegFile(0,maxRefFrames-1); |
   RFile1#(Bit#(5),Maybe#(Bit#(5))) rfile <- mkRFile1Full(); |
   Reg#(LongTermPicListState) state <- mkReg(Idle); |
   Reg#(Bit#(5)) picCount <- mkReg(0); |
   Reg#(Bit#(5)) tempPic <- mkReg(0); |
   FIFO#(Maybe#(Bit#(5))) returnList <- mkFIFO(); |
|
   // Invalidates one entry per firing, from tempPic up to maxRefFrames-1, |
   // decrementing picCount for each valid entry removed. |
   rule clearing ( state matches tagged Clear ); |
      if(tempPic>=maxRefFrames) |
         state <= Idle; |
      else |
         begin |
            Bool wasValid = isValid(rfile.sub(tempPic)); |
            if(wasValid && picCount!=0) |
               picCount <= picCount-1; |
            rfile.upd(tempPic,Invalid); |
            tempPic <= tempPic+1; |
         end |
      //$display( "TRACE BufferControl: LongTermPicList clearing %h %h", picCount, tempPic); |
   endrule |
|
   // Visits one entry per firing in increasing index order, queueing the |
   // valid ones; an Invalid terminator is queued after the last index. |
   rule listingAll ( state matches tagged ListAll ); |
      if(tempPic>=maxRefFrames) |
         begin |
            returnList.enq(Invalid); |
            state <= Idle; |
         end |
      else |
         begin |
            Maybe#(Bit#(5)) entry = rfile.sub(tempPic); |
            if(isValid(entry)) |
               returnList.enq(entry); |
            tempPic <= tempPic+1; |
         end |
      //$display( "TRACE BufferControl: LongTermPicList listingAll %h %h", picCount, tempPic); |
   endrule |
|
   // Starts a sweep that invalidates the table from index 0. |
   method Action clear() if(state matches tagged Idle); |
      tempPic <= 0; |
      state <= Clear; |
      //$display( "TRACE BufferControl: LongTermPicList clear %h", picCount); |
   endmethod |
|
   // Stores the slot at index frameNum; the count grows only if the entry |
   // was previously Invalid. |
   method Action insert( Bit#(5) frameNum, Bit#(5) slot ) if(state matches tagged Idle); |
      Bool wasEmpty = !isValid(rfile.sub(frameNum)); |
      if(wasEmpty) |
         picCount <= picCount+1; |
      rfile.upd(frameNum,Valid slot); |
      //$display( "TRACE BufferControl: LongTermPicList insert %h %h %h", picCount, frameNum, slot); |
   endmethod |
|
   // Invalidates index frameNum; reports an error if it was already Invalid. |
   method Action remove( Bit#(5) frameNum ) if(state matches tagged Idle); |
      if(isValid(rfile.sub(frameNum))) |
         picCount <= picCount-1; |
      else |
         $display( "ERROR BufferControl: LongTermPicList removing not found"); |
      rfile.upd(frameNum,Invalid); |
      //$display( "TRACE BufferControl: LongTermPicList remove %h %h", picCount, frameNum); |
   endmethod |
|
   // Starts a sweep that invalidates the table from `index` upward. |
   method Action maxIndexPlus1( Bit#(5) index ) if(state matches tagged Idle); |
      tempPic <= index; |
      state <= Clear; |
      //$display( "TRACE BufferControl: LongTermPicList maxIndexPlus1 %h %h", picCount, index); |
   endmethod |
|
   // Queues the entry at index frameNum as it is (Valid or Invalid). |
   method Action search( Bit#(5) frameNum ) if(state matches tagged Idle); |
      Maybe#(Bit#(5)) entry = rfile.sub(frameNum); |
      returnList.enq(entry); |
      //$display( "TRACE BufferControl: LongTermPicList search %h %h", picCount, frameNum); |
   endmethod |
|
   // Starts a listing sweep from index 0. |
   method Action listAll() if(state matches tagged Idle); |
      tempPic <= 0; |
      state <= ListAll; |
      //$display( "TRACE BufferControl: LongTermPicList listAll %h", picCount); |
   endmethod |
|
   method Action deq(); |
      returnList.deq(); |
      //$display( "TRACE BufferControl: LongTermPicList deq %h", picCount); |
   endmethod |
|
   method Maybe#(Bit#(5)) resultSlot(); |
      return returnList.first(); |
   endmethod |
|
   method Bit#(5) numPics() if(state matches tagged Idle); |
      return picCount; |
   endmethod |
endmodule |
|
|
//----------------------------------------------------------- |
// Free slot module |
//----------------------------------------------------------- |
|
// Interface of the free-slot tracker implemented by mkFreeSlots. |
interface FreeSlots; |
// Marks every slot as free. |
method Action init(); |
// Marks `slot` as free. |
method Action add( Bit#(5) slot ); |
// Marks `slot` as in use; a slot value of 31 is ignored. |
method Action remove( Bit#(5) slot ); |
// Lowest-numbered free slot different from `exception`, or 31 if none. |
method Bit#(5) first( Bit#(5) exception ); |
endinterface |
|
// Tracks which of 18 slots are free: bit value 0 means free, 1 means in |
// use. The register has no reset value, so init() must run before use. |
module mkFreeSlots( FreeSlots ); |
   Reg#(Vector#(18,Bit#(1))) slots <- mkRegU(); |
|
   // Marks all 18 slots free. |
   method Action init(); |
      slots <= replicate(0); |
   endmethod |
|
   // Marks `slot` free; slot numbers >= maxRefFrames+2 are reported. |
   method Action add( Bit#(5) slot ); |
      if(slot >= maxRefFrames+2) |
         $display( "ERROR BufferControl: FreeSlots add out of bounds"); |
      Vector#(18,Bit#(1)) updated = slots; |
      updated[slot] = 0; |
      slots <= updated; |
   endmethod |
|
   // Marks `slot` in use. The value 31 is ignored; other slot numbers |
   // >= maxRefFrames+2 are reported. |
   method Action remove( Bit#(5) slot ); |
      Vector#(18,Bit#(1)) updated = slots; |
      if(slot != 31) |
         begin |
            if(slot >= maxRefFrames+2) |
               $display( "ERROR BufferControl: FreeSlots remove out of bounds"); |
            updated[slot] = 1; |
            slots <= updated; |
         end |
   endmethod |
|
   // Returns the lowest-numbered free slot that differs from `exception`, |
   // or 31 when there is none. |
   method Bit#(5) first( Bit#(5) exception ); |
      Bit#(5) chosen = 31; |
      Bool found = False; |
      for(Integer idx=0; idx<18; idx=idx+1) |
         begin |
            if(!found && slots[fromInteger(idx)]==1'b0 && fromInteger(idx)!=exception) |
               begin |
                  chosen = fromInteger(idx); |
                  found = True; |
               end |
         end |
      return chosen; |
   endmethod |
|
endmodule |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
|
|
//----------------------------------------------------------- |
// Buffer Controller Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkBufferControl( IBufferControl ); |
|
FIFO#(DeblockFilterOT) infifo <- mkSizedFIFO(bufferControl_infifo_size); |
FIFO#(BufferControlOT) outfifo <- mkFIFO(); |
|
FIFO#(FrameBufferLoadReq) loadReqQ1 <- mkFIFO(); |
FIFO#(FrameBufferLoadResp) loadRespQ1 <- mkFIFO(); |
FIFO#(FrameBufferLoadReq) loadReqQ2 <- mkFIFO(); |
FIFO#(FrameBufferLoadResp) loadRespQ2 <- mkFIFO(); |
FIFO#(FrameBufferStoreReq) storeReqQ <- mkFIFO(); |
|
FIFO#(InterpolatorLoadReq) inLoadReqQ <- mkFIFO(); |
FIFO#(InterpolatorLoadResp) inLoadRespQ <- mkFIFO(); |
FIFO#(Bit#(2)) inLoadOutOfBounds <- mkSizedFIFO(64); |
|
Reg#(Bit#(5)) log2_max_frame_num <- mkReg(0); |
Reg#(Bit#(5)) num_ref_frames <- mkReg(0); |
Reg#(Bit#(1)) gaps_in_frame_num_allowed_flag <- mkReg(0); |
Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) frameinmb <- mkReg(0); |
|
Reg#(Bit#(5)) ppsnum_ref_idx_l0_active <- mkReg(0); |
Reg#(Bit#(16)) frame_num <- mkReg(0); |
Reg#(Bit#(16)) prevRefFrameNum <- mkReg(0); |
Reg#(Bit#(5)) num_ref_idx_l0_active <- mkReg(0); |
Reg#(Bit#(2)) reordering_of_pic_nums_idc <- mkReg(0); |
Reg#(Bit#(16)) picNumLXPred <- mkReg(0); |
Reg#(Bit#(3)) memory_management_control_operation <- mkReg(0); |
|
Reg#(Bool) newInputFrame <- mkReg(True); |
Reg#(Bool) noMoreInput <- mkReg(False); |
Reg#(Bool) inputframedone <- mkReg(False); |
Reg#(Outprocess) outprocess <- mkReg(Idle); |
Reg#(Bool) outputframedone <- mkReg(True); |
|
Reg#(Bit#(5)) inSlot <- mkReg(0); |
Reg#(Bit#(FrameBufferSz)) inAddrBase <- mkReg(0); |
Reg#(Bit#(5)) outSlot <- mkReg(31); |
Reg#(Bit#(FrameBufferSz)) outAddrBase <- mkReg(0); |
Reg#(Bit#(TAdd#(PicAreaSz,7))) outReqCount <- mkReg(0); |
Reg#(Bit#(TAdd#(PicAreaSz,7))) outRespCount <- mkReg(0); |
|
FreeSlots freeSlots <- mkFreeSlots();//may include outSlot (have to make sure it's not used) |
ShortTermPicList shortTermPicList <- mkShortTermPicList(); |
LongTermPicList longTermPicList <- mkLongTermPicList(); |
RFile1#(Bit#(5),Bit#(5)) refPicList <- mkRFile1(0,maxRefFrames-1); |
Reg#(Bit#(5)) refPicListCount <- mkReg(0); |
Reg#(Bool) initRefPicList <- mkReg(False); |
Reg#(Bool) reorderRefPicList <- mkReg(False); |
Reg#(Bit#(5)) refIdx <- mkReg(0); |
Reg#(Bit#(5)) tempSlot <- mkReg(0); |
Reg#(Bit#(5)) tempSlot2 <- mkReg(0); |
Reg#(Bit#(2)) adjustFreeSlots <- mkReg(0); |
|
Reg#(Bool) refPicListDone <- mkReg(False); |
Reg#(Bool) lockInterLoads <- mkReg(True); |
DoNotFire donotfire <- mkDoNotFire(); |
|
|
//----------------------------------------------------------- |
// Rules |
|
// inputing: consumes one infifo entry per firing while input for the current
// frame is still expected (guard: !noMoreInput && !inputframedone).
//   * EDOT entries carry parameter-set, slice-header and reference-marking
//     fields; they are latched into registers or forwarded to the picture lists.
//   * DFBLuma / DFBChroma entries become frame-buffer store requests.
//   * EndOfFrame closes the input frame; goToNextFrame performs the hand-off.
// Arms that call donotfire.doNotFire() are the ones that have to wait for a
// background walk (adjustingFreeSlots, initingRefPicList, reorderingRefPicList).
// NOTE(review): presumably doNotFire() is never ready, so the rule as a whole
// does not fire on those paths -- confirm against mkDoNotFire.
rule inputing ( !noMoreInput && !inputframedone );
   //$display( "Trace Buffer Control: passing infifo packed %h", pack(infifo.first()));
   case (infifo.first()) matches
      tagged EDOT .indata :
         begin
            case (indata) matches
               tagged SPSlog2_max_frame_num .xdata :
                  begin
                     // New sequence parameters: reset all slot bookkeeping,
                     // but only once no free-slot adjustment is in flight.
                     if(adjustFreeSlots == 0)
                        begin
                           infifo.deq();
                           log2_max_frame_num <= xdata;
                           freeSlots.init();
                           shortTermPicList.clear();
                           longTermPicList.clear();
                        end
                     else
                        donotfire.doNotFire();
                  end
               tagged SPSnum_ref_frames .xdata :
                  begin
                     infifo.deq();
                     num_ref_frames <= xdata;
                  end
               tagged SPSgaps_in_frame_num_allowed_flag .xdata :
                  begin
                     infifo.deq();
                     gaps_in_frame_num_allowed_flag <= xdata;
                  end
               tagged SPSpic_width_in_mbs .xdata :
                  begin
                     infifo.deq();
                     picWidth <= xdata;
                  end
               tagged SPSpic_height_in_map_units .xdata :
                  begin
                     infifo.deq();
                     picHeight <= xdata;
                     // Uses the picWidth register, so the width entry has to
                     // have been latched in an earlier firing.
                     frameinmb <= zeroExtend(picWidth)*zeroExtend(xdata);
                  end
               tagged PPSnum_ref_idx_l0_active .xdata :
                  begin
                     infifo.deq();
                     ppsnum_ref_idx_l0_active <= xdata;
                  end
               tagged SHfirst_mb_in_slice .xdata :
                  begin
                     if(adjustFreeSlots == 0)
                        begin
                           infifo.deq();
                           newInputFrame <= False;
                           // Start streaming both lists so initingRefPicList
                           // can rebuild refPicList for this slice.
                           shortTermPicList.listAll();
                           longTermPicList.listAll();
                           initRefPicList <= True;
                           refPicListCount <= 0;
                           if(newInputFrame)
                              begin
                                 // First slice of a frame: claim a free slot and
                                 // derive its base address (slot * frameinmb * 3 * 32).
                                 inSlot <= freeSlots.first(outSlot);
                                 inAddrBase <= (zeroExtend(freeSlots.first(outSlot))*zeroExtend(frameinmb)*3)<<5;
                              end
                           $display( "Trace BufferControl: passing SHfirst_mb_in_slice %h %h %0d", freeSlots.first(outSlot), outSlot, (newInputFrame ? 1 : 0));
                        end
                     else
                        donotfire.doNotFire();
                  end
               tagged SHframe_num .xdata :
                  begin
                     infifo.deq();
                     frame_num <= xdata;
                     // Seed the reordering predictor with the frame_num that is
                     // arriving now. Reading the frame_num register here would
                     // return the value it held before this firing, i.e. the
                     // frame_num of the previous slice header.
                     picNumLXPred <= xdata;
                  end
               tagged SHnum_ref_idx_active_override_flag .xdata :
                  begin
                     infifo.deq();
                     // Start from the PPS default; an SHnum_ref_idx_l0_active
                     // entry, if one follows, overwrites it.
                     num_ref_idx_l0_active <= ppsnum_ref_idx_l0_active;
                  end
               tagged SHnum_ref_idx_l0_active .xdata :
                  begin
                     infifo.deq();
                     num_ref_idx_l0_active <= xdata;
                  end
               tagged SHRref_pic_list_reordering_flag_l0 .xdata :
                  begin
                     // Wait until the initial list has been built.
                     if(!initRefPicList)
                        begin
                           infifo.deq();
                           if(xdata==0)
                              refPicListDone <= True;
                        end
                     else
                        donotfire.doNotFire();
                     refIdx <= 0;
                  end
               tagged SHRreordering_of_pic_nums_idc .xdata :
                  begin
                     // Wait until the previous reordering step has finished.
                     if(!reorderRefPicList)
                        begin
                           infifo.deq();
                           reordering_of_pic_nums_idc <= xdata;
                           if(xdata==3)
                              refPicListDone <= True;
                        end
                     else
                        donotfire.doNotFire();
                  end
               tagged SHRabs_diff_pic_num .xdata :
                  begin
                     if(!reorderRefPicList)
                        begin
                           infifo.deq();
                           Bit#(16) picNumLXNoWrap;
                           Bit#(17) tempOne = 1;
                           Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
                           if(reordering_of_pic_nums_idc==0)
                              begin
                                 // Subtract, wrapping by maxPicNum on underflow.
                                 if(picNumLXPred < truncate(xdata))
                                    picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)-xdata+maxPicNum);
                                 else
                                    picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)-xdata);
                              end
                           else
                              begin
                                 // Add, wrapping by maxPicNum on overflow.
                                 if(zeroExtend(picNumLXPred)+xdata >= maxPicNum)
                                    picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)+xdata-maxPicNum);
                                 else
                                    picNumLXNoWrap = truncate(zeroExtend(picNumLXPred)+xdata);
                              end
                           picNumLXPred <= picNumLXNoWrap;
                           shortTermPicList.search(picNumLXNoWrap);
                           reorderRefPicList <= True;
                           refPicListCount <= 0;
                        end
                     else
                        donotfire.doNotFire();
                  end
               tagged SHRlong_term_pic_num .xdata :
                  begin
                     if(!reorderRefPicList)
                        begin
                           infifo.deq();
                           longTermPicList.search(xdata);
                           reorderRefPicList <= True;
                           refPicListCount <= 0;
                        end
                     else
                        donotfire.doNotFire();
                  end
               tagged SHDlong_term_reference_flag .xdata :
                  begin
                     infifo.deq();
                     if(xdata==0)
                        shortTermPicList.insert(frame_num,inSlot,num_ref_frames);
                     else
                        longTermPicList.insert(0,inSlot);
                     adjustFreeSlots <= 1;
                  end
               tagged SHDadaptive_ref_pic_marking_mode_flag .xdata :
                  begin
                     infifo.deq();
                     // tempGap = number of frame_num values skipped since
                     // prevRefFrameNum, computed modulo maxPicNum.
                     Bit#(17) tempFrameNum = zeroExtend(frame_num);
                     Bit#(17) tempOne = 1;
                     Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
                     Bit#(16) tempGap = 0;
                     if(frame_num < prevRefFrameNum)
                        tempFrameNum = tempFrameNum + maxPicNum;
                     if(tempFrameNum-zeroExtend(prevRefFrameNum) > 1)
                        tempGap = truncate(tempFrameNum-zeroExtend(prevRefFrameNum)-1);
                     if(xdata==0)
                        begin
                           if(tempGap==0)
                              shortTermPicList.insert(frame_num,inSlot,(num_ref_frames-longTermPicList.numPics()));
                           else
                              shortTermPicList.insert_gap(frame_num,inSlot,(num_ref_frames-longTermPicList.numPics()),tempGap,log2_max_frame_num);
                           adjustFreeSlots <= 1;
                        end
                     prevRefFrameNum <= frame_num;
                  end
               tagged SHDmemory_management_control_operation .xdata :
                  begin
                     infifo.deq();
                     memory_management_control_operation <= xdata;
                     if(xdata==0)
                        adjustFreeSlots <= 1;
                     else if(xdata==5)
                        begin
                           shortTermPicList.clear();
                           longTermPicList.clear();
                        end
                  end
               tagged SHDdifference_of_pic_nums .xdata :
                  begin
                     infifo.deq();
                     Bit#(16) picNumXNoWrap;
                     Bit#(17) tempOne = 1;
                     Bit#(17) maxPicNum = tempOne << log2_max_frame_num;
                     if(frame_num < truncate(xdata))
                        picNumXNoWrap = truncate(zeroExtend(frame_num)-xdata+maxPicNum);
                     else
                        picNumXNoWrap = truncate(zeroExtend(frame_num)-xdata);
                     // The Bool argument is False only for operation 1.
                     if(memory_management_control_operation == 1)
                        shortTermPicList.remove(picNumXNoWrap,False);
                     else
                        shortTermPicList.remove(picNumXNoWrap,True);
                  end
               tagged SHDlong_term_pic_num .xdata :
                  begin
                     infifo.deq();
                     longTermPicList.remove(xdata);
                  end
               tagged SHDlong_term_frame_idx .xdata :
                  begin
                     infifo.deq();
                     if(memory_management_control_operation == 3)
                        begin
                           // Move the slot reported by the short-term list
                           // into the long-term list.
                           if(shortTermPicList.resultSlot() matches tagged Valid .validdata)
                              longTermPicList.insert(xdata,validdata);
                           else
                              $display( "ERROR BufferControl: SHDlong_term_frame_idx Invalid output from shortTermPicList");
                           shortTermPicList.deq();
                        end
                     else
                        longTermPicList.insert(xdata,inSlot);
                  end
               tagged SHDmax_long_term_frame_idx_plus1 .xdata :
                  begin
                     infifo.deq();
                     longTermPicList.maxIndexPlus1(xdata);
                  end
               tagged EndOfFile :
                  begin
                     infifo.deq();
                     $display( "INFO Buffer Control: EndOfFile reached");
                     noMoreInput <= True;
                     //$finish(0);
                     //outfifo.enq(EndOfFile);
                  end
               default: infifo.deq();
            endcase
         end
      tagged DFBLuma .indata :
         begin
            infifo.deq();
            //$display( "TRACE Buffer Control: input Luma %0d %h %h", indata.mb, indata.pixel, indata.data);
            Bit#(TAdd#(PicAreaSz,6)) addr = {(zeroExtend(indata.ver)*zeroExtend(picWidth)),2'b00}+zeroExtend(indata.hor);
            storeReqQ.enq(FBStoreReq {addr:inAddrBase+zeroExtend(addr),data:indata.data});
         end
      tagged DFBChroma .indata :
         begin
            infifo.deq();
            // Chroma follows the luma plane (frameinmb*64 words); the second
            // chroma plane (uv==1) follows frameinmb*16 words later.
            Bit#(TAdd#(PicAreaSz,4)) addr = {(zeroExtend(indata.ver)*zeroExtend(picWidth)),1'b0}+zeroExtend(indata.hor);
            Bit#(TAdd#(PicAreaSz,6)) chromaOffset = {frameinmb,6'b000000};
            Bit#(TAdd#(PicAreaSz,4)) vOffset = 0;
            if(indata.uv == 1)
               vOffset = {frameinmb,4'b0000};
            storeReqQ.enq(FBStoreReq {addr:(inAddrBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(addr)),data:indata.data});
            //$display( "TRACE Buffer Control: input Chroma %0d %0h %h %h %h %h", indata.uv, indata.ver, indata.hor, indata.data, addr, (inAddrBase+zeroExtend(chromaOffset)+zeroExtend(vOffset)+zeroExtend(addr)));
         end
      tagged EndOfFrame :
         begin
            infifo.deq();
            $display( "INFO Buffer Control: EndOfFrame reached");
            inputframedone <= True;
            newInputFrame <= True;
            refPicListDone <= False;
         end
      default: infifo.deq();
   endcase
endrule
|
|
// initingRefPicList: while initRefPicList is set, append the slots streamed
// out by the picture lists to refPicList -- short-term results take priority
// over long-term ones. Once neither list reports a Valid slot, both result
// streams are dequeued one last time and the walk is switched off.
rule initingRefPicList ( initRefPicList );
   case (shortTermPicList.resultSlot()) matches
      tagged Valid .stSlot :
         begin
            shortTermPicList.deq();
            refPicList.upd(refPicListCount,stSlot);
            refPicListCount <= refPicListCount+1;
            $display( "Trace BufferControl: initingRefPicList shortTermPicList %h", stSlot);
         end
      default :
         begin
            case (longTermPicList.resultSlot()) matches
               tagged Valid .ltSlot :
                  begin
                     longTermPicList.deq();
                     refPicList.upd(refPicListCount,ltSlot);
                     refPicListCount <= refPicListCount+1;
                     $display( "Trace BufferControl: initingRefPicList longTermPicList %h", ltSlot);
                  end
               default :
                  begin
                     // Both streams exhausted: consume the end markers and stop.
                     shortTermPicList.deq();
                     longTermPicList.deq();
                     initRefPicList <= False;
                     refPicListCount <= 0;
                     $display( "Trace BufferControl: initingRefPicList end");
                  end
            endcase
         end
   endcase
endrule
|
|
// reorderingRefPicList: applies one reordering command to refPicList.
// Step 1 (a search result is available from either list): place the found
//   slot at position refIdx, remember the entry it displaced in tempSlot and
//   the inserted slot in tempSlot2, and continue from position refIdx+1.
// Step 2 (no result pending): keep pushing the displaced entry one position
//   further down while refPicListCount < num_ref_idx_l0_active and
//   tempSlot != tempSlot2; otherwise finish and advance refIdx.
rule reorderingRefPicList ( reorderRefPicList );
   $display( "Trace BufferControl: reorderingRefPicList");
   case (shortTermPicList.resultSlot()) matches
      tagged Valid .foundSlot :
         begin
            shortTermPicList.deq();
            tempSlot <= refPicList.sub(refIdx);
            refPicList.upd(refIdx,foundSlot);
            refPicListCount <= refIdx+1;
            tempSlot2 <= foundSlot;
         end
      default :
         begin
            case (longTermPicList.resultSlot()) matches
               tagged Valid .foundSlot :
                  begin
                     longTermPicList.deq();
                     tempSlot <= refPicList.sub(refIdx);
                     refPicList.upd(refIdx,foundSlot);
                     refPicListCount <= refIdx+1;
                     tempSlot2 <= foundSlot;
                  end
               default :
                  begin
                     Bool shiftPending = (refPicListCount<num_ref_idx_l0_active) && (tempSlot!=tempSlot2);
                     if(shiftPending)
                        begin
                           tempSlot <= refPicList.sub(refPicListCount);
                           refPicList.upd(refPicListCount,tempSlot);
                           refPicListCount <= refPicListCount+1;
                        end
                     else
                        begin
                           reorderRefPicList <= False;
                           refPicListCount <= 0;
                           refIdx <= refIdx+1;
                        end
                  end
            endcase
         end
   endcase
endrule
|
|
// adjustingFreeSlots: rebuilds the free-slot set after the picture lists
// changed. adjustFreeSlots == 1 starts the walk (re-initialise freeSlots and
// ask both lists to stream their contents); any other non-zero value means the
// walk is running: every slot reported by a list is removed from freeSlots,
// and when both streams are exhausted the register returns to 0.
rule adjustingFreeSlots ( adjustFreeSlots != 0 );
   if(adjustFreeSlots != 1)
      begin
         case (shortTermPicList.resultSlot()) matches
            tagged Valid .usedSlot :
               begin
                  shortTermPicList.deq();
                  freeSlots.remove(usedSlot);
                  $display( "Trace BufferControl: adjustingFreeSlots shortTermPicList %h", usedSlot);
               end
            default :
               begin
                  case (longTermPicList.resultSlot()) matches
                     tagged Valid .usedSlot :
                        begin
                           longTermPicList.deq();
                           freeSlots.remove(usedSlot);
                           $display( "Trace BufferControl: adjustingFreeSlots longTermPicList %h", usedSlot);
                        end
                     default :
                        begin
                           shortTermPicList.deq();
                           longTermPicList.deq();
                           adjustFreeSlots <= 0;
                           $display( "Trace BufferControl: adjustingFreeSlots end");
                        end
                  endcase
               end
         endcase
      end
   else
      begin
         shortTermPicList.listAll();
         longTermPicList.listAll();
         freeSlots.init();
         adjustFreeSlots <= 2;
         $display( "Trace BufferControl: adjustingFreeSlots begin");
      end
endrule
|
|
// outputingReq: issues one load request per firing for the frame at
// outAddrBase, walking outReqCount through the Y, U and V planes. The plane
// marker in outprocess advances when the last word of the current plane has
// been requested and returns to Idle after the last V word.
rule outputingReq ( outprocess != Idle );
   // Index of the last word of each plane, counted from the frame base.
   Bit#(TAdd#(PicAreaSz,7)) lumaWords = {1'b0,frameinmb,6'b000000};
   Bit#(TAdd#(PicAreaSz,7)) lastY = lumaWords-1;
   Bit#(TAdd#(PicAreaSz,7)) lastU = lumaWords+{3'b000,frameinmb,4'b0000}-1;
   Bit#(TAdd#(PicAreaSz,7)) lastV = lumaWords+{2'b00,frameinmb,5'b00000}-1;

   // Every firing requests exactly one word and moves on to the next.
   loadReqQ1.enq(FBLoadReq (outAddrBase+zeroExtend(outReqCount)));
   outReqCount <= outReqCount+1;

   if(outprocess==Y)
      begin
         if(outReqCount == lastY)
            outprocess <= U;
      end
   else if(outprocess==U)
      begin
         if(outReqCount == lastU)
            outprocess <= V;
      end
   else
      begin
         //$display( "TRACE BufferControl: outputingReq V %h %h %h", outAddrBase, outReqCount, (outAddrBase+zeroExtend(outReqCount)));
         if(outReqCount == lastV)
            outprocess <= Idle;
      end
endrule
|
|
// outputingResp: forwards each frame-buffer load response to outfifo as a YUV
// item and counts it; the response that completes the frame (Y + U + V words)
// sets outputframedone. Responses with any other tag are left in the queue.
rule outputingResp ( !outputframedone );
   Bit#(TAdd#(PicAreaSz,7)) lastWord = {1'b0,frameinmb,6'b000000}+{2'b00,frameinmb,5'b00000}-1;
   case (loadRespQ1.first()) matches
      tagged FBLoadResp .word :
         begin
            loadRespQ1.deq();
            outfifo.enq(YUV word);
            outRespCount <= outRespCount+1;
            if(outRespCount == lastWord)
               outputframedone <= True;
         end
      default : noAction;
   endcase
endrule
|
|
// goToNextFrame: frame hand-off. Fires when the previous frame has been fully
// output, the current input frame is complete and the inter-prediction client
// has sent IPLoadEndFrame. The frame just written becomes the frame to output,
// an end-of-frame sync is sent on all three frame-buffer queues, and inter
// loads are locked again until unlockInterLoads releases them.
rule goToNextFrame ( outputframedone && inputframedone && inLoadReqQ.first()==IPLoadEndFrame );
   // Consume the end-of-frame marker from the inter-prediction client.
   inLoadReqQ.deq();

   // Synchronise the frame buffer on every port.
   loadReqQ1.enq(FBEndFrameSync);
   loadReqQ2.enq(FBEndFrameSync);
   storeReqQ.enq(FBEndFrameSync);

   // The input frame becomes the output frame.
   outSlot <= inSlot;
   outAddrBase <= inAddrBase;
   outReqCount <= 0;
   outRespCount <= 0;
   outprocess <= Y;

   // Re-arm both sides.
   inputframedone <= False;
   outputframedone <= False;
   lockInterLoads <= True;
endrule
|
|
// unlockInterLoads: once the reference picture list is final, release the
// lock that keeps interLumaReq / interChromaReq from firing.
rule unlockInterLoads ( refPicListDone && lockInterLoads );
   lockInterLoads <= False;
endrule
|
|
// theEndOfFile: after EndOfFile has been seen on the input and the last frame
// has been output, emit EndOfFile downstream. The guard stays true, so the
// marker is enqueued whenever outfifo has room.
rule theEndOfFile ( noMoreInput && outputframedone );
   outfifo.enq(EndOfFile);
endrule
|
|
// interLumaReq: serves a luma load request from the inter-prediction client.
// The reference index is translated to a frame-buffer slot through refPicList,
// the word address inside that frame is ver * picWidth * 4 + hor, and a 2-bit
// edge tag is queued alongside so interResp can patch the returned word.
rule interLumaReq ( inLoadReqQ.first() matches tagged IPLoadLuma .req &&& !lockInterLoads );
   Bit#(5) refSlot = refPicList.sub(zeroExtend(req.refIdx));
   Bit#(FrameBufferSz) frameBase = (zeroExtend(refSlot)*zeroExtend(frameinmb)*3)<<5;
   Bit#(TAdd#(PicAreaSz,6)) wordInFrame = {(zeroExtend(req.ver)*zeroExtend(picWidth)),2'b00}+zeroExtend(req.hor);

   loadReqQ2.enq(FBLoadReq (frameBase+zeroExtend(wordInFrame)));
   // Tag = {horOutOfBounds, hor != 0}.
   inLoadOutOfBounds.enq({req.horOutOfBounds,(req.hor==0 ? 0 : 1)});
   inLoadReqQ.deq();
   //$display( "Trace BufferControl: interLumaReq %h %h %h %h %h", req.refIdx, refSlot, frameBase, wordInFrame, frameBase+zeroExtend(wordInFrame));
endrule
|
|
// interChromaReq: serves a chroma load request from the inter-prediction
// client. The address is the reference frame's base, plus the size of the luma
// plane (frameinmb*64), plus frameinmb*16 when uv == 1, plus the word offset
// ver * picWidth * 2 + hor. A 2-bit edge tag is queued for interResp.
rule interChromaReq ( inLoadReqQ.first() matches tagged IPLoadChroma .req &&& !lockInterLoads );
   Bit#(5) refSlot = refPicList.sub(zeroExtend(req.refIdx));
   Bit#(FrameBufferSz) frameBase = (zeroExtend(refSlot)*zeroExtend(frameinmb)*3)<<5;

   Bit#(TAdd#(PicAreaSz,6)) lumaPlaneWords = {frameinmb,6'b000000};
   Bit#(TAdd#(PicAreaSz,4)) secondPlaneSkip = (req.uv == 1) ? {frameinmb,4'b0000} : 0;
   Bit#(TAdd#(PicAreaSz,6)) wordInPlane = {(zeroExtend(req.ver)*zeroExtend(picWidth)),1'b0}+zeroExtend(req.hor);

   loadReqQ2.enq(FBLoadReq (frameBase+zeroExtend(lumaPlaneWords)+zeroExtend(secondPlaneSkip)+zeroExtend(wordInPlane)));
   // Tag = {horOutOfBounds, hor != 0}.
   inLoadOutOfBounds.enq({req.horOutOfBounds,(req.hor==0 ? 0 : 1)});
   inLoadReqQ.deq();
   //$display( "Trace BufferControl: interChromaReq %h %h %h %h %h", req.refIdx, refSlot, frameBase, wordInPlane, frameBase+zeroExtend(lumaPlaneWords)+zeroExtend(secondPlaneSkip)+zeroExtend(wordInPlane));
endrule
|
|
// interResp: returns a frame-buffer word to the inter-prediction client,
// patched according to the edge tag queued with the request:
//   2'b10 -> the lowest byte replicated into all four byte lanes
//   2'b11 -> the highest byte replicated into all four byte lanes
//   other -> the word unchanged
rule interResp ( loadRespQ2.first() matches tagged FBLoadResp .word );
   let edgeTag = inLoadOutOfBounds.first();
   let reply = word;
   if(edgeTag == 2'b10)
      reply = {word[7:0],word[7:0],word[7:0],word[7:0]};
   else if(edgeTag == 2'b11)
      reply = {word[31:24],word[31:24],word[31:24],word[31:24]};

   inLoadRespQ.enq(IPLoadResp (reply));
   loadRespQ2.deq();
   inLoadOutOfBounds.deq();
   //$display( "Trace BufferControl: interResp %h %h", inLoadOutOfBounds.first(), word);
endrule
|
|
|
// Interface bindings: every port is a thin Get/Put wrapper around one of the |
// module's FIFOs. |
// ioin feeds rule inputing; ioout carries the YUV / EndOfFile output stream. |
interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |
// Frame-buffer load port 1: used by outputingReq / outputingResp (frame read-out). |
interface Client buffer_client_load1; |
interface Get request = fifoToGet(loadReqQ1); |
interface Put response = fifoToPut(loadRespQ1); |
endinterface |
// Frame-buffer load port 2: used by interLumaReq / interChromaReq / interResp. |
interface Client buffer_client_load2; |
interface Get request = fifoToGet(loadReqQ2); |
interface Put response = fifoToPut(loadRespQ2); |
endinterface |
// Frame-buffer store port: written by rule inputing (and the end-of-frame sync). |
interface Get buffer_client_store = fifoToGet(storeReqQ); |
// Load requests from, and responses to, the inter-prediction client. |
interface Server inter_server; |
interface Put request = fifoToPut(inLoadReqQ); |
interface Get response = fifoToGet(inLoadRespQ); |
endinterface |
|
|
endmodule |
|
endpackage |
/trunk/src/mkTH.bsv
0,0 → 1,68
//********************************************************************** |
// H264 Test Bench |
//---------------------------------------------------------------------- |
// |
// |
|
package mkTH; |
|
import H264Types::*; |
import IMemED::*; |
import IFrameBuffer::*; |
import IInputGen::*; |
import IFinalOutput::*; |
import IH264::*; |
import mkMemED::*; |
import mkFrameBuffer::*; |
import mkInputGen::*; |
import mkFinalOutput::*; |
import mkH264::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
// mkTH: top-level test harness. Instantiates the input generator, the H.264
// decoder, the memories and frame buffer the decoder needs, and the final
// output sink, then wires them together. A free-running cycle counter prints
// a heartbeat and aborts the simulation on time-out.
(* synthesize *)
module mkTH( Empty );

   // Instantiate the modules

   IInputGen                       inputgen       <- mkInputGen();
   IH264                           h264           <- mkH264();
   IMemED#(TAdd#(PicWidthSz,1),20) memED          <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,2),68) memP_intra     <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,2),32) memP_inter     <- mkMemED();
   IMemED#(TAdd#(PicWidthSz,5),32) memD_data      <- mkMemED();
   IMemED#(PicWidthSz,13)          memD_parameter <- mkMemED();
   IFrameBuffer                    framebuffer    <- mkFrameBuffer();
   IFinalOutput                    finaloutput    <- mkFinalOutput();

   // Cycle counter
   Reg#(Bit#(32)) cyclecount <- mkReg(0);

   // Heartbeat whenever the low five bits of the counter are zero;
   // give up once the counter has passed 60,000,000.
   rule countCycles;
      Bool heartbeat = (cyclecount[4:0] == 0);
      Bool timedOut  = (cyclecount > 60000000);

      if(heartbeat)
         $display( "CCLCycleCount %0d", cyclecount );
      cyclecount <= cyclecount+1;
      if(timedOut)
         begin
            $display( "ERROR mkTH: time out" );
            $finish(0);
         end
   endrule

   // Internal connections

   mkConnection( inputgen.ioout, h264.ioin );
   mkConnection( h264.mem_clientED, memED.mem_server );
   mkConnection( h264.mem_clientP_intra, memP_intra.mem_server );
   mkConnection( h264.mem_clientP_inter, memP_inter.mem_server );
   mkConnection( h264.mem_clientD_data, memD_data.mem_server );
   mkConnection( h264.mem_clientD_parameter, memD_parameter.mem_server );
   mkConnection( h264.buffer_client_load1, framebuffer.server_load1 );
   mkConnection( h264.buffer_client_load2, framebuffer.server_load2 );
   mkConnection( h264.buffer_client_store, framebuffer.server_store );
   mkConnection( h264.ioout, finaloutput.ioin );

endmodule
|
endpackage |
/trunk/src/mkInputGen_x264.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// mkInputGen: replays a byte stream loaded from a hex file. Bytes 0..7407 are
// sent one per firing as DataByte items; once the index reaches 7408 the
// generator keeps offering EndOfFile.
module mkInputGen( IInputGen );

   // Number of bytes replayed; the register file below holds indices 0..7407.
   Bit#(27) streamLength = 7408;

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("x264foreman_qcif1-5.hex", 0, 7407);

   FIFO#(InputGenOT) outfifo <- mkFIFO;
   Reg#(Bit#(27))    index   <- mkReg(0);

   rule output_byte (index < streamLength);
      let nextByte = rfile.sub(index);
      //$display( "ccl0inputbyte %x", nextByte );
      outfifo.enq(DataByte nextByte);
      index <= index+1;
   endrule

   rule end_of_file (index == streamLength);
      outfifo.enq(EndOfFile);
   endrule

   interface Get ioout = fifoToGet(outfifo);

endmodule
|
|
endpackage |
/trunk/src/mkH264.bsv
0,0 → 1,68
//********************************************************************** |
// H264 Main Module |
//---------------------------------------------------------------------- |
// |
// |
|
package mkH264; |
|
import H264Types::*; |
import IH264::*; |
import INalUnwrap::*; |
import IEntropyDec::*; |
import IInverseTrans::*; |
import IPrediction::*; |
import IDeblockFilter::*; |
import IBufferControl::*; |
import mkNalUnwrap::*; |
import mkEntropyDec::*; |
import mkInverseTrans::*; |
import mkPrediction::*; |
import mkDeblockFilter::*; |
import mkBufferControl::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
// mkH264: decoder top level. Instantiates the six processing stages, connects |
// them to each other, and re-exports the stages' external ports (input, |
// memory clients, frame-buffer clients, output) as the IH264 interface. |
(* synthesize *) |
module mkH264( IH264 ); |

// Instantiate the modules |

INalUnwrap nalunwrap <- mkNalUnwrap(); |
IEntropyDec entropydec <- mkEntropyDec(); |
IInverseTrans inversetrans <- mkInverseTrans(); |
IPrediction prediction <- mkPrediction(); |
IDeblockFilter deblockfilter <- mkDeblockFilter(); |
IBufferControl buffercontrol <- mkBufferControl(); |

// Internal connections |
// Reference-frame loads requested by prediction are served by buffer control. |
mkConnection( prediction.mem_client_buffer, buffercontrol.inter_server ); |

// Data path: nalunwrap -> entropydec -> (inversetrans ->) prediction -> |
// deblockfilter -> buffercontrol. |
mkConnection( nalunwrap.ioout, entropydec.ioin ); |
mkConnection( entropydec.ioout_InverseTrans, inversetrans.ioin ); |
mkConnection( entropydec.ioout, prediction.ioin ); |
mkConnection( inversetrans.ioout, prediction.ioin_InverseTrans ); |
mkConnection( prediction.ioout, deblockfilter.ioin ); |
mkConnection( deblockfilter.ioout, buffercontrol.ioin ); |

// Interface to input generator |
interface ioin = nalunwrap.ioin; |

// Memory interfaces |
interface mem_clientED = entropydec.mem_client; |
interface mem_clientP_intra = prediction.mem_client_intra; |
interface mem_clientP_inter = prediction.mem_client_inter; |
interface mem_clientD_data = deblockfilter.mem_client_data; |
interface mem_clientD_parameter = deblockfilter.mem_client_parameter; |
interface buffer_client_load1 = buffercontrol.buffer_client_load1; |
interface buffer_client_load2 = buffercontrol.buffer_client_load2; |
interface buffer_client_store = buffercontrol.buffer_client_store; |

// Interface for output |
interface ioout = buffercontrol.ioout; |

endmodule |
|
endpackage |
/trunk/src/mkInverseTrans.bsv
0,0 → 1,702
//********************************************************************** |
// Inverse Quantizer and Inverse Transformer implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInverseTrans; |
|
import H264Types::*; |
|
import IInverseTrans::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// State: which kind of coefficient block mkInverseTrans is currently handling. |
// Rule passing selects Intra16x16DC or Regular4x4 from the macroblock type; |
// rule scalingDC moves Intra16x16DC -> Intra16x16 and ChromaDC -> Chroma; |
// rule switching moves to ChromaDC after block 15 of a non-chroma state. |
typedef union tagged |
{ |
void Start; //not working on anything in particular |
void Intra16x16DC; |
void Intra16x16; |
void ChromaDC; |
void Chroma; |
void Regular4x4; |
} |
State deriving(Eq,Bits); |
|
// Process: selects which first-stage rule of mkInverseTrans may fire |
// (passing, loadingDC, scaling, transformingDC, scalingDC respectively). |
typedef union tagged |
{ |
void Passing; //not working on anything in particular |
void LoadingDC; |
void Scaling; //does not include scaling for DC (just loading in that case) |
void TransformingDC; |
void ScalingDC; |
} |
Process deriving(Eq,Bits); |
|
// PipeType: contents of a pipeline register between stages (work2Vector, |
// work3Vector). Invalid = empty; Zeros = a block flagged as all-zero by |
// stage 1 (stage1Zeros); Values = the sixteen 16-bit values of a block. |
typedef union tagged |
{ |
void Invalid; |
void Zeros; |
Vector#(16,Bit#(16)) Values; |
} |
PipeType deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// qpi_to_qpc: maps the chroma quantiser index qpi to the chroma quantiser
// value. Inputs below 30 (and any input above 51) are returned unchanged;
// 30..51 are compressed onto 29..39 according to the table below.
function Bit#(6) qpi_to_qpc( Bit#(6) qpi );//mapping from qpi to qpc
   Bit#(6) mapped = qpi;
   case ( qpi )
      30, 31, 32     : mapped = qpi - 1;
      33, 34         : mapped = 32;
      35             : mapped = 33;
      36, 37         : mapped = 34;
      38, 39         : mapped = 35;
      40, 41         : mapped = 36;
      42, 43, 44     : mapped = 37;
      45, 46, 47     : mapped = 38;
      48, 49, 50, 51 : mapped = 39;
   endcase
   return mapped;
endfunction
|
|
// reverseInverseZigZagScan: position inside a 4x4 block (row-major, 0..15)
// for the idx-th coefficient when the coefficients arrive last-to-first.
// Equivalent to looking up (15 - idx) in the forward 4x4 zig-zag table below.
function Bit#(4) reverseInverseZigZagScan( Bit#(4) idx );
   Bit#(4) scanPos = 15 - idx;   // position counted from the start of the scan
   Bit#(4) blockPos = 0;
   case ( scanPos )
      0:  blockPos = 0;
      1:  blockPos = 1;
      2:  blockPos = 4;
      3:  blockPos = 8;
      4:  blockPos = 5;
      5:  blockPos = 2;
      6:  blockPos = 3;
      7:  blockPos = 6;
      8:  blockPos = 9;
      9:  blockPos = 12;
      10: blockPos = 13;
      11: blockPos = 10;
      12: blockPos = 7;
      13: blockPos = 11;
      14: blockPos = 14;
      15: blockPos = 15;
   endcase
   return blockPos;
endfunction
|
|
// qpdivmod6: returns (qp / 6, qp % 6) without a divider. The quotient is built
// one bit at a time by trial subtraction of 48, 24, 12 and 6 (6 * 2^k for
// k = 3..0); whatever is left afterwards is the remainder.
function Tuple2#(Bit#(4),Bit#(3)) qpdivmod6( Bit#(6) qp );
   Bit#(6) remainder = qp;
   Bit#(4) quotient  = 0;
   Integer weight = 48;   // 6 * 2^k for the quotient bit under test
   for(Integer k=3; k>=0; k=k-1)
      begin
         Bit#(6) chunk = fromInteger(weight);
         if(remainder >= chunk)
            begin
               remainder = remainder - chunk;
               quotient  = quotient | fromInteger(div(weight,6));
            end
         weight = div(weight,2);
      end
   return tuple2(quotient,truncate(remainder));
endfunction
|
|
// dcTransFunc: one pass of the DC transform over four 16-bit values
// (all arithmetic wraps at 16 bits):
//   out[0] = in0 + in1 + in2 + in3
//   out[1] = in0 + in1 - in2 - in3
//   out[2] = in0 - in1 - in2 + in3
//   out[3] = in0 - in1 + in2 - in3
function Vector#(4,Bit#(16)) dcTransFunc( Bit#(16) in0, Bit#(16) in1, Bit#(16) in2, Bit#(16) in3 );
   // Pairwise sums and differences shared by the four outputs.
   Bit#(16) sum01  = in0 + in1;
   Bit#(16) diff01 = in0 - in1;
   Bit#(16) sum23  = in2 + in3;
   Bit#(16) diff23 = in2 - in3;

   Vector#(4,Bit#(16)) outputs = replicate(0);
   outputs[0] = sum01  + sum23;
   outputs[1] = sum01  - sum23;
   outputs[2] = diff01 - diff23;
   outputs[3] = diff01 + diff23;
   return outputs;
endfunction
|
|
// transFunc: one pass of the residual transform butterfly over four 16-bit
// values. The even inputs (in0, in2) are combined by plain sum/difference; the
// odd inputs (in1, in3) are combined with one operand arithmetically halved.
function Vector#(4,Bit#(16)) transFunc( Bit#(16) in0, Bit#(16) in1, Bit#(16) in2, Bit#(16) in3 );
   // First butterfly stage.
   Bit#(16) evenSum  = in0 + in2;
   Bit#(16) evenDiff = in0 - in2;
   Bit#(16) oddDiff  = signedShiftRight(in1,1) - in3;
   Bit#(16) oddSum   = in1 + signedShiftRight(in3,1);

   // Second butterfly stage.
   Vector#(4,Bit#(16)) outputs = replicate(0);
   outputs[0] = evenSum  + oddSum;
   outputs[1] = evenDiff + oddDiff;
   outputs[2] = evenDiff - oddDiff;
   outputs[3] = evenSum  - oddSum;
   return outputs;
endfunction
|
|
//----------------------------------------------------------- |
// Inverse Quantizer and Inverse Transformer Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkInverseTrans( IInverseTrans ); |
|
// Input / output queues and the block / coefficient position counters. |
FIFO#(EntropyDecOT_InverseTrans) infifo <- mkSizedFIFO(inverseTrans_infifo_size); |
FIFO#(InverseTransOT) outfifo <- mkFIFO; |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0);//also used as a regular counter during inverse transformation |
// state: kind of block being processed; process: which first-stage rule is active. |
Reg#(State) state <- mkReg(Start); |
Reg#(Process) process <- mkReg(Passing); |

// Quantiser parameters latched / derived by rule passing. The div6 / mod6 |
// registers hold the results of qpdivmod6 for the luma (qpy) and chroma (qpc) values. |
Reg#(Bit#(5)) chroma_qp_index_offset <- mkReg(0); |
Reg#(Bit#(6)) ppspic_init_qp <- mkReg(0); |
Reg#(Bit#(6)) slice_qp <- mkReg(0); |
Reg#(Bit#(6)) qpy <- mkReg(0);//Calculating it requires 8 bits, but value only 0 to 51 |
Reg#(Bit#(6)) qpc <- mkReg(0); |
Reg#(Bit#(3)) qpymod6 <- mkReg(0); |
Reg#(Bit#(3)) qpcmod6 <- mkReg(0); |
Reg#(Bit#(4)) qpydiv6 <- mkReg(0); |
Reg#(Bit#(4)) qpcdiv6 <- mkReg(0); |

// storeVector: scaled DC values written by rule scalingDC and read back per block. |
// workVector: block being assembled by stage 1. |
// work2Vector / work3Vector: blocks handed to stage 2 / stage 3 by rule switching. |
// stageNDone / stageNStep: per-stage completion flags and step counters. |
Reg#(Vector#(16,Bit#(16))) storeVector <- mkRegU(); |
Reg#(Vector#(16,Bit#(16))) workVector <- mkRegU(); |
Reg#(PipeType) work2Vector <- mkReg(Invalid); |
Reg#(PipeType) work3Vector <- mkReg(Invalid); |
Reg#(Bool) stage1Zeros <- mkReg(False); |
Reg#(Bool) stage1Done <- mkReg(False); |
Reg#(Bool) stage2Done <- mkReg(False); |
Reg#(Bool) stage3Done <- mkReg(False); |
Reg#(Bit#(3)) stage2Step <- mkReg(0); |
Reg#(Bit#(2)) stage3Step <- mkReg(0); |
|
|
|
//----------------------------------------------------------- |
// Rules |
|
|
// passing: handles non-coefficient entries while stage 1 is idle. Fires only |
// when stage 2 is empty and stage 3 is either finished or empty. Parameter |
// entries are latched; qp entries are turned into IBTmb_qp messages on |
// outfifo; a coefficient entry is NOT dequeued here -- it only switches |
// process so that loadingDC or scaling picks it up. |
rule passing (process==Passing && work2Vector==Invalid && (stage3Done || work3Vector==Invalid) ); |
//$display( "Trace Inverse Trans: passing infifo packed %h", pack(infifo.first())); |
case (infifo.first()) matches |
tagged NewUnit . xdata : |
begin |
infifo.deq(); |
$display("ccl3newunit"); |
$display("ccl3rbspbyte %h", xdata); |
end |
// Macroblock type decides whether a separate DC block comes first. |
tagged SDMmbtype .xdata : |
begin |
infifo.deq(); |
$display( "INFO InverseTrans: SDMmbtype %0d", xdata); |
if(mbPartPredMode(xdata,0) == Intra_16x16) |
state <= Intra16x16DC; |
else |
state <= Regular4x4; |
end |
tagged PPSpic_init_qp .xdata : |
begin |
infifo.deq(); |
ppspic_init_qp <= truncate(xdata); |
end |
// Slice qp = picture initial qp + delta. The chroma index is qpy plus the |
// offset, clamped to 0..51 (the +12 bias keeps the intermediate non-negative). |
// NOTE(review): unlike the SDMmb_qp_delta arm, this arm does not refresh |
// qpydiv6 / qpymod6 -- confirm that a SDMmb_qp_delta always follows before use. |
tagged SHslice_qp_delta .xdata : |
begin |
infifo.deq(); |
slice_qp <= ppspic_init_qp+truncate(xdata); |
Bit#(6) qpynext = ppspic_init_qp+truncate(xdata); |
qpy <= qpynext; |
Bit#(7) qpitemp = zeroExtend(chroma_qp_index_offset+12) + zeroExtend(qpynext); |
Bit#(6) qpi; |
if(qpitemp < 12) |
qpi = 0; |
else if(qpitemp > 63) |
qpi = 51; |
else |
qpi = truncate(qpitemp-12); |
qpc <= qpi_to_qpc(qpi); |
outfifo.enq(IBTmb_qp {qpy:qpynext,qpc:qpi_to_qpc(qpi)}); |
end |
// Macroblock qp = (qpy + delta) wrapped into 0..51; the +52 bias keeps the |
// intermediate non-negative, and 104 or 52 is subtracted again afterwards. |
tagged SDMmb_qp_delta .xdata : |
begin |
infifo.deq(); |
Bit#(8) qpytemp = zeroExtend(qpy) + zeroExtend(xdata+52); |
Bit#(6) qpynext; |
if(qpytemp >= 104) |
qpynext = truncate(qpytemp - 104); |
else if(qpytemp >= 52) |
qpynext = truncate(qpytemp - 52); |
else |
qpynext = truncate(qpytemp); |
qpy <= qpynext; |

//$display( "TRACE InverseTrans: qpy %0d", qpynext ); |
//$display( "TRACE InverseTrans: qpy %0d", qpynext ); |
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpynext); |
qpydiv6 <= tpl_1(temptuple); |
qpymod6 <= tpl_2(temptuple); |
//$display( "TRACE InverseTrans: qpydiv6 %0d", tpl_1(temptuple) ); |
//$display( "TRACE InverseTrans: qpymod6 %0d", tpl_2(temptuple) ); |

Bit#(7) qpitemp = zeroExtend(chroma_qp_index_offset+12) + zeroExtend(qpynext); |
Bit#(6) qpi; |
if(qpitemp < 12) |
qpi = 0; |
else if(qpitemp > 63) |
qpi = 51; |
else |
qpi = truncate(qpitemp-12); |
qpc <= qpi_to_qpc(qpi); |
outfifo.enq(IBTmb_qp {qpy:qpynext,qpc:qpi_to_qpc(qpi)}); |
end |
tagged PPSchroma_qp_index_offset .xdata : |
begin |
infifo.deq(); |
chroma_qp_index_offset <= xdata; |
end |
// First coefficient entry of a macroblock: reset the counters, clear the work |
// block, refresh the chroma div/mod registers and hand over to loadingDC |
// (Intra16x16DC) or scaling (everything else). The entry stays in infifo. |
tagged SDMRcoeffLevelPlusZeros .xdata : |
begin |
blockNum <= 0; |
pixelNum <= 0; |
if(state == Intra16x16DC) |
begin |
$display( "INFO InverseTrans: 16x16 MB" ); |
process <= LoadingDC; |
end |
else |
begin |
$display( "INFO InverseTrans: Non-16x16 MB" ); |
process <= Scaling; |
end |
workVector <= replicate(0); |
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpc); |
qpcdiv6 <= tpl_1(temptuple); |
qpcmod6 <= tpl_2(temptuple); |
end |
// Same hand-over as above for an entry that carries only a run of zeros. |
tagged SDMRcoeffLevelZeros .xdata : |
begin |
blockNum <= 0; |
pixelNum <= 0; |
if(state == Intra16x16DC) |
begin |
$display( "INFO InverseTrans: 16x16 MB" ); |
process <= LoadingDC; |
end |
else |
begin |
$display( "INFO InverseTrans: Non-16x16 MB" ); |
process <= Scaling; |
end |
workVector <= replicate(0); |
Tuple2#(Bit#(4),Bit#(3)) temptuple = qpdivmod6(qpc); |
qpcdiv6 <= tpl_1(temptuple); |
qpcmod6 <= tpl_2(temptuple); |
end |
// Anything else is not relevant to this module and is discarded. |
default: infifo.deq(); |
endcase |
endrule |
|
|
// loadingDC: collects the DC coefficients of a macroblock into workVector.
// pixelNum counts the positions covered so far (coefficients plus zero runs);
// a ChromaDC block is complete at 8 positions, any block at 16. Values are
// stored unscaled: chroma DC in reversed order within each group of four,
// everything else through reverseInverseZigZagScan. Any entry that is not a
// coefficient entry returns control to rule passing without being dequeued.
rule loadingDC (process matches LoadingDC);
   Bool chromaDC = (state==ChromaDC);

   case (infifo.first()) matches
      tagged SDMRcoeffLevelZeros .xdata :
         begin
            infifo.deq();
            let covered = zeroExtend(pixelNum)+xdata;
            pixelNum <= pixelNum+truncate(xdata);
            if((chromaDC && covered==8) || covered==16)
               process <= TransformingDC;
            else if((chromaDC && covered>8) || covered>16)
               $display( "ERROR InverseTrans: loadingDC index overflow" );
         end
      tagged SDMRcoeffLevelPlusZeros .xdata :
         begin
            infifo.deq();
            Bit#(16) level = signExtend(xdata.level);

            // Destination of this coefficient inside workVector.
            Bit#(4) dest;
            if(chromaDC)
               dest = (pixelNum<4) ? 3-pixelNum : 11-pixelNum;
            else
               dest = reverseInverseZigZagScan(pixelNum);
            workVector <= update(workVector, dest, level);

            let covered = zeroExtend(pixelNum)+1+xdata.zeros;
            pixelNum <= pixelNum+1+truncate(xdata.zeros);
            if((chromaDC && covered==8) || covered==16)
               process <= TransformingDC;
            else if((chromaDC && covered>8) || covered>16)
               $display( "ERROR InverseTrans: loadingDC index overflow" );
         end
      default: process <= Passing;
   endcase
endrule
|
|
// transformingDC: transforms the DC block held in workVector in place, one |
// step per firing, using pixelNum as the step counter. |
//   ChromaDC     : steps 8..11 -- two butterfly steps on elements 0..3, then |
//                  two on elements 4..7 (pixelNum is 8 on entry from loadingDC). |
//   Intra16x16DC : steps 0..3 apply dcTransFunc to elements {i, i+4, i+8, i+12}, |
//                  steps 4..7 apply it to elements {4i .. 4i+3}. |
// After the last step pixelNum is reset and process moves to ScalingDC. |
// NOTE(review): storeVectorTemp is declared but not used in this rule. |
rule transformingDC (process matches TransformingDC); |
Vector#(16,Bit#(16)) workVectorTemp = workVector; |
Vector#(16,Bit#(16)) workVectorNew = workVector; |
Vector#(16,Bit#(16)) storeVectorTemp = storeVector; |

if(state == ChromaDC) |
begin |
case ( pixelNum ) |
8: |
begin |
workVectorNew[0] = workVectorTemp[0] + workVectorTemp[2]; |
workVectorNew[1] = workVectorTemp[1] + workVectorTemp[3]; |
workVectorNew[2] = workVectorTemp[0] - workVectorTemp[2]; |
workVectorNew[3] = workVectorTemp[1] - workVectorTemp[3]; |
pixelNum <= pixelNum+1; |
end |
9: |
begin |
workVectorNew[0] = workVectorTemp[0] + workVectorTemp[1]; |
workVectorNew[1] = workVectorTemp[0] - workVectorTemp[1]; |
workVectorNew[2] = workVectorTemp[2] + workVectorTemp[3]; |
workVectorNew[3] = workVectorTemp[2] - workVectorTemp[3]; |
pixelNum <= pixelNum+1; |
end |
10: |
begin |
workVectorNew[4] = workVectorTemp[4] + workVectorTemp[6]; |
workVectorNew[5] = workVectorTemp[5] + workVectorTemp[7]; |
workVectorNew[6] = workVectorTemp[4] - workVectorTemp[6]; |
workVectorNew[7] = workVectorTemp[5] - workVectorTemp[7]; |
pixelNum <= pixelNum+1; |
end |
11: |
begin |
workVectorNew[4] = workVectorTemp[4] + workVectorTemp[5]; |
workVectorNew[5] = workVectorTemp[4] - workVectorTemp[5]; |
workVectorNew[6] = workVectorTemp[6] + workVectorTemp[7]; |
workVectorNew[7] = workVectorTemp[6] - workVectorTemp[7]; |
pixelNum <= 0; |
process <= ScalingDC; |
end |
default: |
$display( "ERROR InverseTrans: transformingDC ChromaDC unexpected pixelNum" ); |
endcase |
workVector <= workVectorNew; |
end |
else if(state == Intra16x16DC) |
begin |
Vector#(4,Bit#(16)) resultVector = replicate(0); |
if(pixelNum < 4) |
begin |
// Column pass: elements tempIndex, +4, +8, +12. |
Bit#(4) tempIndex = zeroExtend(pixelNum[1:0]); |
resultVector = dcTransFunc( workVectorTemp[tempIndex], workVectorTemp[tempIndex+4], workVectorTemp[tempIndex+8], workVectorTemp[tempIndex+12] ); |
for(Integer ii=0; ii<4; ii=ii+1) |
workVectorNew[tempIndex+fromInteger(ii*4)] = resultVector[ii]; |
end |
else if(pixelNum < 8) |
begin |
// Row pass: elements tempIndex .. tempIndex+3. |
Bit#(4) tempIndex = {pixelNum[1:0],2'b00}; |
resultVector = dcTransFunc( workVectorTemp[tempIndex], workVectorTemp[tempIndex+1], workVectorTemp[tempIndex+2], workVectorTemp[tempIndex+3] ); |
for(Integer ii=0; ii<4; ii=ii+1) |
workVectorNew[tempIndex+fromInteger(ii)] = resultVector[ii]; |
end |
else |
$display( "ERROR InverseTrans: transforming Intra16x16DC unexpected pixelNum" ); |
workVector <= workVectorNew; |
if(pixelNum == 7) |
begin |
pixelNum <= 0; |
process <= ScalingDC; |
end |
else |
pixelNum <= pixelNum+1; |
end |
else |
$display( "ERROR InverseTrans: transformingDC unexpected state" ); |
endrule |
|
|
// scalingDC: scales the transformed DC values, one element of workVector per |
// firing (indexed by pixelNum), and stores the result in storeVector. |
// The multiplier is chosen from qp mod 6 (10, 11, 13, 14, 16, 18). |
//   ChromaDC : (value * scale << qpdiv6) >> 1 |
//   luma DC  : qp >= 36 -> value * scale << (qpdiv6 - 2) |
//              else     -> ((value * scale << 4) + (1 << (5 - qpdiv6))) >> (6 - qpdiv6) |
// After element 7 (ChromaDC) or 15 (otherwise) the counters and workVector |
// are reset, state becomes Chroma / Intra16x16 and process becomes Scaling. |
rule scalingDC (process matches ScalingDC); |
Bit#(6) qp; |
Bit#(4) qpdiv6; |
Bit#(3) qpmod6; |
Bit#(6) workOne = 1; |
Bit#(16) workValue; |
Bit#(22) storeValueTemp; |
Bit#(16) storeValue; |
Vector#(16,Bit#(16)) workVectorTemp = workVector; |
Vector#(16,Bit#(16)) storeVectorTemp = storeVector; |

// Select the chroma or luma quantiser values. |
if(state==ChromaDC) |
begin |
qp = qpc; |
qpdiv6 = qpcdiv6; |
qpmod6 = qpcmod6; |
end |
else |
begin |
qp = qpy; |
qpdiv6 = qpydiv6; |
qpmod6 = qpymod6; |
end |
workValue = select(workVectorTemp, pixelNum); |
Bit#(5) levelScaleValue=0; |
case(qpmod6) |
0: levelScaleValue = 10; |
1: levelScaleValue = 11; |
2: levelScaleValue = 13; |
3: levelScaleValue = 14; |
4: levelScaleValue = 16; |
5: levelScaleValue = 18; |
default: $display( "ERROR InverseTrans: scalingDC levelScaleGen case default" ); |
endcase |
// 22-bit signed product; the shifts below are performed at this width and |
// the result is then truncated to 16 bits. |
storeValueTemp = zeroExtend(levelScaleValue)*signExtend(workValue); |
if(state==ChromaDC) |
storeValue = truncate( (storeValueTemp << zeroExtend(qpdiv6)) >> 1 ); |
else |
begin |
if(qp >= 36) |
storeValue = truncate( storeValueTemp << zeroExtend(qpdiv6 - 2) ); |
else |
storeValue = truncate( ((storeValueTemp << 4) + zeroExtend(workOne << zeroExtend(5-qpdiv6))) >> zeroExtend(6 - qpdiv6) ); |
end |
storeVector <= update(storeVectorTemp, pixelNum, storeValue); |
if((state==ChromaDC && pixelNum==7) || pixelNum==15) |
begin |
blockNum <= 0; |
pixelNum <= 0; |
workVector <= replicate(0); |
if(state==ChromaDC) |
state <= Chroma; |
else |
state <= Intra16x16; |
process <= Scaling; |
end |
// NOTE(review): pixelNum is 4 bits wide, so "pixelNum>15" can never hold; |
// only the ChromaDC half of this check is reachable. |
else if((state==ChromaDC && pixelNum>7) || pixelNum>15) |
$display( "ERROR InverseTrans: scalingDC index overflow" ); |
else |
pixelNum <= pixelNum+1; |
endrule |
|
|
// switching: advances the block pipeline. |
//   stage 2 -> stage 3 when stage 2 is done and stage 3 is done or empty; |
//   stage 1 -> stage 2 when stage 1 is done and stage 2 is empty or is being |
//   vacated in this same firing. |
// On the stage-1 hand-over the block's element 0 is replaced by the stored DC |
// value for Intra16x16 / Chroma blocks, the stage-1 registers are cleared, and |
// blockNum / state / process are advanced to the next block. |
rule switching ( (stage1Done && work2Vector==Invalid) || (stage2Done && (stage3Done || work3Vector==Invalid)) ); |
Bool switch2to3 = False; |
if(stage2Done && (stage3Done || work3Vector==Invalid)) |
begin |
switch2to3 = True; |
work3Vector <= work2Vector; |
stage3Done <= False; |
end |
if(stage1Done && (switch2to3 || work2Vector==Invalid)) |
begin |
Vector#(16,Bit#(16)) workVectorTemp = workVector; |
// Intra16x16 reads storeVector with the two middle bits of blockNum swapped. |
if(state==Intra16x16) |
workVectorTemp[0] = storeVector[{blockNum[3],blockNum[1],blockNum[2],blockNum[0]}]; |
else if(state==Chroma) |
workVectorTemp[0] = storeVector[blockNum]; |
if(stage1Zeros) |
work2Vector <= Zeros; |
else |
work2Vector <= (Values workVectorTemp); |
stage1Zeros <= False; |
stage1Done <= False; |
workVector <= replicate(0); |
if(state==Chroma) |
begin |
// Eight chroma blocks (0..7); after the last one return to rule passing. |
if(blockNum<7) |
blockNum <= blockNum+1; |
else if (blockNum==7) |
begin |
blockNum <= 0; |
process <= Passing; |
end |
else |
$display( "ERROR InverseTrans: switching chroma unexpected blockNum" ); |
end |
else |
begin |
// Sixteen blocks (0..15); after the last one continue with the chroma DC block. |
blockNum <= blockNum+1; |
if(blockNum==15) |
begin |
state <= ChromaDC; |
process <= LoadingDC; |
end |
else |
process <= Scaling; |
end |
end |
else //switch2to3==True |
work2Vector <= Invalid; |
stage2Done <= False; |
endrule |
|
|
// Rule scaling: fires while process==Scaling and the current block is still being |
// filled (!stage1Done). Each firing looks at the token at the head of infifo: |
//   - SDMRcoeffLevelZeros     : a run of zero coefficients; only advances pixelNum. |
//   - SDMRcoeffLevelPlusZeros : one coefficient level followed by a zero run; the |
//                               level is scaled and written into workVector. |
//   - any other token         : left in infifo (no deq); process switches to Passing. |
// The block is marked complete (stage1Done) when the running coefficient position |
// reaches 16, or 15 while state is Chroma or Intra16x16. |
rule scaling (process==Scaling && !stage1Done ); |
Vector#(16,Bit#(16)) workVectorTemp = workVector; |
Vector#(16,Bit#(16)) storeVectorTemp = storeVector; |

case (infifo.first()) matches |
tagged SDMRcoeffLevelZeros .xdata : |
begin |
infifo.deq(); |
// Zero run ends exactly at the end of the block. |
if(zeroExtend(pixelNum)+xdata==16 || (zeroExtend(pixelNum)+xdata==15 && (state==Chroma || state==Intra16x16))) |
begin |
// Value already held in storeVector for this block (Intra16x16 uses a |
// bit-swapped blockNum as index). NOTE(review): presumably the separately |
// decoded DC coefficient -- confirm against the DC-loading rules. |
Bit#(16) prevValue0=0; |
if(state==Intra16x16) |
prevValue0 = select(storeVectorTemp, {blockNum[3],blockNum[1],blockNum[2],blockNum[0]}); |
else if(state==Chroma) |
prevValue0 = select(storeVectorTemp, blockNum); |
// Flag the block as all-zero only when the run covers the whole block and, |
// for the 15-coefficient case, the stored value is zero as well. |
if(xdata==16 || (xdata==15 && (state==Chroma || state==Intra16x16) && prevValue0==0)) |
stage1Zeros <= True; |
stage1Done <= True; |
pixelNum <= 0; |
end |
else if(zeroExtend(pixelNum)+xdata>16 || (zeroExtend(pixelNum)+xdata>15 && (state==Chroma || state==Intra16x16))) |
$display( "ERROR InverseTrans: scaling index overflow" ); |
else |
pixelNum <= pixelNum+truncate(xdata); |
//$display( "TRACE InverseTrans: coeff zeros %0d", xdata ); |
end |
tagged SDMRcoeffLevelPlusZeros .xdata : |
begin |
infifo.deq(); |
// Select the chroma or luma quantisation parameters. |
// qp is assigned below but not read again within this rule. |
Bit#(6) qp; |
Bit#(4) qpdiv6; |
Bit#(3) qpmod6; |
if(state==Chroma) |
begin |
qp = qpc; |
qpdiv6 = qpcdiv6; |
qpmod6 = qpcmod6; |
end |
else |
begin |
qp = qpy; |
qpdiv6 = qpydiv6; |
qpmod6 = qpymod6; |
end |
// Scale factor lookup: three tables chosen by the coefficient's scan position |
// (pixelNum), each indexed by qpmod6. For qpmod6 of 6 or 7 only the error |
// message is printed and levelScaleValue stays 0. |
Bit#(5) levelScaleValue=0; |
if(pixelNum==15 || pixelNum==12 || pixelNum==10 || pixelNum==4) |
begin |
case(qpmod6) |
0: levelScaleValue = 10; |
1: levelScaleValue = 11; |
2: levelScaleValue = 13; |
3: levelScaleValue = 14; |
4: levelScaleValue = 16; |
5: levelScaleValue = 18; |
default: $display( "ERROR InverseTrans: levelScaleGen case default" ); |
endcase |
end |
else if(pixelNum==11 || pixelNum==5 || pixelNum==3 || pixelNum==0) |
begin |
case(qpmod6) |
0: levelScaleValue = 16; |
1: levelScaleValue = 18; |
2: levelScaleValue = 20; |
3: levelScaleValue = 23; |
4: levelScaleValue = 25; |
5: levelScaleValue = 29; |
default: $display( "ERROR InverseTrans: levelScaleGen case default" ); |
endcase |
end |
else |
begin |
case(qpmod6) |
0: levelScaleValue = 13; |
1: levelScaleValue = 14; |
2: levelScaleValue = 16; |
3: levelScaleValue = 18; |
4: levelScaleValue = 20; |
5: levelScaleValue = 23; |
default: $display( "ERROR InverseTrans: levelScaleGen case default" ); |
endcase |
end |
// 16-bit product of scale factor and sign-extended level, then shifted left by qpdiv6. |
Bit#(16) workValueTemp = zeroExtend(levelScaleValue)*signExtend(xdata.level); |
Bit#(16) workValue; |
workValue = workValueTemp << zeroExtend(qpdiv6); |
// Store at the position given by reverseInverseZigZagScan for this scan index. |
workVector <= update(workVectorTemp, reverseInverseZigZagScan(pixelNum), workValue); |
// This token accounts for 1 coefficient plus xdata.zeros zero positions. |
if(zeroExtend(pixelNum)+1+xdata.zeros==16 || (zeroExtend(pixelNum)+1+xdata.zeros==15 && (state==Chroma || state==Intra16x16))) |
begin |
stage1Done <= True; |
pixelNum <= 0; |
end |
else if(zeroExtend(pixelNum)+1+xdata.zeros>16 || (zeroExtend(pixelNum)+1+xdata.zeros>15 && (state==Chroma || state==Intra16x16))) |
$display( "ERROR InverseTrans: scaling index overflow" ); |
else |
pixelNum <= pixelNum+1+truncate(xdata.zeros); |
end |
// Not a coefficient token: leave it in infifo and hand over to the Passing process. |
default: process <= Passing; |
endcase |
endrule |
|
|
// Rule transforming: second pipeline stage. While work2Vector holds a block and the |
// stage is not finished, each firing applies transFunc to one group of four entries: |
// steps 0-3 take four consecutive entries, steps 4-7 take entries spaced four apart. |
// After step 7 the stage is marked done. A block that carries no Values payload |
// (the all-zero case) needs no work and is marked done immediately. |
rule transforming ( work2Vector!=Invalid && !stage2Done ); |
   case (work2Vector) matches |
      tagged Values .coeffs : |
         begin |
            Vector#(16,Bit#(16)) updated = coeffs; |
            Bit#(2) lane = stage2Step[1:0]; |
            if(stage2Step >= 4) |
               begin |
                  // Entries lane, lane+4, lane+8, lane+12. |
                  Bit#(4) colBase = zeroExtend(lane); |
                  Vector#(4,Bit#(16)) colOut = transFunc( coeffs[colBase], coeffs[colBase+4], coeffs[colBase+8], coeffs[colBase+12] ); |
                  for(Integer k=0; k<4; k=k+1) |
                     updated[colBase+fromInteger(k*4)] = colOut[k]; |
               end |
            else |
               begin |
                  // Entries 4*lane .. 4*lane+3. |
                  Bit#(4) rowBase = {lane,2'b00}; |
                  Vector#(4,Bit#(16)) rowOut = transFunc( coeffs[rowBase], coeffs[rowBase+1], coeffs[rowBase+2], coeffs[rowBase+3] ); |
                  for(Integer k=0; k<4; k=k+1) |
                     updated[rowBase+fromInteger(k)] = rowOut[k]; |
               end |
            work2Vector <= (Values updated); |
            stage2Step <= stage2Step+1; |
            if(stage2Step == 7) |
               stage2Done <= True; |
         end |
      default : //All Zeros |
         stage2Done <= True; |
   endcase |
endrule |
|
|
// Rule outputing: third pipeline stage. For a block with a Values payload, each firing |
// emits four entries as an ITBresidual: every entry is rounded with +32, shifted right |
// by 6 and truncated to 10 bits; the four results are also printed as signed values. |
// stage3Step selects which group of four is emitted; after step 3 the stage is done. |
// A block without a Values payload is emitted as a single ITBcoeffLevelZeros token. |
rule outputing ( work3Vector!=Invalid && !stage3Done ); |
   case (work3Vector) matches |
      tagged Values .coeffs : |
         begin |
            Vector#(4,Bit#(10)) residualRow = replicate(0); |
            for(Integer col=0; col<4; col=col+1) |
               residualRow[col] = truncate((coeffs[{stage3Step,2'b00}+fromInteger(col)]+32) >> 6); |
            outfifo.enq(ITBresidual residualRow); |
            for(Integer col=0; col<4; col=col+1) |
               begin |
                  Int#(10) signedSample = unpack(residualRow[col]); |
                  $display("ccl3IBTresidual %0d", signedSample); |
               end |
            stage3Step <= stage3Step+1; |
            if(stage3Step == 3) |
               stage3Done <= True; |
         end |
      default : |
         begin |
            outfifo.enq(ITBcoeffLevelZeros); |
            stage3Done <= True; |
         end |
   endcase |
endrule |
|
|
|
interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |
|
|
endmodule |
|
endpackage |
/trunk/src/mkDeblockFilter_dummy.bsv
0,0 → 1,192
//********************************************************************** |
// Deblocking Filter |
//---------------------------------------------------------------------- |
// |
// |
|
package mkDeblockFilter; |
|
import H264Types::*; |
|
import IDeblockFilter::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
|
|
|
//----------------------------------------------------------- |
// Helper functions |
|
|
|
|
//----------------------------------------------------------- |
// Deblocking Filter Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
// mkDeblockFilter (dummy variant): performs no filtering. Tokens arriving on ioin are |
// forwarded to ioout wrapped as EDOT, except PBoutput pixel words, which are re-emitted |
// as DFBLuma / DFBChroma with their picture coordinates attached. An EndOfFrame token |
// is emitted after the last macroblock of the picture. The two memory client |
// interfaces are exposed, but no rule in this module enqueues requests or dequeues |
// responses on their FIFOs. |
module mkDeblockFilter( IDeblockFilter ); |

FIFO#(EntropyDecOT) infifo <- mkFIFO(); |
FIFO#(DeblockFilterOT) outfifo <- mkFIFO(); |

// Backing FIFOs for the memory client interfaces (not used by any rule below). |
FIFO#(MemReq#(TAdd#(PicWidthSz,5),32)) dataMemReqQ <- mkSizedFIFO(1); |
FIFO#(MemReq#(PicWidthSz,13)) parameterMemReqQ <- mkSizedFIFO(1); |
FIFO#(MemResp#(32)) dataMemRespQ <- mkSizedFIFO(1); |
FIFO#(MemResp#(13)) parameterMemRespQ <- mkSizedFIFO(1); |

// Position inside the current macroblock: chromaFlag selects luma(0)/chroma(1), |
// blockNum counts blocks, pixelNum steps 0,4,8,12 through the words of one block. |
Reg#(Bit#(1)) chromaFlag <- mkReg(0); |
Reg#(Bit#(4)) blockNum <- mkReg(0); |
Reg#(Bit#(4)) pixelNum <- mkReg(0); |

Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |
// firstMb is written on SHfirst_mb_in_slice but not read anywhere in this module. |
Reg#(Bit#(PicAreaSz)) firstMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMb <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbHor <- mkReg(0);//horizontal position of currMb |
Reg#(Bit#(PicHeightSz)) currMbVer <- mkReg(0);//vertical position of currMb |

// Not referenced by any rule in this module. |
Vector#(3,Reg#(Bit#(8))) tempinput <- replicateM(mkRegU); |

Reg#(Bool) endOfFrame <- mkReg(False); |


//----------------------------------------------------------- |
// Rules |

// Main pass-through rule. Only enabled while currMbHor is a valid column |
// (currMbHor < picWidth) and no EndOfFrame token is pending. |
rule passing (currMbHor<zeroExtend(picWidth) && !endOfFrame); |
//$display( "Trace Deblocking Filter: passing infifo packed %h", pack(infifo.first())); |
case (infifo.first()) matches |
tagged NewUnit . xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display("ccl5newunit"); |
$display("ccl5rbspbyte %h", xdata); |
end |
tagged SPSpic_width_in_mbs .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picWidth <= xdata; |
end |
tagged SPSpic_height_in_map_units .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
picHeight <= xdata; |
end |
tagged SHfirst_mb_in_slice .xdata : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
// currMbHor receives the raw macroblock index with currMbVer reset to 0; |
// rule currMbHorUpdate then reduces it to a (column,row) pair. |
firstMb <= xdata; |
currMb <= xdata; |
currMbHor <= xdata; |
currMbVer <= 0; |
end |
tagged PBoutput .xdata : |
begin |
infifo.deq(); |
// Split blockNum / pixelNum into horizontal and vertical 2-bit parts. |
// pixelHor is computed but only used by the commented-out trace below. |
Bit#(2) blockHor = {blockNum[2],blockNum[0]}; |
Bit#(2) blockVer = {blockNum[3],blockNum[1]}; |
Bit#(2) pixelHor = {pixelNum[1],pixelNum[0]}; |
Bit#(2) pixelVer = {pixelNum[3],pixelNum[2]}; |
Bit#(PicWidthSz) currMbHorT = truncate(currMbHor); |
// Pack the four 8-bit samples into one 32-bit word, element 0 in the low byte. |
Bit#(32) pixelq = {xdata[3],xdata[2],xdata[1],xdata[0]}; |
if(chromaFlag==0) |
outfifo.enq(DFBLuma {ver:{currMbVer,blockVer,pixelVer},hor:{currMbHorT,blockHor},data:pixelq}); |
else |
// For chroma, blockHor[1] is passed as the uv selector and only the low |
// bit of blockHor / blockVer contributes to the coordinates. |
outfifo.enq(DFBChroma {uv:blockHor[1],ver:{currMbVer,blockVer[0],pixelVer},hor:{currMbHorT,blockHor[0]},data:pixelq}); |
// Advance counters: 4 words per block, 16 luma blocks then 8 chroma blocks |
// (blockNum 0..7 with chromaFlag==1) per macroblock. |
if(pixelNum == 12) |
begin |
pixelNum <= 0; |
if(blockNum == 15) |
begin |
blockNum <= 0; |
chromaFlag <= 1; |
end |
else if(blockNum==7 && chromaFlag==1) |
begin |
blockNum <= 0; |
chromaFlag <= 0; |
currMb <= currMb+1; |
currMbHor <= currMbHor+1; |
// Last macroblock of the picture: schedule an EndOfFrame token. |
if(currMbVer==picHeight-1 && currMbHor==zeroExtend(picWidth-1)) |
endOfFrame <= True; |
end |
else |
blockNum <= blockNum+1; |
end |
else |
pixelNum <= pixelNum+4; |
//$display( "Trace Deblocking Filter: passing PBoutput %h %h %h %h", blockNum, pixelNum, pixelHor, xdata); |
end |
tagged EndOfFile : |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
$display( "ccl5: EndOfFile reached"); |
//$finish(0); |
end |
default: |
begin |
infifo.deq(); |
outfifo.enq(EDOT infifo.first()); |
end |
endcase |
endrule |


// Brings currMbHor back into [0,picWidth) by moving whole rows into currMbVer. |
// Subtracts eight rows per firing while currMbHor is at least 8*picWidth, otherwise |
// one row per firing. Rule passing is disabled until this has converged. |
rule currMbHorUpdate( !(currMbHor<zeroExtend(picWidth)) && !endOfFrame); |
Bit#(PicAreaSz) temp = zeroExtend(picWidth); |
if((currMbHor >> 3) >= temp) |
begin |
currMbHor <= currMbHor - (temp << 3); |
currMbVer <= currMbVer + 8; |
end |
else |
begin |
currMbHor <= currMbHor - temp; |
currMbVer <= currMbVer + 1; |
end |
//$display( "Trace Deblocking Filter: currMbHorUpdate %h %h", currMbHor, currMbVer); |
endrule |


// Emits the EndOfFrame token requested by rule passing and clears the request. |
rule outputEndOfFrame(endOfFrame); |
outfifo.enq(EndOfFrame); |
endOfFrame <= False; |
//$display( "Trace Deblocking Filter: outputEndOfFrame %h", pack(infifo.first())); |
endrule |


interface Client mem_client_data; |
interface Get request = fifoToGet(dataMemReqQ); |
interface Put response = fifoToPut(dataMemRespQ); |
endinterface |

interface Client mem_client_parameter; |
interface Get request = fifoToGet(parameterMemReqQ); |
interface Put response = fifoToPut(parameterMemRespQ); |
endinterface |

interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |

endmodule |
|
endpackage |
/trunk/src/mkMemED_regfile.bsv
0,0 → 1,52
//********************************************************************** |
// Memory for Entropy Decoder |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package mkMemED; |
|
import H264Types::*; |
import IMemED::*; |
import RegFile::*; |
import GetPut::*; |
import ClientServer::*; |
import FIFO::*; |
|
|
//---------------------------------------------------------------------- |
// Main module |
//---------------------------------------------------------------------- |
|
// mkMemED: register-file backed memory served through a request/response FIFO pair. |
// A StoreReq writes its data at its address and produces no response; a LoadReq |
// reads the addressed entry and returns it as a LoadResp. |
module mkMemED(IMemED#(index_size,data_size)) |
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen), |
             Bits#(MemResp#(data_size),mRespLen)); |

   //----------------------------------------------------------- |
   // State |

   FIFO#(MemReq#(index_size,data_size)) reqQ <- mkFIFO(); |
   FIFO#(MemResp#(data_size)) respQ <- mkFIFO(); |

   RegFile#(Bit#(index_size),Bit#(data_size)) rfile <- mkRegFileFull(); |

   //----------------------------------------------------------- |
   // Rules |

   // Head of the request queue is a store: consume it and update the register file. |
   rule storing ( reqQ.first() matches tagged StoreReq .wr ); |
      reqQ.deq(); |
      rfile.upd(wr.addr,wr.data); |
   endrule |

   // Head of the request queue is a load: consume it and queue the value read. |
   rule reading ( reqQ.first() matches tagged LoadReq .rdAddr ); |
      let rdData = rfile.sub(rdAddr); |
      reqQ.deq(); |
      respQ.enq( LoadResp rdData ); |
   endrule |

   interface Server mem_server; |
      interface Put request = fifoToPut(reqQ); |
      interface Get response = fifoToGet(respQ); |
   endinterface |

endmodule |
|
endpackage |
/trunk/src/mkInputGen_news.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// mkInputGen: streams the bytes of a hex image out of ioout as DataByte tokens, one |
// per rule firing, and then produces EndOfFile tokens once every byte has been sent. |
module mkInputGen( IInputGen ); |

   // Index of the last byte loaded from the hex image, and the resulting byte count. |
   Bit#(27) lastByteIndex = 17225; |
   Bit#(27) byteCount = lastByteIndex+1; |

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("news_cif1-5.hex", 0, lastByteIndex); |

   Reg#(Bit#(27)) index <- mkReg(0); |
   FIFO#(InputGenOT) outfifo <- mkFIFO; |

   // Send the byte at the current index and move on to the next one. |
   rule output_byte (index < byteCount); |
      let nextByte = rfile.sub(index); |
      //$display( "ccl0inputbyte %x", nextByte ); |
      index <= index+1; |
      outfifo.enq(DataByte nextByte); |
   endrule |

   // All bytes sent. index is not changed here, so this rule stays enabled and |
   // keeps enqueuing EndOfFile whenever outfifo has room. |
   rule end_of_file (index == byteCount); |
      //$finish(0); |
      outfifo.enq(EndOfFile); |
   endrule |

   interface Get ioout = fifoToGet(outfifo); |

endmodule |
|
|
endpackage |
/trunk/src/mkMemED.bsv
0,0 → 1,52
//********************************************************************** |
// Memory for Entropy Decoder |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package mkMemED; |
|
import H264Types::*; |
import IMemED::*; |
import RegFile::*; |
import GetPut::*; |
import ClientServer::*; |
import FIFO::*; |
|
|
//---------------------------------------------------------------------- |
// Main module |
//---------------------------------------------------------------------- |
|
// mkMemED: register-file backed memory served through a request/response FIFO pair. |
// A StoreReq writes its data at its address and produces no response; a LoadReq |
// reads the addressed entry and returns it as a LoadResp. |
module mkMemED(IMemED#(index_size,data_size)) |
   provisos (Bits#(MemReq#(index_size,data_size),mReqLen), |
             Bits#(MemResp#(data_size),mRespLen)); |

   //----------------------------------------------------------- |
   // State |

   FIFO#(MemReq#(index_size,data_size)) reqQ <- mkFIFO(); |
   FIFO#(MemResp#(data_size)) respQ <- mkFIFO(); |

   RegFile#(Bit#(index_size),Bit#(data_size)) rfile <- mkRegFileFull(); |

   //----------------------------------------------------------- |
   // Rules |

   // Head of the request queue is a store: consume it and update the register file. |
   rule storing ( reqQ.first() matches tagged StoreReq .wr ); |
      reqQ.deq(); |
      rfile.upd(wr.addr,wr.data); |
   endrule |

   // Head of the request queue is a load: consume it and queue the value read. |
   rule reading ( reqQ.first() matches tagged LoadReq .rdAddr ); |
      let rdData = rfile.sub(rdAddr); |
      reqQ.deq(); |
      respQ.enq( LoadResp rdData ); |
   endrule |

   interface Server mem_server; |
      interface Put request = fifoToPut(reqQ); |
      interface Get response = fifoToGet(respQ); |
   endinterface |

endmodule |
|
endpackage |
/trunk/src/mkInputGen.bsv
0,0 → 1,41
//********************************************************************** |
// Input Generator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInputGen; |
|
import H264Types::*; |
import IInputGen::*; |
import RegFile::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
|
|
// mkInputGen: streams the bytes of a hex image out of ioout as DataByte tokens, one |
// per rule firing, and then produces EndOfFile tokens once every byte has been sent. |
module mkInputGen( IInputGen ); |

   // Index of the last byte loaded from the hex image, and the resulting byte count. |
   Bit#(27) lastByteIndex = 2282510; |
   Bit#(27) byteCount = lastByteIndex+1; |

   RegFile#(Bit#(27), Bit#(8)) rfile <- mkRegFileLoad("720p50_parkrun_ter1-20inter.hex", 0, lastByteIndex); |

   Reg#(Bit#(27)) index <- mkReg(0); |
   FIFO#(InputGenOT) outfifo <- mkFIFO; |

   // Send the byte at the current index and move on to the next one. |
   rule output_byte (index < byteCount); |
      let nextByte = rfile.sub(index); |
      //$display( "ccl0inputbyte %x", nextByte ); |
      index <= index+1; |
      outfifo.enq(DataByte nextByte); |
   endrule |

   // All bytes sent. index is not changed here, so this rule stays enabled and |
   // keeps enqueuing EndOfFile whenever outfifo has room. |
   rule end_of_file (index == byteCount); |
      //$finish(0); |
      outfifo.enq(EndOfFile); |
   endrule |

   interface Get ioout = fifoToGet(outfifo); |

endmodule |
|
|
endpackage |
/trunk/src/mkInterpolator_4stage.bsv
0,0 → 1,844
//********************************************************************** |
// interpolator implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkInterpolator; |
|
import H264Types::*; |
import IInterpolator::*; |
import FIFO::*; |
import Vector::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// InterpolatorWT: work descriptor held in the interpolator's work queue and work |
// register. It carries the fractional motion-vector parts, a 2-bit offset and the |
// block type, in a luma and a chroma flavour. Field order is significant because |
// the bit encoding is derived. |
typedef union tagged |
{ |
   // Luma work item: 2-bit fractional parts. |
   struct { |
      Bit#(2) xFracL; |
      Bit#(2) yFracL; |
      Bit#(2) offset; |
      IPBlockType bt; |
   } IPWLuma; |
   // Chroma work item: 3-bit fractional parts. |
   struct { |
      Bit#(3) xFracC; |
      Bit#(3) yFracC; |
      Bit#(2) offset; |
      IPBlockType bt; |
   } IPWChroma; |
} |
InterpolatorWT deriving(Eq,Bits); |
|
|
//----------------------------------------------------------- |
// Helper functions |
|
// clip1y10to8: saturates a 10-bit value to 8 bits. Bit 9 set yields 0, otherwise |
// bit 8 set yields 255, otherwise the low 8 bits are returned unchanged. |
function Bit#(8) clip1y10to8( Bit#(10) innum ); |
   Bool belowRange = (innum[9] == 1); |
   Bool aboveRange = (innum[8] == 1); |
   Bit#(8) clipped = truncate(innum); |
   if(aboveRange) |
      clipped = 255; |
   // Checked last so that bit 9 takes priority over bit 8. |
   if(belowRange) |
      clipped = 0; |
   return clipped; |
endfunction |
|
// interpolate8to15: six-tap filter with weights (1,-5,20,20,-5,1) over six 8-bit |
// inputs. Inputs are zero-extended and the sum is kept, unrounded, in 15 bits |
// (modulo 2^15, so a negative sum appears in two's-complement form). |
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 ); |
   Bit#(15) outerTaps = zeroExtend(in0) + zeroExtend(in5); |
   Bit#(15) nearTaps = zeroExtend(in1) + zeroExtend(in4); |
   Bit#(15) centerTaps = zeroExtend(in2) + zeroExtend(in3); |
   return outerTaps - 5*nearTaps + 20*centerTaps; |
endfunction |
|
// interpolate15to8: six-tap filter with weights (1,-5,20,20,-5,1) over six 15-bit |
// intermediate values. Inputs are sign-extended to 20 bits, 512 is added for |
// rounding, the sum is shifted right by 10 and the result is clipped to 8 bits. |
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 ); |
   Bit#(20) outerTaps = signExtend(in0) + signExtend(in5); |
   Bit#(20) nearTaps = signExtend(in1) + signExtend(in4); |
   Bit#(20) centerTaps = signExtend(in2) + signExtend(in3); |
   Bit#(20) rounded = outerTaps - 5*nearTaps + 20*centerTaps + 512; |
   // Bits [19:10] are exactly the low 10 bits of (rounded >> 10). |
   return clip1y10to8(rounded[19:10]); |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Interpolation Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkInterpolator( Interpolator ); |
|
// Request pipeline: reqfifoLoad feeds the load rules, reqfifoWork1 feeds the work1 |
// rules and reqregWork2 holds the request being processed by the work2 rules. |
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size); |
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size); |
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid); |
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO; |
// While set, the load rules are blocked and sendEndOfFrameReq issues IPLoadEndFrame. |
Reg#(Bool) endOfFrameFlag <- mkReg(False); |
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO; |
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size); |

Reg#(Bit#(PicWidthSz)) picWidth <- mkReg(maxPicWidthInMB); |
Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0); |

// workFile / storeFile are written by work1Luma at addresses prefixed with |
// workFileFlag and read by work2Luma at addresses prefixed with (1-workFileFlag). |
// resultFile receives the final 8-bit rows from work2Luma. |
RFile1#(Bit#(6),Vector#(4,Bit#(15))) workFile <- mkRFile1Full(); |
RFile1#(Bit#(6),Vector#(4,Bit#(8))) storeFile <- mkRFile1Full(); |
Reg#(Bit#(1)) workFileFlag <- mkReg(0); |
RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full(); |

// Counters of the load rules (memory request generation). |
Reg#(Bit#(1)) loadStage <- mkReg(0); |
Reg#(Bit#(2)) loadHorNum <- mkReg(0); |
Reg#(Bit#(4)) loadVerNum <- mkReg(0); |

// Counters and sliding window of the work1 rules (memory response consumption). |
Reg#(Bit#(2)) work1MbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(2)) work1SubMbPart <- mkReg(0);//only for Chroma |
Reg#(Bit#(1)) work1Stage <- mkReg(0); |
Reg#(Bit#(2)) work1HorNum <- mkReg(0); |
Reg#(Bit#(4)) work1VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work1Vector8 <- mkRegU; |
Reg#(Bool) work1Done <- mkReg(False); |

// Counters, sliding windows and completion flags of the work2 rules. |
Reg#(Bit#(2)) work2SubMbPart <- mkReg(0); |
Reg#(Bit#(2)) work2HorNum <- mkReg(0); |
Reg#(Bit#(4)) work2VerNum <- mkReg(0); |
Reg#(Vector#(20,Bit#(8))) work2Vector8 <- mkRegU; |
Reg#(Vector#(20,Bit#(15))) work2Vector15 <- mkRegU; |
Reg#(Vector#(4,Bit#(1))) resultReady <- mkRegU; |
Reg#(Bool) work2Done <- mkReg(False); |
Reg#(Bool) work8x8Done <- mkReg(False); |

// Output-side counters (used by rules outside the visible part of this module). |
Reg#(Bit#(2)) outBlockNum <- mkReg(0); |
Reg#(Bit#(2)) outPixelNum <- mkReg(0); |
Reg#(Bool) outDone <- mkReg(False); |
|
|
// Rule sendEndOfFrameReq: when an end-of-frame is pending, forward it to the memory |
// side as an IPLoadEndFrame request and clear the pending flag. The load rules are |
// guarded by !endOfFrameFlag, so they wait until this has fired. |
rule sendEndOfFrameReq( endOfFrameFlag ); |
   memReqQ.enq(IPLoadEndFrame); |
   endOfFrameFlag <= False; |
endrule |
|
|
// Rule loadLuma: for the luma request at the head of reqfifoLoad, issues one |
// IPLoadLuma memory request per firing. loadHorNum / loadVerNum (and loadStage in the |
// two-stage case) walk over the reference words needed for the block; the request is |
// dequeued from reqfifoLoad when the walk is complete. Blocked while an end-of-frame |
// request is pending. |
rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag ); |
// Low two bits of each motion-vector component are the fractional part; bits [3:2] |
// of the horizontal component give the offset inside a 4-sample word. |
Bit#(2) xfracl = reqdata.mvhor[1:0]; |
Bit#(2) yfracl = reqdata.mvver[1:0]; |
Bit#(2) offset = reqdata.mvhor[3:2]; |
// twoStage: both fractions are odd. Stage 0 then loads with vertical interpolation |
// margins and stage 1 with horizontal ones. |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Bool horInter = (twoStage ? loadStage==1 : xfracl!=0); |
Bool verInter = (twoStage ? loadStage==0 : yfracl!=0); |
Bit#(2) offset2 = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0); |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,2)) horAddr; |
Bit#(TAdd#(PicHeightSz,4)) verAddr; |
// Un-displaced position of the word being fetched: block position plus walk counters, |
// with a +1 adjustment for the fraction-3 cases of the two-stage walk. |
Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0); |
Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0); |
// Integer displacement, moved back by 2 when interpolating in that direction. |
Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0); |
Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0); |
// Horizontal clamp: a negative displacement larger than the position clamps to |
// address 0, a position at or beyond {picWidth,4'b0000} clamps to the last word; |
// both cases set horOut. |
if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(mvhortemp); |
if(horTemp>=zeroExtend({picWidth,4'b0000})) |
begin |
horAddr = {picWidth-1,2'b11}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp to the first / last row; no out-of-bounds flag is produced. |
if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(mvvertemp); |
if(verTemp>=zeroExtend({picHeight,4'b0000})) |
verAddr = {picHeight-1,4'b1111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// verFirst selects the walk order: rows innermost (column by column) when set, |
// otherwise words innermost (row by row). |
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3)); |
// Last counter values for this walk: block size plus interpolation margins. |
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1)); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0); |
if(verFirst) |
begin |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
if(loadHorNum < loadHorNumMax) |
begin |
if(loadStage == 1) |
begin |
// Stage 1 of the two-stage walk: depending on the adjusted offset, either |
// jump straight to the last column or finish the request. |
offset = offset + (xfracl==3 ? 1 : 0); |
if(!(offset==1 || (xfracl==3 && offset==2))) |
loadHorNum <= loadHorNumMax; |
else |
begin |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
else |
loadHorNum <= loadHorNum+1; |
end |
else |
begin |
if(twoStage && loadStage==0) |
begin |
// End of stage 0: switch to stage 1 and choose its starting column from |
// the adjusted offset. |
offset = offset + (xfracl==3 ? 1 : 0); |
if((xfracl==3 ? offset<3 : offset<2)) |
loadHorNum <= 0; |
else |
loadHorNum <= loadHorNumMax+1; |
loadStage <= 1; |
end |
else |
begin |
loadHorNum <= 0; |
loadStage <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
end |
else |
begin |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
end |
// Larger partitions are only reported; the rule still runs with the 4-wide/4-high defaults. |
if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16) |
$display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported"); |
//$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Rule loadChroma: for the chroma request at the head of reqfifoLoad, issues one |
// IPLoadChroma memory request per firing, walking loadHorNum (inner) and loadVerNum |
// (outer) over the reference words needed; the request is dequeued when the walk is |
// complete. Blocked while an end-of-frame request is pending. |
rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag ); |
// Low three bits of each motion-vector component are the fractional part. |
Bit#(3) xfracc = reqdata.mvhor[2:0]; |
Bit#(3) yfracc = reqdata.mvver[2:0]; |
// Offset inside a 4-sample word: integer displacement bits plus 2 when hor is odd. |
Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0}; |
Bit#(1) horOut = 0; |
Bit#(TAdd#(PicWidthSz,1)) horAddr; |
Bit#(TAdd#(PicHeightSz,3)) verAddr; |
Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00}); |
Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum); |
// Horizontal clamp: address 0 or the last word of the row, with horOut set. |
if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp) |
begin |
horAddr = 0; |
horOut = 1; |
end |
else |
begin |
horTemp = horTemp + signExtend(reqdata.mvhor[13:3]); |
if(horTemp>=zeroExtend({picWidth,3'b000})) |
begin |
horAddr = {picWidth-1,1'b1}; |
horOut = 1; |
end |
else |
horAddr = truncate(horTemp>>2); |
end |
// Vertical clamp to the first / last row; no out-of-bounds flag is produced. |
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp) |
verAddr = 0; |
else |
begin |
verTemp = verTemp + signExtend(reqdata.mvver[11:3]); |
if(verTemp>=zeroExtend({picHeight,3'b000})) |
verAddr = {picHeight-1,3'b111}; |
else |
verAddr = truncate(verTemp); |
end |
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr}); |
// Last counter values for this walk, from block type, offset and fractions: an extra |
// word is fetched when the samples straddle a word boundary, an extra row when |
// yfracc is non-zero. |
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
if(loadHorNum < loadHorNumMax) |
loadHorNum <= loadHorNum+1; |
else |
begin |
loadHorNum <= 0; |
if(loadVerNum < loadVerNumMax) |
loadVerNum <= loadVerNum+1; |
else |
begin |
loadVerNum <= 0; |
reqfifoLoad.deq(); |
end |
end |
//$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr); |
endrule |
|
|
// Rule work1Luma: first luma work stage. For the luma work item at the head of |
// reqfifoWork1, consumes one IPLoadResp word from memRespQ per firing and, depending |
// on the fractional position, either |
//   (a) reorders or horizontally interpolates the row and writes it to workFile, |
//   (b) vertically interpolates through a sliding window held in work1Vector8 and |
//       writes to workFile (also saving raw rows to storeFile when twoStage), or |
//   (c) in the second stage of twoStage, saves the raw rows to storeFile. |
// work1HorNum / work1VerNum walk over the incoming words; work1Done is set when the |
// walk is complete. If the head of memRespQ is not an IPLoadResp nothing is consumed. |
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Unpack the 32-bit response into four 8-bit samples, sample 0 in the low byte. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
//$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] ); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
// Case (a): no vertical-first processing needed in this stage. |
if(xfracl==0 || yfracl==0 || xfracl==2) |
begin |
if(xfracl==0)//reorder |
begin |
// Assemble an aligned row from the previous word (work1Vector8[0..3]) and the |
// current one according to offset, and remember the current word. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
if(offset <= 3-fromInteger(ii) && offset!=0) |
tempResult8[ii] = work1Vector8[offsetplusii]; |
else |
tempResult8[ii] = readdata[offsetplusii]; |
work1Vector8Next[ii] = readdata[ii]; |
end |
// Promote to the 15-bit intermediate scale (value << 5). |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
else//horizontal interpolation |
begin |
offset = offset-2; |
// Shift the window left by one word and insert the new word at 8-offset. |
for(Integer ii=0; ii<8; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work1Vector8Next[tempIndex] = readdata[ii]; |
end |
// Six-tap filter; for xfracl 1 / 3 the rounded result is averaged with the |
// neighbouring sample at window index ii+2 / ii+3. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1); |
end |
end |
// The first workHorNumOffset words of each row only fill the window; results are |
// written from then on. |
Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1)); |
if(work1HorNum >= workHorNumOffset) |
begin |
Bit#(1) horAddr = truncate(work1HorNum-workHorNumOffset); |
// With no vertical step to follow, store the final 8-bit value at the 15-bit scale. |
if(yfracl == 0) |
begin |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000}); |
end |
workFile.upd({workFileFlag,work1VerNum,horAddr},tempResult15); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0); |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
work1Done <= True; |
end |
end |
end |
// Case (b): vertical interpolation, words arrive column by column. |
else if(work1Stage == 0)//vertical interpolation |
begin |
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0); |
// Filter over the five rows held in the window plus the incoming row. |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]); |
// Slide the window up by one row and append the incoming row. |
for(Integer ii=0; ii<16; ii=ii+1) |
work1Vector8Next[ii] = work1Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work1Vector8Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1)); |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
Bit#(2) horAddr = work1HorNum; |
Bit#(3) verAddr = truncate(work1VerNum-5); |
// The first five rows only fill the window. |
if(work1VerNum > 4) |
begin |
workFile.upd({workFileFlag,verAddr,horAddr},tempResult15); |
//$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]); |
end |
if(twoStage) |
begin |
// Also keep the raw rows that belong to the block itself (rows 2.. or 3.. of |
// the fetched column, depending on yfracl) in storeFile. |
Bit#(2) storeHorAddr = work1HorNum; |
Bit#(4) storeVerAddr = work1VerNum; |
if((xfracl==3 ? offset<3 : offset<2)) |
storeHorAddr = storeHorAddr+1; |
if(yfracl==3) |
storeVerAddr = storeVerAddr-3; |
else |
storeVerAddr = storeVerAddr-2; |
if(storeVerAddr < 8) |
storeFile.upd({workFileFlag,storeVerAddr[2:0],storeHorAddr},readdata); |
end |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
if(twoStage) |
begin |
// Move on to the second stage, starting at the column chosen from offset. |
work1Stage <= 1; |
if((xfracl==3 ? offset<3 : offset<2)) |
work1HorNum <= 0; |
else |
work1HorNum <= workHorNumMax+1; |
end |
else |
begin |
work1HorNum <= 0; |
work1Done <= True; |
end |
end |
end |
end |
// Case (c): remaining raw columns for the horizontal pass of the two-stage case. |
else//second stage of twoStage |
begin |
storeFile.upd({workFileFlag,work1VerNum[2:0],work1HorNum},readdata); |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
work1VerNum <= 0; |
offset = offset + (xfracl==3 ? 1 : 0); |
if(work1HorNum<workHorNumMax && !(offset==1 || (xfracl==3 && offset==2))) |
work1HorNum <= workHorNumMax; |
else |
begin |
work1HorNum <= 0; |
work1Stage <= 0; |
work1Done <= True; |
end |
end |
end |
end |
// Written on every firing; unchanged when no response was consumed. |
work1Vector8 <= work1Vector8Next; |
//$display( "Trace interpolator: work1Luma %h %h %h %h %h %h", xfracl, yfracl, work1HorNum, work1VerNum, offset, work1Stage); |
endrule |
|
|
// Rule work2Luma: second luma work stage. For the luma work item held in reqregWork2, |
// reads the intermediate rows that the first stage left in workFile / storeFile (the |
// half selected by 1-workFileFlag), finishes the interpolation and writes 8-bit rows |
// to resultFile, setting the matching resultReady bit when a 4x4 block is complete. |
// Three cases: plain copy (yfracl==0), vertical interpolation (xfracl 0 or 2) and |
// horizontal interpolation (remaining cases). Sets work2Done at the end of each |
// sub-partition and work8x8Done after the last one. |
rule work2Luma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWLuma .reqdata &&& !work2Done &&& !work8x8Done ); |
let xfracl = reqdata.xFracL; |
let yfracl = reqdata.yFracL; |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
Vector#(20,Bit#(8)) work2Vector8Next = work2Vector8; |
Vector#(20,Bit#(15)) work2Vector15Next = work2Vector15; |
Vector#(4,Bit#(1)) resultReadyNext = resultReady; |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Vector#(4,Bit#(15)) readdata = replicate(0); |
// Case 1: the first stage already produced final values (stored as value << 5); |
// extract bits [12:5] and copy them into resultFile. |
if(yfracl==0) |
begin |
readdata = workFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
tempResult8[ii] = (readdata[ii])[12:5]; |
resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},tempResult8); |
work2HorNum <= work2HorNum+1; |
if(work2HorNum == 3) |
begin |
resultReadyNext[(work2VerNum[1:0])] = 1; |
if(work2VerNum == 3) |
begin |
work2VerNum <= 0; |
work2Done <= True; |
// More sub-partitions follow for 4x8 / 8x4 (two) and 4x4 (four). |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
else |
work2VerNum <= work2VerNum+1; |
end |
end |
// Case 2: vertical six-tap filter over the 15-bit rows, column by column. |
else if(xfracl==0 || xfracl==2)//vertical interpolation |
begin |
readdata = workFile.sub({(1-workFileFlag),work2VerNum,work2HorNum[0]}); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15[ii],work2Vector15[ii+4],work2Vector15[ii+8],work2Vector15[ii+12],work2Vector15[ii+16],readdata[ii]); |
// For yfracl 1 / 3 average with the rounded sample from window row 2 / 3. |
if(yfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+8]+16)>>5))} + 1) >> 1); |
else if(yfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15[ii+12]+16)>>5))} + 1) >> 1); |
end |
// Slide the five-row window up by one row and append the row just read. |
for(Integer ii=0; ii<16; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
work2Vector15Next[ii+16] = readdata[ii]; |
Bit#(2) workHorNumMax = 1; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5; |
// The first five rows only fill the window. |
if(work2VerNum > 4) |
begin |
Bit#(1) horAddr = truncate(work2HorNum); |
Bit#(3) verAddr = truncate(work2VerNum-5); |
// Place the sub-partition inside the 8x8 result area. |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
// Fourth row of a 4x4 block written: mark that block ready. |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
end |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
// Case 3: horizontal pass, row by row. |
else//horizontal interpolation |
begin |
offset = offset-2; |
if(yfracl == 2) |
begin |
// Horizontal six-tap filter over the vertically filtered 15-bit rows. |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector15Next[ii] = work2Vector15[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector15Next[tempIndex] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult8[ii] = interpolate15to8(work2Vector15Next[ii],work2Vector15Next[ii+1],work2Vector15Next[ii+2],work2Vector15Next[ii+3],work2Vector15Next[ii+4],work2Vector15Next[ii+5]); |
if(xfracl == 1) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+2]+16)>>5))} + 1) >> 1); |
else if(xfracl == 3) |
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((work2Vector15Next[ii+3]+16)>>5))} + 1) >> 1); |
end |
end |
else |
begin |
// Two-stage case: horizontal six-tap filter over the raw rows from storeFile ... |
Vector#(4,Bit#(8)) readdata8 = storeFile.sub({(1-workFileFlag),work2VerNum[2:0],work2HorNum}); |
for(Integer ii=0; ii<8; ii=ii+1) |
work2Vector8Next[ii] = work2Vector8[ii+4]; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset); |
work2Vector8Next[tempIndex] = readdata8[ii]; |
end |
Vector#(4,Bit#(15)) tempResult15 = replicate(0); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
tempResult15[ii] = interpolate8to15(work2Vector8Next[ii],work2Vector8Next[ii+1],work2Vector8Next[ii+2],work2Vector8Next[ii+3],work2Vector8Next[ii+4],work2Vector8Next[ii+5]); |
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5)); |
end |
// ... averaged with the vertically filtered samples from workFile, re-aligned |
// by verOffset in the same way as the reorder step of the first stage. |
Bit#(2) verOffset; |
Vector#(4,Bit#(15)) verResult15 = replicate(0); |
if(xfracl == 1) |
verOffset = reqdata.offset; |
else |
verOffset = reqdata.offset+1; |
readdata = workFile.sub({(1-workFileFlag),work2VerNum[2:0],(work2HorNum-2+(verOffset==0?0:1))}); |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(2) offsetplusii = verOffset+fromInteger(ii); |
if(verOffset <= 3-fromInteger(ii) && verOffset!=0) |
verResult15[ii] = work2Vector15[offsetplusii]; |
else |
verResult15[ii] = readdata[offsetplusii]; |
work2Vector15Next[ii] = readdata[ii]; |
end |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5))); |
tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1); |
end |
end |
// The first two words of each row only fill the window. |
if(work2HorNum >= 2) |
begin |
Bit#(1) horAddr = truncate(work2HorNum-2); |
Bit#(3) verAddr = truncate(work2VerNum); |
horAddr = horAddr + ((blockT==IP4x8&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[0]==1) ? 1 : 0); |
verAddr = verAddr + ((blockT==IP8x4&&work2SubMbPart==1)||(blockT==IP4x4&&work2SubMbPart[1]==1) ? 4 : 0); |
resultFile.upd({verAddr,horAddr},tempResult8); |
if(verAddr[1:0] == 3) |
resultReadyNext[{verAddr[2],horAddr}] = 1; |
//$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext)); |
end |
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2; |
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3); |
if(work2HorNum < workHorNumMax) |
work2HorNum <= work2HorNum+1; |
else |
begin |
work2HorNum <= 0; |
if(work2VerNum < workVerNumMax) |
work2VerNum <= work2VerNum+1; |
else |
begin |
work2VerNum <= 0; |
work2Done <= True; |
if(((blockT==IP4x8 || blockT==IP8x4) && work2SubMbPart==0) || (blockT==IP4x4 && work2SubMbPart<3)) |
work2SubMbPart <= work2SubMbPart+1; |
else |
begin |
work2SubMbPart <= 0; |
work8x8Done <= True; |
end |
end |
end |
end |
// Commit the updated windows and ready flags. |
work2Vector8 <= work2Vector8Next; |
work2Vector15 <= work2Vector15Next; |
resultReady <= resultReadyNext; |
//$display( "Trace interpolator: work2Luma %h %h %h %h %h", xfracl, yfracl, work2HorNum, work2VerNum, offset); |
endrule |
|
|
// Stage-1 chroma rule. Fires while the head of reqfifoWork1 is an IPWChroma request and |
// work1Done is clear. Each firing that finds an IPLoadResp at the head of memRespQ consumes |
// one 32-bit word (four 8-bit samples), combines it with samples buffered in work1Vector8 |
// using weights derived from xFracC/yFracC, and writes a finished result word to storeFile. |
// work1HorNum / work1VerNum / work1SubMbPart / work1MbPart step through the words, rows and |
// partitions of the request. |
rule work1Chroma ( reqfifoWork1.first() matches tagged IPWChroma .reqdata &&& !work1Done ); |
// Fractional offsets widened to 4 bits so that (8-xfracc) and (8-yfracc) can represent 8. |
Bit#(4) xfracc = zeroExtend(reqdata.xFracC); |
Bit#(4) yfracc = zeroExtend(reqdata.yFracC); |
let offset = reqdata.offset; |
let blockT = reqdata.bt; |
// Local copy of the sample buffer; written back to work1Vector8 at the end of the rule. |
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; |
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata) |
begin |
memRespQ.deq(); |
// Split the 32-bit response into four bytes, least-significant byte in element 0. |
Vector#(4,Bit#(8)) readdata = replicate(0); |
readdata[0] = tempreaddata[7:0]; |
readdata[1] = tempreaddata[15:8]; |
readdata[2] = tempreaddata[23:16]; |
readdata[3] = tempreaddata[31:24]; |
// tempWork8: five-sample window of the row being read now. |
// tempPrev8: five-sample window saved from the previous row. |
Vector#(5,Bit#(8)) tempWork8 = replicate(0); |
Vector#(5,Bit#(8)) tempPrev8 = replicate(0); |
Vector#(4,Bit#(8)) tempResult8 = replicate(0); |
Bool resultReadyFlag = False; |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
// offsetplusii is 2 bits wide, so offset+ii wraps modulo 4. |
Bit#(2) offsetplusii = offset+fromInteger(ii); |
// Take the sample from the previously saved word (work1Vector8[0..3]) when offset+ii does not |
// wrap (offset <= 3-ii) and none of the listed special cases applies; otherwise take it from |
// the word just read. |
if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0)) |
tempWork8[ii] = work1Vector8[offsetplusii]; |
else |
tempWork8[ii] = readdata[offsetplusii]; |
// Slots 0..3 always keep the most recently read word for the next firing. |
work1Vector8Next[ii] = readdata[ii]; |
end |
// The fifth window sample always comes from the fresh word. |
tempWork8[4] = readdata[offset]; |
// For IP16x8/IP16x16, when work1HorNum is 1 (xfracc==0 and offset==0) or 2 (otherwise), the |
// previous-row window is read from, and the current window saved to, slots 9..13. |
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2)) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
begin |
tempPrev8[ii] = work1Vector8[ii+9]; |
work1Vector8Next[ii+9] = tempWork8[ii]; |
end |
end |
else |
begin |
// Otherwise the previous-row window lives in slots 4..8; it is overwritten with the current |
// window only on the firings selected by the condition below. |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = work1Vector8[ii+4]; |
if(work1HorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3)))) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
work1Vector8Next[ii+4] = tempWork8[ii]; |
end |
end |
// With yfracc==0 the tempWork8 terms below have weight 0 and the tempPrev8 terms carry the |
// (8-yfracc) weight, so the current row is substituted for the previous row. |
if(yfracc==0) |
begin |
for(Integer ii=0; ii<5; ii=ii+1) |
tempPrev8[ii] = tempWork8[ii]; |
end |
// Weighted sum of the four neighbouring samples; the four weights add up to 64, so the |
// sum is rounded with +32 and scaled back with >>6. |
for(Integer ii=0; ii<4; ii=ii+1) |
begin |
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]); |
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]); |
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]); |
tempResult8[ii] = truncate((tempVal+32)>>6); |
end |
// Row 0 produces no output when yfracc!=0 (there is no previous row yet). |
if(work1VerNum > 0 || yfracc==0) |
begin |
if(blockT==IP4x8 || blockT==IP4x4) |
begin |
// Results 0..1 of this firing are saved at slots tempIndex/tempIndex+1 and moved to lanes 2..3; |
// lanes 0..1 are filled with the pair stored earlier at the same slots. The word is only |
// flagged ready when work1SubMbPart[0]==1. |
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1); |
work1Vector8Next[tempIndex] = tempResult8[0]; |
work1Vector8Next[tempIndex+1] = tempResult8[1]; |
tempResult8[2] = tempResult8[0]; |
tempResult8[3] = tempResult8[1]; |
tempResult8[0] = work1Vector8[tempIndex]; |
tempResult8[1] = work1Vector8[tempIndex+1]; |
if((work1HorNum>0 || offset[1]==0) && work1SubMbPart[0]==1) |
resultReadyFlag = True; |
end |
else |
begin |
if(work1HorNum>0 || (xfracc==0 && offset==0)) |
resultReadyFlag = True; |
end |
end |
if(resultReadyFlag) |
begin |
// Build the storeFile address from the word/row counters plus partition-dependent offsets. |
Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? work1HorNum : work1HorNum-1))); |
Bit#(3) verAddr = truncate((yfracc==0 ? work1VerNum : work1VerNum-1)); |
horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : work1MbPart[0]); |
verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {work1MbPart[0],2'b00} : {work1MbPart[1],2'b00})); |
verAddr = verAddr + ((blockT==IP8x4&&work1SubMbPart==1)||(blockT==IP4x4&&work1SubMbPart[1]==1) ? 2 : 0); |
storeFile.upd({workFileFlag,1'b0,verAddr,horAddr},tempResult8); |
end |
// Last word index per row and last row index for this block type and fractional offsets. |
Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1))); |
Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1); |
// Advance the counters: word within row, then row, then sub-partition, then partition. |
if(work1HorNum < workHorNumMax) |
work1HorNum <= work1HorNum+1; |
else |
begin |
work1HorNum <= 0; |
if(work1VerNum < workVerNumMax) |
work1VerNum <= work1VerNum+1; |
else |
begin |
Bool allDone = False; |
work1VerNum <= 0; |
if(((blockT==IP4x8 || blockT==IP8x4) && work1SubMbPart==0) || (blockT==IP4x4 && work1SubMbPart<3)) |
work1SubMbPart <= work1SubMbPart+1; |
else |
begin |
work1SubMbPart <= 0; |
if(((blockT==IP16x8 || blockT==IP8x16) && work1MbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && work1MbPart<3)) |
work1MbPart <= work1MbPart+1; |
else |
begin |
work1MbPart <= 0; |
work1Done <= True; |
allDone = True; |
end |
end |
// When everything is done the request is left at the head of reqfifoWork1 (work1Done is set |
// instead); otherwise the finished request is dequeued here. |
if(!allDone) |
reqfifoWork1.deq(); |
end |
end |
end |
work1Vector8 <= work1Vector8Next; |
//$display( "Trace interpolator: work1Chroma %h %h %h %h %h", xfracc, yfracc, work1HorNum, work1VerNum, offset); |
endrule |
|
|
// Stage-2 chroma rule: on every firing one word is copied from storeFile (read with |
// 1-workFileFlag as the top address bit) into resultFile. After the fourth word of a block |
// its resultReady flag is set; after the fourth block work2Done and work8x8Done are raised. |
rule work2Chroma ( reqregWork2 matches tagged Valid .vdata &&& vdata matches tagged IPWChroma .reqdata &&& !work2Done &&& !work8x8Done ); |
   Bool lastWordOfBlock = (work2HorNum == 3); |
   Bool lastBlock = (work2VerNum == 3); |
   // Move the word selected by the current block/word counters. |
   resultFile.upd({work2VerNum[1],work2HorNum,work2VerNum[0]},storeFile.sub({(1-workFileFlag),1'b0,work2VerNum[1],work2HorNum,work2VerNum[0]})); |
   work2HorNum <= work2HorNum+1; |
   // Flag the block as ready once its last word has been copied. |
   Vector#(4,Bit#(1)) readyFlags = resultReady; |
   if(lastWordOfBlock) |
      readyFlags[work2VerNum] = 1; |
   resultReady <= readyFlags; |
   // Step to the next block, or finish after the last one. |
   if(lastWordOfBlock && lastBlock) |
      begin |
         work2VerNum <= 0; |
         work2Done <= True; |
         work8x8Done <= True; |
      end |
   else if(lastWordOfBlock) |
      work2VerNum <= work2VerNum+1; |
   //$display( "Trace interpolator: work2Chroma %h %h", work2HorNum, work2VerNum); |
endrule |
|
|
// Output rule: streams the words of the current block from resultFile into outfifo once |
// that block's resultReady flag is set. outDone is raised after the last word of block 3. |
rule outputing( !outDone && resultReady[outBlockNum]==1 ); |
   Bool blockFinished = (outPixelNum == 3); |
   outfifo.enq(resultFile.sub({outBlockNum[1],outPixelNum,outBlockNum[0]})); |
   outPixelNum <= outPixelNum+1; |
   if(blockFinished) |
      outBlockNum <= outBlockNum+1; |
   if(blockFinished && outBlockNum == 3) |
      outDone <= True; |
   //$display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Hand-over rule used while the current 8x8 unit is not yet complete: once stage 1 is done |
// and stage 2 is done or idle, the finished request moves from reqfifoWork1 into |
// reqregWork2 and workFileFlag is toggled. |
rule switching( work1Done && (work2Done || !isValid(reqregWork2)) && !work8x8Done); |
   // Pass the request on to stage 2 and retire it from the stage-1 queue. |
   reqregWork2 <= tagged Valid (reqfifoWork1.first()); |
   reqfifoWork1.deq(); |
   workFileFlag <= 1-workFileFlag; |
   // Re-arm both stages. |
   work2Done <= False; |
   work1Done <= False; |
   //$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Hand-over rule used when an 8x8 unit has been completed and fully output: performs the |
// same hand-over as the plain switching rule and additionally clears the 8x8 bookkeeping |
// (work8x8Done, outDone, resultReady). |
rule switching8x8( work1Done && (work2Done || !isValid(reqregWork2)) && work8x8Done && outDone); |
   // Pass the request on to stage 2 and retire it from the stage-1 queue. |
   reqregWork2 <= tagged Valid (reqfifoWork1.first()); |
   reqfifoWork1.deq(); |
   workFileFlag <= 1-workFileFlag; |
   // Re-arm both stages. |
   work2Done <= False; |
   work1Done <= False; |
   // Start a fresh 8x8 unit on the output side. |
   resultReady <= replicate(0); |
   work8x8Done <= False; |
   outDone <= False; |
   //$display( "Trace interpolator: switching8x8 %h %h", outBlockNum, outPixelNum); |
endrule |
|
|
// Stores the supplied picture width in the picWidth register. |
method Action setPicWidth( Bit#(PicWidthSz) newPicWidth ); |
   picWidth._write(newPicWidth); |
endmethod |
|
// Stores the supplied picture height in the picHeight register. |
method Action setPicHeight( Bit#(PicHeightSz) newPicHeight ); |
   picHeight._write(newPicHeight); |
endmethod |
|
// Accepts one interpolation request. The request is queued unchanged in reqfifoLoad, and a |
// reduced work descriptor (fractional motion-vector bits, word offset, block type) is queued |
// in reqfifoWork1 for luma and chroma requests. |
method Action request( InterpolatorIT inputdata ); |
   reqfifoLoad.enq(inputdata); |
   case (inputdata) matches |
      tagged IPLuma .indata : |
         reqfifoWork1.enq(IPWLuma {xFracL:indata.mvhor[1:0],yFracL:indata.mvver[1:0],offset:indata.mvhor[3:2],bt:indata.bt}); |
      tagged IPChroma .indata : |
         reqfifoWork1.enq(IPWChroma {xFracC:indata.mvhor[2:0],yFracC:indata.mvver[2:0],offset:indata.mvhor[4:3]+{indata.hor[0],1'b0},bt:indata.bt}); |
   endcase |
endmethod |
|
// Returns the word at the head of outfifo without removing it. |
method Vector#(4,Bit#(8)) first(); |
   let headWord = outfifo.first(); |
   return headWord; |
endmethod |
|
// Removes the word at the head of outfifo. |
method Action deq(); |
   outfifo.deq; |
endmethod |
|
// Raises endOfFrameFlag. |
method Action endOfFrame(); |
   endOfFrameFlag._write(True); |
endmethod |
|
// Memory client port: requests are taken from memReqQ through a Get interface and |
// responses are delivered into memRespQ through a Put interface. |
interface Client mem_client; |
interface Get request = fifoToGet(memReqQ); |
interface Put response = fifoToPut(memRespQ); |
endinterface |
|
|
endmodule |
|
|
endpackage |
/trunk/src/IDeblockFilter.bsv
0,0 → 1,27
//********************************************************************** |
// Interface for Deblocking Filter |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IDeblockFilter; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Port list of the deblocking filter: one Put input, one Get output and two memory clients. |
interface IDeblockFilter; |
|
// Interface for inter-module io |
// ioin accepts EntropyDecOT items; ioout delivers DeblockFilterOT items. |
interface Put#(EntropyDecOT) ioin; |
interface Get#(DeblockFilterOT) ioout; |
|
// Interface for module to memory |
// NOTE(review): the MemReq/MemResp parameters are presumably address width and data width |
// (32-bit data port, 13-bit parameter port) -- confirm against H264Types. |
interface Client#(MemReq#(TAdd#(PicWidthSz,5),32),MemResp#(32)) mem_client_data; |
interface Client#(MemReq#(PicWidthSz,13),MemResp#(13)) mem_client_parameter; |
|
endinterface |
|
endpackage |
|
/trunk/src/IFinalOutput.bsv
0,0 → 1,22
//********************************************************************** |
// Interface for Final Output |
//---------------------------------------------------------------------- |
// |
// |
// |
|
package IFinalOutput; |
|
import H264Types::*; |
import GetPut::*; |
import ClientServer::*; |
|
// Port list of the final output stage: a single Put input and no outputs. |
interface IFinalOutput; |
|
// Interface for inter-module io |
// ioin accepts BufferControlOT items. |
interface Put#(BufferControlOT) ioin; |
|
endinterface |
|
endpackage |
|
/trunk/src/mkEntropyDec_orig.bsv
0,0 → 1,1699
//********************************************************************** |
// Entropy Decoder implementation |
//---------------------------------------------------------------------- |
// |
// |
|
package mkEntropyDec; |
|
import H264Types::*; |
import ExpGolomb::*; |
import CAVLC::*; |
import ICalc_nC::*; |
import mkCalc_nC::*; |
import IEntropyDec::*; |
import FIFO::*; |
|
import Connectable::*; |
import GetPut::*; |
import ClientServer::*; |
|
|
//----------------------------------------------------------- |
// Local Datatypes |
//----------------------------------------------------------- |
|
// Parser state of the entropy decoder. For tags that carry a Bit#(5) payload, the payload |
// is the step number within that state (the parser rule dispatches on it with |
// `case ( step )`). The bit encoding is derived automatically (deriving Bits), so the order |
// of the tags below determines the encoding. |
typedef union tagged |
{ |
void Start; //special state that initializes the process. |
void NewUnit; //special state that checks the NAL unit type. |
|
Bit#(5) CodedSlice; //decodes a type of NAL unit |
void SEI; //decodes a type of NAL unit |
Bit#(5) SPS; //decodes a type of NAL unit |
Bit#(5) PPS; //decodes a type of NAL unit |
void AUD; //decodes a type of NAL unit |
void EndSequence; //decodes a type of NAL unit |
void EndStream; //decodes a type of NAL unit |
void Filler; //decodes a type of NAL unit |
|
Bit#(5) SliceData; //decodes slice data (part of a CodedSlice NAL unit) |
Bit#(5) MacroblockLayer; //decodes macroblock layer (part of a CodedSlice NAL unit) |
Bit#(5) MbPrediction; //decodes macroblock prediction (part of a CodedSlice NAL unit) |
Bit#(5) SubMbPrediction; //decodes sub-macroblock prediction (part of a CodedSlice NAL unit) |
Bit#(5) Residual; //decodes residual (part of a CodedSlice NAL unit) |
Bit#(5) ResidualBlock; //decodes residual block (part of a CodedSlice NAL unit) |
} |
State deriving(Eq,Bits); |
|
|
|
//----------------------------------------------------------- |
// Helper functions |
// Maps the mb_type syntax element onto the MbType type. |
//   in_mb_type    : mb_type value as decoded from the bitstream |
//   in_slice_type : slice_type; for values 2 and 7 (I slices) mb_type is shifted up by 5 so |
//                   that a single table serves both slice kinds |
// Table indices 0..4 are the P macroblock types, 5 is I_NxN and 30 is I_PCM. Every other |
// index is an I_16x16 type whose three fields are derived from (index-6). |
function MbType mbtype_convert( Bit#(5) in_mb_type, Bit#(4) in_slice_type );//converts mb_type syntax element to MbType type |
   Bool intraSlice = (in_slice_type == 2 || in_slice_type == 7); |
   Bit#(5) mbIndex = (intraSlice ? in_mb_type+5 : in_mb_type); |
   // I_16x16 fields: the low two bits select the prediction mode, the next two bits the |
   // chroma pattern. From 12 upwards the luma pattern is set and the chroma pattern is |
   // bumped by one (2-bit wrap-around intended). |
   Bit#(5) i16Index = mbIndex-6; |
   Bit#(2) predMode = i16Index[1:0]; |
   Bit#(2) chromaCbp = i16Index[3:2]; |
   Bit#(1) lumaCbp = 0; |
   if(i16Index >= 12) |
      begin |
         lumaCbp = 1; |
         chromaCbp = chromaCbp+1; |
      end |
   MbType converted = I_16x16{intra16x16PredMode:predMode, codedBlockPatternChroma:chromaCbp, codedBlockPatternLuma:lumaCbp}; |
   // The fixed table entries override the I_16x16 default. |
   case ( mbIndex ) |
      0: converted = P_L0_16x16; |
      1: converted = P_L0_L0_16x8; |
      2: converted = P_L0_L0_8x16; |
      3: converted = P_8x8; |
      4: converted = P_8x8ref0; |
      5: converted = I_NxN; |
      30: converted = I_PCM; |
   endcase |
   return converted; |
endfunction |
|
|
|
//----------------------------------------------------------- |
// Entropy Decoder Module |
//----------------------------------------------------------- |
|
|
(* synthesize *) |
module mkEntropyDec( IEntropyDec ); |
|
// Input queue and the two output queues written by the rules below. |
FIFO#(NalUnwrapOT) infifo <- mkSizedFIFO(entropyDec_infifo_size); |
FIFO#(EntropyDecOT) outfifo <- mkFIFO; |
FIFO#(EntropyDecOT_InverseTrans) outfifo_ITB <- mkFIFO; |
// Current parser state; reset value is Start. |
Reg#(State) state <- mkReg(Start); |
// Bits [6:5] and [4:0] of the first byte of a unit, captured by rule newunit. |
Reg#(Bit#(2)) nalrefidc <- mkReg(0); |
Reg#(Bit#(5)) nalunittype <- mkReg(0); |
// Bit buffer read by the parser rule, and the number of bits loaded into it (rule fillbuffer |
// adds 8/16/24/32 per transfer). |
Reg#(Buffer) buffer <- mkReg(0); |
Reg#(Bufcount) bufcount <- mkReg(0); |
|
//saved syntax elements |
Reg#(Bit#(5)) spsseq_parameter_set_id <- mkReg(0); |
Reg#(Bit#(5)) spslog2_max_frame_num <- mkReg(0); |
Reg#(Bit#(5)) spslog2_max_pic_order_cnt_lsb <- mkReg(0); |
Reg#(Bit#(2)) spspic_order_cnt_type <- mkReg(0); |
Reg#(Bit#(1)) spsdelta_pic_order_always_zero_flag <- mkReg(0); |
Reg#(Bit#(8)) spsnum_ref_frames_in_pic_order_cnt_cycle <- mkReg(0); |
Reg#(Bit#(8)) ppspic_parameter_set_id <- mkReg(0); |
Reg#(Bit#(1)) ppspic_order_present_flag <- mkReg(0); |
Reg#(Bit#(1)) ppsdeblocking_filter_control_present_flag <- mkReg(0); |
Reg#(Bit#(4)) shslice_type <- mkReg(0); |
Reg#(Bit#(3)) shdmemory_management_control_operation <- mkReg(0); |
Reg#(MbType) sdmmbtype <- mkReg(I_NxN); |
Reg#(Bit#(4)) sdmcodedBlockPatternLuma <- mkReg(0); |
Reg#(Bit#(2)) sdmcodedBlockPatternChroma <- mkReg(0); |
Reg#(Bit#(5)) sdmrTotalCoeff <- mkReg(0); |
Reg#(Bit#(2)) sdmrTrailingOnes <- mkReg(0); |
|
//derived decoding variables for slice data |
Reg#(Bit#(16)) tempreg <- mkReg(0); |
Reg#(Bit#(5)) num_ref_idx_l0_active_minus1 <- mkReg(0); |
Reg#(Bit#(PicAreaSz)) currMbAddr <- mkReg(0); |
Reg#(Bit#(3)) temp3bit0 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit1 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit2 <- mkReg(0); |
Reg#(Bit#(3)) temp3bit3 <- mkReg(0); |
Reg#(Bit#(5)) temp5bit <- mkReg(0); |
Reg#(Bit#(5)) temp5bit2 <- mkReg(0); |
Reg#(Bit#(5)) maxNumCoeff <- mkReg(0); |
FIFO#(Bit#(13)) cavlcFIFO <- mkSizedFIFO(16); |
// Sub-module instance; the parser rule calls its initialize and initialize_picWidth methods. |
Calc_nC calcnc <- mkCalc_nC(); |
Reg#(Bit#(1)) residualChroma <- mkReg(0); |
Reg#(Bit#(5)) totalCoeff <- mkReg(0); |
Reg#(Bit#(4)) zerosLeft <- mkReg(0); |
|
//exp-golomb 32-bit version states |
// Zero means no 32-bit Exp-Golomb decode is in progress; the parser rule stores the result of |
// expgolomb_numbits32 here on the first pass and consumes it on the second pass. |
Reg#(Bufcount) egnumbits <- mkReg(0); |
|
//extra-buffering states |
// extrabuffer collects up to four bytes (first byte in bits 31:24) and extrabufcount counts |
// them. extraendnalflag is set by rule fillextrabuffer when a non-RbspByte item arrives while |
// extrabuffer is partly filled; endnalflag is set by rule fillbuffer when the next input item |
// is NewUnit or EndOfFile. |
Reg#(Bit#(32)) extrabuffer <- mkReg(0); |
Reg#(Bit#(3)) extrabufcount <- mkReg(0); |
Reg#(Bit#(1)) extraendnalflag <- mkReg(0); |
Reg#(Bit#(1)) endnalflag <- mkReg(0); |
|
|
//----------------------------------------------------------- |
// Rules |
|
// Start state: discards input until a NewUnit marker is seen, then clears all buffering |
// state and moves to NewUnit. An EndOfFile marker is forwarded to outfifo. |
rule startup (state matches Start); |
   let headItem = infifo.first(); |
   if(headItem matches tagged EndOfFile) |
      begin |
         infifo.deq(); |
         outfifo.enq(EndOfFile); |
         $display( "INFO EntropyDec: EndOfFile reached" ); |
      end |
   else if(headItem matches tagged RbspByte .*) |
      begin |
         // Payload bytes outside a unit are dropped. |
         infifo.deq(); |
      end |
   else if(headItem matches tagged NewUnit) |
      begin |
         infifo.deq(); |
         state <= NewUnit; |
         // Reset the bit buffer and the byte staging registers. |
         buffer <= 0; |
         bufcount <= 0; |
         extrabuffer <= 0; |
         extrabufcount <= 0; |
         endnalflag <= 0; |
         extraendnalflag <= 0; |
      end |
endrule |
|
|
// NewUnit state: reads the first byte of a unit, records bits [6:5] and [4:0] of it, |
// selects the decoding state from the unit type and forwards the byte to both output |
// queues. A NewUnit or EndOfFile marker in this state sends the decoder back to Start |
// without consuming it. |
rule newunit (state matches NewUnit); |
   case (infifo.first()) matches |
      tagged RbspByte .rdata : |
         begin |
            Bit#(5) unitType = rdata[4:0]; |
            // Unsupported unit types fall back to Start. |
            State followState = Start; |
            infifo.deq(); |
            nalunittype <= unitType; |
            nalrefidc <= rdata[6:5]; |
            case (unitType) |
               1 : followState = CodedSlice 0; |
               5 : followState = CodedSlice 0; |
               6 : followState = SEI; |
               7 : followState = SPS 0; |
               8 : followState = PPS 0; |
               9 : followState = AUD; |
               10: followState = EndSequence; |
               11: followState = EndStream; |
               12: followState = Filler; |
               default: |
                  $display( "ERROR EntropyDec: NAL Unit Type = %d", rdata[4:0] ); |
            endcase |
            state <= followState; |
            $display("ccl2newunit"); |
            $display("ccl2rbspbyte %h", rdata); |
            outfifo.enq(NewUnit rdata); |
            outfifo_ITB.enq(NewUnit rdata); |
         end |
      tagged EndOfFile : state <= Start; |
      tagged NewUnit : state <= Start; |
   endcase |
endrule |
|
|
// Byte staging: collects up to four payload bytes from infifo into extrabuffer, first byte |
// in bits 31:24. If the next input item is not a payload byte and extrabuffer already holds |
// at least one byte, extraendnalflag is raised so the partial word can be flushed. |
rule fillextrabuffer (state != Start |
&& state != NewUnit |
&& extrabufcount < 4 |
&& extraendnalflag == 0); |
   if(infifo.first() matches tagged RbspByte .dbyte) |
      begin |
         // Drop the new byte into the lane selected by the current byte count. |
         Bit#(32) staged = extrabuffer; |
         case ( extrabufcount ) |
            0: staged[31:24] = dbyte; |
            1: staged[23:16] = dbyte; |
            2: staged[15:8] = dbyte; |
            3: staged[7:0] = dbyte; |
            default: $display( "ERROR EntropyDec: fillextrabuffer default case_" ); |
         endcase |
         extrabuffer <= staged; |
         extrabufcount <= extrabufcount + 1; |
         infifo.deq(); |
         //$display( "TRACE EntropyDec: fillextrabuffer RbspByte %h %h %h", dbyte, extrabufcount, extrabuffer); |
      end |
   else if(extrabufcount != 0) |
      begin |
         extraendnalflag <= 1; |
         //$display( "TRACE EntropyDec: fillextrabuffer else %h", extrabufcount); |
      end |
endrule |
|
|
// Moves the staged bytes from extrabuffer into the main bit buffer, directly below the bits |
// already counted by bufcount, and clears the staging registers. endnalflag is raised when |
// the next input item is a NewUnit or EndOfFile marker. |
rule fillbuffer (state != Start |
&& state != NewUnit |
&& bufcount<=truncate(buffersize-32) |
&& (extrabufcount == 4 || extraendnalflag == 1) |
&& endnalflag == 0);//predicate not sure |
   // Left shift that places the 32-bit staging word just below the bits already buffered. |
   Bufcount shiftAmount = truncate(buffersize)-bufcount-32; |
   Buffer incoming = zeroExtend(extrabuffer); |
   buffer <= (buffer | (incoming << zeroExtend(shiftAmount))); |
   // Account for the number of whole bytes that were staged. |
   if(extrabufcount == 1) |
      bufcount <= bufcount+8; |
   else if(extrabufcount == 2) |
      bufcount <= bufcount+16; |
   else if(extrabufcount == 3) |
      bufcount <= bufcount+24; |
   else if(extrabufcount == 4) |
      bufcount <= bufcount+32; |
   else |
      $display( "ERROR EntropyDec: fillbuffer default case" ); |
   extrabufcount <= 0; |
   extrabuffer <= 0; |
   Bool unitFinished = (infifo.first()==NewUnit || infifo.first()==EndOfFile); |
   if(unitFinished) |
      endnalflag <= 1; |
   //$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h %h %h %h %h %h %h", extrabufcount, bufcount, extrabuffer, incoming, shiftAmount, (incoming << zeroExtend(shiftAmount)), buffer, (buffer | (incoming << zeroExtend(shiftAmount)))); |
endrule |
|
|
rule parser (state != Start |
&&& state != NewUnit |
&&& (bufcount > truncate(buffersize-32) || endnalflag == 1));//predicate not sure |
//$display( "TRACE EntropyDec: fillbuffer RbspByte %h %h", bufcount, buffer ); |
|
Bufcount numbitsused = 0; |
State nextstate = Start; |
Int#(16) tempint = 0; |
Int#(32) tempint32 = 0; |
|
case ( state ) matches |
tagged CodedSlice .step : |
begin |
case ( step ) |
0: |
begin |
$display( "ccl2SHfirst_mb_in_slice %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHfirst_mb_in_slice truncate(expgolomb_unsigned(buffer))); |
currMbAddr <= truncate(expgolomb_unsigned(buffer)); |
calcnc.initialize(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 1; |
end |
1: |
begin |
$display( "ccl2SHslice_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHslice_type truncate(expgolomb_unsigned(buffer))); |
shslice_type <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 2; |
end |
2: |
begin |
$display( "ccl2SHpic_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 3; |
if(ppspic_parameter_set_id != truncate(expgolomb_unsigned(buffer))) $display( "ERROR EntropyDec: pic_parameter_set_id don't match" ); |
end |
3: |
begin |
Bit#(16) tttt = buffer[buffersize-1:buffersize-16]; |
tttt = tttt >> 16 - zeroExtend(spslog2_max_frame_num); |
$display( "ccl2SHframe_num %0d", tttt ); |
outfifo.enq(SHframe_num tttt); |
numbitsused = zeroExtend(spslog2_max_frame_num); |
nextstate = CodedSlice 4; |
end |
4: |
begin |
if(nalunittype == 5) |
begin |
$display( "ccl2SHidr_pic_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHidr_pic_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = CodedSlice 5; |
end |
5: |
begin |
if(spspic_order_cnt_type == 0) |
begin |
Bit#(16) tttt = buffer[buffersize-1:buffersize-16]; |
tttt = tttt >> 16 - zeroExtend(spslog2_max_pic_order_cnt_lsb); |
$display( "ccl2SHpic_order_cnt_lsb %0d", tttt ); |
outfifo.enq(SHpic_order_cnt_lsb tttt); |
numbitsused = zeroExtend(spslog2_max_pic_order_cnt_lsb); |
nextstate = CodedSlice 6; |
end |
else |
nextstate = CodedSlice 7; |
end |
6: |
begin |
if(ppspic_order_present_flag == 1) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 6; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt_bottom %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt_bottom truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 7; |
end |
end |
else |
nextstate = CodedSlice 7; |
end |
7: |
begin |
if(spspic_order_cnt_type == 1 && spsdelta_pic_order_always_zero_flag == 0) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 7; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt0 %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt0 truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 8; |
end |
end |
else |
nextstate = CodedSlice 9; |
end |
8: |
begin |
if(ppspic_order_present_flag == 1) |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = CodedSlice 8; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SHdelta_pic_order_cnt1 %0d", tempint32 ); |
outfifo.enq(SHdelta_pic_order_cnt1 truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = CodedSlice 9; |
end |
end |
else |
nextstate = CodedSlice 9; |
end |
9: |
begin |
if(shslice_type == 0 || shslice_type == 5) |
begin |
$display( "ccl2SHnum_ref_idx_active_override_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHnum_ref_idx_active_override_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 10; |
else |
nextstate = CodedSlice 11; |
end |
else |
nextstate = CodedSlice 11; |
end |
10: |
begin |
$display( "ccl2SHnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SHnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1)); |
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 11; |
end |
11: |
begin |
if(shslice_type != 2 && shslice_type != 7) |
begin |
$display( "ccl2SHRref_pic_list_reordering_flag_l0 %0d", buffer[buffersize-1] ); |
outfifo.enq(SHRref_pic_list_reordering_flag_l0 buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 12; |
else |
nextstate = CodedSlice 15; |
end |
else |
nextstate = CodedSlice 15; |
end |
12: |
begin |
$display( "ccl2SHRreordering_of_pic_nums_idc %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHRreordering_of_pic_nums_idc truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)==0 || expgolomb_unsigned(buffer)==1) |
nextstate = CodedSlice 13; |
else if(expgolomb_unsigned(buffer)==2) |
nextstate = CodedSlice 14; |
else |
nextstate = CodedSlice 15; |
end |
13: |
begin |
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1; |
$display( "ccl2SHRabs_diff_pic_num %0d", temp17 ); |
outfifo.enq(SHRabs_diff_pic_num temp17); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 12; |
end |
14: |
begin |
$display( "ccl2SHRlong_term_pic_num %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHRlong_term_pic_num truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 12; |
end |
15: |
begin |
if(nalrefidc == 0) |
nextstate = CodedSlice 23; |
else |
begin |
if(nalunittype == 5) |
begin |
$display( "ccl2SHDno_output_of_prior_pics_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDno_output_of_prior_pics_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = CodedSlice 16; |
end |
else |
nextstate = CodedSlice 17; |
end |
end |
16: |
begin |
$display( "ccl2SHDlong_term_reference_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDlong_term_reference_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = CodedSlice 23; |
end |
17: |
begin |
$display( "ccl2SHDadaptive_ref_pic_marking_mode_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SHDadaptive_ref_pic_marking_mode_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = CodedSlice 18; |
else |
nextstate = CodedSlice 23; |
end |
18: |
begin |
$display( "ccl2SHDmemory_management_control_operation %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDmemory_management_control_operation truncate(expgolomb_unsigned(buffer))); |
shdmemory_management_control_operation <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)!=0) |
nextstate = CodedSlice 19; |
else |
nextstate = CodedSlice 23; |
end |
19: |
begin |
if(shdmemory_management_control_operation==1 || shdmemory_management_control_operation==3) |
begin |
Bit#(17) temp17 = zeroExtend(expgolomb_unsigned(buffer)) + 1; |
$display( "ccl2SHDdifference_of_pic_nums %0d", temp17 ); |
outfifo.enq(SHDdifference_of_pic_nums temp17); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 20; |
end |
else |
nextstate = CodedSlice 20; |
end |
20: |
begin |
if(shdmemory_management_control_operation==2) |
begin |
$display( "ccl2SHDlong_term_pic_num %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDlong_term_pic_num truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 21; |
end |
else |
nextstate = CodedSlice 21; |
end |
21: |
begin |
if(shdmemory_management_control_operation==3 || shdmemory_management_control_operation==6) |
begin |
$display( "ccl2SHDlong_term_frame_idx %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDlong_term_frame_idx truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 22; |
end |
else |
nextstate = CodedSlice 22; |
end |
22: |
begin |
if(shdmemory_management_control_operation==4) |
begin |
$display( "ccl2SHDmax_long_term_frame_idx_plus1 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHDmax_long_term_frame_idx_plus1 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 18; |
end |
else |
nextstate = CodedSlice 18; |
end |
23: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SHslice_qp_delta %0d", tempint ); |
outfifo_ITB.enq(SHslice_qp_delta truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 24; |
end |
24: |
begin |
if(ppsdeblocking_filter_control_present_flag==1) |
begin |
$display( "ccl2SHdisable_deblocking_filter_idc %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SHdisable_deblocking_filter_idc truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
if(expgolomb_unsigned(buffer)!=1) |
nextstate = CodedSlice 25; |
else |
nextstate = CodedSlice 27; |
end |
else |
nextstate = CodedSlice 27; |
end |
25: |
begin |
tempint = unpack(expgolomb_signed(buffer) << 1); |
$display( "ccl2SHslice_alpha_c0_offset %0d", tempint ); |
outfifo.enq(SHslice_alpha_c0_offset truncate(expgolomb_signed(buffer) << 1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 26; |
end |
26: |
begin |
tempint = unpack(expgolomb_signed(buffer) << 1); |
$display( "ccl2SHslice_beta_offset %0d", tempint ); |
outfifo.enq(SHslice_beta_offset truncate(expgolomb_signed(buffer) << 1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = CodedSlice 27; |
end |
27: |
begin |
nextstate = SliceData 0; |
end |
default: $display( "ERROR EntropyDec: CodedSlice default step" ); |
endcase |
end |
tagged SEI .step : |
begin |
nextstate = Start; |
$display( "INFO EntropyDec: SEI data thrown away" ); |
end |
tagged SPS .step : |
begin |
case ( step ) |
0: |
begin |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "INFO EntropyDec: profile_idc = %d", outputdata ); |
outputdata = buffer[buffersize-9:buffersize-16]; |
$display( "INFO EntropyDec: constraint_set = %b", outputdata ); |
outputdata = buffer[buffersize-17:buffersize-24]; |
$display( "INFO EntropyDec: level_idc = %d", outputdata ); |
numbitsused = 24; |
nextstate = SPS 1; |
end |
1: |
begin |
$display( "ccl2SPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
spsseq_parameter_set_id <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 2; |
end |
2: |
begin |
$display( "ccl2SPSlog2_max_frame_num %0d", expgolomb_unsigned(buffer)+4 ); |
outfifo.enq(SPSlog2_max_frame_num truncate(expgolomb_unsigned(buffer)+4)); |
spslog2_max_frame_num <= truncate(expgolomb_unsigned(buffer)+4); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 3; |
end |
3: |
begin |
let tttt = expgolomb_unsigned(buffer); |
$display( "ccl2SPSpic_order_cnt_type %0d", tttt ); |
outfifo.enq(SPSpic_order_cnt_type truncate(tttt)); |
spspic_order_cnt_type <= truncate(tttt); |
numbitsused = expgolomb_numbits(buffer); |
if(tttt == 0) |
nextstate = SPS 4; |
else if(tttt == 1) |
nextstate = SPS 5; |
else |
nextstate = SPS 10; |
end |
4: |
begin |
$display( "ccl2SPSlog2_max_pic_order_cnt_lsb %0d", expgolomb_unsigned(buffer)+4 ); |
outfifo.enq(SPSlog2_max_pic_order_cnt_lsb truncate(expgolomb_unsigned(buffer)+4)); |
spslog2_max_pic_order_cnt_lsb <= truncate(expgolomb_unsigned(buffer)+4); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 10; |
end |
5: |
begin |
$display( "ccl2SPSdelta_pic_order_always_zero_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSdelta_pic_order_always_zero_flag buffer[buffersize-1]); |
spsdelta_pic_order_always_zero_flag <= buffer[buffersize-1]; |
numbitsused = 1; |
nextstate = SPS 6; |
end |
6: |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 6; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_non_ref_pic %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_non_ref_pic truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = SPS 7; |
end |
end |
7: |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 7; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_top_to_bottom_field %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_top_to_bottom_field truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
numbitsused = egnumbits; |
nextstate = SPS 8; |
end |
end |
8: |
begin |
$display( "ccl2SPSnum_ref_frames_in_pic_order_cnt_cycle %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSnum_ref_frames_in_pic_order_cnt_cycle truncate(expgolomb_unsigned(buffer))); |
spsnum_ref_frames_in_pic_order_cnt_cycle <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 9; |
end |
9: |
begin |
if(spsnum_ref_frames_in_pic_order_cnt_cycle == 0) |
nextstate = SPS 10; |
else |
begin |
if(egnumbits == 0) |
begin |
Bufcount tempbufcount = expgolomb_numbits32(buffer); |
egnumbits <= tempbufcount; |
numbitsused = tempbufcount-1; |
nextstate = SPS 9; |
end |
else |
begin |
tempint32 = unpack(expgolomb_signed32(buffer,egnumbits)); |
$display( "ccl2SPSoffset_for_ref_frame %0d", tempint32 ); |
outfifo.enq(SPSoffset_for_ref_frame truncate(expgolomb_signed32(buffer,egnumbits))); |
egnumbits <= 0; |
spsnum_ref_frames_in_pic_order_cnt_cycle <= spsnum_ref_frames_in_pic_order_cnt_cycle - 1; |
numbitsused = egnumbits; |
nextstate = SPS 9; |
end |
end |
end |
10: |
begin |
$display( "ccl2SPSnum_ref_frames %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSnum_ref_frames truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 11; |
end |
11: |
begin |
$display( "ccl2SPSgaps_in_frame_num_allowed_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSgaps_in_frame_num_allowed_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = SPS 12; |
end |
12: |
begin |
$display( "ccl2SPSpic_width_in_mbs %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SPSpic_width_in_mbs truncate(expgolomb_unsigned(buffer)+1)); |
calcnc.initialize_picWidth(truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 13; |
end |
13: |
begin |
$display( "ccl2SPSpic_height_in_map_units %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(SPSpic_height_in_map_units truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 14; |
end |
14: |
begin |
//SPSframe_mbs_only_flag = 1 for baseline |
numbitsused = 1; |
nextstate = SPS 15; |
end |
15: |
begin |
$display( "ccl2SPSdirect_8x8_inference_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSdirect_8x8_inference_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = SPS 16; |
end |
16: |
begin |
$display( "ccl2SPSframe_cropping_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(SPSframe_cropping_flag buffer[buffersize-1]); |
numbitsused = 1; |
if(buffer[buffersize-1] == 1) |
nextstate = SPS 17; |
else |
nextstate = SPS 21; |
end |
17: |
begin |
$display( "ccl2SPSframe_crop_left_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_left_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 18; |
end |
18: |
begin |
$display( "ccl2SPSframe_crop_right_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_right_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 19; |
end |
19: |
begin |
$display( "ccl2SPSframe_crop_top_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_top_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 20; |
end |
20: |
begin |
$display( "ccl2SPSframe_crop_bottom_offset %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SPSframe_crop_bottom_offset truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SPS 21; |
end |
21: |
begin |
nextstate = Start; |
$display( "INFO EntropyDec:VUI data thrown away" ); |
end |
default: $display( "ERROR EntropyDec: SPS default step" ); |
endcase |
end |
// PPS state: parses the picture-parameter-set fields, one field per step. |
// Every step assigns nextstate; steps that read a field also assign |
// numbitsused, and the statements after the outer endcase shift buffer by |
// numbitsused and store nextstate into state. |
// Steps 2 and 7 skip bits without emitting them; steps 4 and 13 emit nothing |
// and only print an error when the value read is not the expected one. |
tagged PPS .step : |
begin |
case ( step ) |
0: |
begin |
// pic_parameter_set_id is kept in a register and sent on both output FIFOs. |
ppspic_parameter_set_id <= truncate(expgolomb_unsigned(buffer)); |
$display( "ccl2PPSpic_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
outfifo_ITB.enq(PPSpic_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 1; |
end |
1: |
begin |
$display( "ccl2PPSseq_parameter_set_id %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(PPSseq_parameter_set_id truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 2; |
// A mismatch with the stored spsseq_parameter_set_id is only reported; parsing continues. |
if(spsseq_parameter_set_id != truncate(expgolomb_unsigned(buffer))) |
$display( "ERROR EntropyDec: seq_parameter_set_id don't match" ); |
end |
2: |
begin |
//PPSentropy_coding_mode_flag = 0 for baseline |
// The flag bit is consumed without being checked or emitted. |
numbitsused = 1; |
nextstate = PPS 3; |
end |
3: |
begin |
ppspic_order_present_flag <= buffer[buffersize-1]; |
$display( "ccl2PPSpic_order_present_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSpic_order_present_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 4; |
end |
4: |
begin |
// Decoded value + 1 must equal 1; anything else is reported but parsing continues. |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 5; |
if(expgolomb_unsigned(buffer)+1 != 1) |
$display( "ERROR EntropyDec: PPSnum_slice_groups not equal to 1" );//=1 for main |
end |
5: |
begin |
// The emitted value is decoded+1; the register keeps the un-incremented (minus1) value. |
$display( "ccl2PPSnum_ref_idx_l0_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(PPSnum_ref_idx_l0_active truncate(expgolomb_unsigned(buffer)+1)); |
num_ref_idx_l0_active_minus1 <= truncate(expgolomb_unsigned(buffer)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 6; |
end |
6: |
begin |
$display( "ccl2PPSnum_ref_idx_l1_active %0d", expgolomb_unsigned(buffer)+1 ); |
outfifo.enq(PPSnum_ref_idx_l1_active truncate(expgolomb_unsigned(buffer)+1)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 7; |
end |
7: |
begin |
//PPSweighted_pred_flag = 0 for baseline; PPSweighted_bipred_idc = 0 for baseline |
// Three bits are skipped without being checked or emitted. |
numbitsused = 3; |
nextstate = PPS 8; |
end |
8: |
begin |
// Signed value plus 26, sent only on outfifo_ITB. |
$display( "ccl2PPSpic_init_qp %0d", expgolomb_signed(buffer)+26 ); |
outfifo_ITB.enq(PPSpic_init_qp truncate(expgolomb_signed(buffer)+26)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 9; |
end |
9: |
begin |
$display( "ccl2PPSpic_init_qs %0d", expgolomb_signed(buffer)+26 ); |
outfifo_ITB.enq(PPSpic_init_qs truncate(expgolomb_signed(buffer)+26)); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 10; |
end |
10: |
begin |
// tempint is only used for the $display below. |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2PPSchroma_qp_index_offset %0d", tempint ); |
outfifo_ITB.enq(PPSchroma_qp_index_offset truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = PPS 11; |
end |
11: |
begin |
ppsdeblocking_filter_control_present_flag <= buffer[buffersize-1]; |
$display( "ccl2PPSdeblocking_filter_control_present_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSdeblocking_filter_control_present_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 12; |
end |
12: |
begin |
$display( "ccl2PPSconstrained_intra_pred_flag %0d", buffer[buffersize-1] ); |
outfifo.enq(PPSconstrained_intra_pred_flag buffer[buffersize-1]); |
numbitsused = 1; |
nextstate = PPS 13; |
end |
13: |
begin |
//PPSredundant_pic_cnt_present_flag = 0 for main |
numbitsused = 1; |
nextstate = PPS 14; |
if(buffer[buffersize-1] != 0) |
$display( "ERROR EntropyDec: PPSredundant_pic_cnt_present_flag not equal to 0" );//=0 for main |
end |
14: |
begin |
// Last PPS step: only changes state; numbitsused is not assigned here. |
nextstate = Start; |
end |
default: $display( "ERROR EntropyDec: PPS default step" ); |
endcase |
end |
// AUD state: emits the top three bits of buffer as AUDPrimaryPicType, |
// consumes those three bits, and returns to Start. |
// The bound value `step` is not used inside this arm. |
tagged AUD .step : |
begin |
outfifo.enq(AUDPrimaryPicType buffer[buffersize-1:buffersize-3]); |
numbitsused = 3; |
nextstate = Start; |
end |
// EndSequence state: forwards an EndOfSequence token on outfifo and returns |
// to Start; numbitsused is not assigned in this arm. |
tagged EndSequence : |
begin |
outfifo.enq(EndOfSequence); |
nextstate = Start; |
end |
// EndStream state: forwards an EndOfStream token on outfifo and returns |
// to Start; numbitsused is not assigned in this arm. |
tagged EndStream : |
begin |
outfifo.enq(EndOfStream); |
nextstate = Start; |
end |
// Filler state: emits nothing and goes straight back to Start. |
tagged Filler : |
begin |
nextstate = Start; |
end |
// SliceData state: walks the macroblocks of a slice. |
//   step 0: reads mb_skip_run unless shslice_type is 2 or 7. |
//   step 1: advances currMbAddr once per firing while tempreg (skip count) > 0. |
//   step 2: either starts a macroblock (MacroblockLayer 0) or goes to step 3. |
//   step 3: advances currMbAddr, then loops to step 0 or returns to Start. |
// MacroblockLayer and Residual return here through SliceData 3. |
tagged SliceData .step : |
begin |
case ( step ) |
0: |
begin |
if( shslice_type!=2 && shslice_type!=7 ) |
begin |
// The skip count goes to outfifo, to tempreg (countdown for step 1) and to calcnc. |
$display( "ccl2SDmb_skip_run %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDmb_skip_run truncate(expgolomb_unsigned(buffer))); |
tempreg <= truncate(expgolomb_unsigned(buffer)); |
calcnc.nNupdate_pskip( truncate(expgolomb_unsigned(buffer)) ); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SliceData 1; |
end |
else |
nextstate = SliceData 2; |
end |
1: |
begin |
if( tempreg>0 ) |
begin |
currMbAddr <= currMbAddr+1;//only because input assumed to conform to both baseline and main |
tempreg <= tempreg-1; |
nextstate = SliceData 1; |
end |
else |
begin |
////$display( "ccl2SDcurrMbAddr %0d", currMbAddr ); |
////outfifo.enq(SDcurrMbAddr currMbAddr); |
nextstate = SliceData 2; |
end |
end |
2: |
begin |
// The condition is false only when bufcount<=8 and buffer is a single leading 1 |
// followed by zeros (presumably the trailing-bits pattern at the end of the slice; confirm). |
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 ) |
begin |
calcnc.loadMb(currMbAddr); |
nextstate = MacroblockLayer 0; |
end |
else |
nextstate = SliceData 3; |
end |
3: |
begin |
currMbAddr <= currMbAddr+1;//only because input assumed to conform to both baseline and main |
// Same test as step 2: continue with the next macroblock or finish the slice. |
if( bufcount>8 || buffer[buffersize-1]!=1 || (buffer<<1)!=0 ) |
nextstate = SliceData 0; |
else |
nextstate = Start; |
end |
default: $display( "ERROR EntropyDec: SliceData default step" ); |
endcase |
end |
// MacroblockLayer state: parses one macroblock header. |
//   step 0: mb_type (converted with mbtype_convert); I_PCM goes to step 1, others to step 4. |
//   steps 1-3: I_PCM path - drop bufcount[2:0] bits, then 256 luma and 128 chroma bytes. |
//   step 4: dispatch to SubMbPrediction 0 or MbPrediction 0. |
//   step 5: coded block pattern (entered from MbPrediction / SubMbPrediction). |
//   step 6: optional mb_qp_delta, then Residual 0. |
tagged MacroblockLayer .step : //return to SliceData 3 |
begin |
case ( step ) |
0: |
begin |
$display( "ccl2SDMmb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) ); |
outfifo_ITB.enq(SDMmbtype mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) ); |
sdmmbtype <= mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type); |
numbitsused = expgolomb_numbits(buffer); |
// sdmmbtype is written this cycle, so the freshly converted value is compared here instead. |
if(mbtype_convert(truncate(expgolomb_unsigned(buffer)), shslice_type) == I_PCM) |
begin |
calcnc.nNupdate_ipcm(); |
nextstate = MacroblockLayer 1; |
end |
else |
nextstate = MacroblockLayer 4; |
end |
1: |
begin |
// Loads the luma sample countdown and consumes bufcount mod 8 bits |
// (presumably to byte-align before the PCM samples; confirm). |
tempreg <= 256; |
numbitsused = zeroExtend(bufcount[2:0]); |
nextstate = MacroblockLayer 2; |
end |
2: |
begin |
if( tempreg>0 ) |
begin |
// One 8-bit luma sample per firing, taken from the top of buffer. |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "ccl2SDMpcm_sample_luma %0d", outputdata ); |
outfifo.enq(SDMpcm_sample_luma outputdata); |
tempreg <= tempreg-1; |
numbitsused = 8; |
nextstate = MacroblockLayer 2; |
end |
else |
begin |
// Luma done: reload the countdown for the chroma samples. |
tempreg <= 128; |
nextstate = MacroblockLayer 3; |
end |
end |
3: |
begin |
if( tempreg>0 ) |
begin |
// One 8-bit chroma sample per firing. |
Bit#(8) outputdata = buffer[buffersize-1:buffersize-8]; |
$display( "ccl2SDMpcm_sample_chroma %0d", outputdata ); |
outfifo.enq(SDMpcm_sample_chroma outputdata); |
tempreg <= tempreg-1; |
numbitsused = 8; |
nextstate = MacroblockLayer 3; |
end |
else |
nextstate = SliceData 3; |
end |
4: |
begin |
// No bits consumed here; only selects which prediction parser runs next. |
if(sdmmbtype != I_NxN |
&&& mbPartPredMode(sdmmbtype,0) != Intra_16x16 |
&&& numMbPart(sdmmbtype) == 4) |
nextstate = SubMbPrediction 0; |
else |
nextstate = MbPrediction 0; |
end |
5: |
begin |
if(mbPartPredMode(sdmmbtype,0) != Intra_16x16) |
begin |
// Coded block pattern is read from the bitstream: bits [3:0] luma, bits [5:4] chroma. |
$display( "ccl2SDMcoded_block_pattern %0d", expgolomb_coded_block_pattern(buffer,sdmmbtype) ); |
////outfifo.enq(SDMcoded_block_pattern expgolomb_coded_block_pattern(buffer,sdmmbtype)); |
sdmcodedBlockPatternLuma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[3:0]; |
sdmcodedBlockPatternChroma <= expgolomb_coded_block_pattern(buffer,sdmmbtype)[5:4]; |
numbitsused = expgolomb_numbits(buffer); |
end |
else |
begin |
// Intra_16x16: no bits are read; the pattern comes from the fields of sdmmbtype, |
// with the luma field replicated four times. |
if(sdmmbtype matches tagged I_16x16 {intra16x16PredMode:.tempv1, codedBlockPatternChroma:.tempv2, codedBlockPatternLuma:.tempv3}) |
begin |
sdmcodedBlockPatternLuma <= {tempv3,tempv3,tempv3,tempv3}; |
sdmcodedBlockPatternChroma <= tempv2; |
end |
else |
$display( "ERROR EntropyDec: MacroblockLayer 5 sdmmbtype not I_16x16" ); |
end |
nextstate = MacroblockLayer 6; |
end |
6: |
begin |
// mb_qp_delta is read only when some block is coded or the mode is Intra_16x16. |
// Both branches continue to Residual 0. |
if(sdmcodedBlockPatternLuma > 0 |
|| sdmcodedBlockPatternChroma > 0 |
|| mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMmb_qp_delta %0d", tempint ); |
outfifo_ITB.enq(SDMmb_qp_delta truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = Residual 0; |
end |
else |
nextstate = Residual 0; |
end |
default: $display( "ERROR EntropyDec: MacroblockLayer default step" ); |
endcase |
end |
// MbPrediction state: parses the prediction data of a non-sub-partitioned macroblock. |
//   step 0: Intra_16x16 reads intra_chroma_pred_mode directly; Intra_4x4 starts a |
//           16-iteration loop in step 1; otherwise temp3bit0 is loaded with |
//           numMbPart(sdmmbtype) and step 2 (ref_idx) or step 3 (mvd) follows. |
//   step 1: one intra4x4 prediction mode per firing, then intra_chroma_pred_mode. |
//   step 2: one ref_idx_l0 per partition. |
//   steps 3-4: two SDMMmvd_l0 values per partition (presumably the x and y components; confirm). |
tagged MbPrediction .step : //return to MacroblockLayer 5 |
begin |
case ( step ) |
0: |
begin |
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
begin |
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MacroblockLayer 5; |
end |
else if(mbPartPredMode(sdmmbtype,0) == Intra_4x4) |
begin |
// temp5bit counts the remaining 4x4 blocks for step 1. |
temp5bit <= 16; |
nextstate = MbPrediction 1; |
end |
else if(num_ref_idx_l0_active_minus1 > 0) |
begin |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 2; |
end |
else |
begin |
// Step 2 (ref_idx_l0) is skipped when num_ref_idx_l0_active_minus1 is 0. |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 3; |
end |
end |
1: |
begin |
if(temp5bit == 0) |
begin |
$display( "ccl2SDMMintra_chroma_pred_mode %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMintra_chroma_pred_mode truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MacroblockLayer 5; |
end |
else |
begin |
////$display( "ccl2SDMMprev_intra4x4_pred_mode_flag %0d", buffer[buffersize-1] ); |
// Leading bit 0: the top four bits (that 0 plus three more) are emitted and consumed. |
// Leading bit 1: the constant 4'b1000 is emitted and only one bit is consumed. |
if(buffer[buffersize-1] == 0) |
begin |
Bit#(4) tttt = buffer[buffersize-1:buffersize-4]; |
$display( "ccl2SDMMrem_intra4x4_pred_mode %0d", tttt ); |
outfifo.enq(SDMMrem_intra4x4_pred_mode tttt); |
numbitsused = 4; |
end |
else |
begin |
outfifo.enq(SDMMrem_intra4x4_pred_mode 4'b1000); |
numbitsused = 1; |
end |
temp5bit <= temp5bit-1; |
nextstate = MbPrediction 1; |
end |
end |
2: |
begin |
// When num_ref_idx_l0_active_minus1 == 1 the index is a single inverted bit; |
// otherwise it is read with expgolomb_unsigned. |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMMref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMMref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMMref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMMref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
if(temp3bit0 == 1) |
begin |
// Last partition: reload the partition counter for the mvd loop. |
temp3bit0 <= numMbPart(sdmmbtype); |
nextstate = MbPrediction 3; |
end |
else |
begin |
temp3bit0 <= temp3bit0-1; |
nextstate = MbPrediction 2; |
end |
end |
3: |
begin |
// First of the two mvd values for the current partition. |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMMmvd_l0 %0d", tempint ); |
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = MbPrediction 4; |
end |
4: |
begin |
// Second mvd value; counts the partition down and loops or returns. |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMMmvd_l0 %0d", tempint ); |
outfifo.enq(SDMMmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit0 <= temp3bit0-1; |
if(temp3bit0 == 1) |
nextstate = MacroblockLayer 5; |
else |
nextstate = MbPrediction 3; |
end |
default: $display( "ERROR EntropyDec: MbPrediction default step" ); |
endcase |
end |
// SubMbPrediction state: parses prediction data for a macroblock split into four sub-macroblocks. |
//   steps 0-3: four sub_mb_type values; numSubMbPart of each is stored in temp3bit0..temp3bit3. |
//   steps 4-7: four ref_idx_l0 values (skipped when num_ref_idx_l0_active_minus1 is 0 |
//              or sdmmbtype is P_8x8ref0). |
//   steps 8-15: for each sub-macroblock, a pair of steps reads two SDMSmvd_l0 values and |
//               repeats until the matching temp3bitN counter reaches 1. |
tagged SubMbPrediction .step : //return to MacroblockLayer 5 |
begin |
case ( step ) |
0: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit0 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 1; |
end |
1: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit1 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 2; |
end |
2: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit2 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 3; |
end |
3: |
begin |
$display( "ccl2SDMSsub_mb_type %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSsub_mb_type truncate(expgolomb_unsigned(buffer))); |
temp3bit3 <= numSubMbPart(truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
// Decide whether the four ref_idx_l0 steps (4-7) are needed. |
if(num_ref_idx_l0_active_minus1 > 0 |
&& sdmmbtype != P_8x8ref0) |
nextstate = SubMbPrediction 4; |
else |
nextstate = SubMbPrediction 8; |
end |
4: |
begin |
// Steps 4-7 are identical apart from nextstate: a single inverted bit when |
// num_ref_idx_l0_active_minus1 == 1, otherwise an expgolomb_unsigned value. |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 5; |
end |
5: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 6; |
end |
6: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 7; |
end |
7: |
begin |
if(num_ref_idx_l0_active_minus1 == 1) |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", 1-buffer[buffersize-1] ); |
outfifo.enq(SDMSref_idx_l0 zeroExtend(1-buffer[buffersize-1])); |
numbitsused = 1; |
end |
else |
begin |
$display( "ccl2SDMSref_idx_l0 %0d", expgolomb_unsigned(buffer) ); |
outfifo.enq(SDMSref_idx_l0 truncate(expgolomb_unsigned(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
end |
nextstate = SubMbPrediction 8; |
end |
8: |
begin |
// Steps 8/9: mvd pair loop for the first sub-macroblock (counter temp3bit0). |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 9; |
end |
9: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit0 <= temp3bit0-1; |
if(temp3bit0 == 1) |
nextstate = SubMbPrediction 10; |
else |
nextstate = SubMbPrediction 8; |
end |
10: |
begin |
// Steps 10/11: mvd pair loop for the second sub-macroblock (counter temp3bit1). |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 11; |
end |
11: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit1 <= temp3bit1-1; |
if(temp3bit1 == 1) |
nextstate = SubMbPrediction 12; |
else |
nextstate = SubMbPrediction 10; |
end |
12: |
begin |
// Steps 12/13: mvd pair loop for the third sub-macroblock (counter temp3bit2). |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 13; |
end |
13: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit2 <= temp3bit2-1; |
if(temp3bit2 == 1) |
nextstate = SubMbPrediction 14; |
else |
nextstate = SubMbPrediction 12; |
end |
14: |
begin |
// Steps 14/15: mvd pair loop for the fourth sub-macroblock (counter temp3bit3). |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
nextstate = SubMbPrediction 15; |
end |
15: |
begin |
tempint = unpack(expgolomb_signed(buffer)); |
$display( "ccl2SDMSmvd_l0 %0d", tempint ); |
outfifo.enq(SDMSmvd_l0 truncate(expgolomb_signed(buffer))); |
numbitsused = expgolomb_numbits(buffer); |
temp3bit3 <= temp3bit3-1; |
if(temp3bit3 == 1) |
nextstate = MacroblockLayer 5; |
else |
nextstate = SubMbPrediction 14; |
end |
default: $display( "ERROR EntropyDec: SubMbPrediction default step" ); |
endcase |
end |
// Residual state: schedules the residual blocks of one macroblock. No step here |
// assigns numbitsused; the bitstream is read in ResidualBlock. |
// Only steps 0, 1, 3 and 5 exist; any other step value hits the default error. |
//   step 0: reset counters; Intra_16x16 first decodes one block with maxNumCoeff 16. |
//   step 1: 16 luma blocks, indexed by temp5bit. |
//   step 3: 2 chroma blocks with maxNumCoeff 4. |
//   step 5: 8 chroma blocks with maxNumCoeff 15, then back to SliceData 3. |
// For an uncoded block a SDMRcoeffLevelZeros token is sent instead of entering ResidualBlock. |
tagged Residual .step : //return to SliceData 3 |
begin |
case ( step ) |
0: |
begin |
residualChroma <= 0; |
temp5bit <= 0; |
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
begin |
maxNumCoeff <= 16; |
nextstate = ResidualBlock 0; |
end |
else |
nextstate = Residual 1; |
//$display( "TRACE EntropyDec: Residual 0" ); |
end |
1: |
begin |
if(temp5bit == 16) |
begin |
// All luma blocks done: switch to chroma and restart the block counter. |
residualChroma <= 1; |
temp5bit <= 0; |
nextstate = Residual 3; |
end |
else |
begin |
Bit#(5) tempMaxNumCoeff = 0; |
if(mbPartPredMode(sdmmbtype,0) == Intra_16x16) |
tempMaxNumCoeff = 15; |
else |
tempMaxNumCoeff = 16; |
maxNumCoeff <= tempMaxNumCoeff; |
// temp5bit[3:2] selects which of the four sdmcodedBlockPatternLuma bits covers this block. |
if((sdmcodedBlockPatternLuma & (1 << zeroExtend(temp5bit[3:2]))) == 0) |
begin |
calcnc.nNupdate_luma(truncate(temp5bit),0); |
////$display( "ccl2SDMRcoeffLevelZeros %0d", tempMaxNumCoeff ); |
outfifo_ITB.enq(SDMRcoeffLevelZeros tempMaxNumCoeff); |
temp5bit <= temp5bit+1; |
nextstate = Residual 1; |
end |
else |
nextstate = ResidualBlock 0; |
end |
//$display( "TRACE EntropyDec: Residual 1" ); |
end |
3: |
begin |
if(temp5bit == 2) |
begin |
temp5bit <= 0; |
nextstate = Residual 5; |
end |
else |
begin |
maxNumCoeff <= 4; |
// Coded only when either of the two low bits of sdmcodedBlockPatternChroma is set. |
if((sdmcodedBlockPatternChroma & 3) == 0) |
begin |
////$display( "ccl2SDMRcoeffLevelZeros %0d", 4 ); |
outfifo_ITB.enq(SDMRcoeffLevelZeros 4); |
temp5bit <= temp5bit+1; |
nextstate = Residual 3; |
end |
else |
nextstate = ResidualBlock 0; |
end |
//$display( "TRACE EntropyDec: Residual 3" ); |
end |
5: |
begin |
if(temp5bit == 8) |
begin |
temp5bit <= 0; |
nextstate = SliceData 3; |
end |
else |
begin |
maxNumCoeff <= 15; |
// Coded only when bit 1 of sdmcodedBlockPatternChroma is set. |
if((sdmcodedBlockPatternChroma & 2) == 0) |
begin |
calcnc.nNupdate_chroma(truncate(temp5bit),0); |
////$display( "ccl2SDMRcoeffLevelZeros %0d", 15 ); |
outfifo_ITB.enq(SDMRcoeffLevelZeros 15); |
temp5bit <= temp5bit+1; |
nextstate = Residual 5; |
end |
else |
nextstate = ResidualBlock 0; |
end |
//$display( "TRACE EntropyDec: Residual 5" ); |
end |
default: $display( "ERROR EntropyDec: Residual default step" ); |
endcase |
end |
// ResidualBlock state: decodes one coefficient block. |
//   step 0: clears cavlcFIFO and loads tempreg with the table selector for coeff_token. |
//   step 1: reads coeff_token (trailing ones and total coefficient count). |
//   step 2: reads one level per firing into cavlcFIFO. |
//   step 3: reads total_zeros and emits any leading zero run. |
//   step 5: pops levels from cavlcFIFO, pairs each with run_before, and emits them. |
// There is no step 4; step 2 jumps to 3 or 5 and step 3 jumps to 5. |
// Register reuse inside this arm: temp3bit0 = trailingOnes, temp3bit1 = suffixLength, |
// temp5bit2 = level index, tempreg = coeff_token selector (cleared in step 1). |
tagged ResidualBlock .step : //if(residualChroma==0) return to Residual 1; else if(maxNumCoeff==4) return to Residual 3; else return to Residual 5 |
begin//don't modify maxNumCoeff, residualChroma, and increment temp5bit on return |
case ( step ) |
0: |
begin |
cavlcFIFO.clear(); |
// maxNumCoeff == 4 uses the fixed selector 6'b111111; otherwise calcnc supplies it. |
if(maxNumCoeff != 4) |
begin |
if(residualChroma == 0) |
tempreg <= zeroExtend(calcnc.nCcalc_luma(truncate(temp5bit))); |
else |
tempreg <= zeroExtend(calcnc.nCcalc_chroma(truncate(temp5bit))); |
end |
else |
tempreg <= zeroExtend(6'b111111); |
nextstate = ResidualBlock 1; |
//$display( "TRACE EntropyDec: ResidualBlock 0 temp5bit = %0d", temp5bit); |
end |
1: |
begin |
Bit#(2) trailingOnesTemp = 0; |
Bit#(5) totalCoeffTemp = 0; |
{trailingOnesTemp,totalCoeffTemp,numbitsused} = cavlc_coeff_token( buffer, truncate(tempreg) ); |
temp3bit0 <= zeroExtend(trailingOnesTemp);//trailingOnes |
totalCoeff <= totalCoeffTemp; |
// calcnc is told the coefficient count, except for the Intra_16x16 block decoded |
// with maxNumCoeff 16 and for chroma blocks with maxNumCoeff 4. |
if(residualChroma == 0 && !(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16)) |
calcnc.nNupdate_luma(truncate(temp5bit),totalCoeffTemp); |
else if(residualChroma == 1 && maxNumCoeff != 4) |
calcnc.nNupdate_chroma(truncate(temp5bit),totalCoeffTemp); |
temp5bit2 <= 0;//i |
tempreg <= 0;//levelCode temp |
if(totalCoeffTemp > 10 && trailingOnesTemp < 3) |
temp3bit1 <= 1;//suffixLength |
else |
temp3bit1 <= 0;//suffixLength |
nextstate = ResidualBlock 2; |
//$display( "TRACE EntropyDec: ResidualBlock 1 nC = %0d", tempreg); |
$display( "ccl2SDMRtotal_coeff %0d", totalCoeffTemp ); |
$display( "ccl2SDMRtrailing_ones %0d", trailingOnesTemp ); |
end |
2: |
begin |
if( totalCoeff != 0 ) |
begin |
if(temp5bit2 < zeroExtend(temp3bit0)) |
begin |
// Trailing-ones coefficient: a single sign bit selects -1 or 1. |
if(buffer[buffersize-1] == 1) |
cavlcFIFO.enq(-1); |
else |
cavlcFIFO.enq(1); |
numbitsused = 1; |
end |
else |
begin |
// NOTE(review): buffertempshow is not referenced again in this arm (looks like a debug aid). |
Bit#(32) buffertempshow = buffer[buffersize-1:buffersize-32]; |
Bit#(3) suffixLength = temp3bit1; |
Bit#(4) levelSuffixSize = zeroExtend(suffixLength); |
Bit#(4) level_prefix = cavlc_level_prefix( buffer ); |
Bit#(5) temp_level_prefix = zeroExtend(level_prefix); |
// tempbuffer holds the bits that follow the prefix and the bit after it. |
Bit#(28) tempbuffer = buffer[buffersize-1:buffersize-28] << zeroExtend(temp_level_prefix+1); |
Bit#(14) levelCode = zeroExtend(level_prefix) << zeroExtend(suffixLength); |
if(level_prefix == 14 && suffixLength == 0) |
levelSuffixSize = 4; |
else if(level_prefix == 15) |
levelSuffixSize = 12; |
// Keep only the top levelSuffixSize bits of the 12-bit window as the suffix. |
levelCode = levelCode + zeroExtend(tempbuffer[27:16] >> (12-zeroExtend(levelSuffixSize)));//level_suffix |
if(level_prefix == 15 && suffixLength == 0) |
levelCode = levelCode + 15; |
if(temp5bit2 == zeroExtend(temp3bit0) && temp3bit0 < 3) |
levelCode = levelCode + 2; |
// suffixLength update for the next level: at least 1, grows by one (up to 6) |
// when the magnitude exceeds 3 << (suffixLength-1). |
if(suffixLength == 0) |
suffixLength = 1; |
if( suffixLength < 6 && ((levelCode+2) >> 1) > (3 << zeroExtend(suffixLength-1)) ) |
suffixLength = suffixLength+1; |
// Even levelCode enqueues (levelCode+2)>>1, odd levelCode enqueues (~levelCode)>>1. |
if(levelCode[0] == 0) |
cavlcFIFO.enq(truncate((levelCode+2) >> 1)); |
else |
cavlcFIFO.enq(truncate((~levelCode) >> 1)); |
// The block below only computes tempint for the commented-out TRACE displays. |
if(levelCode[0] == 0)////////////////////////////////////////////////// |
begin |
tempint = signExtend(unpack((levelCode+2) >> 1)); |
//$display( "TRACE EntropyDec: temp level %0d", tempint ); |
end |
else |
begin |
Bit#(13) tempinttemp = truncate((~levelCode) >> 1); |
tempint = signExtend(unpack(tempinttemp)); |
//$display( "TRACE EntropyDec: temp level %0d", tempint ); |
end/////////////////////////////////////////////////////////////////////// |
temp3bit1 <= suffixLength; |
numbitsused = zeroExtend(level_prefix)+1+zeroExtend(levelSuffixSize); |
end |
end |
// After the last level (or when there are none) go on to total_zeros, or straight |
// to output when the block is full (totalCoeff == maxNumCoeff). |
if( totalCoeff==0 || temp5bit2+1==totalCoeff ) |
begin |
temp5bit2 <= 0; |
zerosLeft <= 0; |
if(totalCoeff < maxNumCoeff) |
nextstate = ResidualBlock 3; |
else |
nextstate = ResidualBlock 5; |
end |
else |
begin |
temp5bit2 <= temp5bit2 + 1; |
nextstate = ResidualBlock 2; |
end |
end |
3: |
begin |
// tempZerosLeft has no initializer but is assigned on both branches below. |
Bit#(4) tempZerosLeft; |
if(totalCoeff > 0) |
begin |
{tempZerosLeft,numbitsused} = cavlc_total_zeros( buffer, truncate(totalCoeff), maxNumCoeff); |
$display( "ccl2SDMRtotal_zeros %0d", tempZerosLeft );////////////////////////////////////// |
end |
else |
tempZerosLeft = 0; |
zerosLeft <= tempZerosLeft; |
// Positions not covered by coefficients or by total_zeros are sent as one zero run. |
if(maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) > 0) |
begin |
$display( "ccl2SDMRcoeffLevelZeros %0d", maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft) ); |
outfifo_ITB.enq(SDMRcoeffLevelZeros (maxNumCoeff - totalCoeff - zeroExtend(tempZerosLeft))); |
end |
nextstate = ResidualBlock 5; |
end |
5: |
begin |
if( totalCoeff > 0 ) |
begin |
tempint = signExtend(unpack(cavlcFIFO.first())); |
$display( "ccl2SDMRcoeffLevel %0d", tempint ); |
if( zerosLeft > 0 ) |
begin |
// run_before is read from the bitstream unless this is the last coefficient, |
// in which case all remaining zeros are assigned to it. |
Bit#(4) run_before = 0; |
if( totalCoeff > 1 ) |
{run_before,numbitsused} = cavlc_run_before( buffer, zerosLeft); |
else |
run_before = zerosLeft; |
zerosLeft <= zerosLeft - run_before; |
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:zeroExtend(run_before)}); |
if( run_before > 0 ) |
$display( "ccl2SDMRcoeffLevelZeros %0d", run_before ); |
end |
else |
outfifo_ITB.enq(SDMRcoeffLevelPlusZeros {level:cavlcFIFO.first(),zeros:0}); |
cavlcFIFO.deq(); |
totalCoeff <= totalCoeff-1; |
end |
// totalCoeff is read before the decrement above takes effect, so <= 1 means |
// the block is finished (or was empty). |
if( totalCoeff <= 1 ) |
begin |
// The block index is not advanced for the Intra_16x16 block decoded with maxNumCoeff 16. |
if(!(mbPartPredMode(sdmmbtype,0)==Intra_16x16 && maxNumCoeff==16)) |
temp5bit <= temp5bit+1; |
if(residualChroma==0) |
nextstate = Residual 1; |
else if(maxNumCoeff==4) |
nextstate = Residual 3; |
else |
nextstate = Residual 5; |
end |
else |
nextstate = ResidualBlock 5; |
end |
default: $display( "ERROR EntropyDec: ResidualBlock default step" ); |
endcase |
end |
endcase |
|
// Commit phase of the parsing rule: applies the numbitsused / nextstate values |
// chosen by the case arm above. |
// The guard needs bufcount to be strictly greater than numbitsused; on failure it |
// prints an error and forces the state machine back to Start. |
// NOTE(review): buffer and bufcount are still updated with numbitsused on the error |
// path, so bufcount-numbitsused may wrap if numbitsused > bufcount - confirm this is harmless. |
if(numbitsused+1 > bufcount) |
begin |
$display( "ERROR EntropyDec: not enough bits in buffer" ); |
nextstate = Start; |
end |
// Drop the consumed bits from the head of buffer and account for them in bufcount. |
buffer <= buffer << zeroExtend(numbitsused); |
bufcount <= bufcount-numbitsused; |
state <= nextstate; |
|
endrule |
|
|
// Module interface bindings. |
// ioin: Put side built from infifo via fifoToPut. |
// ioout: Get side built from outfifo via fifoToGet. |
// ioout_InverseTrans: Get side built from outfifo_ITB via fifoToGet. |
interface Put ioin = fifoToPut(infifo); |
interface Get ioout = fifoToGet(outfifo); |
interface Get ioout_InverseTrans = fifoToGet(outfifo_ITB); |
 |
// mem_client is forwarded unchanged from the calcnc submodule. |
interface mem_client = calcnc.mem_client; |
|
endmodule |
|
endpackage |