Line 26... |
Line 26... |
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
|
struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
|
}
|
}
|
InterpolatorWT deriving(Eq,Bits);
|
InterpolatorWT deriving(Eq,Bits);
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
// Helper functions
|
// Helper functions
|
|
|
function Bit#(8) clip1y10to8( Bit#(10) innum );
|
function Bit#(8) clip1y10to8( Bit#(10) innum );
|
if(innum[9] == 1)
|
if(innum[9] == 1)
|
Line 57... |
Line 59... |
|
|
|
|
(* synthesize *)
|
(* synthesize *)
|
module mkInterpolator( Interpolator );
|
module mkInterpolator( Interpolator );
|
|
|
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
|
FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size); // This fifo takes in motion vector
|
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size);
|
// pixel requests.
|
|
FIFO#(InterpolatorWT) reqfifoWork1 <- mkSizedFIFO(interpolator_reqfifoWork_size); // This is where the memory responses
|
|
// come from
|
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid);
|
Reg#(Maybe#(InterpolatorWT)) reqregWork2 <- mkReg(Invalid);
|
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
|
FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
|
Reg#(Bool) endOfFrameFlag <- mkReg(False);
|
Reg#(Bool) endOfFrameFlag <- mkReg(False);
|
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
|
FIFO#(InterpolatorLoadReq) memReqQ <- mkFIFO;
|
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);
|
FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);
|
Line 150... |
Line 154... |
end
|
end
|
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
|
memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
|
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3));
|
Bool verFirst = twoStage || (yfracl==2&&(xfracl==1||xfracl==3));
|
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1));
|
Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset2==0 ? 0 : 1));
|
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
|
Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
|
|
// It would appear that we are collecting memory requests here, or at least we're adjusting
|
|
// the memory addresses.
|
if(verFirst)
|
if(verFirst)
|
begin
|
begin
|
if(loadVerNum < loadVerNumMax)
|
if(loadVerNum < loadVerNumMax)
|
loadVerNum <= loadVerNum+1;
|
loadVerNum <= loadVerNum+1;
|
else
|
else
|
Line 241... |
Line 247... |
horOut = 1;
|
horOut = 1;
|
end
|
end
|
else
|
else
|
horAddr = truncate(horTemp>>2);
|
horAddr = truncate(horTemp>>2);
|
end
|
end
|
|
|
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
|
if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
|
verAddr = 0;
|
verAddr = 0;
|
else
|
else
|
begin
|
begin
|
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
|
verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
|
if(verTemp>=zeroExtend({picHeight,3'b000}))
|
if(verTemp>=zeroExtend({picHeight,3'b000}))
|
verAddr = {picHeight-1,3'b111};
|
verAddr = {picHeight-1,3'b111};
|
else
|
else
|
verAddr = truncate(verTemp);
|
verAddr = truncate(verTemp);
|
end
|
end
|
|
|
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
|
memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
|
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
|
Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
|
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
|
Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
|
if(loadHorNum < loadHorNumMax)
|
if(loadHorNum < loadHorNumMax)
|
loadHorNum <= loadHorNum+1;
|
loadHorNum <= loadHorNum+1;
|
Line 276... |
Line 284... |
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done );
|
rule work1Luma ( reqfifoWork1.first() matches tagged IPWLuma .reqdata &&& !work1Done );
|
let xfracl = reqdata.xFracL;
|
let xfracl = reqdata.xFracL;
|
let yfracl = reqdata.yFracL;
|
let yfracl = reqdata.yFracL;
|
let offset = reqdata.offset;
|
let offset = reqdata.offset;
|
let blockT = reqdata.bt;
|
let blockT = reqdata.bt;
|
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
|
Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3); // are we dealing with a quarter sample
|
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8;
|
Vector#(20,Bit#(8)) work1Vector8Next = work1Vector8; // This must die.
|
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
|
if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
|
begin
|
begin
|
memRespQ.deq();
|
memRespQ.deq();
|
Vector#(4,Bit#(8)) readdata = replicate(0);
|
Vector#(4,Bit#(8)) readdata = replicate(0);
|
readdata[0] = tempreaddata[7:0];
|
readdata[0] = tempreaddata[7:0];
|
Line 315... |
Line 323... |
for(Integer ii=0; ii<4; ii=ii+1)
|
for(Integer ii=0; ii<4; ii=ii+1)
|
begin
|
begin
|
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
|
Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
|
work1Vector8Next[tempIndex] = readdata[ii];
|
work1Vector8Next[tempIndex] = readdata[ii];
|
end
|
end
|
for(Integer ii=0; ii<4; ii=ii+1)
|
for(Integer ii=0; ii<4; ii=ii+1) // horizontal filtration step.
|
begin
|
begin
|
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]);
|
tempResult15[ii] = interpolate8to15(work1Vector8Next[ii],work1Vector8Next[ii+1],work1Vector8Next[ii+2],work1Vector8Next[ii+3],work1Vector8Next[ii+4],work1Vector8Next[ii+5]);
|
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
|
tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
|
if(xfracl == 1)
|
if(xfracl == 1) // Seems to be averaging the quarter samples.
|
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1);
|
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+2]} + 1) >> 1);
|
else if(xfracl == 3)
|
else if(xfracl == 3)
|
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1);
|
tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,work1Vector8Next[ii+3]} + 1) >> 1);
|
end
|
end
|
end
|
end
|
Line 355... |
Line 363... |
end
|
end
|
end
|
end
|
else if(work1Stage == 0)//vertical interpolation
|
else if(work1Stage == 0)//vertical interpolation
|
begin
|
begin
|
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
|
offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
|
for(Integer ii=0; ii<4; ii=ii+1)
|
for(Integer ii=0; ii<4; ii=ii+1) // apply the vertical filtration step (one tap per stored row plus the newly read row).
|
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]);
|
tempResult15[ii] = interpolate8to15(work1Vector8[ii],work1Vector8[ii+4],work1Vector8[ii+8],work1Vector8[ii+12],work1Vector8[ii+16],readdata[ii]);
|
for(Integer ii=0; ii<16; ii=ii+1)
|
for(Integer ii=0; ii<16; ii=ii+1) // advances the work vector
|
work1Vector8Next[ii] = work1Vector8[ii+4];
|
work1Vector8Next[ii] = work1Vector8[ii+4];
|
for(Integer ii=0; ii<4; ii=ii+1)
|
for(Integer ii=0; ii<4; ii=ii+1) // assigns the new work vector value
|
work1Vector8Next[ii+16] = readdata[ii];
|
work1Vector8Next[ii+16] = readdata[ii];
|
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
|
Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
|
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
|
Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
|
Bit#(2) horAddr = work1HorNum;
|
Bit#(2) horAddr = work1HorNum;
|
Bit#(3) verAddr = truncate(work1VerNum-5);
|
Bit#(3) verAddr = truncate(work1VerNum-5);
|
Line 646... |
Line 654... |
else
|
else
|
tempWork8[ii] = readdata[offsetplusii];
|
tempWork8[ii] = readdata[offsetplusii];
|
work1Vector8Next[ii] = readdata[ii];
|
work1Vector8Next[ii] = readdata[ii];
|
end
|
end
|
tempWork8[4] = readdata[offset];
|
tempWork8[4] = readdata[offset];
|
|
|
|
// deals with the row major offsets
|
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2))
|
if((blockT==IP16x8 || blockT==IP16x16) && work1HorNum==(xfracc==0&&offset==0 ? 1 : 2))
|
begin
|
begin
|
for(Integer ii=0; ii<5; ii=ii+1)
|
for(Integer ii=0; ii<5; ii=ii+1)
|
begin
|
begin
|
tempPrev8[ii] = work1Vector8[ii+9];
|
tempPrev8[ii] = work1Vector8[ii+9];
|
Line 669... |
Line 679... |
if(yfracc==0)
|
if(yfracc==0)
|
begin
|
begin
|
for(Integer ii=0; ii<5; ii=ii+1)
|
for(Integer ii=0; ii<5; ii=ii+1)
|
tempPrev8[ii] = tempWork8[ii];
|
tempPrev8[ii] = tempWork8[ii];
|
end
|
end
|
|
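// Apply the chroma filter: weight the four neighbouring samples by xfracc/yfracc, then round (+32) and shift right by 6.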
|
for(Integer ii=0; ii<4; ii=ii+1)
|
for(Integer ii=0; ii<4; ii=ii+1)
|
begin
|
begin
|
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
|
Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
|
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
|
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
|
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
|
tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
|
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
|
tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
|
tempResult8[ii] = truncate((tempVal+32)>>6);
|
tempResult8[ii] = truncate((tempVal+32)>>6);
|
end
|
end
|
|
|
if(work1VerNum > 0 || yfracc==0)
|
if(work1VerNum > 0 || yfracc==0)
|
begin
|
begin
|
if(blockT==IP4x8 || blockT==IP4x4)
|
if(blockT==IP4x8 || blockT==IP4x4)
|
begin
|
begin
|
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1);
|
Bit#(5) tempIndex = 10 + zeroExtend(work1VerNum<<1);
|
Line 777... |
Line 789... |
end
|
end
|
$display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum);
|
$display( "Trace interpolator: outputing %h %h", outBlockNum, outPixelNum);
|
endrule
|
endrule
|
|
|
|
|
|
// These two rules (switching and switching8x8) complete the processing step and clear the done flags.
|
rule switching( work1Done && (work2Done || reqregWork2==Invalid) && !work8x8Done);
|
rule switching( work1Done && (work2Done || reqregWork2==Invalid) && !work8x8Done);
|
work1Done <= False;
|
work1Done <= False;
|
work2Done <= False;
|
work2Done <= False;
|
reqregWork2 <= (Valid reqfifoWork1.first());
|
reqregWork2 <= (Valid reqfifoWork1.first());
|
workFileFlag <= 1-workFileFlag;
|
workFileFlag <= 1-workFileFlag;
|
reqfifoWork1.deq();
|
reqfifoWork1.deq();
|
$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
|
$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
|
endrule
|
endrule
|
|
|
|
|
|
// This rule is one of the last to run: it fires only when work1 is done, work2 is done or idle, and both the 8x8 work and the output are done.
|
rule switching8x8( work1Done && (work2Done || reqregWork2==Invalid) && work8x8Done && outDone);
|
rule switching8x8( work1Done && (work2Done || reqregWork2==Invalid) && work8x8Done && outDone);
|
outDone <= False;
|
outDone <= False;
|
work8x8Done <= False;
|
work8x8Done <= False;
|
resultReady <= replicate(0);
|
resultReady <= replicate(0);
|
work1Done <= False;
|
work1Done <= False;
|