OpenCores
URL https://opencores.org/ocsvn/bluespec-h264/bluespec-h264/trunk

Subversion Repositories bluespec-h264

[/] [bluespec-h264/] [trunk/] [release/] [mkInterpolator_3stage.bsv] - Blame information for rev 100

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 85 jamey.hick
// The MIT License
2
 
3
// Copyright (c) 2006-2007 Massachusetts Institute of Technology
4
 
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
 
12
// The above copyright notice and this permission notice shall be included in
13
// all copies or substantial portions of the Software.
14
 
15
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
// THE SOFTWARE.
22 84 jamey.hick
//**********************************************************************
23
// interpolator implementation
24
//----------------------------------------------------------------------
25
//
26
//
27
 
28
package mkInterpolator;
29
 
30
import H264Types::*;
31
import IInterpolator::*;
32
import FIFO::*;
33
import Vector::*;
34
 
35
import Connectable::*;
36
import GetPut::*;
37
import ClientServer::*;
38
 
39
 
40
//-----------------------------------------------------------
41
// Local Datatypes
42
//-----------------------------------------------------------
43
 
44
// Work descriptor carried by reqfifoWork and pattern-matched by the
// workLuma / workChroma rules.  There is one variant per plane:
//   IPWLuma   - 2-bit x/y fraction fields, 2-bit offset, block type
//   IPWChroma - 3-bit x/y fraction fields, 2-bit offset, block type
typedef union tagged
45
{
46
 // Luma work item: fractions are 2 bits wide.
 struct { Bit#(2) xFracL; Bit#(2) yFracL; Bit#(2) offset; IPBlockType bt; } IPWLuma;
47
 // Chroma work item: fractions are 3 bits wide.
 struct { Bit#(3) xFracC; Bit#(3) yFracC; Bit#(2) offset; IPBlockType bt; } IPWChroma;
48
}
49
InterpolatorWT deriving(Eq,Bits);
50
 
51
 
52
//-----------------------------------------------------------
53
// Helper functions
54
 
55
// Clamp a 10-bit value, read as two's complement, to the unsigned 8-bit
// range.  Bit 9 set (negative) yields 0; otherwise bit 8 set (256..511)
// yields 255; otherwise the low eight bits are returned unchanged.
function Bit#(8) clip1y10to8( Bit#(10) innum );
   Bool isNegative = (innum[9] == 1'b1);
   Bool isTooLarge = (innum[8] == 1'b1);
   // Default: the value already fits, keep the low byte.
   Bit#(8) clipped = truncate(innum);
   if(isNegative)
      clipped = 0;
   else if(isTooLarge)
      clipped = 255;
   return clipped;
endfunction
63
 
64
// Six-tap filter with weights (1, -5, 20, 20, -5, 1) over six unsigned
// 8-bit samples.  Each sample is zero-extended to 15 bits and the weighted
// sum is returned as a 15-bit value without rounding or scaling; a negative
// sum appears in two's-complement form (arithmetic is modulo 2^15).
function Bit#(15) interpolate8to15( Bit#(8) in0, Bit#(8) in1, Bit#(8) in2, Bit#(8) in3, Bit#(8) in4, Bit#(8) in5 );
   Bit#(15) tap0 = zeroExtend(in0);
   Bit#(15) tap1 = zeroExtend(in1);
   Bit#(15) tap2 = zeroExtend(in2);
   Bit#(15) tap3 = zeroExtend(in3);
   Bit#(15) tap4 = zeroExtend(in4);
   Bit#(15) tap5 = zeroExtend(in5);
   // The weights are symmetric, so pair the taps that share a weight.
   Bit#(15) outerPair  = tap0 + tap5;
   Bit#(15) innerPair  = tap1 + tap4;
   Bit#(15) centrePair = tap2 + tap3;
   return outerPair - 5*innerPair + 20*centrePair;
endfunction
67
 
68
// Second-pass six-tap filter with weights (1, -5, 20, 20, -5, 1) over six
// 15-bit values that are sign-extended to 20 bits.  512 is added for
// rounding, the sum is divided by 1024 by taking bits [19:10], and the
// resulting 10-bit value is clamped to 0..255 by clip1y10to8.
function Bit#(8) interpolate15to8( Bit#(15) in0, Bit#(15) in1, Bit#(15) in2, Bit#(15) in3, Bit#(15) in4, Bit#(15) in5 );
   Bit#(20) tap0 = signExtend(in0);
   Bit#(20) tap1 = signExtend(in1);
   Bit#(20) tap2 = signExtend(in2);
   Bit#(20) tap3 = signExtend(in3);
   Bit#(20) tap4 = signExtend(in4);
   Bit#(20) tap5 = signExtend(in5);
   // Symmetric weights: combine the taps that share a coefficient.
   Bit#(20) rounded = (tap0 + tap5) - 5*(tap1 + tap4) + 20*(tap2 + tap3) + 512;
   // Bits [19:10] are what truncating (rounded >> 10) to 10 bits keeps.
   Bit#(10) scaled = rounded[19:10];
   return clip1y10to8(scaled);
endfunction
72
 
73
 
74
 
75
//-----------------------------------------------------------
76
// Interpolation Module
77
//-----------------------------------------------------------
78
 
79
 
80
(* synthesize *)
81
module mkInterpolator( Interpolator );
82
 
83
   // ---- Queues -----------------------------------------------------------
   // Requests examined by loadLuma / loadChroma (matched as IPLuma / IPChroma).
   FIFO#(InterpolatorIT) reqfifoLoad <- mkSizedFIFO(interpolator_reqfifoLoad_size);
84
   // Work descriptors examined by workLuma / workChroma.
   FIFO#(InterpolatorWT) reqfifoWork <- mkSizedFIFO(interpolator_reqfifoWork_size);
85
   // Four 8-bit samples per entry.
   // NOTE(review): not referenced in the part of the module visible here;
   // presumably the output queue - confirm against the rest of the module.
   FIFO#(Vector#(4,Bit#(8))) outfifo <- mkFIFO;
86
   // While True, sendEndOfFrameReq fires and the load rules are held off.
   Reg#(Bool) endOfFrameFlag <- mkReg(False);
87
   // Load requests enqueued by loadLuma, loadChroma and sendEndOfFrameReq.
   FIFO#(InterpolatorLoadReq)  memReqQ  <- mkFIFO;
88
   // Load responses dequeued by the work rules.
   FIFO#(InterpolatorLoadResp) memRespQ <- mkSizedFIFO(interpolator_memRespQ_size);
89
 
90
   // ---- Picture dimensions -----------------------------------------------
   // Used by the load rules to clamp addresses; compared after appending
   // four (luma) or three (chroma) zero bits.
   // NOTE(review): presumably counted in macroblocks, judging by the
   // maxPicWidthInMB reset value - confirm.
   Reg#(Bit#(PicWidthSz))  picWidth  <- mkReg(maxPicWidthInMB);
91
   Reg#(Bit#(PicHeightSz)) picHeight <- mkReg(0);
92
 
93
   // ---- Storage ----------------------------------------------------------
   // 5-bit-addressed file of four 15-bit intermediates per entry.
   RFile1#(Bit#(5),Vector#(4,Bit#(15))) workFile  <- mkRFile1Full();
94
   // 4-bit-addressed file of four finished 8-bit samples per entry.
   RFile1#(Bit#(4),Vector#(4,Bit#(8))) resultFile <- mkRFile1Full();
95
 
96
   // ---- Load-side progress (loadLuma / loadChroma) -----------------------
   // Pass selector for luma requests that are loaded in two passes.
   Reg#(Bit#(1)) loadStage  <- mkReg(0);
97
   // Horizontal / vertical position of the next load request.
   Reg#(Bit#(2)) loadHorNum <- mkReg(0);
98
   Reg#(Bit#(4)) loadVerNum <- mkReg(0);
99
 
100
   // ---- Work-side progress (workLuma / workChroma) -----------------------
   // Pass selector inside workLuma (0: consuming memory data first pass).
   Reg#(Bit#(1)) workStage     <- mkReg(0);
101
   Reg#(Bit#(2)) workMbPart    <- mkReg(0);//only for Chroma
102
   // Index of the sub-partition being processed for 4x8 / 8x4 / 4x4 blocks.
   Reg#(Bit#(2)) workSubMbPart <- mkReg(0);
103
   // Horizontal / vertical position of the word being processed.
   Reg#(Bit#(2)) workHorNum    <- mkReg(0);
104
   Reg#(Bit#(4)) workVerNum    <- mkReg(0);
105
   // Windows of previously received samples (8-bit) and intermediates
   // (15-bit) that the filters read; both start uninitialised.
   Reg#(Vector#(20,Bit#(8))) workVector8 <- mkRegU;
106
   Reg#(Vector#(20,Bit#(15))) workVector15 <- mkRegU;
107
   // One flag per group of four result rows; workLuma sets a flag when it
   // writes the row whose low two address bits are 3.  Starts uninitialised.
   Reg#(Vector#(4,Bit#(1))) resultReady <- mkRegU;
108
   // Set by workLuma after the last sub-partition; both work rules are
   // guarded by !workDone.  Where it is cleared is not visible here.
   Reg#(Bool) workDone <- mkReg(False);
109
 
110
   // ---- Output-side progress ---------------------------------------------
   // NOTE(review): these three registers are not referenced in the part of
   // the module visible here; verify their use against the output rule.
   Reg#(Bit#(2)) outBlockNum <- mkReg(0);
111
   Reg#(Bit#(2)) outPixelNum <- mkReg(0);
112
   Reg#(Bool) outDone <- mkReg(False);
113
 
114
 
115
   // Forward a pending end-of-frame marker to the memory request queue.
   // The rule fires only while endOfFrameFlag is set and clears the flag in
   // the same firing.  loadLuma and loadChroma are guarded by
   // !endOfFrameFlag, so they wait until this marker has been enqueued.
   rule sendEndOfFrameReq( endOfFrameFlag == True );
      memReqQ.enq(IPLoadEndFrame);
      endOfFrameFlag <= False;
   endrule
119
 
120
 
121
   // loadLuma: for the IPLuma request at the head of reqfifoLoad, enqueue one
   // IPLoadLuma request on memReqQ per firing.  loadHorNum / loadVerNum (and
   // loadStage for two-pass requests) step through the reference area; when
   // the last position has been requested the counters are reset and the
   // request is dequeued.  The rule is held off while endOfFrameFlag is set.
   rule loadLuma( reqfifoLoad.first() matches tagged IPLuma .reqdata &&& !endOfFrameFlag );
122
      // Low two bits of each motion-vector component.
      Bit#(2) xfracl = reqdata.mvhor[1:0];
123
      Bit#(2) yfracl = reqdata.mvver[1:0];
124
      // Both fractions odd: the area is requested in two passes, pass 0 with
      // verInter and pass 1 with horInter (see the next two lines).
      Bool twoStage = (xfracl==1||xfracl==3) && (yfracl==1||yfracl==3);
125
      // Whether this pass fetches the extra columns / rows that the
      // horizontal / vertical filter needs.
      Bool horInter = (twoStage ? loadStage==1 : xfracl!=0);
126
      Bool verInter = (twoStage ? loadStage==0 : yfracl!=0);
127
      // mvhor[3:2], plus one during the vertical pass of a two-pass request
      // with xfracl==3.  Only used for loadHorNumMax below.
      Bit#(2) offset = reqdata.mvhor[3:2] + ((twoStage&&verInter&&xfracl==3) ? 1 : 0);
128
      // Set to 1 whenever the horizontal address had to be clamped.
      Bit#(1) horOut = 0;
129
      Bit#(TAdd#(PicWidthSz,2)) horAddr;
130
      Bit#(TAdd#(PicHeightSz,4)) verAddr;
131
      // Base coordinates before the motion vector is applied: block position
      // (hor scaled by 4) plus the current load position.  The trailing
      // conditional adds one sample in pass 0 (horizontal, xfracl==3) or in
      // pass 1 (vertical, yfracl==3) of a two-pass request.
      Bit#(TAdd#(PicWidthSz,12)) horTemp = zeroExtend({reqdata.hor,2'b00}) + zeroExtend({loadHorNum,2'b00}) + (xfracl==3&&(yfracl==1||yfracl==3)&&loadStage==0 ? 1 : 0);
132
      Bit#(TAdd#(PicHeightSz,10)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum) + (yfracl==3&&(xfracl==1||xfracl==3)&&loadStage==1 ? 1 : 0);
133
      // Signed integer part of the motion vector, moved back by 2 when this
      // pass filters in that direction.
      Bit#(13) mvhortemp = signExtend(reqdata.mvhor[13:2])-(horInter?2:0);
134
      Bit#(11) mvvertemp = signExtend(reqdata.mvver[11:2])-(verInter?2:0);
135
      // Horizontal clamp.  A negative displacement larger than the base
      // coordinate would go below zero: use address 0 and flag it.
      if(mvhortemp[12]==1 && zeroExtend(0-mvhortemp)>horTemp)
136
         begin
137
            horAddr = 0;
138
            horOut = 1;
139
         end
140
      else
141
         begin
142
            horTemp = horTemp + signExtend(mvhortemp);
143
            // At or beyond picWidth*16: use the last word of the row and flag it.
            if(horTemp>=zeroExtend({picWidth,4'b0000}))
144
               begin
145
                  horAddr = {picWidth-1,2'b11};
146
                  horOut = 1;
147
               end
148
            else
149
               // In range: the address is the coordinate divided by 4.
               horAddr = truncate(horTemp>>2);
150
         end
151
      // Vertical clamp, same scheme; no out-of-bounds flag is produced.
      if(mvvertemp[10]==1 && zeroExtend(0-mvvertemp)>verTemp)
152
         verAddr = 0;
153
      else
154
         begin
155
            verTemp = verTemp + signExtend(mvvertemp);
156
            if(verTemp>=zeroExtend({picHeight,4'b0000}))
157
               verAddr = {picHeight-1,4'b1111};
158
            else
159
               verAddr = truncate(verTemp);
160
         end
161
      memReqQ.enq(IPLoadLuma {refIdx:reqdata.refIdx,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
162
      // verFirst selects the iteration order: when True the vertical counter
      // is the inner loop, otherwise the horizontal counter is.
      Bool verFirst = (twoStage&&loadStage==0) || (yfracl==2&&(xfracl==1||xfracl==3));
163
      // Last counter values for this pass: one extra word for 8-wide blocks,
      // plus 2 when filtering horizontally (otherwise 1 if offset is nonzero);
      // 8 or 4 rows, plus 5 extra rows when filtering vertically.
      Bit#(2) loadHorNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP8x4 ? 1 : 0) + (horInter ? 2 : (offset==0 ? 0 : 1));
164
      Bit#(4) loadVerNumMax = (reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 7 : 3) + (verInter ? 5 : 0);
165
      if(verFirst)
166
         begin
167
            if(loadVerNum < loadVerNumMax)
168
               loadVerNum <= loadVerNum+1;
169
            else
170
               begin
171
                  loadVerNum <= 0;
172
                  if(loadHorNum < loadHorNumMax)
173
                     loadHorNum <= loadHorNum+1;
174
                  else
175
                     begin
176
                        loadHorNum <= 0;
177
                        // End of this pass: a two-pass request moves on to
                        // pass 1 and stays queued; otherwise it is finished.
                        if(twoStage)
178
                           loadStage <= 1;
179
                        else
180
                           reqfifoLoad.deq();
181
                     end
182
               end
183
         end
184
      else
185
         begin
186
            if(loadHorNum < loadHorNumMax)
187
               loadHorNum <= loadHorNum+1;
188
            else
189
               begin
190
                  loadHorNum <= 0;
191
                  if(loadVerNum < loadVerNumMax)
192
                     loadVerNum <= loadVerNum+1;
193
                  else
194
                     begin
195
                        // Request fully issued: reset all load state.
                        loadVerNum <= 0;
196
                        loadStage <= 0;
197
                        reqfifoLoad.deq();
198
                     end
199
               end
200
         end
201
      // Only reports the unsupported block types; the load request above is
      // still enqueued in the same firing.
      if(reqdata.bt==IP16x16 || reqdata.bt==IP16x8 || reqdata.bt==IP8x16)
202
         $display( "ERROR Interpolation: loadLuma block sizes > 8x8 not supported");
203
      //$display( "Trace interpolator: loadLuma %h %h %h %h %h %h %h", xfracl, yfracl, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
204
   endrule
205
 
206
 
207
   // loadChroma: for the IPChroma request at the head of reqfifoLoad, enqueue
   // one IPLoadChroma request on memReqQ per firing.  loadHorNum is the inner
   // counter and loadVerNum the outer one; when both reach their maxima they
   // are reset and the request is dequeued.  The rule is held off while
   // endOfFrameFlag is set.
   rule loadChroma( reqfifoLoad.first() matches tagged IPChroma .reqdata &&& !endOfFrameFlag );
208
      // Low three bits of each motion-vector component.
      Bit#(3) xfracc = reqdata.mvhor[2:0];
209
      Bit#(3) yfracc = reqdata.mvver[2:0];
210
      // Low two bits of (2*hor + integer part of mvhor), i.e. the low bits
      // that the >>2 below discards.  Only used for loadHorNumMax.
      Bit#(2) offset = reqdata.mvhor[4:3]+{reqdata.hor[0],1'b0};
211
      // Set to 1 whenever the horizontal address had to be clamped.
      Bit#(1) horOut = 0;
212
      Bit#(TAdd#(PicWidthSz,1)) horAddr;
213
      Bit#(TAdd#(PicHeightSz,3)) verAddr;
214
      // Base coordinates before the motion vector is applied: block position
      // (hor scaled by 2) plus the current load position.
      Bit#(TAdd#(PicWidthSz,11)) horTemp = zeroExtend({reqdata.hor,1'b0}) + zeroExtend({loadHorNum,2'b00});
215
      Bit#(TAdd#(PicHeightSz,9)) verTemp = zeroExtend(reqdata.ver) + zeroExtend(loadVerNum);
216
      // Horizontal clamp.  A negative displacement larger than the base
      // coordinate would go below zero: use address 0 and flag it.
      if(reqdata.mvhor[13]==1 && zeroExtend(0-reqdata.mvhor[13:3])>horTemp)
217
         begin
218
            horAddr = 0;
219
            horOut = 1;
220
         end
221
      else
222
         begin
223
            horTemp = horTemp + signExtend(reqdata.mvhor[13:3]);
224
            // At or beyond picWidth*8: use the last word of the row and flag it.
            if(horTemp>=zeroExtend({picWidth,3'b000}))
225
               begin
226
                  horAddr = {picWidth-1,1'b1};
227
                  horOut = 1;
228
               end
229
            else
230
               // In range: the address is the coordinate divided by 4.
               horAddr = truncate(horTemp>>2);
231
         end
232
      // Vertical clamp, same scheme; no out-of-bounds flag is produced.
      if(reqdata.mvver[11]==1 && zeroExtend(0-reqdata.mvver[11:3])>verTemp)
233
         verAddr = 0;
234
      else
235
         begin
236
            verTemp = verTemp + signExtend(reqdata.mvver[11:3]);
237
            if(verTemp>=zeroExtend({picHeight,3'b000}))
238
               verAddr = {picHeight-1,3'b111};
239
            else
240
               verAddr = truncate(verTemp);
241
         end
242
      memReqQ.enq(IPLoadChroma {refIdx:reqdata.refIdx,uv:reqdata.uv,horOutOfBounds:horOut,hor:horAddr,ver:verAddr});
243
      // Last horizontal counter value.  IP4x8 / IP4x4 need a second word only
      // when offset[1] is set and (xfracc!=0 or offset==3).  Other block types
      // need one extra word for IP16x16 / IP16x8, plus one more unless both
      // xfracc and offset are zero.
      Bit#(2) loadHorNumMax = (reqdata.bt==IP4x8||reqdata.bt==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((reqdata.bt==IP16x16||reqdata.bt==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
244
      // Last vertical counter value: 8, 4 or 2 rows depending on the block
      // type, plus one extra row when yfracc is nonzero.
      Bit#(4) loadVerNumMax = (reqdata.bt==IP16x16||reqdata.bt==IP8x16 ? 7 : (reqdata.bt==IP16x8||reqdata.bt==IP8x8||reqdata.bt==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
245
      if(loadHorNum < loadHorNumMax)
246
         loadHorNum <= loadHorNum+1;
247
      else
248
         begin
249
            loadHorNum <= 0;
250
            if(loadVerNum < loadVerNumMax)
251
               loadVerNum <= loadVerNum+1;
252
            else
253
               begin
254
                  // Request fully issued.
                  loadVerNum <= 0;
255
                  reqfifoLoad.deq();
256
               end
257
         end
258
      //$display( "Trace interpolator: loadChroma %h %h %h %h %h %h %h", xfracc, yfracc, loadHorNum, loadVerNum, reqdata.refIdx, horAddr, verAddr);
259
   endrule
260
 
261
 
262
   // workLuma: processes the IPWLuma descriptor at the head of reqfifoWork,
   // one 4-sample word per firing, and writes finished rows to resultFile.
   // Guarded by !workDone.
   //
   // workStage 0 (every firing consumes one word from memRespQ):
   //   xfracl==0                           : realign words by offset, no filter
   //   xfracl!=0 && (yfracl==0||xfracl==2) : horizontal 6-tap filter
   //   otherwise (xfracl is 1 or 3, yfracl!=0) : vertical 6-tap filter
   //   In the first two cases the result goes to resultFile when yfracl==0,
   //   else to workFile.  The third case always writes workFile.  Whenever
   //   workFile was the target, workStage becomes 1 at the end of the pass.
   //
   // workStage 1:
   //   xfracl==0 || xfracl==2 : vertical filter over the rows in workFile
   //   yfracl==2              : horizontal filter over the rows in workFile
   //   otherwise              : horizontal filter over a fresh word from
   //                            memRespQ, averaged with the clipped value
   //                            taken from workFile
   //
   // After the last word of a (sub-)partition the descriptor is dequeued;
   // workDone is set once the last sub-partition of the block has finished.
   rule workLuma ( reqfifoWork.first() matches tagged IPWLuma .reqdata &&& !workDone );
      let xfracl = reqdata.xFracL;
      let yfracl = reqdata.yFracL;
      let offset = reqdata.offset;
      let blockT = reqdata.bt;
      // Next-state copies; written back unconditionally at the end of the rule.
      Vector#(20,Bit#(8)) workVector8Next = workVector8;
      Vector#(20,Bit#(15)) workVector15Next = workVector15;
      Vector#(4,Bit#(1)) resultReadyNext = resultReady;
      if(workStage == 0)
         begin
            if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
               begin
                  memRespQ.deq();
                  // Unpack the 32-bit response into four bytes, byte 0 lowest.
                  Vector#(4,Bit#(8)) readdata = replicate(0);
                  readdata[0] = tempreaddata[7:0];
                  readdata[1] = tempreaddata[15:8];
                  readdata[2] = tempreaddata[23:16];
                  readdata[3] = tempreaddata[31:24];
                  //$display( "Trace interpolator: workLuma stage 0 readdata %h %h %h %h %h %h", workHorNum, workVerNum, readdata[3], readdata[2], readdata[1], readdata[0] );
                  Vector#(4,Bit#(8)) tempResult8 = replicate(0);
                  Vector#(4,Bit#(15)) tempResult15 = replicate(0);
                  if(xfracl==0 || yfracl==0 || xfracl==2)
                     begin
                        if(xfracl==0)//reorder
                           begin
                              // Assemble an output word that starts `offset`
                              // samples into the previous word (workVector8[0..3])
                              // and continues into the word just read.
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    Bit#(2) offsetplusii = offset+fromInteger(ii);
                                    if(offset <= 3-fromInteger(ii) && offset!=0)
                                       tempResult8[ii] = workVector8[offsetplusii];
                                    else
                                       tempResult8[ii] = readdata[offsetplusii];
                                    workVector8Next[ii] = readdata[ii];
                                 end
                              // Scale by 32 so the value matches the filtered
                              // intermediates that the later (+16)>>5 step expects.
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 tempResult15[ii] = zeroExtend({tempResult8[ii],5'b00000});
                           end
                        else//horizontal interpolation
                           begin
                              offset = offset-2;
                              // Slide the window left by one word, then place the
                              // new word at index 8-offset onwards.
                              for(Integer ii=0; ii<8; ii=ii+1)
                                 workVector8Next[ii] = workVector8[ii+4];
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                                    workVector8Next[tempIndex] = readdata[ii];
                                 end
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]);
                                    // Round, divide by 32 and clip to 8 bits.
                                    tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
                                    // Fractions 1 and 3: rounded average with the
                                    // window sample at ii+2 or ii+3 respectively.
                                    if(xfracl == 1)
                                       tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+2]} + 1) >> 1);
                                    else if(xfracl == 3)
                                       tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,workVector8Next[ii+3]} + 1) >> 1);
                                 end
                           end
                        // Number of leading words per row that only prime the
                        // window and produce no output.
                        Bit#(2) workHorNumOffset = (xfracl!=0 ? 2 : (reqdata.offset==0 ? 0 : 1));
                        if(workHorNum >= workHorNumOffset)
                           begin
                              Bit#(1) horAddr = truncate(workHorNum-workHorNumOffset);
                              if(yfracl == 0)//write to resultFile
                                 begin
                                    Bit#(3) verAddr = truncate(workVerNum);
                                    // Shift the address for the second column /
                                    // lower half according to the sub-partition.
                                    horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                                    verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                                    resultFile.upd({verAddr,horAddr},tempResult8);
                                    // Fourth row of a group written: flag it ready.
                                    if(verAddr[1:0] == 3)
                                       resultReadyNext[{verAddr[2],horAddr}] = 1;
                                 end
                              else//write to workFile
                                 workFile.upd({workVerNum,horAddr},tempResult15);
                           end
                        Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + workHorNumOffset;
                        Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + (yfracl!=0 ? 5 : 0);
                        // Horizontal counter is the inner loop here.
                        if(workHorNum < workHorNumMax)
                           workHorNum <= workHorNum+1;
                        else
                           begin
                              workHorNum <= 0;
                              if(workVerNum < workVerNumMax)
                                 workVerNum <= workVerNum+1;
                              else
                                 begin
                                    workVerNum <= 0;
                                    if(yfracl!=0)
                                       workStage <= 1;
                                    else
                                       begin
                                          if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                                             workSubMbPart <= workSubMbPart+1;
                                          else
                                             begin
                                                workSubMbPart <= 0;
                                                workDone <= True;
                                             end
                                          reqfifoWork.deq();
                                       end
                                 end
                           end
                     end
                  else//vertical interpolation
                     begin
                        offset = offset + (xfracl==3&&(yfracl==1||yfracl==3) ? 1 : 0);
                        // workVector8 holds the five previous rows of this column
                        // (four samples each); the word just read is the sixth tap.
                        for(Integer ii=0; ii<4; ii=ii+1)
                           tempResult15[ii] = interpolate8to15(workVector8[ii],workVector8[ii+4],workVector8[ii+8],workVector8[ii+12],workVector8[ii+16],readdata[ii]);
                        // Slide the row window up by one row and append the new row.
                        for(Integer ii=0; ii<16; ii=ii+1)
                           workVector8Next[ii] = workVector8[ii+4];
                        for(Integer ii=0; ii<4; ii=ii+1)
                           workVector8Next[ii+16] = readdata[ii];
                        Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + (yfracl==2 ? 2 : (offset==0 ? 0 : 1));
                        Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
                        Bit#(2) horAddr = workHorNum;
                        Bit#(3) verAddr = truncate(workVerNum-5);
                        // The first five rows only fill the window.
                        if(workVerNum > 4)
                           begin
                              workFile.upd({verAddr,horAddr},tempResult15);
                              //$display( "Trace interpolator: workLuma stage 0 result %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult15[3], tempResult15[2], tempResult15[1], tempResult15[0]);
                           end
                        // Vertical counter is the inner loop here.
                        if(workVerNum < workVerNumMax)
                           workVerNum <= workVerNum+1;
                        else
                           begin
                              workVerNum <= 0;
                              if(workHorNum < workHorNumMax)
                                 workHorNum <= workHorNum+1;
                              else
                                 begin
                                    workHorNum <= 0;
                                    workStage <= 1;
                                 end
                           end
                     end
               end
         end
      else
         begin
            Vector#(4,Bit#(8)) tempResult8 = replicate(0);
            Vector#(4,Bit#(15)) readdata = replicate(0);
            // This branch is selected by workStage (not loadStage), and stage 0
            // only enters it when yfracl!=0, so the message names workStage.
            if(yfracl==0)
               $display( "ERROR Interpolation: workLuma workStage==1 and yfracl==0");
            if(xfracl==0 || xfracl==2)//vertical interpolation
               begin
                  readdata = workFile.sub({workVerNum,workHorNum[0]});
                  for(Integer ii=0; ii<4; ii=ii+1)
                     begin
                        tempResult8[ii] = interpolate15to8(workVector15[ii],workVector15[ii+4],workVector15[ii+8],workVector15[ii+12],workVector15[ii+16],readdata[ii]);
                        // Fractions 1 and 3: rounded average with the clipped
                        // intermediate from window row 2 or row 3 respectively.
                        if(yfracl == 1)
                           tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+8]+16)>>5))} + 1) >> 1);
                        else if(yfracl == 3)
                           tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15[ii+12]+16)>>5))} + 1) >> 1);
                     end
                  // Slide the 15-bit row window up by one row and append the new row.
                  for(Integer ii=0; ii<16; ii=ii+1)
                     workVector15Next[ii] = workVector15[ii+4];
                  for(Integer ii=0; ii<4; ii=ii+1)
                     workVector15Next[ii+16] = readdata[ii];
                  Bit#(2) workHorNumMax = 1;
                  Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3) + 5;
                  // The first five rows only fill the window.
                  if(workVerNum > 4)
                     begin
                        Bit#(1) horAddr = truncate(workHorNum);
                        Bit#(3) verAddr = truncate(workVerNum-5);
                        horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                        verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                        resultFile.upd({verAddr,horAddr},tempResult8);
                        if(verAddr[1:0] == 3)
                           resultReadyNext[{verAddr[2],horAddr}] = 1;
                     end
                  if(workVerNum < workVerNumMax)
                     workVerNum <= workVerNum+1;
                  else
                     begin
                        workVerNum <= 0;
                        if(workHorNum < workHorNumMax)
                           workHorNum <= workHorNum+1;
                        else
                           begin
                              workHorNum <= 0;
                              workStage <= 0;
                              if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                                 workSubMbPart <= workSubMbPart+1;
                              else
                                 begin
                                    workSubMbPart <= 0;
                                    workDone <= True;
                                 end
                              reqfifoWork.deq();
                           end
                     end
               end
            else//horizontal interpolation
               begin
                  offset = offset-2;
                  if(yfracl == 2)
                     begin
                        // Horizontal filter over the 15-bit intermediates that
                        // stage 0 stored in workFile.
                        readdata = workFile.sub({workVerNum[2:0],workHorNum});
                        for(Integer ii=0; ii<8; ii=ii+1)
                           workVector15Next[ii] = workVector15[ii+4];
                        for(Integer ii=0; ii<4; ii=ii+1)
                           begin
                              Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                              workVector15Next[tempIndex] = readdata[ii];
                           end
                        for(Integer ii=0; ii<4; ii=ii+1)
                           begin
                              tempResult8[ii] = interpolate15to8(workVector15Next[ii],workVector15Next[ii+1],workVector15Next[ii+2],workVector15Next[ii+3],workVector15Next[ii+4],workVector15Next[ii+5]);
                              if(xfracl == 1)
                                 tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+2]+16)>>5))} + 1) >> 1);
                              else if(xfracl == 3)
                                 tempResult8[ii] = truncate(({1'b0,tempResult8[ii]} + {1'b0,clip1y10to8(truncate((workVector15Next[ii+3]+16)>>5))} + 1) >> 1);
                           end
                     end
                  else
                     begin
                        // Second pass of a two-pass request: filter a fresh word
                        // from memory horizontally, then average it with the
                        // value stage 0 left in workFile.
                        // NOTE(review): the counter updates further down sit
                        // outside this pattern match, unlike in stage 0.
                        if(memRespQ.first() matches tagged IPLoadResp .tempreaddata8)
                           begin
                              memRespQ.deq();
                              Vector#(4,Bit#(8)) readdata8 = replicate(0);
                              readdata8[0] = tempreaddata8[7:0];
                              readdata8[1] = tempreaddata8[15:8];
                              readdata8[2] = tempreaddata8[23:16];
                              readdata8[3] = tempreaddata8[31:24];
                              for(Integer ii=0; ii<8; ii=ii+1)
                                 workVector8Next[ii] = workVector8[ii+4];
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    Bit#(4) tempIndex = fromInteger(ii) + 8 - zeroExtend(offset);
                                    workVector8Next[tempIndex] = readdata8[ii];
                                 end
                              Vector#(4,Bit#(15)) tempResult15 = replicate(0);
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    tempResult15[ii] = interpolate8to15(workVector8Next[ii],workVector8Next[ii+1],workVector8Next[ii+2],workVector8Next[ii+3],workVector8Next[ii+4],workVector8Next[ii+5]);
                                    tempResult8[ii] = clip1y10to8(truncate((tempResult15[ii]+16)>>5));
                                 end
                              // Alignment of the stored vertical result: the
                              // unmodified descriptor offset, plus one when xfracl==3.
                              Bit#(2) verOffset;
                              Vector#(4,Bit#(15)) verResult15 = replicate(0);
                              if(xfracl == 1)
                                 verOffset = reqdata.offset;
                              else
                                 verOffset = reqdata.offset+1;
                              readdata = workFile.sub({workVerNum[2:0],(workHorNum-2+(verOffset==0?0:1))});
                              // Same realignment as the stage 0 reorder case, on
                              // 15-bit data.
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    Bit#(2) offsetplusii = verOffset+fromInteger(ii);
                                    if(verOffset <= 3-fromInteger(ii) && verOffset!=0)
                                       verResult15[ii] = workVector15[offsetplusii];
                                    else
                                       verResult15[ii] = readdata[offsetplusii];
                                    workVector15Next[ii] = readdata[ii];
                                 end
                              // Rounded average of the two clipped values.
                              for(Integer ii=0; ii<4; ii=ii+1)
                                 begin
                                    Bit#(9) tempVal = zeroExtend(clip1y10to8(truncate((verResult15[ii]+16)>>5)));
                                    tempResult8[ii] = truncate((tempVal+zeroExtend(tempResult8[ii])+1)>>1);
                                 end
                           end
                     end
                  // The first two words of each row only prime the window.
                  if(workHorNum >= 2)
                     begin
                        Bit#(1) horAddr = truncate(workHorNum-2);
                        Bit#(3) verAddr = truncate(workVerNum);
                        horAddr = horAddr + ((blockT==IP4x8&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[0]==1) ? 1 : 0);
                        verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 4 : 0);
                        resultFile.upd({verAddr,horAddr},tempResult8);
                        if(verAddr[1:0] == 3)
                           resultReadyNext[{verAddr[2],horAddr}] = 1;
                        //$display( "Trace interpolator: workLuma stage 1 result %h %h %h %h %h %h %h %h", workHorNum, workVerNum, {verAddr,horAddr}, tempResult8[3], tempResult8[2], tempResult8[1], tempResult8[0], pack(resultReadyNext));
                     end
                  Bit#(2) workHorNumMax = (blockT==IP8x8||blockT==IP8x4 ? 1 : 0) + 2;
                  Bit#(4) workVerNumMax = (blockT==IP8x8||blockT==IP4x8 ? 7 : 3);
                  if(workHorNum < workHorNumMax)
                     workHorNum <= workHorNum+1;
                  else
                     begin
                        workHorNum <= 0;
                        if(workVerNum < workVerNumMax)
                           workVerNum <= workVerNum+1;
                        else
                           begin
                              workVerNum <= 0;
                              workStage <= 0;
                              if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
                                 workSubMbPart <= workSubMbPart+1;
                              else
                                 begin
                                    workSubMbPart <= 0;
                                    workDone <= True;
                                 end
                              reqfifoWork.deq();
                           end
                     end
               end
         end
      // Commit the next-state copies.
      workVector8 <= workVector8Next;
      workVector15 <= workVector15Next;
      resultReady <= resultReadyNext;
      //$display( "Trace interpolator: workLuma %h %h %h %h %h %h", xfracl, yfracl, workHorNum, workVerNum, offset, workStage);
   endrule
561
 
562
 
563
   // Chroma work rule. Enabled while the head of reqfifoWork is an IPWChroma
   // request and workDone is False. When the head of memRespQ is an
   // IPLoadResp, the rule consumes it, splits its low 32 bits into four bytes,
   // forms a five-sample "current row" (tempWork8) and a five-sample
   // "previous row" (tempPrev8), and computes four weighted-average results
   // per firing:
   //   ((8-x)(8-y)*P[i] + x(8-y)*P[i+1] + (8-x)y*W[i] + x*y*W[i+1] + 32) >> 6
   // Completed result words are written to resultFile and flagged in
   // resultReady; the workHorNum / workVerNum / workSubMbPart / workMbPart
   // counters are then advanced.
   rule workChroma ( reqfifoWork.first() matches tagged IPWChroma .reqdata &&& !workDone );
564
      // Fractions are widened to 4 bits because the weights below use
      // (8 - frac), and the value 8 does not fit in fewer bits.
      Bit#(4) xfracc = zeroExtend(reqdata.xFracC);
565
      Bit#(4) yfracc = zeroExtend(reqdata.yFracC);
566
      let offset = reqdata.offset;
567
      let blockT = reqdata.bt;
568
      // Local "next state" copies; they are committed at the end of the rule.
      Vector#(20,Bit#(8)) workVector8Next = workVector8;
569
      Vector#(4,Bit#(1)) resultReadyNext = resultReady;
570
      if(memRespQ.first() matches tagged IPLoadResp .tempreaddata)
571
         begin
572
            memRespQ.deq();
573
            // Unpack the response: readdata[0] is the least significant byte.
            Vector#(4,Bit#(8)) readdata = replicate(0);
574
            readdata[0] = tempreaddata[7:0];
575
            readdata[1] = tempreaddata[15:8];
576
            readdata[2] = tempreaddata[23:16];
577
            readdata[3] = tempreaddata[31:24];
578
            Vector#(5,Bit#(8)) tempWork8 = replicate(0);
579
            Vector#(5,Bit#(8)) tempPrev8 = replicate(0);
580
            Vector#(4,Bit#(8)) tempResult8 = replicate(0);
581
            Bool resultReadyFlag = False;
582
            // Build samples 0..3 of the current row. Each lane takes the byte
            // at index offset+ii (2-bit arithmetic, so it wraps modulo 4)
            // either from the bytes saved out of the previous response
            // (workVector8[0..3]) or from the response just read. The saved
            // bytes are used only when offset+ii does not wrap and none of
            // the special cases in the condition apply.
            for(Integer ii=0; ii<4; ii=ii+1)
583
               begin
584
                  Bit#(2) offsetplusii = offset+fromInteger(ii);
585
                  if(offset <= 3-fromInteger(ii) && !((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))) && !(xfracc==0&&offset==0))
586
                     tempWork8[ii] = workVector8[offsetplusii];
587
                  else
588
                     tempWork8[ii] = readdata[offsetplusii];
589
                  // Always remember the raw bytes of this response in
                  // entries 0..3 for the next firing.
                  workVector8Next[ii] = readdata[ii];
590
               end
591
            // The fifth sample always comes from the response just read.
            tempWork8[4] = readdata[offset];
592
            // Select the previous row and save the current one. For IP16x8 /
            // IP16x16 at the workHorNum value tested below, the row buffer is
            // workVector8[9..13]; otherwise it is workVector8[4..8].
            if((blockT==IP16x8 || blockT==IP16x16) && workHorNum==(xfracc==0&&offset==0 ? 1 : 2))
593
               begin
594
                  for(Integer ii=0; ii<5; ii=ii+1)
595
                     begin
596
                        tempPrev8[ii] = workVector8[ii+9];
597
                        workVector8Next[ii+9] = tempWork8[ii];
598
                     end
599
               end
600
            else
601
               begin
602
                  for(Integer ii=0; ii<5; ii=ii+1)
603
                     tempPrev8[ii] = workVector8[ii+4];
604
                  // Entries 4..8 are overwritten only under this condition;
                  // otherwise the stored row is kept as is.
                  if(workHorNum==(xfracc==0&&offset==0 ? 0 : 1) || ((blockT==IP4x8||blockT==IP4x4)&&(offset[1]==0||(xfracc==0&&offset!=3))))
605
                     begin
606
                        for(Integer ii=0; ii<5; ii=ii+1)
607
                           workVector8Next[ii+4] = tempWork8[ii];
608
                     end
609
               end
610
            // With no vertical fraction the previous row is replaced by the
            // current row, so the result depends on the current row only.
            if(yfracc==0)
611
               begin
612
                  for(Integer ii=0; ii<5; ii=ii+1)
613
                     tempPrev8[ii] = tempWork8[ii];
614
               end
615
            // Weighted sum of the 2x2 neighbourhood for each of the four
            // outputs. The four weights sum to 64, hence the +32 rounding
            // term and the shift right by 6.
            for(Integer ii=0; ii<4; ii=ii+1)
616
               begin
617
                  Bit#(14) tempVal = zeroExtend((8-xfracc))*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii]);
618
                  tempVal = tempVal + zeroExtend(xfracc)*zeroExtend((8-yfracc))*zeroExtend(tempPrev8[ii+1]);
619
                  tempVal = tempVal + zeroExtend((8-xfracc))*zeroExtend(yfracc)*zeroExtend(tempWork8[ii]);
620
                  tempVal = tempVal + zeroExtend(xfracc)*zeroExtend(yfracc)*zeroExtend(tempWork8[ii+1]);
621
                  tempResult8[ii] = truncate((tempVal+32)>>6);
622
               end
623
            // A result is only considered once a previous row exists
            // (workVerNum > 0) or when no vertical filtering is needed.
            if(workVerNum > 0 || yfracc==0)
624
               begin
625
                  if(blockT==IP4x8 || blockT==IP4x4)
626
                     begin
627
                        // For these block types only results 0 and 1 are
                        // used. They are stashed in workVector8 at
                        // 10 + 2*workVerNum, and the output word is assembled
                        // as {stashed pair, new pair}: the previously stashed
                        // pair goes to positions 0..1, the new pair to 2..3.
                        Bit#(5) tempIndex = 10 + zeroExtend(workVerNum<<1);
628
                        workVector8Next[tempIndex] = tempResult8[0];
629
                        workVector8Next[tempIndex+1] = tempResult8[1];
630
                        tempResult8[2] = tempResult8[0];
631
                        tempResult8[3] = tempResult8[1];
632
                        tempResult8[0] = workVector8[tempIndex];
633
                        tempResult8[1] = workVector8[tempIndex+1];
634
                        // The assembled word is emitted only on odd
                        // sub-partitions (workSubMbPart[0]==1).
                        if((workHorNum>0 || offset[1]==0) && workSubMbPart[0]==1)
635
                           resultReadyFlag = True;
636
                     end
637
                  else
638
                     begin
639
                        if(workHorNum>0 || (xfracc==0 && offset==0))
640
                           resultReadyFlag = True;
641
                     end
642
               end
643
            if(resultReadyFlag)
644
               begin
645
                  // Compute the resultFile address {verAddr,horAddr} from the
                  // step counters, then add partition-dependent offsets
                  // derived from workMbPart and workSubMbPart.
                  Bit#(1) horAddr = ((blockT==IP4x8 || blockT==IP4x4) ? 0 : truncate(((xfracc==0 && offset==0) ? workHorNum : workHorNum-1)));
646
                  Bit#(3) verAddr = truncate((yfracc==0 ? workVerNum : workVerNum-1));
647
                  horAddr = horAddr + ((blockT==IP16x8||blockT==IP16x16) ? 0 : workMbPart[0]);
648
                  verAddr = verAddr + ((blockT==IP8x16||blockT==IP16x16) ? 0 : ((blockT==IP16x8) ? {workMbPart[0],2'b00} : {workMbPart[1],2'b00}));
649
                  verAddr = verAddr + ((blockT==IP8x4&&workSubMbPart==1)||(blockT==IP4x4&&workSubMbPart[1]==1) ? 2 : 0);
650
                  resultFile.upd({verAddr,horAddr},tempResult8);
651
                  // Writing the entry whose low two verAddr bits are 3 marks
                  // resultReady[{verAddr[2],horAddr}] as ready.
                  if(verAddr[1:0] == 3)
652
                     resultReadyNext[{verAddr[2],horAddr}] = 1;
653
               end
654
            // Last values the horizontal and vertical step counters take for
            // this block type; both grow by one when the corresponding extra
            // fetch is needed (see the conditions in each expression).
            Bit#(2) workHorNumMax = (blockT==IP4x8||blockT==IP4x4 ? (offset[1]==0||(xfracc==0&&offset!=3) ? 0 : 1) : ((blockT==IP16x16||blockT==IP16x8 ? 1 : 0) + (xfracc==0&&offset==0 ? 0 : 1)));
655
            Bit#(4) workVerNumMax = (blockT==IP16x16||blockT==IP8x16 ? 7 : (blockT==IP16x8||blockT==IP8x8||blockT==IP4x8 ? 3 : 1)) + (yfracc==0 ? 0 : 1);
656
            // Nested counters: workHorNum, then workVerNum, then
            // workSubMbPart, then workMbPart.
            if(workHorNum < workHorNumMax)
657
               workHorNum <= workHorNum+1;
658
            else
659
               begin
660
                  workHorNum <= 0;
661
                  if(workVerNum < workVerNumMax)
662
                     workVerNum <= workVerNum+1;
663
                  else
664
                     begin
665
                        workVerNum <= 0;
666
                        if(((blockT==IP4x8 || blockT==IP8x4) && workSubMbPart==0) || (blockT==IP4x4 && workSubMbPart<3))
667
                           workSubMbPart <= workSubMbPart+1;
668
                        else
669
                           begin
670
                              workSubMbPart <= 0;
671
                              if(((blockT==IP16x8 || blockT==IP8x16) && workMbPart==0) || (!(blockT==IP16x8 || blockT==IP8x16 || blockT==IP16x16) && workMbPart<3))
672
                                 workMbPart <= workMbPart+1;
673
                              else
674
                                 begin
675
                                    workMbPart <= 0;
676
                                    workDone <= True;
677
                                 end
678
                           end
679
                        // Not part of the inner else: the request is dequeued
                        // every time workVerNum wraps, whichever
                        // sub-partition branch above was taken.
                        reqfifoWork.deq();
680
                     end
681
               end
682
         end
683
      // These writes are outside the if, so they are committed on every
      // firing; the registers keep their old values when nothing changed.
      workVector8 <= workVector8Next;
684
      resultReady <= resultReadyNext;
685
      //$display( "Trace interpolator: workChroma %h %h %h %h %h", xfracc, yfracc, workHorNum, workVerNum, offset);
686
   endrule
687
 
688
 
689
   // Output rule: while the current pass has not finished and the block
   // selected by outBlockNum is flagged ready, push one result word per firing
   // into outfifo. outPixelNum steps through the four words of a block and
   // outBlockNum through the four blocks; outDone is raised with the last word.
   rule outputing( !outDone && resultReady[outBlockNum]==1 );
      Bool lastWordOfBlock = (outPixelNum == 3);
      Bool lastBlock       = (outBlockNum == 3);

      outfifo.enq(resultFile.sub({outBlockNum[1],outPixelNum,outBlockNum[0]}));
      outPixelNum <= outPixelNum+1;

      if(lastWordOfBlock)
         outBlockNum <= outBlockNum+1;
      if(lastWordOfBlock && lastBlock)
         outDone <= True;
      //$display( "Trace interpolator: outputing %h %h %h %h %h %h", outBlockNum, outPixelNum, tempVector[3], tempVector[2], tempVector[1], tempVector[0]);
   endrule
700
 
701
 
702
   // Hand-over rule: once both the work side and the output side have
   // finished, clear both done flags and all four ready bits.
   rule switching( outDone && workDone );
      resultReady <= replicate(1'b0);
      workDone    <= False;
      outDone     <= False;
      //$display( "Trace interpolator: switching %h %h", outBlockNum, outPixelNum);
   endrule
708
 
709
 
710
   // Stores the supplied picture width in the picWidth register.
   method Action   setPicWidth( Bit#(PicWidthSz) newPicWidth );
      picWidth._write(newPicWidth);
   endmethod
713
 
714
   // Stores the supplied picture height in the picHeight register.
   method Action   setPicHeight( Bit#(PicHeightSz) newPicHeight );
      picHeight._write(newPicHeight);
   endmethod
717
 
718
   // Accepts one interpolation request. The full request goes to reqfifoLoad;
   // a reduced record (fractional motion-vector bits, word offset and block
   // type) goes to reqfifoWork, tagged luma or chroma to match the input.
   method Action request( InterpolatorIT inputdata );
      reqfifoLoad.enq(inputdata);
      case (inputdata) matches
         tagged IPLuma .lumaReq :
            reqfifoWork.enq(IPWLuma {xFracL:lumaReq.mvhor[1:0],yFracL:lumaReq.mvver[1:0],offset:lumaReq.mvhor[3:2],bt:lumaReq.bt});
         tagged IPChroma .chromaReq :
            reqfifoWork.enq(IPWChroma {xFracC:chromaReq.mvhor[2:0],yFracC:chromaReq.mvver[2:0],offset:chromaReq.mvhor[4:3]+{chromaReq.hor[0],1'b0},bt:chromaReq.bt});
      endcase
   endmethod
725
 
726
   // Returns the result word at the head of outfifo without removing it.
   method Vector#(4,Bit#(8)) first() = outfifo.first();
729
 
730
   // Removes the result word at the head of outfifo.
   method Action deq() = outfifo.deq();
733
 
734
   // Sets endOfFrameFlag.
   method Action endOfFrame();
      endOfFrameFlag._write(True);
   endmethod
737
 
738
   // Memory client port: requests are taken from memReqQ and responses are
   // delivered into memRespQ.
   interface Client mem_client;
      interface request  = fifoToGet(memReqQ);
      interface response = fifoToPut(memRespQ);
   endinterface
742
 
743
 
744
endmodule
745
 
746
 
747
endpackage

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.