URL
https://opencores.org/ocsvn/video_stream_scaler/video_stream_scaler/trunk
Subversion Repositories video_stream_scaler
[/] [video_stream_scaler/] [trunk/] [rtl/] [verilog/] [scaler.v] - Rev 2
Compare with Previous | Blame | View Log
/*----------------------------------------------------------------------------- Video Stream Scaler Author: David Kronstein Copyright 2011, David Kronstein, and individual contributors as indicated by the @authors tag. This is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this software; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF site: http://www.fsf.org. ------------------------------------------------------------------------------- Scales streaming video up or down in resolution. Bilinear and nearest neighbor modes are supported. Run-time adjustment of input and output resolution, scaling factors, and scale type. ------------------------------------------------------------------------------- Revisions V1.0.0 Feb 21 2011 Initial Release David Kronstein Known bugs: Very slight numerical errors (+0/-2 LSb) in output data due to coefficient arithmetic. Impossible to notice without adjustment in video levels. Attempted to fix by setting coeff11 to 1.0 - other coefficients, but this caused timing issues. */ `default_nettype none module streamScaler #( //---------------------------Parameters---------------------------------------- parameter DATA_WIDTH = 8, //Width of input/output data parameter CHANNELS = 1, //Number of channels of DATA_WIDTH, for color images parameter DISCARD_CNT_WIDTH = 8, //Width of inputDiscardCnt parameter INPUT_X_RES_WIDTH = 11, //Widths of input/output resolution control signals parameter INPUT_Y_RES_WIDTH = 11, parameter OUTPUT_X_RES_WIDTH = 11, parameter OUTPUT_Y_RES_WIDTH = 11, parameter FRACTION_BITS = 8, //Number of bits for fractional component of coefficients. parameter SCALE_INT_BITS = 4, //Width of integer component of scaling factor. The maximum input data width to //multipliers created will be SCALE_INT_BITS + SCALE_FRAC_BITS. Typically these //values will sum to 18 to match multipliers available in FPGAs. parameter SCALE_FRAC_BITS = 14, //Width of fractional component of scaling factor parameter BUFFER_SIZE = 4, //Depth of RFIFO //---------------------Non-user-definable parameters---------------------------- parameter COEFF_WIDTH = FRACTION_BITS + 1, parameter SCALE_BITS = SCALE_INT_BITS + SCALE_FRAC_BITS, parameter BUFFER_SIZE_WIDTH = ((BUFFER_SIZE+1) <= 2) ? 1 : //wide enough to hold value BUFFER_SIZE + 1 ((BUFFER_SIZE+1) <= 4) ? 2 : ((BUFFER_SIZE+1) <= 8) ? 3 : ((BUFFER_SIZE+1) <= 16) ? 4 : ((BUFFER_SIZE+1) <= 32) ? 5 : ((BUFFER_SIZE+1) <= 64) ? 6 : 7 )( //---------------------------Module IO----------------------------------------- //Clock and reset input wire clk, input wire rst, //User interface //Input input wire [DATA_WIDTH*CHANNELS-1:0]dIn, input wire dInValid, output wire nextDin, input wire start, //Output output reg [DATA_WIDTH*CHANNELS-1:0] dOut, output reg dOutValid, //latency of 4 clock cycles after nextDout is asserted input wire nextDout, //Control input wire [DISCARD_CNT_WIDTH-1:0] inputDiscardCnt, //Number of input pixels to discard before processing data. Used for clipping input wire [INPUT_X_RES_WIDTH-1:0] inputXRes, //Resolution of input data minus 1 input wire [INPUT_Y_RES_WIDTH-1:0] inputYRes, input wire [OUTPUT_X_RES_WIDTH-1:0] outputXRes, //Resolution of output data minus 1 input wire [OUTPUT_Y_RES_WIDTH-1:0] outputYRes, input wire [SCALE_BITS-1:0] xScale, //Scaling factors. Input resolution scaled up by 1/xScale. Format Q SCALE_INT_BITS.SCALE_FRAC_BITS input wire [SCALE_BITS-1:0] yScale, //Scaling factors. Input resolution scaled up by 1/yScale. Format Q SCALE_INT_BITS.SCALE_FRAC_BITS input wire [OUTPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:0] leftOffset, //Integer/fraction of input pixel to offset output data horizontally right. Format Q OUTPUT_X_RES_WIDTH.SCALE_FRAC_BITS input wire [SCALE_FRAC_BITS-1:0] topFracOffset, //Fraction of input pixel to offset data vertically down. Format Q0.SCALE_FRAC_BITS input wire nearestNeighbor //Use nearest neighbor resize instead of bilinear ); //-----------------------Internal signals and registers------------------------ reg advanceRead1; reg advanceRead2; wire [DATA_WIDTH*CHANNELS-1:0] readData00; wire [DATA_WIDTH*CHANNELS-1:0] readData01; wire [DATA_WIDTH*CHANNELS-1:0] readData10; wire [DATA_WIDTH*CHANNELS-1:0] readData11; reg [DATA_WIDTH*CHANNELS-1:0] readData00Reg; reg [DATA_WIDTH*CHANNELS-1:0] readData01Reg; reg [DATA_WIDTH*CHANNELS-1:0] readData10Reg; reg [DATA_WIDTH*CHANNELS-1:0] readData11Reg; wire [INPUT_X_RES_WIDTH-1:0] readAddress; reg readyForRead; //Indicates two full lines have been put into the buffer reg [OUTPUT_Y_RES_WIDTH-1:0] outputLine; //which output video line we're on reg [OUTPUT_X_RES_WIDTH-1:0] outputColumn; //which output video column we're on reg [INPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:0] xScaleAmount; //Fractional and integer components of input pixel select (multiply result) reg [INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:0] yScaleAmount; //Fractional and integer components of input pixel select (multiply result) reg [INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:0] yScaleAmountNext; //Fractional and integer components of input pixel select (multiply result) wire [BUFFER_SIZE_WIDTH-1:0] fillCount; //Numbers used rams in the ram fifo reg lineSwitchOutputDisable; //On the end of an output line, disable the output for one cycle to let the RAM data become valid reg dOutValidInt; reg [COEFF_WIDTH-1:0] xBlend; wire [COEFF_WIDTH-1:0] yBlend = {1'b0, yScaleAmount[SCALE_FRAC_BITS-1:SCALE_FRAC_BITS-FRACTION_BITS]}; wire [INPUT_X_RES_WIDTH-1:0] xPixLow = xScaleAmount[INPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS]; wire [INPUT_Y_RES_WIDTH-1:0] yPixLow = yScaleAmount[INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS]; wire [INPUT_Y_RES_WIDTH-1:0] yPixLowNext = yScaleAmountNext[INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS]; wire allDataWritten; //Indicates that all data from input has been read in reg readState; //States for read state machine parameter RS_START = 0; parameter RS_READ_LINE = 1; //Read state machine //Controls the RFIFO(ram FIFO) readout and generates output data valid signals always @ (posedge clk or posedge rst or posedge start) begin if(rst | start) begin outputLine <= 0; outputColumn <= 0; xScaleAmount <= 0; yScaleAmount <= 0; readState <= RS_START; dOutValidInt <= 0; lineSwitchOutputDisable <= 0; advanceRead1 <= 0; advanceRead2 <= 0; yScaleAmountNext <= 0; end else begin case (readState) RS_START: begin xScaleAmount <= leftOffset; yScaleAmount <= {{INPUT_Y_RES_WIDTH{1'b0}}, topFracOffset}; if(readyForRead) begin readState <= RS_READ_LINE; dOutValidInt <= 1; end end RS_READ_LINE: begin //outputLine goes through all output lines, and the logic determines which input lines to read into the RRB and which ones to discard. if(nextDout && dOutValidInt) begin if(outputColumn == outputXRes) begin //On the last input pixel of the line if(yPixLowNext == (yPixLow + 1)) //If the next input line is only one greater, advance the RRB by one only begin advanceRead1 <= 1; if(fillCount < 3) //If the RRB doesn't have enough data, stop reading it out dOutValidInt <= 0; end else if(yPixLowNext > (yPixLow + 1)) //If the next input line is two or more greater, advance the read by two begin advanceRead2 <= 1; if(fillCount < 4) //If the RRB doesn't have enough data, stop reading it out dOutValidInt <= 0; end outputColumn <= 0; xScaleAmount <= leftOffset; outputLine <= outputLine + 1; yScaleAmount <= yScaleAmountNext; lineSwitchOutputDisable <= 1; end else begin //Advance the output pixel selection values except when waiting for the ram data to become valid if(lineSwitchOutputDisable == 0) begin outputColumn <= outputColumn + 1; xScaleAmount <= (outputColumn + 1) * xScale + leftOffset; end advanceRead1 <= 0; advanceRead2 <= 0; lineSwitchOutputDisable <= 0; end end else //else from if(nextDout && dOutValidInt) begin advanceRead1 <= 0; advanceRead2 <= 0; lineSwitchOutputDisable <= 0; end //Once the RRB has enough data, let data be read from it. If all input data has been written, always allow read if(fillCount >= 2 && dOutValidInt == 0 || allDataWritten) begin if((!advanceRead1 && !advanceRead2)) begin dOutValidInt <= 1; lineSwitchOutputDisable <= 0; end end end//state RS_READ_LINE: endcase //yScaleAmountNext is used to determine which input lines are valid. yScaleAmountNext <= (outputLine + 1) * yScale + {{OUTPUT_Y_RES_WIDTH{1'b0}}, topFracOffset}; end end assign readAddress = xPixLow; //Generate dOutValid signal, delayed to account for delays in data path reg dOutValid_1; reg dOutValid_2; reg dOutValid_3; always @(posedge clk or posedge rst) begin if(rst) begin dOutValid_1 <= 0; dOutValid_2 <= 0; dOutValid_3 <= 0; dOutValid <= 0; end else begin dOutValid_1 <= nextDout && dOutValidInt && !lineSwitchOutputDisable; dOutValid_2 <= dOutValid_1; dOutValid_3 <= dOutValid_2; dOutValid <= dOutValid_3; end end //-----------------------Output data generation----------------------------- //Scale amount values are used to generate coefficients for the four pixels coming out of the RRB to be multiplied with. //Coefficients for each of the four pixels //Format Q1.FRACTION_BITS // yx reg [COEFF_WIDTH-1:0] coeff00; //Top left reg [COEFF_WIDTH-1:0] coeff01; //Top right reg [COEFF_WIDTH-1:0] coeff10; //Bottom left reg [COEFF_WIDTH-1:0] coeff11; //Bottom right //Coefficient value of one, format Q1.COEFF_WIDTH-1 wire [COEFF_WIDTH-1:0] coeffOne = {1'b1, {(COEFF_WIDTH-1){1'b0}}}; //One in MSb, zeros elsewhere //Coefficient value of one half, format Q1.COEFF_WIDTH-1 wire [COEFF_WIDTH-1:0] coeffHalf = {2'b01, {(COEFF_WIDTH-2){1'b0}}}; //Compute bilinear interpolation coefficinets. Done here because these pre-registerd values are used twice. //Adding coeffHalf to get the nearest value. wire [COEFF_WIDTH-1:0] preCoeff00 = (((coeffOne - xBlend) * (coeffOne - yBlend) + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}}; wire [COEFF_WIDTH-1:0] preCoeff01 = ((xBlend * (coeffOne - yBlend) + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}}; wire [COEFF_WIDTH-1:0] preCoeff10 = (((coeffOne - xBlend) * yBlend + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}}; //Compute the coefficients always @(posedge clk or posedge rst) begin if(rst) begin coeff00 <= 0; coeff01 <= 0; coeff10 <= 0; coeff11 <= 0; xBlend <= 0; end else begin xBlend <= {1'b0, xScaleAmount[SCALE_FRAC_BITS-1:SCALE_FRAC_BITS-FRACTION_BITS]}; //Changed to registered to improve timing if(nearestNeighbor == 1'b0) begin //Normal bilinear interpolation coeff00 <= preCoeff00; coeff01 <= preCoeff01; coeff10 <= preCoeff10; coeff11 <= ((xBlend * yBlend + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}}; //coeff11 <= coeffOne - preCoeff00 - preCoeff01 - preCoeff10; //Guarantee that all coefficients sum to coeffOne. Saves a multiply too. Reverted to previous method due to timing issues. end else begin //Nearest neighbor interploation, set one coefficient to 1.0, the rest to zero based on the fractions coeff00 <= xBlend < coeffHalf && yBlend < coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}}; coeff01 <= xBlend >= coeffHalf && yBlend < coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}}; coeff10 <= xBlend < coeffHalf && yBlend >= coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}}; coeff11 <= xBlend >= coeffHalf && yBlend >= coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}}; end end end //Generate the blending multipliers reg [(DATA_WIDTH+COEFF_WIDTH)*CHANNELS-1:0] product00, product01, product10, product11; generate genvar channel; for(channel = 0; channel < CHANNELS; channel = channel + 1) begin : blend_mult_generate always @(posedge clk or posedge rst) begin if(rst) begin //productxx[channel] <= 0; product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0; product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0; product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0; product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0; //readDataxxReg[channel] <= 0; readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0; readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0; readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0; readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0; //dOut[channel] <= 0; dOut[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0; end else begin //readDataxxReg[channel] <= readDataxx[channel]; readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData00[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ]; readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData01[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ]; readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData10[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ]; readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData11[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ]; //productxx[channel] <= readDataxxReg[channel] * coeffxx product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff00; product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff01; product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff10; product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff11; //dOut[channel] <= (((product00[channel]) + // (product01[channel]) + // (product10[channel]) + // (product11[channel])) >> FRACTION_BITS) & ({ {COEFF_WIDTH{1'b0}}, {DATA_WIDTH{1'b1}} }); dOut[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= (((product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) + (product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) + (product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) + (product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel])) >> FRACTION_BITS) & ({ {COEFF_WIDTH{1'b0}}, {DATA_WIDTH{1'b1}} }); end end end endgenerate //---------------------------Data write logic---------------------------------- //Places input data into the correct ram in the RFIFO (ram FIFO) //Controls writing to the RFIFO, and discards lines that arn't used reg [INPUT_Y_RES_WIDTH-1:0] writeNextValidLine; //Which line greater than writeRowCount is the next one that must be read in reg [INPUT_Y_RES_WIDTH-1:0] writeNextPlusOne; //One greater than writeNextValidLine, because we must always read in two adjacent lines reg [INPUT_Y_RES_WIDTH-1:0] writeRowCount; //Which line we're reading from dIn reg [OUTPUT_Y_RES_WIDTH-1:0] writeOutputLine; //The output line that corresponds to the input line. This is incremented until writeNextValidLine is greater than writeRowCount reg getNextPlusOne; //Flag so that writeNextPlusOne is captured only once after writeRowCount >= writeNextValidLine. This is in case multiple cycles are requred until writeNextValidLine changes. //Determine which lines to read out and which to discard. //writeNextValidLine is the next valid line number that needs to be read out above current value writeRowCount //writeNextPlusOne also needs to be read out (to do interpolation), this may or may not be equal to writeNextValidLine always @(posedge clk or posedge rst or posedge start) begin if(rst | start) begin writeOutputLine <= 0; writeNextValidLine <= 0; writeNextPlusOne <= 1; getNextPlusOne <= 1; end else begin if(writeRowCount >= writeNextValidLine) //When writeRowCount becomes higher than the next valid line to read out, comptue the next valid line. begin if(getNextPlusOne) //Keep writeNextPlusOne begin writeNextPlusOne <= writeNextValidLine + 1; end getNextPlusOne <= 0; writeOutputLine <= writeOutputLine + 1; writeNextValidLine <= ((writeOutputLine*yScale + {{(OUTPUT_Y_RES_WIDTH + SCALE_INT_BITS){1'b0}}, topFracOffset}) >> SCALE_FRAC_BITS) & {{SCALE_BITS{1'b0}}, {OUTPUT_Y_RES_WIDTH{1'b1}}}; end else begin getNextPlusOne <= 1; end end end reg discardInput; reg [DISCARD_CNT_WIDTH-1:0] discardCountReg; wire advanceWrite; reg [1:0] writeState; reg [INPUT_X_RES_WIDTH-1:0] writeColCount; reg enableNextDin; reg forceRead; //Write state machine //Controls writing scaler input data into the RRB parameter WS_START = 0; parameter WS_DISCARD = 1; parameter WS_READ = 2; parameter WS_DONE = 3; //Control write and address signals to write data into ram FIFO always @ (posedge clk or posedge rst or posedge start) begin if(rst | start) begin writeState <= WS_START; enableNextDin <= 0; discardInput <= 0; readyForRead <= 0; writeRowCount <= 0; writeColCount <= 0; discardCountReg <= 0; forceRead <= 0; end else begin case (writeState) WS_START: begin discardCountReg <= inputDiscardCnt; if(inputDiscardCnt > 0) begin discardInput <= 1; enableNextDin <= 1; writeState <= WS_DISCARD; end else begin discardInput <= 0; enableNextDin <= 1; writeState <= WS_READ; end discardInput <= (inputDiscardCnt > 0) ? 1'b1 : 1'b0; end WS_DISCARD: //Discard pixels from input data begin if(dInValid) begin discardCountReg <= discardCountReg - 1; if((discardCountReg - 1) == 0) begin discardInput <= 0; writeState <= WS_READ; end end end WS_READ: begin if(dInValid & nextDin) begin if(writeColCount == inputXRes) begin //Occurs on the last pixel in the line if((writeNextValidLine == writeRowCount + 1) || (writeNextPlusOne == writeRowCount + 1)) begin //Next line is valid, write into buffer discardInput <= 0; end else begin //Next line is not valid, discard discardInput <= 1; end //Once writeRowCount is >= 2, data is ready to start being output. if(writeRowCount[1]) readyForRead <= 1; if(writeRowCount == inputYRes) //When all data has been read in, stop reading. begin writeState <= WS_DONE; enableNextDin <= 0; forceRead <= 1; end writeColCount <= 0; writeRowCount <= writeRowCount + 1; end else begin writeColCount <= writeColCount + 1; end end end WS_DONE: begin //do nothing, wait for reset end endcase end end //Advance write whenever we have just written a valid line (discardInput == 0) //Generate this signal one earlier than discardInput above that uses the same conditions, to advance the buffer at the right time. assign advanceWrite = (writeColCount == inputXRes) & (discardInput == 0) & dInValid & nextDin; assign allDataWritten = writeState == WS_DONE; assign nextDin = (fillCount < BUFFER_SIZE) & enableNextDin; ramFifo #( .DATA_WIDTH( DATA_WIDTH*CHANNELS ), .ADDRESS_WIDTH( INPUT_X_RES_WIDTH ), //Controls width of RAMs .BUFFER_SIZE( BUFFER_SIZE ) //Number of RAMs ) ramRB ( .clk( clk ), .rst( rst | start ), .advanceRead1( advanceRead1 ), .advanceRead2( advanceRead2 ), .advanceWrite( advanceWrite ), .forceRead( forceRead ), .writeData( dIn ), .writeAddress( writeColCount ), .writeEnable( dInValid & nextDin & enableNextDin & ~discardInput ), .fillCount( fillCount ), .readData00( readData00 ), .readData01( readData01 ), .readData10( readData10 ), .readData11( readData11 ), .readAddress( readAddress ) ); endmodule //scaler //---------------------------Ram FIFO (RFIFO)----------------------------- //FIFO buffer with rams as the elements, instead of data //One ram is filled, while two others are simultaneously read out. //Four neighboring pixels are read out at once, at the selected RAM and one line down, and at readAddress and readAddress + 1 module ramFifo #( parameter DATA_WIDTH = 8, parameter ADDRESS_WIDTH = 8, parameter BUFFER_SIZE = 2, parameter BUFFER_SIZE_WIDTH = ((BUFFER_SIZE+1) <= 2) ? 1 : //wide enough to hold value BUFFER_SIZE + 1 ((BUFFER_SIZE+1) <= 4) ? 2 : ((BUFFER_SIZE+1) <= 8) ? 3 : ((BUFFER_SIZE+1) <= 16) ? 4 : ((BUFFER_SIZE+1) <= 32) ? 5 : ((BUFFER_SIZE+1) <= 64) ? 6 : 7 )( input wire clk, input wire rst, input wire advanceRead1, //Advance selected read RAM by one input wire advanceRead2, //Advance selected read RAM by two input wire advanceWrite, //Advance selected write RAM by one input wire forceRead, //Disables writing to allow all data to be read out (RAM being written to cannot be read from normally) input wire [DATA_WIDTH-1:0] writeData, input wire [ADDRESS_WIDTH-1:0] writeAddress, input wire writeEnable, output reg [BUFFER_SIZE_WIDTH-1:0] fillCount, // yx output wire [DATA_WIDTH-1:0] readData00, //Read from deepest RAM (earliest data), at readAddress output wire [DATA_WIDTH-1:0] readData01, //Read from deepest RAM (earliest data), at readAddress + 1 output wire [DATA_WIDTH-1:0] readData10, //Read from second deepest RAM (second earliest data), at readAddress output wire [DATA_WIDTH-1:0] readData11, //Read from second deepest RAM (second earliest data), at readAddress + 1 input wire [ADDRESS_WIDTH-1:0] readAddress ); reg [BUFFER_SIZE-1:0] writeSelect; reg [BUFFER_SIZE-1:0] readSelect; //Read select ring register always @(posedge clk or posedge rst) begin if(rst) readSelect <= 1; else begin if(advanceRead1) begin readSelect <= {readSelect[BUFFER_SIZE-2 : 0], readSelect[BUFFER_SIZE-1]}; end else if(advanceRead2) begin readSelect <= {readSelect[BUFFER_SIZE-3 : 0], readSelect[BUFFER_SIZE-1:BUFFER_SIZE-2]}; end end end //Write select ring register always @(posedge clk or posedge rst) begin if(rst) writeSelect <= 1; else begin if(advanceWrite) begin writeSelect <= {writeSelect[BUFFER_SIZE-2 : 0], writeSelect[BUFFER_SIZE-1]}; end end end wire [DATA_WIDTH-1:0] ramDataOutA [2**BUFFER_SIZE-1:0]; wire [DATA_WIDTH-1:0] ramDataOutB [2**BUFFER_SIZE-1:0]; //Generate to instantiate the RAMs generate genvar i; for(i = 0; i < BUFFER_SIZE; i = i + 1) begin : ram_generate ramDualPort #( .DATA_WIDTH( DATA_WIDTH ), .ADDRESS_WIDTH( ADDRESS_WIDTH ) ) ram_inst_i( .clk( clk ), //Port A is written to as well as read from. When writing, this port cannot be read from. //As long as the buffer is large enough, this will not cause any problem. .addrA( ((writeSelect[i] == 1'b1) && !forceRead && writeEnable) ? writeAddress : readAddress ), //&& writeEnable is //to allow the full buffer to be used. After the buffer is filled, write is advanced, so writeSelect //and readSelect are the same. The full buffer isn't written to, so this allows the read to work properly. .dataA( writeData ), .weA( ((writeSelect[i] == 1'b1) && !forceRead) ? writeEnable : 1'b0 ), .qA( ramDataOutA[2**i] ), .addrB( readAddress + 1 ), .dataB( 0 ), .weB( 1'b0 ), .qB( ramDataOutB[2**i] ) ); end endgenerate //Select which ram to read from wire [BUFFER_SIZE-1:0] readSelect0 = readSelect; wire [BUFFER_SIZE-1:0] readSelect1 = (readSelect << 1) | readSelect[BUFFER_SIZE-1]; //Steer the output data to the right ports assign readData00 = ramDataOutA[readSelect0]; assign readData10 = ramDataOutA[readSelect1]; assign readData01 = ramDataOutB[readSelect0]; assign readData11 = ramDataOutB[readSelect1]; //Keep track of fill level always @(posedge clk or posedge rst) begin if(rst) begin fillCount <= 0; end else begin if(advanceWrite) begin if(advanceRead1) fillCount <= fillCount; else if(advanceRead2) fillCount <= fillCount - 1; else fillCount <= fillCount + 1; end else begin if(advanceRead1) fillCount <= fillCount - 1; else if(advanceRead2) fillCount <= fillCount - 2; else fillCount <= fillCount; end end end endmodule //ramFifo //Dual port RAM module ramDualPort #( parameter DATA_WIDTH = 8, parameter ADDRESS_WIDTH = 8 )( input wire [(DATA_WIDTH-1):0] dataA, dataB, input wire [(ADDRESS_WIDTH-1):0] addrA, addrB, input wire weA, weB, clk, output reg [(DATA_WIDTH-1):0] qA, qB ); // Declare the RAM variable reg [DATA_WIDTH-1:0] ram[2**ADDRESS_WIDTH-1:0]; //Port A always @ (posedge clk) begin if (weA) begin ram[addrA] <= dataA; qA <= dataA; end else begin qA <= ram[addrA]; end end //Port B always @ (posedge clk) begin if (weB) begin ram[addrB] <= dataB; qB <= dataB; end else begin qB <= ram[addrB]; end end endmodule //ramDualPort