OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /theia_gpu/trunk
    from Rev 142 to Rev 143
    Reverse comparison

Rev 142 → Rev 143

/rtl/Collaterals/aDefinitions.v
28,9 → 28,10
for simulation performance reasons mainly.
*******************************************************************************/
 
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: Change for more cores
//---------------------------------------------------------------------------------
//Verilog provides a `default_nettype none compiler directive. When
//this directive is set, implicit data types are disabled, which will make any
367,3 → 368,4
`define SWIZZLE_YXZ 32'd21
 
 
 
/rtl/GPU/TOP/Theia.v
1,5 → 1,24
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------------
module THEIA
12,21 → 31,13
//when this gets asserted (see documentation)
//Wish Bone Interface
input wire [`WB_WIDTH-1:0] DAT_I, //Input data bus (Wishbone)
//output wire [`WB_WIDTH-1:0] DAT_O, //Output data bus (Wishbone)
input wire ACK_I, //Input ack
output wire ACK_O, //Output ack
//output wire [`WB_WIDTH-1:0] ADR_O, //Output address
input wire [`WB_WIDTH-1:0] ADR_I, //Input address
//output wire WE_O, //Output write enable
input wire WE_I, //Input write enable
//output wire STB_O, //Strobe signal, see wishbone documentation
input wire STB_I, //Strobe signal, see wishbone documentation
//output wire CYC_O, //Bus cycle signal, see wishbone documentation
input wire CYC_I, //Bus cycle signal, see wishbone documentation
//output wire [1:0] TGC_O, //Bus cycle tag, see THEAI documentation
input wire [1:0] TGA_I, //Input address tag, see THEAI documentation
//output wire [1:0] TGA_O, //Output address tag, see THEAI documentation
//input wire [1:0] TGC_I, //Bus cycle tag, see THEAI documentation
input wire [`MAX_CORES-1:0] SEL_I, //The WishBone Master uses this signal to configure a specific core (TBD, not sure is needed)
input wire [`MAX_CORES-1:0] RENDREN_I,
 
40,10 → 51,10
input wire [`MAX_TMEM_BANKS-1:0] TMSEL_I,
//Control Register
input wire [15:0] CREG_I,
output wire GRDY_O,
output wire HDL_O,
input wire STDONE_I,
input wire HDA_I,
input wire GACK_I,
input wire HDLACK_I,
output wire RCOMMIT_O,
output wire DONE_O
 
85,11 → 96,10
wire [1:0] wTGA_I[`MAX_CORES-1:0];
 
 
//wire [`MAX_CORES-1:0] wTMEM_ACK_I;
 
wire [`WB_WIDTH-1:0] wTMEM_Data;
wire [`WB_WIDTH-1:0] wTMEM_Address[`MAX_CORES-1:0];
wire [`WB_WIDTH-1:0] wTMEM_ReadAddr;
//wire [`MAX_CORES-1:0] wTMEM_STB_O;
wire [`MAX_CORES-1:0] wTMEM_Resquest;
wire [`MAX_CORES-1:0] wTMEM_Granted;
 
113,35 → 123,21
wire wTMEM_2_Core__Grant[`MAX_CORES-1:0];
 
wire[`MAX_CORE_BITS-1:0] wCurrentCoreSelected[`MAX_TMEM_BANKS-1:0];
//wire [`WB_WIDTH-1:0] wTMEM_2_Core_Data[`MAX_CORES-1:0]; //Vertical grid Buses going to each core.
wire[7:0] wCoreBankSelect[`MAX_CORES-1:0];
wire [`MAX_CORES-1:0] wGRDY_O;
wire[`WIDTH-1:0] wCoreBankSelect[`MAX_CORES-1:0];
wire [`MAX_CORES-1:0] wHDL_O;
 
 
wire [`MAX_CORES-1:0] wGReady;
wire [`MAX_CORES-1:0] wHostDataLatched;
wire [`MAX_CORES-1:0] wRCOMMIT_O;
wire [`MAX_CORES-1:0] wRCommited;
 
 
assign RCOMMIT_O = wRCommited[0] & wRCommited[1] & wRCommited[2] & wRCommited[3];
assign GRDY_O = wGReady[0] & wGReady[1] & wGReady[2] & wGReady[3];
//----------------------------------------------------------------
//The next secuencial logic just AND all the wDone signals
//I know that it would be much more elgant to just do parallel:
//assign DONE_O = wDone[0] & wDone[1] & ... & wDone[MAX_CORES-1];
//However, I don't know how to achieve this with 'generate' statements
//So coding a simple loop instead
assign RCOMMIT_O = wRCommited[0] & wRCommited[1] & wRCommited[2] & wRCommited[3];
assign HDL_O = wHostDataLatched[0] & wHostDataLatched[1] & wHostDataLatched[2] & wHostDataLatched[3];
assign DONE_O = wDone[0] & wDone[1] & wDone[2] & wDone[3];
 
 
 
/*
always @ (posedge CLK_I)
begin : AND_DONE_SIGNALS
integer k;
DONE_O = wDone[0];
for (k=0;k<=`MAX_CORES;k=k+1)
DONE_O=DONE_O & wDone[k+1];
end
*/
assign DONE_O = wDone[0] & wDone[1] & wDone[2] & wDone[3]; //Replace this by a counter??
//----------------------------------------------------------------
 
Module_BusArbitrer ARB1
155,11 → 151,7
);
//----------------------------------------------------------------
 
// assign DAT_O = wDAT_O[ wBusSelect ];
// assign TGA_O = wTGA_O[ wBusSelect ];
// assign ADR_O = wADR_O[ wBusSelect ];
// assign STB_O = wSTB_O[ wBusSelect ];
// assign WE_O = wWE_O[ wBusSelect ];
assign ACK_O = wACK_O[ wBusSelect];
 
wire [`WB_WIDTH-1:0] wDataOut[`MAX_CORES-1:0];
192,14 → 184,9
.CREG_I( CREG_I ),
//Master Signals
//.WE_O ( wWE_O[i] ),
//.STB_O( wSTB_O[i] ),
.ACK_O( wACK_O[i] ),
// .DAT_O( wDAT_O[i] ),
//.ADR_O( wADR_O[i] ),
.CYC_O( wBusRequest[i] ),
.GNT_I( wBusGranted[i] ),
//.TGA_O( wTGA_O[i] ),
`ifdef DEBUG
.iDebug_CoreID( i ),
`endif
208,18 → 195,18
.OMEM_ADR_O( wOMEM_Address[i] ),
.OMEM_DAT_O( wOMEM_Dat[i] ),
.TMEM_DAT_I( wCrossBarDataCollumn[i] ),
.TMEM_ADR_O( wTMemReadAdr[i] ),
.TMEM_CYC_O( wCORE_2_TMEM__Req[i] ),
.TMEM_GNT_I( wTMEM_2_Core__Grant[i] ),
.GRDY_O( wGRDY_O[i] ),
.HDA_I( HDA_I ), //Host data available
.HDL_O( wHDL_O[i] ), //Host data Latched
.HDLACK_I( ~HDL_O ), //Host data Latched ACK
.STDONE_I( STDONE_I ),
.RCOMMIT_O( wRCOMMIT_O[i] ),
.HDA_I( HDA_I ),
//Other
.DAT_I( DAT_I ),
.DONE_O( wDone[i] )
229,7 → 216,7
UPCOUNTER_POSEDGE # (1) UP_RCOMMIT
(
.Clock( CLK_I ),
.Reset( RST_I | GACK_I ),
.Reset( RST_I | HDLACK_I ),
.Initial( 1'b0 ),
.Enable( wRCOMMIT_O[i] ),
.Q(wRCommited[i])
238,13 → 225,13
UPCOUNTER_POSEDGE # (1) UP_GREADY
(
.Clock( CLK_I ),
.Reset( RST_I | GACK_I ),
.Reset( RST_I | HDLACK_I ),
.Initial( 1'b0 ),
.Enable( wGRDY_O[i] ),
.Q(wGReady[i])
.Enable( wHDL_O[i] ),
.Q(wHostDataLatched[i])
);
 
RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 500000 ) OMEM //10k mem
RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 250000 ) OMEM //500000 ) OMEM
(
.Clock( CLK_I ),
.iWriteEnable( wOMem_WE[i] ),
264,7 → 251,7
//Each slot has MAX_TMEM_BANKS bits. Only 1 bit can
//be 1 at any given point in time. All bits zero means,
//we are not requesting to read from any memory bank.
SELECT_1_TO_N # ( 8, 4 ) READDRQ
SELECT_1_TO_N # ( `WIDTH, `MAX_CORES ) READDRQ
(
.Sel(wCoreBankSelect[ i]),
.En(wCORE_2_TMEM__Req[i]),
276,7 → 263,7
//virtual adress into physical adress (relative to the bank) like this
//fadr = vadr / n = vadr >> log2(n)
 
assign wCrossBarAdressCollumn[i] = (wTMemReadAdr[i] >> ((`MAX_TMEM_BANKS)/2));
assign wCrossBarAdressCollumn[i] = (wTMemReadAdr[i] >> `MAX_CORE_BITS);
 
//Connect the granted signal to Arbiter of the Bank we want to read from
assign wTMEM_2_Core__Grant[i] = wBankReadGranted[wCoreBankSelect[i]][i];
314,7 → 301,7
(
.Clock( CLK_I ),
.Reset( RST_I ),
.iRequest( {wBankReadRequest[3][Bank],wBankReadRequest[2][Bank],wBankReadRequest[1][Bank],wBankReadRequest[0][Bank]}),//wBankReadRequest[Bank] ), //The cores requesting to read from this Bank
.iRequest( {wBankReadRequest[3][Bank],wBankReadRequest[2][Bank],wBankReadRequest[1][Bank],wBankReadRequest[0][Bank]}),
.oGrant( wBankReadGrantedDelay[Bank] ), //The bit of the core granted to read from this Bank
.oBusSelect( wCurrentCoreSelected[Bank] ) //The index of the core granted to read from this Bank
/rtl/GPU/CORES/CONTROL/Unit_Control.v
81,7 → 81,8
`define CU_DONE 48
`define CU_WAIT_FOR_RENDER_ENABLE 49
`define CU_ACK_TCC 50
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51
`define CU_WAIT_FOR_HOST_DATA_ACK 52
//--------------------------------------------------------------
module ControlUnit
(
111,6 → 112,7
input wire iRenderEnable,
input wire iSceneTraverseComplete,
input wire iHostDataAvailable,
input wire iHostAckDataRead,
 
`ifdef DEBUG
input wire[`MAX_CORES-1:0] iDebug_CoreID,
156,7 → 158,7
`ifdef DEBUG_CU
always @ ( wHit )
begin
`LOGME "*** Triangle HIT ***\n");
$display( "*** Triangle HIT ***\n");
end
`endif
 
180,7 → 182,7
begin
`ifdef DEBUG_CU
`LOGME"%d CU_AFTER_RESET_STATE\n",$time);
$display("%d CU_AFTER_RESET_STATE\n",$time);
`endif
//oRamBusOwner <= 0;
207,7 → 209,7
begin
//$display("CORE: %d CU_WAIT_FOR_INITIAL_CONFIGURATION", iDebug_CoreID);
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_INITIAL_CONFIGURATION\n",$time);
// $display("%d Control: CU_WAIT_FOR_INITIAL_CONFIGURATION\n",$time);
// `endif
//oRamBusOwner <= 0;
263,7 → 265,7
begin
//$display("CORE: %d CU_CLEAR_REGISTERS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d CU_CLEAR_REGISTERS\n",$time);
$display("%d CU_CLEAR_REGISTERS\n",$time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
289,7 → 291,7
`CU_WAIT_CLEAR_REGISTERS:
begin
// `ifdef DEBUG_CU
// `LOGME"%d CU_WAIT_CLEAR_REGISTERS\n",$time);
// $display("%d CU_WAIT_CLEAR_REGISTERS\n",$time);
// `endif
//$display("CORE: %d CU_WAIT_CLEAR_REGISTERS", iDebug_CoreID);
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
318,7 → 320,7
begin
`ifdef DEBUG_CU
`LOGME"%d CU_ACK_CLEAR_REGISTERS\n", $time);
$display("%d CU_ACK_CLEAR_REGISTERS\n", $time);
`endif
//$display("CORE: %d CU_ACK_CLEAR_REGISTERS", iDebug_CoreID);
348,7 → 350,7
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_CONFIG_DATA_READ\n",$time);
// $display("%d Control: CU_WAIT_FOR_CONFIG_DATA_READ\n",$time);
// `endif
 
 
380,7 → 382,7
begin
//$display("CORE: %d CU_PRECALCULATE_CONSTANTS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d Control: CU_PRECALCULATE_CONSTANTS\n", $time);
$display("%d Control: CU_PRECALCULATE_CONSTANTS\n", $time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
405,7 → 407,7
`CU_WAIT_FOR_CONSTANT:
begin
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_CONSTANT\n", $time);
// $display("%d Control: CU_WAIT_FOR_CONSTANT\n", $time);
// `endif
 
 
435,7 → 437,7
begin
//$display("CORE: %d CU_ACK_PRECALCULATE_CONSTANTS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time);
$display("%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time);
`endif
462,7 → 464,7
`CU_TRIGGER_USERCONSTANTS:
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_TRIGGER_USERCONSTANTS\n",$time);
$display("%d Control: CU_TRIGGER_USERCONSTANTS\n",$time);
`endif
//$display("CORE: %d CU_TRIGGER_USERCONSTANTS", iDebug_CoreID);
489,7 → 491,7
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time);
// $display("%d Control: CU_WAIT_FOR_RGU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
517,7 → 519,7
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_ACK_RGU\n",$time);
$display("%d Control: CU_ACK_RGU\n",$time);
`endif
//$display("CORE: %d CU_ACK_USERCONSTANTS", iDebug_CoreID);
574,7 → 576,7
begin
`ifdef DEBUG_CU
`LOGME"CORE: %d CU_TRIGGER_RGU", iDebug_CoreID);
$display("CORE: %d CU_TRIGGER_RGU", iDebug_CoreID);
`endif
 
600,7 → 602,7
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time);
// $display("%d Control: CU_WAIT_FOR_RGU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
628,7 → 630,7
begin
 
`ifdef DEBUG_CU
`LOGME"CORE: %d CU_ACK_RGU", iDebug_CoreID);
$display("CORE: %d CU_ACK_RGU", iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= 0;
656,7 → 658,7
begin
////$display("CU_TRIGGER_TCC");
`ifdef DEBUG_CU
`LOGME"%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID);
$display("%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
748,7 → 750,7
begin
`ifdef DEBUG_CU
`LOGME"%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID);
$display("%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID);
`endif
783,7 → 785,7
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time);
$display("%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
805,6 → 807,27
NextState <= `CU_WAIT_FOR_PSU;
end
//-----------------------------------------
`CU_WAIT_FOR_HOST_DATA_ACK:
begin
oCodeInstructioPointer <= 0;
oUCodeEnable <= 0;
oGFUEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
if ( iHostAckDataRead )
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
else
NextState <= `CU_WAIT_FOR_HOST_DATA_ACK;
end
//-----------------------------------------
//Wait until data from Host becomes available
`CU_WAIT_FOR_HOST_DATA_AVAILABLE:
begin
833,9 → 856,9
`CU_TRIGGER_MAIN:
begin
`ifdef DEBUG_CU
`LOGME"%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID);
$display("%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN;
oUCodeEnable <= 1;
860,7 → 883,7
`CU_WAIT_FOR_MAIN:
begin
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_MAIN\n",$time);
// $display("%d Control: CU_WAIT_FOR_MAIN\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
894,7 → 917,7
`CU_ACK_MAIN:
begin
`ifdef DEBUG_CU
`LOGME"%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID);
$display("%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_GFU;
917,7 → 940,7
if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b1)
NextState <= `CU_CHECK_HIT;
else if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b0) //ERROR!!! What if iSceneTraverseComplete will become 1 a cycle after this??
NextState <= `CU_TRIGGER_MAIN;
NextState <= `CU_WAIT_FOR_HOST_DATA_ACK;//`CU_WAIT_FOR_HOST_DATA_AVAILABLE;
else
NextState <= `CU_ACK_MAIN;
929,7 → 952,7
begin
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_TRIGGER_PSU\n",$time);
// $display("%d Control: CU_TRIGGER_PSU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
958,7 → 981,7
`CU_ACK_PSU:
begin
`ifdef DEBUG_CU
`LOGME"%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID);
$display("%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
989,9 → 1012,9
`CU_TRIGGER_NPU: //Next Pixel Unit
begin
`ifdef DEBUG_CU
`LOGME"%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID);
$display("%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID);
`endif
$write("*");
//$write("*");
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG; //*
1042,7 → 1065,7
`CU_ACK_NPU:
begin
`ifdef DEBUG_CU
`LOGME"%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID);
$display("%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
1098,7 → 1121,7
`CU_TRIGGER_USERPIXELSHADER:
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_TRIGGER_PSU\n",$time);
$display("%d Control: CU_TRIGGER_PSU\n",$time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
1124,7 → 1147,7
begin
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_TRIGGER_PSU\n",$time);
// $display("%d Control: CU_TRIGGER_PSU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
1153,7 → 1176,7
`CU_ACK_USERPIXELSHADER:
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_ACK_PSU\n",$time);
$display("%d Control: CU_ACK_PSU\n",$time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
1183,7 → 1206,7
begin
`ifdef DEBUG_CU
`LOGME"%d Control: ERROR Undefined State\n",$time);
$display("%d Control: ERROR Undefined State\n",$time);
`endif
//oRamBusOwner <= 0;
/rtl/GPU/CORES/TOP/Theia_Core.v
76,7 → 76,8
input wire GNT_I, //Bus arbiter 'Granted' signal, see THEAI documentation
input wire RENDREN_I,
 
output wire GRDY_O, //Data Latched
output wire HDL_O, //Data Latched
input wire HDLACK_I, //Data Latched ACK
input wire STDONE_I, //Scene traverse complete
input wire HDA_I,
output wire RCOMMIT_O,
104,7 → 105,7
);
 
//When we flip the SMEM, this means we are ready to receive more data
assign GRDY_O = wCU2_FlipMem;
assign HDL_O = wCU2_FlipMem;
 
//Alias this signals
wire Clock,Reset;
226,6 → 227,7
.iSceneTraverseComplete( STDONE_I ),
.oResultCommited( RCOMMIT_O ),
.iHostDataAvailable( HDA_I ),
.iHostAckDataRead( HDLACK_I ),
 
`ifdef DEBUG
351,10 → 353,10
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( 0 ),// w2IO__EnableWBMaster ),
.iEnable( 1'b0 ),// w2IO__EnableWBMaster ),
.iBusCyc_Type( w2IO_MasterCycleType ),
.iStore( 1),//w2IO__Store ),
.iStore( 1'b1),//w2IO__Store ),
.iAdr_DataWriteBack( w2IO__DataWriteAddress ),
.iAdr_O_Set( w2IO__SetAddress ),
.iAdr_O_Imm( w2IO__AddressOffset ),
/rtl/GPU/HOST/Module_Host.v
33,14 → 33,14
 
 
 
`define MAX_VERTEX_IN_FRAME 8'd7 // WAS 8'd6
`define MAX_VERTEX_IN_FRAME `WIDTH'd7 // WAS 8'd6
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b01
`define TAG_DATA_ADDRESS_TYPE 2'b10
`define SELECT_INST_MEM 3'b00
`define SELECT_SCENE_MEM 3'b01
`define SELECT_GEO_MEM 3'b10
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: Change for more cores
 
 
`define HOST_IDLE 0
`define HOST_WRITE_INSTRUCTION 1
`define HOST_WAIT_INSTRUCTION 2
91,6 → 91,9
output wire STDONE_O,
output reg oHostDataAvailable,
input wire iGPUDone,
`ifndef NO_DISPLAY_STATS
input wire [`WIDTH-1:0] iDebugWidth,
`endif
input wire ACK_I
);
//---------------------------------------------------------------
104,7 → 107,7
wire [`MAX_CORES-1:0] wCoreSelect;
wire wLastValidReadAddress;
wire [`WB_WIDTH-1:0] wWriteAddress;
wire [7:0] wVertexCount;
wire [`WIDTH-1:0] wVertexCount;
reg [`WB_WIDTH-1:0] rInitialWriteAddress;
reg rSetWriteAddr;
reg rIncCoreSelect,rResetVertexCount;
131,12 → 134,12
);
 
 
UPCOUNTER_POSEDGE # (8 ) PRIMCOUNT
UPCOUNTER_POSEDGE # ( 32 ) PRIMCOUNT
(
.Clock( Clock ),
.Reset( Reset | rResetVertexCount ),
.Enable( iEnable & wWBMDone ),
.Initial( 8'b1 ), //WAS 0
.Initial( `WIDTH'b1 ),
.Q( wVertexCount )
);
//--------------------------------------------------------
186,6 → 189,8
end
//--------------------------------------------------------
 
reg [63:0] i;
reg [63:0] RenderedPixels;
wire wLastVertexInFrame;
assign wLastVertexInFrame =
(wVertexCount % `MAX_VERTEX_IN_FRAME == 1'b0 ) ? 1'b1 : 1'b0;
204,6 → 209,8
//Or until we are enabled
`HOST_IDLE:
begin
RenderedPixels <= 0;
rWBMEnable <= 0;
rInitiaReadAddr <= 1; //Start reading from 1, because 0 is the size
rWBMReset <= 0;
643,8 → 650,22
oHostDataAvailable <= 0;
 
if (iGPUCommitedResults)
begin
`ifndef NO_DISPLAY_STATS
for (i = 0; i < `MAX_CORES; i = i + 1)
begin
$write(".");
end
RenderedPixels = RenderedPixels + `MAX_CORES;
if ( RenderedPixels % iDebugWidth == 0)
$write("]%d\n[",RenderedPixels / iDebugWidth);
`endif
rHostNextState <= `HOST_PREPARE_FOR_GEO_REQUESTS;
end
else
rHostNextState <= `HOST_LAST_PRIMITIVE_REACHED;
end
/examples/scenes/example3/ReameFirst.txt
0,0 → 1,2
This example only works with the 16-core version of the GPU.
It has a lot of triangles, so this takes a lot of time to simulate.
/test_bench/readme.txt
7,7 → 7,7
**** I Creating a Project.*****
Steps to create a Xilinx Project:
 
1 - Un-compress all the verilog sources under the same folder
1 - Un-compress all the verilog sources under the same folder (i.e. flatten the directory structure)
(easier for later steps).
 
2 - Start ISE Project Navigator.
/test_bench/TestBench_THEIA.v
69,8 → 69,8
reg TMWE_O;
reg [31:0] rControlRegister[2:0];
integer file, log;
reg [31:0] rSceneParameters[64:0];
reg [31:0] rVertexBuffer[6000:0];
reg [31:0] rSceneParameters[120:0];
reg [31:0] rVertexBuffer[7000:0];
reg [31:0] rInstructionBuffer[512:0];
reg [31:0] rTextures[`TEXTURE_BUFFER_SIZE:0]; //Lets asume we use 256*256 textures
reg [7:0] rScreen[`MAX_SCREENBUFFER-1:0];
123,8 → 123,8
.TMWE_I( TMWE_O ),
.TMSEL_I( TMSEL_O ),
.GRDY_O( GRDY_I ),
.GACK_I( GACK_O ),
.HDL_O( GRDY_I ),
.HDLACK_I( GACK_O ),
.STDONE_I( STDONE_O ),
.RCOMMIT_O( wGPUCommitedResults ),
.HDA_I( wHostDataAvailable ),
169,10 → 169,14
.oHostDataAvailable( wHostDataAvailable ),
.iHostDataReadConfirmed( GRDY_I ),
.iMemorySize( wMemorySize ),
.iPrimitiveCount( (rVertexBuffer[6]+1) *7 ),
.iPrimitiveCount( (rVertexBuffer[6]+1) *7 ), //This is wrong I think
.iGPUCommitedResults( wGPUCommitedResults ),
.STDONE_O( STDONE_O ),
.iGPUDone( wDone ),
`ifndef NO_DISPLAY_STATS
.iDebugWidth( `RESOLUTION_WIDTH ),
`endif
 
//To Memory
.oReadAddress( wHostReadAddress ),
204,8 → 208,13
end
//---------------------------------------------
 
 
//-------------------------------------------------------------------------------------
/*
This makes sure the simulation actually writes the results to the PPM image file
once all the cores are done executing
*/
`define PARTITION_SIZE `RESOLUTION_HEIGHT/`MAX_CORES
integer i,j,kk;
reg [31:0] R;
217,7 → 226,7
begin
 
$display("Partition Size = %d",`PARTITION_SIZE);
for (kk = 0; kk < 4; kk = kk+1)
for (kk = 0; kk < `MAX_CORES; kk = kk+1)
begin
wOMEMBankSelect = kk;
$display("wOMEMBankSelect = %d\n",wOMEMBankSelect);
251,11 → 260,11
end
end
//-------------------------------------------------------------------------------------
 
 
reg [15:0] rTimeOut;
`define MAX_INSTRUCTIONS 2
// `define MAX_INSTRUCTIONS 2
initial begin
// Initialize Inputs
283,6 → 292,9
$readmemh("Vertex.mem",rVertexBuffer);
$display("Done");
$display("Number of primitives(%d): %d",rVertexBuffer[6],(rVertexBuffer[6]+1) *7);
//Read Texture Data
$write("Loading scene texture.... ");
$readmemh("Textures.mem",rTextures);
304,11 → 316,7
$fwrite(log, "Width : %d\n",`RESOLUTION_WIDTH);
$fwrite(log, "Height : %d\n",`RESOLUTION_HEIGHT);
// $fwrite(file,"P3\n");
// $fwrite(file,"#This file was generated by Theia's RTL simulation\n");
// $fwrite(file,"%d %d\n",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT );
// $fwrite(file,"255\n");
//Open output file
out2 = $fopen("Output.ppm");
331,8 → 339,8
for (k = 0;k < `TEXTURE_BUFFER_SIZE; k = k + 1)
begin
TMADR_O <= (k >> (`MAX_TMEM_BANKS/2));
TMSEL_O <= (k & (`MAX_TMEM_BANKS-1));
TMADR_O <= (k >> (`MAX_CORE_BITS));
TMSEL_O <= (k & (`MAX_TMEM_BANKS-1)); //X mod 2^n == X & (2^n - 1)
TMDAT_O <= rTextures[k];
#10;
end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.