OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /theia_gpu/branches/icarus_version
    from Rev 175 to Rev 176
    Reverse comparison

Rev 175 → Rev 176

/rtl/Module_FixedPointAddtionSubstraction.v
12,7 → 12,7
);
always @ (posedge Clock)
begin
R = A + 1;
R = A + 1;
end
 
 
20,13 → 20,13
//-----------------------------------------------------------
module FixedAddSub
(
input wire Clock,
input wire Clock,
input wire Reset,
input wire[`LONG_WIDTH-1:0] A,
input wire[`LONG_WIDTH-1:0] B,
input wire [`LONG_WIDTH-1:0] A,
input wire [`LONG_WIDTH-1:0] B,
output wire [`LONG_WIDTH-1:0] R,
input wire iOperation,
input wire iInputReady, //Is the input data valid?
input wire iOperation,
input wire iInputReady, //Is the input data valid?
output wire OutputReady //Our output data is ready!
);
 
48,7 → 48,7
.Q( OutputReady )
);
 
assign R = ( A + wB );
 
endmodule
/rtl/Module_Host.v
228,9 → 228,9
//Or until we are enabled
`HOST_IDLE:
begin
`ifndef VERILATOR
// `ifndef VERILATOR
RenderedPixels = 0;
`endif
// `endif
rWBMEnable = 0;
rInitiaReadAddr = 1; //Start reading from 1, because 0 is the size
672,7 → 672,7
if (iGPUCommitedResults)
begin
`ifndef VERILATOR
// `ifndef VERILATOR
`ifndef NO_DISPLAY_STATS
for (i = 0; i < `MAX_CORES; i = i + 1)
begin
683,13 → 683,20
end
RenderedPixels = RenderedPixels + `MAX_CORES;
/* verilator lint_off WIDTH */
if ( RenderedPixels % iDebugWidth == 0)
begin
$write("]%d\n[",RenderedPixels / iDebugWidth);
`ifndef VERILATOR
$fflush;
`endif
`ifndef VERILATOR
$fflush;
`endif
end
/* verilator lint_on WIDTH */
`endif
`endif
// `endif
 
rHostNextState = `HOST_PREPARE_FOR_GEO_REQUESTS;
end
/rtl/aDefinitions.v
30,7 → 30,8
//`define VERILATOR 1
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
`define MAX_TMEM_BANKS 8 //The number of memory banks for TMEM
`define MAX_TMEM_BITS 3 //2 ^ MAX_TMEM_BITS = MAX_TMEM_BANKS
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: Change for more cores
//---------------------------------------------------------------------------------
//Verilog provides a `default_nettype none compiler directive. When
/rtl/Module_WishBoneSlave.v
28,32 → 28,32
module WishBoneSlaveUnit
(
//WB Input signals
input wire CLK_I,
input wire RST_I,
input wire STB_I,
input wire WE_I,
input wire[`WB_WIDTH-1:0] DAT_I,
input wire[`WB_WIDTH-1:0] ADR_I,
input wire [1:0] TGA_I,
output wire ACK_O,
input wire MST_I, //Master In!
input wire CYC_I,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
input wire CLK_I,
input wire RST_I,
input wire STB_I,
input wire WE_I,
input wire[`WB_WIDTH-1:0] DAT_I,
input wire[`WB_WIDTH-1:0] ADR_I,
input wire [1:0] TGA_I,
output wire ACK_O,
input wire MST_I, //Master In!
input wire CYC_I,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress,
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oDataWriteEnable,
output wire oInstructionWriteEnable
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oDataWriteEnable,
output wire oInstructionWriteEnable
 
);
 
FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( ADR_I[15:0] ),
.Q( oInstructionWriteAddress )
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( ADR_I[15:0] ),
.Q( oInstructionWriteAddress )
);
 
assign oDataWriteAddress = oInstructionWriteAddress;
62,11 → 62,11
 
FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( TGA_I ),
.Q( wTGA_Latched )
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( TGA_I ),
.Q( wTGA_Latched )
);
 
 
84,11 → 84,11
wire wDelay;
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay
(
.Clock( Clock ),
.Enable( 1'b1 ),
.Reset( Reset ),
.D( wLatchNow ),
.Q( wDelay )
.Clock( Clock ),
.Enable( 1'b1 ),
.Reset( Reset ),
.D( wLatchNow ),
.Q( wDelay )
);
 
assign ACK_O = wDelay & STB_I; //make sure we set ACK_O back to zero when STB_I is zero
111,12 → 111,12
wire [`WIDTH-1:0] wVx;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[0] & STB_I ),
.D( DAT_I ),
.Q( wVx )
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[0] & STB_I ),
.D( DAT_I ),
.Q( wVx )
);
 
 
124,12 → 124,12
wire [`WIDTH-1:0] wVy;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[1] & STB_I ),
.D( DAT_I ),
.Q( wVy )
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[1] & STB_I ),
.D( DAT_I ),
.Q( wVy )
);
 
//Flip Flop to Store Vz
137,14 → 137,14
 
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[2] & STB_I ),
.D( DAT_I ),
.Q( wVz )
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[2] & STB_I ),
.D( DAT_I ),
.Q( wVz )
);
 
assign oDataBus = {wVx,wVy,wVz};
assign oDataBus = {wVx,wVy,wVz};
assign oInstructionBus = {wVx,wVy};
wire wIsInstructionAddress,wIsDataAddress;
assign wIsInstructionAddress = (wTGA_Latched == `TAG_WBS_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0;
/rtl/Theia.v
112,11 → 112,12
 
 
 
wire [`WB_WIDTH-1:0] wCrossBarDataRow[`MAX_TMEM_BANKS-1:0]; //Horizontal grid Buses comming from each bank
wire [`WB_WIDTH-1:0] wCrossBarDataCollumn[`MAX_CORES-1:0]; //Vertical grid buses comming from each core.
wire [`WB_WIDTH-1:0] wTMemReadAdr[`MAX_CORES-1:0]; //Horizontal grid Buses comming from each core (virtual addr).
wire [`WB_WIDTH-1:0] wCrossBarAdressCollumn[`MAX_CORES-1:0]; //Vertical grid buses comming from each core. (physical addr).
wire [`WB_WIDTH-1:0] wCrossBarAddressRow[`MAX_TMEM_BANKS-1:0]; //Horizontal grid Buses comming from each bank.
wire [(`MAX_TMEM_BANKS*`WB_WIDTH)-1:0] wCrossBarDataRow; //Horizontal grid Buses comming from each bank
wire [(`MAX_CORES*`WB_WIDTH)-1:0] wCrossBarDataCollumn; //Vertical grid buses comming from each core.
wire [(`MAX_CORES*`WB_WIDTH)-1:0] wCrossBarAdressCollumn; //Vertical grid buses comming from each core. (physical addr).
wire [`WB_WIDTH-1:0] wTMemReadAdr[`MAX_CORES-1:0]; //Horizontal grid Buses comming from each core (virtual addr).
 
wire [`WB_WIDTH-1:0] wCrossBarAddressRow[`MAX_TMEM_BANKS-1:0]; //Horizontal grid Buses comming from each bank.
wire wCORE_2_TMEM__Req[`MAX_CORES-1:0];
wire [`MAX_TMEM_BANKS -1:0] wBankReadRequest[`MAX_CORES-1:0];
wire [`MAX_CORES-1:0] wBankReadGranted[`MAX_TMEM_BANKS-1:0];
194,7 → 195,7
.OMEM_ADR_O( wOMEM_Address[i] ),
.OMEM_DAT_O( wOMEM_Dat[i] ),
.TMEM_DAT_I( wCrossBarDataCollumn[i] ),
.TMEM_DAT_I( wCrossBarDataCollumn[ (i*`WB_WIDTH)+:`WB_WIDTH ] ),
.TMEM_ADR_O( wTMemReadAdr[i] ),
.TMEM_CYC_O( wCORE_2_TMEM__Req[i] ),
.TMEM_GNT_I( wTMEM_2_Core__Grant[i] ),
242,6 → 243,13
);
 
 
MUXFULLPARALELL_GENERIC # (`WB_WIDTH,`MAX_TMEM_BANKS,`MAX_TMEM_BITS) MUXG1
(
.in_bus( wCrossBarDataRow ),
.sel( wCoreBankSelect[ i ][0+:`MAX_TMEM_BITS] ),
.out( wCrossBarDataCollumn[ (i*`WB_WIDTH)+:`WB_WIDTH ] )
);
 
//If there are "n" banks, memory location "X" would reside in bank number X mod n.
//X mod 2^n == X & (2^n - 1)
assign wCoreBankSelect[i] = (wTMemReadAdr[i] & (`MAX_TMEM_BANKS-1));
250,7 → 258,7
//Each slot has MAX_TMEM_BANKS bits. Only 1 bit can
//be 1 at any given point in time. All bits zero means,
//we are not requesting to read from any memory bank.
SELECT_1_TO_N # ( `WIDTH, `MAX_CORES ) READDRQ
SELECT_1_TO_N # ( `WIDTH, `MAX_TMEM_BANKS ) READDRQ
(
.Sel(wCoreBankSelect[ i]),
.En(wCORE_2_TMEM__Req[i]),
262,7 → 270,7
//virtual address into physical address (relative to the bank) like this
//fadr = vadr / n = vadr >> log2(n)
 
assign wCrossBarAdressCollumn[i] = (wTMemReadAdr[i] >> `MAX_CORE_BITS);
assign wCrossBarAdressCollumn[(i*`WB_WIDTH)+:`WB_WIDTH] = (wTMemReadAdr[i] >> `MAX_CORE_BITS);
 
//Connect the granted signal to Arbiter of the Bank we want to read from
assign wTMEM_2_Core__Grant[i] = wBankReadGranted[wCoreBankSelect[i]][i];
282,6 → 290,7
begin : BANK
 
//The memory bank itself
RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 50000 ) TMEM
(
.Clock( CLK_I ),
289,7 → 298,7
.iWriteAddress( TMADR_I ),
.iDataIn( TMDAT_I ),
.iReadAddress0( wCrossBarAddressRow[Bank] ), //Connect to the Row of the grid
.oDataOut0( wCrossBarDataRow[Bank] ) //Connect to the Row of the grid
.oDataOut0( wCrossBarDataRow[(`WB_WIDTH*Bank)+:`WB_WIDTH] ) //Connect to the Row of the grid
);
315,20 → 324,31
.Q(wBankReadGranted[Bank])
);
 
MUXFULLPARALELL_GENERIC # (`WB_WIDTH,`MAX_CORES,`MAX_CORE_BITS) MUXG2
(
.in_bus( wCrossBarAdressCollumn ),
.sel( wCurrentCoreSelected[ Bank ] ),
.out( wCrossBarAddressRow[ Bank ] )
);
//Create the Cross-Bar interconnection grid now: rows are connected to the memory banks,
//while columns are connected to the cores. Two or more cores cannot read from the same
//bank at any given point in time.
for (Core = 0; Core < `MAX_CORES; Core = Core + 1)
begin: CORE_CONNECT
`ifndef VERILATOR
//for (Core = 0; Core < `MAX_CORES; Core = Core + 1)
//begin: CORE_CONNECT
//`ifndef VERILATOR
//Connect the Data Column of this core to the Data Row of current bank, only if the Core is looking for data stored in this bank
assign wCrossBarDataCollumn[ Core ] = ( wCoreBankSelect[ Core ] == Bank ) ? wCrossBarDataRow[ Bank ] : `WB_WIDTH'bz;
// assign wCrossBarDataCollumn[ Core ] = ( wCoreBankSelect[ Core ] == Bank ) ? wCrossBarDataRow[ Bank ] : `WB_WIDTH'bz;
//Connect the Address Row of this Bank to the Address Column of the core, only if the Arbiter selected this core for reading
assign wCrossBarAddressRow[ Bank ] = ( wCurrentCoreSelected[ Bank ] == Core ) ? wCrossBarAdressCollumn[Core]: `WB_WIDTH'bz;
`endif
//assign wCrossBarAddressRow[ Bank ] = ( wCurrentCoreSelected[ Bank ] == Core ) ? wCrossBarAdressCollumn[Core]: `WB_WIDTH'bz;
//`endif
end
//end
end
endgenerate
/rtl/Module_ROM.v
1,56 → 1,56
 
 
`define ONE (32'h1 << `SCALE)
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/*
I can't synthesize roms, the rom needs to be adapted depending on the
final target silicon.
*/
 
 
//--------------------------------------------------------
module ROM
(
 
 
`define ONE (32'h1 << `SCALE)
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/*
I can't synthesize roms, the rom needs to be adapted depending on the
final target silicon.
*/
 
 
//--------------------------------------------------------
module ROM
(
input wire[`ROM_ADDRESS_WIDTH-1:0] Address,
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output reg [`INSTRUCTION_WIDTH-1:0] I
);
 
 
always @( Address )
begin
case (Address)
//Hardcoded stuff :(
`define RAY_INSIDE_BOX `R3
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T
`define CURRENT_LIGHT_DIFFUSE 16'h6
 
//-----------------------------------------------------------------
`endif
output reg [`INSTRUCTION_WIDTH-1:0] I
);
 
 
always @( Address )
begin
case (Address)
//Hardcoded stuff :(
`define RAY_INSIDE_BOX `R3
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T
`define CURRENT_LIGHT_DIFFUSE 16'h6
 
//-----------------------------------------------------------------
`define TAG_PIXELSHADER 16'd278
`define TAG_USERCONSTANTS 16'd276
`define TAG_PSU_UCODE_ADRESS2 16'd248
684,20 → 684,20
278: I = { `OMWRITE ,`OREG_PIXEL_COLOR ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_TEXTURE_COLOR };
279: I = { `RETURN ,`RT_TRUE };
 
 
//-------------------------------------------------------------------------
 
default:
begin
`ifdef DEBUG
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address);
// $stop();
`endif
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF};
end
endcase
end
endmodule
 
//-------------------------------------------------------------------------
 
default:
begin
`ifdef DEBUG
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address);
// $stop();
`endif
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF};
end
endcase
end
endmodule
//--------------------------------------------------------
/rtl/Collaterals.v
98,8 → 98,33
 
endmodule
 
//----------------------------------------------------------------------
// MUXFULLPARALELL_GENERIC: combinational CHANNELS-to-1 multiplexer.
// in_bus packs CHANNELS words of WIDTH bits each, with channel 0 in the
// least significant WIDTH bits; out is driven with the word indexed by sel.
//   WIDTH    - bit width of each channel and of out (defaults to `WIDTH)
//   CHANNELS - number of words packed into in_bus (default 4)
//   SELBITS  - bit width of sel (default 2)
module MUXFULLPARALELL_GENERIC #(parameter WIDTH = `WIDTH, parameter CHANNELS = 4, parameter SELBITS = 2)
(
input wire [(CHANNELS*WIDTH)-1:0] in_bus,
input wire [SELBITS-1:0] sel,
output wire [WIDTH-1:0] out
);

// One WIDTH-bit entry per channel, so sel can be used as a plain array index.
wire [WIDTH-1:0] channel_word [0:CHANNELS-1];

genvar ch;
generate
for (ch = 0; ch < CHANNELS; ch = ch + 1)
begin: array_assignments
// Slice channel ch out of the flattened input bus.
assign channel_word[ch] = in_bus[(ch*WIDTH)+:WIDTH];
end
endgenerate

// Select the requested channel.
assign out = channel_word[sel];

endmodule
//----------------------------------------------------------------------
 
module MUXFULLPARALELL_2SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
input wire [1:0] Sel,

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.