OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /theia_gpu/branches/gpu_8_cores/rtl/GPU/CORES
    from Rev 117 to Rev 128
    Reverse comparison

Rev 117 → Rev 128

/TOP/Theia_Core.v
0,0 → 1,425
/**********************************************************************************
Theia, Ray Cast Programmable Graphics Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/**********************************************************************************
Description:
This is the top level block for THEIA.
THEIA core has 5 main logical blocks called Units.
This module implements the interconnections between the Units.
Units:
> EXE: Manages execution logic for the SHADERS.
> GEO: Manages geometry data structures.
> IO: Input/Output (Wishbone).
> MEM: Internal memory, separate for Instructions and data.
> CONTROL: Main control Finite state machine.
Internal Buses:
THEIA has separate instruction and data buses.
THEIA avoids using tri-state buses by having separate input/output
for each bus.
There are 2 separate data buses since the Data memory
has a Dual read channel.
Please see the MEM unit chapter in the documentation for more details.
External Buses:
External buses are managed by the IO Unit.
External buses follow the wishbone protocol.
Please see the IO unit chapter in the documentation for more details.
**********************************************************************************/
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//--------------------------------------------------------------------------
// THEIACORE: top level of a single THEIA core.
//
// Instantiates and interconnects four units:
//   CU  (ControlUnit)   - main control state machine
//   MEM (MemoryUnit)    - instruction / data memories and control register
//   EXE (ExecutionUnit) - shader micro-code execution
//   IO  (IO_Unit)       - Wishbone, OMEM and TMEM interfaces
//
// All internal nets are declared before their first use. An identifier that
// first appears in an instance port list is otherwise taken as an implicit
// 1-bit net, and a later vector declaration of the same name then conflicts
// with it.
//
// NOTE(review): no geometry (GEO) unit is instantiated in this module, so
// every wGEO2_* net declared below has no driver inside this module.
//--------------------------------------------------------------------------
module THEIACORE
(

    input wire CLK_I, //Input clock
    input wire RST_I, //Input reset
    //Theia Interfaces
    input wire MST_I, //Master signal, THEIA enters configuration mode
                      //when this gets asserted (see documentation)
    //Wishbone Interface
    input wire [`WB_WIDTH-1:0] DAT_I, //Input data bus (Wishbone)
    output wire [`WB_WIDTH-1:0] DAT_O, //Output data bus (Wishbone)
    input wire ACK_I, //Input ack
    output wire ACK_O, //Output ack
    output wire [`WB_WIDTH-1:0] ADR_O, //Output address
    input wire [`WB_WIDTH-1:0] ADR_I, //Input address
    output wire WE_O, //Output write enable
    input wire WE_I, //Input write enable
    output wire STB_O, //Strobe signal, see wishbone documentation
    input wire STB_I, //Strobe signal, see wishbone documentation
    output wire CYC_O, //Bus cycle signal, see wishbone documentation
    input wire CYC_I, //Bus cycle signal, see wishbone documentation
    output wire [1:0] TGC_O, //Bus cycle tag, see THEIA documentation
    input wire [1:0] TGA_I, //Input address tag, see THEIA documentation
    output wire [1:0] TGA_O, //Output address tag, see THEIA documentation
    input wire [1:0] TGC_I, //Bus cycle tag, see THEIA documentation
    input wire GNT_I, //Bus arbiter 'Granted' signal, see THEIA documentation
    input wire RENDREN_I, //Forwarded to CU.iRenderEnable

    output wire GRDY_O, //Data Latched
    input wire STDONE_I, //Scene traverse complete
    input wire HDA_I, //Forwarded to CU.iHostDataAvailable
    output wire RCOMMIT_O, //Driven by CU.oResultCommited

    //OMEM interface (driven by the IO unit)
    output wire [`WB_WIDTH-1:0] OMEM_DAT_O,
    output wire [`WB_WIDTH-1:0] OMEM_ADR_O,
    output wire OMEM_WE_O,

    //TMEM interface (handled by the IO unit)
    input wire TMEM_ACK_I,
    input wire [`WB_WIDTH-1:0] TMEM_DAT_I ,
    output wire [`WB_WIDTH-1:0] TMEM_ADR_O ,
    output wire TMEM_WE_O,
    output wire TMEM_STB_O,
    output wire TMEM_CYC_O,
    input wire TMEM_GNT_I,

`ifdef DEBUG
    input wire[`MAX_CORES-1:0] iDebug_CoreID,
`endif
    //Control Register
    input wire [15:0] CREG_I,
    output wire DONE_O


);

    //--------------------------------------------------------
    // Net declarations (everything is declared before first use)
    //--------------------------------------------------------

    //Alias these signals
    wire Clock,Reset;

    //Control unit memory-flip handshake
    wire wCU2_FlipMem;
    wire wCU2_FlipMemEnabled;
    wire w2MEM_FlipMemory;

    wire wIO_Busy;
    wire [`DATA_ROW_WIDTH-1:0] wEXE_2__MEM_WriteData;
    wire [`DATA_ROW_WIDTH-1:0] wUCODE_RAMBus;
    wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_wDataWriteAddress;
    wire w2IO__AddrIsImm;
    wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMAddress;
    wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__Adr_O_Pointer;
    wire [`DATA_ADDRESS_WIDTH-1:0] wGEO2_IO__Adr_O_Pointer;
    wire wEXE_2__DataWriteEnable;
    wire wUCODE_RAMWriteEnable;
    //wire [2:0] RamBusOwner;

    //Unit interconnection wires
    wire wCU2__MicrocodeExecutionDone;
    wire [`ROM_ADDRESS_WIDTH-1:0] InitialCodeAddress;
    wire [`ROM_ADDRESS_WIDTH-1:0] wInstructionPointer1,wInstructionPointer2;
    wire [`INSTRUCTION_WIDTH-1:0] wEncodedInstruction1,wEncodedInstruction2,wIO2_MEM__ExternalInstruction;
    wire wCU2__ExecuteMicroCode;
    wire [`ROM_ADDRESS_WIDTH-1:0] wIO2_MEM__InstructionWriteAddr;
    wire [95:0] wMEM_2__EXE_DataRead0, wMEM_2__EXE_DataRead1,wMEM_2__IO_DataRead0, wMEM_2__IO_DataRead1;
    wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_DataReadAddress0,wEXE_2__MEM_DataReadAddress1;
    wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMReadAddress0,wUCODE_RAMReadAddress1;

    wire [`WIDTH-1:0] w2IO__AddressOffset;
    wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__DataWriteAddress;
    wire w2IO__Store;
    wire w2IO__EnableWBMaster;

    wire [`DATA_ADDRESS_WIDTH-1:0] wIO2_MEM__DataWriteAddress;
    wire [`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress0;
    //Second IO read address: connected to both the MEM and the IO instances,
    //so it has to be declared ahead of the MEM instance.
    wire [`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress1;
    wire [`DATA_ROW_WIDTH-1:0] wIO2_MEM__Bus;
    wire [`WIDTH-1:0] wIO2_MEM__Data;
    wire [`WIDTH-1:0] wIO2_WBM__Address;
    wire wIO2_MEM__DataWriteEnable;
    wire wIO2__Done;
    wire wCU2_GEO__GeometryFetchEnable;
    wire wIFU2__MicroCodeReturnValue;
    wire wCU2_BCU__ACK;
    wire wGEO2_CU__RequestAABBIU;
    wire wGEO2_CU__RequestBIU;
    wire wGEO2_CU__RequestTCC;
    wire wGEO2_CU__GeometryUnitDone;
    wire wGEO2_CU__Sync;
    wire wEXE2__uCodeDone;
    wire wEXE2_IFU__EXEBusy;
    wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination;
    wire wALU2_EXE__BranchTaken;
    wire wALU2_IFU_BranchNotTaken;
    wire w2IO__SetAddress;
    wire wIDU2_IFU__IDUBusy;

    //Control Register wires
    wire[15:0] wCR2_ControlRegister;
    wire wCR2_TextureMappingEnabled;
    wire wGEO2_CU__TFFDone;
    wire wCU2_GEO__TriggerTFF;
    wire wIO2_MEM_InstructionWriteEnable;
    wire wCU2_IO__WritePixel;
    wire wGEO2_IO__AddrIsImm;
    wire[31:0] wGEO2_IO__AddressOffset;
    wire wGEO2_IO__EnableWBMaster;
    wire wGEO2_IO__SetAddress;
    wire[`WIDTH-1:0] wGEO2__CurrentPitch,wCU2_GEO_Pitch;
    wire wCU2_GEO__SetPitch,wCU2_GEO__IncPicth;

    //EXE <-> IO: OMEM write channel
    wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteAddress;
    wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteData;
    wire wEXE_2__IO_OMEMWriteEnable;

    //EXE <-> IO: TMEM read channel
    wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_TMEMAddress;
    wire [`DATA_ROW_WIDTH-1:0] wIO_2_EXE__TMEMData;
    wire wIO_2_EXE__DataAvailable;
    wire wEXE_2_IO__DataRequest;

    wire wGEO2__RequestingTextures;
    wire w2IO_WriteBack_Set;
    wire w2IO_MasterCycleType;

`ifdef DEBUG
    wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer;
`endif

    //--------------------------------------------------------
    // Glue logic
    //--------------------------------------------------------
    assign Clock = CLK_I;
    assign Reset = RST_I;

    //When we flip the SMEM, this means we are ready to receive more data
    assign GRDY_O = wCU2_FlipMem;

    assign wCR2_TextureMappingEnabled = wCR2_ControlRegister[ `CR_EN_TEXTURE ];

    //The memory is flipped only while the control unit both requests and enables it
    //assign w2MEM_FlipMemory = (wCU2__ExecuteMicroCode | wCU2_FlipMem ) & wCU2_FlipMemEnabled;
    assign w2MEM_FlipMemory = wCU2_FlipMem & wCU2_FlipMemEnabled;

    assign TGA_O = (wGEO2__RequestingTextures) ? 2'b1: 2'b0;

    //NOTE(review): wEXE_2__MEM_DataReadAddress1 is also connected to
    //EXE.oDataReadAddress1 (presumably an output port), which makes this
    //continuous assignment a second driver on the same net, and
    //wUCODE_RAMReadAddress1 has no driver in this module. Left untouched
    //because the sub-module definitions are not visible here - confirm intent.
    assign wEXE_2__MEM_DataReadAddress1 = (wCU2_IO__WritePixel == 0) ? wUCODE_RAMReadAddress1 : wIO_2_MEM__DataReadAddress1;
    assign w2IO__EnableWBMaster = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__EnableWBMaster : wCU2_IO__WritePixel;
    assign w2IO__AddrIsImm = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0;
    assign w2IO__AddressOffset = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0;
    assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O;
    //assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION;

    //Write cycle while the CU is writing a pixel, read cycle otherwise
    assign w2IO_MasterCycleType = (wCU2_IO__WritePixel) ? `WB_SIMPLE_WRITE_CYCLE : `WB_SIMPLE_READ_CYCLE;

    assign w2IO__SetAddress = (wCU2_IO__WritePixel == 0 )? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch;

    //--------------------------------------------------------
    //Control Unit Instance
    ControlUnit CU
    (
        .Clock(Clock),
        .Reset(Reset),
        .oFlipMemEnabled( wCU2_FlipMemEnabled ),
        .oFlipMem( wCU2_FlipMem ),
        .iControlRegister( wCR2_ControlRegister ),
        //.oRamBusOwner( RamBusOwner ),
        .oGFUEnable( wCU2_GEO__GeometryFetchEnable ),
        .iTriggerAABBIURequest( wGEO2_CU__RequestAABBIU ),
        .iTriggerBIURequest( wGEO2_CU__RequestBIU ),
        .iTriggertTCCRequest( wGEO2_CU__RequestTCC ),
        .oUCodeEnable( wCU2__ExecuteMicroCode ),
        .oCodeInstructioPointer( InitialCodeAddress ),
        .iUCodeDone( wCU2__MicrocodeExecutionDone ),
        .iIODone( wIO2__Done ),
        .oIOWritePixel( wCU2_IO__WritePixel ),
        .iUCodeReturnValue( wIFU2__MicroCodeReturnValue ),
        .iGEOSync( wGEO2_CU__Sync ),
        .iTFFDone( wGEO2_CU__TFFDone ),
        .oTriggerTFF( wCU2_GEO__TriggerTFF ),
        .MST_I( MST_I ),
        .oSetCurrentPitch( wCU2_GEO__SetPitch ),
        .iGFUDone( wGEO2_CU__GeometryUnitDone ),
        .iRenderEnable( RENDREN_I ),
        .iSceneTraverseComplete( STDONE_I ),
        .oResultCommited( RCOMMIT_O ),
        .iHostDataAvailable( HDA_I ),

`ifdef DEBUG
        .iDebug_CoreID( iDebug_CoreID ),
`endif
        .oDone( DONE_O )
    );

    //--------------------------------------------------------
    //Memory Unit Instance
    MemoryUnit MEM
    (
        .Clock(Clock),
        .Reset(Reset),

        .iFlipMemory( w2MEM_FlipMemory ),

        //Data Bus to/from EXE
        .iDataReadAddress1_EXE( wEXE_2__MEM_DataReadAddress0 ),
        .iDataReadAddress2_EXE( wEXE_2__MEM_DataReadAddress1 ),
        .oData1_EXE( wMEM_2__EXE_DataRead0 ),
        .oData2_EXE( wMEM_2__EXE_DataRead1 ),
        .iDataWriteEnable_EXE( wEXE_2__DataWriteEnable ),
        .iDataWriteAddress_EXE( wEXE_2__MEM_wDataWriteAddress ),
        .iData_EXE( wEXE_2__MEM_WriteData ),

        //Data Bus to/from IO
        .iDataReadAddress1_IO( wIO_2_MEM__DataReadAddress0 ),
        .iDataReadAddress2_IO( wIO_2_MEM__DataReadAddress1 ),
        .oData1_IO( wMEM_2__IO_DataRead0 ),
        .oData2_IO( wMEM_2__IO_DataRead1 ),
        .iDataWriteEnable_IO( wIO2_MEM__DataWriteEnable ),
        .iDataWriteAddress_IO( wIO2_MEM__DataWriteAddress ),
        .iData_IO( wIO2_MEM__Bus ),

`ifdef DEBUG
        .iDebug_CoreID( iDebug_CoreID ),
`endif

        //Instruction Bus
        .iInstructionReadAddress1( wInstructionPointer1 ),
        .iInstructionReadAddress2( wInstructionPointer2 ),
        .oInstruction1( wEncodedInstruction1 ),
        .oInstruction2( wEncodedInstruction2 ),
        .iInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ),
        .iInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ),
        .iInstruction( wIO2_MEM__ExternalInstruction ),
        //External control register in, registered copy out to the rest of the core
        .iControlRegister( CREG_I ),
        .oControlRegister( wCR2_ControlRegister )

    );

    //--------------------------------------------------------
    //Execution Unit Instance
    ExecutionUnit EXE
    (

        .Clock( Clock),
        .Reset( Reset ),
        .iInitialCodeAddress( InitialCodeAddress ),
        .iInstruction1( wEncodedInstruction1 ),
        .iInstruction2( wEncodedInstruction2 ),
        .oInstructionPointer1( wInstructionPointer1 ),
        .oInstructionPointer2( wInstructionPointer2 ),
        .iDataRead0( wMEM_2__EXE_DataRead0 ),
        .iDataRead1( wMEM_2__EXE_DataRead1 ),
        .iTrigger( wCU2__ExecuteMicroCode ),
        .oDataReadAddress0( wEXE_2__MEM_DataReadAddress0 ),
        .oDataReadAddress1( wEXE_2__MEM_DataReadAddress1 ),
        .oDataWriteEnable( wEXE_2__DataWriteEnable ),
        .oDataWriteAddress( wEXE_2__MEM_wDataWriteAddress ),
        .oDataBus( wEXE_2__MEM_WriteData ),
        .oReturnCode( wIFU2__MicroCodeReturnValue ),
        /**************/
        //OMEM write channel towards the IO unit
        .oOMEMWriteAddress( wEXE_2__IO_WriteAddress ),
        .oOMEMWriteData( wEXE_2__IO_WriteData ),
        .oOMEMWriteEnable( wEXE_2__IO_OMEMWriteEnable ),

        //TMEM read channel towards the IO unit
        .oTMEMReadAddress( wEXE_2__IO_TMEMAddress ),
        .iTMEMReadData( wIO_2_EXE__TMEMData ),
        .iTMEMDataAvailable( wIO_2_EXE__DataAvailable ),
        .oTMEMDataRequest( wEXE_2_IO__DataRequest ),
        /**************/
`ifdef DEBUG
        .iDebug_CoreID( iDebug_CoreID ),
`endif
        .oDone( wCU2__MicrocodeExecutionDone )

    );

    //---------------------------------------------------------------------------------------------------
    //IO Unit Instance
    //iEnable is tied to 0 and iStore to 1; the nets originally connected there
    //are kept in the trailing comments.
    IO_Unit IO
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .iEnable( 0 ),// w2IO__EnableWBMaster ),
        .iBusCyc_Type( w2IO_MasterCycleType ),
        .iStore( 1),//w2IO__Store ),
        .iAdr_DataWriteBack( w2IO__DataWriteAddress ),
        .iAdr_O_Set( w2IO__SetAddress ),
        .iAdr_O_Imm( w2IO__AddressOffset ),
        .iAdr_O_Type( w2IO__AddrIsImm ),
        .iAdr_O_Pointer( w2IO__Adr_O_Pointer ),
        .iReadDataBus( wMEM_2__IO_DataRead0 ),
        .iReadDataBus2( wMEM_2__IO_DataRead1 ),
        .iDat_O_Pointer( `OREG_PIXEL_COLOR ),
        .oDataReadAddress( wIO_2_MEM__DataReadAddress0 ),
        .oDataReadAddress2( wIO_2_MEM__DataReadAddress1 ),
        .oDataWriteAddress( wIO2_MEM__DataWriteAddress ),
        .oDataBus( wIO2_MEM__Bus ),
        .oInstructionBus( wIO2_MEM__ExternalInstruction ),
        .oDataWriteEnable( wIO2_MEM__DataWriteEnable ),
        .oData( wIO2_MEM__Data ),
        .oInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ),
        .oInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ),
        .iWriteBack_Set( w2IO_WriteBack_Set ),
        .oBusy( wIO_Busy ),
        .oDone( wIO2__Done ),
        /**********/
        //OMEM write channel coming from EXE
        .iOMEM_WriteAddress( wEXE_2__IO_WriteAddress ),
        .iOMEM_WriteData( wEXE_2__IO_WriteData ),
        .iOMEM_WriteEnable( wEXE_2__IO_OMEMWriteEnable ),
        .OMEM_DAT_O( OMEM_DAT_O ),
        .OMEM_ADR_O( OMEM_ADR_O ),
        .OMEM_WE_O( OMEM_WE_O ),
        //TMEM read channel going to EXE
        .oTMEMReadData( wIO_2_EXE__TMEMData ),
        .iTMEMDataRequest( wEXE_2_IO__DataRequest ),
        .iTMEMReadAddress( wEXE_2__IO_TMEMAddress ),
        .oTMEMDataAvailable( wIO_2_EXE__DataAvailable ),

        .TMEM_ACK_I( TMEM_ACK_I ),
        .TMEM_DAT_I( TMEM_DAT_I ),
        .TMEM_ADR_O( TMEM_ADR_O ),
        .TMEM_WE_O( TMEM_WE_O ),
        .TMEM_STB_O( TMEM_STB_O ),
        .TMEM_CYC_O( TMEM_CYC_O ),
        .TMEM_GNT_I( TMEM_GNT_I ),

        /**********/
        .MST_I( MST_I ),
        //Wishbone Interface
        .DAT_I( DAT_I ),
        .DAT_O( DAT_O ),
        //The acknowledge is only passed on while the bus is granted to this core
        .ACK_I( ACK_I & GNT_I ),
        .ACK_O( ACK_O ),
        .ADR_O( ADR_O ),
        .ADR_I( ADR_I ),
        .WE_O( WE_O ),
        .WE_I( WE_I ),
        .STB_O( STB_O ),
        .STB_I( STB_I ),
        .CYC_O( CYC_O ),
        .TGA_I( TGA_I ),
        .CYC_I( CYC_I ),
        .GNT_I( GNT_I ),
        .TGC_O( TGC_O )


    );
    //---------------------------------------------------------------------------------------------------
endmodule
/CONTROL/Unit_Control.v
0,0 → 1,1211
/**********************************************************************************
Theia, Ray Cast Programmable Graphics Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/**********************************************************************************
Description:
 
This is the main Finite State Machine.
 
**********************************************************************************/
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//--------------------------------------------------------------
// Control Unit FSM state encodings.
// These macros are used as the case labels of the ControlUnit state
// machine, whose state registers are 6 bits wide, so every value must
// stay below 64. Values 18, 19 and 28 are not assigned; the retired
// macros that referred to them are kept below as comments.
//--------------------------------------------------------------

// Reset and initial configuration
`define CU_AFTER_RESET_STATE                 0
`define CU_WAIT_FOR_INITIAL_CONFIGURATION    1
`define CU_TRIGGER_CONFIGURATION_DATA_READ   2
`define CU_WAIT_FOR_CONFIG_DATA_READ         3
`define CU_ACK_CONFIG_DATA_READ              4

// Constant pre-calculation
`define CU_PRECALCULATE_CONSTANTS            5
`define CU_WAIT_FOR_CONSTANT                 6
`define CU_ACK_PRECALCULATE_CONSTANTS        7

// Task data
`define CU_WAIT_FOR_TASK                     8
`define CU_READ_TASK_DATA                    9
`define CU_WAIT_TASK_DATA_READ               10
`define CU_ACK_TASK_DATA_READ                11

// RGU
`define CU_TRIGGER_RGU                       12
`define CU_WAIT_FOR_RGU                      13
`define CU_ACK_RGU                           14

// GEO / TCC / AABBIU
`define CU_TRIGGER_GEO                       15
`define CU_WAIT_FOR_GEO_SYNC                 16
//`define CU_CHECK_AABBIU_REQUEST 17
`define CU_TRIGGER_TCC                       17
//`define CU_CHECK_BIU_REQUEST 18
//`define CU_TRIGGER_TFF 18
//`define CU_CHECK_GEO_DONE 19
//`define CU_WAIT_FOR_TFF 19
`define CU_TRIGGER_AABBIU                    20
`define CU_WAIT_FOR_AABBIU                   21

// MAIN
`define CU_TRIGGER_MAIN                      22
`define CU_WAIT_FOR_MAIN                     23
`define CU_ACK_MAIN                          24

// PSU / PCU
`define CU_TRIGGER_PSU                       25
`define CU_WAIT_FOR_PSU                      26
`define CU_ACK_PSU                           27
//`define CU_TRIGGER_PCU 28
`define CU_WAIT_FOR_PCU                      29
`define CU_ACK_PCU                           30

// Hit check and register clearing
`define CU_CHECK_HIT                         31
`define CU_CLEAR_REGISTERS                   32
`define CU_WAIT_CLEAR_REGISTERS              33
`define CU_ACK_CLEAR_REGISTERS               34

// Texturing
`define CU_TRIGGER_PSU_WITH_TEXTURE          35
`define WAIT_FOR_TCC                         36

// NPU
`define CU_TRIGGER_NPU                       37
`define CU_WAIT_NPU                          38
`define CU_ACK_NPU                           39

// Configuration and pitch
`define CU_PERFORM_INTIAL_CONFIGURATION      40
`define CU_SET_PICTH                         41

// User constants
`define CU_TRIGGER_USERCONSTANTS             42
`define CU_WAIT_USERCONSTANTS                43
`define CU_ACK_USERCONSTANTS                 44

// User pixel shader
`define CU_TRIGGER_USERPIXELSHADER           45
`define CU_WAIT_FOR_USERPIXELSHADER          46
`define CU_ACK_USERPIXELSHADER               47

// Completion and host synchronisation
`define CU_DONE                              48
`define CU_WAIT_FOR_RENDER_ENABLE            49
`define CU_ACK_TCC                           50
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE      51
//--------------------------------------------------------------
module ControlUnit
(
 
input wire Clock,
input wire Reset,
input wire[15:0] iControlRegister,
output reg oGFUEnable,
input wire iTriggerAABBIURequest,
input wire iTriggerBIURequest,
input wire iTriggertTCCRequest,
output reg oUCodeEnable,
output reg[`ROM_ADDRESS_WIDTH-1:0] oCodeInstructioPointer,
input wire iUCodeDone,
input wire iUCodeReturnValue,
input wire iGFUDone,
input wire iGEOSync,
output reg oTriggerTFF,
input wire iTFFDone,
input wire MST_I,
//output reg[2:0] //oRamBusOwner,
input wire iIODone,
output reg oSetCurrentPitch,
output reg oFlipMemEnabled,
output reg oFlipMem,
output reg oIOWritePixel,
input wire iRenderEnable,
input wire iSceneTraverseComplete,
input wire iHostDataAvailable,
 
`ifdef DEBUG
input wire[`MAX_CORES-1:0] iDebug_CoreID,
`endif
 
output reg oResultCommited,
output reg oDone
);
 
//Internal State Machine varibles
reg [5:0] CurrentState;
reg [5:0] NextState;
integer ucode_file;
reg rResetHitFlop,rHitFlopEnable;
wire wHit;
 
`ifdef DUMP_CODE
integer log;
initial
begin
//$display("Opening ucode dump file....\n");
ucode_file = $fopen("CU.log","w");
end
 
`endif
//--------------------------------------------------------------
FFToggleOnce_1Bit FFTO1
(
.Clock( Clock ),
.Reset( rResetHitFlop ),
.Enable( rHitFlopEnable && iUCodeDone ),
.S( iUCodeReturnValue ),
.Q( wHit )
);
//--------------------------------------------------------------
 
`ifdef DEBUG_CU
always @ ( wHit )
begin
`LOGME "*** Triangle HIT ***\n");
end
`endif
 
//Next states logic and Reset sequence
always @(posedge Clock or posedge Reset)
begin
if (Reset)
CurrentState <= `CU_AFTER_RESET_STATE;
else
CurrentState <= NextState;
end
 
//--------------------------------------------------------------
always @ ( * )
begin
case (CurrentState)
//-----------------------------------------
`CU_AFTER_RESET_STATE:
begin
`ifdef DEBUG_CU
`LOGME"%d CU_AFTER_RESET_STATE\n",$time);
`endif
//oRamBusOwner <= 0;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 1;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 1;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_FOR_INITIAL_CONFIGURATION;
end
//-----------------------------------------
`CU_WAIT_FOR_INITIAL_CONFIGURATION:
begin
//$display("CORE: %d CU_WAIT_FOR_INITIAL_CONFIGURATION", iDebug_CoreID);
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_INITIAL_CONFIGURATION\n",$time);
// `endif
//oRamBusOwner <= 0;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 1;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( MST_I )
NextState <= `CU_PERFORM_INTIAL_CONFIGURATION;//`CU_WAIT_FOR_CONFIG_DATA_READ;
else
NextState <= `CU_WAIT_FOR_INITIAL_CONFIGURATION;
end
//-----------------------------------------
`CU_PERFORM_INTIAL_CONFIGURATION:
begin
//$display("CORE: %d CU_PERFORM_INTIAL_CONFIGURATION", iDebug_CoreID);
//oRamBusOwner <= 0;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 1;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( MST_I == 0 && iRenderEnable == 1'b1)
NextState <= `CU_CLEAR_REGISTERS;//`CU_WAIT_FOR_CONFIG_DATA_READ;
else
NextState <= `CU_PERFORM_INTIAL_CONFIGURATION;
end
//-----------------------------------------
`CU_CLEAR_REGISTERS:
begin
//$display("CORE: %d CU_CLEAR_REGISTERS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d CU_CLEAR_REGISTERS\n",$time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL;
oGFUEnable <= 0;
oUCodeEnable <= 1; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 1;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time);
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_CLEAR_REGISTERS;
end
//-----------------------------------------
`CU_WAIT_CLEAR_REGISTERS:
begin
// `ifdef DEBUG_CU
// `LOGME"%d CU_WAIT_CLEAR_REGISTERS\n",$time);
// `endif
//$display("CORE: %d CU_WAIT_CLEAR_REGISTERS", iDebug_CoreID);
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 1;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone )
NextState <= `CU_ACK_CLEAR_REGISTERS;
else
NextState <= `CU_WAIT_CLEAR_REGISTERS;
end
//-----------------------------------------
`CU_ACK_CLEAR_REGISTERS:
begin
`ifdef DEBUG_CU
`LOGME"%d CU_ACK_CLEAR_REGISTERS\n", $time);
`endif
//$display("CORE: %d CU_ACK_CLEAR_REGISTERS", iDebug_CoreID);
//oRamBusOwner <= 0;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_FOR_CONFIG_DATA_READ;
end
//-----------------------------------------
`CU_WAIT_FOR_CONFIG_DATA_READ:
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_CONFIG_DATA_READ\n",$time);
// `endif
 
 
//$display("CORE: %d CU_WAIT_FOR_CONFIG_DATA_READ", iDebug_CoreID);
 
//oRamBusOwner <= 0;//`REG_BUS_OWNED_BY_BCU;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( MST_I == 0 )
NextState <= `CU_PRECALCULATE_CONSTANTS;
else
NextState <= `CU_WAIT_FOR_CONFIG_DATA_READ;
end
//-----------------------------------------
`CU_PRECALCULATE_CONSTANTS:
begin
//$display("CORE: %d CU_PRECALCULATE_CONSTANTS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d Control: CU_PRECALCULATE_CONSTANTS\n", $time);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_CPPU;
oGFUEnable <= 0;
oUCodeEnable <= 1; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_FOR_CONSTANT;
end
//-----------------------------------------
`CU_WAIT_FOR_CONSTANT:
begin
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_CONSTANT\n", $time);
// `endif
 
 
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_CPPU;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone )
NextState <= `CU_ACK_PRECALCULATE_CONSTANTS;
else
NextState <= `CU_WAIT_FOR_CONSTANT;
end
//-----------------------------------------
`CU_ACK_PRECALCULATE_CONSTANTS:
begin
//$display("CORE: %d CU_ACK_PRECALCULATE_CONSTANTS", iDebug_CoreID);
`ifdef DEBUG_CU
`LOGME"%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time);
`endif
//oRamBusOwner <= 0;//`REG_BUS_OWNED_BY_BCU;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_TRIGGER_USERCONSTANTS;//CU_WAIT_FOR_TASK;
end
//-----------------------------------------
 
`CU_TRIGGER_USERCONSTANTS:
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_TRIGGER_USERCONSTANTS\n",$time);
`endif
//$display("CORE: %d CU_TRIGGER_USERCONSTANTS", iDebug_CoreID);
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_USERCONSTANTS;
oGFUEnable <= 0;
oUCodeEnable <= 1; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_USERCONSTANTS;
end
//-----------------------------------------
`CU_WAIT_USERCONSTANTS:
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_USERCONSTANTS;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone )
NextState <= `CU_ACK_USERCONSTANTS;
else
NextState <= `CU_WAIT_USERCONSTANTS;
end
//-----------------------------------------
`CU_ACK_USERCONSTANTS:
begin
`ifdef DEBUG_CU
`LOGME"%d Control: CU_ACK_RGU\n",$time);
`endif
//$display("CORE: %d CU_ACK_USERCONSTANTS", iDebug_CoreID);
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone == 0)
NextState <= `CU_WAIT_FOR_RENDER_ENABLE;
else
NextState <= `CU_ACK_USERCONSTANTS;
end
//-----------------------------------------
`CU_WAIT_FOR_RENDER_ENABLE:
begin
`ifdef DEBUG_CU
$display("CORE: %d CU_WAIT_FOR_RENDER_ENABLE", iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iRenderEnable)
NextState <= `CU_TRIGGER_RGU;
else
NextState <= `CU_WAIT_FOR_RENDER_ENABLE;
end
//-----------------------------------------
`CU_TRIGGER_RGU:
begin
`ifdef DEBUG_CU
`LOGME"CORE: %d CU_TRIGGER_RGU", iDebug_CoreID);
`endif
 
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_RGU;
oGFUEnable <= 0;
oUCodeEnable <= 1; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
NextState <= `CU_WAIT_FOR_RGU;
end
//-----------------------------------------
`CU_WAIT_FOR_RGU:
begin
 
// `ifdef DEBUG_CU
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time);
// `endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone )
NextState <= `CU_ACK_RGU;
else
NextState <= `CU_WAIT_FOR_RGU;
end
//-----------------------------------------
`CU_ACK_RGU:
begin
 
`ifdef DEBUG_CU
`LOGME"CORE: %d CU_ACK_RGU", iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= 0;
oGFUEnable <= 0;
oUCodeEnable <= 0; //*
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 1;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone == 0 & iRenderEnable == 1)
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;//`CU_TRIGGER_GEO;///////////// GET RID OF GEO!!!
else
NextState <= `CU_ACK_RGU;
end
//-----------------------------------------
`CU_TRIGGER_TCC:
begin
////$display("CU_TRIGGER_TCC");
`ifdef DEBUG_CU
`LOGME"%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID);
`endif
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC;
oUCodeEnable <= 1; //*
oGFUEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 1;
oFlipMem <= 0; //We need u,v from last IO read cycle
oResultCommited <= 0;
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time);
//oIncCurrentPitch <= 0;
oDone <= 0;
NextState <= `WAIT_FOR_TCC;
end
//-----------------------------------------
`WAIT_FOR_TCC:
begin
////$display("WAIT_FOR_TCC");
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC;
oUCodeEnable <= 0; //*
oGFUEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 1;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone )
NextState <= `CU_ACK_TCC;
else
NextState <= `WAIT_FOR_TCC;
end
//-----------------------------------------
`CU_ACK_TCC:
begin
////$display("WAIT_FOR_TCC");
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC;
oUCodeEnable <= 0; //*
oGFUEnable <= 0;
oIOWritePixel <= 0;
rResetHitFlop <= 0;
rHitFlopEnable <= 0;
oTriggerTFF <= 0;
oSetCurrentPitch <= 0;
oFlipMemEnabled <= 0;
oFlipMem <= 0;
oDone <= 0;
oResultCommited <= 0;
//oIncCurrentPitch <= 0;
if ( iUCodeDone == 0 && iSceneTraverseComplete == 1'b1) //DDDD
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE;
else if (iUCodeDone == 0 && iSceneTraverseComplete == 1'b0)
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
else
NextState <= `CU_ACK_TCC;
end
//-----------------------------------------
/*
Was there any hit at all?
At this point, all the triangles in the list
have been traversed looking for a hit with our ray.
There are 3 possibilities:
1) There was not a single hit, then just paint a black
pixel on the screen and send it via PCU.
2) There was a hit and Texturing is not enabled, then trigger the PSU with
no texturing.
3) There was a hit and Texturing is enabled, then fetch the texture
values corresponding to the triangle that we hit.
*/
`CU_CHECK_HIT:
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID);
`endif
	// Decision state: every output is idle, only wHit is examined.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes (GFU enable dropped here since Aug 15)
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 0;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	// A registered hit goes to the textured PSU, anything else goes
	// straight to the user pixel shader.
	if (wHit)
		NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE;
	else
		NextState <= `CU_TRIGGER_USERPIXELSHADER;
end
//-----------------------------------------
`CU_TRIGGER_PSU_WITH_TEXTURE:
begin
`ifdef DEBUG_CU
	`LOGME"%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time);
`endif
	// Asserts oUCodeEnable with the instruction pointer at the PSU2 entry
	// point and asserts rResetHitFlop; always moves on to CU_WAIT_FOR_PSU.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PSU2;
	oUCodeEnable <= 1;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 1;
	rHitFlopEnable <= 0;

	// Memory flip control (flip enable deliberately low in this state)
	oFlipMemEnabled <= 0;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	NextState <= `CU_WAIT_FOR_PSU;
end
//-----------------------------------------
//Wait until data from Host becomes available
`CU_WAIT_FOR_HOST_DATA_AVAILABLE:
begin
	// Idle state: all outputs low, spin until iHostDataAvailable is set.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 0;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iHostDataAvailable )
		NextState <= `CU_TRIGGER_MAIN;
	else
		NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
end
//-----------------------------------------
`CU_TRIGGER_MAIN:
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID);
`endif
	// Asserts oUCodeEnable with the instruction pointer at the MAIN entry
	// point, enables the GFU and requests a memory flip (oFlipMemEnabled
	// and oFlipMem both high); always moves on to CU_WAIT_FOR_MAIN.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN;
	oUCodeEnable <= 1;

	// Unit enables and strobes
	oGFUEnable <= 1;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 1;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	NextState <= `CU_WAIT_FOR_MAIN;
end
//-----------------------------------------
`CU_WAIT_FOR_MAIN:
begin
	// Holds the MAIN entry point with oUCodeEnable low, keeps the GFU and
	// the hit flop enabled, and waits for iUCodeDone.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 1;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 1;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone )
		NextState <= `CU_ACK_MAIN;
	else
		NextState <= `CU_WAIT_FOR_MAIN;
end
//-----------------------------------------
/*
ACK UCODE by setting oUCodeEnable = 0
*/
`CU_ACK_MAIN:
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID);
`endif
	// Acknowledge the MAIN microcode run: oUCodeEnable is held low until
	// iUCodeDone is seen low again. The hit flop stays enabled here.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes (GFU enable dropped here since Aug 15)
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 1;

	// Memory flip control
	oFlipMemEnabled <= 0;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone == 1'b0 )
	begin
		if ( iSceneTraverseComplete == 1'b1 )
			NextState <= `CU_CHECK_HIT;
		// Known issue flagged by the original author: what if
		// iSceneTraverseComplete becomes 1 a cycle after this test?
		else if ( iSceneTraverseComplete == 1'b0 )
			NextState <= `CU_TRIGGER_MAIN;
		else
			NextState <= `CU_ACK_MAIN;
	end
	else
		NextState <= `CU_ACK_MAIN;
end
//-----------------------------------------
`CU_WAIT_FOR_PSU:
begin
	// Waits for iUCodeDone with oUCodeEnable low.
	// NOTE(review): CU_TRIGGER_PSU_WITH_TEXTURE drives
	// `ENTRYPOINT_INDEX_PSU2 while this state drives `ENTRYPOINT_INDEX_PSU;
	// the other trigger/wait pairs keep the same entry point. Confirm the
	// pointer is ignored while oUCodeEnable is low.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PSU;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone )
		NextState <= `CU_ACK_PSU;
	else
		NextState <= `CU_WAIT_FOR_PSU;
end
//-----------------------------------------
`CU_ACK_PSU:
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID);
`endif
	// Acknowledge the PSU microcode run: oUCodeEnable is held low until
	// iUCodeDone is seen low again, then the user pixel shader is triggered.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone == 0)
		NextState <= `CU_TRIGGER_USERPIXELSHADER;
	else
		NextState <= `CU_ACK_PSU;
end
//-----------------------------------------
//-----------------------------------------
`CU_TRIGGER_NPU: //Next Pixel Unit
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID);
`endif
	// Progress marker printed by the simulator whenever this state runs.
	$write("*");

	// Asserts oUCodeEnable with the instruction pointer at the NPG entry
	// point; always moves on to CU_WAIT_NPU.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG;
	oUCodeEnable <= 1;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	NextState <= `CU_WAIT_NPU;
end
//-----------------------------------------
`CU_WAIT_NPU:
begin
	// Holds the NPG entry point with oUCodeEnable low and waits for
	// iUCodeDone.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone )
		NextState <= `CU_ACK_NPU;
	else
		NextState <= `CU_WAIT_NPU;
end
//-----------------------------------------
/*
Next Pixel generation: here we either go
to RGU for the next pixel, or we have no
more pixels so we are done with our picture!
*/
`CU_ACK_NPU:
begin
`ifdef DEBUG_CU
	`LOGME"%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID);
`endif
	// Acknowledge the NPG microcode run: oUCodeEnable is held low until
	// iUCodeDone is seen low again. iUCodeReturnValue then selects between
	// another ray (CU_TRIGGER_RGU) and completion (CU_DONE).

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone == 0 )
	begin
		if ( iUCodeReturnValue == 1 )
			NextState <= `CU_TRIGGER_RGU;
		else if ( iUCodeReturnValue == 0 )
			NextState <= `CU_DONE;
		else
			NextState <= `CU_ACK_NPU;
	end
	else
		NextState <= `CU_ACK_NPU;
end
//-----------------------------------------
`CU_DONE:
begin
	// Terminal state: oDone and oFlipMem are held high and the FSM
	// loops on itself.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 0;
	oFlipMem <= 1;

	// Status flags
	oDone <= 1;
	oResultCommited <= 0;

	NextState <= `CU_DONE;
end
//-----------------------------------------
/*
Here we no longer use GFU so set Enable to zero
*/
`CU_TRIGGER_USERPIXELSHADER:
begin
`ifdef DEBUG_CU
	// Log the real state name; the message used to read CU_TRIGGER_PSU,
	// which made debug traces point at the wrong state.
	`LOGME"%d Control: CU_TRIGGER_USERPIXELSHADER\n",$time);
`endif
	// Asserts oUCodeEnable with the instruction pointer at the pixel
	// shader entry point; always moves on to CU_WAIT_FOR_USERPIXELSHADER.

	// Microcode control
	//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PIXELSHADER;
	oUCodeEnable <= 1;

	// Unit enables and strobes (the GFU is no longer used from here on)
	oGFUEnable <= 0;
	oIOWritePixel <= 0;
	oTriggerTFF <= 0;
	oSetCurrentPitch <= 0;
	//oIncCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	NextState <= `CU_WAIT_FOR_USERPIXELSHADER;
end
//-----------------------------------------
`CU_WAIT_FOR_USERPIXELSHADER:
begin
	// Holds the pixel shader entry point with oUCodeEnable low and waits
	// for iUCodeDone.

	// Microcode control
	oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PIXELSHADER;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	if ( iUCodeDone )
		NextState <= `CU_ACK_USERPIXELSHADER;
	else
		NextState <= `CU_WAIT_FOR_USERPIXELSHADER;
end
//-----------------------------------------
`CU_ACK_USERPIXELSHADER:
begin
`ifdef DEBUG_CU
	// Log the real state name; the message used to read CU_ACK_PSU, which
	// is a different state and made debug traces ambiguous.
	`LOGME"%d Control: CU_ACK_USERPIXELSHADER\n",$time);
`endif
	// Acknowledge the pixel shader microcode run: oUCodeEnable is held low
	// until iUCodeDone is seen low again. oResultCommited is asserted while
	// the FSM is in this state.

	// Microcode control
	//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE;
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oIOWritePixel <= 0;
	oTriggerTFF <= 0;
	oSetCurrentPitch <= 0;
	//oIncCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 1;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 1;

	// The next stop used to be `CU_TRIGGER_PCU; it is now the next-pixel unit.
	if ( iUCodeDone == 0)
		NextState <= `CU_TRIGGER_NPU;
	else
		NextState <= `CU_ACK_USERPIXELSHADER;
end
//---------------------------------------------------
default:
begin
`ifdef DEBUG_CU
	`LOGME"%d Control: ERROR Undefined State\n",$time);
`endif
	// Recovery path for an unknown state encoding: drive every output
	// low and fall back to CU_AFTER_RESET_STATE.

	// Microcode control
	oCodeInstructioPointer <= 0;
	oUCodeEnable <= 0;

	// Unit enables and strobes
	oGFUEnable <= 0;
	oTriggerTFF <= 0;
	oIOWritePixel <= 0;
	oSetCurrentPitch <= 0;

	// Hit flop control
	rResetHitFlop <= 0;
	rHitFlopEnable <= 0;

	// Memory flip control
	oFlipMemEnabled <= 0;
	oFlipMem <= 0;

	// Status flags
	oDone <= 0;
	oResultCommited <= 0;

	NextState <= `CU_AFTER_RESET_STATE;
end
//-----------------------------------------
 
endcase
end //always
endmodule
/MEM/Module_ROM.v
0,0 → 1,703
 
 
`define ONE (32'h1 << `SCALE)
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/*
I can't synthesize roms, the rom needs to be adapted depending on the
final target silicon.
*/
 
 
//--------------------------------------------------------
// ROM: read-only instruction memory holding the built-in microcode.
//
// Purely combinational lookup: whenever Address changes, I is driven with the
// instruction word stored at that address. Addresses 0..279 are populated;
// any other address yields the all-ones pattern from the default branch (and,
// when DEBUG is defined, an error message is printed).
//
// Ports:
//   Address       - instruction address to read.
//   iDebug_CoreID - (DEBUG builds only) core identifier used in the error message.
//   I             - instruction word selected by Address.
//
// The `define directives inside the always block are ordinary preprocessor
// macros; the TAG_*/LABEL_* values mirror the numeric addresses used as case
// labels below and must be kept in sync by hand.
module ROM
(
input wire[`ROM_ADDRESS_WIDTH-1:0] Address,
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output reg [`INSTRUCTION_WIDTH-1:0] I
);


always @( Address )
begin
case (Address)
//Hardcoded register aliases :(
`define RAY_INSIDE_BOX `R3
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T
`define CURRENT_LIGHT_DIFFUSE 16'h6

//-----------------------------------------------------------------
//Jump targets and routine entry points (ROM addresses), highest first
`define TAG_PIXELSHADER 16'd278
`define TAG_USERCONSTANTS 16'd276
`define TAG_PSU_UCODE_ADRESS2 16'd248
`define TAG_PSU_UCODE_ADRESS 16'd232
`define LABEL_TCC_EXIT 16'd231
`define TAG_TCC_UCODE_ADDRESS 16'd190
`define LABEL_BIU4 16'd189
`define LABEL_BIU3 16'd179
`define LABEL_BIU2 16'd176
`define LABEL_BIU1 16'd174
`define TAG_BIU_UCODE_ADDRESS 16'd157
`define LABEL_HIT 16'd155
`define LABEL15 16'd153
`define LABEL14 16'd151
`define LABEL13 16'd149
`define LABEL_TEST_XY_PLANE 16'd144
`define LABEL12 16'd142
`define LABEL11 16'd140
`define LABEL10 16'd138
`define LABEL_TEST_XZ_PLANE 16'd132
`define LABEL9 16'd130
`define LABEL8 16'd128
`define LABEL7 16'd126
`define LABEL_TEST_YZ_PLANE 16'd120
`define LABEL_RAY_INSIDE_BOX 16'd117
`define LABEL_ELSEZ 16'd116
`define LABEL6 16'd113
`define LABEL_ELESE_IFZ 16'd109
`define LABEL5 16'd106
`define LABEL_TEST_RAY_Z_ORIGEN 16'd102
`define LABEL_ELSEY 16'd101
`define LABEL4 16'd98
`define LABEL_ELESE_IFY 16'd94
`define LABEL3 16'd91
`define LABEL_TEST_RAY_Y_ORIGEN 16'd87
`define LABEL_ELSEX 16'd86
`define LABEL2 16'd83
`define LABEL_ELSE_IFX 16'd79
`define LABEL1 16'd76
`define LABEL_TEST_RAY_X_ORIGEN 16'd72
`define TAG_AABBIU_UCODE_ADDRESS 16'd69
`define LABEL_ALLDONE 16'd67
`define LABEL_NPG_NEXT_ROW 16'd63
`define TAG_NPG_UCODE_ADDRESS 16'd55
`define TAG_RGU_UCODE_ADDRESS 16'd47
`define TAG_CPPU_UCODE_ADDRESS 16'd44
`define LABEL_IS_NO_HIT 16'd43
`define LABEL_IS_HIT 16'd39
`define TAG_ADRR_MAIN 16'd37


//-------------------------------------------------------------------------
//Default values for some registers after reset
//-------------------------------------------------------------------------
//This is the first code that gets executed after the machine is
//externally configured, i.e. after the MST_I goes from 1 to zero.
//It sets initial values for some of the internal registers

0: I = { `ZERO ,`CREG_LAST_t ,`VOID ,`VOID };
//Set the last 't' to a very positive value (500)
1: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
2: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID };
3: I = { `COPY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID };


//Calculate the initial linear address for ADR_O
//this is: (X_initial + RESOLUTION_Y*Y_initial) * 3.
//Notice that we need to use 'unscaled' i.e. integer
//values because the result of the multiplication by
//the resolution is too large to fit a fixed point
//representation.

4: I = { `COPY ,`R1 ,`CREG_RESOLUTION ,`VOID };
5: I = { `UNSCALE ,`R1 ,`R1 ,`VOID };
6: I = { `SETX ,`R1 ,32'h1 };
7: I = { `SETZ ,`R1 ,32'h0 };
8: I = { `COPY ,`R2 ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID };
9: I = { `UNSCALE ,`R2 ,`R2 ,`VOID };

//Ok let's start by calculating RESOLUTION_Y*Y_initial
10: I = { `IMUL ,`R1 ,`R1 ,`R2 };
11: I = { `COPY ,`R2 ,`R1 ,`VOID };
12: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY };

//now X_initial + RESOLUTION_Y*Y_initial
13: I = { `ADD ,`R3 ,`R1 ,`R2 };
14: I = { `COPY ,`R2 ,`R1 ,`VOID };
15: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_ZZZ };
16: I = { `ADD ,`R3 ,`R3 ,`R2 };
17: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX };

//finally multiply by 3 to get:
//(X_initial + RESOLUTION_Y*Y_initial) * 3 voila!
18: I = { `SETX ,`R2 ,32'h3 };
19: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_XXX };
20: I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 };

//By this point you should be wondering why not
//just do DOT R1 [1 Resolution_Y 0] [X_initial Y_initial 0 ]?
//well because DOT uses fixed point and the result may not
//fit :(

//Transform from fixed point to integer
//UNSCALE CREG_PIXEL_PITCH CREG_PIXEL_PITCH VOID
21: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID };

//CREG_3 = [3 3 3], used below as the per-pixel address increment
22: I = { `SETX ,`CREG_3 ,32'h3 };
23: I = { `SWIZZLE3D ,`CREG_3 ,`SWIZZLE_XXX };

//CREG_012 = [0 1 2]
24: I = { `SETX ,`CREG_012 ,32'h0 };
25: I = { `SETY ,`CREG_012 ,32'h1 };
26: I = { `SETZ ,`CREG_012 ,32'h2 };
27: I = { `COPY ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_012 ,`VOID };
28: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID };
29: I = { `ZERO ,`CREG_ZERO ,`VOID ,`VOID };

//Clear the scratch registers
30: I = { `ZERO ,`R1 ,`VOID ,`VOID };
31: I = { `ZERO ,`R2 ,`VOID ,`VOID };
32: I = { `ZERO ,`R3 ,`VOID ,`VOID };
33: I = { `ZERO ,`R4 ,`VOID ,`VOID };
34: I = { `ZERO ,`R5 ,`VOID ,`VOID };
35: I = { `ZERO ,`R99 ,`VOID ,`VOID };
36: I = { `RETURN ,`RT_TRUE };

//----------------------------------------------
//TAG_ADRR_MAIN:
//Calls the BIU routine; if R99.x equals CREG_ZERO.x the routine
//returns RT_FALSE, otherwise the TCC routine is called and RT_TRUE is returned.

37: I = { `CALL ,`ENTRYPOINT_ADRR_BIU ,`VOID ,`VOID };
38: I = { `JEQX ,`LABEL_IS_NO_HIT ,`R99 ,`CREG_ZERO };

//LABEL_IS_HIT:
39: I = { `CALL ,`ENTRYPOINT_ADRR_TCC ,`VOID ,`VOID };
40: I = { `NOP ,`RT_FALSE };
41: I = { `RETURN ,`RT_TRUE };
42: I = { `NOP ,`RT_FALSE };

//LABEL_IS_NO_HIT:
43: I = { `RETURN ,`RT_FALSE };


//----------------------------------------------------------------------
//Micro code for CPPU
//TAG_CPPU_UCODE_ADDRESS:
//CREG_PROJECTION_WINDOW_SCALE = (window max - window min) / resolution


44: I = { `SUB ,`R1 ,`CREG_PROJECTION_WINDOW_MAX ,`CREG_PROJECTION_WINDOW_MIN };
45: I = { `DIV ,`CREG_PROJECTION_WINDOW_SCALE ,`R1 ,`CREG_RESOLUTION };
46: I = { `RETURN ,`RT_FALSE };

//----------------------------------------------------------------------
//Micro code for RGU
//TAG_RGU_UCODE_ADDRESS:
//Builds the ray direction for the current pixel and re-arms CREG_LAST_t


47: I = { `MUL ,`R1 ,`CREG_PIXEL_2D_POSITION ,`CREG_PROJECTION_WINDOW_SCALE };
48: I = { `ADD ,`R1 ,`R1 ,`CREG_PROJECTION_WINDOW_MIN };
49: I = { `SUB ,`CREG_UNORMALIZED_DIRECTION ,`R1 ,`CREG_CAMERA_POSITION };
50: I = { `MAG ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`VOID };
51: I = { `DIV ,`CREG_RAY_DIRECTION ,`CREG_UNORMALIZED_DIRECTION ,`R2 };
52: I = { `DEC ,`CREG_LAST_COL ,`CREG_PIXEL_2D_FINAL_POSITION ,`VOID };
53: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
54: I = { `RETURN ,`RT_FALSE };
//----------------------------------------------------------------------
//Next Pixel generation Code (NPG)
//TAG_NPG_UCODE_ADDRESS:
//Returns RT_TRUE while there are pixels left and RT_FALSE when all are done.

55: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID };
56: I = { `SETX ,`CREG_TEXTURE_COLOR ,32'h60000 };
57: I = { `ADD ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_3 };

58: I = { `ADD ,`CREG_PIXEL_PITCH ,`CREG_PIXEL_PITCH ,`CREG_3 };
59: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID };
60: I = { `JGEX ,`LABEL_NPG_NEXT_ROW ,`CREG_PIXEL_2D_POSITION ,`CREG_LAST_COL };
61: I = { `INCX ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID };
62: I = { `RETURN ,`RT_TRUE };

//LABEL_NPG_NEXT_ROW:
63: I = { `SETX ,`CREG_PIXEL_2D_POSITION ,32'h0 };
64: I = { `INCY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID };
65: I = { `JGEY ,`LABEL_ALLDONE ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_FINAL_POSITION };
66: I = { `RETURN ,`RT_TRUE };

//LABEL_ALLDONE:
//NOTE(review): every other NOP in this ROM is written { `NOP ,`RT_FALSE };
//this one uses two `VOID operands instead - confirm both forms have the
//same total width as the instruction word.
67: I = { `NOP ,`VOID ,`VOID };
68: I = { `RETURN ,`RT_FALSE };

//----------------------------------------------------------------------
//Micro code for AABBIU
//TAG_AABBIU_UCODE_ADDRESS:
//NOTE(review): the routine returns RT_TRUE at address 71, and no instruction
//in this ROM jumps to LABEL_TEST_RAY_X_ORIGEN (72), so the box test below
//looks unreachable from here - confirm this early return is intentional.
69: I = { `ZERO ,`R3 ,`VOID ,`VOID };
70: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
71: I = { `RETURN ,`RT_TRUE };

//LABEL_TEST_RAY_X_ORIGEN:
72: I = { `JGEX ,`LABEL_ELSE_IFX ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
73: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
74: I = { `JLEX ,`LABEL1 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
75: I = { `RETURN ,`RT_FALSE };

//LABEL1:
76: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
77: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
78: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID };

//LABEL_ELSE_IFX:
79: I = { `JLEX ,`LABEL_ELSEX ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
80: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
81: I = { `JGEX ,`LABEL2 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
82: I = { `RETURN ,`RT_FALSE };
//LABEL2:
83: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
84: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
85: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID };
//LABEL_ELSEX:
86: I = { `SETX ,`R5 ,32'b1 };

//LABEL_TEST_RAY_Y_ORIGEN:
87: I = { `JGEY ,`LABEL_ELESE_IFY ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
88: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
89: I = { `JLEY ,`LABEL3 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
90: I = { `RETURN ,`RT_FALSE };

//LABEL3:
91: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
92: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
93: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID };

//LABEL_ELESE_IFY:
94: I = { `JLEY ,`LABEL_ELSEY ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
95: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
96: I = { `JGEY ,`LABEL4 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
97: I = { `RETURN ,`RT_FALSE };

//LABEL4:
98: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
99: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
100: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID };

//LABEL_ELSEY:
101: I = { `SETY ,`R5 ,32'b1 };

//LABEL_TEST_RAY_Z_ORIGEN:
102: I = { `JGEZ ,`LABEL_ELESE_IFZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
103: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
104: I = { `JLEZ ,`LABEL5 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
105: I = { `RETURN ,`RT_FALSE };

//LABEL5:
106: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
107: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
108: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID };

//LABEL_ELESE_IFZ:
109: I = { `JLEZ ,`LABEL_ELSEZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
110: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
111: I = { `JGEZ ,`LABEL6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
112: I = { `RETURN ,`RT_FALSE };

//LABEL6:
113: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
114: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
115: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID };

//LABEL_ELSEZ:
116: I = { `SETZ ,`R5 ,32'b1 };

//LABEL_RAY_INSIDE_BOX:
117: I = { `ZERO ,`R1 ,`VOID ,`VOID };
118: I = { `JEQX ,`LABEL_TEST_YZ_PLANE ,`R1 ,`RAY_INSIDE_BOX };
//BUG (known, from the original author): a NOP is needed here, otherwise
//the pipeline gets confused
119: I = { `RETURN ,`RT_TRUE };

//LABEL_TEST_YZ_PLANE:
120: I = { `JNEX ,`LABEL_TEST_XZ_PLANE ,`R5 ,`R1 };
121: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_XXX };
122: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
123: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
124: I = { `JGEY ,`LABEL7 ,`R2 ,`CREG_AABBMIN };
125: I = { `RETURN ,`RT_FALSE };

//LABEL7:
126: I = { `JLEY ,`LABEL8 ,`R2 ,`CREG_AABBMAX };
127: I = { `RETURN ,`RT_FALSE };

//LABEL8:
128: I = { `JGEZ ,`LABEL9 ,`R2 ,`CREG_AABBMIN };
129: I = { `RETURN ,`RT_FALSE };

//LABEL9:
130: I = { `JLEZ ,`LABEL_TEST_XZ_PLANE ,`R2 ,`CREG_AABBMAX };
131: I = { `RETURN ,`RT_FALSE };

//LABEL_TEST_XZ_PLANE:
132: I = { `JNEY ,`LABEL_TEST_XY_PLANE ,`R5 ,`R1 };
133: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_YYY };
134: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
135: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
136: I = { `JGEX ,`LABEL10 ,`R2 ,`CREG_AABBMIN };
137: I = { `RETURN ,`RT_FALSE };

//LABEL10:
138: I = { `JLEX ,`LABEL11 ,`R2 ,`CREG_AABBMAX };
139: I = { `RETURN ,`RT_FALSE };

//LABEL11:
140: I = { `JGEZ ,`LABEL12 ,`R2 ,`CREG_AABBMIN };
141: I = { `RETURN ,`RT_FALSE };

//LABEL12:
142: I = { `JLEZ ,`LABEL_TEST_XY_PLANE ,`R2 ,`CREG_AABBMAX };
143: I = { `RETURN ,`RT_FALSE };

//LABEL_TEST_XY_PLANE:
144: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_ZZZ };
145: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
146: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
147: I = { `JGEX ,`LABEL13 ,`R2 ,`CREG_AABBMIN };
148: I = { `RETURN ,`RT_FALSE };

//LABEL13:
149: I = { `JLEX ,`LABEL14 ,`R2 ,`CREG_AABBMAX };
150: I = { `RETURN ,`RT_FALSE };

//LABEL14:
151: I = { `JGEY ,`LABEL15 ,`R2 ,`CREG_AABBMIN };
152: I = { `RETURN ,`RT_FALSE };

//LABEL15:
153: I = { `JLEY ,`LABEL_HIT ,`R2 ,`CREG_AABBMAX };
154: I = { `RETURN ,`RT_FALSE };

//LABEL_HIT:
155: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
156: I = { `RETURN ,`RT_TRUE };

//------------------------------------------------------------------------
//BIU Micro code
//TAG_BIU_UCODE_ADDRESS:
//Computes t, u and v for the current triangle (V0, V1, V2) and the current
//ray, rejects the triangle when u < 0, v < 0 or u + v > 1, and records the
//triangle data in the *_LAST registers when t is below CREG_LAST_t.
157: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID };
158: I = { `SETX ,`R3 ,`ONE };
159: I = { `SETX ,`R1 ,32'h00000 };
160: I = { `SUB ,`CREG_E1 ,`CREG_V1 ,`CREG_V0 };
161: I = { `SUB ,`CREG_E2 ,`CREG_V2 ,`CREG_V0 };
162: I = { `SUB ,`CREG_T ,`CREG_CAMERA_POSITION ,`CREG_V0 };
163: I = { `CROSS ,`CREG_P ,`CREG_RAY_DIRECTION ,`CREG_E2 };
164: I = { `CROSS ,`CREG_Q ,`CREG_T ,`CREG_E1 };
165: I = { `DOT ,`CREG_H1 ,`CREG_Q ,`CREG_E2 };
166: I = { `DOT ,`CREG_H2 ,`CREG_P ,`CREG_T };
167: I = { `DOT ,`CREG_H3 ,`CREG_Q ,`CREG_RAY_DIRECTION };
168: I = { `DOT ,`CREG_DELTA ,`CREG_P ,`CREG_E1 };
169: I = { `DIV ,`CREG_t ,`CREG_H1 ,`CREG_DELTA };
170: I = { `DIV ,`CREG_u ,`CREG_H2 ,`CREG_DELTA };
171: I = { `DIV ,`CREG_v ,`CREG_H3 ,`CREG_DELTA };
172: I = { `JGEX ,`LABEL_BIU1 ,`CREG_u ,`R1 };
173: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU1:
174: I = { `JGEX ,`LABEL_BIU2 ,`CREG_v ,`R1 };
175: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU2:
176: I = { `ADD ,`R2 ,`CREG_u ,`CREG_v };
177: I = { `JLEX ,`LABEL_BIU3 ,`R2 ,`R3 };
178: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU3:
//Skip the bookkeeping when this hit is not closer than the best one so far
179: I = { `JGEX ,`LABEL_BIU4 ,`CREG_t ,`CREG_LAST_t };
180: I = { `COPY ,`CREG_LAST_t ,`CREG_t ,`VOID };
181: I = { `COPY ,`CREG_LAST_u ,`CREG_u ,`VOID };
182: I = { `COPY ,`CREG_LAST_v ,`CREG_v ,`VOID };
183: I = { `COPY ,`CREG_E1_LAST ,`CREG_E1 ,`VOID };
184: I = { `COPY ,`CREG_E2_LAST ,`CREG_E2 ,`VOID };
185: I = { `COPY ,`CREG_UV0_LAST ,`CREG_UV0 ,`VOID };
186: I = { `COPY ,`CREG_UV1_LAST ,`CREG_UV1 ,`VOID };
187: I = { `COPY ,`CREG_UV2_LAST ,`CREG_UV2 ,`VOID };
188: I = { `COPY ,`CREG_TRI_DIFFUSE_LAST ,`CREG_TRI_DIFFUSE ,`VOID };
//LABEL_BIU4:
189: I = { `RET ,`R99 ,`TRUE };


//-------------------------------------------------------------------------
//Calculate the address of the texture coordinates.

//TAG_TCC_UCODE_ADDRESS:
//Do this calculation only if this triangle is the one closest to the camera
190: I = { `JGX ,`LABEL_TCC_EXIT ,`CREG_t ,`CREG_LAST_t };

//First get the UV coordinates and store in R1
//R1x: u_coordinate = U0 + last_u * (U1 - U0) + last_v * (U2 - U0)
//R1y: v_coordinate = V0 + last_u * (V1 - V0) + last_v * (V2 - V0)
//R1z: 0

191: I = { `SUB ,`R1 ,`CREG_UV1_LAST ,`CREG_UV0_LAST };
192: I = { `SUB ,`R2 ,`CREG_UV2_LAST ,`CREG_UV0_LAST };
193: I = { `MUL ,`R1 ,`CREG_LAST_u ,`R1 };
194: I = { `MUL ,`R2 ,`CREG_LAST_v ,`R2 };
195: I = { `ADD ,`R1 ,`R1 ,`R2 };
196: I = { `ADD ,`R1 ,`R1 ,`CREG_UV0_LAST };

//R7x : fu = (u_coordinate) * gTexture.mWidth
//R7y : fv = (v_coordinate) * gTexture.mWidth
//R7z : 0
197: I = { `MUL ,`R7 ,`R1 ,`CREG_TEXTURE_SIZE };

//R1x: u1 = ((int)fu) % gTexture.mWidth
//R1y: v1 = ((int)fv) % gTexture.mHeight
//R1z: 0
//R2x: u2 = (u1 + 1 ) % gTexture.mWidth
//R2y: v2 = (v1 + 1 ) % gTexture.mHeight
//R2z: 0
// Notice MOD2 only operates over
// numbers that are power of 2 also notice that the
// textures are assumed to be squares!
//x % 2^n == x & (2^n - 1).

198: I = { `MOD ,`R1 ,`R7 ,`CREG_TEXTURE_SIZE };
199: I = { `INC ,`R2 ,`R1 ,`VOID };
200: I = { `MOD ,`R2 ,`R2 ,`CREG_TEXTURE_SIZE };

//Cool now we should store the values in the appropriate registers
//OREG_TEX_COORD1.x = u1 + v1 * gTexture.mWidth
//OREG_TEX_COORD1.y = u2 + v1 * gTexture.mWidth
//OREG_TEX_COORD1.z = 0
//OREG_TEX_COORD2.x = u1 + v2 * gTexture.mWidth
//OREG_TEX_COORD2.y = u2 + v2 * gTexture.mWidth
//OREG_TEX_COORD2.z = 0

//R1= [u1 v1 0]
//R2= [u2 v2 0]

//R2 = [v2 u2 0]
201: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YXZ };

//R3 = [v2 v1 0]
202: I = { `XCHANGEX ,`R3 ,`R1 ,`R2 };


//R4 = [u1 u2 0]
203: I = { `XCHANGEX ,`R4 ,`R2 ,`R1 };

//R2 = [v2*H v1*H 0]
204: I = { `UNSCALE ,`R9 ,`R3 ,`VOID };
205: I = { `UNSCALE ,`R8 ,`CREG_TEXTURE_SIZE ,`VOID };
206: I = { `IMUL ,`R2 ,`R9 ,`R8 };

//OREG_TEX_COORD1 = [u1 + v2*H u2 + v1*H 0]
//R4 = FixedToInteger(R4)
207: I = { `UNSCALE ,`R4 ,`R4 ,`VOID };
208: I = { `ADD ,`R12 ,`R2 ,`R4 };
209: I = { `SETX ,`R5 ,32'h3 };
210: I = { `SETY ,`R5 ,32'h3 };
211: I = { `SETZ ,`R5 ,32'h3 };
//Multiply by 3 (the pitch)
//IMUL OREG_TEX_COORD1 R12 R5
212: I = { `IMUL ,`CREG_TEX_COORD1 ,`R12 ,`R5 };

//R4 = [u2 u1 0]
213: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YXZ };


//OREG_TEX_COORD2 [u2 + v2*H u1 + v1*H 0]
214: I = { `ADD ,`R12 ,`R2 ,`R4 };
//Multiply by 3 (the pitch)
//IMUL OREG_TEX_COORD2 R12 R5
215: I = { `IMUL ,`CREG_TEX_COORD2 ,`R12 ,`R5 };


//Cool now get the weights

//w1 = (1 - fracu) * (1 - fracv)
//w2 = fracu * (1 - fracv)
//w3 = (1 - fracu) * fracv
//w4 = fracu * fracv

//R4x: fracu
//R4y: fracv
//R4z: 0
216: I = { `FRAC ,`R4 ,`R7 ,`VOID };

//R5x: fracv
//R5y: fracu
//R5z: 0
217: I = { `COPY ,`R5 ,`R4 ,`VOID };
218: I = { `SWIZZLE3D ,`R5 ,`SWIZZLE_YXZ };


//R5x: 1 - fracv
//R5y: 1 - fracu
//R5z: 1
219: I = { `NEG ,`R5 ,`R5 ,`VOID };
220: I = { `INC ,`R5 ,`R5 ,`VOID };

//MULP result (destination is CREG_TEXWEIGHT1):
//x: 1 - fracv
//y: 1 - fracu
//z: (1 - fracv)(1 - fracu)
221: I = { `MULP ,`CREG_TEXWEIGHT1 ,`R5 ,`VOID };

//CREG_TEXWEIGHT1.x = (1 - fracv)(1 - fracu)
//CREG_TEXWEIGHT1.y = (1 - fracv)(1 - fracu)
//CREG_TEXWEIGHT1.z = (1 - fracv)(1 - fracu)
222: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT1 ,`SWIZZLE_ZZZ };


//R6x: w2: fracu * (1 - fracv )
//R6y: w3: fracv * (1 - fracu )
//R6z: 0
223: I = { `MUL ,`R6 ,`R4 ,`R5 };

//CREG_TEXWEIGHT2.x = fracu * (1 - fracv )
//CREG_TEXWEIGHT2.y = fracu * (1 - fracv )
//CREG_TEXWEIGHT2.z = fracu * (1 - fracv )
224: I = { `COPY ,`CREG_TEXWEIGHT2 ,`R6 ,`VOID };
225: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT2 ,`SWIZZLE_XXX };

//CREG_TEXWEIGHT3.x = fracv * (1 - fracu )
//CREG_TEXWEIGHT3.y = fracv * (1 - fracu )
//CREG_TEXWEIGHT3.z = fracv * (1 - fracu )
226: I = { `COPY ,`CREG_TEXWEIGHT3 ,`R6 ,`VOID };
227: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT3 ,`SWIZZLE_YYY };


//R4x: fracu
//R4y: fracv
//R4z: fracu * fracv
228: I = { `MULP ,`R4 ,`R4 ,`VOID };

//CREG_TEXWEIGHT4.x = fracv * fracu
//CREG_TEXWEIGHT4.y = fracv * fracu
//CREG_TEXWEIGHT4.z = fracv * fracu
229: I = { `COPY ,`CREG_TEXWEIGHT4 ,`R4 ,`VOID };
230: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT4 ,`SWIZZLE_ZZZ };


//LABEL_TCC_EXIT:
231: I = { `RET ,`R99 ,32'h0 };


//-------------------------------------------------------------------------
//TAG_PSU_UCODE_ADRESS:
//Pixel Shader #1
//This pixel shader has diffuse light but no textures

232: I = { `CROSS ,`R1 ,`CREG_E1_LAST ,`CREG_E2_LAST };
233: I = { `MAG ,`R2 ,`R1 ,`VOID };
234: I = { `DIV ,`R1 ,`R1 ,`R2 };
235: I = { `MUL ,`R2 ,`CREG_RAY_DIRECTION ,`CREG_LAST_t };
236: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
237: I = { `SUB ,`R2 ,`CURRENT_LIGHT_POS ,`R2 };
238: I = { `MAG ,`R3 ,`R2 ,`VOID };
239: I = { `DIV ,`R2 ,`R2 ,`R3 };
240: I = { `DOT ,`R3 ,`R2 ,`R1 };
241: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_TRI_DIFFUSE_LAST ,`CURRENT_LIGHT_DIFFUSE };
242: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_COLOR_ACC ,`R3 };
243: I = { `COPY ,`CREG_TEXTURE_COLOR ,`CREG_COLOR_ACC ,`VOID };
244: I = { `NOP ,`RT_FALSE };
245: I = { `NOP ,`RT_FALSE };
246: I = { `NOP ,`RT_FALSE };
247: I = { `RETURN ,`RT_TRUE };

//-------------------------------------------------------------------------
//Pixel Shader #2
//TAG_PSU_UCODE_ADRESS2:
//This Pixel Shader has no light but it does texturing
//with bi-linear interpolation



248: I = { `COPY ,`R1 ,`CREG_TEX_COORD1 ,`VOID };
249: I = { `COPY ,`R2 ,`CREG_TEX_COORD1 ,`VOID };
250: I = { `COPY ,`R3 ,`CREG_TEX_COORD2 ,`VOID };
251: I = { `COPY ,`R4 ,`CREG_TEX_COORD2 ,`VOID };


//Broadcast one texel address per register, then add [0 1 2]
252: I = { `SWIZZLE3D ,`R1 ,`SWIZZLE_XXX };
253: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY };
254: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX };
255: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YYY };
256: I = { `ADD ,`R1 ,`R1 ,`CREG_012 };
257: I = { `ADD ,`R2 ,`R2 ,`CREG_012 };
258: I = { `ADD ,`R3 ,`R3 ,`CREG_012 };
259: I = { `ADD ,`R4 ,`R4 ,`CREG_012 };


//Each texture read is followed by a NOP
260: I = { `TMREAD ,`CREG_TEX_COLOR1 ,`R1 ,`VOID };
261: I = { `NOP ,`RT_FALSE };
262: I = { `TMREAD ,`CREG_TEX_COLOR2 ,`R2 ,`VOID };
263: I = { `NOP ,`RT_FALSE };
264: I = { `TMREAD ,`CREG_TEX_COLOR3 ,`R3 ,`VOID };
265: I = { `NOP ,`RT_FALSE };
266: I = { `TMREAD ,`CREG_TEX_COLOR4 ,`R4 ,`VOID };
267: I = { `NOP ,`RT_FALSE };




//TextureColor.R = c1.R * w1 + c2.R * w2 + c3.R * w3 + c4.R * w4
//TextureColor.G = c1.G * w1 + c2.G * w2 + c3.G * w3 + c4.G * w4
//TextureColor.B = c1.B * w1 + c2.B * w2 + c3.B * w3 + c4.B * w4


//Earlier (disabled) pairing of colors and weights:
//MUL R1 CREG_TEX_COLOR4 CREG_TEXWEIGHT1
//MUL R2 CREG_TEX_COLOR2 CREG_TEXWEIGHT2
//MUL R3 CREG_TEX_COLOR1 CREG_TEXWEIGHT3
//MUL R4 CREG_TEX_COLOR3 CREG_TEXWEIGHT4
//NOTE(review): the active code below pairs COLOR3 with WEIGHT1 and COLOR4
//with WEIGHT4, i.e. it swaps COLOR3/COLOR4 relative to the disabled variant,
//and neither pairing matches the c1*w1 .. c4*w4 formula literally - confirm
//which texel each CREG_TEX_COLORn actually holds.

268: I = { `MUL ,`R1 ,`CREG_TEX_COLOR3 ,`CREG_TEXWEIGHT1 };
269: I = { `MUL ,`R2 ,`CREG_TEX_COLOR2 ,`CREG_TEXWEIGHT2 };
270: I = { `MUL ,`R3 ,`CREG_TEX_COLOR1 ,`CREG_TEXWEIGHT3 };
271: I = { `MUL ,`R4 ,`CREG_TEX_COLOR4 ,`CREG_TEXWEIGHT4 };

272: I = { `ADD ,`CREG_TEXTURE_COLOR ,`R1 ,`R2 };
273: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R3 };
274: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R4 };
275: I = { `RETURN ,`RT_TRUE };


//-------------------------------------------------------------------------
//Default User constants
//TAG_USERCONSTANTS:

276: I = { `NOP ,`RT_FALSE };
277: I = { `RETURN ,`RT_TRUE };

//TAG_PIXELSHADER:
//Default Pixel Shader (just outputs texture)
278: I = { `OMWRITE ,`OREG_PIXEL_COLOR ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_TEXTURE_COLOR };
279: I = { `RETURN ,`RT_TRUE };


//-------------------------------------------------------------------------

//Any address without an entry above: report it in DEBUG builds and
//drive an all-ones instruction word.
default:
begin
`ifdef DEBUG
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address);
// $stop();
`endif
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF};
end
endcase
end
endmodule
//--------------------------------------------------------
/MEM/Unit_MEM.v
0,0 → 1,328
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
The memory unit contains all the memory-related modules for THEIA.
There are 3 memories in the core:
DMEM: The data memory, it is a R/W dual channel RAM, stores the data locations.
IMEM: The instruction memory, R/W dual channel RAM, stores user shaders.
IROM: RO instruction memory, stores default shaders and other internal code.
I use two ROMs with the same data, which simulates a dual read channel.
This unit also has a Control register.
*/
`define USER_CODE_ENABLED 2
//-------------------------------------------------------------------
/*
MemoryUnit

Groups every storage element of a core and multiplexes it onto the EXE, IO
and instruction buses:

  IMEM     input registers:   written by IO  (write address <  SMEM_START_ADDR),
                              read by EXE    (read address  <  SMEM_START_ADDR)
  SMEM     swap memory:       two banks whose EXE/IO roles are exchanged by
                              wFlipSelect; EXE side is used for addresses in
                              [SMEM_START_ADDR, RMEM_START_ADDR)
  RMEM     general registers: EXE only, write addresses in
                              [RMEM_START_ADDR, OMEM_START_ADDR),
                              read for any address >= RMEM_START_ADDR
  OMEM     output registers:  written by EXE (write address >= OMEM_START_ADDR),
                              read by IO     (read address  >= OMEM_START_ADDR)
  INST_MEM user instruction RAM, selected when the MSB of the (registered)
           instruction read address is 1
  IROM/IROM2 two copies of the default-code ROM (one per read channel),
           selected when that MSB is 0
  CR       control register

Ports:
  Clock, Reset               clock and synchronous reset for the registers here
  iFlipMemory                enable for the 1-bit counter that drives wFlipSelect
  i*_EXE / o*_EXE            data bus of the EXE unit (two read channels, one write)
  i*_IO  / o*_IO             data bus of the IO unit  (two read channels, one write)
  iInstruction*              instruction write port and the two read addresses
  oInstruction1/2            instruction read data (RAM or ROM, see above)
  iDebug_CoreID              (DEBUG builds only) forwarded to the ROM instances
  iControlRegister /
  oControlRegister           input and registered output of the control register

NOTE(review): the data read muxes below are steered by the *current* read
address while the RAM outputs are registered; this presumably relies on the
requester holding its address stable - confirm against the EXE/IO units.
NOTE(review): IO writes reach the SMEM IO-side port with an unqualified
iDataWriteEnable_IO (no address-range check) - confirm this is intended.
NOTE(review): INST_MEM is read with the address MSB forced to 0 but written
with the full iInstructionWriteAddress - confirm the writer never sets the MSB.
*/
module MemoryUnit
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iFlipMemory,

    //Data bus for EXE Unit
    input  wire                             iDataWriteEnable_EXE,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataReadAddress1_EXE,
    output wire [`DATA_ROW_WIDTH-1:0]       oData1_EXE,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataReadAddress2_EXE,
    output wire [`DATA_ROW_WIDTH-1:0]       oData2_EXE,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataWriteAddress_EXE,
    input  wire [`DATA_ROW_WIDTH-1:0]       iData_EXE,

    //Data bus for IO Unit
    input  wire                             iDataWriteEnable_IO,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataReadAddress1_IO,
    output wire [`DATA_ROW_WIDTH-1:0]       oData1_IO,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataReadAddress2_IO,
    output wire [`DATA_ROW_WIDTH-1:0]       oData2_IO,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataWriteAddress_IO,
    input  wire [`DATA_ROW_WIDTH-1:0]       iData_IO,

    //Instruction bus
    input  wire                             iInstructionWriteEnable,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iInstructionReadAddress1,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iInstructionReadAddress2,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iInstructionWriteAddress,
    input  wire [`INSTRUCTION_WIDTH-1:0]    iInstruction,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction1,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction2,

`ifdef DEBUG
    input  wire [`MAX_CORES-1:0]            iDebug_CoreID,
`endif

    //Control Register
    input  wire [15:0]                      iControlRegister,
    output wire [15:0]                      oControlRegister
);

//-------------------------------------------------------------------
/*
 Instruction source selection.
 The MSB of each instruction read address is registered for one clock so
 that it lines up with the registered RAM/ROM read data, and then picks
 the user RAM (1) or the default ROM (0).
*/
wire [`INSTRUCTION_WIDTH-1:0] wIMEM2_IMUX__DataOut1, wIMEM2_IMUX__DataOut2;
wire [`INSTRUCTION_WIDTH-1:0] wIROM2_IMUX__DataOut1, wIROM2_IMUX__DataOut2;
wire wInstructionSelector, wInstructionSelector2;

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-1] ),
    .Q( wInstructionSelector )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-1] ),
    .Q( wInstructionSelector2 )
);

assign oInstruction1 = (wInstructionSelector == 1) ?
    wIMEM2_IMUX__DataOut1 : wIROM2_IMUX__DataOut1;

assign oInstruction2 = (wInstructionSelector2 == 1) ?
    wIMEM2_IMUX__DataOut2 : wIROM2_IMUX__DataOut2;

//-------------------------------------------------------------------
/*
 Data memory.
*/
`define SMEM_START_ADDR `DATA_ADDRESS_WIDTH'd32
`define RMEM_START_ADDR `DATA_ADDRESS_WIDTH'd64
`define OMEM_START_ADDR `DATA_ADDRESS_WIDTH'd128

wire wDataWriteEnable_RMEM, wDataWriteEnable_SMEM, wDataWriteEnable_IMEM, wDataWriteEnable_OMEM;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataWriteAddress_RMEM, wDataWriteAddress_SMEM;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataReadAddress_RMEM1, wDataReadAddress_RMEM2;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataReadAddress_SMEM1, wDataReadAddress_SMEM2;
wire [`DATA_ROW_WIDTH-1:0] wData_SMEM1, wData_SMEM2, wData_RMEM1, wData_RMEM2, wData_IMEM1, wData_IMEM2;
wire [`DATA_ROW_WIDTH-1:0] wIOData_SMEM1, wIOData_SMEM2, wData_OMEM1, wData_OMEM2;

// Bank selector for the swap memory. It must be declared (and is driven)
// before the SMEM instance that uses it: referencing it first would create
// an implicit net and make the explicit declaration a redeclaration.
wire wFlipSelect;
UPCOUNTER_POSEDGE # (1) UPC1
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Initial( 1'b0 ),
    .Enable( iFlipMemory ),
    .Q( wFlipSelect )
);

/*
always @ (posedge Clock)
begin
if (wDataWriteEnable_OMEM)
$display("%dns OMEM Writting %h to Addr %d (%h)",
$time,iData_EXE,iDataWriteAddress_EXE,iDataWriteAddress_EXE);
//if (iDataReadAddress1_IO >= 130)
//$display("%dns OMEM Readin %h from %d (%h)",
//$time,wData_OMEM1,iDataReadAddress1_IO,iDataReadAddress1_IO);
end
*/

// Write-enable decode: each memory only sees the write strobe when the
// write address falls inside its own range.
assign wDataWriteEnable_OMEM =
    (iDataWriteAddress_EXE >= `OMEM_START_ADDR )
    ? iDataWriteEnable_EXE : 1'b0;

assign wDataWriteEnable_IMEM =
    (iDataWriteAddress_IO < `SMEM_START_ADDR )
    ? iDataWriteEnable_IO : 1'b0;

assign wDataWriteEnable_SMEM =
    (iDataWriteAddress_EXE >= `SMEM_START_ADDR && iDataWriteAddress_EXE < `RMEM_START_ADDR)
    ? iDataWriteEnable_EXE : 1'b0;

assign wDataWriteEnable_RMEM =
    (iDataWriteAddress_EXE >= `RMEM_START_ADDR && iDataWriteAddress_EXE < `OMEM_START_ADDR)
    ? iDataWriteEnable_EXE : 1'b0;

// RMEM and the EXE side of SMEM are addressed directly by the EXE bus.
assign wDataWriteAddress_RMEM = iDataWriteAddress_EXE;
assign wDataReadAddress_RMEM1 = iDataReadAddress1_EXE;
assign wDataReadAddress_RMEM2 = iDataReadAddress2_EXE;
assign wDataWriteAddress_SMEM = iDataWriteAddress_EXE;
assign wDataReadAddress_SMEM1 = iDataReadAddress1_EXE;
assign wDataReadAddress_SMEM2 = iDataReadAddress2_EXE;

// EXE read muxes: IMEM below SMEM_START_ADDR, SMEM below RMEM_START_ADDR,
// RMEM for everything else.
assign oData1_EXE = ( iDataReadAddress1_EXE < `RMEM_START_ADDR ) ?
    ( ( iDataReadAddress1_EXE < `SMEM_START_ADDR ) ? wData_IMEM1 : wData_SMEM1 )
    : wData_RMEM1;

assign oData2_EXE = ( iDataReadAddress2_EXE < `RMEM_START_ADDR ) ?
    ( ( iDataReadAddress2_EXE < `SMEM_START_ADDR ) ? wData_IMEM2 : wData_SMEM2 )
    : wData_RMEM2;

// IO read muxes: IO side of SMEM below OMEM_START_ADDR, OMEM otherwise.
assign oData1_IO = ( iDataReadAddress1_IO < `OMEM_START_ADDR ) ? wIOData_SMEM1 : wData_OMEM1;
assign oData2_IO = ( iDataReadAddress2_IO < `OMEM_START_ADDR ) ? wIOData_SMEM2 : wData_OMEM2;

//Output registers written by EXE, Read by IO
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) OMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_OMEM ),
    .iReadAddress0( iDataReadAddress1_IO ),
    .iReadAddress1( iDataReadAddress2_IO ),
    .iWriteAddress( iDataWriteAddress_EXE ),
    .iDataIn( iData_EXE ),
    .oDataOut0( wData_OMEM1 ),
    .oDataOut1( wData_OMEM2 )
);

//Input Registers, Written by IO, Read by EXE
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,42) IMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_IMEM ),
    .iReadAddress0( iDataReadAddress1_EXE ),
    .iReadAddress1( iDataReadAddress2_EXE ),
    .iWriteAddress( iDataWriteAddress_IO ),
    .iDataIn( iData_IO ),
    .oDataOut0( wData_IMEM1 ),
    .oDataOut1( wData_IMEM2 )
);

//Swap registers: while IO reads/writes one bank, EXE reads/writes the other;
//the pointers get flipped in the next iteration
SWAP_MEM # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) SMEM
(
    .Clock( Clock ),
    .iSelect( wFlipSelect ),
    .iWriteEnableA( wDataWriteEnable_SMEM ),
    .iReadAddressA0( wDataReadAddress_SMEM1 ),
    .iReadAddressA1( wDataReadAddress_SMEM2 ),
    .iWriteAddressA( wDataWriteAddress_SMEM ),
    .iDataInA( iData_EXE ),
    .oDataOutA0( wData_SMEM1 ),
    .oDataOutA1( wData_SMEM2 ),
    .iWriteEnableB( iDataWriteEnable_IO ),
    .iReadAddressB0( iDataReadAddress1_IO ),
    .iReadAddressB1( iDataReadAddress2_IO ),
    .iWriteAddressB( iDataWriteAddress_IO ),
    .iDataInB( iData_IO ),
    .oDataOutB0( wIOData_SMEM1 ),
    .oDataOutB1( wIOData_SMEM2 )
);

//General purpose registers, EXE can R/W, IO can not see these sections
//of the memory
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,256) RMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_RMEM ),
    .iReadAddress0( wDataReadAddress_RMEM1 ),
    .iReadAddress1( wDataReadAddress_RMEM2 ),
    .iWriteAddress( wDataWriteAddress_RMEM ),
    .iDataIn( iData_EXE ),
    .oDataOut0( wData_RMEM1 ),
    .oDataOut1( wData_RMEM2 )
);

//-------------------------------------------------------------------
/*
 Instruction memory.
 Read addresses have their MSB forced to zero: that bit only selects
 between RAM and ROM (see the selector flip-flops above).
*/
RAM_DUAL_READ_PORT # (`INSTRUCTION_WIDTH,`ROM_ADDRESS_WIDTH,512) INST_MEM
(
    .Clock( Clock ),
    .iWriteEnable( iInstructionWriteEnable ),
    .iReadAddress0( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
    .iReadAddress1( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
    .iWriteAddress( iInstructionWriteAddress ),
    .iDataIn( iInstruction ),
    .oDataOut0( wIMEM2_IMUX__DataOut1 ),
    .oDataOut1( wIMEM2_IMUX__DataOut2 )
);

//-------------------------------------------------------------------
/*
 Default code stored in ROM.
*/
wire [`INSTRUCTION_WIDTH-1:0] wRomDelay1, wRomDelay2;

//In the real world a ROM will take at least 1 clock cycle;
//since ROMs are not synthesizable, it won't hurt to add
//this delay
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDA
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( wRomDelay1 ),
    .Q( wIROM2_IMUX__DataOut1 )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDB
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( wRomDelay2 ),
    .Q( wIROM2_IMUX__DataOut2 )
);

//The reason I put two ROMs is because I need to read 2 different Instruction
//addresses at the same time (branch-taken and branch-not-taken) and I am not
//sure how to write a dual read channel ROM this way...
ROM IROM
(
    .Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
    .iDebug_CoreID( iDebug_CoreID ),
`endif
    .I( wRomDelay1 )
);

ROM IROM2
(
    .Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
    .iDebug_CoreID( iDebug_CoreID ),
`endif
    .I( wRomDelay2 )
);

//--------------------------------------------------------
ControlRegister CR
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iControlRegister( iControlRegister ),
    .oControlRegister( oControlRegister )
);

endmodule
//-------------------------------------------------------------------
/MEM/Module_RAM.v
0,0 → 1,80
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
//Dual port RAM.
 
 
// RAM_DUAL_READ_PORT
// Synchronous RAM with one write port and two independent read ports.
//   DATA_WIDTH : bits per word
//   ADDR_WIDTH : bits per address
//   MEM_SIZE   : highest valid index (the array holds MEM_SIZE+1 words)
// On every rising Clock edge the word at iWriteAddress is updated when
// iWriteEnable is set, and both read outputs are refreshed from the
// array. All assignments are non-blocking, so a read of the address
// being written in the same cycle returns the previous contents.
module RAM_DUAL_READ_PORT
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input  wire                  Clock,
    input  wire                  iWriteEnable,
    input  wire [ADDR_WIDTH-1:0] iReadAddress0,
    input  wire [ADDR_WIDTH-1:0] iReadAddress1,
    input  wire [ADDR_WIDTH-1:0] iWriteAddress,
    input  wire [DATA_WIDTH-1:0] iDataIn,
    output reg  [DATA_WIDTH-1:0] oDataOut0,
    output reg  [DATA_WIDTH-1:0] oDataOut1
);

    // Storage array.
    reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

    always @(posedge Clock)
    begin
        // Registered reads, one per channel.
        oDataOut0 <= Ram[iReadAddress0];
        oDataOut1 <= Ram[iReadAddress1];

        // Single write port.
        if (iWriteEnable)
        begin
            Ram[iWriteAddress] <= iDataIn;
        end
    end

endmodule
//--------------------------------------------------------
 
// RAM_SINGLE_READ_PORT
// Synchronous RAM with one write port and one read port.
//   DATA_WIDTH : bits per word
//   ADDR_WIDTH : bits per address
//   MEM_SIZE   : highest valid index (the array holds MEM_SIZE+1 words)
// On every rising Clock edge the word at iWriteAddress is updated when
// iWriteEnable is set, and oDataOut0 is refreshed from iReadAddress0.
// Non-blocking assignments mean a same-cycle read of the written
// address returns the previous contents.
module RAM_SINGLE_READ_PORT
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input  wire                  Clock,
    input  wire                  iWriteEnable,
    input  wire [ADDR_WIDTH-1:0] iReadAddress0,
    input  wire [ADDR_WIDTH-1:0] iWriteAddress,
    input  wire [DATA_WIDTH-1:0] iDataIn,
    output reg  [DATA_WIDTH-1:0] oDataOut0
);

    // Storage array.
    reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

    always @(posedge Clock)
    begin
        // Registered read.
        oDataOut0 <= Ram[iReadAddress0];

        // Write port.
        if (iWriteEnable)
        begin
            Ram[iWriteAddress] <= iDataIn;
        end
    end

endmodule
 
 
/MEM/Module_SwapMemory.v
0,0 → 1,90
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
// SWAP_MEM
// Two identical dual-read-port RAMs (MEM_A, MEM_B) shared by two clients
// (port group "A" and port group "B"). iSelect decides which RAM each
// client is attached to:
//   iSelect = 1 : client A <-> MEM_A, client B <-> MEM_B
//   iSelect = 0 : client A <-> MEM_B, client B <-> MEM_A
// Every input (write enable, addresses, write data) and every read output
// is routed through a 2:1 mux controlled by iSelect, so toggling iSelect
// exchanges the two RAMs between the clients.
module SWAP_MEM
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input  wire                  Clock,
    input  wire                  iSelect,
    // Client A
    input  wire                  iWriteEnableA,
    input  wire [ADDR_WIDTH-1:0] iReadAddressA0,
    input  wire [ADDR_WIDTH-1:0] iReadAddressA1,
    input  wire [ADDR_WIDTH-1:0] iWriteAddressA,
    input  wire [DATA_WIDTH-1:0] iDataInA,
    output wire [DATA_WIDTH-1:0] oDataOutA0,
    output wire [DATA_WIDTH-1:0] oDataOutA1,
    // Client B
    input  wire                  iWriteEnableB,
    input  wire [ADDR_WIDTH-1:0] iReadAddressB0,
    input  wire [ADDR_WIDTH-1:0] iReadAddressB1,
    input  wire [ADDR_WIDTH-1:0] iWriteAddressB,
    input  wire [DATA_WIDTH-1:0] iDataInB,
    output wire [DATA_WIDTH-1:0] oDataOutB0,
    output wire [DATA_WIDTH-1:0] oDataOutB1
);

    //---------------------------------------------------------------
    // Signals seen by physical RAM "A"
    //---------------------------------------------------------------
    wire                  wWriteEnableA;
    wire [ADDR_WIDTH-1:0] wReadAddressA0;
    wire [ADDR_WIDTH-1:0] wReadAddressA1;
    wire [ADDR_WIDTH-1:0] wWriteAddressA;
    wire [DATA_WIDTH-1:0] wDataInA;
    wire [DATA_WIDTH-1:0] wDataOutA0;
    wire [DATA_WIDTH-1:0] wDataOutA1;

    assign wWriteEnableA  = iSelect ? iWriteEnableA  : iWriteEnableB;
    assign wReadAddressA0 = iSelect ? iReadAddressA0 : iReadAddressB0;
    assign wReadAddressA1 = iSelect ? iReadAddressA1 : iReadAddressB1;
    assign wWriteAddressA = iSelect ? iWriteAddressA : iWriteAddressB;
    assign wDataInA       = iSelect ? iDataInA       : iDataInB;

    //---------------------------------------------------------------
    // Signals seen by physical RAM "B" (the complementary choice)
    //---------------------------------------------------------------
    wire                  wWriteEnableB;
    wire [ADDR_WIDTH-1:0] wReadAddressB0;
    wire [ADDR_WIDTH-1:0] wReadAddressB1;
    wire [ADDR_WIDTH-1:0] wWriteAddressB;
    wire [DATA_WIDTH-1:0] wDataInB;
    wire [DATA_WIDTH-1:0] wDataOutB0;
    wire [DATA_WIDTH-1:0] wDataOutB1;

    assign wWriteEnableB  = iSelect ? iWriteEnableB  : iWriteEnableA;
    assign wReadAddressB0 = iSelect ? iReadAddressB0 : iReadAddressA0;
    assign wReadAddressB1 = iSelect ? iReadAddressB1 : iReadAddressA1;
    assign wWriteAddressB = iSelect ? iWriteAddressB : iWriteAddressA;
    assign wDataInB       = iSelect ? iDataInB       : iDataInA;

    //---------------------------------------------------------------
    // Read data back to the clients
    //---------------------------------------------------------------
    assign oDataOutA0 = iSelect ? wDataOutA0 : wDataOutB0;
    assign oDataOutA1 = iSelect ? wDataOutA1 : wDataOutB1;
    assign oDataOutB0 = iSelect ? wDataOutB0 : wDataOutA0;
    assign oDataOutB1 = iSelect ? wDataOutB1 : wDataOutA1;

    //---------------------------------------------------------------
    // The two physical memories
    //---------------------------------------------------------------
    RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_A
    (
        .Clock( Clock ),
        .iWriteEnable( wWriteEnableA ),
        .iWriteAddress( wWriteAddressA ),
        .iDataIn( wDataInA ),
        .iReadAddress0( wReadAddressA0 ),
        .iReadAddress1( wReadAddressA1 ),
        .oDataOut0( wDataOutA0 ),
        .oDataOut1( wDataOutA1 )
    );

    RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_B
    (
        .Clock( Clock ),
        .iWriteEnable( wWriteEnableB ),
        .iWriteAddress( wWriteAddressB ),
        .iDataIn( wDataInB ),
        .iReadAddress0( wReadAddressB0 ),
        .iReadAddress1( wReadAddressB1 ),
        .oDataOut0( wDataOutB0 ),
        .oDataOut1( wDataOutB1 )
    );

endmodule
/MEM/Module_ControlRegister.v
0,0 → 1,28
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//-------------------------------------------------------------------
// ControlRegister
// 16-bit register with synchronous, active-high reset.
// On each rising Clock edge the register is cleared when Reset is set,
// otherwise it captures iControlRegister. oControlRegister always shows
// the stored value.
module ControlRegister
(
    input  wire        Clock,
    input  wire        Reset,
    input  wire [15:0] iControlRegister,
    output wire [15:0] oControlRegister
);

    // Stored control word.
    reg [15:0] rControlRegister;

    always @ (posedge Clock)
    begin
        if ( Reset )
        begin
            rControlRegister <= 16'b0;
        end
        else
        begin
            rControlRegister <= iControlRegister;
        end
    end

    // Expose the stored value.
    assign oControlRegister = rControlRegister;

endmodule
//-------------------------------------------------------------------
/IO/Unit_IO.v
0,0 → 1,319
`timescale 1ns / 1ps
`include "aDefinitions.v"
`define ADR_IMM 1
`define ADR_POINTER 0
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------------------------
// IO_Unit
// Structural wrapper around the core's external interfaces. It instantiates:
//   OMI      (Module_OMemInterface) - output-memory write interface
//   TMI      (Module_TMemInterface) - texture-memory read interface
//   MEMToWBM (MEM2WBMUnitB)         - reads pointers/data from internal memory
//   WBM      (WishBoneMasterUnit)   - Wishbone master
//   WBS      (WishBoneSlaveUnit)    - Wishbone slave
// and muxes the internal data-memory write bus between the slave path
// (MST_I == 1) and the master read-back path (MST_I == 0).
//
// NOTE(review): wWBMToMEM2__Done, wWBMToMEM2__oDataWriteEnable,
// wWBMToMEM2__oDataBus and wTemp1 are read below but no instance or assign
// in this module drives them - confirm whether a WBM-to-MEM block is missing.
// NOTE(review): ADR_I is declared as an output yet is only connected to the
// slave's ADR_I pin, and the inout port oData is never referenced here -
// confirm the intended directions against the top level.
// NOTE(review): iStore and iWriteBack_Set are not referenced inside this module.
module IO_Unit
(
input wire Clock,
input wire Reset,
input wire iEnable,
input wire [`DATA_ADDRESS_WIDTH-1:0] iDat_O_Pointer, //Pointer to what we want to send via DAT_O
input wire [`WIDTH-1:0] iAdr_O_Imm, //Value to assign to ADR_O
input wire [`DATA_ADDRESS_WIDTH-1:0] iAdr_O_Pointer, //Pointer to value to assing to ADR_O
input wire iAdr_O_Type, //Should we use iAdr_O_Imm or iAdr_O_Pointer
input wire iAdr_O_Set, //Should we set
input wire iBusCyc_Type, //Bus cycle type: simple read/write, etc.
input wire iStore, //Should we store read data into MEM
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus, //MEM Data read bus 1
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus2, //MEM Data read bus 2
input wire[`DATA_ADDRESS_WIDTH-1:0] iAdr_DataWriteBack, //Where in MEM we want to store DAT_I
input wire iWriteBack_Set, //We want to set the Write back Address?
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress2,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire oDataWriteEnable,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oInstructionWriteEnable,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress,
inout wire [`WIDTH-1:0] oData,
output wire oBusy,
output wire oDone,
//Output memory (OMEM) write interface, forwarded to the OMI instance
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteAddress,
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteData,
input wire iOMEM_WriteEnable,
output wire [`WB_WIDTH-1:0] OMEM_DAT_O,
output wire [`WB_WIDTH-1:0] OMEM_ADR_O,
output wire OMEM_WE_O,
//Theia specific interfaces
input wire MST_I,
//Wish Bone Interfaces
output wire [31:0] DAT_O,
input wire [31:0] DAT_I,
input wire ACK_I,
output wire ACK_O,
output wire [31:0] ADR_O,
output wire [31:0] ADR_I,
output wire WE_O,
input wire WE_I,
output wire STB_O,
input wire STB_I,
output wire CYC_O,
input wire CYC_I,
input wire [1:0] TGA_I,
output wire [1:0] TGC_O,
input wire GNT_I,


//Texture memory (TMEM) read interface, forwarded to the TMI instance
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadData,
input wire iTMEMDataRequest,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadAddress,
output wire oTMEMDataAvailable,

input wire TMEM_ACK_I,
input wire [`WB_WIDTH-1:0] TMEM_DAT_I ,
output wire [`WB_WIDTH-1:0] TMEM_ADR_O ,
output wire TMEM_WE_O,
output wire TMEM_STB_O,
output wire TMEM_CYC_O,
input wire TMEM_GNT_I
);


wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement;
wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement2;
wire wMEMToWBM_2__Enable;
wire wWBMToMEM2__Done;
wire wWBM_2_WBMToMEM_DataAvailable;
wire [`WIDTH-1:0] wWBM_2_WBMToMEM_Data;
wire [`WIDTH-1:0] wWBS_2__WBMToMEM_Frame;
wire wWBMToMEM_2_WBM_Enable;
wire [`WIDTH-1:0] wWBMToMEM_2_WBM_Address;
wire wWBMToMEM2__oDataWriteEnable;
wire wAddrerssSelector2_oDataWriteEnable;
wire [`DATA_ROW_WIDTH-1:0] wWBMToMEM2__oDataBus;
wire [`DATA_ROW_WIDTH-1:0] wWBSToMEM2__oDataBus;
wire wAddressSelector_2__SetAddress;
wire [`WIDTH-1:0] wMEMToWBM_2__Address;
wire wMEMToWBM_2__Done;
wire w2WBMToMEM__Enable;
wire w2WBMToMEM__SetAddress;
wire wWBS_2__WBSToMEM_FrameAvailable;
wire[`WIDTH-1:0] wWBS_2__WBMToMEM_Address;
wire wWBSToMEM2__oDataWriteEnable;
wire[`DATA_ADDRESS_WIDTH-1:0] wWBSToMEM2__oDataWriteAddress;
wire[`DATA_ADDRESS_WIDTH-1:0] wWBMToMEM2__oDataWriteAddress;



//***********new*****************/


//Output memory interface: pass-through of the iOMEM_* ports
Module_OMemInterface OMI
(
.Clock( Clock ),
.Reset( Reset ),
.iWriteEnable( iOMEM_WriteEnable ),
.iData( iOMEM_WriteData ),
.iAddress( iOMEM_WriteAddress ),
.ADR_O( OMEM_ADR_O ),
.DAT_O( OMEM_DAT_O ),
.WE_O( OMEM_WE_O )
);

//Texture memory interface: pass-through of the iTMEM*/TMEM_* ports
Module_TMemInterface TMI
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( iTMEMDataRequest ),
.iAddress( iTMEMReadAddress ),
.oData( oTMEMReadData ),
.oDone( oTMEMDataAvailable ),

.ACK_I( TMEM_ACK_I ),
.GNT_I( TMEM_GNT_I ),
.DAT_I( TMEM_DAT_I ),
.ADR_O( TMEM_ADR_O ),
.WE_O( TMEM_WE_O ),
.STB_O( TMEM_STB_O ),
.CYC_O( TMEM_CYC_O )

);
//***********new*****************/

//The unit reports busy for as long as the Wishbone master drives CYC_O
assign oBusy = CYC_O;
//wReadOperation is computed here but not used anywhere else in this module
wire wReadOperation;
assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? 0 : 1;

//External address source: the immediate, or the element read back from MEM
assign wMEMToWBM_2__Address = ( iAdr_O_Type == `ADR_IMM ) ? iAdr_O_Imm : wMEMToWBM2__ReadDataElement;
//w2WBMToMEM__Enable is assigned here but not read anywhere else in this module
assign w2WBMToMEM__Enable = ( iAdr_O_Type == `ADR_IMM ) ? iEnable : wMEMToWBM_2__Enable;
//assign oDone = ( (iAdr_O_Type == `ADR_IMM) && !(iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) )
//? wWBMToMEM2__Done : wMEMToWBM_2__Done;

//TODO: WHEN ADR_POINTER Then Done is not until we got the 3 values from X,Y,Z in iAdr_O_Pointer
//NOTE(review): for immediate-address reads oDone comes from wWBMToMEM2__Done,
//which has no visible driver in this module
assign oDone = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE || iAdr_O_Type == `ADR_POINTER ) ? wMEMToWBM_2__Done : wWBMToMEM2__Done;

//Data-memory write bus: slave path when MST_I is 1, master read-back path otherwise
assign oDataWriteEnable = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteEnable : (wWBMToMEM2__oDataWriteEnable);// ^ wAddrerssSelector2_oDataWriteEnable);
assign oDataWriteAddress = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteAddress : wWBMToMEM2__oDataWriteAddress;
assign oDataBus = (MST_I == 1'b1) ? wWBSToMEM2__oDataBus : wWBMToMEM2__oDataBus;



wire [`DATA_ADDRESS_WIDTH-1:0] wMEMToWBM2_WBMToMEM_RAMWriteAddr;
wire [`DATA_ADDRESS_WIDTH-1:0] w2WBMToMEM_MEMWriteAddress;

//w2WBMToMEM_MEMWriteAddress is assigned here but not read anywhere else in this module
assign w2WBMToMEM_MEMWriteAddress = ( iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wMEMToWBM2_WBMToMEM_RAMWriteAddr;

//"Next element" request for MEMToWBM: ACK_I on writes, wWBMToMEM2__Done otherwise
wire w2MEMToWBM_BusOperationComplete;
assign w2MEMToWBM_BusOperationComplete = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? ACK_I : wWBMToMEM2__Done;


//First MEM read pointer: the data pointer on writes, the address pointer otherwise
wire [`DATA_ADDRESS_WIDTH-1:0] w2MEMToWBM_DataPointer;
assign w2MEMToWBM_DataPointer = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? iDat_O_Pointer : iAdr_O_Pointer;


//------------------------------------------------------------------------------
//Enabled when the unit is enabled and either the address is a pointer
//(iAdr_O_Type == 0) or iBusCyc_Type is set
MEM2WBMUnitB MEMToWBM
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( iEnable & (~iAdr_O_Type | iBusCyc_Type) ),
.iMEMDataPointer( w2MEMToWBM_DataPointer ),
.iMEMDataPointer2( iAdr_O_Pointer ),
.iReadDataBus( iReadDataBus ), //3 Elements comming from DMEM
.iReadDataBus2( iReadDataBus2 ),
.oReadDataElement( wMEMToWBM2__ReadDataElement ), //1 out of 3 elements we read
.oReadDataElement2( wMEMToWBM2__ReadDataElement2 ), //1 out of 3 elements we read
.oDataReadAddress( oDataReadAddress ),
.oDataReadAddress2( oDataReadAddress2 ),
.oDataWriteEnable( wAddrerssSelector2_oDataWriteEnable ), //Always zero
.oDataAvailable( wMEMToWBM_2__Enable ), //Data from MEM available
.iRequestNextElement( w2MEMToWBM_BusOperationComplete ),
.iDataInitialStorageAddress( iAdr_DataWriteBack ), ////########
.oDataWriteAddress( wMEMToWBM2_WBMToMEM_RAMWriteAddr ), ////########
.oDone( wMEMToWBM_2__Done )
);
//------------------------------------------------------------------------------





//NOTE(review): wTemp1 has no driver in this module, so the pointer-mode
//write address below is undriven
wire [`DATA_ADDRESS_WIDTH-1:0] wTemp1;
assign wWBMToMEM2__oDataWriteAddress = (iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wTemp1;


//Initial ADR_O value handed to the master: second MEM element on writes,
//otherwise the immediate/pointer address selected above
wire [`WIDTH-1:0] wADR_O_InitialAddress;
assign wADR_O_InitialAddress = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM2__ReadDataElement2 : wMEMToWBM_2__Address;
//wIncrement_Address_O is computed here but not used anywhere else in this module
wire wIncrement_Address_O;
assign wIncrement_Address_O = iEnable & ACK_I;



wire wMEMToWBM2__Done;
wire wMEMToWBM2__Trigger;
wire[`WB_WIDTH-1:0] wMEMToWBM_2_Data;
wire w2MEMToWBM__Trigger;
wire wWBM2_MEMToWBM_DataWriteDone;


wire w2WBM_iEnable;

//On writes the master is only enabled once MEMToWBM reports data available
assign w2WBM_iEnable = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM_2__Enable : iEnable;

//------------------------------------------------------------------------------
wire wSTB_O;

//If the address is a pointer, we need 1 cycle to read the data back from MEM
//before we can the set the value into WBM
wire wAddress_Set_Delayed;
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_SetDelay
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( iAdr_O_Set ),
.Q( wAddress_Set_Delayed )
);

//If the Addr is IMM then just set it whenever iAdr_O_Set is set, but if we have a pointer, then use
//wAddress_Set_Delayed at the beginning and then wWBMToMEM2__Done
wire wWBM_iAddress_Set = (iAdr_O_Type == `ADR_POINTER) ? (wAddress_Set_Delayed | wWBMToMEM2__Done) : iAdr_O_Set;

//The external strobe is the master's strobe gated off once oDone is asserted
assign STB_O = wSTB_O & ~oDone;

WishBoneMasterUnit WBM
(
.CLK_I( Clock ),
.RST_I( Reset ),
.DAT_I( DAT_I ),
.DAT_O( DAT_O ),
.ACK_I( ACK_I ),
.ADR_O( ADR_O ),
.WE_O( WE_O ),
.STB_O( wSTB_O ),
.CYC_O( CYC_O ),
.TGC_O( TGC_O ),
.GNT_I( GNT_I ),
.iEnable( w2WBM_iEnable ),
.iBusCyc_Type( iBusCyc_Type ),
.iAddress_Set( wWBM_iAddress_Set ),
.iAddress( wADR_O_InitialAddress ),
.oDataReady( wWBM_2_WBMToMEM_DataAvailable ),
.iData( wMEMToWBM2__ReadDataElement ),
.oData( wWBM_2_WBMToMEM_Data )
);
//------------------------------------------------------------------------------
//Wishbone slave: feeds the slave-side data write bus and the instruction write bus
WishBoneSlaveUnit WBS
(

.CLK_I( Clock ),
.RST_I( Reset ),
.STB_I( STB_I ),
.WE_I( WE_I ),
.DAT_I( DAT_I ),
.ADR_I( ADR_I ),
.TGA_I( TGA_I ),
.ACK_O( ACK_O ),
.CYC_I( CYC_I ),
.MST_I( MST_I ),
.oDataBus( wWBSToMEM2__oDataBus ),
.oInstructionBus( oInstructionBus ),
.oDataWriteAddress( wWBSToMEM2__oDataWriteAddress ),
.oDataWriteEnable( wWBSToMEM2__oDataWriteEnable ),
.oInstructionWriteAddress( oInstructionWriteAddress ),
.oInstructionWriteEnable( oInstructionWriteEnable )


);
//------------------------------------------------------------------------------


endmodule
//--------------------------------------------------------------------------
/IO/Module_OMemInterface.v
0,0 → 1,47
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
// Module_OMemInterface
// Serialises one wide internal row (iAddress / iData) into 32-bit words on
// the ADR_O / DAT_O outputs.
//   WE_O  mirrors iWriteEnable.
//   ADR_O / DAT_O are bits [95:64], [63:32] or [31:0] of iAddress / iData,
//   picked by the 3-bit selector wCurrentWord (mux inputs I1, I2, I3
//   respectively).
//   wCurrentWord comes from the SHL instance, which is enabled by
//   iWriteEnable and starts from 3'b1 - presumably a one-hot value that
//   rotates by one position per enabled clock; confirm against
//   CIRCULAR_SHIFTLEFT_POSEDGE.
module Module_OMemInterface
(
    input  wire                        Clock,
    input  wire                        Reset,
    input  wire                        iWriteEnable,
    input  wire [`DATA_ROW_WIDTH-1:0]  iData,
    input  wire [`DATA_ROW_WIDTH-1:0]  iAddress,
    output wire [`WB_WIDTH-1:0]        ADR_O,
    output wire [`WB_WIDTH-1:0]        DAT_O,
    output wire                        WE_O
);

    // Word selector shared by the address and data muxes.
    wire [2:0] wCurrentWord;

    CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( iWriteEnable ),
        .Initial( 3'b1 ),
        .O( wCurrentWord )
    );

    // Address word for the current beat.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
    (
        .Sel( wCurrentWord ),
        .I1( iAddress[95:64] ),
        .I2( iAddress[63:32] ),
        .I3( iAddress[31:0] ),
        .O1( ADR_O )
    );

    // Data word for the current beat.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX2
    (
        .Sel( wCurrentWord ),
        .I1( iData[95:64] ),
        .I2( iData[63:32] ),
        .I3( iData[31:0] ),
        .O1( DAT_O )
    );

    // The write strobe is passed straight through.
    assign WE_O = iWriteEnable;

endmodule
/IO/Module_TMemInterface.v
0,0 → 1,109
`timescale 1ns / 1ps
`include "aDefinitions.v"
//--------------------------------------------------------------------------
// Module_TMemInterface
// Read-only interface that fetches three 32-bit words and assembles them
// into one wide row on oData.
//   iEnable   request; also drives CYC_O directly and releases the SHL reset
//   iAddress  three packed addresses, [95:64], [63:32], [31:0]
//   oData     three packed results,   [95:64], [63:32], [31:0]
//   oDone     registered copy of wCurrentWord[3], gated with iEnable
//   GNT_I     advances the sequencer and qualifies every data capture
//   DAT_I     incoming word
//   ADR_O     address word picked by wCurrentWord[2:0]
//   WE_O      constant 0 (reads only)
// The 4-bit sequencer SHL starts from 4'b1, is held in reset while iEnable
// is low and advances while iEnable & GNT_I is high - presumably a walking
// one; confirm against SHIFTLEFT_POSEDGE. Its lower three bits, delayed by
// one clock (wLatchNow), enable the three capture registers.
// NOTE(review): STB_O has no driver in this module and ACK_I is never read.
module Module_TMemInterface
(
    input  wire                        Clock,
    input  wire                        Reset,
    input  wire                        iEnable,
    input  wire [`DATA_ROW_WIDTH-1:0]  iAddress,
    output wire [`DATA_ROW_WIDTH-1:0]  oData,
    output wire                        oDone,

    input  wire                        ACK_I,
    input  wire                        GNT_I,
    input  wire [`WB_WIDTH-1:0 ]       DAT_I,

    //WB Output Signals
    output wire [`WB_WIDTH-1:0 ]       ADR_O,
    output wire                        WE_O,
    output wire                        STB_O,
    output wire                        CYC_O
);

    //------------------------------------------------------------------
    // Constant / pass-through bus signals
    //------------------------------------------------------------------
    assign WE_O  = 1'b0; //we only read
    assign CYC_O = iEnable;

    //------------------------------------------------------------------
    // Word sequencer
    //------------------------------------------------------------------
    wire [3:0] wCurrentWord;

    SHIFTLEFT_POSEDGE #(4) SHL
    (
        .Clock( Clock ),
        .Reset( Reset | ~iEnable ),
        .Enable( iEnable & GNT_I ),
        .Initial( 4'b1 ),
        .O( wCurrentWord )
    );

    // Address word for the current step.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
    (
        .Sel( wCurrentWord[2:0] ),
        .I1( iAddress[95:64] ),
        .I2( iAddress[63:32] ),
        .I3( iAddress[31:0] ),
        .O1( ADR_O )
    );

    //------------------------------------------------------------------
    // Completion flag: bit 3 of the sequencer, one clock later
    //------------------------------------------------------------------
    wire wDone;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD_DONE
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( wCurrentWord[3] ),
        .Q( wDone )
    );

    assign oDone = wDone & iEnable;

    //------------------------------------------------------------------
    // Capture enables: sequencer bits [2:0], one clock later
    //------------------------------------------------------------------
    wire [2:0] wLatchNow;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 3 ) FFD_LATHCNOW
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( wCurrentWord[2:0] ),
        .Q( wLatchNow )
    );

    //------------------------------------------------------------------
    // Capture registers, one per word of the result row
    //------------------------------------------------------------------
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDX
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( wLatchNow[0] & GNT_I ),
        .D( DAT_I ),
        .Q( oData[95:64] )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDY
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( wLatchNow[1] & GNT_I ),
        .D( DAT_I ),
        .Q( oData[63:32] )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDZ
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( wLatchNow[2] & GNT_I ),
        .D( DAT_I ),
        .Q( oData[31:0] )
    );

endmodule
//--------------------------------------------------------------------------
/IO/Module_MEM2WBM.v
0,0 → 1,124
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
This unit is used when the External Address that comes into IO is not an immediate value,
i.e. it is a value that we need to read from one of our internal memory locations.
Since each internal memory location contains 3 * 32-bit slots, i.e. the X, Y and Z parts of the
memory location, we make three requests for external data, one for each of the X, Y and Z
parts of our internal register. So, summarising, each internal memory location stores 3
external memory addresses to request from WBM. Once the 3 data words have been read from the outside world,
they will get stored back into 3 consecutive internal memory addresses starting from
iDataInitialStorageAddress
*/
//---------------------------------------------------------------------
module MEM2WBMUnitB
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iEnable,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iMEMDataPointer,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iMEMDataPointer2,
    output wire [`WIDTH-1:0]                oReadDataElement,
    output wire [`WIDTH-1:0]                oReadDataElement2,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataReadAddress,           // Address requested from the MEM unit (channel 1)
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataReadAddress2,          // Address requested from the MEM unit (channel 2)
    input  wire [`DATA_ROW_WIDTH-1:0]       iReadDataBus,               // Row returned by the MEM unit (channel 1)
    input  wire [`DATA_ROW_WIDTH-1:0]       iReadDataBus2,              // Row returned by the MEM unit (channel 2)
    output wire                             oDataWriteEnable,
    output wire                             oDataWriteEnable2,
    output wire                             oDataAvailable,
    input  wire                             iRequestNextElement,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataInitialStorageAddress, // Initial address to store data
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataWriteAddress,          // Where to store the values coming from WBM
    output wire                             oDone
);

    //-----------------------------------------------------------------
    // Pass-through / constant outputs
    //-----------------------------------------------------------------

    // This unit only reads from the data memory: both write enables are tied low.
    assign oDataWriteEnable  = 0;
    assign oDataWriteEnable2 = 0;

    // The read pointers and the storage address are forwarded unchanged.
    assign oDataReadAddress  = iMEMDataPointer;
    assign oDataReadAddress2 = iMEMDataPointer2;
    assign oDataWriteAddress = iDataInitialStorageAddress;

    //-----------------------------------------------------------------
    // Element selector
    //-----------------------------------------------------------------

    // Bits [2:0] drive the two element multiplexers below, bit [3] is the
    // "done" flag.
    // NOTE(review): presumably a walking-one (one-hot) pattern, judging by the
    // names of SHIFTLEFT_POSEDGE and the *_WALKINGONE mux -- confirm against
    // their definitions.
    wire [3:0] wXYZSelector;

    assign oDone = wXYZSelector[3];

    // The shifter is clocked by a derived signal (request OR not-enabled),
    // not by Clock, and is held in reset whenever the unit is disabled.
    SHIFTLEFT_POSEDGE #(4) SHL
    (
        .Clock   ( iRequestNextElement | ~iEnable ),
        .Reset   ( ~iEnable | Reset ),
        .Enable  ( 1'b1 ),
        .Initial ( 4'b1 ),
        .O       ( wXYZSelector )
    );

    //-----------------------------------------------------------------
    // Data-available handshake
    //-----------------------------------------------------------------

    // Registered copies of iRequestNextElement and iEnable, both taken
    // through always-enabled instances of the project flip-flop primitive.
    wire wRequestNextElement_Delay;
    wire wEnable_Delay;

    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_x
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( iRequestNextElement ),
        .Q      ( wRequestNextElement_Delay )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_EnableDelay
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( iEnable ),
        .Q      ( wEnable_Delay )
    );

    // Data is flagged available while the unit is enabled, the registered
    // enable is set, and the registered request is not set.
    assign oDataAvailable = iEnable & wEnable_Delay & ~wRequestNextElement_Delay;

    //-----------------------------------------------------------------
    // Element multiplexers (one per read channel)
    //-----------------------------------------------------------------

    // I1 <- bits [95:64], I2 <- bits [63:32], I3 <- bits [31:0] of the row.
    MUXFULLPARALELL_3SEL_WALKINGONE MUXA
    (
        .Sel ( wXYZSelector[2:0] ),
        .I1  ( iReadDataBus[95:64] ),
        .I2  ( iReadDataBus[63:32] ),
        .I3  ( iReadDataBus[31:0]  ),
        .O1  ( oReadDataElement )
    );

    MUXFULLPARALELL_3SEL_WALKINGONE MUXA2
    (
        .Sel ( wXYZSelector[2:0] ),
        .I1  ( iReadDataBus2[95:64] ),
        .I2  ( iReadDataBus2[63:32] ),
        .I3  ( iReadDataBus2[31:0]  ),
        .O1  ( oReadDataElement2 )
    );

endmodule
//---------------------------------------------------------------------
/IO/Module_WishBoneMaster.v
0,0 → 1,147
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
In order to read the geometry, we will behave as a master.
Performing single read bus cycles should be sufficient.
Choosing a 32-bit bus width for simplicity.
*/
 
module WishBoneMasterUnit
(
    // WB input signals
    input  wire                     CLK_I,
    input  wire                     RST_I,
    input  wire                     ACK_I,
    input  wire                     GNT_I,          // Granted signal from bus arbiter
    input  wire [`WB_WIDTH-1:0]     DAT_I,
    output wire [`WB_WIDTH-1:0]     DAT_O,

    // WB output signals
    output wire [`WB_WIDTH-1:0]     ADR_O,
    output wire                     WE_O,
    output wire                     STB_O,
    output wire                     CYC_O,
    // NOTE(review): TGC_O is never driven inside this module -- confirm
    // whether a cycle tag is expected by the rest of the system.
    output wire [1:0]               TGC_O,

    // Signals from inside the GPU
    input  wire                     iEnable,
    input  wire                     iBusCyc_Type,
    input  wire [`WIDTH-1:0]        iAddress,
    input  wire                     iAddress_Set,
    output wire                     oDataReady,
    input  wire [`WIDTH-1:0]        iData,
    output wire [`WIDTH-1:0]        oData
);

    //-----------------------------------------------------------------
    // Cycle qualification
    //-----------------------------------------------------------------
    wire wReadOperation;
    wire wEnable;

    // Requesting a cycle is enough to raise CYC_O (bus ownership request);
    // everything else additionally requires the arbiter grant.
    assign CYC_O   = iEnable;
    assign wEnable = iEnable & GNT_I;

    assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_READ_CYCLE) ? 1 : 0;
    assign WE_O           = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE && wEnable) ? 1 : 0;

    //-----------------------------------------------------------------
    // Strobe generation
    //-----------------------------------------------------------------

    // Registered copy of wEnable.
    wire wEnable_Delayed;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD88
    (
        .Clock  ( CLK_I ),
        .Reset  ( RST_I ),
        .Enable ( 1'b1 ),
        .D      ( wEnable ),
        .Q      ( wEnable_Delayed )
    );

    // Strobe only once the registered enable is set, while we are still
    // enabled and granted, and drop it as soon as the slave acknowledges.
    assign STB_O = wEnable & wEnable_Delayed & ~ACK_I;

    //-----------------------------------------------------------------
    // Data out: released (high-Z) for reads or whenever we do not own the bus
    //-----------------------------------------------------------------
    assign DAT_O = (wReadOperation | ~wEnable ) ? `WB_WIDTH'bz : iData;

    //-----------------------------------------------------------------
    // Address generation: separate read and write address counters
    //-----------------------------------------------------------------
    wire [`WB_WIDTH-1:0] wReadADR_O, wWriteADR_O;

    assign ADR_O = ( wReadOperation ) ? wReadADR_O : wWriteADR_O;

    // Read address: reloaded from iAddress when iAddress_Set is 1,
    // stepped on every granted acknowledge.
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_READ_ADDRESS
    (
        .Clock   ( CLK_I ),
        .Reset   ( iAddress_Set ),
        .Enable  ( (ACK_I & GNT_I) | iAddress_Set ),
        .Initial ( iAddress ),
        .Q       ( wReadADR_O )
    );

    // Registered copy of WE_O, used to qualify write-address stepping.
    wire wDelayWE;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
    (
        .Clock  ( CLK_I ),
        .Reset  ( RST_I ),
        .Enable ( 1'b1 ),
        .D      ( WE_O ),
        .Q      ( wDelayWE )
    );

    // Write address: reloaded from iAddress when iAddress_Set is 1,
    // stepped on an acknowledge that follows a registered write enable.
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_WRITE_ADDRESS
    (
        .Clock   ( CLK_I ),
        .Reset   ( iAddress_Set ),
        .Enable  ( (wDelayWE & ACK_I) | iAddress_Set ),
        .Initial ( iAddress ),
        .Q       ( wWriteADR_O )
    );

    //-----------------------------------------------------------------
    // Read data capture
    //-----------------------------------------------------------------

    // Note: this register is clocked by ACK_I itself (not CLK_I), so DAT_I is
    // captured on the rising edge of the acknowledge during read cycles.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
    (
        .Clock  ( ACK_I ),
        .Reset  ( ~wEnable ),
        .Enable ( wReadOperation ),
        .D      ( DAT_I ),
        .Q      ( oData )
    );

    // Registered acknowledge, only tracked during read cycles.
    wire wDelayDataReady;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
    (
        .Clock  ( CLK_I ),
        .Reset  ( ~wEnable ),
        .Enable ( wReadOperation ),
        .D      ( ACK_I ),
        .Q      ( wDelayDataReady )
    );

    // Data is reported ready only while we are still enabled and granted.
    assign oDataReady = wDelayDataReady & wEnable;

endmodule
 
/IO/Module_WishBoneSlave.v
0,0 → 1,159
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
 
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b10
`define TAG_DATA_ADDRESS_TYPE 2'b01
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//------------------------------------------------------------------------------
module WishBoneSlaveUnit
(
    // WB input signals
    input  wire                             CLK_I,
    input  wire                             RST_I,
    input  wire                             STB_I,
    input  wire                             WE_I,
    input  wire [`WB_WIDTH-1:0]             DAT_I,
    input  wire [`WB_WIDTH-1:0]             ADR_I,
    input  wire [1:0]                       TGA_I,
    output wire                             ACK_O,
    input  wire                             MST_I,      // Master In!
    input  wire                             CYC_I,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataWriteAddress,
    output wire [`DATA_ROW_WIDTH-1:0]       oDataBus,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oInstructionWriteAddress,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstructionBus,
    output wire                             oDataWriteEnable,
    output wire                             oInstructionWriteEnable
);

    // Local aliases for the Wishbone clock and reset.
    wire Clock, Reset;
    assign Clock = CLK_I;
    assign Reset = RST_I;

    //-----------------------------------------------------------------
    // Address and address-tag capture
    //-----------------------------------------------------------------

    // Both registers are clocked by CYC_I (not CLK_I): the low 16 address
    // bits and the 2-bit tag are captured on the rising edge of the cycle.
    FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR
    (
        .Clock  ( CYC_I ),
        .Reset  ( RST_I ),
        .Enable ( 1'b1 ),
        .D      ( ADR_I[15:0] ),
        .Q      ( oInstructionWriteAddress )
    );

    // The same captured address is used for data and instruction writes.
    assign oDataWriteAddress = oInstructionWriteAddress;

    wire [1:0] wTGA_Latched;

    FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE
    (
        .Clock  ( CYC_I ),
        .Reset  ( RST_I ),
        .Enable ( 1'b1 ),
        .D      ( TGA_I ),
        .Q      ( wTGA_Latched )
    );

    //-----------------------------------------------------------------
    // Acknowledge generation
    //-----------------------------------------------------------------
    wire wLatchNow;
    assign wLatchNow = STB_I & WE_I;

    // Registered copy of the write strobe: by then the word registers
    // below have had a clock edge to capture DAT_I.
    wire wDelay;

    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( wLatchNow ),
        .Q      ( wDelay )
    );

    // Gate with STB_I so ACK_O returns to zero as soon as the strobe drops.
    assign ACK_O = wDelay & STB_I;

    //-----------------------------------------------------------------
    // X / Y / Z word selection
    //-----------------------------------------------------------------

    // Selects which of the three word registers is loaded. Held in reset
    // while CYC_I is low; advances while strobed and not yet acknowledged.
    // NOTE(review): presumably a one-hot walking pattern -- confirm against
    // the SHIFTLEFT_POSEDGE definition.
    wire [2:0] wXYZSel;

    SHIFTLEFT_POSEDGE #(3) SHL
    (
        .Clock   ( CLK_I ),
        .Reset   ( ~CYC_I ),
        .Enable  ( STB_I & ~ACK_O ),
        .Initial ( 3'b1 ),
        .O       ( wXYZSel )
    );

    //-----------------------------------------------------------------
    // Word registers: one per selector bit, each loading DAT_I
    //-----------------------------------------------------------------
    wire [`WIDTH-1:0] wVx;
    wire [`WIDTH-1:0] wVy;
    wire [`WIDTH-1:0] wVz;

    // Stores Vx
    FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( wXYZSel[0] & STB_I ),
        .D      ( DAT_I ),
        .Q      ( wVx )
    );

    // Stores Vy
    FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( wXYZSel[1] & STB_I ),
        .D      ( DAT_I ),
        .Q      ( wVy )
    );

    // Stores Vz
    FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( wXYZSel[2] & STB_I ),
        .D      ( DAT_I ),
        .Q      ( wVz )
    );

    // A data row is the three words; an instruction uses only the first two.
    assign oDataBus        = {wVx,wVy,wVz};
    assign oInstructionBus = {wVx,wVy};

    //-----------------------------------------------------------------
    // Write-enable decode
    //-----------------------------------------------------------------
    wire wIsInstructionAddress, wIsDataAddress;

    assign wIsInstructionAddress = (wTGA_Latched == `TAG_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0;
    assign wIsDataAddress        = (wTGA_Latched == `TAG_DATA_ADDRESS_TYPE ) ? 1'b1 : 1'b0;

    // Write enables are raised once the cycle has ended (CYC_I low) while
    // MST_I is high.
    // NOTE(review): the DATA write enable is qualified by the INSTRUCTION tag
    // and vice versa. This may be intentional (matching the tag encoding used
    // by the bus master) or a swap of the macro names -- verify against the
    // driver of TGA_I before changing it.
    assign oDataWriteEnable        = (MST_I && !CYC_I && wIsInstructionAddress) ? 1'b1 : 1'b0;
    assign oInstructionWriteEnable = ( MST_I && !CYC_I && wIsDataAddress) ? 1'b1 : 1'b0;

endmodule
//------------------------------------------------------------------------------
/EXE/Module_ExecutionFSM.v
0,0 → 1,539
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
`define EXEU_AFTER_RESET 0
`define EXEU_INITIAL_STATE 1
`define EXEU_WAIT_FOR_DECODE 2
`define EXEU_FETCH_DECODED_INST 3
`define EXEU_WAIT_FOR_ALU_EXECUTION 4
`define EXEU_WRITE_BACK_TO_RAM 5
`define EXEU_HANDLE_JUMP 7
 
 
 
/*
Execution FSM.
Hands one decoded instruction at a time to the ALU and controls the
write-back of the ALU result:
 - While idle (oBusy = 0) the input latch enable is set, so an incoming
   iDecodeDone triggers the ALU, captures the destination address and
   reports the capture through oExeLatchedValues.
 - While the ALU is working (oBusy = 1) the latch enable is cleared until
   iALUOutputReady is seen.
 - The result {X,Y,Z} is driven onto RAMBus while oRAMWriteEnable is set
   (not for jumps and not for NOP; also not for DEBUG_PRINT when DEBUG is
   defined); otherwise RAMBus is released (high-Z).
The operation and the source operands are passed to the ALU unregistered.
*/
module ExecutionFSM
(
input wire Clock,
input wire Reset,

input wire iDecodeDone,
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination,
inout wire[`DATA_ROW_WIDTH-1:0] RAMBus,
output wire oJumpFlag ,
output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp ,
output wire oRAMWriteEnable ,
output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress ,
output wire oExeLatchedValues,
output reg oBusy ,

//ALU ports and control signals
output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation,
output wire [`WIDTH-1:0] oALUChannelX1,
output wire [`WIDTH-1:0] oALUChannelY1,
output wire [`WIDTH-1:0] oALUChannelZ1,
output wire [`WIDTH-1:0] oALUChannelX2,
output wire [`WIDTH-1:0] oALUChannelY2,
output wire [`WIDTH-1:0] oALUChannelZ2,
output wire oTriggerALU,

input wire [`WIDTH-1:0] iALUResultX,
input wire [`WIDTH-1:0] iALUResultY,
input wire [`WIDTH-1:0] iALUResultZ,
input wire iALUOutputReady,
input wire iBranchTaken,
input wire iBranchNotTaken,


`ifdef DEBUG
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP,
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
//Data forward Signals
output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination


);

wire wLatchNow;
reg rInputLatchesEnabled;

//If ALU says jump, just pass along
assign oJumpFlag = iBranchTaken;
//JumpIP is the instruction destination (= oRAMWriteAddress)
assign oJumpIp = oRAMWriteAddress;

//A decoded instruction is accepted only while the FSM allows latching.
//The same pulse triggers the ALU and is reported back as "values latched".
assign wLatchNow = iDecodeDone & rInputLatchesEnabled;
assign oExeLatchedValues = wLatchNow;
assign oTriggerALU = wLatchNow;

//The ALU flags either outcome of a branch, so either flag means "jump op"
wire wOperationIsJump;
assign wOperationIsJump = iBranchTaken || iBranchNotTaken;

//Don't allow a write back if the operation is a jump or a NOP
//(or a DEBUG_PRINT when DEBUG is defined)
`ifdef DEBUG
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump &&
(oALUOperation != `NOP) && oALUOperation != `DEBUG_PRINT;
`else
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && oALUOperation != `NOP;
`endif

//Drive the result row only while writing, otherwise release the bus
assign RAMBus = ( oRAMWriteEnable ) ? {iALUResultX,iALUResultY,iALUResultZ} : `DATA_ROW_WIDTH'bz;

//Operands go to the ALU unregistered: channel 1 <- iSource1, channel 2 <- iSource0
assign oALUChannelX1 = iSource1[95:64];
assign oALUChannelY1 = iSource1[63:32];
assign oALUChannelZ1 = iSource1[31:0];

assign oALUChannelX2 = iSource0[95:64];
assign oALUChannelY2 = iSource0[63:32];
assign oALUChannelZ2 = iSource0[31:0];

//The operation is also passed through unregistered
assign oALUOperation = iOperation;

//Only the destination address is registered, captured on wLatchNow
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iDestination ),
.Q(oRAMWriteAddress)
);

//Data forwarding
assign oLastDestination = oRAMWriteAddress;

reg [7:0] CurrentState;
reg [7:0] NextState;


//------------------------------------------------
//State register (asynchronous reset)
always @(posedge Clock or posedge Reset)
begin
if (Reset)
CurrentState <= `EXEU_AFTER_RESET;
else
CurrentState <= NextState;
end
//------------------------------------------------

//Next-state and output logic.
//This block is purely combinational, so blocking assignments are used.
//Every branch assigns oBusy, rInputLatchesEnabled and NextState, so no
//latches are inferred.
always @( * )
begin
case (CurrentState)
//------------------------------------------
`EXEU_AFTER_RESET:
begin
oBusy = 0;
rInputLatchesEnabled = 1;
NextState = `EXEU_WAIT_FOR_DECODE;
end
//------------------------------------------
/**
At the same time iDecodeDone goes to 1, our Flops
will store the value, so next clock cycle we can
tell IDU to go ahead and decode the next instruction
in the pipeline.
*/
`EXEU_WAIT_FOR_DECODE:
begin
oBusy = 0;
rInputLatchesEnabled = 1;
if ( iDecodeDone ) //This same thing triggers the ALU
NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
else
NextState = `EXEU_WAIT_FOR_DECODE;
end
//------------------------------------------
/*
Wait here until the ALU reports its result.
*/
`EXEU_WAIT_FOR_ALU_EXECUTION:
begin
oBusy = 1;
rInputLatchesEnabled = 0; //NO INTERRUPTIONS WHILE WE WAIT!!

if ( iALUOutputReady ) //This same thing enables writing the results to RAM
NextState = `EXEU_WAIT_FOR_DECODE;
else
NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
end
//------------------------------------------
//No transition in this module targets this state; it is kept so that the
//decoding of CurrentState is unchanged.
`EXEU_WRITE_BACK_TO_RAM:
begin
oBusy = 1;
rInputLatchesEnabled = 1;
if ( iDecodeDone )
NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
else
NextState = `EXEU_WAIT_FOR_DECODE;
end
//------------------------------------------
default:
begin
oBusy = 0;
rInputLatchesEnabled = 1;

NextState = `EXEU_AFTER_RESET;
end
//------------------------------------------
endcase
end

//-----------------------------------------------------------------------
//Simulation only: open the ucode and register log files
`ifdef DUMP_CODE
integer ucode_file;
integer reg_log;
initial
begin

$display("Opening ucode dump file....\n");
ucode_file = $fopen("Code.log","w");
$fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n");
$display("Opening Register lof file...\n");
reg_log = $fopen("Registers.log","w");
end

`endif //Ucode dump

//-----------------------------------------------------------------------
//Simulation only: keep a registered copy of the operands of the accepted
//instruction so they can be logged once the ALU result is ready
`ifdef DEBUG
wire [`WIDTH-1:0] wALUChannelX1,wALUChannelY1,wALUChannelZ1;
wire [`WIDTH-1:0] wALUChannelX2,wALUChannelY2,wALUChannelZ2;

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[95:64] ),
.Q(wALUChannelX1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[63:32] ),
.Q(wALUChannelY1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[31:0] ),
.Q(wALUChannelZ1)
);


FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[95:64] ),
.Q(wALUChannelX2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[63:32] ),
.Q(wALUChannelY2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[31:0] ),
.Q(wALUChannelZ2)
);


//Log the instruction pointer of every decoded instruction of the debugged core
always @ (posedge iDecodeDone && iDebug_CoreID == `DEBUG_CORE)
begin
`LOGME"[CORE %d] IP:%d", iDebug_CoreID,iDebug_CurrentIP);
end

//Log flags, mnemonic, destination and operands once the ALU result is ready
always @ (negedge Clock && iDebug_CoreID == `DEBUG_CORE)
begin
if ( iALUOutputReady )
begin
if (iBranchTaken)
`LOGME"<BT>");
if (iBranchNotTaken )
`LOGME"<BNT>");
if (oRAMWriteEnable)
`LOGME"<WE>");
`LOGME "(%dns ",$time);
case ( oALUOperation )
`RETURN: `LOGME"RETURN");
`ADD: `LOGME"ADD");
`SUB: `LOGME"SUB");
`DIV: `LOGME"DIV");
`MUL: `LOGME"MUL");
`MAG: `LOGME"MAG");
`JGX: `LOGME"JGX");
`JLX: `LOGME"JLX");
`JGEX: `LOGME"JGEX");
`JGEY: `LOGME"JGEY");
`JGEZ: `LOGME"JGEZ");
`JLEX: `LOGME"JLEX");
`JLEY: `LOGME"JLEY");
`JLEZ: `LOGME"JLEZ");
`JMP: `LOGME"JMP");
`ZERO: `LOGME"ZERO");
`JNEX: `LOGME"JNEX");
`JNEY: `LOGME"JNEY");
`JNEZ: `LOGME"JNEZ");
`JEQX: `LOGME"JEQX");
`JEQY: `LOGME"JEQY");
`JEQZ: `LOGME"JEQZ");
`CROSS: `LOGME"CROSS");
`DOT: `LOGME"DOT");
`SETX: `LOGME"SETX");
`SETY: `LOGME"SETY");
`SETZ: `LOGME"SETZ");
`NOP: `LOGME"NOP");
`COPY: `LOGME"COPY");
`INC: `LOGME"INC");
`DEC: `LOGME"DEC");
`MOD: `LOGME"MOD");
`FRAC: `LOGME"FRAC");
`NEG: `LOGME"NEG");
`SWIZZLE3D: `LOGME"SWIZZLE3D");
`MULP: `LOGME"MULP");
`XCHANGEX: `LOGME"XCHANGEX");
`IMUL: `LOGME"IMUL");
`UNSCALE: `LOGME"UNSCALE");
`INCX: `LOGME"INCX");
`INCY: `LOGME"INCY");
`INCZ: `LOGME"INCZ");
`OMWRITE: `LOGME"OMWRITE");
`TMREAD: `LOGME"TMREAD");
`LEA: `LOGME"LEA");
`CALL: `LOGME"CALL");
`RET: `LOGME"RET");
`DEBUG_PRINT:
begin
`LOGME"DEBUG_PRINT");
end
default:
begin
`LOGME"**********ERROR UNKNOWN OP*********");
$display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation);
// $stop();
end
endcase
`LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ",
oRAMWriteAddress,
wALUChannelX1,wALUChannelY1,wALUChannelZ1,
wALUChannelX2,wALUChannelY2,wALUChannelZ2
);
if (oALUOperation == `RETURN)
`LOGME"\n\n\n");
end
end //always

//Log the ALU result; declared after the block above so that, in source
//order, the result follows the operand dump on the same log line
always @ ( negedge Clock && iDebug_CoreID == `DEBUG_CORE )
begin
if ( iALUOutputReady )
`LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ);
end //always
`endif

endmodule
/EXE/Module_InstructionFetch.v
0,0 → 1,215
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/**********************************************************************************
Description:
This is the instruction fetch unit (IFU).
It gets the next instruction from the IMEM module at the MEM unit.
It increments the instruction pointer (IP) in such a way that EXE always has
one instruction per clock cycle (best pipeline performance). In order to achieve this,
the IFU has 2 instruction pointers, so that in case of 'branch' instructions,
two instruction pointers are generated and two different instructions are simultaneously
fetched from IMEM: the branch-taken and branch-not-taken instructions, so that once the
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched.
**********************************************************************************/
module InstructionFetch
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iTrigger,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iInitialCodeAddress,
    input  wire [`INSTRUCTION_WIDTH-1:0]    iInstruction1,          // Branch-not-taken instruction
    input  wire [`INSTRUCTION_WIDTH-1:0]    iInstruction2,          // Branch-taken instruction
    input  wire                             iBranchTaken,
    output wire                             oInstructionAvalable,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oIP,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oIP2,                   // Both IPs are computed; the choice is made later
    output wire [`INSTRUCTION_WIDTH-1:0]    oCurrentInstruction,
    input  wire                             iEXEDone,
    output wire                             oMicroCodeReturnValue,
    input  wire                             iSubroutineReturn,
    output wire                             oExecutionDone
);

// Opcode field: the top INSTRUCTION_OP_LENGTH bits of the current instruction.
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]

    //-----------------------------------------------------------------
    // Internal nets
    //-----------------------------------------------------------------
    wire wTriggerDelay1, wTriggerDelay2;
    wire wLastInst_Delay1, wLastInst_Delay2;
    wire wSubReturnDelay1, wSubReturnDelay2;
    wire wIncrementIP;
    wire wLastInstruction;
    wire wInstructionAvalable;

    //-----------------------------------------------------------------
    // Fields taken straight from the current instruction
    //-----------------------------------------------------------------
    assign oMicroCodeReturnValue = oCurrentInstruction[0];
    assign oIP2                  = oCurrentInstruction[47:32];   // Second (branch-taken) fetch address

    // The RETURN opcode marks the last instruction of the flow.
    assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN );

    //-----------------------------------------------------------------
    // Subroutine return address
    //-----------------------------------------------------------------
    wire IsCall;
    reg [`ROM_ADDRESS_WIDTH-1:0] rReturnAddress;

    assign IsCall = ( `INSTRUCTION_OPCODE == `CALL ) ? 1'b1 : 1'b0;

    // Remember the address following the CALL.
    // Note: this register is clocked by the decoded IsCall signal itself,
    // not by Clock.
    always @ (posedge IsCall)
        rReturnAddress <= oIP+1;

    //-----------------------------------------------------------------
    // IP increment / instruction-available qualification
    //-----------------------------------------------------------------

    // Increment the IP 2 cycles after the trigger, every time EXE is done
    // (but not once RETURN has been reached), or 2 cycles after a return
    // from subroutine.
    assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction) | wSubReturnDelay2;

    // Instructions become available to the IDU:
    //  * 2 cycles after the IFU is initially triggered
    //  * every time the previous instruction has finished executing, except
    //    for the last instruction in the flow
    assign wInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2);

    // Special case: when a return from subroutine is issued, suppress the
    // normal indication and announce the instruction once the delayed
    // return flag (wSubReturnDelay2) is set instead.
    assign oInstructionAvalable = (wInstructionAvalable & ~iSubroutineReturn) | wSubReturnDelay2;

    // Once the last instruction is reached, wait until EXE says it is done.
    assign oExecutionDone = (wLastInstruction & iEXEDone);

    //-----------------------------------------------------------------
    // Delay lines
    //-----------------------------------------------------------------

    // iSubroutineReturn delayed by two register stages
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD22
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( iSubroutineReturn ),
        .Q      ( wSubReturnDelay1 )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD23
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( wSubReturnDelay1 ),
        .Q      ( wSubReturnDelay2 )
    );

    // iTrigger delayed by two register stages
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( iTrigger ),
        .Q      ( wTriggerDelay1 )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( wTriggerDelay1 ),
        .Q      ( wTriggerDelay2 )
    );

    // "Last instruction was issued" flag: the first stage only loads while
    // the current instruction is RETURN, the second stage is always enabled.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( wLastInstruction ),
        .D      ( oInstructionAvalable ),
        .Q      ( wLastInst_Delay1 )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( wLastInst_Delay1 ),
        .Q      ( wLastInst_Delay2 )
    );

    //-----------------------------------------------------------------
    // Current instruction selection
    //-----------------------------------------------------------------
    /*
     In case the branch is taken:
     the current instruction is pointed at iInstruction2 (the branch-taken
     instruction), which corresponds to oIP2.
     In the next clock cycle oIP2 incremented by one has to be used, so
     the instruction pointer counter is loaded with oIP2+1.
    */
    wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next;
    wire [`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1, wCurrentInstruction_BranchTaken;
    wire wBranchTaken_Delay1;

    // Holds the instruction that was selected while iBranchTaken was set.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( iBranchTaken ),
        .D      ( oCurrentInstruction ),
        .Q      ( wCurrentInstruction_Delay1 )
    );

    // iBranchTaken delayed by one register stage
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY
    (
        .Clock  ( Clock ),
        .Reset  ( Reset ),
        .Enable ( 1'b1 ),
        .D      ( iBranchTaken ),
        .Q      ( wBranchTaken_Delay1 )
    );

    // If the branch was taken (and this is not a subroutine return), use
    // the pre-fetched branch-taken instruction.
    assign wCurrentInstruction_BranchTaken =
        ( iBranchTaken & ~iSubroutineReturn ) ? iInstruction2 : iInstruction1;

    // While the delayed branch flag is set, present the held instruction.
    assign oCurrentInstruction =
        ( wBranchTaken_Delay1 ) ? wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken;

    //-----------------------------------------------------------------
    // Instruction pointer
    //-----------------------------------------------------------------

    // Produces oIP2_Next from oIP2 (the "oIP2+1" described above).
    INCREMENT # (`ROM_ADDRESS_WIDTH) INC1
    (
        .Clock ( Clock ),
        .Reset ( Reset ),
        .A     ( oIP2 ),
        .R     ( oIP2_Next )
    );

    // Load value for the instruction pointer:
    //  * on the first cycle of a taken branch: the saved return address for a
    //    subroutine return, otherwise the address after the branch target
    //  * in every other case: the initial code address
    wire [`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint;

    assign wIPEntryPoint =
        ( iBranchTaken & ~wBranchTaken_Delay1 )
            ? ( (iSubroutineReturn) ? rReturnAddress : oIP2_Next )
            : iInitialCodeAddress;

    // Reloaded on trigger or on the first cycle of a taken branch;
    // incrementing is blocked during that first branch cycle.
    UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer
    (
        .Clock   ( Clock ),
        .Reset   ( iTrigger | (iBranchTaken & ~wBranchTaken_Delay1) ),
        .Enable  ( wIncrementIP & (~iBranchTaken | wBranchTaken_Delay1 ) ),
        .Initial ( wIPEntryPoint ),
        .Q       ( oIP )
    );

endmodule
 
//-------------------------------------------------------------------------------
/EXE/Unit_EXE.v
0,0 → 1,275
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------
module ExecutionUnit
(
 
input wire Clock,
input wire Reset,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction2,
 
 
input wire [`DATA_ROW_WIDTH-1:0] iDataRead0,
input wire [`DATA_ROW_WIDTH-1:0] iDataRead1,
input wire iTrigger,
 
 
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer1,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer2,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress1,
output wire oDataWriteEnable,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire oReturnCode,
 
 
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEMWriteEnable,
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData,
input wire iTMEMDataAvailable,
output wire oTMEMDataRequest,
 
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output wire oDone
 
 
 
 
);
 
 
`ifdef DEBUG
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer;
`endif
 
wire wEXE2__uCodeDone;
wire wEXE2_IFU__EXEBusy;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination;
wire wALU2_EXE__BranchTaken;
wire wALU2_IFU_BranchNotTaken;
wire [`INSTRUCTION_WIDTH-1:0] CurrentInstruction;
//wire wIDU2_IFU__IDUBusy;
 
 
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;
 
 
wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1;
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination;
wire wInstructionAvailable;
 
//ALU wires
wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation;
wire [`WIDTH-1:0] ALU2ChannelA;
wire [`WIDTH-1:0] ALU2ChannelB;
wire [`WIDTH-1:0] ALU2ChannelC;
wire [`WIDTH-1:0] ALU2ChannelD;
wire [`WIDTH-1:0] ALU2ChannelE;
wire [`WIDTH-1:0] ALU2ChannelF;
wire [`WIDTH-1:0] ALU2ResultA;
wire [`WIDTH-1:0] ALU2ResultB;
wire [`WIDTH-1:0] ALU2ResultC;
wire wEXE2_ALU__TriggerALU;
wire ALU2OutputReady;
wire w2FIU__BranchTaken;
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp;
wire [`ROM_ADDRESS_WIDTH-1:0] wIDU2_IFU_ReturnAddress;
wire wALU2_IFU_ReturnFromSub;
 
//wire wIDU2_IFU__InputsLatched;
 
wire wEPU_Busy,wTriggerIFU;
wire [`ROM_ADDRESS_WIDTH-1:0] wEPU_IP,wIFU_IP,wCodeEntryPoint;
 
assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP;
 
 
InstructionEntryPoint EPU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( iTrigger ),
.iInitialCodeAddress( iInitialCodeAddress ),
.iIMemInput(iInstruction1),
 
.oEPU_Busy(wEPU_Busy),
.oEntryPoint( wCodeEntryPoint ),
.oTriggerIFU( wTriggerIFU ),
.oInstructionAddr( wEPU_IP )
 
);
 
InstructionFetch IFU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wTriggerIFU ),
.iInstruction1( iInstruction1 ),
.iInstruction2( iInstruction2 ),
.iInitialCodeAddress( wCodeEntryPoint ),
.iBranchTaken( w2FIU__BranchTaken ),
.iSubroutineReturn( wALU2_IFU_ReturnFromSub ),
//.iReturnAddress( wIDU2_IFU_ReturnAddress ),
.oCurrentInstruction( CurrentInstruction ),
.oInstructionAvalable( wInstructionAvailable ),
.oIP( wIFU_IP ),
.oIP2( oInstructionPointer2 ),
.iEXEDone( ALU2OutputReady ),
.oMicroCodeReturnValue( oReturnCode ),
.oExecutionDone( oDone )
);
 
////---------------------------------------------------------
wire wIDU2_EXE_DataReady;
wire wEXE2_IDU_ExeLatchedValues;
 
InstructionDecode IDU
(
.Clock( Clock ),
.Reset( Reset ),
.iEncodedInstruction( CurrentInstruction ),
.iInstructionAvailable( wInstructionAvailable ),
//.iIP( oInstructionPointer1 ),
//.oReturnAddress( wIDU2_IFU_ReturnAddress ),
.oRamAddress0( oDataReadAddress0 ),
.oRamAddress1( oDataReadAddress1 ),
.iRamValue0( iDataRead0 ),
.iRamValue1( iDataRead1 ),
.iLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
.iDataForward( {ALU2ResultA,ALU2ResultB,ALU2ResultC} ),
//Outputs going to the ALU-FSM
.oOperation( wOperation ),
.oDestination( wDestination ),
.oSource0( wSource0 ),
.oSource1( wSource1 ),
`ifdef DEBUG
.iDebug_CurrentIP( oInstructionPointer1 ),
.oDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
`endif
.oDataReadyForExe( wIDU2_EXE_DataReady )
);
 
 
ExecutionFSM EXE
(
.Clock( Clock ),
.Reset( Reset | iTrigger ), //New Sat Jun13
.iDecodeDone( wIDU2_EXE_DataReady ),
.iOperation( wOperation ),
.iDestination( wDestination ),
.iSource0( wSource0 ),
.iSource1( wSource1 ) ,
 
`ifdef DEBUG
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
.iDebug_CoreID( iDebug_CoreID ),
`endif
//.iJumpResultFromALU( wALU2_EXE__BranchTaken ),
.iBranchTaken( wALU2_EXE__BranchTaken ),
.iBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.oJumpFlag( w2FIU__BranchTaken ),
.oJumpIp( JumpIp ),
.oRAMWriteEnable( oDataWriteEnable ),
.oRAMWriteAddress( oDataWriteAddress ),
.RAMBus( oDataBus ),
.oBusy( wEXE2_IFU__EXEBusy ),
 
.oExeLatchedValues( wEXE2_IDU_ExeLatchedValues ),
.oLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
 
//ALU ports and control signals
.oTriggerALU( wEXE2_ALU__TriggerALU ),
.oALUOperation( ALU2Operation ),
.oALUChannelX1( ALU2ChannelA ),
.oALUChannelX2( ALU2ChannelB ),
.oALUChannelY1( ALU2ChannelC ),
.oALUChannelY2( ALU2ChannelD ),
.oALUChannelZ1( ALU2ChannelE ),
.oALUChannelZ2( ALU2ChannelF ),
.iALUResultX( ALU2ResultA ),
.iALUResultY( ALU2ResultB ),
.iALUResultZ( ALU2ResultC ),
.iALUOutputReady( ALU2OutputReady )
 
);
 
 
//--------------------------------------------------------
 
VectorALU ALU
(
.Clock(Clock),
.Reset(Reset),
.iOperation( ALU2Operation ),
.iChannel_Ax( ALU2ChannelA ),
.iChannel_Bx( ALU2ChannelB ),
.iChannel_Ay( ALU2ChannelC ),
.iChannel_By( ALU2ChannelD ),
.iChannel_Az( ALU2ChannelE ),
.iChannel_Bz( ALU2ChannelF ),
.oResultA( ALU2ResultA ),
.oResultB( ALU2ResultB ),
.oResultC( ALU2ResultC ),
.oBranchTaken( wALU2_EXE__BranchTaken ),
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.oReturnFromSub( wALU2_IFU_ReturnFromSub ),
.iInputReady( wEXE2_ALU__TriggerALU ),
//***********
.oOMEMWriteAddress( oOMEMWriteAddress ),
.oOMEMWriteData( oOMEMWriteData ),
.oOMEM_WriteEnable( oOMEMWriteEnable ),
.oTMEMReadAddress( oTMEMReadAddress ),
.iTMEMReadData( iTMEMReadData ),
.iTMEMDataAvailable( iTMEMDataAvailable ),
.oTMEMDataRequest( oTMEMDataRequest ),
//***********
.iCurrentIP( oInstructionPointer1 ),
.OutputReady( ALU2OutputReady )
);
 
 
endmodule
//---------------------------------------------------------------------
/EXE/Module_VectorALU.v
0,0 → 1,1274
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
 
//--------------------------------------------------------------
module VectorALU
(
input wire Clock,
input wire Reset,
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
input wire[`WIDTH-1:0] iChannel_Ax,
input wire[`WIDTH-1:0] iChannel_Bx,
input wire[`WIDTH-1:0] iChannel_Ay,
input wire[`WIDTH-1:0] iChannel_By,
input wire[`WIDTH-1:0] iChannel_Az,
input wire[`WIDTH-1:0] iChannel_Bz,
output wire [`WIDTH-1:0] oResultA,
output wire [`WIDTH-1:0] oResultB,
output wire [`WIDTH-1:0] oResultC,
input wire iInputReady,
output reg oBranchTaken,
output reg oBranchNotTaken,
output reg oReturnFromSub,
input wire [`ROM_ADDRESS_WIDTH-1:0] iCurrentIP,
//Connections to the O Memory
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEM_WriteEnable,
//Connections to the R Memory
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData,
input wire iTMEMDataAvailable,
output wire oTMEMDataRequest,
output reg OutputReady
);
 
 
 
 
 
// High while the current opcode is IMUL. This net drives the iUnscaled
// input of the multiplier channels instantiated in this module.
wire wMultiplcationUnscaled = (iOperation == `IMUL);
 
//--------------------------------------------------------------
 
reg [7:0] InputReadyA,InputReadyB,InputReadyC;
 
//------------------------------------------------------
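/*
This is the block that takes care of all the arithmetic
comparisons. Supported operations are <,>,<=,>=,==,!=
(The ArithmeticComparison instance itself appears further below; the
declarations that immediately follow belong to the multiplier channels.)
*/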
//------------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationA_Ax;
reg [`WIDTH-1:0] wMultiplicationA_Bx;
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result;
wire wMultiplicationA_InputReady;
wire wMultiplicationA_OutputReady;
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA,
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD;
 
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady;
 
//--------------------------------------------------------------------
reg [`WIDTH-1:0] ResultA,ResultB,ResultC;
 
//Output flip-flops.
//These flip-flops hold the outputs so that the
//values of the outputs change ONLY when there is
//a positive edge of OutputReady
 
FFD32_POSEDGE ResultAFFD
(
.Clock( OutputReady ),
.D( ResultA ),
.Q( oResultA )
);
 
FFD32_POSEDGE ResultBFFD
(
.Clock( OutputReady ),
.D( ResultB ),
.Q( oResultB )
);
 
FFD32_POSEDGE ResultCFFD
(
.Clock( OutputReady ),
.D( ResultC ),
.Q( oResultC )
);
//--------------------------------------------------------------------
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ;
 
 
Swizzle3D Swizzle1
(
.Source0_X( iChannel_Bx ),
.Source0_Y( iChannel_By ),
.Source0_Z( iChannel_Bz ),
.iOperation( iChannel_Ax ),
.SwizzleX( wSwizzleOutputX ),
.SwizzleY( wSwizzleOutputY ),
.SwizzleZ( wSwizzleOutputZ )
);
//---------------------------------------------------------------------
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC;
 
//---------------------------------------------------------------------(
 
wire IOW_Operation,wOMEM_We;
assign IOW_Operation = (iOperation == `OMWRITE);
 
// Return-from-subroutine strobe (purely combinational).
// oReturnFromSub follows OutputReady while the opcode is `RET and is held
// low for every other opcode.
// Blocking assignments are used here because this is a combinational
// always block; non-blocking assignments belong in clocked logic only.
always @ ( * )
begin
    if (iOperation == `RET)
        oReturnFromSub = OutputReady;
    else
        oReturnFromSub = 1'b0;
end
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_AWE
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( IOW_Operation ),
.Q( wOMEM_We )
);
 
assign oOMEM_WriteEnable = wOMEM_We & IOW_Operation;
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD1_A
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ),
.Q( oOMEMWriteAddress)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( {iChannel_Bx,iChannel_By,iChannel_Bz} ),
.Q( oOMEMWriteData )
);
 
 
 
wire wTMReadOutputReady;
assign wTMReadOutputReady = iTMEMDataAvailable;
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( iTMEMDataAvailable ),
.Q( wTMReadOutputReady )
);
*/
//assign oTMEMReadAddress = {iChannel_Ax,iChannel_Ay,iChannel_Az};
 
//We wait 1 clock cycle before we send the data read request, because
//we need to latch the values at the output
 
// High while the current opcode is TMREAD. Used below to form the TMEM data
// request and to qualify the enable of the TMEM read-address register.
wire wOpTRead = (iOperation == `TMREAD);
wire wTMEMRequest;
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE123
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( wOpTRead ),
.Q( wTMEMRequest )
);
assign oTMEMDataRequest = wTMEMRequest & wOpTRead;
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B445
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady & wOpTRead ),
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ),
.Q( oTMEMReadAddress )
);
 
/*
This MUX will select the appropriate X, Y or Z lane depending on
whether it is an X, Y or Z iOperation. This is defined by bits 3 and 4
of iOperation, and only applies to oBranchTaken and Store operations.
*/
 
wire wArithmeticComparison_Result;
wire ArithmeticComparison_InputReady;
wire ArithmeticComparison_OutputReady;
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B;
 
 
// Operand-A selector for the arithmetic comparator.
// Opcode bits [4:3] pick the lane of source A: 01 -> X, 10 -> Y, 11 -> Z.
// Any other pattern (00 included) drives zero; the original author notes
// that this case should never happen.
always @ ( * )
begin
    case ( iOperation[4:3] )
        2'b11:   ArithmeticComparison_A = iChannel_Az;
        2'b10:   ArithmeticComparison_A = iChannel_Ay;
        2'b01:   ArithmeticComparison_A = iChannel_Ax;
        default: ArithmeticComparison_A = `WIDTH'd0;
    endcase
end
//---------------------------------------------------------------------
// Operand-B selector for the arithmetic comparator.
// Opcode bits [4:3] pick the lane of source B: 01 -> X, 10 -> Y, 11 -> Z.
// Any other pattern (00 included) drives zero; the original author notes
// that this case should never happen.
always @ ( * )
begin
    case ( iOperation[4:3] )
        2'b11:   ArithmeticComparison_B = iChannel_Bz;
        2'b10:   ArithmeticComparison_B = iChannel_By;
        2'b01:   ArithmeticComparison_B = iChannel_Bx;
        default: ArithmeticComparison_B = `WIDTH'd0;
    endcase
end
 
//---------------------------------------------------------------------
/*
The only instance of arithmetic comparison in the ALU.
The ArithmeticComparison operation code matches the 3 LSBs of the
global ALU iOperation for the oBranchTaken instruction family.
*/
 
assign ArithmeticComparison_InputReady = iInputReady;
 
wire wArithmeticComparisonResult;
 
ArithmeticComparison ArithmeticComparison_1
(
.Clock( Clock ),
.X( ArithmeticComparison_A ),
.Y( ArithmeticComparison_B ),
.iOperation( iOperation[2:0] ),
.iInputReady( ArithmeticComparison_InputReady ),
.OutputReady( ArithmeticComparison_OutputReady ),
.Result( wArithmeticComparisonResult )
);
 
 
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady;
//--------------------------------------------------------------------
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationA_Ax ),
.B( wMultiplicationA_Bx ),
.R( wMultiplicationA_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationA_InputReady ),
.OutputReady( wMultiplicationA_OutputReady )
);
 
//--------------------------------------------------------------------
// Multiplier A, first operand.
// CROSS feeds Ay (for the Ay * Bz product); every other opcode, MAG and
// MULP included, feeds Ax.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationA_Ax = iChannel_Ay; // Ay * Bz
        default: wMultiplicationA_Ax = iChannel_Ax; // Ax * (second operand)
    endcase
end
//--------------------------------------------------------------------
 
//assign wMultiplicationA_Ax = iChannel_Ax;
 
// Multiplier A receives iInputReady for MUL, IMUL, DOT, CROSS, MAG and MULP;
// for any other opcode its input-ready is held low.
assign wMultiplicationA_InputReady =
    ( (iOperation == `MUL)  || (iOperation == `IMUL)  ||
      (iOperation == `DOT)  || (iOperation == `CROSS) ||
      (iOperation == `MAG)  || (iOperation == `MULP) ) ? iInputReady : 1'b0;
//--------------------------------------------------------------------
// Multiplier A, second operand. Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `MUL, `IMUL, `DOT: wMultiplicationA_Bx = iChannel_Bx; // Ax * Bx
        `CROSS:            wMultiplicationA_Bx = iChannel_Bz; // Ay * Bz
        `MAG:              wMultiplicationA_Bx = iChannel_Ax; // Ax ^ 2
        `MULP:             wMultiplicationA_Bx = iChannel_Ay; // Ax * Ay
        default:           wMultiplicationA_Bx = 32'b0;
    endcase
end
//--------------------------------------------------------------------
 
//------------------------------------------------------
 
reg [`WIDTH-1:0] wMultiplicationB_Ay;
reg [`WIDTH-1:0] wMultiplicationB_By;
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result;
wire wMultiplicationB_InputReady;
wire wMultiplicationB_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationB_Ay ),
.B( wMultiplicationB_By ),
.R( wMultiplicationB_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationB_InputReady ),
.OutputReady( wMultiplicationB_OutputReady )
);
 
 
//----------------------------------------------------
 
// Multiplier B, first operand.
// CROSS feeds Az (for the Az * By product); every other opcode, MAG
// included, feeds Ay.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationB_Ay = iChannel_Az; // Az * By
        default: wMultiplicationB_Ay = iChannel_Ay; // Ay * (second operand)
    endcase
end
//----------------------------------------------------
// Multiplier B receives iInputReady for MUL, IMUL, DOT, CROSS and MAG;
// for any other opcode its input-ready is held low.
assign wMultiplicationB_InputReady =
    ( (iOperation == `MUL)  || (iOperation == `IMUL)  ||
      (iOperation == `DOT)  || (iOperation == `CROSS) ||
      (iOperation == `MAG) ) ? iInputReady : 1'b0;
//----------------------------------------------------
// Multiplier B, second operand. Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `MUL, `IMUL, `DOT: wMultiplicationB_By = iChannel_By; // Ay * By
        `CROSS:            wMultiplicationB_By = iChannel_By; // Az * By
        `MAG:              wMultiplicationB_By = iChannel_Ay; // Ay ^ 2
        default:           wMultiplicationB_By = 32'b0;
    endcase
end
//----------------------------------------------------
//------------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationC_Az;
reg [`WIDTH-1:0] wMultiplicationC_Bz;
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result;
wire wMultiplicationC_InputReady;
wire wMultiplicationC_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationC_Az ),
.B( wMultiplicationC_Bz ),
.R( wMultiplicationC_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationC_InputReady ),
.OutputReady( wMultiplicationC_OutputReady )
);
 
 
//----------------------------------------------------
// Multiplier C, first operand.
// Every opcode feeds Az; the CROSS arm is kept separate only to document
// which product it contributes to.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationC_Az = iChannel_Az; // Az * Bx
        default: wMultiplicationC_Az = iChannel_Az; // Az * (second operand)
    endcase
end
//----------------------------------------------------
 
// Multiplier C receives iInputReady for MUL, IMUL, DOT, CROSS and MAG;
// for any other opcode its input-ready is held low.
assign wMultiplicationC_InputReady =
    ( (iOperation == `MUL)  || (iOperation == `IMUL)  ||
      (iOperation == `DOT)  || (iOperation == `CROSS) ||
      (iOperation == `MAG) ) ? iInputReady : 1'b0;
//----------------------------------------------------
// Multiplier C, second operand. Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `MUL, `IMUL, `DOT: wMultiplicationC_Bz = iChannel_Bz; // Az * Bz
        `CROSS:            wMultiplicationC_Bz = iChannel_Bx; // Az * Bx
        `MAG:              wMultiplicationC_Bz = iChannel_Az; // Az ^ 2
        default:           wMultiplicationC_Bz = 32'b0;
    endcase
end
//----------------------------------------------------
 
reg [`WIDTH-1:0] wMultiplicationD_Aw;
reg [`WIDTH-1:0] wMultiplicationD_Bw;
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result;
wire wMultiplicationD_InputReady;
wire wMultiplicationD_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationD_Aw ),
.B( wMultiplicationD_Bw ),
.R( wMultiplicationD_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationD_InputReady ),
.OutputReady( wMultiplicationD_OutputReady )
);
 
// Multiplier D is only triggered for CROSS.
assign wMultiplicationD_InputReady = (iOperation == `CROSS) ? iInputReady : 1'b0;
 
 
//----------------------------------------------------
// Multiplier D operand selection.
// CROSS multiplies Ax by Bz; for every other opcode both operands are zero.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
        begin
            wMultiplicationD_Aw = iChannel_Ax; // Ax * Bz
            wMultiplicationD_Bw = iChannel_Bz;
        end
        default:
        begin
            wMultiplicationD_Aw = 32'b0;
            wMultiplicationD_Bw = 32'b0;
        end
    endcase
end
//----------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationE_Ak;
reg [`WIDTH-1:0] wMultiplicationE_Bk;
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result;
wire wMultiplicationE_InputReady;
wire wMultiplicationE_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationE_Ak ),
.B( wMultiplicationE_Bk ),
.R( wMultiplicationE_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationE_InputReady ),
.OutputReady( wMultiplicationE_OutputReady )
);
 
// Multiplier E is only triggered for CROSS.
assign wMultiplicationE_InputReady = (iOperation == `CROSS) ? iInputReady : 1'b0;
//----------------------------------------------------
// Multiplier E operand selection.
// CROSS multiplies Ax by By; for every other opcode both operands are zero.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
        begin
            wMultiplicationE_Ak = iChannel_Ax; // Ax * By
            wMultiplicationE_Bk = iChannel_By;
        end
        default:
        begin
            wMultiplicationE_Ak = 32'b0;
            wMultiplicationE_Bk = 32'b0;
        end
    endcase
end
//----------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationF_Al;
reg [`WIDTH-1:0] wMultiplicationF_Bl;
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result;
wire wMultiplicationF_InputReady;
wire wMultiplicationF_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationF_Al ),
.B( wMultiplicationF_Bl ),
.R( wMultiplicationF_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationF_InputReady ),
.OutputReady( wMultiplicationF_OutputReady )
);
// Multiplier F is only triggered for CROSS.
assign wMultiplicationF_InputReady = (iOperation == `CROSS) ? iInputReady : 1'b0;
//----------------------------------------------------
// Multiplier F operand selection.
// CROSS multiplies Ay by Bx; for every other opcode both operands are zero.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
        begin
            wMultiplicationF_Al = iChannel_Ay; // Ay * Bx
            wMultiplicationF_Bl = iChannel_Bx;
        end
        default:
        begin
            wMultiplicationF_Al = 32'b0;
            wMultiplicationF_Bl = 32'b0;
        end
    endcase
end
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionA_Result;
wire wDivisionA_OutputReady;
wire wDivisionA_InputReady;
 
// Divider A (X lane) is only triggered for DIV.
assign wDivisionA_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;
 
SignedIntegerDivision DivisionChannel_A
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Ax ),
.iDivisor( iChannel_Bx ),
.xQuotient( wDivisionA_Result ),
.iInputReady( wDivisionA_InputReady ),
.OutputReady( wDivisionA_OutputReady )
 
);
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionB_Result;
wire wDivisionB_OutputReady;
wire wDivisionB_InputReady;
 
// Divider B (Y lane) is only triggered for DIV.
assign wDivisionB_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;
 
SignedIntegerDivision DivisionChannel_B
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Ay ),
.iDivisor( iChannel_By ),
.xQuotient( wDivisionB_Result ),
.iInputReady( wDivisionB_InputReady ),
.OutputReady( wDivisionB_OutputReady )
 
);
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionC_Result;
wire wDivisionC_OutputReady;
wire wDivisionC_InputReady;
 
 
// Divider C (Z lane) is only triggered for DIV.
assign wDivisionC_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;
 
SignedIntegerDivision DivisionChannel_C
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Az ),
.iDivisor( iChannel_Bz ),
.xQuotient( wDivisionC_Result ),
.iInputReady( wDivisionC_InputReady ),
.OutputReady( wDivisionC_OutputReady )
 
);
//--------------------------------------------------------------
/*
First addition block instance goes here.
Note that all inputs/outputs to the block
are wires. It has two MUXES, one for each entry.
*/
reg [`LONG_WIDTH-1:0] wAddSubA_Ax,wAddSubA_Bx;
wire [`LONG_WIDTH-1:0] wAddSubA_Result;
wire wAddSubA_Operation; //Either addition or substraction
reg wAddSubA_InputReady;
wire wAddSubA_OutputReady;
 
// Mode select for adder A: 1 for SUB, CROSS, DEC and MOD, 0 otherwise.
// NOTE(review): 1 presumably selects subtraction inside FixedAddSub
// (it is the value used for `SUB) -- confirm against that module.
assign wAddSubA_Operation =
       (iOperation == `SUB)
    || (iOperation == `CROSS)
    || (iOperation == `DEC)
    || (iOperation == `MOD);
 
FixedAddSub AddSubChannel_A
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubA_Ax ),
.B( wAddSubA_Bx ),
.R( wAddSubA_Result ),
.iOperation( wAddSubA_Operation ),
.iInputReady( wAddSubA_InputReady ),
.OutputReady( wAddSubA_OutputReady )
);
//Diego
 
 
//----------------------------
 
// InputReady mux for adder A.
// The direct add/sub family starts the adder on iInputReady. MAG, DOT and
// CROSS instead wait until both wMultiplicationOutputReadyA and
// wMultiplicationOutputReadyB are high (those nets are driven elsewhere
// in this module). Any other opcode never triggers the adder.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB, `DEC, `MOD,
        `INC, `INCX, `INCY, `INCZ:
            wAddSubA_InputReady = iInputReady;

        `MAG, `DOT, `CROSS:
            wAddSubA_InputReady = wMultiplicationOutputReadyA &&
                                  wMultiplicationOutputReadyB;

        default:
            wAddSubA_InputReady = 1'b0;
    endcase
end
//----------------------------
 
// wAddSubA_Ax input mux (first operand of adder A).
// Lane values are widened by filling the upper 32 bits with copies of
// bit 31. MOD takes its operand from Bx rather than Ax; MAG, DOT and CROSS
// take the product of multiplier A. Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB, `DEC,
        `INC, `INCX, `INCY, `INCZ:
            wAddSubA_Ax = iChannel_Ax[31] ? {32'hFFFFFFFF, iChannel_Ax}
                                          : {32'b0,        iChannel_Ax};

        `MOD:
            wAddSubA_Ax = iChannel_Bx[31] ? {32'hFFFFFFFF, iChannel_Bx}
                                          : {32'b0,        iChannel_Bx};

        `MAG, `DOT, `CROSS:
            wAddSubA_Ax = wMultiplicationA_Result;

        default:
            wAddSubA_Ax = 64'b0;
    endcase
end
//----------------------------
// wAddSubA_Bx input mux (second operand of adder A).
// ADD/SUB use Bx widened with copies of bit 31. INC, INCX, DEC and MOD use
// the constant (1 << `SCALE); INCY and INCZ use zero, so the X lane is
// left unchanged by those opcodes. MAG, DOT and CROSS take the product of
// multiplier B. Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB:
            wAddSubA_Bx = iChannel_Bx[31] ? {32'hFFFFFFFF, iChannel_Bx}
                                          : {32'b0,        iChannel_Bx};

        `INC, `INCX, `DEC, `MOD:
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);

        `INCY, `INCZ:
            wAddSubA_Bx = `LONG_WIDTH'd0;

        `MAG, `DOT, `CROSS:
            wAddSubA_Bx = wMultiplicationB_Result;

        default:
            wAddSubA_Bx = 64'b0;
    endcase
end
//--------------------------------------------------------------
/*
Second addition block instance goes here.
Note that all inputs/outputs to the block
are wires. It has two MUXES, one for each entry.
*/
 
wire [`LONG_WIDTH-1:0] wAddSubB_Result;
 
 
wire wAddSubB_Operation; //Either addition or substraction
reg wAddSubB_InputReady;
wire wAddSubB_OutputReady;
 
reg [`LONG_WIDTH-1:0] wAddSubB_Ay,wAddSubB_By;
 
// Mode select for adder B: 1 for SUB, CROSS, DEC and MOD, 0 otherwise.
// NOTE(review): 1 presumably selects subtraction inside FixedAddSub
// (it is the value used for `SUB) -- confirm against that module.
assign wAddSubB_Operation =
       (iOperation == `SUB)
    || (iOperation == `CROSS)
    || (iOperation == `DEC)
    || (iOperation == `MOD);
 
FixedAddSub AddSubChannel_B
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubB_Ay ),
.B( wAddSubB_By ),
.R( wAddSubB_Result ),
.iOperation( wAddSubB_Operation ),
.iInputReady( wAddSubB_InputReady ),
.OutputReady( wAddSubB_OutputReady )
);
//----------------------------
wire wMultiplicationOutputReadyC_Dealy1;
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1
(
.Clock( Clock ),
.Clear( Reset ),
.D( wMultiplicationOutputReadyC ),
.Q( wMultiplicationOutputReadyC_Dealy1 )
);
 
 
 
 
 
// InputReady mux for adder B.
// The direct add/sub family starts the adder on iInputReady.
// MAG and DOT chain adder B after adder A: they wait for
// wAddSubAOutputReady together with the one-cycle-delayed multiplier C
// ready flag. CROSS waits for the multiplier C and D ready flags.
// Any other opcode never triggers the adder.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB, `DEC, `MOD,
        `INC, `INCX, `INCY, `INCZ:
            wAddSubB_InputReady = iInputReady;

        `MAG, `DOT:
            wAddSubB_InputReady = wAddSubAOutputReady &&
                                  wMultiplicationOutputReadyC_Dealy1;

        `CROSS:
            wAddSubB_InputReady = wMultiplicationOutputReadyC &&
                                  wMultiplicationOutputReadyD;

        default:
            wAddSubB_InputReady = 1'b0;
    endcase
end
//----------------------------
// wAddSubB_Ay input mux (first operand of adder B).
// For the direct add/sub family the operand is the Y lane of source A,
// widened with copies of bit 31 (MOD uses By instead of Ay).
// For MAG and DOT the operand is the result of adder A, so adder B
// accumulates the third term onto the sum of the first two.
// For CROSS the operand is the product of multiplier C.
// Opcodes not listed drive zero.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB, `DEC,
        `INC, `INCX, `INCY, `INCZ:
            wAddSubB_Ay = iChannel_Ay[31] ? {32'hFFFFFFFF, iChannel_Ay}
                                          : {32'b0,        iChannel_Ay};

        `MOD:
            wAddSubB_Ay = iChannel_By[31] ? {32'hFFFFFFFF, iChannel_By}
                                          : {32'b0,        iChannel_By};

        `MAG, `DOT:
            wAddSubB_Ay = wAddSubA_Result;      // Ax*Bx + Ay*By (or Ax^2 + Ay^2)

        `CROSS:
            wAddSubB_Ay = wMultiplicationC_Result;

        default:
            wAddSubB_Ay = 64'b0;
    endcase
end
//----------------------------
// wAddSubB_By input mux (second operand of adder B).
// ADD/SUB use By widened with copies of bit 31. INC, INCY, DEC and MOD use
// the constant (1 << `SCALE); INCX and INCZ use zero, so the Y lane is
// left unchanged by those opcodes. MAG and DOT add the product of
// multiplier C; CROSS uses the product of multiplier D.
// The default arm is written with the full `LONG_WIDTH so that the literal
// width matches the register, as in the other arms of this mux.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB:
            wAddSubB_By = iChannel_By[31] ? {32'hFFFFFFFF, iChannel_By}
                                          : {32'b0,        iChannel_By};

        `INC, `INCY, `DEC, `MOD:
            wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);

        `INCX, `INCZ:
            wAddSubB_By = `LONG_WIDTH'd0;

        `MAG, `DOT:
            wAddSubB_By = wMultiplicationC_Result;  // Az^2 or Az * Bz

        `CROSS:
            wAddSubB_By = wMultiplicationD_Result;

        default:
            wAddSubB_By = `LONG_WIDTH'd0;
    endcase
end
//--------------------------------------------------------------
wire [`LONG_WIDTH-1:0] wAddSubC_Result;
reg [`LONG_WIDTH-1:0] wAddSubC_Az,wAddSubC_Bz;
 
wire wAddSubC_Operation; //Either addition or substraction
reg wAddSubC_InputReady;
wire wAddSubC_OutputReady;
 
reg [`LONG_WIDTH-1:0] AddSubC_Az,AddSubB_Bz;
 
//-----------------------------------------
// wAddSubC_Az input mux (first operand of adder C).
// CROSS uses the product of multiplier E; MOD uses Bz; every other opcode
// uses Az. Lane values are widened with copies of bit 31.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
            wAddSubC_Az = wMultiplicationE_Result;

        `MOD:
            wAddSubC_Az = iChannel_Bz[31] ? {32'hFFFFFFFF, iChannel_Bz}
                                          : {32'b0,        iChannel_Bz};

        default:
            wAddSubC_Az = iChannel_Az[31] ? {32'hFFFFFFFF, iChannel_Az}
                                          : {32'b0,        iChannel_Az};
    endcase
end
//-----------------------------------------
// wAddSubC_Bz input mux (second operand of adder C).
// CROSS uses the product of multiplier F. INC, INCZ, DEC and MOD use the
// constant (1 << `SCALE); INCX and INCY use zero, so the Z lane is left
// unchanged by those opcodes. Every other opcode uses Bz widened with
// copies of bit 31.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
            wAddSubC_Bz = wMultiplicationF_Result;

        `INC, `INCZ, `DEC, `MOD:
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);

        `INCX, `INCY:
            wAddSubC_Bz = `LONG_WIDTH'd0;

        default:
            wAddSubC_Bz = iChannel_Bz[31] ? {32'hFFFFFFFF, iChannel_Bz}
                                          : {32'b0,        iChannel_Bz};
    endcase
end
//-----------------------------------------
 
// Mode select for adder C: 1 for SUB, CROSS, DEC and MOD, 0 otherwise.
// NOTE(review): 1 presumably selects subtraction inside FixedAddSub
// (it is the value used for `SUB) -- confirm against that module.
assign wAddSubC_Operation =
       (iOperation == `SUB)
    || (iOperation == `CROSS)
    || (iOperation == `DEC)
    || (iOperation == `MOD);
 
FixedAddSub AddSubChannel_C
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubC_Az ),
.B( wAddSubC_Bz ),
.R( wAddSubC_Result ),
.iOperation( wAddSubC_Operation ),
.iInputReady( wAddSubC_InputReady ),
.OutputReady( wAddSubC_OutputReady )
);
 
 
// InputReady mux for adder C.
// CROSS waits for the OutputReady of both multiplier E and multiplier F.
// Unlike adders A and B, every other opcode (not only the add/sub family)
// forwards iInputReady directly.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
            wAddSubC_InputReady = wMultiplicationE_OutputReady &&
                                  wMultiplicationF_OutputReady;

        default:
            wAddSubC_InputReady = iInputReady;
    endcase
end
 
//------------------------------------------------------
wire [`WIDTH-1:0] wSquareRoot_Result;
wire wSquareRoot_OutputReady;
 
 
FixedPointSquareRoot SQROOT1
(
.Clock( Clock ),
.Reset( Reset ),
.Operand( wAddSubB_Result ),
.iInputReady( wAddSubBOutputReady && iOperation == `MAG),
.OutputReady( wSquareRoot_OutputReady ),
.Result( wSquareRoot_Result )
);
//------------------------------------------------------
 
// Modulus helper nets, selected by the result muxes for the `MOD opcode.
// Each net is the bitwise AND of one lane of source A with the result of
// the matching adder. For `MOD the adder operand muxes feed the B lane and
// the constant (1 << `SCALE), with the adder mode bit set to 1.
// NOTE(review): that mode is presumably subtraction, making this
// A & (B - (1 << `SCALE)) -- confirm against FixedAddSub.
// The nets are `LONG_WIDTH wide while the lane inputs are `WIDTH wide.
assign wModulus2N_ResultA = (iChannel_Ax & wAddSubA_Result );
assign wModulus2N_ResultB = (iChannel_Ay & wAddSubB_Result );
assign wModulus2N_ResultC = (iChannel_Az & wAddSubC_Result );
 
 
 
 
 
 
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&//
//****Mux for ResultA***
// Notice that the Dot Product and the Magnitude results are
// output on ResultA.
 
// Result mux for lane A (X).
// Selects, per opcode, which functional-unit output or input lane is
// presented on ResultA. Adder results are narrowed by taking bit 63 as the
// top bit followed by bits [30:0]. Unknown opcodes produce zero.
always @ ( * )
begin
    case ( iOperation )
        // ---- source A, lane X passed straight through ----
        `RETURN, `COPY, `SETX, `RET, `MULP:
            ResultA = iChannel_Ax;

        // ---- source B, lane X passed straight through ----
        `SETY, `SETZ, `XCHANGEX:
            ResultA = iChannel_Bx;

        // ---- adder A result ----
        `ADD, `SUB, `CROSS, `DEC,
        `INC, `INCX, `INCY, `INCZ:
            ResultA = wAddSubA_Result[63] ? {1'b1, wAddSubA_Result[30:0]}
                                          : {1'b0, wAddSubA_Result[30:0]};

        // ---- dot product: final sum comes out of adder B ----
        `DOT:
            ResultA = wAddSubB_Result[63] ? {1'b1, wAddSubB_Result[30:0]}
                                          : {1'b0, wAddSubB_Result[30:0]};

        // ---- other functional units ----
        `DIV:        ResultA = wDivisionA_Result;
        `MUL, `IMUL: ResultA = wMultiplicationA_Result[31:0];
        `MAG:        ResultA = wSquareRoot_Result;
        `MOD:        ResultA = wModulus2N_ResultA;
        `SWIZZLE3D:  ResultA = wSwizzleOutputX;
        `TMREAD:     ResultA = iTMEMReadData[95:64];
        `LEA:        ResultA = {16'b0,iCurrentIP};

        // ---- simple bit manipulation of source A, lane X ----
        `UNSCALE:    ResultA = iChannel_Ax >> `SCALE;
        `FRAC:       ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));
        `NEG:        ResultA = ~iChannel_Ax + 1'b1;

        `ZERO:       ResultA = 32'b0;

        // Unknown opcode. The original debug $display/$stop for this arm
        // was already commented out, so nothing is reported here.
        default:     ResultA = 32'b0;
    endcase
end
//------------------------------------------------------
//****Mux for RB***
// Result mux for lane B (Y).
// Selects, per opcode, which functional-unit output or input lane is
// presented on ResultB. Adder results are narrowed by taking bit 63 as the
// top bit followed by bits [30:0]. Unknown opcodes produce zero.
always @ ( * )
begin
    case ( iOperation )
        // ---- source A, lane X ----
        `RETURN, `SETY:
            ResultB = iChannel_Ax;

        // ---- source A, lane Y passed straight through ----
        `COPY, `MULP, `XCHANGEX:
            ResultB = iChannel_Ay;

        // ---- source B, lane Y passed straight through ----
        `SETX, `RET, `SETZ:
            ResultB = iChannel_By;

        // ---- adder B result (DOT also reports its sum on this lane) ----
        `ADD, `SUB, `CROSS, `DOT, `DEC,
        `INC, `INCX, `INCY, `INCZ:
            ResultB = wAddSubB_Result[63] ? {1'b1, wAddSubB_Result[30:0]}
                                          : {1'b0, wAddSubB_Result[30:0]};

        // ---- other functional units ----
        `DIV:        ResultB = wDivisionB_Result;
        `MUL, `IMUL: ResultB = wMultiplicationB_Result[31:0];
        `MAG:        ResultB = wSquareRoot_Result;
        `MOD:        ResultB = wModulus2N_ResultB;
        `SWIZZLE3D:  ResultB = wSwizzleOutputY;
        `TMREAD:     ResultB = iTMEMReadData[63:32];
        `LEA:        ResultB = {16'b0,iCurrentIP};

        // ---- simple bit manipulation of source A, lane Y ----
        `UNSCALE:    ResultB = iChannel_Ay >> `SCALE;
        `FRAC:       ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));
        `NEG:        ResultB = ~iChannel_Ay + 1'b1;

        `ZERO:       ResultB = 32'b0;

        // Unknown opcode. The original debug $display/$stop for this arm
        // was already commented out, so nothing is reported here.
        default:     ResultB = 32'b0;
    endcase
end
//------------------------------------------------------
//****Mux for RC***
always @ ( * )
begin
    // Combinational selection of ResultC from the opcode on iOperation.
    // Arms that take an adder output rebuild a 32-bit word from bit 63
    // (placed in bit 31) and bits 30:0 of that adder's result.
    // Arms are kept in their original relative order; only neighbouring
    // arms with the same right-hand side have been merged.
    case ( iOperation )
        `RETURN:
            ResultC = iChannel_Ax;

        `ADD,`SUB,`CROSS:
            ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]};

        `DIV:
            ResultC = wDivisionC_Result;

        `MUL,`IMUL:
            ResultC = wMultiplicationC_Result[31:0];

        // DOT reads adder B here, not adder C.
        `DOT:
            ResultC = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};

        `MAG:
            ResultC = wSquareRoot_Result;

        `ZERO:
            ResultC = 32'b0;

        `COPY:
            ResultC = iChannel_Az;

        `TMREAD:
            ResultC = iTMEMReadData[31:0];

        `LEA:
            ResultC = {16'b0,iCurrentIP};

        `SWIZZLE3D:
            ResultC = wSwizzleOutputZ;

        // Set operations: each one picks this lane from channel A or B.
        `UNSCALE:
            ResultC = iChannel_Az >> `SCALE;

        `SETX,`RET,`SETY:
            ResultC = iChannel_Bz;

        `SETZ:
            ResultC = iChannel_Ax;

        `INC,`INCX,`INCY,`INCZ,`DEC:
            ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]};

        `MOD:
            ResultC = wModulus2N_ResultC;

        // Keep only the low `SCALE bits of the operand.
        `FRAC:
            ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

        // MULP places multiplier A's low word on this lane.
        `MULP:
            ResultC = wMultiplicationA_Result[31:0];

        // Two's complement negation.
        `NEG:
            ResultC = ~iChannel_Az + 1'b1;

        `XCHANGEX:
            ResultC = iChannel_Az;

        // Any opcode not listed above drives zero. (The DEBUG-only
        // $display/$stop that used to live here was already commented out.)
        default:
            ResultC = 32'b0;
    endcase
end
//------------------------------------------------------------------------
 
 
always @ ( * )
begin
    // Branch-taken flag, decoded from iOperation.
    case (iOperation)
        // JMP/CALL/RET are reported as taken as soon as OutputReady is high.
        `JMP,`CALL,`RET:
            oBranchTaken = OutputReady;

        // Every conditional jump forwards the comparator result.
        `JGX, `JGY, `JGZ,
        `JLX, `JLY, `JLZ,
        `JEQX,`JEQY,`JEQZ,
        `JNEX,`JNEY,`JNEZ,
        `JGEX,`JGEY,`JGEZ,
        `JLEX,`JLEY,`JLEZ:
            oBranchTaken = wArithmeticComparison_Result;

        // Non-branch opcodes never report a taken branch.
        default:
            oBranchTaken = 0;
    endcase
end
 
always @ ( * )
begin
    // Branch-not-taken flag: for any branch opcode it is high when the
    // result is ready (OutputReady) and oBranchTaken is low.
    // For JMP/CALL/RET oBranchTaken equals OutputReady, so this expression
    // evaluates to 0 for those three opcodes.
    case (iOperation)
        `JMP, `CALL,`RET,
        `JGX, `JGY, `JGZ,
        `JLX, `JLY, `JLZ,
        `JEQX,`JEQY,`JEQZ,
        `JNEX,`JNEY,`JNEZ,
        `JGEX,`JGEY,`JGEZ,
        `JLEX,`JLEY,`JLEZ:
            oBranchNotTaken = !oBranchTaken && OutputReady;

        // Non-branch opcodes never report a not-taken branch.
        default:
            oBranchNotTaken = 0;
    endcase
end
//------------------------------------------------------------------------
// Output-ready generation for the add/sub, division, multiplication and
// square-root units.
//
// The division and square-root ready strobes are each captured by an FFT1
// cell wired with D tied high, the unit's own OutputReady on the Clock pin
// and iInputReady on the Reset pin.
// NOTE(review): FFT1 is defined outside this view; presumably Q is cleared
// when a new operation is issued and then stays high once the unit signals
// ready - confirm against the FFT1 definition.

wire wDivisionOutputReadyA;
wire wDivisionOutputReadyB;
wire wDivisionOutputReadyC;
wire wDivisionOutputReady;
wire wSquareRootOutputReady;

// ---- Add/Sub: ready flags are passed straight through -------------------
assign wAddSubAOutputReady = wAddSubA_OutputReady;
assign wAddSubBOutputReady = wAddSubB_OutputReady;
assign wAddSubCOutputReady = wAddSubC_OutputReady;

// ---- Division: one capture cell per lane --------------------------------
FFT1 FFT_DivisionA
(
    .Clock( wDivisionA_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyA )
);

FFT1 FFT_DivisionB
(
    .Clock( wDivisionB_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyB )
);

FFT1 FFT_DivisionC
(
    .Clock( wDivisionC_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyC )
);

// Division is complete only when all three lanes have reported.
assign wDivisionOutputReady =
    ( wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC );

// ---- Multiplication: ready flags are passed straight through ------------
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady;
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady;
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady;
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady;

// NOTE(review): multiplier D is not part of the combined ready term below;
// only A, B and C are - confirm this is intended.
assign wMultiplicationOutputReady =
    ( wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC );

// ---- Square root: single capture cell -----------------------------------
FFT1 FFT_Sqrt
(
    .Clock( wSquareRoot_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wSquareRootOutputReady )
);
//------------------------------------------------------------------------
// Delayed copies of iInputReady used as the ready strobe for fixed-latency
// operations, plus the latched opcode (wOperation) that selects the ready
// source.
wire wOutputDelay1Cycle,wOutputDelay2Cycle,wOutputDelay3Cycle;

// wOperation must be declared BEFORE its first use (the clock expression of
// FFOutputReadyDelay222 below). Referencing it ahead of the declaration
// creates an implicit 1-bit net, and the later vector declaration is then a
// conflicting redeclaration that strict tools reject.
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;

// Latch the opcode on iOperation whenever iInputReady is asserted.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( iInputReady ),
.D( iOperation ),
.Q(wOperation)
);

// Stage 1: iInputReady -> wOutputDelay1Cycle
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2
(
.Clock( Clock ),
.Clear( Reset ),
.D( iInputReady ),
.Q( wOutputDelay1Cycle )
);

// Stage 2: wOutputDelay1Cycle -> wOutputDelay2Cycle
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay22
(
.Clock( Clock ),
.Clear( Reset ),
.D( wOutputDelay1Cycle ),
.Q( wOutputDelay2Cycle )
);

// Stage 3: wOutputDelay2Cycle -> wOutputDelay3Cycle.
// The clock pin is gated: it only follows Clock while the latched opcode is
// `OMWRITE ('==' binds tighter than '&&', so this is
// Clock && (wOperation == `OMWRITE)).
// NOTE(review): a combinationally gated clock can glitch when wOperation
// changes while Clock is high; kept as-is to preserve behaviour - consider a
// clock-enable instead.
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay222
(
.Clock( Clock && wOperation == `OMWRITE),
.Clear( Reset ),
.D( wOutputDelay2Cycle ),
.Q( wOutputDelay3Cycle )
);
 
 
// Mux for the output-ready signal.
// Selects, from the latched opcode wOperation, which unit's ready flag (or
// which delayed copy of iInputReady) drives OutputReady.
// Arms are kept in their original relative order; only neighbouring arms
// with the same source have been merged.
always @ ( * )
begin
    case ( wOperation )
        // Ready on the first delayed copy of iInputReady.
        `UNSCALE,`RETURN,`NOP,`FRAC,`NEG:
            OutputReady = wOutputDelay1Cycle;

        // Output-memory write uses the third delay stage.
        `OMWRITE:
            OutputReady = wOutputDelay3Cycle;

        // One cycle after TMEM data available is asserted.
        `TMREAD:
            OutputReady = wTMReadOutputReady;

`ifdef DEBUG
        // Debug print behaves as a NOP in terms of the ALU.
        `DEBUG_PRINT:
            OutputReady = wOutputDelay1Cycle;
`endif

        // Three-lane adder operations wait for all three adders.
        `ADD,`INC,`INCX,`INCY,`INCZ,`SUB,`DEC:
            OutputReady = wAddSubAOutputReady &&
                          wAddSubBOutputReady &&
                          wAddSubCOutputReady;

        `DIV:
            OutputReady = wDivisionOutputReady;

        `MUL,`IMUL:
            OutputReady = wMultiplicationOutputReady;

        // MULP only waits for multiplier A.
        `MULP:
            OutputReady = wMultiplicationOutputReadyA;

        // DOT only waits for adder B.
        `DOT:
            OutputReady = wAddSubBOutputReady;

        `CROSS:
            OutputReady = wAddSubAOutputReady &&
                          wAddSubBOutputReady &&
                          wAddSubCOutputReady;

        `MAG:
            OutputReady = wSquareRootOutputReady;

        // Ready on the first delayed copy of iInputReady.
        `ZERO,`COPY,`SWIZZLE3D,
        `SETX,`SETY,`SETZ,`JMP,`LEA,`CALL,`RET:
            OutputReady = wOutputDelay1Cycle;

        // Conditional jumps wait for the comparator.
        `JGX, `JGY, `JGZ,
        `JLX, `JLY, `JLZ,
        `JEQX,`JEQY,`JEQZ,
        `JNEX,`JNEY,`JNEZ,
        `JGEX,`JGEY,`JGEZ,
        `JLEX,`JLEY,`JLEZ:
            OutputReady = ArithmeticComparison_OutputReady;

        //TODO: wait 1 more cycle
        `MOD:
            OutputReady = wAddSubAOutputReady &&
                          wAddSubBOutputReady &&
                          wAddSubCOutputReady;

        `XCHANGEX:
            OutputReady = wOutputDelay1Cycle;

        default:
        begin
            // Was 32'b0: every visible driver of OutputReady is a scalar,
            // so use a 1-bit literal and avoid a width-mismatch truncation.
            OutputReady = 1'b0;
            // NOTE(review): the case decodes wOperation (the latched
            // opcode) but this message prints iOperation; the two can
            // differ once a new opcode is presented. Text kept unchanged.
            $display("*** ALU ERROR: iOperation = %d ***",iOperation);
        end
    endcase
end
 
endmodule
//------------------------------------------------------------------------
/EXE/Module_InstructionDecode.v
0,0 → 1,156
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//------------------------------------------------------------------------
// InstructionDecode
//
// Splits an encoded instruction into operation, destination and two source
// operands.
//   * Bits [47:32] are latched as oDestination.
//   * Bits [31:16] drive oRamAddress1.
//   * oRamAddress0 is chosen between bits [15:0] and bits [47:32] by the
//     immediate flag (bit `INSTRUCTION_IMM_BITPOS).
//   * The top `INSTRUCTION_OP_LENGTH bits are latched as oOperation.
// A source whose RAM address equals iLastDestination is replaced by
// iDataForward instead of the value read from RAM.
//------------------------------------------------------------------------
module InstructionDecode
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iInstructionAvailable,
    input  wire [`INSTRUCTION_WIDTH-1:0]    iEncodedInstruction,
    input  wire [`DATA_ROW_WIDTH-1:0]       iRamValue0,
    input  wire [`DATA_ROW_WIDTH-1:0]       iRamValue1,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oRamAddress0,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oRamAddress1,
    output wire [`INSTRUCTION_OP_LENGTH-1:0] oOperation,
    output wire [`DATA_ROW_WIDTH-1:0]       oSource0,
    output wire [`DATA_ROW_WIDTH-1:0]       oSource1,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDestination,
    input  wire [`DATA_ROW_WIDTH-1:0]       iDataForward,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iLastDestination,

`ifdef DEBUG
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iDebug_CurrentIP,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oDebug_CurrentIP,
`endif

    // (An iIP input / oReturnAddress output pair used to be declared here;
    //  it is disabled, together with the return-address latch.)
    output wire                             oDataReadyForExe
);

    wire                        wInmediateOperand;
    wire [`DATA_ROW_WIDTH-1:0]  wSource0;
    wire [`DATA_ROW_WIDTH-1:0]  wSource1;
    wire                        wTriggerSource0DataForward;
    wire                        wTriggerSource1DataForward;

`ifdef DEBUG
    // The debug instruction pointer is passed through unchanged.
    assign oDebug_CurrentIP = iDebug_CurrentIP;
`endif

    //--------------------------------------------------------------------
    // Field extraction and RAM read addresses
    //--------------------------------------------------------------------

    // Set when the instruction carries an immediate operand.
    assign wInmediateOperand = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS];

    // Second read address always comes from bits [31:16].
    assign oRamAddress1 = iEncodedInstruction[31:16];

    // First read address: bits [15:0] or the destination field [47:32],
    // selected by the immediate flag.
    // NOTE(review): presumably Sel=0 picks I1 and Sel=1 picks I2, so an
    // immediate instruction reads back the current destination value -
    // confirm against MUXFULLPARALELL_16bits_2SEL.
    MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX
    (
        .Sel( wInmediateOperand ),
        .I1 ( iEncodedInstruction[15:0] ),
        .I2 ( iEncodedInstruction[47:32] ),
        .O1 ( oRamAddress0 )
    );

    //--------------------------------------------------------------------
    // Raw (un-forwarded) operands
    //--------------------------------------------------------------------

    // First operand is whatever RAM returned for oRamAddress0.
    assign wSource0 = iRamValue0;

    // Second operand: for an immediate instruction the row is built from
    // instruction bits [31:16] and [15:0] followed by two zeroed 32-bit
    // words; otherwise it is the RAM value for oRamAddress1.
    assign wSource1 = ( wInmediateOperand )
                    ? {oRamAddress1,iEncodedInstruction[15:0] ,32'b0,32'b0}
                    : iRamValue1;

    //--------------------------------------------------------------------
    // Data forwarding decisions
    //--------------------------------------------------------------------

    // Forward when the address being read equals the last destination.
    assign wTriggerSource0DataForward = (oRamAddress0 == iLastDestination);

    // An immediate second operand is never forwarded.
    assign wTriggerSource1DataForward = (oRamAddress1 == iLastDestination) && !wInmediateOperand;

    //--------------------------------------------------------------------
    // Pipeline registers
    //--------------------------------------------------------------------

    // oDataReadyForExe is iInstructionAvailable passed through one
    // always-enabled register.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
    (
        .Clock ( Clock ),
        .Reset ( Reset ),
        .Enable( 1'b1 ),
        .D     ( iInstructionAvailable ),
        .Q     ( oDataReadyForExe )
    );

    // Latch the destination field while an instruction is available.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2
    (
        .Clock ( Clock ),
        .Reset ( Reset ),
        .Enable( iInstructionAvailable ),
        .D     ( iEncodedInstruction[47:32] ),
        .Q     ( oDestination )
    );

    // Latch the operation field while an instruction is available.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3
    (
        .Clock ( Clock ),
        .Reset ( Reset ),
        .Enable( iInstructionAvailable ),
        .D     ( iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] ),
        .Q     ( oOperation )
    );

    //--------------------------------------------------------------------
    // Output operand selection: raw operand or forwarded data
    //--------------------------------------------------------------------

    MUXFULLPARALELL_96bits_2SEL Source0_Mux
    (
        .Sel( wTriggerSource0DataForward ),
        .I1 ( wSource0 ),
        .I2 ( iDataForward ),
        .O1 ( oSource0 )
    );

    MUXFULLPARALELL_96bits_2SEL Source1_Mux
    (
        .Sel( wTriggerSource1DataForward ),
        .I1 ( wSource1 ),
        .I2 ( iDataForward ),
        .O1 ( oSource1 )
    );

endmodule
 
/EXE/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps
`include "aDefinitions.v"
//------------------------------------------------------------------------
// InstructionEntryPoint
//
// Registers iTrigger into oTriggerIFU. While oTriggerIFU is high, the low
// `ROM_ADDRESS_WIDTH bits of iIMemInput are presented as the entry point and
// as the instruction address; otherwise the entry point is zero and the
// instruction address is iInitialCodeAddress.
//------------------------------------------------------------------------
module InstructionEntryPoint
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iTrigger,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]    iInitialCodeAddress,
    input  wire [`INSTRUCTION_WIDTH-1:0]    iIMemInput,

    output wire                             oEPU_Busy,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oEntryPoint,
    output wire                             oTriggerIFU,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oInstructionAddr
);

    // oTriggerIFU is iTrigger passed through one register.
    FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1
    (
        .Clock( Clock ),
        .Clear( Reset ),
        .D    ( iTrigger ),
        .Q    ( oTriggerIFU )
    );

    // Busy from the trigger itself through the registered trigger.
    assign oEPU_Busy = iTrigger | oTriggerIFU;

    // Entry point is taken from the instruction word only while the
    // registered trigger is high.
    assign oEntryPoint = (oTriggerIFU)
                       ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0]
                       : `ROM_ADDRESS_WIDTH'b0;

    // Address driven out: the entry point while triggered, otherwise the
    // initial code address.
    assign oInstructionAddr = (oTriggerIFU)
                            ? oEntryPoint
                            : iInitialCodeAddress;

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.