URL
https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk
Subversion Repositories theia_gpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/theia_gpu/branches/gpu_8_cores/rtl/GPU/CORES
- from Rev 117 to Rev 128
- ↔ Reverse comparison
Rev 117 → Rev 128
/TOP/Theia_Core.v
0,0 → 1,425
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
/********************************************************************************** |
Description: |
This is the top level block for THEIA. |
THEIA core has 5 main logical blocks called Units. |
This module implements the interconnections between the Units. |
|
Units: |
> EXE: Manages execution logic for the SHADERS. |
> GEO: Manages geometry data structures. |
> IO: Input/Output (Wishbone). |
> MEM: Internal memory, separate for Instructions and data. |
> CONTROL: Main control Finite state machine. |
|
Internal Buses: |
THEIA has separate instruction and data buses. |
THEIA avoids using tri-state buses by having separate input/output |
for each bus. |
There are 2 separate data buses since the Data memory |
has a Dual read channel. |
Please see the MEM unit chapter in the documentation for more details. |
|
External Buses: |
External buses are managed by the IO Unit. |
External buses follow the wishbone protocol. |
Please see the IO unit chapter in the documentation for more details. |
**********************************************************************************/ |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//--------------------------------------------------------------------------
// THEIACORE
//
// Top level of one THEIA core. This module contains no logic of its own
// beyond a handful of continuous assignments; it instantiates and wires
// together four units:
//
//   CU  (ControlUnit)   - main finite state machine
//   MEM (MemoryUnit)    - instruction and data memories, control register
//   EXE (ExecutionUnit) - runs the microcode selected by the CU
//   IO  (IO_Unit)       - Wishbone, OMEM and TMEM bus interfaces
//
// All nets are declared before their first use so the module also
// elaborates on tools that enforce declare-before-use or that would
// otherwise create an implicit scalar net for an undeclared port
// connection.
//
// NOTE(review): several wGEO2_* / wUCODE_* / w2IO_* nets have no driver
// inside this module (there is no geometry unit instance here). They are
// kept, with their original widths, so that connectivity is unchanged.
//--------------------------------------------------------------------------
module THEIACORE
(
    input  wire                     CLK_I,       // Input clock
    input  wire                     RST_I,       // Input reset
    // THEIA interfaces
    input  wire                     MST_I,       // Master signal, THEIA enters configuration mode
                                                 // when this gets asserted (see documentation)
    // Wishbone interface
    input  wire [`WB_WIDTH-1:0]     DAT_I,       // Input data bus (Wishbone)
    output wire [`WB_WIDTH-1:0]     DAT_O,       // Output data bus (Wishbone)
    input  wire                     ACK_I,       // Input ack
    output wire                     ACK_O,       // Output ack
    output wire [`WB_WIDTH-1:0]     ADR_O,       // Output address
    input  wire [`WB_WIDTH-1:0]     ADR_I,       // Input address
    output wire                     WE_O,        // Output write enable
    input  wire                     WE_I,        // Input write enable
    output wire                     STB_O,       // Strobe signal, see Wishbone documentation
    input  wire                     STB_I,       // Strobe signal, see Wishbone documentation
    output wire                     CYC_O,       // Bus cycle signal, see Wishbone documentation
    input  wire                     CYC_I,       // Bus cycle signal, see Wishbone documentation
    output wire [1:0]               TGC_O,       // Bus cycle tag, see THEIA documentation
    input  wire [1:0]               TGA_I,       // Input address tag, see THEIA documentation
    output wire [1:0]               TGA_O,       // Output address tag, see THEIA documentation
    input  wire [1:0]               TGC_I,       // Bus cycle tag, see THEIA documentation
                                                 // (not connected to any unit in this module)
    input  wire                     GNT_I,       // Bus arbiter 'Granted' signal, see THEIA documentation
    input  wire                     RENDREN_I,   // Forwarded to CU.iRenderEnable

    output wire                     GRDY_O,      // Data latched (follows CU.oFlipMem)
    input  wire                     STDONE_I,    // Scene traverse complete
    input  wire                     HDA_I,       // Forwarded to CU.iHostDataAvailable
    output wire                     RCOMMIT_O,   // Driven by CU.oResultCommited

    // Output memory (OMEM) write port, driven by the IO unit
    output wire [`WB_WIDTH-1:0]     OMEM_DAT_O,
    output wire [`WB_WIDTH-1:0]     OMEM_ADR_O,
    output wire                     OMEM_WE_O,

    // Texture memory (TMEM) bus, handled by the IO unit
    input  wire                     TMEM_ACK_I,
    input  wire [`WB_WIDTH-1:0]     TMEM_DAT_I,
    output wire [`WB_WIDTH-1:0]     TMEM_ADR_O,
    output wire                     TMEM_WE_O,
    output wire                     TMEM_STB_O,
    output wire                     TMEM_CYC_O,
    input  wire                     TMEM_GNT_I,

`ifdef DEBUG
    input  wire [`MAX_CORES-1:0]    iDebug_CoreID,
`endif
    // Control register
    input  wire [15:0]              CREG_I,
    output wire                     DONE_O
);

//--------------------------------------------------------------------------
// Clock / reset aliases
//--------------------------------------------------------------------------
wire Clock, Reset;
assign Clock = CLK_I;
assign Reset = RST_I;

//--------------------------------------------------------------------------
// Net declarations (everything is declared here, ahead of any use)
//--------------------------------------------------------------------------

// Control unit outputs
wire                            wCU2_FlipMem;
wire                            wCU2_FlipMemEnabled;
wire                            wCU2__ExecuteMicroCode;
wire                            wCU2_GEO__GeometryFetchEnable;
wire                            wCU2_GEO__TriggerTFF;
wire                            wCU2_GEO__SetPitch, wCU2_GEO__IncPicth;
wire                            wCU2_IO__WritePixel;
wire                            wCU2_BCU__ACK;
wire [`ROM_ADDRESS_WIDTH-1:0]   InitialCodeAddress;

// Execution unit <-> memory unit
wire [`DATA_ROW_WIDTH-1:0]      wEXE_2__MEM_WriteData;
wire [`DATA_ADDRESS_WIDTH-1:0]  wEXE_2__MEM_wDataWriteAddress;
wire                            wEXE_2__DataWriteEnable;
wire [`DATA_ADDRESS_WIDTH-1:0]  wEXE_2__MEM_DataReadAddress0, wEXE_2__MEM_DataReadAddress1;
wire [95:0]                     wMEM_2__EXE_DataRead0, wMEM_2__EXE_DataRead1;
wire [95:0]                     wMEM_2__IO_DataRead0, wMEM_2__IO_DataRead1;
wire [`ROM_ADDRESS_WIDTH-1:0]   wInstructionPointer1, wInstructionPointer2;
wire [`INSTRUCTION_WIDTH-1:0]   wEncodedInstruction1, wEncodedInstruction2;
wire                            wCU2__MicrocodeExecutionDone;
wire                            wIFU2__MicroCodeReturnValue;

// Execution unit <-> IO unit (OMEM / TMEM paths)
wire [`DATA_ROW_WIDTH-1:0]      wEXE_2__IO_WriteAddress;
wire [`DATA_ROW_WIDTH-1:0]      wEXE_2__IO_WriteData;
wire                            wEXE_2__IO_OMEMWriteEnable;
wire [`DATA_ROW_WIDTH-1:0]      wEXE_2__IO_TMEMAddress;
wire [`DATA_ROW_WIDTH-1:0]      wIO_2_EXE__TMEMData;
wire                            wIO_2_EXE__DataAvailable;
wire                            wEXE_2_IO__DataRequest;

// IO unit <-> memory unit
wire [`DATA_ADDRESS_WIDTH-1:0]  wIO2_MEM__DataWriteAddress;
wire [`DATA_ADDRESS_WIDTH-1:0]  wIO_2_MEM__DataReadAddress0;
wire [`DATA_ADDRESS_WIDTH-1:0]  wIO_2_MEM__DataReadAddress1;
wire [`DATA_ROW_WIDTH-1:0]      wIO2_MEM__Bus;
wire [`WIDTH-1:0]               wIO2_MEM__Data;
wire [`WIDTH-1:0]               wIO2_WBM__Address;
wire                            wIO2_MEM__DataWriteEnable;
wire                            wIO2_MEM_InstructionWriteEnable;
wire [`ROM_ADDRESS_WIDTH-1:0]   wIO2_MEM__InstructionWriteAddr;
wire [`INSTRUCTION_WIDTH-1:0]   wIO2_MEM__ExternalInstruction;
wire                            wIO2__Done;
wire                            wIO_Busy;

// Muxed controls feeding the IO unit
wire                            w2IO__AddrIsImm;
wire [`WIDTH-1:0]               w2IO__AddressOffset;
wire [`DATA_ADDRESS_WIDTH-1:0]  w2IO__Adr_O_Pointer;
wire [`DATA_ADDRESS_WIDTH-1:0]  w2IO__DataWriteAddress;
wire                            w2IO__Store;
wire                            w2IO__EnableWBMaster;
wire                            w2IO__SetAddress;
wire                            w2IO_WriteBack_Set;
wire                            w2IO_MasterCycleType;
wire                            w2MEM_FlipMemory;

// Geometry-unit side nets (no driver visible in this module)
wire [`DATA_ADDRESS_WIDTH-1:0]  wGEO2_IO__Adr_O_Pointer;
wire                            wGEO2_IO__AddrIsImm;
wire [31:0]                     wGEO2_IO__AddressOffset;
wire                            wGEO2_IO__EnableWBMaster;
wire                            wGEO2_IO__SetAddress;
wire                            wGEO2_CU__RequestAABBIU;
wire                            wGEO2_CU__RequestBIU;
wire                            wGEO2_CU__RequestTCC;
wire                            wGEO2_CU__GeometryUnitDone;
wire                            wGEO2_CU__Sync;
wire                            wGEO2_CU__TFFDone;
wire                            wGEO2__RequestingTextures;
wire [`WIDTH-1:0]               wGEO2__CurrentPitch, wCU2_GEO_Pitch;

// Microcode RAM side nets (no driver visible in this module)
wire [`DATA_ROW_WIDTH-1:0]      wUCODE_RAMBus;
wire [`DATA_ADDRESS_WIDTH-1:0]  wUCODE_RAMAddress;
wire [`DATA_ADDRESS_WIDTH-1:0]  wUCODE_RAMReadAddress0, wUCODE_RAMReadAddress1;
wire                            wUCODE_RAMWriteEnable;

// Remaining nets kept from the original declaration list
wire                            wEXE2__uCodeDone;
wire                            wEXE2_IFU__EXEBusy;
wire [`DATA_ADDRESS_WIDTH-1:0]  wEXE2_IDU_DataFordward_LastDestination;
wire                            wALU2_EXE__BranchTaken;
wire                            wALU2_IFU_BranchNotTaken;
wire                            wIDU2_IFU__IDUBusy;

// Control register wires
wire [15:0]                     wCR2_ControlRegister;
wire                            wCR2_TextureMappingEnabled;

`ifdef DEBUG
wire [`ROM_ADDRESS_WIDTH-1:0]   wDEBUG_IDU2_EXE_InstructionPointer;
`endif

//--------------------------------------------------------------------------
// Glue logic
//--------------------------------------------------------------------------

// When we flip the SMEM, this means we are ready to receive more data
assign GRDY_O = wCU2_FlipMem;

assign wCR2_TextureMappingEnabled = wCR2_ControlRegister[ `CR_EN_TEXTURE ];

// The memory flip only reaches MEM while the CU has flipping enabled.
//assign w2MEM_FlipMemory = (wCU2__ExecuteMicroCode | wCU2_FlipMem ) & wCU2_FlipMemEnabled;
assign w2MEM_FlipMemory = wCU2_FlipMem & wCU2_FlipMemEnabled;

assign TGA_O = (wGEO2__RequestingTextures) ? 2'b01 : 2'b00;

// NOTE(review): wEXE_2__MEM_DataReadAddress1 is also driven by
// EXE.oDataReadAddress1 further down, so this continuous assignment makes
// it a multiply-driven net, and wUCODE_RAMReadAddress1 has no driver in
// this module. Left exactly as it was to preserve behaviour; confirm the
// intent before synthesizing.
assign wEXE_2__MEM_DataReadAddress1 = (wCU2_IO__WritePixel == 0) ? wUCODE_RAMReadAddress1 : wIO_2_MEM__DataReadAddress1;

assign w2IO__EnableWBMaster = (wCU2_IO__WritePixel == 0) ? wGEO2_IO__EnableWBMaster : wCU2_IO__WritePixel;
assign w2IO__AddrIsImm      = 0; //(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0;
assign w2IO__AddressOffset  = 0; //(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0;
assign w2IO__Adr_O_Pointer  = (wCU2_IO__WritePixel == 0) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O;
//assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION;

assign w2IO_MasterCycleType = (wCU2_IO__WritePixel) ? `WB_SIMPLE_WRITE_CYCLE : `WB_SIMPLE_READ_CYCLE;

assign w2IO__SetAddress     = (wCU2_IO__WritePixel == 0) ? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch;

//--------------------------------------------------------------------------
// Control Unit instance
//--------------------------------------------------------------------------
ControlUnit CU
(
    .Clock( Clock ),
    .Reset( Reset ),
    .oFlipMemEnabled(         wCU2_FlipMemEnabled           ),
    .oFlipMem(                wCU2_FlipMem                  ),
    .iControlRegister(        wCR2_ControlRegister          ),
    //.oRamBusOwner(          RamBusOwner                   ),
    .oGFUEnable(              wCU2_GEO__GeometryFetchEnable ),
    .iTriggerAABBIURequest(   wGEO2_CU__RequestAABBIU       ),
    .iTriggerBIURequest(      wGEO2_CU__RequestBIU          ),
    .iTriggertTCCRequest(     wGEO2_CU__RequestTCC          ),
    .oUCodeEnable(            wCU2__ExecuteMicroCode        ),
    .oCodeInstructioPointer(  InitialCodeAddress            ),
    .iUCodeDone(              wCU2__MicrocodeExecutionDone  ),
    .iIODone(                 wIO2__Done                    ),
    .oIOWritePixel(           wCU2_IO__WritePixel           ),
    .iUCodeReturnValue(       wIFU2__MicroCodeReturnValue   ),
    .iGEOSync(                wGEO2_CU__Sync                ),
    .iTFFDone(                wGEO2_CU__TFFDone             ),
    .oTriggerTFF(             wCU2_GEO__TriggerTFF          ),
    .MST_I(                   MST_I                         ),
    .oSetCurrentPitch(        wCU2_GEO__SetPitch            ),
    .iGFUDone(                wGEO2_CU__GeometryUnitDone    ),
    .iRenderEnable(           RENDREN_I                     ),
    .iSceneTraverseComplete(  STDONE_I                      ),
    .oResultCommited(         RCOMMIT_O                     ),
    .iHostDataAvailable(      HDA_I                         ),

`ifdef DEBUG
    .iDebug_CoreID(           iDebug_CoreID                 ),
`endif
    .oDone(                   DONE_O                        )
);

//--------------------------------------------------------------------------
// Memory Unit instance
//--------------------------------------------------------------------------
MemoryUnit MEM
(
    .Clock( Clock ),
    .Reset( Reset ),

    .iFlipMemory(               w2MEM_FlipMemory                ),

    // Data bus to/from EXE
    .iDataReadAddress1_EXE(     wEXE_2__MEM_DataReadAddress0    ),
    .iDataReadAddress2_EXE(     wEXE_2__MEM_DataReadAddress1    ),
    .oData1_EXE(                wMEM_2__EXE_DataRead0           ),
    .oData2_EXE(                wMEM_2__EXE_DataRead1           ),
    .iDataWriteEnable_EXE(      wEXE_2__DataWriteEnable         ),
    .iDataWriteAddress_EXE(     wEXE_2__MEM_wDataWriteAddress   ),
    .iData_EXE(                 wEXE_2__MEM_WriteData           ),

    // Data bus to/from IO
    .iDataReadAddress1_IO(      wIO_2_MEM__DataReadAddress0     ),
    .iDataReadAddress2_IO(      wIO_2_MEM__DataReadAddress1     ),
    .oData1_IO(                 wMEM_2__IO_DataRead0            ),
    .oData2_IO(                 wMEM_2__IO_DataRead1            ),
    .iDataWriteEnable_IO(       wIO2_MEM__DataWriteEnable       ),
    .iDataWriteAddress_IO(      wIO2_MEM__DataWriteAddress      ),
    .iData_IO(                  wIO2_MEM__Bus                   ),

`ifdef DEBUG
    .iDebug_CoreID(             iDebug_CoreID                   ),
`endif

    // Instruction bus
    .iInstructionReadAddress1(  wInstructionPointer1            ),
    .iInstructionReadAddress2(  wInstructionPointer2            ),
    .oInstruction1(             wEncodedInstruction1            ),
    .oInstruction2(             wEncodedInstruction2            ),
    .iInstructionWriteEnable(   wIO2_MEM_InstructionWriteEnable ),
    .iInstructionWriteAddress(  wIO2_MEM__InstructionWriteAddr  ),
    .iInstruction(              wIO2_MEM__ExternalInstruction   ),
    .iControlRegister(          CREG_I                          ),
    .oControlRegister(          wCR2_ControlRegister            )
);

//--------------------------------------------------------------------------
// Execution Unit instance
//--------------------------------------------------------------------------
ExecutionUnit EXE
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iInitialCodeAddress(   InitialCodeAddress              ),
    .iInstruction1(         wEncodedInstruction1            ),
    .iInstruction2(         wEncodedInstruction2            ),
    .oInstructionPointer1(  wInstructionPointer1            ),
    .oInstructionPointer2(  wInstructionPointer2            ),
    .iDataRead0(            wMEM_2__EXE_DataRead0           ),
    .iDataRead1(            wMEM_2__EXE_DataRead1           ),
    .iTrigger(              wCU2__ExecuteMicroCode          ),
    .oDataReadAddress0(     wEXE_2__MEM_DataReadAddress0    ),
    .oDataReadAddress1(     wEXE_2__MEM_DataReadAddress1    ),
    .oDataWriteEnable(      wEXE_2__DataWriteEnable         ),
    .oDataWriteAddress(     wEXE_2__MEM_wDataWriteAddress   ),
    .oDataBus(              wEXE_2__MEM_WriteData           ),
    .oReturnCode(           wIFU2__MicroCodeReturnValue     ),

    // OMEM write path (towards the IO unit)
    .oOMEMWriteAddress(     wEXE_2__IO_WriteAddress         ),
    .oOMEMWriteData(        wEXE_2__IO_WriteData            ),
    .oOMEMWriteEnable(      wEXE_2__IO_OMEMWriteEnable      ),

    // TMEM read path (request/response with the IO unit)
    .oTMEMReadAddress(      wEXE_2__IO_TMEMAddress          ),
    .iTMEMReadData(         wIO_2_EXE__TMEMData             ),
    .iTMEMDataAvailable(    wIO_2_EXE__DataAvailable        ),
    .oTMEMDataRequest(      wEXE_2_IO__DataRequest          ),

`ifdef DEBUG
    .iDebug_CoreID(         iDebug_CoreID                   ),
`endif
    .oDone(                 wCU2__MicrocodeExecutionDone    )
);

//--------------------------------------------------------------------------
// IO Unit instance
//--------------------------------------------------------------------------
IO_Unit IO
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iEnable(                   1'b0                            ), // tied low; was w2IO__EnableWBMaster
    .iBusCyc_Type(              w2IO_MasterCycleType            ),

    .iStore(                    1'b1                            ), // tied high; was w2IO__Store
    .iAdr_DataWriteBack(        w2IO__DataWriteAddress          ),
    .iAdr_O_Set(                w2IO__SetAddress                ),
    .iAdr_O_Imm(                w2IO__AddressOffset             ),
    .iAdr_O_Type(               w2IO__AddrIsImm                 ),
    .iAdr_O_Pointer(            w2IO__Adr_O_Pointer             ),
    .iReadDataBus(              wMEM_2__IO_DataRead0            ),
    .iReadDataBus2(             wMEM_2__IO_DataRead1            ),
    .iDat_O_Pointer(            `OREG_PIXEL_COLOR               ),

    .oDataReadAddress(          wIO_2_MEM__DataReadAddress0     ),
    .oDataReadAddress2(         wIO_2_MEM__DataReadAddress1     ),
    .oDataWriteAddress(         wIO2_MEM__DataWriteAddress      ),
    .oDataBus(                  wIO2_MEM__Bus                   ),
    .oInstructionBus(           wIO2_MEM__ExternalInstruction   ),

    .oDataWriteEnable(          wIO2_MEM__DataWriteEnable       ),
    .oData(                     wIO2_MEM__Data                  ),
    .oInstructionWriteEnable(   wIO2_MEM_InstructionWriteEnable ),
    .oInstructionWriteAddress(  wIO2_MEM__InstructionWriteAddr  ),
    .iWriteBack_Set(            w2IO_WriteBack_Set              ),
    .oBusy(                     wIO_Busy                        ),
    .oDone(                     wIO2__Done                      ),

    // OMEM write path
    .iOMEM_WriteAddress(        wEXE_2__IO_WriteAddress         ),
    .iOMEM_WriteData(           wEXE_2__IO_WriteData            ),
    .iOMEM_WriteEnable(         wEXE_2__IO_OMEMWriteEnable      ),
    .OMEM_DAT_O(                OMEM_DAT_O                      ),
    .OMEM_ADR_O(                OMEM_ADR_O                      ),
    .OMEM_WE_O(                 OMEM_WE_O                       ),

    // TMEM read path
    .oTMEMReadData(             wIO_2_EXE__TMEMData             ),
    .iTMEMDataRequest(          wEXE_2_IO__DataRequest          ),
    .iTMEMReadAddress(          wEXE_2__IO_TMEMAddress          ),
    .oTMEMDataAvailable(        wIO_2_EXE__DataAvailable        ),

    .TMEM_ACK_I(                TMEM_ACK_I                      ),
    .TMEM_DAT_I(                TMEM_DAT_I                      ),
    .TMEM_ADR_O(                TMEM_ADR_O                      ),
    .TMEM_WE_O(                 TMEM_WE_O                       ),
    .TMEM_STB_O(                TMEM_STB_O                      ),
    .TMEM_CYC_O(                TMEM_CYC_O                      ),
    .TMEM_GNT_I(                TMEM_GNT_I                      ),

    .MST_I(                     MST_I                           ),
    // Wishbone interface
    .DAT_I(                     DAT_I                           ),
    .DAT_O(                     DAT_O                           ),
    .ACK_I(                     ACK_I & GNT_I                   ), // ack only counts while the bus is granted
    .ACK_O(                     ACK_O                           ),
    .ADR_O(                     ADR_O                           ),
    .ADR_I(                     ADR_I                           ),
    .WE_O(                      WE_O                            ),
    .WE_I(                      WE_I                            ),
    .STB_O(                     STB_O                           ),
    .STB_I(                     STB_I                           ),
    .CYC_O(                     CYC_O                           ),
    .TGA_I(                     TGA_I                           ),
    .CYC_I(                     CYC_I                           ),
    .GNT_I(                     GNT_I                           ),
    .TGC_O(                     TGC_O                           )
);
//--------------------------------------------------------------------------
endmodule
/CONTROL/Unit_Control.v
0,0 → 1,1211
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/********************************************************************************** |
Description: |
|
This is the main Finite State Machine. |
|
**********************************************************************************/ |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
// State encodings for the ControlUnit finite state machine. These macros
// are the case labels matched against CurrentState, which ControlUnit
// declares as a 6-bit register, so every value here must stay below 64.
//
// The numbering is not contiguous: 18, 19 and 28 are unassigned because
// the defines that would have used them are commented out below.
// Note that WAIT_FOR_TCC (36) is the only state without the CU_ prefix.
`define CU_AFTER_RESET_STATE 0 |
`define CU_WAIT_FOR_INITIAL_CONFIGURATION 1 |
`define CU_TRIGGER_CONFIGURATION_DATA_READ 2 |
`define CU_WAIT_FOR_CONFIG_DATA_READ 3 |
`define CU_ACK_CONFIG_DATA_READ 4 |
`define CU_PRECALCULATE_CONSTANTS 5 |
`define CU_WAIT_FOR_CONSTANT 6 |
`define CU_ACK_PRECALCULATE_CONSTANTS 7 |
`define CU_WAIT_FOR_TASK 8 |
`define CU_READ_TASK_DATA 9 |
`define CU_WAIT_TASK_DATA_READ 10 |
`define CU_ACK_TASK_DATA_READ 11 |
`define CU_TRIGGER_RGU 12 |
`define CU_WAIT_FOR_RGU 13 |
`define CU_ACK_RGU 14 |
`define CU_TRIGGER_GEO 15 |
`define CU_WAIT_FOR_GEO_SYNC 16 |
// 17 is now used by CU_TRIGGER_TCC; the older candidates for 17-19 are
// left commented out.
//`define CU_CHECK_AABBIU_REQUEST 17 |
`define CU_TRIGGER_TCC 17 |
//`define CU_CHECK_BIU_REQUEST 18 |
//`define CU_TRIGGER_TFF 18 |
//`define CU_CHECK_GEO_DONE 19 |
//`define CU_WAIT_FOR_TFF 19 |
`define CU_TRIGGER_AABBIU 20 |
`define CU_WAIT_FOR_AABBIU 21 |
`define CU_TRIGGER_MAIN 22 |
`define CU_WAIT_FOR_MAIN 23 |
`define CU_ACK_MAIN 24 |
`define CU_TRIGGER_PSU 25 |
`define CU_WAIT_FOR_PSU 26 |
`define CU_ACK_PSU 27 |
// 28 is unassigned: CU_TRIGGER_PCU is disabled while the matching
// WAIT/ACK states below are still defined.
//`define CU_TRIGGER_PCU 28 |
`define CU_WAIT_FOR_PCU 29 |
`define CU_ACK_PCU 30 |
`define CU_CHECK_HIT 31 |
`define CU_CLEAR_REGISTERS 32 |
`define CU_WAIT_CLEAR_REGISTERS 33 |
`define CU_ACK_CLEAR_REGISTERS 34 |
`define CU_TRIGGER_PSU_WITH_TEXTURE 35 |
`define WAIT_FOR_TCC 36 |
`define CU_TRIGGER_NPU 37 |
`define CU_WAIT_NPU 38 |
`define CU_ACK_NPU 39 |
`define CU_PERFORM_INTIAL_CONFIGURATION 40 |
`define CU_SET_PICTH 41 |
`define CU_TRIGGER_USERCONSTANTS 42 |
`define CU_WAIT_USERCONSTANTS 43 |
`define CU_ACK_USERCONSTANTS 44 |
`define CU_TRIGGER_USERPIXELSHADER 45 |
`define CU_WAIT_FOR_USERPIXELSHADER 46 |
`define CU_ACK_USERPIXELSHADER 47 |
`define CU_DONE 48 |
`define CU_WAIT_FOR_RENDER_ENABLE 49 |
`define CU_ACK_TCC 50 |
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51 |
//-------------------------------------------------------------- |
module ControlUnit |
( |
|
input wire Clock, |
input wire Reset, |
input wire[15:0] iControlRegister, |
output reg oGFUEnable, |
input wire iTriggerAABBIURequest, |
input wire iTriggerBIURequest, |
input wire iTriggertTCCRequest, |
output reg oUCodeEnable, |
output reg[`ROM_ADDRESS_WIDTH-1:0] oCodeInstructioPointer, |
input wire iUCodeDone, |
input wire iUCodeReturnValue, |
input wire iGFUDone, |
input wire iGEOSync, |
output reg oTriggerTFF, |
input wire iTFFDone, |
input wire MST_I, |
//output reg[2:0] //oRamBusOwner, |
input wire iIODone, |
output reg oSetCurrentPitch, |
output reg oFlipMemEnabled, |
output reg oFlipMem, |
output reg oIOWritePixel, |
input wire iRenderEnable, |
input wire iSceneTraverseComplete, |
input wire iHostDataAvailable, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
|
output reg oResultCommited, |
output reg oDone |
|
); |
|
//Internal State Machine varibles |
reg [5:0] CurrentState; |
reg [5:0] NextState; |
integer ucode_file; |
reg rResetHitFlop,rHitFlopEnable; |
wire wHit; |
|
`ifdef DUMP_CODE |
integer log; |
|
initial |
begin |
|
//$display("Opening ucode dump file....\n"); |
ucode_file = $fopen("CU.log","w"); |
end |
|
`endif |
|
|
|
//-------------------------------------------------------------- |
FFToggleOnce_1Bit FFTO1 |
( |
.Clock( Clock ), |
.Reset( rResetHitFlop ), |
.Enable( rHitFlopEnable && iUCodeDone ), |
.S( iUCodeReturnValue ), |
.Q( wHit ) |
); |
//-------------------------------------------------------------- |
|
`ifdef DEBUG_CU |
always @ ( wHit ) |
begin |
`LOGME "*** Triangle HIT ***\n"); |
end |
`endif |
|
//Next states logic and Reset sequence |
always @(posedge Clock or posedge Reset) |
begin |
|
if (Reset) |
CurrentState <= `CU_AFTER_RESET_STATE; |
else |
CurrentState <= NextState; |
|
end |
|
//-------------------------------------------------------------- |
always @ ( * ) |
begin |
case (CurrentState) |
//----------------------------------------- |
`CU_AFTER_RESET_STATE: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d CU_AFTER_RESET_STATE\n",$time); |
`endif |
|
//oRamBusOwner <= 0; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 1; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_INITIAL_CONFIGURATION; |
|
end |
//----------------------------------------- |
|
`CU_WAIT_FOR_INITIAL_CONFIGURATION: |
begin |
//$display("CORE: %d CU_WAIT_FOR_INITIAL_CONFIGURATION", iDebug_CoreID); |
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_INITIAL_CONFIGURATION\n",$time); |
// `endif |
|
//oRamBusOwner <= 0; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I ) |
NextState <= `CU_PERFORM_INTIAL_CONFIGURATION;//`CU_WAIT_FOR_CONFIG_DATA_READ; |
else |
NextState <= `CU_WAIT_FOR_INITIAL_CONFIGURATION; |
|
|
end |
//----------------------------------------- |
`CU_PERFORM_INTIAL_CONFIGURATION: |
begin |
//$display("CORE: %d CU_PERFORM_INTIAL_CONFIGURATION", iDebug_CoreID); |
//oRamBusOwner <= 0; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I == 0 && iRenderEnable == 1'b1) |
NextState <= `CU_CLEAR_REGISTERS;//`CU_WAIT_FOR_CONFIG_DATA_READ; |
else |
NextState <= `CU_PERFORM_INTIAL_CONFIGURATION; |
|
|
end |
//----------------------------------------- |
`CU_CLEAR_REGISTERS: |
begin |
//$display("CORE: %d CU_CLEAR_REGISTERS", iDebug_CoreID); |
`ifdef DEBUG_CU |
`LOGME"%d CU_CLEAR_REGISTERS\n",$time); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable <= 0; |
oUCodeEnable <= 1; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
|
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_CLEAR_REGISTERS; |
end |
//----------------------------------------- |
`CU_WAIT_CLEAR_REGISTERS: |
begin |
// `ifdef DEBUG_CU |
// `LOGME"%d CU_WAIT_CLEAR_REGISTERS\n",$time); |
// `endif |
//$display("CORE: %d CU_WAIT_CLEAR_REGISTERS", iDebug_CoreID); |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_CLEAR_REGISTERS; |
else |
NextState <= `CU_WAIT_CLEAR_REGISTERS; |
|
end |
//----------------------------------------- |
`CU_ACK_CLEAR_REGISTERS: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d CU_ACK_CLEAR_REGISTERS\n", $time); |
`endif |
|
//$display("CORE: %d CU_ACK_CLEAR_REGISTERS", iDebug_CoreID); |
|
//oRamBusOwner <= 0; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_CONFIG_DATA_READ; |
end |
|
|
|
//----------------------------------------- |
`CU_WAIT_FOR_CONFIG_DATA_READ: |
begin |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_CONFIG_DATA_READ\n",$time); |
// `endif |
|
|
//$display("CORE: %d CU_WAIT_FOR_CONFIG_DATA_READ", iDebug_CoreID); |
|
//oRamBusOwner <= 0;//`REG_BUS_OWNED_BY_BCU; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I == 0 ) |
NextState <= `CU_PRECALCULATE_CONSTANTS; |
else |
NextState <= `CU_WAIT_FOR_CONFIG_DATA_READ; |
|
end |
//----------------------------------------- |
`CU_PRECALCULATE_CONSTANTS: |
begin |
//$display("CORE: %d CU_PRECALCULATE_CONSTANTS", iDebug_CoreID); |
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_PRECALCULATE_CONSTANTS\n", $time); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_CPPU; |
oGFUEnable <= 0; |
oUCodeEnable <= 1; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_CONSTANT; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_CONSTANT: |
begin |
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_CONSTANT\n", $time); |
// `endif |
|
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_CPPU; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_PRECALCULATE_CONSTANTS; |
else |
NextState <= `CU_WAIT_FOR_CONSTANT; |
|
end |
//----------------------------------------- |
`CU_ACK_PRECALCULATE_CONSTANTS: |
begin |
//$display("CORE: %d CU_ACK_PRECALCULATE_CONSTANTS", iDebug_CoreID); |
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time); |
`endif |
|
|
//oRamBusOwner <= 0;//`REG_BUS_OWNED_BY_BCU; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_TRIGGER_USERCONSTANTS;//CU_WAIT_FOR_TASK; |
|
end |
//----------------------------------------- |
|
`CU_TRIGGER_USERCONSTANTS: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_TRIGGER_USERCONSTANTS\n",$time); |
`endif |
|
//$display("CORE: %d CU_TRIGGER_USERCONSTANTS", iDebug_CoreID); |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_USERCONSTANTS; |
oGFUEnable <= 0; |
oUCodeEnable <= 1; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_USERCONSTANTS; |
end |
//----------------------------------------- |
`CU_WAIT_USERCONSTANTS: |
begin |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time); |
// `endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_USERCONSTANTS; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_USERCONSTANTS; |
else |
NextState <= `CU_WAIT_USERCONSTANTS; |
end |
//----------------------------------------- |
`CU_ACK_USERCONSTANTS: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_ACK_RGU\n",$time); |
`endif |
|
//$display("CORE: %d CU_ACK_USERCONSTANTS", iDebug_CoreID); |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
NextState <= `CU_WAIT_FOR_RENDER_ENABLE; |
else |
NextState <= `CU_ACK_USERCONSTANTS; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_RENDER_ENABLE: |
begin |
`ifdef DEBUG_CU |
$display("CORE: %d CU_WAIT_FOR_RENDER_ENABLE", iDebug_CoreID); |
`endif |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iRenderEnable) |
NextState <= `CU_TRIGGER_RGU; |
else |
NextState <= `CU_WAIT_FOR_RENDER_ENABLE; |
end |
//----------------------------------------- |
`CU_TRIGGER_RGU: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"CORE: %d CU_TRIGGER_RGU", iDebug_CoreID); |
`endif |
|
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_RGU; |
oGFUEnable <= 0; |
oUCodeEnable <= 1; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_RGU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_RGU: |
begin |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_RGU\n",$time); |
// `endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_RGU; |
else |
NextState <= `CU_WAIT_FOR_RGU; |
end |
//----------------------------------------- |
`CU_ACK_RGU: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"CORE: %d CU_ACK_RGU", iDebug_CoreID); |
`endif |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oGFUEnable <= 0; |
oUCodeEnable <= 0; //* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0 & iRenderEnable == 1) |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;//`CU_TRIGGER_GEO;///////////// GET RID OF GEO!!! |
else |
NextState <= `CU_ACK_RGU; |
|
end |
//----------------------------------------- |
`CU_TRIGGER_TCC: |
begin |
////$display("CU_TRIGGER_TCC"); |
`ifdef DEBUG_CU |
`LOGME"%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable <= 1; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; //We need u,v from last IO read cycle |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
oDone <= 0; |
|
NextState <= `WAIT_FOR_TCC; |
end |
//----------------------------------------- |
`WAIT_FOR_TCC: |
begin |
// Wait state: the instruction pointer keeps ENTRYPOINT_INDEX_TCC while |
// oUCodeEnable is dropped to 0; oFlipMemEnabled stays 1. The FSM holds |
// here until iUCodeDone is asserted, then goes to CU_ACK_TCC. |
|
////$display("WAIT_FOR_TCC"); |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_TCC; |
else |
NextState <= `WAIT_FOR_TCC; |
|
end |
//----------------------------------------- |
`CU_ACK_TCC: |
begin |
// Acknowledge state: the instruction pointer still holds |
// ENTRYPOINT_INDEX_TCC; all other outputs driven here are 0 (including |
// oFlipMemEnabled). Once iUCodeDone is back to 0 the next state depends |
// on iSceneTraverseComplete: |
// 1 -> CU_TRIGGER_PSU_WITH_TEXTURE |
// 0 -> CU_WAIT_FOR_HOST_DATA_AVAILABLE |
// While iUCodeDone is not 0 the FSM stays in CU_ACK_TCC. |
|
////$display("WAIT_FOR_TCC"); |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0 && iSceneTraverseComplete == 1'b1) //DDDD |
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE; |
else if (iUCodeDone == 0 && iSceneTraverseComplete == 1'b0) |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
else |
NextState <= `CU_ACK_TCC; |
|
end |
//----------------------------------------- |
/* |
Was there any hit at all? |
At this point, all the triangles in the list |
have been traversed looking for a hit with our ray. |
There are 3 possibilities: |
1) There was not a single hit, then just paint a black |
pixel on the screen and send it via PCU. |
2) There was a hit and Texturing is not enabled, then trigger the PSU with |
no texturing |
3) There was a hit and Texturing is enabled, then fetch the texture |
values corresponding to the triangle that we hit. |
*/ |
`CU_CHECK_HIT: |
begin |
// Decision state: every output driven here is 0. The branch is taken on |
// wHit alone: |
// wHit set -> CU_TRIGGER_PSU_WITH_TEXTURE |
// wHit not set -> CU_TRIGGER_USERPIXELSHADER |
|
`ifdef DEBUG_CU |
`LOGME"%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID); |
`endif |
|
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; ///CHANGED Aug 15 |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
|
|
|
if (wHit) |
begin |
//$display("HIT"); |
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE; |
end |
else |
NextState <= `CU_TRIGGER_USERPIXELSHADER;//666 |
|
end |
|
//----------------------------------------- |
`CU_TRIGGER_PSU_WITH_TEXTURE: |
begin |
// Trigger state: points the instruction pointer at ENTRYPOINT_INDEX_PSU2, |
// raises oUCodeEnable and asserts rResetHitFlop; all other outputs |
// driven here are 0. Unconditionally continues to CU_WAIT_FOR_PSU. |
|
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PSU2; |
oUCodeEnable <= 1; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0;//////NEW NEW NEW NEW |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_PSU; |
end |
//----------------------------------------- |
//Wait until data from Host becomes available |
`CU_WAIT_FOR_HOST_DATA_AVAILABLE: |
begin |
// Wait state: every output driven here is 0. The FSM holds until |
// iHostDataAvailable is asserted, then moves to CU_TRIGGER_MAIN. |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
|
if ( iHostDataAvailable ) |
NextState <= `CU_TRIGGER_MAIN; |
else |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
|
|
end |
//----------------------------------------- |
`CU_TRIGGER_MAIN: |
begin |
// Trigger state: points the instruction pointer at ENTRYPOINT_INDEX_MAIN |
// and raises both oUCodeEnable and oGFUEnable. Among the arms visible in |
// this part of the file, only this one and CU_DONE drive oFlipMem to 1; |
// here oFlipMemEnabled is 1 as well. Unconditionally continues to |
// CU_WAIT_FOR_MAIN. |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable <= 1; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
// $stop(); |
|
NextState <= `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_MAIN: |
begin |
// Wait state: the instruction pointer keeps ENTRYPOINT_INDEX_MAIN with |
// oUCodeEnable dropped to 0. oGFUEnable, rHitFlopEnable and |
// oFlipMemEnabled are 1; oFlipMem is back to 0. The FSM holds here |
// until iUCodeDone is asserted, then goes to CU_ACK_MAIN. |
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_MAIN\n",$time); |
// `endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable <= 0; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 1; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
//NextState <= `CU_WAIT_FOR_MAIN; |
|
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_MAIN; |
else |
NextState <= `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
/* |
ACK UCODE by setting oUCodeEnable = 0 |
*/ |
`CU_ACK_MAIN: |
begin |
// Acknowledge state: instruction pointer and oUCodeEnable are 0, |
// rHitFlopEnable stays 1, every other output driven here is 0. |
// Once iUCodeDone is back to 0 the next state depends on |
// iSceneTraverseComplete: |
// 1 -> CU_CHECK_HIT |
// 0 -> CU_TRIGGER_MAIN (run the main micro-code again) |
// While iUCodeDone is not 0 the FSM stays in CU_ACK_MAIN. |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; //Changed Aug 15 |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 1; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
// $stop(); |
|
// Logical AND (&&) of the two comparisons, matching the style used by |
// the CU_ACK_TCC and CU_ACK_NPU arms. Both operands are 1-bit compare |
// results, so the outcome is the same as the former bitwise '&'. |
if ( iUCodeDone == 1'b0 && iSceneTraverseComplete == 1'b1) |
NextState <= `CU_CHECK_HIT; |
else if ( iUCodeDone == 1'b0 && iSceneTraverseComplete == 1'b0) //ERROR!!! What if iSceneTraverseComplete will become 1 a cycle after this?? |
NextState <= `CU_TRIGGER_MAIN; |
else |
NextState <= `CU_ACK_MAIN; |
|
|
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_PSU: |
begin |
// Wait state: oUCodeEnable is 0 and oFlipMemEnabled is 1; the FSM holds |
// here until iUCodeDone is asserted, then goes to CU_ACK_PSU. |
// NOTE(review): this arm drives ENTRYPOINT_INDEX_PSU, while the visible |
// arm that leads here (CU_TRIGGER_PSU_WITH_TEXTURE) drives |
// ENTRYPOINT_INDEX_PSU2 - confirm whether the pointer value matters |
// while oUCodeEnable is 0. |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_TRIGGER_PSU\n",$time); |
// `endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PSU; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_PSU; |
else |
NextState <= `CU_WAIT_FOR_PSU; |
|
end |
//----------------------------------------- |
`CU_ACK_PSU: |
begin |
// Acknowledge state: instruction pointer and oUCodeEnable are 0, |
// oFlipMemEnabled is 1, every other output driven here is 0. The FSM |
// stays here until iUCodeDone reads 0, then moves to |
// CU_TRIGGER_USERPIXELSHADER. |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
NextState <= `CU_TRIGGER_USERPIXELSHADER; |
else |
NextState <= `CU_ACK_PSU; |
|
|
end |
//----------------------------------------- |
|
//----------------------------------------- |
`CU_TRIGGER_NPU: //Next Pixel Unit |
begin |
// Trigger state: points the instruction pointer at ENTRYPOINT_INDEX_NPG |
// and raises oUCodeEnable; oFlipMemEnabled is 1, every other output |
// driven here is 0. Unconditionally continues to CU_WAIT_NPU. |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID); |
`endif |
// Not guarded by DEBUG_CU: writes a single "*" to the simulator output |
// every time this arm is evaluated. |
$write("*"); |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG; //* |
oUCodeEnable <= 1; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_NPU; |
end |
//----------------------------------------- |
`CU_WAIT_NPU: |
begin |
// Wait state: the instruction pointer keeps ENTRYPOINT_INDEX_NPG with |
// oUCodeEnable dropped to 0; oFlipMemEnabled is 1. The FSM holds here |
// until iUCodeDone is asserted, then goes to CU_ACK_NPU. |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_NPU; |
else |
NextState <= `CU_WAIT_NPU; |
end |
//----------------------------------------- |
/* |
Next Pixel generation: here we either go |
to RGU for the next pixel, or we have no |
more pixels so we are done with our picture! |
*/ |
`CU_ACK_NPU: |
begin |
// Acknowledge state: instruction pointer and oUCodeEnable are 0, |
// oFlipMemEnabled is 1, every other output driven here is 0. |
// Once iUCodeDone is back to 0 the micro-code return value selects the |
// next state: |
// iUCodeReturnValue == 1 -> CU_TRIGGER_RGU |
// iUCodeReturnValue == 0 -> CU_DONE |
// In every other case the FSM stays in CU_ACK_NPU. |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0 && iUCodeReturnValue == 1) |
NextState <= `CU_TRIGGER_RGU; |
else if (iUCodeDone == 0 && iUCodeReturnValue == 0) |
NextState <= `CU_DONE; |
else |
NextState <= `CU_ACK_NPU; |
|
|
end |
//----------------------------------------- |
`CU_DONE: |
begin |
// Terminal state: asserts oDone and drives oFlipMem to 1 (with |
// oFlipMemEnabled at 0); every other output driven here is 0. The only |
// next state assigned in this arm is CU_DONE itself, so nothing in this |
// arm leaves the state. |
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 1; |
oDone <= 1; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
NextState <= `CU_DONE; |
|
end |
//----------------------------------------- |
/* |
Here we no longer use GFU so set Enable to zero |
*/ |
`CU_TRIGGER_USERPIXELSHADER: |
begin |
// Trigger state: points the instruction pointer at |
// ENTRYPOINT_INDEX_PIXELSHADER and raises oUCodeEnable; oGFUEnable is 0 |
// and oFlipMemEnabled is 1. Unconditionally continues to |
// CU_WAIT_FOR_USERPIXELSHADER. |
`ifdef DEBUG_CU |
// The log text names this state; it previously printed "CU_TRIGGER_PSU". |
`LOGME"%d Control: CU_TRIGGER_USERPIXELSHADER\n",$time); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PIXELSHADER; |
oUCodeEnable <= 1; |
oGFUEnable <= 0;//* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
NextState <= `CU_WAIT_FOR_USERPIXELSHADER; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_USERPIXELSHADER: |
begin |
// Wait state: the instruction pointer keeps ENTRYPOINT_INDEX_PIXELSHADER |
// with oUCodeEnable dropped to 0; oFlipMemEnabled is 1. The FSM holds |
// here until iUCodeDone is asserted, then goes to |
// CU_ACK_USERPIXELSHADER. |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_TRIGGER_PSU\n",$time); |
// `endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PIXELSHADER; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_USERPIXELSHADER; |
else |
NextState <= `CU_WAIT_FOR_USERPIXELSHADER; |
|
end |
//----------------------------------------- |
`CU_ACK_USERPIXELSHADER: |
begin |
// Acknowledge state: instruction pointer and oUCodeEnable are 0, |
// oFlipMemEnabled is 1, and this is the only arm visible in this part |
// of the file that asserts oResultCommited. The FSM stays here until |
// iUCodeDone reads 0, then moves to CU_TRIGGER_NPU. |
`ifdef DEBUG_CU |
// The log text names this state; it previously printed "CU_ACK_PSU", |
// which is also what the real CU_ACK_PSU arm prints. |
`LOGME"%d Control: CU_ACK_USERPIXELSHADER\n",$time); |
`endif |
|
//oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 1; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
NextState <= `CU_TRIGGER_NPU;//`CU_TRIGGER_PCU; |
else |
NextState <= `CU_ACK_USERPIXELSHADER; |
|
|
end |
//--------------------------------------------------- |
default: |
begin |
// Fallback for any state value not matched by an arm above: every |
// output driven here is 0 and the FSM is sent to CU_AFTER_RESET_STATE. |
// With DEBUG_CU defined an "Undefined State" error line is logged. |
|
`ifdef DEBUG_CU |
`LOGME"%d Control: ERROR Undefined State\n",$time); |
`endif |
|
//oRamBusOwner <= 0; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_AFTER_RESET_STATE; |
end |
//----------------------------------------- |
|
endcase |
|
end //always |
endmodule |
/MEM/Module_ROM.v
0,0 → 1,703
|
|
`define ONE (32'h1 << `SCALE) |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
/* |
I can't synthesize roms, the rom needs to be adapted depending on the |
final target silicon. |
*/ |
|
|
//-------------------------------------------------------- |
module ROM |
( |
input wire[`ROM_ADDRESS_WIDTH-1:0] Address, |
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
output reg [`INSTRUCTION_WIDTH-1:0] I |
); |
|
|
always @( Address ) |
begin |
case (Address) |
|
//Hardcoded stuff :( |
`define RAY_INSIDE_BOX `R3 |
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T |
`define CURRENT_LIGHT_DIFFUSE 16'h6 |
|
//----------------------------------------------------------------- |
`define TAG_PIXELSHADER 16'd278 |
`define TAG_USERCONSTANTS 16'd276 |
`define TAG_PSU_UCODE_ADRESS2 16'd248 |
`define TAG_PSU_UCODE_ADRESS 16'd232 |
`define LABEL_TCC_EXIT 16'd231 |
`define TAG_TCC_UCODE_ADDRESS 16'd190 |
`define LABEL_BIU4 16'd189 |
`define LABEL_BIU3 16'd179 |
`define LABEL_BIU2 16'd176 |
`define LABEL_BIU1 16'd174 |
`define TAG_BIU_UCODE_ADDRESS 16'd157 |
`define LABEL_HIT 16'd155 |
`define LABEL15 16'd153 |
`define LABEL14 16'd151 |
`define LABEL13 16'd149 |
`define LABEL_TEST_XY_PLANE 16'd144 |
`define LABEL12 16'd142 |
`define LABEL11 16'd140 |
`define LABEL10 16'd138 |
`define LABEL_TEST_XZ_PLANE 16'd132 |
`define LABEL9 16'd130 |
`define LABEL8 16'd128 |
`define LABEL7 16'd126 |
`define LABEL_TEST_YZ_PLANE 16'd120 |
`define LABEL_RAY_INSIDE_BOX 16'd117 |
`define LABEL_ELSEZ 16'd116 |
`define LABEL6 16'd113 |
`define LABEL_ELESE_IFZ 16'd109 |
`define LABEL5 16'd106 |
`define LABEL_TEST_RAY_Z_ORIGEN 16'd102 |
`define LABEL_ELSEY 16'd101 |
`define LABEL4 16'd98 |
`define LABEL_ELESE_IFY 16'd94 |
`define LABEL3 16'd91 |
`define LABEL_TEST_RAY_Y_ORIGEN 16'd87 |
`define LABEL_ELSEX 16'd86 |
`define LABEL2 16'd83 |
`define LABEL_ELSE_IFX 16'd79 |
`define LABEL1 16'd76 |
`define LABEL_TEST_RAY_X_ORIGEN 16'd72 |
`define TAG_AABBIU_UCODE_ADDRESS 16'd69 |
`define LABEL_ALLDONE 16'd67 |
`define LABEL_NPG_NEXT_ROW 16'd63 |
`define TAG_NPG_UCODE_ADDRESS 16'd55 |
`define TAG_RGU_UCODE_ADDRESS 16'd47 |
`define TAG_CPPU_UCODE_ADDRESS 16'd44 |
`define LABEL_IS_NO_HIT 16'd43 |
`define LABEL_IS_HIT 16'd39 |
`define TAG_ADRR_MAIN 16'd37 |
|
|
//------------------------------------------------------------------------- |
//Default values for some registers after reset |
//------------------------------------------------------------------------- |
//This is the first code that gets executed after the machine is |
//externally configured ie after the MST_I goes from 1 to zero. |
//It sets initial values for some of the internal registers |
|
0: I = { `ZERO ,`CREG_LAST_t ,`VOID ,`VOID }; |
//Set the last 't' to very positive value(500) |
1: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
2: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID }; |
3: I = { `COPY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID }; |
|
|
//Calculate the initial linear address for ADR_O |
//this is: (X_initial + RESOLUTION_Y*Y_initial) * 3. |
//Notice that we need to use 'unscaled' i.e. integer |
//values because the result of the multiplication by |
//the resolution is too large to fit a fixed point |
//representation. |
|
4: I = { `COPY ,`R1 ,`CREG_RESOLUTION ,`VOID }; |
5: I = { `UNSCALE ,`R1 ,`R1 ,`VOID }; |
6: I = { `SETX ,`R1 ,32'h1 }; |
7: I = { `SETZ ,`R1 ,32'h0 }; |
8: I = { `COPY ,`R2 ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID }; |
9: I = { `UNSCALE ,`R2 ,`R2 ,`VOID }; |
|
//Ok let's start by calculating RESOLUTION_Y*Y_initial |
10: I = { `IMUL ,`R1 ,`R1 ,`R2 }; |
11: I = { `COPY ,`R2 ,`R1 ,`VOID }; |
12: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY }; |
|
//now X_initial + RESOLUTION_Y*Y_initial |
13: I = { `ADD ,`R3 ,`R1 ,`R2 }; |
14: I = { `COPY ,`R2 ,`R1 ,`VOID }; |
15: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_ZZZ }; |
16: I = { `ADD ,`R3 ,`R3 ,`R2 }; |
17: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX }; |
|
//finally multiply by 3 to get: |
//(X_initial + RESOLUTION_Y*Y_initial) * 3 voila! |
18: I = { `SETX ,`R2 ,32'h3 }; |
19: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_XXX }; |
20: I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 }; |
|
//By this point you should be wondering why not |
//just do DOT R1 [1 Resolution_Y 0] [X_initial Y_initial 0 ]? |
//well because DOT uses fixed point and the result may not |
//fit :( |
|
//Transform from fixed point to integer |
//UNSCALE CREG_PIXEL_PITCH CREG_PIXEL_PITCH VOID |
21: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID }; |
|
22: I = { `SETX ,`CREG_3 ,32'h3 }; |
23: I = { `SWIZZLE3D ,`CREG_3 ,`SWIZZLE_XXX }; |
|
24: I = { `SETX ,`CREG_012 ,32'h0 }; |
25: I = { `SETY ,`CREG_012 ,32'h1 }; |
26: I = { `SETZ ,`CREG_012 ,32'h2 }; |
27: I = { `COPY ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_012 ,`VOID }; |
28: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID }; |
29: I = { `ZERO ,`CREG_ZERO ,`VOID ,`VOID }; |
|
30: I = { `ZERO ,`R1 ,`VOID ,`VOID }; |
31: I = { `ZERO ,`R2 ,`VOID ,`VOID }; |
32: I = { `ZERO ,`R3 ,`VOID ,`VOID }; |
33: I = { `ZERO ,`R4 ,`VOID ,`VOID }; |
34: I = { `ZERO ,`R5 ,`VOID ,`VOID }; |
35: I = { `ZERO ,`R99 ,`VOID ,`VOID }; |
36: I = { `RETURN ,`RT_TRUE }; |
|
//---------------------------------------------- |
//TAG_ADRR_MAIN: |
|
37: I = { `CALL ,`ENTRYPOINT_ADRR_BIU ,`VOID ,`VOID }; |
38: I = { `JEQX ,`LABEL_IS_NO_HIT ,`R99 ,`CREG_ZERO }; |
|
//LABEL_IS_HIT: |
39: I = { `CALL ,`ENTRYPOINT_ADRR_TCC ,`VOID ,`VOID }; |
40: I = { `NOP ,`RT_FALSE }; |
41: I = { `RETURN ,`RT_TRUE }; |
42: I = { `NOP ,`RT_FALSE }; |
|
//LABEL_IS_NO_HIT: |
43: I = { `RETURN ,`RT_FALSE }; |
|
|
//---------------------------------------------------------------------- |
//Micro code for CPPU |
//TAG_CPPU_UCODE_ADDRESS: |
|
|
44: I = { `SUB ,`R1 ,`CREG_PROJECTION_WINDOW_MAX ,`CREG_PROJECTION_WINDOW_MIN }; |
45: I = { `DIV ,`CREG_PROJECTION_WINDOW_SCALE ,`R1 ,`CREG_RESOLUTION }; |
46: I = { `RETURN ,`RT_FALSE }; |
|
//---------------------------------------------------------------------- |
//Micro code for RGU |
//TAG_RGU_UCODE_ADDRESS: |
|
|
47: I = { `MUL ,`R1 ,`CREG_PIXEL_2D_POSITION ,`CREG_PROJECTION_WINDOW_SCALE }; |
48: I = { `ADD ,`R1 ,`R1 ,`CREG_PROJECTION_WINDOW_MIN }; |
49: I = { `SUB ,`CREG_UNORMALIZED_DIRECTION ,`R1 ,`CREG_CAMERA_POSITION }; |
50: I = { `MAG ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`VOID }; |
51: I = { `DIV ,`CREG_RAY_DIRECTION ,`CREG_UNORMALIZED_DIRECTION ,`R2 }; |
52: I = { `DEC ,`CREG_LAST_COL ,`CREG_PIXEL_2D_FINAL_POSITION ,`VOID }; |
53: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
|
54: I = { `RETURN ,`RT_FALSE }; |
//---------------------------------------------------------------------- |
//Next Pixel generation Code (NPG) |
//TAG_NPG_UCODE_ADDRESS: |
|
55: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID }; |
56: I = { `SETX ,`CREG_TEXTURE_COLOR ,32'h60000 }; |
57: I = { `ADD ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_3 }; |
|
58: I = { `ADD ,`CREG_PIXEL_PITCH ,`CREG_PIXEL_PITCH ,`CREG_3 }; |
59: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID }; |
60: I = { `JGEX ,`LABEL_NPG_NEXT_ROW ,`CREG_PIXEL_2D_POSITION ,`CREG_LAST_COL }; |
61: I = { `INCX ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID }; |
62: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_NPG_NEXT_ROW: |
63: I = { `SETX ,`CREG_PIXEL_2D_POSITION ,32'h0 }; |
64: I = { `INCY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID }; |
65: I = { `JGEY ,`LABEL_ALLDONE ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_FINAL_POSITION }; |
66: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_ALLDONE: |
67: I = { `NOP ,`VOID ,`VOID }; |
68: I = { `RETURN ,`RT_FALSE }; |
|
//---------------------------------------------------------------------- |
//Micro code for AABBIU |
//TAG_AABBIU_UCODE_ADDRESS: |
|
69: I = { `ZERO ,`R3 ,`VOID ,`VOID }; |
70: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
71: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_TEST_RAY_X_ORIGEN: |
72: I = { `JGEX ,`LABEL_ELSE_IFX ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
73: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
74: I = { `JLEX ,`LABEL1 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
75: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL1: |
76: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
77: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
78: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELSE_IFX: |
79: I = { `JLEX ,`LABEL_ELSEX ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
80: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
81: I = { `JGEX ,`LABEL2 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
82: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL2: |
83: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
84: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
85: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID }; |
//LABEL_ELSEX: |
86: I = { `SETX ,`R5 ,32'b1 }; |
|
//LABEL_TEST_RAY_Y_ORIGEN: |
87: I = { `JGEY ,`LABEL_ELESE_IFY ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
88: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
89: I = { `JLEY ,`LABEL3 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
90: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL3: |
91: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
92: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
93: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELESE_IFY: |
94: I = { `JLEY ,`LABEL_ELSEY ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
95: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
96: I = { `JGEY ,`LABEL4 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
97: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL4: |
98: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
99: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
100: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELSEY: |
101: I = { `SETY ,`R5 ,32'b1 }; |
|
//LABEL_TEST_RAY_Z_ORIGEN: |
102: I = { `JGEZ ,`LABEL_ELESE_IFZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
103: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
104: I = { `JLEZ ,`LABEL5 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
105: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL5: |
106: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
107: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
108: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID }; |
|
//LABEL_ELESE_IFZ: |
109: I = { `JLEZ ,`LABEL_ELSEZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
110: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
111: I = { `JGEZ ,`LABEL6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
112: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL6: |
113: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
114: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
115: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID }; |
|
//LABEL_ELSEZ: |
116: I = { `SETZ ,`R5 ,32'b1 }; |
|
//LABEL_RAY_INSIDE_BOX: |
117: I = { `ZERO ,`R1 ,`VOID ,`VOID }; |
118: I = { `JEQX ,`LABEL_TEST_YZ_PLANE ,`R1 ,`RAY_INSIDE_BOX }; |
//BUG need a NOP here else pipeline gets confused |
119: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_TEST_YZ_PLANE: |
120: I = { `JNEX ,`LABEL_TEST_XZ_PLANE ,`R5 ,`R1 }; |
121: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_XXX }; |
122: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
123: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
124: I = { `JGEY ,`LABEL7 ,`R2 ,`CREG_AABBMIN }; |
125: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL7: |
126: I = { `JLEY ,`LABEL8 ,`R2 ,`CREG_AABBMAX }; |
127: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL8: |
128: I = { `JGEZ ,`LABEL9 ,`R2 ,`CREG_AABBMIN }; |
129: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL9: |
130: I = { `JLEZ ,`LABEL_TEST_XZ_PLANE ,`R2 ,`CREG_AABBMAX }; |
131: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_TEST_XZ_PLANE: |
132: I = { `JNEY ,`LABEL_TEST_XY_PLANE ,`R5 ,`R1 }; |
133: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_YYY }; |
134: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
135: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
136: I = { `JGEX ,`LABEL10 ,`R2 ,`CREG_AABBMIN }; |
137: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL10: |
138: I = { `JLEX ,`LABEL11 ,`R2 ,`CREG_AABBMAX }; |
139: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL11: |
140: I = { `JGEZ ,`LABEL12 ,`R2 ,`CREG_AABBMIN }; |
141: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL12: |
142: I = { `JLEZ ,`LABEL_TEST_XY_PLANE ,`R2 ,`CREG_AABBMAX }; |
143: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_TEST_XY_PLANE: |
144: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_ZZZ }; |
145: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
146: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
147: I = { `JGEX ,`LABEL13 ,`R2 ,`CREG_AABBMIN }; |
148: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL13: |
149: I = { `JLEX ,`LABEL14 ,`R2 ,`CREG_AABBMAX }; |
150: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL14: |
151: I = { `JGEY ,`LABEL15 ,`R2 ,`CREG_AABBMIN }; |
152: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL15: |
153: I = { `JLEY ,`LABEL_HIT ,`R2 ,`CREG_AABBMAX }; |
154: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_HIT: |
155: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
156: I = { `RETURN ,`RT_TRUE }; |
|
//------------------------------------------------------------------------ |
//BIU Micro code |
//TAG_BIU_UCODE_ADDRESS: |
|
157: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID }; |
158: I = { `SETX ,`R3 ,`ONE }; |
159: I = { `SETX ,`R1 ,32'h00000 }; |
160: I = { `SUB ,`CREG_E1 ,`CREG_V1 ,`CREG_V0 }; |
161: I = { `SUB ,`CREG_E2 ,`CREG_V2 ,`CREG_V0 }; |
162: I = { `SUB ,`CREG_T ,`CREG_CAMERA_POSITION ,`CREG_V0 }; |
163: I = { `CROSS ,`CREG_P ,`CREG_RAY_DIRECTION ,`CREG_E2 }; |
164: I = { `CROSS ,`CREG_Q ,`CREG_T ,`CREG_E1 }; |
165: I = { `DOT ,`CREG_H1 ,`CREG_Q ,`CREG_E2 }; |
166: I = { `DOT ,`CREG_H2 ,`CREG_P ,`CREG_T }; |
167: I = { `DOT ,`CREG_H3 ,`CREG_Q ,`CREG_RAY_DIRECTION }; |
168: I = { `DOT ,`CREG_DELTA ,`CREG_P ,`CREG_E1 }; |
169: I = { `DIV ,`CREG_t ,`CREG_H1 ,`CREG_DELTA }; |
170: I = { `DIV ,`CREG_u ,`CREG_H2 ,`CREG_DELTA }; |
171: I = { `DIV ,`CREG_v ,`CREG_H3 ,`CREG_DELTA }; |
172: I = { `JGEX ,`LABEL_BIU1 ,`CREG_u ,`R1 }; |
173: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU1: |
174: I = { `JGEX ,`LABEL_BIU2 ,`CREG_v ,`R1 }; |
175: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU2: |
176: I = { `ADD ,`R2 ,`CREG_u ,`CREG_v }; |
177: I = { `JLEX ,`LABEL_BIU3 ,`R2 ,`R3 }; |
178: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU3: |
179: I = { `JGEX ,`LABEL_BIU4 ,`CREG_t ,`CREG_LAST_t }; |
180: I = { `COPY ,`CREG_LAST_t ,`CREG_t ,`VOID }; |
181: I = { `COPY ,`CREG_LAST_u ,`CREG_u ,`VOID }; |
182: I = { `COPY ,`CREG_LAST_v ,`CREG_v ,`VOID }; |
183: I = { `COPY ,`CREG_E1_LAST ,`CREG_E1 ,`VOID }; |
184: I = { `COPY ,`CREG_E2_LAST ,`CREG_E2 ,`VOID }; |
185: I = { `COPY ,`CREG_UV0_LAST ,`CREG_UV0 ,`VOID }; |
186: I = { `COPY ,`CREG_UV1_LAST ,`CREG_UV1 ,`VOID }; |
187: I = { `COPY ,`CREG_UV2_LAST ,`CREG_UV2 ,`VOID }; |
188: I = { `COPY ,`CREG_TRI_DIFFUSE_LAST ,`CREG_TRI_DIFFUSE ,`VOID }; |
//LABEL_BIU4: |
189: I = { `RET ,`R99 ,`TRUE }; |
|
|
//------------------------------------------------------------------------- |
//Calculate the address of the texture coordinates. |
|
//TAG_TCC_UCODE_ADDRESS: |
//Do this calculation only if this triangle is the one closest to the camera |
190: I = { `JGX ,`LABEL_TCC_EXIT ,`CREG_t ,`CREG_LAST_t }; |
|
//First get the UV coordinates and store in R1 |
//R1x: u_coordinate = U0 + last_u * (U1 - U0) + last_v * (U2 - U0) |
//R1y: v_coordinate = V0 + last_u * (V1 - V0) + last_v * (V2 - V0) |
//R1z: 0 |
|
191: I = { `SUB ,`R1 ,`CREG_UV1_LAST ,`CREG_UV0_LAST }; |
192: I = { `SUB ,`R2 ,`CREG_UV2_LAST ,`CREG_UV0_LAST }; |
193: I = { `MUL ,`R1 ,`CREG_LAST_u ,`R1 }; |
194: I = { `MUL ,`R2 ,`CREG_LAST_v ,`R2 }; |
195: I = { `ADD ,`R1 ,`R1 ,`R2 }; |
196: I = { `ADD ,`R1 ,`R1 ,`CREG_UV0_LAST }; |
|
//R7x : fu = (u_coordinate) * gTexture.mWidth |
//R7y : fv = (v_coordinate) * gTexture.mWidth |
//R7z : 0 |
197: I = { `MUL ,`R7 ,`R1 ,`CREG_TEXTURE_SIZE }; |
|
//R1x: u1 = ((int)fu) % gTexture.mWidth |
//R1y: v1 = ((int)fv) % gTexture.mHeight |
//R1z: 0 |
//R2x: u2 = (u1 + 1 ) % gTexture.mWidth |
//R2y: v2 = (v1 + 1 ) % gTexture.mHeight |
//R2z: 0 |
// Notice MOD2 only operates over |
// numbers that are power of 2 also notice that the |
// textures are assumed to be squares! |
//x % 2^n == x & (2^n - 1). |
|
198: I = { `MOD ,`R1 ,`R7 ,`CREG_TEXTURE_SIZE }; |
199: I = { `INC ,`R2 ,`R1 ,`VOID }; |
200: I = { `MOD ,`R2 ,`R2 ,`CREG_TEXTURE_SIZE }; |
|
//Cool now we should store the values in the appropriate registers |
//OREG_TEX_COORD1.x = u1 + v1 * gTexture.mWidth |
//OREG_TEX_COORD1.y = u2 + v1 * gTexture.mWidth |
//OREG_TEX_COORD1.z = 0 |
//OREG_TEX_COORD2.x = u1 + v2 * gTexture.mWidth |
//OREG_TEX_COORD2.y = u2 + v2 * gTexture.mWidth |
//OREG_TEX_COORD2.z = 0 |
|
//R1= [u1 v1 0] |
//R2= [u2 v2 0] |
|
//R2 = [v2 u2 0] |
201: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YXZ }; |
|
//R3 = [v2 v1 0] |
202: I = { `XCHANGEX ,`R3 ,`R1 ,`R2 }; |
|
|
//R4 = [u1 u2 0] |
203: I = { `XCHANGEX ,`R4 ,`R2 ,`R1 }; |
|
//R2 = [v2*H v1*H 0] |
204: I = { `UNSCALE ,`R9 ,`R3 ,`VOID }; |
205: I = { `UNSCALE ,`R8 ,`CREG_TEXTURE_SIZE ,`VOID }; |
206: I = { `IMUL ,`R2 ,`R9 ,`R8 }; |
|
//OREG_TEX_COORD1 = [u1 + v2*H u2 + v1*H 0] |
//R4 = FixedToIinteger(R4) |
207: I = { `UNSCALE ,`R4 ,`R4 ,`VOID }; |
208: I = { `ADD ,`R12 ,`R2 ,`R4 }; |
209: I = { `SETX ,`R5 ,32'h3 }; |
210: I = { `SETY ,`R5 ,32'h3 }; |
211: I = { `SETZ ,`R5 ,32'h3 }; |
//Multiply by 3 (the pitch) |
//IMUL OREG_TEX_COORD1 R12 R5 |
212: I = { `IMUL ,`CREG_TEX_COORD1 ,`R12 ,`R5 }; |
|
//R4 = [u2 u1 0] |
213: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YXZ }; |
|
|
//OREG_TEX_COORD2 [u2 + v2*H u1 + v1*H 0] |
214: I = { `ADD ,`R12 ,`R2 ,`R4 }; |
//Multiply by 3 (the pitch) |
//IMUL OREG_TEX_COORD2 R12 R5 |
215: I = { `IMUL ,`CREG_TEX_COORD2 ,`R12 ,`R5 }; |
|
|
//Cool now get the weights |
|
//w1 = (1 - fracu) * (1 - fracv) |
//w2 = fracu * (1 - fracv) |
//w3 = (1 - fracu) * fracv |
//w4 = fracu * fracv |
|
//R4x: fracu |
//R4y: fracv |
//R4z: 0 |
216: I = { `FRAC ,`R4 ,`R7 ,`VOID }; |
|
//R5x: fracv |
//R5y: fracu |
//R5z: 0 |
217: I = { `COPY ,`R5 ,`R4 ,`VOID }; |
218: I = { `SWIZZLE3D ,`R5 ,`SWIZZLE_YXZ }; |
|
|
//R5x: 1 - fracv |
//R5y: 1 - fracu |
//R5z: 1 |
219: I = { `NEG ,`R5 ,`R5 ,`VOID }; |
220: I = { `INC ,`R5 ,`R5 ,`VOID }; |
|
//R5x: 1 - fracv |
//R5y: 1 - fracu |
//R5y: (1 - fracv)(1 - fracu) |
221: I = { `MULP ,`CREG_TEXWEIGHT1 ,`R5 ,`VOID }; |
|
//CREG_TEXWEIGHT1.x = (1 - fracv)(1 - fracu) |
//CREG_TEXWEIGHT1.y = (1 - fracv)(1 - fracu) |
//CREG_TEXWEIGHT1.z = (1 - fracv)(1 - fracu) |
222: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT1 ,`SWIZZLE_ZZZ }; |
|
|
//R6x: w2: fracu * (1 - fracv ) |
//R6y: w3: fracv * (1 - fracu ) |
//R6z: 0 |
223: I = { `MUL ,`R6 ,`R4 ,`R5 }; |
|
//CREG_TEXWEIGHT2.x = fracu * (1 - fracv ) |
//CREG_TEXWEIGHT2.y = fracu * (1 - fracv ) |
//CREG_TEXWEIGHT2.z = fracu * (1 - fracv ) |
224: I = { `COPY ,`CREG_TEXWEIGHT2 ,`R6 ,`VOID }; |
225: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT2 ,`SWIZZLE_XXX }; |
|
//CREG_TEXWEIGHT3.x = fracv * (1 - fracu ) |
//CREG_TEXWEIGHT3.y = fracv * (1 - fracu ) |
//CREG_TEXWEIGHT3.z = fracv * (1 - fracu ) |
226: I = { `COPY ,`CREG_TEXWEIGHT3 ,`R6 ,`VOID }; |
227: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT3 ,`SWIZZLE_YYY }; |
|
|
//R4x: fracu |
//R4y: fracv |
//R4z: fracu * fracv |
228: I = { `MULP ,`R4 ,`R4 ,`VOID }; |
|
//CREG_TEXWEIGHT4.x = fracv * fracu |
//CREG_TEXWEIGHT4.y = fracv * fracu |
//CREG_TEXWEIGHT4.z = fracv * fracu |
229: I = { `COPY ,`CREG_TEXWEIGHT4 ,`R4 ,`VOID }; |
230: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT4 ,`SWIZZLE_ZZZ }; |
|
|
//LABEL_TCC_EXIT: |
231: I = { `RET ,`R99 ,32'h0 }; |
|
|
//------------------------------------------------------------------------- |
//TAG_PSU_UCODE_ADRESS: |
//Pixel Shader #1 |
//This pixel shader has diffuse light but no textures |
|
|
232: I = { `CROSS ,`R1 ,`CREG_E1_LAST ,`CREG_E2_LAST }; |
233: I = { `MAG ,`R2 ,`R1 ,`VOID }; |
234: I = { `DIV ,`R1 ,`R1 ,`R2 }; |
235: I = { `MUL ,`R2 ,`CREG_RAY_DIRECTION ,`CREG_LAST_t }; |
236: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
237: I = { `SUB ,`R2 ,`CURRENT_LIGHT_POS ,`R2 }; |
238: I = { `MAG ,`R3 ,`R2 ,`VOID }; |
239: I = { `DIV ,`R2 ,`R2 ,`R3 }; |
240: I = { `DOT ,`R3 ,`R2 ,`R1 }; |
241: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_TRI_DIFFUSE_LAST ,`CURRENT_LIGHT_DIFFUSE }; |
242: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_COLOR_ACC ,`R3 }; |
243: I = { `COPY ,`CREG_TEXTURE_COLOR ,`CREG_COLOR_ACC ,`VOID }; |
244: I = { `NOP ,`RT_FALSE }; |
245: I = { `NOP ,`RT_FALSE }; |
246: I = { `NOP ,`RT_FALSE }; |
247: I = { `RETURN ,`RT_TRUE }; |
|
//------------------------------------------------------------------------- |
//Pixel Shader #2 |
//TAG_PSU_UCODE_ADRESS2: |
//This Pixel Shader has no light but it does texturing |
//with bi-linear interpolation |
|
|
|
248: I = { `COPY ,`R1 ,`CREG_TEX_COORD1 ,`VOID }; |
249: I = { `COPY ,`R2 ,`CREG_TEX_COORD1 ,`VOID }; |
250: I = { `COPY ,`R3 ,`CREG_TEX_COORD2 ,`VOID }; |
251: I = { `COPY ,`R4 ,`CREG_TEX_COORD2 ,`VOID }; |
|
|
252: I = { `SWIZZLE3D ,`R1 ,`SWIZZLE_XXX }; |
253: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY }; |
254: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX }; |
255: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YYY }; |
256: I = { `ADD ,`R1 ,`R1 ,`CREG_012 }; |
257: I = { `ADD ,`R2 ,`R2 ,`CREG_012 }; |
258: I = { `ADD ,`R3 ,`R3 ,`CREG_012 }; |
259: I = { `ADD ,`R4 ,`R4 ,`CREG_012 }; |
|
|
260: I = { `TMREAD ,`CREG_TEX_COLOR1 ,`R1 ,`VOID }; |
261: I = { `NOP ,`RT_FALSE }; |
262: I = { `TMREAD ,`CREG_TEX_COLOR2 ,`R2 ,`VOID }; |
263: I = { `NOP ,`RT_FALSE }; |
264: I = { `TMREAD ,`CREG_TEX_COLOR3 ,`R3 ,`VOID }; |
265: I = { `NOP ,`RT_FALSE }; |
266: I = { `TMREAD ,`CREG_TEX_COLOR4 ,`R4 ,`VOID }; |
267: I = { `NOP ,`RT_FALSE }; |
|
|
|
|
//TextureColor.R = c1.R * w1 + c2.R * w2 + c3.R * w3 + c4.R * w4 |
//TextureColor.G = c1.G * w1 + c2.G * w2 + c3.G * w3 + c4.G * w4 |
//TextureColor.B = c1.B * w1 + c2.B * w2 + c3.B * w3 + c4.B * w4 |
|
|
//MUL R1 CREG_TEX_COLOR4 CREG_TEXWEIGHT1 |
//MUL R2 CREG_TEX_COLOR2 CREG_TEXWEIGHT2 |
//MUL R3 CREG_TEX_COLOR1 CREG_TEXWEIGHT3 |
//MUL R4 CREG_TEX_COLOR3 CREG_TEXWEIGHT4 |
|
268: I = { `MUL ,`R1 ,`CREG_TEX_COLOR3 ,`CREG_TEXWEIGHT1 }; |
269: I = { `MUL ,`R2 ,`CREG_TEX_COLOR2 ,`CREG_TEXWEIGHT2 }; |
270: I = { `MUL ,`R3 ,`CREG_TEX_COLOR1 ,`CREG_TEXWEIGHT3 }; |
271: I = { `MUL ,`R4 ,`CREG_TEX_COLOR4 ,`CREG_TEXWEIGHT4 }; |
|
272: I = { `ADD ,`CREG_TEXTURE_COLOR ,`R1 ,`R2 }; |
273: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R3 }; |
274: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R4 }; |
275: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
//Default User constants |
//TAG_USERCONSTANTS: |
|
276: I = { `NOP ,`RT_FALSE }; |
277: I = { `RETURN ,`RT_TRUE }; |
|
//TAG_PIXELSHADER: |
//Default Pixel Shader (just outputs texture) |
278: I = { `OMWRITE ,`OREG_PIXEL_COLOR ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_TEXTURE_COLOR }; |
279: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
|
|
default: |
begin |
|
`ifdef DEBUG |
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address); |
// $stop(); |
`endif |
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF}; |
end |
endcase |
end |
endmodule |
//-------------------------------------------------------- |
/MEM/Unit_MEM.v
0,0 → 1,328
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/* |
The memory unit has all the memory related modules for THEIA. |
There are 3 memories in the core: |
DMEM: The data memory, it is a R/W dual channel RAM, stores the data locations. |
IMEM: The instruction memory, R/W dual channel RAM, stores user shaders. |
IROM: RO instruction memory, stores default shaders and other internal code. |
I use two ROMs with the same data, so that simulates dual channel. |
This unit also has a Control register. |
*/ |
`define USER_CODE_ENABLED 2 |
//------------------------------------------------------------------- |
/*
MemoryUnit

Groups all the memories of one core behind a single set of ports and decodes
which memory a given address belongs to.

Data address map as decoded in this module:
  address <  32          IMEM : input registers, written by IO, read by EXE
  32  <= address <  64   SMEM : swap registers, double buffered between EXE and IO
  64  <= address < 128   RMEM : general purpose registers, EXE read/write only
  128 <= address         OMEM : output registers, written by EXE, read by IO

Instruction fetch: the MSB of each instruction read address selects between
the writable instruction RAM (MSB = 1) and the default-code ROM (MSB = 0).
Both sources have one clock of read latency, so the select bit is delayed by
one flip-flop to line up with the data it belongs to.
*/
module MemoryUnit
(
    input wire                              Clock,
    input wire                              Reset,
    input wire                              iFlipMemory,            //Enables the 1 bit counter that picks the SMEM bank

    //Data bus for EXE Unit
    input wire                              iDataWriteEnable_EXE,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress1_EXE,
    output wire[`DATA_ROW_WIDTH-1:0]        oData1_EXE,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress2_EXE,
    output wire[`DATA_ROW_WIDTH-1:0]        oData2_EXE,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataWriteAddress_EXE,
    input wire[`DATA_ROW_WIDTH-1:0]         iData_EXE,

    //Data bus for IO Unit
    input wire                              iDataWriteEnable_IO,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress1_IO,
    output wire[`DATA_ROW_WIDTH-1:0]        oData1_IO,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress2_IO,
    output wire[`DATA_ROW_WIDTH-1:0]        oData2_IO,
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataWriteAddress_IO,
    input wire[`DATA_ROW_WIDTH-1:0]         iData_IO,

    //Instruction bus
    input wire                              iInstructionWriteEnable,
    input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionReadAddress1,
    input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionReadAddress2,
    input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionWriteAddress,
    input wire [`INSTRUCTION_WIDTH-1:0]     iInstruction,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction1,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction2,

`ifdef DEBUG
    input wire [`MAX_CORES-1:0]             iDebug_CoreID,
`endif

    //Control Register
    input wire[15:0]                        iControlRegister,
    output wire[15:0]                       oControlRegister
);

//-------------------------------------------------------------------
/*
 Instruction source selection (RAM vs ROM).
*/
wire [`INSTRUCTION_WIDTH-1:0] wIMEM2_IMUX__DataOut1,wIMEM2_IMUX__DataOut2,
                              wIROM2_IMUX__DataOut1,wIROM2_IMUX__DataOut2;

wire wInstructionSelector,wInstructionSelector2;

//Delay the address MSB one clock so it matches the registered RAM/ROM output
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
(
    .Clock(Clock),
    .Reset(Reset),
    .Enable( 1'b1 ),
    .D( iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-1] ),
    .Q( wInstructionSelector )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
    .Clock(Clock),
    .Reset(Reset),
    .Enable( 1'b1 ),
    .D( iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-1] ),
    .Q( wInstructionSelector2 )
);

assign oInstruction1 = (wInstructionSelector == 1) ?
    wIMEM2_IMUX__DataOut1 : wIROM2_IMUX__DataOut1;

assign oInstruction2 = (wInstructionSelector2 == 1) ?
    wIMEM2_IMUX__DataOut2 : wIROM2_IMUX__DataOut2;

//-------------------------------------------------------------------
/*
 Data memory.
*/
`define SMEM_START_ADDR `DATA_ADDRESS_WIDTH'd32
`define RMEM_START_ADDR `DATA_ADDRESS_WIDTH'd64
`define OMEM_START_ADDR `DATA_ADDRESS_WIDTH'd128

wire wDataWriteEnable_RMEM,wDataWriteEnable_SMEM,wDataWriteEnable_IMEM,wDataWriteEnable_OMEM;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataWriteAddress_RMEM,wDataWriteAddress_SMEM;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataReadAddress_RMEM1,wDataReadAddress_RMEM2;
wire [`DATA_ADDRESS_WIDTH-1:0] wDataReadAddress_SMEM1,wDataReadAddress_SMEM2;
wire [`DATA_ROW_WIDTH-1:0] wData_SMEM1,wData_SMEM2,wData_RMEM1,wData_RMEM2,wData_IMEM1,wData_IMEM2;
wire [`DATA_ROW_WIDTH-1:0] wIOData_SMEM1,wIOData_SMEM2,wData_OMEM1,wData_OMEM2;

/*
 Debug aid, disabled:
always @ (posedge Clock)
begin
    if (wDataWriteEnable_OMEM)
        $display("%dns OMEM Writting %h to Addr %d (%h)",
        $time,iData_EXE,iDataWriteAddress_EXE,iDataWriteAddress_EXE);
end
*/

//Write enable decoding: only the memory that owns the address sees the strobe.
//OMEM, SMEM and RMEM are written by EXE; IMEM (the data *input* registers,
//not the instruction memory) is written by IO.
assign wDataWriteEnable_OMEM =
    (iDataWriteAddress_EXE >= `OMEM_START_ADDR )
    ? iDataWriteEnable_EXE : 1'b0;

assign wDataWriteEnable_IMEM =
    (iDataWriteAddress_IO < `SMEM_START_ADDR )
    ? iDataWriteEnable_IO : 1'b0;

assign wDataWriteEnable_SMEM =
    (iDataWriteAddress_EXE >= `SMEM_START_ADDR && iDataWriteAddress_EXE < `RMEM_START_ADDR)
    ? iDataWriteEnable_EXE : 1'b0;

assign wDataWriteEnable_RMEM =
    (iDataWriteAddress_EXE >= `RMEM_START_ADDR && iDataWriteAddress_EXE < `OMEM_START_ADDR)
    ? iDataWriteEnable_EXE : 1'b0;

//The EXE addresses are passed untouched to both RMEM and SMEM
assign wDataWriteAddress_RMEM = iDataWriteAddress_EXE;
assign wDataReadAddress_RMEM1 = iDataReadAddress1_EXE;
assign wDataReadAddress_RMEM2 = iDataReadAddress2_EXE;
assign wDataWriteAddress_SMEM = iDataWriteAddress_EXE;
assign wDataReadAddress_SMEM1 = iDataReadAddress1_EXE;
assign wDataReadAddress_SMEM2 = iDataReadAddress2_EXE;

//EXE read data: IMEM below SMEM_START, SMEM below RMEM_START, RMEM otherwise.
//NOTE(review): the select uses the *current* read address while the RAM
//outputs are registered (one clock late); the instruction path delays its
//select bit but this path does not - confirm the address is held stable.
assign oData1_EXE = ( iDataReadAddress1_EXE < `RMEM_START_ADDR ) ?
    ( ( iDataReadAddress1_EXE < `SMEM_START_ADDR ) ? wData_IMEM1 : wData_SMEM1 )
    : wData_RMEM1;

assign oData2_EXE = ( iDataReadAddress2_EXE < `RMEM_START_ADDR ) ?
    ( ( iDataReadAddress2_EXE < `SMEM_START_ADDR ) ? wData_IMEM2 : wData_SMEM2 )
    : wData_RMEM2;

//IO read data: the IO side of SMEM below OMEM_START, OMEM otherwise
assign oData1_IO = ( iDataReadAddress1_IO < `OMEM_START_ADDR ) ? wIOData_SMEM1 : wData_OMEM1;
assign oData2_IO = ( iDataReadAddress2_IO < `OMEM_START_ADDR ) ? wIOData_SMEM2 : wData_OMEM2;

//Output registers written by EXE, Read by IO
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) OMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_OMEM ),
    .iReadAddress0( iDataReadAddress1_IO ),
    .iReadAddress1( iDataReadAddress2_IO ),
    .iWriteAddress( iDataWriteAddress_EXE ),
    .iDataIn( iData_EXE ),
    .oDataOut0( wData_OMEM1 ),
    .oDataOut1( wData_OMEM2 )
);

//Input Registers, Written by IO, Read by EXE
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,42) IMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_IMEM ),
    .iReadAddress0( iDataReadAddress1_EXE ),
    .iReadAddress1( iDataReadAddress2_EXE ),
    .iWriteAddress( iDataWriteAddress_IO ),
    .iDataIn( iData_IO ),
    .oDataOut0( wData_IMEM1 ),
    .oDataOut1( wData_IMEM2 )
);

//Bank select for the swap memory. Declared (and driven) BEFORE the SMEM
//instance that uses it: referencing the name first would create an implicit
//net and make this explicit declaration a redeclaration error.
wire wFlipSelect;
UPCOUNTER_POSEDGE # (1) UPC1
(
    .Clock(Clock),
    .Reset( Reset ),
    .Initial(1'b0),
    .Enable(iFlipMemory),
    .Q(wFlipSelect)
);

//Swap registers, while IO reads/writes one bank, EXE reads/writes the other;
//the banks get flipped in the next iteration.
//Port A is the EXE side, port B is the IO side.
SWAP_MEM # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) SMEM
(
    .Clock( Clock ),
    .iSelect( wFlipSelect ),

    .iWriteEnableA( wDataWriteEnable_SMEM ),
    .iReadAddressA0( wDataReadAddress_SMEM1 ),
    .iReadAddressA1( wDataReadAddress_SMEM2 ),
    .iWriteAddressA( wDataWriteAddress_SMEM ),
    .iDataInA( iData_EXE ),
    .oDataOutA0( wData_SMEM1 ),
    .oDataOutA1( wData_SMEM2 ),

    .iWriteEnableB( iDataWriteEnable_IO ),
    .iReadAddressB0( iDataReadAddress1_IO ),
    .iReadAddressB1( iDataReadAddress2_IO ),
    .iWriteAddressB( iDataWriteAddress_IO ),
    .iDataInB( iData_IO ),
    .oDataOutB0( wIOData_SMEM1 ),
    .oDataOutB1( wIOData_SMEM2 )
);

//General purpose registers, EXE can R/W, IO can not see these sections
//of the memory
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,256) RMEM
(
    .Clock( Clock ),
    .iWriteEnable( wDataWriteEnable_RMEM ),
    .iReadAddress0( wDataReadAddress_RMEM1 ),
    .iReadAddress1( wDataReadAddress_RMEM2 ),
    .iWriteAddress( wDataWriteAddress_RMEM ),
    .iDataIn( iData_EXE ),
    .oDataOut0( wData_RMEM1 ),
    .oDataOut1( wData_RMEM2 )
);

//-------------------------------------------------------------------
/*
 Instruction memory.
 The MSB of the read addresses only selects RAM vs ROM, so it is forced to
 zero before indexing the RAM.
*/
RAM_DUAL_READ_PORT # (`INSTRUCTION_WIDTH,`ROM_ADDRESS_WIDTH,512) INST_MEM
(
    .Clock( Clock ),
    .iWriteEnable( iInstructionWriteEnable ),
    .iReadAddress0( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
    .iReadAddress1( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
    .iWriteAddress( iInstructionWriteAddress ),
    .iDataIn( iInstruction ),
    .oDataOut0( wIMEM2_IMUX__DataOut1 ),
    .oDataOut1( wIMEM2_IMUX__DataOut2 )
);

//-------------------------------------------------------------------
/*
 Default code stored in ROM.
*/
wire [`INSTRUCTION_WIDTH-1:0] wRomDelay1,wRomDelay2;

//In the real world a ROM will take at least 1 clock cycle, so the ROM output
//is registered here; this also gives it the same latency as INST_MEM.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDA
(
    .Clock(Clock),
    .Reset(Reset),
    .Enable(1'b1),
    .D(wRomDelay1),
    .Q(wIROM2_IMUX__DataOut1 )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDB
(
    .Clock(Clock),
    .Reset(Reset),
    .Enable(1'b1),
    .D(wRomDelay2),
    .Q(wIROM2_IMUX__DataOut2 )
);

//Two identical ROMs are used because two different instruction addresses
//(branch-taken and branch-not-taken) have to be read at the same time.
ROM IROM
(
    .Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
    .iDebug_CoreID(iDebug_CoreID),
`endif
    .I( wRomDelay1 )
);

ROM IROM2
(
    .Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
    .iDebug_CoreID(iDebug_CoreID),
`endif
    .I( wRomDelay2 )
);

//--------------------------------------------------------
//Control register: a 16 bit value re-registered every clock
ControlRegister CR
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iControlRegister( iControlRegister ),
    .oControlRegister( oControlRegister )
);

endmodule
//------------------------------------------------------------------- |
/MEM/Module_RAM.v
0,0 → 1,80
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//-------------------------------------------------------- |
//Dual port RAM. |
|
|
//Synchronous RAM with one write port and two independent read ports.
//All three ports are clocked by the rising edge of Clock; read data shows
//up on the outputs one clock after the address is presented.
//The storage array holds indexes 0..MEM_SIZE (MEM_SIZE+1 rows).
module RAM_DUAL_READ_PORT
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input wire                  Clock,
    input wire                  iWriteEnable,
    input wire[ADDR_WIDTH-1:0]  iReadAddress0,
    input wire[ADDR_WIDTH-1:0]  iReadAddress1,
    input wire[ADDR_WIDTH-1:0]  iWriteAddress,
    input wire[DATA_WIDTH-1:0]  iDataIn,
    output reg [DATA_WIDTH-1:0] oDataOut0,
    output reg [DATA_WIDTH-1:0] oDataOut1
);

reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

//Write port
always @(posedge Clock)
begin
    if (iWriteEnable)
        Ram[iWriteAddress] <= iDataIn;
end

//Read ports: sample the array contents as they were before this edge
always @(posedge Clock)
begin
    oDataOut0 <= Ram[iReadAddress0];
    oDataOut1 <= Ram[iReadAddress1];
end

endmodule
//-------------------------------------------------------- |
|
//Synchronous RAM with one write port and one read port.
//Both ports are clocked by the rising edge of Clock; read data shows up on
//oDataOut0 one clock after the address is presented.
//The storage array holds indexes 0..MEM_SIZE (MEM_SIZE+1 rows).
module RAM_SINGLE_READ_PORT
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input wire                  Clock,
    input wire                  iWriteEnable,
    input wire[ADDR_WIDTH-1:0]  iReadAddress0,
    input wire[ADDR_WIDTH-1:0]  iWriteAddress,
    input wire[DATA_WIDTH-1:0]  iDataIn,
    output reg [DATA_WIDTH-1:0] oDataOut0
);

reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

//Write port
always @(posedge Clock)
begin
    if (iWriteEnable)
        Ram[iWriteAddress] <= iDataIn;
end

//Read port: samples the array contents as they were before this edge
always @(posedge Clock)
begin
    oDataOut0 <= Ram[iReadAddress0];
end

endmodule
|
|
/MEM/Module_SwapMemory.v
0,0 → 1,90
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//Double buffered memory made of two RAM_DUAL_READ_PORT banks (MEM_A, MEM_B).
//Two clients, "A" and "B", each see one bank at a time:
//   iSelect = 1 : client A <-> MEM_A, client B <-> MEM_B
//   iSelect = 0 : client A <-> MEM_B, client B <-> MEM_A
//Every input (write enable, addresses, write data) and every output is
//steered by iSelect with plain 2:1 multiplexers.
module SWAP_MEM
# (
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input wire                      Clock,
    input wire                      iSelect,

    //Client A
    input wire                      iWriteEnableA,
    input wire[ADDR_WIDTH-1:0]      iReadAddressA0,
    input wire[ADDR_WIDTH-1:0]      iReadAddressA1,
    input wire[ADDR_WIDTH-1:0]      iWriteAddressA,
    input wire[DATA_WIDTH-1:0]      iDataInA,
    output wire [DATA_WIDTH-1:0]    oDataOutA0,
    output wire [DATA_WIDTH-1:0]    oDataOutA1,

    //Client B
    input wire                      iWriteEnableB,
    input wire[ADDR_WIDTH-1:0]      iReadAddressB0,
    input wire[ADDR_WIDTH-1:0]      iReadAddressB1,
    input wire[ADDR_WIDTH-1:0]      iWriteAddressB,
    input wire[DATA_WIDTH-1:0]      iDataInB,
    output wire [DATA_WIDTH-1:0]    oDataOutB0,
    output wire [DATA_WIDTH-1:0]    oDataOutB1
);

//Signals at the pins of bank MEM_A
wire                    wBankA_WriteEnable;
wire [ADDR_WIDTH-1:0]   wBankA_ReadAddress0, wBankA_ReadAddress1, wBankA_WriteAddress;
wire [DATA_WIDTH-1:0]   wBankA_DataIn, wBankA_DataOut0, wBankA_DataOut1;

//Signals at the pins of bank MEM_B
wire                    wBankB_WriteEnable;
wire [ADDR_WIDTH-1:0]   wBankB_ReadAddress0, wBankB_ReadAddress1, wBankB_WriteAddress;
wire [DATA_WIDTH-1:0]   wBankB_DataIn, wBankB_DataOut0, wBankB_DataOut1;

//Bank A is driven by client A when iSelect is set, by client B otherwise
assign wBankA_WriteEnable  = ( iSelect ) ? iWriteEnableA  : iWriteEnableB;
assign wBankA_ReadAddress0 = ( iSelect ) ? iReadAddressA0 : iReadAddressB0;
assign wBankA_ReadAddress1 = ( iSelect ) ? iReadAddressA1 : iReadAddressB1;
assign wBankA_WriteAddress = ( iSelect ) ? iWriteAddressA : iWriteAddressB;
assign wBankA_DataIn       = ( iSelect ) ? iDataInA       : iDataInB;

//Bank B always gets the other client
assign wBankB_WriteEnable  = ( iSelect ) ? iWriteEnableB  : iWriteEnableA;
assign wBankB_ReadAddress0 = ( iSelect ) ? iReadAddressB0 : iReadAddressA0;
assign wBankB_ReadAddress1 = ( iSelect ) ? iReadAddressB1 : iReadAddressA1;
assign wBankB_WriteAddress = ( iSelect ) ? iWriteAddressB : iWriteAddressA;
assign wBankB_DataIn       = ( iSelect ) ? iDataInB       : iDataInA;

//Read data is routed back with the same steering
assign oDataOutA0 = ( iSelect ) ? wBankA_DataOut0 : wBankB_DataOut0;
assign oDataOutA1 = ( iSelect ) ? wBankA_DataOut1 : wBankB_DataOut1;
assign oDataOutB0 = ( iSelect ) ? wBankB_DataOut0 : wBankA_DataOut0;
assign oDataOutB1 = ( iSelect ) ? wBankB_DataOut1 : wBankA_DataOut1;

RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_A
(
    .Clock( Clock ),
    .iWriteEnable( wBankA_WriteEnable ),
    .iWriteAddress( wBankA_WriteAddress ),
    .iDataIn( wBankA_DataIn ),
    .iReadAddress0( wBankA_ReadAddress0 ),
    .iReadAddress1( wBankA_ReadAddress1 ),
    .oDataOut0( wBankA_DataOut0 ),
    .oDataOut1( wBankA_DataOut1 )
);

RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_B
(
    .Clock( Clock ),
    .iWriteEnable( wBankB_WriteEnable ),
    .iWriteAddress( wBankB_WriteAddress ),
    .iDataIn( wBankB_DataIn ),
    .iReadAddress0( wBankB_ReadAddress0 ),
    .iReadAddress1( wBankB_ReadAddress1 ),
    .oDataOut0( wBankB_DataOut0 ),
    .oDataOut1( wBankB_DataOut1 )
);

endmodule
/MEM/Module_ControlRegister.v
0,0 → 1,28
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//------------------------------------------------------------------- |
//16 bit register with synchronous reset.
//On every rising edge of Clock the register loads iControlRegister, or
//clears to zero while Reset is asserted. oControlRegister always shows the
//registered value.
module ControlRegister
(
    input wire          Clock,
    input wire          Reset,
    input wire[15:0]    iControlRegister,
    output wire[15:0]   oControlRegister
);

reg [15:0] rLatchedValue;

always @ (posedge Clock)
    if ( Reset )
        rLatchedValue <= 16'b0;
    else
        rLatchedValue <= iControlRegister;

assign oControlRegister = rLatchedValue;

endmodule
//------------------------------------------------------------------- |
/IO/Unit_IO.v
0,0 → 1,319
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
`define ADR_IMM 1 |
`define ADR_POINTER 0 |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//-------------------------------------------------------------------------- |
/*
IO_Unit

Top level of the Input/Output unit. It only wires sub-blocks together:
  OMI      (Module_OMemInterface) : streams EXE output writes to the OMEM bus
  TMI      (Module_TMemInterface) : fetches texture data over the TMEM bus
  MEMToWBM (MEM2WBMUnitB)         : reads rows from the data memory and hands
                                    them out one element at a time
  WBM      (WishBoneMasterUnit)   : Wishbone master port
  WBS      (WishBoneSlaveUnit)    : Wishbone slave port, used to load data and
                                    instructions into the core

While MST_I is high the data memory write port is driven by the slave (WBS),
otherwise by the master read-back path (wWBMToMEM2__* nets).

Change with respect to the previous revision: ADR_I is now an input. It was
declared as an output, but nothing inside this module drives it and it is
only consumed as the ADR_I input of the Wishbone slave.

NOTE(review): wWBMToMEM2__Done, wWBMToMEM2__oDataWriteEnable,
wWBMToMEM2__oDataBus and wTemp1 have no driver in this module. It looks like
the block that stores master read data back into MEM is not instantiated
here - confirm before relying on master read cycles.
NOTE(review): the ports oData, iStore and iWriteBack_Set are not referenced
inside this module.
*/
module IO_Unit
(
    input wire                              Clock,
    input wire                              Reset,
    input wire                              iEnable,
    input wire [`DATA_ADDRESS_WIDTH-1:0]    iDat_O_Pointer,     //Pointer to what we want to send via DAT_O
    input wire [`WIDTH-1:0]                 iAdr_O_Imm,         //Value to assign to ADR_O
    input wire [`DATA_ADDRESS_WIDTH-1:0]    iAdr_O_Pointer,     //Pointer to value to assign to ADR_O
    input wire                              iAdr_O_Type,        //Should we use iAdr_O_Imm or iAdr_O_Pointer
    input wire                              iAdr_O_Set,         //Should we set ADR_O
    input wire                              iBusCyc_Type,       //Bus cycle type: simple read/write, etc.
    input wire                              iStore,             //Should we store read data into MEM
    input wire [`DATA_ROW_WIDTH-1:0]        iReadDataBus,       //MEM Data read bus 1
    input wire [`DATA_ROW_WIDTH-1:0]        iReadDataBus2,      //MEM Data read bus 2
    input wire[`DATA_ADDRESS_WIDTH-1:0]     iAdr_DataWriteBack, //Where in MEM we want to store DAT_I
    input wire                              iWriteBack_Set,     //We want to set the Write back Address?

    output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress,
    output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress2,
    output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataWriteAddress,
    output wire                             oDataWriteEnable,
    output wire [`DATA_ROW_WIDTH-1:0]       oDataBus,
    output wire [`INSTRUCTION_WIDTH-1:0]    oInstructionBus,
    output wire                             oInstructionWriteEnable,
    output wire [`ROM_ADDRESS_WIDTH-1:0]    oInstructionWriteAddress,
    inout wire [`WIDTH-1:0]                 oData,
    output wire                             oBusy,
    output wire                             oDone,

    //Output memory (OMEM) interface
    input wire [`DATA_ROW_WIDTH-1:0]        iOMEM_WriteAddress,
    input wire [`DATA_ROW_WIDTH-1:0]        iOMEM_WriteData,
    input wire                              iOMEM_WriteEnable,
    output wire [`WB_WIDTH-1:0]             OMEM_DAT_O,
    output wire [`WB_WIDTH-1:0]             OMEM_ADR_O,
    output wire                             OMEM_WE_O,

    //Theia specific interfaces
    input wire                              MST_I,
    //Wish Bone Interfaces
    output wire [31:0]                      DAT_O,
    input wire [31:0]                       DAT_I,
    input wire                              ACK_I,
    output wire                             ACK_O,
    output wire [31:0]                      ADR_O,
    input wire [31:0]                       ADR_I,              //Slave address input (was wrongly declared as output)
    output wire                             WE_O,
    input wire                              WE_I,
    output wire                             STB_O,
    input wire                              STB_I,
    output wire                             CYC_O,
    input wire                              CYC_I,
    input wire [1:0]                        TGA_I,
    output wire [1:0]                       TGC_O,
    input wire                              GNT_I,

    //Texture memory (TMEM) interface
    output wire [`DATA_ROW_WIDTH-1:0]       oTMEMReadData,
    input wire                              iTMEMDataRequest,
    input wire [`DATA_ROW_WIDTH-1:0]        iTMEMReadAddress,
    output wire                             oTMEMDataAvailable,

    input wire                              TMEM_ACK_I,
    input wire [`WB_WIDTH-1:0]              TMEM_DAT_I ,
    output wire [`WB_WIDTH-1:0]             TMEM_ADR_O ,
    output wire                             TMEM_WE_O,
    output wire                             TMEM_STB_O,
    output wire                             TMEM_CYC_O,
    input wire                              TMEM_GNT_I
);

//------------------------------------------------------------------------------
//Internal nets. Declarations that were never referenced have been removed.
wire [`WIDTH-1:0]               wMEMToWBM2__ReadDataElement;
wire [`WIDTH-1:0]               wMEMToWBM2__ReadDataElement2;
wire                            wMEMToWBM_2__Enable;
wire                            wMEMToWBM_2__Done;
wire [`WIDTH-1:0]               wMEMToWBM_2__Address;
wire                            wAddrerssSelector2_oDataWriteEnable;    //Driven by MEMToWBM, not consumed here

wire                            wWBM_2_WBMToMEM_DataAvailable;          //Driven by WBM, not consumed here
wire [`WIDTH-1:0]               wWBM_2_WBMToMEM_Data;                   //Driven by WBM, not consumed here

wire                            wWBSToMEM2__oDataWriteEnable;
wire [`DATA_ADDRESS_WIDTH-1:0]  wWBSToMEM2__oDataWriteAddress;
wire [`DATA_ROW_WIDTH-1:0]      wWBSToMEM2__oDataBus;

//NOTE(review): the next three nets and wTemp1 have no driver in this module
wire                            wWBMToMEM2__Done;
wire                            wWBMToMEM2__oDataWriteEnable;
wire [`DATA_ROW_WIDTH-1:0]      wWBMToMEM2__oDataBus;
wire [`DATA_ADDRESS_WIDTH-1:0]  wTemp1;
wire [`DATA_ADDRESS_WIDTH-1:0]  wWBMToMEM2__oDataWriteAddress;

wire                            w2WBMToMEM__Enable;                     //Assigned below, not consumed here
wire [`DATA_ADDRESS_WIDTH-1:0]  wMEMToWBM2_WBMToMEM_RAMWriteAddr;
wire [`DATA_ADDRESS_WIDTH-1:0]  w2WBMToMEM_MEMWriteAddress;             //Assigned below, not consumed here
wire                            wReadOperation;                         //Assigned below, not consumed here
wire                            wIncrement_Address_O;                   //Assigned below, not consumed here

wire                            w2MEMToWBM_BusOperationComplete;
wire [`DATA_ADDRESS_WIDTH-1:0]  w2MEMToWBM_DataPointer;
wire [`WIDTH-1:0]               wADR_O_InitialAddress;
wire                            w2WBM_iEnable;
wire                            wSTB_O;
wire                            wAddress_Set_Delayed;
wire                            wWBM_iAddress_Set;

//------------------------------------------------------------------------------
//Output memory interface
Module_OMemInterface OMI
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iWriteEnable( iOMEM_WriteEnable ),
    .iData( iOMEM_WriteData ),
    .iAddress( iOMEM_WriteAddress ),
    .ADR_O( OMEM_ADR_O ),
    .DAT_O( OMEM_DAT_O ),
    .WE_O( OMEM_WE_O )
);

//Texture memory interface
Module_TMemInterface TMI
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iEnable( iTMEMDataRequest ),
    .iAddress( iTMEMReadAddress ),
    .oData( oTMEMReadData ),
    .oDone( oTMEMDataAvailable ),

    .ACK_I( TMEM_ACK_I ),
    .GNT_I( TMEM_GNT_I ),
    .DAT_I( TMEM_DAT_I ),
    .ADR_O( TMEM_ADR_O ),
    .WE_O( TMEM_WE_O ),
    .STB_O( TMEM_STB_O ),
    .CYC_O( TMEM_CYC_O )
);

//------------------------------------------------------------------------------
//Status
assign oBusy = CYC_O;   //Busy for as long as the master holds the bus cycle

assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? 0 : 1;

//TODO: WHEN ADR_POINTER Then Done is not until we got the 3 values from X,Y,Z in iAdr_O_Pointer
//Writes and pointer-addressed cycles finish when MEMToWBM is done, immediate
//addressed reads when the read-back path is done.
assign oDone = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE || iAdr_O_Type == `ADR_POINTER )
    ? wMEMToWBM_2__Done : wWBMToMEM2__Done;

//------------------------------------------------------------------------------
//Data memory write port: owned by the slave while MST_I is set
assign oDataWriteEnable  = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteEnable  : wWBMToMEM2__oDataWriteEnable;
assign oDataWriteAddress = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteAddress : wWBMToMEM2__oDataWriteAddress;
assign oDataBus          = (MST_I == 1'b1) ? wWBSToMEM2__oDataBus          : wWBMToMEM2__oDataBus;

assign wWBMToMEM2__oDataWriteAddress = (iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wTemp1;

//------------------------------------------------------------------------------
//Address / enable selection, immediate versus pointer addressing
assign wMEMToWBM_2__Address       = ( iAdr_O_Type == `ADR_IMM ) ? iAdr_O_Imm         : wMEMToWBM2__ReadDataElement;
assign w2WBMToMEM__Enable         = ( iAdr_O_Type == `ADR_IMM ) ? iEnable            : wMEMToWBM_2__Enable;
assign w2WBMToMEM_MEMWriteAddress = ( iAdr_O_Type == `ADR_IMM ) ? iAdr_DataWriteBack : wMEMToWBM2_WBMToMEM_RAMWriteAddr;

//On a write cycle the next element is requested on every ACK
assign w2MEMToWBM_BusOperationComplete = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? ACK_I : wWBMToMEM2__Done;

//On a write cycle the first MEM read port fetches the data to send,
//otherwise it fetches the row that holds the address
assign w2MEMToWBM_DataPointer = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? iDat_O_Pointer : iAdr_O_Pointer;

//------------------------------------------------------------------------------
MEM2WBMUnitB MEMToWBM
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iEnable( iEnable & (~iAdr_O_Type | iBusCyc_Type) ),
    .iMEMDataPointer( w2MEMToWBM_DataPointer ),
    .iMEMDataPointer2( iAdr_O_Pointer ),
    .iReadDataBus( iReadDataBus ),                                  //3 Elements coming from DMEM
    .iReadDataBus2( iReadDataBus2 ),
    .oReadDataElement( wMEMToWBM2__ReadDataElement ),               //1 out of 3 elements we read
    .oReadDataElement2( wMEMToWBM2__ReadDataElement2 ),             //1 out of 3 elements we read
    .oDataReadAddress( oDataReadAddress ),
    .oDataReadAddress2( oDataReadAddress2 ),
    .oDataWriteEnable( wAddrerssSelector2_oDataWriteEnable ),       //Always zero
    .oDataAvailable( wMEMToWBM_2__Enable ),                         //Data from MEM available
    .iRequestNextElement( w2MEMToWBM_BusOperationComplete ),
    .iDataInitialStorageAddress( iAdr_DataWriteBack ),
    .oDataWriteAddress( wMEMToWBM2_WBMToMEM_RAMWriteAddr ),
    .oDone( wMEMToWBM_2__Done )
);

//------------------------------------------------------------------------------
//Wishbone master control
assign wADR_O_InitialAddress = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE)
    ? wMEMToWBM2__ReadDataElement2 : wMEMToWBM_2__Address;

assign wIncrement_Address_O = iEnable & ACK_I;

//On a write cycle the master waits until MEMToWBM has data available
assign w2WBM_iEnable = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM_2__Enable : iEnable;

//If the address is a pointer, we need 1 cycle to read the data back from MEM
//before we can set the value into WBM
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_SetDelay
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( iAdr_O_Set ),
    .Q( wAddress_Set_Delayed )
);

//If the Addr is IMM then just set it whenever iAdr_O_Set is set, but if we have a pointer, then use
//wAddress_Set_Delayed at the beginning and then wWBMToMEM2__Done
assign wWBM_iAddress_Set = (iAdr_O_Type == `ADR_POINTER)
    ? (wAddress_Set_Delayed | wWBMToMEM2__Done) : iAdr_O_Set;

//The strobe is masked as soon as the operation reports done
assign STB_O = wSTB_O & ~oDone;

WishBoneMasterUnit WBM
(
    .CLK_I( Clock ),
    .RST_I( Reset ),
    .DAT_I( DAT_I ),
    .DAT_O( DAT_O ),
    .ACK_I( ACK_I ),
    .ADR_O( ADR_O ),
    .WE_O( WE_O ),
    .STB_O( wSTB_O ),
    .CYC_O( CYC_O ),
    .TGC_O( TGC_O ),
    .GNT_I( GNT_I ),

    .iEnable( w2WBM_iEnable ),
    .iBusCyc_Type( iBusCyc_Type ),
    .iAddress_Set( wWBM_iAddress_Set ),
    .iAddress( wADR_O_InitialAddress ),
    .oDataReady( wWBM_2_WBMToMEM_DataAvailable ),
    .iData( wMEMToWBM2__ReadDataElement ),
    .oData( wWBM_2_WBMToMEM_Data )
);

//------------------------------------------------------------------------------
WishBoneSlaveUnit WBS
(
    .CLK_I( Clock ),
    .RST_I( Reset ),
    .STB_I( STB_I ),
    .WE_I( WE_I ),
    .DAT_I( DAT_I ),
    .ADR_I( ADR_I ),
    .TGA_I( TGA_I ),
    .ACK_O( ACK_O ),
    .CYC_I( CYC_I ),
    .MST_I( MST_I ),

    .oDataBus( wWBSToMEM2__oDataBus ),
    .oInstructionBus( oInstructionBus ),
    .oDataWriteAddress( wWBSToMEM2__oDataWriteAddress ),
    .oDataWriteEnable( wWBSToMEM2__oDataWriteEnable ),
    .oInstructionWriteAddress( oInstructionWriteAddress ),
    .oInstructionWriteEnable( oInstructionWriteEnable )
);
//------------------------------------------------------------------------------

endmodule
//-------------------------------------------------------------------------- |
/IO/Module_OMemInterface.v
0,0 → 1,47
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//-------------------------------------------------------------------------- |
// Module_OMemInterface |
// |
// Presents one 32-bit slice of a DATA_ROW_WIDTH-wide address row and of a |
// DATA_ROW_WIDTH-wide data row on ADR_O / DAT_O. The slice is picked by a |
// 3-bit circular shift register that is enabled by iWriteEnable, so the |
// selection only moves while a write is being requested. |
// |
// Ports: |
//   Clock, Reset  - clock / reset forwarded to the shift register |
//   iWriteEnable  - passed straight through to WE_O; also enables the shifter |
//   iData         - data row; slices [95:64], [63:32], [31:0] feed DAT_O |
//   iAddress      - address row; slices [95:64], [63:32], [31:0] feed ADR_O |
//   ADR_O, DAT_O  - currently selected address / data slice |
//   WE_O          - copy of iWriteEnable |
// |
// NOTE(review): the shifter and mux primitives are defined outside this file |
// section; the selector is presumably one-hot ("walking one") - confirm. |
//-------------------------------------------------------------------------- |
module Module_OMemInterface |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iWriteEnable, |
    input wire [`DATA_ROW_WIDTH-1:0] iData, |
    input wire [`DATA_ROW_WIDTH-1:0] iAddress, |
    output wire [`WB_WIDTH-1:0] ADR_O, |
    output wire[`WB_WIDTH-1:0] DAT_O, |
    output wire WE_O |
| 
); |
| 
    // Shared slice selector for both the address mux and the data mux |
    wire [2:0] wWordSelect; |
| 
    // The write strobe is forwarded unmodified |
    assign WE_O = iWriteEnable; |
| 
    // Selector starts at 3'b001 and only shifts while iWriteEnable is high |
    CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL |
    ( |
        .Clock   ( Clock ), |
        .Reset   ( Reset ), |
        .Enable  ( iWriteEnable ), |
        .Initial ( 3'b001 ), |
        .O       ( wWordSelect ) |
    ); |
| 
    // Data slice selection |
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX2 |
    ( |
        .Sel ( wWordSelect ), |
        .I1  ( iData[95:64] ), |
        .I2  ( iData[63:32] ), |
        .I3  ( iData[31:0] ), |
        .O1  ( DAT_O ) |
    ); |
| 
    // Address slice selection (same selector, same slice ordering) |
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1 |
    ( |
        .Sel ( wWordSelect ), |
        .I1  ( iAddress[95:64] ), |
        .I2  ( iAddress[63:32] ), |
        .I3  ( iAddress[31:0] ), |
        .O1  ( ADR_O ) |
    ); |
| 
endmodule |
/IO/Module_TMemInterface.v
0,0 → 1,109
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//-------------------------------------------------------------------------- |
//-------------------------------------------------------------------------- |
// Module_TMemInterface |
// |
// Read-only bus master front end: while iEnable is high it requests the bus |
// (CYC_O = iEnable), steps a 4-bit shift register on every clock where the |
// bus is granted (iEnable & GNT_I), and uses the low three selector bits to |
// put one 32-bit slice of iAddress on ADR_O. A one-cycle delayed copy of the |
// same three bits enables three capture registers that store DAT_I into |
// oData[95:64], oData[63:32] and oData[31:0]. |
// |
// Ports: |
//   iEnable   - starts/holds a transfer; dropping it resets the selector |
//   iAddress  - row holding the three 32-bit addresses to present |
//   oData     - row assembled from the three captured DAT_I words |
//   oDone     - delayed selector bit 3, gated with iEnable |
//   ACK_I     - not referenced inside this module |
//   GNT_I     - bus grant; gates both selector stepping and data capture |
//   WE_O      - tied low (this unit only reads) |
// |
// NOTE(review): STB_O is declared as an output but is never driven anywhere |
// in this module - confirm whether the parent relies on it. |
//-------------------------------------------------------------------------- |
module Module_TMemInterface |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iEnable, |
    input wire [`DATA_ROW_WIDTH-1:0] iAddress, |
    output wire [`DATA_ROW_WIDTH-1:0] oData, |
    output wire oDone, |
| 
    input wire ACK_I, |
    input wire GNT_I, |
    input wire [`WB_WIDTH-1:0 ] DAT_I, |
| 
    //WB Output Signals |
    output wire [`WB_WIDTH-1:0 ] ADR_O, |
    output wire WE_O, |
    output wire STB_O, |
    output wire CYC_O |
| 
| 
); |
| 
    wire [3:0] wWordSel;        // selector: bits [2:0] pick a slice, bit 3 flags the end |
    wire [2:0] wCaptureSel;     // wWordSel[2:0] delayed by one register stage |
    wire       wDoneDelayed;    // wWordSel[3] delayed by one register stage |
| 
    // Selector control terms |
    wire wAdvance  = iEnable & GNT_I;   // step only while enabled and granted |
    wire wSelReset = Reset | ~iEnable;  // restart whenever the unit is disabled |
| 
    // Static bus outputs |
    assign CYC_O = iEnable; |
    assign WE_O  = 1'b0; //we only read |
| 
    // Completion is only reported while the request is still active |
    assign oDone = wDoneDelayed & iEnable; |
| 
    SHIFTLEFT_POSEDGE #(4) SHL |
    ( |
        .Clock   ( Clock ), |
        .Reset   ( wSelReset ), |
        .Enable  ( wAdvance ), |
        .Initial ( 4'b0001 ), |
        .O       ( wWordSel ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD_DONE |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( 1'b1 ), |
        .D      ( wWordSel[3] ), |
        .Q      ( wDoneDelayed ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 3 ) FFD_LATHCNOW |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( 1'b1 ), |
        .D      ( wWordSel[2:0] ), |
        .Q      ( wCaptureSel ) |
    ); |
| 
    // Address slice currently presented on the bus |
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1 |
    ( |
        .Sel ( wWordSel[2:0] ), |
        .I1  ( iAddress[95:64] ), |
        .I2  ( iAddress[63:32] ), |
        .I3  ( iAddress[31:0] ), |
        .O1  ( ADR_O ) |
    ); |
| 
    // Capture registers: one per 32-bit slice of oData, each enabled by its |
    // delayed selector bit and only while the bus is granted |
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDX |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( wCaptureSel[0] & GNT_I ), |
        .D      ( DAT_I ), |
        .Q      ( oData[95:64] ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDY |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( wCaptureSel[1] & GNT_I ), |
        .D      ( DAT_I ), |
        .Q      ( oData[63:32] ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDZ |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( wCaptureSel[2] & GNT_I ), |
        .D      ( DAT_I ), |
        .Q      ( oData[31:0] ) |
    ); |
| 
endmodule |
//-------------------------------------------------------------------------- |
/IO/Module_MEM2WBM.v
0,0 → 1,124
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/* |
This unit is used when the external address that comes into IO is not an immediate value, |
i.e. it is a value that we need to read from one of our internal memory locations. |
Since each internal memory location contains 3 * 32-bit slots, i.e. the X, Y and Z parts of the |
memory location, we make three requests for external data, one for each of the X, Y and Z |
parts of our internal register. So, summarising, each internal memory location stores 3 |
external memory addresses to request from WBM. Once the 3 data words have been read from the outside world, |
they will get stored back into 3 consecutive internal memory addresses starting from |
iDataInitialStorageAddress |
*/ |
//--------------------------------------------------------------------- |
//--------------------------------------------------------------------- |
// MEM2WBMUnitB |
// |
// Reads two rows from internal memory (addresses iMEMDataPointer and |
// iMEMDataPointer2 are forwarded unchanged as the read addresses) and hands |
// out one 32-bit slice of each row at a time on oReadDataElement / |
// oReadDataElement2. The slice is chosen by a 4-bit shift register that is |
// clocked by (iRequestNextElement | ~iEnable) and reset while iEnable is low; |
// bit 3 of that register is exported as oDone. |
// |
// This unit never writes: both write enables are tied low, and |
// oDataWriteAddress simply mirrors iDataInitialStorageAddress. |
// |
// oDataAvailable is high when iEnable is high, iEnable was also high on the |
// previous clock, and iRequestNextElement was low on the previous clock. |
//--------------------------------------------------------------------- |
module MEM2WBMUnitB |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iEnable, |
    //output reg oSetAddress, |
    input wire[`DATA_ADDRESS_WIDTH-1:0] iMEMDataPointer, |
    input wire[`DATA_ADDRESS_WIDTH-1:0] iMEMDataPointer2, |
    output wire [`WIDTH-1:0] oReadDataElement, |
    output wire [`WIDTH-1:0] oReadDataElement2, |
    output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress, //This tells MEM unit from wich address we want to read |
    output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress2, //This tells MEM unit from wich address we want to read |
    input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus, //This comes from the MEM unit |
    input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus2, //This comes from the MEM unit |
    output wire oDataWriteEnable, |
    output wire oDataWriteEnable2, |
    output wire oDataAvailable, |
    input wire iRequestNextElement, |
    input wire[`DATA_ADDRESS_WIDTH-1:0] iDataInitialStorageAddress, //Initial address to store data ////######## |
    output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, //Were to store the values comming from WBM ////######## |
    output wire oDone |
); |
| 
    wire [3:0] wSlotSel;                            // slice selector; bit 3 means "all slices consumed" |
    wire[`WIDTH-1:0] wValueFromBus,wLatchedValue;   // declared by the original design, not referenced below |
    wire wEnableDelayed;                            // iEnable, one clock later |
    wire wRequestNextDelayed;                       // iRequestNextElement, one clock later |
| 
    // ---- Pass-through / constant outputs ---- |
    assign oDataReadAddress  = iMEMDataPointer; |
    assign oDataReadAddress2 = iMEMDataPointer2; |
    assign oDataWriteAddress = iDataInitialStorageAddress; |
    assign oDataWriteEnable  = 1'b0; //We only read. |
    assign oDataWriteEnable2 = 1'b0; |
    assign oDone             = wSlotSel[3]; |
| 
    // ---- Data-available handshake ---- |
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_EnableDelay |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( 1'b1 ), |
        .D      ( iEnable ), |
        .Q      ( wEnableDelayed ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_x |
    ( |
        .Clock  ( Clock ), |
        .Reset  ( Reset ), |
        .Enable ( 1'b1 ), |
        .D      ( iRequestNextElement ), |
        .Q      ( wRequestNextDelayed ) |
    ); |
| 
    assign oDataAvailable = wEnableDelayed & ~wRequestNextDelayed & iEnable; |
| 
    // ---- Slice selector ---- |
    // The selector is clocked by the request strobe itself (ORed with |
    // ~iEnable), not by Clock. |
    SHIFTLEFT_POSEDGE #(4) SHL |
    ( |
        .Clock   ( iRequestNextElement | ~iEnable ), |
        .Reset   ( ~iEnable | Reset ), |
        .Enable  ( 1'b1 ), |
        .Initial ( 4'b0001 ), |
        .O       ( wSlotSel ) |
    ); |
| 
    // ---- Slice muxes, one per read bus ---- |
    MUXFULLPARALELL_3SEL_WALKINGONE MUXA |
    ( |
        .Sel ( wSlotSel[2:0] ), |
        .I1  ( iReadDataBus[95:64] ), |
        .I2  ( iReadDataBus[63:32] ), |
        .I3  ( iReadDataBus[31:0] ), |
        .O1  ( oReadDataElement ) |
    ); |
| 
    MUXFULLPARALELL_3SEL_WALKINGONE MUXA2 |
    ( |
        .Sel ( wSlotSel[2:0] ), |
        .I1  ( iReadDataBus2[95:64] ), |
        .I2  ( iReadDataBus2[63:32] ), |
        .I3  ( iReadDataBus2[31:0] ), |
        .O1  ( oReadDataElement2 ) |
    ); |
| 
endmodule |
//--------------------------------------------------------------------- |
/IO/Module_WishBoneMaster.v
0,0 → 1,147
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/* |
In order to read the geometry, we will behave as a master. |
Performing single read bus cycles should be sufficient. |
A 32-bit bus width was chosen for simplicity. |
*/ |
|
//------------------------------------------------------------------------------ |
// WishBoneMasterUnit |
// |
// Bus master used by the IO unit. Bus ownership is requested with CYC_O |
// (= iEnable); all activity is further gated by the arbiter grant GNT_I. |
// |
//   * iBusCyc_Type selects between a read cycle (WB_SIMPLE_READ_CYCLE) and a |
//     write cycle (WB_SIMPLE_WRITE_CYCLE). |
//   * ADR_O comes from one of two counters (read / write). Both are reloaded |
//     from iAddress when iAddress_Set is high; per the original author's note |
//     the address then increments with each acknowledged transfer. |
//   * On reads, DAT_I is captured into oData by a register clocked by ACK_I, |
//     and oDataReady is a registered copy of ACK_I gated by the enable. |
//   * On writes, iData is driven onto DAT_O; otherwise DAT_O is high-Z. |
// |
// NOTE(review): TGC_O is declared as an output but is never driven anywhere |
// in this module - confirm whether the parent relies on it. |
//------------------------------------------------------------------------------ |
module WishBoneMasterUnit |
( |
    //WB Input signals |
    input wire CLK_I, |
    input wire RST_I, |
    input wire ACK_I, |
    input wire GNT_I, //granted signal from bus arbiter |
    input wire [`WB_WIDTH-1:0 ] DAT_I, |
    output wire [`WB_WIDTH-1:0] DAT_O, |
| 
| 
    //WB Output Signals |
    output wire [`WB_WIDTH-1:0 ] ADR_O, |
    output wire WE_O, |
    output wire STB_O, |
    output wire CYC_O, |
    output wire [1:0] TGC_O, |
| 
    //Signals from inside the GPU |
    input wire iEnable, |
    input wire iBusCyc_Type, |
    input wire [`WIDTH-1:0 ] iAddress, |
    input wire iAddress_Set, |
    output wire oDataReady, |
    input wire [`WIDTH-1:0 ] iData, |
    output wire [`WIDTH-1:0 ] oData |
| 
| 
); |
| 
    // ---- Enable / cycle-type decoding ---- |
    wire wEnable        = iEnable & GNT_I;   // enabled AND bus granted |
    wire wReadOperation = (iBusCyc_Type == `WB_SIMPLE_READ_CYCLE); |
    wire wEnable_Delayed;                    // wEnable, one clock later |
    wire wDelayWE;                           // WE_O, one clock later |
    wire wDelayDataReady;                    // ACK_I registered during reads |
    wire [`WB_WIDTH-1:0 ] wReadADR_O,wWriteADR_O; |
| 
    //If CYC_O is 1, it means we are requesting bus ownership |
    assign CYC_O = iEnable; |
| 
    // Write enable: write cycle selected while we own the bus |
    assign WE_O = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) && wEnable; |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD88 |
    ( |
        .Clock  ( CLK_I ), |
        .Reset  ( RST_I ), |
        .Enable ( 1'b1 ), |
        .D      ( wEnable ), |
        .Q      ( wEnable_Delayed ) |
    ); |
| 
    // Strobe starts one cycle after the enable, is dropped while ACK_I is |
    // high, and requires that we are still enabled and granted |
    assign STB_O = wEnable & wEnable_Delayed & ~ACK_I; |
| 
    // Drive the data bus only for an active write; release it otherwise |
    assign DAT_O = (~wReadOperation & wEnable) ? iData : {`WB_WIDTH{1'bz}}; |
| 
    // ---- Address generation ---- |
    assign ADR_O = wReadOperation ? wReadADR_O : wWriteADR_O; |
| 
    // Read address counter: reloaded by iAddress_Set, stepped on ACK_I & GNT_I |
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_READ_ADDRESS |
    ( |
        .Clock   ( CLK_I ), |
        .Reset   ( iAddress_Set ), |
        .Enable  ( (ACK_I & GNT_I) | iAddress_Set ), |
        .Initial ( iAddress ), |
        .Q       ( wReadADR_O ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3 |
    ( |
        .Clock  ( CLK_I ), |
        .Reset  ( RST_I ), |
        .Enable ( 1'b1 ), |
        .D      ( WE_O ), |
        .Q      ( wDelayWE ) |
    ); |
| 
    // Write address counter: reloaded by iAddress_Set, stepped when the |
    // delayed write enable coincides with ACK_I |
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_WRITE_ADDRESS |
    ( |
        .Clock   ( CLK_I ), |
        .Reset   ( iAddress_Set ), |
        .Enable  ( (wDelayWE & ACK_I ) | iAddress_Set ), |
        .Initial ( iAddress ), |
        .Q       ( wWriteADR_O ) |
    ); |
| 
    // ---- Read data capture ---- |
    // Note: this register is clocked by ACK_I, not by CLK_I |
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1 |
    ( |
        .Clock  ( ACK_I ), |
        .Reset  ( ~wEnable ), |
        .Enable ( wReadOperation ), |
        .D      ( DAT_I ), |
        .Q      ( oData ) |
    ); |
| 
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2 |
    ( |
        .Clock  ( CLK_I ), |
        .Reset  ( ~wEnable ), |
        .Enable ( wReadOperation ), |
        .D      ( ACK_I ), |
        .Q      ( wDelayDataReady ) |
    ); |
    /* |
    always @ (posedge wDelayDataReady) |
    begin |
    $display("WBM Got data: %h ",oData); |
    $display("oDataReady = %d",oDataReady ); |
    end |
    */ |
| 
    assign oDataReady = wEnable & wDelayDataReady; |
| 
endmodule |
|
/IO/Module_WishBoneSlave.v
0,0 → 1,159
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
|
|
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b10 |
`define TAG_DATA_ADDRESS_TYPE 2'b01 |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//------------------------------------------------------------------------------ |
// WishBoneSlaveUnit |
// |
// Bus slave that receives words from an external master and assembles them |
// into rows for the internal data memory and instruction memory. |
// |
//   * The low 16 bits of ADR_I and the 2-bit tag TGA_I are captured by |
//     registers that are clocked by CYC_I (not CLK_I). |
//   * Incoming DAT_I words are steered into three WIDTH-bit registers |
//     (Vx, Vy, Vz) by a 3-bit shift register that advances while STB_I is |
//     high and ACK_O is low, and is reset while CYC_I is low. |
//   * oDataBus is {Vx,Vy,Vz}; oInstructionBus is {Vx,Vy}. |
//   * ACK_O is (STB_I & WE_I) delayed by one clock, gated with STB_I. |
//   * The write enables are asserted when MST_I is high, CYC_I is low, and |
//     the latched tag matches (see the review note at the enable logic). |
// |
// The same latched address drives both oInstructionWriteAddress and |
// oDataWriteAddress. |
module WishBoneSlaveUnit |
( |
//WB Input signals |
input wire CLK_I, |
input wire RST_I, |
input wire STB_I, |
input wire WE_I, |
input wire[`WB_WIDTH-1:0] DAT_I, |
input wire[`WB_WIDTH-1:0] ADR_I, |
input wire [1:0] TGA_I, |
output wire ACK_O, |
input wire MST_I, //Master In! |
input wire CYC_I, |
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus, |
output wire oDataWriteEnable, |
output wire oInstructionWriteEnable |
|
); |
|
//Address register: captures ADR_I[15:0] using CYC_I as its clock input. |
//The width is hard-coded to 16 bits here rather than taken from a macro. |
FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR |
( |
.Clock( CYC_I ), |
.Reset( RST_I ), |
.Enable(1'b1), |
.D( ADR_I[15:0] ), |
.Q( oInstructionWriteAddress ) |
); |
|
//Data and instruction writes share the same latched address |
assign oDataWriteAddress = oInstructionWriteAddress; |
|
wire[1:0] wTGA_Latched; |
|
//Tag register: captures TGA_I, also clocked by CYC_I |
FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE |
( |
.Clock( CYC_I ), |
.Reset( RST_I ), |
.Enable(1'b1), |
.D( TGA_I ), |
.Q( wTGA_Latched ) |
); |
|
|
|
//Local aliases for the bus clock and reset |
wire Clock,Reset; |
assign Clock = CLK_I; |
assign Reset = RST_I; |
|
|
//High while the master is strobing a write |
wire wLatchNow; |
assign wLatchNow = STB_I & WE_I; |
|
//1 Clock cycle after we assert the latch signal |
//then the FF has the data ready to propagate |
wire wDelay; |
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay |
( |
.Clock( Clock ), |
.Enable( 1'b1 ), |
.Reset( Reset ), |
.D( wLatchNow ), |
.Q( wDelay ) |
); |
|
assign ACK_O = wDelay & STB_I; //make sure we set ACK_O back to zero when STB_I is zero |
|
|
//Word selector: bit 0 -> Vx, bit 1 -> Vy, bit 2 -> Vz. |
//Starts at 3'b1, advances while STB_I is high and ACK_O is low, |
//and is held in reset while CYC_I is low. |
wire [2:0] wXYZSel; |
|
SHIFTLEFT_POSEDGE #(3) SHL |
( |
.Clock(CLK_I), |
.Enable(STB_I & ~ACK_O), |
.Reset(~CYC_I), |
.Initial(3'b1), |
.O(wXYZSel) |
|
); |
|
|
//Flip Flop to Store Vx |
wire [`WIDTH-1:0] wVx; |
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[0] & STB_I ), |
.D( DAT_I ), |
.Q( wVx ) |
|
); |
|
|
//Flip Flop to Store Vy |
wire [`WIDTH-1:0] wVy; |
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[1] & STB_I ), |
.D( DAT_I ), |
.Q( wVy ) |
|
); |
|
//Flip Flop to Store Vz |
wire [`WIDTH-1:0] wVz; |
|
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[2] & STB_I ), |
.D( DAT_I ), |
.Q( wVz ) |
); |
|
//A data row uses all three captured words; an instruction uses only Vx and Vy |
assign oDataBus = {wVx,wVy,wVz}; |
assign oInstructionBus = {wVx,wVy}; |
//Decode the latched address tag |
wire wIsInstructionAddress,wIsDataAddress; |
assign wIsInstructionAddress = (wTGA_Latched == `TAG_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0; |
assign wIsDataAddress = (wTGA_Latched == `TAG_DATA_ADDRESS_TYPE ) ? 1'b1 : 1'b0; |
|
//Write enables fire once the cycle has ended (CYC_I low) while MST_I is high. |
//NOTE(review): the DATA write enable is qualified by wIsInstructionAddress and |
//the INSTRUCTION write enable by wIsDataAddress. This looks cross-wired relative |
//to the signal names; it may be compensated by the tag encodings or by the host |
//side - confirm against the bus master / testbench before changing. |
assign oDataWriteEnable = (MST_I && !CYC_I && wIsInstructionAddress) ? 1'b1 : 1'b0; |
assign oInstructionWriteEnable = ( MST_I && !CYC_I && wIsDataAddress) ? 1'b1 : 1'b0; |
|
|
|
endmodule |
//------------------------------------------------------------------------------ |
/EXE/Module_ExecutionFSM.v
0,0 → 1,539
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
`define EXEU_AFTER_RESET 0 |
`define EXEU_INITIAL_STATE 1 |
`define EXEU_WAIT_FOR_DECODE 2 |
`define EXEU_FETCH_DECODED_INST 3 |
`define EXEU_WAIT_FOR_ALU_EXECUTION 4 |
`define EXEU_WRITE_BACK_TO_RAM 5 |
`define EXEU_HANDLE_JUMP 7 |
|
|
|
//----------------------------------------------------------------------- |
// ExecutionFSM |
// |
// Control logic of the execution unit. It hands decoded instructions to the |
// ALU and writes the ALU result back to RAM. |
// |
// Data path (all combinational unless noted): |
//   * iSource1 feeds ALU channel 1 (X/Y/Z = bits [95:64]/[63:32]/[31:0]); |
//     iSource0 feeds ALU channel 2 with the same slicing. |
//   * iOperation is forwarded unchanged as oALUOperation. |
//   * iDestination is registered into oRAMWriteAddress when wLatchNow is high; |
//     the same value is exported as oLastDestination and as oJumpIp. |
//   * RAMBus is driven with {iALUResultX,iALUResultY,iALUResultZ} while |
//     oRAMWriteEnable is high and is released (high-Z) otherwise. |
//   * oRAMWriteEnable requires iALUOutputReady, no branch outcome flag, and an |
//     operation other than NOP (and other than DEBUG_PRINT in DEBUG builds). |
//   * oJumpFlag mirrors iBranchTaken. |
// |
// Control: |
//   wLatchNow = iDecodeDone & rInputLatchesEnabled; it is also exported as |
//   oExeLatchedValues and oTriggerALU. The FSM below produces |
//   rInputLatchesEnabled and oBusy. |
// |
// State register uses an asynchronous, active-high Reset. |
//----------------------------------------------------------------------- |
module ExecutionFSM |
( |
    input wire Clock, |
    input wire Reset, |
| 
    input wire iDecodeDone, |
    input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
    input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1, |
    input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination, |
    inout wire[`DATA_ROW_WIDTH-1:0] RAMBus, |
    //output reg ReadyForNextInstruction, |
    output wire oJumpFlag , |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp , |
    output wire oRAMWriteEnable , |
    output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress , |
    output wire oExeLatchedValues, |
    output reg oBusy , |
| 
    //ALU ports and control signals |
    output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation, |
    output wire [`WIDTH-1:0] oALUChannelX1, |
    output wire [`WIDTH-1:0] oALUChannelY1, |
    output wire [`WIDTH-1:0] oALUChannelZ1, |
    output wire [`WIDTH-1:0] oALUChannelX2, |
    output wire [`WIDTH-1:0] oALUChannelY2, |
    output wire [`WIDTH-1:0] oALUChannelZ2, |
    output wire oTriggerALU, |
| 
    input wire [`WIDTH-1:0] iALUResultX, |
    input wire [`WIDTH-1:0] iALUResultY, |
    input wire [`WIDTH-1:0] iALUResultZ, |
    input wire iALUOutputReady, |
    input wire iBranchTaken, |
    input wire iBranchNotTaken, |
| 
| 
`ifdef DEBUG |
    input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
    input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
    //Data forward Signals |
    output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination |
| 
| 
); |
| 
wire wLatchNow; |
reg rInputLatchesEnabled; |
| 
//If ALU says jump, just pass along |
assign oJumpFlag = iBranchTaken; |
//JumpIP is the instruction destination (= oRAMWriteAddress) |
assign oJumpIp = oRAMWriteAddress; |
| 
//A decoded instruction is accepted only while the FSM allows latching. |
//The same pulse tells the decoder the values were taken and starts the ALU. |
assign wLatchNow = iDecodeDone & rInputLatchesEnabled; |
assign oExeLatchedValues = wLatchNow; |
assign oTriggerALU = wLatchNow; |
| 
//Either branch outcome flag marks the current operation as a jump |
wire wOperationIsJump; |
assign wOperationIsJump = iBranchTaken || iBranchNotTaken; |
| 
//Don't allow a write back if the operation is a jump or a NOP |
//(DEBUG builds additionally exclude DEBUG_PRINT) |
`ifdef DEBUG |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && |
(oALUOperation != `NOP) && oALUOperation != `DEBUG_PRINT; |
`else |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && oALUOperation != `NOP; |
`endif |
| 
| 
//Tri-state write-back bus: only driven while a write is enabled |
assign RAMBus = ( oRAMWriteEnable ) ? {iALUResultX,iALUResultY,iALUResultZ} : `DATA_ROW_WIDTH'bz; |
| 
//ALU operands are passed through unlatched. Note the crossing: |
//channel 1 comes from iSource1, channel 2 from iSource0. |
assign oALUChannelX1 = iSource1[95:64]; |
assign oALUChannelY1 = iSource1[63:32]; |
assign oALUChannelZ1 = iSource1[31:0]; |
| 
assign oALUChannelX2 = iSource0[95:64]; |
assign oALUChannelY2 = iSource0[63:32]; |
assign oALUChannelZ2 = iSource0[31:0]; |
| 
//The operation is forwarded unlatched as well. Earlier revisions latched the |
//operands and the opcode in flip-flops enabled by wLatchNow; only the |
//destination address is still registered (the DEBUG section keeps latched |
//copies of the operands purely for logging). |
assign oALUOperation = iOperation; |
| 
//Destination register: holds the write-back address of the accepted instruction |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination |
( |
    .Clock( Clock ), |
    .Reset( Reset), |
    .Enable( wLatchNow ), |
    .D( iDestination ), |
    .Q(oRAMWriteAddress) |
); |
| 
//Data forwarding |
assign oLastDestination = oRAMWriteAddress; |
| 
reg [7:0] CurrentState; |
reg [7:0] NextState; |
| 
| 
//------------------------------------------------ |
//State register (asynchronous reset) |
always @(posedge Clock or posedge Reset) |
begin |
    if (Reset) |
        CurrentState <= `EXEU_AFTER_RESET; |
    else |
        CurrentState <= NextState; |
end |
//------------------------------------------------ |
| 
| 
//Next-state and output logic. |
//This block is purely combinational, so it uses blocking assignments; |
//every branch assigns oBusy, rInputLatchesEnabled and NextState so that |
//no latch is inferred. |
always @( * ) |
begin |
    case (CurrentState) |
    //------------------------------------------ |
    `EXEU_AFTER_RESET: |
    begin |
        oBusy = 0; |
        rInputLatchesEnabled = 1; |
| 
        NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    /** |
    At the same time iDecodeDone goes to 1, our Flops |
    will store the value, so next clock cycle we can |
    tell IDU to go ahead and decode the next instruction |
    in the pipeline. |
    */ |
    `EXEU_WAIT_FOR_DECODE: |
    begin |
        oBusy = 0; |
        rInputLatchesEnabled = 1; |
| 
        if ( iDecodeDone ) //This same thing triggers the ALU |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    /* |
    Wait here, with the input latches closed, until the ALU |
    reports that its result is ready. |
    */ |
    `EXEU_WAIT_FOR_ALU_EXECUTION: |
    begin |
        oBusy = 1; |
        rInputLatchesEnabled = 0; //NO INTERRUPTIONS WHILE WE WAIT!! |
| 
        if ( iALUOutputReady ) //This same thing enables writing the results to RAM |
            NextState = `EXEU_WAIT_FOR_DECODE; |
        else |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
    end |
    //------------------------------------------ |
    //No transition in this module targets this state; it is kept as-is. |
    `EXEU_WRITE_BACK_TO_RAM: |
    begin |
        oBusy = 1; |
        rInputLatchesEnabled = 1; |
| 
        if ( iDecodeDone ) |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    //Any unexpected state encoding falls back to the reset state |
    default: |
    begin |
        oBusy = 0; |
        rInputLatchesEnabled = 1; |
| 
        NextState = `EXEU_AFTER_RESET; |
    end |
    //------------------------------------------ |
    endcase |
end |
| 
//----------------------------------------------------------------------- |
//Simulation only: open the microcode dump and register log files |
`ifdef DUMP_CODE |
integer ucode_file; |
integer reg_log; |
initial |
begin |
| 
$display("Opening ucode dump file....\n"); |
ucode_file = $fopen("Code.log","w"); |
$fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n"); |
$display("Opening Register lof file...\n"); |
reg_log = $fopen("Registers.log","w"); |
| 
end |
| 
`endif //Ucode dump |
| 
//----------------------------------------------------------------------- |
//Simulation only: latched copies of the operands plus an execution trace |
`ifdef DEBUG |
wire [`WIDTH-1:0] wALUChannelX1,wALUChannelY1,wALUChannelZ1; |
wire [`WIDTH-1:0] wALUChannelX2,wALUChannelY2,wALUChannelZ2; |
| 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource1[95:64] ), |
.Q(wALUChannelX1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource1[63:32] ), |
.Q(wALUChannelY1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource1[31:0] ), |
.Q(wALUChannelZ1) |
); |
| 
| 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource0[95:64] ), |
.Q(wALUChannelX2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource0[63:32] ), |
.Q(wALUChannelY2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( wLatchNow ), |
.D( iSource0[31:0] ), |
.Q(wALUChannelZ2) |
); |
| 
| 
//Log the instruction pointer each time a decode completes on the debug core |
always @ (posedge iDecodeDone && iDebug_CoreID == `DEBUG_CORE) |
begin |
`LOGME"[CORE %d] IP:%d", iDebug_CurrentIP); |
end |
| 
//Log flags, mnemonic, destination and latched operands when the ALU finishes |
always @ (negedge Clock && iDebug_CoreID == `DEBUG_CORE) |
begin |
if ( iALUOutputReady ) |
begin |
| 
| 
if (iBranchTaken) |
`LOGME"<BT>"); |
| 
if (iBranchNotTaken ) |
`LOGME"<BNT>"); |
| 
if (oRAMWriteEnable) |
`LOGME"<WE>"); |
| 
`LOGME "(%dns ",$time); |
case ( oALUOperation ) |
`RETURN: `LOGME"RETURN"); |
`ADD: `LOGME"ADD"); |
`SUB: `LOGME"SUB"); |
`DIV: `LOGME"DIV"); |
`MUL: `LOGME"MUL"); |
`MAG: `LOGME"MAG"); |
`JGX: `LOGME"JGX"); |
`JLX: `LOGME"JLX"); |
`JGEX: `LOGME"JGEX"); |
`JGEY: `LOGME"JGEY"); |
`JGEZ: `LOGME"JGEZ"); |
`JLEX: `LOGME"JLEX"); |
`JLEY: `LOGME"JLEY"); |
`JLEZ: `LOGME"JLEZ"); |
`JMP: `LOGME"JMP"); |
`ZERO: `LOGME"ZERO"); |
`JNEX: `LOGME"JNEX"); |
`JNEY: `LOGME"JNEY"); |
`JNEZ: `LOGME"JNEZ"); |
`JEQX: `LOGME"JEQX"); |
`JEQY: `LOGME"JEQY"); |
`JEQZ: `LOGME"JEQZ"); |
`CROSS: `LOGME"CROSS"); |
`DOT: `LOGME"DOT"); |
`SETX: `LOGME"SETX"); |
`SETY: `LOGME"SETY"); |
`SETZ: `LOGME"SETZ"); |
`NOP: `LOGME"NOP"); |
`COPY: `LOGME"COPY"); |
`INC: `LOGME"INC"); |
`DEC: `LOGME"DEC"); |
`MOD: `LOGME"MOD"); |
`FRAC: `LOGME"FRAC"); |
`NEG: `LOGME"NEG"); |
`SWIZZLE3D: `LOGME"SWIZZLE3D"); |
`MULP: `LOGME"MULP"); |
`XCHANGEX: `LOGME"XCHANGEX"); |
`IMUL: `LOGME"IMUL"); |
`UNSCALE: `LOGME"UNSCALE"); |
`INCX: `LOGME"INCX"); |
`INCY: `LOGME"INCY"); |
`INCZ: `LOGME"INCZ"); |
`OMWRITE: `LOGME"OMWRITE"); |
`TMREAD: `LOGME"TMREAD"); |
`LEA: `LOGME"LEA"); |
`CALL: `LOGME"CALL"); |
`RET: `LOGME"RET"); |
`DEBUG_PRINT: |
begin |
`LOGME"DEBUG_PRINT"); |
| 
end |
default: |
begin |
`LOGME"**********ERROR UNKNOWN OP*********"); |
$display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation); |
// $stop(); |
end |
endcase |
| 
`LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ", |
oRAMWriteAddress, |
wALUChannelX1,wALUChannelY1,wALUChannelZ1, |
wALUChannelX2,wALUChannelY2,wALUChannelZ2 |
| 
); |
| 
if (oALUOperation == `RETURN) |
`LOGME"\n\n\n"); |
| 
end |
end //always |
| 
//Append the ALU result to the trace line |
always @ ( negedge Clock && iDebug_CoreID == `DEBUG_CORE ) |
begin |
if ( iALUOutputReady ) |
`LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ); |
end //always |
`endif |
| 
endmodule |
/EXE/Module_InstructionFetch.v
0,0 → 1,215
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/********************************************************************************** |
Description: |
This is the instruction fetch unit. |
It gets the next instruction from the IMEM module at the MEM unit. |
It increments the instruction pointer (IP) in such a way that EXE always has |
one instruction per clock cycle (best pipeline performance). In order to achieve this, |
IFU has 2 instruction pointers, so that in case of 'branch' instructions, |
two instruction pointers are generated and two different instructions are simultaneously |
fetched from IMEM: the branch-taken and branch-not-taken instructions, so that once the |
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched. |
**********************************************************************************/ |
// InstructionFetch (IFU): drives the instruction pointer oIP, selects which of the two |
// incoming instruction words is presented on oCurrentInstruction, and generates the |
// "instruction available" / "execution done" handshake signals. |
// The helper modules used below (FFD_POSEDGE_SYNCRONOUS_RESET, INCREMENT, |
// UPCOUNTER_POSEDGE) are defined elsewhere; remarks about their behaviour are inferred |
// from their names and from the existing comments in this module -- TODO confirm. |
module InstructionFetch |
( |
input wire Clock, |
input wire Reset, |
// iTrigger starts the fetch sequence: it feeds the trigger delay chain (FFD2/FFD3) and the |
// Reset input of the InstructionPointer counter. |
input wire iTrigger, |
// Value presented on the counter's Initial input whenever no new branch is being taken. |
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch not taken instruction |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction2, //Branch taken instruction |
input wire iBranchTaken, |
output wire oInstructionAvalable, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calcule both decide later |
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction, |
input wire iEXEDone, |
output wire oMicroCodeReturnValue, |
input wire iSubroutineReturn, |
//input wire [`ROM_ADDRESS_WIDTH-1:0] iReturnAddress, |
output wire oExecutionDone |
); |
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |
|
|
// Bit 0 of the current instruction is exported as the micro-code return value. |
assign oMicroCodeReturnValue = oCurrentInstruction[0]; |
// The second instruction pointer is taken directly from bits [47:32] of the current |
// instruction. NOTE(review): the slice is hard-coded and assumes ROM_ADDRESS_WIDTH is 16 |
// and that the branch target lives in this field -- confirm against aDefinitions.v. |
assign oIP2 = oCurrentInstruction[47:32]; |
|
wire wTriggerDelay1,wTriggerDelay2,wIncrementIP_Delay1,wIncrementIP_Delay2, |
wLastInst_Delay1,wLastInst_Delay2; |
wire wIncrementIP,wLastInstruction; |
wire wInstructionAvalable,wSubReturnDelay1,wSubReturnDelay2; |
|
// High while the opcode field of the current instruction equals `RETURN. |
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN ); |
|
// Return-address capture: on the rising edge of IsCall (opcode field == `CALL) the value |
// oIP+1 is stored in rReturnAddress; it is used by the wIPEntryPoint mux further below. |
// NOTE(review): IsCall is a combinational decode of oCurrentInstruction and is used here |
// as a clock; rReturnAddress has no reset. Glitches on the decode could cause spurious |
// captures -- confirm this is acceptable for the target technology. |
wire IsCall; |
reg [`ROM_ADDRESS_WIDTH-1:0] rReturnAddress; |
assign IsCall = ( `INSTRUCTION_OPCODE == `CALL ) ? 1'b1 : 1'b0; |
always @ (posedge IsCall) |
rReturnAddress <= oIP+1; |
|
//Increment IP 2 cycles after trigger or everytime EXE is done, or 2 cycles after return from sub, but stop if we get to the RETURN |
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction) | wSubReturnDelay2; |
//It takes 1 clock cycle to read the instruction back from IMEM |
|
|
//Instructions become available to IDU: |
//* 2 cycles after IFU is initially triggered |
//* Everytime previous instruction execution is complete except for the last instruction in |
//the flow |
assign wInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2); |
|
|
// FFD22 and FFD23 form a two-stage chain on iSubroutineReturn: |
// iSubroutineReturn -> wSubReturnDelay1 -> wSubReturnDelay2. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD22 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iSubroutineReturn ), |
.Q( wSubReturnDelay1 ) |
); |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD23 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( wSubReturnDelay1 ), |
.Q( wSubReturnDelay2 ) |
); |
//Special case for instruction available pin: if a return from subroutine instruction was issued, |
//then wait 1 cycle before anouncing Instruction available to IDU |
// Verilog precedence: '&' binds tighter than '|', so this evaluates as |
// (wInstructionAvalable & ~iSubroutineReturn) | wSubReturnDelay2. |
assign oInstructionAvalable = wInstructionAvalable & ~iSubroutineReturn | wSubReturnDelay2; |
|
|
|
|
|
//Once we reach the last instruction, wait until EXE says he is done, then assert oExecutionDone |
assign oExecutionDone = (wLastInstruction & iEXEDone); |
|
// FFD2 and FFD3 form a two-stage chain on iTrigger: |
// iTrigger -> wTriggerDelay1 -> wTriggerDelay2. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iTrigger ), |
.Q( wTriggerDelay1 ) |
); |
|
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( wTriggerDelay1 ), |
.Q( wTriggerDelay2 ) |
); |
|
|
// FFD4 samples oInstructionAvalable, but only while the current instruction is the |
// RETURN instruction (Enable = wLastInstruction). FFD5 then delays that sample by one |
// more stage; its output wLastInst_Delay2 masks iEXEDone in wInstructionAvalable above. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(wLastInstruction), |
.D( oInstructionAvalable ), |
.Q( wLastInst_Delay1 ) |
); |
|
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1),//wLastInstruction), |
.D( wLastInst_Delay1 ), |
.Q( wLastInst_Delay2 ) |
); |
|
wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next; |
|
/* |
In case the branch is taken: |
We point current instruction into the iInstruction2 (branch-taken) instruction |
that corresponds to oIP2. |
Then, in the next clock cycle we should use the oIP2 incremented by one, |
so we need to load UPCOUNTER_POSEDGE with oIP2+1 |
*/ |
|
|
//If the branch was taken, then use the pre-fetched instruction (iInstruction2) |
// FFDX stores oCurrentInstruction while iBranchTaken is high (Enable = iBranchTaken); |
// the stored word is replayed through the oCurrentInstruction mux below. |
wire[`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1,wCurrentInstruction_BranchTaken; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(iBranchTaken), |
.D( oCurrentInstruction ), |
.Q( wCurrentInstruction_Delay1 ) |
); |
|
// One-stage delayed copy of iBranchTaken. It is used to tell the first cycle of a taken |
// branch (iBranchTaken & ~wBranchTaken_Delay1) apart from the following cycles. |
wire wBranchTaken_Delay1; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iBranchTaken ), |
.Q( wBranchTaken_Delay1 ) |
); |
|
|
// Select the branch-taken instruction only when a branch is taken and it is not a |
// subroutine return; otherwise use the not-taken instruction. |
assign wCurrentInstruction_BranchTaken = ( iBranchTaken & ~iSubroutineReturn) ? iInstruction2 : iInstruction1; |
|
// While the delayed branch flag is set, present the instruction word stored in FFDX |
// instead of the freshly selected one. |
assign oCurrentInstruction = (wBranchTaken_Delay1 ) ? |
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken; |
|
// Produces oIP2_Next from oIP2 (oIP2+1 according to the block comment above; the |
// INCREMENT module itself is defined elsewhere). |
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.A( oIP2 ), |
.R( oIP2_Next ) |
); |
|
wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint; |
//assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress; |
|
//iReturnAddress is a register stored @ IDU everytime a CALL instruction is decoded |
// NOTE(review): the comment above looks stale -- the iReturnAddress port is commented out |
// and the return address now comes from the local register rReturnAddress. |
// The conditional operator is right-associative, so this reads as: |
// first cycle of a taken branch ? (iSubroutineReturn ? rReturnAddress : oIP2_Next) |
// : iInitialCodeAddress |
assign wIPEntryPoint = (iBranchTaken & ~wBranchTaken_Delay1) ? (iSubroutineReturn) ? rReturnAddress : oIP2_Next : iInitialCodeAddress; |
|
|
// Instruction pointer counter. Its Reset input is asserted on iTrigger or on the first |
// cycle of a taken branch, with wIPEntryPoint on the Initial input (presumably the value |
// loaded on Reset -- confirm in UPCOUNTER_POSEDGE). Counting is enabled by wIncrementIP, |
// except during the first cycle of a taken branch. |
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer |
( |
.Clock( Clock ), |
.Reset(iTrigger | (iBranchTaken & ~wBranchTaken_Delay1)), |
.Enable(wIncrementIP & (~iBranchTaken | wBranchTaken_Delay1 ) ), |
.Initial( wIPEntryPoint ), |
.Q(oIP) |
); |
|
|
endmodule |
|
//------------------------------------------------------------------------------- |
/EXE/Unit_EXE.v
0,0 → 1,275
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
//--------------------------------------------------------------------- |
// ExecutionUnit: structural top level of the EXE unit. It contains no logic of its own |
// apart from one address mux; it instantiates and wires together five sub-modules: |
// EPU (InstructionEntryPoint), IFU (InstructionFetch), IDU (InstructionDecode), |
// EXE (ExecutionFSM) and ALU (VectorALU). |
// Only InstructionFetch and part of VectorALU are defined next to this module; the other |
// sub-modules are defined elsewhere, so the remarks below describe wiring only. |
module ExecutionUnit |
( |
|
input wire Clock, |
input wire Reset, |
// Code address and the two instruction words read from instruction memory. |
input wire [`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1, |
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction2, |
|
|
// Data memory read ports (consumed by the IDU) and the start trigger. |
input wire [`DATA_ROW_WIDTH-1:0] iDataRead0, |
input wire [`DATA_ROW_WIDTH-1:0] iDataRead1, |
input wire iTrigger, |
|
|
// Instruction memory addresses and data memory address/write signals. |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer1, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer2, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress0, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress1, |
output wire oDataWriteEnable, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire oReturnCode, |
|
|
// OMEM write and TMEM read ports; all of them pass straight through to the ALU. |
// Note that the address ports are DATA_ROW_WIDTH wide, the same as the data ports. |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData, |
output wire oOMEMWriteEnable, |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData, |
input wire iTMEMDataAvailable, |
output wire oTMEMDataRequest, |
|
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
output wire oDone |
|
|
|
|
); |
|
|
`ifdef DEBUG |
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer; |
`endif |
|
// NOTE(review): within this module wEXE2__uCodeDone is declared but never connected; |
// wEXE2_IFU__EXEBusy, JumpIp and wEXE2_IDU_ExeLatchedValues are driven by the EXE instance |
// but not consumed; wIDU2_IFU_ReturnAddress appears only in commented-out connections. |
wire wEXE2__uCodeDone; |
wire wEXE2_IFU__EXEBusy; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination; |
wire wALU2_EXE__BranchTaken; |
wire wALU2_IFU_BranchNotTaken; |
wire [`INSTRUCTION_WIDTH-1:0] CurrentInstruction; |
//wire wIDU2_IFU__IDUBusy; |
|
|
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation; |
|
|
wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1; |
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination; |
wire wInstructionAvailable; |
|
//ALU wires |
wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation; |
wire [`WIDTH-1:0] ALU2ChannelA; |
wire [`WIDTH-1:0] ALU2ChannelB; |
wire [`WIDTH-1:0] ALU2ChannelC; |
wire [`WIDTH-1:0] ALU2ChannelD; |
wire [`WIDTH-1:0] ALU2ChannelE; |
wire [`WIDTH-1:0] ALU2ChannelF; |
wire [`WIDTH-1:0] ALU2ResultA; |
wire [`WIDTH-1:0] ALU2ResultB; |
wire [`WIDTH-1:0] ALU2ResultC; |
wire wEXE2_ALU__TriggerALU; |
wire ALU2OutputReady; |
wire w2FIU__BranchTaken; |
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp; |
wire [`ROM_ADDRESS_WIDTH-1:0] wIDU2_IFU_ReturnAddress; |
wire wALU2_IFU_ReturnFromSub; |
|
//wire wIDU2_IFU__InputsLatched; |
|
wire wEPU_Busy,wTriggerIFU; |
wire [`ROM_ADDRESS_WIDTH-1:0] wEPU_IP,wIFU_IP,wCodeEntryPoint; |
|
// The first instruction memory address comes from the EPU while it reports busy, |
// and from the IFU otherwise. |
assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP; |
|
|
// EPU: receives the external trigger and initial code address, reads iInstruction1, |
// and supplies the entry point and trigger used by the IFU. |
InstructionEntryPoint EPU |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iTrigger( iTrigger ), |
.iInitialCodeAddress( iInitialCodeAddress ), |
.iIMemInput(iInstruction1), |
|
.oEPU_Busy(wEPU_Busy), |
.oEntryPoint( wCodeEntryPoint ), |
.oTriggerIFU( wTriggerIFU ), |
.oInstructionAddr( wEPU_IP ) |
|
); |
|
// IFU: triggered by the EPU (not by iTrigger directly). Its branch-taken input is the |
// EXE jump flag, its subroutine-return input comes from the ALU, and its "EXE done" |
// input is the ALU OutputReady signal. It drives oInstructionPointer2, oReturnCode |
// and oDone directly. |
InstructionFetch IFU |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iTrigger( wTriggerIFU ), |
.iInstruction1( iInstruction1 ), |
.iInstruction2( iInstruction2 ), |
.iInitialCodeAddress( wCodeEntryPoint ), |
.iBranchTaken( w2FIU__BranchTaken ), |
.iSubroutineReturn( wALU2_IFU_ReturnFromSub ), |
//.iReturnAddress( wIDU2_IFU_ReturnAddress ), |
.oCurrentInstruction( CurrentInstruction ), |
.oInstructionAvalable( wInstructionAvailable ), |
.oIP( wIFU_IP ), |
.oIP2( oInstructionPointer2 ), |
.iEXEDone( ALU2OutputReady ), |
.oMicroCodeReturnValue( oReturnCode ), |
.oExecutionDone( oDone ) |
); |
|
////--------------------------------------------------------- |
wire wIDU2_EXE_DataReady; |
wire wEXE2_IDU_ExeLatchedValues; |
|
// IDU: takes the current instruction from the IFU, drives the two data memory read |
// addresses, and receives the concatenated ALU results plus the last destination |
// address from EXE on its data-forwarding inputs. |
InstructionDecode IDU |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEncodedInstruction( CurrentInstruction ), |
.iInstructionAvailable( wInstructionAvailable ), |
//.iIP( oInstructionPointer1 ), |
//.oReturnAddress( wIDU2_IFU_ReturnAddress ), |
|
.oRamAddress0( oDataReadAddress0 ), |
.oRamAddress1( oDataReadAddress1 ), |
.iRamValue0( iDataRead0 ), |
.iRamValue1( iDataRead1 ), |
|
.iLastDestination( wEXE2_IDU_DataFordward_LastDestination ), |
.iDataForward( {ALU2ResultA,ALU2ResultB,ALU2ResultC} ), |
|
//Outputs going to the ALU-FSM |
.oOperation( wOperation ), |
.oDestination( wDestination ), |
.oSource0( wSource0 ), |
.oSource1( wSource1 ), |
|
`ifdef DEBUG |
.iDebug_CurrentIP( oInstructionPointer1 ), |
.oDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ), |
`endif |
|
.oDataReadyForExe( wIDU2_EXE_DataReady ) |
|
|
|
|
|
); |
|
|
// EXE: execution state machine. Unlike the other instances its Reset input is also |
// asserted by the external iTrigger. It drives the data memory write port and the |
// ALU operand channels, and turns the ALU branch outcome into the jump flag for the IFU. |
ExecutionFSM EXE |
( |
.Clock( Clock ), |
.Reset( Reset | iTrigger ), //New Sat Jun13 |
.iDecodeDone( wIDU2_EXE_DataReady ), |
.iOperation( wOperation ), |
.iDestination( wDestination ), |
.iSource0( wSource0 ), |
.iSource1( wSource1 ) , |
|
|
`ifdef DEBUG |
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ), |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
|
//.iJumpResultFromALU( wALU2_EXE__BranchTaken ), |
.iBranchTaken( wALU2_EXE__BranchTaken ), |
.iBranchNotTaken( wALU2_IFU_BranchNotTaken ), |
.oJumpFlag( w2FIU__BranchTaken ), |
.oJumpIp( JumpIp ), |
.oRAMWriteEnable( oDataWriteEnable ), |
.oRAMWriteAddress( oDataWriteAddress ), |
.RAMBus( oDataBus ), |
.oBusy( wEXE2_IFU__EXEBusy ), |
|
.oExeLatchedValues( wEXE2_IDU_ExeLatchedValues ), |
.oLastDestination( wEXE2_IDU_DataFordward_LastDestination ), |
|
//ALU ports and control signals |
.oTriggerALU( wEXE2_ALU__TriggerALU ), |
.oALUOperation( ALU2Operation ), |
.oALUChannelX1( ALU2ChannelA ), |
.oALUChannelX2( ALU2ChannelB ), |
.oALUChannelY1( ALU2ChannelC ), |
.oALUChannelY2( ALU2ChannelD ), |
.oALUChannelZ1( ALU2ChannelE ), |
.oALUChannelZ2( ALU2ChannelF ), |
.iALUResultX( ALU2ResultA ), |
.iALUResultY( ALU2ResultB ), |
.iALUResultZ( ALU2ResultC ), |
.iALUOutputReady( ALU2OutputReady ) |
|
); |
|
|
//-------------------------------------------------------- |
|
// ALU: channel mapping is A/B -> Ax/Bx, C/D -> Ay/By, E/F -> Az/Bz. The OMEM/TMEM ports |
// of this module connect straight through, and OutputReady feeds both the EXE state |
// machine and the IFU (as iEXEDone). |
VectorALU ALU |
( |
.Clock(Clock), |
.Reset(Reset), |
.iOperation( ALU2Operation ), |
.iChannel_Ax( ALU2ChannelA ), |
.iChannel_Bx( ALU2ChannelB ), |
.iChannel_Ay( ALU2ChannelC ), |
.iChannel_By( ALU2ChannelD ), |
.iChannel_Az( ALU2ChannelE ), |
.iChannel_Bz( ALU2ChannelF ), |
.oResultA( ALU2ResultA ), |
.oResultB( ALU2ResultB ), |
.oResultC( ALU2ResultC ), |
.oBranchTaken( wALU2_EXE__BranchTaken ), |
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ), |
.oReturnFromSub( wALU2_IFU_ReturnFromSub ), |
.iInputReady( wEXE2_ALU__TriggerALU ), |
|
//*********** |
.oOMEMWriteAddress( oOMEMWriteAddress ), |
.oOMEMWriteData( oOMEMWriteData ), |
.oOMEM_WriteEnable( oOMEMWriteEnable ), |
|
.oTMEMReadAddress( oTMEMReadAddress ), |
.iTMEMReadData( iTMEMReadData ), |
.iTMEMDataAvailable( iTMEMDataAvailable ), |
.oTMEMDataRequest( oTMEMDataRequest ), |
//*********** |
.iCurrentIP( oInstructionPointer1 ), |
.OutputReady( ALU2OutputReady ) |
|
); |
|
|
|
endmodule |
//--------------------------------------------------------------------- |
/EXE/Module_VectorALU.v
0,0 → 1,1274
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
|
//-------------------------------------------------------------- |
module VectorALU |
( |
input wire Clock, |
input wire Reset, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
input wire[`WIDTH-1:0] iChannel_Ax, |
input wire[`WIDTH-1:0] iChannel_Bx, |
input wire[`WIDTH-1:0] iChannel_Ay, |
input wire[`WIDTH-1:0] iChannel_By, |
input wire[`WIDTH-1:0] iChannel_Az, |
input wire[`WIDTH-1:0] iChannel_Bz, |
output wire [`WIDTH-1:0] oResultA, |
output wire [`WIDTH-1:0] oResultB, |
output wire [`WIDTH-1:0] oResultC, |
input wire iInputReady, |
output reg oBranchTaken, |
output reg oBranchNotTaken, |
output reg oReturnFromSub, |
input wire [`ROM_ADDRESS_WIDTH-1:0] iCurrentIP, |
|
//Connections to the O Memory |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData, |
output wire oOMEM_WriteEnable, |
//Connections to the R Memory |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData, |
input wire iTMEMDataAvailable, |
output wire oTMEMDataRequest, |
|
output reg OutputReady |
|
); |
|
|
|
|
|
wire wMultiplcationUnscaled; |
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0; |
|
//-------------------------------------------------------------- |
|
reg [7:0] InputReadyA,InputReadyB,InputReadyC; |
|
//------------------------------------------------------ |
/* |
This is the block that takes care of all tha arithmetic |
comparisons. Supported operations are <,>,<=,>=,==,!= |
|
*/ |
//------------------------------------------------------ |
reg [`WIDTH-1:0] wMultiplicationA_Ax; |
reg [`WIDTH-1:0] wMultiplicationA_Bx; |
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result; |
wire wMultiplicationA_InputReady; |
wire wMultiplicationA_OutputReady; |
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA, |
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD; |
|
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady; |
|
//-------------------------------------------------------------------- |
reg [`WIDTH-1:0] ResultA,ResultB,ResultC; |
|
//Output Flip Flops, |
//This flip flop will control the outputs so that the |
//values of the outputs change ONLY when when there is |
//a positive edge of OutputReady |
|
FFD32_POSEDGE ResultAFFD |
( |
.Clock( OutputReady ), |
.D( ResultA ), |
.Q( oResultA ) |
); |
|
FFD32_POSEDGE ResultBFFD |
( |
.Clock( OutputReady ), |
.D( ResultB ), |
.Q( oResultB ) |
); |
|
FFD32_POSEDGE ResultCFFD |
( |
.Clock( OutputReady ), |
.D( ResultC ), |
.Q( oResultC ) |
); |
//-------------------------------------------------------------------- |
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ; |
|
|
Swizzle3D Swizzle1 |
( |
.Source0_X( iChannel_Bx ), |
.Source0_Y( iChannel_By ), |
.Source0_Z( iChannel_Bz ), |
.iOperation( iChannel_Ax ), |
|
.SwizzleX( wSwizzleOutputX ), |
.SwizzleY( wSwizzleOutputY ), |
.SwizzleZ( wSwizzleOutputZ ) |
); |
//--------------------------------------------------------------------- |
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC; |
|
//---------------------------------------------------------------------( |
|
wire IOW_Operation,wOMEM_We; |
assign IOW_Operation = (iOperation == `OMWRITE); |
|
always @ ( * ) |
begin |
if (iOperation == `RET) |
oReturnFromSub <= OutputReady; |
else |
oReturnFromSub <= 1'b0; |
|
end |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_AWE |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( IOW_Operation ), |
.Q( wOMEM_We ) |
); |
|
assign oOMEM_WriteEnable = wOMEM_We & IOW_Operation; |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD1_A |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oOMEMWriteAddress) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Bx,iChannel_By,iChannel_Bz} ), |
.Q( oOMEMWriteData ) |
); |
|
|
|
wire wTMReadOutputReady; |
assign wTMReadOutputReady = iTMEMDataAvailable; |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( iTMEMDataAvailable ), |
.Q( wTMReadOutputReady ) |
); |
*/ |
//assign oTMEMReadAddress = {iChannel_Ax,iChannel_Ay,iChannel_Az}; |
|
//We wait 1 clock cycle before we send the data read request, because |
//we need to latch the values at the output |
|
wire wOpTRead; |
assign wOpTRead = ( iOperation == `TMREAD ) ? 1'b1 : 1'b0; |
wire wTMEMRequest; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE123 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( wOpTRead ), |
.Q( wTMEMRequest ) |
); |
assign oTMEMDataRequest = wTMEMRequest & wOpTRead; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B445 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady & wOpTRead ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oTMEMReadAddress ) |
); |
|
/* |
This MUX will select the apropiated X,Y or Z depending on |
wheter it is XYZ iOperation. This gets defined by the bits 3 and 4 |
of iOperation, and only applies for oBranchTaken and Store operations. |
*/ |
|
wire wArithmeticComparison_Result; |
wire ArithmeticComparison_InputReady; |
wire ArithmeticComparison_OutputReady; |
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B; |
|
|
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_A = iChannel_Ax; |
2'b10: ArithmeticComparison_A = iChannel_Ay; |
2'b11: ArithmeticComparison_A = iChannel_Az; |
default: ArithmeticComparison_A = 0; //Should never happen |
endcase |
end |
//--------------------------------------------------------------------- |
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_B = iChannel_Bx; |
2'b10: ArithmeticComparison_B = iChannel_By; |
2'b11: ArithmeticComparison_B = iChannel_Bz; |
default: ArithmeticComparison_B = 0; //Should never happen |
endcase |
end |
|
//--------------------------------------------------------------------- |
/* |
The onbly instance of Aritmetic comparison in the ALU, |
ArithmeticComparison operations matches the 3 LSB of |
Global ALU iOperation for oBranchTaken Instruction family |
*/ |
|
assign ArithmeticComparison_InputReady = iInputReady; |
|
wire wArithmeticComparisonResult; |
|
ArithmeticComparison ArithmeticComparison_1 |
( |
.Clock( Clock ), |
.X( ArithmeticComparison_A ), |
.Y( ArithmeticComparison_B ), |
.iOperation( iOperation[2:0] ), |
.iInputReady( ArithmeticComparison_InputReady ), |
.OutputReady( ArithmeticComparison_OutputReady ), |
.Result( wArithmeticComparisonResult ) |
); |
|
|
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady; |
//-------------------------------------------------------------------- |
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationA_Ax ), |
.B( wMultiplicationA_Bx ), |
.R( wMultiplicationA_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationA_InputReady ), |
.OutputReady( wMultiplicationA_OutputReady ) |
); |
|
//-------------------------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationA_Ax = iChannel_Ay; // Ay * Bz |
`MAG: wMultiplicationA_Ax = iChannel_Ax; |
`MULP: wMultiplicationA_Ax = iChannel_Ax; //Az = Ax * Ay |
default: wMultiplicationA_Ax = iChannel_Ax; // Ax * Bx |
endcase |
end |
//-------------------------------------------------------------------- |
|
//assign wMultiplicationA_Ax = iChannel_Ax; |
|
assign wMultiplicationA_InputReady |
= (iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG || |
iOperation == `MULP |
) ? iInputReady : 0; |
|
//-------------------------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx |
`MAG: wMultiplicationA_Bx = iChannel_Ax; //Ax^2 |
`DOT: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx |
`CROSS: wMultiplicationA_Bx = iChannel_Bz; // Ay * Bz |
`MULP: wMultiplicationA_Bx = iChannel_Ay; //Az = Ax * Ay |
default: wMultiplicationA_Bx = 32'b0; |
endcase |
end |
//-------------------------------------------------------------------- |
|
//------------------------------------------------------ |
|
reg [`WIDTH-1:0] wMultiplicationB_Ay; |
reg [`WIDTH-1:0] wMultiplicationB_By; |
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result; |
wire wMultiplicationB_InputReady; |
wire wMultiplicationB_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationB_Ay ), |
.B( wMultiplicationB_By ), |
.R( wMultiplicationB_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationB_InputReady ), |
.OutputReady( wMultiplicationB_OutputReady ) |
); |
|
|
//---------------------------------------------------- |
|
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationB_Ay = iChannel_Az; // Az * By |
`MAG: wMultiplicationB_Ay = iChannel_Ay; |
default: wMultiplicationB_Ay = iChannel_Ay; // Ay * By |
endcase |
end |
//---------------------------------------------------- |
assign wMultiplicationB_InputReady |
= (iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG ) ? iInputReady : 0; |
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationB_By = iChannel_By; //Ay*By |
`MAG: wMultiplicationB_By = iChannel_Ay; //Ay^2 |
`DOT: wMultiplicationB_By = iChannel_By; //Ay*By |
`CROSS: wMultiplicationB_By = iChannel_By; // Az * By |
default: wMultiplicationB_By = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
|
//------------------------------------------------------ |
reg [`WIDTH-1:0] wMultiplicationC_Az; |
reg [`WIDTH-1:0] wMultiplicationC_Bz; |
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result; |
wire wMultiplicationC_InputReady; |
wire wMultiplicationC_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationC_Az ), |
.B( wMultiplicationC_Bz ), |
.R( wMultiplicationC_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationC_InputReady ), |
.OutputReady( wMultiplicationC_OutputReady ) |
); |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationC_Az = iChannel_Az; //Az*Bx |
`MAG: wMultiplicationC_Az = iChannel_Az; |
default: wMultiplicationC_Az = iChannel_Az; //Az*Bz |
endcase |
end |
//---------------------------------------------------- |
|
assign wMultiplicationC_InputReady |
= ( |
iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG |
) ? iInputReady : 0; |
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz |
`MAG: wMultiplicationC_Bz = iChannel_Az; //Ay^2 |
`DOT: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz |
`CROSS: wMultiplicationC_Bz = iChannel_Bx; //Az*Bx |
default: wMultiplicationC_Bz = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
|
reg [`WIDTH-1:0] wMultiplicationD_Aw; |
reg [`WIDTH-1:0] wMultiplicationD_Bw; |
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result; |
wire wMultiplicationD_InputReady; |
wire wMultiplicationD_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationD_Aw ), |
.B( wMultiplicationD_Bw ), |
.R( wMultiplicationD_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationD_InputReady ), |
.OutputReady( wMultiplicationD_OutputReady ) |
); |
|
assign wMultiplicationD_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationD_Aw = iChannel_Ax; //Ax*Bz |
default: wMultiplicationD_Aw = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationD_Bw = iChannel_Bz; //Ax*Bz |
default: wMultiplicationD_Bw = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
reg [`WIDTH-1:0] wMultiplicationE_Ak; |
reg [`WIDTH-1:0] wMultiplicationE_Bk; |
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result; |
wire wMultiplicationE_InputReady; |
wire wMultiplicationE_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationE_Ak ), |
.B( wMultiplicationE_Bk ), |
.R( wMultiplicationE_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationE_InputReady ), |
.OutputReady( wMultiplicationE_OutputReady ) |
); |
|
assign wMultiplicationE_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationE_Ak = iChannel_Ax; //Ax*By |
default: wMultiplicationE_Ak = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationE_Bk = iChannel_By; //Ax*By |
default: wMultiplicationE_Bk = 32'b0; |
endcase |
end |
|
//---------------------------------------------------- |
reg [`WIDTH-1:0] wMultiplicationF_Al; |
reg [`WIDTH-1:0] wMultiplicationF_Bl; |
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result; |
wire wMultiplicationF_InputReady; |
wire wMultiplicationF_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationF_Al ), |
.B( wMultiplicationF_Bl ), |
.R( wMultiplicationF_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationF_InputReady ), |
.OutputReady( wMultiplicationF_OutputReady ) |
); |
assign wMultiplicationF_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationF_Al = iChannel_Ay; //Ay*Bx |
default: wMultiplicationF_Al = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationF_Bl = iChannel_Bx; //Ay*Bx |
default: wMultiplicationF_Bl = 32'b0; |
endcase |
end |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionA_Result; |
wire wDivisionA_OutputReady; |
wire wDivisionA_InputReady; |
|
assign wDivisionA_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_A |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Ax ), |
.iDivisor( iChannel_Bx ), |
.xQuotient( wDivisionA_Result ), |
.iInputReady( wDivisionA_InputReady ), |
.OutputReady( wDivisionA_OutputReady ) |
|
); |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionB_Result; |
wire wDivisionB_OutputReady; |
wire wDivisionB_InputReady; |
|
assign wDivisionB_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_B |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Ay ), |
.iDivisor( iChannel_By ), |
.xQuotient( wDivisionB_Result ), |
.iInputReady( wDivisionB_InputReady ), |
.OutputReady( wDivisionB_OutputReady ) |
|
); |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionC_Result; |
wire wDivisionC_OutputReady; |
wire wDivisionC_InputReady; |
|
|
assign wDivisionC_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_C |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Az ), |
.iDivisor( iChannel_Bz ), |
.xQuotient( wDivisionC_Result ), |
.iInputReady( wDivisionC_InputReady ), |
.OutputReady( wDivisionC_OutputReady ) |
|
); |
//-------------------------------------------------------------- |
/* |
First addtion block instance goes here. |
Note that all inputs/outputs to the block |
are wires. It has two MUXES one for each entry. |
*/ |
reg [`LONG_WIDTH-1:0] wAddSubA_Ax,wAddSubA_Bx; |
wire [`LONG_WIDTH-1:0] wAddSubA_Result; |
wire wAddSubA_Operation; //Either addition or substraction |
reg wAddSubA_InputReady; |
wire wAddSubA_OutputReady; |
|
assign wAddSubA_Operation |
= ( |
iOperation == `SUB |
|| iOperation == `CROSS |
|| iOperation == `DEC |
|| iOperation == `MOD |
) ? 1 : 0; |
|
FixedAddSub AddSubChannel_A |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.A( wAddSubA_Ax ), |
.B( wAddSubA_Bx ), |
.R( wAddSubA_Result ), |
.iOperation( wAddSubA_Operation ), |
.iInputReady( wAddSubA_InputReady ), |
.OutputReady( wAddSubA_OutputReady ) |
); |
//Diego |
|
|
//---------------------------- |
|
//InpuReady Mux A |
always @ ( * ) |
begin |
case (iOperation) |
`ADD: wAddSubA_InputReady = iInputReady; |
`SUB: wAddSubA_InputReady = iInputReady; |
`INC,`INCX,`INCY,`INCZ: wAddSubA_InputReady = iInputReady; |
`DEC: wAddSubA_InputReady = iInputReady; |
`MOD: wAddSubA_InputReady = iInputReady; |
|
`MAG: wAddSubA_InputReady = wMultiplicationOutputReadyA && |
wMultiplicationOutputReadyB; |
//wMultiplicationA_OutputReady |
//&& wMultiplicationB_OutputReady; |
|
`DOT: wAddSubA_InputReady = |
wMultiplicationOutputReadyA && |
wMultiplicationOutputReadyB; |
//wMultiplicationA_OutputReady |
//&& wMultiplicationB_OutputReady; |
|
`CROSS: wAddSubA_InputReady = |
wMultiplicationOutputReadyA && |
wMultiplicationOutputReadyB; |
// wMultiplicationA_OutputReady |
//&& wMultiplicationB_OutputReady; |
|
default: wAddSubA_InputReady = 1'b0; |
endcase |
end |
//---------------------------- |
|
//wAddSubA_Ax input Mux |
always @ ( * ) |
begin |
case (iOperation) |
|
`ADD: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax }; |
`SUB: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax }; |
`INC,`INCX,`INCY,`INCZ: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax }; |
`DEC: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax }; |
`MOD: wAddSubA_Ax = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx }; |
|
`MAG: wAddSubA_Ax = wMultiplicationA_Result; |
`DOT: wAddSubA_Ax = wMultiplicationA_Result; |
`CROSS: wAddSubA_Ax = wMultiplicationA_Result; |
default: wAddSubA_Ax = 64'b0; |
endcase |
end |
//---------------------------- |
//wAddSubA_Bx 2:1 input Mux |
always @ ( * ) |
begin |
case (iOperation) |
`ADD: wAddSubA_Bx = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx }; |
`SUB: wAddSubA_Bx = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx }; |
`INC,`INCX: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE); |
`INCY,`INCZ: wAddSubA_Bx = `LONG_WIDTH'd0; |
`DEC: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE); |
`MOD: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE); |
|
`MAG: wAddSubA_Bx = wMultiplicationB_Result; |
`DOT: wAddSubA_Bx = wMultiplicationB_Result; |
`CROSS: wAddSubA_Bx = wMultiplicationB_Result; |
default: wAddSubA_Bx = 64'b0; |
endcase |
end |
//-------------------------------------------------------------- |
/* |
Second addition block (adder channel B). |
Its result, operation select and output-ready signals are wires. |
Its two operands and its input-ready are regs driven by the |
always @(*) muxes that follow this instance. |
*/ |
|
wire [`LONG_WIDTH-1:0] wAddSubB_Result; |
|
|
wire wAddSubB_Operation; //Either addition or subtraction |
reg wAddSubB_InputReady; |
wire wAddSubB_OutputReady; |
|
reg [`LONG_WIDTH-1:0] wAddSubB_Ay,wAddSubB_By; |
|
// Asserted for SUB, CROSS, DEC and MOD; deasserted for every other operation. |
// NOTE(review): presumably 1 selects subtraction inside FixedAddSub -- confirm. |
assign wAddSubB_Operation = |
( iOperation == `SUB |
|| iOperation == `CROSS |
|| iOperation == `DEC |
|| iOperation == `MOD |
) ? 1 : 0; |
|
// Adder/subtractor instance for channel B: R is produced from A and B under |
// control of iOperation, with an iInputReady/OutputReady handshake. |
FixedAddSub AddSubChannel_B |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.A( wAddSubB_Ay ), |
.B( wAddSubB_By ), |
.R( wAddSubB_Result ), |
.iOperation( wAddSubB_Operation ), |
.iInputReady( wAddSubB_InputReady ), |
.OutputReady( wAddSubB_OutputReady ) |
); |
//---------------------------- |
// Registered copy of wMultiplicationOutputReadyC, clocked by Clock and cleared by Reset. |
// NOTE(review): presumably a one-cycle delay, going by the flip-flop and signal names -- confirm. |
// The delayed flag is ANDed with wAddSubAOutputReady to start adder B for MAG and DOT. |
wire wMultiplicationOutputReadyC_Dealy1; |
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( wMultiplicationOutputReadyC ), |
.Q( wMultiplicationOutputReadyC_Dealy1 ) |
); |
|
|
|
|
|
// Input-ready mux for adder channel B. |
// Operations that feed the adder straight from the ALU channels start it with |
// iInputReady. MAG and DOT start it once adder A has finished and the delayed |
// multiplier-C ready flag is set. CROSS starts it when multipliers C and D are |
// both ready. Any other operation never starts the adder. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, `INC, `INCX, `INCY, `INCZ, `DEC, `MOD: |
            wAddSubB_InputReady = iInputReady; |
        `MAG, `DOT: |
            wAddSubB_InputReady = wAddSubAOutputReady && wMultiplicationOutputReadyC_Dealy1; |
        `CROSS: |
            wAddSubB_InputReady = wMultiplicationOutputReadyC && wMultiplicationOutputReadyD; |
        default: |
            wAddSubB_InputReady = 1'b0; |
    endcase |
end |
//---------------------------- |
// wAddSubB_Ay: first-operand input mux for adder channel B. |
// ADD/SUB/INC*/DEC pass iChannel_Ay widened to 64 bits by replicating bit 31, |
// and MOD passes iChannel_By widened the same way. |
// MAG and DOT chain the result of adder A into this adder (A^2+B^2 and |
// Ax*Bx + Ay*By respectively); CROSS passes wMultiplicationC_Result. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, `INC, `INCX, `INCY, `INCZ, `DEC: |
            wAddSubB_Ay = { {32{iChannel_Ay[31] == 1'b1}}, iChannel_Ay }; |
        `MOD: |
            wAddSubB_Ay = { {32{iChannel_By[31] == 1'b1}}, iChannel_By }; |
        `MAG, `DOT: |
            wAddSubB_Ay = wAddSubA_Result; |
        `CROSS: |
            wAddSubB_Ay = wMultiplicationC_Result; |
        default: |
            wAddSubB_Ay = 64'b0; |
    endcase |
end |
//---------------------------- |
// wAddSubB_By: second-operand input mux for adder channel B. |
// ADD/SUB pass iChannel_By widened to 64 bits using bit 31 as the sign. |
// INC/INCY, DEC and MOD pass the constant (1 << `SCALE); INCX/INCZ pass zero, |
// so those two leave the Y lane unchanged. |
// MAG/DOT pass wMultiplicationC_Result (C^2 / Az*Bz); CROSS passes wMultiplicationD_Result. |
// The default arm is written as a 64-bit zero so it matches the width used by |
// the other adder operand muxes in this module (it was 32'b0). |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB: wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //By |
        `INC, `INCY: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE); |
        `INCX, `INCZ: wAddSubB_By = `LONG_WIDTH'd0; |
        `DEC, `MOD: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE); |
        `MAG, `DOT: wAddSubB_By = wMultiplicationC_Result; //C^2 or Az * Bz |
        `CROSS: wAddSubB_By = wMultiplicationD_Result; |
        default: wAddSubB_By = 64'b0; |
    endcase |
end |
//-------------------------------------------------------------- |
// Signals for the third adder channel (Z lane). |
// Result, operation select and output-ready are wires; the two operands and |
// the input-ready are regs driven by always @(*) muxes. |
// The former regs AddSubC_Az / AddSubB_Bz were removed: nothing in the module |
// read or wrote them and their names were easily confused with the real operands. |
wire [`LONG_WIDTH-1:0] wAddSubC_Result; |
reg [`LONG_WIDTH-1:0] wAddSubC_Az,wAddSubC_Bz; |
|
wire wAddSubC_Operation; //Either addition or subtraction |
reg wAddSubC_InputReady; |
wire wAddSubC_OutputReady; |
|
//----------------------------------------- |
// wAddSubC_Az: first-operand input mux for adder channel C. |
// CROSS passes wMultiplicationE_Result, MOD passes iChannel_Bz and every other |
// operation passes iChannel_Az; channel values are widened to 64 bits by |
// replicating bit 31. |
always @ ( * ) |
begin |
    case (iOperation) |
        `CROSS: |
            wAddSubC_Az = wMultiplicationE_Result; |
        `MOD: |
            wAddSubC_Az = { {32{iChannel_Bz[31] == 1'b1}}, iChannel_Bz }; |
        default: |
            wAddSubC_Az = { {32{iChannel_Az[31] == 1'b1}}, iChannel_Az }; |
    endcase |
end |
//----------------------------------------- |
// wAddSubC_Bz: second-operand input mux for adder channel C. |
// CROSS passes wMultiplicationF_Result. INC/INCZ, DEC and MOD pass the constant |
// (1 << `SCALE); INCX/INCY pass zero, so those two leave the Z lane unchanged. |
// Every other operation passes iChannel_Bz widened to 64 bits by replicating bit 31. |
always @ ( * ) |
begin |
    case (iOperation) |
        `CROSS: |
            wAddSubC_Bz = wMultiplicationF_Result; |
        `INC, `INCZ: |
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE); |
        `INCX, `INCY: |
            wAddSubC_Bz = `LONG_WIDTH'd0; |
        `DEC, `MOD: |
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE); |
        default: |
            wAddSubC_Bz = { {32{iChannel_Bz[31] == 1'b1}}, iChannel_Bz }; |
    endcase |
end |
//----------------------------------------- |
|
// Operation select for adder channel C: asserted for SUB, CROSS, DEC and MOD, |
// deasserted for every other operation. |
// NOTE(review): presumably 1 selects subtraction inside FixedAddSub -- confirm. |
assign wAddSubC_Operation |
= ( |
iOperation == `SUB |
|| iOperation == `CROSS |
|| iOperation == `DEC |
|| iOperation == `MOD |
) ? 1 : 0; |
|
// Adder/subtractor instance for channel C (operands wAddSubC_Az / wAddSubC_Bz). |
FixedAddSub AddSubChannel_C |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.A( wAddSubC_Az ), |
.B( wAddSubC_Bz ), |
.R( wAddSubC_Result ), |
.iOperation( wAddSubC_Operation ), |
.iInputReady( wAddSubC_InputReady ), |
.OutputReady( wAddSubC_OutputReady ) |
); |
|
|
// Input-ready mux for adder channel C. |
// CROSS waits for multipliers E and F to both report ready; every other |
// operation starts the adder directly from iInputReady. |
always @ ( * ) |
begin |
    if (iOperation == `CROSS) |
        wAddSubC_InputReady = wMultiplicationE_OutputReady && wMultiplicationF_OutputReady; |
    else |
        wAddSubC_InputReady = iInputReady; |
end |
|
//------------------------------------------------------ |
// Square-root stage used by the MAG operation. |
// Its operand is the output of adder B, and it is only started when adder B |
// reports ready while the current operation is MAG. |
wire [`WIDTH-1:0] wSquareRoot_Result; |
wire wSquareRoot_OutputReady; |
|
|
FixedPointSquareRoot SQROOT1 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Operand( wAddSubB_Result ), |
.iInputReady( wAddSubBOutputReady && iOperation == `MAG), |
.OutputReady( wSquareRoot_OutputReady ), |
.Result( wSquareRoot_Result ) |
); |
//------------------------------------------------------ |
|
// MOD results: each A channel is bitwise-ANDed with the matching adder result. |
// For MOD the adder operands are the B channel and (1 << `SCALE), with the |
// adder operation select asserted. |
// NOTE(review): presumably this yields A & (B - 1.0), i.e. A mod B for a power-of-two B -- confirm. |
assign wModulus2N_ResultA = (iChannel_Ax & wAddSubA_Result ); |
assign wModulus2N_ResultB = (iChannel_Ay & wAddSubB_Result ); |
assign wModulus2N_ResultC = (iChannel_Az & wAddSubC_Result ); |
|
|
|
|
|
|
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&// |
//****Mux for ResultA*** |
// Selects the value driven on ResultA for the current iOperation. |
// Adder-based results (ADD, SUB, CROSS, DOT, INC*, DEC) are narrowed from the |
// 64-bit adder output to 32 bits as {bit 63, bits [30:0]}; bits [62:31] are dropped. |
// DOT uses the output of adder B and MAG uses the square-root result. |
|
always @ ( * ) |
begin |
case ( iOperation ) |
`RETURN: ResultA = iChannel_Ax; |
`ADD: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]};// & 32'h7FFFFFFF; |
`SUB: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]};//wAddSubA_Result[31:0]; |
`CROSS: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]};//wAddSubA_Result[31:0]; |
`DIV: ResultA = wDivisionA_Result; |
// MUL/IMUL keep only the low 32 bits of the multiplier output. |
`MUL: ResultA = wMultiplicationA_Result[31:0]; |
`IMUL: ResultA = wMultiplicationA_Result[31:0]; |
`DOT: ResultA = (wAddSubB_Result[63] == 1'b1) ? { 1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};//wAddSubB_Result[31:0]; |
`MAG: ResultA = wSquareRoot_Result; |
`ZERO: ResultA = 32'b0; |
`COPY: ResultA = iChannel_Ax; |
`TMREAD: ResultA = iTMEMReadData[95:64]; |
// LEA zero-extends the current instruction pointer with 16 leading zero bits. |
`LEA: ResultA = {16'b0,iCurrentIP}; |
|
`SWIZZLE3D: ResultA = wSwizzleOutputX; |
|
//Set Operations |
`UNSCALE: ResultA = iChannel_Ax >> `SCALE; |
`SETX,`RET: ResultA = iChannel_Ax; |
`SETY: ResultA = iChannel_Bx; |
`SETZ: ResultA = iChannel_Bx; |
`INC,`INCX,`INCY,`INCZ: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]}; |
`DEC: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]}; |
`MOD: ResultA = wModulus2N_ResultA; |
// FRAC keeps only the low `SCALE bits of the channel. |
`FRAC: ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
`MULP: ResultA = iChannel_Ax; |
// NEG is the two's complement of the channel. |
`NEG: ResultA = ~iChannel_Ax + 1'b1; |
`XCHANGEX: ResultA = iChannel_Bx; |
|
// Unknown operations drive zero; the debug trap below is commented out. |
default: |
begin |
`ifdef DEBUG |
// $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
// $stop(); |
`endif |
ResultA = 32'b0; |
end |
endcase |
end |
//------------------------------------------------------ |
//****Mux for RB*** |
// Selects the value driven on ResultB for the current iOperation. |
// Adder-based results are narrowed from the 64-bit output of adder B to |
// 32 bits as {bit 63, bits [30:0]}; bits [62:31] are dropped. |
// DOT and MAG drive the same scalar here as on the other result lanes. |
always @ ( * ) |
begin |
case ( iOperation ) |
// NOTE(review): RETURN drives iChannel_Ax (not Ay) on this lane -- confirm this is intended. |
`RETURN: ResultB = iChannel_Ax; |
`ADD: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]}; // & 32'h7FFFFFFF; |
`SUB: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]}; //wAddSubB_Result[31:0]; |
`CROSS: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};//wAddSubB_Result[31:0]; |
`DIV: ResultB = wDivisionB_Result; |
// MUL/IMUL keep only the low 32 bits of the multiplier output. |
`MUL: ResultB = wMultiplicationB_Result[31:0]; |
`IMUL: ResultB = wMultiplicationB_Result[31:0]; |
`DOT: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};//wAddSubB_Result[31:0]; |
`MAG: ResultB = wSquareRoot_Result; |
`ZERO: ResultB = 32'b0; |
`COPY: ResultB = iChannel_Ay; |
`TMREAD: ResultB = iTMEMReadData[63:32]; |
`LEA: ResultB = {16'b0,iCurrentIP}; |
|
//Set Operations |
`UNSCALE: ResultB = iChannel_Ay >> `SCALE; |
`SETX,`RET: ResultB = iChannel_By; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETY: ResultB = iChannel_Ax; // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
`SETZ: ResultB = iChannel_By; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
`SWIZZLE3D: ResultB = wSwizzleOutputY; |
|
`INC,`INCX,`INCY,`INCZ: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]}; // & 32'h7FFFFFFF; |
`DEC: ResultB = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]}; // & 32'h7FFFFFFF; |
`MOD: ResultB = wModulus2N_ResultB; |
// FRAC keeps only the low `SCALE bits of the channel. |
`FRAC: ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
`MULP: ResultB = iChannel_Ay; |
// NEG is the two's complement of the channel. |
`NEG: ResultB = ~iChannel_Ay + 1'b1; |
`XCHANGEX: ResultB = iChannel_Ay; |
|
// Unknown operations drive zero; the debug trap below is commented out. |
default: |
begin |
`ifdef DEBUG |
//$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
//$stop(); |
`endif |
ResultB = 32'b0; |
end |
endcase |
end |
//------------------------------------------------------ |
//****Mux for RC*** |
// Selects the value driven on ResultC for the current iOperation. |
// Adder-based results are narrowed from the 64-bit adder output to 32 bits as |
// {bit 63, bits [30:0]}; bits [62:31] are dropped. ADD/SUB/CROSS/INC*/DEC use |
// adder C, while DOT uses adder B (the same scalar as the other lanes). |
always @ ( * ) |
begin |
case ( iOperation ) |
// NOTE(review): RETURN drives iChannel_Ax (not Az) on this lane -- confirm this is intended. |
`RETURN: ResultC = iChannel_Ax; |
`ADD: ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]}; //wAddSubC_Result[31:0];// & 32'h7FFFFFFF; |
`SUB: ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]}; //wAddSubC_Result[31:0]; |
`CROSS: ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]};//wAddSubC_Result[31:0]; |
`DIV: ResultC = wDivisionC_Result; |
// MUL/IMUL keep only the low 32 bits of the multiplier output. |
`MUL: ResultC = wMultiplicationC_Result[31:0]; |
`IMUL: ResultC = wMultiplicationC_Result[31:0]; |
`DOT: ResultC = (wAddSubB_Result[63] == 1'b1) ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};//wAddSubB_Result[31:0]; |
`MAG: ResultC = wSquareRoot_Result; |
`ZERO: ResultC = 32'b0; |
`COPY: ResultC = iChannel_Az; |
`TMREAD: ResultC = iTMEMReadData[31:0]; |
`LEA: ResultC = {16'b0,iCurrentIP}; |
|
`SWIZZLE3D: ResultC = wSwizzleOutputZ; |
|
//Set Operations |
`UNSCALE: ResultC = iChannel_Az >> `SCALE; |
`SETX,`RET: ResultC = iChannel_Bz; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETY: ResultC = iChannel_Bz; // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
`SETZ: ResultC = iChannel_Ax; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
`INC,`INCX,`INCY,`INCZ: ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]}; //wAddSubC_Result[31:0];// & 32'h7FFFFFFF; |
`DEC: ResultC = (wAddSubC_Result[63] == 1'b1) ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]}; //wAddSubC_Result[31:0];// & 32'h7FFFFFFF; |
`MOD: ResultC = wModulus2N_ResultC; |
// FRAC keeps only the low `SCALE bits of the channel. |
`FRAC: ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
// MULP is the only lane that takes the multiplier-A product; the other lanes pass their A channel. |
`MULP: ResultC = wMultiplicationA_Result[31:0]; |
// NEG is the two's complement of the channel. |
`NEG: ResultC = ~iChannel_Az + 1'b1; |
`XCHANGEX: ResultC = iChannel_Az; |
// Unknown operations drive zero; the debug trap below is commented out. |
default: |
begin |
`ifdef DEBUG |
//$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
//$stop(); |
`endif |
ResultC = 32'b0; |
end |
endcase |
end |
//------------------------------------------------------------------------ |
|
|
// Branch-taken flag. |
// JMP, CALL and RET are taken as soon as the ALU output is ready; every |
// conditional jump follows the arithmetic comparison result. Operations that |
// are not branches never assert the flag. |
always @ ( * ) |
begin |
    case (iOperation) |
        `JMP, `CALL, `RET: |
            oBranchTaken = OutputReady; |
|
        `JGX,  `JGY,  `JGZ, |
        `JLX,  `JLY,  `JLZ, |
        `JEQX, `JEQY, `JEQZ, |
        `JNEX, `JNEY, `JNEZ, |
        `JGEX, `JGEY, `JGEZ, |
        `JLEX, `JLEY, `JLEZ: |
            oBranchTaken = wArithmeticComparison_Result; |
|
        default: |
            oBranchTaken = 0; |
    endcase |
end |
|
// Branch-not-taken flag. |
// For every branch operation it is asserted when the ALU output is ready and |
// oBranchTaken is low; for non-branch operations it stays low. |
always @ ( * ) |
begin |
    case (iOperation) |
        `JMP,  `CALL, `RET, |
        `JGX,  `JGY,  `JGZ, |
        `JLX,  `JLY,  `JLZ, |
        `JEQX, `JEQY, `JEQZ, |
        `JNEX, `JNEY, `JNEZ, |
        `JGEX, `JGEY, `JGEZ, |
        `JLEX, `JLEY, `JLEZ: |
            oBranchNotTaken = !oBranchTaken && OutputReady; |
|
        default: |
            oBranchNotTaken = 0; |
    endcase |
end |
//------------------------------------------------------------------------ |
//Output-ready logic for the division units. |
//Each divider's OutputReady clocks an FFT1 cell whose D input is tied high and |
//whose Reset is iInputReady; the three resulting flags are ANDed together. |
//NOTE(review): presumably each flag latches high when its divider finishes and is |
//cleared when a new operation is issued -- confirm against the FFT1 module. |
|
wire wDivisionOutputReadyA,wDivisionOutputReadyB,wDivisionOutputReadyC; |
wire wDivisionOutputReady; |
|
|
// Plain aliases of the three adder OutputReady signals. |
assign wAddSubAOutputReady = wAddSubA_OutputReady; |
assign wAddSubBOutputReady = wAddSubB_OutputReady; |
assign wAddSubCOutputReady = wAddSubC_OutputReady; |
|
|
FFT1 FFT_DivisionA |
( |
.D(1'b1), |
.Clock( wDivisionA_OutputReady ), |
.Reset( iInputReady ), |
.Q( wDivisionOutputReadyA ) |
); |
|
FFT1 FFT_DivisionB |
( |
.D(1'b1), |
.Clock( wDivisionB_OutputReady ), |
.Reset( iInputReady ), |
.Q( wDivisionOutputReadyB ) |
); |
|
FFT1 FFT_DivisionC |
( |
.D(1'b1), |
.Clock( wDivisionC_OutputReady ), |
.Reset( iInputReady ), |
.Q( wDivisionOutputReadyC ) |
); |
|
// Division is complete only when all three lane flags are set. |
assign wDivisionOutputReady = |
( wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC ); |
|
// Plain aliases of the multiplier A..D OutputReady signals. |
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady; |
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady; |
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady; |
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady; |
|
// Combined multiplication-ready flag: only multipliers A, B and C take part. |
assign wMultiplicationOutputReady = |
( wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC ); |
|
// Square-root ready flag: an FFT1 cell with D tied high, clocked by the |
// square-root unit's OutputReady and reset by iInputReady. |
// NOTE(review): presumably a sticky "done" flag cleared on each new operation -- confirm against FFT1. |
wire wSquareRootOutputReady; |
FFT1 FFT_Sqrt |
( |
.D(1'b1), |
.Clock( wSquareRoot_OutputReady ), |
.Reset( iInputReady ), |
.Q( wSquareRootOutputReady ) |
); |
|
|
//------------------------------------------------------------------------ |
// Delayed copies of iInputReady used as OutputReady for fixed-latency operations, |
// plus the latched opcode that selects among the output-ready sources. |
wire wOutputDelay1Cycle,wOutputDelay2Cycle,wOutputDelay3Cycle; |
|
// wOperation is declared (and driven) ahead of the delay chain because the third |
// delay stage reads it in its Clock port expression. Using it before its |
// declaration would create an implicit 1-bit net that conflicts with this vector. |
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation; |
|
// Opcode register: captures iOperation while iInputReady is asserted. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( iInputReady ), |
.D( iOperation ), |
.Q(wOperation) |
); |
|
// First delay stage: iInputReady -> wOutputDelay1Cycle. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( iInputReady ), |
.Q( wOutputDelay1Cycle ) |
); |
|
// Second delay stage: wOutputDelay1Cycle -> wOutputDelay2Cycle. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay22 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( wOutputDelay1Cycle ), |
.Q( wOutputDelay2Cycle ) |
); |
|
// Third delay stage, only clocked while the latched operation is OMWRITE. |
// NOTE(review): the clock is gated with combinational logic; an enable-based |
// flip-flop would be safer, but the behaviour is kept as-is here. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay222 |
( |
.Clock( Clock && wOperation == `OMWRITE), |
.Clear( Reset ), |
.D( wOutputDelay2Cycle ), |
.Q( wOutputDelay3Cycle ) |
); |
|
|
//Mux for output ready signal |
// Selects, from the latched opcode wOperation, which completion flag drives |
// OutputReady: a delayed copy of iInputReady for the simple operations, or the |
// ready flag(s) of the functional unit(s) the operation uses. |
// The default arm drives a 1-bit zero and reports wOperation, the signal this |
// case statement actually selects on. |
always @ ( * ) |
begin |
case ( wOperation ) |
`UNSCALE: OutputReady = wOutputDelay1Cycle; |
`RETURN: OutputReady = wOutputDelay1Cycle; |
|
`NOP: OutputReady = wOutputDelay1Cycle; |
`FRAC: OutputReady = wOutputDelay1Cycle; |
`NEG: OutputReady = wOutputDelay1Cycle; |
`OMWRITE: OutputReady = wOutputDelay3Cycle; |
`TMREAD: OutputReady = wTMReadOutputReady; //One cycle after TMEM data available asserted |
|
`ifdef DEBUG |
//Debug Print behaves as a NOP in terms of ALU... |
`DEBUG_PRINT: OutputReady = wOutputDelay1Cycle; |
`endif |
|
// Lane-wise adder operations finish when all three adders are done. |
`ADD,`INC,`INCX,`INCY,`INCZ: OutputReady = wAddSubAOutputReady && |
wAddSubBOutputReady && |
wAddSubCOutputReady; |
|
`SUB,`DEC: OutputReady = wAddSubAOutputReady && |
wAddSubBOutputReady && |
wAddSubCOutputReady; |
|
`DIV: OutputReady = wDivisionOutputReady; |
|
|
`MUL,`IMUL: OutputReady = wMultiplicationOutputReady; |
`MULP: OutputReady = wMultiplicationOutputReadyA; |
|
// DOT ends at adder B, which produces the final sum. |
`DOT: OutputReady = wAddSubBOutputReady; |
|
`CROSS: OutputReady = wAddSubAOutputReady && |
wAddSubBOutputReady && |
wAddSubCOutputReady; |
|
`MAG: OutputReady = wSquareRootOutputReady; |
|
`ZERO: OutputReady = wOutputDelay1Cycle; |
|
`COPY: OutputReady = wOutputDelay1Cycle; |
|
`SWIZZLE3D: OutputReady = wOutputDelay1Cycle; |
|
`SETX,`SETY,`SETZ,`JMP,`LEA,`CALL,`RET: OutputReady = wOutputDelay1Cycle; |
|
|
|
// Conditional jumps wait for the comparison unit. |
`JGX,`JGY,`JGZ: OutputReady = ArithmeticComparison_OutputReady; |
`JLX,`JLY,`JLZ: OutputReady = ArithmeticComparison_OutputReady; |
`JEQX,`JEQY,`JEQZ: OutputReady = ArithmeticComparison_OutputReady; |
`JNEX,`JNEY,`JNEZ: OutputReady = ArithmeticComparison_OutputReady; |
`JGEX,`JGEY,`JGEZ: OutputReady = ArithmeticComparison_OutputReady; |
`JLEX,`JLEY,`JLEZ: OutputReady = ArithmeticComparison_OutputReady; |
|
`MOD: OutputReady = wAddSubAOutputReady && //TODO: wait 1 more cycle |
wAddSubBOutputReady && |
wAddSubCOutputReady; |
|
`XCHANGEX: OutputReady = wOutputDelay1Cycle; |
|
|
default: |
begin |
OutputReady = 1'b0; |
$display("*** ALU ERROR: wOperation = %d ***",wOperation); |
end |
|
endcase |
end |
|
endmodule |
//------------------------------------------------------------------------ |
/EXE/Module_InstructionDecode.v
0,0 → 1,156
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
// Instruction decode stage. |
// Splits the encoded instruction into operation, destination and two source |
// addresses, presents the source addresses to the data memory, and routes |
// either the memory values, an immediate, or the forwarded result of the |
// previous instruction to the decoded source outputs. |
module InstructionDecode |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iInstructionAvailable, |
    input wire[`INSTRUCTION_WIDTH-1:0] iEncodedInstruction, |
    input wire[`DATA_ROW_WIDTH-1:0] iRamValue0, |
    input wire[`DATA_ROW_WIDTH-1:0] iRamValue1, |
    output wire[`DATA_ADDRESS_WIDTH-1:0] oRamAddress0,oRamAddress1, |
    output wire[`INSTRUCTION_OP_LENGTH-1:0] oOperation, |
    output wire [`DATA_ROW_WIDTH-1:0] oSource0,oSource1, |
    output wire [`DATA_ADDRESS_WIDTH-1:0] oDestination, |
    input wire [`DATA_ROW_WIDTH-1:0] iDataForward, |
    input wire [`DATA_ADDRESS_WIDTH-1:0] iLastDestination, |
|
`ifdef DEBUG |
    input wire [`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oDebug_CurrentIP, |
`endif |
|
    // The iIP / oReturnAddress ports (return-address capture for CALL) are |
    // disabled in this revision. |
    //input wire [`ROM_ADDRESS_WIDTH-1:0] iIP, |
    //output reg [`ROM_ADDRESS_WIDTH-1:0] oReturnAddress, |
    output wire oDataReadyForExe |
|
); |
|
`ifdef DEBUG |
    // The debug instruction pointer is passed straight through. |
    assign oDebug_CurrentIP = iDebug_CurrentIP; |
`endif |
|
    //---------------------------------------------------------------- |
    // Field extraction |
    //---------------------------------------------------------------- |
|
    // Set when the instruction carries an immediate (scalar) operand. |
    wire wInmediateOperand = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS]; |
|
    // Second source address is always instruction bits [31:16]. |
    assign oRamAddress1 = iEncodedInstruction[31:16]; |
|
    // First source address: instruction bits [15:0], or bits [47:32] (the |
    // destination field) so that an immediate operation can read the previous |
    // value of its destination. Selected by the immediate flag. |
    MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX |
    ( |
        .Sel( wInmediateOperand ), |
        .I1( iEncodedInstruction[15:0] ), |
        .I2( iEncodedInstruction[47:32] ), |
        .O1( oRamAddress0 ) |
    ); |
|
    //---------------------------------------------------------------- |
    // Source operand values |
    //---------------------------------------------------------------- |
|
    // Second operand before forwarding: for an immediate instruction the row |
    // is built from oRamAddress1 and instruction bits [15:0] followed by 64 |
    // zero bits; otherwise it is the value read from memory. |
    wire [`DATA_ROW_WIDTH-1:0] wSource1 = |
        ( wInmediateOperand ) ? { oRamAddress1, iEncodedInstruction[15:0], 64'b0 } |
                              : iRamValue1; |
|
    //---------------------------------------------------------------- |
    // Data forwarding |
    //---------------------------------------------------------------- |
|
    // Forward when a source address equals the previous destination. The |
    // second source is never forwarded for an immediate instruction. |
    wire wTriggerSource0DataForward = (oRamAddress0 == iLastDestination); |
    wire wTriggerSource1DataForward = (oRamAddress1 == iLastDestination) && !wInmediateOperand; |
|
    // First decoded source: memory value or forwarded data. |
    MUXFULLPARALELL_96bits_2SEL Source0_Mux |
    ( |
        .Sel( wTriggerSource0DataForward ), |
        .I1( iRamValue0 ), |
        .I2( iDataForward ), |
        .O1( oSource0 ) |
    ); |
|
    // Second decoded source: memory value / immediate or forwarded data. |
    MUXFULLPARALELL_96bits_2SEL Source1_Mux |
    ( |
        .Sel( wTriggerSource1DataForward ), |
        .I1( wSource1 ), |
        .I2( iDataForward ), |
        .O1( oSource1 ) |
    ); |
|
    //---------------------------------------------------------------- |
    // Registered outputs |
    //---------------------------------------------------------------- |
|
    // oDataReadyForExe is iInstructionAvailable registered on Clock: the |
    // decoded instruction is flagged ready one stage after it arrives. |
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1 |
    ( |
        .Clock( Clock ), |
        .Reset( Reset ), |
        .Enable(1'b1), |
        .D( iInstructionAvailable ), |
        .Q( oDataReadyForExe ) |
    ); |
|
    // Operation field (top `INSTRUCTION_OP_LENGTH bits), captured while an |
    // instruction is available. |
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3 |
    ( |
        .Clock(Clock), |
        .Reset(Reset), |
        .Enable(iInstructionAvailable), |
        .D(iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]), |
        .Q( oOperation ) |
    ); |
|
    // Destination field (bits [47:32]), captured while an instruction is available. |
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2 |
    ( |
        .Clock(Clock), |
        .Reset(Reset), |
        .Enable(iInstructionAvailable), |
        .D(iEncodedInstruction[47:32]), |
        .Q(oDestination ) |
    ); |
|
endmodule |
|
/EXE/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
// Entry-point unit. |
// While idle it presents iInitialCodeAddress to instruction memory. The cycle |
// after iTrigger, oTriggerIFU is raised and the low `ROM_ADDRESS_WIDTH bits of |
// the word read from instruction memory are used as the entry point and as |
// the instruction address. |
module InstructionEntryPoint |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iTrigger, |
    input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
    input wire [`INSTRUCTION_WIDTH-1:0] iIMemInput, |
|
    output wire oEPU_Busy, |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oEntryPoint, |
    output wire oTriggerIFU, |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionAddr |
); |
|
    // oTriggerIFU is iTrigger registered on Clock. |
    FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1 |
    ( |
        .Clock(Clock), |
        .Clear( Reset ), |
        .D(iTrigger), |
        .Q(oTriggerIFU) |
    ); |
|
    // Entry point: low bits of the fetched word while triggered, zero otherwise. |
    assign oEntryPoint = (oTriggerIFU) ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0] |
                                       : {`ROM_ADDRESS_WIDTH{1'b0}}; |
|
    // Address driven to instruction memory. |
    assign oInstructionAddr = (oTriggerIFU) ? oEntryPoint : iInitialCodeAddress; |
|
    // Busy from the trigger pulse through the registered trigger. |
    assign oEPU_Busy = iTrigger | oTriggerIFU; |
|
endmodule |