URL
https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk
Subversion Repositories theia_gpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/theia_gpu/trunk
- from Rev 104 to Rev 105
- ↔ Reverse comparison
Rev 104 → Rev 105
/rtl/GPU/CORES/CONTROL/Unit_Control.v
47,18 → 47,18
//`define CU_CHECK_AABBIU_REQUEST 17 |
`define CU_TRIGGER_TCC 17 |
//`define CU_CHECK_BIU_REQUEST 18 |
`define CU_TRIGGER_TFF 18 |
//`define CU_TRIGGER_TFF 18 |
//`define CU_CHECK_GEO_DONE 19 |
`define CU_WAIT_FOR_TFF 19 |
//`define CU_WAIT_FOR_TFF 19 |
`define CU_TRIGGER_AABBIU 20 |
`define CU_WAIT_FOR_AABBIU 21 |
`define CU_TRIGGER_BIU 22 |
`define CU_WAIT_FOR_BIU 23 |
`define CU_ACK_UCODE 24 |
`define CU_TRIGGER_MAIN 22 |
`define CU_WAIT_FOR_MAIN 23 |
`define CU_ACK_MAIN 24 |
`define CU_TRIGGER_PSU 25 |
`define CU_WAIT_FOR_PSU 26 |
`define CU_ACK_PSU 27 |
`define CU_TRIGGER_PCU 28 |
//`define CU_TRIGGER_PCU 28 |
`define CU_WAIT_FOR_PCU 29 |
`define CU_ACK_PCU 30 |
`define CU_CHECK_HIT 31 |
79,8 → 79,9
`define CU_WAIT_FOR_USERPIXELSHADER 46 |
`define CU_ACK_USERPIXELSHADER 47 |
`define CU_DONE 48 |
`define CU_WAIT_FOR_RENDER_ENABLE 49 |
|
`define CU_WAIT_FOR_RENDER_ENABLE 49 |
`define CU_ACK_TCC 50 |
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51 |
//-------------------------------------------------------------- |
module ControlUnit |
( |
107,12 → 108,15
output reg oFlipMemEnabled, |
output reg oFlipMem, |
output reg oIOWritePixel, |
input wire iRenderEnable, |
input wire iRenderEnable, |
input wire iSceneTraverseComplete, |
input wire iHostDataAvailable, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
|
output reg oResultCommited, |
output reg oDone |
|
); |
135,6 → 139,8
end |
|
`endif |
|
|
|
//-------------------------------------------------------------- |
FFToggleOnce_1Bit FFTO1 |
188,7 → 194,8
oSetCurrentPitch <= 1; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_INITIAL_CONFIGURATION; |
214,7 → 221,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I ) |
239,7 → 247,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I == 0 && iRenderEnable == 1'b1) |
267,8 → 276,10
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
|
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
292,7 → 303,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
322,7 → 334,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_CONFIG_DATA_READ; |
352,7 → 365,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( MST_I == 0 ) |
380,7 → 394,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_CONSTANT; |
405,7 → 420,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
434,7 → 450,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_TRIGGER_USERCONSTANTS;//CU_WAIT_FOR_TASK; |
461,7 → 478,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_USERCONSTANTS; |
485,7 → 503,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
514,7 → 533,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
540,7 → 560,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iRenderEnable) |
568,7 → 589,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_RGU; |
592,7 → 614,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
617,18 → 640,20
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
NextState <= `CU_TRIGGER_GEO; |
if ( iUCodeDone == 0 & iRenderEnable == 1) |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE;//`CU_TRIGGER_GEO;///////////// GET RID OF GEO!!! |
else |
NextState <= `CU_ACK_RGU; |
|
end |
//----------------------------------------- |
//----------------------------------------- |
/* |
`CU_TRIGGER_GEO: |
begin |
|
642,19 → 667,22
oUCodeEnable <= 0; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 1; //* |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_GEO_SYNC; |
|
end |
//----------------------------------------- |
end |
*/ |
//----------------------------------------- |
/* |
`CU_WAIT_FOR_GEO_SYNC: |
begin |
|
675,13 → 703,14
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if (iGEOSync & iTriggerAABBIURequest ) |
NextState <= `CU_TRIGGER_AABBIU; |
else if (iGEOSync & iTriggerBIURequest) |
NextState <= `CU_TRIGGER_BIU; |
NextState <= `CU_TRIGGER_MAIN; |
else if (iGEOSync & iTriggertTCCRequest ) |
NextState <= `CU_TRIGGER_TCC; |
else if (iGEOSync & iGFUDone ) |
689,7 → 718,8
else |
NextState <= `CU_WAIT_FOR_GEO_SYNC; |
|
end |
end |
*/ |
//----------------------------------------- |
`CU_TRIGGER_TCC: |
begin |
708,7 → 738,8
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; //We need u,v from last IO read cycle |
oFlipMem <= 0; //We need u,v from last IO read cycle |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
oDone <= 0; |
731,15 → 762,44
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_UCODE; |
NextState <= `CU_ACK_TCC; |
else |
NextState <= `WAIT_FOR_TCC; |
|
end |
//----------------------------------------- |
`CU_ACK_TCC: |
begin |
// State CU_ACK_TCC: entered from the TCC wait state once iUCodeDone is seen. |
// oUCodeEnable is driven to 0 here and the state holds until iUCodeDone reads 0. |
// The register bus stays owned by the microcode and the instruction pointer |
// stays at the TCC entry point; every other control output in this arm is 0. |
|
////$display("WAIT_FOR_TCC"); |
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable <= 0; //* enable dropped in this state |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
// Next state, evaluated only once iUCodeDone has returned to 0: |
//   iSceneTraverseComplete == 1 -> CU_TRIGGER_PSU_WITH_TEXTURE |
//   iSceneTraverseComplete == 0 -> CU_WAIT_FOR_HOST_DATA_AVAILABLE |
// While iUCodeDone is still non-zero the state loops on itself. |
if ( iUCodeDone == 0 && iSceneTraverseComplete == 1'b1) //DDDD |
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE; |
else if (iUCodeDone == 0 && iSceneTraverseComplete == 1'b0) |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
else |
NextState <= `CU_ACK_TCC; |
|
end |
//----------------------------------------- |
/* |
Was there any hit at all? |
758,8 → 818,9
|
`ifdef DEBUG_CU |
`LOGME"%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID); |
`endif |
`endif |
|
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
769,78 → 830,24
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
|
|
|
if (wHit && !iControlRegister[`CR_EN_TEXTURE] ) |
NextState <= `CU_TRIGGER_PSU; |
else if (wHit && iControlRegister[`CR_EN_TEXTURE]) |
NextState <= `CU_TRIGGER_TFF; //Fetch the texture values from external RAM |
if (wHit) |
begin |
//$display("HIT"); |
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE; |
end |
else |
NextState <= `CU_TRIGGER_PCU; //Make sure contents of the OUT_REG are ZERo! |
NextState <= `CU_TRIGGER_USERPIXELSHADER;//666 |
|
end |
//----------------------------------------- |
/* |
Get the texture values from external RAM. |
GFU already took care of calculating the correct |
texture addresses for the 4 coordinates so now lets |
just ask for them. |
*/ |
`CU_TRIGGER_TFF: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE %d Control: CU_TRIGGER_TFF\n",$time,iDebug_CoreID); |
`endif |
|
////$display("CU_TRIGGER_TFF"); |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 1; //* |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; //NO need, we did this n check hit |
//oIncCurrentPitch <= 0; |
oDone <= 0; |
|
NextState <= `CU_WAIT_FOR_TFF; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_TFF: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE %d Control: CU_WAIT_FOR_TFF\n",$time,iDebug_CoreID); |
`endif |
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 0; //Changed Aug 14 |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
|
//oIncCurrentPitch <= 0; |
|
if (iTFFDone) |
NextState <= `CU_TRIGGER_PSU_WITH_TEXTURE; |
else |
NextState <= `CU_WAIT_FOR_TFF; |
|
end |
//----------------------------------------- |
`CU_TRIGGER_PSU_WITH_TEXTURE: |
begin |
|
853,114 → 860,80
oUCodeEnable <= 1; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rResetHitFlop <= 1; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
oFlipMemEnabled <= 0;//////NEW NEW NEW NEW |
oFlipMem <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_PSU; |
end |
//----------------------------------------- |
`CU_TRIGGER_AABBIU: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d Control: CU_TRIGGER_AABBIU\n",$time); |
`endif |
// $stop(); |
end |
//----------------------------------------- |
//Wait until data from Host becomes available |
`CU_WAIT_FOR_HOST_DATA_AVAILABLE: |
begin |
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
|
oCodeInstructioPointer <=`ENTRYPOINT_INDEX_AABBIU; |
oUCodeEnable <= 1; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_FOR_AABBIU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_AABBIU: |
begin |
|
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_AABBIU\n",$time); |
// `endif |
|
|
// //$display("iUCodeDone",iUCodeDone); |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_AABBIU; |
oCodeInstructioPointer <= 0; |
oUCodeEnable <= 0; |
oGFUEnable <= 1; |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
begin |
// //$display("iUCodeDone\n",iUCodeDone); |
// $stop(); |
NextState <= `CU_ACK_UCODE; |
end |
else |
NextState <= `CU_WAIT_FOR_AABBIU; |
oDone <= 0; |
oResultCommited <= 0; |
|
if ( iHostDataAvailable ) |
NextState <= `CU_TRIGGER_MAIN; |
else |
NextState <= `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
|
|
end |
//----------------------------------------- |
`CU_TRIGGER_BIU: |
`CU_TRIGGER_MAIN: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_BIU\n",$time,iDebug_CoreID); |
`LOGME"%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID); |
`endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_BIU; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable <= 1; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0;//1; |
rHitFlopEnable <= 1; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
// $stop(); |
|
NextState <= `CU_WAIT_FOR_BIU; |
NextState <= `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_BIU: |
`CU_WAIT_FOR_MAIN: |
begin |
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_BIU\n",$time); |
// `LOGME"%d Control: CU_WAIT_FOR_MAIN\n",$time); |
// `endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_BIU; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable <= 0; |
oGFUEnable <= 1; |
oIOWritePixel <= 0; |
970,22 → 943,27
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
//oIncCurrentPitch <= 0; |
|
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
//NextState <= `CU_WAIT_FOR_MAIN; |
|
|
if ( iUCodeDone ) |
NextState <= `CU_ACK_UCODE; |
NextState <= `CU_ACK_MAIN; |
else |
NextState <= `CU_WAIT_FOR_BIU; |
NextState <= `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
/* |
ACK UCODE by setting oUCodeEnable = 0 |
*/ |
`CU_ACK_UCODE: |
`CU_ACK_MAIN: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_ACK_UCODE\n",$time, iDebug_CoreID); |
`LOGME"%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID); |
`endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
994,50 → 972,28
oGFUEnable <= 0; //Changed Aug 15 |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
rHitFlopEnable <= 1; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
// $stop(); |
|
if ( iUCodeDone == 0 ) |
NextState <= `CU_WAIT_FOR_GEO_SYNC; |
|
if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b1) |
NextState <= `CU_CHECK_HIT; |
else if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b0) //ERROR!!! What if iSceneTraverseComplete will become 1 a cycle after this?? |
NextState <= `CU_TRIGGER_MAIN; |
else |
NextState <= `CU_ACK_UCODE; |
NextState <= `CU_ACK_MAIN; |
|
|
|
end |
//----------------------------------------- |
/* |
Here we no longer use GFU so set Enable to zero |
*/ |
`CU_TRIGGER_PSU: |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_PSU\n",$time, iDebug_CoreID); |
`endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_PSU; |
oUCodeEnable <= 1; |
oGFUEnable <= 0;//* |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
|
NextState <= `CU_WAIT_FOR_PSU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_PSU: |
begin |
|
1056,7 → 1012,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
1084,7 → 1041,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
1095,121 → 1053,14
|
end |
//----------------------------------------- |
/* |
Trigger the Pixel Commit. |
*/ |
`CU_TRIGGER_PCU: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_PCU\n",$time, iDebug_CoreID); |
`endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 1; //* |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 1; |
oDone <= 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch <= 0; |
|
|
NextState <= `CU_SET_PICTH; |
|
end |
//----------------------------------------- |
`CU_SET_PICTH: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_SET_PICTH\n",$time, iDebug_CoreID); |
`endif |
|
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 1; //* |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 1; //* |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
//oIncCurrentPitch <= 0; |
|
|
NextState <= `CU_WAIT_FOR_PCU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_PCU: |
begin |
|
// `ifdef DEBUG_CU |
// `LOGME"%d Control: CU_WAIT_FOR_PCU\n",$time); |
// `endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 1; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iIODone ) |
NextState <= `CU_ACK_PCU; |
else |
NextState <= `CU_WAIT_FOR_PCU; |
|
end |
//----------------------------------------- |
`CU_ACK_PCU: |
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_ACK_PCU\n",$time, iDebug_CoreID); |
`endif |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer <= 0; //* |
oUCodeEnable <= 0; //* |
oGFUEnable <= 0; |
oIOWritePixel <= 0; |
rResetHitFlop <= 0; |
rHitFlopEnable <= 0; |
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_TRIGGER_NPU; |
|
end |
//----------------------------------------- |
`CU_TRIGGER_NPU: //Next Pixel Unit |
begin |
`ifdef DEBUG_CU |
`LOGME"%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID); |
`endif |
|
$write("*"); |
|
oRamBusOwner <= `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer <= `ENTRYPOINT_INDEX_NPG; //* |
1222,7 → 1073,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_WAIT_NPU; |
1241,7 → 1093,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone ) |
1272,7 → 1125,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0 && iUCodeReturnValue == 1) |
1297,8 → 1151,9
oTriggerTFF <= 0; |
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 1; |
oFlipMem <= 1; |
oDone <= 1; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
1326,7 → 1181,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
1351,7 → 1207,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
|
1379,11 → 1236,12
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 1; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 1; |
//oIncCurrentPitch <= 0; |
|
if ( iUCodeDone == 0) |
NextState <= `CU_TRIGGER_PCU; |
NextState <= `CU_TRIGGER_NPU;//`CU_TRIGGER_PCU; |
else |
NextState <= `CU_ACK_USERPIXELSHADER; |
|
1394,7 → 1252,7
begin |
|
`ifdef DEBUG_CU |
`LOGME"%d Control: ERRO Undefined State\n",$time); |
`LOGME"%d Control: ERROR Undefined State\n",$time); |
`endif |
|
oRamBusOwner <= 0; |
1408,7 → 1266,8
oSetCurrentPitch <= 0; |
oFlipMemEnabled <= 0; |
oFlipMem <= 0; |
oDone <= 0; |
oDone <= 0; |
oResultCommited <= 0; |
//oIncCurrentPitch <= 0; |
|
NextState <= `CU_AFTER_RESET_STATE; |
/rtl/GPU/CORES/EXE/Module_ExecutionFSM.v
62,10 → 62,11
input wire [`WIDTH-1:0] iALUResultZ, |
input wire iALUOutputReady, |
input wire iBranchTaken, |
input wire iBranchNotTaken, |
input wire iBranchNotTaken, |
|
|
`ifdef DEBUG |
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
//Data forward Signals |
497,6 → 498,11
`INCX: `LOGME"INCX"); |
`INCY: `LOGME"INCY"); |
`INCZ: `LOGME"INCZ"); |
`OMWRITE: `LOGME"OMWRITE"); |
`TMREAD: `LOGME"TMREAD"); |
`LEA: `LOGME"LEA"); |
`CALL: `LOGME"CALL"); |
`RET: `LOGME"RET"); |
`DEBUG_PRINT: |
begin |
`LOGME"DEBUG_PRINT"); |
/rtl/GPU/CORES/EXE/Module_InstructionFetch.v
44,26 → 44,67
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calcule both decide later |
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction, |
input wire iEXEDone, |
output wire oMicroCodeReturnValue, |
output wire oMicroCodeReturnValue, |
input wire iSubroutineReturn, |
//input wire [`ROM_ADDRESS_WIDTH-1:0] iReturnAddress, |
output wire oExecutionDone |
); |
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |
//iInstruction1[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |
|
|
assign oMicroCodeReturnValue = oCurrentInstruction[0]; |
assign oIP2 = oCurrentInstruction[47:32];//iInstruction1[47:32]; |
assign oIP2 = oCurrentInstruction[47:32]; |
|
wire wTriggerDelay1,wTriggerDelay2,wIncrementIP_Delay1,wIncrementIP_Delay2, |
wLastInst_Delay1,wLastInst_Delay2; |
wire wIncrementIP,wLastInstruction; |
wire wInstructionAvalable,wSubReturnDelay1,wSubReturnDelay2; |
|
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN ); |
|
//Return-address capture for CALL. |
//IsCall is 1 while the opcode field of oCurrentInstruction equals `CALL. |
//rReturnAddress takes the value oIP+1 on the rising edge of IsCall. |
//NOTE(review): IsCall is decoded from oCurrentInstruction and is used here as |
//the edge trigger instead of Clock -- confirm this is glitch-free and |
//synthesizes as intended on the target. |
wire IsCall; |
reg [`ROM_ADDRESS_WIDTH-1:0] rReturnAddress; |
assign IsCall = ( `INSTRUCTION_OPCODE == `CALL ) ? 1'b1 : 1'b0; |
always @ (posedge IsCall) |
rReturnAddress <= oIP+1; |
|
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN); |
//Increment IP 2 cycles after trigger or everytime EXE is done, or 2 cycles after return from sub, but stop if we get to the RETURN |
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction) | wSubReturnDelay2; |
//It takes 1 clock cycle to read the instruction back from IMEM |
|
|
//Instructions become available to the IDU: |
//* 2 cycles after the IFU is initially triggered |
//* Every time the previous instruction finishes executing, except for the last instruction in |
//the flow |
assign wInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2); |
|
|
//Two chained 1-bit register stages on iSubroutineReturn: |
//FFD22 drives wSubReturnDelay1 and FFD23 drives wSubReturnDelay2. |
//Both are always enabled and share Clock and Reset. |
//Assumes FFD_POSEDGE_SYNCRONOUS_RESET is a clocked D register (defined |
//elsewhere) -- TODO confirm; if so, wSubReturnDelay2 trails |
//iSubroutineReturn by two Clock edges. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD22 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iSubroutineReturn ), |
.Q( wSubReturnDelay1 ) |
); |
|
//Second stage: registers wSubReturnDelay1 into wSubReturnDelay2. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD23 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( wSubReturnDelay1 ), |
.Q( wSubReturnDelay2 ) |
); |
//Special case for the instruction-available pin: if a return-from-subroutine instruction was issued, |
//then wait 1 cycle before announcing instruction available to the IDU |
assign oInstructionAvalable = wInstructionAvalable & ~iSubroutineReturn | wSubReturnDelay2; |
|
|
|
|
|
//Increment IP 2 cycles after trigger or everytime EXE is done, but stop if we get to the RETURN |
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction); |
//It takes 1 clock cycle to read the instruction back from IMEM |
assign oInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2); |
//Once we reach the last instruction, wait until EXE says he is done, then assert oExecutionDone |
assign oExecutionDone = (wLastInstruction & iEXEDone); |
|
139,9 → 180,9
); |
|
|
assign wCurrentInstruction_BranchTaken = (iBranchTaken ) ? iInstruction2 : iInstruction1; |
assign wCurrentInstruction_BranchTaken = ( iBranchTaken & ~iSubroutineReturn) ? iInstruction2 : iInstruction1; |
|
assign oCurrentInstruction = (wBranchTaken_Delay1) ? |
assign oCurrentInstruction = (wBranchTaken_Delay1 ) ? |
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken; |
|
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1 |
153,13 → 194,17
); |
|
wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint; |
assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress; |
//assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress; |
|
//Value handed to the InstructionPointer counter as its .Initial input. |
//When iBranchTaken is high and wBranchTaken_Delay1 is low: |
//  - iSubroutineReturn high : rReturnAddress |
//  - iSubroutineReturn low  : oIP2_Next |
//In every other case: iInitialCodeAddress. |
//NOTE(review): an earlier comment described an iReturnAddress register kept at |
//the IDU; that port is commented out of this module and the expression below |
//reads rReturnAddress instead -- confirm which description is current. |
assign wIPEntryPoint = (iBranchTaken & ~wBranchTaken_Delay1) ? (iSubroutineReturn) ? rReturnAddress : oIP2_Next : iInitialCodeAddress; |
|
|
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer |
( |
.Clock( Clock ), |
.Reset(iTrigger | iBranchTaken), |
.Enable(wIncrementIP & ~iBranchTaken ), |
.Reset(iTrigger | (iBranchTaken & ~wBranchTaken_Delay1)), |
.Enable(wIncrementIP & (~iBranchTaken | wBranchTaken_Delay1 ) ), |
.Initial( wIPEntryPoint ), |
.Q(oIP) |
); |
/rtl/GPU/CORES/EXE/Unit_EXE.v
45,6 → 45,15
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire oReturnCode, |
|
|
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData, |
output wire oOMEMWriteEnable, |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData, |
input wire iTMEMDataAvailable, |
output wire oTMEMDataRequest, |
|
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
59,7 → 68,7
`ifdef DEBUG |
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer; |
`endif |
|
|
wire wEXE2__uCodeDone; |
wire wEXE2_IFU__EXEBusy; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination; |
90,9 → 99,10
wire wEXE2_ALU__TriggerALU; |
wire ALU2OutputReady; |
wire w2FIU__BranchTaken; |
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp; |
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp; |
wire [`ROM_ADDRESS_WIDTH-1:0] wIDU2_IFU_ReturnAddress; |
wire wALU2_IFU_ReturnFromSub; |
|
|
//wire wIDU2_IFU__InputsLatched; |
|
wire wEPU_Busy,wTriggerIFU; |
124,14 → 134,16
.iInstruction1( iInstruction1 ), |
.iInstruction2( iInstruction2 ), |
.iInitialCodeAddress( wCodeEntryPoint ), |
.iBranchTaken( w2FIU__BranchTaken ), |
.oCurrentInstruction( CurrentInstruction ), |
.oInstructionAvalable( wInstructionAvailable ), |
.oIP( wIFU_IP ), |
.oIP2( oInstructionPointer2 ), |
.iEXEDone( ALU2OutputReady ), |
.oMicroCodeReturnValue( oReturnCode ), |
.oExecutionDone( oDone ) |
.iBranchTaken( w2FIU__BranchTaken ), |
.iSubroutineReturn( wALU2_IFU_ReturnFromSub ), |
//.iReturnAddress( wIDU2_IFU_ReturnAddress ), |
.oCurrentInstruction( CurrentInstruction ), |
.oInstructionAvalable( wInstructionAvailable ), |
.oIP( wIFU_IP ), |
.oIP2( oInstructionPointer2 ), |
.iEXEDone( ALU2OutputReady ), |
.oMicroCodeReturnValue( oReturnCode ), |
.oExecutionDone( oDone ) |
); |
|
////--------------------------------------------------------- |
143,7 → 155,9
.Clock( Clock ), |
.Reset( Reset ), |
.iEncodedInstruction( CurrentInstruction ), |
.iInstructionAvailable( wInstructionAvailable ), |
.iInstructionAvailable( wInstructionAvailable ), |
//.iIP( oInstructionPointer1 ), |
//.oReturnAddress( wIDU2_IFU_ReturnAddress ), |
|
.oRamAddress0( oDataReadAddress0 ), |
.oRamAddress1( oDataReadAddress1 ), |
163,7 → 177,7
.iDebug_CurrentIP( oInstructionPointer1 ), |
.oDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ), |
`endif |
|
|
.oDataReadyForExe( wIDU2_EXE_DataReady ) |
|
|
181,7 → 195,8
.iOperation( wOperation ), |
.iDestination( wDestination ), |
.iSource0( wSource0 ), |
.iSource1( wSource1 ) , |
.iSource1( wSource1 ) , |
|
|
`ifdef DEBUG |
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ), |
235,8 → 250,21
.oResultB( ALU2ResultB ), |
.oResultC( ALU2ResultC ), |
.oBranchTaken( wALU2_EXE__BranchTaken ), |
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ), |
.iInputReady( wEXE2_ALU__TriggerALU ), |
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ), |
.oReturnFromSub( wALU2_IFU_ReturnFromSub ), |
.iInputReady( wEXE2_ALU__TriggerALU ), |
|
//*********** |
.oOMEMWriteAddress( oOMEMWriteAddress ), |
.oOMEMWriteData( oOMEMWriteData ), |
.oOMEM_WriteEnable( oOMEMWriteEnable ), |
|
.oTMEMReadAddress( oTMEMReadAddress ), |
.iTMEMReadData( iTMEMReadData ), |
.iTMEMDataAvailable( iTMEMDataAvailable ), |
.oTMEMDataRequest( oTMEMDataRequest ), |
//*********** |
.iCurrentIP( oInstructionPointer1 ), |
.OutputReady( ALU2OutputReady ) |
|
); |
/rtl/GPU/CORES/EXE/Module_VectorALU.v
39,12 → 39,29
output wire [`WIDTH-1:0] oResultC, |
input wire iInputReady, |
output reg oBranchTaken, |
output reg oBranchNotTaken, |
output reg oBranchNotTaken, |
output reg oReturnFromSub, |
input wire [`ROM_ADDRESS_WIDTH-1:0] iCurrentIP, |
|
//Connections to the O Memory |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData, |
output wire oOMEM_WriteEnable, |
//Connections to the R Memory |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData, |
input wire iTMEMDataAvailable, |
output wire oTMEMDataRequest, |
|
output reg OutputReady |
|
); |
|
wire wMultiplcationUnscaled; |
|
|
|
|
wire wMultiplcationUnscaled; |
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0; |
|
//-------------------------------------------------------------- |
113,41 → 130,89
); |
//--------------------------------------------------------------------- |
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC; |
//wire wModulusOutputReadyA,wModulusOutputReadyB,wModulusOutputReadyC; |
|
/* |
Modulus2N MODA |
//---------------------------------------------------------------------( |
|
wire IOW_Operation,wOMEM_We; |
assign IOW_Operation = (iOperation == `OMWRITE); |
|
//Combinational decode of the return-from-subroutine flag. |
//oReturnFromSub follows OutputReady while iOperation is `RET and is 0 for |
//every other operation. |
//Blocking assignments are used because this is a combinational always @(*) |
//block; nonblocking assignments here add an extra simulation delta and go |
//against the usual combinational coding guideline. |
always @ ( * ) |
begin |
    if (iOperation == `RET) |
        oReturnFromSub = OutputReady; |
    else |
        oReturnFromSub = 1'b0; |
end |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_AWE |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultA ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyA ) |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( IOW_Operation ), |
.Q( wOMEM_We ) |
); |
|
Modulus2N MODB |
assign oOMEM_WriteEnable = wOMEM_We & IOW_Operation; |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD1_A |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultB ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyB ) |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oOMEMWriteAddress) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Bx,iChannel_By,iChannel_Bz} ), |
.Q( oOMEMWriteData ) |
); |
|
Modulus2N MODC |
|
|
wire wTMReadOutputReady; |
assign wTMReadOutputReady = iTMEMDataAvailable; |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultC ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyC ) |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( iTMEMDataAvailable ), |
.Q( wTMReadOutputReady ) |
); |
*/ |
//---------------------------------------------------------------------( |
//assign oTMEMReadAddress = {iChannel_Ax,iChannel_Ay,iChannel_Az}; |
|
//We wait 1 clock cycle before we send the data read request, because |
//we need to latch the values at the output |
|
// TMEM read request generation. |
// wOpTRead is 1 while the current operation is `TMREAD, 0 otherwise. |
wire wOpTRead; |
assign wOpTRead = ( iOperation == `TMREAD ) ? 1'b1 : 1'b0; |
// wTMEMRequest is wOpTRead passed through a 1-bit, always-enabled register |
// (presumably a one-clock delay -- FFD_POSEDGE_SYNCRONOUS_RESET is defined |
// outside this view; confirm). |
wire wTMEMRequest; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE123 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( wOpTRead ), |
.Q( wTMEMRequest ) |
); |
// The request is asserted only when both the registered and the current |
// TMREAD indications are high. |
assign oTMEMDataRequest = wTMEMRequest & wOpTRead; |
// Read address register: captures {Ax,Ay,Az} of channel A when iInputReady |
// is high during a TMREAD operation. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B445 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady & wOpTRead ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oTMEMReadAddress ) |
); |
|
|
/* |
This MUX will select the appropriate X,Y or Z depending on |
whether it is an XYZ iOperation. This gets defined by the bits 3 and 4 |
212,7 → 277,7
.Reset( Reset ), |
.A( wMultiplicationA_Ax ), |
.B( wMultiplicationA_Bx ), |
.R( wMultiplicationA_Result ), |
.R( wMultiplicationA_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationA_InputReady ), |
.OutputReady( wMultiplicationA_OutputReady ) |
271,7 → 336,7
.Reset( Reset ), |
.A( wMultiplicationB_Ay ), |
.B( wMultiplicationB_By ), |
.R( wMultiplicationB_Result ), |
.R( wMultiplicationB_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationB_InputReady ), |
.OutputReady( wMultiplicationB_OutputReady ) |
324,7 → 389,7
.Reset( Reset ), |
.A( wMultiplicationC_Az ), |
.B( wMultiplicationC_Bz ), |
.R( wMultiplicationC_Result ), |
.R( wMultiplicationC_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationC_InputReady ), |
.OutputReady( wMultiplicationC_OutputReady ) |
378,7 → 443,7
.Reset( Reset ), |
.A( wMultiplicationD_Aw ), |
.B( wMultiplicationD_Bw ), |
.R( wMultiplicationD_Result ), |
.R( wMultiplicationD_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationD_InputReady ), |
.OutputReady( wMultiplicationD_OutputReady ) |
419,7 → 484,7
.Reset( Reset ), |
.A( wMultiplicationE_Ak ), |
.B( wMultiplicationE_Bk ), |
.R( wMultiplicationE_Result ), |
.R( wMultiplicationE_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationE_InputReady ), |
.OutputReady( wMultiplicationE_OutputReady ) |
461,7 → 526,7
.Reset( Reset ), |
.A( wMultiplicationF_Al ), |
.B( wMultiplicationF_Bl ), |
.R( wMultiplicationF_Result ), |
.R( wMultiplicationF_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationF_InputReady ), |
.OutputReady( wMultiplicationF_OutputReady ) |
866,13 → 931,15
`MAG: ResultA = wSquareRoot_Result; |
`ZERO: ResultA = 32'b0; |
`COPY: ResultA = iChannel_Ax; |
`TMREAD: ResultA = iTMEMReadData[95:64]; |
`LEA: ResultA = {16'b0,iCurrentIP}; |
|
`SWIZZLE3D: ResultA = wSwizzleOutputX; |
|
//Set Operations |
`UNSCALE: ResultA = iChannel_Ax >> `SCALE; |
`SETX: ResultA = iChannel_Ax; |
`SETY: ResultA = iChannel_Bx; |
`SETX,`RET: ResultA = iChannel_Ax; |
`SETY: ResultA = iChannel_Bx; |
`SETZ: ResultA = iChannel_Bx; |
`INC,`INCX,`INCY,`INCZ: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]}; |
`DEC: ResultA = (wAddSubA_Result[63] == 1'b1) ? { 1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]}; |
908,10 → 975,12
`MAG: ResultB = wSquareRoot_Result; |
`ZERO: ResultB = 32'b0; |
`COPY: ResultB = iChannel_Ay; |
`TMREAD: ResultB = iTMEMReadData[63:32]; |
`LEA: ResultB = {16'b0,iCurrentIP}; |
|
//Set Operations |
`UNSCALE: ResultB = iChannel_Ay >> `SCALE; |
`SETX: ResultB = iChannel_By; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETX,`RET: ResultB = iChannel_By; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETY: ResultB = iChannel_Ax; // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
`SETZ: ResultB = iChannel_By; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
951,12 → 1020,14
`MAG: ResultC = wSquareRoot_Result; |
`ZERO: ResultC = 32'b0; |
`COPY: ResultC = iChannel_Az; |
`TMREAD: ResultC = iTMEMReadData[31:0]; |
`LEA: ResultC = {16'b0,iCurrentIP}; |
|
`SWIZZLE3D: ResultC = wSwizzleOutputZ; |
|
//Set Operations |
`UNSCALE: ResultC = iChannel_Az >> `SCALE; |
`SETX: ResultC = iChannel_Bz; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETX,`RET: ResultC = iChannel_Bz; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
`SETY: ResultC = iChannel_Bz; // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
`SETZ: ResultC = iChannel_Ax; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
983,7 → 1054,7
always @ ( * ) |
begin |
case (iOperation) |
`JMP: oBranchTaken = 1; |
`JMP,`CALL,`RET: oBranchTaken = OutputReady; |
`JGX: oBranchTaken = wArithmeticComparison_Result; |
`JGY: oBranchTaken = wArithmeticComparison_Result; |
`JGZ: oBranchTaken = wArithmeticComparison_Result; |
1017,7 → 1088,7
begin |
case (iOperation) |
|
`JMP,`JGX,`JGY,`JGZ,`JLX,`JLY,`JLZ,`JEQX,`JEQY,`JEQZ, |
`JMP,`CALL,`RET,`JGX,`JGY,`JGZ,`JLX,`JLY,`JLZ,`JEQX,`JEQY,`JEQZ, |
`JNEX,`JNEY,`JNEZ,`JGEX,`JGEY,`JGEZ: oBranchNotTaken = !oBranchTaken && OutputReady; |
`JLEX: oBranchNotTaken = !oBranchTaken && OutputReady; |
`JLEY: oBranchNotTaken = !oBranchTaken && OutputReady; |
1085,7 → 1156,7
|
|
//------------------------------------------------------------------------ |
wire wOutputDelay1Cycle; |
wire wOutputDelay1Cycle,wOutputDelay2Cycle,wOutputDelay3Cycle; |
|
|
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2 |
1096,6 → 1167,23
.Q( wOutputDelay1Cycle ) |
); |
|
// Second stage of the output-ready delay chain: registers wOutputDelay1Cycle |
// into wOutputDelay2Cycle on Clock, cleared by Reset. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay22 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( wOutputDelay1Cycle ), |
.Q( wOutputDelay2Cycle ) |
); |
|
|
// Third stage of the output-ready delay chain: registers wOutputDelay2Cycle |
// into wOutputDelay3Cycle, which is the OutputReady source for `OMWRITE. |
// NOTE(review): the clock port is driven by a logic expression |
// (Clock && wOperation == `OMWRITE), i.e. a gated clock; consider a clock |
// enable instead if the flip-flop module offers one -- confirm. |
// NOTE(review): wOperation is referenced here before its wire declaration, |
// which follows this instance; some tools treat that as an implicit 1-bit |
// net or reject the later declaration -- confirm and reorder if needed. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay222 |
( |
.Clock( Clock && wOperation == `OMWRITE), |
.Clear( Reset ), |
.D( wOutputDelay2Cycle ), |
.Q( wOutputDelay3Cycle ) |
); |
|
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation; |
|
|
1119,6 → 1207,8
`NOP: OutputReady = wOutputDelay1Cycle; |
`FRAC: OutputReady = wOutputDelay1Cycle; |
`NEG: OutputReady = wOutputDelay1Cycle; |
`OMWRITE: OutputReady = wOutputDelay3Cycle; |
`TMREAD: OutputReady = wTMReadOutputReady; //One cycle after TMEM data availale asserted |
|
`ifdef DEBUG |
//Debug Print behaves as a NOP in terms of ALU... |
1153,7 → 1243,8
|
`SWIZZLE3D: OutputReady = wOutputDelay1Cycle; |
|
`SETX,`SETY,`SETZ,`JMP: OutputReady = wOutputDelay1Cycle; |
`SETX,`SETY,`SETZ,`JMP,`LEA,`CALL,`RET: OutputReady = wOutputDelay1Cycle; |
|
|
|
`JGX,`JGY,`JGZ: OutputReady = ArithmeticComparison_OutputReady; |
/rtl/GPU/CORES/EXE/Module_InstructionDecode.v
38,7 → 38,10
`ifdef DEBUG |
input wire [`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oDebug_CurrentIP, |
`endif |
`endif |
|
//input wire [`ROM_ADDRESS_WIDTH-1:0] iIP, |
//output reg [`ROM_ADDRESS_WIDTH-1:0] oReturnAddress, |
output wire oDataReadyForExe |
|
); |
91,8 → 94,25
.D( iInstructionAvailable ), |
.Q( oDataReadyForExe ) |
); |
|
/* |
wire IsCall; |
assign IsCall = ( oOperation == `CALL ) ? 1'b1 : 1'b0; |
always @ (posedge IsCall) |
oReturnAddress <= iIP; |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `ROM_ADDRESS_WIDTH ) FFRETURNADDR |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( IsCall ), |
.D( iIP ), |
.Q( oReturnAddress ) |
); |
*/ |
|
|
|
//Latch the Operation |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3 |
( |
100,7 → 120,7
.Reset(Reset), |
.Enable(iInstructionAvailable), |
.D(iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]), |
.Q(oOperation ) |
.Q( oOperation ) |
); |
//Latch the Destination |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2 |
/rtl/GPU/CORES/TOP/Theia_Core.v
72,25 → 72,45
output wire [1:0] TGC_O, //Bus cycle tag, see THEAI documentation |
input wire [1:0] TGA_I, //Input address tag, see THEAI documentation |
output wire [1:0] TGA_O, //Output address tag, see THEAI documentation |
input wire [1:0] TGC_I, //Bus cycle tag, see THEAI documentation |
input wire GNT_I, //Bus arbiter 'Granted' signal, see THEAI documentation |
input wire [1:0] TGC_I, //Bus cycle tag, see THEAI documentation |
input wire GNT_I, //Bus arbiter 'Granted' signal, see THEAI documentation |
input wire RENDREN_I, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
|
output wire GRDY_O, //Data Latched |
input wire STDONE_I, //Scene traverse complete |
input wire HDA_I, |
output wire RCOMMIT_O, |
|
output wire [`WB_WIDTH-1:0] OMEM_DAT_O, |
output wire [`WB_WIDTH-1:0] OMEM_ADR_O, |
output wire OMEM_WE_O, |
|
input wire TMEM_ACK_I, |
input wire [`WB_WIDTH-1:0] TMEM_DAT_I , |
output wire [`WB_WIDTH-1:0] TMEM_ADR_O , |
output wire TMEM_WE_O, |
output wire TMEM_STB_O, |
output wire TMEM_CYC_O, |
input wire TMEM_GNT_I, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
//Control Register |
input wire [15:0] CREG_I, |
input wire [15:0] CREG_I, |
output wire DONE_O |
|
|
); |
|
//When we flip the SMEM, this means we are ready to receive more data |
assign GRDY_O = wCU2_FlipMem; |
|
//Alias this signals |
wire Clock,Reset; |
assign Clock = CLK_I; |
assign Reset = RST_I; |
|
|
wire wIO_Busy; |
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__MEM_WriteData; |
wire [`DATA_ROW_WIDTH-1:0] wUCODE_RAMBus; |
155,6 → 175,16
wire wGEO2_IO__SetAddress; |
wire[`WIDTH-1:0] wGEO2__CurrentPitch,wCU2_GEO_Pitch; |
wire wCU2_GEO__SetPitch,wCU2_GEO__IncPicth; |
|
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteAddress; |
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteData; |
wire wEXE_2__IO_OMEMWriteEnable; |
|
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_TMEMAddress; |
wire [`DATA_ROW_WIDTH-1:0] wIO_2_EXE__TMEMData; |
wire wIO_2_EXE__DataAvailable; |
wire wEXE_2_IO__DataRequest; |
|
wire wCU2_FlipMemEnabled; |
wire w2MEM_FlipMemory; |
|
191,12 → 221,16
.oTriggerTFF( wCU2_GEO__TriggerTFF ), |
.MST_I( MST_I ), |
.oSetCurrentPitch( wCU2_GEO__SetPitch ), |
.iGFUDone( wGEO2_CU__GeometryUnitDone ), |
.iGFUDone( wGEO2_CU__GeometryUnitDone ), |
.iRenderEnable( RENDREN_I ), |
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.iSceneTraverseComplete( STDONE_I ), |
.oResultCommited( RCOMMIT_O ), |
.iHostDataAvailable( HDA_I ), |
|
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.oDone( DONE_O ) |
|
); |
232,7 → 266,11
.oData2_IO( wMEM_2__IO_DataRead1 ), |
.iDataWriteEnable_IO( wIO2_MEM__DataWriteEnable ), |
.iDataWriteAddress_IO( wIO2_MEM__DataWriteAddress ), |
.iData_IO( wIO2_MEM__Bus ), |
.iData_IO( wIO2_MEM__Bus ), |
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
|
|
//Instruction Bus |
269,10 → 307,19
.oDataWriteEnable( wEXE_2__DataWriteEnable ), |
.oDataWriteAddress( wEXE_2__MEM_wDataWriteAddress ), |
.oDataBus( wEXE_2__MEM_WriteData ), |
.oReturnCode( wIFU2__MicroCodeReturnValue ), |
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
.oReturnCode( wIFU2__MicroCodeReturnValue ), |
/**************/ |
.oOMEMWriteAddress( wEXE_2__IO_WriteAddress ), |
.oOMEMWriteData( wEXE_2__IO_WriteData ), |
.oOMEMWriteEnable( wEXE_2__IO_OMEMWriteEnable ), |
|
.oTMEMReadAddress( wEXE_2__IO_TMEMAddress ), |
.iTMEMReadData( wIO_2_EXE__TMEMData ), |
.iTMEMDataAvailable( wIO_2_EXE__DataAvailable ), |
.oTMEMDataRequest( wEXE_2_IO__DataRequest ), |
/**************/ |
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.oDone( wCU2__MicrocodeExecutionDone ) |
|
282,81 → 329,42
wire wGEO2__RequestingTextures; |
wire w2IO_WriteBack_Set; |
|
GeometryUnit GEO |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( wCU2_GEO__GeometryFetchEnable ), |
.iIOBusy( wIO_Busy ), |
.iTexturingEnable( wCR2_TextureMappingEnabled ), |
//Wires from IO |
.iData_WBM( wIO2_MEM__Data ), |
.iDataReady_WBM( wIO2__Done ), |
//Wires to WBM |
.oAddressWBM_Imm( wGEO2_IO__AddressOffset ), |
.oAddressWBM_fromMEM( wGEO2_IO__Adr_O_Pointer ), |
.oAddressWBM_IsImm( wGEO2_IO__AddrIsImm ), |
.oEnable_WBM( wGEO2_IO__EnableWBMaster ), |
.oSetAddressWBM( wGEO2_IO__SetAddress ), |
.oSetIOWriteBackAddr( w2IO_WriteBack_Set ), |
//Wires to CU |
.oRequest_AABBIU( wGEO2_CU__RequestAABBIU ), |
.oRequest_BIU( wGEO2_CU__RequestBIU ), |
.oRequest_TCC( wGEO2_CU__RequestTCC ), |
.oTFFDone( wGEO2_CU__TFFDone ), |
//Wires to RAM-Bus MUX |
.oRAMWriteAddress( w2IO__DataWriteAddress ), |
.oRAMWriteEnable( w2IO__Store ), |
//Wires from Execution Unit |
.iMicrocodeExecutionDone( wCU2__MicrocodeExecutionDone ), |
.iMicroCodeReturnValue( wIFU2__MicroCodeReturnValue ), |
.oSync( wGEO2_CU__Sync ), |
.iTrigger_TFF( wCU2_GEO__TriggerTFF ), |
.iBIUHit( wIFU2__MicroCodeReturnValue ), |
.oRequestingTextures( wGEO2__RequestingTextures ), |
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.oDone( wGEO2_CU__GeometryUnitDone ) |
); |
|
|
assign TGA_O = (wGEO2__RequestingTextures) ? 2'b1: 2'b0; |
//--------------------------------------------------------------------------------------------------- |
wire[`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress1; |
assign wEXE_2__MEM_DataReadAddress1 = (wCU2_IO__WritePixel == 0) ? wUCODE_RAMReadAddress1 : wIO_2_MEM__DataReadAddress1; |
assign w2IO__EnableWBMaster = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__EnableWBMaster : wCU2_IO__WritePixel; |
assign w2IO__AddrIsImm = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0; |
assign w2IO__AddressOffset = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0; |
assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O; |
//assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION; |
assign w2IO__AddrIsImm = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0; |
assign w2IO__AddressOffset = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0; |
assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O; |
//assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION; |
|
wire w2IO_MasterCycleType; |
assign w2IO_MasterCycleType = (wCU2_IO__WritePixel) ? `WB_SIMPLE_WRITE_CYCLE : `WB_SIMPLE_READ_CYCLE; |
|
|
|
assign w2IO__SetAddress = (wCU2_IO__WritePixel == 0 )? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch; |
|
|
|
|
assign w2IO__SetAddress = (wCU2_IO__WritePixel == 0 )? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch; |
|
|
IO_Unit IO |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( w2IO__EnableWBMaster ), |
.iEnable( 0 ),// w2IO__EnableWBMaster ), |
.iBusCyc_Type( w2IO_MasterCycleType ), |
|
.iStore( w2IO__Store ), |
.iStore( 1),//w2IO__Store ), |
.iAdr_DataWriteBack( w2IO__DataWriteAddress ), |
.iAdr_O_Set( w2IO__SetAddress ), |
.iAdr_O_Imm( w2IO__AddressOffset ), |
.iAdr_O_Type( w2IO__AddrIsImm ), |
.iAdr_O_Pointer( w2IO__Adr_O_Pointer ), |
.iAdr_O_Pointer( w2IO__Adr_O_Pointer ), |
.iReadDataBus( wMEM_2__IO_DataRead0 ), |
.iReadDataBus2( wMEM_2__IO_DataRead1 ), |
.iDat_O_Pointer( `OREG_PIXEL_COLOR ), |
|
.iReadDataBus2( wMEM_2__IO_DataRead1 ), |
.iDat_O_Pointer( `OREG_PIXEL_COLOR ), |
|
|
.oDataReadAddress( wIO_2_MEM__DataReadAddress0 ), |
.oDataReadAddress2( wIO_2_MEM__DataReadAddress1 ), |
.oDataWriteAddress( wIO2_MEM__DataWriteAddress ), |
370,6 → 378,29
.iWriteBack_Set( w2IO_WriteBack_Set ), |
.oBusy( wIO_Busy ), |
.oDone( wIO2__Done ), |
/**********/ |
.iOMEM_WriteAddress( wEXE_2__IO_WriteAddress ), |
.iOMEM_WriteData( wEXE_2__IO_WriteData ), |
.iOMEM_WriteEnable( wEXE_2__IO_OMEMWriteEnable ), |
.OMEM_DAT_O( OMEM_DAT_O ), |
.OMEM_ADR_O( OMEM_ADR_O ), |
.OMEM_WE_O( OMEM_WE_O ), |
|
|
.oTMEMReadData( wIO_2_EXE__TMEMData ), |
.iTMEMDataRequest( wEXE_2_IO__DataRequest ), |
.iTMEMReadAddress( wEXE_2__IO_TMEMAddress ), |
.oTMEMDataAvailable( wIO_2_EXE__DataAvailable ), |
|
.TMEM_ACK_I( TMEM_ACK_I ), |
.TMEM_DAT_I( TMEM_DAT_I ), |
.TMEM_ADR_O( TMEM_ADR_O ), |
.TMEM_WE_O( TMEM_WE_O ), |
.TMEM_STB_O( TMEM_STB_O ), |
.TMEM_CYC_O( TMEM_CYC_O ), |
.TMEM_GNT_I( TMEM_GNT_I ), |
|
/**********/ |
.MST_I( MST_I ), |
//Wish Bone Interface |
.DAT_I( DAT_I ), |
384,7 → 415,7
.STB_I( STB_I ), |
.CYC_O( CYC_O ), |
.TGA_I( TGA_I ), |
.CYC_I( CYC_I ), |
.CYC_I( CYC_I ), |
.GNT_I( GNT_I ), |
.TGC_O( TGC_O ) |
|
/rtl/GPU/CORES/MEM/Unit_MEM.v
61,7 → 61,11
input wire [`ROM_ADDRESS_WIDTH-1:0] iInstructionWriteAddress, |
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstruction1, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstruction2, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstruction2, |
|
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
|
|
//Control Register |
76,7 → 80,7
wIROM2_IMUX__DataOut1,wIROM2_IMUX__DataOut2; |
|
|
wire wInstructionSelector; |
wire wInstructionSelector,wInstructionSelector2; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1 |
( |
.Clock(Clock), |
84,6 → 88,15
.Enable( 1'b1 ), |
.D( iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-1] ), |
.Q( wInstructionSelector ) |
); |
|
// Registers the most significant bit of instruction read address 2 into |
// wInstructionSelector2, which selects between the IMEM and IROM outputs |
// for oInstruction2 (read port 2 gets its own selector instead of sharing |
// the one derived from read address 1). |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2 |
( |
.Clock(Clock), |
.Reset(Reset), |
.Enable( 1'b1 ), |
.D( iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-1] ), |
.Q( wInstructionSelector2 ) |
); |
|
assign oInstruction1 = (wInstructionSelector == 1) ? |
90,7 → 103,7
wIMEM2_IMUX__DataOut1 : wIROM2_IMUX__DataOut1; |
|
|
assign oInstruction2 = (wInstructionSelector == 1) ? |
assign oInstruction2 = (wInstructionSelector2 == 1) ? |
wIMEM2_IMUX__DataOut2 : wIROM2_IMUX__DataOut2; |
//------------------------------------------------------------------- |
/* |
158,8 → 171,6
assign oData1_IO = ( iDataReadAddress1_IO < `OMEM_START_ADDR ) ? wIOData_SMEM1 : wData_OMEM1; |
assign oData2_IO = ( iDataReadAddress2_IO < `OMEM_START_ADDR ) ? wIOData_SMEM2 : wData_OMEM2; |
|
//assign oData1_IO = wIOData_SMEM1; |
//assign oData2_IO = wIOData_SMEM2; |
|
//Output registers written by EXE, Read by IO |
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) OMEM |
187,7 → 198,7
.oDataOut1( wData_IMEM2 ) |
); |
|
//Swap registers, while IO writes/write values, EXE reads/write values |
//Swap registers, while IO reads/write values, EXE reads/write values |
//the pointers get flipped in the next iteration |
SWAP_MEM # (`DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH,512) SMEM |
( |
288,13 → 299,19
|
ROM IROM |
( |
.Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ), |
.Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ), |
`ifdef DEBUG |
.iDebug_CoreID(iDebug_CoreID), |
`endif |
.I( wRomDelay1 ) |
); |
|
ROM IROM2 |
( |
.Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ), |
.Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ), |
`ifdef DEBUG |
.iDebug_CoreID(iDebug_CoreID), |
`endif |
.I( wRomDelay2 ) |
); |
//-------------------------------------------------------- |
/rtl/GPU/CORES/MEM/Module_ROM.v
33,7 → 33,10
//-------------------------------------------------------- |
module ROM |
( |
input wire[`ROM_ADDRESS_WIDTH-1:0] Address, |
input wire[`ROM_ADDRESS_WIDTH-1:0] Address, |
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
output reg [`INSTRUCTION_WIDTH-1:0] I |
); |
|
138,7 → 141,8
//(X_initial + RESOLUTION_Y*Y_initial) * 3, voila! |
18: I = { `SETX ,`R2 ,32'h3 }; |
19: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_XXX }; |
20: I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 }; |
20:// I = { `ZERO ,`CREG_PIXEL_PITCH ,`VOID ,`VOID }; |
I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 }; |
//By this point you should be wondering why not |
//just do DOT R1 [1 Resolution_Y 0] [X_initial Y_initial 0 ]? |
//well because DOT uses fixed point and the result may not |
377,16 → 381,16
154: I = { `DIV ,`CREG_u ,`CREG_H2 ,`CREG_DELTA }; |
155: I = { `DIV ,`CREG_v ,`CREG_H3 ,`CREG_DELTA }; |
156: I = { `JGEX ,`LABEL_BIU1 ,`CREG_u ,`R1 }; |
157: I = { `RETURN ,`RT_FALSE }; |
157: I = { `RET ,`R99, `FALSE }; //157: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_BIU1: |
158: I = { `JGEX ,`LABEL_BIU2 ,`CREG_v ,`R1 }; |
159: I = { `RETURN ,`RT_FALSE }; |
159: I = { `RET ,`R99, `FALSE }; //159: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_BIU2: |
160: I = { `ADD ,`R2 ,`CREG_u ,`CREG_v }; |
161: I = { `JLEX ,`LABEL_BIU3 ,`R2 ,`R3 }; |
162: I = { `RETURN ,`RT_FALSE }; |
162: I = { `RET ,`R99, `FALSE }; //162: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_BIU3: |
163: I = { `JGEX ,`LABEL_BIU4 ,`CREG_t ,`CREG_LAST_t }; |
400,7 → 404,7
171: I = { `COPY ,`CREG_UV2_LAST ,`CREG_UV2 ,`VOID }; |
172: I = { `COPY ,`CREG_TRI_DIFFUSE_LAST ,`CREG_TRI_DIFFUSE ,`VOID }; |
//LABEL_BIU4: |
173: I = { `RETURN ,`RT_TRUE }; |
173: I = { `RET ,`R99, `TRUE }; //173: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
476,7 → 480,8
194: I = { `SETY ,`R5 ,32'h3 }; |
195: I = { `SETZ ,`R5 ,32'h3 }; |
//Multiply by 3 (the pitch) |
196: I = { `IMUL ,`OREG_TEX_COORD1 ,`R12 ,`R5 }; |
//196: I = { `IMUL ,`OREG_TEX_COORD1 ,`R12 ,`R5 }; |
196: I = { `IMUL ,`CREG_TEX_COORD1 ,`R12 ,`R5 }; |
|
//R4 = [u2 u1 0] |
197: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YXZ }; |
485,7 → 490,8
//OREG_TEX_COORD2 [u2 + v2*H u1 + v1*H 0] |
198: I = { `ADD ,`R12 ,`R2 ,`R4 }; |
//Multiply by 3 (the pitch) |
199: I = { `IMUL ,`OREG_TEX_COORD2 ,`R12 ,`R5 }; |
//199: I = { `IMUL ,`OREG_TEX_COORD2 ,`R12 ,`R5 }; |
199: I = { `IMUL ,`CREG_TEX_COORD2 ,`R12 ,`R5 }; |
|
|
//Cool now get the weights |
555,7 → 561,7
|
|
//LABEL_TCC_EXIT: |
215: I = { `RETURN ,`RT_TRUE }; |
215: I = { `RET ,`R99, 32'h0 };//215: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
625,7 → 631,7
begin |
|
`ifdef DEBUG |
$display("Error: Reached undefined address in instruction Memory: %d!!!!",Address); |
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address); |
// $stop(); |
`endif |
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF}; |
/rtl/GPU/CORES/MEM/Module_RAM.v
49,4 → 49,32
|
end |
endmodule |
//-------------------------------------------------------- |
//-------------------------------------------------------- |
|
// RAM_SINGLE_READ_PORT: clocked memory with one write port and one |
// registered read port. |
// Parameters: |
//   DATA_WIDTH - width of each stored word (default `DATA_ROW_WIDTH) |
//   ADDR_WIDTH - width of the read/write address inputs (default `DATA_ADDRESS_WIDTH) |
//   MEM_SIZE   - sizes the storage array (default 128) |
// Ports: |
//   Clock         - both the write and the read happen on its rising edge |
//   iWriteEnable  - when high, iDataIn is stored at iWriteAddress |
//   iReadAddress0 - address of the word loaded into oDataOut0 |
//   iWriteAddress - address written when iWriteEnable is high |
//   iDataIn       - word to store |
//   oDataOut0     - registered read data, updated on every rising clock edge |
module RAM_SINGLE_READ_PORT # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 ) |
( |
input wire Clock, |
input wire iWriteEnable, |
input wire[ADDR_WIDTH-1:0] iReadAddress0, |
input wire[ADDR_WIDTH-1:0] iWriteAddress, |
input wire[DATA_WIDTH-1:0] iDataIn, |
output reg [DATA_WIDTH-1:0] oDataOut0 |
|
); |
|
// Storage array. NOTE(review): the index range MEM_SIZE:0 declares |
// MEM_SIZE+1 words -- confirm whether MEM_SIZE-1:0 was intended. |
reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0]; |
|
// Write and read use non-blocking assignments in the same clocked block, so |
// a read of the address being written in the same cycle returns the |
// previously stored word. |
always @(posedge Clock) |
begin |
|
if (iWriteEnable) |
Ram[iWriteAddress] <= iDataIn; |
|
|
oDataOut0 <= Ram[iReadAddress0]; |
|
|
end |
endmodule |
|
|