URL
https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk
Subversion Repositories theia_gpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/theia_gpu
- from Rev 153 to Rev 154
- ↔ Reverse comparison
Rev 153 → Rev 154
/trunk/src/Module_Swizzle.v
File deleted
\ No newline at end of file
/trunk/src/Module_RadixRMul.v
File deleted
/trunk/src/Unit_MEM.v
File deleted
\ No newline at end of file
/trunk/src/Module_InstructionEntryPoint.v
File deleted
/trunk/src/Module_FixedPointAddtionSubstraction.v
File deleted
/trunk/src/Module_FixedPointDivision.v
File deleted
/trunk/src/Module_HostWBM.v
File deleted
/trunk/src/Theia.v
File deleted
/trunk/src/Module_WishBoneMaster.v
File deleted
/trunk/src/Theia_Core.v
File deleted
/trunk/src/Collaterals.v
File deleted
/trunk/src/Module_VectorALU.v
File deleted
/trunk/src/aDefinitions.v
File deleted
/trunk/src/Module_WishBoneSlave.v
File deleted
\ No newline at end of file
/trunk/src/Module_ExecutionFSM.v
File deleted
/trunk/src/Unit_Control.v
File deleted
/trunk/src/Unit_EXE.v
File deleted
\ No newline at end of file
/trunk/src/Module_Host.v
File deleted
/trunk/src/Module_OMemInterface.v
File deleted
/trunk/src/Module_RAM.v
File deleted
/trunk/src/Module_TMemInterface.v
File deleted
\ No newline at end of file
/trunk/src/Module_BusArbitrer.v
File deleted
/trunk/src/Module_ArithmeticComparison.v
File deleted
\ No newline at end of file
/trunk/src/TestBench_THEIA.v
File deleted
/trunk/src/Module_InstructionDecode.v
File deleted
/trunk/src/Module_ControlRegister.v
File deleted
\ No newline at end of file
/trunk/src/Module_InstructionFetch.v
File deleted
\ No newline at end of file
/trunk/src/Module_ROM.v
File deleted
\ No newline at end of file
/trunk/src/Module_FixedPointSquareRoot.v
File deleted
\ No newline at end of file
/trunk/src/Unit_IO.v
File deleted
\ No newline at end of file
/trunk/src/Module_SwapMemory.v
File deleted
/trunk/src/Module_MEM2WBM.v
File deleted
\ No newline at end of file
/trunk/rtl/Module_MEM2WBM.v
0,0 → 1,124
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/*
	This unit is used when the External Address that comes into IO is not an immediate value,
	i.e. it is a value that we need to read from one of our internal memory locations.
	Since each internal memory location contains 3 * 32-bit slots, i.e. the X, Y and Z parts of the
	memory location, we make three requests for external data, one for each of the X, Y and Z
	parts of our internal register. So, summarising, each internal memory location stores 3
	external memory addresses to request from WBM. Once the 3 data words have been read from the
	outside world, they get stored back into 3 consecutive internal memory addresses starting from
	iDataInitialStorageAddress
*/
//--------------------------------------------------------------------- |
// MEM2WBMUnitB
//
// Reads two rows of the internal data memory (addressed by iMEMDataPointer and
// iMEMDataPointer2) and hands them out one `WIDTH-bit element at a time on
// oReadDataElement / oReadDataElement2. The element currently presented is
// chosen by the 4-bit selector wXYZSelector produced by the SHL instance.
//
// Ports:
//   Clock, Reset                 clock and reset for the two delay flops.
//   iEnable                      enables the unit; while low the selector is
//                                held in reset.
//   iMEMDataPointer(2)           row addresses, forwarded unchanged to
//                                oDataReadAddress(2).
//   iReadDataBus(2)              rows returned by the MEM unit.
//   oReadDataElement(2)          element of each row currently selected.
//   oDataWriteEnable(2)          tied low, this unit only reads.
//   oDataAvailable               see the assign below.
//   iRequestNextElement          advances the element selector.
//   iDataInitialStorageAddress   forwarded unchanged to oDataWriteAddress.
//   oDone                        bit 3 of the element selector.
module MEM2WBMUnitB
(
input wire                              Clock,
input wire                              Reset,
input wire                              iEnable,
//output reg                            oSetAddress,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iMEMDataPointer,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iMEMDataPointer2,
output wire [`WIDTH-1:0]                oReadDataElement,
output wire [`WIDTH-1:0]                oReadDataElement2,
output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress,   //Tells the MEM unit which address we want to read
output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress2,  //Tells the MEM unit which address we want to read
input wire [`DATA_ROW_WIDTH-1:0]        iReadDataBus,       //This comes from the MEM unit
input wire [`DATA_ROW_WIDTH-1:0]        iReadDataBus2,      //This comes from the MEM unit
output wire                             oDataWriteEnable,
output wire                             oDataWriteEnable2,
output wire                             oDataAvailable,
input wire                              iRequestNextElement,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataInitialStorageAddress, //Initial address to store data
output wire[`DATA_ADDRESS_WIDTH-1:0]    oDataWriteAddress,          //Where to store the values coming from WBM
output wire                             oDone
);

//Element selector; bits [2:0] drive the element muxes, bit [3] is oDone.
wire [3:0] wXYZSelector;
//iEnable and iRequestNextElement after passing through one flop each.
wire       wLatchNow;
wire       iRequestNextElement_Delay;

//We only read, never write.
assign oDataWriteEnable  = 1'b0;
assign oDataWriteEnable2 = 1'b0;

//Addresses are simple pass-throughs.
assign oDataReadAddress  = iMEMDataPointer;
assign oDataReadAddress2 = iMEMDataPointer2;
assign oDataWriteAddress = iDataInitialStorageAddress;

assign oDone = wXYZSelector[3];

FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_x
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( iRequestNextElement ),
	.Q( iRequestNextElement_Delay )
);

FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_EnableDelay
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( iEnable ),
	.Q( wLatchNow )
);

//Data is flagged available while the unit is enabled, the flopped copy of
//iEnable is set, and the flopped copy of iRequestNextElement is clear.
assign oDataAvailable = iEnable & ~iRequestNextElement_Delay & wLatchNow;// & ~oDone;

//NOTE(review): the selector register is clocked by a combinational signal
//(iRequestNextElement | ~iEnable) rather than by Clock. Presumably SHL is a
//walking-one shift register that starts at 4'b0001 -- confirm against the
//SHIFTLEFT_POSEDGE definition.
SHIFTLEFT_POSEDGE #(4) SHL
(
	.Clock(   iRequestNextElement | ~iEnable ),
	.Enable(  1'b1 ),
	.Reset(   ~iEnable | Reset ),
	.Initial( 4'b1 ),
	.O(       wXYZSelector )
);

//Element muxes: I1 is bits [95:64], I2 is [63:32], I3 is [31:0] of each row.
MUXFULLPARALELL_3SEL_WALKINGONE MUXA
(
	.Sel( wXYZSelector[2:0] ),
	.I1(  iReadDataBus[95:64] ),
	.I2(  iReadDataBus[63:32] ),
	.I3(  iReadDataBus[31:0]  ),
	.O1(  oReadDataElement )
);

MUXFULLPARALELL_3SEL_WALKINGONE MUXA2
(
	.Sel( wXYZSelector[2:0] ),
	.I1(  iReadDataBus2[95:64] ),
	.I2(  iReadDataBus2[63:32] ),
	.I3(  iReadDataBus2[31:0]  ),
	.O1(  oReadDataElement2 )
);

endmodule
//--------------------------------------------------------------------- |
/trunk/rtl/Module_Swizzle.v
0,0 → 1,53
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//--------------------------------------------------------------------------- |
// Swizzle3D
//
// Combinational re-ordering of the three components of a vector.
// iOperation is compared against the `SWIZZLE_* codes:
//
//   `SWIZZLE_XXX : {X,Y,Z} <= {X,X,X}
//   `SWIZZLE_YYY : {X,Y,Z} <= {Y,Y,Y}
//   `SWIZZLE_ZZZ : {X,Y,Z} <= {Z,Z,Z}
//   `SWIZZLE_YXZ : {X,Y,Z} <= {Y,X,Z}
//   anything else: all three outputs are driven to zero.
module Swizzle3D
(
	input wire [`WIDTH-1:0]   Source0_X,
	input wire [`WIDTH-1:0]   Source0_Y,
	input wire [`WIDTH-1:0]   Source0_Z,
	input wire [`WIDTH-1:0]   iOperation,

	output reg [`WIDTH-1:0]   SwizzleX,
	output reg [`WIDTH-1:0]   SwizzleY,
	output reg [`WIDTH-1:0]   SwizzleZ
);

//-----------------------------------------------------
always @ ( * )
begin
	case (iOperation)
		`SWIZZLE_XXX:
		begin
			SwizzleX = Source0_X;
			SwizzleY = Source0_X;
			SwizzleZ = Source0_X;
		end
		`SWIZZLE_YYY:
		begin
			SwizzleX = Source0_Y;
			SwizzleY = Source0_Y;
			SwizzleZ = Source0_Y;
		end
		`SWIZZLE_ZZZ:
		begin
			SwizzleX = Source0_Z;
			SwizzleY = Source0_Z;
			SwizzleZ = Source0_Z;
		end
		`SWIZZLE_YXZ:
		begin
			SwizzleX = Source0_Y;
			SwizzleY = Source0_X;
			SwizzleZ = Source0_Z;
		end
		default:
		begin
			//Outputs are `WIDTH bits wide, so the zero constant is sized
			//to `WIDTH (not to the width of a whole data row).
			SwizzleX = {`WIDTH{1'b0}};
			SwizzleY = {`WIDTH{1'b0}};
			SwizzleZ = {`WIDTH{1'b0}};
		end
	endcase
end
//-----------------------------------------------------
endmodule
//--------------------------------------------------------------------------- |
/trunk/rtl/Module_RadixRMul.v
0,0 → 1,337
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
////////////////////////////////////////////////////////////////////////////////// |
// Company: |
// Engineer: |
// |
// Create Date: 19:49:14 01/13/2009 |
// Design Name: |
// Module Name: RadixRMul |
// Project Name: |
// Target Devices: |
// Tool versions: |
// Description: |
// |
// Dependencies: |
// |
// Revision: |
// Revision 0.01 - File Created |
// Additional Comments: |
// |
////////////////////////////////////////////////////////////////////////////////// |
|
`default_nettype none |
|
|
//--------------------------------------------------- |
// MUX_4_TO_1_32Bits_FullParallel
//
// Combinational 4-to-1 multiplexer for 32-bit words.
//   Sel = 2'b00 -> O = i1
//   Sel = 2'b01 -> O = i2
//   Sel = 2'b10 -> O = i3
//   Sel = 2'b11 -> O = i4
module MUX_4_TO_1_32Bits_FullParallel
(
	input wire [31:0]  i1,i2,i3,i4,
	output reg [31:0]  O,
	input wire [1:0]   Sel
);

always @ ( * )
begin
	//All four encodings of Sel are listed, so no default branch is needed
	//for synthesis.
	case (Sel)
		2'b00: O = i1;
		2'b01: O = i2;
		2'b10: O = i3;
		2'b11: O = i4;
	endcase
end

endmodule
//--------------------------------------------------- |
/* |
module SHIFTER2_16_BITS |
( |
input wire C, |
input wire[15:0] In, |
output reg[15:0] Out |
); |
|
reg [15:0] Temp; |
always @ (posedge C ) |
begin |
Out = In << 2; |
|
end |
|
endmodule |
*/ |
//--------------------------------------------------- |
// RADIX_R_MUL_32_FULL_PARALLEL
//
// Signed 32 x 32 -> 64 bit multiplier, radix 4, fully parallel.
//
// Operation:
//   1. A and B are captured into registers while iInputReady is high.
//   2. Both operands are converted to sign + magnitude.
//   3. |B| is split into 16 two-bit digits; each digit selects 0, |A|, 2|A|
//      or 3|A|, which is shifted into place and summed in an adder tree.
//   4. Unless iUnscaled is set, the magnitude of the product is shifted
//      right by `SCALE bits.
//   5. The sign is restored (two's complement) when the operand signs differ.
//
// Ports:
//   Clock, Reset   clock / reset for the operand and handshake flops.
//   A, B           signed two's complement operands.
//   R              signed 64-bit result (combinational from the latched
//                  operands).
//   iUnscaled      1: return the raw product; 0: product >> `SCALE.
//   iInputReady    capture enable for A and B, also starts the handshake.
//   OutputReady    iInputReady after passing through two flops in series.
//
// The multiples 2|A| and 3|A| are kept 34 bits wide. |A| can be as large as
// 2^31 (A = -2^31), so 3|A| needs 34 bits; 32-bit multiples silently drop
// the upper bits for |A| > 32'h5555_5555.
module RADIX_R_MUL_32_FULL_PARALLEL
(
	input wire         Clock,
	input wire         Reset,
	input wire[31:0]   A,
	input wire[31:0]   B,
	output wire[63:0]  R,
	input wire         iUnscaled,
	input wire         iInputReady,
	output wire        OutputReady
);

wire wInputDelay1;

//-------------------
// Operand capture
wire [31:0] wALatched,wBLatched;

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( iInputReady ),
	.D( A ),
	.Q( wALatched )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( iInputReady ),
	.D( B ),
	.Q( wBLatched )
);

//-------------------
// Handshake: OutputReady follows iInputReady through two flops
FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay1
(
	.Clock( Clock ),
	.Clear( Reset ),
	.D( iInputReady ),
	.Q( wInputDelay1 )
);

FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2
(
	.Clock( Clock ),
	.Clear( Reset ),
	.D( wInputDelay1 ),
	.Q( OutputReady )
);

//-------------------
// Sign / magnitude split
wire        SignA,SignB;
wire [31:0] wA, wB;

assign SignA = wALatched[31];
assign SignB = wBLatched[31];

assign wA = (SignA == 1) ? ~wALatched + 1'b1 : wALatched;
assign wB = (SignB == 1) ? ~wBLatched + 1'b1 : wBLatched;

//-------------------
// Multiples of |A|, wide enough to hold 3 * 2^31
wire [33:0] w1A, w2A, w3A;

assign w1A = {2'b00, wA};
assign w2A = {1'b0, wA, 1'b0};
assign w3A = w1A + w2A;

//-------------------
// One partial product per radix-4 digit of |B|.
// Digit i is wB[2*i+1 : 2*i]; its partial product is weighted by 4^i and
// stored in bits [64*i +: 64] of wShiftedPartials.
wire [16*64-1:0] wShiftedPartials;

genvar i;
generate
	for (i = 0; i < 16; i = i + 1)
	begin : PARTIAL
		reg [33:0] rMultiple;

		always @ ( * )
		begin
			case ( wB[2*i+1 : 2*i] )
				2'b00: rMultiple = 34'd0;
				2'b01: rMultiple = w1A;
				2'b10: rMultiple = w2A;
				2'b11: rMultiple = w3A;
			endcase
		end

		assign wShiftedPartials[64*i +: 64] = {30'd0, rMultiple} << (2*i);
	end
endgenerate

//-------------------
// Adder tree: 16 -> 8 -> 4 -> 2 -> 1
wire [63:0] wPartialResult1_0,wPartialResult1_1,wPartialResult1_2,wPartialResult1_3,
            wPartialResult1_4,wPartialResult1_5,wPartialResult1_6,wPartialResult1_7;

assign wPartialResult1_0 = wShiftedPartials[64*0  +: 64] + wShiftedPartials[64*1  +: 64];
assign wPartialResult1_1 = wShiftedPartials[64*2  +: 64] + wShiftedPartials[64*3  +: 64];
assign wPartialResult1_2 = wShiftedPartials[64*4  +: 64] + wShiftedPartials[64*5  +: 64];
assign wPartialResult1_3 = wShiftedPartials[64*6  +: 64] + wShiftedPartials[64*7  +: 64];
assign wPartialResult1_4 = wShiftedPartials[64*8  +: 64] + wShiftedPartials[64*9  +: 64];
assign wPartialResult1_5 = wShiftedPartials[64*10 +: 64] + wShiftedPartials[64*11 +: 64];
assign wPartialResult1_6 = wShiftedPartials[64*12 +: 64] + wShiftedPartials[64*13 +: 64];
assign wPartialResult1_7 = wShiftedPartials[64*14 +: 64] + wShiftedPartials[64*15 +: 64];

wire [63:0] wPartialResult2_0,wPartialResult2_1,wPartialResult2_2,wPartialResult2_3;

assign wPartialResult2_0 = wPartialResult1_0 + wPartialResult1_1;
assign wPartialResult2_1 = wPartialResult1_2 + wPartialResult1_3;
assign wPartialResult2_2 = wPartialResult1_4 + wPartialResult1_5;
assign wPartialResult2_3 = wPartialResult1_6 + wPartialResult1_7;

wire [63:0] wPartialResult3_0,wPartialResult3_1;

assign wPartialResult3_0 = wPartialResult2_0 + wPartialResult2_1;
assign wPartialResult3_1 = wPartialResult2_2 + wPartialResult2_3;

//-------------------
// Scaling and sign restoration
wire [63:0] wMagnitude;
wire [63:0] R_pre1,R_pre2;

assign wMagnitude = wPartialResult3_0 + wPartialResult3_1;

//The scale shift is applied to the magnitude, before the sign is restored.
assign R_pre1 = (iUnscaled == 1) ? wMagnitude : (wMagnitude >> `SCALE);

assign R_pre2 = ( (SignA ^ SignB) == 1) ? ~R_pre1 + 1'b1 : R_pre1;

assign R = R_pre2;

endmodule

//This file switches to `default_nettype none near its top; restore the
//language default so the setting does not leak into files compiled later.
`default_nettype wire
/trunk/rtl/Unit_MEM.v
0,0 → 1,343
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/*
	The memory unit has all the memory related modules for THEIA.
	There are 3 memories in the core:
	DMEM: The data memory, it is a R/W dual channel RAM, stores the data locations.
	IMEM: The instruction memory, R/W dual channel RAM, stores user shaders.
	IROM: RO instruction memory, stores default shaders and other internal code.
	I use two ROMs with the same data, to simulate a dual channel.
	This unit also has a Control register.
*/
`define USER_CODE_ENABLED 2
//-------------------------------------------------------------------
// MemoryUnit
//
// Groups the data memory banks, the instruction RAM, the instruction ROMs
// and the control register.
//
// Data side:
//   * The EXE unit has two read channels and one write channel. Bits [6:5]
//     of an EXE address select the bank, bits [4:0] the offset in the bank.
//   * The IO unit's write channel goes to the IMEM bank and to port B of the
//     swap memory.
//   NOTE(review): nothing in this module drives oData1_IO / oData2_IO; the
//   swap-memory port-B read outputs that look like their intended source are
//   commented out in the SMEM instance. Confirm whether this is intentional.
//
// Instruction side:
//   * Two read channels. The top bit of each read address selects between
//     the user instruction RAM (1) and the default-code ROM (0); that bit is
//     passed through a flop before it drives the output mux.
module MemoryUnit
(
input wire                              Clock,
input wire                              Reset,
input wire                              iFlipMemory,

//Data bus for EXE Unit
input wire                              iDataWriteEnable_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress1_EXE,
output wire[`DATA_ROW_WIDTH-1:0]        oData1_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress2_EXE,
output wire[`DATA_ROW_WIDTH-1:0]        oData2_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataWriteAddress_EXE,
input wire[`DATA_ROW_WIDTH-1:0]         iData_EXE,

//Data bus for IO Unit
input wire                              iDataWriteEnable_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress1_IO,
output wire[`DATA_ROW_WIDTH-1:0]        oData1_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataReadAddress2_IO,
output wire[`DATA_ROW_WIDTH-1:0]        oData2_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0]     iDataWriteAddress_IO,
input wire[`DATA_ROW_WIDTH-1:0]         iData_IO,

//Instruction bus
input wire                              iInstructionWriteEnable,
input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionReadAddress1,
input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionReadAddress2,
input wire [`ROM_ADDRESS_WIDTH-1:0]     iInstructionWriteAddress,
input wire [`INSTRUCTION_WIDTH-1:0]     iInstruction,
output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction1,
output wire [`INSTRUCTION_WIDTH-1:0]    oInstruction2,

`ifdef DEBUG
input wire [`MAX_CORES-1:0]             iDebug_CoreID,
`endif

//Control Register
input wire[15:0]                        iControlRegister,
output wire[15:0]                       oControlRegister
);

wire [`INSTRUCTION_WIDTH-1:0] wIMEM2_IMUX__DataOut1,wIMEM2_IMUX__DataOut2,
                              wIROM2_IMUX__DataOut1,wIROM2_IMUX__DataOut2;
wire wFlipSelect;

//-------------------------------------------------------------------
// Instruction source select.
// The MSB of each instruction read address chooses RAM (1) or ROM (0).
// It goes through a flop first; presumably this lines the select up with
// the read latency of the memories -- TODO confirm.
wire wInstructionSelector,wInstructionSelector2;

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-1] ),
	.Q( wInstructionSelector )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-1] ),
	.Q( wInstructionSelector2 )
);

assign oInstruction1 = (wInstructionSelector == 1) ?
	wIMEM2_IMUX__DataOut1 : wIROM2_IMUX__DataOut1;

assign oInstruction2 = (wInstructionSelector2 == 1) ?
	wIMEM2_IMUX__DataOut2 : wIROM2_IMUX__DataOut2;
//-------------------------------------------------------------------

wire wDataWriteEnable_RMEM,wDataWriteEnable_SMEM,wDataWriteEnable_XMEM;
wire [`DATA_ROW_WIDTH-1:0] wData_SMEM1,wData_SMEM2;
wire [`DATA_ROW_WIDTH-1:0] wData_RMEM1,wData_RMEM2,wData_IMEM1,wData_IMEM2,wData_XMEM1,wData_XMEM2;

/*******************************************************
The Data memory is divided into several memory banks.
Each Bank has different characteristics:

* IO MEM: Input Registers, Written by IO, Read by EXE.
* SWAP MEM: Swap registers, while IO reads/writes values,
  EXE reads/writes values.
* C1-C7, R1- R12: General purpose registers,
  EXE can R/W, IO can not see these sections of the memory
* OREG*: Output registers written by EXE, Read by IO.

Whenever an input address is received, this input address
is divided into a bank selector and an offset in the following way:

 __________________________
 | b6 b5 | b4 b3 b2 b1 b0 |

The bits b4 .. b0 are the LSBs of the address, they give the
position relative to the bank

The bits b6 and b5 give the actual Bank to select.
Please see aDefinitions.v for a description of each
register location.

   0____________________
    |   IO MEM         |
    |                  |
    |                  |  b6b5 = 00
  32|__________________|
    |   SWAP MEM       |
    |                  |  b6b5 = 01
    |                  |
  64|__________________|
    |   C1 - C7        |
    |   R1 - R12       |  b6b5 = 10
    |                  |
  96|__________________|
    |   CREG*          |
    |                  |  b6b5 = 11
    |                  |
    |__________________|

*******************************************************/

//EXE read channel 1: pick the bank addressed by bits [6:5]
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ROW_WIDTH ) MUX1
(
	.Sel( iDataReadAddress1_EXE[6:5] ),
	.I1( wData_IMEM1 ),   //IO MEM
	.I2( wData_SMEM1 ),   //SWAP MEM
	.I3( wData_RMEM1 ),   //R*, C*
	.I4( wData_XMEM1 ),   //CREG*
	.O1( oData1_EXE )
);

//EXE read channel 2: pick the bank addressed by bits [6:5]
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ROW_WIDTH ) MUX2
(
	.Sel( iDataReadAddress2_EXE[6:5] ),
	.I1( wData_IMEM2 ),   //IO MEM
	.I2( wData_SMEM2 ),   //SWAP MEM
	.I3( wData_RMEM2 ),   //R*, C*
	.I4( wData_XMEM2 ),   //CREG*
	.O1( oData2_EXE )
);

//EXE write enables, one per writable bank
assign wDataWriteEnable_SMEM = ( iDataWriteAddress_EXE[6:5] == 2'b01 && iDataWriteEnable_EXE ); //Enable WE for SMEM if bank == 01
assign wDataWriteEnable_RMEM = ( iDataWriteAddress_EXE[6:5] == 2'b10 && iDataWriteEnable_EXE ); //Enable WE for RMEM if bank == 10
assign wDataWriteEnable_XMEM = ( iDataWriteAddress_EXE[6:5] == 2'b11 && iDataWriteEnable_EXE ); //Enable WE for XMEM if bank == 11

//Input Registers, Written by IO, Read by EXE
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,/*42*/32) IMEM //16 here is enough, I hate small devices!
(
	.Clock( Clock ),
	.iWriteEnable(  iDataWriteEnable_IO ),          //Only IO can write into this bank
	.iReadAddress0( iDataReadAddress1_EXE[4:0] ),   //EXE read address channel 1
	.iReadAddress1( iDataReadAddress2_EXE[4:0] ),   //EXE read address channel 2
	.iWriteAddress( iDataWriteAddress_IO[4:0] ),    //Only IO can write into this bank
	.iDataIn(       iData_IO ),
	.oDataOut0(     wData_IMEM1 ),
	.oDataOut1(     wData_IMEM2 )
);

//Swap registers, while IO reads/writes values, EXE reads/writes values;
//the pointers get flipped in the next iteration.
//Port B addresses are sliced to [4:0] like every other bank: the memory is
//configured for 5 address bits, so only the in-bank offset is meaningful.
SWAP_MEM # (`DATA_ROW_WIDTH,5,32) SMEM
(
	.Clock( Clock ),
	.iSelect( wFlipSelect ),

	.iWriteEnableA(  wDataWriteEnable_SMEM ),
	.iReadAddressA0( iDataReadAddress1_EXE[4:0] ),
	.iReadAddressA1( iDataReadAddress2_EXE[4:0] ),
	.iWriteAddressA( iDataWriteAddress_EXE[4:0] ),
	.iDataInA(       iData_EXE ),
	.oDataOutA0(     wData_SMEM1 ),
	.oDataOutA1(     wData_SMEM2 ),

	.iWriteEnableB(  iDataWriteEnable_IO ),
	.iReadAddressB0( iDataReadAddress1_IO[4:0] ),
	.iReadAddressB1( iDataReadAddress2_IO[4:0] ),
	.iWriteAddressB( iDataWriteAddress_IO[4:0] ),
	.iDataInB(       iData_IO )
//	.oDataOutB0( wIOData_SMEM1 ),
//	.oDataOutB1( wIOData_SMEM2 )
);

//General purpose registers, EXE can R/W, IO can not see these sections
//of the memory
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,32) RMEM //Ok so we have fewer Registers then...
(
	.Clock( Clock ),
	.iWriteEnable(  wDataWriteEnable_RMEM ),
	.iReadAddress0( iDataReadAddress1_EXE[4:0] ),
	.iReadAddress1( iDataReadAddress2_EXE[4:0] ),
	.iWriteAddress( iDataWriteAddress_EXE[4:0] ),
	.iDataIn(       iData_EXE ),
	.oDataOut0(     wData_RMEM1 ),
	.oDataOut1(     wData_RMEM2 )
);

//Bank 11, written and read by EXE only
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,32) XMEM
(
	.Clock( Clock ),
	.iWriteEnable(  wDataWriteEnable_XMEM ),
	.iReadAddress0( iDataReadAddress1_EXE[4:0] ),
	.iReadAddress1( iDataReadAddress2_EXE[4:0] ),
	.iWriteAddress( iDataWriteAddress_EXE[4:0] ),
	.iDataIn(       iData_EXE ),
	.oDataOut0(     wData_XMEM1 ),
	.oDataOut1(     wData_XMEM2 )
);

//One-bit counter enabled by iFlipMemory; its output is the swap-memory select
UPCOUNTER_POSEDGE # (1) UPC1
(
	.Clock(   Clock ),
	.Reset(   Reset ),
	.Initial( 1'b0 ),
	.Enable(  iFlipMemory ),
	.Q(       wFlipSelect )
);

//-------------------------------------------------------------------
/*
Instruction memory.
*/

// ROM_ADDRESS_WIDTH exceeds the array size, it may get trimmed...
// The read addresses have their MSB forced to zero because that bit is the
// RAM/ROM select; the write address is passed through unmodified.
RAM_DUAL_READ_PORT # (`INSTRUCTION_WIDTH,`ROM_ADDRESS_WIDTH,/*512*/128) INST_MEM //Only 128 instructions :( well this is for the user anyway
(
	.Clock( Clock ),
	.iWriteEnable(  iInstructionWriteEnable ),
	.iReadAddress0( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
	.iReadAddress1( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
	.iWriteAddress( iInstructionWriteAddress ),
	.iDataIn(       iInstruction ),
	.oDataOut0(     wIMEM2_IMUX__DataOut1 ),
	.oDataOut1(     wIMEM2_IMUX__DataOut2 )
);
//-------------------------------------------------------------------
/*
Default code stored in ROM.
*/
wire [`INSTRUCTION_WIDTH-1:0] wRomDelay1,wRomDelay2;

//In the real world a ROM will take at least 1 clock cycle;
//since ROMs are not synthesizable, it won't hurt to put
//this delay
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDA
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( wRomDelay1 ),
	.Q( wIROM2_IMUX__DataOut1 )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDB
(
	.Clock(  Clock ),
	.Reset(  Reset ),
	.Enable( 1'b1 ),
	.D( wRomDelay2 ),
	.Q( wIROM2_IMUX__DataOut2 )
);

//The reason I put two ROMs is because I need to read 2 different Instruction
//addresses at the same time (branch-taken and branch-not-taken) and I am not
//sure how to write a dual read channel ROM this way...
ROM IROM
(
	.Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
	`ifdef DEBUG
	.iDebug_CoreID(iDebug_CoreID),
	`endif
	.I( wRomDelay1 )
);

ROM IROM2
(
	.Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
	`ifdef DEBUG
	.iDebug_CoreID(iDebug_CoreID),
	`endif
	.I( wRomDelay2 )
);
//--------------------------------------------------------
ControlRegister CR
(
	.Clock( Clock ),
	.Reset( Reset ),
	.iControlRegister( iControlRegister ),
	.oControlRegister( oControlRegister )
);

endmodule
//------------------------------------------------------------------- |
/trunk/rtl/Module_ExecutionFSM.v
0,0 → 1,539
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
// State encodings for the execution FSM (compared against CurrentState /
// assigned to NextState). The values are not contiguous: 6 is not used.
`define EXEU_AFTER_RESET                     0   // state loaded on Reset
`define EXEU_INITIAL_STATE                   1
`define EXEU_WAIT_FOR_DECODE                 2   // idle until iDecodeDone
`define EXEU_FETCH_DECODED_INST              3
`define EXEU_WAIT_FOR_ALU_EXECUTION          4   // entered once iDecodeDone is seen
`define EXEU_WRITE_BACK_TO_RAM               5
`define EXEU_HANDLE_JUMP                     7
|
|
|
module ExecutionFSM |
( |
input wire Clock, |
input wire Reset, |
|
input wire iDecodeDone, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1, |
input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination, |
inout wire[`DATA_ROW_WIDTH-1:0] RAMBus, |
//output reg ReadyForNextInstruction, |
output wire oJumpFlag , |
output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp , |
output wire oRAMWriteEnable , |
output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress , |
output wire oExeLatchedValues, |
output reg oBusy , |
|
//ALU ports and control signals |
output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation, |
output wire [`WIDTH-1:0] oALUChannelX1, |
output wire [`WIDTH-1:0] oALUChannelY1, |
output wire [`WIDTH-1:0] oALUChannelZ1, |
output wire [`WIDTH-1:0] oALUChannelX2, |
output wire [`WIDTH-1:0] oALUChannelY2, |
output wire [`WIDTH-1:0] oALUChannelZ2, |
output wire oTriggerALU, |
|
input wire [`WIDTH-1:0] iALUResultX, |
input wire [`WIDTH-1:0] iALUResultY, |
input wire [`WIDTH-1:0] iALUResultZ, |
input wire iALUOutputReady, |
input wire iBranchTaken, |
input wire iBranchNotTaken, |
|
|
`ifdef DEBUG |
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
//Data forward Signals |
output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination |
|
|
); |
|
wire wLatchNow; |
reg rInputLatchesEnabled; |
|
//If ALU says jump, just pass along |
assign oJumpFlag = iBranchTaken; |
//JumpIP is the instruction destination (= oRAMWriteAddress) |
assign oJumpIp = oRAMWriteAddress; |
|
assign wLatchNow = iDecodeDone & rInputLatchesEnabled; |
assign oExeLatchedValues = wLatchNow; |
assign oTriggerALU = wLatchNow; |
|
wire wOperationIsJump; |
assign wOperationIsJump = iBranchTaken || iBranchNotTaken; |
|
//Don't allow me to write back back if the operation is a NOP |
`ifdef DEBUG |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && |
(oALUOperation != `NOP) && oALUOperation != `DEBUG_PRINT; |
`else |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && oALUOperation != `NOP; |
`endif |
|
|
assign RAMBus = ( oRAMWriteEnable ) ? {iALUResultX,iALUResultY,iALUResultZ} : `DATA_ROW_WIDTH'bz; |
|
assign oALUChannelX1 = iSource1[95:64]; |
assign oALUChannelY1 = iSource1[63:32]; |
assign oALUChannelZ1 = iSource1[31:0]; |
|
assign oALUChannelX2 = iSource0[95:64]; |
assign oALUChannelY2 = iSource0[63:32]; |
assign oALUChannelZ2 = iSource0[31:0]; |
|
/* |
FF32_POSEDGE_SYNCRONOUS_RESET SourceX1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[95:64] ), |
.Q( oALUChannelX1 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceY1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[63:32] ), |
.Q( oALUChannelY1 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[31:0] ), |
.Q( oALUChannelZ1 ) |
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[95:64] ), |
.Q(oALUChannelX1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[63:32] ), |
.Q(oALUChannelY1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[31:0] ), |
.Q(oALUChannelZ1) |
); |
*/ |
/* |
FF32_POSEDGE_SYNCRONOUS_RESET SourceX2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[95:64] ), |
.Q( oALUChannelX2 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceY2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[63:32] ), |
.Q( oALUChannelY2 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[31:0] ), |
.Q( oALUChannelZ2 ) |
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[95:64] ), |
.Q(oALUChannelX2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[63:32] ), |
.Q(oALUChannelY2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[31:0] ), |
.Q(oALUChannelZ2) |
); |
*/ |
//The operation code is forwarded combinationally: oALUOperation is wired |
//straight to iOperation, with no register in between. |
|
|
assign oALUOperation = iOperation; |
//Disabled alternative that drove the write address combinationally as well: |
//assign oRAMWriteAddress = iDestination; |
/* |
FF_OPCODE_POSEDGE_SYNCRONOUS_RESET FFOperation |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iOperation ), |
.Q( oALUOperation ) |
|
); |
|
|
FF16_POSEDGE_SYNCRONOUS_RESET PSRegDestination |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iDestination ), |
.Q( oRAMWriteAddress ) |
|
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFOperation |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iOperation ), |
.Q(oALUOperation) |
); |
*/ |
//Destination-address register. |
//iDestination is the flop input and oRAMWriteAddress its output; |
//wLatchNow drives the Enable port, Clock/Reset are the module clock and reset. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination |
( |
    .D( iDestination ), |
    .Q( oRAMWriteAddress ), |
    .Enable( wLatchNow ), |
    .Clock( Clock ), |
    .Reset( Reset ) |
); |
|
//Data forwarding: expose the registered destination of the instruction in flight |
assign oLastDestination = oRAMWriteAddress; |
|
//Execution FSM state holders: CurrentState is the clocked register, |
//NextState is produced by the combinational decode. |
reg [7:0] CurrentState, NextState; |
|
//------------------------------------------------ |
//State register. Reset is in the sensitivity list, so it acts asynchronously |
//and forces the FSM to EXEU_AFTER_RESET. |
always @(posedge Clock or posedge Reset) |
begin |
    if (Reset) |
    begin |
        CurrentState <= `EXEU_AFTER_RESET; |
    end |
    else |
    begin |
        CurrentState <= NextState; |
    end |
end |
//------------------------------------------------ |
|
|
//Next-state and output decode of the execution FSM. |
//The block is purely combinational, so blocking assignments are used. |
//Every branch assigns oBusy, rInputLatchesEnabled and NextState, so no latch is implied. |
//  oBusy                : 1 while an instruction is being executed |
//  rInputLatchesEnabled : 1 while the input registers may capture a new instruction |
always @( * ) |
begin |
    case (CurrentState) |
    //------------------------------------------ |
    //First state after reset: idle for one cycle, then wait for the decoder. |
    `EXEU_AFTER_RESET: |
    begin |
        oBusy                = 0; |
        rInputLatchesEnabled = 1; |
|
        NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    //At the same time iDecodeDone goes to 1 our flops store the decoded |
    //values, so on the next clock cycle the IDU can go ahead and decode |
    //the next instruction in the pipeline. |
    `EXEU_WAIT_FOR_DECODE: |
    begin |
        oBusy                = 0; |
        rInputLatchesEnabled = 1; |
|
        if ( iDecodeDone ) //This same signal triggers the ALU |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    //The ALU is working: report busy and keep the input registers closed |
    //until iALUOutputReady is seen. |
    `EXEU_WAIT_FOR_ALU_EXECUTION: |
    begin |
        oBusy                = 1; |
        rInputLatchesEnabled = 0; //No new instruction may be captured while we wait |
|
        if ( iALUOutputReady ) //This same signal enables writing the results to RAM |
            NextState = `EXEU_WAIT_FOR_DECODE; |
        else |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
    end |
    //------------------------------------------ |
    //NOTE(review): no NextState assignment in this block targets this state, |
    //so it can only be entered if CurrentState is forced elsewhere. |
    `EXEU_WRITE_BACK_TO_RAM: |
    begin |
        oBusy                = 1; |
        rInputLatchesEnabled = 1; |
|
        if ( iDecodeDone ) |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    //Unknown encoding: behave as idle and restart from the reset state. |
    default: |
    begin |
        oBusy                = 0; |
        rInputLatchesEnabled = 1; |
|
        NextState = `EXEU_AFTER_RESET; |
    end |
    //------------------------------------------ |
    endcase |
end |
|
//----------------------------------------------------------------------- |
`ifdef DUMP_CODE |
//Simulation-only log handles: ucode_file -> Code.log, reg_log -> Registers.log |
integer ucode_file, reg_log; |
|
//Open both log files once at time zero and write the banner of the ucode dump. |
initial |
begin |
    $display("Opening ucode dump file....\n"); |
    ucode_file = $fopen("Code.log","w"); |
    $fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n"); |
|
    $display("Opening Register lof file...\n"); |
    reg_log = $fopen("Registers.log","w"); |
end |
|
`endif //Ucode dump |
|
//----------------------------------------------------------------------- |
`ifdef DEBUG |
//Simulation-only instruction trace (compiled in when DEBUG is defined). |
//Six flops keep a copy of the X/Y/Z lanes of both source operands, enabled by |
//wLatchNow, so that the trace can print the operands of the instruction. |
wire [`WIDTH-1:0] wALUChannelX1,wALUChannelY1,wALUChannelZ1; |
wire [`WIDTH-1:0] wALUChannelX2,wALUChannelY2,wALUChannelZ2; |
|
//Operand copies taken from iSource1 |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource1[95:64] ), .Q(wALUChannelX1) ); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource1[63:32] ), .Q(wALUChannelY1) ); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource1[31:0] ), .Q(wALUChannelZ1) ); |
|
//Operand copies taken from iSource0 |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource0[95:64] ), .Q(wALUChannelX2) ); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource0[63:32] ), .Q(wALUChannelY2) ); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2 |
( .Clock( Clock ), .Reset( Reset), .Enable( wLatchNow ), .D( iSource0[31:0] ), .Q(wALUChannelZ2) ); |
|
|
//Log the instruction pointer each time the decoder reports a new instruction |
//(only for the core selected by DEBUG_CORE). |
always @ (posedge iDecodeDone && iDebug_CoreID == `DEBUG_CORE) |
begin |
    `LOGME"[CORE %d] IP:%d", iDebug_CoreID,iDebug_CurrentIP); |
end |
|
//One trace line per completed instruction: flags, time, mnemonic, destination, |
//both operands and finally the ALU result. |
//The result used to be printed by a second always block sensitive to the same |
//event; the relative order of two such blocks is not guaranteed by the |
//simulator, so everything is now emitted from this single block. |
always @ (negedge Clock && iDebug_CoreID == `DEBUG_CORE) |
begin |
    if ( iALUOutputReady ) |
    begin |
|
        if (iBranchTaken) |
            `LOGME"<BT>"); |
|
        if (iBranchNotTaken ) |
            `LOGME"<BNT>"); |
|
        if (oRAMWriteEnable) |
            `LOGME"<WE>"); |
|
        `LOGME "(%dns ",$time); |
        case ( oALUOperation ) |
        `RETURN: `LOGME"RETURN"); |
        `ADD: `LOGME"ADD"); |
        `SUB: `LOGME"SUB"); |
        `DIV: `LOGME"DIV"); |
        `MUL: `LOGME"MUL"); |
        `MAG: `LOGME"MAG"); |
        `JGX: `LOGME"JGX"); |
        `JLX: `LOGME"JLX"); |
        `JGEX: `LOGME"JGEX"); |
        `JGEY: `LOGME"JGEY"); |
        `JGEZ: `LOGME"JGEZ"); |
        `JLEX: `LOGME"JLEX"); |
        `JLEY: `LOGME"JLEY"); |
        `JLEZ: `LOGME"JLEZ"); |
        `JMP: `LOGME"JMP"); |
        `ZERO: `LOGME"ZERO"); |
        `JNEX: `LOGME"JNEX"); |
        `JNEY: `LOGME"JNEY"); |
        `JNEZ: `LOGME"JNEZ"); |
        `JEQX: `LOGME"JEQX"); |
        `JEQY: `LOGME"JEQY"); |
        `JEQZ: `LOGME"JEQZ"); |
        `CROSS: `LOGME"CROSS"); |
        `DOT: `LOGME"DOT"); |
        `SETX: `LOGME"SETX"); |
        `SETY: `LOGME"SETY"); |
        `SETZ: `LOGME"SETZ"); |
        `NOP: `LOGME"NOP"); |
        `COPY: `LOGME"COPY"); |
        `INC: `LOGME"INC"); |
        `DEC: `LOGME"DEC"); |
        `MOD: `LOGME"MOD"); |
        `FRAC: `LOGME"FRAC"); |
        `NEG: `LOGME"NEG"); |
        `SWIZZLE3D: `LOGME"SWIZZLE3D"); |
        `MULP: `LOGME"MULP"); |
        `XCHANGEX: `LOGME"XCHANGEX"); |
        `IMUL: `LOGME"IMUL"); |
        `UNSCALE: `LOGME"UNSCALE"); |
        `INCX: `LOGME"INCX"); |
        `INCY: `LOGME"INCY"); |
        `INCZ: `LOGME"INCZ"); |
        `OMWRITE: `LOGME"OMWRITE"); |
        `TMREAD: `LOGME"TMREAD"); |
        `LEA: `LOGME"LEA"); |
        `CALL: `LOGME"CALL"); |
        `RET: `LOGME"RET"); |
        `DEBUG_PRINT: |
        begin |
            `LOGME"DEBUG_PRINT"); |
        end |
        default: |
        begin |
            `LOGME"**********ERROR UNKNOWN OP*********"); |
            $display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation); |
            // $stop(); |
        end |
        endcase |
|
        //Destination address followed by the two latched operands |
        `LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ", |
        oRAMWriteAddress, |
        wALUChannelX1,wALUChannelY1,wALUChannelZ1, |
        wALUChannelX2,wALUChannelY2,wALUChannelZ2 |
|
        ); |
|
        if (oALUOperation == `RETURN) |
            `LOGME"\n\n\n"); |
|
        //ALU result closes the trace line |
        `LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ); |
|
    end |
end //always |
`endif |
|
endmodule |
/trunk/rtl/Module_FixedPointAddtionSubstraction.v
0,0 → 1,67
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
|
//----------------------------------------------------------- |
//Registered incrementer. |
//  SIZE  : width of A and R (defaults to WIDTH) |
//  Clock : R is updated on the rising edge |
//  Reset : present in the port list but not used by this module |
//  A     : input value |
//  R     : A + 1 (truncated to SIZE bits), registered |
module INCREMENT # ( parameter SIZE=`WIDTH ) |
( |
    input wire Clock, |
    input wire Reset, |
    input wire[SIZE-1:0] A, |
    output reg [SIZE-1:0] R |
); |
always @ (posedge Clock) |
begin |
    //Non-blocking: R is a clocked register. A blocking assignment here can |
    //race with other posedge-Clock processes that read R in simulation. |
    R <= A + 1; |
end |
|
|
endmodule |
//----------------------------------------------------------- |
//Registered adder/subtractor. |
//  A, B        : LONG_WIDTH-bit operands |
//  iOperation  : 0 -> R = A + B ; 1 -> R = A + (~B + 1), i.e. A - B in two's complement |
//  iInputReady : operands are valid; while it is low R is loaded with 64'hFFFFFFFF |
//  R           : result, updated on the rising edge of Clock |
//  OutputReady : iInputReady passed through the FFOutputReadyDelay2 flop |
//  Reset       : only used as the Clear input of FFOutputReadyDelay2; R is not reset |
module FixedAddSub |
( |
    input wire Clock, |
    input wire Reset, |
    input wire[`LONG_WIDTH-1:0] A, |
    input wire[`LONG_WIDTH-1:0] B, |
    output reg[`LONG_WIDTH-1:0] R, |
    input wire iOperation, |
    input wire iInputReady, //Is the input data valid? |
    output wire OutputReady //Our output data is ready! |
); |
|
//Second operand, negated when a subtraction is requested |
wire [`LONG_WIDTH-1:0] wB; |
|
assign wB = ( iOperation ) ? ~B + 1'b1 : B; |
|
//The result takes one cycle, so the ready flag is the valid flag through one flop |
FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2 |
( |
    .Clock( Clock ), |
    .Clear( Reset ), |
    .D( iInputReady ), |
    .Q( OutputReady ) |
); |
|
|
//------------------------------- |
//Result register. Non-blocking assignments are used because R is clocked; |
//blocking ones can race with other posedge-Clock readers of R in simulation. |
always @ (posedge Clock) |
begin |
|
    if (iInputReady == 1) |
    begin |
        R <= ( A + wB ); |
    end |
    else |
    begin |
        //Filler value while no valid input is present. The literal is 64 bits |
        //wide, so only its low 32 bits are ones. |
        R <= 64'hFFFFFFFF; |
    end |
|
end // always |
|
endmodule |
/trunk/rtl/Module_Host.v
0,0 → 1,726
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
|
|
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
/******************************************************************************* |
Module Description: |
|
WIP |
|
*******************************************************************************/ |
|
|
|
//Divisor used by wLastVertexInFrame: the flag is set when the vertex count is a multiple of this value. |
`define MAX_VERTEX_IN_FRAME `WIDTH'd7 // WAS 8'd6 |
//Values driven on TGA_O: the write targets instruction memory or data memory. |
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b01 |
`define TAG_DATA_ADDRESS_TYPE 2'b10 |
//Values driven on oMemSelect (a 3-bit output) to pick the source memory being read. |
`define SELECT_INST_MEM 3'b00 |
`define SELECT_SCENE_MEM 3'b01 |
`define SELECT_GEO_MEM 3'b10 |
| 
| 
//State encodings of the Module_Host FSM (rHostCurrentState / rHostNextState). |
//The value 9 is not assigned to any state. |
`define HOST_IDLE 0 |
`define HOST_WRITE_INSTRUCTION 1 |
`define HOST_WAIT_INSTRUCTION 2 |
`define HOST_WRITE_SCENE_PARAMS 3 |
`define HOST_WAIT_SCENE_PARAMS 4 |
`define HOST_PREPARE_CORE_CONFIG 5 |
`define HOST_UNICAST_CORE_CONFIG 6 |
`define HOST_WAIT_CORE_CONFIG 7 |
`define HOST_PREPARE_NEXT_CORE_CONFIG 8 |
`define HOST_WAIT_DATA_READ_CONFIRMATION 10 |
`define HOST_BROADCAST_NEXT_VERTEX 11 |
`define HOST_WAIT_FOR_VERTEX 12 |
`define HOST_INITIAL_SCENE_PARAMS_STAGE 13 |
`define HOST_PREPARE_FOR_GEO_REQUESTS 14 |
`define HOST_ACK_GEO_REQUEST 15 |
`define HOST_GET_PRIMITIVE_COUNT 16 |
`define HOST_LAST_PRIMITIVE_REACHED 17 |
`define HOST_GPU_EXECUTION_DONE 18 |
|
//--------------------------------------------------------------- |
//Host controller. It reads words from an external memory (oReadAddress / |
//iReadData, memory chosen with oMemSelect) and writes them to the cores |
//through the WBMaster instance. The FSM below sequences the transfers: |
//  1. broadcast the instructions to all cores, |
//  2. broadcast the scene parameters, |
//  3. unicast the per-core configuration, one core at a time, |
//  4. broadcast the geometry frame by frame, handshaking with the cores through |
//     oHostDataAvailable / iHostDataReadConfirmed / GACK_O, until iGPUDone. |
module Module_Host |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iEnable, |
    input wire iHostDataReadConfirmed, |
    input wire [`WB_WIDTH-1:0] iMemorySize, |
    input wire [`WB_WIDTH-1:0] iPrimitiveCount, |
|
    //To Memory |
    output wire [`WB_WIDTH-1:0] oReadAddress, |
    input wire [`WB_WIDTH-1:0] iReadData, |
    input wire iGPUCommitedResults, |
|
    //To Hub/Switch |
    output wire [`MAX_CORES-1:0] oCoreSelectMask, |
    output reg [2:0] oMemSelect, |
    output wire [`WB_WIDTH-1:0] DAT_O, |
    output wire [`WB_WIDTH-1:0] ADR_O, |
    output reg[1:0] TGA_O, |
    output reg[`MAX_CORES-1:0] RENDREN_O, |
    output wire CYC_O, |
    output wire STB_O, |
    output reg MST_O, |
    output wire WE_O, |
    input wire GRDY_I, //This means all the cores are done reading the primitive we sent |
    output reg GACK_O, //We set this to ACK that the cores read the primitive |
    output wire STDONE_O, |
    output reg oHostDataAvailable, |
    input wire iGPUDone, |
`ifndef NO_DISPLAY_STATS |
    input wire [`WIDTH-1:0] iDebugWidth, |
`endif |
    input wire ACK_I |
); |
//--------------------------------------------------------------- |
//All nets and registers are declared before their first use. |
wire wWBMDone; |
reg rWBMEnable,rWBMReset,rCoreBroadCast; |
reg [`WB_WIDTH-1:0] rInitiaReadAddr; |
wire [`MAX_CORES-1:0] wCoreSelect; |
wire wLastValidReadAddress; |
wire [`WB_WIDTH-1:0] wWriteAddress; |
wire [`WIDTH-1:0] wVertexCount; |
reg [`WB_WIDTH-1:0] rInitialWriteAddress; |
reg rSetWriteAddr; |
reg rIncCoreSelect,rResetVertexCount; |
wire wLastPrimitive; |
wire wLastParameter; |
wire wShortCycle; |
//-------------------------------------------------------- |
|
//Set once the vertex counter has reached the primitive count; also exported as STDONE_O |
assign wLastPrimitive = (wVertexCount >= iPrimitiveCount) ? 1'b1 : 1'b0; |
assign STDONE_O = wLastPrimitive; |
|
//Write enable follows the master signal |
assign WE_O = MST_O; |
|
//Broadcast selects every core, otherwise the one-hot wCoreSelect is used |
assign oCoreSelectMask = |
    (rCoreBroadCast) ? `SELECT_ALL_CORES : wCoreSelect; |
|
//The read pointer has reached the size reported for the memory being read |
assign wLastValidReadAddress = |
    (oReadAddress >= iMemorySize) ? 1'b1 : 1'b0; |
|
//The read pointer has reached address 0x12, which ends the scene parameter transfer |
assign wLastParameter = (oReadAddress >= 32'h12) ? 1'b1 : 1'b0; |
//-------------------------------------------------------- |
//Write address at the cores: reloaded with rInitialWriteAddress when |
//rSetWriteAddr is set, enabled on every completed WBM cycle. |
UPCOUNTER_POSEDGE # (`WB_WIDTH ) UPWADDR |
( |
    .Clock( Clock ), |
    .Reset( Reset | rSetWriteAddr ), |
    .Enable( iEnable & wWBMDone ), |
    .Initial( rInitialWriteAddress ), |
    .Q( wWriteAddress ) |
); |
|
//Vertex counter: starts from 1, enabled on every completed WBM cycle. |
//Its width is WIDTH so that it matches both Initial and wVertexCount. |
UPCOUNTER_POSEDGE # ( `WIDTH ) PRIMCOUNT |
( |
    .Clock( Clock ), |
    .Reset( Reset | rResetVertexCount ), |
    .Enable( iEnable & wWBMDone ), |
    .Initial( `WIDTH'b1 ), |
    .Q( wVertexCount ) |
); |
//-------------------------------------------------------- |
//One-hot core selector for unicast, advanced by rIncCoreSelect |
CIRCULAR_SHIFTLEFT_POSEDGE_EX # (`MAX_CORES ) SHF1 |
( |
    .Clock( Clock ), |
    .Reset( Reset ), |
    .Initial( `MAX_CORES'b1 ), |
    .Enable( rIncCoreSelect ), |
    .O( wCoreSelect ) |
); |
//-------------------------------------------------------- |
//For instructions we send 2 packets per cycle, |
//for the others we send 3 packets per cycle |
assign wShortCycle = (oMemSelect == `SELECT_INST_MEM) ? 1'b1 : 1'b0; |
|
WBMaster WBM |
( |
    .Clock( Clock ), |
    .Reset( Reset | rWBMReset ), |
    .iEnable( rWBMEnable ), |
    .iInitialReadAddr( rInitiaReadAddr ), |
    .iWriteAddr( wWriteAddress ), |
    .oReadAddress( oReadAddress ), |
    .iReadData( iReadData ), |
    .iShortFlow( wShortCycle ), |
|
    .STB_O( STB_O ), |
    .ACK_I( ACK_I ), |
    .CYC_O( CYC_O ), |
    .DAT_O( DAT_O ), |
    .ADR_O( ADR_O ), |
    .oDone( wWBMDone ) |
); |
|
//-------------------------------------------------------- |
// Current State Logic // |
reg [7:0] rHostCurrentState,rHostNextState; |
always @(posedge Clock or posedge Reset) |
begin |
    if( Reset!=1 ) |
        rHostCurrentState <= rHostNextState; |
    else |
        rHostCurrentState <= `HOST_IDLE; |
end |
//-------------------------------------------------------- |
|
//Simulation bookkeeping for the progress/statistics messages |
reg [63:0] i; |
reg [63:0] RenderedPixels; |
reg [31:0] StartTime; |
|
//Set when the vertex count is a multiple of MAX_VERTEX_IN_FRAME |
wire wLastVertexInFrame; |
assign wLastVertexInFrame = |
    (wVertexCount % `MAX_VERTEX_IN_FRAME == 1'b0 ) ? 1'b1 : 1'b0; |
|
// WAS ((wVertexCount % `MAX_VERTEX_IN_FRAME) == 1'b0 && wVertexCount != 0) ? 1'b1 : 1'b0; |
|
// Host Finite State Machine // |
//Combinational decode. Every state assigns every control signal exactly |
//once, so no latch is implied for them and no intermediate value is ever |
//driven. RenderedPixels, StartTime and i are simulation-only variables. |
//STDONE_O is not driven here; it is the wire assigned from wLastPrimitive. |
always @( * ) |
begin |
|
    case (rHostCurrentState) |
    //---------------------------------------- |
    //Wait for the reset sequence to complete, |
    //or until we are enabled |
    `HOST_IDLE: |
    begin |
        RenderedPixels = 0; |
|
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size |
        rWBMReset            = 0; |
        oMemSelect           = 0; |
        TGA_O                = 0; |
        MST_O                = 0; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if ( ~Reset & iEnable ) |
        begin |
            $display("-I- HOST: Broadcasting User code to all Cores\n"); $fflush; |
            rHostNextState = `HOST_WRITE_INSTRUCTION; |
        end |
        else |
            rHostNextState = `HOST_IDLE; |
    end |
    //---------------------------------------- |
    //Broadcast the instructions to all the cores |
    `HOST_WRITE_INSTRUCTION: |
    begin |
|
        //Re-sampled on every visit to this state, i.e. once per instruction packet |
        StartTime = $time; |
|
        rWBMEnable           = 1; //Enable the Wishbone master |
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size |
        rWBMReset            = 0; //No need to reset since we just came from reset |
        oMemSelect           = `SELECT_INST_MEM; //Start by sending the instructions |
        TGA_O                = `TAG_INSTRUCTION_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_WAIT_INSTRUCTION; |
    end |
    //---------------------------------------- |
    //Wait for the WBM cycle to finish; loop until the last instruction address |
    `HOST_WAIT_INSTRUCTION: |
    begin |
        rWBMEnable           = ~wWBMDone; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_INST_MEM; |
        TGA_O                = `TAG_INSTRUCTION_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if ( wWBMDone && ~wLastValidReadAddress ) |
            rHostNextState = `HOST_WRITE_INSTRUCTION; |
        else if (wWBMDone && wLastValidReadAddress ) |
            rHostNextState = `HOST_INITIAL_SCENE_PARAMS_STAGE; |
        else |
            rHostNextState = `HOST_WAIT_INSTRUCTION; |
    end |
    //---------------------------------------- |
    //Make sure the read pointer points to the first |
    //memory address of the scene parameter memory |
    `HOST_INITIAL_SCENE_PARAMS_STAGE: |
    begin |
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size |
        rWBMReset            = 1; //Reload the WBM read pointer |
        oMemSelect           = `SELECT_SCENE_MEM; //We are reading from the scene memory |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We will write to the DATA section of the core MEM |
        MST_O                = 1; //Keep master signal in 1 for now |
        rInitialWriteAddress = 0; //We start writing from address zero now |
        rSetWriteAddr        = 1; //Load the write address above |
        rCoreBroadCast       = 1; //The scene parameters are broadcast |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        $display("-I- HOST: Configuring Core Mask %b\n",oCoreSelectMask); $fflush; |
|
        rHostNextState = `HOST_WRITE_SCENE_PARAMS; |
    end |
    //---------------------------------------- |
    //Broadcast the scene parameters to all the cores |
    `HOST_WRITE_SCENE_PARAMS: |
    begin |
        rWBMEnable           = 1; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_SCENE_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_WAIT_SCENE_PARAMS; |
    end |
    //---------------------------------------- |
    //Wait for the WBM cycle to finish; loop until the last scene parameter |
    `HOST_WAIT_SCENE_PARAMS: |
    begin |
        rWBMEnable           = ~wWBMDone; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_SCENE_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if ( wWBMDone && ~wLastParameter ) |
            rHostNextState = `HOST_WRITE_SCENE_PARAMS; |
        else if (wWBMDone && wLastParameter ) |
            rHostNextState = `HOST_PREPARE_CORE_CONFIG; |
        else |
            rHostNextState = `HOST_WAIT_SCENE_PARAMS; |
    end |
    //---------------------------------------- |
    //Set the write address pointer to CREG_PIXEL_2D_INITIAL_POSITION |
    //and keep the scene MEM selected at the external MEM MUX. |
    `HOST_PREPARE_CORE_CONFIG: |
    begin |
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_SCENE_MEM; //We are reading from the scene memory |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We will write to the DATA section of the core MEM |
        MST_O                = 1; //Keep master signal in 1 for now |
        rInitialWriteAddress = `CREG_PIXEL_2D_INITIAL_POSITION; //The address from which to start writing at the cores |
        rSetWriteAddr        = 1; //Load the initial write address above |
        rCoreBroadCast       = 0; //Unicast, using the current wCoreSelect |
        rIncCoreSelect       = 0; //Stay on the current core |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_UNICAST_CORE_CONFIG; |
    end |
    //---------------------------------------- |
    //Send one configuration packet to the selected core |
    `HOST_UNICAST_CORE_CONFIG: |
    begin |
        rWBMEnable           = 1; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_SCENE_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_WAIT_CORE_CONFIG; |
    end |
    //---------------------------------------- |
    //Wait for the WBM cycle; the parity of the read address decides whether |
    //another packet goes to the same core or the next core is configured |
    `HOST_WAIT_CORE_CONFIG: |
    begin |
        rWBMEnable           = ~wWBMDone; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_SCENE_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        $display("%dns wWBMDone = %d (oReadAddress mod 2) = %d",$time,wWBMDone,(oReadAddress % 2)); |
|
        if (wWBMDone && !(oReadAddress % 2)) |
            rHostNextState = `HOST_UNICAST_CORE_CONFIG; |
        else if (wWBMDone && (oReadAddress % 2) ) |
            rHostNextState = `HOST_PREPARE_NEXT_CORE_CONFIG; |
        else |
            rHostNextState = `HOST_WAIT_CORE_CONFIG; |
|
    end |
    //---------------------------------------- |
    //Advance the core selector and rewind the write address for the next |
    //core; once the last core is selected move on to the geometry. |
    `HOST_PREPARE_NEXT_CORE_CONFIG: |
    begin |
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_GEO_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 0; //The master signal goes to zero until request |
        rInitialWriteAddress = `CREG_PIXEL_2D_INITIAL_POSITION; //Write starting from this location on the cores |
        rSetWriteAddr        = 1; //Load the initial write address above |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 1; //Moving to configure the next core now |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if (wCoreSelect[`MAX_CORES-1] == 1) |
            rHostNextState = `HOST_PREPARE_FOR_GEO_REQUESTS; |
        else |
            rHostNextState = `HOST_UNICAST_CORE_CONFIG; |
    end |
    //---------------------------------------- |
    //Rewind the read pointer, the write address and the |
    //vertex counter before the geometry is (re)sent. |
    `HOST_PREPARE_FOR_GEO_REQUESTS: |
    begin |
        rWBMEnable           = 0; //Do not enable until we are requested |
        rInitiaReadAddr      = 32'hA; //Start reading from address 0xA at the GEO MEM |
        rWBMReset            = 1; //Tell the WBM to start reading from the address above |
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores |
        MST_O                = 0; //The master signal goes to zero until request |
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores |
        rSetWriteAddr        = 1; //Load the initial write address above |
        rCoreBroadCast       = 1; //From now on we only broadcast |
        rIncCoreSelect       = 0; //Ignored during broadcasts |
        RENDREN_O            = 0; |
        rResetVertexCount    = 1; //Restart the vertex counter |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if (iGPUDone) |
            rHostNextState = `HOST_GPU_EXECUTION_DONE; |
        else |
            rHostNextState = `HOST_BROADCAST_NEXT_VERTEX; |
|
    end |
    //---------------------------------------- |
    //Acknowledge that the cores read the previous data, then send the next |
    `HOST_ACK_GEO_REQUEST: |
    begin |
        rWBMEnable           = 0; //Do not enable until we are requested |
        rInitiaReadAddr      = 0; //Ignored |
        rWBMReset            = 0; //Ignored |
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores |
        MST_O                = 0; //The master signal goes to zero until request |
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores |
        rSetWriteAddr        = 1; //Load the initial write address above |
        rCoreBroadCast       = 1; //From now on we only broadcast |
        rIncCoreSelect       = 0; //Ignored during broadcasts |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 1; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_BROADCAST_NEXT_VERTEX; |
|
    end |
    //---------------------------------------- |
    //Send the next primitive to the HUB/SWITCH unit |
    //so that it gets broadcast to all the cores |
    `HOST_BROADCAST_NEXT_VERTEX: |
    begin |
        rWBMEnable           = 1; //Start the transmission |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_GEO_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; //Start the transmission |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = `SELECT_ALL_CORES; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_WAIT_FOR_VERTEX; |
|
    end |
    //---------------------------------------- |
    //Wait for the WBM cycle; keep sending until the last vertex of the frame |
    `HOST_WAIT_FOR_VERTEX: |
    begin |
        rWBMEnable           = ~wWBMDone; //Disable the WBM when it is done |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_GEO_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; //Transmission in progress |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = `SELECT_ALL_CORES; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if (wWBMDone & ~wLastVertexInFrame ) |
            rHostNextState = `HOST_BROADCAST_NEXT_VERTEX; |
        else if (wWBMDone & wLastVertexInFrame ) |
            rHostNextState = `HOST_GET_PRIMITIVE_COUNT; |
        else |
            rHostNextState = `HOST_WAIT_FOR_VERTEX; |
|
        /* |
        if (wWBMDone) |
        rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION; |
        else |
        rHostNextState = `HOST_WAIT_FOR_VERTEX; |
        */ |
    end |
    //---------------------------------------- |
    //A frame was sent: check whether it was the last primitive |
    `HOST_GET_PRIMITIVE_COUNT: |
    begin |
        rWBMEnable           = 0; //WBM idle |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = `SELECT_GEO_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = `SELECT_ALL_CORES; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0;//1; |
|
        //wLastPrimitive is (wVertexCount >= iPrimitiveCount) |
        if (wLastPrimitive) |
            rHostNextState = `HOST_LAST_PRIMITIVE_REACHED; |
        else |
            rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION; |
|
    end |
    //---------------------------------------- |
    //We wait until all the cores are ready for the next primitive; |
    //this happens when the iHostDataReadConfirmed signal gets asserted |
    `HOST_WAIT_DATA_READ_CONFIRMATION: |
    begin |
        rWBMEnable           = 0; //Do not enable until we are requested |
        rInitiaReadAddr      = 0; //Ignored |
        rWBMReset            = 0; //Continue from previous read address |
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores |
        MST_O                = 0; //The master signal goes to zero until request |
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores |
        rSetWriteAddr        = 1; //Load the initial write address above |
        rCoreBroadCast       = 1; //From now on we only broadcast |
        rIncCoreSelect       = 0; //Ignored during broadcasts |
        RENDREN_O            = `SELECT_ALL_CORES; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 1; |
|
        if ( iHostDataReadConfirmed ) |
            rHostNextState = `HOST_ACK_GEO_REQUEST; |
        else |
            rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION; |
    end |
    //---------------------------------------- |
    //All primitives were sent: rewind the read pointer and wait |
    //until the GPU reports that it committed its results |
    `HOST_LAST_PRIMITIVE_REACHED: |
    begin |
        rWBMEnable           = 0; //WBM idle |
        rInitiaReadAddr      = 32'hA; //Rewind the read pointer to the first primitive |
        rWBMReset            = 1; //Reload the read pointer with the address above |
        oMemSelect           = `SELECT_GEO_MEM; |
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; |
        MST_O                = 1; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 1; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = `SELECT_ALL_CORES; |
        rResetVertexCount    = 0; //The counter is restarted later, in HOST_PREPARE_FOR_GEO_REQUESTS |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        if (iGPUCommitedResults) |
        begin |
|
`ifndef NO_DISPLAY_STATS |
            //Progress display: one dot per core, row number at the end of each row |
            for (i = 0; i < `MAX_CORES; i = i + 1) |
            begin |
                $write("."); |
            end |
            RenderedPixels = RenderedPixels + `MAX_CORES; |
            if ( RenderedPixels % iDebugWidth == 0) |
                $write("]%d\n[",RenderedPixels / iDebugWidth); |
`endif |
|
            rHostNextState = `HOST_PREPARE_FOR_GEO_REQUESTS; |
        end |
        else |
            rHostNextState = `HOST_LAST_PRIMITIVE_REACHED; |
    end |
    //---------------------------------------- |
    //Terminal state: everything idle |
    `HOST_GPU_EXECUTION_DONE: |
    begin |
        $display("THEIA Execution done in %dns\n",$time-StartTime); |
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = 0; |
        TGA_O                = 0; |
        MST_O                = 0; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_GPU_EXECUTION_DONE; |
    end |
    //---------------------------------------- |
    //Unknown encoding: everything idle, go back to HOST_IDLE |
    default: |
    begin |
|
        rWBMEnable           = 0; |
        rInitiaReadAddr      = 0; |
        rWBMReset            = 0; |
        oMemSelect           = 0; |
        TGA_O                = 0; |
        MST_O                = 0; |
        rInitialWriteAddress = 0; |
        rSetWriteAddr        = 0; |
        rCoreBroadCast       = 0; |
        rIncCoreSelect       = 0; |
        RENDREN_O            = 0; |
        rResetVertexCount    = 0; |
        GACK_O               = 0; |
        oHostDataAvailable   = 0; |
|
        rHostNextState = `HOST_IDLE; |
    end |
    //---------------------------------------- |
    endcase |
end |
|
endmodule |
/trunk/rtl/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//Entry point unit. |
//  iTrigger            : request input |
//  iInitialCodeAddress : address presented on oInstructionAddr while oTriggerIFU is low |
//  iIMemInput          : instruction memory word; its low ROM_ADDRESS_WIDTH bits become the entry point |
//  oTriggerIFU         : iTrigger after the FFD1 flop |
//  oEntryPoint         : low bits of iIMemInput while oTriggerIFU is high, zero otherwise |
//  oInstructionAddr    : oEntryPoint while oTriggerIFU is high, iInitialCodeAddress otherwise |
//  oEPU_Busy           : high while either iTrigger or oTriggerIFU is high |
module InstructionEntryPoint |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iTrigger, |
    input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
    input wire [`INSTRUCTION_WIDTH-1:0] iIMemInput, |
|
    output wire oEPU_Busy, |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oEntryPoint, |
    output wire oTriggerIFU, |
    output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionAddr |
); |
|
//Registered copy of the trigger |
FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1 |
( |
    .D( iTrigger ), |
    .Q( oTriggerIFU ), |
    .Clock( Clock ), |
    .Clear( Reset ) |
); |
|
//Busy from the request until the registered trigger has gone low again |
assign oEPU_Busy = iTrigger | oTriggerIFU; |
|
//Entry point taken from the memory word, gated by the registered trigger |
assign oEntryPoint = |
    (oTriggerIFU) ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0] : `ROM_ADDRESS_WIDTH'b0; |
|
//Address mux: entry point while triggered, initial code address otherwise |
assign oInstructionAddr = |
    (oTriggerIFU) ? oEntryPoint : iInitialCodeAddress; |
|
endmodule |
/trunk/rtl/Module_HostWBM.v
0,0 → 1,70
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//--------------------------------------------------------------- |
//Wishbone master used by the host. |
//  iEnable          : runs the master; also driven out as CYC_O |
//  iShortFlow       : selects the 3-position sequencer instead of the 4-position one for oDone |
//  iInitialReadAddr : Initial value of the read address counter |
//  iWriteAddr       : forwarded unchanged to ADR_O |
//  oReadAddress     : read address counter, enabled while iEnable and ACK_I are both high |
//  iReadData        : forwarded unchanged to DAT_O |
//  STB_O            : registered; the complement of ACK_I while enabled, 0 otherwise |
//  oDone            : top bit of the selected sequencer |
module WBMaster |
( |
    input wire Clock, |
    input wire Reset, |
    input wire iEnable, |
    input wire iShortFlow, |
    input wire [`WB_WIDTH-1:0] iInitialReadAddr, |
    input wire [`WB_WIDTH-1:0] iWriteAddr, |
    output wire [`WB_WIDTH-1:0] oReadAddress, |
    input wire [`WB_WIDTH-1:0] iReadData, |
|
    output reg STB_O, |
    input wire ACK_I, |
    output wire CYC_O, |
    output wire [`WB_WIDTH-1:0] DAT_O, |
    output wire [`WB_WIDTH-1:0] ADR_O, |
    output wire oDone |
); |
|
//One-hot sequencers, one per flow length |
wire [2:0] wXYZSel_Short; |
wire [3:0] wXYZSel_Long; |
|
//Pass-through bus signals |
assign CYC_O = iEnable; |
assign ADR_O = iWriteAddr; |
assign DAT_O = iReadData; |
|
//Done flag: last position of the sequencer that matches the flow length |
assign oDone = (iShortFlow) ? wXYZSel_Short[2] : wXYZSel_Long[3]; |
|
//Read pointer, enabled on every acknowledged transfer |
UPCOUNTER_POSEDGE # (`WB_WIDTH) WBM_O_READ_ADDRESS |
( |
    .Clock( Clock ), |
    .Reset( Reset ), |
    .Initial( iInitialReadAddr ), |
    .Enable( iEnable & ACK_I ), |
    .Q( oReadAddress ) |
); |
|
//4-position sequencer (long flow) |
CIRCULAR_SHIFTLEFT_POSEDGE #(4) SHL |
( |
    .Clock( Clock ), |
    .Reset( Reset ), |
    .Initial( 4'b1 ), |
    .Enable( ACK_I & iEnable ), |
    .O( wXYZSel_Long ) |
); |
|
//3-position sequencer (short flow) |
CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL2 |
( |
    .Clock( Clock ), |
    .Reset( Reset ), |
    .Initial( 3'b1 ), |
    .Enable( ACK_I & iEnable ), |
    .O( wXYZSel_Short ) |
); |
|
//Strobe register: no reset term, it is cleared by iEnable going low |
always @ (posedge Clock) |
begin |
    if (iEnable ) |
    begin |
        STB_O <= ~ACK_I; |
    end |
    else |
    begin |
        STB_O <= 0; |
    end |
end |
|
|
endmodule |
/trunk/rtl/TestBench_THEIA.v
0,0 → 1,345
|
|
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
/******************************************************************************* |
Module Description: |
|
This is the main test bench of the GPU. It simulates the behavior of |
an external control unit or CPU that sends configuration information into the DUT. |
It also implements a second process that simulates a Wishbone slave that sends |
data from an external memory. These blocks are just behavioral CTE and therefore |
are not meant to be synthesized. |
|
*******************************************************************************/ |
|
|
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//Resolution: scene parameters 13 and 14, shifted right by SCALE bits. |
`define RESOLUTION_WIDTH (rSceneParameters[13] >> `SCALE) |
`define RESOLUTION_HEIGHT (rSceneParameters[14] >> `SCALE) |
//The value 1 shifted left by SCALE bits -- presumably a one-pixel step in fixed point; confirm against the code that uses it. |
`define DELTA_ROW (32'h1 << `SCALE) |
`define DELTA_COL (32'h1 << `SCALE) |
//Upper index of the rTextures array (256*256 entries times 3). |
`define TEXTURE_BUFFER_SIZE (256*256*3) |
//Bound used to size the screen buffer below. |
`define MAX_WIDTH 200 |
//Number of bytes in the rScreen array: MAX_WIDTH squared, times 3. |
`define MAX_SCREENBUFFER (`MAX_WIDTH*`MAX_WIDTH*3) |
module TestBench_Theia; |
|
|
//------------------------------------------------------------------------ |
//**WARNING: Declare all of your variables at the beginning |
//of the file. I have noticed that sometimes the Verilog |
//simulator allows you to use some regs even if they have not been |
//previously declared, leading to crashes or unexpected behavior |
// Inputs |
reg Clock; |
reg Reset; |
wire [`WB_WIDTH-1:0] DAT_O; |
reg ACK_O; |
wire ACK_I; |
wire [`WB_WIDTH-1:0] ADR_I,ADR_O; |
wire WE_I,STB_I; |
wire CYC_O,WE_O,TGC_O,STB_O; |
wire [1:0] TGA_O; |
wire [1:0] TGA_I; |
reg [`WB_WIDTH-1:0] TMADR_O,TMDAT_O; |
reg [`MAX_TMEM_BANKS-1:0] TMSEL_O; |
reg TMWE_O; |
reg [31:0] rControlRegister[2:0]; |
integer file, log; |
reg [31:0] rSceneParameters[120:0]; |
reg [31:0] rVertexBuffer[7000:0]; |
reg [31:0] rInstructionBuffer[512:0]; |
reg [31:0] rTextures[`TEXTURE_BUFFER_SIZE:0]; //Lets asume we use 256*256 textures |
reg [7:0] rScreen[`MAX_SCREENBUFFER-1:0]; |
wire [`MAX_CORES-1:0] wCoreSelect; |
wire [3:0] CYC_I,GNT_O; |
wire MST_O; |
wire wDone; |
wire [`MAX_CORES-1:0] RENDREN_O; |
reg [`MAX_CORE_BITS-1:0] wOMEMBankSelect; |
reg [`WB_WIDTH-1:0] wOMEMReadAddr; //Output adress (relative to current bank) |
wire [`WB_WIDTH-1:0] wOMEMData; //Output data bus (Wishbone) |
reg rHostEnable; |
integer k,out2; |
wire GRDY_I; |
wire GACK_O; |
wire STDONE_O; |
wire wGPUCommitedResults; |
wire wHostDataAvailable; |
|
|
THEIA GPU |
( |
.CLK_I( Clock ), |
.RST_I( Reset ), |
.RENDREN_I( RENDREN_O ), |
.DAT_I( DAT_O ), |
.ACK_I( ACK_O ), |
|
.CYC_I( CYC_O ), |
.MST_I( MST_O ), |
.TGA_I( TGA_O ), |
.ACK_O( ACK_I ), |
.ADR_I( ADR_O ), |
.WE_I( WE_O ), |
.SEL_I( wCoreSelect ), |
.STB_I( STB_O ), |
|
//Output memory |
.OMBSEL_I( wOMEMBankSelect ), |
.OMADR_I( wOMEMReadAddr ), |
.OMEM_O( wOMEMData ), |
.TMDAT_I( TMDAT_O ), |
.TMADR_I( TMADR_O ), |
.TMWE_I( TMWE_O ), |
.TMSEL_I( TMSEL_O ), |
|
.HDL_O( GRDY_I ), |
.HDLACK_I( GACK_O ), |
.STDONE_I( STDONE_O ), |
.RCOMMIT_O( wGPUCommitedResults ), |
.HDA_I( wHostDataAvailable ), |
|
//Control register |
.CREG_I( rControlRegister[0][15:0] ), |
//Other stuff |
.DONE_O( wDone ) |
|
); |
|
wire[`WB_WIDTH-1:0] wHostReadAddress; |
wire[`WB_WIDTH-1:0] wHostReadData; |
wire[`WB_WIDTH-1:0] wMemorySize; |
wire[1:0] wMemSelect; |
|
MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX1 |
( |
.Sel( wMemSelect ), |
.I1( rInstructionBuffer[wHostReadAddress] ), |
.I2( rSceneParameters[wHostReadAddress] ), |
.I3( rVertexBuffer[wHostReadAddress] ), |
.I4(0), |
.O1(wHostReadData) |
); |
|
MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX2 |
( |
.Sel( wMemSelect ), |
.I1( rInstructionBuffer[0] ), |
.I2( rSceneParameters[0] ), |
.I3( rVertexBuffer[0] ), |
.I4(0), |
.O1(wMemorySize) |
); |
|
Module_Host HOST |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( rHostEnable ), |
.oHostDataAvailable( wHostDataAvailable ), |
.iHostDataReadConfirmed( GRDY_I ), |
.iMemorySize( wMemorySize ), |
.iPrimitiveCount( (rVertexBuffer[6]+1) *7 ), //This is wrong I think |
.iGPUCommitedResults( wGPUCommitedResults ), |
.STDONE_O( STDONE_O ), |
.iGPUDone( wDone ), |
|
`ifndef NO_DISPLAY_STATS |
.iDebugWidth( `RESOLUTION_WIDTH ), |
`endif |
|
//To Memory |
.oReadAddress( wHostReadAddress ), |
.iReadData( wHostReadData ), |
|
//To Hub/Switch |
.oCoreSelectMask( wCoreSelect ), |
.oMemSelect( wMemSelect ), |
.DAT_O( DAT_O), |
.ADR_O( ADR_O ), |
.TGA_O( TGA_O ), |
.RENDREN_O( RENDREN_O ), |
.CYC_O( CYC_O ), |
.STB_O( STB_O ), |
.MST_O( MST_O ), |
|
.GRDY_I( GRDY_I ), |
.GACK_O( GACK_O ), |
|
.WE_O( WE_O ), |
|
|
.ACK_I( ACK_I ) |
); |
//--------------------------------------------- |
//generate the clock signal here |
//Free-running clock: Clock is inverted once every `CLOCK_CYCLE time units |
always |
    #`CLOCK_CYCLE Clock = ! Clock; |
//--------------------------------------------- |
|
|
//------------------------------------------------------------------------------------- |
/* |
Writes the rendered frame to the PPM image file once the GPU reports completion |
(wDone == 1), then closes the image and log files and stops the simulation. |
|
For each of the `MAX_CORES output-memory banks it reads `PARTITION_SIZE rows. |
A row is `RESOLUTION_WIDTH pixels of 3 colour components each, and row j of a |
bank starts at address j*`RESOLUTION_WIDTH*3. |
*/ |
//Parenthesised so the macro expands safely inside larger expressions |
`define PARTITION_SIZE (`RESOLUTION_HEIGHT/`MAX_CORES) |
integer i,j,kk; |
reg [31:0] R; |
always @ ( * ) |
begin |
    if (wDone == 1'b1) |
    begin |
        $display("Partition Size = %d",`PARTITION_SIZE); |
        for (kk = 0; kk < `MAX_CORES; kk = kk+1) |
        begin |
            wOMEMBankSelect = kk; |
            $display("wOMEMBankSelect = %d\n",wOMEMBankSelect); |
            for (j=0; j < `PARTITION_SIZE; j=j+1) |
            begin |
                //One row is WIDTH pixels long. The bound used to be |
                //`RESOLUTION_HEIGHT*3, which only matched the row stride used |
                //in the address below when the image was square. |
                for (i = 0; i < `RESOLUTION_WIDTH*3; i = i +1) |
                begin |
                    wOMEMReadAddr = i+j*`RESOLUTION_WIDTH*3; |
                    //Wait one clock period plus one time unit before sampling wOMEMData |
                    #`CLOCK_PERIOD; |
                    #1; |
                    //Drop the low (`SCALE-8) bits and saturate the value at 255 |
                    R = ((wOMEMData >> (`SCALE-8)) > 255) ? 255 : (wOMEMData >> (`SCALE-8)); |
                    $fwrite(out2,"%d " , R ); |
                    |
                    //After the first component of every pixel emit a PPM comment |
                    //holding the pixel column and the row index inside the bank |
                    if ((i %3) == 0) |
                        $fwrite(out2,"\n# %d %d\n",i/3,j); |
                end |
            end |
        end |
        |
        $fclose(out2); |
        $fwrite(log, "Simulation end time : %dns\n",$time); |
        $fclose(log); |
        |
        $stop(); |
    end |
end |
//------------------------------------------------------------------------------------- |
|
reg [15:0] rTimeOut; |
|
// `define MAX_INSTRUCTIONS 2 |
|
//Main stimulus sequence: |
// 1. drive Clock/Reset/rHostEnable to known values, |
// 2. load the control register, scene parameters, geometry, textures and |
//    instructions from their .mem files, |
// 3. open the log file and the PPM output file and write the PPM header, |
// 4. pulse Reset, |
// 5. stream rTextures into the DUT through the TM* signals, |
// 6. set rHostEnable so the host model starts. |
initial begin |
// Initialize Inputs |
|
|
Clock = 0; |
Reset = 0; |
rTimeOut = 0; |
rHostEnable = 0; |
//Read Config register values |
$write("Loading control register.... "); |
$readmemh("Creg.mem",rControlRegister); |
$display("Done"); |
|
|
|
//Read configuration Data |
$write("Loading scene parameters.... "); |
$readmemh("Params.mem", rSceneParameters ); |
$display("Done"); |
|
|
//Read Scene Data |
$write("Loading scene geometry.... "); |
$readmemh("Vertex.mem",rVertexBuffer); |
$display("Done"); |
|
//Same expression that is wired to the host model's iPrimitiveCount port |
$display("Number of primitives(%d): %d",rVertexBuffer[6],(rVertexBuffer[6]+1) *7); |
|
|
//Read Texture Data |
$write("Loading scene texture.... "); |
$readmemh("Textures.mem",rTextures); |
$display("Done"); |
|
|
//Read instruction data |
$write("Loading code allocation table and user shaders.... "); |
$readmemh("Instructions.mem",rInstructionBuffer); |
$display("Done"); |
|
$display("Control Register : %b",rControlRegister[0]); |
$display("Resolution : %d X %d",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT ); |
|
|
//NOTE(review): neither $fopen result (log, out2) is checked before use |
log = $fopen("Simulation.log"); |
$fwrite(log, "Simulation start time : %dns\n",$time); |
$fwrite(log, "Width : %d\n",`RESOLUTION_WIDTH); |
$fwrite(log, "Height : %d\n",`RESOLUTION_HEIGHT); |
|
|
//Open output file |
out2 = $fopen("Output.ppm"); |
|
//PPM header: magic number, a comment line, width and height, maximum colour value |
$fwrite(out2,"P3\n"); |
$fwrite(out2,"#This file was generated by Theia's RTL simulation\n"); |
$fwrite(out2,"%d %d\n",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT ); |
$fwrite(out2,"255\n"); |
|
//Reset is asserted 10 time units after the header is written |
#10 |
Reset = 1; |
|
|
// Wait 100 ns for global reset to finish |
//TMWE_O is driven to 1 here and again after reset is released; the second |
//assignment does not change its value |
TMWE_O = 1; |
#100 Reset = 0; |
TMWE_O = 1; |
|
$display("Intilializing TMEM @ %dns",$time); |
//k starts at 0, so every word of rTextures below `TEXTURE_BUFFER_SIZE is sent. |
//NOTE(review): this comment used to say the loop "starts in 2 to skip Width and |
//Height" - confirm Textures.mem carries no such two-word header. |
for (k = 0;k < `TEXTURE_BUFFER_SIZE; k = k + 1) |
begin |
|
//NOTE(review): the address drops `MAX_CORE_BITS low bits while the select |
//masks with `MAX_TMEM_BANKS-1; these only agree if |
//`MAX_TMEM_BANKS == 2**`MAX_CORE_BITS - confirm in aDefinitions.v |
TMADR_O <= (k >> (`MAX_CORE_BITS)); |
TMSEL_O <= (k & (`MAX_TMEM_BANKS-1)); //X mod 2^n == X & (2^n - 1) |
TMDAT_O <= rTextures[k]; |
//One texture word is presented every 10 time units |
#10; |
end |
$display("Done Intilializing TMEM @ %dns",$time); |
//Texture download finished: drop the write enable and start the host model |
TMWE_O = 0; |
rHostEnable = 1; |
|
end |
|
|
endmodule |
/trunk/rtl/Module_ArithmeticComparison.v
0,0 → 1,64
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//------------------------------------------------------------------ |
// Signed (two's complement) comparator with a registered result. |
// |
// On every rising Clock edge where iInputReady is high, Result is loaded with |
// the outcome of the comparison selected by iOperation and OutputReady is set. |
// When iInputReady is low, OutputReady is cleared and Result keeps its value. |
// |
// iOperation encoding: |
//   3'b000  X >  Y        3'b001  X <  Y |
//   3'b010  X == Y        3'b011  X != Y |
//   3'b100  X >= Y        3'b101  X <= Y |
//   others  Result = 0 |
// |
// Changes with respect to the previous implementation: |
//   * Equality is tested on the operands themselves. It used to compare their |
//     magnitudes, so e.g. X = -5, Y = 5 was reported as equal (which also |
//     corrupted the >= and <= results). |
//   * 3'b001 is now a strict less-than; it used to be ~(X > Y), i.e. X <= Y. |
//   * The sign is taken by $signed over the full `WIDTH instead of bit 31. |
//   * Registers are written with non-blocking assignments. |
module ArithmeticComparison |
( |
    input wire Clock, |
    input wire[`WIDTH-1:0] X,Y, |
    input wire[2:0] iOperation, |
    input wire iInputReady, |
    output reg OutputReady, |
    output reg Result |
); |
|
wire wGreaterThan, wEqual; |
|
//Both operands are interpreted as signed two's complement numbers |
assign wGreaterThan = ( $signed(X) > $signed(Y) ); |
assign wEqual       = ( X == Y ); |
|
always @ ( posedge Clock ) |
begin |
    if (iInputReady) |
    begin |
        case ( iOperation ) |
            3'b000: Result <= wGreaterThan;              //X > Y |
            3'b001: Result <= ~(wGreaterThan | wEqual);  //X < Y |
            3'b010: Result <= wEqual;                    //X == Y |
            3'b011: Result <= ~wEqual;                   //X != Y |
            3'b100: Result <= wGreaterThan | wEqual;     //X >= Y |
            3'b101: Result <= ~wGreaterThan;             //X <= Y |
            default: Result <= 1'b0; |
        endcase |
        OutputReady <= 1'b1; |
    end |
    else |
        OutputReady <= 1'b0; |
end |
|
endmodule |
//--------------------------------------------- |
/trunk/rtl/Module_InstructionFetch.v
0,0 → 1,215
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/********************************************************************************** |
Description: |
This is the instruction fetch unit. |
It gets the next instruction from the IMEM module at the MEM unit. |
It increments the instruction pointer (IP) in such a way that EXE always has |
one instruction per clock cycle (best pipeline performance). In order to achieve this, |
the IFU has 2 instruction pointers, so that in case of 'branch' instructions, |
two instruction pointers are generated and two different instructions are simultaneously |
fetched from IMEM: the branch-taken and branch-not-taken instructions, so that once the |
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched. |
**********************************************************************************/ |
//Instruction fetch unit: drives the two IMEM read addresses (oIP, oIP2), selects |
//which fetched instruction is presented as oCurrentInstruction, and generates the |
//instruction-available and execution-done strobes. |
module InstructionFetch |
( |
input wire Clock, |
input wire Reset, |
input wire iTrigger, |
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch not taken instruction |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction2, //Branch taken instruction |
input wire iBranchTaken, |
output wire oInstructionAvalable, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calcule both decide later |
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction, |
input wire iEXEDone, |
output wire oMicroCodeReturnValue, |
input wire iSubroutineReturn, |
//input wire [`ROM_ADDRESS_WIDTH-1:0] iReturnAddress, |
output wire oExecutionDone |
); |
//Opcode field: the top `INSTRUCTION_OP_LENGTH bits of the current instruction |
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |
|
|
//The return value is bit 0 of the current instruction; the second instruction |
//pointer is taken from bits [47:32] of the current instruction. |
//NOTE(review): the [47:32] slice is hard-coded to 16 bits while oIP2 is |
//`ROM_ADDRESS_WIDTH wide - confirm the two widths match. |
assign oMicroCodeReturnValue = oCurrentInstruction[0]; |
assign oIP2 = oCurrentInstruction[47:32]; |
|
//NOTE(review): wIncrementIP_Delay1 and wIncrementIP_Delay2 are declared here but |
//are not used anywhere else in this module. |
wire wTriggerDelay1,wTriggerDelay2,wIncrementIP_Delay1,wIncrementIP_Delay2, |
wLastInst_Delay1,wLastInst_Delay2; |
wire wIncrementIP,wLastInstruction; |
wire wInstructionAvalable,wSubReturnDelay1,wSubReturnDelay2; |
|
//The instruction flow ends at the RETURN opcode |
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN ); |
|
wire IsCall; |
reg [`ROM_ADDRESS_WIDTH-1:0] rReturnAddress; |
assign IsCall = ( `INSTRUCTION_OPCODE == `CALL ) ? 1'b1 : 1'b0; |
//Return address = address following the CALL, captured when IsCall rises. |
//NOTE(review): IsCall is a combinational opcode decode used as a clock here, so |
//this register is not clocked by Clock and any glitch on IsCall re-latches it. |
always @ (posedge IsCall) |
rReturnAddress <= oIP+1; |
|
//Increment IP 2 cycles after trigger or everytime EXE is done, or 2 cycles after return from sub, but stop if we get to the RETURN |
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction) | wSubReturnDelay2; |
//It takes 1 clock cycle to read the instruction back from IMEM |
|
|
//Instructions become available to IDU: |
//* 2 cycles after IFU is initially triggered |
//* Everytime previous instruction execution is complete except for the last instruction in |
//the flow |
assign wInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2); |
|
|
//FFD22 and FFD23 chain iSubroutineReturn through two flip-flop stages |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD22 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iSubroutineReturn ), |
.Q( wSubReturnDelay1 ) |
); |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD23 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( wSubReturnDelay1 ), |
.Q( wSubReturnDelay2 ) |
); |
//Special case for instruction available pin: if a return from subroutine instruction was issued, |
//then wait 1 cycle before anouncing Instruction available to IDU |
assign oInstructionAvalable = wInstructionAvalable & ~iSubroutineReturn | wSubReturnDelay2; |
|
|
|
|
|
//Once we reach the last instruction, wait until EXE says he is done, then assert oExecutionDone |
assign oExecutionDone = (wLastInstruction & iEXEDone); |
|
//FFD2 and FFD3 chain iTrigger through two flip-flop stages |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iTrigger ), |
.Q( wTriggerDelay1 ) |
); |
|
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( wTriggerDelay1 ), |
.Q( wTriggerDelay2 ) |
); |
|
|
//FFD4 is only enabled while the current instruction is RETURN; FFD5 is always |
//enabled and passes FFD4's output on to wLastInst_Delay2 |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(wLastInstruction), |
.D( oInstructionAvalable ), |
.Q( wLastInst_Delay1 ) |
); |
|
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1),//wLastInstruction), |
.D( wLastInst_Delay1 ), |
.Q( wLastInst_Delay2 ) |
); |
|
wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next; |
|
/* |
In case the branch is taken: |
We point current instruction into the iInstruction2 (branch-taken) instruction |
that corresponds to oIP2. |
Then, in the next clock cycle we should use the oIP2 incremented by one, |
so we need to load UPCOUNTER_POSEDGE with oIP2+1 |
*/ |
|
|
//If the branch was taken, then use the pre-fetched instruction (iInstruction2) |
wire[`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1,wCurrentInstruction_BranchTaken; |
//FFDX stores oCurrentInstruction, enabled only while iBranchTaken is high |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(iBranchTaken), |
.D( oCurrentInstruction ), |
.Q( wCurrentInstruction_Delay1 ) |
); |
|
wire wBranchTaken_Delay1; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iBranchTaken ), |
.Q( wBranchTaken_Delay1 ) |
); |
|
|
//Branch taken (and not a subroutine return): use iInstruction2, else iInstruction1 |
assign wCurrentInstruction_BranchTaken = ( iBranchTaken & ~iSubroutineReturn) ? iInstruction2 : iInstruction1; |
|
//While wBranchTaken_Delay1 is high the instruction stored by FFDX is presented |
//instead of the freshly selected one |
assign oCurrentInstruction = (wBranchTaken_Delay1 ) ? |
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken; |
|
//oIP2_Next is produced from oIP2 by INCREMENT (presumably oIP2 + 1, as the block |
//comment above describes - confirm against the INCREMENT definition) |
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.A( oIP2 ), |
.R( oIP2_Next ) |
); |
|
wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint; |
//assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress; |
|
//rReturnAddress is latched in this module whenever a CALL opcode is decoded |
//(the iReturnAddress port is commented out in the port list). |
//Entry point selection: |
// first cycle of a taken branch, iSubroutineReturn high -> rReturnAddress |
// first cycle of a taken branch, otherwise              -> oIP2_Next |
// any other time                                        -> iInitialCodeAddress |
assign wIPEntryPoint = (iBranchTaken & ~wBranchTaken_Delay1) ? (iSubroutineReturn) ? rReturnAddress : oIP2_Next : iInitialCodeAddress; |
|
|
//The counter's Reset input is asserted on iTrigger or on the first cycle of a |
//taken branch (presumably this loads Initial into the counter - confirm against |
//the UPCOUNTER_POSEDGE definition). Counting is enabled by wIncrementIP except |
//during the first cycle of a taken branch. |
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer |
( |
.Clock( Clock ), |
.Reset(iTrigger | (iBranchTaken & ~wBranchTaken_Delay1)), |
.Enable(wIncrementIP & (~iBranchTaken | wBranchTaken_Delay1 ) ), |
.Initial( wIPEntryPoint ), |
.Q(oIP) |
); |
|
|
endmodule |
|
//------------------------------------------------------------------------------- |
/trunk/rtl/Theia_Core.v
0,0 → 1,430
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
/********************************************************************************** |
Description: |
This is the top level block for THEIA. |
THEIA core has 5 main logical blocks called Units. |
This module implements the interconnections between the Units. |
|
Units: |
> EXE: Manages execution logic for the SHADERS. |
> GEO: Manages geometry data structures. |
> IO: Input/Output (Wishbone). |
> MEM: Internal memory, separate for Instructions and data. |
> CONTROL: Main control Finite state machine. |
|
Internal Buses: |
THEIA has separate instruction and data buses. |
THEIA avoids using tri-state buses by having separate input/output |
for each bus. |
There are 2 separate data buses since the Data memory |
has a Dual read channel. |
Please see the MEM unit chapter in the documentation for more details. |
|
External Buses: |
External buses are managed by the IO Unit. |
External buses follow the wishbone protocol. |
Please see the IO unit chapter in the documentation for more details. |
**********************************************************************************/ |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
module THEIACORE |
( |
|
input wire CLK_I, //Input clock |
input wire RST_I, //Input reset |
//Theia Interfaces |
input wire MST_I, //Master signal, THEIA enters configuration mode |
//when this gets asserted (see documentation) |
//Wish Bone Interface |
input wire [`WB_WIDTH-1:0] DAT_I, //Input data bus (Wishbone) |
output wire [`WB_WIDTH-1:0] DAT_O, //Output data bus (Wishbone) |
input wire ACK_I, //Input ack |
output wire ACK_O, //Output ack |
output wire [`WB_WIDTH-1:0] ADR_O, //Output address |
input wire [`WB_WIDTH-1:0] ADR_I, //Input address |
output wire WE_O, //Output write enable |
input wire WE_I, //Input write enable |
output wire STB_O, //Strobe signal, see wishbone documentation |
input wire STB_I, //Strobe signal, see wishbone documentation |
output wire CYC_O, //Bus cycle signal, see wishbone documentation |
input wire CYC_I, //Bus cycle signal, see wishbone documentation |
output wire [1:0] TGC_O, //Bus cycle tag, see THEAI documentation |
input wire [1:0] TGA_I, //Input address tag, see THEAI documentation |
output wire [1:0] TGA_O, //Output address tag, see THEAI documentation |
input wire [1:0] TGC_I, //Bus cycle tag, see THEAI documentation |
input wire GNT_I, //Bus arbiter 'Granted' signal, see THEAI documentation |
input wire RENDREN_I, |
|
output wire HDL_O, //Data Latched |
input wire HDLACK_I, //Data Latched ACK |
input wire STDONE_I, //Scene traverse complete |
input wire HDA_I, |
output wire RCOMMIT_O, |
|
output wire [`WB_WIDTH-1:0] OMEM_DAT_O, |
output wire [`WB_WIDTH-1:0] OMEM_ADR_O, |
output wire OMEM_WE_O, |
|
input wire TMEM_ACK_I, |
input wire [`WB_WIDTH-1:0] TMEM_DAT_I , |
output wire [`WB_WIDTH-1:0] TMEM_ADR_O , |
output wire TMEM_WE_O, |
output wire TMEM_STB_O, |
output wire TMEM_CYC_O, |
input wire TMEM_GNT_I, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
//Control Register |
input wire [15:0] CREG_I, |
output wire DONE_O |
|
|
); |
|
//When we flip the SMEM, this means we are ready to receive more data |
|
|
//Alias this signals |
wire Clock,Reset; |
assign Clock = CLK_I; |
assign Reset = RST_I; |
|
wire wIO_Busy; |
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__MEM_WriteData; |
wire [`DATA_ROW_WIDTH-1:0] wUCODE_RAMBus; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_wDataWriteAddress; |
wire w2IO__AddrIsImm; |
wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMAddress; |
wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__Adr_O_Pointer; |
wire [`DATA_ADDRESS_WIDTH-1:0] wGEO2_IO__Adr_O_Pointer; |
wire wEXE_2__DataWriteEnable; |
wire wUCODE_RAMWriteEnable; |
//wire [2:0] RamBusOwner; |
//Unit intercoanection wires |
|
wire wCU2__MicrocodeExecutionDone; |
wire [`ROM_ADDRESS_WIDTH-1:0] InitialCodeAddress; |
wire [`ROM_ADDRESS_WIDTH-1:0] wInstructionPointer1,wInstructionPointer2; |
wire [`INSTRUCTION_WIDTH-1:0] wEncodedInstruction1,wEncodedInstruction2,wIO2_MEM__ExternalInstruction; |
wire wCU2__ExecuteMicroCode; |
wire [`ROM_ADDRESS_WIDTH-1:0] wIO2_MEM__InstructionWriteAddr; |
wire [95:0] wMEM_2__EXE_DataRead0, wMEM_2__EXE_DataRead1,wMEM_2__IO_DataRead0, wMEM_2__IO_DataRead1; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_DataReadAddress0,wEXE_2__MEM_DataReadAddress1; |
wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMReadAddress0,wUCODE_RAMReadAddress1; |
|
|
wire [`WIDTH-1:0] w2IO__AddressOffset; |
wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__DataWriteAddress; |
wire w2IO__Store; |
wire w2IO__EnableWBMaster; |
|
wire [`DATA_ADDRESS_WIDTH-1:0] wIO2_MEM__DataWriteAddress; |
wire [`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress0; |
wire [`DATA_ROW_WIDTH-1:0] wIO2_MEM__Bus; |
wire [`WIDTH-1:0] wIO2_MEM__Data; |
wire [`WIDTH-1:0] wIO2_WBM__Address; |
wire wIO2_MEM__DataWriteEnable; |
wire wIO2__Done; |
wire wCU2_GEO__GeometryFetchEnable; |
wire wIFU2__MicroCodeReturnValue; |
wire wCU2_BCU__ACK; |
wire wGEO2_CU__RequestAABBIU; |
wire wGEO2_CU__RequestBIU; |
wire wGEO2_CU__RequestTCC; |
wire wGEO2_CU__GeometryUnitDone; |
wire wGEO2_CU__Sync; |
wire wEXE2__uCodeDone; |
wire wEXE2_IFU__EXEBusy; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination; |
wire wALU2_EXE__BranchTaken; |
wire wALU2_IFU_BranchNotTaken; |
wire w2IO__SetAddress; |
wire wIDU2_IFU__IDUBusy; |
//Control Registe wires |
wire[15:0] wCR2_ControlRegister; |
wire wCR2_TextureMappingEnabled; |
wire wGEO2_CU__TFFDone; |
wire wCU2_GEO__TriggerTFF; |
wire wIO2_MEM_InstructionWriteEnable; |
wire wCU2_IO__WritePixel; |
wire wGEO2_IO__AddrIsImm; |
wire[31:0] wGEO2_IO__AddressOffset; |
wire wGEO2_IO__EnableWBMaster; |
wire wGEO2_IO__SetAddress; |
wire[`WIDTH-1:0] wGEO2__CurrentPitch,wCU2_GEO_Pitch; |
wire wCU2_GEO__SetPitch,wCU2_GEO__IncPicth; |
|
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteAddress; |
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteData; |
wire wEXE_2__IO_OMEMWriteEnable; |
|
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_TMEMAddress; |
wire [`DATA_ROW_WIDTH-1:0] wIO_2_EXE__TMEMData; |
wire wIO_2_EXE__DataAvailable; |
wire wEXE_2_IO__DataRequest; |
wire wCU2_FlipMem; |
wire wCU2_FlipMemEnabled; |
wire w2MEM_FlipMemory; |
wire wGEO2__RequestingTextures; |
wire w2IO_WriteBack_Set; |
wire[`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress1; |
|
`ifdef DEBUG |
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer; |
`endif |
//-------------------------------------------------------- |
|
assign HDL_O = wCU2_FlipMem; |
|
assign wCR2_TextureMappingEnabled = wCR2_ControlRegister[ `CR_EN_TEXTURE ]; |
|
//-------------------------------------------------------- |
//Control Unit Instance |
ControlUnit CU |
( |
.Clock(Clock), |
.Reset(Reset), |
.oFlipMemEnabled( wCU2_FlipMemEnabled ), |
.oFlipMem( wCU2_FlipMem ), |
.iControlRegister( wCR2_ControlRegister ), |
//.oRamBusOwner( RamBusOwner ), |
.oGFUEnable( wCU2_GEO__GeometryFetchEnable ), |
.iTriggerAABBIURequest( wGEO2_CU__RequestAABBIU ), |
.iTriggerBIURequest( wGEO2_CU__RequestBIU ), |
.iTriggertTCCRequest( wGEO2_CU__RequestTCC ), |
.oUCodeEnable( wCU2__ExecuteMicroCode ), |
.oCodeInstructioPointer( InitialCodeAddress ), |
.iUCodeDone( wCU2__MicrocodeExecutionDone ), |
.iIODone( wIO2__Done ), |
.oIOWritePixel( wCU2_IO__WritePixel ), |
.iUCodeReturnValue( wIFU2__MicroCodeReturnValue ), |
.iGEOSync( wGEO2_CU__Sync ), |
.iTFFDone( wGEO2_CU__TFFDone ), |
.oTriggerTFF( wCU2_GEO__TriggerTFF ), |
.MST_I( MST_I ), |
.oSetCurrentPitch( wCU2_GEO__SetPitch ), |
.iGFUDone( wGEO2_CU__GeometryUnitDone ), |
.iRenderEnable( RENDREN_I ), |
.iSceneTraverseComplete( STDONE_I ), |
.oResultCommited( RCOMMIT_O ), |
.iHostDataAvailable( HDA_I ), |
.iHostAckDataRead( HDLACK_I ), |
|
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.oDone( DONE_O ) |
|
); |
|
|
|
|
//-------------------------------------------------------- |
|
//assign w2MEM_FlipMemory = (wCU2__ExecuteMicroCode | wCU2_FlipMem ) & wCU2_FlipMemEnabled; |
assign w2MEM_FlipMemory = wCU2_FlipMem & wCU2_FlipMemEnabled; |
MemoryUnit MEM |
( |
.Clock(Clock), |
.Reset(Reset), |
|
.iFlipMemory( w2MEM_FlipMemory ), |
|
//Data Bus to/from EXE |
.iDataReadAddress1_EXE( wEXE_2__MEM_DataReadAddress0 ), |
.iDataReadAddress2_EXE( wEXE_2__MEM_DataReadAddress1 ), |
.oData1_EXE( wMEM_2__EXE_DataRead0 ), |
.oData2_EXE( wMEM_2__EXE_DataRead1 ), |
.iDataWriteEnable_EXE( wEXE_2__DataWriteEnable ), |
.iDataWriteAddress_EXE( wEXE_2__MEM_wDataWriteAddress ), |
.iData_EXE( wEXE_2__MEM_WriteData ), |
|
//Data Bus to/from IO |
|
.iDataReadAddress1_IO( wIO_2_MEM__DataReadAddress0 ), |
.iDataReadAddress2_IO( wIO_2_MEM__DataReadAddress1 ), |
.oData1_IO( wMEM_2__IO_DataRead0 ), |
.oData2_IO( wMEM_2__IO_DataRead1 ), |
.iDataWriteEnable_IO( wIO2_MEM__DataWriteEnable ), |
.iDataWriteAddress_IO( wIO2_MEM__DataWriteAddress ), |
.iData_IO( wIO2_MEM__Bus ), |
|
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
|
|
//Instruction Bus |
.iInstructionReadAddress1( wInstructionPointer1 ), |
.iInstructionReadAddress2( wInstructionPointer2 ), |
.oInstruction1( wEncodedInstruction1 ), |
.oInstruction2( wEncodedInstruction2 ), |
.iInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ), |
.iInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ), |
.iInstruction( wIO2_MEM__ExternalInstruction ), |
.iControlRegister( CREG_I ), |
.oControlRegister( wCR2_ControlRegister ) |
|
); |
|
////-------------------------------------------------------- |
|
|
ExecutionUnit EXE |
( |
|
.Clock( Clock), |
.Reset( Reset ), |
.iInitialCodeAddress( InitialCodeAddress ), |
.iInstruction1( wEncodedInstruction1 ), |
.iInstruction2( wEncodedInstruction2 ), |
.oInstructionPointer1( wInstructionPointer1 ), |
.oInstructionPointer2( wInstructionPointer2 ), |
.iDataRead0( wMEM_2__EXE_DataRead0 ), |
.iDataRead1( wMEM_2__EXE_DataRead1 ), |
.iTrigger( wCU2__ExecuteMicroCode ), |
.oDataReadAddress0( wEXE_2__MEM_DataReadAddress0 ), |
.oDataReadAddress1( wEXE_2__MEM_DataReadAddress1 ), |
.oDataWriteEnable( wEXE_2__DataWriteEnable ), |
.oDataWriteAddress( wEXE_2__MEM_wDataWriteAddress ), |
.oDataBus( wEXE_2__MEM_WriteData ), |
.oReturnCode( wIFU2__MicroCodeReturnValue ), |
/**************/ |
.oOMEMWriteAddress( wEXE_2__IO_WriteAddress ), |
.oOMEMWriteData( wEXE_2__IO_WriteData ), |
.oOMEMWriteEnable( wEXE_2__IO_OMEMWriteEnable ), |
|
.oTMEMReadAddress( wEXE_2__IO_TMEMAddress ), |
.iTMEMReadData( wIO_2_EXE__TMEMData ), |
.iTMEMDataAvailable( wIO_2_EXE__DataAvailable ), |
.oTMEMDataRequest( wEXE_2_IO__DataRequest ), |
/**************/ |
`ifdef DEBUG |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |
.oDone( wCU2__MicrocodeExecutionDone ) |
|
); |
|
////-------------------------------------------------------- |
|
|
//Glue logic between the control unit, the (absent) geometry unit and the IO unit. |
//NOTE(review): every wGEO2_* net read below is only declared in this module; no |
//instance or assign in THEIACORE drives wGEO2__RequestingTextures, |
//wGEO2_IO__EnableWBMaster, wGEO2_IO__Adr_O_Pointer or wGEO2_IO__SetAddress, so the |
//expressions that select them evaluate from undriven nets. |
assign TGA_O = (wGEO2__RequestingTextures) ? 2'b1: 2'b0; |
//--------------------------------------------------------------------------------------------------- |
|
//assign wEXE_2__MEM_DataReadAddress1 = (wCU2_IO__WritePixel == 0) ? wUCODE_RAMReadAddress1 : wIO_2_MEM__DataReadAddress1; |
//NOTE(review): w2IO__EnableWBMaster is computed here but the IO instance below |
//ties its iEnable port to 1'b0, so this net is currently not connected to anything. |
assign w2IO__EnableWBMaster = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__EnableWBMaster : wCU2_IO__WritePixel; |
//Address type and immediate offset are forced to 0; the previous selection |
//expressions are kept in the trailing comments |
assign w2IO__AddrIsImm = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0; |
assign w2IO__AddressOffset = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0; |
//While the control unit requests a pixel write the address pointer is `OREG_ADDR_O |
assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O; |
//assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION; |
|
//Bus cycle type: write cycle while a pixel write is requested, read cycle otherwise. |
//NOTE(review): this wire is 1 bit wide - confirm `WB_SIMPLE_WRITE_CYCLE and |
//`WB_SIMPLE_READ_CYCLE fit in a single bit. |
wire w2IO_MasterCycleType; |
assign w2IO_MasterCycleType = (wCU2_IO__WritePixel) ? `WB_SIMPLE_WRITE_CYCLE : `WB_SIMPLE_READ_CYCLE; |
|
|
|
assign w2IO__SetAddress = (wCU2_IO__WritePixel == 0 )? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch; |
|
|
IO_Unit IO |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( 1'b0 ),// w2IO__EnableWBMaster ), |
.iBusCyc_Type( w2IO_MasterCycleType ), |
|
.iStore( 1'b1),//w2IO__Store ), |
.iAdr_DataWriteBack( w2IO__DataWriteAddress ), |
.iAdr_O_Set( w2IO__SetAddress ), |
.iAdr_O_Imm( w2IO__AddressOffset ), |
.iAdr_O_Type( w2IO__AddrIsImm ), |
.iAdr_O_Pointer( w2IO__Adr_O_Pointer ), |
.iReadDataBus( wMEM_2__IO_DataRead0 ), |
.iReadDataBus2( wMEM_2__IO_DataRead1 ), |
.iDat_O_Pointer( `OREG_PIXEL_COLOR ), |
|
|
.oDataReadAddress( wIO_2_MEM__DataReadAddress0 ), |
.oDataReadAddress2( wIO_2_MEM__DataReadAddress1 ), |
.oDataWriteAddress( wIO2_MEM__DataWriteAddress ), |
.oDataBus( wIO2_MEM__Bus ), |
.oInstructionBus( wIO2_MEM__ExternalInstruction ), |
|
.oDataWriteEnable( wIO2_MEM__DataWriteEnable ), |
.oData( wIO2_MEM__Data ), |
.oInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ), |
.oInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ), |
.iWriteBack_Set( w2IO_WriteBack_Set ), |
.oBusy( wIO_Busy ), |
.oDone( wIO2__Done ), |
/**********/ |
.iOMEM_WriteAddress( wEXE_2__IO_WriteAddress ), |
.iOMEM_WriteData( wEXE_2__IO_WriteData ), |
.iOMEM_WriteEnable( wEXE_2__IO_OMEMWriteEnable ), |
.OMEM_DAT_O( OMEM_DAT_O ), |
.OMEM_ADR_O( OMEM_ADR_O ), |
.OMEM_WE_O( OMEM_WE_O ), |
|
|
.oTMEMReadData( wIO_2_EXE__TMEMData ), |
.iTMEMDataRequest( wEXE_2_IO__DataRequest ), |
.iTMEMReadAddress( wEXE_2__IO_TMEMAddress ), |
.oTMEMDataAvailable( wIO_2_EXE__DataAvailable ), |
|
.TMEM_ACK_I( TMEM_ACK_I ), |
.TMEM_DAT_I( TMEM_DAT_I ), |
.TMEM_ADR_O( TMEM_ADR_O ), |
.TMEM_WE_O( TMEM_WE_O ), |
.TMEM_STB_O( TMEM_STB_O ), |
.TMEM_CYC_O( TMEM_CYC_O ), |
.TMEM_GNT_I( TMEM_GNT_I ), |
|
/**********/ |
.MST_I( MST_I ), |
//Wish Bone Interface |
.DAT_I( DAT_I ), |
.DAT_O( DAT_O ), |
.ACK_I( ACK_I & GNT_I ), |
.ACK_O( ACK_O ), |
.ADR_O( ADR_O ), |
.ADR_I( ADR_I ), |
.WE_O( WE_O ), |
.WE_I( WE_I ), |
.STB_O( STB_O ), |
.STB_I( STB_I ), |
.CYC_O( CYC_O ), |
.TGA_I( TGA_I ), |
.CYC_I( CYC_I ), |
.GNT_I( GNT_I ), |
.TGC_O( TGC_O ) |
|
|
); |
//--------------------------------------------------------------------------------------------------- |
endmodule |
/trunk/rtl/Module_FixedPointSquareRoot.v
0,0 → 1,120
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//Square Root State Machine Constants |
`define SQUARE_ROOT_LOOP 1 |
`define WRITE_SQUARE_ROOT_RESULT 2 |
|
|
`define SR_AFTER_RESET_STATE 0 |
//----------------------------------------------------------------- |
/* |
|
Calculates the square root of a fixed-point number |
Input: Q32.32 |
Output: Q16.16 |
Notice that the result has half the precision of the operand!! |
*/ |
//Iterative square root. A three-state machine waits for iInputReady, runs one |
//iteration of the loop state per rising Clock edge while 'group' counts down |
//from 32 to 0, then raises OutputReady until iInputReady is released. |
//NOTE(review): all clocked blocks below use blocking assignments and the state |
//register is updated on the falling edge while everything else uses the rising edge. |
module FixedPointSquareRoot |
( |
input wire Clock, |
input wire Reset, |
input wire[`LONG_WIDTH-1:0] Operand, |
input wire iInputReady, |
output reg OutputReady, |
output reg [`WIDTH-1:0] Result |
); |
|
//x: working copy of the operand (64 bits) |
//group: index of the bit pair / result bit being processed |
//sum, diff: running values of the digit-by-digit iteration |
//temp1, temp2: scratch values used inside the loop state |
reg[63:0] x; |
reg[0:`WIDTH-1] group,sum,diff; |
reg[0:`WIDTH-1] temp1,temp2; |
reg [5:0] CurrentState, NextState; |
|
reg myInputReady; |
|
//---------------------------------------- |
//Registered copy of iInputReady. |
//NOTE(review): written with a blocking assignment on posedge Clock and read by |
//the state logic block on the same edge, so which value that block sees depends |
//on simulator ordering. |
always @(posedge Clock) |
begin |
myInputReady = iInputReady; |
end |
//---------------------------------------- |
//Next states logic |
//CurrentState follows NextState on the falling edge; Reset forces the idle state |
always @(negedge Clock) |
begin |
if( Reset!=1 ) |
CurrentState = NextState; |
else |
CurrentState = `SR_AFTER_RESET_STATE; |
end |
//---------------------------------------- |
|
//State actions, evaluated on the rising edge. |
//NOTE(review): the case has no default branch, so for any CurrentState value |
//other than the three listed states nothing is assigned. |
always @ (posedge Clock) |
begin |
case (CurrentState) |
//---------------------------------------- |
//Idle: clear the outputs and working registers; when a request is pending |
//load x with Operand shifted left by `SCALE and enter the loop |
`SR_AFTER_RESET_STATE: |
begin |
OutputReady = 0; |
Result = 0; |
sum = 0; |
diff = 0; |
group=32; //WAS 16 |
x = 0; |
if ( myInputReady == 1 ) |
begin |
// x[31:0] = Operand; |
x = Operand; |
x = x << `SCALE; |
NextState = `SQUARE_ROOT_LOOP; |
end else |
NextState = `SR_AFTER_RESET_STATE; |
|
end |
//---------------------------------------- |
//One iteration per clock: append the bit pair of x selected by 'group' to |
//diff, compare against the trial value in sum and, when it fits, set bit |
//'group' of Result and subtract. |
`SQUARE_ROOT_LOOP: |
begin |
|
|
|
sum = sum << 1; |
sum = sum + 1; |
temp1 = diff << 2; |
//diff = diff + (x>>(group*2)) &3; |
//NOTE(review): on the first pass group is 32, so the shift amount below is 64, |
//the full width of x, and no operand bits are picked up in that pass |
temp2 = group << 1; //group * 2 ?? |
diff = temp1 + ((x >> temp2) &3); |
|
if (sum > diff) |
begin |
sum = sum -1; |
end |
else |
begin |
//NOTE(review): Result is `WIDTH bits wide; if `WIDTH is 32 then a 'group' |
//value of 32 would address a bit outside Result - confirm |
Result = Result + (1<<group); |
diff = diff - sum; |
sum = sum + 1; |
end//if |
|
|
//Keep looping until the pass for group == 0 has been done |
if ( group != 0 ) |
begin |
group = group - 1; |
NextState = `SQUARE_ROOT_LOOP; |
end |
else |
begin |
NextState = `WRITE_SQUARE_ROOT_RESULT; |
|
end |
end |
//---------------------------------------- |
//Done: hold OutputReady high until the requester drops iInputReady |
`WRITE_SQUARE_ROOT_RESULT: |
begin |
OutputReady = 1; |
NextState = (iInputReady == 0) ? |
`SR_AFTER_RESET_STATE : `WRITE_SQUARE_ROOT_RESULT; |
end |
//---------------------------------------- |
endcase |
end //always |
endmodule |
//----------------------------------------------------------------- |
/trunk/rtl/Unit_IO.v
0,0 → 1,319
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
`define ADR_IMM 1 |
`define ADR_POINTER 0 |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//-------------------------------------------------------------------------- |
// IO_Unit: groups the bus-facing blocks instantiated below -- the O-memory |
// write interface (OMI), the T-memory read interface (TMI), the MEM-to-WBM |
// element reader (MEMToWBM), the Wishbone master (WBM) and the Wishbone |
// slave (WBS). The data-memory write port (oDataWriteEnable, |
// oDataWriteAddress, oDataBus) is taken from the slave when MST_I is 1 and |
// from the master-side nets otherwise. |
module IO_Unit |
( |
input wire Clock, |
input wire Reset, |
input wire iEnable, |
input wire [`DATA_ADDRESS_WIDTH-1:0] iDat_O_Pointer, //Pointer to what we want to send via DAT_O |
input wire [`WIDTH-1:0] iAdr_O_Imm, //Value to assign to ADR_O |
input wire [`DATA_ADDRESS_WIDTH-1:0] iAdr_O_Pointer, //Pointer to value to assign to ADR_O |
input wire iAdr_O_Type, //Should we use iAdr_O_Imm or iAdr_O_Pointer |
input wire iAdr_O_Set, //Should we set |
input wire iBusCyc_Type, //Bus cycle type: simple read/write, etc. |
input wire iStore, //Should we store read data into MEM |
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus, //MEM Data read bus 1 |
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus2, //MEM Data read bus 2 |
input wire[`DATA_ADDRESS_WIDTH-1:0] iAdr_DataWriteBack, //Where in MEM we want to store DAT_I |
input wire iWriteBack_Set, //We want to set the Write back Address? |
|
|
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress, |
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress2, |
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, |
output wire oDataWriteEnable, |
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus, |
output wire oInstructionWriteEnable, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress, |
// NOTE(review): oData is not referenced anywhere inside this module. |
inout wire [`WIDTH-1:0] oData, |
output wire oBusy, |
output wire oDone, |
|
|
// O-memory write path, forwarded to the OMI instance |
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteData, |
input wire iOMEM_WriteEnable, |
output wire [`WB_WIDTH-1:0] OMEM_DAT_O, |
output wire [`WB_WIDTH-1:0] OMEM_ADR_O, |
output wire OMEM_WE_O, |
|
//Theia specific interfaces |
input wire MST_I, |
//Wish Bone Interfaces |
output wire [31:0] DAT_O, |
input wire [31:0] DAT_I, |
input wire ACK_I, |
output wire ACK_O, |
output wire [31:0] ADR_O, |
input wire [31:0] ADR_I, |
output wire WE_O, |
input wire WE_I, |
output wire STB_O, |
input wire STB_I, |
output wire CYC_O, |
input wire CYC_I, |
input wire [1:0] TGA_I, |
output wire [1:0] TGC_O, |
input wire GNT_I, |
|
|
// T-memory read path, forwarded to the TMI instance |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadData, |
input wire iTMEMDataRequest, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadAddress, |
output wire oTMEMDataAvailable, |
|
input wire TMEM_ACK_I, |
input wire [`WB_WIDTH-1:0] TMEM_DAT_I , |
output wire [`WB_WIDTH-1:0] TMEM_ADR_O , |
output wire TMEM_WE_O, |
output wire TMEM_STB_O, |
output wire TMEM_CYC_O, |
input wire TMEM_GNT_I |
); |
|
|
// Internal nets. NOTE(review): several of the nets declared below are never |
// referenced again in this module (for example wWBS_2__WBMToMEM_Frame, |
// wWBMToMEM_2_WBM_Enable, wWBMToMEM_2_WBM_Address, |
// wAddressSelector_2__SetAddress, w2WBMToMEM__SetAddress, |
// wWBS_2__WBSToMEM_FrameAvailable, wWBS_2__WBMToMEM_Address). |
wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement; |
wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement2; |
wire wMEMToWBM_2__Enable; |
wire wWBMToMEM2__Done; |
wire wWBM_2_WBMToMEM_DataAvailable; |
wire [`WIDTH-1:0] wWBM_2_WBMToMEM_Data; |
wire [`WIDTH-1:0] wWBS_2__WBMToMEM_Frame; |
wire wWBMToMEM_2_WBM_Enable; |
wire [`WIDTH-1:0] wWBMToMEM_2_WBM_Address; |
wire wWBMToMEM2__oDataWriteEnable; |
wire wAddrerssSelector2_oDataWriteEnable; |
wire [`DATA_ROW_WIDTH-1:0] wWBMToMEM2__oDataBus; |
wire [`DATA_ROW_WIDTH-1:0] wWBSToMEM2__oDataBus; |
wire wAddressSelector_2__SetAddress; |
wire [`WIDTH-1:0] wMEMToWBM_2__Address; |
wire wMEMToWBM_2__Done; |
wire w2WBMToMEM__Enable; |
wire w2WBMToMEM__SetAddress; |
wire wWBS_2__WBSToMEM_FrameAvailable; |
wire[`WIDTH-1:0] wWBS_2__WBMToMEM_Address; |
wire wWBSToMEM2__oDataWriteEnable; |
wire[`DATA_ADDRESS_WIDTH-1:0] wWBSToMEM2__oDataWriteAddress; |
wire[`DATA_ADDRESS_WIDTH-1:0] wWBMToMEM2__oDataWriteAddress; |
|
|
|
//***********new*****************/ |
|
|
// O-memory interface: passes the iOMEM_* write request out on OMEM_* pins. |
Module_OMemInterface OMI |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iWriteEnable( iOMEM_WriteEnable ), |
.iData( iOMEM_WriteData ), |
.iAddress( iOMEM_WriteAddress ), |
.ADR_O( OMEM_ADR_O ), |
.DAT_O( OMEM_DAT_O ), |
.WE_O( OMEM_WE_O ) |
|
); |
|
// T-memory interface: iTMEMDataRequest / iTMEMReadAddress in, |
// oTMEMReadData / oTMEMDataAvailable out, using the TMEM_* bus pins. |
Module_TMemInterface TMI |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( iTMEMDataRequest ), |
.iAddress( iTMEMReadAddress ), |
.oData( oTMEMReadData ), |
.oDone( oTMEMDataAvailable ), |
|
.ACK_I( TMEM_ACK_I ), |
.GNT_I( TMEM_GNT_I ), |
.DAT_I( TMEM_DAT_I ), |
.ADR_O( TMEM_ADR_O ), |
.WE_O( TMEM_WE_O ), |
.STB_O( TMEM_STB_O ), |
.CYC_O( TMEM_CYC_O ) |
|
|
); |
//***********new*****************/ |
|
// oBusy simply mirrors the CYC_O pin connected to the WBM instance. |
assign oBusy = CYC_O; |
// 0 for a simple write cycle, 1 otherwise. |
// NOTE(review): wReadOperation is not referenced elsewhere in this module. |
wire wReadOperation; |
assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? 0 : 1; |
|
|
// Address source: the immediate, or the element read back from MEM when |
// the address is given as a pointer. |
assign wMEMToWBM_2__Address = ( iAdr_O_Type == `ADR_IMM ) ? iAdr_O_Imm : wMEMToWBM2__ReadDataElement; |
// NOTE(review): w2WBMToMEM__Enable is not referenced elsewhere in this module. |
assign w2WBMToMEM__Enable = ( iAdr_O_Type == `ADR_IMM ) ? iEnable : wMEMToWBM_2__Enable; |
//assign oDone = ( (iAdr_O_Type == `ADR_IMM) && !(iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ) |
//? wWBMToMEM2__Done : wMEMToWBM_2__Done; |
|
//TODO: WHEN ADR_POINTER Then Done is not until we got the 3 values from X,Y,Z in iAdr_O_Pointer |
// oDone comes from MEMToWBM for write cycles or pointer addressing, and from |
// wWBMToMEM2__Done otherwise. |
// NOTE(review): no driver for wWBMToMEM2__Done is visible in this module. |
assign oDone = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE || iAdr_O_Type == `ADR_POINTER ) ? wMEMToWBM_2__Done : wWBMToMEM2__Done; |
|
// Data-memory write port mux: WBS nets when MST_I == 1, master-side nets otherwise. |
// NOTE(review): no driver for wWBMToMEM2__oDataWriteEnable or |
// wWBMToMEM2__oDataBus is visible in this module -- confirm the MST_I == 0 path. |
assign oDataWriteEnable = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteEnable : (wWBMToMEM2__oDataWriteEnable);// ^ wAddrerssSelector2_oDataWriteEnable); |
assign oDataWriteAddress = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteAddress : wWBMToMEM2__oDataWriteAddress; |
assign oDataBus = (MST_I == 1'b1) ? wWBSToMEM2__oDataBus : wWBMToMEM2__oDataBus; |
|
|
|
|
|
wire [`DATA_ADDRESS_WIDTH-1:0] wMEMToWBM2_WBMToMEM_RAMWriteAddr; |
wire [`DATA_ADDRESS_WIDTH-1:0] w2WBMToMEM_MEMWriteAddress; |
|
// NOTE(review): w2WBMToMEM_MEMWriteAddress is not referenced elsewhere in this module. |
assign w2WBMToMEM_MEMWriteAddress = ( iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wMEMToWBM2_WBMToMEM_RAMWriteAddr; |
|
// "Next element" request for MEMToWBM: ACK_I on write cycles, |
// wWBMToMEM2__Done otherwise. |
wire w2MEMToWBM_BusOperationComplete; |
assign w2MEMToWBM_BusOperationComplete = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? ACK_I : wWBMToMEM2__Done; |
|
|
// First MEM pointer: the data pointer on write cycles, the address pointer otherwise. |
wire [`DATA_ADDRESS_WIDTH-1:0] w2MEMToWBM_DataPointer; |
assign w2MEMToWBM_DataPointer = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? iDat_O_Pointer : iAdr_O_Pointer; |
|
|
//------------------------------------------------------------------------------ |
// MEMToWBM is enabled when iEnable is set and either iAdr_O_Type is 0 |
// (`ADR_POINTER) or iBusCyc_Type is 1. |
MEM2WBMUnitB MEMToWBM |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iEnable( iEnable & (~iAdr_O_Type | iBusCyc_Type) ), |
.iMEMDataPointer( w2MEMToWBM_DataPointer ), |
.iMEMDataPointer2( iAdr_O_Pointer ), |
.iReadDataBus( iReadDataBus ), //3 Elements coming from DMEM |
.iReadDataBus2( iReadDataBus2 ), |
.oReadDataElement( wMEMToWBM2__ReadDataElement ), //1 out of 3 elements we read |
.oReadDataElement2( wMEMToWBM2__ReadDataElement2 ), //1 out of 3 elements we read |
.oDataReadAddress( oDataReadAddress ), |
.oDataReadAddress2( oDataReadAddress2 ), |
.oDataWriteEnable( wAddrerssSelector2_oDataWriteEnable ), //Always zero |
.oDataAvailable( wMEMToWBM_2__Enable ), //Data from MEM available |
.iRequestNextElement( w2MEMToWBM_BusOperationComplete ), |
.iDataInitialStorageAddress( iAdr_DataWriteBack ), ////######## |
.oDataWriteAddress( wMEMToWBM2_WBMToMEM_RAMWriteAddr ), ////######## |
.oDone( wMEMToWBM_2__Done ) |
); |
//------------------------------------------------------------------------------ |
|
|
|
|
|
// NOTE(review): no driver for wTemp1 is visible in this module, so the |
// pointer-mode write address below has no visible source. |
wire [`DATA_ADDRESS_WIDTH-1:0] wTemp1; |
assign wWBMToMEM2__oDataWriteAddress = (iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wTemp1; |
|
|
|
// Address handed to WBM: second MEM element on write cycles, otherwise the |
// immediate/pointer selection made above. |
wire [`WIDTH-1:0] wADR_O_InitialAddress; |
assign wADR_O_InitialAddress = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM2__ReadDataElement2 : wMEMToWBM_2__Address; |
// NOTE(review): wIncrement_Address_O is not referenced elsewhere in this module. |
wire wIncrement_Address_O; |
assign wIncrement_Address_O = iEnable & ACK_I; |
|
|
|
// NOTE(review): none of the five nets below is referenced elsewhere in this module. |
wire wMEMToWBM2__Done; |
wire wMEMToWBM2__Trigger; |
wire[`WB_WIDTH-1:0] wMEMToWBM_2_Data; |
wire w2MEMToWBM__Trigger; |
wire wWBM2_MEMToWBM_DataWriteDone; |
|
|
wire w2WBM_iEnable; |
|
// WBM enable: MEM data availability on write cycles, iEnable otherwise. |
assign w2WBM_iEnable = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM_2__Enable : iEnable; |
|
//------------------------------------------------------------------------------ |
wire wSTB_O; |
|
//If the address is a pointer, we need 1 cycle to read the data back from MEM |
//before we can set the value into WBM |
wire wAddress_Set_Delayed; |
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_SetDelay |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( 1'b1 ), |
.D( iAdr_O_Set ), |
.Q( wAddress_Set_Delayed ) |
|
); |
|
//If the Addr is IMM then just set it whenever iAdr_O_Set is set, but if we have a pointer, then use |
//wAddress_Set_Delayed at the beginning and then wWBMToMEM2__Done |
wire wWBM_iAddress_Set = (iAdr_O_Type == `ADR_POINTER) ? (wAddress_Set_Delayed | wWBMToMEM2__Done) : iAdr_O_Set; |
|
// The strobe coming out of WBM is masked while oDone is asserted. |
assign STB_O = wSTB_O & ~oDone; |
|
// NOTE(review): the oDataReady / oData outputs of WBM land on nets that are |
// not referenced anywhere else in this module. |
WishBoneMasterUnit WBM |
( |
.CLK_I( Clock ), |
.RST_I( Reset ), |
.DAT_I( DAT_I ), |
.DAT_O( DAT_O ), |
.ACK_I( ACK_I ), |
.ADR_O( ADR_O ), |
.WE_O( WE_O ), |
.STB_O( wSTB_O ), |
.CYC_O( CYC_O ), |
.TGC_O( TGC_O ), |
.GNT_I( GNT_I ), |
|
.iEnable( w2WBM_iEnable ), |
.iBusCyc_Type( iBusCyc_Type ), |
.iAddress_Set( wWBM_iAddress_Set ), |
.iAddress( wADR_O_InitialAddress ), |
.oDataReady( wWBM_2_WBMToMEM_DataAvailable ), |
.iData( wMEMToWBM2__ReadDataElement ), |
.oData( wWBM_2_WBMToMEM_Data ) |
); |
|
//------------------------------------------------------------------------------ |
// Wishbone slave: supplies the data-memory write nets selected when |
// MST_I == 1 and drives the instruction-memory write outputs directly. |
WishBoneSlaveUnit WBS |
( |
|
.CLK_I( Clock ), |
.RST_I( Reset ), |
.STB_I( STB_I ), |
.WE_I( WE_I ), |
.DAT_I( DAT_I ), |
.ADR_I( ADR_I ), |
.TGA_I( TGA_I ), |
.ACK_O( ACK_O ), |
.CYC_I( CYC_I ), |
.MST_I( MST_I ), |
|
.oDataBus( wWBSToMEM2__oDataBus ), |
.oInstructionBus( oInstructionBus ), |
.oDataWriteAddress( wWBSToMEM2__oDataWriteAddress ), |
.oDataWriteEnable( wWBSToMEM2__oDataWriteEnable ), |
.oInstructionWriteAddress( oInstructionWriteAddress ), |
.oInstructionWriteEnable( oInstructionWriteEnable ) |
|
|
|
); |
//------------------------------------------------------------------------------ |
|
|
endmodule |
//-------------------------------------------------------------------------- |
/trunk/rtl/Module_SwapMemory.v
0,0 → 1,90
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//--------------------------------------------------------------------------
// SWAP_MEM
//
// Two RAM_DUAL_READ_PORT banks (MEM_A, MEM_B) behind two client ports
// (A and B). iSelect decides which bank each client port is wired to:
//
//   iSelect = 1 : port A <-> MEM_A,  port B <-> MEM_B
//   iSelect = 0 : port A <-> MEM_B,  port B <-> MEM_A
//
// Write enable, both read addresses, write address, write data and both
// read-data outputs are all swapped together by the same select.
//--------------------------------------------------------------------------
module SWAP_MEM # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 )
(
    input wire                   Clock,
    input wire                   iSelect,
    input wire                   iWriteEnableA,
    input wire[ADDR_WIDTH-1:0]   iReadAddressA0,
    input wire[ADDR_WIDTH-1:0]   iReadAddressA1,
    input wire[ADDR_WIDTH-1:0]   iWriteAddressA,
    input wire[DATA_WIDTH-1:0]   iDataInA,
    output wire [DATA_WIDTH-1:0] oDataOutA0,
    output wire [DATA_WIDTH-1:0] oDataOutA1,


    input wire                   iWriteEnableB,
    input wire[ADDR_WIDTH-1:0]   iReadAddressB0,
    input wire[ADDR_WIDTH-1:0]   iReadAddressB1,
    input wire[ADDR_WIDTH-1:0]   iWriteAddressB,
    input wire[DATA_WIDTH-1:0]   iDataInB,
    output wire [DATA_WIDTH-1:0] oDataOutB0,
    output wire [DATA_WIDTH-1:0] oDataOutB1
);

// Bank-side nets (what each physical RAM actually sees / returns).
wire                  bankA_we,     bankB_we;
wire [ADDR_WIDTH-1:0] bankA_raddr0, bankB_raddr0;
wire [ADDR_WIDTH-1:0] bankA_raddr1, bankB_raddr1;
wire [ADDR_WIDTH-1:0] bankA_waddr,  bankB_waddr;
wire [DATA_WIDTH-1:0] bankA_wdata,  bankB_wdata;
wire [DATA_WIDTH-1:0] bankA_rdata0, bankB_rdata0;
wire [DATA_WIDTH-1:0] bankA_rdata1, bankB_rdata1;

// Client -> bank cross-bar: straight when iSelect is 1, crossed when 0.
assign {bankA_we, bankB_we} =
    iSelect ? {iWriteEnableA, iWriteEnableB} : {iWriteEnableB, iWriteEnableA};

assign {bankA_raddr0, bankB_raddr0} =
    iSelect ? {iReadAddressA0, iReadAddressB0} : {iReadAddressB0, iReadAddressA0};

assign {bankA_raddr1, bankB_raddr1} =
    iSelect ? {iReadAddressA1, iReadAddressB1} : {iReadAddressB1, iReadAddressA1};

assign {bankA_waddr, bankB_waddr} =
    iSelect ? {iWriteAddressA, iWriteAddressB} : {iWriteAddressB, iWriteAddressA};

assign {bankA_wdata, bankB_wdata} =
    iSelect ? {iDataInA, iDataInB} : {iDataInB, iDataInA};

// Bank -> client cross-bar for the two read ports, same select.
assign {oDataOutA0, oDataOutB0} =
    iSelect ? {bankA_rdata0, bankB_rdata0} : {bankB_rdata0, bankA_rdata0};

assign {oDataOutA1, oDataOutB1} =
    iSelect ? {bankA_rdata1, bankB_rdata1} : {bankB_rdata1, bankA_rdata1};

RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_A
(
    .Clock( Clock ),
    .iWriteEnable( bankA_we ),
    .iReadAddress0( bankA_raddr0 ),
    .iReadAddress1( bankA_raddr1 ),
    .iWriteAddress( bankA_waddr ),
    .iDataIn( bankA_wdata ),
    .oDataOut0( bankA_rdata0 ),
    .oDataOut1( bankA_rdata1 )
);

RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_B
(
    .Clock( Clock ),
    .iWriteEnable( bankB_we ),
    .iReadAddress0( bankB_raddr0 ),
    .iReadAddress1( bankB_raddr1 ),
    .iWriteAddress( bankB_waddr ),
    .iDataIn( bankB_wdata ),
    .oDataOut0( bankB_rdata0 ),
    .oDataOut1( bankB_rdata1 )
);

endmodule
/trunk/rtl/Module_VectorALU.v
0,0 → 1,1278
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
|
//-------------------------------------------------------------- |
module VectorALU |
( |
input wire Clock, |
input wire Reset, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
input wire[`WIDTH-1:0] iChannel_Ax, |
input wire[`WIDTH-1:0] iChannel_Bx, |
input wire[`WIDTH-1:0] iChannel_Ay, |
input wire[`WIDTH-1:0] iChannel_By, |
input wire[`WIDTH-1:0] iChannel_Az, |
input wire[`WIDTH-1:0] iChannel_Bz, |
output wire [`WIDTH-1:0] oResultA, |
output wire [`WIDTH-1:0] oResultB, |
output wire [`WIDTH-1:0] oResultC, |
input wire iInputReady, |
output reg oBranchTaken, |
output reg oBranchNotTaken, |
output reg oReturnFromSub, |
input wire [`ROM_ADDRESS_WIDTH-1:0] iCurrentIP, |
|
//Connections to the O Memory |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData, |
output wire oOMEM_WriteEnable, |
//Connections to the R Memory |
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress, |
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData, |
input wire iTMEMDataAvailable, |
output wire oTMEMDataRequest, |
|
output reg OutputReady |
|
); |
|
|
|
|
|
wire wMultiplcationUnscaled; |
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0; |
|
//-------------------------------------------------------------- |
|
reg [7:0] InputReadyA,InputReadyB,InputReadyC; |
|
//------------------------------------------------------ |
/* |
This is the block that takes care of all the arithmetic |
comparisons. Supported operations are <,>,<=,>=,==,!= |
|
*/ |
//------------------------------------------------------ |
reg [`WIDTH-1:0] wMultiplicationA_Ax; |
reg [`WIDTH-1:0] wMultiplicationA_Bx; |
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result; |
wire wMultiplicationA_InputReady; |
wire wMultiplicationA_OutputReady; |
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA, |
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD; |
|
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady; |
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation; |
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ; |
|
//-------------------------------------------------------------------- |
reg [`WIDTH-1:0] ResultA,ResultB,ResultC; |
|
//Output Flip Flops, |
//This flip flop will control the outputs so that the |
//values of the outputs change ONLY when when there is |
//a positive edge of OutputReady |
|
FFD32_POSEDGE ResultAFFD |
( |
.Clock( OutputReady ), |
.D( ResultA ), |
.Q( oResultA ) |
); |
|
FFD32_POSEDGE ResultBFFD |
( |
.Clock( OutputReady ), |
.D( ResultB ), |
.Q( oResultB ) |
); |
|
FFD32_POSEDGE ResultCFFD |
( |
.Clock( OutputReady ), |
.D( ResultC ), |
.Q( oResultC ) |
); |
//-------------------------------------------------------------------- |
|
|
|
Swizzle3D Swizzle1 |
( |
.Source0_X( iChannel_Bx ), |
.Source0_Y( iChannel_By ), |
.Source0_Z( iChannel_Bz ), |
.iOperation( iChannel_Ax ), |
|
.SwizzleX( wSwizzleOutputX ), |
.SwizzleY( wSwizzleOutputY ), |
.SwizzleZ( wSwizzleOutputZ ) |
); |
//--------------------------------------------------------------------- |
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC; |
|
//---------------------------------------------------------------------( |
|
wire IOW_Operation,wOMEM_We; |
assign IOW_Operation = (iOperation == `OMWRITE); |
|
always @ ( * ) |
begin |
if (iOperation == `RET) |
oReturnFromSub <= OutputReady; |
else |
oReturnFromSub <= 1'b0; |
|
end |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_AWE |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( IOW_Operation ), |
.Q( wOMEM_We ) |
); |
|
assign oOMEM_WriteEnable = wOMEM_We & IOW_Operation; |
|
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD1_A |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oOMEMWriteAddress) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady ), |
.D( {iChannel_Bx,iChannel_By,iChannel_Bz} ), |
.Q( oOMEMWriteData ) |
); |
|
|
|
wire wTMReadOutputReady; |
assign wTMReadOutputReady = iTMEMDataAvailable; |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( iTMEMDataAvailable ), |
.Q( wTMReadOutputReady ) |
); |
*/ |
//assign oTMEMReadAddress = {iChannel_Ax,iChannel_Ay,iChannel_Az}; |
|
//We wait 1 clock cycle before we send the data read request, because |
//we need to latch the values at the output |
|
wire wOpTRead; |
assign wOpTRead = ( iOperation == `TMREAD ) ? 1'b1 : 1'b0; |
wire wTMEMRequest; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE123 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( 1'b1 ), |
.D( wOpTRead ), |
.Q( wTMEMRequest ) |
); |
assign oTMEMDataRequest = wTMEMRequest & wOpTRead; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B445 |
( |
.Clock( Clock ), |
.Reset( Reset), |
.Enable( iInputReady & wOpTRead ), |
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ), |
.Q( oTMEMReadAddress ) |
); |
|
/* |
This MUX selects the appropriate X, Y or Z channel depending on |
which component the operation targets. This is defined by bits 3 and 4 |
of iOperation, and only applies to oBranchTaken and Store operations. |
*/ |
|
wire wArithmeticComparison_Result; |
wire ArithmeticComparison_InputReady; |
wire ArithmeticComparison_OutputReady; |
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B; |
|
|
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_A = iChannel_Ax; |
2'b10: ArithmeticComparison_A = iChannel_Ay; |
2'b11: ArithmeticComparison_A = iChannel_Az; |
default: ArithmeticComparison_A = 0; //Should never happen |
endcase |
end |
//--------------------------------------------------------------------- |
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_B = iChannel_Bx; |
2'b10: ArithmeticComparison_B = iChannel_By; |
2'b11: ArithmeticComparison_B = iChannel_Bz; |
default: ArithmeticComparison_B = 0; //Should never happen |
endcase |
end |
|
//--------------------------------------------------------------------- |
/* |
The only instance of ArithmeticComparison in the ALU. |
The ArithmeticComparison operation matches the 3 LSBs of the |
global ALU iOperation for the oBranchTaken instruction family. |
*/ |
|
assign ArithmeticComparison_InputReady = iInputReady; |
|
wire wArithmeticComparisonResult; |
|
ArithmeticComparison ArithmeticComparison_1 |
( |
.Clock( Clock ), |
.X( ArithmeticComparison_A ), |
.Y( ArithmeticComparison_B ), |
.iOperation( iOperation[2:0] ), |
.iInputReady( ArithmeticComparison_InputReady ), |
.OutputReady( ArithmeticComparison_OutputReady ), |
.Result( wArithmeticComparisonResult ) |
); |
|
|
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady; |
//-------------------------------------------------------------------- |
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationA_Ax ), |
.B( wMultiplicationA_Bx ), |
.R( wMultiplicationA_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationA_InputReady ), |
.OutputReady( wMultiplicationA_OutputReady ) |
); |
|
//-------------------------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationA_Ax = iChannel_Ay; // Ay * Bz |
`MAG: wMultiplicationA_Ax = iChannel_Ax; |
`MULP: wMultiplicationA_Ax = iChannel_Ax; //Az = Ax * Ay |
default: wMultiplicationA_Ax = iChannel_Ax; // Ax * Bx |
endcase |
end |
//-------------------------------------------------------------------- |
|
//assign wMultiplicationA_Ax = iChannel_Ax; |
|
assign wMultiplicationA_InputReady |
= (iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG || |
iOperation == `MULP |
) ? iInputReady : 0; |
|
//-------------------------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx |
`MAG: wMultiplicationA_Bx = iChannel_Ax; //Ax^2 |
`DOT: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx |
`CROSS: wMultiplicationA_Bx = iChannel_Bz; // Ay * Bz |
`MULP: wMultiplicationA_Bx = iChannel_Ay; //Az = Ax * Ay |
default: wMultiplicationA_Bx = 32'b0; |
endcase |
end |
//-------------------------------------------------------------------- |
|
//------------------------------------------------------ |
|
reg [`WIDTH-1:0] wMultiplicationB_Ay; |
reg [`WIDTH-1:0] wMultiplicationB_By; |
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result; |
wire wMultiplicationB_InputReady; |
wire wMultiplicationB_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationB_Ay ), |
.B( wMultiplicationB_By ), |
.R( wMultiplicationB_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationB_InputReady ), |
.OutputReady( wMultiplicationB_OutputReady ) |
); |
|
|
//---------------------------------------------------- |
|
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationB_Ay = iChannel_Az; // Az * By |
`MAG: wMultiplicationB_Ay = iChannel_Ay; |
default: wMultiplicationB_Ay = iChannel_Ay; // Ay * By |
endcase |
end |
//---------------------------------------------------- |
assign wMultiplicationB_InputReady |
= (iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG ) ? iInputReady : 0; |
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationB_By = iChannel_By; //Ay*By |
`MAG: wMultiplicationB_By = iChannel_Ay; //Ay^2 |
`DOT: wMultiplicationB_By = iChannel_By; //Ay*By |
`CROSS: wMultiplicationB_By = iChannel_By; // Az * By |
default: wMultiplicationB_By = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
|
//------------------------------------------------------ |
reg [`WIDTH-1:0] wMultiplicationC_Az; |
reg [`WIDTH-1:0] wMultiplicationC_Bz; |
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result; |
wire wMultiplicationC_InputReady; |
wire wMultiplicationC_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationC_Az ), |
.B( wMultiplicationC_Bz ), |
.R( wMultiplicationC_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationC_InputReady ), |
.OutputReady( wMultiplicationC_OutputReady ) |
); |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationC_Az = iChannel_Az; //Az*Bx |
`MAG: wMultiplicationC_Az = iChannel_Az; |
default: wMultiplicationC_Az = iChannel_Az; //Az*Bz |
endcase |
end |
//---------------------------------------------------- |
|
assign wMultiplicationC_InputReady |
= ( |
iOperation == `CROSS || |
iOperation == `DOT || |
iOperation == `MUL || |
iOperation == `IMUL || |
iOperation == `MAG |
) ? iInputReady : 0; |
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`MUL,`IMUL: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz |
`MAG: wMultiplicationC_Bz = iChannel_Az; //Ay^2 |
`DOT: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz |
`CROSS: wMultiplicationC_Bz = iChannel_Bx; //Az*Bx |
default: wMultiplicationC_Bz = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
|
reg [`WIDTH-1:0] wMultiplicationD_Aw; |
reg [`WIDTH-1:0] wMultiplicationD_Bw; |
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result; |
wire wMultiplicationD_InputReady; |
wire wMultiplicationD_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationD_Aw ), |
.B( wMultiplicationD_Bw ), |
.R( wMultiplicationD_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationD_InputReady ), |
.OutputReady( wMultiplicationD_OutputReady ) |
); |
|
assign wMultiplicationD_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationD_Aw = iChannel_Ax; //Ax*Bz |
default: wMultiplicationD_Aw = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationD_Bw = iChannel_Bz; //Ax*Bz |
default: wMultiplicationD_Bw = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
reg [`WIDTH-1:0] wMultiplicationE_Ak; |
reg [`WIDTH-1:0] wMultiplicationE_Bk; |
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result; |
wire wMultiplicationE_InputReady; |
wire wMultiplicationE_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationE_Ak ), |
.B( wMultiplicationE_Bk ), |
.R( wMultiplicationE_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationE_InputReady ), |
.OutputReady( wMultiplicationE_OutputReady ) |
); |
|
assign wMultiplicationE_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationE_Ak = iChannel_Ax; //Ax*By |
default: wMultiplicationE_Ak = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationE_Bk = iChannel_By; //Ax*By |
default: wMultiplicationE_Bk = 32'b0; |
endcase |
end |
|
//---------------------------------------------------- |
reg [`WIDTH-1:0] wMultiplicationF_Al; |
reg [`WIDTH-1:0] wMultiplicationF_Bl; |
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result; |
wire wMultiplicationF_InputReady; |
wire wMultiplicationF_OutputReady; |
|
|
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F |
( |
|
.Clock( Clock ), |
.Reset( Reset ), |
.A( wMultiplicationF_Al ), |
.B( wMultiplicationF_Bl ), |
.R( wMultiplicationF_Result ), |
.iUnscaled( wMultiplcationUnscaled ), |
.iInputReady( wMultiplicationF_InputReady ), |
.OutputReady( wMultiplicationF_OutputReady ) |
); |
assign wMultiplicationF_InputReady |
= (iOperation == `CROSS ) ? iInputReady : 0; |
|
|
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationF_Al = iChannel_Ay; //Ay*Bx |
default: wMultiplicationF_Al = 32'b0; |
endcase |
end |
//---------------------------------------------------- |
always @ ( * ) |
begin |
case (iOperation) |
`CROSS: wMultiplicationF_Bl = iChannel_Bx; //Ay*Bx |
default: wMultiplicationF_Bl = 32'b0; |
endcase |
end |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionA_Result; |
wire wDivisionA_OutputReady; |
wire wDivisionA_InputReady; |
|
assign wDivisionA_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_A |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Ax ), |
.iDivisor( iChannel_Bx ), |
.xQuotient( wDivisionA_Result ), |
.iInputReady( wDivisionA_InputReady ), |
.OutputReady( wDivisionA_OutputReady ) |
|
); |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionB_Result; |
wire wDivisionB_OutputReady; |
wire wDivisionB_InputReady; |
|
assign wDivisionB_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_B |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Ay ), |
.iDivisor( iChannel_By ), |
.xQuotient( wDivisionB_Result ), |
.iInputReady( wDivisionB_InputReady ), |
.OutputReady( wDivisionB_OutputReady ) |
|
); |
//------------------------------------------------------ |
wire [`WIDTH-1:0] wDivisionC_Result; |
wire wDivisionC_OutputReady; |
wire wDivisionC_InputReady; |
|
|
assign wDivisionC_InputReady = |
( iOperation == `DIV) ? iInputReady : 0; |
|
SignedIntegerDivision DivisionChannel_C |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.iDividend( iChannel_Az ), |
.iDivisor( iChannel_Bz ), |
.xQuotient( wDivisionC_Result ), |
.iInputReady( wDivisionC_InputReady ), |
.OutputReady( wDivisionC_OutputReady ) |
|
); |
//-------------------------------------------------------------- |
/* |
 Adder/subtractor A. |
 The operands and the input-ready strobe are declared as regs only because |
 they are driven from combinational always @(*) muxes; there is one mux per |
 operand. |
*/ |
wire wAddSubA_OutputReady; |
wire wAddSubA_Operation; // presumably 1 selects subtraction - confirm in FixedAddSub |
wire [`LONG_WIDTH-1:0] wAddSubA_Result; |
reg wAddSubA_InputReady; |
reg [`LONG_WIDTH-1:0] wAddSubA_Ax, wAddSubA_Bx; |
|
// Asserted for `SUB, `CROSS, `DEC and `MOD; zero for everything else. |
assign wAddSubA_Operation = |
       ( iOperation == `SUB   ) |
    || ( iOperation == `CROSS ) |
    || ( iOperation == `DEC   ) |
    || ( iOperation == `MOD   ); |
|
FixedAddSub AddSubChannel_A |
( |
    .Clock      ( Clock ), |
    .Reset      ( Reset ), |
    .iOperation ( wAddSubA_Operation ), |
    .iInputReady( wAddSubA_InputReady ), |
    .A          ( wAddSubA_Ax ), |
    .B          ( wAddSubA_Bx ), |
    .OutputReady( wAddSubA_OutputReady ), |
    .R          ( wAddSubA_Result ) |
); |
//Diego |
|
|
//---------------------------- |
|
//InputReady Mux A |
// Adder A is started straight from iInputReady for the operations that add |
// the raw channel values, and from the multiplier A and B ready flags for |
// the operations that add two products. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, |
        `INC, `INCX, `INCY, `INCZ, |
        `DEC, `MOD: |
            wAddSubA_InputReady = iInputReady; |
|
        `MAG, `DOT, `CROSS: |
            wAddSubA_InputReady = wMultiplicationOutputReadyA && |
                                  wMultiplicationOutputReadyB; |
|
        default: |
            wAddSubA_InputReady = 1'b0; |
    endcase |
end |
//---------------------------- |
|
//wAddSubA_Ax input Mux |
// Channel values are widened by prepending 32 copies of bit 31 (sign |
// extension). Multiplier results are passed through unchanged. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, |
        `INC, `INCX, `INCY, `INCZ, |
        `DEC: |
            wAddSubA_Ax = { {32{iChannel_Ax[31]}}, iChannel_Ax }; |
|
        // `MOD feeds Bx, not Ax, into this operand |
        `MOD: |
            wAddSubA_Ax = { {32{iChannel_Bx[31]}}, iChannel_Bx }; |
|
        `MAG, `DOT, `CROSS: |
            wAddSubA_Ax = wMultiplicationA_Result; |
|
        default: |
            wAddSubA_Ax = 64'b0; |
    endcase |
end |
//---------------------------- |
//wAddSubA_Bx input Mux |
// Second operand of adder A. The constant (1 << `SCALE) is the value 1 |
// shifted into the integer position of the fixed-point format. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB: |
            wAddSubA_Bx = { {32{iChannel_Bx[31]}}, iChannel_Bx }; |
|
        // Only `INC and `INCX step the X channel... |
        `INC, `INCX: |
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE); |
|
        // ...`INCY and `INCZ add zero to it. |
        `INCY, `INCZ: |
            wAddSubA_Bx = `LONG_WIDTH'd0; |
|
        `DEC, `MOD: |
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE); |
|
        `MAG, `DOT, `CROSS: |
            wAddSubA_Bx = wMultiplicationB_Result; |
|
        default: |
            wAddSubA_Bx = 64'b0; |
    endcase |
end |
//-------------------------------------------------------------- |
/* |
 Adder/subtractor B. |
 The operands and the input-ready strobe are declared as regs only because |
 they are driven from combinational always @(*) muxes; there is one mux per |
 operand. |
*/ |
wire wAddSubB_OutputReady; |
wire wAddSubB_Operation; // presumably 1 selects subtraction - confirm in FixedAddSub |
wire [`LONG_WIDTH-1:0] wAddSubB_Result; |
reg wAddSubB_InputReady; |
reg [`LONG_WIDTH-1:0] wAddSubB_Ay, wAddSubB_By; |
|
// Asserted for `SUB, `CROSS, `DEC and `MOD; zero for everything else. |
assign wAddSubB_Operation = |
       ( iOperation == `SUB   ) |
    || ( iOperation == `CROSS ) |
    || ( iOperation == `DEC   ) |
    || ( iOperation == `MOD   ); |
|
FixedAddSub AddSubChannel_B |
( |
    .Clock      ( Clock ), |
    .Reset      ( Reset ), |
    .iOperation ( wAddSubB_Operation ), |
    .iInputReady( wAddSubB_InputReady ), |
    .A          ( wAddSubB_Ay ), |
    .B          ( wAddSubB_By ), |
    .OutputReady( wAddSubB_OutputReady ), |
    .R          ( wAddSubB_Result ) |
); |
//---------------------------- |
// Registered copy of wMultiplicationOutputReadyC (presumably delayed by one |
// clock - confirm in FFD_POSEDGE_ASYNC_RESET). It is used to start adder B |
// for `MAG and `DOT. |
wire wMultiplicationOutputReadyC_Dealy1; |
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1 |
( |
    .Clock( Clock ), |
    .Clear( Reset ), |
    .D    ( wMultiplicationOutputReadyC ), |
    .Q    ( wMultiplicationOutputReadyC_Dealy1 ) |
); |
|
|
|
|
|
//InputReady Mux B |
// Adder B is started: |
//  - from iInputReady for the operations that add the raw channel values, |
//  - for `MAG/`DOT when adder A is done and the delayed multiplier C ready |
//    flag is set (adder B adds the third product to adder A's sum), |
//  - for `CROSS when multipliers C and D are both done. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, |
        `INC, `INCX, `INCY, `INCZ, |
        `DEC, `MOD: |
            wAddSubB_InputReady = iInputReady; |
|
        `MAG, `DOT: |
            wAddSubB_InputReady = wAddSubAOutputReady && |
                                  wMultiplicationOutputReadyC_Dealy1; |
|
        `CROSS: |
            wAddSubB_InputReady = wMultiplicationOutputReadyC && |
                                  wMultiplicationOutputReadyD; |
|
        default: |
            wAddSubB_InputReady = 1'b0; |
    endcase |
end |
//---------------------------- |
// wAddSubB_Ay input Mux |
// For the plain add/subtract family the operand is the sign-extended Y |
// channel. For `MAG and `DOT it is the output of adder A, so that adder B |
// can add the third product to it. For `CROSS it is multiplier C's product. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB, |
        `INC, `INCX, `INCY, `INCZ, |
        `DEC: |
            wAddSubB_Ay = { {32{iChannel_Ay[31]}}, iChannel_Ay }; //Ay |
|
        // `MOD feeds By, not Ay, into this operand |
        `MOD: |
            wAddSubB_Ay = { {32{iChannel_By[31]}}, iChannel_By }; |
|
        `MAG, //A^2+B^2 |
        `DOT: //Ax*Bx + Ay*By |
            wAddSubB_Ay = wAddSubA_Result; |
|
        `CROSS: |
            wAddSubB_Ay = wMultiplicationC_Result; |
|
        default: |
            wAddSubB_Ay = 64'b0; |
    endcase |
end |
//---------------------------- |
//wAddSubB_By input Mux |
// Second operand of adder B. The constant (1 << `SCALE) is the value 1 |
// shifted into the integer position of the fixed-point format. |
always @ ( * ) |
begin |
    case (iOperation) |
        `ADD, `SUB: |
            wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //By |
|
        // Only `INC and `INCY step the Y channel... |
        `INC, `INCY: |
            wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE); |
|
        // ...`INCX and `INCZ add zero to it. |
        `INCX, `INCZ: |
            wAddSubB_By = `LONG_WIDTH'd0; |
|
        `DEC, `MOD: |
            wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE); |
|
        `MAG, //C^2 |
        `DOT: //Az * Bz |
            wAddSubB_By = wMultiplicationC_Result; |
|
        `CROSS: |
            wAddSubB_By = wMultiplicationD_Result; |
|
        // The literal is 64 bits wide, matching the default arm of the other |
        // adder operand muxes (it was 32'b0 here). |
        default: |
            wAddSubB_By = 64'b0; |
    endcase |
end |
//-------------------------------------------------------------- |
// Adder/subtractor C: declarations and operand muxes. |
wire wAddSubC_OutputReady; |
wire wAddSubC_Operation; // presumably 1 selects subtraction - confirm in FixedAddSub |
wire [`LONG_WIDTH-1:0] wAddSubC_Result; |
reg wAddSubC_InputReady; |
reg [`LONG_WIDTH-1:0] wAddSubC_Az, wAddSubC_Bz; |
|
// NOTE(review): these two regs are not referenced anywhere in the visible |
// part of the module; they look like leftovers - confirm before removing. |
reg [`LONG_WIDTH-1:0] AddSubC_Az, AddSubB_Bz; |
|
//----------------------------------------- |
// First operand: multiplier E's product for `CROSS, sign-extended Bz for |
// `MOD, sign-extended Az for every other operation. |
always @ ( * ) |
begin |
    if (iOperation == `CROSS) |
        wAddSubC_Az = wMultiplicationE_Result; |
    else if (iOperation == `MOD) |
        wAddSubC_Az = { {32{iChannel_Bz[31]}}, iChannel_Bz }; |
    else |
        wAddSubC_Az = { {32{iChannel_Az[31]}}, iChannel_Az }; |
end |
//----------------------------------------- |
// Second operand: multiplier F's product for `CROSS, the constant |
// (1 << `SCALE) or zero for the increment/decrement/modulus family, |
// sign-extended Bz for every other operation. |
always @ ( * ) |
begin |
    case (iOperation) |
        `CROSS: |
            wAddSubC_Bz = wMultiplicationF_Result; |
|
        // Only `INC and `INCZ step the Z channel... |
        `INC, `INCZ: |
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE); |
|
        // ...`INCX and `INCY add zero to it. |
        `INCX, `INCY: |
            wAddSubC_Bz = `LONG_WIDTH'd0; |
|
        `DEC, `MOD: |
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE); |
|
        default: |
            wAddSubC_Bz = { {32{iChannel_Bz[31]}}, iChannel_Bz }; |
    endcase |
end |
//----------------------------------------- |
|
// Adder/subtractor C: operation select, instance and input-ready mux. |
// The operation select is asserted for `SUB, `CROSS, `DEC and `MOD. |
assign wAddSubC_Operation = |
       ( iOperation == `SUB   ) |
    || ( iOperation == `CROSS ) |
    || ( iOperation == `DEC   ) |
    || ( iOperation == `MOD   ); |
|
FixedAddSub AddSubChannel_C |
( |
    .Clock      ( Clock ), |
    .Reset      ( Reset ), |
    .iOperation ( wAddSubC_Operation ), |
    .iInputReady( wAddSubC_InputReady ), |
    .A          ( wAddSubC_Az ), |
    .B          ( wAddSubC_Bz ), |
    .OutputReady( wAddSubC_OutputReady ), |
    .R          ( wAddSubC_Result ) |
); |
|
// For `CROSS adder C waits for multipliers E and F; for every other |
// operation it is started directly by iInputReady. |
always @ ( * ) |
begin |
    if (iOperation == `CROSS) |
        wAddSubC_InputReady = wMultiplicationE_OutputReady && |
                              wMultiplicationF_OutputReady; |
    else |
        wAddSubC_InputReady = iInputReady; |
end |
|
//------------------------------------------------------ |
// Square root unit. It takes adder B's result as its operand and is started |
// only when adder B is done and the operation is `MAG. |
wire wSquareRoot_OutputReady; |
wire [`WIDTH-1:0] wSquareRoot_Result; |
|
FixedPointSquareRoot SQROOT1 |
( |
    .Clock      ( Clock ), |
    .Reset      ( Reset ), |
    .iInputReady( wAddSubBOutputReady && (iOperation == `MAG) ), |
    .Operand    ( wAddSubB_Result ), |
    .OutputReady( wSquareRoot_OutputReady ), |
    .Result     ( wSquareRoot_Result ) |
); |
//------------------------------------------------------ |
|
// `MOD result: each A channel is ANDed with the matching adder output. |
// During `MOD the adders receive the B channel and (1 << `SCALE) with the |
// operation select asserted - presumably computing B - 1.0 so that the AND |
// yields A modulo a power-of-two B; confirm against FixedAddSub. |
assign wModulus2N_ResultA = iChannel_Ax & wAddSubA_Result; |
assign wModulus2N_ResultB = iChannel_Ay & wAddSubB_Result; |
assign wModulus2N_ResultC = iChannel_Az & wAddSubC_Result; |
|
|
|
|
|
|
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&// |
//****Mux for ResultA*** |
// Selects the value driven on ResultA for the current iOperation. |
// Adder results are narrowed to 32 bits by keeping bit 63 as the sign bit |
// together with bits [30:0]. |
// Note that the dot product (from adder B) and the magnitude (from the |
// square root unit) are also output on ResultA. |
always @ ( * ) |
begin |
    case ( iOperation ) |
        `RETURN: ResultA = iChannel_Ax; |
|
        `ADD, `SUB, `CROSS: |
            ResultA = { wAddSubA_Result[63], wAddSubA_Result[30:0] }; |
|
        `DIV: ResultA = wDivisionA_Result; |
|
        `MUL, `IMUL: |
            ResultA = wMultiplicationA_Result[31:0]; |
|
        `DOT: ResultA = { wAddSubB_Result[63], wAddSubB_Result[30:0] }; |
        `MAG: ResultA = wSquareRoot_Result; |
        `ZERO: ResultA = 32'b0; |
        `COPY: ResultA = iChannel_Ax; |
        `TMREAD: ResultA = iTMEMReadData[95:64]; |
        `LEA: ResultA = {16'b0,iCurrentIP}; |
|
        `SWIZZLE3D: ResultA = wSwizzleOutputX; |
|
        //Set Operations |
        `UNSCALE: ResultA = iChannel_Ax >> `SCALE; |
        `SETX, `RET: ResultA = iChannel_Ax; |
        `SETY, `SETZ: ResultA = iChannel_Bx; |
|
        `INC, `INCX, `INCY, `INCZ, |
        `DEC: |
            ResultA = { wAddSubA_Result[63], wAddSubA_Result[30:0] }; |
|
        `MOD: ResultA = wModulus2N_ResultA; |
        // keeps only the low `SCALE bits of the channel |
        `FRAC: ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
        `MULP: ResultA = iChannel_Ax; |
        // two's complement negation |
        `NEG: ResultA = ~iChannel_Ax + 1'b1; |
        `XCHANGEX: ResultA = iChannel_Bx; |
|
        default: |
        begin |
            `ifdef DEBUG |
            // $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
            // $stop(); |
            `endif |
            ResultA = 32'b0; |
        end |
    endcase |
end |
//------------------------------------------------------ |
//****Mux for ResultB*** |
// Selects the value driven on ResultB for the current iOperation. |
// Adder results are narrowed to 32 bits by keeping bit 63 as the sign bit |
// together with bits [30:0]. |
always @ ( * ) |
begin |
    case ( iOperation ) |
        `RETURN: ResultB = iChannel_Ax; |
|
        `ADD, `SUB, `CROSS: |
            ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] }; |
|
        `DIV: ResultB = wDivisionB_Result; |
|
        `MUL, `IMUL: |
            ResultB = wMultiplicationB_Result[31:0]; |
|
        `DOT: ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] }; |
        `MAG: ResultB = wSquareRoot_Result; |
        `ZERO: ResultB = 32'b0; |
        `COPY: ResultB = iChannel_Ay; |
        `TMREAD: ResultB = iTMEMReadData[63:32]; |
        `LEA: ResultB = {16'b0,iCurrentIP}; |
|
        //Set Operations |
        `UNSCALE: ResultB = iChannel_Ay >> `SCALE; |
        `SETX, `RET: ResultB = iChannel_By; // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
        `SETY: ResultB = iChannel_Ax; // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
        `SETZ: ResultB = iChannel_By; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
        `SWIZZLE3D: ResultB = wSwizzleOutputY; |
|
        `INC, `INCX, `INCY, `INCZ, |
        `DEC: |
            ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] }; |
|
        `MOD: ResultB = wModulus2N_ResultB; |
        // keeps only the low `SCALE bits of the channel |
        `FRAC: ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
        `MULP: ResultB = iChannel_Ay; |
        // two's complement negation |
        `NEG: ResultB = ~iChannel_Ay + 1'b1; |
        `XCHANGEX: ResultB = iChannel_Ay; |
|
        default: |
        begin |
            `ifdef DEBUG |
            //$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
            //$stop(); |
            `endif |
            ResultB = 32'b0; |
        end |
    endcase |
end |
//------------------------------------------------------ |
//****Mux for ResultC*** |
// Selects the value driven on ResultC for the current iOperation. |
// Adder results are narrowed to 32 bits by keeping bit 63 as the sign bit |
// together with bits [30:0]. |
always @ ( * ) |
begin |
    case ( iOperation ) |
        `RETURN: ResultC = iChannel_Ax; |
|
        `ADD, `SUB, `CROSS: |
            ResultC = { wAddSubC_Result[63], wAddSubC_Result[30:0] }; |
|
        `DIV: ResultC = wDivisionC_Result; |
|
        `MUL, `IMUL: |
            ResultC = wMultiplicationC_Result[31:0]; |
|
        // the dot product comes from adder B on every channel |
        `DOT: ResultC = { wAddSubB_Result[63], wAddSubB_Result[30:0] }; |
        `MAG: ResultC = wSquareRoot_Result; |
        `ZERO: ResultC = 32'b0; |
        `COPY: ResultC = iChannel_Az; |
        `TMREAD: ResultC = iTMEMReadData[31:0]; |
        `LEA: ResultC = {16'b0,iCurrentIP}; |
|
        `SWIZZLE3D: ResultC = wSwizzleOutputZ; |
|
        //Set Operations |
        `UNSCALE: ResultC = iChannel_Az >> `SCALE; |
        `SETX, `RET, // {Source1[95:64],Source0[63:32],Source0[31:0]}; |
        `SETY:       // {Source0[95:64],Source1[95:64],Source0[31:0]}; |
            ResultC = iChannel_Bz; |
        `SETZ: ResultC = iChannel_Ax; // {Source0[95:64],Source0[63:32],Source1[95:64]}; |
|
        `INC, `INCX, `INCY, `INCZ, |
        `DEC: |
            ResultC = { wAddSubC_Result[63], wAddSubC_Result[30:0] }; |
|
        `MOD: ResultC = wModulus2N_ResultC; |
        // keeps only the low `SCALE bits of the channel |
        `FRAC: ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE)); |
        // `MULP puts multiplier A's product on the C channel |
        `MULP: ResultC = wMultiplicationA_Result[31:0]; |
        // two's complement negation |
        `NEG: ResultC = ~iChannel_Az + 1'b1; |
        `XCHANGEX: ResultC = iChannel_Az; |
|
        default: |
        begin |
            `ifdef DEBUG |
            //$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); |
            //$stop(); |
            `endif |
            ResultC = 32'b0; |
        end |
    endcase |
end |
//------------------------------------------------------------------------ |
|
|
// Branch-taken flag. |
// `JMP, `CALL and `RET are taken as soon as OutputReady is set; every |
// conditional jump follows the arithmetic comparison result; any other |
// operation never reports a taken branch. |
always @ ( * ) |
begin |
    case (iOperation) |
        `JMP, `CALL, `RET: |
            oBranchTaken = OutputReady; |
|
        `JGX,  `JGY,  `JGZ, |
        `JLX,  `JLY,  `JLZ, |
        `JEQX, `JEQY, `JEQZ, |
        `JNEX, `JNEY, `JNEZ, |
        `JGEX, `JGEY, `JGEZ, |
        `JLEX, `JLEY, `JLEZ: |
            oBranchTaken = wArithmeticComparison_Result; |
|
        default: |
            oBranchTaken = 1'b0; |
    endcase |
end |
|
// Branch-not-taken flag: for every jump-family operation it is set when the |
// result is ready and oBranchTaken is low; it is zero for any other |
// operation. |
always @ ( * ) |
begin |
    case (iOperation) |
        `JMP,  `CALL, `RET, |
        `JGX,  `JGY,  `JGZ, |
        `JLX,  `JLY,  `JLZ, |
        `JEQX, `JEQY, `JEQZ, |
        `JNEX, `JNEY, `JNEZ, |
        `JGEX, `JGEY, `JGEZ, |
        `JLEX, `JLEY, `JLEZ: |
            oBranchNotTaken = OutputReady && !oBranchTaken; |
|
        default: |
            oBranchNotTaken = 1'b0; |
    endcase |
end |
//------------------------------------------------------------------------ |
//Output-ready logic for the division channels. |
// Each divider's OutputReady drives the Clock pin of an FFT1 whose D input is |
// tied high and whose Reset pin is iInputReady. Presumably this turns the |
// divider's strobe into a flag that stays set until the next operation |
// starts - confirm against the FFT1 definition. |
|
wire wDivisionOutputReady; |
wire wDivisionOutputReadyA, wDivisionOutputReadyB, wDivisionOutputReadyC; |
|
// Short aliases for the adder done flags |
assign wAddSubAOutputReady = wAddSubA_OutputReady; |
assign wAddSubBOutputReady = wAddSubB_OutputReady; |
assign wAddSubCOutputReady = wAddSubC_OutputReady; |
|
FFT1 FFT_DivisionA |
( |
    .Clock( wDivisionA_OutputReady ), |
    .Reset( iInputReady ), |
    .D    ( 1'b1 ), |
    .Q    ( wDivisionOutputReadyA ) |
); |
|
FFT1 FFT_DivisionB |
( |
    .Clock( wDivisionB_OutputReady ), |
    .Reset( iInputReady ), |
    .D    ( 1'b1 ), |
    .Q    ( wDivisionOutputReadyB ) |
); |
|
FFT1 FFT_DivisionC |
( |
    .Clock( wDivisionC_OutputReady ), |
    .Reset( iInputReady ), |
    .D    ( 1'b1 ), |
    .Q    ( wDivisionOutputReadyC ) |
); |
|
// The division is complete once all three channel flags are set |
assign wDivisionOutputReady = |
    wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC; |
|
// Short aliases for the multiplier A-D done flags |
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady; |
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady; |
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady; |
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady; |
|
// Combined flag: only multipliers A, B and C take part (D is not included) |
assign wMultiplicationOutputReady = |
    wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC; |
|
// Done flag for the square root unit: an FFT1 clocked by the unit's |
// OutputReady with D tied high and iInputReady on its Reset pin (presumably |
// cleared when a new operation starts - confirm against FFT1). |
wire wSquareRootOutputReady; |
FFT1 FFT_Sqrt |
( |
    .Clock( wSquareRoot_OutputReady ), |
    .Reset( iInputReady ), |
    .D    ( 1'b1 ), |
    .Q    ( wSquareRootOutputReady ) |
); |
|
|
//------------------------------------------------------------------------ |
// Chain of three flip-flops fed by iInputReady, plus the register that holds |
// the operation code (wOperation) captured while iInputReady is asserted. |
wire wOutputDelay1Cycle, wOutputDelay2Cycle, wOutputDelay3Cycle; |
|
// Stage 1: iInputReady -> wOutputDelay1Cycle |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2 |
( |
    .Clock( Clock ), |
    .Clear( Reset ), |
    .D    ( iInputReady ), |
    .Q    ( wOutputDelay1Cycle ) |
); |
|
// Stage 2: wOutputDelay1Cycle -> wOutputDelay2Cycle |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay22 |
( |
    .Clock( Clock ), |
    .Clear( Reset ), |
    .D    ( wOutputDelay1Cycle ), |
    .Q    ( wOutputDelay2Cycle ) |
); |
|
// Stage 3: wOutputDelay2Cycle -> wOutputDelay3Cycle |
// NOTE(review): the clock of this stage is gated combinationally with |
// (wOperation == `OMWRITE). Gated clocks are prone to glitches; consider a |
// clock-enable instead, after checking the timing `OMWRITE relies on. |
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay222 |
( |
    .Clock( Clock && (wOperation == `OMWRITE) ), |
    .Clear( Reset ), |
    .D    ( wOutputDelay2Cycle ), |
    .Q    ( wOutputDelay3Cycle ) |
); |
|
// Operation register: loads iOperation when iInputReady enables it |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2 |
( |
    .Clock ( Clock ), |
    .Reset ( Reset ), |
    .Enable( iInputReady ), |
    .D     ( iOperation ), |
    .Q     ( wOperation ) |
); |
|
|
//Mux for output ready signal |
// Selects which completion flag drives OutputReady. The selection is made on |
// wOperation (the registered copy of the operation code), not on iOperation. |
//  - simple move/bitwise style operations use the first stage of the |
//    iInputReady delay chain, |
//  - `OMWRITE uses the third stage, |
//  - arithmetic operations wait for the functional units they use, |
//  - conditional jumps wait for the arithmetic comparison unit. |
always @ ( * ) |
begin |
    case ( wOperation ) |
        `UNSCALE, `RETURN, |
        `NOP, `FRAC, `NEG: |
            OutputReady = wOutputDelay1Cycle; |
|
        `OMWRITE: OutputReady = wOutputDelay3Cycle; |
        `TMREAD: OutputReady = wTMReadOutputReady; //One cycle after TMEM data available asserted |
|
        `ifdef DEBUG |
        //Debug Print behaves as a NOP in terms of ALU... |
        `DEBUG_PRINT: OutputReady = wOutputDelay1Cycle; |
        `endif |
|
        // All three adders must be done |
        `ADD, `INC, `INCX, `INCY, `INCZ, |
        `SUB, `DEC: |
            OutputReady = wAddSubAOutputReady && |
                          wAddSubBOutputReady && |
                          wAddSubCOutputReady; |
|
        `DIV: OutputReady = wDivisionOutputReady; |
|
        `MUL, `IMUL: OutputReady = wMultiplicationOutputReady; |
        `MULP: OutputReady = wMultiplicationOutputReadyA; |
|
        // The dot product is the output of adder B |
        `DOT: OutputReady = wAddSubBOutputReady; |
|
        `CROSS: |
            OutputReady = wAddSubAOutputReady && |
                          wAddSubBOutputReady && |
                          wAddSubCOutputReady; |
|
        `MAG: OutputReady = wSquareRootOutputReady; |
|
        `ZERO, `COPY, `SWIZZLE3D, |
        `SETX, `SETY, `SETZ, `JMP, `LEA, `CALL, `RET: |
            OutputReady = wOutputDelay1Cycle; |
|
        `JGX,  `JGY,  `JGZ, |
        `JLX,  `JLY,  `JLZ, |
        `JEQX, `JEQY, `JEQZ, |
        `JNEX, `JNEY, `JNEZ, |
        `JGEX, `JGEY, `JGEZ, |
        `JLEX, `JLEY, `JLEZ: |
            OutputReady = ArithmeticComparison_OutputReady; |
|
        `MOD: |
            OutputReady = wAddSubAOutputReady && //TODO: wait 1 more cycle |
                          wAddSubBOutputReady && |
                          wAddSubCOutputReady; |
|
        `XCHANGEX: OutputReady = wOutputDelay1Cycle; |
|
        default: |
        begin |
            // Unknown operation: never report completion. A 1-bit literal is |
            // used because every other arm drives OutputReady from 1-bit |
            // flags (the literal used to be 32'b0). |
            OutputReady = 1'b0; |
            //`ifdef DEBUG |
            //$display("*** ALU ERROR: wOperation = %d ***",wOperation); |
            //`endif |
        end |
|
    endcase |
end |
|
endmodule |
//------------------------------------------------------------------------ |
/trunk/rtl/aDefinitions.v
0,0 → 1,371
/********************************************************************************** |
Theaia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2009 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
/******************************************************************************* |
Module Description: |
|
This module defines constants that are used all over the code. |
As you may have noticed by now, all constants are |
pre-compilation `define directives. This is mainly for |
simulation performance reasons. |
*******************************************************************************/ |
|
// Core / memory bank counts and simulation clock constants. |
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS! |
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES |
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM |
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: literal is hard-coded to four 1s, change it for more cores |
//--------------------------------------------------------------------------------- |
//Verilog provides a `default_nettype none compiler directive. When |
//this directive is set, implicit net declarations are disabled, which makes any |
//undeclared signal name a syntax error. This is very useful to avoid the annoying |
//automatic 1-bit wide wire declarations where you don't want them! |
`default_nettype none |
|
//The clock cycle (CLOCK_PERIOD is twice CLOCK_CYCLE) |
`define CLOCK_CYCLE 5 |
`define CLOCK_PERIOD 10 |
//--------------------------------------------------------------------------------- |
//Defines the Scale. This is very important because it sets the fixed point precision. |
//The Scale defines the number of bits that are used as the fractional part of the number. |
//The code has been written in such a way that allows you to change the value of the |
//Scale, so that it is possible to experiment with different scenarios. SCALE can be |
//no smaller than 1 and no bigger than WIDTH. |
`define SCALE 17 |
|
//The next section defines the length of the registers, buses and other structures, |
//do not change these values unless you really know what you are doing (seriously!) |
`define WIDTH 32 |
`define WB_WIDTH 32 //width of wish-bone buses |
`define LONG_WIDTH 64 |
|
// Wishbone cycle type selectors |
`define WB_SIMPLE_READ_CYCLE 0 |
`define WB_SIMPLE_WRITE_CYCLE 1 |
//--------------------------------------------------------------------------------- |
//Next are the constants that define the size of the instructions. |
//Instructions are formed like this: |
// Type I: |
// Operand (of size INSTRUCTION_OP_LENGTH ) |
// DestinationAddr (of size DATA_ADDRESS_WIDTH ) |
// SourceAddrr1 (of size DATA_ADDRESS_WIDTH ) |
// SourceAddrr2 (of size DATA_ADDRESS_WIDTH ) |
//Type II: |
// Operand (of size INSTRUCTION_OP_LENGTH ) |
// DestinationAddr (of size DATA_ADDRESS_WIDTH ) |
// ImmediateValue (of size WIDTH = DATA_ADDRESS_WIDTH * 2 ) |
// |
//You can play around with the size of instructions, but keep |
//in mind that Bits 3 and 4 of the Operand have a special meaning |
//that is used for the jump family of instructions (see Documentation). |
//Also the MSB of Operand is used by the decoder to distinguish |
//between Type I and Type II instructions. |
// NOTE(review): INSTRUCTION_IMM_BIT below is 6, which is not the MSB of a |
// 16-bit operand - confirm which bit the decoder really tests. |
`define INSTRUCTION_WIDTH 64 |
`define INSTRUCTION_OP_LENGTH 16 |
`define INSTRUCTION_IMM_BITPOS 54 |
`define INSTRUCTION_IMM_BIT 6 //don't change this! |
|
//Defines the Length of Memory blocks |
`define DATA_ROW_WIDTH 96 |
`define DATA_ADDRESS_WIDTH 16 |
`define ROM_ADDRESS_WIDTH 16 |
`define ROM_ADDRESS_SEL_MASK `ROM_ADDRESS_WIDTH'h8000 |
|
//--------------------------------------------------------------------------------- |
//The next section defines the code memory entry point for the various code routines |
//Please keep this syntax ENTRYPOINT_ADDR_* because the perl script that |
//parses the user code expects this pattern in order to read in the tokens |
// NOTE(review): the macros below are actually spelled ENTRYPOINT_ADRR_* |
// (double R) - confirm which spelling the perl script matches. |
|
//Internal Entry points (default ROM Address) |
`define ENTRYPOINT_ADRR_INITIAL `ROM_ADDRESS_WIDTH'd0 //0 - This should always be zero |
`define ENTRYPOINT_ADRR_CPPU `ROM_ADDRESS_WIDTH'd44 |
`define ENTRYPOINT_ADRR_RGU `ROM_ADDRESS_WIDTH'd47 |
`define ENTRYPOINT_ADRR_AABBIU `ROM_ADDRESS_WIDTH'd69 |
`define ENTRYPOINT_ADRR_BIU `ROM_ADDRESS_WIDTH'd157 |
`define ENTRYPOINT_ADRR_PSU `ROM_ADDRESS_WIDTH'd232 |
`define ENTRYPOINT_ADRR_PSU2 `ROM_ADDRESS_WIDTH'd248 |
`define ENTRYPOINT_ADRR_TCC `ROM_ADDRESS_WIDTH'd190 |
`define ENTRYPOINT_ADRR_NPG `ROM_ADDRESS_WIDTH'd55 |
//User Entry points (default ROM Address) |
`define ENTRYPOINT_ADRR_USERCONSTANTS `ROM_ADDRESS_WIDTH'd276 |
`define ENTRYPOINT_ADRR_PIXELSHADER `ROM_ADDRESS_WIDTH'd278 |
`define ENTRYPOINT_ADRR_MAIN `ROM_ADDRESS_WIDTH'd37 |
|
//Please keep this syntax ENTRYPOINT_INDEX_* because the perl script that |
//parses the user code expects this pattern in order to read in the tokens |
// Every index below has bit 15 set, the same bit as ROM_ADDRESS_SEL_MASK. |
//Internal subroutines |
`define ENTRYPOINT_INDEX_INITIAL `ROM_ADDRESS_WIDTH'h8000 |
`define ENTRYPOINT_INDEX_CPPU `ROM_ADDRESS_WIDTH'h8001 |
`define ENTRYPOINT_INDEX_RGU `ROM_ADDRESS_WIDTH'h8002 |
`define ENTRYPOINT_INDEX_AABBIU `ROM_ADDRESS_WIDTH'h8003 |
`define ENTRYPOINT_INDEX_BIU `ROM_ADDRESS_WIDTH'h8004 |
`define ENTRYPOINT_INDEX_PSU `ROM_ADDRESS_WIDTH'h8005 |
`define ENTRYPOINT_INDEX_PSU2 `ROM_ADDRESS_WIDTH'h8006 |
`define ENTRYPOINT_INDEX_TCC `ROM_ADDRESS_WIDTH'h8007 |
`define ENTRYPOINT_INDEX_NPG `ROM_ADDRESS_WIDTH'h8008 |
//User defined subroutines |
`define ENTRYPOINT_INDEX_USERCONSTANTS `ROM_ADDRESS_WIDTH'h8009 |
`define ENTRYPOINT_INDEX_PIXELSHADER `ROM_ADDRESS_WIDTH'h800A |
`define ENTRYPOINT_INDEX_MAIN `ROM_ADDRESS_WIDTH'h800B |
|
`define USER_AABBIU_UCODE_ADDRESS `ROM_ADDRESS_WIDTH'b1000000000000000 |
//--------------------------------------------------------------------------------- |
//This handy little macro allows me to print stuff either to STDOUT or a file. |
//Notice that the compilation variable DUMP_CODE must be set if you want to print |
//to a file. In XILINX right click 'Simulate Behavioral Model' -> Properties and |
//under 'Specify `define macro name and value' type 'DEBUG=1|DUMP_CODE=1|DEBUG_CORE=<core you want to dump>' |
// The macro expands to an unterminated call, so the use site must supply the |
// arguments and the closing parenthesis. |
`ifdef DUMP_CODE |
|
`define LOGME $fwrite(ucode_file, |
`else |
`define LOGME $write( |
`endif |
//--------------------------------------------------------------------------------- |
// Boolean constants: 32-bit (TRUE/FALSE) and 48-bit (RT_TRUE/RT_FALSE) wide |
`define TRUE 32'h1 |
`define FALSE 32'h0 |
`define RT_TRUE 48'b1 |
`define RT_FALSE 48'b0 |
//--------------------------------------------------------------------------------- |
|
`define GENERAL_PURPOSE_REG_ADDR_MASK `DATA_ADDRESS_WIDTH'h1F |
`define VOID `DATA_ADDRESS_WIDTH'd0 //0000 |
//** Control register bits (bit positions) **// |
`define CR_EN_LIGHTS 0 |
`define CR_EN_TEXTURE 1 |
`define CR_USER_AABBIU 2 |
/** Swapping registers **/ |
//** Configuration Registers **// |
`define CREG_LIGHT_INFO `DATA_ADDRESS_WIDTH'd0 |
`define CREG_CAMERA_POSITION `DATA_ADDRESS_WIDTH'd1 |
`define CREG_PROJECTION_WINDOW_MIN `DATA_ADDRESS_WIDTH'd2 |
`define CREG_PROJECTION_WINDOW_MAX `DATA_ADDRESS_WIDTH'd3 |
`define CREG_RESOLUTION `DATA_ADDRESS_WIDTH'd4 |
`define CREG_TEXTURE_SIZE `DATA_ADDRESS_WIDTH'd5 |
`define CREG_PIXEL_2D_INITIAL_POSITION `DATA_ADDRESS_WIDTH'd6 |
`define CREG_PIXEL_2D_FINAL_POSITION `DATA_ADDRESS_WIDTH'd7 |
// The next two names share the same address (8) |
`define CREG_FIRST_LIGTH `DATA_ADDRESS_WIDTH'd8 |
`define CREG_FIRST_LIGTH_DIFFUSE `DATA_ADDRESS_WIDTH'd8 |
//OK, so from address 0x06 to 0x0F is where the lights are, watch out: values are hardcoded |
//for now!! (look in ROM.v for hardcoded values!!!) |
// NOTE(review): CREG_FIRST_LIGTH is defined as 8 while 6 and 7 are the |
// PIXEL_2D position registers, so the range probably starts at 0x08 - confirm. |
|
|
//Don't change the order of the registers. CREG_V* and CREG_UV* registers |
//need to be in that specific order for the triangle fetcher to work |
//correctly! |
|
`define CREG_AABBMIN `DATA_ADDRESS_WIDTH'd42 |
`define CREG_AABBMAX `DATA_ADDRESS_WIDTH'd43 |
`define CREG_V0 `DATA_ADDRESS_WIDTH'd44 |
`define CREG_UV0 `DATA_ADDRESS_WIDTH'd45 |
`define CREG_V1 `DATA_ADDRESS_WIDTH'd46 |
`define CREG_UV1 `DATA_ADDRESS_WIDTH'd47 |
`define CREG_V2 `DATA_ADDRESS_WIDTH'd48 |
`define CREG_UV2 `DATA_ADDRESS_WIDTH'd49 |
`define CREG_TRI_DIFFUSE `DATA_ADDRESS_WIDTH'd50 |
`define CREG_TEX_COLOR1 `DATA_ADDRESS_WIDTH'd53 |
`define CREG_TEX_COLOR2 `DATA_ADDRESS_WIDTH'd54 |
`define CREG_TEX_COLOR3 `DATA_ADDRESS_WIDTH'd55 |
`define CREG_TEX_COLOR4 `DATA_ADDRESS_WIDTH'd56 |
`define CREG_TEX_COLOR5 `DATA_ADDRESS_WIDTH'd57 |
`define CREG_TEX_COLOR6 `DATA_ADDRESS_WIDTH'd58 |
`define CREG_TEX_COLOR7 `DATA_ADDRESS_WIDTH'd59 |
|
|
/** Non-Swapping registers **/ |
// ** User Registers **// |
//General purpose registers, the user may put whatever he/she |
//wants in here... |
// C1..C7 occupy addresses 64-70 and R1..R12 occupy addresses 71-82. |
`define C1 `DATA_ADDRESS_WIDTH'd64 |
`define C2 `DATA_ADDRESS_WIDTH'd65 |
`define C3 `DATA_ADDRESS_WIDTH'd66 |
`define C4 `DATA_ADDRESS_WIDTH'd67 |
`define C5 `DATA_ADDRESS_WIDTH'd68 |
`define C6 `DATA_ADDRESS_WIDTH'd69 |
`define C7 `DATA_ADDRESS_WIDTH'd70 |
`define R1 `DATA_ADDRESS_WIDTH'd71 |
`define R2 `DATA_ADDRESS_WIDTH'd72 |
`define R3 `DATA_ADDRESS_WIDTH'd73 |
`define R4 `DATA_ADDRESS_WIDTH'd74 |
`define R5 `DATA_ADDRESS_WIDTH'd75 |
`define R6 `DATA_ADDRESS_WIDTH'd76 |
`define R7 `DATA_ADDRESS_WIDTH'd77 |
`define R8 `DATA_ADDRESS_WIDTH'd78 |
`define R9 `DATA_ADDRESS_WIDTH'd79 |
`define R10 `DATA_ADDRESS_WIDTH'd80 |
`define R11 `DATA_ADDRESS_WIDTH'd81 |
`define R12 `DATA_ADDRESS_WIDTH'd82 |
|
//** Internal Registers **// |
`define CREG_PROJECTION_WINDOW_SCALE `DATA_ADDRESS_WIDTH'd83 |
`define CREG_UNORMALIZED_DIRECTION `DATA_ADDRESS_WIDTH'd84 |
`define CREG_RAY_DIRECTION `DATA_ADDRESS_WIDTH'd85 |
`define CREG_E1_LAST `DATA_ADDRESS_WIDTH'd86 |
`define CREG_E2_LAST `DATA_ADDRESS_WIDTH'd87 |
`define CREG_T `DATA_ADDRESS_WIDTH'd88 |
`define CREG_P `DATA_ADDRESS_WIDTH'd89 |
`define CREG_Q `DATA_ADDRESS_WIDTH'd90 |
`define CREG_UV0_LAST `DATA_ADDRESS_WIDTH'd91 |
`define CREG_UV1_LAST `DATA_ADDRESS_WIDTH'd92 |
`define CREG_UV2_LAST `DATA_ADDRESS_WIDTH'd93 |
`define CREG_TRI_DIFFUSE_LAST `DATA_ADDRESS_WIDTH'd94 |
`define CREG_LAST_t `DATA_ADDRESS_WIDTH'd95 |
`define CREG_LAST_u `DATA_ADDRESS_WIDTH'd96 |
`define CREG_LAST_v `DATA_ADDRESS_WIDTH'd97 |
`define CREG_COLOR_ACC `DATA_ADDRESS_WIDTH'd98 |
`define CREG_t `DATA_ADDRESS_WIDTH'd99 |
`define CREG_E1 `DATA_ADDRESS_WIDTH'd100 |
`define CREG_E2 `DATA_ADDRESS_WIDTH'd101 |
`define CREG_DELTA `DATA_ADDRESS_WIDTH'd102 |
`define CREG_u `DATA_ADDRESS_WIDTH'd103 |
`define CREG_v `DATA_ADDRESS_WIDTH'd104 |
`define CREG_H1 `DATA_ADDRESS_WIDTH'd105 |
`define CREG_H2 `DATA_ADDRESS_WIDTH'd106 |
`define CREG_H3 `DATA_ADDRESS_WIDTH'd107 |
`define CREG_PIXEL_PITCH `DATA_ADDRESS_WIDTH'd108 |
|
`define CREG_LAST_COL `DATA_ADDRESS_WIDTH'd109 //the last valid column, simply CREG_RESOLUTIONX - 1 |
`define CREG_TEXTURE_COLOR `DATA_ADDRESS_WIDTH'd110 |
`define CREG_PIXEL_2D_POSITION `DATA_ADDRESS_WIDTH'd111 |
`define CREG_TEXWEIGHT1 `DATA_ADDRESS_WIDTH'd112 |
`define CREG_TEXWEIGHT2 `DATA_ADDRESS_WIDTH'd113 |
`define CREG_TEXWEIGHT3 `DATA_ADDRESS_WIDTH'd114 |
`define CREG_TEXWEIGHT4 `DATA_ADDRESS_WIDTH'd115 |
`define CREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd116 |
`define CREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd117 |
`define R99 `DATA_ADDRESS_WIDTH'd118 |
`define CREG_ZERO `DATA_ADDRESS_WIDTH'd119 |
`define CREG_CURRENT_OUTPUT_PIXEL `DATA_ADDRESS_WIDTH'd120 |
`define CREG_3 `DATA_ADDRESS_WIDTH'd121 |
`define CREG_012 `DATA_ADDRESS_WIDTH'd122 |
|
//** Output registers **// |
|
`define OREG_PIXEL_COLOR `DATA_ADDRESS_WIDTH'd128 |
`define OREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd129 |
`define OREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd130 |
`define OREG_ADDR_O `DATA_ADDRESS_WIDTH'd131 |
//------------------------------------------------------------- |
//*** Instruction Set *** |
//The order of the instructions is important here! Don't change |
//it unless you know what you are doing. For example, all the 'SET' |
//family of instructions have the MSB set to 1. This means that |
//if you add an instruction with MSB=1, that instruction will be treated |
//as type II (see manual), meaning the second 32-bit argument is expected to be |
//an immediate value instead of a register address! |
//Another example is that in the JUMP family bits 3 and 4 have a special |
//meaning: b4b3 = 01 => X jump type, b4b3 = 10 => Y jump type, and finally |
//b4b3 = 11 means Z jump type. |
//All this is just to tell you: Don't play with these values! |
// |
//The number at the start of each trailing comment is the decimal value of |
//the binary opcode on that line. Opcodes 6 and 46 are not assigned here. |
|
// *** Type I Instructions (OP DST REG1 REG2) *** |
`define NOP `INSTRUCTION_OP_LENGTH'b0_000000 //0 |
`define ADD `INSTRUCTION_OP_LENGTH'b0_000001 //1 |
`define SUB `INSTRUCTION_OP_LENGTH'b0_000010 //2 |
`define DIV `INSTRUCTION_OP_LENGTH'b0_000011 //3 |
`define MUL `INSTRUCTION_OP_LENGTH'b0_000100 //4 |
`define MAG `INSTRUCTION_OP_LENGTH'b0_000101 //5 |
`define COPY `INSTRUCTION_OP_LENGTH'b0_000111 //7 |
`define JGX `INSTRUCTION_OP_LENGTH'b0_001_000 //8 |
`define JLX `INSTRUCTION_OP_LENGTH'b0_001_001 //9 |
`define JEQX `INSTRUCTION_OP_LENGTH'b0_001_010 //10 - A |
`define JNEX `INSTRUCTION_OP_LENGTH'b0_001_011 //11 - B |
`define JGEX `INSTRUCTION_OP_LENGTH'b0_001_100 //12 - C |
`define JLEX `INSTRUCTION_OP_LENGTH'b0_001_101 //13 - D |
`define INC `INSTRUCTION_OP_LENGTH'b0_001_110 //14 - E |
`define ZERO `INSTRUCTION_OP_LENGTH'b0_001_111 //15 - F |
`define JGY `INSTRUCTION_OP_LENGTH'b0_010_000 //16 |
`define JLY `INSTRUCTION_OP_LENGTH'b0_010_001 //17 |
`define JEQY `INSTRUCTION_OP_LENGTH'b0_010_010 //18 |
`define JNEY `INSTRUCTION_OP_LENGTH'b0_010_011 //19 |
`define JGEY `INSTRUCTION_OP_LENGTH'b0_010_100 //20 |
`define JLEY `INSTRUCTION_OP_LENGTH'b0_010_101 //21 |
`define CROSS `INSTRUCTION_OP_LENGTH'b0_010_110 //22 |
`define DOT `INSTRUCTION_OP_LENGTH'b0_010_111 //23 |
`define JGZ `INSTRUCTION_OP_LENGTH'b0_011_000 //24 |
`define JLZ `INSTRUCTION_OP_LENGTH'b0_011_001 //25 |
`define JEQZ `INSTRUCTION_OP_LENGTH'b0_011_010 //26 |
`define JNEZ `INSTRUCTION_OP_LENGTH'b0_011_011 //27 |
`define JGEZ `INSTRUCTION_OP_LENGTH'b0_011_100 //28 |
`define JLEZ `INSTRUCTION_OP_LENGTH'b0_011_101 //29 |
|
//The next instruction is for simulation debug only, |
//not to be synthesized! It pretty much behaves the same |
//as a NOP, except that it prints the register value to |
//a log file called 'Registers.log' |
`ifdef DEBUG |
`define DEBUG_PRINT `INSTRUCTION_OP_LENGTH'b0_011_110 //30 |
`endif |
|
`define MULP `INSTRUCTION_OP_LENGTH'b0_011_111 //31 R1.z = S1.x * S1.y |
`define MOD `INSTRUCTION_OP_LENGTH'b0_100_000 //32 R = MODULO( S1,S2 ) |
`define FRAC `INSTRUCTION_OP_LENGTH'b0_100_001 //33 R = FractionalPart( S1 ) |
`define INTP `INSTRUCTION_OP_LENGTH'b0_100_010 //34 R = IntegerPart( S1 ) |
`define NEG `INSTRUCTION_OP_LENGTH'b0_100_011 //35 R = -S1 |
`define DEC `INSTRUCTION_OP_LENGTH'b0_100_100 //36 R = S1-- |
`define XCHANGEX `INSTRUCTION_OP_LENGTH'b0_100_101 //37 R.x = S2.x, R.y = S1.y, R.z = S1.z |
`define XCHANGEY `INSTRUCTION_OP_LENGTH'b0_100_110 //38 R.x = S1.x, R.y = S2.y, R.z = S1.z |
`define XCHANGEZ `INSTRUCTION_OP_LENGTH'b0_100_111 //39 R.x = S1.x, R.y = S1.y, R.z = S2.z |
`define IMUL `INSTRUCTION_OP_LENGTH'b0_101_000 //40 R = INTEGER( S1 * S2 ) |
`define UNSCALE `INSTRUCTION_OP_LENGTH'b0_101_001 //41 R = S1 >> SCALE |
`define RESCALE `INSTRUCTION_OP_LENGTH'b0_101_010 //42 R = S1 << SCALE |
`define INCX `INSTRUCTION_OP_LENGTH'b0_101_011 //43 R.X = S1.X + 1 |
`define INCY `INSTRUCTION_OP_LENGTH'b0_101_100 //44 R.Y = S1.Y + 1 |
`define INCZ `INSTRUCTION_OP_LENGTH'b0_101_101 //45 R.Z = S1.Z + 1 |
`define OMWRITE `INSTRUCTION_OP_LENGTH'b0_101_111 //47 IO write to O memory |
`define TMREAD `INSTRUCTION_OP_LENGTH'b0_110_000 //48 IO read from T memory |
`define LEA `INSTRUCTION_OP_LENGTH'b0_110_001 //49 Load effective address |
|
//*** Type II Instructions (OP DST REG1 IMM) *** |
`define RETURN `INSTRUCTION_OP_LENGTH'b1_000000 //64 0x40 |
`define SETX `INSTRUCTION_OP_LENGTH'b1_000001 //65 0x41 |
`define SETY `INSTRUCTION_OP_LENGTH'b1_000010 //66 0x42 |
`define SETZ `INSTRUCTION_OP_LENGTH'b1_000011 //67 0x43 |
`define SWIZZLE3D `INSTRUCTION_OP_LENGTH'b1_000100 //68 0x44 |
`define JMP `INSTRUCTION_OP_LENGTH'b1_011000 //88 0x58 (comment previously said 56) |
`define CALL `INSTRUCTION_OP_LENGTH'b1_011001 //89 0x59 (comment previously said 57) |
`define RET `INSTRUCTION_OP_LENGTH'b1_011010 //90 0x5A (comment previously said 58) |
//------------------------------------------------------------- |
|
//All the possible values for the SWIZZLE3D instruction are defined next. |
//Each selector is a 32-bit constant numbered consecutively from 0 to 21. |
//NOTE(review): the three letters presumably name the source component routed |
//to the result's X, Y and Z in that order -- confirm against the swizzle module. |
//There is no SWIZZLE_XYZ (identity) entry in this list. |
`define SWIZZLE_XXX 32'd0 |
`define SWIZZLE_YYY 32'd1 |
`define SWIZZLE_ZZZ 32'd2 |
`define SWIZZLE_XYY 32'd3 |
`define SWIZZLE_XXY 32'd4 |
`define SWIZZLE_XZZ 32'd5 |
`define SWIZZLE_XXZ 32'd6 |
`define SWIZZLE_YXX 32'd7 |
`define SWIZZLE_YYX 32'd8 |
`define SWIZZLE_YZZ 32'd9 |
`define SWIZZLE_YYZ 32'd10 |
`define SWIZZLE_ZXX 32'd11 |
`define SWIZZLE_ZZX 32'd12 |
`define SWIZZLE_ZYY 32'd13 |
`define SWIZZLE_ZZY 32'd14 |
`define SWIZZLE_XZX 32'd15 |
`define SWIZZLE_XYX 32'd16 |
`define SWIZZLE_YXY 32'd17 |
`define SWIZZLE_YZY 32'd18 |
`define SWIZZLE_ZXZ 32'd19 |
`define SWIZZLE_ZYZ 32'd20 |
`define SWIZZLE_YXZ 32'd21 |
|
|
|
/trunk/rtl/Module_WishBoneSlave.v
0,0 → 1,159
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
|
|
// Address-type tags. WishBoneSlaveUnit compares the latched TGA_I value |
// against these two constants to derive its write-enable outputs. |
// NOTE(review): the bus master must drive TGA_I with the same encoding -- confirm. |
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b10 |
`define TAG_DATA_ADDRESS_TYPE 2'b01 |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//------------------------------------------------------------------------------ |
// WishBoneSlaveUnit |
// |
// Wishbone slave that captures incoming bus words and presents them as |
// write address / write data / write enable signals on the oData* and |
// oInstruction* ports. |
// |
// What the logic below does: |
// - ADR_I[15:0] and TGA_I are captured by flip-flops whose clock input is |
// CYC_I (not CLK_I). |
// - ACK_O is (STB_I & WE_I) passed through one CLK_I flip-flop and then |
// ANDed with STB_I. |
// - A 3-bit shift register (wXYZSel) selects which of three `WIDTH-bit |
// registers (wVx, wVy, wVz) captures DAT_I. |
// - oDataBus = {wVx,wVy,wVz}; oInstructionBus = {wVx,wVy}. |
// - Both write enables are combinational: |
// MST_I && !CYC_I && (latched tag matches a TAG_* constant). |
// |
// FFD_POSEDGE_SYNCRONOUS_RESET and SHIFTLEFT_POSEDGE are defined outside this |
// file; their exact clock-edge/reset/enable behaviour is assumed from their |
// names -- TODO confirm. |
module WishBoneSlaveUnit |
( |
//WB Input signals |
input wire CLK_I, |
input wire RST_I, |
input wire STB_I, |
input wire WE_I, |
input wire[`WB_WIDTH-1:0] DAT_I, |
input wire[`WB_WIDTH-1:0] ADR_I, |
input wire [1:0] TGA_I, |
output wire ACK_O, |
input wire MST_I, //Master In! |
input wire CYC_I, |
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress, |
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus, |
output wire oDataWriteEnable, |
output wire oInstructionWriteEnable |
|
); |
|
// Address capture. The flip-flop's clock pin is driven by CYC_I, so the low |
// 16 bits of ADR_I are sampled on an edge of CYC_I rather than on CLK_I. |
// NOTE(review): the FF is instantiated 16 bits wide while Q drives a port that |
// is `ROM_ADDRESS_WIDTH bits wide -- confirm the two widths agree. |
FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR |
( |
.Clock( CYC_I ), |
.Reset( RST_I ), |
.Enable(1'b1), |
.D( ADR_I[15:0] ), |
.Q( oInstructionWriteAddress ) |
); |
|
// Data and instruction writes share the same latched address. |
// NOTE(review): the two ports are sized by different macros |
// (`DATA_ADDRESS_WIDTH vs `ROM_ADDRESS_WIDTH) -- confirm any implicit |
// truncation/extension here is intended. |
assign oDataWriteAddress = oInstructionWriteAddress; |
|
wire[1:0] wTGA_Latched; |
|
// Address-type tag capture, also clocked by CYC_I (same as FFADR above). |
FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE |
( |
.Clock( CYC_I ), |
.Reset( RST_I ), |
.Enable(1'b1), |
.D( TGA_I ), |
.Q( wTGA_Latched ) |
); |
|
|
|
// Local aliases for the bus clock and reset. |
wire Clock,Reset; |
assign Clock = CLK_I; |
assign Reset = RST_I; |
|
|
// High while the master is strobing a write. |
wire wLatchNow; |
assign wLatchNow = STB_I & WE_I; |
|
//1 Clock cycle after we assert the latch signal |
//then the FF has the data ready to propagate |
wire wDelay; |
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay |
( |
.Clock( Clock ), |
.Enable( 1'b1 ), |
.Reset( Reset ), |
.D( wLatchNow ), |
.Q( wDelay ) |
); |
|
assign ACK_O = wDelay & STB_I; //make sure we set ACK_O back to zero when STB_I is zero |
|
|
// Word selector for the three data registers below. It is loaded with 3'b1 |
// (selecting wVx), held in reset while CYC_I is low, and enabled while STB_I |
// is high and ACK_O is low. |
// NOTE(review): presumably shifts left by one position per enabled CLK_I edge, |
// stepping X -> Y -> Z -- confirm against SHIFTLEFT_POSEDGE. |
wire [2:0] wXYZSel; |
|
SHIFTLEFT_POSEDGE #(3) SHL |
( |
.Clock(CLK_I), |
.Enable(STB_I & ~ACK_O), |
.Reset(~CYC_I), |
.Initial(3'b1), |
.O(wXYZSel) |
|
); |
|
|
// NOTE(review): DAT_I is `WB_WIDTH bits wide while the three registers below |
// are `WIDTH bits wide -- confirm the two macros are equal. |
//Flip Flop to Store Vx |
wire [`WIDTH-1:0] wVx; |
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[0] & STB_I ), |
.D( DAT_I ), |
.Q( wVx ) |
|
); |
|
|
//Flip Flop to Store Vy |
wire [`WIDTH-1:0] wVy; |
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[1] & STB_I ), |
.D( DAT_I ), |
.Q( wVy ) |
|
); |
|
//Flip Flop to Store Vz |
wire [`WIDTH-1:0] wVz; |
|
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable( wXYZSel[2] & STB_I ), |
.D( DAT_I ), |
.Q( wVz ) |
); |
|
// A data row is the three captured words; an instruction is only the first two. |
assign oDataBus = {wVx,wVy,wVz}; |
assign oInstructionBus = {wVx,wVy}; |
wire wIsInstructionAddress,wIsDataAddress; |
assign wIsInstructionAddress = (wTGA_Latched == `TAG_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0; |
assign wIsDataAddress = (wTGA_Latched == `TAG_DATA_ADDRESS_TYPE ) ? 1'b1 : 1'b0; |
|
// Write enables are asserted only while MST_I is high and CYC_I is low. |
// NOTE(review): the pairing below looks crossed -- oDataWriteEnable is gated |
// by wIsInstructionAddress and oInstructionWriteEnable by wIsDataAddress. |
// This may be compensated by the tag encoding the bus master uses; confirm |
// against the master before changing either side. |
assign oDataWriteEnable = (MST_I && !CYC_I && wIsInstructionAddress) ? 1'b1 : 1'b0; |
assign oInstructionWriteEnable = ( MST_I && !CYC_I && wIsDataAddress) ? 1'b1 : 1'b0; |
|
|
|
endmodule |
//------------------------------------------------------------------------------ |
/trunk/rtl/Unit_Control.v
0,0 → 1,1234
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/********************************************************************************** |
Description: |
|
This is the main Finite State Machine. |
|
**********************************************************************************/ |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
// State encodings for the ControlUnit finite state machine. |
// CurrentState/NextState are declared as 6-bit regs in ControlUnit, so every |
// value here must stay in the range 0..63. |
// Values 18, 19 and 28 are currently unused (their defines are commented out). |
// WAIT_FOR_TCC is the only state name without the CU_ prefix; the name is |
// kept as-is because it is referenced by the FSM. |
`define CU_AFTER_RESET_STATE 0 |
`define CU_WAIT_FOR_INITIAL_CONFIGURATION 1 |
`define CU_TRIGGER_CONFIGURATION_DATA_READ 2 |
`define CU_WAIT_FOR_CONFIG_DATA_READ 3 |
`define CU_ACK_CONFIG_DATA_READ 4 |
`define CU_PRECALCULATE_CONSTANTS 5 |
`define CU_WAIT_FOR_CONSTANT 6 |
`define CU_ACK_PRECALCULATE_CONSTANTS 7 |
`define CU_WAIT_FOR_TASK 8 |
`define CU_READ_TASK_DATA 9 |
`define CU_WAIT_TASK_DATA_READ 10 |
`define CU_ACK_TASK_DATA_READ 11 |
`define CU_TRIGGER_RGU 12 |
`define CU_WAIT_FOR_RGU 13 |
`define CU_ACK_RGU 14 |
`define CU_TRIGGER_GEO 15 |
`define CU_WAIT_FOR_GEO_SYNC 16 |
//`define CU_CHECK_AABBIU_REQUEST 17 |
`define CU_TRIGGER_TCC 17 |
//`define CU_CHECK_BIU_REQUEST 18 |
//`define CU_TRIGGER_TFF 18 |
//`define CU_CHECK_GEO_DONE 19 |
//`define CU_WAIT_FOR_TFF 19 |
`define CU_TRIGGER_AABBIU 20 |
`define CU_WAIT_FOR_AABBIU 21 |
`define CU_TRIGGER_MAIN 22 |
`define CU_WAIT_FOR_MAIN 23 |
`define CU_ACK_MAIN 24 |
`define CU_TRIGGER_PSU 25 |
`define CU_WAIT_FOR_PSU 26 |
`define CU_ACK_PSU 27 |
//`define CU_TRIGGER_PCU 28 |
`define CU_WAIT_FOR_PCU 29 |
`define CU_ACK_PCU 30 |
`define CU_CHECK_HIT 31 |
`define CU_CLEAR_REGISTERS 32 |
`define CU_WAIT_CLEAR_REGISTERS 33 |
`define CU_ACK_CLEAR_REGISTERS 34 |
`define CU_TRIGGER_PSU_WITH_TEXTURE 35 |
`define WAIT_FOR_TCC 36 |
`define CU_TRIGGER_NPU 37 |
`define CU_WAIT_NPU 38 |
`define CU_ACK_NPU 39 |
`define CU_PERFORM_INTIAL_CONFIGURATION 40 |
`define CU_SET_PICTH 41 |
`define CU_TRIGGER_USERCONSTANTS 42 |
`define CU_WAIT_USERCONSTANTS 43 |
`define CU_ACK_USERCONSTANTS 44 |
`define CU_TRIGGER_USERPIXELSHADER 45 |
`define CU_WAIT_FOR_USERPIXELSHADER 46 |
`define CU_ACK_USERPIXELSHADER 47 |
`define CU_DONE 48 |
`define CU_WAIT_FOR_RENDER_ENABLE 49 |
`define CU_ACK_TCC 50 |
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51 |
`define CU_WAIT_FOR_HOST_DATA_ACK 52 |
//-------------------------------------------------------------- |
module ControlUnit |
( |
|
input wire Clock, |
input wire Reset, |
input wire[15:0] iControlRegister, |
output reg oGFUEnable, |
input wire iTriggerAABBIURequest, |
input wire iTriggerBIURequest, |
input wire iTriggertTCCRequest, |
output reg oUCodeEnable, |
output reg[`ROM_ADDRESS_WIDTH-1:0] oCodeInstructioPointer, |
input wire iUCodeDone, |
input wire iUCodeReturnValue, |
input wire iGFUDone, |
input wire iGEOSync, |
output reg oTriggerTFF, |
input wire iTFFDone, |
input wire MST_I, |
//output reg[2:0] //oRamBusOwner, |
input wire iIODone, |
output reg oSetCurrentPitch, |
output reg oFlipMemEnabled, |
output reg oFlipMem, |
output reg oIOWritePixel, |
input wire iRenderEnable, |
input wire iSceneTraverseComplete, |
input wire iHostDataAvailable, |
input wire iHostAckDataRead, |
|
`ifdef DEBUG |
input wire[`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
|
output reg oResultCommited, |
output reg oDone |
|
); |
|
//Internal State Machine varibles |
reg [5:0] CurrentState; |
reg [5:0] NextState; |
integer ucode_file; |
reg rResetHitFlop,rHitFlopEnable; |
wire wHit; |
|
`ifdef DUMP_CODE |
integer log; |
|
initial |
begin |
|
//$display("Opening ucode dump file....\n"); |
ucode_file = $fopen("CU.log","w"); |
end |
|
`endif |
|
|
|
//-------------------------------------------------------------- |
FFToggleOnce_1Bit FFTO1 |
( |
.Clock( Clock ), |
.Reset( rResetHitFlop ), |
.Enable( rHitFlopEnable && iUCodeDone ), |
.S( iUCodeReturnValue ), |
.Q( wHit ) |
); |
//-------------------------------------------------------------- |
|
`ifdef DEBUG_CU |
always @ ( wHit ) |
begin |
$display( "*** Triangle HIT ***\n"); |
end |
`endif |
|
//Next states logic and Reset sequence |
always @(posedge Clock or posedge Reset) |
begin |
|
if (Reset) |
CurrentState <= `CU_AFTER_RESET_STATE; |
else |
CurrentState <= NextState; |
|
end |
|
//-------------------------------------------------------------- |
always @ ( * ) |
begin |
case (CurrentState) |
//----------------------------------------- |
`CU_AFTER_RESET_STATE: |
begin |
|
`ifdef DEBUG_CU |
$display("%d CU_AFTER_RESET_STATE\n",$time); |
`endif |
|
//oRamBusOwner = 0; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 1; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 1; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_FOR_INITIAL_CONFIGURATION; |
|
end |
//----------------------------------------- |
|
`CU_WAIT_FOR_INITIAL_CONFIGURATION: |
begin |
//$display("CORE: %d CU_WAIT_FOR_INITIAL_CONFIGURATION", iDebug_CoreID); |
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_INITIAL_CONFIGURATION\n",$time); |
// `endif |
|
//oRamBusOwner = 0; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 1; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( MST_I ) |
NextState = `CU_PERFORM_INTIAL_CONFIGURATION;//`CU_WAIT_FOR_CONFIG_DATA_READ; |
else |
NextState = `CU_WAIT_FOR_INITIAL_CONFIGURATION; |
|
|
end |
//----------------------------------------- |
`CU_PERFORM_INTIAL_CONFIGURATION: |
begin |
|
//oRamBusOwner = 0; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 1; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( MST_I == 0 && iRenderEnable == 1'b1) |
NextState = `CU_CLEAR_REGISTERS;//`CU_WAIT_FOR_CONFIG_DATA_READ; |
else |
NextState = `CU_PERFORM_INTIAL_CONFIGURATION; |
|
|
end |
//----------------------------------------- |
`CU_CLEAR_REGISTERS: |
begin |
|
`ifdef DEBUG_CU |
$display("%d CU_CLEAR_REGISTERS\n",$time); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable = 0; |
oUCodeEnable = 1; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
|
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_CLEAR_REGISTERS; |
end |
//----------------------------------------- |
`CU_WAIT_CLEAR_REGISTERS: |
begin |
// `ifdef DEBUG_CU |
// $display("%d CU_WAIT_CLEAR_REGISTERS\n",$time); |
// `endif |
//$display("CORE: %d CU_WAIT_CLEAR_REGISTERS", iDebug_CoreID); |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_INITIAL; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_CLEAR_REGISTERS; |
else |
NextState = `CU_WAIT_CLEAR_REGISTERS; |
|
end |
//----------------------------------------- |
`CU_ACK_CLEAR_REGISTERS: |
begin |
|
`ifdef DEBUG_CU |
$display("%d CU_ACK_CLEAR_REGISTERS\n", $time); |
`endif |
|
//$display("CORE: %d CU_ACK_CLEAR_REGISTERS", iDebug_CoreID); |
|
//oRamBusOwner = 0; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_FOR_CONFIG_DATA_READ; |
end |
|
|
|
//----------------------------------------- |
`CU_WAIT_FOR_CONFIG_DATA_READ: |
begin |
|
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_CONFIG_DATA_READ\n",$time); |
// `endif |
|
|
//$display("CORE: %d CU_WAIT_FOR_CONFIG_DATA_READ", iDebug_CoreID); |
|
//oRamBusOwner = 0;//`REG_BUS_OWNED_BY_BCU; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( MST_I == 0 ) |
NextState = `CU_PRECALCULATE_CONSTANTS; |
else |
NextState = `CU_WAIT_FOR_CONFIG_DATA_READ; |
|
end |
//----------------------------------------- |
`CU_PRECALCULATE_CONSTANTS: |
begin |
//$display("CORE: %d CU_PRECALCULATE_CONSTANTS", iDebug_CoreID); |
`ifdef DEBUG_CU |
$display("%d Control: CU_PRECALCULATE_CONSTANTS\n", $time); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_CPPU; |
oGFUEnable = 0; |
oUCodeEnable = 1; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_FOR_CONSTANT; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_CONSTANT: |
begin |
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_CONSTANT\n", $time); |
// `endif |
|
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_CPPU; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_PRECALCULATE_CONSTANTS; |
else |
NextState = `CU_WAIT_FOR_CONSTANT; |
|
end |
//----------------------------------------- |
`CU_ACK_PRECALCULATE_CONSTANTS: |
begin |
//$display("CORE: %d CU_ACK_PRECALCULATE_CONSTANTS", iDebug_CoreID); |
`ifdef DEBUG_CU |
$display("%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time); |
`endif |
|
|
//oRamBusOwner = 0;//`REG_BUS_OWNED_BY_BCU; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_TRIGGER_USERCONSTANTS;//CU_WAIT_FOR_TASK; |
|
end |
//----------------------------------------- |
|
`CU_TRIGGER_USERCONSTANTS: |
begin |
`ifdef DEBUG_CU |
$display("%d Control: CU_TRIGGER_USERCONSTANTS\n",$time); |
`endif |
|
//$display("CORE: %d CU_TRIGGER_USERCONSTANTS", iDebug_CoreID); |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_USERCONSTANTS; |
oGFUEnable = 0; |
oUCodeEnable = 1; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_USERCONSTANTS; |
end |
//----------------------------------------- |
`CU_WAIT_USERCONSTANTS: |
begin |
|
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_RGU\n",$time); |
// `endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_USERCONSTANTS; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_USERCONSTANTS; |
else |
NextState = `CU_WAIT_USERCONSTANTS; |
end |
//----------------------------------------- |
`CU_ACK_USERCONSTANTS: |
begin |
|
`ifdef DEBUG_CU |
$display("%d Control: CU_ACK_RGU\n",$time); |
`endif |
|
//$display("CORE: %d CU_ACK_USERCONSTANTS", iDebug_CoreID); |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0) |
NextState = `CU_WAIT_FOR_RENDER_ENABLE; |
else |
NextState = `CU_ACK_USERCONSTANTS; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_RENDER_ENABLE: |
begin |
`ifdef DEBUG_CU |
$display("CORE: %d CU_WAIT_FOR_RENDER_ENABLE", iDebug_CoreID); |
`endif |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iRenderEnable) |
NextState = `CU_TRIGGER_RGU; |
else |
NextState = `CU_WAIT_FOR_RENDER_ENABLE; |
end |
//----------------------------------------- |
`CU_TRIGGER_RGU: |
begin |
|
`ifdef DEBUG_CU |
$display("CORE: %d CU_TRIGGER_RGU", iDebug_CoreID); |
`endif |
|
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_RGU; |
oGFUEnable = 0; |
oUCodeEnable = 1; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_FOR_RGU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_RGU: |
begin |
|
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_RGU\n",$time); |
// `endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_RGU; |
else |
NextState = `CU_WAIT_FOR_RGU; |
end |
//----------------------------------------- |
`CU_ACK_RGU: |
begin |
|
`ifdef DEBUG_CU |
$display("CORE: %d CU_ACK_RGU", iDebug_CoreID); |
`endif |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oGFUEnable = 0; |
oUCodeEnable = 0; //* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0 & iRenderEnable == 1) |
NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE;//`CU_TRIGGER_GEO;///////////// GET RID OF GEO!!! |
else |
NextState = `CU_ACK_RGU; |
|
end |
//----------------------------------------- |
`CU_TRIGGER_TCC: |
begin |
////$display("CU_TRIGGER_TCC"); |
`ifdef DEBUG_CU |
$display("%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable = 1; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; //We need u,v from last IO read cycle |
oResultCommited = 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch = 0; |
oDone = 0; |
|
NextState = `WAIT_FOR_TCC; |
end |
//----------------------------------------- |
`WAIT_FOR_TCC: |
begin |
|
////$display("WAIT_FOR_TCC"); |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable = 0; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_TCC; |
else |
NextState = `WAIT_FOR_TCC; |
|
end |
//----------------------------------------- |
`CU_ACK_TCC: |
begin |
|
////$display("WAIT_FOR_TCC"); |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_TCC; |
oUCodeEnable = 0; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0 && iSceneTraverseComplete == 1'b1) //DDDD |
NextState = `CU_TRIGGER_PSU_WITH_TEXTURE; |
else if (iUCodeDone == 0 && iSceneTraverseComplete == 1'b0) |
NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
else |
NextState = `CU_ACK_TCC; |
|
end |
//----------------------------------------- |
/* |
Was there any hit at all? |
At this point, all the triangles in the list |
have been traversed looking for a hit with our ray. |
There are 3 possibilities: |
1) There was not a single hit, then just paint a black |
pixel on the screen and send it via PCU. |
2) There was a hit and Texturing is not enabled, then trigger the PSU with |
no texturing. |
3) There was a hit and Texturing is enabled, then fetch the texture |
values corresponding to the triangle that we hit. |
*/ |
`CU_CHECK_HIT: |
begin |
|
`ifdef DEBUG_CU |
$display("%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID); |
`endif |
|
|
//oRamBusOwner = `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer = 0; |
oUCodeEnable = 0; |
oGFUEnable = 0; ///CHANGED Aug 15 |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
|
|
|
if (wHit) |
begin |
//$display("HIT"); |
NextState = `CU_TRIGGER_PSU_WITH_TEXTURE; |
end |
else |
NextState = `CU_TRIGGER_USERPIXELSHADER;//666 |
|
end |
|
//----------------------------------------- |
`CU_TRIGGER_PSU_WITH_TEXTURE: |
begin |
|
`ifdef DEBUG_CU |
$display("%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_PSU2; |
oUCodeEnable = 1; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 1; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0;//////NEW NEW NEW NEW |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_FOR_PSU; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_HOST_DATA_ACK: |
begin |
oCodeInstructioPointer = 0; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
|
if ( iHostAckDataRead ) |
NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
else |
NextState = `CU_WAIT_FOR_HOST_DATA_ACK; |
end |
//----------------------------------------- |
//Wait until data from Host becomes available |
`CU_WAIT_FOR_HOST_DATA_AVAILABLE: |
begin |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
|
if ( iHostDataAvailable ) |
NextState = `CU_TRIGGER_MAIN; |
else |
NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
|
|
end |
//----------------------------------------- |
`CU_TRIGGER_MAIN: |
begin |
`ifdef DEBUG_CU |
$display("%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable = 1; |
oGFUEnable = 1; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 1; |
oDone = 0; |
oResultCommited = 0; |
////$display("\n\n %d XOXOXOXOX FLIP XOXOXOXOXOX\n\n",$time); |
//oIncCurrentPitch = 0; |
// $stop(); |
|
NextState = `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_MAIN: |
begin |
// `ifdef DEBUG_CU |
// $display("%d Control: CU_WAIT_FOR_MAIN\n",$time); |
// `endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_MAIN; |
oUCodeEnable = 0; |
oGFUEnable = 1; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 1; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
//NextState = `CU_WAIT_FOR_MAIN; |
|
|
if ( iUCodeDone ) |
NextState = `CU_ACK_MAIN; |
else |
NextState = `CU_WAIT_FOR_MAIN; |
|
end |
//----------------------------------------- |
/* |
ACK UCODE by setting oUCodeEnable = 0 |
*/ |
`CU_ACK_MAIN: |
begin |
`ifdef DEBUG_CU |
$display("%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_GFU; |
oCodeInstructioPointer = 0; //* |
oUCodeEnable = 0; //* |
oGFUEnable = 0; //Changed Aug 15 |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 1; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
// $stop(); |
|
if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b1) |
NextState = `CU_CHECK_HIT; |
else if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b0) //ERROR!!! What if iSceneTraverseComplete will become 1 a cycle after this?? |
NextState = `CU_WAIT_FOR_HOST_DATA_ACK;//`CU_WAIT_FOR_HOST_DATA_AVAILABLE; |
else |
NextState = `CU_ACK_MAIN; |
|
|
|
end |
//----------------------------------------- |
`CU_WAIT_FOR_PSU: |
begin |
|
// `ifdef DEBUG_CU |
// $display("%d Control: CU_TRIGGER_PSU\n",$time); |
// `endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_PSU; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
|
if ( iUCodeDone ) |
NextState = `CU_ACK_PSU; |
else |
NextState = `CU_WAIT_FOR_PSU; |
|
end |
//----------------------------------------- |
`CU_ACK_PSU: |
begin |
`ifdef DEBUG_CU |
$display("%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; //* |
oUCodeEnable = 0; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0) |
NextState = `CU_TRIGGER_USERPIXELSHADER; |
else |
NextState = `CU_ACK_PSU; |
|
|
end |
//----------------------------------------- |
|
//----------------------------------------- |
`CU_TRIGGER_NPU: //Next Pixel Unit |
begin |
`ifdef DEBUG_CU |
$display("%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID); |
`endif |
//$write("*"); |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_NPG; //* |
oUCodeEnable = 1; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_WAIT_NPU; |
end |
//----------------------------------------- |
`CU_WAIT_NPU: |
begin |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_NPG; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone ) |
NextState = `CU_ACK_NPU; |
else |
NextState = `CU_WAIT_NPU; |
end |
//----------------------------------------- |
/* |
Next Pixel generation: here we either go |
to the RGU for the next pixel, or we have no |
more pixels, so we are done with our picture! |
*/ |
`CU_ACK_NPU: |
begin |
`ifdef DEBUG_CU |
$display("%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; //* |
oUCodeEnable = 0; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0 && iUCodeReturnValue == 1) |
NextState = `CU_TRIGGER_RGU; |
else if (iUCodeDone == 0 && iUCodeReturnValue == 0) |
NextState = `CU_DONE; |
else |
NextState = `CU_ACK_NPU; |
|
|
end |
//----------------------------------------- |
`CU_DONE: |
begin |
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 1; |
oDone = 1; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
|
NextState = `CU_DONE; |
|
end |
//----------------------------------------- |
/* |
Here we no longer use GFU so set Enable to zero |
*/ |
`CU_TRIGGER_USERPIXELSHADER: |
begin |
`ifdef DEBUG_CU |
$display("%d Control: CU_TRIGGER_PSU\n",$time); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_PIXELSHADER; |
oUCodeEnable = 1; |
oGFUEnable = 0;//* |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
|
NextState = `CU_WAIT_FOR_USERPIXELSHADER; |
end |
//----------------------------------------- |
`CU_WAIT_FOR_USERPIXELSHADER: |
begin |
|
// `ifdef DEBUG_CU |
// $display("%d Control: CU_TRIGGER_PSU\n",$time); |
// `endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = `ENTRYPOINT_INDEX_PIXELSHADER; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
|
if ( iUCodeDone ) |
NextState = `CU_ACK_USERPIXELSHADER; |
else |
NextState = `CU_WAIT_FOR_USERPIXELSHADER; |
|
end |
//----------------------------------------- |
`CU_ACK_USERPIXELSHADER: |
begin |
`ifdef DEBUG_CU |
$display("%d Control: CU_ACK_PSU\n",$time); |
`endif |
|
//oRamBusOwner = `REG_BUS_OWNED_BY_UCODE; |
oCodeInstructioPointer = 0; //* |
oUCodeEnable = 0; //* |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 1; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 1; |
//oIncCurrentPitch = 0; |
|
if ( iUCodeDone == 0) |
NextState = `CU_TRIGGER_NPU;//`CU_TRIGGER_PCU; |
else |
NextState = `CU_ACK_USERPIXELSHADER; |
|
|
end |
//--------------------------------------------------- |
default: |
begin |
|
`ifdef DEBUG_CU |
$display("%d Control: ERROR Undefined State\n",$time); |
`endif |
|
//oRamBusOwner = 0; |
oCodeInstructioPointer = 0; |
oUCodeEnable = 0; |
oGFUEnable = 0; |
oIOWritePixel = 0; |
rResetHitFlop = 0; |
rHitFlopEnable = 0; |
oTriggerTFF = 0; |
oSetCurrentPitch = 0; |
oFlipMemEnabled = 0; |
oFlipMem = 0; |
oDone = 0; |
oResultCommited = 0; |
//oIncCurrentPitch = 0; |
|
NextState = `CU_AFTER_RESET_STATE; |
end |
//----------------------------------------- |
|
endcase |
|
end //always |
endmodule |
/trunk/rtl/Unit_EXE.v
0,0 → 1,275
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
//--------------------------------------------------------------------- |
//---------------------------------------------------------------------
// ExecutionUnit
//
// Structural top level of the execution pipeline. It contains no logic of
// its own apart from one mux; it wires together five sub-blocks:
//
//   EPU (InstructionEntryPoint) - started by iTrigger, produces the code
//                                 entry point and the trigger for the IFU.
//   IFU (InstructionFetch)      - supplies the current instruction and
//                                 drives oDone / oReturnCode.
//   IDU (InstructionDecode)     - reads the two data-memory operands and
//                                 receives the ALU results for forwarding.
//   EXE (ExecutionFSM)          - sequences the ALU and the data-memory
//                                 write port.
//   ALU (VectorALU)             - three-channel ALU, also owns the OMEM
//                                 write and TMEM read ports.
//
// oInstructionPointer1 is taken from the EPU while wEPU_Busy is set and
// from the IFU otherwise.
//---------------------------------------------------------------------
module ExecutionUnit
(
    input  wire                              Clock,
    input  wire                              Reset,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]     iInitialCodeAddress,
    input  wire [`INSTRUCTION_WIDTH-1:0]     iInstruction1,
    input  wire [`INSTRUCTION_WIDTH-1:0]     iInstruction2,

    // Data memory read values and start strobe
    input  wire [`DATA_ROW_WIDTH-1:0]        iDataRead0,
    input  wire [`DATA_ROW_WIDTH-1:0]        iDataRead1,
    input  wire                              iTrigger,

    // Instruction / data memory addressing and write port
    output wire [`ROM_ADDRESS_WIDTH-1:0]     oInstructionPointer1,
    output wire [`ROM_ADDRESS_WIDTH-1:0]     oInstructionPointer2,
    output wire [`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress0,
    output wire [`DATA_ADDRESS_WIDTH-1:0]    oDataReadAddress1,
    output wire                              oDataWriteEnable,
    output wire [`DATA_ADDRESS_WIDTH-1:0]    oDataWriteAddress,
    output wire [`DATA_ROW_WIDTH-1:0]        oDataBus,
    output wire                              oReturnCode,

    // OMEM (write) and TMEM (read) ports, all driven by / fed to the ALU
    output wire [`DATA_ROW_WIDTH-1:0]        oOMEMWriteAddress,
    output wire [`DATA_ROW_WIDTH-1:0]        oOMEMWriteData,
    output wire                              oOMEMWriteEnable,
    output wire [`DATA_ROW_WIDTH-1:0]        oTMEMReadAddress,
    input  wire [`DATA_ROW_WIDTH-1:0]        iTMEMReadData,
    input  wire                              iTMEMDataAvailable,
    output wire                              oTMEMDataRequest,

`ifdef DEBUG
    input  wire [`MAX_CORES-1:0]             iDebug_CoreID,
`endif
    output wire                              oDone
);

    //-----------------------------------------------------------------
    // Internal nets
    //-----------------------------------------------------------------
`ifdef DEBUG
    wire [`ROM_ADDRESS_WIDTH-1:0]     wDEBUG_IDU2_EXE_InstructionPointer;
`endif

    // Entry point unit <-> fetch unit
    wire                              wEPU_Busy;
    wire                              wTriggerIFU;
    wire [`ROM_ADDRESS_WIDTH-1:0]     wEPU_IP;
    wire [`ROM_ADDRESS_WIDTH-1:0]     wIFU_IP;
    wire [`ROM_ADDRESS_WIDTH-1:0]     wCodeEntryPoint;

    // Fetch -> decode
    wire [`INSTRUCTION_WIDTH-1:0]     CurrentInstruction;
    wire                              wInstructionAvailable;

    // Decode -> execution FSM
    wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;
    wire [`DATA_ROW_WIDTH-1:0]        wSource0;
    wire [`DATA_ROW_WIDTH-1:0]        wSource1;
    wire [`DATA_ADDRESS_WIDTH-1:0]    wDestination;
    wire                              wIDU2_EXE_DataReady;

    // Execution FSM status / forwarding
    wire                              wEXE2__uCodeDone;          // declared, not connected here
    wire                              wEXE2_IFU__EXEBusy;
    wire                              wEXE2_IDU_ExeLatchedValues;
    wire [`DATA_ADDRESS_WIDTH-1:0]    wEXE2_IDU_DataFordward_LastDestination;
    wire                              w2FIU__BranchTaken;
    wire [`ROM_ADDRESS_WIDTH-1:0]     JumpIp;
    wire [`ROM_ADDRESS_WIDTH-1:0]     wIDU2_IFU_ReturnAddress;   // its port hookups are commented out below

    // Execution FSM <-> ALU
    wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation;
    wire [`WIDTH-1:0]                 ALU2ChannelA;
    wire [`WIDTH-1:0]                 ALU2ChannelB;
    wire [`WIDTH-1:0]                 ALU2ChannelC;
    wire [`WIDTH-1:0]                 ALU2ChannelD;
    wire [`WIDTH-1:0]                 ALU2ChannelE;
    wire [`WIDTH-1:0]                 ALU2ChannelF;
    wire [`WIDTH-1:0]                 ALU2ResultA;
    wire [`WIDTH-1:0]                 ALU2ResultB;
    wire [`WIDTH-1:0]                 ALU2ResultC;
    wire                              wEXE2_ALU__TriggerALU;
    wire                              ALU2OutputReady;
    wire                              wALU2_EXE__BranchTaken;
    wire                              wALU2_IFU_BranchNotTaken;
    wire                              wALU2_IFU_ReturnFromSub;

    //wire wIDU2_IFU__IDUBusy;
    //wire wIDU2_IFU__InputsLatched;

    //-----------------------------------------------------------------
    // Instruction pointer source select: EPU while it is busy, IFU after.
    //-----------------------------------------------------------------
    assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP;

    //-----------------------------------------------------------------
    // Entry point unit
    //-----------------------------------------------------------------
    InstructionEntryPoint EPU
    (
        .Clock               ( Clock               ),
        .Reset               ( Reset               ),
        .iTrigger            ( iTrigger            ),
        .iInitialCodeAddress ( iInitialCodeAddress ),
        .iIMemInput          ( iInstruction1       ),

        .oEPU_Busy           ( wEPU_Busy           ),
        .oEntryPoint         ( wCodeEntryPoint     ),
        .oTriggerIFU         ( wTriggerIFU         ),
        .oInstructionAddr    ( wEPU_IP             )
    );

    //-----------------------------------------------------------------
    // Instruction fetch unit
    //-----------------------------------------------------------------
    InstructionFetch IFU
    (
        .Clock                 ( Clock                   ),
        .Reset                 ( Reset                   ),
        .iTrigger              ( wTriggerIFU             ),
        .iInstruction1         ( iInstruction1           ),
        .iInstruction2         ( iInstruction2           ),
        .iInitialCodeAddress   ( wCodeEntryPoint         ),
        .iBranchTaken          ( w2FIU__BranchTaken      ),
        .iSubroutineReturn     ( wALU2_IFU_ReturnFromSub ),
        //.iReturnAddress      ( wIDU2_IFU_ReturnAddress ),
        .oCurrentInstruction   ( CurrentInstruction      ),
        .oInstructionAvalable  ( wInstructionAvailable   ),
        .oIP                   ( wIFU_IP                 ),
        .oIP2                  ( oInstructionPointer2    ),
        .iEXEDone              ( ALU2OutputReady         ),
        .oMicroCodeReturnValue ( oReturnCode             ),
        .oExecutionDone        ( oDone                   )
    );

    //-----------------------------------------------------------------
    // Instruction decode unit
    //-----------------------------------------------------------------
    InstructionDecode IDU
    (
        .Clock                 ( Clock                 ),
        .Reset                 ( Reset                 ),
        .iEncodedInstruction   ( CurrentInstruction    ),
        .iInstructionAvailable ( wInstructionAvailable ),
        //.iIP                 ( oInstructionPointer1 ),
        //.oReturnAddress      ( wIDU2_IFU_ReturnAddress ),

        // Operand fetch from data memory
        .oRamAddress0          ( oDataReadAddress0 ),
        .oRamAddress1          ( oDataReadAddress1 ),
        .iRamValue0            ( iDataRead0        ),
        .iRamValue1            ( iDataRead1        ),

        // Forwarding: last destination written plus the packed ALU results
        .iLastDestination      ( wEXE2_IDU_DataFordward_LastDestination  ),
        .iDataForward          ( {ALU2ResultA, ALU2ResultB, ALU2ResultC} ),

        // Decoded fields handed to the execution FSM
        .oOperation            ( wOperation   ),
        .oDestination          ( wDestination ),
        .oSource0              ( wSource0     ),
        .oSource1              ( wSource1     ),

`ifdef DEBUG
        .iDebug_CurrentIP      ( oInstructionPointer1               ),
        .oDebug_CurrentIP      ( wDEBUG_IDU2_EXE_InstructionPointer ),
`endif

        .oDataReadyForExe      ( wIDU2_EXE_DataReady )
    );

    //-----------------------------------------------------------------
    // Execution FSM. Note that a new iTrigger also resets this block.
    //-----------------------------------------------------------------
    ExecutionFSM EXE
    (
        .Clock             ( Clock               ),
        .Reset             ( Reset | iTrigger    ), //New Sat Jun13
        .iDecodeDone       ( wIDU2_EXE_DataReady ),
        .iOperation        ( wOperation          ),
        .iDestination      ( wDestination        ),
        .iSource0          ( wSource0            ),
        .iSource1          ( wSource1            ),

`ifdef DEBUG
        .iDebug_CurrentIP  ( wDEBUG_IDU2_EXE_InstructionPointer ),
        .iDebug_CoreID     ( iDebug_CoreID                      ),
`endif

        //.iJumpResultFromALU( wALU2_EXE__BranchTaken ),
        .iBranchTaken      ( wALU2_EXE__BranchTaken   ),
        .iBranchNotTaken   ( wALU2_IFU_BranchNotTaken ),
        .oJumpFlag         ( w2FIU__BranchTaken       ),
        .oJumpIp           ( JumpIp                   ),
        .oRAMWriteEnable   ( oDataWriteEnable         ),
        .oRAMWriteAddress  ( oDataWriteAddress        ),
        .RAMBus            ( oDataBus                 ),
        .oBusy             ( wEXE2_IFU__EXEBusy       ),

        .oExeLatchedValues ( wEXE2_IDU_ExeLatchedValues             ),
        .oLastDestination  ( wEXE2_IDU_DataFordward_LastDestination ),

        // ALU operands, results and handshake
        .oTriggerALU       ( wEXE2_ALU__TriggerALU ),
        .oALUOperation     ( ALU2Operation         ),
        .oALUChannelX1     ( ALU2ChannelA          ),
        .oALUChannelX2     ( ALU2ChannelB          ),
        .oALUChannelY1     ( ALU2ChannelC          ),
        .oALUChannelY2     ( ALU2ChannelD          ),
        .oALUChannelZ1     ( ALU2ChannelE          ),
        .oALUChannelZ2     ( ALU2ChannelF          ),
        .iALUResultX       ( ALU2ResultA           ),
        .iALUResultY       ( ALU2ResultB           ),
        .iALUResultZ       ( ALU2ResultC           ),
        .iALUOutputReady   ( ALU2OutputReady       )
    );

    //-----------------------------------------------------------------
    // Vector ALU
    //-----------------------------------------------------------------
    VectorALU ALU
    (
        .Clock              ( Clock                    ),
        .Reset              ( Reset                    ),
        .iOperation         ( ALU2Operation            ),
        .iChannel_Ax        ( ALU2ChannelA             ),
        .iChannel_Bx        ( ALU2ChannelB             ),
        .iChannel_Ay        ( ALU2ChannelC             ),
        .iChannel_By        ( ALU2ChannelD             ),
        .iChannel_Az        ( ALU2ChannelE             ),
        .iChannel_Bz        ( ALU2ChannelF             ),
        .oResultA           ( ALU2ResultA              ),
        .oResultB           ( ALU2ResultB              ),
        .oResultC           ( ALU2ResultC              ),
        .oBranchTaken       ( wALU2_EXE__BranchTaken   ),
        .oBranchNotTaken    ( wALU2_IFU_BranchNotTaken ),
        .oReturnFromSub     ( wALU2_IFU_ReturnFromSub  ),
        .iInputReady        ( wEXE2_ALU__TriggerALU    ),

        // External memory ports
        .oOMEMWriteAddress  ( oOMEMWriteAddress  ),
        .oOMEMWriteData     ( oOMEMWriteData     ),
        .oOMEM_WriteEnable  ( oOMEMWriteEnable   ),

        .oTMEMReadAddress   ( oTMEMReadAddress   ),
        .iTMEMReadData      ( iTMEMReadData      ),
        .iTMEMDataAvailable ( iTMEMDataAvailable ),
        .oTMEMDataRequest   ( oTMEMDataRequest   ),

        .iCurrentIP         ( oInstructionPointer1 ),
        .OutputReady        ( ALU2OutputReady      )
    );

endmodule
//--------------------------------------------------------------------- |
/trunk/rtl/Module_FixedPointDivision.v
0,0 → 1,328
/* |
Fixed point Division Module Qm.n |
C = (A << n) / B |
|
*/ |
|
|
//Division State Machine Constants |
`define INITIAL_DIVISION_STATE 6'd1 |
`define DIVISION_REVERSE_LAST_ITERATION 6'd2 |
`define PRE_CALCULATE_REMAINDER 6'd3 |
`define CALCULATE_REMAINDER 6'd4 |
`define WRITE_DIVISION_RESULT 6'd5 |
|
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
`define FPS_AFTER_RESET_STATE 0 |
//----------------------------------------------------------------- |
//This only works if your divisor is a power of 2 |
//x % 2^n == x & (2^n - 1). |
/* |
module Modulus2N |
( |
input wire Clock, |
input wire Reset, |
input wire [`WIDTH-1:0] iDividend,iDivisor, |
output reg [`WIDTH-1:0] oQuotient, |
input wire iInputReady, //Is the input data valid? |
output reg oOutputReady //Our output data is ready! |
); |
|
|
|
FF1_POSEDGE_SYNCRONOUS_RESET FFOutputReadyDelay2 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( iInputReady ), |
.Q( oOutputReady ) |
); |
|
assign oQuotient = (iDividend & (iDivisor-1'b1)); |
|
|
endmodule |
*/ |
//----------------------------------------------------------------- |
/* |
Be aware that the unsigned division algorithm doesn't know or care |
about the sign bit of the Result (bit 31). So if your divisor is very |
small there is a chance that bit 31 from the unsigned division is |
one even though the result should be positive |
|
*/ |
//-----------------------------------------------------------------
// SignedIntegerDivision
//
// Sign/magnitude wrapper around UnsignedIntegerDivision.
//
//   * iDividend / iDivisor are captured while iInputReady is high.
//   * The two's-complement magnitudes are handed to the unsigned divider,
//     whose start strobe is iInputReady delayed by one clock (FFD3).
//   * When the unsigned divider reports DivReady, bit 31 of its result is
//     masked off and the result is negated if the operand signs differ.
//
// Ports
//   Clock, Reset  - clock and synchronous reset (Reset clears OutputReady).
//   iDividend     - signed dividend.
//   iDivisor      - signed divisor.
//   xQuotient     - signed quotient, updated on the clock edge at which
//                   DivReady is sampled high.
//   iInputReady   - operands are valid.
//   OutputReady   - registered copy of DivReady (forced low during Reset).
//
// Parameter
//   SIGN          - index of the sign bit in the operands.
//-----------------------------------------------------------------
module SignedIntegerDivision
(
    input  wire              Clock,Reset,
    input  wire [`WIDTH-1:0] iDividend,iDivisor,
    output reg  [`WIDTH-1:0] xQuotient,
    input  wire              iInputReady, //Is the input data valid?
    output reg               OutputReady  //Our output data is ready!
);

    parameter SIGN = 31;
    wire Sign;

    wire [`WIDTH-1:0] wDividend,wDivisor;
    wire wInputReady;

    // Operand capture registers, loaded while iInputReady is asserted.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
    (
        .Clock( Clock ),
        .Reset( Reset),
        .Enable( iInputReady ),
        .D( iDividend ),
        .Q( wDividend)
    );
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
    (
        .Clock( Clock ),
        .Reset( Reset),
        .Enable( iInputReady ),
        .D( iDivisor ),
        .Q( wDivisor )
    );

    // One-cycle delayed start strobe so the unsigned divider sees the
    // captured operands rather than the raw inputs.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
    (
        .Clock( Clock ),
        .Reset( Reset),
        .Enable( 1'b1 ),
        .D( iInputReady ),
        .Q( wInputReady )
    );

    //wire [7:0] wExitStatus;
    wire [`WIDTH-1:0] wAbsDividend,wAbsDivisor;
    wire [`WIDTH-1:0] wQuottientTemp;
    wire [`WIDTH-1:0] wAbsQuotient;

    // Result is negative when exactly one operand is negative.
    assign Sign = wDividend[SIGN] ^ wDivisor[SIGN];

    // Two's-complement absolute values of the captured operands.
    assign wAbsDividend = ( wDividend[SIGN] == 1 )?
        ~wDividend + 1'b1 : wDividend;

    assign wAbsDivisor = ( wDivisor[SIGN] == 1 )?
        ~wDivisor + 1'b1 : wDivisor;

    wire DivReady;

    UnsignedIntegerDivision UDIV
    (
        .Clock(Clock),
        .Reset( Reset ),
        .iDividend( wAbsDividend),
        .iDivisor( wAbsDivisor ),
        .xQuotient(wQuottientTemp),
        .iInputReady( wInputReady ),
        .OutputReady( DivReady )
    );

    //Make sure the output from the 'unsigned' operation is really positive
    assign wAbsQuotient = wQuottientTemp & 32'h7FFFFFFF;

    //assign Quotient = wAbsQuotient;

    //-----------------------------------------------
    // Output registers.
    //
    // Non-blocking assignments are used so that other posedge-Clock
    // processes sampling xQuotient / OutputReady always observe the value
    // from before this edge, independent of simulator process ordering.
    always @ ( posedge Clock )
    begin

        // Re-apply the sign once the unsigned result is available.
        if ( DivReady )
        begin
            if ( Sign == 1 )
                xQuotient <= ~wAbsQuotient + 1'b1;
            else
                xQuotient <= wAbsQuotient;
        end

        // Reset has priority over the ready flag.
        if ( Reset == 1 )
            OutputReady <= 1'b0;
        else
            OutputReady <= DivReady;

    end
    //-----------------------------------------------

endmodule
//----------------------------------------------------------------- |
/* |
|
Returns the integer part (Quotient) of a division. |
|
Division is the process of repeated subtraction. |
Like the long division we learned in grade school, |
a binary division algorithm works from the high |
order digits to the low order digits and generates |
a quotient (division result) with each step. |
The division algorithm is divided into two steps: |
* Shift the upper bits of the dividend (the number |
we are dividing into) into the remainder. |
* Subtract the divisor from the value in the remainder. |
The high order bit of the result become a bit of |
the quotient (division result). |
|
*/ |
|
//----------------------------------------------------------------- |
/* |
Try to implemet the division as a FSM, |
this basically because the behavioral Division has a for loop, |
with a variable loop limit counter which I think is not friendly |
to the synthetiser (dumb dumb synthetizer :) ) |
*/ |
//-----------------------------------------------------------------
// UnsignedIntegerDivision
//
// Multi-cycle restoring divider implemented as a state machine.
// Computes (iDividend << `SCALE) / iDivisor on 64-bit working registers
// and returns the low `WIDTH bits of the quotient.
//
// Handshake
//   * Idle state waits for iInputReady == 1, then loads the operands.
//   * OutputReady is raised in WRITE_DIVISION_RESULT and the FSM stays
//     there until iInputReady returns to 0.
//
// Special cases handled when the operands are loaded
//   * Divisor == 0        -> quotient forced to 32'h0FFF_FFFF
//   * Divisor >  Dividend -> quotient 0
//   * Divisor == Dividend -> quotient 1
//   (comparisons are made against the already scaled dividend)
//
// Timing note: NextState is computed on the rising clock edge and copied
// into CurrentState on the falling edge. The datapath relies on blocking
// assignments (values written earlier in a state are read later in the
// same state), so the assignment style is intentionally left as is.
//-----------------------------------------------------------------
module UnsignedIntegerDivision(
    input  wire              Clock,Reset,
    input  wire [`WIDTH-1:0] iDividend,iDivisor,
    //output reg [`WIDTH-1:0] Quotient,Remainder,

    output reg  [`WIDTH-1:0] xQuotient,

    input  wire              iInputReady, //Is the input data valid?
    output reg               OutputReady  //Our output data is ready!
    //output reg [7:0] ExitStatus
);

    //reg [`WIDTH-1:0] Dividend, Divisor;

    reg [63:0] Dividend,Divisor;

    //reg [`WIDTH-1:0] t, q, d, i,Bit, num_bits;
    reg [`WIDTH-1:0] i,num_bits;
    reg [63:0] t, q, d, Bit;
    reg [63:0] Quotient,Remainder;

    reg [5:0] CurrentState, NextState;
    //----------------------------------------
    //Next states logic and Reset sequence
    always @(negedge Clock)
    begin
        if( Reset!=1 )
            CurrentState = NextState;
        else
            CurrentState = `FPS_AFTER_RESET_STATE;
    end
    //----------------------------------------

    always @ (posedge Clock)
    begin
        case (CurrentState)
        //----------------------------------------
        // Idle: wait for a start request.
        `FPS_AFTER_RESET_STATE:
        begin
            OutputReady = 0;
            NextState = ( iInputReady == 1 ) ?
                `INITIAL_DIVISION_STATE : `FPS_AFTER_RESET_STATE;
        end
        //----------------------------------------
        // Load and pre-scale the operands, resolve the trivial cases.
        `INITIAL_DIVISION_STATE:
        begin
            Dividend = iDividend;
            Dividend = Dividend << `SCALE;

            Divisor = iDivisor;
            Remainder = 0;
            Quotient = 0;

            if (Divisor == 0)
            begin
                Quotient[31:0] = 32'h0FFF_FFFF;
                // ExitStatus = `DIVISION_BY_ZERO;
                NextState = `WRITE_DIVISION_RESULT;
            end
            else if (Divisor > Dividend)
            begin
                Remainder = Dividend;
                //ExitStatus = `NORMAL_EXIT;
                NextState = `WRITE_DIVISION_RESULT;
            end
            else if (Divisor == Dividend)
            begin
                Quotient = 1;
                // ExitStatus = `NORMAL_EXIT;
                NextState = `WRITE_DIVISION_RESULT;
            end
            else
            begin
                NextState = `PRE_CALCULATE_REMAINDER;
            end
            //num_bits = 32;
            num_bits = 64;
        end

        //----------------------------------------
        // Shift dividend bits into the remainder until it is no longer
        // smaller than the divisor; num_bits tracks the bits still to do.
        `PRE_CALCULATE_REMAINDER:
        begin

            //Bit = (Dividend & 32'h80000000) >> 31;
            Bit = (Dividend & 64'h8000000000000000 ) >> 63;
            Remainder = (Remainder << 1) | Bit;
            d = Dividend;
            Dividend = Dividend << 1;
            num_bits = num_bits - 1;


            // $display("num_bits %d Remainder %d Divisor %d\n",num_bits,Remainder,Divisor);
            NextState = (Remainder < Divisor) ?
                `PRE_CALCULATE_REMAINDER : `DIVISION_REVERSE_LAST_ITERATION;
        end
        //----------------------------------------
        /*
            The loop, above, always goes one iteration too far.
            To avoid inserting an "if" statement inside the loop
            the last iteration is simply reversed.
        */
        `DIVISION_REVERSE_LAST_ITERATION:
        begin
            Dividend = d;
            Remainder = Remainder >> 1;
            num_bits = num_bits + 1;
            i = 0;

            NextState = `CALCULATE_REMAINDER;
        end
        //----------------------------------------
        // One quotient bit per clock: trial-subtract the divisor and keep
        // the difference only when it did not go negative.
        `CALCULATE_REMAINDER:
        begin
            //Bit = (Dividend & 32'h80000000) >> 31;
            Bit = (Dividend & 64'h8000000000000000 ) >> 63;
            Remainder = (Remainder << 1) | Bit;
            t = Remainder - Divisor;
            //q = !((t & 32'h80000000) >> 31);
            q = !((t & 64'h8000000000000000 ) >> 63);
            Dividend = Dividend << 1;
            Quotient = (Quotient << 1) | q;
            if ( q != 0 )
                Remainder = t;
            i = i + 1;

            if (i < num_bits)
                NextState = `CALCULATE_REMAINDER;
            else
                NextState = `WRITE_DIVISION_RESULT;
        end
        //----------------------------------------
        //Will go to the IDLE leaving the Result Registers
        //with the current results until next stuff comes
        //So, stay in this state until our client sets iInputReady
        //to 0 telling us he read the result
        `WRITE_DIVISION_RESULT:
        begin
            // Keep exactly the low `WIDTH bits of the 64-bit quotient
            // (the previous [32:0] slice was one bit wider than xQuotient).
            xQuotient = Quotient[`WIDTH-1:0]; //Simply chop to round
            OutputReady = 1;
            // $display("Quotient = %h - %b \n", Quotient, Quotient);

            NextState = (iInputReady == 0) ?
                `FPS_AFTER_RESET_STATE : `WRITE_DIVISION_RESULT;
        end
        //----------------------------------------
        // Any unused / unknown state encoding: fall back to idle instead
        // of holding NextState forever.
        default:
        begin
            OutputReady = 0;
            NextState = `FPS_AFTER_RESET_STATE;
        end
        endcase

    end //always
endmodule
//----------------------------------------------------------------- |
/trunk/rtl/Module_OMemInterface.v
0,0 → 1,47
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//--------------------------------------------------------------------------
// Module_OMemInterface
//
// Serialises a three-word address row and a three-word data row onto a
// single-word output port. A 3-bit one-hot selector (wCurrentWord) picks
// which word of iAddress / iData appears on ADR_O / DAT_O; the selector
// is a circular shifter that is enabled by iWriteEnable and starts at
// 3'b001. WE_O simply mirrors iWriteEnable.
//--------------------------------------------------------------------------
module Module_OMemInterface
(
    input  wire                        Clock,
    input  wire                        Reset,
    input  wire                        iWriteEnable,
    input  wire [`DATA_ROW_WIDTH-1:0]  iData,
    input  wire [`DATA_ROW_WIDTH-1:0]  iAddress,
    output wire [`WB_WIDTH-1:0]        ADR_O,
    output wire [`WB_WIDTH-1:0]        DAT_O,
    output wire                        WE_O
);

    // One-hot "which word is on the bus" selector.
    wire [2:0] wCurrentWord;

    // Write strobe is passed straight through.
    assign WE_O = iWriteEnable;

    CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL
    (
        .Clock   ( Clock        ),
        .Enable  ( iWriteEnable ),
        .Reset   ( Reset        ),
        .Initial ( 3'b1         ),
        .O       ( wCurrentWord )
    );

    // Address word select: I1 carries the top word, I3 the bottom word.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
    (
        .Sel ( wCurrentWord    ),
        .I1  ( iAddress[95:64] ),
        .I2  ( iAddress[63:32] ),
        .I3  ( iAddress[31:0]  ),
        .O1  ( ADR_O           )
    );

    // Data word select, same word ordering as the address mux.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX2
    (
        .Sel ( wCurrentWord ),
        .I1  ( iData[95:64] ),
        .I2  ( iData[63:32] ),
        .I3  ( iData[31:0]  ),
        .O1  ( DAT_O        )
    );

endmodule
/trunk/rtl/Module_RAM.v
0,0 → 1,80
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//-------------------------------------------------------- |
//Dual port RAM. |
|
|
// RAM_DUAL_READ_PORT
//
// Synchronous memory with one write port and two independent read ports.
// All three ports are clocked on the rising edge of Clock; both read
// outputs are registered, so read data appears one clock after the
// address. Because the accesses use non-blocking assignments, a read of
// the address being written in the same cycle returns the previous
// contents.
//
// Parameters
//   DATA_WIDTH - width of one memory word.
//   ADDR_WIDTH - width of the address ports.
//   MEM_SIZE   - highest valid index; the array is declared [MEM_SIZE:0],
//                i.e. it holds MEM_SIZE+1 words.
module RAM_DUAL_READ_PORT
#(
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input  wire                   Clock,
    input  wire                   iWriteEnable,
    input  wire [ADDR_WIDTH-1:0]  iReadAddress0,
    input  wire [ADDR_WIDTH-1:0]  iReadAddress1,
    input  wire [ADDR_WIDTH-1:0]  iWriteAddress,
    input  wire [DATA_WIDTH-1:0]  iDataIn,
    output reg  [DATA_WIDTH-1:0]  oDataOut0,
    output reg  [DATA_WIDTH-1:0]  oDataOut1
);

    reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

    always @(posedge Clock)
    begin
        // Registered reads on both ports.
        oDataOut0 <= Ram[iReadAddress0];
        oDataOut1 <= Ram[iReadAddress1];

        // Single write port.
        if (iWriteEnable)
        begin
            Ram[iWriteAddress] <= iDataIn;
        end
    end

endmodule
//-------------------------------------------------------- |
|
// RAM_SINGLE_READ_PORT
//
// Synchronous memory with one write port and one registered read port,
// both clocked on the rising edge of Clock. Read data appears one clock
// after the address; a same-cycle read of the address being written
// returns the previous contents (non-blocking assignments).
//
// Parameters
//   DATA_WIDTH - width of one memory word.
//   ADDR_WIDTH - width of the address ports.
//   MEM_SIZE   - highest valid index; the array is declared [MEM_SIZE:0],
//                i.e. it holds MEM_SIZE+1 words.
module RAM_SINGLE_READ_PORT
#(
    parameter DATA_WIDTH = `DATA_ROW_WIDTH,
    parameter ADDR_WIDTH = `DATA_ADDRESS_WIDTH,
    parameter MEM_SIZE   = 128
)
(
    input  wire                   Clock,
    input  wire                   iWriteEnable,
    input  wire [ADDR_WIDTH-1:0]  iReadAddress0,
    input  wire [ADDR_WIDTH-1:0]  iWriteAddress,
    input  wire [DATA_WIDTH-1:0]  iDataIn,
    output reg  [DATA_WIDTH-1:0]  oDataOut0
);

    reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

    always @(posedge Clock)
    begin
        // Registered read.
        oDataOut0 <= Ram[iReadAddress0];

        // Write port.
        if (iWriteEnable)
        begin
            Ram[iWriteAddress] <= iDataIn;
        end
    end

endmodule
|
|
/trunk/rtl/Module_TMemInterface.v
0,0 → 1,109
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//-------------------------------------------------------------------------- |
//--------------------------------------------------------------------------
// Module_TMemInterface
//
// Read-only bus master that fetches three words and assembles them into
// one data row.
//
// Operation
//   * While iEnable is low the 4-bit shifter SHL is held in reset; once
//     iEnable and GNT_I are both high it advances one position per clock.
//   * Bits [2:0] of the shifter select which word of iAddress is driven
//     on ADR_O.
//   * A one-clock delayed copy of those bits (wLatchNow) enables the three
//     capture registers, so DAT_I is stored in oData[95:64], [63:32] and
//     [31:0] in that order, each gated by GNT_I.
//   * Bit 3 of the shifter, delayed one clock and qualified by iEnable,
//     produces oDone.
//
// Bus signals
//   WE_O  - tied low, this interface never writes.
//   CYC_O - follows iEnable.
//   STB_O - follows iEnable as well. The port was previously declared but
//           left undriven (floating).
//           NOTE(review): strobing for the whole cycle is the simplest
//           choice; confirm against the texture-memory slave whether the
//           strobe should drop once the third word has been requested.
//   ACK_I - not used by this module; transfers are paced by GNT_I.
//--------------------------------------------------------------------------
module Module_TMemInterface
(
    input  wire                        Clock,
    input  wire                        Reset,
    input  wire                        iEnable,
    input  wire [`DATA_ROW_WIDTH-1:0]  iAddress,
    output wire [`DATA_ROW_WIDTH-1:0]  oData,
    output wire                        oDone,

    input  wire                        ACK_I,
    input  wire                        GNT_I,
    input  wire [`WB_WIDTH-1:0 ]       DAT_I,

    //WB Output Signals
    output wire [`WB_WIDTH-1:0 ]       ADR_O,
    output wire                        WE_O,
    output wire                        STB_O,
    output wire                        CYC_O
);

    wire [3:0] wCurrentWord;
    wire wDone;

    // Done is only reported while the request is still active.
    assign oDone = wDone & iEnable;

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD_DONE
    (
        .Clock(Clock),
        .Reset(Reset),
        .Enable( 1'b1 ),
        .D(wCurrentWord[3]),
        .Q(wDone)
    );

    //wire wShiftNow;
    assign WE_O  = 1'b0; //we only read
    assign CYC_O = iEnable;
    assign STB_O = iEnable; // strobe accompanies the bus cycle (was undriven)

    // One-clock delayed word select: the data for a word arrives the cycle
    // after its address was presented.
    wire[2:0] wLatchNow;
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 3 ) FFD_LATHCNOW
    (
        .Clock(Clock),
        .Reset(Reset),
        .Enable( 1'b1 ),
        .D(wCurrentWord[2:0]),
        .Q(wLatchNow)
    );

    // Word sequencer: restarts whenever iEnable is dropped.
    SHIFTLEFT_POSEDGE #(4) SHL
    (
        .Clock(Clock),
        .Enable(iEnable & GNT_I),//wShiftNow),
        .Reset(Reset | ~iEnable ),
        .Initial(4'b1),
        .O(wCurrentWord)
    );

    // Address word select.
    MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
    (
        .Sel( wCurrentWord[2:0] ),
        .I3(iAddress[31:0]),
        .I2(iAddress[63:32]),
        .I1(iAddress[95:64]),
        .O1( ADR_O )
    );

    // Capture registers for the three returned words.
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDX
    (
        .Clock(Clock),
        .Reset(Reset),
        .Enable( wLatchNow[0] & GNT_I),
        .D(DAT_I),
        .Q(oData[95:64])
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDY
    (
        .Clock(Clock),
        .Reset(Reset),
        .Enable( wLatchNow[1] & GNT_I),
        .D(DAT_I),
        .Q(oData[63:32])
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDZ
    (
        .Clock(Clock),
        .Reset( Reset ),
        .Enable( wLatchNow[2] & GNT_I),
        .D(DAT_I),
        .Q(oData[31:0])
    );

endmodule
//-------------------------------------------------------------------------- |
/trunk/rtl/Module_BusArbitrer.v
0,0 → 1,80
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
|
// Module_BusArbitrer
//
// Rotating bus arbiter for up to `MAX_CORES requesters.
//
// Two pieces of state advance together, both enabled whenever the
// requester currently being polled is NOT requesting:
//   * wCurrentMasterMask - one-hot mask from a circular shifter,
//   * wCurrentBusMaster  - index from an up-counter.
// While the polled requester keeps its iRequest bit high, neither moves,
// so that requester keeps the bus until it releases it.
//
// Outputs
//   oGrant     - iRequest gated by the one-hot mask, so at most one bit
//                is set at any time.
//   oBusSelect - index of the requester currently being polled.
//
// NOTE(review): the mask wraps after `MAX_CORES positions while the counter
// is `MAX_CORE_BITS wide; this presumably relies on `MAX_CORES being
// 2**`MAX_CORE_BITS so the two stay aligned - confirm in aDefinitions.v.
module Module_BusArbitrer
(
    input  wire                        Clock,
    input  wire                        Reset,

    input  wire [`MAX_CORES-1:0]       iRequest,
    output wire [`MAX_CORES-1:0]       oGrant,
    output wire [`MAX_CORE_BITS-1:0]   oBusSelect
);

    wire [`MAX_CORES-1:0]     wCurrentMasterMask;
    wire [`MAX_CORE_BITS-1:0] wCurrentBusMaster;
    wire                      wCurrentRequest;

    // Poll the requester whose turn it is.
    assign wCurrentRequest = iRequest[ wCurrentBusMaster ];

    // Only the requester selected by the one-hot mask can be granted.
    assign oGrant     = iRequest & wCurrentMasterMask;
    assign oBusSelect = wCurrentBusMaster;

    // One-hot turn marker; holds still while the current owner requests.
    CIRCULAR_SHIFTLEFT_POSEDGE_EX # (`MAX_CORES) SHL_A
    (
        .Clock   ( Clock              ),
        .Enable  ( ~wCurrentRequest   ),
        .Reset   ( Reset              ),
        .Initial ( `MAX_CORES'b1      ),
        .O       ( wCurrentMasterMask )
    );

    // Binary index of the same turn, advanced under the same condition.
    UPCOUNTER_POSEDGE # (`MAX_CORE_BITS ) UP1
    (
        .Clock   ( Clock              ),
        .Reset   ( Reset              ),
        .Initial ( `MAX_CORE_BITS'd0  ),
        .Enable  ( ~wCurrentRequest   ),
        .Q       ( wCurrentBusMaster  )
    );

endmodule
/trunk/rtl/Theia.v
0,0 → 1,341
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
//--------------------------------------------------------------------------- |
//---------------------------------------------------------------------------
// THEIA: top level of the multi-core GPU.
//
// Instantiates `MAX_CORES cores (THEIACORE), one output memory (OMEM) per
// core, `MAX_TMEM_BANKS texture memory banks (TMEM) and the cross-bar that
// connects every core to every texture bank. A bus arbiter (ARB1) shares
// the external bus among the cores, and one arbiter per texture bank
// (ARB_TMEM) shares that bank among the cores that want to read from it.
//
// The bank interleaving (address & (banks-1)) assumes that `MAX_TMEM_BANKS
// is a power of two.
//---------------------------------------------------------------------------
module THEIA
(

input wire                          CLK_I,      //Input clock
input wire                          RST_I,      //Input reset
//Theia Interfaces
input wire                          MST_I,      //Master signal, THEIA enters configuration mode
                                                //when this gets asserted (see documentation)
//Wish Bone Interface
input wire [`WB_WIDTH-1:0]          DAT_I,      //Input data bus (Wishbone)
input wire                          ACK_I,      //Input ack
output wire                         ACK_O,      //Output ack
input wire [`WB_WIDTH-1:0]          ADR_I,      //Input address
input wire                          WE_I,       //Input write enable
input wire                          STB_I,      //Strobe signal, see wishbone documentation
input wire                          CYC_I,      //Bus cycle signal, see wishbone documentation
input wire [1:0]                    TGA_I,      //Input address tag, see THEIA documentation
input wire [`MAX_CORES-1:0]         SEL_I,      //The WishBone Master uses this signal to configure a specific core (TBD, not sure is needed)
input wire [`MAX_CORES-1:0]         RENDREN_I,  //One bit per core, forwarded to the RENDREN_I port of that core

input wire [`MAX_CORE_BITS-1:0]     OMBSEL_I,   //Output memory bank select
input wire [`WB_WIDTH-1:0]          OMADR_I,    //Output address (relative to current bank)
output wire [`WB_WIDTH-1:0]         OMEM_O,     //Output data bus (Wishbone)

input wire [`WB_WIDTH-1:0]          TMDAT_I,    //Texture memory write data (shared by all banks)
input wire [`WB_WIDTH-1:0]          TMADR_I,    //Texture memory write address (shared by all banks)
input wire                          TMWE_I,     //Texture memory write enable
input wire [`MAX_TMEM_BANKS-1:0]    TMSEL_I,    //Texture memory bank select for writes
//Control Register
input wire [15:0]                   CREG_I,
output wire                         HDL_O,      //All cores latched the host data
input wire                          STDONE_I,
input wire                          HDA_I,      //Host data available
input wire                          HDLACK_I,   //Clears the per-core HDL/RCOMMIT flags
output wire                         RCOMMIT_O,  //All cores committed
output wire                         DONE_O      //All cores are done

);

    //------------------------------------------------------------------
    //Texture memory write enables, one bit per bank
    //------------------------------------------------------------------
    wire [`MAX_TMEM_BANKS-1:0] wTMemWriteEnable;
    SELECT_1_TO_N # ( `MAX_TMEM_BANKS, `MAX_TMEM_BANKS ) TMWE_SEL
    (
        .Sel( TMSEL_I          ),
        .En(  TMWE_I           ),
        .O(   wTMemWriteEnable )
    );

    //------------------------------------------------------------------
    //Per-core signals
    //------------------------------------------------------------------
    wire [`MAX_CORES-1:0]      wDone;
    wire [`MAX_CORES-1:0]      wBusGranted, wBusRequest;
    wire [`MAX_CORE_BITS-1:0]  wBusSelect;
    wire [`MAX_CORES-1:0]      wACK_O;

    wire                       wOMem_WE[`MAX_CORES-1:0];
    wire [`WB_WIDTH-1:0]       wOMEM_Address[`MAX_CORES-1:0];
    wire [`WB_WIDTH-1:0]       wOMEM_Dat[`MAX_CORES-1:0];
    wire [`WB_WIDTH-1:0]       wDataOut[`MAX_CORES-1:0];

    wire [`MAX_CORES-1:0]      wSTB_I;
    wire [`MAX_CORES-1:0]      wMST_I;
    wire [`MAX_CORES-1:0]      wCYC_I;
    wire [1:0]                 wTGA_I[`MAX_CORES-1:0];

    wire [`MAX_CORES-1:0]      wHDL_O;
    wire [`MAX_CORES-1:0]      wHostDataLatched;
    wire [`MAX_CORES-1:0]      wRCOMMIT_O;
    wire [`MAX_CORES-1:0]      wRCommited;

    //------------------------------------------------------------------
    //CROSS-BAR cables
    //------------------------------------------------------------------
    wire [`WB_WIDTH-1:0] wCrossBarDataRow[`MAX_TMEM_BANKS-1:0];       //Horizontal grid buses coming from each bank
    wire [`WB_WIDTH-1:0] wCrossBarDataCollumn[`MAX_CORES-1:0];        //Vertical grid buses going into each core
    wire [`WB_WIDTH-1:0] wTMemReadAdr[`MAX_CORES-1:0];                //Read address coming from each core (virtual addr)
    wire [`WB_WIDTH-1:0] wCrossBarAdressCollumn[`MAX_CORES-1:0];      //Vertical grid buses coming from each core (physical addr)
    wire [`WB_WIDTH-1:0] wCrossBarAddressRow[`MAX_TMEM_BANKS-1:0];    //Horizontal grid buses going into each bank

    wire                        wCORE_2_TMEM__Req[`MAX_CORES-1:0];
    wire [`MAX_TMEM_BANKS-1:0]  wBankReadRequest[`MAX_CORES-1:0];     //[core][bank]
    wire [`MAX_CORES-1:0]       wBankReadGranted[`MAX_TMEM_BANKS-1:0];//[bank][core]
    wire                        wTMEM_2_Core__Grant[`MAX_CORES-1:0];

    wire [`MAX_CORE_BITS-1:0]   wCurrentCoreSelected[`MAX_TMEM_BANKS-1:0];
    wire [`WIDTH-1:0]           wCoreBankSelect[`MAX_CORES-1:0];

    //------------------------------------------------------------------
    //Global status: asserted only when EVERY core asserts its flag.
    //Reduction operators are used so this keeps working for any
    //value of `MAX_CORES (it used to be hard-coded for 4 cores).
    //------------------------------------------------------------------
    assign RCOMMIT_O = &wRCommited;
    assign HDL_O     = &wHostDataLatched;
    assign DONE_O    = &wDone;

    //------------------------------------------------------------------
    //Arbiter for the external bus
    //------------------------------------------------------------------
    Module_BusArbitrer ARB1
    (
        .Clock(      CLK_I       ),
        .Reset(      RST_I       ),
        .iRequest(   wBusRequest ),
        .oGrant(     wBusGranted ),
        .oBusSelect( wBusSelect  )
    );

    //In configuration mode (MST_I = 1) the ACK comes from any of the
    //selected cores, otherwise from the core that currently owns the bus
    wire wMaskedACK_O;
    assign wMaskedACK_O = |(SEL_I & wACK_O);
    assign ACK_O        = ( MST_I ) ? wMaskedACK_O : wACK_O[ wBusSelect ];

    //The host reads the output memory of the core picked by OMBSEL_I
    assign OMEM_O = wDataOut[ OMBSEL_I ];

    //------------------------------------------------------------------
    //Cores
    //------------------------------------------------------------------
    genvar i;
    generate
    for (i = 0; i < `MAX_CORES; i = i + 1)
    begin : CORE

        //Slave-side control signals only reach the cores selected by SEL_I
        assign wMST_I[i] = (SEL_I[i]) ? MST_I : 1'b0;
        assign wSTB_I[i] = (SEL_I[i]) ? STB_I : 1'b0;
        assign wCYC_I[i] = (SEL_I[i]) ? CYC_I : 1'b0;
        assign wTGA_I[i] = (SEL_I[i]) ? TGA_I : 2'b0;

        THEIACORE CTHEIA
        (
            .CLK_I(      CLK_I                   ),
            .RST_I(      RST_I                   ),
            .RENDREN_I(  RENDREN_I[i]            ),

            //Slave signals
            .ADR_I(      ADR_I                   ),
            .WE_I(       WE_I                    ),
            .STB_I(      wSTB_I[i]               ),
            .ACK_I(      ACK_I                   ),
            .CYC_I(      wCYC_I[i]               ),
            .MST_I(      wMST_I[i]               ),
            .TGA_I(      wTGA_I[i]               ),
            .CREG_I(     CREG_I                  ),

            //Master Signals
            .ACK_O(      wACK_O[i]               ),
            .CYC_O(      wBusRequest[i]          ),
            .GNT_I(      wBusGranted[i]          ),
`ifdef DEBUG
            .iDebug_CoreID( i ),
`endif

            //Output memory write port
            .OMEM_WE_O(  wOMem_WE[i]             ),
            .OMEM_ADR_O( wOMEM_Address[i]        ),
            .OMEM_DAT_O( wOMEM_Dat[i]            ),

            //Texture memory read port (through the cross-bar)
            .TMEM_DAT_I( wCrossBarDataCollumn[i] ),
            .TMEM_ADR_O( wTMemReadAdr[i]         ),
            .TMEM_CYC_O( wCORE_2_TMEM__Req[i]    ),
            .TMEM_GNT_I( wTMEM_2_Core__Grant[i]  ),

            .HDA_I(      HDA_I                   ), //Host data available
            .HDL_O(      wHDL_O[i]               ), //Host data Latched
            .HDLACK_I(   ~HDL_O                  ), //Host data Latched ACK
            .STDONE_I(   STDONE_I                ),
            .RCOMMIT_O(  wRCOMMIT_O[i]           ),

            //Other
            .DAT_I(      DAT_I                   ),
            .DONE_O(     wDone[i]                )
        );

        //1-bit counters used as flags: they change when the core raises
        //RCOMMIT_O / HDL_O and are cleared by reset or by HDLACK_I.
        //NOTE(review): a 1-bit up-counter presumably toggles on every
        //enabled cycle, so this relies on single-cycle pulses -- confirm.
        UPCOUNTER_POSEDGE # (1) UP_RCOMMIT
        (
            .Clock(   CLK_I            ),
            .Reset(   RST_I | HDLACK_I ),
            .Initial( 1'b0             ),
            .Enable(  wRCOMMIT_O[i]    ),
            .Q(       wRCommited[i]    )
        );

        UPCOUNTER_POSEDGE # (1) UP_GREADY
        (
            .Clock(   CLK_I               ),
            .Reset(   RST_I | HDLACK_I    ),
            .Initial( 1'b0                ),
            .Enable(  wHDL_O[i]           ),
            .Q(       wHostDataLatched[i] )
        );

        //Output memory of this core: written by the core, read by the host
        RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 250000 ) OMEM
        (
            .Clock(         CLK_I            ),
            .iWriteEnable(  wOMem_WE[i]      ),
            .iWriteAddress( wOMEM_Address[i] ),
            .iDataIn(       wOMEM_Dat[i]     ),
            .iReadAddress0( OMADR_I          ),
            .oDataOut0(     wDataOut[i]      )
        );

        //If there are "n" banks, memory location "X" would reside in bank number X mod n.
        //X mod 2^n == X & (2^n - 1)
        assign wCoreBankSelect[i] = (wTMemReadAdr[i] & (`MAX_TMEM_BANKS-1));

        //Each core has 1 bank request slot.
        //Each slot has MAX_TMEM_BANKS bits. Only 1 bit can
        //be 1 at any given point in time. All bits zero means
        //we are not requesting to read from any memory bank.
        //The second parameter is the width of the slot, so it must be
        //`MAX_TMEM_BANKS (it used to be `MAX_CORES, which only works
        //when both values happen to be equal).
        SELECT_1_TO_N # ( `WIDTH, `MAX_TMEM_BANKS ) READDRQ
        (
            .Sel( wCoreBankSelect[i]   ),
            .En(  wCORE_2_TMEM__Req[i] ),
            .O(   wBankReadRequest[i]  )
        );

        //The address coming from the core is a virtual address, meaning it assumes a linear
        //address space. However, since memory is interleaved in an n-way memory, we transform
        //the virtual address into a physical address (relative to the bank) like this:
        //fadr = vadr / n = vadr >> log2(n)
        //NOTE(review): the shift amount is `MAX_CORE_BITS, which equals log2(n)
        //only when `MAX_TMEM_BANKS == 2**`MAX_CORE_BITS -- confirm.
        assign wCrossBarAdressCollumn[i] = (wTMemReadAdr[i] >> `MAX_CORE_BITS);

        //Connect the granted signal from the Arbiter of the Bank we want to read from
        assign wTMEM_2_Core__Grant[i] = wBankReadGranted[wCoreBankSelect[i]][i];

    end
    endgenerate

    ////////////// CROSS-BAR INTERCONNECTION //////////////////////////

    genvar Core, Bank;
    generate
    for (Bank = 0; Bank < `MAX_TMEM_BANKS; Bank = Bank + 1)
    begin : BANK

        //Bit "c" is set when core "c" requests to read from this bank
        wire [`MAX_CORES-1:0] wBankRequests;
        //Grant coming out of the arbiter, before the register stage
        wire [`MAX_CORES-1:0] wBankReadGrantedDelay;

        //The memory bank itself
        RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 50000 ) TMEM
        (
            .Clock(         CLK_I                     ),
            .iWriteEnable(  wTMemWriteEnable[Bank]    ),
            .iWriteAddress( TMADR_I                   ),
            .iDataIn(       TMDAT_I                   ),
            .iReadAddress0( wCrossBarAddressRow[Bank] ), //Connect to the Row of the grid
            .oDataOut0(     wCrossBarDataRow[Bank]    )  //Connect to the Row of the grid
        );

        //Arbiter will Round-Robin Cores attempting to read from the same Bank
        //at a given point in time
        Module_BusArbitrer ARB_TMEM
        (
            .Clock(      CLK_I                      ),
            .Reset(      RST_I                      ),
            .iRequest(   wBankRequests              ),
            .oGrant(     wBankReadGrantedDelay      ), //The bit of the core granted to read from this Bank
            .oBusSelect( wCurrentCoreSelected[Bank] )  //The index of the core granted to read from this Bank
        );

        //The grant reaches the cores one clock cycle after the arbiter decides
        FFD_POSEDGE_SYNCRONOUS_RESET # ( `MAX_CORES ) FFD_GNT
        (
            .Clock(  CLK_I                  ),
            .Reset(  RST_I                  ),
            .Enable( 1'b1                   ),
            .D(      wBankReadGrantedDelay  ),
            .Q(      wBankReadGranted[Bank] )
        );

        //Create the Cross-Bar interconnection grid now: rows are connected to the memory banks,
        //while columns are connected to the cores. 2 or more cores can not read from the same
        //bank at any given point in time
        for (Core = 0; Core < `MAX_CORES; Core = Core + 1)
        begin: CORE_CONNECT
            //Collect the request of every core for this bank (works for any `MAX_CORES)
            assign wBankRequests[ Core ] = wBankReadRequest[ Core ][ Bank ];
            //Connect the Data Column of this core to the Data Row of current bank, only if the Core is looking for data stored in this bank
            assign wCrossBarDataCollumn[ Core ] = ( wCoreBankSelect[ Core ] == Bank ) ? wCrossBarDataRow[ Bank ] : `WB_WIDTH'bz;
            //Connect the Address Row of this Bank to the Address Column of the core, only if the Arbiter selected this core for reading
            assign wCrossBarAddressRow[ Bank ] = ( wCurrentCoreSelected[ Bank ] == Core ) ? wCrossBarAdressCollumn[ Core ] : `WB_WIDTH'bz;
        end

    end
    endgenerate

    ////////////// CROSS-BAR INTERCONNECTION //////////////////////////
    //----------------------------------------------------------------

endmodule
//--------------------------------------------------------------------------- |
/trunk/rtl/Module_ControlRegister.v
0,0 → 1,28
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//------------------------------------------------------------------- |
// ControlRegister
//
// 16-bit register with synchronous reset and no enable: on every rising
// edge of Clock it is cleared when Reset is asserted, otherwise it takes
// the value of iControlRegister. The stored value is continuously driven
// on oControlRegister.
module ControlRegister
(
    input wire        Clock,
    input wire        Reset,
    input wire[15:0]  iControlRegister,
    output wire[15:0] oControlRegister
);

    //Storage element behind the output port
    reg [15:0] rLatchedValue;

    always @ (posedge Clock)
        if ( Reset )
            rLatchedValue <= 16'b0;               //Synchronous clear
        else
            rLatchedValue <= iControlRegister;    //Sample the input every cycle

    assign oControlRegister = rLatchedValue;

endmodule
//------------------------------------------------------------------- |
/trunk/rtl/Module_InstructionDecode.v
0,0 → 1,156
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
// InstructionDecode (IDU)
//
// Splits the encoded instruction into operation, destination and the two
// source operands:
//   * oRamAddress0/1 are the addresses used to fetch the operands, the
//     fetched rows come back on iRamValue0/1.
//   * Operation and destination are latched while iInstructionAvailable
//     is asserted.
//   * oDataReadyForExe is iInstructionAvailable delayed by one clock.
//   * If a source address matches iLastDestination, iDataForward is
//     routed to that source instead of the value read from RAM.
//
// The return-address latching for CALL (iIP / oReturnAddress) that used to
// live here is disabled and has been left out of this module.
module InstructionDecode
(
    input wire                                Clock,
    input wire                                Reset,
    input wire                                iInstructionAvailable,
    input wire [`INSTRUCTION_WIDTH-1:0]       iEncodedInstruction,
    input wire [`DATA_ROW_WIDTH-1:0]          iRamValue0,
    input wire [`DATA_ROW_WIDTH-1:0]          iRamValue1,
    output wire [`DATA_ADDRESS_WIDTH-1:0]     oRamAddress0,oRamAddress1,
    output wire [`INSTRUCTION_OP_LENGTH-1:0]  oOperation,
    output wire [`DATA_ROW_WIDTH-1:0]         oSource0,oSource1,
    output wire [`DATA_ADDRESS_WIDTH-1:0]     oDestination,
    input wire [`DATA_ROW_WIDTH-1:0]          iDataForward,
    input wire [`DATA_ADDRESS_WIDTH-1:0]      iLastDestination,

`ifdef DEBUG
    input wire [`ROM_ADDRESS_WIDTH-1:0]       iDebug_CurrentIP,
    output wire [`ROM_ADDRESS_WIDTH-1:0]      oDebug_CurrentIP,
`endif

    output wire                               oDataReadyForExe

);

    wire                        wIsImmediate;     //Instruction carries an immediate (scalar) operand
    wire [`DATA_ROW_WIDTH-1:0]  wOperand1;        //Second operand before forwarding
    wire                        wForwardSource0;  //Source 0 must come from iDataForward
    wire                        wForwardSource1;  //Source 1 must come from iDataForward

`ifdef DEBUG
    //The instruction pointer just passes through for debugging
    assign oDebug_CurrentIP = iDebug_CurrentIP;
`endif

    //------------------------------------------------------------------
    //Operand addresses
    //------------------------------------------------------------------

    //See if operation takes scalar argument
    assign wIsImmediate = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS];

    //The address of the second operand always comes from bits [31:16]
    assign oRamAddress1 = iEncodedInstruction[31:16];

    //The address of the first operand is bits [15:0], except for
    //operations that take a scalar value: in that case we ask for the
    //previous value of the destination ([47:32]) instead.
    //(presumably Sel = 0 picks I1 and Sel = 1 picks I2, the mux is
    //defined elsewhere)
    MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX
    (
        .Sel( wIsImmediate               ),
        .I1(  iEncodedInstruction[15:0]  ),
        .I2(  iEncodedInstruction[47:32] ),
        .O1(  oRamAddress0               )
    );

    //------------------------------------------------------------------
    //Operand values
    //------------------------------------------------------------------

    //The second operand is either the row fetched from RAM or, for scalar
    //operations, the 32 bit immediate ([31:0] of the instruction) placed in
    //the upper part of the row with the rest of the row set to zero
    assign wOperand1 = ( wIsImmediate ) ?
                       {oRamAddress1,iEncodedInstruction[15:0],64'b0} :
                       iRamValue1;

    //Data forwarding logic: forward when the source address is the
    //destination of the last instruction. An immediate is never forwarded.
    assign wForwardSource0 = (oRamAddress0 == iLastDestination);
    assign wForwardSource1 = (oRamAddress1 == iLastDestination) && !wIsImmediate;

    //Once we made a decision on whether the sources must be forwarded or
    //not, these muxes route the correct data into the decoded source outputs
    MUXFULLPARALELL_96bits_2SEL Source0_Mux
    (
        .Sel( wForwardSource0 ),
        .I1(  iRamValue0      ),
        .I2(  iDataForward    ),
        .O1(  oSource0        )
    );

    MUXFULLPARALELL_96bits_2SEL Source1_Mux
    (
        .Sel( wForwardSource1 ),
        .I1(  wOperand1       ),
        .I2(  iDataForward    ),
        .O1(  oSource1        )
    );

    //------------------------------------------------------------------
    //Registered outputs
    //------------------------------------------------------------------

    //One clock cycle after the new instruction becomes
    //available to IDU, it should be decoded and ready
    //for execution
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
    (
        .Clock(  Clock                 ),
        .Reset(  Reset                 ),
        .Enable( 1'b1                  ),
        .D(      iInstructionAvailable ),
        .Q(      oDataReadyForExe      )
    );

    //Latch the Destination
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2
    (
        .Clock(  Clock                      ),
        .Reset(  Reset                      ),
        .Enable( iInstructionAvailable      ),
        .D(      iEncodedInstruction[47:32] ),
        .Q(      oDestination               )
    );

    //Latch the Operation (the topmost bits of the instruction)
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3
    (
        .Clock(  Clock                 ),
        .Reset(  Reset                 ),
        .Enable( iInstructionAvailable ),
        .D(      iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] ),
        .Q(      oOperation            )
    );

endmodule
|
/trunk/rtl/Module_WishBoneMaster.v
0,0 → 1,147
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/* |
In order to read the geometry, we will behave as a master. |
Performing single Reads Bus cycles should be sufficient. |
Choosing 32 bit for bus width for simplicity. |
*/ |
|
// WishBoneMasterUnit
//
// Bus master used by the GPU to read/write through the Wishbone bus.
// The unit requests the bus while iEnable is asserted (CYC_O) and only
// drives a cycle once the arbiter grants it (GNT_I).
//
//   * ADR_O comes from one of two counters (read / write), both loaded
//     with iAddress when iAddress_Set is asserted and stepped on ACK_I.
//   * On reads, DAT_I is captured into oData and oDataReady is raised one
//     CLK_I cycle after ACK_I, while the unit is still enabled and granted.
//   * On writes, iData is driven on DAT_O; DAT_O is high impedance otherwise.
//
// NOTE(review): TGC_O is declared but never driven inside this module.
// NOTE(review): the oData register (FFD1) is clocked by ACK_I instead of
// CLK_I, so its synchronous reset only takes effect on an ACK_I edge.
module WishBoneMasterUnit
(
    //WB Input signals
    input wire                    CLK_I,
    input wire                    RST_I,
    input wire                    ACK_I,
    input wire                    GNT_I,        //granted signal from bus arbiter
    input wire [`WB_WIDTH-1:0 ]   DAT_I,
    output wire [`WB_WIDTH-1:0]   DAT_O,

    //WB Output Signals
    output wire [`WB_WIDTH-1:0 ]  ADR_O,
    output wire                   WE_O,
    output wire                   STB_O,
    output wire                   CYC_O,
    output wire [1:0]             TGC_O,

    //Signals from inside the GPU
    input wire                    iEnable,
    input wire                    iBusCyc_Type,
    input wire [`WIDTH-1:0 ]      iAddress,
    input wire                    iAddress_Set,
    output wire                   oDataReady,
    input wire [`WIDTH-1:0 ]      iData,
    output wire [`WIDTH-1:0 ]     oData

);

    wire                    wBusOwned;          //Enabled AND granted by the arbiter
    wire                    wBusOwned_q;        //wBusOwned delayed one cycle
    wire                    wIsRead;            //Current cycle type is a simple read
    wire                    wIsWrite;           //Current cycle type is a simple write
    wire                    wWriteEnable_q;     //WE_O delayed one cycle
    wire                    wAck_q;             //ACK_I registered during reads
    wire [`WB_WIDTH-1:0 ]   wReadAddress;
    wire [`WB_WIDTH-1:0 ]   wWriteAddress;

    //------------------------------------------------------------------
    //Bus ownership and cycle type
    //------------------------------------------------------------------

    //If CYC_O is 1, it means we are requesting bus ownership
    assign CYC_O     = iEnable;
    assign wBusOwned = iEnable & GNT_I;

    assign wIsRead  = (iBusCyc_Type == `WB_SIMPLE_READ_CYCLE);
    assign wIsWrite = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE);

    //Writes are only signalled while we own the bus
    assign WE_O = wIsWrite && wBusOwned;

    //------------------------------------------------------------------
    //Strobe
    //------------------------------------------------------------------
    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD88
    (
        .Clock(  CLK_I       ),
        .Reset(  RST_I       ),
        .Enable( 1'b1        ),
        .D(      wBusOwned   ),
        .Q(      wBusOwned_q )
    );

    //We only start strobing 1 cycle after iEnable, and only
    //if iEnable is 1 and GNT_I is 1 (meaning we own the bus).
    //The strobe drops as soon as the slave acknowledges.
    assign STB_O = wBusOwned_q & wBusOwned & ~ACK_I;

    //------------------------------------------------------------------
    //Data out: only driven for writes while we own the bus
    //------------------------------------------------------------------
    assign DAT_O = ( ~wIsRead & wBusOwned ) ? iData : `WB_WIDTH'bz;

    //------------------------------------------------------------------
    //Address generation
    //------------------------------------------------------------------
    assign ADR_O = ( wIsRead ) ? wReadAddress : wWriteAddress;

    //The read address increments with each ACK_I (while granted), and it
    //resets to the value iAddress every time iAddress_Set is 1.
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_READ_ADDRESS
    (
        .Clock(   CLK_I                            ),
        .Reset(   iAddress_Set                     ),
        .Initial( iAddress                         ),
        .Enable(  (ACK_I & GNT_I) | iAddress_Set   ),
        .Q(       wReadAddress                     )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
    (
        .Clock(  CLK_I          ),
        .Reset(  RST_I          ),
        .Enable( 1'b1           ),
        .D(      WE_O           ),
        .Q(      wWriteEnable_q )
    );

    //The write address increments with each ACK_I that follows a cycle in
    //which WE_O was asserted, and it is also reloaded by iAddress_Set.
    UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_WRITE_ADDRESS
    (
        .Clock(   CLK_I                                  ),
        .Reset(   iAddress_Set                           ),
        .Initial( iAddress                               ),
        .Enable(  (wWriteEnable_q & ACK_I ) | iAddress_Set ),
        .Q(       wWriteAddress                          )
    );

    //------------------------------------------------------------------
    //Read data capture
    //------------------------------------------------------------------

    //Incoming data is captured on the rising edge of ACK_I (not CLK_I)
    FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
    (
        .Clock(  ACK_I      ),
        .Reset(  ~wBusOwned ),
        .Enable( wIsRead    ),
        .D(      DAT_I      ),
        .Q(      oData      )
    );

    FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
    (
        .Clock(  CLK_I      ),
        .Reset(  ~wBusOwned ),
        .Enable( wIsRead    ),
        .D(      ACK_I      ),
        .Q(      wAck_q     )
    );

    //Data is flagged as ready one cycle after the ACK, as long as we
    //still own the bus
    assign oDataReady = wAck_q & wBusOwned;

endmodule
|
/trunk/rtl/Module_ROM.v
0,0 → 1,703
|
|
`define ONE (32'h1 << `SCALE) |
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
/* |
I can't synthesize roms, the rom needs to be adapted depending on the |
final target silicon. |
*/ |
|
|
//-------------------------------------------------------- |
module ROM |
( |
input wire[`ROM_ADDRESS_WIDTH-1:0] Address, |
`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
output reg [`INSTRUCTION_WIDTH-1:0] I |
); |
|
|
always @( Address ) |
begin |
case (Address) |
|
//Hardcoded stuff :( |
`define RAY_INSIDE_BOX `R3 |
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T |
`define CURRENT_LIGHT_DIFFUSE 16'h6 |
|
//----------------------------------------------------------------- |
`define TAG_PIXELSHADER 16'd278 |
`define TAG_USERCONSTANTS 16'd276 |
`define TAG_PSU_UCODE_ADRESS2 16'd248 |
`define TAG_PSU_UCODE_ADRESS 16'd232 |
`define LABEL_TCC_EXIT 16'd231 |
`define TAG_TCC_UCODE_ADDRESS 16'd190 |
`define LABEL_BIU4 16'd189 |
`define LABEL_BIU3 16'd179 |
`define LABEL_BIU2 16'd176 |
`define LABEL_BIU1 16'd174 |
`define TAG_BIU_UCODE_ADDRESS 16'd157 |
`define LABEL_HIT 16'd155 |
`define LABEL15 16'd153 |
`define LABEL14 16'd151 |
`define LABEL13 16'd149 |
`define LABEL_TEST_XY_PLANE 16'd144 |
`define LABEL12 16'd142 |
`define LABEL11 16'd140 |
`define LABEL10 16'd138 |
`define LABEL_TEST_XZ_PLANE 16'd132 |
`define LABEL9 16'd130 |
`define LABEL8 16'd128 |
`define LABEL7 16'd126 |
`define LABEL_TEST_YZ_PLANE 16'd120 |
`define LABEL_RAY_INSIDE_BOX 16'd117 |
`define LABEL_ELSEZ 16'd116 |
`define LABEL6 16'd113 |
`define LABEL_ELESE_IFZ 16'd109 |
`define LABEL5 16'd106 |
`define LABEL_TEST_RAY_Z_ORIGEN 16'd102 |
`define LABEL_ELSEY 16'd101 |
`define LABEL4 16'd98 |
`define LABEL_ELESE_IFY 16'd94 |
`define LABEL3 16'd91 |
`define LABEL_TEST_RAY_Y_ORIGEN 16'd87 |
`define LABEL_ELSEX 16'd86 |
`define LABEL2 16'd83 |
`define LABEL_ELSE_IFX 16'd79 |
`define LABEL1 16'd76 |
`define LABEL_TEST_RAY_X_ORIGEN 16'd72 |
`define TAG_AABBIU_UCODE_ADDRESS 16'd69 |
`define LABEL_ALLDONE 16'd67 |
`define LABEL_NPG_NEXT_ROW 16'd63 |
`define TAG_NPG_UCODE_ADDRESS 16'd55 |
`define TAG_RGU_UCODE_ADDRESS 16'd47 |
`define TAG_CPPU_UCODE_ADDRESS 16'd44 |
`define LABEL_IS_NO_HIT 16'd43 |
`define LABEL_IS_HIT 16'd39 |
`define TAG_ADRR_MAIN 16'd37 |
|
|
//------------------------------------------------------------------------- |
//Default values for some registers after reset |
//------------------------------------------------------------------------- |
//This is the first code that gets executed after the machine is |
//externally configured ie after the MST_I goes from 1 to zero. |
//It sets initial values for some of the internal registers |
|
0: I = { `ZERO ,`CREG_LAST_t ,`VOID ,`VOID }; |
//Set the last 't' to very positive value(500) |
1: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
2: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID }; |
3: I = { `COPY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID }; |
|
|
//Calculate the initial linear address for ADR_O, |
//that is: (X_initial + RESOLUTION_Y*Y_initial) * 3. |
//Notice that we need to use 'unscaled', i.e. integer, |
//values because the result of the multiplication by |
//the resolution is too large to fit a fixed point |
//representation. |
|
4: I = { `COPY ,`R1 ,`CREG_RESOLUTION ,`VOID }; |
5: I = { `UNSCALE ,`R1 ,`R1 ,`VOID }; |
6: I = { `SETX ,`R1 ,32'h1 }; |
7: I = { `SETZ ,`R1 ,32'h0 }; |
8: I = { `COPY ,`R2 ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID }; |
9: I = { `UNSCALE ,`R2 ,`R2 ,`VOID }; |
|
//Ok lets start by calculating RESOLUTION_Y*Y_intial |
10: I = { `IMUL ,`R1 ,`R1 ,`R2 }; |
11: I = { `COPY ,`R2 ,`R1 ,`VOID }; |
12: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY }; |
|
//now X_initial + RESOLUTION_Y*Y_intial |
13: I = { `ADD ,`R3 ,`R1 ,`R2 }; |
14: I = { `COPY ,`R2 ,`R1 ,`VOID }; |
15: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_ZZZ }; |
16: I = { `ADD ,`R3 ,`R3 ,`R2 }; |
17: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX }; |
|
//finally multiply by 3 to get: |
//(X_initial + RESOLUTION_Y*Y_intial) * 3 voila! |
18: I = { `SETX ,`R2 ,32'h3 }; |
19: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_XXX }; |
20: I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 }; |
|
//By this point you should be wondering why not |
//just do DOT R1 [1 Resolution_Y 0] [X_initial Y_initial 0]? |
//Well, because DOT uses fixed point and the result may not |
//fit :( |
|
//Transform from fixed point to integer |
//UNSCALE CREG_PIXEL_PITCH CREG_PIXEL_PITCH VOID |
21: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID }; |
|
22: I = { `SETX ,`CREG_3 ,32'h3 }; |
23: I = { `SWIZZLE3D ,`CREG_3 ,`SWIZZLE_XXX }; |
|
24: I = { `SETX ,`CREG_012 ,32'h0 }; |
25: I = { `SETY ,`CREG_012 ,32'h1 }; |
26: I = { `SETZ ,`CREG_012 ,32'h2 }; |
27: I = { `COPY ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_012 ,`VOID }; |
28: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID }; |
29: I = { `ZERO ,`CREG_ZERO ,`VOID ,`VOID }; |
|
30: I = { `ZERO ,`R1 ,`VOID ,`VOID }; |
31: I = { `ZERO ,`R2 ,`VOID ,`VOID }; |
32: I = { `ZERO ,`R3 ,`VOID ,`VOID }; |
33: I = { `ZERO ,`R4 ,`VOID ,`VOID }; |
34: I = { `ZERO ,`R5 ,`VOID ,`VOID }; |
35: I = { `ZERO ,`R99 ,`VOID ,`VOID }; |
36: I = { `RETURN ,`RT_TRUE }; |
|
//---------------------------------------------- |
//TAG_ADRR_MAIN: |
|
37: I = { `CALL ,`ENTRYPOINT_ADRR_BIU ,`VOID ,`VOID }; |
38: I = { `JEQX ,`LABEL_IS_NO_HIT ,`R99 ,`CREG_ZERO }; |
|
//LABEL_IS_HIT: |
39: I = { `CALL ,`ENTRYPOINT_ADRR_TCC ,`VOID ,`VOID }; |
40: I = { `NOP ,`RT_FALSE }; |
41: I = { `RETURN ,`RT_TRUE }; |
42: I = { `NOP ,`RT_FALSE }; |
|
//LABEL_IS_NO_HIT: |
43: I = { `RETURN ,`RT_FALSE }; |
|
|
//---------------------------------------------------------------------- |
//Micro code for CPPU |
//TAG_CPPU_UCODE_ADDRESS: |
|
|
44: I = { `SUB ,`R1 ,`CREG_PROJECTION_WINDOW_MAX ,`CREG_PROJECTION_WINDOW_MIN }; |
45: I = { `DIV ,`CREG_PROJECTION_WINDOW_SCALE ,`R1 ,`CREG_RESOLUTION }; |
46: I = { `RETURN ,`RT_FALSE }; |
|
//---------------------------------------------------------------------- |
//Micro code for RGU |
//TAG_RGU_UCODE_ADDRESS: |
|
|
47: I = { `MUL ,`R1 ,`CREG_PIXEL_2D_POSITION ,`CREG_PROJECTION_WINDOW_SCALE }; |
48: I = { `ADD ,`R1 ,`R1 ,`CREG_PROJECTION_WINDOW_MIN }; |
49: I = { `SUB ,`CREG_UNORMALIZED_DIRECTION ,`R1 ,`CREG_CAMERA_POSITION }; |
50: I = { `MAG ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`VOID }; |
51: I = { `DIV ,`CREG_RAY_DIRECTION ,`CREG_UNORMALIZED_DIRECTION ,`R2 }; |
52: I = { `DEC ,`CREG_LAST_COL ,`CREG_PIXEL_2D_FINAL_POSITION ,`VOID }; |
53: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
|
54: I = { `RETURN ,`RT_FALSE }; |
//---------------------------------------------------------------------- |
//Next Pixel generation Code (NPG) |
//TAG_NPG_UCODE_ADDRESS: |
|
55: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID }; |
56: I = { `SETX ,`CREG_TEXTURE_COLOR ,32'h60000 }; |
57: I = { `ADD ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_3 }; |
|
58: I = { `ADD ,`CREG_PIXEL_PITCH ,`CREG_PIXEL_PITCH ,`CREG_3 }; |
59: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID }; |
60: I = { `JGEX ,`LABEL_NPG_NEXT_ROW ,`CREG_PIXEL_2D_POSITION ,`CREG_LAST_COL }; |
61: I = { `INCX ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID }; |
62: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_NPG_NEXT_ROW: |
63: I = { `SETX ,`CREG_PIXEL_2D_POSITION ,32'h0 }; |
64: I = { `INCY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID }; |
65: I = { `JGEY ,`LABEL_ALLDONE ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_FINAL_POSITION }; |
66: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_ALLDONE: |
67: I = { `NOP ,`VOID ,`VOID }; |
68: I = { `RETURN ,`RT_FALSE }; |
|
//---------------------------------------------------------------------- |
//Micro code for AABBIU |
//TAG_AABBIU_UCODE_ADDRESS: |
|
69: I = { `ZERO ,`R3 ,`VOID ,`VOID }; |
70: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
71: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_TEST_RAY_X_ORIGEN: |
72: I = { `JGEX ,`LABEL_ELSE_IFX ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
73: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
74: I = { `JLEX ,`LABEL1 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
75: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL1: |
76: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
77: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
78: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELSE_IFX: |
79: I = { `JLEX ,`LABEL_ELSEX ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
80: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
81: I = { `JGEX ,`LABEL2 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
82: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL2: |
83: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
84: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
85: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID }; |
//LABEL_ELSEX: |
86: I = { `SETX ,`R5 ,32'b1 }; |
|
//LABEL_TEST_RAY_Y_ORIGEN: |
87: I = { `JGEY ,`LABEL_ELESE_IFY ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
88: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
89: I = { `JLEY ,`LABEL3 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
90: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL3: |
91: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
92: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
93: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELESE_IFY: |
94: I = { `JLEY ,`LABEL_ELSEY ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
95: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
96: I = { `JGEY ,`LABEL4 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
97: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL4: |
98: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
99: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
100: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID }; |
|
//LABEL_ELSEY: |
101: I = { `SETY ,`R5 ,32'b1 }; |
|
//LABEL_TEST_RAY_Z_ORIGEN: |
102: I = { `JGEZ ,`LABEL_ELESE_IFZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN }; |
103: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION }; |
104: I = { `JLEZ ,`LABEL5 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
105: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL5: |
106: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
107: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
108: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID }; |
|
//LABEL_ELESE_IFZ: |
109: I = { `JLEZ ,`LABEL_ELSEZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX }; |
110: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION }; |
111: I = { `JGEZ ,`LABEL6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
112: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL6: |
113: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 }; |
114: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION }; |
115: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID }; |
|
//LABEL_ELSEZ: |
116: I = { `SETZ ,`R5 ,32'b1 }; |
|
//LABEL_RAY_INSIDE_BOX: |
117: I = { `ZERO ,`R1 ,`VOID ,`VOID }; |
118: I = { `JEQX ,`LABEL_TEST_YZ_PLANE ,`R1 ,`RAY_INSIDE_BOX }; |
//BUG need a NOP here else pipeline gets confused |
119: I = { `RETURN ,`RT_TRUE }; |
|
//LABEL_TEST_YZ_PLANE: |
120: I = { `JNEX ,`LABEL_TEST_XZ_PLANE ,`R5 ,`R1 }; |
121: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_XXX }; |
122: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
123: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
124: I = { `JGEY ,`LABEL7 ,`R2 ,`CREG_AABBMIN }; |
125: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL7: |
126: I = { `JLEY ,`LABEL8 ,`R2 ,`CREG_AABBMAX }; |
127: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL8: |
128: I = { `JGEZ ,`LABEL9 ,`R2 ,`CREG_AABBMIN }; |
129: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL9: |
130: I = { `JLEZ ,`LABEL_TEST_XZ_PLANE ,`R2 ,`CREG_AABBMAX }; |
131: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_TEST_XZ_PLANE: |
132: I = { `JNEY ,`LABEL_TEST_XY_PLANE ,`R5 ,`R1 }; |
133: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_YYY }; |
134: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
135: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
136: I = { `JGEX ,`LABEL10 ,`R2 ,`CREG_AABBMIN }; |
137: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL10: |
138: I = { `JLEX ,`LABEL11 ,`R2 ,`CREG_AABBMAX }; |
139: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL11: |
140: I = { `JGEZ ,`LABEL12 ,`R2 ,`CREG_AABBMIN }; |
141: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL12: |
142: I = { `JLEZ ,`LABEL_TEST_XY_PLANE ,`R2 ,`CREG_AABBMAX }; |
143: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_TEST_XY_PLANE: |
144: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_ZZZ }; |
145: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 }; |
146: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
147: I = { `JGEX ,`LABEL13 ,`R2 ,`CREG_AABBMIN }; |
148: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL13: |
149: I = { `JLEX ,`LABEL14 ,`R2 ,`CREG_AABBMAX }; |
150: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL14: |
151: I = { `JGEY ,`LABEL15 ,`R2 ,`CREG_AABBMIN }; |
152: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL15: |
153: I = { `JLEY ,`LABEL_HIT ,`R2 ,`CREG_AABBMAX }; |
154: I = { `RETURN ,`RT_FALSE }; |
|
//LABEL_HIT: |
155: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 }; |
156: I = { `RETURN ,`RT_TRUE }; |
|
//------------------------------------------------------------------------ |
//BIU Micro code |
//TAG_BIU_UCODE_ADDRESS: |
|
157: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID }; |
158: I = { `SETX ,`R3 ,`ONE }; |
159: I = { `SETX ,`R1 ,32'h00000 }; |
160: I = { `SUB ,`CREG_E1 ,`CREG_V1 ,`CREG_V0 }; |
161: I = { `SUB ,`CREG_E2 ,`CREG_V2 ,`CREG_V0 }; |
162: I = { `SUB ,`CREG_T ,`CREG_CAMERA_POSITION ,`CREG_V0 }; |
163: I = { `CROSS ,`CREG_P ,`CREG_RAY_DIRECTION ,`CREG_E2 }; |
164: I = { `CROSS ,`CREG_Q ,`CREG_T ,`CREG_E1 }; |
165: I = { `DOT ,`CREG_H1 ,`CREG_Q ,`CREG_E2 }; |
166: I = { `DOT ,`CREG_H2 ,`CREG_P ,`CREG_T }; |
167: I = { `DOT ,`CREG_H3 ,`CREG_Q ,`CREG_RAY_DIRECTION }; |
168: I = { `DOT ,`CREG_DELTA ,`CREG_P ,`CREG_E1 }; |
169: I = { `DIV ,`CREG_t ,`CREG_H1 ,`CREG_DELTA }; |
170: I = { `DIV ,`CREG_u ,`CREG_H2 ,`CREG_DELTA }; |
171: I = { `DIV ,`CREG_v ,`CREG_H3 ,`CREG_DELTA }; |
172: I = { `JGEX ,`LABEL_BIU1 ,`CREG_u ,`R1 }; |
173: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU1: |
174: I = { `JGEX ,`LABEL_BIU2 ,`CREG_v ,`R1 }; |
175: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU2: |
176: I = { `ADD ,`R2 ,`CREG_u ,`CREG_v }; |
177: I = { `JLEX ,`LABEL_BIU3 ,`R2 ,`R3 }; |
178: I = { `RET ,`R99 ,`FALSE }; |
|
//LABEL_BIU3: |
179: I = { `JGEX ,`LABEL_BIU4 ,`CREG_t ,`CREG_LAST_t }; |
180: I = { `COPY ,`CREG_LAST_t ,`CREG_t ,`VOID }; |
181: I = { `COPY ,`CREG_LAST_u ,`CREG_u ,`VOID }; |
182: I = { `COPY ,`CREG_LAST_v ,`CREG_v ,`VOID }; |
183: I = { `COPY ,`CREG_E1_LAST ,`CREG_E1 ,`VOID }; |
184: I = { `COPY ,`CREG_E2_LAST ,`CREG_E2 ,`VOID }; |
185: I = { `COPY ,`CREG_UV0_LAST ,`CREG_UV0 ,`VOID }; |
186: I = { `COPY ,`CREG_UV1_LAST ,`CREG_UV1 ,`VOID }; |
187: I = { `COPY ,`CREG_UV2_LAST ,`CREG_UV2 ,`VOID }; |
188: I = { `COPY ,`CREG_TRI_DIFFUSE_LAST ,`CREG_TRI_DIFFUSE ,`VOID }; |
//LABEL_BIU4: |
189: I = { `RET ,`R99 ,`TRUE }; |
|
|
//------------------------------------------------------------------------- |
//Calculate the address of the texture coordinates. |
|
//TAG_TCC_UCODE_ADDRESS: |
//Do this calculation only if this triangle is the one closest to the camera |
190: I = { `JGX ,`LABEL_TCC_EXIT ,`CREG_t ,`CREG_LAST_t }; |
|
//First get the UV coordinates and store them in R1 |
//R1x: u_coordinate = U0 + last_u * (U1 - U0) + last_v * (U2 - U0) |
//R1y: v_coordinate = V0 + last_u * (V1 - V0) + last_v * (V2 - V0) |
//R1z: 0 |
|
191: I = { `SUB ,`R1 ,`CREG_UV1_LAST ,`CREG_UV0_LAST }; |
192: I = { `SUB ,`R2 ,`CREG_UV2_LAST ,`CREG_UV0_LAST }; |
193: I = { `MUL ,`R1 ,`CREG_LAST_u ,`R1 }; |
194: I = { `MUL ,`R2 ,`CREG_LAST_v ,`R2 }; |
195: I = { `ADD ,`R1 ,`R1 ,`R2 }; |
196: I = { `ADD ,`R1 ,`R1 ,`CREG_UV0_LAST }; |
|
//R7x : fu = (u_coordinate) * gTexture.mWidth |
//R7y : fv = (v_coordinate) * gTexture.mWidth |
//R7z : 0 |
197: I = { `MUL ,`R7 ,`R1 ,`CREG_TEXTURE_SIZE }; |
|
//R1x: u1 = ((int)fu) % gTexture.mWidth |
//R1y: v1 = ((int)fv) % gTexture.mHeight |
//R1z: 0 |
//R2x: u2 = (u1 + 1 ) % gTexture.mWidth |
//R2y: v2 = (v1 + 1 ) % gTexture.mHeight |
//R2z: 0 |
// Notice MOD2 only operates over |
// numbers that are power of 2 also notice that the |
// textures are assumed to be squares! |
//x % 2^n == x & (2^n - 1). |
|
198: I = { `MOD ,`R1 ,`R7 ,`CREG_TEXTURE_SIZE }; |
199: I = { `INC ,`R2 ,`R1 ,`VOID }; |
200: I = { `MOD ,`R2 ,`R2 ,`CREG_TEXTURE_SIZE }; |
|
//Cool, now we should store the values in the appropriate registers |
//OREG_TEX_COORD1.x = u1 + v1 * gTexture.mWidth |
//OREG_TEX_COORD1.y = u2 + v1 * gTexture.mWidth |
//OREG_TEX_COORD1.z = 0 |
//OREG_TEX_COORD2.x = u1 + v2 * gTexture.mWidth |
//OREG_TEX_COORD2.y = u2 + v2 * gTexture.mWidth |
//OREG_TEX_COORD2.z = 0 |
|
//R1= [u1 v1 0] |
//R2= [u2 v2 0] |
|
//R2 = [v2 u2 0] |
201: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YXZ }; |
|
//R3 = [v2 v1 0] |
202: I = { `XCHANGEX ,`R3 ,`R1 ,`R2 }; |
|
|
//R4 = [u1 u2 0] |
203: I = { `XCHANGEX ,`R4 ,`R2 ,`R1 }; |
|
//R2 = [v2*H v1*H 0] |
204: I = { `UNSCALE ,`R9 ,`R3 ,`VOID }; |
205: I = { `UNSCALE ,`R8 ,`CREG_TEXTURE_SIZE ,`VOID }; |
206: I = { `IMUL ,`R2 ,`R9 ,`R8 }; |
|
//OREG_TEX_COORD1 = [u1 + v2*H u2 + v1*H 0] |
//R4 = FixedToInteger(R4) |
207: I = { `UNSCALE ,`R4 ,`R4 ,`VOID }; |
208: I = { `ADD ,`R12 ,`R2 ,`R4 }; |
209: I = { `SETX ,`R5 ,32'h3 }; |
210: I = { `SETY ,`R5 ,32'h3 }; |
211: I = { `SETZ ,`R5 ,32'h3 }; |
//Multiply by 3 (the pitch) |
//IMUL OREG_TEX_COORD1 R12 R5 |
212: I = { `IMUL ,`CREG_TEX_COORD1 ,`R12 ,`R5 }; |
|
//R4 = [u2 u1 0] |
213: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YXZ }; |
|
|
//OREG_TEX_COORD2 [u2 + v2*H u1 + v1*H 0] |
214: I = { `ADD ,`R12 ,`R2 ,`R4 }; |
//Multiply by 3 (the pitch) |
//IMUL OREG_TEX_COORD2 R12 R5 |
215: I = { `IMUL ,`CREG_TEX_COORD2 ,`R12 ,`R5 }; |
|
|
//Cool now get the weights |
|
//w1 = (1 - fracu) * (1 - fracv) |
//w2 = fracu * (1 - fracv) |
//w3 = (1 - fracu) * fracv |
//w4 = fracu * fracv |
|
//R4x: fracu |
//R4y: fracv |
//R4z: 0 |
216: I = { `FRAC ,`R4 ,`R7 ,`VOID }; |
|
//R5x: fracv |
//R5y: fracu |
//R5z: 0 |
217: I = { `COPY ,`R5 ,`R4 ,`VOID }; |
218: I = { `SWIZZLE3D ,`R5 ,`SWIZZLE_YXZ }; |
|
|
//R5x: 1 - fracv |
//R5y: 1 - fracu |
//R5z: 1 |
219: I = { `NEG ,`R5 ,`R5 ,`VOID }; |
220: I = { `INC ,`R5 ,`R5 ,`VOID }; |
|
//R5x: 1 - fracv |
//R5y: 1 - fracu |
//R5y: (1 - fracv)(1 - fracu) |
221: I = { `MULP ,`CREG_TEXWEIGHT1 ,`R5 ,`VOID }; |
|
//CREG_TEXWEIGHT1.x = (1 - fracv)(1 - fracu) |
//CREG_TEXWEIGHT1.y = (1 - fracv)(1 - fracu) |
//CREG_TEXWEIGHT1.z = (1 - fracv)(1 - fracu) |
222: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT1 ,`SWIZZLE_ZZZ }; |
|
|
//R6x: w2: fracu * (1 - fracv ) |
//R6y: w3: fracv * (1 - fracu ) |
//R6z: 0 |
223: I = { `MUL ,`R6 ,`R4 ,`R5 }; |
|
//CREG_TEXWEIGHT2.x = fracu * (1 - fracv ) |
//CREG_TEXWEIGHT2.y = fracu * (1 - fracv ) |
//CREG_TEXWEIGHT2.z = fracu * (1 - fracv ) |
224: I = { `COPY ,`CREG_TEXWEIGHT2 ,`R6 ,`VOID }; |
225: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT2 ,`SWIZZLE_XXX }; |
|
//CREG_TEXWEIGHT3.x = fracv * (1 - fracu ) |
//CREG_TEXWEIGHT3.y = fracv * (1 - fracu ) |
//CREG_TEXWEIGHT3.z = fracv * (1 - fracu ) |
226: I = { `COPY ,`CREG_TEXWEIGHT3 ,`R6 ,`VOID }; |
227: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT3 ,`SWIZZLE_YYY }; |
|
|
//R4x: fracu |
//R4y: fracv |
//R4z: fracu * fracv |
228: I = { `MULP ,`R4 ,`R4 ,`VOID }; |
|
//CREG_TEXWEIGHT4.x = fracv * fracu |
//CREG_TEXWEIGHT4.y = fracv * fracu |
//CREG_TEXWEIGHT4.z = fracv * fracu |
229: I = { `COPY ,`CREG_TEXWEIGHT4 ,`R4 ,`VOID }; |
230: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT4 ,`SWIZZLE_ZZZ }; |
|
|
//LABEL_TCC_EXIT: |
231: I = { `RET ,`R99 ,32'h0 }; |
|
|
//------------------------------------------------------------------------- |
//TAG_PSU_UCODE_ADRESS: |
//Pixel Shader #1 |
//This pixel shader has diffuse light but no textures |
|
|
232: I = { `CROSS ,`R1 ,`CREG_E1_LAST ,`CREG_E2_LAST }; |
233: I = { `MAG ,`R2 ,`R1 ,`VOID }; |
234: I = { `DIV ,`R1 ,`R1 ,`R2 }; |
235: I = { `MUL ,`R2 ,`CREG_RAY_DIRECTION ,`CREG_LAST_t }; |
236: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION }; |
237: I = { `SUB ,`R2 ,`CURRENT_LIGHT_POS ,`R2 }; |
238: I = { `MAG ,`R3 ,`R2 ,`VOID }; |
239: I = { `DIV ,`R2 ,`R2 ,`R3 }; |
240: I = { `DOT ,`R3 ,`R2 ,`R1 }; |
241: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_TRI_DIFFUSE_LAST ,`CURRENT_LIGHT_DIFFUSE }; |
242: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_COLOR_ACC ,`R3 }; |
243: I = { `COPY ,`CREG_TEXTURE_COLOR ,`CREG_COLOR_ACC ,`VOID }; |
244: I = { `NOP ,`RT_FALSE }; |
245: I = { `NOP ,`RT_FALSE }; |
246: I = { `NOP ,`RT_FALSE }; |
247: I = { `RETURN ,`RT_TRUE }; |
|
//------------------------------------------------------------------------- |
//Pixel Shader #2 |
//TAG_PSU_UCODE_ADRESS2: |
//This Pixel Shader has no light but it does texturing |
//with bi-linear interpolation |
|
|
|
248: I = { `COPY ,`R1 ,`CREG_TEX_COORD1 ,`VOID }; |
249: I = { `COPY ,`R2 ,`CREG_TEX_COORD1 ,`VOID }; |
250: I = { `COPY ,`R3 ,`CREG_TEX_COORD2 ,`VOID }; |
251: I = { `COPY ,`R4 ,`CREG_TEX_COORD2 ,`VOID }; |
|
|
252: I = { `SWIZZLE3D ,`R1 ,`SWIZZLE_XXX }; |
253: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY }; |
254: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX }; |
255: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YYY }; |
256: I = { `ADD ,`R1 ,`R1 ,`CREG_012 }; |
257: I = { `ADD ,`R2 ,`R2 ,`CREG_012 }; |
258: I = { `ADD ,`R3 ,`R3 ,`CREG_012 }; |
259: I = { `ADD ,`R4 ,`R4 ,`CREG_012 }; |
|
|
260: I = { `TMREAD ,`CREG_TEX_COLOR1 ,`R1 ,`VOID }; |
261: I = { `NOP ,`RT_FALSE }; |
262: I = { `TMREAD ,`CREG_TEX_COLOR2 ,`R2 ,`VOID }; |
263: I = { `NOP ,`RT_FALSE }; |
264: I = { `TMREAD ,`CREG_TEX_COLOR3 ,`R3 ,`VOID }; |
265: I = { `NOP ,`RT_FALSE }; |
266: I = { `TMREAD ,`CREG_TEX_COLOR4 ,`R4 ,`VOID }; |
267: I = { `NOP ,`RT_FALSE }; |
|
|
|
|
//TextureColor.R = c1.R * w1 + c2.R * w2 + c3.R * w3 + c4.R * w4 |
//TextureColor.G = c1.G * w1 + c2.G * w2 + c3.G * w3 + c4.G * w4 |
//TextureColor.B = c1.B * w1 + c2.B * w2 + c3.B * w3 + c4.B * w4 |
|
|
//MUL R1 CREG_TEX_COLOR4 CREG_TEXWEIGHT1 |
//MUL R2 CREG_TEX_COLOR2 CREG_TEXWEIGHT2 |
//MUL R3 CREG_TEX_COLOR1 CREG_TEXWEIGHT3 |
//MUL R4 CREG_TEX_COLOR3 CREG_TEXWEIGHT4 |
|
268: I = { `MUL ,`R1 ,`CREG_TEX_COLOR3 ,`CREG_TEXWEIGHT1 }; |
269: I = { `MUL ,`R2 ,`CREG_TEX_COLOR2 ,`CREG_TEXWEIGHT2 }; |
270: I = { `MUL ,`R3 ,`CREG_TEX_COLOR1 ,`CREG_TEXWEIGHT3 }; |
271: I = { `MUL ,`R4 ,`CREG_TEX_COLOR4 ,`CREG_TEXWEIGHT4 }; |
|
272: I = { `ADD ,`CREG_TEXTURE_COLOR ,`R1 ,`R2 }; |
273: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R3 }; |
274: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R4 }; |
275: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
//Default User constants |
//TAG_USERCONSTANTS: |
|
276: I = { `NOP ,`RT_FALSE }; |
277: I = { `RETURN ,`RT_TRUE }; |
|
//TAG_PIXELSHADER: |
//Default Pixel Shader (just outputs texture) |
278: I = { `OMWRITE ,`OREG_PIXEL_COLOR ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_TEXTURE_COLOR }; |
279: I = { `RETURN ,`RT_TRUE }; |
|
|
//------------------------------------------------------------------------- |
|
|
default: |
begin |
|
`ifdef DEBUG |
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address); |
// $stop(); |
`endif |
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF}; |
end |
endcase |
end |
endmodule |
//-------------------------------------------------------- |
/trunk/rtl/Collaterals.v
0,0 → 1,488
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//------------------------------------------------ |
// SIZE-bit D register, rising-edge clocked, with an asynchronous
// active-high clear.
//   Clock : a rising edge copies D into Q
//   Clear : a rising edge (or a high level at a clock edge) forces Q to zero
//   D     : data input
//   Q     : registered output
module FFD_POSEDGE_ASYNC_RESET # ( parameter SIZE=`WIDTH )
(
	input wire            Clock,
	input wire            Clear,
	input wire [SIZE-1:0] D,
	output reg [SIZE-1:0] Q
);

	// Non-blocking assignments are used so that other clocked blocks that
	// sample Q on the same edge always observe the value from before the
	// edge; blocking assignments here make the result depend on simulator
	// scheduling order.
	always @(posedge Clock or posedge Clear)
	begin
		if (Clear)
			Q <= {SIZE{1'b0}};
		else
			Q <= D;
	end

endmodule
//---------------------------------------------------- |
// SIZE-bit D register with synchronous active-high reset and a load enable.
//   Clock  : all state changes happen on the rising edge
//   Reset  : when high at a clock edge, Q is cleared (takes priority over Enable)
//   Enable : when high (and Reset low) at a clock edge, Q captures D
//   D      : data input
//   Q      : registered output; holds its value when neither Reset nor Enable is high
module FFD_POSEDGE_SYNCRONOUS_RESET # ( parameter SIZE=`WIDTH )
(
	input wire            Clock,
	input wire            Reset,
	input wire            Enable,
	input wire [SIZE-1:0] D,
	output reg [SIZE-1:0] Q
);

	always @ (posedge Clock)
	begin
		if ( Reset )
			// The zero literal is sized from SIZE (not the global `WIDTH) so
			// the assignment width matches Q for every instantiation.
			Q <= {SIZE{1'b0}};
		else if (Enable)
			Q <= D;
	end

endmodule
//------------------------------------------------ |
// SIZE-bit up counter with synchronous load.
//   Clock   : counter updates on the rising edge
//   Reset   : when high at a clock edge, Q is loaded with Initial
//   Initial : value loaded while Reset is high
//   Enable  : when high (and Reset low), Q increments by one
//   Q       : current count; wraps around on overflow
module UPCOUNTER_POSEDGE # (parameter SIZE=`WIDTH)
(
	input wire            Clock,
	input wire            Reset,
	input wire [SIZE-1:0] Initial,
	input wire            Enable,
	output reg [SIZE-1:0] Q
);

	always @(posedge Clock)
		if (Reset)
			Q <= Initial;        // synchronous (re)load
		else if (Enable)
			Q <= Q + 1'b1;       // count up, truncated to SIZE bits

endmodule
|
//---------------------------------------------------------------------- |
|
// One-hot decoder with enable.
//   Sel : index of the output bit to raise
//   En  : when low, every bit of O is zero
//   O   : when En is high, equals (1 << Sel) truncated to OUTPUT_WIDTH bits
module SELECT_1_TO_N # ( parameter SEL_WIDTH=4, parameter OUTPUT_WIDTH=16 )
(
	input wire [SEL_WIDTH-1:0]     Sel,
	input wire                     En,
	output wire [OUTPUT_WIDTH-1:0] O
);

	// Decoded one-hot vector, computed regardless of En.
	wire [OUTPUT_WIDTH-1:0] one_hot;
	assign one_hot = (1 << Sel);

	// Gate the decoded vector with the enable.
	assign O = En ? one_hot : {OUTPUT_WIDTH{1'b0}};

endmodule
|
//---------------------------------------------------------------------- |
|
// 4-to-1 multiplexer, SIZE bits wide, with a binary-encoded select.
//   Sel : 2'b00 -> I1, 2'b01 -> I2, 2'b10 -> I3, 2'b11 -> I4
//   O1  : selected input; zero if Sel contains X/Z bits (simulation only)
module MUXFULLPARALELL_2SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
	input wire [1:0]      Sel,
	input wire [SIZE-1:0] I1, I2, I3, I4,
	output reg [SIZE-1:0] O1
);

	always @( * )
	begin
		case (Sel)
			2'b00:   O1 = I1;
			2'b01:   O1 = I2;
			2'b10:   O1 = I3;
			2'b11:   O1 = I4;
			// The previous expression `SIZE-1'b0` is parsed as SIZE minus 0,
			// i.e. the value SIZE, not a SIZE-bit zero. Use an explicit
			// replicated zero instead.
			default: O1 = {SIZE{1'b0}};
		endcase
	end

endmodule
|
//-------- |
// Left-shifting register that reloads itself once its MSB is set.
// Unlike CIRCULAR_SHIFTLEFT_POSEDGE, the reload on MSB only happens
// while Enable is high.
//   Clock   : state changes on the rising edge
//   Reset   : when high at a clock edge, the register is loaded with Initial
//   Initial : value loaded on Reset and on wrap-around
//   Enable  : when high (and Reset low), the register advances one step
//   O       : current register contents
module CIRCULAR_SHIFTLEFT_POSEDGE_EX # ( parameter SIZE=`WIDTH )
(
	input wire             Clock,
	input wire             Reset,
	input wire [SIZE-1:0]  Initial,
	input wire             Enable,
	output wire [SIZE-1:0] O
);

	reg [SIZE-1:0] ring;

	assign O = ring;

	always @(posedge Clock)
		if (Reset)
			ring <= Initial;
		else if (Enable)
		begin
			// MSB set means the pattern reached the top: start over,
			// otherwise move one position to the left.
			if (ring[SIZE-1])
				ring <= Initial;
			else
				ring <= ring << 1;
		end

endmodule
//------------------------------------------------ |
// 3-to-1 multiplexer, SIZE bits wide, with a one-hot ("walking one") select.
//   Sel : 3'b001 -> I1, 3'b010 -> I2, 3'b100 -> I3
//   O1  : selected input; zero for every other Sel value
//         (no bit set, more than one bit set, or X/Z)
module MUXFULLPARALELL_3SEL_WALKINGONE # ( parameter SIZE=`WIDTH )
(
	input wire [2:0]      Sel,
	input wire [SIZE-1:0] I1, I2, I3,
	output reg [SIZE-1:0] O1
);

	always @( * )
	begin
		case (Sel)
			3'b001:  O1 = I1;
			3'b010:  O1 = I2;
			3'b100:  O1 = I3;
			// The previous expression `SIZE-1'b0` is parsed as SIZE minus 0,
			// so a non-one-hot select drove the constant SIZE onto O1
			// instead of zero. Use an explicit replicated zero.
			default: O1 = {SIZE{1'b0}};
		endcase
	end

endmodule
//------------------------------------------------ |
// Plain left-shift register (zeros are shifted in from the right).
//   Clock   : state changes on the rising edge
//   Reset   : when high at a clock edge, the register is loaded with Initial
//   Initial : value loaded on Reset
//   Enable  : when high (and Reset low), the register shifts left by one
//   O       : current register contents
module SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
	input wire             Clock,
	input wire             Reset,
	input wire [SIZE-1:0]  Initial,
	input wire             Enable,
	output wire [SIZE-1:0] O
);

	reg [SIZE-1:0] shreg;

	assign O = shreg;

	always @(posedge Clock)
		if (Reset)
			shreg <= Initial;
		else if (Enable)
			shreg <= shreg << 1;

endmodule
//------------------------------------------------ |
//------------------------------------------------ |
// Left-shifting register that reloads itself once its MSB is set.
// Note: the reload on MSB happens on the next clock edge regardless of
// Enable; only the shift itself is gated by Enable.
//   Clock   : state changes on the rising edge
//   Reset   : when high at a clock edge, the register is loaded with Initial
//   Initial : value loaded on Reset and on wrap-around
//   Enable  : when high (and no reload pending), the register shifts left by one
//   O       : current register contents
module CIRCULAR_SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
	input wire             Clock,
	input wire             Reset,
	input wire [SIZE-1:0]  Initial,
	input wire             Enable,
	output wire [SIZE-1:0] O
);

	reg  [SIZE-1:0] ring;
	wire            reload;

	// Reload either on explicit reset or when the top bit has been reached.
	assign reload = Reset || ring[SIZE-1];
	assign O      = ring;

	always @(posedge Clock)
		if (reload)
			ring <= Initial;
		else if (Enable)
			ring <= ring << 1;

endmodule
//----------------------------------------------------------- |
/*
Sticky "set once" flop.

Truth table (R = Reset, evaluated with Enable high):

	Q	S	Q_next	R
	0	0	0	0
	0	1	1	0
	1	0	1	0
	1	1	1	0
	X	X	0	1

Once S has been seen high the output goes to 1 and stays at 1, no matter
how many further S pulses arrive, until Reset clears it.

Timing: Q_next is computed on the rising clock edge and transferred to Q
on the following falling edge.
*/
module FFToggleOnce_1Bit
(
	input wire Clock,
	input wire Reset,
	input wire Enable,
	input wire S,
	output reg Q
);

	reg Q_next;

	// Rising edge: work out the next state.
	always @ ( posedge Clock )
	begin
		if (Reset)
			Q_next <= 0;
		else if (Enable)
			Q_next <= Q || (S && !Q);   // set on S, then hold
		else
			Q_next <= Q;                // disabled: follow the current output
	end

	// Falling edge: publish the state computed on the rising edge.
	always @ ( negedge Clock )
	begin
		Q <= Q_next;
	end

endmodule
|
//----------------------------------------------------------- |
// 16-bit up counter with asynchronous load.
//   Clock   : counter increments on the rising edge
//   Reset   : a rising edge (or a high level at a clock edge) loads Initial into the counter
//   Initial : value loaded on Reset
//   Enable  : when high (and Reset low) at a clock edge, the count increases by one
//   Q       : current count; wraps from 16'hFFFF to 16'h0000
// NOTE(review): Initial is loaded asynchronously; some synthesis flows only
// accept a constant there - confirm against the target toolchain.
module UpCounter_16E
(
	input wire         Clock,
	input wire         Reset,
	input wire [15:0]  Initial,
	input wire         Enable,
	output wire [15:0] Q
);

	reg [15:0] Temp;

	// Non-blocking assignments keep this register race-free with respect to
	// other clocked blocks that read Q on the same edge.
	always @(posedge Clock or posedge Reset)
	begin
		if (Reset)
			Temp <= Initial;
		else if (Enable)
			Temp <= Temp + 1'b1;
	end

	assign Q = Temp;

endmodule
//----------------------------------------------------------- |
// 32-bit up counter with asynchronous load.
//   Clock   : counter increments on the rising edge
//   Reset   : a rising edge (or a high level at a clock edge) loads Initial into the counter
//   Initial : value loaded on Reset
//   Enable  : when high (and Reset low) at a clock edge, the count increases by one
//   Q       : current count; wraps from 32'hFFFFFFFF to 32'h0
// NOTE(review): Initial is loaded asynchronously; some synthesis flows only
// accept a constant there - confirm against the target toolchain.
module UpCounter_32
(
	input wire         Clock,
	input wire         Reset,
	input wire [31:0]  Initial,
	input wire         Enable,
	output wire [31:0] Q
);

	reg [31:0] Temp;

	// Non-blocking assignments keep this register race-free with respect to
	// other clocked blocks that read Q on the same edge.
	always @(posedge Clock or posedge Reset)
	begin
		if (Reset)
			Temp <= Initial;
		else if (Enable)
			Temp <= Temp + 1'b1;
	end

	assign Q = Temp;

endmodule
//----------------------------------------------------------- |
// 3-bit up counter with asynchronous load.
//   Clock   : counter increments on the rising edge
//   Reset   : a rising edge (or a high level at a clock edge) loads Initial into the counter
//   Initial : value loaded on Reset
//   Enable  : when high (and Reset low) at a clock edge, the count increases by one
//   Q       : current count; wraps from 3'd7 to 3'd0
// NOTE(review): Initial is loaded asynchronously; some synthesis flows only
// accept a constant there - confirm against the target toolchain.
module UpCounter_3
(
	input wire        Clock,
	input wire        Reset,
	input wire [2:0]  Initial,
	input wire        Enable,
	output wire [2:0] Q
);

	reg [2:0] Temp;

	// Non-blocking assignments keep this register race-free with respect to
	// other clocked blocks that read Q on the same edge.
	always @(posedge Clock or posedge Reset)
	begin
		if (Reset)
			Temp <= Initial;
		else if (Enable)
			Temp <= Temp + 3'b1;
	end

	assign Q = Temp;

endmodule
|
|
// 32-bit D register without reset or enable: Q takes the value of D on
// every rising edge of Clock.
module FFD32_POSEDGE
(
	input wire        Clock,
	input wire [31:0] D,
	output reg [31:0] Q
);

	always @ (posedge Clock)
	begin
		Q <= D;
	end

endmodule
|
//------------------------------------------------ |
// 2-to-1 multiplexer, 96 bits wide.
//   Sel : 0 selects I1, 1 selects I2
//   O1  : selected input
module MUXFULLPARALELL_96bits_2SEL
(
	input wire        Sel,
	input wire [95:0] I1,
	input wire [95:0] I2,
	output reg [95:0] O1
);

	always @( * )
	begin
		// Both values of the 1-bit select are listed explicitly; there is
		// intentionally no default arm.
		case (Sel)
			1'b1: O1 = I2;
			1'b0: O1 = I1;
		endcase
	end

endmodule
//------------------------------------------------ |
|
// 3-to-1 multiplexer, 16 bits wide, with a binary-encoded select.
//   Sel : 2'b00 -> I1, 2'b01 -> I2, 2'b10 -> I3
//   O1  : selected input; zero for any other Sel value
module MUXFULLPARALELL_16bits_2SEL_X
(
	input wire [1:0]  Sel,
	input wire [15:0] I1,
	input wire [15:0] I2,
	input wire [15:0] I3,
	output reg [15:0] O1
);

	always @( * )
	begin
		if (Sel == 2'b00)
			O1 = I1;
		else if (Sel == 2'b01)
			O1 = I2;
		else if (Sel == 2'b10)
			O1 = I3;
		else
			O1 = 16'b0;      // unused select code
	end

endmodule
//------------------------------------------------ |
// 2-to-1 multiplexer, 16 bits wide.
//   Sel : 0 selects I1, 1 selects I2
//   O1  : selected input
module MUXFULLPARALELL_16bits_2SEL
(
	input wire        Sel,
	input wire [15:0] I1,
	input wire [15:0] I2,
	output reg [15:0] O1
);

	always @( * )
	begin
		// Both values of the 1-bit select are listed explicitly; there is
		// intentionally no default arm.
		case (Sel)
			1'b1: O1 = I2;
			1'b0: O1 = I1;
		endcase
	end

endmodule
|
//-------------------------------------------------------------- |
|
// 1-bit toggle flip-flop with asynchronous active-high reset.
//   D     : toggle request; when high at a rising clock edge, Q inverts
//   Clock : rising-edge clock
//   Reset : forces Q to 0 asynchronously
//   Q     : current state; unchanged while D is low
module FFT1
(
	input wire D,
	input wire Clock,
	input wire Reset,
	output reg Q
);

	always @ ( posedge Clock or posedge Reset )
		if (Reset)
			Q <= 1'b0;
		else if (D)
			Q <= ~Q;

endmodule
//-------------------------------------------------------------- |