OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /theia_gpu/trunk
    from Rev 153 to Rev 154
    Reverse comparison

Rev 153 → Rev 154

/src/Module_MEM2WBM.v File deleted \ No newline at end of file
/src/Module_Swizzle.v File deleted \ No newline at end of file
/src/Unit_MEM.v File deleted \ No newline at end of file
/src/Module_Host.v File deleted
/src/Module_HostWBM.v File deleted
/src/Module_ArithmeticComparison.v File deleted \ No newline at end of file
/src/TestBench_THEIA.v File deleted
/src/Module_InstructionFetch.v File deleted \ No newline at end of file
/src/Theia_Core.v File deleted
/src/Module_FixedPointSquareRoot.v File deleted \ No newline at end of file
/src/Unit_IO.v File deleted \ No newline at end of file
/src/aDefinitions.v File deleted
/src/Module_WishBoneSlave.v File deleted \ No newline at end of file
/src/Unit_Control.v File deleted
/src/Unit_EXE.v File deleted \ No newline at end of file
/src/Module_RAM.v File deleted
/src/Module_TMemInterface.v File deleted \ No newline at end of file
/src/Module_ControlRegister.v File deleted \ No newline at end of file
/src/Theia.v File deleted
/src/Module_ROM.v File deleted \ No newline at end of file
/src/Collaterals.v File deleted
/rtl/Module_MEM2WBM.v
0,0 → 1,124
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
This unit is used when the external address that comes into IO is not an immediate value,
i.e. it is a value that we need to read from one of our internal memory locations.
Since each internal memory location contains 3 * 32-bit slots, i.e. the X, Y and Z parts of the
memory location, we make three requests for external data, one for each of the X, Y and Z
parts of our internal register. So, summarising, each internal memory location stores 3
external memory addresses to request from the WBM. Once the 3 data words have been read from
the outside world, they get stored back into 3 consecutive internal memory addresses starting from
iDataInitialStorageAddress.
*/
//---------------------------------------------------------------------
module MEM2WBMUnitB
(
    input  wire                             Clock,
    input  wire                             Reset,
    input  wire                             iEnable,
    //output reg oSetAddress,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iMEMDataPointer,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iMEMDataPointer2,
    output wire [`WIDTH-1:0]                oReadDataElement,
    output wire [`WIDTH-1:0]                oReadDataElement2,
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataReadAddress,           // Address presented to the MEM unit, channel 1
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataReadAddress2,          // Address presented to the MEM unit, channel 2
    input  wire [`DATA_ROW_WIDTH-1:0]       iReadDataBus,               // Row returned by the MEM unit, channel 1
    input  wire [`DATA_ROW_WIDTH-1:0]       iReadDataBus2,              // Row returned by the MEM unit, channel 2
    output wire                             oDataWriteEnable,
    output wire                             oDataWriteEnable2,
    output wire                             oDataAvailable,
    input  wire                             iRequestNextElement,
    input  wire [`DATA_ADDRESS_WIDTH-1:0]   iDataInitialStorageAddress, // Initial address to store data
    output wire [`DATA_ADDRESS_WIDTH-1:0]   oDataWriteAddress,          // Where to store the values coming from WBM
    output wire                             oDone
);

//---------------------------------------------------------------------
// Internal nets
//---------------------------------------------------------------------
wire [3:0]          wXYZSelector;                   // Element selector driven by SHL; bit 3 flags completion
wire [`WIDTH-1:0]   wValueFromBus, wLatchedValue;   // Declared but not referenced in this module
wire                wEnableDelayed;                 // iEnable after one FFD stage
wire                wRequestNextDelayed;            // iRequestNextElement after one FFD stage

//---------------------------------------------------------------------
// Straight-through connections
//---------------------------------------------------------------------
// This unit never writes: both write enables are tied low.
assign oDataWriteEnable  = 1'b0;
assign oDataWriteEnable2 = 1'b0;

// The read addresses are simply the incoming pointers.
assign oDataReadAddress  = iMEMDataPointer;
assign oDataReadAddress2 = iMEMDataPointer2;

// The write address is always the initial storage address; it is not
// incremented inside this module.
assign oDataWriteAddress = iDataInitialStorageAddress;

// Done once the selector reaches its fourth position.
assign oDone = wXYZSelector[3];

// Data is flagged available while the unit is enabled, the delayed copy of
// the enable is set, and the delayed "next element" request is low.
assign oDataAvailable = wEnableDelayed & iEnable & ~wRequestNextDelayed; // & ~oDone;

//---------------------------------------------------------------------
// One-stage delays of the handshake inputs
//---------------------------------------------------------------------
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_x
(
    .Clock  ( Clock ),
    .Reset  ( Reset ),
    .Enable ( 1'b1 ),
    .D      ( iRequestNextElement ),
    .Q      ( wRequestNextDelayed )
);

FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_EnableDelay
(
    .Clock  ( Clock ),
    .Reset  ( Reset ),
    .Enable ( 1'b1 ),
    .D      ( iEnable ),
    .Q      ( wEnableDelayed )
);

//---------------------------------------------------------------------
// Element selector.
// Clocked by (iRequestNextElement | ~iEnable) rather than by Clock, and held
// in reset whenever the unit is disabled. Starts from 4'b0001.
// NOTE(review): SHIFTLEFT_POSEDGE is defined elsewhere; presumably it shifts
// the set bit left by one position per clock edge - confirm.
//---------------------------------------------------------------------
SHIFTLEFT_POSEDGE #(4) SHL
(
    .Clock   ( iRequestNextElement | ~iEnable ),
    .Reset   ( ~iEnable | Reset ),
    .Enable  ( 1'b1 ),
    .Initial ( 4'b0001 ),
    .O       ( wXYZSelector )
);

//---------------------------------------------------------------------
// Pick one 32-bit slot out of each 96-bit row according to the selector.
// I1 <- bits [95:64], I2 <- bits [63:32], I3 <- bits [31:0].
//---------------------------------------------------------------------
MUXFULLPARALELL_3SEL_WALKINGONE MUXA
(
    .Sel ( wXYZSelector[2:0] ),
    .I1  ( iReadDataBus[95:64] ),
    .I2  ( iReadDataBus[63:32] ),
    .I3  ( iReadDataBus[31:0]  ),
    .O1  ( oReadDataElement )
);

MUXFULLPARALELL_3SEL_WALKINGONE MUXA2
(
    .Sel ( wXYZSelector[2:0] ),
    .I1  ( iReadDataBus2[95:64] ),
    .I2  ( iReadDataBus2[63:32] ),
    .I3  ( iReadDataBus2[31:0]  ),
    .O1  ( oReadDataElement2 )
);

endmodule
//---------------------------------------------------------------------
/rtl/Module_Swizzle.v
0,0 → 1,53
`timescale 1ns / 1ps
`include "aDefinitions.v"
//---------------------------------------------------------------------------
// Swizzle3D: combinational re-ordering of the X/Y/Z components of one
// 3-element vector.
//
//   Source0_X/Y/Z : input components, `WIDTH bits each.
//   iOperation    : swizzle selector, compared against the `SWIZZLE_* codes.
//   SwizzleX/Y/Z  : output components, `WIDTH bits each.
//
//   iOperation      SwizzleX    SwizzleY    SwizzleZ
//   `SWIZZLE_XXX    Source0_X   Source0_X   Source0_X
//   `SWIZZLE_YYY    Source0_Y   Source0_Y   Source0_Y
//   `SWIZZLE_ZZZ    Source0_Z   Source0_Z   Source0_Z
//   `SWIZZLE_YXZ    Source0_Y   Source0_X   Source0_Z
//   anything else   0           0           0
module Swizzle3D
(
input wire [`WIDTH-1:0] Source0_X,
input wire [`WIDTH-1:0] Source0_Y,
input wire [`WIDTH-1:0] Source0_Z,
input wire [`WIDTH-1:0] iOperation,
output reg [`WIDTH-1:0] SwizzleX,
output reg [`WIDTH-1:0] SwizzleY,
output reg [`WIDTH-1:0] SwizzleZ
);
//-----------------------------------------------------
// A single decode of iOperation drives all three outputs, so the table
// above lives in one place. Every arm assigns every output, so no latch
// is inferred.
always @ ( * )
begin
    case (iOperation)
    `SWIZZLE_XXX:
    begin
        SwizzleX = Source0_X;
        SwizzleY = Source0_X;
        SwizzleZ = Source0_X;
    end
    `SWIZZLE_YYY:
    begin
        SwizzleX = Source0_Y;
        SwizzleY = Source0_Y;
        SwizzleZ = Source0_Y;
    end
    `SWIZZLE_ZZZ:
    begin
        SwizzleX = Source0_Z;
        SwizzleY = Source0_Z;
        SwizzleZ = Source0_Z;
    end
    `SWIZZLE_YXZ:
    begin
        SwizzleX = Source0_Y;
        SwizzleY = Source0_X;
        SwizzleZ = Source0_Z;
    end
    default:
    begin
        // The zero is sized to the `WIDTH-bit outputs. The previous
        // `DATA_ROW_WIDTH-wide literal was silently truncated on assignment.
        SwizzleX = {`WIDTH{1'b0}};
        SwizzleY = {`WIDTH{1'b0}};
        SwizzleZ = {`WIDTH{1'b0}};
    end
    endcase
end
//-----------------------------------------------------
endmodule
//---------------------------------------------------------------------------
/rtl/Module_RadixRMul.v
0,0 → 1,337
`timescale 1ns / 1ps
`include "aDefinitions.v"
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 19:49:14 01/13/2009
// Design Name:
// Module Name: RadixRMul
// Project Name:
// Target Devices:
// Tool versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////
 
`default_nettype none
 
 
//---------------------------------------------------
module MUX_4_TO_1_32Bits_FullParallel
(
input wire [31:0] i1,i2,i3,i4,
output reg [31:0] O,
input wire [1:0] Sel
);

// Combinational 32-bit 4:1 selector.
//   Sel = 0 -> i1, Sel = 1 -> i2, Sel = 2 -> i3, Sel = 3 -> i4
// All four values of the 2-bit select are listed, so there is no default arm.
always @ ( * )
begin
    case (Sel)
        2'd0: O = i1;
        2'd1: O = i2;
        2'd2: O = i3;
        2'd3: O = i4;
    endcase
end

endmodule
//---------------------------------------------------
/*
module SHIFTER2_16_BITS
(
input wire C,
input wire[15:0] In,
output reg[15:0] Out
);
 
reg [15:0] Temp;
always @ (posedge C )
begin
Out = In << 2;
end
 
endmodule
*/
//---------------------------------------------------
//---------------------------------------------------
// RADIX_R_MUL_32_FULL_PARALLEL
//
// Signed 32 x 32 -> 64 bit radix-4 multiplier.
//
//   Clock, Reset : clock and reset for the input registers and the
//                  ready-delay flops.
//   A, B         : 32-bit operands; bit 31 is treated as the sign bit.
//   iInputReady  : enables the A/B input registers and starts the
//                  OutputReady delay chain.
//   iUnscaled    : 1 -> R is the full product.
//                  0 -> the product magnitude is shifted right by `SCALE
//                       before the sign is applied.
//   R            : 64-bit two's complement result.
//   OutputReady  : iInputReady passed through two flop stages.
//
// Operation:
//   1. A and B are registered, then converted to magnitudes (wA, wB).
//   2. wB is cut into sixteen 2-bit digits. Each digit selects 0, 1, 2 or 3
//      times wA, and the selection is shifted to the digit's weight.
//   3. The sixteen partial products are summed in a 4-level adder tree.
//   4. The sum is optionally scaled, then negated if the operand signs differ.
//
// The multiples 2*wA and 3*wA are kept 34 bits wide. In a 32-bit net they
// lose their upper bits whenever wA > 32'h55555555 (for 3*wA) or
// wA == 32'h80000000 (for 2*wA), which yields a wrong product.
//---------------------------------------------------
module RADIX_R_MUL_32_FULL_PARALLEL
(
input wire Clock,
input wire Reset,
input wire[31:0] A,
input wire[31:0] B,
output wire[63:0] R,
input wire iUnscaled,
input wire iInputReady,
output wire OutputReady

);


wire wInputDelay1;
//-------------------
// Input registers: capture the operands while iInputReady is high.
wire [31:0] wALatched,wBLatched;
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( A ),
.Q( wALatched)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( B ),
.Q( wBLatched )
);

//-------------------
// OutputReady is iInputReady delayed by two flop stages.

FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay1
(
.Clock( Clock ),
.Clear( Reset ),
.D( iInputReady ),
.Q( wInputDelay1 )
);

FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2
(
.Clock( Clock ),
.Clear( Reset ),
.D( wInputDelay1 ),
.Q( OutputReady )
);

//-------------------
// Sign / magnitude split of the latched operands.
wire [31:0] wA, wB;
wire SignA,SignB;

assign SignA = wALatched[31];
assign SignB = wBLatched[31];

assign wB = (SignB == 1) ? ~wBLatched + 1'b1 : wBLatched;
assign wA = (SignA == 1) ? ~wALatched + 1'b1 : wALatched;

//-------------------
// Multiples of the multiplicand magnitude, wide enough to hold 3 * (2^32 - 1).
wire [33:0] w1A, w2A, w3A;

assign w1A = {2'b00, wA};
assign w2A = {1'b0, wA, 1'b0};
assign w3A = w1A + w2A;

//-------------------
// One partial product per radix-4 digit of wB, already shifted to its weight.
// The largest term is a 34-bit multiple shifted left by 30, which fits in 64 bits.
wire [63:0] wPartialResult [0:15];

genvar gDigit;
generate
    for (gDigit = 0; gDigit < 16; gDigit = gDigit + 1)
    begin : RADIX4_DIGIT
        wire [1:0]  wDigit;
        wire [33:0] wMultiple;

        assign wDigit    = wB[2*gDigit+1 : 2*gDigit];
        assign wMultiple = (wDigit == 2'b00) ? 34'd0 :
                           (wDigit == 2'b01) ? w1A   :
                           (wDigit == 2'b10) ? w2A   : w3A;

        assign wPartialResult[gDigit] = {30'd0, wMultiple} << (2*gDigit);
    end
endgenerate

//-------------------
// Adder tree, level 1: 16 -> 8
wire[63:0] wPartialResult1_0,wPartialResult1_1,wPartialResult1_2,wPartialResult1_3,
wPartialResult1_4,wPartialResult1_5,wPartialResult1_6,wPartialResult1_7;

assign wPartialResult1_0 = wPartialResult[0]  + wPartialResult[1];
assign wPartialResult1_1 = wPartialResult[2]  + wPartialResult[3];
assign wPartialResult1_2 = wPartialResult[4]  + wPartialResult[5];
assign wPartialResult1_3 = wPartialResult[6]  + wPartialResult[7];
assign wPartialResult1_4 = wPartialResult[8]  + wPartialResult[9];
assign wPartialResult1_5 = wPartialResult[10] + wPartialResult[11];
assign wPartialResult1_6 = wPartialResult[12] + wPartialResult[13];
assign wPartialResult1_7 = wPartialResult[14] + wPartialResult[15];

// Level 2: 8 -> 4
wire [63:0] wPartialResult2_0,wPartialResult2_1,wPartialResult2_2,wPartialResult2_3;

assign wPartialResult2_0 = wPartialResult1_0 + wPartialResult1_1;
assign wPartialResult2_1 = wPartialResult1_2 + wPartialResult1_3;
assign wPartialResult2_2 = wPartialResult1_4 + wPartialResult1_5;
assign wPartialResult2_3 = wPartialResult1_6 + wPartialResult1_7;

// Level 3: 4 -> 2
wire [63:0] wPartialResult3_0,wPartialResult3_1;

assign wPartialResult3_0 = wPartialResult2_0 + wPartialResult2_1;
assign wPartialResult3_1 = wPartialResult2_2 + wPartialResult2_3;

// Level 4: unsigned magnitude of the product.
wire [63:0] wProductMagnitude;

assign wProductMagnitude = wPartialResult3_0 + wPartialResult3_1;

//-------------------
// Optional scaling, then sign restoration.
// The shift is applied to the magnitude before negation.
wire [63:0] R_pre1,R_pre2;

assign R_pre1 = (iUnscaled == 1) ? wProductMagnitude : (wProductMagnitude >> `SCALE);

assign R_pre2 = ( (SignA ^ SignB) == 1) ? ~R_pre1 + 1'b1 : R_pre1;

assign R = R_pre2;

endmodule
/rtl/Unit_MEM.v
0,0 → 1,343
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
The memory unit has all the memory related modules for THEIA.
There are 3 memories in the core:
DMEM: The data memory, it is a R/W dual channel RAM, stores the data locations.
IMEM: The instruction memory, R/W dual channel RAM, stores user shaders.
IROM: RO instruction memory, stores default shaders and other internal code.
I use two ROMs with the same data, so that simulates dual channel.
This unit also has a Control register.
*/
`define USER_CODE_ENABLED 2
//-------------------------------------------------------------------
// MemoryUnit: data banks, instruction RAM/ROM and control register.
//
// Data side
//   EXE port : two read channels (iDataReadAddress1/2_EXE -> oData1/2_EXE)
//              and one write channel (iDataWriteAddress_EXE, iData_EXE,
//              iDataWriteEnable_EXE). Address bits [6:5] select the bank and
//              bits [4:0] the row inside the bank.
//   IO port  : write channel (iDataWriteAddress_IO, iData_IO,
//              iDataWriteEnable_IO) into IMEM and into port B of SMEM.
//              The IO read addresses are forwarded to port B of SMEM.
//   NOTE(review): oData1_IO and oData2_IO are not driven anywhere in this
//   module (the SMEM port-B data outputs are commented out) - confirm that
//   this is intended.
//   iFlipMemory : enables the 1-bit counter whose output drives SMEM's
//                 iSelect input.
//
// Instruction side
//   The MSB of each instruction read address, delayed by one flop, selects
//   between the writable instruction RAM (1) and the ROM (0). The remaining
//   address bits index into the selected memory.
//
// Control register
//   iControlRegister / oControlRegister pass through the ControlRegister
//   instance CR.
module MemoryUnit
(
input wire Clock,
input wire Reset,
input wire iFlipMemory,

//Data bus for EXE Unit
input wire iDataWriteEnable_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataReadAddress1_EXE,
output wire[`DATA_ROW_WIDTH-1:0] oData1_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataReadAddress2_EXE,
output wire[`DATA_ROW_WIDTH-1:0] oData2_EXE,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataWriteAddress_EXE,
input wire[`DATA_ROW_WIDTH-1:0] iData_EXE,

//Data bus for IO Unit
input wire iDataWriteEnable_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataReadAddress1_IO,
output wire[`DATA_ROW_WIDTH-1:0] oData1_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataReadAddress2_IO,
output wire[`DATA_ROW_WIDTH-1:0] oData2_IO,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDataWriteAddress_IO,
input wire[`DATA_ROW_WIDTH-1:0] iData_IO,

//Instruction bus
input wire iInstructionWriteEnable,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInstructionReadAddress1,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInstructionReadAddress2,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInstructionWriteAddress,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction,
output wire [`INSTRUCTION_WIDTH-1:0] oInstruction1,
output wire [`INSTRUCTION_WIDTH-1:0] oInstruction2,

`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif


//Control Register
input wire[15:0] iControlRegister,
output wire[15:0] oControlRegister


);

wire [`ROM_ADDRESS_WIDTH-1:0] wROMInstructionAddress,wRAMInstructionAddress;
wire [`INSTRUCTION_WIDTH-1:0] wIMEM2_IMUX__DataOut1,wIMEM2_IMUX__DataOut2,
wIROM2_IMUX__DataOut1,wIROM2_IMUX__DataOut2;
wire wFlipSelect;

// The RAM/ROM selector is the MSB of each instruction read address,
// registered for one cycle before it drives the output mux.
wire wInstructionSelector,wInstructionSelector2;
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
(
.Clock(Clock),
.Reset(Reset),
.Enable( 1'b1 ),
.D( iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-1] ),
.Q( wInstructionSelector )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
.Clock(Clock),
.Reset(Reset),
.Enable( 1'b1 ),
.D( iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-1] ),
.Q( wInstructionSelector2 )
);

// Selector == 1 -> instruction RAM, selector == 0 -> ROM.
assign oInstruction1 = (wInstructionSelector == 1) ?
wIMEM2_IMUX__DataOut1 : wIROM2_IMUX__DataOut1;


assign oInstruction2 = (wInstructionSelector2 == 1) ?
wIMEM2_IMUX__DataOut2 : wIROM2_IMUX__DataOut2;
//-------------------------------------------------------------------

wire wDataWriteEnable_RMEM,wDataWriteEnable_SMEM,wDataWriteEnable_XMEM;
wire [`DATA_ROW_WIDTH-1:0] wData_SMEM1,wData_SMEM2;
wire [`DATA_ROW_WIDTH-1:0] wData_RMEM1,wData_RMEM2,wData_IMEM1,wData_IMEM2,wData_XMEM1,wData_XMEM2;
wire [`DATA_ROW_WIDTH-1:0] wIOData_SMEM1,wIOData_SMEM2;//,wData_OMEM1,wData_OMEM2;

/*******************************************************
The Data memory is divided into several memory banks.
Each Bank has different characteristics:

* IO MEM: Input Registers, Written by IO, Read by EXE.
* SWAP MEM: Swap registers, while IO reads/writes values,
EXE reads/writes values.
* C1-C7, R1-R12: General purpose registers,
EXE can R/W, IO can not see these sections of the memory
* OREG*: Output registers written by EXE, Read by IO.

Whenever an input address is received, this input address
is divided into a bank selector and an offset in the following way:

__________________________
| b6 b5 | b4 b3 b2 b1 b0 |
The bits b4 .. b0 are the LSBs of the address; they give the
position relative to the bank.

The bits b6 and b5 give the actual Bank to select.
Please see aDefinitions.v for a description of each
register location.

0____________________
| IO MEM |
| |
| | b6b5 = 00
32|__________________|
| SWAP MEM |
| | b6b5 = 01
| |
64|__________________|
| C1 - C7 |
| R1 - R12 | b6b5 = 10
| |
96|__________________|
| CREG* |
| | b6b5 = 11
| |
|__________________|
*******************************************************/


// EXE read channel 1: pick the bank addressed by bits [6:5].
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ROW_WIDTH ) MUX1
(
.Sel( iDataReadAddress1_EXE[6:5] ),
.I1( wData_IMEM1 ), //IO MEM
.I2( wData_SMEM1 ), //SWAP MEM
.I3( wData_RMEM1 ), //R*, C*
.I4( wData_XMEM1 ), //CREG*
.O1( oData1_EXE )
);

// EXE read channel 2: pick the bank addressed by bits [6:5].
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ROW_WIDTH ) MUX2
(
.Sel( iDataReadAddress2_EXE[6:5] ),
.I1( wData_IMEM2 ), //IO MEM
.I2( wData_SMEM2 ), //SWAP MEM
.I3( wData_RMEM2 ), //R*, C*
.I4( wData_XMEM2 ), //CREG*
.O1( oData2_EXE )
);

// EXE write-enable decode. Bank 00 (IO MEM) has no EXE write enable.
assign wDataWriteEnable_SMEM = ( iDataWriteAddress_EXE[6:5] == 2'b01 && iDataWriteEnable_EXE ); //Enable WE for SMEM if bank == 01
assign wDataWriteEnable_RMEM = ( iDataWriteAddress_EXE[6:5] == 2'b10 && iDataWriteEnable_EXE); //Enable WE for RMEM if bank == 10
assign wDataWriteEnable_XMEM = ( iDataWriteAddress_EXE[6:5] == 2'b11 && iDataWriteEnable_EXE); //Enable WE for XMEM if bank == 11


//Input Registers, Written by IO, Read by EXE
//NOTE(review): IMEM and SMEM port B both take iDataWriteEnable_IO directly,
//with no bank decode on the IO write address - confirm this is intended.
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,/*42*/32) IMEM //16 here is enough, I hate small devices!
(
.Clock( Clock ),
.iWriteEnable( iDataWriteEnable_IO ), //Only IO can write into this bank
.iReadAddress0( iDataReadAddress1_EXE[4:0] ), //EXE read address channel 1
.iReadAddress1( iDataReadAddress2_EXE[4:0] ), //EXE read address channel 2
.iWriteAddress( iDataWriteAddress_IO[4:0] ), //Only IO can write into this bank
.iDataIn( iData_IO ),
.oDataOut0( wData_IMEM1 ),
.oDataOut1( wData_IMEM2 )
);

//Swap registers, while IO reads/writes values, EXE reads/writes values
//the pointers get flipped in the next iteration
//
//The memory is instantiated with 5 address bits, so the port-B (IO) addresses
//are sliced to [4:0] explicitly, like every other bank connection in this
//module, instead of relying on implicit truncation of the full-width bus.
SWAP_MEM # (`DATA_ROW_WIDTH,5,32) SMEM
(
.Clock( Clock ),
.iSelect( wFlipSelect ),
.iWriteEnableA( wDataWriteEnable_SMEM ),
.iReadAddressA0( iDataReadAddress1_EXE[4:0] ),
.iReadAddressA1( iDataReadAddress2_EXE[4:0] ),
.iWriteAddressA( iDataWriteAddress_EXE[4:0] ),
.iDataInA( iData_EXE ),
.oDataOutA0( wData_SMEM1 ),
.oDataOutA1( wData_SMEM2 ),
.iWriteEnableB( iDataWriteEnable_IO ),
.iReadAddressB0( iDataReadAddress1_IO[4:0] ),
.iReadAddressB1( iDataReadAddress2_IO[4:0] ),
.iWriteAddressB( iDataWriteAddress_IO[4:0] ),
.iDataInB( iData_IO )
// .oDataOutB0( wIOData_SMEM1 ),
// .oDataOutB1( wIOData_SMEM2 )
);

//General purpose registers, EXE can R/W, IO can not see these sections
//of the memory
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,32) RMEM //Ok so we have fewer Registers then...
(
.Clock( Clock ),
.iWriteEnable( wDataWriteEnable_RMEM ),
.iReadAddress0( iDataReadAddress1_EXE[4:0] ),
.iReadAddress1( iDataReadAddress2_EXE[4:0] ),
.iWriteAddress( iDataWriteAddress_EXE[4:0] ),
.iDataIn( iData_EXE ),
.oDataOut0( wData_RMEM1 ),
.oDataOut1( wData_RMEM2 )
);

//Bank 11 registers, read and written by EXE only
RAM_DUAL_READ_PORT # (`DATA_ROW_WIDTH,5,32) XMEM //Ok so we have fewer Registers then...
(
.Clock( Clock ),
.iWriteEnable( wDataWriteEnable_XMEM ),
.iReadAddress0( iDataReadAddress1_EXE[4:0] ),
.iReadAddress1( iDataReadAddress2_EXE[4:0] ),
.iWriteAddress( iDataWriteAddress_EXE[4:0] ),
.iDataIn( iData_EXE ),
.oDataOut0( wData_XMEM1 ),
.oDataOut1( wData_XMEM2 )
);

//1-bit counter enabled by iFlipMemory; its output is SMEM's iSelect.
UPCOUNTER_POSEDGE # (1) UPC1
(
.Clock(Clock),
.Reset( Reset ),
.Initial(1'b0),
.Enable(iFlipMemory),
.Q(wFlipSelect)
);



//-------------------------------------------------------------------
/*
Instruction memory.
*/

// ROM_ADDRESS_WIDTH exceeds the array size, so the address may get trimmed...
// The read addresses have their MSB forced to 0 because that bit is the
// RAM/ROM selector; the write address is passed through unmodified.
RAM_DUAL_READ_PORT # (`INSTRUCTION_WIDTH,`ROM_ADDRESS_WIDTH,/*512*/128) INST_MEM //Only 128 instructions :( well this is for the user anyway
(
.Clock( Clock ),
.iWriteEnable( iInstructionWriteEnable ),
.iReadAddress0( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
.iReadAddress1( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
.iWriteAddress( iInstructionWriteAddress ),
.iDataIn( iInstruction ),
.oDataOut0( wIMEM2_IMUX__DataOut1 ),
.oDataOut1( wIMEM2_IMUX__DataOut2 )
);
//-------------------------------------------------------------------
/*
Default code stored in ROM.
*/
wire [`INSTRUCTION_WIDTH-1:0] wRomDelay1,wRomDelay2;
//In the real world a ROM will take at least 1 clock cycle,
//since ROMs are not synthesizable, it won't hurt to put
//this delay

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDA
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wRomDelay1),
.Q(wIROM2_IMUX__DataOut1 )
);


FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDB
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wRomDelay2),
.Q(wIROM2_IMUX__DataOut2 )
);

//The reason I put two ROMs is because I need to read 2 different Instruction
//addresses at the same time (branch-taken and branch-not-taken) and not sure
//how to write dual read channel ROM this way...

ROM IROM
(
.Address( {1'b0,iInstructionReadAddress1[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
.iDebug_CoreID(iDebug_CoreID),
`endif
.I( wRomDelay1 )
);

ROM IROM2
(
.Address( {1'b0,iInstructionReadAddress2[`ROM_ADDRESS_WIDTH-2:0]} ),
`ifdef DEBUG
.iDebug_CoreID(iDebug_CoreID),
`endif
.I( wRomDelay2 )
);
//--------------------------------------------------------
ControlRegister CR
(
.Clock( Clock ),
.Reset( Reset ),
.iControlRegister( iControlRegister ),
.oControlRegister( oControlRegister )
);


endmodule
//-------------------------------------------------------------------
/rtl/Module_ExecutionFSM.v
0,0 → 1,539
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
// State encodings for the ExecutionFSM state register (CurrentState/NextState).
// Value 6 is not assigned to any state.

// State loaded into CurrentState while Reset is asserted.
`define EXEU_AFTER_RESET 0
// NOTE(review): not referenced in the visible part of the FSM - confirm whether still needed.
`define EXEU_INITIAL_STATE 1
// Idle: input latches enabled, waiting for iDecodeDone.
`define EXEU_WAIT_FOR_DECODE 2
// NOTE(review): not referenced in the visible part of the FSM - confirm whether still needed.
`define EXEU_FETCH_DECODED_INST 3
// Busy: input latches disabled, waiting for iALUOutputReady.
`define EXEU_WAIT_FOR_ALU_EXECUTION 4
// Has a case arm in the FSM, but no visible transition selects it as NextState.
`define EXEU_WRITE_BACK_TO_RAM 5
// NOTE(review): not referenced in the visible part of the FSM - confirm whether still needed.
`define EXEU_HANDLE_JUMP 7
 
 
 
module ExecutionFSM
(
input wire Clock,
input wire Reset,
 
input wire iDecodeDone,
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1,
input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination,
inout wire[`DATA_ROW_WIDTH-1:0] RAMBus,
//output reg ReadyForNextInstruction,
output wire oJumpFlag ,
output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp ,
output wire oRAMWriteEnable ,
output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress ,
output wire oExeLatchedValues,
output reg oBusy ,
 
//ALU ports and control signals
output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation,
output wire [`WIDTH-1:0] oALUChannelX1,
output wire [`WIDTH-1:0] oALUChannelY1,
output wire [`WIDTH-1:0] oALUChannelZ1,
output wire [`WIDTH-1:0] oALUChannelX2,
output wire [`WIDTH-1:0] oALUChannelY2,
output wire [`WIDTH-1:0] oALUChannelZ2,
output wire oTriggerALU,
 
input wire [`WIDTH-1:0] iALUResultX,
input wire [`WIDTH-1:0] iALUResultY,
input wire [`WIDTH-1:0] iALUResultZ,
input wire iALUOutputReady,
input wire iBranchTaken,
input wire iBranchNotTaken,
 
 
`ifdef DEBUG
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP,
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
//Data forward Signals
output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination
 
 
);
 
wire wLatchNow;
reg rInputLatchesEnabled;
 
//If ALU says jump, just pass along
assign oJumpFlag = iBranchTaken;
//JumpIP is the instruction destination (= oRAMWriteAddress)
assign oJumpIp = oRAMWriteAddress;
 
assign wLatchNow = iDecodeDone & rInputLatchesEnabled;
assign oExeLatchedValues = wLatchNow;
assign oTriggerALU = wLatchNow;
 
wire wOperationIsJump;
assign wOperationIsJump = iBranchTaken || iBranchNotTaken;
 
//Don't allow me to write back back if the operation is a NOP
`ifdef DEBUG
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump &&
(oALUOperation != `NOP) && oALUOperation != `DEBUG_PRINT;
`else
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && oALUOperation != `NOP;
`endif
 
 
assign RAMBus = ( oRAMWriteEnable ) ? {iALUResultX,iALUResultY,iALUResultZ} : `DATA_ROW_WIDTH'bz;
 
assign oALUChannelX1 = iSource1[95:64];
assign oALUChannelY1 = iSource1[63:32];
assign oALUChannelZ1 = iSource1[31:0];
 
assign oALUChannelX2 = iSource0[95:64];
assign oALUChannelY2 = iSource0[63:32];
assign oALUChannelZ2 = iSource0[31:0];
/*
FF32_POSEDGE_SYNCRONOUS_RESET SourceX1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[95:64] ),
.Q( oALUChannelX1 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceY1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[63:32] ),
.Q( oALUChannelY1 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[31:0] ),
.Q( oALUChannelZ1 )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[95:64] ),
.Q(oALUChannelX1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[63:32] ),
.Q(oALUChannelY1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[31:0] ),
.Q(oALUChannelZ1)
);
*/
/*
FF32_POSEDGE_SYNCRONOUS_RESET SourceX2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[95:64] ),
.Q( oALUChannelX2 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceY2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[63:32] ),
.Q( oALUChannelY2 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[31:0] ),
.Q( oALUChannelZ2 )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[95:64] ),
.Q(oALUChannelX2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[63:32] ),
.Q(oALUChannelY2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[31:0] ),
.Q(oALUChannelZ2)
);
*/
//Finally one more latch to store
//the iOperation and the destination
 
 
assign oALUOperation = iOperation;
//assign oRAMWriteAddress = iDestination;
/*
FF_OPCODE_POSEDGE_SYNCRONOUS_RESET FFOperation
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iOperation ),
.Q( oALUOperation )
);
 
 
FF16_POSEDGE_SYNCRONOUS_RESET PSRegDestination
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iDestination ),
.Q( oRAMWriteAddress )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFOperation
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iOperation ),
.Q(oALUOperation)
);
*/
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iDestination ),
.Q(oRAMWriteAddress)
);
 
//Data forwarding
assign oLastDestination = oRAMWriteAddress;
 
// Control FSM of this unit.
// CurrentState is the registered state; NextState is its combinational
// successor, computed together with oBusy and rInputLatchesEnabled below.
reg [7:0] CurrentState;
reg [7:0] NextState;


//------------------------------------------------
// State register: asynchronous active-high Reset forces EXEU_AFTER_RESET,
// otherwise the state advances to NextState on every rising Clock edge.
always @(posedge Clock or posedge Reset)
begin
    if (Reset)
        CurrentState <= `EXEU_AFTER_RESET;
    else
        CurrentState <= NextState;
end
//------------------------------------------------


// Next-state and output decode.
// This block is purely combinational, so it uses blocking assignments
// (nonblocking assignments are reserved for the clocked state register above).
// Every branch assigns oBusy, rInputLatchesEnabled and NextState, so no
// latches are inferred.
always @( * )
begin
    case (CurrentState)
    //------------------------------------------
    // First state after Reset: not busy, input latches open,
    // unconditionally move on to wait for the decoder.
    `EXEU_AFTER_RESET:
    begin
        oBusy                = 0;
        rInputLatchesEnabled = 1;
        NextState            = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    /**
        At the same time iDecodeDone goes to 1, our flops
        will store the value, so on the next clock cycle we can
        tell the IDU to go ahead and decode the next instruction
        in the pipeline.
    */
    `EXEU_WAIT_FOR_DECODE:
    begin
        oBusy                = 0;
        rInputLatchesEnabled = 1;
        if ( iDecodeDone ) // This same signal triggers the ALU
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
        else
            NextState = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    /*
        An instruction is executing: report busy and keep the input
        latches closed until the ALU signals that its output is ready.
    */
    `EXEU_WAIT_FOR_ALU_EXECUTION:
    begin
        oBusy                = 1;
        rInputLatchesEnabled = 0; // No interruptions while we wait

        if ( iALUOutputReady ) // This same signal enables writing the results to RAM
            NextState = `EXEU_WAIT_FOR_DECODE;
        else
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
    end
    //------------------------------------------
    // NOTE(review): none of the transitions in this case statement select
    // EXEU_WRITE_BACK_TO_RAM, so this branch looks unreachable from the
    // states above - confirm before removing.
    `EXEU_WRITE_BACK_TO_RAM:
    begin
        oBusy                = 1;
        rInputLatchesEnabled = 1;
        if ( iDecodeDone )
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
        else
            NextState = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    // Any unexpected encoding falls back to the after-reset state.
    default:
    begin
        oBusy                = 0;
        rInputLatchesEnabled = 1;

        NextState            = `EXEU_AFTER_RESET;
    end
    //------------------------------------------
    endcase
end
 
//-----------------------------------------------------------------------
`ifdef DUMP_CODE
integer ucode_file;
integer reg_log;
initial
begin
 
$display("Opening ucode dump file....\n");
ucode_file = $fopen("Code.log","w");
$fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n");
$display("Opening Register lof file...\n");
reg_log = $fopen("Registers.log","w");
end
 
`endif //Ucode dump
 
//-----------------------------------------------------------------------
`ifdef DEBUG
wire [`WIDTH-1:0] wALUChannelX1,wALUChannelY1,wALUChannelZ1;
wire [`WIDTH-1:0] wALUChannelX2,wALUChannelY2,wALUChannelZ2;
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[95:64] ),
.Q(wALUChannelX1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[63:32] ),
.Q(wALUChannelY1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[31:0] ),
.Q(wALUChannelZ1)
);
 
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[95:64] ),
.Q(wALUChannelX2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[63:32] ),
.Q(wALUChannelY2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[31:0] ),
.Q(wALUChannelZ2)
);
 
 
always @ (posedge iDecodeDone && iDebug_CoreID == `DEBUG_CORE)
begin
`LOGME"[CORE %d] IP:%d", iDebug_CoreID,iDebug_CurrentIP);
end
 
always @ (negedge Clock && iDebug_CoreID == `DEBUG_CORE)
begin
if ( iALUOutputReady )
begin
if (iBranchTaken)
`LOGME"<BT>");
if (iBranchNotTaken )
`LOGME"<BNT>");
if (oRAMWriteEnable)
`LOGME"<WE>");
`LOGME "(%dns ",$time);
case ( oALUOperation )
`RETURN: `LOGME"RETURN");
`ADD: `LOGME"ADD");
`SUB: `LOGME"SUB");
`DIV: `LOGME"DIV");
`MUL: `LOGME"MUL");
`MAG: `LOGME"MAG");
`JGX: `LOGME"JGX");
`JLX: `LOGME"JLX");
`JGEX: `LOGME"JGEX");
`JGEY: `LOGME"JGEY");
`JGEZ: `LOGME"JGEZ");
`JLEX: `LOGME"JLEX");
`JLEY: `LOGME"JLEY");
`JLEZ: `LOGME"JLEZ");
`JMP: `LOGME"JMP");
`ZERO: `LOGME"ZERO");
`JNEX: `LOGME"JNEX");
`JNEY: `LOGME"JNEY");
`JNEZ: `LOGME"JNEZ");
`JEQX: `LOGME"JEQX");
`JEQY: `LOGME"JEQY");
`JEQZ: `LOGME"JEQZ");
`CROSS: `LOGME"CROSS");
`DOT: `LOGME"DOT");
`SETX: `LOGME"SETX");
`SETY: `LOGME"SETY");
`SETZ: `LOGME"SETZ");
`NOP: `LOGME"NOP");
`COPY: `LOGME"COPY");
`INC: `LOGME"INC");
`DEC: `LOGME"DEC");
`MOD: `LOGME"MOD");
`FRAC: `LOGME"FRAC");
`NEG: `LOGME"NEG");
`SWIZZLE3D: `LOGME"SWIZZLE3D");
`MULP: `LOGME"MULP");
`XCHANGEX: `LOGME"XCHANGEX");
`IMUL: `LOGME"IMUL");
`UNSCALE: `LOGME"UNSCALE");
`INCX: `LOGME"INCX");
`INCY: `LOGME"INCY");
`INCZ: `LOGME"INCZ");
`OMWRITE: `LOGME"OMWRITE");
`TMREAD: `LOGME"TMREAD");
`LEA: `LOGME"LEA");
`CALL: `LOGME"CALL");
`RET: `LOGME"RET");
`DEBUG_PRINT:
begin
`LOGME"DEBUG_PRINT");
end
default:
begin
`LOGME"**********ERROR UNKNOWN OP*********");
$display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation);
// $stop();
end
endcase
`LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ",
oRAMWriteAddress,
wALUChannelX1,wALUChannelY1,wALUChannelZ1,
wALUChannelX2,wALUChannelY2,wALUChannelZ2
);
if (oALUOperation == `RETURN)
`LOGME"\n\n\n");
end
end //always
always @ ( negedge Clock && iDebug_CoreID == `DEBUG_CORE )
begin
if ( iALUOutputReady )
`LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ);
end //always
`endif
 
endmodule
/rtl/Module_FixedPointAddtionSubstraction.v
0,0 → 1,67
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
//-----------------------------------------------------------
// INCREMENT: registered incrementer.
// On every rising edge of Clock, R is loaded with A + 1, truncated to SIZE
// bits. Reset is part of the port list but is not used by the logic, so R is
// undefined until the first clock edge.
module INCREMENT # ( parameter SIZE=`WIDTH )
(
    input  wire            Clock,
    input  wire            Reset,   // unused
    input  wire [SIZE-1:0] A,       // value to increment
    output reg  [SIZE-1:0] R        // A + 1, registered
);

// Nonblocking assignment: R is a flip-flop output, and a blocking assignment
// here would race with any other posedge-Clock process that reads R.
always @ (posedge Clock)
begin
    R <= A + 1;
end


endmodule
//-----------------------------------------------------------
// FixedAddSub: registered adder / subtractor.
//   iOperation = 0 : R <= A + B
//   iOperation = 1 : R <= A + (~B + 1), i.e. A - B in two's complement
// R is computed only on clock edges where iInputReady is 1; on every other
// edge it is loaded with the constant 64'hFFFFFFFF.
// OutputReady is generated from iInputReady by an FFD_POSEDGE_ASYNC_RESET
// instance (defined elsewhere; presumably a one-clock delay that matches the
// latency of R - confirm against that module).
module FixedAddSub
(
    input  wire                   Clock,
    input  wire                   Reset,
    input  wire [`LONG_WIDTH-1:0] A,
    input  wire [`LONG_WIDTH-1:0] B,
    output reg  [`LONG_WIDTH-1:0] R,
    input  wire                   iOperation,   // 0 = add, 1 = subtract
    input  wire                   iInputReady,  // Is the input data valid?
    output wire                   OutputReady   // Our output data is ready!
);

// Second operand, negated (two's complement) when subtracting.
wire [`LONG_WIDTH-1:0] wB;

assign wB = ( iOperation ) ? ~B + 1'b1 : B;

// Output-ready flag derived from iInputReady.
FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iInputReady ),
    .Q( OutputReady )
);
//-------------------------------
// Result register. Nonblocking assignments are used because R is a flip-flop
// output; blocking assignments in a clocked block race with other
// posedge-Clock processes that sample R.
always @ (posedge Clock)
begin

    if (iInputReady == 1)
    begin
        R <= ( A + wB );
    end
    else
    begin
        // Value driven while no valid input is present. The literal is
        // fixed at 64 bits with only the low 32 bits set.
        // NOTE(review): this ignores `LONG_WIDTH - confirm that is intended.
        R <= 64'hFFFFFFFF;
    end

end // always

endmodule
/rtl/Module_Host.v
0,0 → 1,726
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
/*******************************************************************************
Module Description:
 
WIP
 
*******************************************************************************/
 
 
 
// Module_Host asserts wLastVertexInFrame when wVertexCount modulo this
// value is zero.
`define MAX_VERTEX_IN_FRAME `WIDTH'd7 // WAS 8'd6
// Values Module_Host drives on TGA_O: instruction-memory vs data-memory writes.
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b01
`define TAG_DATA_ADDRESS_TYPE 2'b10
// Values Module_Host drives on oMemSelect to pick the external memory being
// read. The literals are sized as 3 bits but written with two binary digits,
// so the upper bit is zero.
`define SELECT_INST_MEM 3'b00
`define SELECT_SCENE_MEM 3'b01
`define SELECT_GEO_MEM 3'b10


// State encodings for the Module_Host FSM (rHostCurrentState / rHostNextState).
// The value 9 is not assigned to any state.
`define HOST_IDLE 0
`define HOST_WRITE_INSTRUCTION 1
`define HOST_WAIT_INSTRUCTION 2
`define HOST_WRITE_SCENE_PARAMS 3
`define HOST_WAIT_SCENE_PARAMS 4
`define HOST_PREPARE_CORE_CONFIG 5
`define HOST_UNICAST_CORE_CONFIG 6
`define HOST_WAIT_CORE_CONFIG 7
`define HOST_PREPARE_NEXT_CORE_CONFIG 8
`define HOST_WAIT_DATA_READ_CONFIRMATION 10
`define HOST_BROADCAST_NEXT_VERTEX 11
`define HOST_WAIT_FOR_VERTEX 12
`define HOST_INITIAL_SCENE_PARAMS_STAGE 13
`define HOST_PREPARE_FOR_GEO_REQUESTS 14
`define HOST_ACK_GEO_REQUEST 15
`define HOST_GET_PRIMITIVE_COUNT 16
`define HOST_LAST_PRIMITIVE_REACHED 17
`define HOST_GPU_EXECUTION_DONE 18
 
//---------------------------------------------------------------
// Module_Host
//
// Behavioral host model that feeds the GPU cores through a WBMaster instance.
// The FSM below walks through these phases:
//   1. Broadcast the instruction memory to all cores
//      (HOST_WRITE_INSTRUCTION / HOST_WAIT_INSTRUCTION) until the read
//      address reaches iMemorySize.
//   2. Broadcast the scene parameters
//      (HOST_INITIAL_SCENE_PARAMS_STAGE / HOST_WRITE_SCENE_PARAMS /
//      HOST_WAIT_SCENE_PARAMS) until the read address reaches 32'h12.
//   3. Unicast a per-core configuration, one core at a time
//      (HOST_PREPARE_CORE_CONFIG .. HOST_PREPARE_NEXT_CORE_CONFIG).
//   4. Broadcast geometry, `MAX_VERTEX_IN_FRAME vertices at a time, waiting
//      for iHostDataReadConfirmed between frames, until wVertexCount reaches
//      iPrimitiveCount; then wait for iGPUCommitedResults and start over,
//      or stop in HOST_GPU_EXECUTION_DONE once iGPUDone is set.
//
// The FSM contains $display/$write/$time calls and simulation-only
// bookkeeping (StartTime, RenderedPixels), so this module is a simulation
// model rather than synthesizable RTL.
module Module_Host
(
    input wire Clock,
    input wire Reset,
    input wire iEnable,
    input wire iHostDataReadConfirmed,
    input wire [`WB_WIDTH-1:0] iMemorySize,
    input wire [`WB_WIDTH-1:0] iPrimitiveCount,
    //To Memory
    output wire [`WB_WIDTH-1:0] oReadAddress,
    input wire [`WB_WIDTH-1:0] iReadData,
    input wire iGPUCommitedResults,
    //To Hub/Switch
    output wire [`MAX_CORES-1:0] oCoreSelectMask,
    output reg [2:0] oMemSelect,
    output wire [`WB_WIDTH-1:0] DAT_O,
    output wire [`WB_WIDTH-1:0] ADR_O,
    output reg[1:0] TGA_O,
    output reg[`MAX_CORES-1:0] RENDREN_O,
    output wire CYC_O,
    output wire STB_O,
    output reg MST_O,
    output wire WE_O,
    input wire GRDY_I, //All the cores are done reading the primitive we sent (not used inside this module)
    output reg GACK_O, //We set this to ACK that the cores read the primitive
    output wire STDONE_O,
    output reg oHostDataAvailable,
    input wire iGPUDone,
`ifndef NO_DISPLAY_STATS
    input wire [`WIDTH-1:0] iDebugWidth,
`endif
    input wire ACK_I
);
//---------------------------------------------------------------
// Internal signals. All of them are declared before their first use;
// in particular wVertexCount must precede the wLastPrimitive assignment.
wire wWBMDone;
reg rWBMEnable,rWBMReset,rCoreBroadCast;
reg [`WB_WIDTH-1:0] rInitiaReadAddr;
wire [`MAX_CORES-1:0] wCoreSelect;
wire wLastValidReadAddress;
wire [`WB_WIDTH-1:0] wWriteAddress;
wire [`WIDTH-1:0] wVertexCount;
reg [`WB_WIDTH-1:0] rInitialWriteAddress;
reg rSetWriteAddr;
reg rIncCoreSelect,rResetVertexCount;

// High once the vertex counter has reached the primitive count.
wire wLastPrimitive;
assign wLastPrimitive = (wVertexCount >= iPrimitiveCount) ? 1'b1 : 1'b0;
assign STDONE_O = wLastPrimitive;
//--------------------------------------------------------

// Write enable simply follows the master signal.
assign WE_O = MST_O;

// Broadcast selects every core, otherwise the one-hot unicast selector is used.
assign oCoreSelectMask =
    (rCoreBroadCast) ? `SELECT_ALL_CORES : wCoreSelect;
// High once the read pointer has reached the size of the selected memory.
assign wLastValidReadAddress =
    (oReadAddress >= iMemorySize) ? 1'b1 : 1'b0;

// High once the read pointer has reached 32'h12, which ends the
// scene-parameter broadcast phase.
wire wLastParameter;
assign wLastParameter = (oReadAddress >= 32'h12) ? 1'b1 : 1'b0;
//--------------------------------------------------------
// Write-address counter: loaded with rInitialWriteAddress while
// rSetWriteAddr (or Reset) is high, stepped each time the WBM reports done.
UPCOUNTER_POSEDGE # (`WB_WIDTH ) UPWADDR
(
    .Clock( Clock ),
    .Reset( Reset | rSetWriteAddr ),
    .Enable( iEnable & wWBMDone ),
    .Initial( rInitialWriteAddress ),
    .Q( wWriteAddress )
);


// Vertex counter: restarts from 1 while rResetVertexCount (or Reset) is high,
// stepped each time the WBM reports done. The width parameter is `WIDTH so
// that it matches both wVertexCount and the `WIDTH'b1 initial value.
UPCOUNTER_POSEDGE # ( `WIDTH ) PRIMCOUNT
(
    .Clock( Clock ),
    .Reset( Reset | rResetVertexCount ),
    .Enable( iEnable & wWBMDone ),
    .Initial( `WIDTH'b1 ),
    .Q( wVertexCount )
);
//--------------------------------------------------------
// One-hot core selector used for unicast; shifted when rIncCoreSelect is high.
CIRCULAR_SHIFTLEFT_POSEDGE_EX # (`MAX_CORES ) SHF1
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Initial( `MAX_CORES'b1 ),
    .Enable( rIncCoreSelect ),
    .O( wCoreSelect )
);
//--------------------------------------------------------
wire wShortCycle;
//For instructions we send 2 packets per cycle,
//for the others we send 3 packets per cycle
assign wShortCycle = (oMemSelect == `SELECT_INST_MEM) ? 1'b1 : 1'b0;

WBMaster WBM
(
    .Clock( Clock ),
    .Reset( Reset | rWBMReset ),
    .iEnable( rWBMEnable ),
    .iInitialReadAddr( rInitiaReadAddr ),
    .iWriteAddr( wWriteAddress ),
    .oReadAddress( oReadAddress ),
    .iReadData( iReadData ),
    .iShortFlow( wShortCycle ),
    .STB_O( STB_O ),
    .ACK_I( ACK_I ),
    .CYC_O( CYC_O ),
    .DAT_O( DAT_O ),
    .ADR_O( ADR_O ),
    .oDone( wWBMDone )
);

//--------------------------------------------------------
// Current State Logic //
// Any Reset value other than a clean 0 (including X) holds the FSM in HOST_IDLE.
reg [7:0] rHostCurrentState,rHostNextState;
always @(posedge Clock or posedge Reset)
begin
    if( Reset!=1 )
        rHostCurrentState <= rHostNextState;
    else
        rHostCurrentState <= `HOST_IDLE;
end
//--------------------------------------------------------

// Simulation-only bookkeeping for the progress / statistics printout.
reg [63:0] i;
reg [63:0] RenderedPixels;
// High when the vertex count is a multiple of `MAX_VERTEX_IN_FRAME.
wire wLastVertexInFrame;
assign wLastVertexInFrame =
    (wVertexCount % `MAX_VERTEX_IN_FRAME == 1'b0 ) ? 1'b1 : 1'b0;

// WAS ((wVertexCount % `MAX_VERTEX_IN_FRAME) == 1'b0 && wVertexCount != 0) ? 1'b1 : 1'b0;

reg [31:0] StartTime;

// Host Finite State Machine //
// Combinational next-state and output decode. Every state assigns every
// control output explicitly. StartTime and RenderedPixels are only written
// in some states and therefore hold their value in between.
always @( * )
begin
    case (rHostCurrentState)
    //----------------------------------------
    //Wait for reset sequence to complete,
    //or until we are enabled
    `HOST_IDLE:
    begin
        RenderedPixels       = 0;
        rWBMEnable           = 0;
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size
        rWBMReset            = 0;
        oMemSelect           = 0;
        TGA_O                = 0;
        MST_O                = 0;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if ( ~Reset & iEnable )
        begin
            $display("-I- HOST: Broadcasting User code to all Cores\n"); $fflush;
            rHostNextState = `HOST_WRITE_INSTRUCTION;
        end
        else
            rHostNextState = `HOST_IDLE;
    end
    //----------------------------------------
    //Broadcast the instructions to all the cores
    `HOST_WRITE_INSTRUCTION:
    begin
        StartTime            = $time;
        rWBMEnable           = 1; //Enable Wishbone master
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size
        rWBMReset            = 0; //No need to reset since we just came from reset
        oMemSelect           = `SELECT_INST_MEM; //Start by sending the instructions
        TGA_O                = `TAG_INSTRUCTION_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_WAIT_INSTRUCTION;
    end
    //----------------------------------------
    //Wait for the WBM to finish the current instruction transfer
    `HOST_WAIT_INSTRUCTION:
    begin
        rWBMEnable           = ~wWBMDone;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_INST_MEM;
        TGA_O                = `TAG_INSTRUCTION_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if ( wWBMDone && ~wLastValidReadAddress )
            rHostNextState = `HOST_WRITE_INSTRUCTION;
        else if (wWBMDone && wLastValidReadAddress )
            rHostNextState = `HOST_INITIAL_SCENE_PARAMS_STAGE;
        else
            rHostNextState = `HOST_WAIT_INSTRUCTION;
    end
    //----------------------------------------
    /*
    Make sure the read pointer points to the
    first memory address of the params memory
    */
    `HOST_INITIAL_SCENE_PARAMS_STAGE:
    begin
        rWBMEnable           = 0;
        rInitiaReadAddr      = 1; //Start reading from 1, because 0 is the size
        rWBMReset            = 1; //Reload the WBM read pointer with the address above
        oMemSelect           = `SELECT_SCENE_MEM; //We are reading from the scene memory
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We will write to the DATA section of the core MEM
        MST_O                = 1; //Keep master signal in 1 for now
        rInitialWriteAddress = 0; //We start writing from address zero now
        rSetWriteAddr        = 1; //Load the write address above
        rCoreBroadCast       = 1; //Scene parameters are broadcast to all cores
        rIncCoreSelect       = 0; //Do not advance the unicast core selector
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        $display("-I- HOST: Configuring Core Mask %b\n",oCoreSelectMask); $fflush;
        rHostNextState       = `HOST_WRITE_SCENE_PARAMS;
    end
    //----------------------------------------
    //Broadcast the scene parameters to all the cores
    `HOST_WRITE_SCENE_PARAMS:
    begin
        rWBMEnable           = 1;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_SCENE_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_WAIT_SCENE_PARAMS;
    end
    //----------------------------------------
    //Wait for the WBM to finish the current scene-parameter transfer
    `HOST_WAIT_SCENE_PARAMS:
    begin
        rWBMEnable           = ~wWBMDone;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_SCENE_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if ( wWBMDone && ~wLastParameter )
            rHostNextState = `HOST_WRITE_SCENE_PARAMS;
        else if (wWBMDone && wLastParameter )
            rHostNextState = `HOST_PREPARE_CORE_CONFIG;
        else
            rHostNextState = `HOST_WAIT_SCENE_PARAMS;
    end
    //----------------------------------------
    /*
    This state sets the write address pointer to the
    CREG_PIXEL_2D_INITIAL_POSITION memory position and
    also selects the scene MEM from the external MEM
    MUX.
    */
    `HOST_PREPARE_CORE_CONFIG:
    begin
        rWBMEnable           = 0;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_SCENE_MEM; //We are reading from the scene memory
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We will write to the DATA section of the core MEM
        MST_O                = 1; //Keep master signal in 1 for now
        rInitialWriteAddress = `CREG_PIXEL_2D_INITIAL_POSITION; //The address from which to start writing at the cores
        rSetWriteAddr        = 1; //Load the initial write address above
        rCoreBroadCast       = 0; //Unicast, using the current core selector
        rIncCoreSelect       = 0; //Do not advance the core selector yet
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_UNICAST_CORE_CONFIG;
    end
    //----------------------------------------
    //Start a unicast transfer to the currently selected core
    `HOST_UNICAST_CORE_CONFIG:
    begin
        rWBMEnable           = 1;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_SCENE_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_WAIT_CORE_CONFIG;
    end
    //----------------------------------------
    //Wait for the unicast transfer; the parity of the read address decides
    //whether this core needs another transfer or is finished
    `HOST_WAIT_CORE_CONFIG:
    begin
        rWBMEnable           = ~wWBMDone;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_SCENE_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        $display("%dns wWBMDone = %d (oReadAddress mod 2) = %d",$time,wWBMDone,(oReadAddress % 2));
        if (wWBMDone && !(oReadAddress % 2))
            rHostNextState = `HOST_UNICAST_CORE_CONFIG;
        else if (wWBMDone && (oReadAddress % 2) )
            rHostNextState = `HOST_PREPARE_NEXT_CORE_CONFIG;
        else
            rHostNextState = `HOST_WAIT_CORE_CONFIG;
    end
    //----------------------------------------
    /*
    Advance the unicast core selector and reload the write
    address for the next core. Once the selector has reached
    the last core, move on to the geometry phase.
    */
    `HOST_PREPARE_NEXT_CORE_CONFIG:
    begin
        rWBMEnable           = 0;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_GEO_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 0; //The master signal goes to zero until request
        rInitialWriteAddress = `CREG_PIXEL_2D_INITIAL_POSITION; //Write starting from this location on the cores
        rSetWriteAddr        = 1; //Load the initial write address above
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 1; //Moving to configure the next core now
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if (wCoreSelect[`MAX_CORES-1] == 1)
            rHostNextState = `HOST_PREPARE_FOR_GEO_REQUESTS;
        else
            rHostNextState = `HOST_UNICAST_CORE_CONFIG;
    end
    //----------------------------------------
    /*
    Prepare the read and write addresses for the first primitive
    and restart the vertex counter.
    */
    `HOST_PREPARE_FOR_GEO_REQUESTS:
    begin
        rWBMEnable           = 0; //Do not enable until we are requested
        rInitiaReadAddr      = 32'hA; //Start reading from address 0xA of the GEO MEM
        rWBMReset            = 1; //Tell WBM to start reading from the address above
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores
        MST_O                = 0; //The master signal goes to zero until request
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores
        rSetWriteAddr        = 1; //Load the initial write address above
        rCoreBroadCast       = 1; //From now on we only broadcast
        rIncCoreSelect       = 0; //Ignored during broadcasts
        RENDREN_O            = 0;
        rResetVertexCount    = 1; //Restart the vertex counter
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if (iGPUDone)
            rHostNextState = `HOST_GPU_EXECUTION_DONE;
        else
            rHostNextState = `HOST_BROADCAST_NEXT_VERTEX;
    end
    //----------------------------------------
    //Acknowledge the cores' request (GACK_O high for this state) and reload
    //the write address before sending the next frame of vertices
    `HOST_ACK_GEO_REQUEST:
    begin
        rWBMEnable           = 0; //Do not enable until we are requested
        rInitiaReadAddr      = 0; //Ignored
        rWBMReset            = 0; //Continue from previous read address
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores
        MST_O                = 0; //The master signal goes to zero until request
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores
        rSetWriteAddr        = 1; //Load the initial write address above
        rCoreBroadCast       = 1; //From now on we only broadcast
        rIncCoreSelect       = 0; //Ignored during broadcasts
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 1;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_BROADCAST_NEXT_VERTEX;
    end
    //----------------------------------------
    /*
    Send the next primitive to the HUB/SWITCH unit
    so that it gets broadcast to all the cores
    */
    `HOST_BROADCAST_NEXT_VERTEX:
    begin
        rWBMEnable           = 1; //Start the transmission
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_GEO_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1; //Start the transmission
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = `SELECT_ALL_CORES;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_WAIT_FOR_VERTEX;
    end
    //----------------------------------------
    //Wait for the vertex transfer; keep sending until the frame is complete
    `HOST_WAIT_FOR_VERTEX:
    begin
        rWBMEnable           = ~wWBMDone; //Disable WBM when it is done
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_GEO_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1; //Transmission in progress
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = `SELECT_ALL_CORES;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        if (wWBMDone & ~wLastVertexInFrame )
            rHostNextState = `HOST_BROADCAST_NEXT_VERTEX;
        else if (wWBMDone & wLastVertexInFrame )
            rHostNextState = `HOST_GET_PRIMITIVE_COUNT;
        else
            rHostNextState = `HOST_WAIT_FOR_VERTEX;
        /*
        if (wWBMDone)
            rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION;
        else
            rHostNextState = `HOST_WAIT_FOR_VERTEX;
        */
    end
    //----------------------------------------
    //A full frame of vertices was sent: decide whether that was the last primitive
    `HOST_GET_PRIMITIVE_COUNT:
    begin
        rWBMEnable           = 0; //WBM stays disabled in this state
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = `SELECT_GEO_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = `SELECT_ALL_CORES;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;//1;
        if (wVertexCount >= iPrimitiveCount)
            rHostNextState = `HOST_LAST_PRIMITIVE_REACHED;
        else
            rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION;
    end
    //----------------------------------------
    /*
    We wait until all the cores are ready for the next primitive,
    this happens when the iHostDataReadConfirmed signal
    gets asserted
    */
    `HOST_WAIT_DATA_READ_CONFIRMATION:
    begin
        rWBMEnable           = 0; //Do not enable until we are requested
        rInitiaReadAddr      = 0; //Ignored
        rWBMReset            = 0; //Continue from previous read address
        oMemSelect           = `SELECT_GEO_MEM; //Use external GEO mem for reading
        TGA_O                = `TAG_DATA_ADDRESS_TYPE; //We write to the data MEM at the cores
        MST_O                = 0; //The master signal goes to zero until request
        rInitialWriteAddress = `CREG_V0; //Write starting from this location on the cores
        rSetWriteAddr        = 1; //Load the initial write address above
        rCoreBroadCast       = 1; //From now on we only broadcast
        rIncCoreSelect       = 0; //Ignored during broadcasts
        RENDREN_O            = `SELECT_ALL_CORES;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 1;
        if ( iHostDataReadConfirmed )
            rHostNextState = `HOST_ACK_GEO_REQUEST;
        else
            rHostNextState = `HOST_WAIT_DATA_READ_CONFIRMATION;
    end
    //----------------------------------------
    //All primitives were sent: rewind the geometry read pointer and wait
    //for the GPU to commit its results
    `HOST_LAST_PRIMITIVE_REACHED:
    begin
        rWBMEnable           = 0; //WBM stays disabled in this state
        rInitiaReadAddr      = 32'hA; //Rewind the read pointer to the first primitive
        rWBMReset            = 1; //Reload the WBM read pointer with the address above
        oMemSelect           = `SELECT_GEO_MEM;
        TGA_O                = `TAG_DATA_ADDRESS_TYPE;
        MST_O                = 1;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 1;
        rIncCoreSelect       = 0;
        RENDREN_O            = `SELECT_ALL_CORES;
        rResetVertexCount    = 0; //The vertex counter is restarted later, in HOST_PREPARE_FOR_GEO_REQUESTS
        GACK_O               = 0;
        oHostDataAvailable   = 0;

        if (iGPUCommitedResults)
        begin
`ifndef NO_DISPLAY_STATS
            // Progress printout: one dot per core, plus a row marker each
            // time the pixel total is a multiple of iDebugWidth.
            for (i = 0; i < `MAX_CORES; i = i + 1)
            begin
                $write(".");
            end
            RenderedPixels = RenderedPixels + `MAX_CORES;
            if ( RenderedPixels % iDebugWidth == 0)
                $write("]%d\n[",RenderedPixels / iDebugWidth);
`endif
            rHostNextState = `HOST_PREPARE_FOR_GEO_REQUESTS;
        end
        else
            rHostNextState = `HOST_LAST_PRIMITIVE_REACHED;
    end
    //----------------------------------------
    //Terminal state: everything idle, the FSM stays here
    `HOST_GPU_EXECUTION_DONE:
    begin
        $display("THEIA Execution done in %dns\n",$time-StartTime);
        rWBMEnable           = 0;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = 0;
        TGA_O                = 0;
        MST_O                = 0;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_GPU_EXECUTION_DONE;
    end
    //----------------------------------------
    //Unknown encoding: drive everything idle and return to HOST_IDLE
    default:
    begin
        rWBMEnable           = 0;
        rInitiaReadAddr      = 0;
        rWBMReset            = 0;
        oMemSelect           = 0;
        TGA_O                = 0;
        MST_O                = 0;
        rInitialWriteAddress = 0;
        rSetWriteAddr        = 0;
        rCoreBroadCast       = 0;
        rIncCoreSelect       = 0;
        RENDREN_O            = 0;
        rResetVertexCount    = 0;
        GACK_O               = 0;
        oHostDataAvailable   = 0;
        rHostNextState       = `HOST_IDLE;
    end
    //----------------------------------------
    endcase
end

endmodule
/rtl/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps
`include "aDefinitions.v"
// InstructionEntryPoint
//
// oTriggerIFU is iTrigger passed through one FFD_POSEDGE_ASYNC_RESET
// flip-flop (cleared by Reset). While oTriggerIFU is high the low
// `ROM_ADDRESS_WIDTH bits of iIMemInput are forwarded as the entry point and
// as the instruction address; otherwise the entry point is zero and the
// instruction address is iInitialCodeAddress. oEPU_Busy is high while either
// iTrigger or oTriggerIFU is high.
module InstructionEntryPoint
(
    input  wire                           Clock,
    input  wire                           Reset,
    input  wire                           iTrigger,
    input  wire [`ROM_ADDRESS_WIDTH-1:0]  iInitialCodeAddress,
    input  wire [`INSTRUCTION_WIDTH-1:0]  iIMemInput,

    output wire                           oEPU_Busy,
    output wire [`ROM_ADDRESS_WIDTH-1:0]  oEntryPoint,
    output wire                           oTriggerIFU,
    output wire [`ROM_ADDRESS_WIDTH-1:0]  oInstructionAddr
);

// Registered copy of the trigger.
FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iTrigger ),
    .Q( oTriggerIFU )
);

// Busy from the moment the trigger arrives until its registered copy drops.
assign oEPU_Busy = iTrigger | oTriggerIFU;

// Entry point read from instruction memory, valid only while oTriggerIFU is high.
assign oEntryPoint = oTriggerIFU ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0] : `ROM_ADDRESS_WIDTH'b0;

// Address presented to the instruction memory.
assign oInstructionAddr = oTriggerIFU ? oEntryPoint : iInitialCodeAddress;

endmodule
/rtl/Module_HostWBM.v
0,0 → 1,70
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//---------------------------------------------------------------
// WBMaster
//
// Wishbone master used by Module_Host. iReadData is forwarded unchanged on
// DAT_O and iWriteAddr on ADR_O. Each time the slave acknowledges (ACK_I)
// while the master is enabled, the read address counter and two one-hot
// shift registers advance. oDone is bit 2 of the 3-bit register when
// iShortFlow is set, otherwise bit 3 of the 4-bit register.
module WBMaster
(
    input  wire                  Clock,
    input  wire                  Reset,
    input  wire                  iEnable,
    input  wire                  iShortFlow,
    input  wire [`WB_WIDTH-1:0]  iInitialReadAddr,
    input  wire [`WB_WIDTH-1:0]  iWriteAddr,
    output wire [`WB_WIDTH-1:0]  oReadAddress,
    input  wire [`WB_WIDTH-1:0]  iReadData,
    output reg                   STB_O,
    input  wire                  ACK_I,
    output wire                  CYC_O,
    output wire [`WB_WIDTH-1:0]  DAT_O,
    output wire [`WB_WIDTH-1:0]  ADR_O,
    output wire                  oDone
);

wire [3:0] wXYZSel_Long;
wire [2:0] wXYZSel_Short;

// High when the master is enabled and the slave acknowledges; this single
// term steps the read address counter and both shift registers.
wire wAdvance;
assign wAdvance = iEnable & ACK_I;

// Pass-through connections.
assign ADR_O = iWriteAddr;
assign DAT_O = iReadData;
assign CYC_O = iEnable;

// Read address counter, (re)loaded from iInitialReadAddr by Reset.
UPCOUNTER_POSEDGE # (`WB_WIDTH) WBM_O_READ_ADDRESS
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( wAdvance ),
    .Initial( iInitialReadAddr ),
    .Q( oReadAddress )
);

// One-hot position tracker for the long (4-step) flow.
CIRCULAR_SHIFTLEFT_POSEDGE #(4) SHL
(
    .Clock( Clock ),
    .Enable( wAdvance ),
    .Reset( Reset ),
    .Initial( 4'b1 ),
    .O( wXYZSel_Long )
);

// One-hot position tracker for the short (3-step) flow.
CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL2
(
    .Clock( Clock ),
    .Enable( wAdvance ),
    .Reset( Reset ),
    .Initial( 3'b1 ),
    .O( wXYZSel_Short )
);

// Done is the top bit of whichever tracker matches the selected flow.
assign oDone = iShortFlow ? wXYZSel_Short[2] : wXYZSel_Long[3];

// Strobe register: while enabled, STB_O is the inverse of ACK_I sampled on
// the clock edge; while disabled it is held low.
always @ (posedge Clock)
begin
    if (iEnable)
        STB_O <= ~ACK_I;
    else
        STB_O <= 1'b0;
end


endmodule
/rtl/TestBench_THEIA.v
0,0 → 1,345
 
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
/*******************************************************************************
Module Description:
 
This is the main test bench of the GPU. It simulates the behavior of
an external control unit or CPU that sends configuration information into the DUT.
It also implements a second process that simulates a Wishbone slave that sends
data from an external memory. These blocks are just behavioral CTE and therefore
are not meant to be synthesized.
 
*******************************************************************************/
 
 
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
`define RESOLUTION_WIDTH (rSceneParameters[13] >> `SCALE)
`define RESOLUTION_HEIGHT (rSceneParameters[14] >> `SCALE)
`define DELTA_ROW (32'h1 << `SCALE)
`define DELTA_COL (32'h1 << `SCALE)
`define TEXTURE_BUFFER_SIZE (256*256*3)
`define MAX_WIDTH 200
`define MAX_SCREENBUFFER (`MAX_WIDTH*`MAX_WIDTH*3)
module TestBench_Theia;
 
 
//------------------------------------------------------------------------
//**WARNING: Declare all of your variables at the beginning
//of the file. I have noticed that sometimes the Verilog
//simulator allows you to use some regs even if they have not been
//previously declared, leading to crashes or unexpected behavior
// Inputs
reg Clock;
reg Reset;
wire [`WB_WIDTH-1:0] DAT_O;
reg ACK_O;
wire ACK_I;
wire [`WB_WIDTH-1:0] ADR_I,ADR_O;
wire WE_I,STB_I;
wire CYC_O,WE_O,TGC_O,STB_O;
wire [1:0] TGA_O;
wire [1:0] TGA_I;
reg [`WB_WIDTH-1:0] TMADR_O,TMDAT_O;
reg [`MAX_TMEM_BANKS-1:0] TMSEL_O;
reg TMWE_O;
reg [31:0] rControlRegister[2:0];
integer file, log;
reg [31:0] rSceneParameters[120:0];
reg [31:0] rVertexBuffer[7000:0];
reg [31:0] rInstructionBuffer[512:0];
reg [31:0] rTextures[`TEXTURE_BUFFER_SIZE:0]; //Lets asume we use 256*256 textures
reg [7:0] rScreen[`MAX_SCREENBUFFER-1:0];
wire [`MAX_CORES-1:0] wCoreSelect;
wire [3:0] CYC_I,GNT_O;
wire MST_O;
wire wDone;
wire [`MAX_CORES-1:0] RENDREN_O;
reg [`MAX_CORE_BITS-1:0] wOMEMBankSelect;
reg [`WB_WIDTH-1:0] wOMEMReadAddr; //Output adress (relative to current bank)
wire [`WB_WIDTH-1:0] wOMEMData; //Output data bus (Wishbone)
reg rHostEnable;
integer k,out2;
wire GRDY_I;
wire GACK_O;
wire STDONE_O;
wire wGPUCommitedResults;
wire wHostDataAvailable;
 
 
THEIA GPU
(
.CLK_I( Clock ),
.RST_I( Reset ),
.RENDREN_I( RENDREN_O ),
.DAT_I( DAT_O ),
.ACK_I( ACK_O ),
 
.CYC_I( CYC_O ),
.MST_I( MST_O ),
.TGA_I( TGA_O ),
.ACK_O( ACK_I ),
.ADR_I( ADR_O ),
.WE_I( WE_O ),
.SEL_I( wCoreSelect ),
.STB_I( STB_O ),
//Output memory
.OMBSEL_I( wOMEMBankSelect ),
.OMADR_I( wOMEMReadAddr ),
.OMEM_O( wOMEMData ),
.TMDAT_I( TMDAT_O ),
.TMADR_I( TMADR_O ),
.TMWE_I( TMWE_O ),
.TMSEL_I( TMSEL_O ),
.HDL_O( GRDY_I ),
.HDLACK_I( GACK_O ),
.STDONE_I( STDONE_O ),
.RCOMMIT_O( wGPUCommitedResults ),
.HDA_I( wHostDataAvailable ),
 
//Control register
.CREG_I( rControlRegister[0][15:0] ),
//Other stuff
.DONE_O( wDone )
 
);
 
wire[`WB_WIDTH-1:0] wHostReadAddress;
wire[`WB_WIDTH-1:0] wHostReadData;
wire[`WB_WIDTH-1:0] wMemorySize;
wire[1:0] wMemSelect;
 
MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX1
(
.Sel( wMemSelect ),
.I1( rInstructionBuffer[wHostReadAddress] ),
.I2( rSceneParameters[wHostReadAddress] ),
.I3( rVertexBuffer[wHostReadAddress] ),
.I4(0),
.O1(wHostReadData)
);
 
MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX2
(
.Sel( wMemSelect ),
.I1( rInstructionBuffer[0] ),
.I2( rSceneParameters[0] ),
.I3( rVertexBuffer[0] ),
.I4(0),
.O1(wMemorySize)
);
 
Module_Host HOST
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( rHostEnable ),
.oHostDataAvailable( wHostDataAvailable ),
.iHostDataReadConfirmed( GRDY_I ),
.iMemorySize( wMemorySize ),
.iPrimitiveCount( (rVertexBuffer[6]+1) *7 ), //This is wrong I think
.iGPUCommitedResults( wGPUCommitedResults ),
.STDONE_O( STDONE_O ),
.iGPUDone( wDone ),
`ifndef NO_DISPLAY_STATS
.iDebugWidth( `RESOLUTION_WIDTH ),
`endif
 
//To Memory
.oReadAddress( wHostReadAddress ),
.iReadData( wHostReadData ),
//To Hub/Switch
.oCoreSelectMask( wCoreSelect ),
.oMemSelect( wMemSelect ),
.DAT_O( DAT_O),
.ADR_O( ADR_O ),
.TGA_O( TGA_O ),
.RENDREN_O( RENDREN_O ),
.CYC_O( CYC_O ),
.STB_O( STB_O ),
.MST_O( MST_O ),
 
.GRDY_I( GRDY_I ),
.GACK_O( GACK_O ),
 
.WE_O( WE_O ),
 
 
.ACK_I( ACK_I )
);
//---------------------------------------------
//generate the clock signal here
always begin
#`CLOCK_CYCLE Clock = ! Clock;
end
//---------------------------------------------
 
//-------------------------------------------------------------------------------------
/*
This makes sure the simulation actually writes the results to the PPM image file
once all the cores are done executing
*/
// Number of image rows handled per core. Parenthesized so the macro expands
// safely when it is embedded in a larger expression.
`define PARTITION_SIZE (`RESOLUTION_HEIGHT/`MAX_CORES)
integer i,j,kk;     // i: sample index within a row, j: row within a partition, kk: bank/core index
reg [31:0] R;       // one colour sample, scaled and saturated to 0..255

// Result dump: once wDone is asserted, walk every output-memory bank and
// write its contents to the PPM file as decimal text, then close the files
// and stop the simulation.
always @ ( * )
begin
    if (wDone == 1'b1)
    begin
        $display("Partition Size = %d",`PARTITION_SIZE);

        for (kk = 0; kk < `MAX_CORES; kk = kk+1)
        begin
            // Select the bank to read back (one bank per kk iteration).
            wOMEMBankSelect = kk;
            $display("wOMEMBankSelect = %d\n",wOMEMBankSelect);

            for (j=0; j < `PARTITION_SIZE; j=j+1)
            begin
                // One row holds RESOLUTION_WIDTH pixels x 3 samples. The bound
                // was previously RESOLUTION_HEIGHT*3, which only matched the
                // row stride used in the address below for square images.
                for (i = 0; i < `RESOLUTION_WIDTH*3; i = i +1)
                begin
                    wOMEMReadAddr = i+j*`RESOLUTION_WIDTH*3;
                    // Wait one `CLOCK_PERIOD plus one time unit before sampling wOMEMData.
                    #`CLOCK_PERIOD;
                    #1;
                    // Drop (`SCALE-8) low bits and clamp the result to 255.
                    R = ((wOMEMData >> (`SCALE-8)) > 255) ? 255 : (wOMEMData >> (`SCALE-8));
                    $fwrite(out2,"%d " , R );

                    // Emit a "# x y" comment line after the first sample of each pixel.
                    if ((i %3) == 0)
                        $fwrite(out2,"\n# %d %d\n",i/3,j);
                end
            end
        end

        $fclose(out2);
        $fwrite(log, "Simulation end time : %dns\n",$time);
        $fclose(log);

        $stop();
    end
end
//-------------------------------------------------------------------------------------
 
// Testbench stimulus. In order, this block:
//   1. clears Clock, Reset, rTimeOut and rHostEnable,
//   2. loads the control register, scene parameters, geometry, textures and
//      instruction images from .mem files with $readmemh,
//   3. opens Simulation.log and Output.ppm and writes the PPM (P3) header,
//   4. pulses Reset high for 100 time units,
//   5. streams rTextures[] out through TMADR_O/TMSEL_O/TMDAT_O with TMWE_O high,
//   6. drops TMWE_O and raises rHostEnable.
// rTimeOut is only zeroed here; no other use is visible in this block.
reg [15:0] rTimeOut;
// `define MAX_INSTRUCTIONS 2
initial begin
// Initialize Inputs
Clock = 0;
Reset = 0;
rTimeOut = 0;
rHostEnable = 0;
//Read Config register values
$write("Loading control register.... ");
$readmemh("Creg.mem",rControlRegister);
$display("Done");
//Read configuration Data
$write("Loading scene parameters.... ");
$readmemh("Params.mem", rSceneParameters );
$display("Done");
//Read Scene Data
$write("Loading scene geometry.... ");
$readmemh("Vertex.mem",rVertexBuffer);
$display("Done");
// Prints rVertexBuffer[6] and the derived value (rVertexBuffer[6]+1)*7, the
// same expression that is wired to the host's iPrimitiveCount port.
$display("Number of primitives(%d): %d",rVertexBuffer[6],(rVertexBuffer[6]+1) *7);
//Read Texture Data
$write("Loading scene texture.... ");
$readmemh("Textures.mem",rTextures);
$display("Done");

//Read instruction data
$write("Loading code allocation table and user shaders.... ");
$readmemh("Instructions.mem",rInstructionBuffer);
$display("Done");
$display("Control Register : %b",rControlRegister[0]);
$display("Resolution : %d X %d",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT );
// Open the log file and record the start time and resolution.
// NOTE(review): the $fopen return values are not checked for failure.
log = $fopen("Simulation.log");
$fwrite(log, "Simulation start time : %dns\n",$time);
$fwrite(log, "Width : %d\n",`RESOLUTION_WIDTH);
$fwrite(log, "Height : %d\n",`RESOLUTION_HEIGHT);
//Open output file
out2 = $fopen("Output.ppm");
// PPM header: magic "P3", a comment line, "<width> <height>", then max value 255.
$fwrite(out2,"P3\n");
$fwrite(out2,"#This file was generated by Theia's RTL simulation\n");
$fwrite(out2,"%d %d\n",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT );
$fwrite(out2,"255\n");
// Assert Reset 10 time units into the simulation.
#10
Reset = 1;

// Hold Reset high for 100 time units; TMWE_O is already high during reset.
TMWE_O = 1;
#100 Reset = 0;
TMWE_O = 1;
$display("Intilializing TMEM @ %dns",$time);
// Write every entry of rTextures[], one entry per 10 time units.
// NOTE(review): the previous comment said the loop "starts in 2 to skip
// Width and Height", but k starts at 0 here.
for (k = 0;k < `TEXTURE_BUFFER_SIZE; k = k + 1)
begin
// Address is k shifted right by `MAX_CORE_BITS; the select is the low bits of k.
TMADR_O <= (k >> (`MAX_CORE_BITS));
TMSEL_O <= (k & (`MAX_TMEM_BANKS-1)); //X mod 2^n == X & (2^n - 1)
TMDAT_O <= rTextures[k];
#10;
end
$display("Done Intilializing TMEM @ %dns",$time);
// Texture load finished: stop writing and enable the host.
TMWE_O = 0;
rHostEnable = 1;
end
 
endmodule
/rtl/Module_ArithmeticComparison.v
0,0 → 1,64
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//------------------------------------------------------------------
// Registered comparator for two's-complement operands X and Y.
//
// Inputs:
//   Clock        - Result/OutputReady are updated on the rising edge.
//   X, Y         - `WIDTH-bit operands, interpreted as signed values.
//   iOperation   - 000: X >  Y    001: X <  Y    010: X == Y
//                  011: X != Y    100: X >= Y    101: X <= Y
//                  any other code yields Result = 0.
//   iInputReady  - when high, the comparison is sampled on this clock edge.
// Outputs:
//   OutputReady  - high for the cycle after iInputReady was high.
//   Result       - outcome of the selected comparison.
//
// Fixes relative to the previous implementation:
//   * Equality now compares the operands themselves. It used to compare
//     magnitudes, so +N and -N were reported as equal.
//   * "X < Y" is now strictly less-than. It used to be ~(X > Y), which is
//     also true when X == Y.
//   * "X <= Y" is now exactly ~(X > Y); it no longer depends on the
//     magnitude-based equality.
//   * The sign bit is taken from bit `WIDTH-1 instead of a hard-coded 31
//     (identical when `WIDTH is 32).
//   * The clocked block uses non-blocking assignments.
module ArithmeticComparison
(
    input wire              Clock,
    input wire[`WIDTH-1:0]  X,Y,
    input wire[2:0]         iOperation,
    input wire              iInputReady,
    output reg              OutputReady,
    output reg              Result
);

    wire [`WIDTH-1:0] wX,wY;        // magnitudes |X| and |Y|
    wire SignX,SignY;               // 1 when the operand is negative
    reg  rGreaterThan;              // signed X > Y
    wire wUGt,wULT,wEQ;
    wire wLessThan;                 // signed X < Y

    // The MSB is the sign; a zero operand has MSB 0, so no special case is needed.
    assign SignX = X[`WIDTH-1];
    assign SignY = Y[`WIDTH-1];

    // Magnitude via two's-complement negation of negative operands.
    assign wX = ( SignX ) ? ~X + 1'b1 : X;
    assign wY = ( SignY ) ? ~Y + 1'b1 : Y;

    assign wUGt = wX > wY;          // |X| > |Y|
    assign wULT = wX < wY;          // |X| < |Y|
    assign wEQ  = (X == Y);         // true equality, sign included

    // Signed greater-than built from the signs and the magnitude compares.
    always @ ( * )
    begin
        case ( {SignX,SignY} )
            2'b00: rGreaterThan = wUGt;  // both non-negative: larger magnitude wins
            2'b01: rGreaterThan = 1'b1;  // X non-negative, Y negative
            2'b10: rGreaterThan = 1'b0;  // X negative, Y non-negative
            2'b11: rGreaterThan = wULT;  // both negative: smaller magnitude is greater
        endcase
    end

    assign wLessThan = ~rGreaterThan & ~wEQ;

    always @ ( posedge Clock )
    begin
        if (iInputReady)
        begin
            case ( iOperation )
                3'b000:  Result <= rGreaterThan;         // X >  Y
                3'b001:  Result <= wLessThan;            // X <  Y
                3'b010:  Result <= wEQ;                  // X == Y
                3'b011:  Result <= ~wEQ;                 // X != Y
                3'b100:  Result <= rGreaterThan | wEQ;   // X >= Y
                3'b101:  Result <= ~rGreaterThan;        // X <= Y
                default: Result <= 1'b0;
            endcase
            OutputReady <= 1'b1;
        end
        else
            OutputReady <= 1'b0;
    end

endmodule
//---------------------------------------------
/rtl/Module_InstructionFetch.v
0,0 → 1,215
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/**********************************************************************************
Description:
This is the instruction fetch unit (IFU).
It gets the next instruction from the IMEM module in the MEM unit.
It increments the instruction pointer (IP) in such a way that EXE always has
one instruction per clock cycle (best pipeline performance). In order to achieve this,
the IFU has 2 instruction pointers, so that in the case of 'branch' instructions,
two instruction pointers are generated and two different instructions are simultaneously
fetched from IMEM: the branch-taken and branch-not-taken instructions, so that once the
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched.
**********************************************************************************/
// Instruction fetch unit.
// Selects the current instruction from the two pre-fetched candidates
// (iInstruction1 = branch not taken, iInstruction2 = branch taken), maintains
// the instruction pointer oIP, and reports when an instruction is available
// and when the flow has finished.
module InstructionFetch
(
input wire Clock,
input wire Reset,
input wire iTrigger, // (re)loads the instruction pointer and starts the 2-cycle start-up delay
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, // IP load value when no branch is being taken
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch not taken instruction
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction2, //Branch taken instruction
input wire iBranchTaken,
output wire oInstructionAvalable,
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP,
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calcule both decide later
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction,
input wire iEXEDone,
output wire oMicroCodeReturnValue,
input wire iSubroutineReturn,
//input wire [`ROM_ADDRESS_WIDTH-1:0] iReturnAddress,
output wire oExecutionDone
);
// Opcode field: the top `INSTRUCTION_OP_LENGTH bits of the current instruction.
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]


// Return value is bit 0 of the current instruction.
assign oMicroCodeReturnValue = oCurrentInstruction[0];
// Second instruction pointer is taken from bits [47:32] of the current instruction.
// NOTE(review): the bit range is hard-coded; confirm it matches `ROM_ADDRESS_WIDTH.
assign oIP2 = oCurrentInstruction[47:32];

wire wTriggerDelay1,wTriggerDelay2,wIncrementIP_Delay1,wIncrementIP_Delay2,
wLastInst_Delay1,wLastInst_Delay2;
wire wIncrementIP,wLastInstruction;
wire wInstructionAvalable,wSubReturnDelay1,wSubReturnDelay2;

// The flow ends when the current opcode is `RETURN.
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN );

wire IsCall;
reg [`ROM_ADDRESS_WIDTH-1:0] rReturnAddress;
assign IsCall = ( `INSTRUCTION_OPCODE == `CALL ) ? 1'b1 : 1'b0;
// Capture the address following the CALL when IsCall rises.
// NOTE(review): IsCall is a combinational decode used as a clock edge here;
// confirm this is intended and glitch-free in the target flow.
always @ (posedge IsCall)
rReturnAddress <= oIP+1;

//Increment IP 2 cycles after trigger or everytime EXE is done, or 2 cycles after return from sub, but stop if we get to the RETURN
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction) | wSubReturnDelay2;
//It takes 1 clock cycle to read the instruction back from IMEM


//Instructions become available to IDU:
//* 2 cycles after IFU is initially triggered
//* Everytime previous instruction execution is complete except for the last instruction in
//the flow
assign wInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2);


// Two-stage delay line on iSubroutineReturn (wSubReturnDelay1 -> wSubReturnDelay2).
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD22
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( iSubroutineReturn ),
.Q( wSubReturnDelay1 )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD23
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( wSubReturnDelay1 ),
.Q( wSubReturnDelay2 )
);
//Special case for instruction available pin: if a return from subroutine instruction was issued,
//then wait 1 cycle before anouncing Instruction available to IDU
assign oInstructionAvalable = wInstructionAvalable & ~iSubroutineReturn | wSubReturnDelay2;





//Once we reach the last instruction, wait until EXE says he is done, then assert oExecutionDone
assign oExecutionDone = (wLastInstruction & iEXEDone);

// Two-stage delay line on iTrigger (wTriggerDelay1 -> wTriggerDelay2).
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( iTrigger ),
.Q( wTriggerDelay1 )
);


FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( wTriggerDelay1 ),
.Q( wTriggerDelay2 )
);


// While the current instruction is the last one, register oInstructionAvalable;
// FFD5 delays that by one more cycle to form wLastInst_Delay2, which masks
// iEXEDone in wInstructionAvalable above.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(wLastInstruction),
.D( oInstructionAvalable ),
.Q( wLastInst_Delay1 )
);


FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),//wLastInstruction),
.D( wLastInst_Delay1 ),
.Q( wLastInst_Delay2 )
);

wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next;

/*
In case the branch is taken:
We point current instruction into the iInstruction2 (branch-taken) instruction
that corresponds to oIP2.
Then, in the next clock cycle we should use the oIP2 incremented by one,
so we need to load UPCOUNTER_POSEDGE with oIP2+1
*/


//If the branch was taken, then use the pre-fetched instruction (iInstruction2)
wire[`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1,wCurrentInstruction_BranchTaken;
// Registers the current instruction while iBranchTaken is asserted, so it can
// be replayed during the following cycle (see oCurrentInstruction below).
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(iBranchTaken),
.D( oCurrentInstruction ),
.Q( wCurrentInstruction_Delay1 )
);

// One-cycle delayed copy of iBranchTaken.
wire wBranchTaken_Delay1;
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( iBranchTaken ),
.Q( wBranchTaken_Delay1 )
);


// Use the branch-taken candidate only when a branch is taken and it is not a
// subroutine return; otherwise use the branch-not-taken candidate.
assign wCurrentInstruction_BranchTaken = ( iBranchTaken & ~iSubroutineReturn) ? iInstruction2 : iInstruction1;

// In the cycle after a taken branch, present the registered instruction.
assign oCurrentInstruction = (wBranchTaken_Delay1 ) ?
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken;

// oIP2_Next is produced from oIP2 by the INCREMENT module (defined elsewhere).
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1
(
.Clock( Clock ),
.Reset( Reset ),
.A( oIP2 ),
.R( oIP2_Next )
);

wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint;
//assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress;

//iReturnAddress is a register stored @ IDU everytime a CALL instruction is decoded
// On the first cycle of a taken branch the IP is loaded with rReturnAddress
// (subroutine return) or oIP2_Next (ordinary branch); otherwise the load value
// is iInitialCodeAddress.
assign wIPEntryPoint = (iBranchTaken & ~wBranchTaken_Delay1) ? (iSubroutineReturn) ? rReturnAddress : oIP2_Next : iInitialCodeAddress;


// Instruction pointer counter. Its Reset input is driven by iTrigger or by the
// first cycle of a taken branch, and Initial carries wIPEntryPoint; counting
// is enabled by wIncrementIP except during that first branch cycle.
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer
(
.Clock( Clock ),
.Reset(iTrigger | (iBranchTaken & ~wBranchTaken_Delay1)),
.Enable(wIncrementIP & (~iBranchTaken | wBranchTaken_Delay1 ) ),
.Initial( wIPEntryPoint ),
.Q(oIP)
);


endmodule
 
//-------------------------------------------------------------------------------
/rtl/Theia_Core.v
0,0 → 1,430
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/**********************************************************************************
Description:
This is the top level block for THEIA.
THEIA core has 5 main logical blocks called Units.
This module implements the interconnections between the Units.
Units:
> EXE: Manages execution logic for the SHADERS.
> GEO: Manages geometry data structures.
> IO: Input/Output (Wishbone).
> MEM: Internal memory, separate for Instructions and data.
> CONTROL: Main control Finite state machine.
Internal Buses:
THEIA has separate instruction and data buses.
THEIA avoids using tri-state buses by having separate input/output
for each bus.
There are 2 separate data buses since the Data memory
has a Dual read channel.
Please see the MEM unit chapter in the documentation for more details.
External Buses:
External buses are managed by the IO Unit.
External buses follow the wishbone protocol.
Please see the IO unit chapter in the documentation for more details.
**********************************************************************************/
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
// Top level of one THEIA core. This module contains no logic of its own
// beyond a handful of continuous assignments; it instantiates and wires
// together the Control Unit (CU), Memory Unit (MEM), Execution Unit (EXE)
// and IO Unit (IO).
// NOTE(review): several wGEO2_* wires are read below but no driver for them
// is visible in this module (no geometry unit is instantiated here).
module THEIACORE
(

input wire CLK_I, //Input clock
input wire RST_I, //Input reset
//Theia Interfaces
input wire MST_I, //Master signal, THEIA enters configuration mode
//when this gets asserted (see documentation)
//Wish Bone Interface
input wire [`WB_WIDTH-1:0] DAT_I, //Input data bus (Wishbone)
output wire [`WB_WIDTH-1:0] DAT_O, //Output data bus (Wishbone)
input wire ACK_I, //Input ack
output wire ACK_O, //Output ack
output wire [`WB_WIDTH-1:0] ADR_O, //Output address
input wire [`WB_WIDTH-1:0] ADR_I, //Input address
output wire WE_O, //Output write enable
input wire WE_I, //Input write enable
output wire STB_O, //Strobe signal, see wishbone documentation
input wire STB_I, //Strobe signal, see wishbone documentation
output wire CYC_O, //Bus cycle signal, see wishbone documentation
input wire CYC_I, //Bus cycle signal, see wishbone documentation
output wire [1:0] TGC_O, //Bus cycle tag, see THEAI documentation
input wire [1:0] TGA_I, //Input address tag, see THEAI documentation
output wire [1:0] TGA_O, //Output address tag, see THEAI documentation
input wire [1:0] TGC_I, //Bus cycle tag, see THEAI documentation
input wire GNT_I, //Bus arbiter 'Granted' signal, see THEAI documentation
input wire RENDREN_I, // forwarded to the control unit as iRenderEnable

output wire HDL_O, //Data Latched
input wire HDLACK_I, //Data Latched ACK
input wire STDONE_I, //Scene traverse complete
input wire HDA_I, // forwarded to the control unit as iHostDataAvailable
output wire RCOMMIT_O, // driven by the control unit's oResultCommited

// Output-memory write port, driven by the IO unit
output wire [`WB_WIDTH-1:0] OMEM_DAT_O,
output wire [`WB_WIDTH-1:0] OMEM_ADR_O,
output wire OMEM_WE_O,

// Texture-memory bus, handled by the IO unit
input wire TMEM_ACK_I,
input wire [`WB_WIDTH-1:0] TMEM_DAT_I ,
output wire [`WB_WIDTH-1:0] TMEM_ADR_O ,
output wire TMEM_WE_O,
output wire TMEM_STB_O,
output wire TMEM_CYC_O,
input wire TMEM_GNT_I,

`ifdef DEBUG
input wire[`MAX_CORES-1:0] iDebug_CoreID,
`endif
//Control Register
input wire [15:0] CREG_I,
output wire DONE_O


);

//When we flip the SMEM, this means we are ready to receive more data


//Alias this signals
wire Clock,Reset;
assign Clock = CLK_I;
assign Reset = RST_I;

wire wIO_Busy;
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__MEM_WriteData;
wire [`DATA_ROW_WIDTH-1:0] wUCODE_RAMBus;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_wDataWriteAddress;
wire w2IO__AddrIsImm;
wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMAddress;
wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__Adr_O_Pointer;
wire [`DATA_ADDRESS_WIDTH-1:0] wGEO2_IO__Adr_O_Pointer;
wire wEXE_2__DataWriteEnable;
wire wUCODE_RAMWriteEnable;
//wire [2:0] RamBusOwner;
//Unit interconnection wires

wire wCU2__MicrocodeExecutionDone;
wire [`ROM_ADDRESS_WIDTH-1:0] InitialCodeAddress;
wire [`ROM_ADDRESS_WIDTH-1:0] wInstructionPointer1,wInstructionPointer2;
wire [`INSTRUCTION_WIDTH-1:0] wEncodedInstruction1,wEncodedInstruction2,wIO2_MEM__ExternalInstruction;
wire wCU2__ExecuteMicroCode;
wire [`ROM_ADDRESS_WIDTH-1:0] wIO2_MEM__InstructionWriteAddr;
wire [95:0] wMEM_2__EXE_DataRead0, wMEM_2__EXE_DataRead1,wMEM_2__IO_DataRead0, wMEM_2__IO_DataRead1;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE_2__MEM_DataReadAddress0,wEXE_2__MEM_DataReadAddress1;
wire [`DATA_ADDRESS_WIDTH-1:0] wUCODE_RAMReadAddress0,wUCODE_RAMReadAddress1;


wire [`WIDTH-1:0] w2IO__AddressOffset;
wire [`DATA_ADDRESS_WIDTH-1:0] w2IO__DataWriteAddress;
wire w2IO__Store;
wire w2IO__EnableWBMaster;

wire [`DATA_ADDRESS_WIDTH-1:0] wIO2_MEM__DataWriteAddress;
wire [`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress0;
wire [`DATA_ROW_WIDTH-1:0] wIO2_MEM__Bus;
wire [`WIDTH-1:0] wIO2_MEM__Data;
wire [`WIDTH-1:0] wIO2_WBM__Address;
wire wIO2_MEM__DataWriteEnable;
wire wIO2__Done;
wire wCU2_GEO__GeometryFetchEnable;
wire wIFU2__MicroCodeReturnValue;
wire wCU2_BCU__ACK;
wire wGEO2_CU__RequestAABBIU;
wire wGEO2_CU__RequestBIU;
wire wGEO2_CU__RequestTCC;
wire wGEO2_CU__GeometryUnitDone;
wire wGEO2_CU__Sync;
wire wEXE2__uCodeDone;
wire wEXE2_IFU__EXEBusy;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination;
wire wALU2_EXE__BranchTaken;
wire wALU2_IFU_BranchNotTaken;
wire w2IO__SetAddress;
wire wIDU2_IFU__IDUBusy;
//Control Register wires
wire[15:0] wCR2_ControlRegister;
wire wCR2_TextureMappingEnabled;
wire wGEO2_CU__TFFDone;
wire wCU2_GEO__TriggerTFF;
wire wIO2_MEM_InstructionWriteEnable;
wire wCU2_IO__WritePixel;
wire wGEO2_IO__AddrIsImm;
wire[31:0] wGEO2_IO__AddressOffset;
wire wGEO2_IO__EnableWBMaster;
wire wGEO2_IO__SetAddress;
wire[`WIDTH-1:0] wGEO2__CurrentPitch,wCU2_GEO_Pitch;
wire wCU2_GEO__SetPitch,wCU2_GEO__IncPicth;

// EXE -> IO: output-memory write request
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteAddress;
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_WriteData;
wire wEXE_2__IO_OMEMWriteEnable;

// EXE <-> IO: texture-memory read request and response
wire [`DATA_ROW_WIDTH-1:0] wEXE_2__IO_TMEMAddress;
wire [`DATA_ROW_WIDTH-1:0] wIO_2_EXE__TMEMData;
wire wIO_2_EXE__DataAvailable;
wire wEXE_2_IO__DataRequest;
wire wCU2_FlipMem;
wire wCU2_FlipMemEnabled;
wire w2MEM_FlipMemory;
wire wGEO2__RequestingTextures;
wire w2IO_WriteBack_Set;
wire[`DATA_ADDRESS_WIDTH-1:0] wIO_2_MEM__DataReadAddress1;

`ifdef DEBUG
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer;
`endif
//--------------------------------------------------------

// HDL_O ("Data Latched") mirrors the control unit's memory-flip signal.
assign HDL_O = wCU2_FlipMem;

// Texture-mapping enable is the `CR_EN_TEXTURE bit of the control register.
assign wCR2_TextureMappingEnabled = wCR2_ControlRegister[ `CR_EN_TEXTURE ];

//--------------------------------------------------------
//Control Unit Instance
ControlUnit CU
(
.Clock(Clock),
.Reset(Reset),
.oFlipMemEnabled( wCU2_FlipMemEnabled ),
.oFlipMem( wCU2_FlipMem ),
.iControlRegister( wCR2_ControlRegister ),
//.oRamBusOwner( RamBusOwner ),
.oGFUEnable( wCU2_GEO__GeometryFetchEnable ),
.iTriggerAABBIURequest( wGEO2_CU__RequestAABBIU ),
.iTriggerBIURequest( wGEO2_CU__RequestBIU ),
.iTriggertTCCRequest( wGEO2_CU__RequestTCC ),
.oUCodeEnable( wCU2__ExecuteMicroCode ),
.oCodeInstructioPointer( InitialCodeAddress ),
.iUCodeDone( wCU2__MicrocodeExecutionDone ),
.iIODone( wIO2__Done ),
.oIOWritePixel( wCU2_IO__WritePixel ),
.iUCodeReturnValue( wIFU2__MicroCodeReturnValue ),
.iGEOSync( wGEO2_CU__Sync ),
.iTFFDone( wGEO2_CU__TFFDone ),
.oTriggerTFF( wCU2_GEO__TriggerTFF ),
.MST_I( MST_I ),
.oSetCurrentPitch( wCU2_GEO__SetPitch ),
.iGFUDone( wGEO2_CU__GeometryUnitDone ),
.iRenderEnable( RENDREN_I ),
.iSceneTraverseComplete( STDONE_I ),
.oResultCommited( RCOMMIT_O ),
.iHostDataAvailable( HDA_I ),
.iHostAckDataRead( HDLACK_I ),

`ifdef DEBUG
.iDebug_CoreID( iDebug_CoreID ),
`endif
.oDone( DONE_O )
);

//--------------------------------------------------------

//assign w2MEM_FlipMemory = (wCU2__ExecuteMicroCode | wCU2_FlipMem ) & wCU2_FlipMemEnabled;
// The memory flip request reaches MEM only while the control unit has flipping enabled.
assign w2MEM_FlipMemory = wCU2_FlipMem & wCU2_FlipMemEnabled;
MemoryUnit MEM
(
.Clock(Clock),
.Reset(Reset),

.iFlipMemory( w2MEM_FlipMemory ),

//Data Bus to/from EXE
.iDataReadAddress1_EXE( wEXE_2__MEM_DataReadAddress0 ),
.iDataReadAddress2_EXE( wEXE_2__MEM_DataReadAddress1 ),
.oData1_EXE( wMEM_2__EXE_DataRead0 ),
.oData2_EXE( wMEM_2__EXE_DataRead1 ),
.iDataWriteEnable_EXE( wEXE_2__DataWriteEnable ),
.iDataWriteAddress_EXE( wEXE_2__MEM_wDataWriteAddress ),
.iData_EXE( wEXE_2__MEM_WriteData ),

//Data Bus to/from IO

.iDataReadAddress1_IO( wIO_2_MEM__DataReadAddress0 ),
.iDataReadAddress2_IO( wIO_2_MEM__DataReadAddress1 ),
.oData1_IO( wMEM_2__IO_DataRead0 ),
.oData2_IO( wMEM_2__IO_DataRead1 ),
.iDataWriteEnable_IO( wIO2_MEM__DataWriteEnable ),
.iDataWriteAddress_IO( wIO2_MEM__DataWriteAddress ),
.iData_IO( wIO2_MEM__Bus ),

`ifdef DEBUG
.iDebug_CoreID( iDebug_CoreID ),
`endif


//Instruction Bus
.iInstructionReadAddress1( wInstructionPointer1 ),
.iInstructionReadAddress2( wInstructionPointer2 ),
.oInstruction1( wEncodedInstruction1 ),
.oInstruction2( wEncodedInstruction2 ),
.iInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ),
.iInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ),
.iInstruction( wIO2_MEM__ExternalInstruction ),
.iControlRegister( CREG_I ),
.oControlRegister( wCR2_ControlRegister )

);

////--------------------------------------------------------

// Execution unit: fetches both candidate instructions from MEM, reads and
// writes data memory, and talks to the IO unit for OMEM writes and TMEM reads.
ExecutionUnit EXE
(

.Clock( Clock),
.Reset( Reset ),
.iInitialCodeAddress( InitialCodeAddress ),
.iInstruction1( wEncodedInstruction1 ),
.iInstruction2( wEncodedInstruction2 ),
.oInstructionPointer1( wInstructionPointer1 ),
.oInstructionPointer2( wInstructionPointer2 ),
.iDataRead0( wMEM_2__EXE_DataRead0 ),
.iDataRead1( wMEM_2__EXE_DataRead1 ),
.iTrigger( wCU2__ExecuteMicroCode ),
.oDataReadAddress0( wEXE_2__MEM_DataReadAddress0 ),
.oDataReadAddress1( wEXE_2__MEM_DataReadAddress1 ),
.oDataWriteEnable( wEXE_2__DataWriteEnable ),
.oDataWriteAddress( wEXE_2__MEM_wDataWriteAddress ),
.oDataBus( wEXE_2__MEM_WriteData ),
.oReturnCode( wIFU2__MicroCodeReturnValue ),
/**************/
.oOMEMWriteAddress( wEXE_2__IO_WriteAddress ),
.oOMEMWriteData( wEXE_2__IO_WriteData ),
.oOMEMWriteEnable( wEXE_2__IO_OMEMWriteEnable ),

.oTMEMReadAddress( wEXE_2__IO_TMEMAddress ),
.iTMEMReadData( wIO_2_EXE__TMEMData ),
.iTMEMDataAvailable( wIO_2_EXE__DataAvailable ),
.oTMEMDataRequest( wEXE_2_IO__DataRequest ),
/**************/
`ifdef DEBUG
.iDebug_CoreID( iDebug_CoreID ),
`endif
.oDone( wCU2__MicrocodeExecutionDone )

);

////--------------------------------------------------------


// NOTE(review): wGEO2__RequestingTextures has no visible driver in this module.
assign TGA_O = (wGEO2__RequestingTextures) ? 2'b1: 2'b0;
//---------------------------------------------------------------------------------------------------

// IO-unit control muxing: when the control unit is not writing a pixel, the
// wGEO2_IO__* signals are passed through; while it is writing a pixel, the
// pixel-write values are used instead. The address type and offset are tied to 0.
//assign wEXE_2__MEM_DataReadAddress1 = (wCU2_IO__WritePixel == 0) ? wUCODE_RAMReadAddress1 : wIO_2_MEM__DataReadAddress1;
assign w2IO__EnableWBMaster = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__EnableWBMaster : wCU2_IO__WritePixel;
assign w2IO__AddrIsImm = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddrIsImm : 1'b0;
assign w2IO__AddressOffset = 0;//(wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__AddressOffset : 32'b0;
assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `OREG_ADDR_O;
//assign w2IO__Adr_O_Pointer = (wCU2_IO__WritePixel == 0 ) ? wGEO2_IO__Adr_O_Pointer : `CREG_PIXEL_2D_INITIAL_POSITION;

// Bus cycle type: write cycle while writing a pixel, read cycle otherwise.
wire w2IO_MasterCycleType;
assign w2IO_MasterCycleType = (wCU2_IO__WritePixel) ? `WB_SIMPLE_WRITE_CYCLE : `WB_SIMPLE_READ_CYCLE;



assign w2IO__SetAddress = (wCU2_IO__WritePixel == 0 )? wGEO2_IO__SetAddress : wCU2_GEO__SetPitch;


// IO unit. iEnable is tied low and iStore tied high here; the muxed
// w2IO__EnableWBMaster signal above is therefore not connected to it.
// NOTE(review): w2IO__DataWriteAddress and w2IO_WriteBack_Set are connected
// below but have no visible driver in this module.
IO_Unit IO
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( 1'b0 ),// w2IO__EnableWBMaster ),
.iBusCyc_Type( w2IO_MasterCycleType ),
.iStore( 1'b1),//w2IO__Store ),
.iAdr_DataWriteBack( w2IO__DataWriteAddress ),
.iAdr_O_Set( w2IO__SetAddress ),
.iAdr_O_Imm( w2IO__AddressOffset ),
.iAdr_O_Type( w2IO__AddrIsImm ),
.iAdr_O_Pointer( w2IO__Adr_O_Pointer ),
.iReadDataBus( wMEM_2__IO_DataRead0 ),
.iReadDataBus2( wMEM_2__IO_DataRead1 ),
.iDat_O_Pointer( `OREG_PIXEL_COLOR ),
.oDataReadAddress( wIO_2_MEM__DataReadAddress0 ),
.oDataReadAddress2( wIO_2_MEM__DataReadAddress1 ),
.oDataWriteAddress( wIO2_MEM__DataWriteAddress ),
.oDataBus( wIO2_MEM__Bus ),
.oInstructionBus( wIO2_MEM__ExternalInstruction ),
.oDataWriteEnable( wIO2_MEM__DataWriteEnable ),
.oData( wIO2_MEM__Data ),
.oInstructionWriteEnable( wIO2_MEM_InstructionWriteEnable ),
.oInstructionWriteAddress( wIO2_MEM__InstructionWriteAddr ),
.iWriteBack_Set( w2IO_WriteBack_Set ),
.oBusy( wIO_Busy ),
.oDone( wIO2__Done ),
/**********/
.iOMEM_WriteAddress( wEXE_2__IO_WriteAddress ),
.iOMEM_WriteData( wEXE_2__IO_WriteData ),
.iOMEM_WriteEnable( wEXE_2__IO_OMEMWriteEnable ),
.OMEM_DAT_O( OMEM_DAT_O ),
.OMEM_ADR_O( OMEM_ADR_O ),
.OMEM_WE_O( OMEM_WE_O ),
.oTMEMReadData( wIO_2_EXE__TMEMData ),
.iTMEMDataRequest( wEXE_2_IO__DataRequest ),
.iTMEMReadAddress( wEXE_2__IO_TMEMAddress ),
.oTMEMDataAvailable( wIO_2_EXE__DataAvailable ),

.TMEM_ACK_I( TMEM_ACK_I ),
.TMEM_DAT_I( TMEM_DAT_I ),
.TMEM_ADR_O( TMEM_ADR_O ),
.TMEM_WE_O( TMEM_WE_O ),
.TMEM_STB_O( TMEM_STB_O ),
.TMEM_CYC_O( TMEM_CYC_O ),
.TMEM_GNT_I( TMEM_GNT_I ),

/**********/
.MST_I( MST_I ),
//Wish Bone Interface
.DAT_I( DAT_I ),
.DAT_O( DAT_O ),
// The acknowledge is only seen by the IO unit while the bus is granted.
.ACK_I( ACK_I & GNT_I ),
.ACK_O( ACK_O ),
.ADR_O( ADR_O ),
.ADR_I( ADR_I ),
.WE_O( WE_O ),
.WE_I( WE_I ),
.STB_O( STB_O ),
.STB_I( STB_I ),
.CYC_O( CYC_O ),
.TGA_I( TGA_I ),
.CYC_I( CYC_I ),
.GNT_I( GNT_I ),
.TGC_O( TGC_O )


);
//---------------------------------------------------------------------------------------------------
endmodule
/rtl/Module_FixedPointSquareRoot.v
0,0 → 1,120
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//Square Root State Machine Constants
`define SQUARE_ROOT_LOOP 1
`define WRITE_SQUARE_ROOT_RESULT 2
 
 
`define SR_AFTER_RESET_STATE 0
//-----------------------------------------------------------------
/*
 
Calculates the square root of a fixed-point number.
Input: Q32.32
Output: Q16.16
Notice that the result has half the precision of the operand!
*/
// Iterative fixed-point square root.
// A three-state machine: the reset/idle state loads the operand when the
// registered input-ready flag is set, the loop state processes one value of
// `group` per clock (from 32 down to 0), and the write state holds
// OutputReady high until iInputReady is deasserted.
//
// Ports:
//   Clock        - work is done on the rising edge; the state register
//                  (CurrentState) is updated on the falling edge.
//   Reset        - while 1, CurrentState is forced to `SR_AFTER_RESET_STATE.
//   Operand      - `LONG_WIDTH-bit input value.
//   iInputReady  - start request; also keeps the result presented while high.
//   OutputReady  - set to 1 in the write state, cleared in the idle state.
//   Result       - `WIDTH-bit result, accumulated one bit per loop iteration.
module FixedPointSquareRoot
(
input wire Clock,
input wire Reset,
input wire[`LONG_WIDTH-1:0] Operand,
input wire iInputReady,
output reg OutputReady,
output reg [`WIDTH-1:0] Result
);

reg[63:0] x; // working copy of the operand, shifted left by `SCALE
reg[0:`WIDTH-1] group,sum,diff; // loop counter and running sum/difference
reg[0:`WIDTH-1] temp1,temp2; // scratch values used inside the loop state
reg [5:0] CurrentState, NextState;

reg myInputReady; // iInputReady sampled on the rising clock edge
//----------------------------------------
// NOTE(review): myInputReady is written here with a blocking assignment and
// read by another posedge block below, so the value seen there may depend on
// simulator block ordering — confirm.
always @(posedge Clock)
begin
myInputReady = iInputReady;
end
//----------------------------------------
//Next states logic
// The state register updates on the falling edge, half a cycle after
// NextState is computed on the rising edge.
always @(negedge Clock)
begin
if( Reset!=1 )
CurrentState = NextState;
else
CurrentState = `SR_AFTER_RESET_STATE;
end
//----------------------------------------

// State actions and next-state selection (rising edge).
// NOTE(review): the case has no default branch, so NextState keeps its
// previous value for any state code not listed.
always @ (posedge Clock)
begin
case (CurrentState)
//----------------------------------------
// Idle: clear the outputs and working registers; start when requested.
`SR_AFTER_RESET_STATE:
begin
OutputReady = 0;
Result = 0;
sum = 0;
diff = 0;
group=32; //WAS 16
x = 0;
if ( myInputReady == 1 )
begin
// x[31:0] = Operand;
// Load the operand and pre-scale it by `SCALE bits.
x = Operand;
x = x << `SCALE;
NextState = `SQUARE_ROOT_LOOP;
end else
NextState = `SR_AFTER_RESET_STATE;
end
//----------------------------------------
// One iteration per clock: bring down the next two bits of x, try the
// candidate (sum), and either reject it or accept it and set a result bit.
`SQUARE_ROOT_LOOP:
begin
sum = sum << 1;
sum = sum + 1;
temp1 = diff << 2;
//diff = diff + (x>>(group*2)) &3;
temp2 = group << 1; //group * 2 ??
// Append bits [2*group+1 : 2*group] of x to the running difference.
diff = temp1 + ((x >> temp2) &3);
if (sum > diff)
begin
// Candidate too large: undo the +1, this result bit stays 0.
sum = sum -1;
end
else
begin
// Candidate fits: set bit `group` of Result and subtract it out.
// NOTE(review): on the first pass group is 32, so with a 32-bit Result the
// term (1<<group) presumably shifts out to 0 — confirm.
Result = Result + (1<<group);
diff = diff - sum;
sum = sum + 1;
end//if
if ( group != 0 )
begin
group = group - 1;
NextState = `SQUARE_ROOT_LOOP;
end
else
begin
NextState = `WRITE_SQUARE_ROOT_RESULT;
end
end
//----------------------------------------
// Done: flag the result and wait here until iInputReady goes low.
`WRITE_SQUARE_ROOT_RESULT:
begin
OutputReady = 1;
NextState = (iInputReady == 0) ?
`SR_AFTER_RESET_STATE : `WRITE_SQUARE_ROOT_RESULT;
end
//----------------------------------------
endcase
end //always
endmodule
//-----------------------------------------------------------------
/rtl/Unit_IO.v
0,0 → 1,319
`timescale 1ns / 1ps
`include "aDefinitions.v"
`define ADR_IMM 1
`define ADR_POINTER 0
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------------------------
module IO_Unit
(
input wire Clock,
input wire Reset,
input wire iEnable,
input wire [`DATA_ADDRESS_WIDTH-1:0] iDat_O_Pointer, //Pointer to what we want to send via DAT_O
input wire [`WIDTH-1:0] iAdr_O_Imm, //Value to assign to ADR_O
input wire [`DATA_ADDRESS_WIDTH-1:0] iAdr_O_Pointer, //Pointer to value to assing to ADR_O
input wire iAdr_O_Type, //Should we use iAdr_O_Imm or iAdr_O_Pointer
input wire iAdr_O_Set, //Should we set
input wire iBusCyc_Type, //Bus cycle type: simple read/write, etc.
input wire iStore, //Should we store read data into MEM
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus, //MEM Data read bus 1
input wire [`DATA_ROW_WIDTH-1:0] iReadDataBus2, //MEM Data read bus 2
input wire[`DATA_ADDRESS_WIDTH-1:0] iAdr_DataWriteBack, //Where in MEM we want to store DAT_I
input wire iWriteBack_Set, //We want to set the Write back Address?
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress2,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire oDataWriteEnable,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oInstructionWriteEnable,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress,
inout wire [`WIDTH-1:0] oData,
output wire oBusy,
output wire oDone,
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteAddress,
input wire [`DATA_ROW_WIDTH-1:0] iOMEM_WriteData,
input wire iOMEM_WriteEnable,
output wire [`WB_WIDTH-1:0] OMEM_DAT_O,
output wire [`WB_WIDTH-1:0] OMEM_ADR_O,
output wire OMEM_WE_O,
//Theia specific interfaces
input wire MST_I,
//Wish Bone Interfaces
output wire [31:0] DAT_O,
input wire [31:0] DAT_I,
input wire ACK_I,
output wire ACK_O,
output wire [31:0] ADR_O,
input wire [31:0] ADR_I,
output wire WE_O,
input wire WE_I,
output wire STB_O,
input wire STB_I,
output wire CYC_O,
input wire CYC_I,
input wire [1:0] TGA_I,
output wire [1:0] TGC_O,
input wire GNT_I,
 
 
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadData,
input wire iTMEMDataRequest,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadAddress,
output wire oTMEMDataAvailable,
 
input wire TMEM_ACK_I,
input wire [`WB_WIDTH-1:0] TMEM_DAT_I ,
output wire [`WB_WIDTH-1:0] TMEM_ADR_O ,
output wire TMEM_WE_O,
output wire TMEM_STB_O,
output wire TMEM_CYC_O,
input wire TMEM_GNT_I
);
 
 
wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement;
wire [`WIDTH-1:0] wMEMToWBM2__ReadDataElement2;
wire wMEMToWBM_2__Enable;
wire wWBMToMEM2__Done;
wire wWBM_2_WBMToMEM_DataAvailable;
wire [`WIDTH-1:0] wWBM_2_WBMToMEM_Data;
wire [`WIDTH-1:0] wWBS_2__WBMToMEM_Frame;
wire wWBMToMEM_2_WBM_Enable;
wire [`WIDTH-1:0] wWBMToMEM_2_WBM_Address;
wire wWBMToMEM2__oDataWriteEnable;
wire wAddrerssSelector2_oDataWriteEnable;
wire [`DATA_ROW_WIDTH-1:0] wWBMToMEM2__oDataBus;
wire [`DATA_ROW_WIDTH-1:0] wWBSToMEM2__oDataBus;
wire wAddressSelector_2__SetAddress;
wire [`WIDTH-1:0] wMEMToWBM_2__Address;
wire wMEMToWBM_2__Done;
wire w2WBMToMEM__Enable;
wire w2WBMToMEM__SetAddress;
wire wWBS_2__WBSToMEM_FrameAvailable;
wire[`WIDTH-1:0] wWBS_2__WBMToMEM_Address;
wire wWBSToMEM2__oDataWriteEnable;
wire[`DATA_ADDRESS_WIDTH-1:0] wWBSToMEM2__oDataWriteAddress;
wire[`DATA_ADDRESS_WIDTH-1:0] wWBMToMEM2__oDataWriteAddress;
 
 
 
//***********new*****************/
 
 
Module_OMemInterface OMI
(
.Clock( Clock ),
.Reset( Reset ),
.iWriteEnable( iOMEM_WriteEnable ),
.iData( iOMEM_WriteData ),
.iAddress( iOMEM_WriteAddress ),
.ADR_O( OMEM_ADR_O ),
.DAT_O( OMEM_DAT_O ),
.WE_O( OMEM_WE_O )
);
 
Module_TMemInterface TMI
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( iTMEMDataRequest ),
.iAddress( iTMEMReadAddress ),
.oData( oTMEMReadData ),
.oDone( oTMEMDataAvailable ),
 
.ACK_I( TMEM_ACK_I ),
.GNT_I( TMEM_GNT_I ),
.DAT_I( TMEM_DAT_I ),
.ADR_O( TMEM_ADR_O ),
.WE_O( TMEM_WE_O ),
.STB_O( TMEM_STB_O ),
.CYC_O( TMEM_CYC_O )
 
);
//***********new*****************/
 
assign oBusy = CYC_O;
wire wReadOperation;
assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? 0 : 1;
 
assign wMEMToWBM_2__Address = ( iAdr_O_Type == `ADR_IMM ) ? iAdr_O_Imm : wMEMToWBM2__ReadDataElement;
assign w2WBMToMEM__Enable = ( iAdr_O_Type == `ADR_IMM ) ? iEnable : wMEMToWBM_2__Enable;
//assign oDone = ( (iAdr_O_Type == `ADR_IMM) && !(iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) )
//? wWBMToMEM2__Done : wMEMToWBM_2__Done;
 
//TODO: WHEN ADR_POINTER Then Done is not until we got the 3 values from X,Y,Z in iAdr_O_Pointer
assign oDone = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE || iAdr_O_Type == `ADR_POINTER ) ? wMEMToWBM_2__Done : wWBMToMEM2__Done;
 
assign oDataWriteEnable = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteEnable : (wWBMToMEM2__oDataWriteEnable);// ^ wAddrerssSelector2_oDataWriteEnable);
assign oDataWriteAddress = (MST_I == 1'b1) ? wWBSToMEM2__oDataWriteAddress : wWBMToMEM2__oDataWriteAddress;
assign oDataBus = (MST_I == 1'b1) ? wWBSToMEM2__oDataBus : wWBMToMEM2__oDataBus;
 
 
 
wire [`DATA_ADDRESS_WIDTH-1:0] wMEMToWBM2_WBMToMEM_RAMWriteAddr;
wire [`DATA_ADDRESS_WIDTH-1:0] w2WBMToMEM_MEMWriteAddress;
 
assign w2WBMToMEM_MEMWriteAddress = ( iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wMEMToWBM2_WBMToMEM_RAMWriteAddr;
 
wire w2MEMToWBM_BusOperationComplete;
assign w2MEMToWBM_BusOperationComplete = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? ACK_I : wWBMToMEM2__Done;
 
 
wire [`DATA_ADDRESS_WIDTH-1:0] w2MEMToWBM_DataPointer;
assign w2MEMToWBM_DataPointer = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? iDat_O_Pointer : iAdr_O_Pointer;
 
 
//------------------------------------------------------------------------------
MEM2WBMUnitB MEMToWBM
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( iEnable & (~iAdr_O_Type | iBusCyc_Type) ),
.iMEMDataPointer( w2MEMToWBM_DataPointer ),
.iMEMDataPointer2( iAdr_O_Pointer ),
.iReadDataBus( iReadDataBus ), //3 Elements comming from DMEM
.iReadDataBus2( iReadDataBus2 ),
.oReadDataElement( wMEMToWBM2__ReadDataElement ), //1 out of 3 elements we read
.oReadDataElement2( wMEMToWBM2__ReadDataElement2 ), //1 out of 3 elements we read
.oDataReadAddress( oDataReadAddress ),
.oDataReadAddress2( oDataReadAddress2 ),
.oDataWriteEnable( wAddrerssSelector2_oDataWriteEnable ), //Always zero
.oDataAvailable( wMEMToWBM_2__Enable ), //Data from MEM available
.iRequestNextElement( w2MEMToWBM_BusOperationComplete ),
.iDataInitialStorageAddress( iAdr_DataWriteBack ), ////########
.oDataWriteAddress( wMEMToWBM2_WBMToMEM_RAMWriteAddr ), ////########
.oDone( wMEMToWBM_2__Done )
);
//------------------------------------------------------------------------------
 
 
 
 
 
wire [`DATA_ADDRESS_WIDTH-1:0] wTemp1;
assign wWBMToMEM2__oDataWriteAddress = (iAdr_O_Type == `ADR_IMM) ? iAdr_DataWriteBack : wTemp1;
 
 
wire [`WIDTH-1:0] wADR_O_InitialAddress;
assign wADR_O_InitialAddress = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM2__ReadDataElement2 : wMEMToWBM_2__Address;
wire wIncrement_Address_O;
assign wIncrement_Address_O = iEnable & ACK_I;
 
 
 
wire wMEMToWBM2__Done;
wire wMEMToWBM2__Trigger;
wire[`WB_WIDTH-1:0] wMEMToWBM_2_Data;
wire w2MEMToWBM__Trigger;
wire wWBM2_MEMToWBM_DataWriteDone;
 
 
wire w2WBM_iEnable;
 
assign w2WBM_iEnable = (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE) ? wMEMToWBM_2__Enable : iEnable;
 
//------------------------------------------------------------------------------
wire wSTB_O;
 
//If the address is a pointer, we need 1 cycle to read the data back from MEM
//before we can set the value into WBM
wire wAddress_Set_Delayed;
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD32_SetDelay
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( iAdr_O_Set ),
.Q( wAddress_Set_Delayed )
);
 
//If the Addr is IMM then just set it whenever iAdr_O_Set is set, but if we have a pointer, then use
//wAddress_Set_Delayed at the beginning and then wWBMToMEM2__Done
wire wWBM_iAddress_Set = (iAdr_O_Type == `ADR_POINTER) ? (wAddress_Set_Delayed | wWBMToMEM2__Done) : iAdr_O_Set;
 
assign STB_O = wSTB_O & ~oDone;
 
WishBoneMasterUnit WBM
(
.CLK_I( Clock ),
.RST_I( Reset ),
.DAT_I( DAT_I ),
.DAT_O( DAT_O ),
.ACK_I( ACK_I ),
.ADR_O( ADR_O ),
.WE_O( WE_O ),
.STB_O( wSTB_O ),
.CYC_O( CYC_O ),
.TGC_O( TGC_O ),
.GNT_I( GNT_I ),
.iEnable( w2WBM_iEnable ),
.iBusCyc_Type( iBusCyc_Type ),
.iAddress_Set( wWBM_iAddress_Set ),
.iAddress( wADR_O_InitialAddress ),
.oDataReady( wWBM_2_WBMToMEM_DataAvailable ),
.iData( wMEMToWBM2__ReadDataElement ),
.oData( wWBM_2_WBMToMEM_Data )
);
//------------------------------------------------------------------------------
WishBoneSlaveUnit WBS
(
 
.CLK_I( Clock ),
.RST_I( Reset ),
.STB_I( STB_I ),
.WE_I( WE_I ),
.DAT_I( DAT_I ),
.ADR_I( ADR_I ),
.TGA_I( TGA_I ),
.ACK_O( ACK_O ),
.CYC_I( CYC_I ),
.MST_I( MST_I ),
.oDataBus( wWBSToMEM2__oDataBus ),
.oInstructionBus( oInstructionBus ),
.oDataWriteAddress( wWBSToMEM2__oDataWriteAddress ),
.oDataWriteEnable( wWBSToMEM2__oDataWriteEnable ),
.oInstructionWriteAddress( oInstructionWriteAddress ),
.oInstructionWriteEnable( oInstructionWriteEnable )
 
 
);
//------------------------------------------------------------------------------
 
 
endmodule
//--------------------------------------------------------------------------
/rtl/Module_SwapMemory.v
0,0 → 1,90
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//------------------------------------------------------------------------------
// SWAP_MEM
//
// Two RAM_DUAL_READ_PORT banks (MEM_A and MEM_B) sit behind two client ports
// (the "A" port group and the "B" port group). iSelect chooses which bank each
// client port group is wired to:
//
//   iSelect = 1 : port group A <-> MEM_A,  port group B <-> MEM_B
//   iSelect = 0 : port group A <-> MEM_B,  port group B <-> MEM_A
//
// Toggling iSelect therefore exchanges which bank each client sees; the stored
// data itself is not moved by this module.
//
// Parameters (all forwarded positionally to both RAM_DUAL_READ_PORT instances):
//   DATA_WIDTH : width of the data-in and data-out buses
//   ADDR_WIDTH : width of the read and write address buses
//   MEM_SIZE   : third parameter of RAM_DUAL_READ_PORT
//
// Each port group carries: one write enable, one write address, one write
// data bus, two read addresses (0 and 1) and the two matching read data
// outputs (0 and 1).
//------------------------------------------------------------------------------
module SWAP_MEM # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 )
(
	input  wire                  Clock,
	input  wire                  iSelect,
	// Client port group A
	input  wire                  iWriteEnableA,
	input  wire [ADDR_WIDTH-1:0] iReadAddressA0,
	input  wire [ADDR_WIDTH-1:0] iReadAddressA1,
	input  wire [ADDR_WIDTH-1:0] iWriteAddressA,
	input  wire [DATA_WIDTH-1:0] iDataInA,
	output wire [DATA_WIDTH-1:0] oDataOutA0,
	output wire [DATA_WIDTH-1:0] oDataOutA1,
	// Client port group B
	input  wire                  iWriteEnableB,
	input  wire [ADDR_WIDTH-1:0] iReadAddressB0,
	input  wire [ADDR_WIDTH-1:0] iReadAddressB1,
	input  wire [ADDR_WIDTH-1:0] iWriteAddressB,
	input  wire [DATA_WIDTH-1:0] iDataInB,
	output wire [DATA_WIDTH-1:0] oDataOutB0,
	output wire [DATA_WIDTH-1:0] oDataOutB1
);

	// Signals physically attached to bank MEM_A
	wire                  wBankA_WriteEnable;
	wire [ADDR_WIDTH-1:0] wBankA_ReadAddress0;
	wire [ADDR_WIDTH-1:0] wBankA_ReadAddress1;
	wire [ADDR_WIDTH-1:0] wBankA_WriteAddress;
	wire [DATA_WIDTH-1:0] wBankA_DataIn;
	wire [DATA_WIDTH-1:0] wBankA_DataOut0;
	wire [DATA_WIDTH-1:0] wBankA_DataOut1;

	// Signals physically attached to bank MEM_B
	wire                  wBankB_WriteEnable;
	wire [ADDR_WIDTH-1:0] wBankB_ReadAddress0;
	wire [ADDR_WIDTH-1:0] wBankB_ReadAddress1;
	wire [ADDR_WIDTH-1:0] wBankB_WriteAddress;
	wire [DATA_WIDTH-1:0] wBankB_DataIn;
	wire [DATA_WIDTH-1:0] wBankB_DataOut0;
	wire [DATA_WIDTH-1:0] wBankB_DataOut1;

	//--------------------------------------------------------------------------
	// Request steering: client ports -> banks.
	// MEM_A is driven by port group A when iSelect is high, otherwise by B.
	//--------------------------------------------------------------------------
	assign { wBankA_WriteEnable,
	         wBankA_ReadAddress0,
	         wBankA_ReadAddress1,
	         wBankA_WriteAddress,
	         wBankA_DataIn }
		= iSelect
		? { iWriteEnableA, iReadAddressA0, iReadAddressA1, iWriteAddressA, iDataInA }
		: { iWriteEnableB, iReadAddressB0, iReadAddressB1, iWriteAddressB, iDataInB };

	// MEM_B always gets the port group that MEM_A did not get.
	assign { wBankB_WriteEnable,
	         wBankB_ReadAddress0,
	         wBankB_ReadAddress1,
	         wBankB_WriteAddress,
	         wBankB_DataIn }
		= iSelect
		? { iWriteEnableB, iReadAddressB0, iReadAddressB1, iWriteAddressB, iDataInB }
		: { iWriteEnableA, iReadAddressA0, iReadAddressA1, iWriteAddressA, iDataInA };

	//--------------------------------------------------------------------------
	// Response steering: banks -> client ports.
	// Each port group reads back from the same bank its requests were sent to.
	//--------------------------------------------------------------------------
	assign { oDataOutA0, oDataOutA1 }
		= iSelect
		? { wBankA_DataOut0, wBankA_DataOut1 }
		: { wBankB_DataOut0, wBankB_DataOut1 };

	assign { oDataOutB0, oDataOutB1 }
		= iSelect
		? { wBankB_DataOut0, wBankB_DataOut1 }
		: { wBankA_DataOut0, wBankA_DataOut1 };

	//--------------------------------------------------------------------------
	// Storage banks
	//--------------------------------------------------------------------------
	RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_A
	(
		.Clock(         Clock               ),
		.iWriteEnable(  wBankA_WriteEnable  ),
		.iReadAddress0( wBankA_ReadAddress0 ),
		.iReadAddress1( wBankA_ReadAddress1 ),
		.iWriteAddress( wBankA_WriteAddress ),
		.iDataIn(       wBankA_DataIn       ),
		.oDataOut0(     wBankA_DataOut0     ),
		.oDataOut1(     wBankA_DataOut1     )
	);

	RAM_DUAL_READ_PORT # (DATA_WIDTH,ADDR_WIDTH,MEM_SIZE) MEM_B
	(
		.Clock(         Clock               ),
		.iWriteEnable(  wBankB_WriteEnable  ),
		.iReadAddress0( wBankB_ReadAddress0 ),
		.iReadAddress1( wBankB_ReadAddress1 ),
		.iWriteAddress( wBankB_WriteAddress ),
		.iDataIn(       wBankB_DataIn       ),
		.oDataOut0(     wBankB_DataOut0     ),
		.oDataOut1(     wBankB_DataOut1     )
	);

endmodule
/rtl/Module_VectorALU.v
0,0 → 1,1278
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
 
//--------------------------------------------------------------
module VectorALU
(
input wire Clock,
input wire Reset,
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
input wire[`WIDTH-1:0] iChannel_Ax,
input wire[`WIDTH-1:0] iChannel_Bx,
input wire[`WIDTH-1:0] iChannel_Ay,
input wire[`WIDTH-1:0] iChannel_By,
input wire[`WIDTH-1:0] iChannel_Az,
input wire[`WIDTH-1:0] iChannel_Bz,
output wire [`WIDTH-1:0] oResultA,
output wire [`WIDTH-1:0] oResultB,
output wire [`WIDTH-1:0] oResultC,
input wire iInputReady,
output reg oBranchTaken,
output reg oBranchNotTaken,
output reg oReturnFromSub,
input wire [`ROM_ADDRESS_WIDTH-1:0] iCurrentIP,
//Connections to the O Memory
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEM_WriteEnable,
//Connections to the R Memory
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData,
input wire iTMEMDataAvailable,
output wire oTMEMDataRequest,
output reg OutputReady
);
 
 
 
 
 
wire wMultiplcationUnscaled;
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0;
 
//--------------------------------------------------------------
 
reg [7:0] InputReadyA,InputReadyB,InputReadyC;
 
//------------------------------------------------------
/*
This is the block that takes care of all the arithmetic
comparisons. Supported operations are <,>,<=,>=,==,!=
*/
//------------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationA_Ax;
reg [`WIDTH-1:0] wMultiplicationA_Bx;
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result;
wire wMultiplicationA_InputReady;
wire wMultiplicationA_OutputReady;
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA,
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD;
 
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady;
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ;
 
//--------------------------------------------------------------------
reg [`WIDTH-1:0] ResultA,ResultB,ResultC;
 
//Output Flip Flops,
//These flip flops control the outputs so that the
//values of the outputs change ONLY when there is
//a positive edge of OutputReady
 
FFD32_POSEDGE ResultAFFD
(
.Clock( OutputReady ),
.D( ResultA ),
.Q( oResultA )
);
 
FFD32_POSEDGE ResultBFFD
(
.Clock( OutputReady ),
.D( ResultB ),
.Q( oResultB )
);
 
FFD32_POSEDGE ResultCFFD
(
.Clock( OutputReady ),
.D( ResultC ),
.Q( oResultC )
);
//--------------------------------------------------------------------
 
 
 
Swizzle3D Swizzle1
(
.Source0_X( iChannel_Bx ),
.Source0_Y( iChannel_By ),
.Source0_Z( iChannel_Bz ),
.iOperation( iChannel_Ax ),
.SwizzleX( wSwizzleOutputX ),
.SwizzleY( wSwizzleOutputY ),
.SwizzleZ( wSwizzleOutputZ )
);
//---------------------------------------------------------------------
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC;
 
//---------------------------------------------------------------------(
 
wire IOW_Operation,wOMEM_We;
assign IOW_Operation = (iOperation == `OMWRITE);
 
always @ ( * )
begin
if (iOperation == `RET)
oReturnFromSub <= OutputReady;
else
oReturnFromSub <= 1'b0;
end
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_AWE
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( IOW_Operation ),
.Q( wOMEM_We )
);
 
assign oOMEM_WriteEnable = wOMEM_We & IOW_Operation;
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD1_A
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ),
.Q( oOMEMWriteAddress)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( {iChannel_Bx,iChannel_By,iChannel_Bz} ),
.Q( oOMEMWriteData )
);
 
 
 
wire wTMReadOutputReady;
assign wTMReadOutputReady = iTMEMDataAvailable;
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( iTMEMDataAvailable ),
.Q( wTMReadOutputReady )
);
*/
//assign oTMEMReadAddress = {iChannel_Ax,iChannel_Ay,iChannel_Az};
 
//We wait 1 clock cycle before we send the data read request, because
//we need to latch the values at the output
 
wire wOpTRead;
assign wOpTRead = ( iOperation == `TMREAD ) ? 1'b1 : 1'b0;
wire wTMEMRequest;
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1_ARE123
(
.Clock( Clock ),
.Reset( Reset),
.Enable( 1'b1 ),
.D( wOpTRead ),
.Q( wTMEMRequest )
);
assign oTMEMDataRequest = wTMEMRequest & wOpTRead;
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD2_B445
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady & wOpTRead ),
.D( {iChannel_Ax,iChannel_Ay,iChannel_Az} ),
.Q( oTMEMReadAddress )
);
 
/*
This MUX selects the appropriate X, Y or Z component depending on
whether it is an X, Y or Z iOperation. This is defined by bits 3 and 4
of iOperation, and only applies to oBranchTaken and Store operations.
*/
 
wire wArithmeticComparison_Result;
wire ArithmeticComparison_InputReady;
wire ArithmeticComparison_OutputReady;
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B;
 
 
always @ ( * )
begin
case ( {iOperation[4],iOperation[3]} )
2'b01: ArithmeticComparison_A = iChannel_Ax;
2'b10: ArithmeticComparison_A = iChannel_Ay;
2'b11: ArithmeticComparison_A = iChannel_Az;
default: ArithmeticComparison_A = 0; //Should never happen
endcase
end
//---------------------------------------------------------------------
always @ ( * )
begin
case ( {iOperation[4],iOperation[3]} )
2'b01: ArithmeticComparison_B = iChannel_Bx;
2'b10: ArithmeticComparison_B = iChannel_By;
2'b11: ArithmeticComparison_B = iChannel_Bz;
default: ArithmeticComparison_B = 0; //Should never happen
endcase
end
 
//---------------------------------------------------------------------
/*
The only instance of ArithmeticComparison in the ALU.
The ArithmeticComparison operation matches the 3 LSBs of the
global ALU iOperation for the oBranchTaken instruction family.
*/
 
assign ArithmeticComparison_InputReady = iInputReady;
 
wire wArithmeticComparisonResult;
 
ArithmeticComparison ArithmeticComparison_1
(
.Clock( Clock ),
.X( ArithmeticComparison_A ),
.Y( ArithmeticComparison_B ),
.iOperation( iOperation[2:0] ),
.iInputReady( ArithmeticComparison_InputReady ),
.OutputReady( ArithmeticComparison_OutputReady ),
.Result( wArithmeticComparisonResult )
);
 
 
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady;
//--------------------------------------------------------------------
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationA_Ax ),
.B( wMultiplicationA_Bx ),
.R( wMultiplicationA_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationA_InputReady ),
.OutputReady( wMultiplicationA_OutputReady )
);
 
//--------------------------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationA_Ax = iChannel_Ay; // Ay * Bz
`MAG: wMultiplicationA_Ax = iChannel_Ax;
`MULP: wMultiplicationA_Ax = iChannel_Ax; //Az = Ax * Ay
default: wMultiplicationA_Ax = iChannel_Ax; // Ax * Bx
endcase
end
//--------------------------------------------------------------------
 
//assign wMultiplicationA_Ax = iChannel_Ax;
 
assign wMultiplicationA_InputReady
= (iOperation == `CROSS ||
iOperation == `DOT ||
iOperation == `MUL ||
iOperation == `IMUL ||
iOperation == `MAG ||
iOperation == `MULP
) ? iInputReady : 0;
//--------------------------------------------------------------------
always @ ( * )
begin
case (iOperation)
`MUL,`IMUL: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx
`MAG: wMultiplicationA_Bx = iChannel_Ax; //Ax^2
`DOT: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx
`CROSS: wMultiplicationA_Bx = iChannel_Bz; // Ay * Bz
`MULP: wMultiplicationA_Bx = iChannel_Ay; //Az = Ax * Ay
default: wMultiplicationA_Bx = 32'b0;
endcase
end
//--------------------------------------------------------------------
 
//------------------------------------------------------
 
reg [`WIDTH-1:0] wMultiplicationB_Ay;
reg [`WIDTH-1:0] wMultiplicationB_By;
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result;
wire wMultiplicationB_InputReady;
wire wMultiplicationB_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationB_Ay ),
.B( wMultiplicationB_By ),
.R( wMultiplicationB_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationB_InputReady ),
.OutputReady( wMultiplicationB_OutputReady )
);
 
 
//----------------------------------------------------
 
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationB_Ay = iChannel_Az; // Az * By
`MAG: wMultiplicationB_Ay = iChannel_Ay;
default: wMultiplicationB_Ay = iChannel_Ay; // Ay * By
endcase
end
//----------------------------------------------------
assign wMultiplicationB_InputReady
= (iOperation == `CROSS ||
iOperation == `DOT ||
iOperation == `MUL ||
iOperation == `IMUL ||
iOperation == `MAG ) ? iInputReady : 0;
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`MUL,`IMUL: wMultiplicationB_By = iChannel_By; //Ay*By
`MAG: wMultiplicationB_By = iChannel_Ay; //Ay^2
`DOT: wMultiplicationB_By = iChannel_By; //Ay*By
`CROSS: wMultiplicationB_By = iChannel_By; // Az * By
default: wMultiplicationB_By = 32'b0;
endcase
end
//----------------------------------------------------
//------------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationC_Az;
reg [`WIDTH-1:0] wMultiplicationC_Bz;
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result;
wire wMultiplicationC_InputReady;
wire wMultiplicationC_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationC_Az ),
.B( wMultiplicationC_Bz ),
.R( wMultiplicationC_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationC_InputReady ),
.OutputReady( wMultiplicationC_OutputReady )
);
 
 
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationC_Az = iChannel_Az; //Az*Bx
`MAG: wMultiplicationC_Az = iChannel_Az;
default: wMultiplicationC_Az = iChannel_Az; //Az*Bz
endcase
end
//----------------------------------------------------
 
assign wMultiplicationC_InputReady
= (
iOperation == `CROSS ||
iOperation == `DOT ||
iOperation == `MUL ||
iOperation == `IMUL ||
iOperation == `MAG
) ? iInputReady : 0;
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`MUL,`IMUL: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz
`MAG: wMultiplicationC_Bz = iChannel_Az; //Ay^2
`DOT: wMultiplicationC_Bz = iChannel_Bz; //Az*Bz
`CROSS: wMultiplicationC_Bz = iChannel_Bx; //Az*Bx
default: wMultiplicationC_Bz = 32'b0;
endcase
end
//----------------------------------------------------
 
reg [`WIDTH-1:0] wMultiplicationD_Aw;
reg [`WIDTH-1:0] wMultiplicationD_Bw;
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result;
wire wMultiplicationD_InputReady;
wire wMultiplicationD_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationD_Aw ),
.B( wMultiplicationD_Bw ),
.R( wMultiplicationD_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationD_InputReady ),
.OutputReady( wMultiplicationD_OutputReady )
);
 
assign wMultiplicationD_InputReady
= (iOperation == `CROSS ) ? iInputReady : 0;
 
 
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationD_Aw = iChannel_Ax; //Ax*Bz
default: wMultiplicationD_Aw = 32'b0;
endcase
end
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationD_Bw = iChannel_Bz; //Ax*Bz
default: wMultiplicationD_Bw = 32'b0;
endcase
end
//----------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationE_Ak;
reg [`WIDTH-1:0] wMultiplicationE_Bk;
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result;
wire wMultiplicationE_InputReady;
wire wMultiplicationE_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationE_Ak ),
.B( wMultiplicationE_Bk ),
.R( wMultiplicationE_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationE_InputReady ),
.OutputReady( wMultiplicationE_OutputReady )
);
 
assign wMultiplicationE_InputReady
= (iOperation == `CROSS ) ? iInputReady : 0;
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationE_Ak = iChannel_Ax; //Ax*By
default: wMultiplicationE_Ak = 32'b0;
endcase
end
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationE_Bk = iChannel_By; //Ax*By
default: wMultiplicationE_Bk = 32'b0;
endcase
end
//----------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationF_Al;
reg [`WIDTH-1:0] wMultiplicationF_Bl;
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result;
wire wMultiplicationF_InputReady;
wire wMultiplicationF_OutputReady;
 
 
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationF_Al ),
.B( wMultiplicationF_Bl ),
.R( wMultiplicationF_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationF_InputReady ),
.OutputReady( wMultiplicationF_OutputReady )
);
assign wMultiplicationF_InputReady
= (iOperation == `CROSS ) ? iInputReady : 0;
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationF_Al = iChannel_Ay; //Ay*Bx
default: wMultiplicationF_Al = 32'b0;
endcase
end
//----------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationF_Bl = iChannel_Bx; //Ay*Bx
default: wMultiplicationF_Bl = 32'b0;
endcase
end
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionA_Result;
wire wDivisionA_OutputReady;
wire wDivisionA_InputReady;
 
assign wDivisionA_InputReady =
( iOperation == `DIV) ? iInputReady : 0;
 
SignedIntegerDivision DivisionChannel_A
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Ax ),
.iDivisor( iChannel_Bx ),
.xQuotient( wDivisionA_Result ),
.iInputReady( wDivisionA_InputReady ),
.OutputReady( wDivisionA_OutputReady )
 
);
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionB_Result;
wire wDivisionB_OutputReady;
wire wDivisionB_InputReady;
 
assign wDivisionB_InputReady =
( iOperation == `DIV) ? iInputReady : 0;
 
SignedIntegerDivision DivisionChannel_B
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Ay ),
.iDivisor( iChannel_By ),
.xQuotient( wDivisionB_Result ),
.iInputReady( wDivisionB_InputReady ),
.OutputReady( wDivisionB_OutputReady )
 
);
//------------------------------------------------------
wire [`WIDTH-1:0] wDivisionC_Result;
wire wDivisionC_OutputReady;
wire wDivisionC_InputReady;
 
 
assign wDivisionC_InputReady =
( iOperation == `DIV) ? iInputReady : 0;
 
SignedIntegerDivision DivisionChannel_C
(
.Clock( Clock ),
.Reset( Reset ),
.iDividend( iChannel_Az ),
.iDivisor( iChannel_Bz ),
.xQuotient( wDivisionC_Result ),
.iInputReady( wDivisionC_InputReady ),
.OutputReady( wDivisionC_OutputReady )
 
);
//--------------------------------------------------------------
/*
First addition block instance goes here.
Note that all inputs/outputs to the block
are wires. It has two MUXES one for each entry.
*/
reg [`LONG_WIDTH-1:0] wAddSubA_Ax,wAddSubA_Bx;
wire [`LONG_WIDTH-1:0] wAddSubA_Result;
wire wAddSubA_Operation; //Either addition or substraction
reg wAddSubA_InputReady;
wire wAddSubA_OutputReady;
 
assign wAddSubA_Operation
= (
iOperation == `SUB
|| iOperation == `CROSS
|| iOperation == `DEC
|| iOperation == `MOD
) ? 1 : 0;
 
FixedAddSub AddSubChannel_A
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubA_Ax ),
.B( wAddSubA_Bx ),
.R( wAddSubA_Result ),
.iOperation( wAddSubA_Operation ),
.iInputReady( wAddSubA_InputReady ),
.OutputReady( wAddSubA_OutputReady )
);
//Diego
 
 
//----------------------------
 
//InputReady Mux A
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubA_InputReady = iInputReady;
`SUB: wAddSubA_InputReady = iInputReady;
`INC,`INCX,`INCY,`INCZ: wAddSubA_InputReady = iInputReady;
`DEC: wAddSubA_InputReady = iInputReady;
`MOD: wAddSubA_InputReady = iInputReady;
`MAG: wAddSubA_InputReady = wMultiplicationOutputReadyA &&
wMultiplicationOutputReadyB;
//wMultiplicationA_OutputReady
//&& wMultiplicationB_OutputReady;
`DOT: wAddSubA_InputReady =
wMultiplicationOutputReadyA &&
wMultiplicationOutputReadyB;
//wMultiplicationA_OutputReady
//&& wMultiplicationB_OutputReady;
`CROSS: wAddSubA_InputReady =
wMultiplicationOutputReadyA &&
wMultiplicationOutputReadyB;
// wMultiplicationA_OutputReady
//&& wMultiplicationB_OutputReady;
default: wAddSubA_InputReady = 1'b0;
endcase
end
//----------------------------
 
//wAddSubA_Ax input Mux (selected by iOperation)
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax };
`SUB: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax };
`INC,`INCX,`INCY,`INCZ: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax };
`DEC: wAddSubA_Ax = ( iChannel_Ax[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ax } : { 32'b0, iChannel_Ax };
`MOD: wAddSubA_Ax = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx };
`MAG: wAddSubA_Ax = wMultiplicationA_Result;
`DOT: wAddSubA_Ax = wMultiplicationA_Result;
`CROSS: wAddSubA_Ax = wMultiplicationA_Result;
default: wAddSubA_Ax = 64'b0;
endcase
end
//----------------------------
//wAddSubA_Bx 2:1 input Mux
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubA_Bx = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx };
`SUB: wAddSubA_Bx = ( iChannel_Bx[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Bx } : { 32'b0, iChannel_Bx };
`INC,`INCX: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);
`INCY,`INCZ: wAddSubA_Bx = `LONG_WIDTH'd0;
`DEC: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);
`MOD: wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);
`MAG: wAddSubA_Bx = wMultiplicationB_Result;
`DOT: wAddSubA_Bx = wMultiplicationB_Result;
`CROSS: wAddSubA_Bx = wMultiplicationB_Result;
default: wAddSubA_Bx = 64'b0;
endcase
end
//--------------------------------------------------------------
/*
Second addition block instance goes here.
Note that all inputs/outputs to the block
are wires. It has two MUXES one for each entry.
*/
 
wire [`LONG_WIDTH-1:0] wAddSubB_Result;
 
 
wire wAddSubB_Operation; //Either addition or substraction
reg wAddSubB_InputReady;
wire wAddSubB_OutputReady;
 
reg [`LONG_WIDTH-1:0] wAddSubB_Ay,wAddSubB_By;
 
assign wAddSubB_Operation =
( iOperation == `SUB
|| iOperation == `CROSS
|| iOperation == `DEC
|| iOperation == `MOD
) ? 1 : 0;
 
FixedAddSub AddSubChannel_B
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubB_Ay ),
.B( wAddSubB_By ),
.R( wAddSubB_Result ),
.iOperation( wAddSubB_Operation ),
.iInputReady( wAddSubB_InputReady ),
.OutputReady( wAddSubB_OutputReady )
);
//----------------------------
wire wMultiplicationOutputReadyC_Dealy1;
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1
(
.Clock( Clock ),
.Clear( Reset ),
.D( wMultiplicationOutputReadyC ),
.Q( wMultiplicationOutputReadyC_Dealy1 )
);
 
 
 
 
 
//InputReady Mux B
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubB_InputReady = iInputReady;
`SUB: wAddSubB_InputReady = iInputReady;
`INC,`INCX,`INCY,`INCZ: wAddSubB_InputReady = iInputReady;
`DEC: wAddSubB_InputReady = iInputReady;
`MOD: wAddSubB_InputReady = iInputReady;
`MAG: wAddSubB_InputReady = wAddSubAOutputReady
&& wMultiplicationOutputReadyC_Dealy1;
//&& wMultiplicationC_OutputReady;
`DOT: wAddSubB_InputReady = wAddSubAOutputReady
&& wMultiplicationOutputReadyC_Dealy1;
//&& wMultiplicationC_OutputReady;
`CROSS: wAddSubB_InputReady = wMultiplicationOutputReadyC &&
wMultiplicationOutputReadyD;
// wMultiplicationC_OutputReady
//&& wMultiplicationD_OutputReady;
default: wAddSubB_InputReady = 1'b0;
endcase
end
//----------------------------
// wAddSubB_Ay 2:1 input Mux
// If the iOperation is ADD or SUB, it will simply take the inputs from
// ALU Channels. If it is a VECTOR_MAGNITUDE, it takes the input from the
// previous ADDER_A, same for dot product.
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
`SUB: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
`INC,`INCX,`INCY,`INCZ: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
`DEC: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
`MOD: wAddSubB_Ay = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_By} : {32'b0,iChannel_By}; //Ay
`MAG: wAddSubB_Ay = wAddSubA_Result; //A^2+B^2
`DOT: wAddSubB_Ay = wAddSubA_Result; //Ax*Bx + Ay*By
`CROSS: wAddSubB_Ay = wMultiplicationC_Result;
default: wAddSubB_Ay = 64'b0;
endcase
end
//----------------------------
//wAddSubB_By 2:1 input Mux
always @ ( * )
begin
case (iOperation)
`ADD: wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //By
`SUB: wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //{32'b0,iChannel_By}; //By
`INC,`INCY: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
`INCX,`INCZ: wAddSubB_By = `LONG_WIDTH'd0;
`DEC: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
`MOD: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
`MAG: wAddSubB_By = wMultiplicationC_Result; //C^2
`DOT: wAddSubB_By = wMultiplicationC_Result; //Az * Bz
`CROSS: wAddSubB_By = wMultiplicationD_Result;
default: wAddSubB_By = 32'b0;
endcase
end
//--------------------------------------------------------------
wire [`LONG_WIDTH-1:0] wAddSubC_Result;
reg [`LONG_WIDTH-1:0] wAddSubC_Az,wAddSubC_Bz;
 
wire wAddSubC_Operation; //Either addition or substraction
reg wAddSubC_InputReady;
wire wAddSubC_OutputReady;
 
reg [`LONG_WIDTH-1:0] AddSubC_Az,AddSubB_Bz;
 
//-----------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wAddSubC_Az = wMultiplicationE_Result;
`MOD: wAddSubC_Az = (iChannel_Bz[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_Bz} : {32'b0,iChannel_Bz};
default: wAddSubC_Az = (iChannel_Az[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_Az} : {32'b0,iChannel_Az};
endcase
end
//-----------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wAddSubC_Bz = wMultiplicationF_Result;
`INC,`INCZ: wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);
`INCX,`INCY: wAddSubC_Bz = `LONG_WIDTH'd0;
`DEC: wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);
`MOD: wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);
default: wAddSubC_Bz = (iChannel_Bz[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_Bz} : {32'b0,iChannel_Bz};
endcase
end
//-----------------------------------------
 
//Operation select for the channel C adder/subtractor: driven high for
//SUB, CROSS, DEC and MOD, low for every other operation.
assign wAddSubC_Operation =
    ( (iOperation == `SUB)   ||
      (iOperation == `CROSS) ||
      (iOperation == `DEC)   ||
      (iOperation == `MOD) ) ? 1 : 0;
 
//Channel C adder/subtractor instance.
//A and B come from the wAddSubC_Az / wAddSubC_Bz operand muxes, iOperation is
//the 1-bit select wAddSubC_Operation (high for SUB/CROSS/DEC/MOD, so presumably
//1 = subtract - FixedAddSub itself is defined outside this excerpt).
FixedAddSub AddSubChannel_C
(
.Clock( Clock ),
.Reset( Reset ),
.A( wAddSubC_Az ),
.B( wAddSubC_Bz ),
.R( wAddSubC_Result ),
.iOperation( wAddSubC_Operation ),
.iInputReady( wAddSubC_InputReady ),
.OutputReady( wAddSubC_OutputReady )
);
 
 
//Start strobe for the channel C adder/subtractor.
//CROSS has to wait for both of its multipliers (E and F); every other
//operation starts as soon as the ALU input is ready.
always @ ( * )
begin
    if (iOperation == `CROSS)
        wAddSubC_InputReady = wMultiplicationE_OutputReady && wMultiplicationF_OutputReady;
    else
        wAddSubC_InputReady = iInputReady;
end
 
//------------------------------------------------------
//Square root stage, used by the `MAG operation.
//Its operand is the channel B adder result, and it is only started when that
//adder reports ready while the current operation is `MAG.
wire [`WIDTH-1:0] wSquareRoot_Result;
wire wSquareRoot_OutputReady;
 
 
FixedPointSquareRoot SQROOT1
(
.Clock( Clock ),
.Reset( Reset ),
.Operand( wAddSubB_Result ),
.iInputReady( wAddSubBOutputReady && iOperation == `MAG),
.OutputReady( wSquareRoot_OutputReady ),
.Result( wSquareRoot_Result )
);
//------------------------------------------------------
 
//Per-lane results for the `MOD operation: each source-A lane is bitwise ANDed
//with the matching adder/subtractor result.
//For channel C, `MOD feeds that adder with the sign-extended Bz lane and
//(1 << `SCALE) with the operation select high (presumably Bz - (1 << `SCALE)).
//NOTE(review): the adder results are `LONG_WIDTH wide; the widths of the
//wModulus2N_* nets and iChannel_* lanes are declared outside this excerpt - confirm
//that the intended bits are kept.
assign wModulus2N_ResultA = (iChannel_Ax & wAddSubA_Result );
assign wModulus2N_ResultB = (iChannel_Ay & wAddSubB_Result );
assign wModulus2N_ResultC = (iChannel_Az & wAddSubC_Result );
 
 
 
 
 
 
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&//
//****Mux for ResultA***
// Notice that the Dot Product or the Magnitud Result will
// output in ResultA.
 
//Result mux for lane A (X). Case labels that share a source are grouped.
always @ ( * )
begin
    case ( iOperation )
    //Source A, X lane passed straight through
    `RETURN,`COPY,`SETX,`RET,`MULP:
        ResultA = iChannel_Ax;

    //Source B, X lane passed straight through
    `SETY,`SETZ,`XCHANGEX:
        ResultA = iChannel_Bx;

    //Adder A result: bit 63 is kept as the sign bit on top of bits 30:0
    `ADD,`SUB,`CROSS,`INC,`INCX,`INCY,`INCZ,`DEC:
        ResultA = wAddSubA_Result[63] ? {1'b1,wAddSubA_Result[30:0]} : {1'b0,wAddSubA_Result[30:0]};

    //The dot product is accumulated in adder B
    `DOT:
        ResultA = wAddSubB_Result[63] ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};

    `DIV:
        ResultA = wDivisionA_Result;

    `MUL,`IMUL:
        ResultA = wMultiplicationA_Result[31:0];

    `MAG:
        ResultA = wSquareRoot_Result;

    `ZERO:
        ResultA = 32'b0;

    `TMREAD:
        ResultA = iTMEMReadData[95:64];

    `LEA:
        ResultA = {16'b0,iCurrentIP};

    `SWIZZLE3D:
        ResultA = wSwizzleOutputX;

    `UNSCALE:
        ResultA = iChannel_Ax >> `SCALE;

    `MOD:
        ResultA = wModulus2N_ResultA;

    //Keep only the low `SCALE bits
    `FRAC:
        ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

    //Two's complement negation
    `NEG:
        ResultA = ~iChannel_Ax + 1'b1;

    //Unknown operation: drive zero.
    //(debug trap, disabled) $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); $stop();
    default:
        ResultA = 32'b0;
    endcase
end
//------------------------------------------------------
//****Mux for RB***
//Result mux for lane B (Y). Case labels that share a source are grouped.
always @ ( * )
begin
    case ( iOperation )
    //Source A, X lane
    `RETURN,`SETY:
        ResultB = iChannel_Ax;

    //Source A, Y lane passed straight through
    `COPY,`MULP,`XCHANGEX:
        ResultB = iChannel_Ay;

    //Source B, Y lane passed straight through
    `SETX,`RET,`SETZ:
        ResultB = iChannel_By;

    //Adder B result (also holds the dot product): bit 63 is kept as the
    //sign bit on top of bits 30:0
    `ADD,`SUB,`CROSS,`DOT,`INC,`INCX,`INCY,`INCZ,`DEC:
        ResultB = wAddSubB_Result[63] ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};

    `DIV:
        ResultB = wDivisionB_Result;

    `MUL,`IMUL:
        ResultB = wMultiplicationB_Result[31:0];

    `MAG:
        ResultB = wSquareRoot_Result;

    `ZERO:
        ResultB = 32'b0;

    `TMREAD:
        ResultB = iTMEMReadData[63:32];

    `LEA:
        ResultB = {16'b0,iCurrentIP};

    `SWIZZLE3D:
        ResultB = wSwizzleOutputY;

    `UNSCALE:
        ResultB = iChannel_Ay >> `SCALE;

    `MOD:
        ResultB = wModulus2N_ResultB;

    //Keep only the low `SCALE bits
    `FRAC:
        ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

    //Two's complement negation
    `NEG:
        ResultB = ~iChannel_Ay + 1'b1;

    //Unknown operation: drive zero.
    //(debug trap, disabled) $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); $stop();
    default:
        ResultB = 32'b0;
    endcase
end
//------------------------------------------------------
//****Mux for RC***
//Result mux for lane C (Z). Case labels that share a source are grouped.
always @ ( * )
begin
    case ( iOperation )
    //Source A, X lane
    `RETURN,`SETZ:
        ResultC = iChannel_Ax;

    //Source A, Z lane passed straight through
    `COPY,`XCHANGEX:
        ResultC = iChannel_Az;

    //Source B, Z lane passed straight through
    `SETX,`RET,`SETY:
        ResultC = iChannel_Bz;

    //Adder C result: bit 63 is kept as the sign bit on top of bits 30:0
    `ADD,`SUB,`CROSS,`INC,`INCX,`INCY,`INCZ,`DEC:
        ResultC = wAddSubC_Result[63] ? {1'b1,wAddSubC_Result[30:0]} : {1'b0,wAddSubC_Result[30:0]};

    //The dot product is accumulated in adder B
    `DOT:
        ResultC = wAddSubB_Result[63] ? {1'b1,wAddSubB_Result[30:0]} : {1'b0,wAddSubB_Result[30:0]};

    `DIV:
        ResultC = wDivisionC_Result;

    `MUL,`IMUL:
        ResultC = wMultiplicationC_Result[31:0];

    //MULP writes the product from multiplier A into the Z lane
    `MULP:
        ResultC = wMultiplicationA_Result[31:0];

    `MAG:
        ResultC = wSquareRoot_Result;

    `ZERO:
        ResultC = 32'b0;

    `TMREAD:
        ResultC = iTMEMReadData[31:0];

    `LEA:
        ResultC = {16'b0,iCurrentIP};

    `SWIZZLE3D:
        ResultC = wSwizzleOutputZ;

    `UNSCALE:
        ResultC = iChannel_Az >> `SCALE;

    `MOD:
        ResultC = wModulus2N_ResultC;

    //Keep only the low `SCALE bits
    `FRAC:
        ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

    //Two's complement negation
    `NEG:
        ResultC = ~iChannel_Az + 1'b1;

    //Unknown operation: drive zero.
    //(debug trap, disabled) $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation); $stop();
    default:
        ResultC = 32'b0;
    endcase
end
//------------------------------------------------------------------------
 
 
//Branch-taken flag.
//Unconditional transfers are taken as soon as the ALU output is ready;
//conditional jumps follow the arithmetic comparison result; every other
//operation never branches.
always @ ( * )
begin
    case (iOperation)
    `JMP,`CALL,`RET:
        oBranchTaken = OutputReady;

    `JGX,`JGY,`JGZ,
    `JLX,`JLY,`JLZ,
    `JEQX,`JEQY,`JEQZ,
    `JNEX,`JNEY,`JNEZ,
    `JGEX,`JGEY,`JGEZ,
    `JLEX,`JLEY,`JLEZ:
        oBranchTaken = wArithmeticComparison_Result;

    default:
        oBranchTaken = 0;
    endcase
end
 
//Branch-not-taken flag: for any branch-type operation it is raised when the
//ALU output is ready and oBranchTaken is low; it stays low for all other
//operations.
always @ ( * )
begin
    case (iOperation)
    `JMP,`CALL,`RET,
    `JGX,`JGY,`JGZ,
    `JLX,`JLY,`JLZ,
    `JEQX,`JEQY,`JEQZ,
    `JNEX,`JNEY,`JNEZ,
    `JGEX,`JGEY,`JGEZ,
    `JLEX,`JLEY,`JLEZ:
        oBranchNotTaken = !oBranchTaken && OutputReady;

    default:
        oBranchNotTaken = 0;
    endcase
end
//------------------------------------------------------------------------
//Output-ready bookkeeping for the divider, multiplier and square root units.
//Each divider (and the square root unit) drives the Clock pin of an FFT1 cell
//with its own OutputReady, with D tied high and Reset tied to iInputReady.
//NOTE(review): FFT1 is defined outside this excerpt - presumably a flip-flop that
//remembers "this unit has finished" until the next iInputReady clears it; confirm.
 
wire wDivisionOutputReadyA,wDivisionOutputReadyB,wDivisionOutputReadyC;
wire wDivisionOutputReady;
 
 
//Plain aliases of the three adder/subtractor ready outputs
assign wAddSubAOutputReady = wAddSubA_OutputReady;
assign wAddSubBOutputReady = wAddSubB_OutputReady;
assign wAddSubCOutputReady = wAddSubC_OutputReady;
 
 
FFT1 FFT_DivisionA
(
.D(1'b1),
.Clock( wDivisionA_OutputReady ),
.Reset( iInputReady ),
.Q( wDivisionOutputReadyA )
);
 
FFT1 FFT_DivisionB
(
.D(1'b1),
.Clock( wDivisionB_OutputReady ),
.Reset( iInputReady ),
.Q( wDivisionOutputReadyB )
);
FFT1 FFT_DivisionC
(
.D(1'b1),
.Clock( wDivisionC_OutputReady ),
.Reset( iInputReady ),
.Q( wDivisionOutputReadyC )
);
//Division is complete only when all three lanes have reported ready
assign wDivisionOutputReady =
( wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC );
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady;
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady;
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady;
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady;
//Only multipliers A, B and C take part in the combined ready flag;
//wMultiplicationOutputReadyD is aliased above but not included here.
assign wMultiplicationOutputReady =
( wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC );
wire wSquareRootOutputReady;
FFT1 FFT_Sqrt
(
.D(1'b1),
.Clock( wSquareRoot_OutputReady ),
.Reset( iInputReady ),
.Q( wSquareRootOutputReady )
);
//------------------------------------------------------------------------
//Delay chain for operations that do not have their own ready signal:
//iInputReady -> wOutputDelay1Cycle -> wOutputDelay2Cycle -> wOutputDelay3Cycle,
//one flip-flop per stage.
wire wOutputDelay1Cycle,wOutputDelay2Cycle,wOutputDelay3Cycle;
 
 
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2
(
.Clock( Clock ),
.Clear( Reset ),
.D( iInputReady ),
.Q( wOutputDelay1Cycle )
);
 
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay22
(
.Clock( Clock ),
.Clear( Reset ),
.D( wOutputDelay1Cycle ),
.Q( wOutputDelay2Cycle )
);
 
 
//NOTE(review): the clock of this stage is gated combinationally with
//(wOperation == `OMWRITE). Gated clocks built from logic can glitch and are
//usually replaced by a clock enable - left untouched here because the change
//would alter timing behaviour; confirm the intent.
FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay222
(
.Clock( Clock && wOperation == `OMWRITE),
.Clear( Reset ),
.D( wOutputDelay2Cycle ),
.Q( wOutputDelay3Cycle )
);
 
 
 
 
//Latches iOperation into wOperation whenever iInputReady is high; wOperation
//is what the output-ready mux selects on.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( iInputReady ),
.D( iOperation ),
.Q(wOperation)
);
 
 
//Mux for the ALU output-ready signal, selected by the latched operation
//(wOperation, not the live iOperation).
always @ ( * )
begin
    case ( wOperation )
    //Operations without a dedicated functional unit: ready flag is the
    //registered copy of iInputReady
    `UNSCALE,`RETURN,`NOP,`FRAC,`NEG,
    `ZERO,`COPY,`SWIZZLE3D,`XCHANGEX,
    `SETX,`SETY,`SETZ,`JMP,`LEA,`CALL,`RET:
        OutputReady = wOutputDelay1Cycle;

`ifdef DEBUG
    //Debug Print behaves as a NOP in terms of ALU...
    `DEBUG_PRINT:
        OutputReady = wOutputDelay1Cycle;
`endif

    `OMWRITE:
        OutputReady = wOutputDelay3Cycle;

    //One cycle after TMEM data available asserted
    `TMREAD:
        OutputReady = wTMReadOutputReady;

    //Operations that need all three adder/subtractor channels
    //TODO (from original, for MOD): wait 1 more cycle
    `ADD,`INC,`INCX,`INCY,`INCZ,
    `SUB,`DEC,
    `CROSS,
    `MOD:
        OutputReady = wAddSubAOutputReady &&
                      wAddSubBOutputReady &&
                      wAddSubCOutputReady;

    `DIV:
        OutputReady = wDivisionOutputReady;

    `MUL,`IMUL:
        OutputReady = wMultiplicationOutputReady;

    //MULP only uses multiplier A
    `MULP:
        OutputReady = wMultiplicationOutputReadyA;

    //The dot product result comes out of adder B
    `DOT:
        OutputReady = wAddSubBOutputReady;

    `MAG:
        OutputReady = wSquareRootOutputReady;

    //Conditional jumps wait for the comparison unit
    `JGX,`JGY,`JGZ,
    `JLX,`JLY,`JLZ,
    `JEQX,`JEQY,`JEQZ,
    `JNEX,`JNEY,`JNEZ,
    `JGEX,`JGEY,`JGEZ,
    `JLEX,`JLEY,`JLEZ:
        OutputReady = ArithmeticComparison_OutputReady;

    //Unknown operation: never ready. Was 32'b0, which only worked through
    //implicit truncation/extension of the literal.
    default:
        begin
            OutputReady = 1'b0;
            //`ifdef DEBUG
            //$display("*** ALU ERROR: wOperation = %d ***",wOperation);
            //`endif
        end
    endcase
end
 
endmodule
//------------------------------------------------------------------------
/rtl/aDefinitions.v
0,0 → 1,371
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2009 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
/*******************************************************************************
Module Description:
 
This module defines constants that are going to be used
all over the code. By now you may have noticed that all
constants are pre-compilation define directives. This is
mainly for simulation performance reasons.
*******************************************************************************/
 
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: Change for more cores
//---------------------------------------------------------------------------------
//Verilog provides a `default_nettype none compiler directive. When
//this directive is set, implicit data types are disabled, which makes any
//undeclared signal name a syntax error. This is very useful to avoid annoying
//automatic 1-bit wire declarations where you don't want them!
`default_nettype none
 
//The clock cycle
`define CLOCK_CYCLE 5
`define CLOCK_PERIOD 10
//---------------------------------------------------------------------------------
//Defines the Scale. This is very important because it sets the fixed point precision.
//The Scale defines the number of bits that are used as the fractional part of the number.
//The code has been written in such a way that allows you to change the value of the
//Scale, so that it is possible to experiment with different scenarios. SCALE can be
//no smaller than 1 and no bigger than WIDTH.
`define SCALE 17
 
//The next section defines the length of the registers, buses and other structures.
//Do not change these values unless you really know what you are doing (seriously!)
`define WIDTH 32
`define WB_WIDTH 32 //width of wish-bone buses
`define LONG_WIDTH 64
 
`define WB_SIMPLE_READ_CYCLE 0
`define WB_SIMPLE_WRITE_CYCLE 1
//---------------------------------------------------------------------------------
//Next are the constants that define the size of the instructions.
//Instructions are formed like this:
// Type I:
// Operand (of size INSTRUCTION_OP_LENGTH )
// DestinationAddr (of size DATA_ADDRESS_WIDTH )
// SourceAddr1 (of size DATA_ADDRESS_WIDTH )
// SourceAddr2 (of size DATA_ADDRESS_WIDTH )
// Type II:
// Operand (of size INSTRUCTION_OP_LENGTH )
// DestinationAddr (of size DATA_ADDRESS_WIDTH )
// ImmediateValue (of size WIDTH = DATA_ADDRESS_WIDTH * 2 )
//
//You can play around with the size of instructions, but keep
//in mind that bits 3 and 4 of the Operand have a special meaning
//that is used for the jump family of instructions (see Documentation).
//Also the MSB of Operand is used by the decoder to distinguish
//between Type I and Type II instructions.
`define INSTRUCTION_WIDTH 64
`define INSTRUCTION_OP_LENGTH 16
`define INSTRUCTION_IMM_BITPOS 54
`define INSTRUCTION_IMM_BIT 6 //don't change this!
 
//Defines the Lenght of Memory blocks
`define DATA_ROW_WIDTH 96
`define DATA_ADDRESS_WIDTH 16
`define ROM_ADDRESS_WIDTH 16
`define ROM_ADDRESS_SEL_MASK `ROM_ADDRESS_WIDTH'h8000
 
//---------------------------------------------------------------------------------
//The next section defines the code memory entry point for the various code routines
//Please keep this syntax ENTRYPOINT_ADDR_* because the perl script that
//parses the user code expects this pattern in order to read in the tokens
 
//Internal Entry points (default ROM Address)
`define ENTRYPOINT_ADRR_INITIAL `ROM_ADDRESS_WIDTH'd0 //0 - This should always be zero
`define ENTRYPOINT_ADRR_CPPU `ROM_ADDRESS_WIDTH'd44
`define ENTRYPOINT_ADRR_RGU `ROM_ADDRESS_WIDTH'd47
`define ENTRYPOINT_ADRR_AABBIU `ROM_ADDRESS_WIDTH'd69
`define ENTRYPOINT_ADRR_BIU `ROM_ADDRESS_WIDTH'd157
`define ENTRYPOINT_ADRR_PSU `ROM_ADDRESS_WIDTH'd232
`define ENTRYPOINT_ADRR_PSU2 `ROM_ADDRESS_WIDTH'd248
`define ENTRYPOINT_ADRR_TCC `ROM_ADDRESS_WIDTH'd190
`define ENTRYPOINT_ADRR_NPG `ROM_ADDRESS_WIDTH'd55
//User Entry points (default ROM Address)
`define ENTRYPOINT_ADRR_USERCONSTANTS `ROM_ADDRESS_WIDTH'd276
`define ENTRYPOINT_ADRR_PIXELSHADER `ROM_ADDRESS_WIDTH'd278
`define ENTRYPOINT_ADRR_MAIN `ROM_ADDRESS_WIDTH'd37
 
//Please keep this syntax ENTRYPOINT_INDEX_* because the perl script that
//parses the user code expects this pattern in order to read in the tokens
//Internal subroutines
`define ENTRYPOINT_INDEX_INITIAL `ROM_ADDRESS_WIDTH'h8000
`define ENTRYPOINT_INDEX_CPPU `ROM_ADDRESS_WIDTH'h8001
`define ENTRYPOINT_INDEX_RGU `ROM_ADDRESS_WIDTH'h8002
`define ENTRYPOINT_INDEX_AABBIU `ROM_ADDRESS_WIDTH'h8003
`define ENTRYPOINT_INDEX_BIU `ROM_ADDRESS_WIDTH'h8004
`define ENTRYPOINT_INDEX_PSU `ROM_ADDRESS_WIDTH'h8005
`define ENTRYPOINT_INDEX_PSU2 `ROM_ADDRESS_WIDTH'h8006
`define ENTRYPOINT_INDEX_TCC `ROM_ADDRESS_WIDTH'h8007
`define ENTRYPOINT_INDEX_NPG `ROM_ADDRESS_WIDTH'h8008
//User defined subroutines
`define ENTRYPOINT_INDEX_USERCONSTANTS `ROM_ADDRESS_WIDTH'h8009
`define ENTRYPOINT_INDEX_PIXELSHADER `ROM_ADDRESS_WIDTH'h800A
`define ENTRYPOINT_INDEX_MAIN `ROM_ADDRESS_WIDTH'h800B
 
`define USER_AABBIU_UCODE_ADDRESS `ROM_ADDRESS_WIDTH'b1000000000000000
//---------------------------------------------------------------------------------
//This handy little macro allows me to print stuff either to STDOUT or a file.
//Notice that the compilation variable DUMP_CODE must be set if you want to print
//to a file. In XILINX right click 'Simulate Behavioral Model' -> Properties and
//under 'Specify `define macro name and value' type 'DEBUG=1|DUMP_CODE=1|DEBUG_CORE=<core you want to dump>'
`ifdef DUMP_CODE
`define LOGME $fwrite(ucode_file,
`else
`define LOGME $write(
`endif
//---------------------------------------------------------------------------------
`define TRUE 32'h1
`define FALSE 32'h0
`define RT_TRUE 48'b1
`define RT_FALSE 48'b0
//---------------------------------------------------------------------------------
 
`define GENERAL_PURPOSE_REG_ADDR_MASK `DATA_ADDRESS_WIDTH'h1F
`define VOID `DATA_ADDRESS_WIDTH'd0 //0000
//** Control register bits **//
`define CR_EN_LIGHTS 0
`define CR_EN_TEXTURE 1
`define CR_USER_AABBIU 2
/** Swapping registers **/
//** Configuration Registers **//
`define CREG_LIGHT_INFO `DATA_ADDRESS_WIDTH'd0
`define CREG_CAMERA_POSITION `DATA_ADDRESS_WIDTH'd1
`define CREG_PROJECTION_WINDOW_MIN `DATA_ADDRESS_WIDTH'd2
`define CREG_PROJECTION_WINDOW_MAX `DATA_ADDRESS_WIDTH'd3
`define CREG_RESOLUTION `DATA_ADDRESS_WIDTH'd4
`define CREG_TEXTURE_SIZE `DATA_ADDRESS_WIDTH'd5
`define CREG_PIXEL_2D_INITIAL_POSITION `DATA_ADDRESS_WIDTH'd6
`define CREG_PIXEL_2D_FINAL_POSITION `DATA_ADDRESS_WIDTH'd7
`define CREG_FIRST_LIGTH `DATA_ADDRESS_WIDTH'd8
`define CREG_FIRST_LIGTH_DIFFUSE `DATA_ADDRESS_WIDTH'd8
//OK, so from address 0x06 to 0x0F is where the lights are. Watch out: the values are
//hardcoded for now!! (look in ROM.v for the hardcoded values!!!)
 
 
//Don't change the order of the registers. CREG_V* and CREG_UV* registers
//need to be in that specific order for the triangle fetcher to work
//correctly!
 
`define CREG_AABBMIN `DATA_ADDRESS_WIDTH'd42
`define CREG_AABBMAX `DATA_ADDRESS_WIDTH'd43
`define CREG_V0 `DATA_ADDRESS_WIDTH'd44
`define CREG_UV0 `DATA_ADDRESS_WIDTH'd45
`define CREG_V1 `DATA_ADDRESS_WIDTH'd46
`define CREG_UV1 `DATA_ADDRESS_WIDTH'd47
`define CREG_V2 `DATA_ADDRESS_WIDTH'd48
`define CREG_UV2 `DATA_ADDRESS_WIDTH'd49
`define CREG_TRI_DIFFUSE `DATA_ADDRESS_WIDTH'd50
`define CREG_TEX_COLOR1 `DATA_ADDRESS_WIDTH'd53
`define CREG_TEX_COLOR2 `DATA_ADDRESS_WIDTH'd54
`define CREG_TEX_COLOR3 `DATA_ADDRESS_WIDTH'd55
`define CREG_TEX_COLOR4 `DATA_ADDRESS_WIDTH'd56
`define CREG_TEX_COLOR5 `DATA_ADDRESS_WIDTH'd57
`define CREG_TEX_COLOR6 `DATA_ADDRESS_WIDTH'd58
`define CREG_TEX_COLOR7 `DATA_ADDRESS_WIDTH'd59
 
 
/** Non-Swapping registers **/
// ** User Registers **//
//General Purpose registers, the user may put what ever he/she
//wants in here...
`define C1 `DATA_ADDRESS_WIDTH'd64
`define C2 `DATA_ADDRESS_WIDTH'd65
`define C3 `DATA_ADDRESS_WIDTH'd66
`define C4 `DATA_ADDRESS_WIDTH'd67
`define C5 `DATA_ADDRESS_WIDTH'd68
`define C6 `DATA_ADDRESS_WIDTH'd69
`define C7 `DATA_ADDRESS_WIDTH'd70
`define R1 `DATA_ADDRESS_WIDTH'd71
`define R2 `DATA_ADDRESS_WIDTH'd72
`define R3 `DATA_ADDRESS_WIDTH'd73
`define R4 `DATA_ADDRESS_WIDTH'd74
`define R5 `DATA_ADDRESS_WIDTH'd75
`define R6 `DATA_ADDRESS_WIDTH'd76
`define R7 `DATA_ADDRESS_WIDTH'd77
`define R8 `DATA_ADDRESS_WIDTH'd78
`define R9 `DATA_ADDRESS_WIDTH'd79
`define R10 `DATA_ADDRESS_WIDTH'd80
`define R11 `DATA_ADDRESS_WIDTH'd81
`define R12 `DATA_ADDRESS_WIDTH'd82
 
//** Internal Registers **//
`define CREG_PROJECTION_WINDOW_SCALE `DATA_ADDRESS_WIDTH'd83
`define CREG_UNORMALIZED_DIRECTION `DATA_ADDRESS_WIDTH'd84
`define CREG_RAY_DIRECTION `DATA_ADDRESS_WIDTH'd85
`define CREG_E1_LAST `DATA_ADDRESS_WIDTH'd86
`define CREG_E2_LAST `DATA_ADDRESS_WIDTH'd87
`define CREG_T `DATA_ADDRESS_WIDTH'd88
`define CREG_P `DATA_ADDRESS_WIDTH'd89
`define CREG_Q `DATA_ADDRESS_WIDTH'd90
`define CREG_UV0_LAST `DATA_ADDRESS_WIDTH'd91
`define CREG_UV1_LAST `DATA_ADDRESS_WIDTH'd92
`define CREG_UV2_LAST `DATA_ADDRESS_WIDTH'd93
`define CREG_TRI_DIFFUSE_LAST `DATA_ADDRESS_WIDTH'd94
`define CREG_LAST_t `DATA_ADDRESS_WIDTH'd95
`define CREG_LAST_u `DATA_ADDRESS_WIDTH'd96
`define CREG_LAST_v `DATA_ADDRESS_WIDTH'd97
`define CREG_COLOR_ACC `DATA_ADDRESS_WIDTH'd98
`define CREG_t `DATA_ADDRESS_WIDTH'd99
`define CREG_E1 `DATA_ADDRESS_WIDTH'd100
`define CREG_E2 `DATA_ADDRESS_WIDTH'd101
`define CREG_DELTA `DATA_ADDRESS_WIDTH'd102
`define CREG_u `DATA_ADDRESS_WIDTH'd103
`define CREG_v `DATA_ADDRESS_WIDTH'd104
`define CREG_H1 `DATA_ADDRESS_WIDTH'd105
`define CREG_H2 `DATA_ADDRESS_WIDTH'd106
`define CREG_H3 `DATA_ADDRESS_WIDTH'd107
`define CREG_PIXEL_PITCH `DATA_ADDRESS_WIDTH'd108
 
`define CREG_LAST_COL `DATA_ADDRESS_WIDTH'd109 //the last valid column, simply CREG_RESOLUTIONX - 1
`define CREG_TEXTURE_COLOR `DATA_ADDRESS_WIDTH'd110
`define CREG_PIXEL_2D_POSITION `DATA_ADDRESS_WIDTH'd111
`define CREG_TEXWEIGHT1 `DATA_ADDRESS_WIDTH'd112
`define CREG_TEXWEIGHT2 `DATA_ADDRESS_WIDTH'd113
`define CREG_TEXWEIGHT3 `DATA_ADDRESS_WIDTH'd114
`define CREG_TEXWEIGHT4 `DATA_ADDRESS_WIDTH'd115
`define CREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd116
`define CREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd117
`define R99 `DATA_ADDRESS_WIDTH'd118
`define CREG_ZERO `DATA_ADDRESS_WIDTH'd119
`define CREG_CURRENT_OUTPUT_PIXEL `DATA_ADDRESS_WIDTH'd120
`define CREG_3 `DATA_ADDRESS_WIDTH'd121
`define CREG_012 `DATA_ADDRESS_WIDTH'd122
 
//** Ouput registers **//
 
`define OREG_PIXEL_COLOR `DATA_ADDRESS_WIDTH'd128
`define OREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd129
`define OREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd130
`define OREG_ADDR_O `DATA_ADDRESS_WIDTH'd131
//-------------------------------------------------------------
//*** Instruction Set ***
//The order of the instructions is important here! Don't change
//it unless you know what you are doing. For example all the 'SET'
//family of instructions have the MSB bit set to 1. This means that
//if you add an instruction with MSB=1, this instruction will be treated
//as type II (see manual), meaning the second 32-bit argument is expected to be
//an immediate value instead of a register address!
//Another example is that in the JUMP family bits 3 and 4 have a special
//meaning: b4b3 = 01 => X jump type, b4b3 = 10 => Y jump type, and finally
//b4b3 = 11 means Z jump type.
//All this is just to tell you: don't play with these values!
 
// *** Type I Instructions (OP DST REG1 REG2) ***
`define NOP `INSTRUCTION_OP_LENGTH'b0_000000 //0
`define ADD `INSTRUCTION_OP_LENGTH'b0_000001 //1
`define SUB `INSTRUCTION_OP_LENGTH'b0_000010 //2
`define DIV `INSTRUCTION_OP_LENGTH'b0_000011 //3
`define MUL `INSTRUCTION_OP_LENGTH'b0_000100 //4
`define MAG `INSTRUCTION_OP_LENGTH'b0_000101 //5
`define COPY `INSTRUCTION_OP_LENGTH'b0_000111 //7
`define JGX `INSTRUCTION_OP_LENGTH'b0_001_000 //8
`define JLX `INSTRUCTION_OP_LENGTH'b0_001_001 //9
`define JEQX `INSTRUCTION_OP_LENGTH'b0_001_010 //10 - A
`define JNEX `INSTRUCTION_OP_LENGTH'b0_001_011 //11 - B
`define JGEX `INSTRUCTION_OP_LENGTH'b0_001_100 //12 - C
`define JLEX `INSTRUCTION_OP_LENGTH'b0_001_101 //13 - D
`define INC `INSTRUCTION_OP_LENGTH'b0_001_110 //14 - E
`define ZERO `INSTRUCTION_OP_LENGTH'b0_001_111 //15 - F
`define JGY `INSTRUCTION_OP_LENGTH'b0_010_000 //16
`define JLY `INSTRUCTION_OP_LENGTH'b0_010_001 //17
`define JEQY `INSTRUCTION_OP_LENGTH'b0_010_010 //18
`define JNEY `INSTRUCTION_OP_LENGTH'b0_010_011 //19
`define JGEY `INSTRUCTION_OP_LENGTH'b0_010_100 //20
`define JLEY `INSTRUCTION_OP_LENGTH'b0_010_101 //21
`define CROSS `INSTRUCTION_OP_LENGTH'b0_010_110 //22
`define DOT `INSTRUCTION_OP_LENGTH'b0_010_111 //23
`define JGZ `INSTRUCTION_OP_LENGTH'b0_011_000 //24
`define JLZ `INSTRUCTION_OP_LENGTH'b0_011_001 //25
`define JEQZ `INSTRUCTION_OP_LENGTH'b0_011_010 //26
`define JNEZ `INSTRUCTION_OP_LENGTH'b0_011_011 //27
`define JGEZ `INSTRUCTION_OP_LENGTH'b0_011_100 //28
`define JLEZ `INSTRUCTION_OP_LENGTH'b0_011_101 //29
 
//The next instruction is for simulation debug only,
//not to be synthesized! It behaves pretty much the same
//as a NOP, except that it prints the register value to
//a log file called 'Registers.log'
`ifdef DEBUG
`define DEBUG_PRINT `INSTRUCTION_OP_LENGTH'b0_011_110 //30
`endif
 
`define MULP `INSTRUCTION_OP_LENGTH'b0_011_111 //31 R1.z = S1.x * S1.y
`define MOD `INSTRUCTION_OP_LENGTH'b0_100_000 //32 R = MODULO( S1,S2 )
`define FRAC `INSTRUCTION_OP_LENGTH'b0_100_001 //33 R =FractionalPart( S1 )
`define INTP `INSTRUCTION_OP_LENGTH'b0_100_010 //34 R =IntergerPart( S1 )
`define NEG `INSTRUCTION_OP_LENGTH'b0_100_011 //35 R = -S1
`define DEC `INSTRUCTION_OP_LENGTH'b0_100_100 //36 R = S1--
`define XCHANGEX `INSTRUCTION_OP_LENGTH'b0_100_101 // R.x = S2.x, R.y = S1.y, R.z = S1.z
`define XCHANGEY `INSTRUCTION_OP_LENGTH'b0_100_110 // R.x = S1.x, R.y = S2.y, R.z = S1.z
`define XCHANGEZ `INSTRUCTION_OP_LENGTH'b0_100_111 // R.x = S1.x, R.y = S1.y, R.z = S2.z
`define IMUL `INSTRUCTION_OP_LENGTH'b0_101_000 // R = INTEGER( S1 * S2 )
`define UNSCALE `INSTRUCTION_OP_LENGTH'b0_101_001 // R = S1 >> SCALE
`define RESCALE `INSTRUCTION_OP_LENGTH'b0_101_010 // R = S1 << SCALE
`define INCX `INSTRUCTION_OP_LENGTH'b0_101_011 // R.X = S1.X + 1
`define INCY `INSTRUCTION_OP_LENGTH'b0_101_100 // R.Y = S1.Y + 1
`define INCZ `INSTRUCTION_OP_LENGTH'b0_101_101 // R.Z = S1.Z + 1
`define OMWRITE `INSTRUCTION_OP_LENGTH'b0_101_111 //47 IO write to O memory
`define TMREAD `INSTRUCTION_OP_LENGTH'b0_110_000 //48 IO read from T memory
`define LEA `INSTRUCTION_OP_LENGTH'b0_110_001 //49 Load effective address
 
//*** Type II Instructions (OP DST REG1 IMM) ***
//Every opcode in this group has bit 6 set (compare `INSTRUCTION_IMM_BIT).
//The trailing comment on each line is the decimal / hex value of the encoding.
`define RETURN `INSTRUCTION_OP_LENGTH'b1_000000 //64 0x40
`define SETX `INSTRUCTION_OP_LENGTH'b1_000001 //65 0x41
`define SETY `INSTRUCTION_OP_LENGTH'b1_000010 //66 0x42
`define SETZ `INSTRUCTION_OP_LENGTH'b1_000011 //67 0x43
`define SWIZZLE3D `INSTRUCTION_OP_LENGTH'b1_000100 //68 0x44
`define JMP `INSTRUCTION_OP_LENGTH'b1_011000 //88 0x58
`define CALL `INSTRUCTION_OP_LENGTH'b1_011001 //89 0x59
`define RET `INSTRUCTION_OP_LENGTH'b1_011010 //90 0x5A
 
//-------------------------------------------------------------
 
//All the possible values for the SWIZZLE3D instruction are defined next
`define SWIZZLE_XXX 32'd0
`define SWIZZLE_YYY 32'd1
`define SWIZZLE_ZZZ 32'd2
`define SWIZZLE_XYY 32'd3
`define SWIZZLE_XXY 32'd4
`define SWIZZLE_XZZ 32'd5
`define SWIZZLE_XXZ 32'd6
`define SWIZZLE_YXX 32'd7
`define SWIZZLE_YYX 32'd8
`define SWIZZLE_YZZ 32'd9
`define SWIZZLE_YYZ 32'd10
`define SWIZZLE_ZXX 32'd11
`define SWIZZLE_ZZX 32'd12
`define SWIZZLE_ZYY 32'd13
`define SWIZZLE_ZZY 32'd14
`define SWIZZLE_XZX 32'd15
`define SWIZZLE_XYX 32'd16
`define SWIZZLE_YXY 32'd17
`define SWIZZLE_YZY 32'd18
`define SWIZZLE_ZXZ 32'd19
`define SWIZZLE_ZYZ 32'd20
`define SWIZZLE_YXZ 32'd21
 
 
 
/rtl/Module_WishBoneSlave.v
0,0 → 1,159
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
 
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b10
`define TAG_DATA_ADDRESS_TYPE 2'b01
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//------------------------------------------------------------------------------
//WishBoneSlaveUnit
//Wishbone slave that receives writes from a bus master and turns them into
//memory write requests for the core:
// - ADR_I[15:0] and TGA_I are captured by flip-flops clocked by CYC_I.
// - Incoming DAT_I words are stored into three `WIDTH registers (Vx, Vy, Vz),
//   selected by the 3-bit wXYZSel value produced by a shift register.
// - oDataBus exposes {Vx,Vy,Vz}; oInstructionBus exposes {Vx,Vy}.
// - The write enables are asserted while MST_I is high and CYC_I is low,
//   qualified by the latched address tag.
module WishBoneSlaveUnit
(
//WB Input signals
input wire CLK_I,
input wire RST_I,
input wire STB_I,
input wire WE_I,
input wire[`WB_WIDTH-1:0] DAT_I,
input wire[`WB_WIDTH-1:0] ADR_I,
input wire [1:0] TGA_I,
output wire ACK_O,
input wire MST_I, //Master In!
input wire CYC_I,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionWriteAddress,
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oDataWriteEnable,
output wire oInstructionWriteEnable
 
);
 
//Address latch. Note that the flip-flop is clocked by CYC_I, not CLK_I, so the
//address is sampled on a CYC_I edge (presumably the rising edge, going by the
//primitive's name). The width is hard-coded to 16 rather than `ROM_ADDRESS_WIDTH.
FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( ADR_I[15:0] ),
.Q( oInstructionWriteAddress )
);
 
//Data and instruction writes share the same latched address
assign oDataWriteAddress = oInstructionWriteAddress;
 
wire[1:0] wTGA_Latched;
 
//Address tag latch, also clocked by CYC_I
FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( TGA_I ),
.Q( wTGA_Latched )
);
 
 
 
wire Clock,Reset;
assign Clock = CLK_I;
assign Reset = RST_I;
 
 
//High while the master is strobing a write
wire wLatchNow;
assign wLatchNow = STB_I & WE_I;
 
//1 Clock cycle after we assert the latch signal
//then the FF has the data ready to propagate
wire wDelay;
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay
(
.Clock( Clock ),
.Enable( 1'b1 ),
.Reset( Reset ),
.D( wLatchNow ),
.Q( wDelay )
);
 
assign ACK_O = wDelay & STB_I; //make sure we set ACK_O back to zero when STB_I is zero
 
 
//Selects which of Vx / Vy / Vz receives the current DAT_I word.
//The shifter starts from 3'b1, advances while STB_I is high and ACK_O is low,
//and is reset whenever CYC_I is low.
//NOTE(review): SHIFTLEFT_POSEDGE is defined outside this excerpt - presumably a
//one-hot left shifter; confirm.
wire [2:0] wXYZSel;
 
SHIFTLEFT_POSEDGE #(3) SHL
(
.Clock(CLK_I),
.Enable(STB_I & ~ACK_O),
.Reset(~CYC_I),
.Initial(3'b1),
.O(wXYZSel)
);
 
 
//Flip Flop to Store Vx
wire [`WIDTH-1:0] wVx;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[0] & STB_I ),
.D( DAT_I ),
.Q( wVx )
);
 
 
//Flip Flop to Store Vy
wire [`WIDTH-1:0] wVy;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[1] & STB_I ),
.D( DAT_I ),
.Q( wVy )
);
 
//Flip Flop to Store Vz
wire [`WIDTH-1:0] wVz;
 
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[2] & STB_I ),
.D( DAT_I ),
.Q( wVz )
);
 
//A data row is all three words; an instruction only uses the first two
assign oDataBus = {wVx,wVy,wVz};
assign oInstructionBus = {wVx,wVy};
wire wIsInstructionAddress,wIsDataAddress;
assign wIsInstructionAddress = (wTGA_Latched == `TAG_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0;
assign wIsDataAddress = (wTGA_Latched == `TAG_DATA_ADDRESS_TYPE ) ? 1'b1 : 1'b0;
 
//NOTE(review): the enables look cross-wired - oDataWriteEnable is qualified by
//wIsInstructionAddress and oInstructionWriteEnable by wIsDataAddress. This may be
//compensated by how the bus master encodes TGA (not visible here); verify against
//the master before changing anything.
assign oDataWriteEnable = (MST_I && !CYC_I && wIsInstructionAddress) ? 1'b1 : 1'b0;
assign oInstructionWriteEnable = ( MST_I && !CYC_I && wIsDataAddress) ? 1'b1 : 1'b0;
 
 
 
endmodule
//------------------------------------------------------------------------------
/rtl/Unit_Control.v
0,0 → 1,1234
/**********************************************************************************
Theia, Ray Cast Programmable Graphics Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/**********************************************************************************
Description:
 
This is the main Finite State Machine.
 
**********************************************************************************/
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
// State encodings of the ControlUnit finite state machine.
// ControlUnit keeps its state in a 6-bit register (reg [5:0] CurrentState),
// so every value defined here must stay below 64.
// Values 18, 19 and 28 are currently unassigned: the defines that used them
// are commented out below and kept only as a record.
// NOTE(review): CU_PERFORM_INTIAL_CONFIGURATION and CU_SET_PICTH are
// misspelled (INITIAL / PITCH), and WAIT_FOR_TCC lacks the CU_ prefix; the
// names are left untouched so that existing references keep compiling.

// --- Reset, configuration and constant pre-calculation ---
`define CU_AFTER_RESET_STATE 0
`define CU_WAIT_FOR_INITIAL_CONFIGURATION 1
`define CU_TRIGGER_CONFIGURATION_DATA_READ 2
`define CU_WAIT_FOR_CONFIG_DATA_READ 3
`define CU_ACK_CONFIG_DATA_READ 4
`define CU_PRECALCULATE_CONSTANTS 5
`define CU_WAIT_FOR_CONSTANT 6
`define CU_ACK_PRECALCULATE_CONSTANTS 7
// --- Task data ---
`define CU_WAIT_FOR_TASK 8
`define CU_READ_TASK_DATA 9
`define CU_WAIT_TASK_DATA_READ 10
`define CU_ACK_TASK_DATA_READ 11
// --- RGU micro-code routine ---
`define CU_TRIGGER_RGU 12
`define CU_WAIT_FOR_RGU 13
`define CU_ACK_RGU 14
// --- GEO / TCC / AABBIU ---
`define CU_TRIGGER_GEO 15
`define CU_WAIT_FOR_GEO_SYNC 16
//`define CU_CHECK_AABBIU_REQUEST 17
`define CU_TRIGGER_TCC 17
//`define CU_CHECK_BIU_REQUEST 18
//`define CU_TRIGGER_TFF 18
//`define CU_CHECK_GEO_DONE 19
//`define CU_WAIT_FOR_TFF 19
`define CU_TRIGGER_AABBIU 20
`define CU_WAIT_FOR_AABBIU 21
// --- MAIN micro-code routine ---
`define CU_TRIGGER_MAIN 22
`define CU_WAIT_FOR_MAIN 23
`define CU_ACK_MAIN 24
// --- PSU / PCU ---
`define CU_TRIGGER_PSU 25
`define CU_WAIT_FOR_PSU 26
`define CU_ACK_PSU 27
//`define CU_TRIGGER_PCU 28
`define CU_WAIT_FOR_PCU 29
`define CU_ACK_PCU 30
// --- Hit check and register clearing ---
`define CU_CHECK_HIT 31
`define CU_CLEAR_REGISTERS 32
`define CU_WAIT_CLEAR_REGISTERS 33
`define CU_ACK_CLEAR_REGISTERS 34
`define CU_TRIGGER_PSU_WITH_TEXTURE 35
`define WAIT_FOR_TCC 36
// --- NPU (next pixel) ---
`define CU_TRIGGER_NPU 37
`define CU_WAIT_NPU 38
`define CU_ACK_NPU 39
`define CU_PERFORM_INTIAL_CONFIGURATION 40
`define CU_SET_PICTH 41
// --- User constants and user pixel shader routines ---
`define CU_TRIGGER_USERCONSTANTS 42
`define CU_WAIT_USERCONSTANTS 43
`define CU_ACK_USERCONSTANTS 44
`define CU_TRIGGER_USERPIXELSHADER 45
`define CU_WAIT_FOR_USERPIXELSHADER 46
`define CU_ACK_USERPIXELSHADER 47
// --- End of frame, render enable and host data handshake ---
`define CU_DONE 48
`define CU_WAIT_FOR_RENDER_ENABLE 49
`define CU_ACK_TCC 50
`define CU_WAIT_FOR_HOST_DATA_AVAILABLE 51
`define CU_WAIT_FOR_HOST_DATA_ACK 52
//--------------------------------------------------------------
/*
ControlUnit: main finite state machine of the core.

Structure
 - CurrentState is a 6-bit register, loaded with NextState on every rising
   Clock edge and asynchronously forced to `CU_AFTER_RESET_STATE by Reset.
 - One combinational block decodes CurrentState into the control outputs
   and computes NextState.
 - FFTO1 (FFToggleOnce_1Bit) produces wHit from iUCodeReturnValue; it is
   enabled by rHitFlopEnable && iUCodeDone and reset by rResetHitFlop.

Micro-code routines are driven with a TRIGGER / WAIT / ACK pattern:
   TRIGGER : oUCodeEnable = 1 together with the routine entry point index
   WAIT    : stay until iUCodeDone is 1
   ACK     : (for most routines) stay until iUCodeDone is 0 again

State flow as coded below:
   AFTER_RESET -> WAIT_FOR_INITIAL_CONFIGURATION -> PERFORM_INTIAL_CONFIGURATION
   -> CLEAR_REGISTERS -> WAIT_FOR_CONFIG_DATA_READ -> PRECALCULATE_CONSTANTS
   -> USERCONSTANTS -> WAIT_FOR_RENDER_ENABLE -> RGU
   -> WAIT_FOR_HOST_DATA_AVAILABLE -> MAIN
   -> (scene traversal not complete: WAIT_FOR_HOST_DATA_ACK -> WAIT_FOR_HOST_DATA_AVAILABLE)
   -> CHECK_HIT -> [hit: PSU_WITH_TEXTURE -> PSU] -> USERPIXELSHADER -> NPU
   -> RGU (next pixel) or DONE.

Ports that are declared but never read by this module:
   iControlRegister, iTriggerAABBIURequest, iTriggerBIURequest,
   iTriggertTCCRequest, iGFUDone, iGEOSync, iTFFDone, iIODone.
Outputs that are 0 in every state: oIOWritePixel, oTriggerTFF.

NOTE(review): the DEBUG_CU traces print iDebug_CoreID, a port that only
exists when DEBUG is defined - presumably both macros are always defined
together; confirm.
*/
module ControlUnit
(

input wire                           Clock,
input wire                           Reset,
input wire[15:0]                     iControlRegister,
output reg                           oGFUEnable,
input wire                           iTriggerAABBIURequest,
input wire                           iTriggerBIURequest,
input wire                           iTriggertTCCRequest,
output reg                           oUCodeEnable,
output reg[`ROM_ADDRESS_WIDTH-1:0]   oCodeInstructioPointer,
input wire                           iUCodeDone,
input wire                           iUCodeReturnValue,
input wire                           iGFUDone,
input wire                           iGEOSync,
output reg                           oTriggerTFF,
input wire                           iTFFDone,
input wire                           MST_I,
//output reg[2:0]                    //oRamBusOwner,
input wire                           iIODone,
output reg                           oSetCurrentPitch,
output reg                           oFlipMemEnabled,
output reg                           oFlipMem,
output reg                           oIOWritePixel,
input wire                           iRenderEnable,
input wire                           iSceneTraverseComplete,
input wire                           iHostDataAvailable,
input wire                           iHostAckDataRead,

`ifdef DEBUG
input wire[`MAX_CORES-1:0]           iDebug_CoreID,
`endif

output reg                           oResultCommited,
output reg                           oDone
);

//Internal State Machine variables
reg [5:0] CurrentState;
reg [5:0] NextState;
integer ucode_file;
reg rResetHitFlop,rHitFlopEnable;
wire wHit;

//Private scratch values used only inside the decode block below. The
//decode first works on these and then copies them to the real outputs,
//so every output is written exactly once per evaluation of the block.
reg [`ROM_ADDRESS_WIDTH-1:0] rTmpCodeIP;
reg rTmpGFUEnable;
reg rTmpUCodeEnable;
reg rTmpResetHitFlop;
reg rTmpHitFlopEnable;
reg rTmpSetCurrentPitch;
reg rTmpFlipMemEnabled;
reg rTmpFlipMem;
reg rTmpDone;
reg rTmpResultCommited;

`ifdef DUMP_CODE
	integer log;
	initial
	begin
		//$display("Opening ucode dump file....\n");
		ucode_file = $fopen("CU.log","w");
	end

`endif
//--------------------------------------------------------------
//Hit flag: enabled by rHitFlopEnable && iUCodeDone, set input is
//iUCodeReturnValue, reset by rResetHitFlop.
FFToggleOnce_1Bit FFTO1
(
	.Clock( Clock ),
	.Reset( rResetHitFlop ),
	.Enable( rHitFlopEnable && iUCodeDone ),
	.S( iUCodeReturnValue ),
	.Q( wHit )
);
//--------------------------------------------------------------

`ifdef DEBUG_CU
	//Report a hit only when the flag becomes set, not when it is cleared
	always @ ( wHit )
	begin
		if (wHit)
			$display( "*** Triangle HIT ***\n");
	end
`endif

//Next states logic and Reset sequence
always @(posedge Clock or posedge Reset)
begin
	if (Reset)
		CurrentState <= `CU_AFTER_RESET_STATE;
	else
		CurrentState <= NextState;
end

//--------------------------------------------------------------
//State decode: control outputs and NextState as a function of
//CurrentState and the status inputs.
always @ ( * )
begin
	//Idle value of every decoded signal. A state below only lists the
	//signals that differ from these.
	rTmpCodeIP          = 0;
	rTmpGFUEnable       = 0;
	rTmpUCodeEnable     = 0;
	rTmpResetHitFlop    = 0;
	rTmpHitFlopEnable   = 0;
	rTmpSetCurrentPitch = 0;
	rTmpFlipMemEnabled  = 0;
	rTmpFlipMem         = 0;
	rTmpDone            = 0;
	rTmpResultCommited  = 0;

	case (CurrentState)
	//-----------------------------------------
	`CU_AFTER_RESET_STATE:
	begin
		`ifdef DEBUG_CU
			$display("%d CU_AFTER_RESET_STATE\n",$time);
		`endif
		rTmpCodeIP          = `ENTRYPOINT_INDEX_INITIAL;
		rTmpResetHitFlop    = 1;
		rTmpSetCurrentPitch = 1;
		NextState = `CU_WAIT_FOR_INITIAL_CONFIGURATION;
	end
	//-----------------------------------------
	//Stay here until MST_I is asserted
	`CU_WAIT_FOR_INITIAL_CONFIGURATION:
	begin
		rTmpResetHitFlop = 1;
		if ( MST_I )
			NextState = `CU_PERFORM_INTIAL_CONFIGURATION;
		else
			NextState = `CU_WAIT_FOR_INITIAL_CONFIGURATION;
	end
	//-----------------------------------------
	//Stay here until MST_I is released and rendering is enabled
	`CU_PERFORM_INTIAL_CONFIGURATION:
	begin
		rTmpResetHitFlop = 1;
		if ( MST_I == 0 && iRenderEnable == 1'b1)
			NextState = `CU_CLEAR_REGISTERS;
		else
			NextState = `CU_PERFORM_INTIAL_CONFIGURATION;
	end
	//-----------------------------------------
	`CU_CLEAR_REGISTERS:
	begin
		`ifdef DEBUG_CU
			$display("%d CU_CLEAR_REGISTERS\n",$time);
		`endif
		rTmpCodeIP         = `ENTRYPOINT_INDEX_INITIAL;
		rTmpUCodeEnable    = 1;
		rTmpFlipMemEnabled = 1;
		NextState = `CU_WAIT_CLEAR_REGISTERS;
	end
	//-----------------------------------------
	`CU_WAIT_CLEAR_REGISTERS:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_INITIAL;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_CLEAR_REGISTERS;
		else
			NextState = `CU_WAIT_CLEAR_REGISTERS;
	end
	//-----------------------------------------
	//Moves on unconditionally (does not wait for iUCodeDone to drop)
	`CU_ACK_CLEAR_REGISTERS:
	begin
		`ifdef DEBUG_CU
			$display("%d CU_ACK_CLEAR_REGISTERS\n", $time);
		`endif
		NextState = `CU_WAIT_FOR_CONFIG_DATA_READ;
	end
	//-----------------------------------------
	//Stay here while MST_I is asserted
	`CU_WAIT_FOR_CONFIG_DATA_READ:
	begin
		if ( MST_I == 0 )
			NextState = `CU_PRECALCULATE_CONSTANTS;
		else
			NextState = `CU_WAIT_FOR_CONFIG_DATA_READ;
	end
	//-----------------------------------------
	`CU_PRECALCULATE_CONSTANTS:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_PRECALCULATE_CONSTANTS\n", $time);
		`endif
		rTmpCodeIP      = `ENTRYPOINT_INDEX_CPPU;
		rTmpUCodeEnable = 1;
		NextState = `CU_WAIT_FOR_CONSTANT;
	end
	//-----------------------------------------
	`CU_WAIT_FOR_CONSTANT:
	begin
		rTmpCodeIP = `ENTRYPOINT_INDEX_CPPU;
		if ( iUCodeDone )
			NextState = `CU_ACK_PRECALCULATE_CONSTANTS;
		else
			NextState = `CU_WAIT_FOR_CONSTANT;
	end
	//-----------------------------------------
	//Moves on unconditionally (does not wait for iUCodeDone to drop)
	`CU_ACK_PRECALCULATE_CONSTANTS:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_ACK_PRECALCULATE_CONSTANTS\n", $time);
		`endif
		NextState = `CU_TRIGGER_USERCONSTANTS;
	end
	//-----------------------------------------
	`CU_TRIGGER_USERCONSTANTS:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_TRIGGER_USERCONSTANTS\n",$time);
		`endif
		rTmpCodeIP      = `ENTRYPOINT_INDEX_USERCONSTANTS;
		rTmpUCodeEnable = 1;
		NextState = `CU_WAIT_USERCONSTANTS;
	end
	//-----------------------------------------
	`CU_WAIT_USERCONSTANTS:
	begin
		rTmpCodeIP = `ENTRYPOINT_INDEX_USERCONSTANTS;
		if ( iUCodeDone )
			NextState = `CU_ACK_USERCONSTANTS;
		else
			NextState = `CU_WAIT_USERCONSTANTS;
	end
	//-----------------------------------------
	`CU_ACK_USERCONSTANTS:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_ACK_USERCONSTANTS\n",$time);
		`endif
		if ( iUCodeDone == 0)
			NextState = `CU_WAIT_FOR_RENDER_ENABLE;
		else
			NextState = `CU_ACK_USERCONSTANTS;
	end
	//-----------------------------------------
	`CU_WAIT_FOR_RENDER_ENABLE:
	begin
		`ifdef DEBUG_CU
			$display("CORE: %d CU_WAIT_FOR_RENDER_ENABLE", iDebug_CoreID);
		`endif
		if ( iRenderEnable)
			NextState = `CU_TRIGGER_RGU;
		else
			NextState = `CU_WAIT_FOR_RENDER_ENABLE;
	end
	//-----------------------------------------
	`CU_TRIGGER_RGU:
	begin
		`ifdef DEBUG_CU
			$display("CORE: %d CU_TRIGGER_RGU", iDebug_CoreID);
		`endif
		rTmpCodeIP      = `ENTRYPOINT_INDEX_RGU;
		rTmpUCodeEnable = 1;
		NextState = `CU_WAIT_FOR_RGU;
	end
	//-----------------------------------------
	//Unlike the other WAIT states, the entry point index is 0 here
	`CU_WAIT_FOR_RGU:
	begin
		if ( iUCodeDone )
			NextState = `CU_ACK_RGU;
		else
			NextState = `CU_WAIT_FOR_RGU;
	end
	//-----------------------------------------
	`CU_ACK_RGU:
	begin
		`ifdef DEBUG_CU
			$display("CORE: %d CU_ACK_RGU", iDebug_CoreID);
		`endif
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone == 0 & iRenderEnable == 1)
			NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
		else
			NextState = `CU_ACK_RGU;
	end
	//-----------------------------------------
	//No state of this module transitions to CU_TRIGGER_TCC, so the three
	//TCC states can only be entered from outside the coded flow.
	`CU_TRIGGER_TCC:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE %d Control: CU_TRIGGER_TCC\n",$time,iDebug_CoreID);
		`endif
		rTmpCodeIP         = `ENTRYPOINT_INDEX_TCC;
		rTmpUCodeEnable    = 1;
		rTmpFlipMemEnabled = 1;
		//oFlipMem stays 0: we need u,v from last IO read cycle
		NextState = `WAIT_FOR_TCC;
	end
	//-----------------------------------------
	`WAIT_FOR_TCC:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_TCC;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_TCC;
		else
			NextState = `WAIT_FOR_TCC;
	end
	//-----------------------------------------
	`CU_ACK_TCC:
	begin
		rTmpCodeIP = `ENTRYPOINT_INDEX_TCC;
		if ( iUCodeDone == 0 && iSceneTraverseComplete == 1'b1)
			NextState = `CU_TRIGGER_PSU_WITH_TEXTURE;
		else if (iUCodeDone == 0 && iSceneTraverseComplete == 1'b0)
			NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
		else
			NextState = `CU_ACK_TCC;
	end
	//-----------------------------------------
	/*
	Was there any hit at all?
	At this point, all the triangles in the list
	have been traversed looking for a hit with our ray.
	As coded there are two outcomes:
	1) There was a hit (wHit = 1): trigger the PSU with texture.
	2) There was no hit: go straight to the user pixel shader.
	*/
	`CU_CHECK_HIT:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE %d Control: CU_CHECK_HIT\n",$time,iDebug_CoreID);
		`endif
		if (wHit)
			NextState = `CU_TRIGGER_PSU_WITH_TEXTURE;
		else
			NextState = `CU_TRIGGER_USERPIXELSHADER;
	end
	//-----------------------------------------
	//Also resets the hit flop, its value has been consumed by now
	`CU_TRIGGER_PSU_WITH_TEXTURE:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_TRIGGER_PSU_WITH_TEXTURE\n",$time);
		`endif
		rTmpCodeIP       = `ENTRYPOINT_INDEX_PSU2;
		rTmpUCodeEnable  = 1;
		rTmpResetHitFlop = 1;
		NextState = `CU_WAIT_FOR_PSU;
	end
	//-----------------------------------------
	//Wait until the host acknowledges the data read
	`CU_WAIT_FOR_HOST_DATA_ACK:
	begin
		if ( iHostAckDataRead )
			NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
		else
			NextState = `CU_WAIT_FOR_HOST_DATA_ACK;
	end
	//-----------------------------------------
	//Wait until data from Host becomes available
	`CU_WAIT_FOR_HOST_DATA_AVAILABLE:
	begin
		if ( iHostDataAvailable )
			NextState = `CU_TRIGGER_MAIN;
		else
			NextState = `CU_WAIT_FOR_HOST_DATA_AVAILABLE;
	end
	//-----------------------------------------
	//The only state, together with CU_DONE, that asserts oFlipMem
	`CU_TRIGGER_MAIN:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE: %d Control: CU_TRIGGER_MAIN\n",$time,iDebug_CoreID);
		`endif
		rTmpCodeIP         = `ENTRYPOINT_INDEX_MAIN;
		rTmpUCodeEnable    = 1;
		rTmpGFUEnable      = 1;
		rTmpFlipMemEnabled = 1;
		rTmpFlipMem        = 1;
		NextState = `CU_WAIT_FOR_MAIN;
	end
	//-----------------------------------------
	//The hit flop is enabled so that it can capture the return value
	`CU_WAIT_FOR_MAIN:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_MAIN;
		rTmpGFUEnable      = 1;
		rTmpHitFlopEnable  = 1;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_MAIN;
		else
			NextState = `CU_WAIT_FOR_MAIN;
	end
	//-----------------------------------------
	/*
	ACK UCODE by setting oUCodeEnable = 0
	*/
	`CU_ACK_MAIN:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE: %d Control: CU_ACK_MAIN\n",$time, iDebug_CoreID);
		`endif
		rTmpHitFlopEnable = 1;
		//oGFUEnable is 0 here (Changed Aug 15)
		if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b1)
			NextState = `CU_CHECK_HIT;
		//Original author's open question: what if iSceneTraverseComplete
		//becomes 1 a cycle after this check?
		else if ( iUCodeDone == 1'b0 & iSceneTraverseComplete == 1'b0)
			NextState = `CU_WAIT_FOR_HOST_DATA_ACK;
		else
			NextState = `CU_ACK_MAIN;
	end
	//-----------------------------------------
	//NOTE(review): this state drives `ENTRYPOINT_INDEX_PSU although it is
	//entered from CU_TRIGGER_PSU_WITH_TEXTURE, which drives
	//`ENTRYPOINT_INDEX_PSU2. oUCodeEnable is 0 here, so presumably the
	//index is ignored - confirm.
	`CU_WAIT_FOR_PSU:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_PSU;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_PSU;
		else
			NextState = `CU_WAIT_FOR_PSU;
	end
	//-----------------------------------------
	`CU_ACK_PSU:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE: %d Control: CU_ACK_PSU\n",$time, iDebug_CoreID);
		`endif
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone == 0)
			NextState = `CU_TRIGGER_USERPIXELSHADER;
		else
			NextState = `CU_ACK_PSU;
	end
	//-----------------------------------------
	`CU_TRIGGER_NPU: //Next Pixel Unit
	begin
		`ifdef DEBUG_CU
			$display("%d CORE: %d Control: CU_TRIGGER_NPU\n",$time, iDebug_CoreID);
		`endif
		rTmpCodeIP         = `ENTRYPOINT_INDEX_NPG;
		rTmpUCodeEnable    = 1;
		rTmpFlipMemEnabled = 1;
		NextState = `CU_WAIT_NPU;
	end
	//-----------------------------------------
	`CU_WAIT_NPU:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_NPG;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_NPU;
		else
			NextState = `CU_WAIT_NPU;
	end
	//-----------------------------------------
	/*
	Next Pixel generation: here we either go
	to RGU for the next pixel (return value 1), or we have no
	more pixels (return value 0), so we are done with our picture!
	*/
	`CU_ACK_NPU:
	begin
		`ifdef DEBUG_CU
			$display("%d CORE: %d Control: CU_ACK_NPU\n",$time, iDebug_CoreID);
		`endif
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone == 0 && iUCodeReturnValue == 1)
			NextState = `CU_TRIGGER_RGU;
		else if (iUCodeDone == 0 && iUCodeReturnValue == 0)
			NextState = `CU_DONE;
		else
			NextState = `CU_ACK_NPU;
	end
	//-----------------------------------------
	//Terminal state: only Reset leaves it
	`CU_DONE:
	begin
		rTmpFlipMem = 1;
		rTmpDone    = 1;
		NextState = `CU_DONE;
	end
	//-----------------------------------------
	/*
	Here we no longer use GFU so Enable stays at zero
	*/
	`CU_TRIGGER_USERPIXELSHADER:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_TRIGGER_USERPIXELSHADER\n",$time);
		`endif
		rTmpCodeIP         = `ENTRYPOINT_INDEX_PIXELSHADER;
		rTmpUCodeEnable    = 1;
		rTmpFlipMemEnabled = 1;
		NextState = `CU_WAIT_FOR_USERPIXELSHADER;
	end
	//-----------------------------------------
	`CU_WAIT_FOR_USERPIXELSHADER:
	begin
		rTmpCodeIP         = `ENTRYPOINT_INDEX_PIXELSHADER;
		rTmpFlipMemEnabled = 1;
		if ( iUCodeDone )
			NextState = `CU_ACK_USERPIXELSHADER;
		else
			NextState = `CU_WAIT_FOR_USERPIXELSHADER;
	end
	//-----------------------------------------
	//The only state that asserts oResultCommited
	`CU_ACK_USERPIXELSHADER:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: CU_ACK_USERPIXELSHADER\n",$time);
		`endif
		rTmpFlipMemEnabled = 1;
		rTmpResultCommited = 1;
		if ( iUCodeDone == 0)
			NextState = `CU_TRIGGER_NPU;
		else
			NextState = `CU_ACK_USERPIXELSHADER;
	end
	//---------------------------------------------------
	//Any unassigned encoding falls back to the after-reset state
	default:
	begin
		`ifdef DEBUG_CU
			$display("%d Control: ERROR Undefined State\n",$time);
		`endif
		NextState = `CU_AFTER_RESET_STATE;
	end
	//-----------------------------------------

	endcase

	//Commit the decoded values: one assignment per output per evaluation
	oCodeInstructioPointer = rTmpCodeIP;
	oGFUEnable             = rTmpGFUEnable;
	oUCodeEnable           = rTmpUCodeEnable;
	oIOWritePixel          = 0;   //never asserted by any state
	rResetHitFlop          = rTmpResetHitFlop;
	rHitFlopEnable         = rTmpHitFlopEnable;
	oTriggerTFF            = 0;   //never asserted by any state
	oSetCurrentPitch       = rTmpSetCurrentPitch;
	oFlipMemEnabled        = rTmpFlipMemEnabled;
	oFlipMem               = rTmpFlipMem;
	oDone                  = rTmpDone;
	oResultCommited        = rTmpResultCommited;
end //always
endmodule
/rtl/Unit_EXE.v
0,0 → 1,275
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programmable Graphics Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------
module ExecutionUnit
(
 
input wire Clock,
input wire Reset,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction2,
 
 
input wire [`DATA_ROW_WIDTH-1:0] iDataRead0,
input wire [`DATA_ROW_WIDTH-1:0] iDataRead1,
input wire iTrigger,
 
 
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer1,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer2,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress1,
output wire oDataWriteEnable,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire oReturnCode,
 
 
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEMWriteEnable,
output wire [`DATA_ROW_WIDTH-1:0] oTMEMReadAddress,
input wire [`DATA_ROW_WIDTH-1:0] iTMEMReadData,
input wire iTMEMDataAvailable,
output wire oTMEMDataRequest,
 
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output wire oDone
 
 
 
 
);
 
 
`ifdef DEBUG
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer;
`endif
 
wire wEXE2__uCodeDone;
wire wEXE2_IFU__EXEBusy;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination;
wire wALU2_EXE__BranchTaken;
wire wALU2_IFU_BranchNotTaken;
wire [`INSTRUCTION_WIDTH-1:0] CurrentInstruction;
//wire wIDU2_IFU__IDUBusy;
 
 
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;
 
 
wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1;
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination;
wire wInstructionAvailable;
 
//ALU wires
wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation;
wire [`WIDTH-1:0] ALU2ChannelA;
wire [`WIDTH-1:0] ALU2ChannelB;
wire [`WIDTH-1:0] ALU2ChannelC;
wire [`WIDTH-1:0] ALU2ChannelD;
wire [`WIDTH-1:0] ALU2ChannelE;
wire [`WIDTH-1:0] ALU2ChannelF;
wire [`WIDTH-1:0] ALU2ResultA;
wire [`WIDTH-1:0] ALU2ResultB;
wire [`WIDTH-1:0] ALU2ResultC;
wire wEXE2_ALU__TriggerALU;
wire ALU2OutputReady;
wire w2FIU__BranchTaken;
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp;
wire [`ROM_ADDRESS_WIDTH-1:0] wIDU2_IFU_ReturnAddress;
wire wALU2_IFU_ReturnFromSub;
 
//wire wIDU2_IFU__InputsLatched;
 
wire wEPU_Busy,wTriggerIFU;
wire [`ROM_ADDRESS_WIDTH-1:0] wEPU_IP,wIFU_IP,wCodeEntryPoint;
 
assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP;
 
 
InstructionEntryPoint EPU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( iTrigger ),
.iInitialCodeAddress( iInitialCodeAddress ),
.iIMemInput(iInstruction1),
 
.oEPU_Busy(wEPU_Busy),
.oEntryPoint( wCodeEntryPoint ),
.oTriggerIFU( wTriggerIFU ),
.oInstructionAddr( wEPU_IP )
 
);
 
InstructionFetch IFU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wTriggerIFU ),
.iInstruction1( iInstruction1 ),
.iInstruction2( iInstruction2 ),
.iInitialCodeAddress( wCodeEntryPoint ),
.iBranchTaken( w2FIU__BranchTaken ),
.iSubroutineReturn( wALU2_IFU_ReturnFromSub ),
//.iReturnAddress( wIDU2_IFU_ReturnAddress ),
.oCurrentInstruction( CurrentInstruction ),
.oInstructionAvalable( wInstructionAvailable ),
.oIP( wIFU_IP ),
.oIP2( oInstructionPointer2 ),
.iEXEDone( ALU2OutputReady ),
.oMicroCodeReturnValue( oReturnCode ),
.oExecutionDone( oDone )
);
 
////---------------------------------------------------------
wire wIDU2_EXE_DataReady;
wire wEXE2_IDU_ExeLatchedValues;
 
InstructionDecode IDU
(
.Clock( Clock ),
.Reset( Reset ),
.iEncodedInstruction( CurrentInstruction ),
.iInstructionAvailable( wInstructionAvailable ),
//.iIP( oInstructionPointer1 ),
//.oReturnAddress( wIDU2_IFU_ReturnAddress ),
.oRamAddress0( oDataReadAddress0 ),
.oRamAddress1( oDataReadAddress1 ),
.iRamValue0( iDataRead0 ),
.iRamValue1( iDataRead1 ),
.iLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
.iDataForward( {ALU2ResultA,ALU2ResultB,ALU2ResultC} ),
//Outputs going to the ALU-FSM
.oOperation( wOperation ),
.oDestination( wDestination ),
.oSource0( wSource0 ),
.oSource1( wSource1 ),
`ifdef DEBUG
.iDebug_CurrentIP( oInstructionPointer1 ),
.oDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
`endif
.oDataReadyForExe( wIDU2_EXE_DataReady )
);
 
 
ExecutionFSM EXE
(
.Clock( Clock ),
.Reset( Reset | iTrigger ), //New Sat Jun13
.iDecodeDone( wIDU2_EXE_DataReady ),
.iOperation( wOperation ),
.iDestination( wDestination ),
.iSource0( wSource0 ),
.iSource1( wSource1 ) ,
 
`ifdef DEBUG
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
.iDebug_CoreID( iDebug_CoreID ),
`endif
//.iJumpResultFromALU( wALU2_EXE__BranchTaken ),
.iBranchTaken( wALU2_EXE__BranchTaken ),
.iBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.oJumpFlag( w2FIU__BranchTaken ),
.oJumpIp( JumpIp ),
.oRAMWriteEnable( oDataWriteEnable ),
.oRAMWriteAddress( oDataWriteAddress ),
.RAMBus( oDataBus ),
.oBusy( wEXE2_IFU__EXEBusy ),
 
.oExeLatchedValues( wEXE2_IDU_ExeLatchedValues ),
.oLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
 
//ALU ports and control signals
.oTriggerALU( wEXE2_ALU__TriggerALU ),
.oALUOperation( ALU2Operation ),
.oALUChannelX1( ALU2ChannelA ),
.oALUChannelX2( ALU2ChannelB ),
.oALUChannelY1( ALU2ChannelC ),
.oALUChannelY2( ALU2ChannelD ),
.oALUChannelZ1( ALU2ChannelE ),
.oALUChannelZ2( ALU2ChannelF ),
.iALUResultX( ALU2ResultA ),
.iALUResultY( ALU2ResultB ),
.iALUResultZ( ALU2ResultC ),
.iALUOutputReady( ALU2OutputReady )
 
);
 
 
//--------------------------------------------------------
 
VectorALU ALU
(
.Clock(Clock),
.Reset(Reset),
.iOperation( ALU2Operation ),
.iChannel_Ax( ALU2ChannelA ),
.iChannel_Bx( ALU2ChannelB ),
.iChannel_Ay( ALU2ChannelC ),
.iChannel_By( ALU2ChannelD ),
.iChannel_Az( ALU2ChannelE ),
.iChannel_Bz( ALU2ChannelF ),
.oResultA( ALU2ResultA ),
.oResultB( ALU2ResultB ),
.oResultC( ALU2ResultC ),
.oBranchTaken( wALU2_EXE__BranchTaken ),
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.oReturnFromSub( wALU2_IFU_ReturnFromSub ),
.iInputReady( wEXE2_ALU__TriggerALU ),
//***********
.oOMEMWriteAddress( oOMEMWriteAddress ),
.oOMEMWriteData( oOMEMWriteData ),
.oOMEM_WriteEnable( oOMEMWriteEnable ),
.oTMEMReadAddress( oTMEMReadAddress ),
.iTMEMReadData( iTMEMReadData ),
.iTMEMDataAvailable( iTMEMDataAvailable ),
.oTMEMDataRequest( oTMEMDataRequest ),
//***********
.iCurrentIP( oInstructionPointer1 ),
.OutputReady( ALU2OutputReady )
);
 
 
endmodule
//---------------------------------------------------------------------
/rtl/Module_FixedPointDivision.v
0,0 → 1,328
/*
Fixed point Division Module Qm.n
C = (A << n) / B
*/
 
 
//Division State Machine Constants
//State encodings used by the UnsignedIntegerDivision FSM (its state
//register is 6 bits wide). The idle/after-reset state is defined
//further below as `FPS_AFTER_RESET_STATE.
`define INITIAL_DIVISION_STATE 6'd1
`define DIVISION_REVERSE_LAST_ITERATION 6'd2
`define PRE_CALCULATE_REMAINDER 6'd3
`define CALCULATE_REMAINDER 6'd4
`define WRITE_DIVISION_RESULT 6'd5
 
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
//Idle state: the FSM sits here after Reset and between divisions.
//Note this one is an unsized constant, unlike the 6'dN states above.
`define FPS_AFTER_RESET_STATE 0
//-----------------------------------------------------------------
//This only works if your divisor is a power of 2:
//x % 2^n == x & (2^n - 1).
/*
module Modulus2N
(
input wire Clock,
input wire Reset,
input wire [`WIDTH-1:0] iDividend,iDivisor,
output reg [`WIDTH-1:0] oQuotient,
input wire iInputReady, //Is the input data valid?
output reg oOutputReady //Our output data is ready!
);
 
 
 
FF1_POSEDGE_SYNCRONOUS_RESET FFOutputReadyDelay2
(
.Clock( Clock ),
.Clear( Reset ),
.D( iInputReady ),
.Q( oOutputReady )
);
 
assign oQuotient = (iDividend & (iDivisor-1'b1));
 
 
endmodule
*/
//-----------------------------------------------------------------
/*
Be aware that the unsigned division algorithm doesn't know or care
about the sign bit of the result (bit 31). So if your divisor is very
small there is a chance that bit 31 of the unsigned division result is
one even though the result should be positive.
 
*/
/*
Signed wrapper around UnsignedIntegerDivision.

Flow:
  1. iDividend / iDivisor are captured into FFD1 / FFD2 while iInputReady
     is high; iInputReady itself is delayed one register stage (FFD3) so
     the unsigned divider is triggered with the captured operands.
  2. Both operands are converted to their magnitudes (two's complement
     negate when bit SIGN is set) and handed to the unsigned divider.
  3. When the unsigned divider reports DivReady, bit 31 of its result is
     masked off and the result is negated if the operand signs differ.

Ports:
  Clock, Reset   - clock and synchronous reset (Reset only clears OutputReady
                   here; xQuotient keeps its previous value).
  iDividend      - signed dividend.
  iDivisor       - signed divisor.
  xQuotient      - signed quotient, updated when the unsigned divider is done.
  iInputReady    - operands are valid.
  OutputReady    - registered copy of the unsigned divider's ready flag.

NOTE(review): SIGN = 31 and the 32'h7FFFFFFF mask assume `WIDTH is 32 -
confirm against aDefinitions.v.
NOTE(review): FFD_POSEDGE_SYNCRONOUS_RESET is defined elsewhere; it is
presumably a D register with synchronous reset and enable.
*/
module SignedIntegerDivision
(
input wire Clock,Reset,
input wire [`WIDTH-1:0] iDividend,iDivisor,
output reg [`WIDTH-1:0] xQuotient,
input wire iInputReady, //Is the input data valid?
output reg OutputReady //Our output data is ready!
);


parameter SIGN = 31;
wire Sign;

wire [`WIDTH-1:0] wDividend,wDivisor;
wire wInputReady;

//Capture the dividend while iInputReady is asserted
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
	.Clock( Clock ),
	.Reset( Reset),
	.Enable( iInputReady ),
	.D( iDividend ),
	.Q( wDividend)
);

//Capture the divisor while iInputReady is asserted
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
	.Clock( Clock ),
	.Reset( Reset),
	.Enable( iInputReady ),
	.D( iDivisor ),
	.Q( wDivisor )
);

//Delay the input-valid flag so it lines up with the captured operands
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
(
	.Clock( Clock ),
	.Reset( Reset),
	.Enable( 1'b1 ),
	.D( iInputReady ),
	.Q( wInputReady )
);


wire [`WIDTH-1:0] wAbsDividend,wAbsDivisor;
wire [`WIDTH-1:0] wQuottientTemp;
wire [`WIDTH-1:0] wAbsQuotient;

//The result is negative when exactly one operand is negative
assign Sign = wDividend[SIGN] ^ wDivisor[SIGN];

//Magnitudes of the operands (two's complement negate when negative)
assign wAbsDividend = ( wDividend[SIGN] == 1 )?
	~wDividend + 1'b1 : wDividend;
assign wAbsDivisor = ( wDivisor[SIGN] == 1 )?
	~wDivisor + 1'b1 : wDivisor;

wire DivReady;


UnsignedIntegerDivision UDIV
(
	.Clock(Clock),
	.Reset( Reset ),
	.iDividend( wAbsDividend),
	.iDivisor( wAbsDivisor ),
	.xQuotient(wQuottientTemp),
	.iInputReady( wInputReady ),
	.OutputReady( DivReady )
);

//Make sure the output from the 'unsigned' operation is really positive
assign wAbsQuotient = wQuottientTemp & 32'h7FFFFFFF;

//-----------------------------------------------
//Output registers.
//Non-blocking assignments are used so that other posedge-clocked logic
//reading xQuotient / OutputReady always samples the pre-edge value
//(blocking assignments here create a simulation race).
always @ ( posedge Clock )
begin
	//Re-apply the sign once the unsigned divider has finished
	if ( DivReady )
	begin
		if ( Sign == 1 )
			xQuotient <= ~wAbsQuotient + 1'b1;
		else
			xQuotient <= wAbsQuotient;
	end

	//Reset has priority over the ready flag
	if ( Reset == 1 )
		OutputReady <= 1'b0;
	else
		OutputReady <= DivReady;
end
//-----------------------------------------------

endmodule
//-----------------------------------------------------------------
/*
Returns the integer part (Quotient) of a division.
Division is the process of repeated subtraction.
Like the long division we learned in grade school,
a binary division algorithm works from the high
order digits to the low order digits and generates
a quotient (division result) with each step.
The division algorithm is divided into two steps:
* Shift the upper bits of the dividend (the number
we are dividing into) into the remainder.
* Subtract the divisor from the value in the remainder.
The high order bit of the result become a bit of
the quotient (division result).
*/
 
//-----------------------------------------------------------------
/*
Implement the division as an FSM,
basically because the behavioral division has a for loop
with a variable loop-limit counter, which I think is not friendly
to the synthesizer (dumb dumb synthesizer :) )
*/
/*
Unsigned fixed-point divider, implemented as a multi-cycle FSM.

Computes (iDividend << `SCALE) / iDivisor using 64-bit internal registers
and returns the low `WIDTH bits of the quotient.

Handshake:
  - Idle in `FPS_AFTER_RESET_STATE until iInputReady is 1.
  - When done, xQuotient is written, OutputReady is set to 1 and the FSM
    waits in `WRITE_DIVISION_RESULT until iInputReady returns to 0.

Special cases (handled in `INITIAL_DIVISION_STATE):
  - Divisor == 0           : quotient is forced to 32'h0FFF_FFFF.
  - Divisor  > dividend    : quotient is 0.
  - Divisor == dividend    : quotient is 1.
  (the comparisons use the dividend AFTER the `SCALE shift)

Clocking: the datapath and NextState are evaluated on the rising edge;
CurrentState is updated from NextState on the falling edge.

NOTE(review): the datapath uses blocking assignments on purpose - several
statements read values written earlier in the same state (e.g. Remainder,
t, q). Do not convert them mechanically to non-blocking.
*/
module UnsignedIntegerDivision(
input wire Clock,Reset,
input wire [`WIDTH-1:0] iDividend,iDivisor,

output reg [`WIDTH-1:0] xQuotient,

input wire iInputReady, //Is the input data valid?
output reg OutputReady //Our output data is ready!
);

//64-bit working copies so the dividend can be pre-shifted by `SCALE
reg [63:0] Dividend,Divisor;

reg [`WIDTH-1:0] i,num_bits;   //iteration counter / number of iterations
reg [63:0] t, q, d, Bit;       //scratch: trial subtraction, quotient bit,
                               //saved dividend, shifted-in dividend bit
reg [63:0] Quotient,Remainder;

reg [5:0] CurrentState, NextState;
//----------------------------------------
//Next states logic and Reset sequence
always @(negedge Clock)
begin
	if( Reset!=1 )
		CurrentState = NextState;
	else
		CurrentState = `FPS_AFTER_RESET_STATE;
end
//----------------------------------------

always @ (posedge Clock)
begin
	case (CurrentState)
	//----------------------------------------
	//Idle: wait for a new request
	`FPS_AFTER_RESET_STATE:
	begin
		OutputReady = 0;
		NextState = ( iInputReady == 1 ) ?
			`INITIAL_DIVISION_STATE : `FPS_AFTER_RESET_STATE;
	end
	//----------------------------------------
	//Load the operands and resolve the trivial cases
	`INITIAL_DIVISION_STATE:
	begin
		Dividend = iDividend;
		Dividend = Dividend << `SCALE;
		Divisor = iDivisor;
		Remainder = 0;
		Quotient = 0;
		if (Divisor == 0)
		begin
			//Division by zero: return a large saturated value
			Quotient[31:0] = 32'h0FFF_FFFF;
			NextState = `WRITE_DIVISION_RESULT;
		end
		else if (Divisor > Dividend)
		begin
			Remainder = Dividend;
			NextState = `WRITE_DIVISION_RESULT;
		end
		else if (Divisor == Dividend)
		begin
			Quotient = 1;
			NextState = `WRITE_DIVISION_RESULT;
		end
		else
		begin
			NextState = `PRE_CALCULATE_REMAINDER;
		end
		num_bits = 64;
	end
	//----------------------------------------
	//Shift dividend bits into the remainder until it is >= the divisor
	`PRE_CALCULATE_REMAINDER:
	begin
		Bit = (Dividend & 64'h8000000000000000 ) >> 63;
		Remainder = (Remainder << 1) | Bit;
		d = Dividend;   //remember the dividend before this shift
		Dividend = Dividend << 1;
		num_bits = num_bits - 1;
		NextState = (Remainder < Divisor) ?
			`PRE_CALCULATE_REMAINDER : `DIVISION_REVERSE_LAST_ITERATION;
	end
	//----------------------------------------
	/*
	The loop, above, always goes one iteration too far.
	To avoid inserting an "if" statement inside the loop
	the last iteration is simply reversed.
	*/
	`DIVISION_REVERSE_LAST_ITERATION:
	begin
		Dividend = d;
		Remainder = Remainder >> 1;
		num_bits = num_bits + 1;
		i = 0;
		NextState = `CALCULATE_REMAINDER;
	end
	//----------------------------------------
	//One quotient bit per clock: shift, trial-subtract, keep if non-negative
	`CALCULATE_REMAINDER:
	begin
		Bit = (Dividend & 64'h8000000000000000 ) >> 63;
		Remainder = (Remainder << 1) | Bit;
		t = Remainder - Divisor;
		//q is 1 when the trial subtraction did not go negative (bit 63 clear)
		q = !((t & 64'h8000000000000000 ) >> 63);
		Dividend = Dividend << 1;
		Quotient = (Quotient << 1) | q;
		if ( q != 0 )
			Remainder = t;
		i = i + 1;
		if (i < num_bits)
			NextState = `CALCULATE_REMAINDER;
		else
			NextState = `WRITE_DIVISION_RESULT;
	end
	//----------------------------------------
	//Will go to the IDLE leaving the Result Registers
	//with the current results until next stuff comes
	//So, stay in this state until our client sets iInputReady
	//to 0 telling us he read the result
	`WRITE_DIVISION_RESULT:
	begin
		//Simply chop to round. Select exactly `WIDTH bits so the
		//part-select matches the width of xQuotient.
		xQuotient = Quotient[`WIDTH-1:0];
		OutputReady = 1;

		NextState = (iInputReady == 0) ?
			`FPS_AFTER_RESET_STATE : `WRITE_DIVISION_RESULT;
	end
	//----------------------------------------
	//Any undefined/unknown state: fall back to idle instead of hanging
	default:
	begin
		OutputReady = 0;
		NextState = `FPS_AFTER_RESET_STATE;
	end
	endcase

end //always
endmodule
//-----------------------------------------------------------------
/rtl/Module_OMemInterface.v
0,0 → 1,47
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//--------------------------------------------------------------------------
// Module_OMemInterface
// Presents one 32-bit word of the wide iAddress / iData rows at a time on
// ADR_O / DAT_O. The word is chosen by a 3-bit select produced by the SHL
// instance, which is enabled by iWriteEnable (initial value 3'b1).
// WE_O is a direct copy of iWriteEnable.
// NOTE(review): CIRCULAR_SHIFTLEFT_POSEDGE and
// MUXFULLPARALELL_3SEL_WALKINGONE are defined elsewhere; the select is
// presumably a rotating one-hot value - confirm.
//--------------------------------------------------------------------------
module Module_OMemInterface
(
input wire Clock,
input wire Reset,
input wire iWriteEnable,
input wire [`DATA_ROW_WIDTH-1:0] iData,
input wire [`DATA_ROW_WIDTH-1:0] iAddress,
output wire [`WB_WIDTH-1:0] ADR_O,
output wire[`WB_WIDTH-1:0] DAT_O,
output wire WE_O
);

//Word select shared by the address and data muxes
wire [2:0] wWordSelect;

//Named 32-bit slices of the input rows (I1 = top word, I3 = bottom word)
wire [31:0] wAddrWord1, wAddrWord2, wAddrWord3;
wire [31:0] wDataWord1, wDataWord2, wDataWord3;

assign wAddrWord1 = iAddress[95:64];
assign wAddrWord2 = iAddress[63:32];
assign wAddrWord3 = iAddress[31:0];

assign wDataWord1 = iData[95:64];
assign wDataWord2 = iData[63:32];
assign wDataWord3 = iData[31:0];

//The write strobe is passed straight through
assign WE_O = iWriteEnable;

//Advances the word select while a write is requested
CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL
(
	.Clock(Clock),
	.Reset(Reset),
	.Initial(3'b1),
	.Enable(iWriteEnable),
	.O(wWordSelect)
);

//Address word for the currently selected slot
MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
(
	.Sel( wWordSelect ),
	.I1( wAddrWord1 ),
	.I2( wAddrWord2 ),
	.I3( wAddrWord3 ),
	.O1( ADR_O )
);

//Data word for the currently selected slot
MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX2
(
	.Sel( wWordSelect ),
	.I1( wDataWord1 ),
	.I2( wDataWord2 ),
	.I3( wDataWord3 ),
	.O1( DAT_O )
);

endmodule
/rtl/Module_RAM.v
0,0 → 1,80
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
//Dual port RAM.
 
 
// RAM_DUAL_READ_PORT
// Synchronous memory with one write port and two registered read ports.
//   DATA_WIDTH - width of each word
//   ADDR_WIDTH - width of the address inputs
//   MEM_SIZE   - highest word index (the array holds MEM_SIZE+1 words)
// A read of the address being written in the same cycle returns the
// previous contents, because all updates are non-blocking.
module RAM_DUAL_READ_PORT # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 )
(
input wire Clock,
input wire iWriteEnable,
input wire[ADDR_WIDTH-1:0] iReadAddress0,
input wire[ADDR_WIDTH-1:0] iReadAddress1,
input wire[ADDR_WIDTH-1:0] iWriteAddress,
input wire[DATA_WIDTH-1:0] iDataIn,
output reg [DATA_WIDTH-1:0] oDataOut0,
output reg [DATA_WIDTH-1:0] oDataOut1
);

reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

//Write port
always @(posedge Clock)
begin : WRITE_PORT
	if (iWriteEnable)
	begin
		Ram[iWriteAddress] <= iDataIn;
	end
end

//Read ports: both outputs are registered on every clock
always @(posedge Clock)
begin : READ_PORTS
	oDataOut0 <= Ram[iReadAddress0];
	oDataOut1 <= Ram[iReadAddress1];
end

endmodule
//--------------------------------------------------------
 
// RAM_SINGLE_READ_PORT
// Synchronous memory with one write port and one registered read port.
//   DATA_WIDTH - width of each word
//   ADDR_WIDTH - width of the address inputs
//   MEM_SIZE   - highest word index (the array holds MEM_SIZE+1 words)
// A read of the address being written in the same cycle returns the
// previous contents, because all updates are non-blocking.
module RAM_SINGLE_READ_PORT # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 )
(
input wire Clock,
input wire iWriteEnable,
input wire[ADDR_WIDTH-1:0] iReadAddress0,
input wire[ADDR_WIDTH-1:0] iWriteAddress,
input wire[DATA_WIDTH-1:0] iDataIn,
output reg [DATA_WIDTH-1:0] oDataOut0
);

reg [DATA_WIDTH-1:0] Ram [MEM_SIZE:0];

//Write port
always @(posedge Clock)
begin : WRITE_PORT
	if (iWriteEnable)
	begin
		Ram[iWriteAddress] <= iDataIn;
	end
end

//Read port: the output is registered on every clock
always @(posedge Clock)
begin : READ_PORT
	oDataOut0 <= Ram[iReadAddress0];
end

endmodule
 
 
/rtl/Module_TMemInterface.v
0,0 → 1,109
`timescale 1ns / 1ps
`include "aDefinitions.v"
//--------------------------------------------------------------------------
// Module_TMemInterface
// Read-only master that fetches three 32-bit words, one per slot, using the
// three 32-bit addresses packed in iAddress, and assembles them into oData.
//
//   iEnable  - starts/holds a transfer; also drives CYC_O. When low, the
//              slot sequencer (SHL) is held in reset.
//   GNT_I    - bus grant; the sequencer advances and data is latched only
//              while granted.
//   DAT_I    - data returned by the memory.
//   oData    - {word for slot 0, word for slot 1, word for slot 2}
//              i.e. slot 0 lands in oData[95:64], slot 2 in oData[31:0].
//   oDone    - registered copy of sequencer bit 3, gated by iEnable.
//   WE_O     - tied low (read only).
//
// NOTE(review): STB_O is declared but never driven in this module, and
// ACK_I is never read - confirm this is intended.
// NOTE(review): SHIFTLEFT_POSEDGE, FFD_POSEDGE_SYNCRONOUS_RESET and
// MUXFULLPARALELL_3SEL_WALKINGONE are defined elsewhere; the sequencer is
// presumably a one-hot value shifted left from 4'b1 - confirm.
module Module_TMemInterface
(
input wire Clock,
input wire Reset,
input wire iEnable,
input wire [`DATA_ROW_WIDTH-1:0] iAddress,
output wire [`DATA_ROW_WIDTH-1:0] oData,
output wire oDone,

input wire ACK_I,
input wire GNT_I,
input wire [`WB_WIDTH-1:0 ] DAT_I,

//WB Output Signals
output wire [`WB_WIDTH-1:0 ] ADR_O,
output wire WE_O,
output wire STB_O,
output wire CYC_O


);

wire [3:0] wSlot;            //current slot from the sequencer
wire [2:0] wSlotDelayed;     //slot bits [2:0] delayed by one register stage
wire wLastSlotDelayed;       //slot bit [3] delayed by one register stage

//Enables for the three data latches
wire wLatchWordX, wLatchWordY, wLatchWordZ;

assign wLatchWordX = wSlotDelayed[0] & GNT_I;
assign wLatchWordY = wSlotDelayed[1] & GNT_I;
assign wLatchWordZ = wSlotDelayed[2] & GNT_I;

//Bus control outputs
assign CYC_O = iEnable;
assign WE_O = 1'b0; //we only read
assign oDone = wLastSlotDelayed & iEnable;

//Slot sequencer: advances while enabled and granted,
//returns to its initial value when iEnable drops
SHIFTLEFT_POSEDGE #(4) SHL
(
	.Clock(Clock),
	.Reset(Reset | ~iEnable ),
	.Initial(4'b1),
	.Enable(iEnable & GNT_I),
	.O(wSlot)
);

//Address for the current slot
MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
(
	.Sel( wSlot[2:0] ),
	.I1(iAddress[95:64]),
	.I2(iAddress[63:32]),
	.I3(iAddress[31:0]),
	.O1( ADR_O )
);

//Delayed slot bits select which data latch captures DAT_I
FFD_POSEDGE_SYNCRONOUS_RESET # ( 3 ) FFD_LATHCNOW
(
	.Clock(Clock),
	.Reset(Reset),
	.Enable( 1'b1 ),
	.D(wSlot[2:0]),
	.Q(wSlotDelayed)
);

//Delayed "past the last slot" flag
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD_DONE
(
	.Clock(Clock),
	.Reset(Reset),
	.Enable( 1'b1 ),
	.D(wSlot[3]),
	.Q(wLastSlotDelayed)
);

//Data latches, one per slot
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDX
(
	.Clock(Clock),
	.Reset(Reset),
	.Enable( wLatchWordX ),
	.D(DAT_I),
	.Q(oData[95:64])
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDY
(
	.Clock(Clock),
	.Reset(Reset),
	.Enable( wLatchWordY ),
	.D(DAT_I),
	.Q(oData[63:32])
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFDZ
(
	.Clock(Clock),
	.Reset( Reset ),
	.Enable( wLatchWordZ ),
	.D(DAT_I),
	.Q(oData[31:0])
);

endmodule
//--------------------------------------------------------------------------
/rtl/Module_BusArbitrer.v
0,0 → 1,80
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
 
// Module_BusArbitrer
// Arbiter that hands the bus to one requester at a time.
//
//   iRequest   - one request line per core.
//   oGrant     - iRequest masked with the current owner mask, so at most
//                the current owner can be granted.
//   oBusSelect - index of the current owner.
//
// The owner index (UP1) and the owner mask (SHL_A) both advance on every
// clock in which the current owner is NOT requesting, and hold while it is.
// NOTE(review): UPCOUNTER_POSEDGE and CIRCULAR_SHIFTLEFT_POSEDGE_EX are
// defined elsewhere; index and mask presumably stay in step (mask is a
// rotating one-hot starting at bit 0, index counts up from 0) - confirm,
// in particular when `MAX_CORES is not a power of two.
module Module_BusArbitrer
(
input wire Clock,
input wire Reset,

input wire [`MAX_CORES-1:0] iRequest,
output wire [`MAX_CORES-1:0] oGrant,
output wire [`MAX_CORE_BITS-1:0] oBusSelect
);

wire [`MAX_CORE_BITS-1:0] wOwnerIndex;   //index of the current bus owner
wire [`MAX_CORES-1:0] wOwnerMask;        //mask selecting the current owner
wire wOwnerRequesting;                   //current owner still wants the bus
wire wAdvance;                           //move on to the next candidate

//Poll the request line of the current owner
assign wOwnerRequesting = iRequest[ wOwnerIndex ];

//When the owner relinquishes the bus (by negating its request) the
//arbiter moves to the next position; while it requests, nothing moves
assign wAdvance = ~wOwnerRequesting;

//Just one requester can have the bus at a given point in time,
//the mask makes sure this happens
assign oGrant = iRequest & wOwnerMask;

assign oBusSelect = wOwnerIndex;

UPCOUNTER_POSEDGE # (`MAX_CORE_BITS ) UP1
(
	.Clock( Clock ),
	.Reset( Reset ),
	.Initial( `MAX_CORE_BITS'd0 ),
	.Enable( wAdvance ),
	.Q( wOwnerIndex )
);

CIRCULAR_SHIFTLEFT_POSEDGE_EX # (`MAX_CORES) SHL_A
(
	.Clock( Clock ),
	.Reset( Reset ),
	.Initial(`MAX_CORES'b1),
	.Enable( wAdvance ),
	.O( wOwnerMask )
);

endmodule
/rtl/Theia.v
0,0 → 1,341
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------------
// THEIA top level.
// Instantiates `MAX_CORES THEIACORE cores, one output memory (OMEM) per
// core, `MAX_TMEM_BANKS texture memory banks (TMEM), one bus arbiter for
// the shared bus and one arbiter per TMEM bank, and the cross-bar that
// connects every core to every TMEM bank.
//
// RCOMMIT_O / HDL_O / DONE_O are the AND of the corresponding per-core
// flags over ALL cores (reduction AND, so they follow `MAX_CORES).
module THEIA
(

input wire CLK_I, //Input clock
input wire RST_I, //Input reset
//Theia Interfaces
input wire MST_I, //Master signal, THEIA enters configuration mode
//when this gets asserted (see documentation)
//Wish Bone Interface
input wire [`WB_WIDTH-1:0] DAT_I, //Input data bus (Wishbone)
input wire ACK_I, //Input ack
output wire ACK_O, //Output ack
input wire [`WB_WIDTH-1:0] ADR_I, //Input address
input wire WE_I, //Input write enable
input wire STB_I, //Strobe signal, see wishbone documentation
input wire CYC_I, //Bus cycle signal, see wishbone documentation
input wire [1:0] TGA_I, //Input address tag, see THEIA documentation
input wire [`MAX_CORES-1:0] SEL_I, //The WishBone Master uses this signal to configure a specific core (TBD, not sure is needed)
input wire [`MAX_CORES-1:0] RENDREN_I,

input wire [`MAX_CORE_BITS-1:0] OMBSEL_I, //Output memory bank select
input wire [`WB_WIDTH-1:0] OMADR_I, //Output address (relative to current bank)
output wire [`WB_WIDTH-1:0] OMEM_O, //Output data bus (Wishbone)

input wire [`WB_WIDTH-1:0] TMDAT_I,
input wire [`WB_WIDTH-1:0] TMADR_I,
input wire TMWE_I,
input wire [`MAX_TMEM_BANKS-1:0] TMSEL_I,
//Control Register
input wire [15:0] CREG_I,
output wire HDL_O,
input wire STDONE_I,
input wire HDA_I,
input wire HDLACK_I,
output wire RCOMMIT_O,
output wire DONE_O

);

//Per-bank TMEM write enables, derived from TMSEL_I and TMWE_I
wire [`MAX_TMEM_BANKS-1:0] wTMemWriteEnable;
SELECT_1_TO_N # ( `MAX_TMEM_BANKS, `MAX_TMEM_BANKS ) TMWE_SEL
(
	.Sel(TMSEL_I),
	.En(TMWE_I),
	.O(wTMemWriteEnable)
);


wire [`MAX_CORES-1:0] wDone;
wire [`MAX_CORES-1:0] wBusGranted,wBusRequest;
wire [`MAX_CORE_BITS-1:0] wBusSelect;

wire [`MAX_CORES-1:0] wACK_O;

//Per-core output memory write port
wire wOMem_WE[`MAX_CORES-1:0];
wire [`WB_WIDTH-1:0] wOMEM_Address[`MAX_CORES-1:0];
wire [`WB_WIDTH-1:0] wOMEM_Dat[`MAX_CORES-1:0];

//Slave-side Wishbone signals, gated per core by SEL_I
wire [`MAX_CORES-1:0] wSTB_I;
wire [`MAX_CORES-1:0] wMST_I;
wire [`MAX_CORES-1:0] wCYC_I;
wire [1:0] wTGA_I[`MAX_CORES-1:0];


//CROSS-BAR cables

wire [`WB_WIDTH-1:0] wCrossBarDataRow[`MAX_TMEM_BANKS-1:0]; //Horizontal grid buses coming from each bank
wire [`WB_WIDTH-1:0] wCrossBarDataCollumn[`MAX_CORES-1:0]; //Vertical grid buses coming from each core.
wire [`WB_WIDTH-1:0] wTMemReadAdr[`MAX_CORES-1:0]; //Horizontal grid buses coming from each core (virtual addr).
wire [`WB_WIDTH-1:0] wCrossBarAdressCollumn[`MAX_CORES-1:0]; //Vertical grid buses coming from each core. (physical addr).
wire [`WB_WIDTH-1:0] wCrossBarAddressRow[`MAX_TMEM_BANKS-1:0]; //Horizontal grid buses coming from each bank.

wire wCORE_2_TMEM__Req[`MAX_CORES-1:0];
wire [`MAX_TMEM_BANKS -1:0] wBankReadRequest[`MAX_CORES-1:0];


wire [`MAX_CORES-1:0] wBankReadGranted[`MAX_TMEM_BANKS-1:0];
wire wTMEM_2_Core__Grant[`MAX_CORES-1:0];

wire[`MAX_CORE_BITS-1:0] wCurrentCoreSelected[`MAX_TMEM_BANKS-1:0];
wire[`WIDTH-1:0] wCoreBankSelect[`MAX_CORES-1:0];
wire [`MAX_CORES-1:0] wHDL_O;


wire [`MAX_CORES-1:0] wHostDataLatched;
wire [`MAX_CORES-1:0] wRCOMMIT_O;
wire [`MAX_CORES-1:0] wRCommited;

//Global status flags: asserted only when EVERY core asserts its flag.
//Reduction AND is used so the logic follows `MAX_CORES instead of being
//hard-wired to four cores.
assign RCOMMIT_O = &wRCommited;
assign HDL_O = &wHostDataLatched;
assign DONE_O = &wDone;


//----------------------------------------------------------------
//Arbiter for the bus shared by the cores
Module_BusArbitrer ARB1
(
	.Clock( CLK_I ),
	.Reset( RST_I ),
	.iRequest( wBusRequest ),
	.oGrant( wBusGranted ),
	.oBusSelect( wBusSelect )
);
//----------------------------------------------------------------

//In configuration mode (MST_I) the ack comes from any selected core,
//otherwise from the core that currently owns the bus
wire wMaskedACK_O;
assign wMaskedACK_O = (SEL_I & wACK_O) ? 1'b1 : 1'b0;
assign ACK_O = ( MST_I ) ? wMaskedACK_O : wACK_O[ wBusSelect];

//Output memory read-back: OMBSEL_I picks which core's OMEM drives OMEM_O
wire [`WB_WIDTH-1:0] wDataOut[`MAX_CORES-1:0];
assign OMEM_O = wDataOut[ OMBSEL_I ];

genvar i;
generate
for (i = 0; i < `MAX_CORES; i = i +1)
begin : CORE
	//Slave control signals only reach the cores selected by SEL_I
	assign wMST_I[i] = (SEL_I[i]) ? MST_I : 0;
	assign wSTB_I[i] = (SEL_I[i]) ? STB_I : 0;
	assign wCYC_I[i] = (SEL_I[i]) ? CYC_I : 0;
	assign wTGA_I[i] = (SEL_I[i]) ? TGA_I : 0;

	THEIACORE CTHEIA
	(
		.CLK_I( CLK_I ),
		.RST_I( RST_I ),
		.RENDREN_I( RENDREN_I[i] ),
		//Slave signals
		.ADR_I( ADR_I ),
		.WE_I( WE_I ),
		.STB_I( wSTB_I[i] ),
		.ACK_I( ACK_I ),
		.CYC_I( wCYC_I[i] ),
		.MST_I( wMST_I[i] ),
		.TGA_I( wTGA_I[i] ),
		.CREG_I( CREG_I ),
		//Master Signals
		.ACK_O( wACK_O[i] ),
		.CYC_O( wBusRequest[i] ),
		.GNT_I( wBusGranted[i] ),
		`ifdef DEBUG
		.iDebug_CoreID( i ),
		`endif
		.OMEM_WE_O( wOMem_WE[i] ),
		.OMEM_ADR_O( wOMEM_Address[i] ),
		.OMEM_DAT_O( wOMEM_Dat[i] ),
		.TMEM_DAT_I( wCrossBarDataCollumn[i] ),
		.TMEM_ADR_O( wTMemReadAdr[i] ),
		.TMEM_CYC_O( wCORE_2_TMEM__Req[i] ),
		.TMEM_GNT_I( wTMEM_2_Core__Grant[i] ),
		.HDA_I( HDA_I ), //Host data available
		.HDL_O( wHDL_O[i] ), //Host data Latched
		//NOTE(review): driven from ~HDL_O rather than the HDLACK_I port - confirm intended
		.HDLACK_I( ~HDL_O ), //Host data Latched ACK
		.STDONE_I( STDONE_I ),
		.RCOMMIT_O( wRCOMMIT_O[i] ),
		//Other
		.DAT_I( DAT_I ),
		.DONE_O( wDone[i] )

	);

	//1-bit register tracking this core's RCOMMIT_O; reset by RST_I or HDLACK_I
	UPCOUNTER_POSEDGE # (1) UP_RCOMMIT
	(
		.Clock( CLK_I ),
		.Reset( RST_I | HDLACK_I ),
		.Initial( 1'b0 ),
		.Enable( wRCOMMIT_O[i] ),
		.Q(wRCommited[i])
	);

	//1-bit register tracking this core's HDL_O; reset by RST_I or HDLACK_I
	UPCOUNTER_POSEDGE # (1) UP_GREADY
	(
		.Clock( CLK_I ),
		.Reset( RST_I | HDLACK_I ),
		.Initial( 1'b0 ),
		.Enable( wHDL_O[i] ),
		.Q(wHostDataLatched[i])
	);

	//Output memory of this core: written by the core, read through OMADR_I
	RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 250000 ) OMEM //500000 ) OMEM
	(
		.Clock( CLK_I ),
		.iWriteEnable( wOMem_WE[i] ),
		.iWriteAddress( wOMEM_Address[i] ),
		.iDataIn( wOMEM_Dat[i] ),
		.iReadAddress0( OMADR_I ),
		.oDataOut0( wDataOut[i] )
	);


	//If there are "n" banks, memory location "X" would reside in bank number X mod n.
	//X mod 2^n == X & (2^n - 1)
	assign wCoreBankSelect[i] = (wTMemReadAdr[i] & (`MAX_TMEM_BANKS-1));

	//Each core has 1 bank request slot
	//Each slot has MAX_TMEM_BANKS bits. Only 1 bit can
	//be 1 at any given point in time. All bits zero means,
	//we are not requesting to read from any memory bank.
	//NOTE(review): the second parameter is `MAX_CORES while the output is
	//`MAX_TMEM_BANKS bits wide - only consistent if both are equal; confirm.
	SELECT_1_TO_N # ( `WIDTH, `MAX_CORES ) READDRQ
	(
		.Sel(wCoreBankSelect[ i]),
		.En(wCORE_2_TMEM__Req[i]),
		.O(wBankReadRequest[i])
	);

	//The address coming from the core is a virtual address, meaning it assumes a linear
	//address space, however, since memory is interleaved in an n-way memory we transform
	//the virtual address into a physical address (relative to the bank) like this
	//fadr = vadr / n = vadr >> log2(n)
	//NOTE(review): the shift amount is `MAX_CORE_BITS, not log2(`MAX_TMEM_BANKS) -
	//equal only when the number of banks matches the number of cores; confirm.
	assign wCrossBarAdressCollumn[i] = (wTMemReadAdr[i] >> `MAX_CORE_BITS);

	//Connect the granted signal to Arbiter of the Bank we want to read from
	assign wTMEM_2_Core__Grant[i] = wBankReadGranted[wCoreBankSelect[i]][i];

end
endgenerate
////////////// CROSS-BAR INTERCONECTION//////////////////////////

genvar Core,Bank;
generate
for (Bank = 0; Bank < `MAX_TMEM_BANKS; Bank = Bank + 1)
begin : BANK

	//The memory bank itself
	RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, 50000 ) TMEM
	(
		.Clock( CLK_I ),
		.iWriteEnable( wTMemWriteEnable[Bank] ),
		.iWriteAddress( TMADR_I ),
		.iDataIn( TMDAT_I ),
		.iReadAddress0( wCrossBarAddressRow[Bank] ), //Connect to the Row of the grid
		.oDataOut0( wCrossBarDataRow[Bank] ) //Connect to the Row of the grid
	);

	//Request vector for this bank: bit k is core k's request for this bank.
	//Filled in by the CORE_CONNECT loop below so it scales with `MAX_CORES.
	wire [`MAX_CORES-1:0] wBankRequestVector;

	//Arbiter will Round-Robin Cores attempting to read from the same Bank
	//at a given point in time
	wire [`MAX_CORES-1:0] wBankReadGrantedDelay[`MAX_TMEM_BANKS-1:0];
	Module_BusArbitrer ARB_TMEM
	(
		.Clock( CLK_I ),
		.Reset( RST_I ),
		.iRequest( wBankRequestVector ),
		.oGrant( wBankReadGrantedDelay[Bank] ), //The bit of the core granted to read from this Bank
		.oBusSelect( wCurrentCoreSelected[Bank] ) //The index of the core granted to read from this Bank
	);

	//The grant is registered once before it is handed back to the cores
	FFD_POSEDGE_SYNCRONOUS_RESET # ( `MAX_CORES ) FFD_GNT
	(
		.Clock(CLK_I),
		.Reset(RST_I),
		.Enable( 1'b1 ),
		.D(wBankReadGrantedDelay[Bank]),
		.Q(wBankReadGranted[Bank])
	);

	//Create the Cross-Bar interconnection grid now, rows are connected to the memory banks,
	//while columns are connected to the cores, 2 or more cores can not read from the same
	//bank at any given point in time
	for (Core = 0; Core < `MAX_CORES; Core = Core + 1)
	begin: CORE_CONNECT
		//Collect this core's request bit for the current bank
		assign wBankRequestVector[ Core ] = wBankReadRequest[ Core ][ Bank ];
		//Connect the Data Column of this core to the Data Row of current bank, only if the Core is looking for data stored in this bank
		assign wCrossBarDataCollumn[ Core ] = ( wCoreBankSelect[ Core ] == Bank ) ? wCrossBarDataRow[ Bank ] : `WB_WIDTH'bz;
		//Connect the Address Row of this Bank to the Address Column of the core, only if the Arbiter selected this core for reading
		assign wCrossBarAddressRow[ Bank ] = ( wCurrentCoreSelected[ Bank ] == Core ) ? wCrossBarAdressCollumn[Core]: `WB_WIDTH'bz;
	end
end
endgenerate

////////////// CROSS-BAR INTERCONECTION//////////////////////////
//----------------------------------------------------------------

endmodule
//---------------------------------------------------------------------------
/rtl/Module_ControlRegister.v
0,0 → 1,28
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//-------------------------------------------------------------------
// ControlRegister
// 16-bit register with synchronous reset. On every rising clock edge it
// is cleared when Reset is asserted, otherwise it loads iControlRegister.
// oControlRegister always shows the registered value.
module ControlRegister
(
input wire Clock,
input wire Reset,
input wire[15:0] iControlRegister,
output wire[15:0] oControlRegister
);

//Storage for the control word
reg [15:0] rStoredControl;

always @ (posedge Clock)
begin : CONTROL_REG
	if ( Reset )
	begin
		rStoredControl <= 16'b0;
	end
	else
	begin
		rStoredControl <= iControlRegister;
	end
end

//Drive the output from the register
assign oControlRegister = rStoredControl;

endmodule
//-------------------------------------------------------------------
/rtl/Module_InstructionDecode.v
0,0 → 1,156
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
// Instruction Decode Unit (IDU).
//
// Splits an encoded instruction into its fields, drives the two operand
// read addresses, and presents the operation, destination and both source
// operands to the execution stage.
//   * oOperation / oDestination are registered while iInstructionAvailable
//     is high.
//   * oDataReadyForExe is iInstructionAvailable delayed by one clock.
//   * oSource0 / oSource1 are combinational: each is either the value read
//     from RAM (or the immediate) or iDataForward, depending on whether the
//     operand address equals iLastDestination.
module InstructionDecode
(
input wire Clock,
input wire Reset,
input wire iInstructionAvailable,
input wire[`INSTRUCTION_WIDTH-1:0] iEncodedInstruction,
input wire[`DATA_ROW_WIDTH-1:0] iRamValue0,
input wire[`DATA_ROW_WIDTH-1:0] iRamValue1,
output wire[`DATA_ADDRESS_WIDTH-1:0] oRamAddress0,oRamAddress1,
output wire[`INSTRUCTION_OP_LENGTH-1:0] oOperation,
output wire [`DATA_ROW_WIDTH-1:0] oSource0,oSource1,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDestination,
input wire [`DATA_ROW_WIDTH-1:0] iDataForward,
input wire [`DATA_ADDRESS_WIDTH-1:0] iLastDestination,

`ifdef DEBUG
input wire [`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP,
output wire [`ROM_ADDRESS_WIDTH-1:0] oDebug_CurrentIP,
`endif

output wire oDataReadyForExe

);

	//------------------------------------------------------------------
	// Internal nets
	//------------------------------------------------------------------
	wire wInmediateOperand;
	wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1;
	wire wSource0AddrssEqualsLastDestination,wSource1AddrssEqualsLastDestination;
	wire wTriggerSource0DataForward,wTriggerSource1DataForward;

`ifdef DEBUG
	// Debug only: pass the current instruction pointer straight through.
	assign oDebug_CurrentIP = iDebug_CurrentIP;
`endif

	//------------------------------------------------------------------
	// Field extraction
	//------------------------------------------------------------------
	// Flag bit that marks an instruction carrying an immediate (scalar)
	// operand.
	assign wInmediateOperand = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS];

	// Second operand address always comes from bits [31:16].
	assign oRamAddress1 = iEncodedInstruction[31:16];

	// First operand address: chosen between the Source0 field [15:0] and
	// the destination field [47:32], steered by the immediate flag. The
	// original comment says the destination's previous value is requested
	// for immediate operations (mux polarity is defined in the mux module).
	MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX
	(
		.Sel( wInmediateOperand ),
		.I1( iEncodedInstruction[15:0] ),
		.I2( iEncodedInstruction[47:32] ),
		.O1( oRamAddress0 )
	);

	//------------------------------------------------------------------
	// Operand values before forwarding
	//------------------------------------------------------------------
	// Source0 is always the first RAM read port.
	assign wSource0 = iRamValue0;

	// Source1 is the second RAM read port, unless the instruction carries
	// an immediate: then instruction bits [31:0] are placed in the most
	// significant 32 bits of the row and the remaining 64 bits are zero.
	assign wSource1 = wInmediateOperand
		? {oRamAddress1, iEncodedInstruction[15:0], 64'b0}
		: iRamValue1;

	//------------------------------------------------------------------
	// Data forwarding decision
	//------------------------------------------------------------------
	assign wSource0AddrssEqualsLastDestination = (oRamAddress0 == iLastDestination);
	assign wSource1AddrssEqualsLastDestination = (oRamAddress1 == iLastDestination);

	assign wTriggerSource0DataForward = wSource0AddrssEqualsLastDestination;
	// An immediate is never replaced by forwarded data.
	assign wTriggerSource1DataForward = !wInmediateOperand && wSource1AddrssEqualsLastDestination;

	//------------------------------------------------------------------
	// Forwarding muxes: select between the fetched operand and iDataForward
	//------------------------------------------------------------------
	MUXFULLPARALELL_96bits_2SEL Source0_Mux
	(
		.Sel( wTriggerSource0DataForward ),
		.I1( wSource0 ),
		.I2( iDataForward ),
		.O1( oSource0 )
	);

	MUXFULLPARALELL_96bits_2SEL Source1_Mux
	(
		.Sel( wTriggerSource1DataForward ),
		.I1( wSource1 ),
		.I2( iDataForward ),
		.O1( oSource1 )
	);

	//------------------------------------------------------------------
	// Registered outputs
	//------------------------------------------------------------------
	// "Decoded data is ready" strobe: one clock after the instruction
	// becomes available to the IDU.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.Enable( 1'b1 ),
		.D( iInstructionAvailable ),
		.Q( oDataReadyForExe )
	);

	// Destination address, captured from bits [47:32].
	FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.Enable( iInstructionAvailable ),
		.D( iEncodedInstruction[47:32] ),
		.Q( oDestination )
	);

	// Operation code, captured from the top INSTRUCTION_OP_LENGTH bits.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.Enable( iInstructionAvailable ),
		.D( iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] ),
		.Q( oOperation )
	);

endmodule
 
/rtl/Module_WishBoneMaster.v
0,0 → 1,147
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/*
In order to read the geometry, this unit behaves as a WishBone bus master.
Performing single read bus cycles should be sufficient.
A 32-bit bus width was chosen for simplicity.
*/
 
// WishBone master used by the GPU to perform simple read and write cycles.
//
// Bus side   : CLK_I, RST_I, ACK_I, GNT_I, DAT_I, DAT_O, ADR_O, WE_O,
//              STB_O, CYC_O, TGC_O.
// Core side  : iEnable requests the bus, iBusCyc_Type selects read or
//              write, iAddress/iAddress_Set load the address counters,
//              iData is the write data, oData/oDataReady return read data.
//
// NOTE(review): TGC_O is declared as an output but nothing in this module
// drives it - confirm whether that is intentional.
module WishBoneMasterUnit
(
//WB Input signals
input wire CLK_I,
input wire RST_I,
input wire ACK_I,
input wire GNT_I, //granted signal from bus arbiter
input wire [`WB_WIDTH-1:0 ] DAT_I,
output wire [`WB_WIDTH-1:0] DAT_O,


//WB Output Signals
output wire [`WB_WIDTH-1:0 ] ADR_O,
output wire WE_O,
output wire STB_O,
output wire CYC_O,
output wire [1:0] TGC_O,

//Signals from inside the GPU
input wire iEnable,
input wire iBusCyc_Type,
input wire [`WIDTH-1:0 ] iAddress,
input wire iAddress_Set,
output wire oDataReady,
input wire [`WIDTH-1:0 ] iData,
output wire [`WIDTH-1:0 ] oData

);

	//------------------------------------------------------------------
	// Internal nets
	//------------------------------------------------------------------
	wire wEnable;
	wire wEnable_Delayed;
	wire wReadOperation;
	wire wDelayWE;
	wire wDelayDataReady;
	wire [`WB_WIDTH-1:0 ] wReadADR_O,wWriteADR_O;

	//------------------------------------------------------------------
	// Bus request / ownership
	//------------------------------------------------------------------
	// CYC_O high means we are requesting bus ownership.
	assign CYC_O = iEnable;
	// The unit is only active while it requests the bus AND owns it.
	assign wEnable = GNT_I & iEnable;

	//------------------------------------------------------------------
	// Cycle type
	//------------------------------------------------------------------
	assign wReadOperation = (iBusCyc_Type == `WB_SIMPLE_READ_CYCLE);
	assign WE_O = wEnable && (iBusCyc_Type == `WB_SIMPLE_WRITE_CYCLE);

	//------------------------------------------------------------------
	// Strobe generation
	//------------------------------------------------------------------
	// wEnable delayed by one clock.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD88
	(
		.Clock( CLK_I ),
		.Reset( RST_I ),
		.Enable( 1'b1 ),
		.D( wEnable ),
		.Q( wEnable_Delayed )
	);

	// Strobe starts one cycle after the unit becomes active, stays up
	// only while still active, and drops whenever the slave acknowledges.
	assign STB_O = wEnable & wEnable_Delayed & ~ACK_I;

	//------------------------------------------------------------------
	// Write data: driven only during an active non-read cycle, otherwise
	// the data bus is released (high impedance).
	//------------------------------------------------------------------
	assign DAT_O = ( ~wReadOperation & wEnable ) ? iData : `WB_WIDTH'bz;

	//------------------------------------------------------------------
	// Address generation
	//------------------------------------------------------------------
	// Separate read and write address counters; ADR_O follows the one
	// matching the current cycle type.
	assign ADR_O = ( wReadOperation ) ? wReadADR_O : wWriteADR_O;

	// Read address: loaded with iAddress when iAddress_Set is high,
	// incremented on each acknowledge received while the bus is granted.
	UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_READ_ADDRESS
	(
		.Clock( CLK_I ),
		.Reset( iAddress_Set ),
		.Enable( (ACK_I & GNT_I) | iAddress_Set ),
		.Initial( iAddress ),
		.Q( wReadADR_O )
	);

	// WE_O delayed by one clock; qualifies the write address increment.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
	(
		.Clock( CLK_I ),
		.Reset( RST_I ),
		.Enable( 1'b1 ),
		.D( WE_O ),
		.Q( wDelayWE )
	);

	// Write address: loaded with iAddress when iAddress_Set is high,
	// incremented on each acknowledge of a (delayed) write cycle.
	UPCOUNTER_POSEDGE # (`WIDTH) WBM_O_WRITE_ADDRESS
	(
		.Clock( CLK_I ),
		.Reset( iAddress_Set ),
		.Enable( (wDelayWE & ACK_I ) | iAddress_Set ),
		.Initial( iAddress ),
		.Q( wWriteADR_O )
	);

	//------------------------------------------------------------------
	// Read data capture
	//------------------------------------------------------------------
	// NOTE(review): this register is clocked by the rising edge of ACK_I
	// rather than CLK_I, so DAT_I is sampled (and the register cleared when
	// the unit is inactive) only on an ACK_I rising edge - confirm this is
	// acceptable for the target technology.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
	(
		.Clock( ACK_I ),
		.Reset( ~wEnable ),
		.Enable( wReadOperation ),
		.D( DAT_I ),
		.Q( oData )
	);

	// Registered copy of ACK_I during read cycles; cleared while inactive.
	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
	(
		.Clock( CLK_I ),
		.Reset( ~wEnable ),
		.Enable( wReadOperation ),
		.D( ACK_I ),
		.Q( wDelayDataReady )
	);

	// Read data is flagged as ready only while the unit is still active.
	assign oDataReady = wEnable & wDelayDataReady;

endmodule
 
/rtl/Module_ROM.v
0,0 → 1,703
 
 
`define ONE (32'h1 << `SCALE)
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
/*
I can't synthesize roms, the rom needs to be adapted depending on the
final target silicon.
*/
 
 
//--------------------------------------------------------
// ROM: hard-coded micro-code store.
// Purely combinational: the case statement below maps Address to the
// instruction word I. Addresses 0 to 279 are populated; any other address
// returns a fixed filler word and, when DEBUG is defined, prints an error
// message that includes iDebug_CoreID.
// The TAG_* / LABEL_* macros defined inside the case statement hold the
// addresses of the entry points and jump targets marked by the matching
// "//NAME:" comments in the table.
module ROM
(
input wire[`ROM_ADDRESS_WIDTH-1:0] Address,
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output reg [`INSTRUCTION_WIDTH-1:0] I
);


always @( Address )
begin
case (Address)
//Hardcoded stuff :(
`define RAY_INSIDE_BOX `R3
`define CURRENT_LIGHT_POS `CREG_FIRST_LIGTH //TODO: CAHNEG T
`define CURRENT_LIGHT_DIFFUSE 16'h6

//-----------------------------------------------------------------
//Addresses of the entry points (TAG_*) and jump targets (LABEL_*) below
`define TAG_PIXELSHADER 16'd278
`define TAG_USERCONSTANTS 16'd276
`define TAG_PSU_UCODE_ADRESS2 16'd248
`define TAG_PSU_UCODE_ADRESS 16'd232
`define LABEL_TCC_EXIT 16'd231
`define TAG_TCC_UCODE_ADDRESS 16'd190
`define LABEL_BIU4 16'd189
`define LABEL_BIU3 16'd179
`define LABEL_BIU2 16'd176
`define LABEL_BIU1 16'd174
`define TAG_BIU_UCODE_ADDRESS 16'd157
`define LABEL_HIT 16'd155
`define LABEL15 16'd153
`define LABEL14 16'd151
`define LABEL13 16'd149
`define LABEL_TEST_XY_PLANE 16'd144
`define LABEL12 16'd142
`define LABEL11 16'd140
`define LABEL10 16'd138
`define LABEL_TEST_XZ_PLANE 16'd132
`define LABEL9 16'd130
`define LABEL8 16'd128
`define LABEL7 16'd126
`define LABEL_TEST_YZ_PLANE 16'd120
`define LABEL_RAY_INSIDE_BOX 16'd117
`define LABEL_ELSEZ 16'd116
`define LABEL6 16'd113
`define LABEL_ELESE_IFZ 16'd109
`define LABEL5 16'd106
`define LABEL_TEST_RAY_Z_ORIGEN 16'd102
`define LABEL_ELSEY 16'd101
`define LABEL4 16'd98
`define LABEL_ELESE_IFY 16'd94
`define LABEL3 16'd91
`define LABEL_TEST_RAY_Y_ORIGEN 16'd87
`define LABEL_ELSEX 16'd86
`define LABEL2 16'd83
`define LABEL_ELSE_IFX 16'd79
`define LABEL1 16'd76
`define LABEL_TEST_RAY_X_ORIGEN 16'd72
`define TAG_AABBIU_UCODE_ADDRESS 16'd69
`define LABEL_ALLDONE 16'd67
`define LABEL_NPG_NEXT_ROW 16'd63
`define TAG_NPG_UCODE_ADDRESS 16'd55
`define TAG_RGU_UCODE_ADDRESS 16'd47
`define TAG_CPPU_UCODE_ADDRESS 16'd44
`define LABEL_IS_NO_HIT 16'd43
`define LABEL_IS_HIT 16'd39
`define TAG_ADRR_MAIN 16'd37


//-------------------------------------------------------------------------
//Default values for some registers after reset
//-------------------------------------------------------------------------
//This is the first code that gets executed after the machine is
//externally configured, i.e. after MST_I goes from 1 to zero.
//It sets initial values for some of the internal registers

0: I = { `ZERO ,`CREG_LAST_t ,`VOID ,`VOID };
//Set the last 't' to a very positive value (500)
1: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
2: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID };
3: I = { `COPY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID };


//Calculate the initial linear address for ADR_O
//this is: (X_initial + RESOLUTION_Y*Y_initial) * 3.
//Notice that we need to use 'unscaled' i.e. integer
//values because the result of the multiplication by
//the resolution is too large to fit a fixed point
//representation.

4: I = { `COPY ,`R1 ,`CREG_RESOLUTION ,`VOID };
5: I = { `UNSCALE ,`R1 ,`R1 ,`VOID };
6: I = { `SETX ,`R1 ,32'h1 };
7: I = { `SETZ ,`R1 ,32'h0 };
8: I = { `COPY ,`R2 ,`CREG_PIXEL_2D_INITIAL_POSITION ,`VOID };
9: I = { `UNSCALE ,`R2 ,`R2 ,`VOID };

//Ok lets start by calculating RESOLUTION_Y*Y_initial
10: I = { `IMUL ,`R1 ,`R1 ,`R2 };
11: I = { `COPY ,`R2 ,`R1 ,`VOID };
12: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY };

//now X_initial + RESOLUTION_Y*Y_initial
13: I = { `ADD ,`R3 ,`R1 ,`R2 };
14: I = { `COPY ,`R2 ,`R1 ,`VOID };
15: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_ZZZ };
16: I = { `ADD ,`R3 ,`R3 ,`R2 };
17: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX };

//finally multiply by 3 to get:
//(X_initial + RESOLUTION_Y*Y_initial) * 3 voila!
18: I = { `SETX ,`R2 ,32'h3 };
19: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_XXX };
20: I = { `IMUL ,`CREG_PIXEL_PITCH ,`R3 ,`R2 };

//By this point you should be wondering why not
//just do DOT R1 [1 Resolution_Y 0] [X_initial Y_initial 0 ]?
//well because DOT uses fixed point and the result may not
//fit :(

//Transform from fixed point to integer
//UNSCALE CREG_PIXEL_PITCH CREG_PIXEL_PITCH VOID
21: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID };

22: I = { `SETX ,`CREG_3 ,32'h3 };
23: I = { `SWIZZLE3D ,`CREG_3 ,`SWIZZLE_XXX };

24: I = { `SETX ,`CREG_012 ,32'h0 };
25: I = { `SETY ,`CREG_012 ,32'h1 };
26: I = { `SETZ ,`CREG_012 ,32'h2 };
27: I = { `COPY ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_012 ,`VOID };
28: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID };
29: I = { `ZERO ,`CREG_ZERO ,`VOID ,`VOID };

30: I = { `ZERO ,`R1 ,`VOID ,`VOID };
31: I = { `ZERO ,`R2 ,`VOID ,`VOID };
32: I = { `ZERO ,`R3 ,`VOID ,`VOID };
33: I = { `ZERO ,`R4 ,`VOID ,`VOID };
34: I = { `ZERO ,`R5 ,`VOID ,`VOID };
35: I = { `ZERO ,`R99 ,`VOID ,`VOID };
36: I = { `RETURN ,`RT_TRUE };

//----------------------------------------------
//TAG_ADRR_MAIN:

37: I = { `CALL ,`ENTRYPOINT_ADRR_BIU ,`VOID ,`VOID };
38: I = { `JEQX ,`LABEL_IS_NO_HIT ,`R99 ,`CREG_ZERO };

//LABEL_IS_HIT:
39: I = { `CALL ,`ENTRYPOINT_ADRR_TCC ,`VOID ,`VOID };
40: I = { `NOP ,`RT_FALSE };
41: I = { `RETURN ,`RT_TRUE };
42: I = { `NOP ,`RT_FALSE };

//LABEL_IS_NO_HIT:
43: I = { `RETURN ,`RT_FALSE };


//----------------------------------------------------------------------
//Micro code for CPPU
//TAG_CPPU_UCODE_ADDRESS:


44: I = { `SUB ,`R1 ,`CREG_PROJECTION_WINDOW_MAX ,`CREG_PROJECTION_WINDOW_MIN };
45: I = { `DIV ,`CREG_PROJECTION_WINDOW_SCALE ,`R1 ,`CREG_RESOLUTION };
46: I = { `RETURN ,`RT_FALSE };

//----------------------------------------------------------------------
//Micro code for RGU
//TAG_RGU_UCODE_ADDRESS:


47: I = { `MUL ,`R1 ,`CREG_PIXEL_2D_POSITION ,`CREG_PROJECTION_WINDOW_SCALE };
48: I = { `ADD ,`R1 ,`R1 ,`CREG_PROJECTION_WINDOW_MIN };
49: I = { `SUB ,`CREG_UNORMALIZED_DIRECTION ,`R1 ,`CREG_CAMERA_POSITION };
50: I = { `MAG ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`VOID };
51: I = { `DIV ,`CREG_RAY_DIRECTION ,`CREG_UNORMALIZED_DIRECTION ,`R2 };
52: I = { `DEC ,`CREG_LAST_COL ,`CREG_PIXEL_2D_FINAL_POSITION ,`VOID };
53: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
54: I = { `RETURN ,`RT_FALSE };
//----------------------------------------------------------------------
//Next Pixel generation Code (NPG)
//TAG_NPG_UCODE_ADDRESS:

55: I = { `ZERO ,`CREG_TEXTURE_COLOR ,`VOID ,`VOID };
56: I = { `SETX ,`CREG_TEXTURE_COLOR ,32'h60000 };
57: I = { `ADD ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_3 };

58: I = { `ADD ,`CREG_PIXEL_PITCH ,`CREG_PIXEL_PITCH ,`CREG_3 };
59: I = { `COPY ,`OREG_ADDR_O ,`CREG_PIXEL_PITCH ,`VOID };
60: I = { `JGEX ,`LABEL_NPG_NEXT_ROW ,`CREG_PIXEL_2D_POSITION ,`CREG_LAST_COL };
61: I = { `INCX ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID };
62: I = { `RETURN ,`RT_TRUE };

//LABEL_NPG_NEXT_ROW:
63: I = { `SETX ,`CREG_PIXEL_2D_POSITION ,32'h0 };
64: I = { `INCY ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_POSITION ,`VOID };
65: I = { `JGEY ,`LABEL_ALLDONE ,`CREG_PIXEL_2D_POSITION ,`CREG_PIXEL_2D_FINAL_POSITION };
66: I = { `RETURN ,`RT_TRUE };

//LABEL_ALLDONE:
67: I = { `NOP ,`VOID ,`VOID };
68: I = { `RETURN ,`RT_FALSE };

//----------------------------------------------------------------------
//Micro code for AABBIU
//TAG_AABBIU_UCODE_ADDRESS:
69: I = { `ZERO ,`R3 ,`VOID ,`VOID };
70: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
71: I = { `RETURN ,`RT_TRUE };

//LABEL_TEST_RAY_X_ORIGEN:
72: I = { `JGEX ,`LABEL_ELSE_IFX ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
73: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
74: I = { `JLEX ,`LABEL1 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
75: I = { `RETURN ,`RT_FALSE };

//LABEL1:
76: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
77: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
78: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID };

//LABEL_ELSE_IFX:
79: I = { `JLEX ,`LABEL_ELSEX ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
80: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
81: I = { `JGEX ,`LABEL2 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
82: I = { `RETURN ,`RT_FALSE };
//LABEL2:
83: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
84: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
85: I = { `JMP ,`LABEL_TEST_RAY_Y_ORIGEN ,`VOID ,`VOID };
//LABEL_ELSEX:
86: I = { `SETX ,`R5 ,32'b1 };

//LABEL_TEST_RAY_Y_ORIGEN:
87: I = { `JGEY ,`LABEL_ELESE_IFY ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
88: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
89: I = { `JLEY ,`LABEL3 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
90: I = { `RETURN ,`RT_FALSE };

//LABEL3:
91: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
92: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
93: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID };

//LABEL_ELESE_IFY:
94: I = { `JLEY ,`LABEL_ELSEY ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
95: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
96: I = { `JGEY ,`LABEL4 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
97: I = { `RETURN ,`RT_FALSE };

//LABEL4:
98: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
99: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
100: I = { `JMP ,`LABEL_TEST_RAY_Z_ORIGEN ,`VOID ,`VOID };

//LABEL_ELSEY:
101: I = { `SETY ,`R5 ,32'b1 };

//LABEL_TEST_RAY_Z_ORIGEN:
102: I = { `JGEZ ,`LABEL_ELESE_IFZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMIN };
103: I = { `SUB ,`R1 ,`CREG_AABBMIN ,`CREG_CAMERA_POSITION };
104: I = { `JLEZ ,`LABEL5 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
105: I = { `RETURN ,`RT_FALSE };

//LABEL5:
106: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
107: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
108: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID };

//LABEL_ELESE_IFZ:
109: I = { `JLEZ ,`LABEL_ELSEZ ,`CREG_CAMERA_POSITION ,`CREG_AABBMAX };
110: I = { `SUB ,`R1 ,`CREG_AABBMAX ,`CREG_CAMERA_POSITION };
111: I = { `JGEZ ,`LABEL6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
112: I = { `RETURN ,`RT_FALSE };

//LABEL6:
113: I = { `SETX ,`RAY_INSIDE_BOX ,32'd0 };
114: I = { `DIV ,`R6 ,`R1 ,`CREG_UNORMALIZED_DIRECTION };
115: I = { `JMP ,`LABEL_RAY_INSIDE_BOX ,`VOID ,`VOID };

//LABEL_ELSEZ:
116: I = { `SETZ ,`R5 ,32'b1 };

//LABEL_RAY_INSIDE_BOX:
117: I = { `ZERO ,`R1 ,`VOID ,`VOID };
118: I = { `JEQX ,`LABEL_TEST_YZ_PLANE ,`R1 ,`RAY_INSIDE_BOX };
//BUG need a NOP here else pipeline gets confused
119: I = { `RETURN ,`RT_TRUE };

//LABEL_TEST_YZ_PLANE:
120: I = { `JNEX ,`LABEL_TEST_XZ_PLANE ,`R5 ,`R1 };
121: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_XXX };
122: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
123: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
124: I = { `JGEY ,`LABEL7 ,`R2 ,`CREG_AABBMIN };
125: I = { `RETURN ,`RT_FALSE };

//LABEL7:
126: I = { `JLEY ,`LABEL8 ,`R2 ,`CREG_AABBMAX };
127: I = { `RETURN ,`RT_FALSE };

//LABEL8:
128: I = { `JGEZ ,`LABEL9 ,`R2 ,`CREG_AABBMIN };
129: I = { `RETURN ,`RT_FALSE };

//LABEL9:
130: I = { `JLEZ ,`LABEL_TEST_XZ_PLANE ,`R2 ,`CREG_AABBMAX };
131: I = { `RETURN ,`RT_FALSE };

//LABEL_TEST_XZ_PLANE:
132: I = { `JNEY ,`LABEL_TEST_XY_PLANE ,`R5 ,`R1 };
133: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_YYY };
134: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
135: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
136: I = { `JGEX ,`LABEL10 ,`R2 ,`CREG_AABBMIN };
137: I = { `RETURN ,`RT_FALSE };

//LABEL10:
138: I = { `JLEX ,`LABEL11 ,`R2 ,`CREG_AABBMAX };
139: I = { `RETURN ,`RT_FALSE };

//LABEL11:
140: I = { `JGEZ ,`LABEL12 ,`R2 ,`CREG_AABBMIN };
141: I = { `RETURN ,`RT_FALSE };

//LABEL12:
142: I = { `JLEZ ,`LABEL_TEST_XY_PLANE ,`R2 ,`CREG_AABBMAX };
143: I = { `RETURN ,`RT_FALSE };

//LABEL_TEST_XY_PLANE:
144: I = { `SWIZZLE3D ,`R6 ,`SWIZZLE_ZZZ };
145: I = { `MUL ,`R2 ,`CREG_UNORMALIZED_DIRECTION ,`R6 };
146: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
147: I = { `JGEX ,`LABEL13 ,`R2 ,`CREG_AABBMIN };
148: I = { `RETURN ,`RT_FALSE };

//LABEL13:
149: I = { `JLEX ,`LABEL14 ,`R2 ,`CREG_AABBMAX };
150: I = { `RETURN ,`RT_FALSE };

//LABEL14:
151: I = { `JGEY ,`LABEL15 ,`R2 ,`CREG_AABBMIN };
152: I = { `RETURN ,`RT_FALSE };

//LABEL15:
153: I = { `JLEY ,`LABEL_HIT ,`R2 ,`CREG_AABBMAX };
154: I = { `RETURN ,`RT_FALSE };

//LABEL_HIT:
155: I = { `SETX ,`CREG_LAST_t ,32'h1F40000 };
156: I = { `RETURN ,`RT_TRUE };

//------------------------------------------------------------------------
//BIU Micro code
//TAG_BIU_UCODE_ADDRESS:
157: I = { `ZERO ,`OREG_PIXEL_COLOR ,`VOID ,`VOID };
158: I = { `SETX ,`R3 ,`ONE };
159: I = { `SETX ,`R1 ,32'h00000 };
160: I = { `SUB ,`CREG_E1 ,`CREG_V1 ,`CREG_V0 };
161: I = { `SUB ,`CREG_E2 ,`CREG_V2 ,`CREG_V0 };
162: I = { `SUB ,`CREG_T ,`CREG_CAMERA_POSITION ,`CREG_V0 };
163: I = { `CROSS ,`CREG_P ,`CREG_RAY_DIRECTION ,`CREG_E2 };
164: I = { `CROSS ,`CREG_Q ,`CREG_T ,`CREG_E1 };
165: I = { `DOT ,`CREG_H1 ,`CREG_Q ,`CREG_E2 };
166: I = { `DOT ,`CREG_H2 ,`CREG_P ,`CREG_T };
167: I = { `DOT ,`CREG_H3 ,`CREG_Q ,`CREG_RAY_DIRECTION };
168: I = { `DOT ,`CREG_DELTA ,`CREG_P ,`CREG_E1 };
169: I = { `DIV ,`CREG_t ,`CREG_H1 ,`CREG_DELTA };
170: I = { `DIV ,`CREG_u ,`CREG_H2 ,`CREG_DELTA };
171: I = { `DIV ,`CREG_v ,`CREG_H3 ,`CREG_DELTA };
172: I = { `JGEX ,`LABEL_BIU1 ,`CREG_u ,`R1 };
173: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU1:
174: I = { `JGEX ,`LABEL_BIU2 ,`CREG_v ,`R1 };
175: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU2:
176: I = { `ADD ,`R2 ,`CREG_u ,`CREG_v };
177: I = { `JLEX ,`LABEL_BIU3 ,`R2 ,`R3 };
178: I = { `RET ,`R99 ,`FALSE };

//LABEL_BIU3:
179: I = { `JGEX ,`LABEL_BIU4 ,`CREG_t ,`CREG_LAST_t };
180: I = { `COPY ,`CREG_LAST_t ,`CREG_t ,`VOID };
181: I = { `COPY ,`CREG_LAST_u ,`CREG_u ,`VOID };
182: I = { `COPY ,`CREG_LAST_v ,`CREG_v ,`VOID };
183: I = { `COPY ,`CREG_E1_LAST ,`CREG_E1 ,`VOID };
184: I = { `COPY ,`CREG_E2_LAST ,`CREG_E2 ,`VOID };
185: I = { `COPY ,`CREG_UV0_LAST ,`CREG_UV0 ,`VOID };
186: I = { `COPY ,`CREG_UV1_LAST ,`CREG_UV1 ,`VOID };
187: I = { `COPY ,`CREG_UV2_LAST ,`CREG_UV2 ,`VOID };
188: I = { `COPY ,`CREG_TRI_DIFFUSE_LAST ,`CREG_TRI_DIFFUSE ,`VOID };
//LABEL_BIU4:
189: I = { `RET ,`R99 ,`TRUE };


//-------------------------------------------------------------------------
//Calculate the address of the texture coordinates.

//TAG_TCC_UCODE_ADDRESS:
//Do this calculation only if this triangle is the one closest to the camera
190: I = { `JGX ,`LABEL_TCC_EXIT ,`CREG_t ,`CREG_LAST_t };

//First get the UV coordinates and store in R1
//R1x: u_coordinate = U0 + last_u * (U1 - U0) + last_v * (U2 - U0)
//R1y: v_coordinate = V0 + last_u * (V1 - V0) + last_v * (V2 - V0)
//R1z: 0

191: I = { `SUB ,`R1 ,`CREG_UV1_LAST ,`CREG_UV0_LAST };
192: I = { `SUB ,`R2 ,`CREG_UV2_LAST ,`CREG_UV0_LAST };
193: I = { `MUL ,`R1 ,`CREG_LAST_u ,`R1 };
194: I = { `MUL ,`R2 ,`CREG_LAST_v ,`R2 };
195: I = { `ADD ,`R1 ,`R1 ,`R2 };
196: I = { `ADD ,`R1 ,`R1 ,`CREG_UV0_LAST };

//R7x : fu = (u_coordinate) * gTexture.mWidth
//R7y : fv = (v_coordinate) * gTexture.mWidth
//R7z : 0
197: I = { `MUL ,`R7 ,`R1 ,`CREG_TEXTURE_SIZE };

//R1x: u1 = ((int)fu) % gTexture.mWidth
//R1y: v1 = ((int)fv) % gTexture.mHeight
//R1z: 0
//R2x: u2 = (u1 + 1 ) % gTexture.mWidth
//R2y: v2 = (v1 + 1 ) % gTexture.mHeight
//R2z: 0
// Notice MOD2 only operates over
// numbers that are power of 2 also notice that the
// textures are assumed to be squares!
//x % 2^n == x & (2^n - 1).

198: I = { `MOD ,`R1 ,`R7 ,`CREG_TEXTURE_SIZE };
199: I = { `INC ,`R2 ,`R1 ,`VOID };
200: I = { `MOD ,`R2 ,`R2 ,`CREG_TEXTURE_SIZE };

//Cool now we should store the values in the appropriate registers
//OREG_TEX_COORD1.x = u1 + v1 * gTexture.mWidth
//OREG_TEX_COORD1.y = u2 + v1 * gTexture.mWidth
//OREG_TEX_COORD1.z = 0
//OREG_TEX_COORD2.x = u1 + v2 * gTexture.mWidth
//OREG_TEX_COORD2.y = u2 + v2 * gTexture.mWidth
//OREG_TEX_COORD2.z = 0

//R1= [u1 v1 0]
//R2= [u2 v2 0]

//R2 = [v2 u2 0]
201: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YXZ };

//R3 = [v2 v1 0]
202: I = { `XCHANGEX ,`R3 ,`R1 ,`R2 };


//R4 = [u1 u2 0]
203: I = { `XCHANGEX ,`R4 ,`R2 ,`R1 };

//R2 = [v2*H v1*H 0]
204: I = { `UNSCALE ,`R9 ,`R3 ,`VOID };
205: I = { `UNSCALE ,`R8 ,`CREG_TEXTURE_SIZE ,`VOID };
206: I = { `IMUL ,`R2 ,`R9 ,`R8 };

//OREG_TEX_COORD1 = [u1 + v2*H u2 + v1*H 0]
//R4 = FixedToInteger(R4)
207: I = { `UNSCALE ,`R4 ,`R4 ,`VOID };
208: I = { `ADD ,`R12 ,`R2 ,`R4 };
209: I = { `SETX ,`R5 ,32'h3 };
210: I = { `SETY ,`R5 ,32'h3 };
211: I = { `SETZ ,`R5 ,32'h3 };
//Multiply by 3 (the pitch)
//IMUL OREG_TEX_COORD1 R12 R5
212: I = { `IMUL ,`CREG_TEX_COORD1 ,`R12 ,`R5 };

//R4 = [u2 u1 0]
213: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YXZ };


//OREG_TEX_COORD2 [u2 + v2*H u1 + v1*H 0]
214: I = { `ADD ,`R12 ,`R2 ,`R4 };
//Multiply by 3 (the pitch)
//IMUL OREG_TEX_COORD2 R12 R5
215: I = { `IMUL ,`CREG_TEX_COORD2 ,`R12 ,`R5 };


//Cool now get the weights

//w1 = (1 - fracu) * (1 - fracv)
//w2 = fracu * (1 - fracv)
//w3 = (1 - fracu) * fracv
//w4 = fracu * fracv

//R4x: fracu
//R4y: fracv
//R4z: 0
216: I = { `FRAC ,`R4 ,`R7 ,`VOID };

//R5x: fracv
//R5y: fracu
//R5z: 0
217: I = { `COPY ,`R5 ,`R4 ,`VOID };
218: I = { `SWIZZLE3D ,`R5 ,`SWIZZLE_YXZ };


//R5x: 1 - fracv
//R5y: 1 - fracu
//R5z: 1
219: I = { `NEG ,`R5 ,`R5 ,`VOID };
220: I = { `INC ,`R5 ,`R5 ,`VOID };

//The product below is written to CREG_TEXWEIGHT1 (R5 itself is unchanged):
//x: 1 - fracv
//y: 1 - fracu
//z: (1 - fracv)(1 - fracu)
221: I = { `MULP ,`CREG_TEXWEIGHT1 ,`R5 ,`VOID };

//CREG_TEXWEIGHT1.x = (1 - fracv)(1 - fracu)
//CREG_TEXWEIGHT1.y = (1 - fracv)(1 - fracu)
//CREG_TEXWEIGHT1.z = (1 - fracv)(1 - fracu)
222: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT1 ,`SWIZZLE_ZZZ };


//R6x: w2: fracu * (1 - fracv )
//R6y: w3: fracv * (1 - fracu )
//R6z: 0
223: I = { `MUL ,`R6 ,`R4 ,`R5 };

//CREG_TEXWEIGHT2.x = fracu * (1 - fracv )
//CREG_TEXWEIGHT2.y = fracu * (1 - fracv )
//CREG_TEXWEIGHT2.z = fracu * (1 - fracv )
224: I = { `COPY ,`CREG_TEXWEIGHT2 ,`R6 ,`VOID };
225: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT2 ,`SWIZZLE_XXX };

//CREG_TEXWEIGHT3.x = fracv * (1 - fracu )
//CREG_TEXWEIGHT3.y = fracv * (1 - fracu )
//CREG_TEXWEIGHT3.z = fracv * (1 - fracu )
226: I = { `COPY ,`CREG_TEXWEIGHT3 ,`R6 ,`VOID };
227: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT3 ,`SWIZZLE_YYY };


//R4x: fracu
//R4y: fracv
//R4z: fracu * fracv
228: I = { `MULP ,`R4 ,`R4 ,`VOID };

//CREG_TEXWEIGHT4.x = fracv * fracu
//CREG_TEXWEIGHT4.y = fracv * fracu
//CREG_TEXWEIGHT4.z = fracv * fracu
229: I = { `COPY ,`CREG_TEXWEIGHT4 ,`R4 ,`VOID };
230: I = { `SWIZZLE3D ,`CREG_TEXWEIGHT4 ,`SWIZZLE_ZZZ };


//LABEL_TCC_EXIT:
231: I = { `RET ,`R99 ,32'h0 };


//-------------------------------------------------------------------------
//TAG_PSU_UCODE_ADRESS:
//Pixel Shader #1
//This pixel shader has diffuse light but no textures

232: I = { `CROSS ,`R1 ,`CREG_E1_LAST ,`CREG_E2_LAST };
233: I = { `MAG ,`R2 ,`R1 ,`VOID };
234: I = { `DIV ,`R1 ,`R1 ,`R2 };
235: I = { `MUL ,`R2 ,`CREG_RAY_DIRECTION ,`CREG_LAST_t };
236: I = { `ADD ,`R2 ,`R2 ,`CREG_CAMERA_POSITION };
237: I = { `SUB ,`R2 ,`CURRENT_LIGHT_POS ,`R2 };
238: I = { `MAG ,`R3 ,`R2 ,`VOID };
239: I = { `DIV ,`R2 ,`R2 ,`R3 };
240: I = { `DOT ,`R3 ,`R2 ,`R1 };
241: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_TRI_DIFFUSE_LAST ,`CURRENT_LIGHT_DIFFUSE };
242: I = { `MUL ,`CREG_COLOR_ACC ,`CREG_COLOR_ACC ,`R3 };
243: I = { `COPY ,`CREG_TEXTURE_COLOR ,`CREG_COLOR_ACC ,`VOID };
244: I = { `NOP ,`RT_FALSE };
245: I = { `NOP ,`RT_FALSE };
246: I = { `NOP ,`RT_FALSE };
247: I = { `RETURN ,`RT_TRUE };

//-------------------------------------------------------------------------
//Pixel Shader #2
//TAG_PSU_UCODE_ADRESS2:
//This Pixel Shader has no light but it does texturing
//with bi-linear interpolation



248: I = { `COPY ,`R1 ,`CREG_TEX_COORD1 ,`VOID };
249: I = { `COPY ,`R2 ,`CREG_TEX_COORD1 ,`VOID };
250: I = { `COPY ,`R3 ,`CREG_TEX_COORD2 ,`VOID };
251: I = { `COPY ,`R4 ,`CREG_TEX_COORD2 ,`VOID };


252: I = { `SWIZZLE3D ,`R1 ,`SWIZZLE_XXX };
253: I = { `SWIZZLE3D ,`R2 ,`SWIZZLE_YYY };
254: I = { `SWIZZLE3D ,`R3 ,`SWIZZLE_XXX };
255: I = { `SWIZZLE3D ,`R4 ,`SWIZZLE_YYY };
256: I = { `ADD ,`R1 ,`R1 ,`CREG_012 };
257: I = { `ADD ,`R2 ,`R2 ,`CREG_012 };
258: I = { `ADD ,`R3 ,`R3 ,`CREG_012 };
259: I = { `ADD ,`R4 ,`R4 ,`CREG_012 };


260: I = { `TMREAD ,`CREG_TEX_COLOR1 ,`R1 ,`VOID };
261: I = { `NOP ,`RT_FALSE };
262: I = { `TMREAD ,`CREG_TEX_COLOR2 ,`R2 ,`VOID };
263: I = { `NOP ,`RT_FALSE };
264: I = { `TMREAD ,`CREG_TEX_COLOR3 ,`R3 ,`VOID };
265: I = { `NOP ,`RT_FALSE };
266: I = { `TMREAD ,`CREG_TEX_COLOR4 ,`R4 ,`VOID };
267: I = { `NOP ,`RT_FALSE };




//TextureColor.R = c1.R * w1 + c2.R * w2 + c3.R * w3 + c4.R * w4
//TextureColor.G = c1.G * w1 + c2.G * w2 + c3.G * w3 + c4.G * w4
//TextureColor.B = c1.B * w1 + c2.B * w2 + c3.B * w3 + c4.B * w4


//Earlier pairing, kept for reference (note that the active code below
//pairs the colors and weights differently):
//MUL R1 CREG_TEX_COLOR4 CREG_TEXWEIGHT1
//MUL R2 CREG_TEX_COLOR2 CREG_TEXWEIGHT2
//MUL R3 CREG_TEX_COLOR1 CREG_TEXWEIGHT3
//MUL R4 CREG_TEX_COLOR3 CREG_TEXWEIGHT4

268: I = { `MUL ,`R1 ,`CREG_TEX_COLOR3 ,`CREG_TEXWEIGHT1 };
269: I = { `MUL ,`R2 ,`CREG_TEX_COLOR2 ,`CREG_TEXWEIGHT2 };
270: I = { `MUL ,`R3 ,`CREG_TEX_COLOR1 ,`CREG_TEXWEIGHT3 };
271: I = { `MUL ,`R4 ,`CREG_TEX_COLOR4 ,`CREG_TEXWEIGHT4 };

272: I = { `ADD ,`CREG_TEXTURE_COLOR ,`R1 ,`R2 };
273: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R3 };
274: I = { `ADD ,`CREG_TEXTURE_COLOR ,`CREG_TEXTURE_COLOR ,`R4 };
275: I = { `RETURN ,`RT_TRUE };


//-------------------------------------------------------------------------
//Default User constants
//TAG_USERCONSTANTS:

276: I = { `NOP ,`RT_FALSE };
277: I = { `RETURN ,`RT_TRUE };

//TAG_PIXELSHADER:
//Default Pixel Shader (just outputs texture)
278: I = { `OMWRITE ,`OREG_PIXEL_COLOR ,`CREG_CURRENT_OUTPUT_PIXEL ,`CREG_TEXTURE_COLOR };
279: I = { `RETURN ,`RT_TRUE };


//-------------------------------------------------------------------------

//Any address outside 0..279: report it (DEBUG builds only) and return a
//fixed filler word.
default:
begin
`ifdef DEBUG
$display("%dns CORE %d Error: Reached undefined address in instruction Memory: %d!!!!",$time,iDebug_CoreID,Address);
// $stop();
`endif
I = {`INSTRUCTION_OP_LENGTH'hFF,16'hFFFF,32'hFFFFFFFF};
end
endcase
end
endmodule
//--------------------------------------------------------
/rtl/Collaterals.v
0,0 → 1,488
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//------------------------------------------------
// D flip-flop bank with an asynchronous, active-high clear.
//   SIZE  : register width in bits (defaults to `WIDTH)
//   Clock : rising-edge clock
//   Clear : asynchronous clear; Q is forced to zero on its rising edge and
//           held at zero on every clock edge while it stays high
//   D     : data input
//   Q     : registered output
module FFD_POSEDGE_ASYNC_RESET # ( parameter SIZE=`WIDTH )
(
	input wire Clock,
	input wire Clear,
	input wire [SIZE-1:0] D,
	output reg [SIZE-1:0] Q
);
always @(posedge Clock or posedge Clear)
begin
	// Non-blocking assignments: a blocking assignment in a clocked block
	// lets other blocks triggered by the same edge see the new Q in the same
	// time step, which creates simulation races.
	if (Clear)
		Q <= {SIZE{1'b0}};
	else
		Q <= D;
end
endmodule
//----------------------------------------------------
// D flip-flop bank with a synchronous, active-high reset and a clock enable.
//   SIZE   : register width in bits (defaults to `WIDTH)
//   Clock  : rising-edge clock
//   Reset  : synchronous reset; takes priority over Enable
//   Enable : when high (and Reset is low) Q captures D; otherwise Q holds
//   D      : data input
//   Q      : registered output
module FFD_POSEDGE_SYNCRONOUS_RESET # ( parameter SIZE=`WIDTH )
(
	input wire Clock,
	input wire Reset,
	input wire Enable,
	input wire [SIZE-1:0] D,
	output reg [SIZE-1:0] Q
);

always @ (posedge Clock)
begin
	if ( Reset )
		// The reset value is sized from SIZE, not from the global `WIDTH,
		// so instances of any width get a correctly sized constant.
		Q <= {SIZE{1'b0}};
	else if ( Enable )
		Q <= D;
end//always

endmodule
//------------------------------------------------
// Loadable up-counter.
//   SIZE    : counter width in bits (defaults to `WIDTH)
//   Clock   : rising-edge clock
//   Reset   : synchronous load; Q takes the value of Initial
//   Initial : value loaded while Reset is high
//   Enable  : when high (and Reset is low) Q increments by one
//   Q       : current count; wraps around on overflow
module UPCOUNTER_POSEDGE # (parameter SIZE=`WIDTH)
(
	input wire Clock, Reset,
	input wire [SIZE-1:0] Initial,
	input wire Enable,
	output reg [SIZE-1:0] Q
);

always @(posedge Clock)
begin
	// Load has priority over counting; with neither asserted Q holds.
	if (Reset)
		Q <= Initial;
	else if (Enable)
		Q <= Q + 1'b1;
end

endmodule
 
//----------------------------------------------------------------------
 
// One-hot decoder with enable.
//   SEL_WIDTH    : width of the binary select input
//   OUTPUT_WIDTH : width of the one-hot output
//   Sel          : index of the output bit to set
//   En           : when low, every output bit is zero
//   O            : (1 << Sel) while En is high, otherwise zero
module SELECT_1_TO_N # ( parameter SEL_WIDTH=4, parameter OUTPUT_WIDTH=16 )
(
	input wire [SEL_WIDTH-1:0] Sel,
	input wire En,
	output wire [OUTPUT_WIDTH-1:0] O
);

// A single continuous assignment: the enable gates the shifted one.
assign O = En ? (1 << Sel) : 0;

endmodule
 
//----------------------------------------------------------------------
 
// Four-input multiplexer with a 2-bit binary select.
//   SIZE     : data width in bits (defaults to `WIDTH)
//   Sel      : 2'b00 -> I1, 2'b01 -> I2, 2'b10 -> I3, 2'b11 -> I4
//   I1 .. I4 : data inputs
//   O1       : selected input; zero if Sel contains unknown (X/Z) bits
module MUXFULLPARALELL_2SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
	input wire [1:0] Sel,
	input wire [SIZE-1:0]I1, I2, I3,I4,
	output reg [SIZE-1:0] O1
);

always @( * )
begin
	case (Sel)
		2'b00: O1 = I1;
		2'b01: O1 = I2;
		2'b10: O1 = I3;
		2'b11: O1 = I4;
		// The previous "SIZE-1'b0" is the arithmetic expression SIZE - 0,
		// i.e. the number SIZE, not a zero vector. Drive a real zero.
		default: O1 = {SIZE{1'b0}};
	endcase
end

endmodule
 
//--------
// Left-shifting register that reloads itself once its top bit is set.
// On each rising edge of Clock:
//   Reset = 1                           -> register is loaded with Initial
//   Reset = 0, Enable = 1, MSB set      -> register is reloaded with Initial
//   Reset = 0, Enable = 1, MSB clear    -> register shifts left by one,
//                                          shifting a 0 into bit 0
//   Reset = 0, Enable = 0               -> register holds
// The wrap-around only happens while Enable is high.
module CIRCULAR_SHIFTLEFT_POSEDGE_EX # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire [SIZE-1:0]   Initial,
    input wire              Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] ring;

    assign O = ring;

    always @(posedge Clock)
    begin
        if (Reset)
            ring <= Initial;
        else if (Enable && ring[SIZE-1])
            ring <= Initial;
        else if (Enable)
            ring <= ring << 1;
    end

endmodule
//------------------------------------------------
// 3-to-1 multiplexer, SIZE bits wide, selected by a one-hot 3-bit code.
//   Sel = 3'b001 -> I1
//   Sel = 3'b010 -> I2
//   Sel = 3'b100 -> I3
//   any other Sel (none or several bits set, or X/Z) -> all zeros
module MUXFULLPARALELL_3SEL_WALKINGONE # ( parameter SIZE=`WIDTH )
(
    input wire [2:0]        Sel,
    input wire [SIZE-1:0]   I1, I2, I3,
    output reg [SIZE-1:0]   O1
);

    always @( * )
    begin
        case (Sel)
            3'b001:  O1 = I1;
            3'b010:  O1 = I2;
            3'b100:  O1 = I3;
            // The previous "SIZE-1'b0" is the arithmetic expression
            // (SIZE - 1'b0), so every non-one-hot Sel drove the number SIZE
            // onto O1 instead of zero.
            default: O1 = {SIZE{1'b0}};
        endcase
    end

endmodule
//------------------------------------------------
// Left-shifting register with synchronous load (no wrap-around).
// On each rising edge of Clock:
//   Reset = 1              -> register is loaded with Initial
//   Reset = 0, Enable = 1  -> register shifts left by one, 0 enters at bit 0
//   Reset = 0, Enable = 0  -> register holds
module SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire [SIZE-1:0]   Initial,
    input wire              Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] shreg;

    assign O = shreg;

    always @(posedge Clock)
    begin
        if (Reset)
            shreg <= Initial;
        else if (Enable)
            shreg <= shreg << 1;
    end

endmodule
//------------------------------------------------
//------------------------------------------------
// Left-shifting register that reloads itself once its top bit is set.
// On each rising edge of Clock:
//   Reset = 1 or MSB set   -> register is loaded with Initial
//                             (this happens regardless of Enable)
//   otherwise, Enable = 1  -> register shifts left by one, 0 enters at bit 0
//   otherwise, Enable = 0  -> register holds
module CIRCULAR_SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire [SIZE-1:0]   Initial,
    input wire              Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] ring;

    // Reload on external reset or when the walking bit reached the top.
    wire reload = Reset | ring[SIZE-1];

    assign O = ring;

    always @(posedge Clock)
    begin
        if (reload)
            ring <= Initial;
        else if (Enable)
            ring <= ring << 1;
    end

endmodule
//-----------------------------------------------------------
/*
Set-once ("sticky") flip-flop.
Its truth table is (R = Reset):
Q S Q_next R
0 0 0 0
0 1 1 0
1 0 1 0
1 1 1 0
X X 0 1
The idea is that the output goes from 0 to 1 the first time S = 1; if it
gets another S = 1 afterwards, the output simply stays at 1. Only Reset
brings it back to 0.
*/
// 1-bit flag built from two stages clocked on opposite edges.
//   Clock  : Q_next is computed on the rising edge, Q copies it on the
//            following falling edge.
//   Reset  : synchronous (sampled on the rising edge); clears Q_next.
//   Enable : when high, Q_next becomes 1 if either S or Q is 1;
//            when low, Q_next just follows Q.
//   S      : set request.
//   Q      : flag output.
module FFToggleOnce_1Bit
(
    input wire  Clock,
    input wire  Reset,
    input wire  Enable,
    input wire  S,
    output reg  Q
);

    reg Q_next;

    // Rising edge: work out the next value of the flag.
    always @ (posedge Clock)
    begin
        if (Reset)
            Q_next <= 1'b0;
        else if (Enable)
            Q_next <= Q || (!Q && S);
        else
            Q_next <= Q;
    end

    // Falling edge: publish it on the output.
    always @ (negedge Clock)
    begin
        Q <= Q_next;
    end

endmodule
 
//-----------------------------------------------------------
// 16-bit up counter with an asynchronous, active-high load.
//   Clock   : the counter advances on the rising edge.
//   Reset   : a rising edge on Reset (or a clock edge while Reset is high)
//             loads the counter with Initial.
//   Initial : value loaded on reset.
//   Enable  : when high (and Reset is low) the counter increments by one
//             per clock, wrapping from 16'hFFFF to 0.
//   Q       : current count.
module UpCounter_16E
(
    input wire          Clock,
    input wire          Reset,
    input wire [15:0]   Initial,
    input wire          Enable,
    output wire [15:0]  Q
);

    reg [15:0] Temp;

    assign Q = Temp;

    // Non-blocking assignments: logic sampling Q on the same clock edge must
    // see the count from before the edge, independent of simulator ordering.
    always @(posedge Clock or posedge Reset)
    begin
        if (Reset)
            Temp <= Initial;
        else if (Enable)
            Temp <= Temp + 1'b1;
    end

endmodule
//-----------------------------------------------------------
// 32-bit up counter with an asynchronous, active-high load.
//   Clock   : the counter advances on the rising edge.
//   Reset   : a rising edge on Reset (or a clock edge while Reset is high)
//             loads the counter with Initial.
//   Initial : value loaded on reset.
//   Enable  : when high (and Reset is low) the counter increments by one
//             per clock, wrapping from 32'hFFFFFFFF to 0.
//   Q       : current count.
module UpCounter_32
(
    input wire          Clock,
    input wire          Reset,
    input wire [31:0]   Initial,
    input wire          Enable,
    output wire [31:0]  Q
);

    reg [31:0] Temp;

    assign Q = Temp;

    // Non-blocking assignments: logic sampling Q on the same clock edge must
    // see the count from before the edge, independent of simulator ordering.
    always @(posedge Clock or posedge Reset)
    begin
        if (Reset)
            Temp <= Initial;
        else if (Enable)
            Temp <= Temp + 1'b1;
    end

endmodule
//-----------------------------------------------------------
// 3-bit up counter with an asynchronous, active-high load.
//   Clock   : the counter advances on the rising edge.
//   Reset   : a rising edge on Reset (or a clock edge while Reset is high)
//             loads the counter with Initial.
//   Initial : value loaded on reset.
//   Enable  : when high (and Reset is low) the counter increments by one
//             per clock, wrapping from 7 to 0.
//   Q       : current count.
module UpCounter_3
(
    input wire          Clock,
    input wire          Reset,
    input wire [2:0]    Initial,
    input wire          Enable,
    output wire [2:0]   Q
);

    reg [2:0] Temp;

    assign Q = Temp;

    // Non-blocking assignments: logic sampling Q on the same clock edge must
    // see the count from before the edge, independent of simulator ordering.
    always @(posedge Clock or posedge Reset)
    begin
        if (Reset)
            Temp <= Initial;
        else if (Enable)
            Temp <= Temp + 3'b1;
    end

endmodule
 
 
// Plain 32-bit D register: Q captures D on every rising edge of Clock.
// There is no reset and no enable.
module FFD32_POSEDGE
(
    input wire          Clock,
    input wire [31:0]   D,
    output reg [31:0]   Q
);

    always @ (posedge Clock)
    begin
        Q <= D;
    end

endmodule
 
//------------------------------------------------
// 2-to-1 multiplexer, 96 bits wide.
//   Sel = 0 -> O1 = I1
//   Sel = 1 -> O1 = I2
// There is deliberately no default arm: both binary values of Sel are
// covered, and an X/Z on Sel leaves O1 unchanged in simulation.
module MUXFULLPARALELL_96bits_2SEL
(
    input wire          Sel,
    input wire [95:0]   I1, I2,
    output reg [95:0]   O1
);

    always @( * )
    begin
        case (Sel)
            1'b1: O1 = I2;
            1'b0: O1 = I1;
        endcase
    end

endmodule
//------------------------------------------------
 
// 3-to-1 multiplexer, 16 bits wide, selected by a binary 2-bit code.
//   Sel = 2'b00 -> I1
//   Sel = 2'b01 -> I2
//   Sel = 2'b10 -> I3
//   anything else (2'b11 or X/Z) -> 16'b0
module MUXFULLPARALELL_16bits_2SEL_X
(
    input wire [1:0]    Sel,
    input wire [15:0]   I1, I2, I3,
    output reg [15:0]   O1
);

    always @( * )
    begin
        case (Sel)
            2'b10:   O1 = I3;
            2'b01:   O1 = I2;
            2'b00:   O1 = I1;
            default: O1 = 16'h0000;
        endcase
    end

endmodule
//------------------------------------------------
// 2-to-1 multiplexer, 16 bits wide.
//   Sel = 0 -> O1 = I1
//   Sel = 1 -> O1 = I2
// There is deliberately no default arm: both binary values of Sel are
// covered, and an X/Z on Sel leaves O1 unchanged in simulation.
module MUXFULLPARALELL_16bits_2SEL
(
    input wire          Sel,
    input wire [15:0]   I1, I2,
    output reg [15:0]   O1
);

    always @( * )
    begin
        case (Sel)
            1'b1: O1 = I2;
            1'b0: O1 = I1;
        endcase
    end

endmodule
 
//--------------------------------------------------------------
 
// 1-bit toggle flip-flop with asynchronous, active-high reset.
//   Reset : forces Q to 0 as soon as it rises.
//   D     : toggle request; when high at a rising Clock edge, Q inverts.
//           When low, Q keeps its value.
//   Q     : flip-flop output.
module FFT1
(
    input wire  D,
    input wire  Clock,
    input wire  Reset,
    output reg  Q
);

    always @ ( posedge Clock or posedge Reset )
    begin
        if (Reset)
            Q <= 1'b0;
        else if (D)
            Q <= ~Q;
    end

endmodule
//--------------------------------------------------------------

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.