URL
https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk
Subversion Repositories theia_gpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/theia_gpu/tags/Beta_0.2/rtl/EXE
- from Rev 82 to Rev 86
- ↔ Reverse comparison
Rev 82 → Rev 86
/Module_ExecutionFSM.v
0,0 → 1,533
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
// State encodings for the ExecutionFSM state register (CurrentState/NextState). |
// Only AFTER_RESET, WAIT_FOR_DECODE, WAIT_FOR_ALU_EXECUTION and |
// WRITE_BACK_TO_RAM appear as case items in the ExecutionFSM module in this file; |
// INITIAL_STATE, FETCH_DECODED_INST and HANDLE_JUMP are defined but not |
// referenced by that module. The value 6 is not assigned to any state. |
`define EXEU_AFTER_RESET 0 |
`define EXEU_INITIAL_STATE 1 |
`define EXEU_WAIT_FOR_DECODE 2 |
`define EXEU_FETCH_DECODED_INST 3 |
`define EXEU_WAIT_FOR_ALU_EXECUTION 4 |
`define EXEU_WRITE_BACK_TO_RAM 5 |
`define EXEU_HANDLE_JUMP 7 |
|
|
|
// ExecutionFSM: control logic sitting between the instruction decoder and the ALU. |
// It forwards the decoded operands/operation to the ALU, registers the |
// destination address, raises the RAM write enable when the ALU result is |
// ready, and reports busy while waiting for the ALU. |
module ExecutionFSM |
( |
input wire Clock, |
// Asynchronous to the state register (it is in that always block's sensitivity list) |
input wire Reset, |

// High when the decoder presents a valid operation/operands/destination |
input wire iDecodeDone, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1, |
input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination, |
// Driven with {iALUResultX,iALUResultY,iALUResultZ} while oRAMWriteEnable is high, high-Z otherwise |
inout wire[`DATA_ROW_WIDTH-1:0] RAMBus, |
//output reg ReadyForNextInstruction, |
// Copy of iBranchTaken |
output wire oJumpFlag , |
// Copy of oRAMWriteAddress (the registered instruction destination) |
output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp , |
output wire oRAMWriteEnable , |
output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress , |
// Same signal as oTriggerALU: iDecodeDone gated by the FSM's input-latch enable |
output wire oExeLatchedValues, |
// High in the WAIT_FOR_ALU_EXECUTION (and WRITE_BACK_TO_RAM) states |
output reg oBusy , |

//ALU ports and control signals |
output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation, |
output wire [`WIDTH-1:0] oALUChannelX1, |
output wire [`WIDTH-1:0] oALUChannelY1, |
output wire [`WIDTH-1:0] oALUChannelZ1, |
output wire [`WIDTH-1:0] oALUChannelX2, |
output wire [`WIDTH-1:0] oALUChannelY2, |
output wire [`WIDTH-1:0] oALUChannelZ2, |
output wire oTriggerALU, |

input wire [`WIDTH-1:0] iALUResultX, |
input wire [`WIDTH-1:0] iALUResultY, |
input wire [`WIDTH-1:0] iALUResultZ, |
input wire iALUOutputReady, |
input wire iBranchTaken, |
input wire iBranchNotTaken, |

// Simulation-only ports used by the DEBUG trace blocks at the end of the module |
`ifdef DEBUG |
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
//Data forward Signals |
// Copy of oRAMWriteAddress |
output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination |


); |
|
// wLatchNow pulses when the decoder has data ready AND the FSM currently |
// accepts new inputs; it enables the destination register and triggers the ALU. |
wire wLatchNow; |
// Driven by the combinational FSM block: 0 only while waiting for the ALU |
reg rInputLatchesEnabled; |

//If ALU says jump, just pass along |
assign oJumpFlag = iBranchTaken; |
//JumpIP is the instruction destination (= oRAMWriteAddress) |
assign oJumpIp = oRAMWriteAddress; |

assign wLatchNow = iDecodeDone & rInputLatchesEnabled; |
assign oExeLatchedValues = wLatchNow; |
assign oTriggerALU = wLatchNow; |

// A branch instruction (taken or not) never writes back to RAM |
wire wOperationIsJump; |
assign wOperationIsJump = iBranchTaken || iBranchNotTaken; |

//Do not write back when the operation is a NOP or a branch |
//(in DEBUG builds DEBUG_PRINT is excluded from write-back as well) |
`ifdef DEBUG |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && |
(oALUOperation != `NOP) && oALUOperation != `DEBUG_PRINT; |
`else |
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && oALUOperation != `NOP; |
`endif |


// Drive the shared RAM bus only during write-back; release it (high-Z) otherwise |
assign RAMBus = ( oRAMWriteEnable ) ? {iALUResultX,iALUResultY,iALUResultZ} : `DATA_ROW_WIDTH'bz; |

// Split the source rows into X/Y/Z channels. Channel set 1 comes from |
// iSource1, channel set 2 from iSource0. |
// NOTE(review): the slices are hard-coded to a 96-bit row of three 32-bit |
// channels - confirm this matches `DATA_ROW_WIDTH and `WIDTH. |
assign oALUChannelX1 = iSource1[95:64]; |
assign oALUChannelY1 = iSource1[63:32]; |
assign oALUChannelZ1 = iSource1[31:0]; |

assign oALUChannelX2 = iSource0[95:64]; |
assign oALUChannelY2 = iSource0[63:32]; |
assign oALUChannelZ2 = iSource0[31:0]; |
|
/* |
FF32_POSEDGE_SYNCRONOUS_RESET SourceX1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[95:64] ), |
.Q( oALUChannelX1 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceY1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[63:32] ), |
.Q( oALUChannelY1 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ1 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource1[31:0] ), |
.Q( oALUChannelZ1 ) |
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[95:64] ), |
.Q(oALUChannelX1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[63:32] ), |
.Q(oALUChannelY1) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource1[31:0] ), |
.Q(oALUChannelZ1) |
); |
*/ |
/* |
FF32_POSEDGE_SYNCRONOUS_RESET SourceX2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[95:64] ), |
.Q( oALUChannelX2 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceY2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[63:32] ), |
.Q( oALUChannelY2 ) |
); |
|
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ2 |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iSource0[31:0] ), |
.Q( oALUChannelZ2 ) |
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[95:64] ), |
.Q(oALUChannelX2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[63:32] ), |
.Q(oALUChannelY2) |
); |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2 |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iSource0[31:0] ), |
.Q(oALUChannelZ2) |
); |
*/ |
//The operation is NOT latched here: it is passed straight through from |
//iOperation. Only the destination address is registered (PSRegDestination). |


assign oALUOperation = iOperation; |
//assign oRAMWriteAddress = iDestination; |
/* |
FF_OPCODE_POSEDGE_SYNCRONOUS_RESET FFOperation |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iOperation ), |
.Q( oALUOperation ) |
|
); |
|
|
FF16_POSEDGE_SYNCRONOUS_RESET PSRegDestination |
( |
.Clock( wLatchNow ), |
.Clear( Reset ), |
.D( iDestination ), |
.Q( oRAMWriteAddress ) |
|
); |
*/ |
/* |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFOperation |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iOperation ), |
.Q(oALUOperation) |
); |
*/ |
// Destination register: captures iDestination on the clock edge where |
// wLatchNow is high and holds it as the RAM write address. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination |
( |
.Clock( Clock ),//wLatchNow ), |
.Reset( Reset), |
.Enable( wLatchNow ),//1'b1 ), |
.D( iDestination ), |
.Q(oRAMWriteAddress) |
); |

//Data forwarding |
// The last registered destination is exported unchanged on oLastDestination |
assign oLastDestination = oRAMWriteAddress; |
|
// FSM state storage. 8 bits wide although the `EXEU_* encodings only use 0..7. |
reg [7:0] CurrentState; |
// Computed by the combinational always @(*) block of this module |
reg [7:0] NextState; |


//------------------------------------------------ |
// State register: Reset is in the sensitivity list, so it acts asynchronously |
// and forces the FSM to EXEU_AFTER_RESET; otherwise NextState is loaded on |
// every rising Clock edge. |
always @(posedge Clock or posedge Reset) |
begin |



if (Reset) |
CurrentState <= `EXEU_AFTER_RESET; |
else |
CurrentState <= NextState; |

end |
//------------------------------------------------ |
|
|
// Next-state and output logic (purely combinational). |
// |
// Outputs per state: |
//   oBusy                - high while the ALU is working on an instruction |
//   rInputLatchesEnabled - gates iDecodeDone into wLatchNow; low while we |
//                          wait for the ALU so no new operands are accepted |
// |
// Blocking assignments are used because this block models combinational |
// logic; every signal gets a default first so no path can infer a latch. |
always @( * ) |
begin |
    // Defaults: idle, accepting inputs, and fall back to the reset state. |
    // These are exactly the values used for any unknown state. |
    oBusy                = 1'b0; |
    rInputLatchesEnabled = 1'b1; |
    NextState            = `EXEU_AFTER_RESET; |

    case (CurrentState) |
    //------------------------------------------ |
    // One cycle after reset, then start waiting for the decoder. |
    `EXEU_AFTER_RESET: |
    begin |
        NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    // In the same cycle that iDecodeDone goes high the destination |
    // register captures its value and the ALU is triggered (wLatchNow), |
    // so on the next cycle we are already waiting for the ALU. |
    `EXEU_WAIT_FOR_DECODE: |
    begin |
        if ( iDecodeDone ) |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    // ALU is executing: report busy and block new inputs until |
    // iALUOutputReady, which is also what enables the RAM write-back. |
    `EXEU_WAIT_FOR_ALU_EXECUTION: |
    begin |
        oBusy                = 1'b1; |
        rInputLatchesEnabled = 1'b0; |

        if ( iALUOutputReady ) |
            NextState = `EXEU_WAIT_FOR_DECODE; |
        else |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
    end |
    //------------------------------------------ |
    // No transition in this block ever selects this state; it is kept |
    // so the behaviour stays identical if it is ever entered. |
    `EXEU_WRITE_BACK_TO_RAM: |
    begin |
        oBusy = 1'b1; |

        if ( iDecodeDone ) |
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION; |
        else |
            NextState = `EXEU_WAIT_FOR_DECODE; |
    end |
    //------------------------------------------ |
    // Unknown state: keep the defaults assigned above. |
    default: |
    begin |
    end |
    //------------------------------------------ |
    endcase |
end |
|
//----------------------------------------------------------------------- |
`ifdef DUMP_CODE |
// Simulation-only log files, opened once at time zero. |
//   ucode_file : "Code.log"      - receives the UCODE dump banner here |
//   reg_log    : "Registers.log" - only opened here |
// $fopen returns 0 when the file cannot be opened, so that case is reported |
// instead of silently writing to an invalid descriptor. |
integer ucode_file; |
integer reg_log; |
initial |
begin |
    $display("Opening ucode dump file....\n"); |
    ucode_file = $fopen("Code.log","w"); |
    if (ucode_file == 0) |
        $display("ERROR: could not open Code.log for writing"); |
    else |
        $fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n"); |

    $display("Opening Register log file...\n"); |
    reg_log = $fopen("Registers.log","w"); |
    if (reg_log == 0) |
        $display("ERROR: could not open Registers.log for writing"); |
end |

`endif //Ucode dump |
|
//----------------------------------------------------------------------- |
`ifdef DEBUG |
// Debug-only shadow registers: each one captures a 32-bit slice of the |
// source operands when wLatchNow is high, so the trace code can print the |
// operands that were handed to the ALU. |
wire [`WIDTH-1:0] wALUChannelX1, wALUChannelY1, wALUChannelZ1;   // from iSource1 |
wire [`WIDTH-1:0] wALUChannelX2, wALUChannelY2, wALUChannelZ2;   // from iSource0 |

// ---- operand set 1 (iSource1) ---- |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1 |
    ( .D( iSource1[95:64] ), .Q( wALUChannelX1 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1 |
    ( .D( iSource1[63:32] ), .Q( wALUChannelY1 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1 |
    ( .D( iSource1[31:0] ), .Q( wALUChannelZ1 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |

// ---- operand set 2 (iSource0) ---- |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2 |
    ( .D( iSource0[95:64] ), .Q( wALUChannelX2 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2 |
    ( .D( iSource0[63:32] ), .Q( wALUChannelY2 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2 |
    ( .D( iSource0[31:0] ), .Q( wALUChannelZ2 ), |
      .Enable( wLatchNow ), .Clock( Clock ), .Reset( Reset ) ); |
|
|
// Debug trace: fires on the rising edge of the whole expression |
// (iDecodeDone && iDebug_CoreID == `DEBUG_CORE) and logs the core id and the |
// instruction pointer of the instruction being handed to EXE. |
// NOTE(review): `LOGME is defined outside this file; presumably it expands to |
// the opening of a $fwrite/$display call - confirm in aDefinitions.v. |
always @ (posedge iDecodeDone && iDebug_CoreID == `DEBUG_CORE) |
begin |
`LOGME"[CORE %d] IP:%d", iDebug_CoreID,iDebug_CurrentIP); |
end |
|
// Debug trace of every completed instruction (simulation only). |
// On the falling edge of (Clock && iDebug_CoreID == `DEBUG_CORE), when the ALU |
// reports its output ready, one log record is written: |
//   branch / write-enable tags, time stamp, mnemonic, destination address, |
//   both operand triples and finally the ALU result triple. |
// Operands and results used to be printed from two separate always blocks |
// sensitive to the same event; the simulator does not guarantee the order in |
// which such blocks run, so the record is now produced by a single block to |
// keep the fields in a fixed order. |
always @ (negedge Clock && iDebug_CoreID == `DEBUG_CORE) |
begin |
    if ( iALUOutputReady ) |
    begin |
        // Tags describing what happened to this instruction |
        if (iBranchTaken) |
            `LOGME"<BT>"); |

        if (iBranchNotTaken ) |
            `LOGME"<BNT>"); |

        if (oRAMWriteEnable) |
            `LOGME"<WE>"); |

        `LOGME "(%dns ",$time); |

        // Mnemonic of the operation currently presented to the ALU |
        case ( oALUOperation ) |
            `RETURN:     `LOGME"RETURN"); |
            `ADD:        `LOGME"ADD"); |
            `SUB:        `LOGME"SUB"); |
            `DIV:        `LOGME"DIV"); |
            `MUL:        `LOGME"MUL"); |
            `MAG:        `LOGME"MAG"); |
            `JGX:        `LOGME"JGX"); |
            `JLX:        `LOGME"JLX"); |
            `JGEX:       `LOGME"JGEX"); |
            `JGEY:       `LOGME"JGEY"); |
            `JGEZ:       `LOGME"JGEZ"); |
            `JLEX:       `LOGME"JLEX"); |
            `JLEY:       `LOGME"JLEY"); |
            `JLEZ:       `LOGME"JLEZ"); |
            `JMP:        `LOGME"JMP"); |
            `ZERO:       `LOGME"ZERO"); |
            `JNEX:       `LOGME"JNEX"); |
            `JNEY:       `LOGME"JNEY"); |
            `JNEZ:       `LOGME"JNEZ"); |
            `JEQX:       `LOGME"JEQX"); |
            `JEQY:       `LOGME"JEQY"); |
            `JEQZ:       `LOGME"JEQZ"); |
            `CROSS:      `LOGME"CROSS"); |
            `DOT:        `LOGME"DOT"); |
            `SETX:       `LOGME"SETX"); |
            `SETY:       `LOGME"SETY"); |
            `SETZ:       `LOGME"SETZ"); |
            `NOP:        `LOGME"NOP"); |
            `COPY:       `LOGME"COPY"); |
            `INC:        `LOGME"INC"); |
            `DEC:        `LOGME"DEC"); |
            `MOD:        `LOGME"MOD"); |
            `FRAC:       `LOGME"FRAC"); |
            `NEG:        `LOGME"NEG"); |
            `SWIZZLE3D:  `LOGME"SWIZZLE3D"); |
            `MULP:       `LOGME"MULP"); |
            `XCHANGEX:   `LOGME"XCHANGEX"); |
            `IMUL:       `LOGME"IMUL"); |
            `UNSCALE:    `LOGME"UNSCALE"); |
            `INCX:       `LOGME"INCX"); |
            `INCY:       `LOGME"INCY"); |
            `INCZ:       `LOGME"INCZ"); |
            `DEBUG_PRINT: |
            begin |
                `LOGME"DEBUG_PRINT"); |
            end |
            default: |
            begin |
                `LOGME"**********ERROR UNKNOWN OP*********"); |
                $display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation); |
                // $stop(); |
            end |
        endcase |

        // Destination address and the operands captured by the debug flops |
        `LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ", |
            oRAMWriteAddress, |
            wALUChannelX1,wALUChannelY1,wALUChannelZ1, |
            wALUChannelX2,wALUChannelY2,wALUChannelZ2 |
        ); |

        // Blank lines after a RETURN; emitted before the result, in the |
        // same position as when the two original blocks ran in source order |
        if (oALUOperation == `RETURN) |
            `LOGME"\n\n\n"); |

        // ALU result triple closes the record |
        `LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ); |
    end |
end //always |
`endif |
|
endmodule |
/Unit_EXE.v
0,0 → 1,247
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
//--------------------------------------------------------------------- |
// ExecutionUnit: top level of the execution pipeline. It wires together the |
// entry-point unit (EPU), instruction fetch (IFU), instruction decode (IDU), |
// the execution FSM (EXE) and the vector ALU. |
module ExecutionUnit |
( |

input wire Clock, |
input wire Reset, |
input wire [`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
// Instruction read at oInstructionPointer1 / oInstructionPointer2 respectively |
// (naming suggests this; the instruction memory itself is outside this module) |
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1, |
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction2, |


// Data memory read ports, addressed by oDataReadAddress0/1 |
input wire [`DATA_ROW_WIDTH-1:0] iDataRead0, |
input wire [`DATA_ROW_WIDTH-1:0] iDataRead1, |
// Starts the EPU and is also OR-ed into the reset of the execution FSM |
input wire iTrigger, |


// Driven by the EPU while it is busy, by the IFU otherwise |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer1, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer2, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress0, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress1, |
// Write-back port driven by the execution FSM |
output wire oDataWriteEnable, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress, |
output wire [`DATA_ROW_WIDTH-1:0] oDataBus, |
output wire oReturnCode, |

`ifdef DEBUG |
input wire [`MAX_CORES-1:0] iDebug_CoreID, |
`endif |
// Asserted by the IFU (oExecutionDone) |
output wire oDone |




); |
|
|
// Internal nets of ExecutionUnit. |
`ifdef DEBUG |
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer; |
`endif |

// NOTE(review): wEXE2__uCodeDone is declared but not connected anywhere in this module |
wire wEXE2__uCodeDone; |
// Driven by EXE.oBusy; no reader inside this module |
wire wEXE2_IFU__EXEBusy; |
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination; |
wire wALU2_EXE__BranchTaken; |
wire wALU2_IFU_BranchNotTaken; |
wire [`INSTRUCTION_WIDTH-1:0] CurrentInstruction; |
//wire wIDU2_IFU__IDUBusy; |


// Decoded instruction fields: IDU -> EXE |
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation; |


wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1; |
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination; |
wire wInstructionAvailable; |

//ALU wires |
wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation; |
wire [`WIDTH-1:0] ALU2ChannelA; |
wire [`WIDTH-1:0] ALU2ChannelB; |
wire [`WIDTH-1:0] ALU2ChannelC; |
wire [`WIDTH-1:0] ALU2ChannelD; |
wire [`WIDTH-1:0] ALU2ChannelE; |
wire [`WIDTH-1:0] ALU2ChannelF; |
wire [`WIDTH-1:0] ALU2ResultA; |
wire [`WIDTH-1:0] ALU2ResultB; |
wire [`WIDTH-1:0] ALU2ResultC; |
wire wEXE2_ALU__TriggerALU; |
wire ALU2OutputReady; |
wire w2FIU__BranchTaken; |
// Driven by EXE.oJumpIp; no reader inside this module |
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp; |


//wire wIDU2_IFU__InputsLatched; |

wire wEPU_Busy,wTriggerIFU; |
wire [`ROM_ADDRESS_WIDTH-1:0] wEPU_IP,wIFU_IP,wCodeEntryPoint; |

// While the entry-point unit is busy it owns instruction pointer 1; |
// afterwards the fetch unit drives it. |
assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP; |
|
|
// Entry-point unit: started by iTrigger; produces the code entry point and |
// the trigger for the fetch unit, and drives instruction pointer 1 while busy. |
InstructionEntryPoint EPU |
( |
    // clock / reset |
    .Clock               ( Clock               ), |
    .Reset               ( Reset               ), |
    // inputs |
    .iTrigger            ( iTrigger            ), |
    .iInitialCodeAddress ( iInitialCodeAddress ), |
    .iIMemInput          ( iInstruction1       ), |
    // outputs |
    .oEPU_Busy           ( wEPU_Busy           ), |
    .oInstructionAddr    ( wEPU_IP             ), |
    .oEntryPoint         ( wCodeEntryPoint     ), |
    .oTriggerIFU         ( wTriggerIFU         ) |
); |
|
// Instruction fetch unit: started by the EPU trigger at the EPU's entry point; |
// advances on ALU completion and selects between the two fetched instructions |
// when a branch is taken. |
InstructionFetch IFU |
( |
    // clock / reset |
    .Clock                 ( Clock                 ), |
    .Reset                 ( Reset                 ), |
    // start-up |
    .iTrigger              ( wTriggerIFU           ), |
    .iInitialCodeAddress   ( wCodeEntryPoint       ), |
    // instruction memory side |
    .iInstruction1         ( iInstruction1         ), |
    .iInstruction2         ( iInstruction2         ), |
    .oIP                   ( wIFU_IP               ), |
    .oIP2                  ( oInstructionPointer2  ), |
    // pipeline side |
    .iBranchTaken          ( w2FIU__BranchTaken    ), |
    .iEXEDone              ( ALU2OutputReady       ), |
    .oCurrentInstruction   ( CurrentInstruction    ), |
    .oInstructionAvalable  ( wInstructionAvailable ), |
    // program completion |
    .oMicroCodeReturnValue ( oReturnCode           ), |
    .oExecutionDone        ( oDone                 ) |
); |
|
////--------------------------------------------------------- |
// Handshake nets between the decoder and the execution FSM |
wire wIDU2_EXE_DataReady, wEXE2_IDU_ExeLatchedValues; |

// Instruction decode unit: reads both operands from data memory (or takes |
// the forwarded ALU result) and hands operation, destination and sources |
// to the execution FSM. |
InstructionDecode IDU |
( |
    .Clock                 ( Clock                 ), |
    .Reset                 ( Reset                 ), |

    // from the fetch unit |
    .iInstructionAvailable ( wInstructionAvailable ), |
    .iEncodedInstruction   ( CurrentInstruction    ), |

    // data memory read ports |
    .oRamAddress0          ( oDataReadAddress0     ), |
    .iRamValue0            ( iDataRead0            ), |
    .oRamAddress1          ( oDataReadAddress1     ), |
    .iRamValue1            ( iDataRead1            ), |

    // data forwarding from the previous instruction |
    .iDataForward          ( {ALU2ResultA,ALU2ResultB,ALU2ResultC} ), |
    .iLastDestination      ( wEXE2_IDU_DataFordward_LastDestination ), |

    // decoded fields going to the ALU-FSM |
    .oOperation            ( wOperation            ), |
    .oDestination          ( wDestination          ), |
    .oSource0              ( wSource0              ), |
    .oSource1              ( wSource1              ), |

`ifdef DEBUG |
    .iDebug_CurrentIP      ( oInstructionPointer1  ), |
    .oDebug_CurrentIP      ( wDEBUG_IDU2_EXE_InstructionPointer ), |
`endif |

    .oDataReadyForExe      ( wIDU2_EXE_DataReady   ) |
); |
|
|
// Execution FSM instance: sequences decode -> ALU -> write-back. |
ExecutionFSM EXE |
( |
.Clock( Clock ), |
// The FSM is also reset on every iTrigger, i.e. whenever a new run is started |
.Reset( Reset | iTrigger ), //New Sat Jun13 |
.iDecodeDone( wIDU2_EXE_DataReady ), |
.iOperation( wOperation ), |
.iDestination( wDestination ), |
.iSource0( wSource0 ), |
.iSource1( wSource1 ) , |

`ifdef DEBUG |
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ), |
.iDebug_CoreID( iDebug_CoreID ), |
`endif |

//.iJumpResultFromALU( wALU2_EXE__BranchTaken ), |
.iBranchTaken( wALU2_EXE__BranchTaken ), |
.iBranchNotTaken( wALU2_IFU_BranchNotTaken ), |
// Branch-taken flag forwarded to the fetch unit |
.oJumpFlag( w2FIU__BranchTaken ), |
// JumpIp has no reader in this module |
.oJumpIp( JumpIp ), |
// Data memory write-back port |
.oRAMWriteEnable( oDataWriteEnable ), |
.oRAMWriteAddress( oDataWriteAddress ), |
.RAMBus( oDataBus ), |
// wEXE2_IFU__EXEBusy has no reader in this module |
.oBusy( wEXE2_IFU__EXEBusy ), |

.oExeLatchedValues( wEXE2_IDU_ExeLatchedValues ), |
.oLastDestination( wEXE2_IDU_DataFordward_LastDestination ), |

//ALU ports and control signals |
// Channel nets A/C/E carry the X1/Y1/Z1 operands and B/D/F carry X2/Y2/Z2; |
// the ALU instance receives them as its A and B channels respectively. |
.oTriggerALU( wEXE2_ALU__TriggerALU ), |
.oALUOperation( ALU2Operation ), |
.oALUChannelX1( ALU2ChannelA ), |
.oALUChannelX2( ALU2ChannelB ), |
.oALUChannelY1( ALU2ChannelC ), |
.oALUChannelY2( ALU2ChannelD ), |
.oALUChannelZ1( ALU2ChannelE ), |
.oALUChannelZ2( ALU2ChannelF ), |
.iALUResultX( ALU2ResultA ), |
.iALUResultY( ALU2ResultB ), |
.iALUResultZ( ALU2ResultC ), |
.iALUOutputReady( ALU2OutputReady ) |

); |
|
|
//-------------------------------------------------------- |
|
// Vector ALU: operates on the two X/Y/Z operand triples supplied by EXE and |
// reports the result triple, the branch outcome and a ready strobe. |
VectorALU ALU |
( |
    .Clock           ( Clock                    ), |
    .Reset           ( Reset                    ), |

    // request |
    .iInputReady     ( wEXE2_ALU__TriggerALU    ), |
    .iOperation      ( ALU2Operation            ), |

    // operand A (x,y,z) |
    .iChannel_Ax     ( ALU2ChannelA             ), |
    .iChannel_Ay     ( ALU2ChannelC             ), |
    .iChannel_Az     ( ALU2ChannelE             ), |

    // operand B (x,y,z) |
    .iChannel_Bx     ( ALU2ChannelB             ), |
    .iChannel_By     ( ALU2ChannelD             ), |
    .iChannel_Bz     ( ALU2ChannelF             ), |

    // response |
    .OutputReady     ( ALU2OutputReady          ), |
    .oResultA        ( ALU2ResultA              ), |
    .oResultB        ( ALU2ResultB              ), |
    .oResultC        ( ALU2ResultC              ), |
    .oBranchTaken    ( wALU2_EXE__BranchTaken   ), |
    .oBranchNotTaken ( wALU2_IFU_BranchNotTaken ) |
); |
|
|
|
endmodule |
//--------------------------------------------------------------------- |
/Module_InstructionFetch.v
0,0 → 1,170
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
/********************************************************************************** |
Description: |
This is the instruction fetch unit. |
It gets the next instruction from the IMEM module at the MEM unit. |
It increments the instruction pointer (IP) in such a way that EXE always has |
one instruction per clock cycle (best pipeline performance). In order to achieve this, |
IFU has 2 instruction pointers, so that in case of 'branch' instructions, |
two instruction pointers are generated and two different instructions are simultaneously |
fetched from IMEM: the branch-taken and branch-not-taken instructions, so that once the |
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched. |
**********************************************************************************/ |
// InstructionFetch: selects the instruction handed to the decoder and |
// maintains the instruction pointer(s) used to address instruction memory. |
module InstructionFetch |
( |
input wire Clock, |
input wire Reset, |
// Loads the instruction pointer with iInitialCodeAddress and, two cycles later, |
// flags the first instruction as available |
input wire iTrigger, |
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch not taken instruction |
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction2, //Branch taken instruction |
input wire iBranchTaken, |
// (sic) port name is part of the interface; kept as spelled |
output wire oInstructionAvalable, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP, |
// Taken from bits [47:32] of the current instruction |
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calculate both, decide later |
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction, |
// Completion strobe of the current instruction; advances the instruction pointer |
input wire iEXEDone, |
// Bit 0 of the current instruction |
output wire oMicroCodeReturnValue, |
// High when the current instruction is RETURN and iEXEDone is asserted |
output wire oExecutionDone |
); |
// Opcode field (the top `INSTRUCTION_OP_LENGTH bits) of the current instruction |
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |
//iInstruction1[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] |

// Return value is carried in bit 0 of the current instruction |
assign oMicroCodeReturnValue = oCurrentInstruction[0]; |
// Second instruction pointer comes from bits [47:32] of the current instruction. |
// NOTE(review): the slice is hard-coded to 16 bits - confirm it matches `ROM_ADDRESS_WIDTH. |
assign oIP2 = oCurrentInstruction[47:32];//iInstruction1[47:32]; |

// Delayed copies of the trigger and of the "last instruction seen" flag. |
// (The never-used wIncrementIP_Delay1/2 declarations were removed.) |
wire wTriggerDelay1,wTriggerDelay2, |
wLastInst_Delay1,wLastInst_Delay2; |
wire wIncrementIP,wLastInstruction; |


assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN); |

//Increment IP 2 cycles after trigger or every time EXE is done, but stop once we reach the RETURN |
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction); |
//It takes 1 clock cycle to read the instruction back from IMEM |
assign oInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2); |
//Once we reach the last instruction, wait until EXE says it is done, then assert oExecutionDone |
assign oExecutionDone = (wLastInstruction & iEXEDone); |
|
// Trigger delay line: iTrigger -> wTriggerDelay1 -> wTriggerDelay2 |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2 |
    ( .D( iTrigger ), .Q( wTriggerDelay1 ), |
      .Enable( 1'b1 ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3 |
    ( .D( wTriggerDelay1 ), .Q( wTriggerDelay2 ), |
      .Enable( 1'b1 ), .Clock( Clock ), .Reset( Reset ) ); |

// "Last instruction" delay line. The first stage only loads while the |
// current instruction is RETURN; the second stage is always enabled. |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4 |
    ( .D( oInstructionAvalable ), .Q( wLastInst_Delay1 ), |
      .Enable( wLastInstruction ), .Clock( Clock ), .Reset( Reset ) ); |

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5 |
    ( .D( wLastInst_Delay1 ), .Q( wLastInst_Delay2 ), |
      .Enable( 1'b1 ), //wLastInstruction), |
      .Clock( Clock ), .Reset( Reset ) ); |
|
// oIP2 + 1, produced by the INCREMENT instance of this module |
wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next; |

/* |
In case the branch is taken: |
We point the current instruction to iInstruction2 (the branch-taken instruction) |
that corresponds to oIP2. |
Then, in the next clock cycle we should use oIP2 incremented by one, |
so we need to load UPCOUNTER_POSEDGE with oIP2+1 |
*/ |


//If the branch was taken, then use the pre-fetched instruction (iInstruction2) |
wire[`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1,wCurrentInstruction_BranchTaken; |
// Captures the current instruction only in cycles where iBranchTaken is high |
// (in such a cycle oCurrentInstruction resolves to iInstruction2 unless the |
// previous cycle was also a taken branch) |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(iBranchTaken), |
.D( oCurrentInstruction ), |
.Q( wCurrentInstruction_Delay1 ) |
); |

// iBranchTaken delayed by one clock |
wire wBranchTaken_Delay1; |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iBranchTaken ), |
.Q( wBranchTaken_Delay1 ) |
); |


// Same-cycle selection between the two pre-fetched instructions |
assign wCurrentInstruction_BranchTaken = (iBranchTaken ) ? iInstruction2 : iInstruction1; |

// In the cycle after a taken branch, present the instruction captured by FFDX; |
// otherwise present the same-cycle selection above. |
assign oCurrentInstruction = (wBranchTaken_Delay1) ? |
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken; |
|
// oIP2_Next = oIP2 + 1: the address that follows the branch-taken instruction |
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1 |
( |
    .A     ( oIP2      ), |
    .R     ( oIP2_Next ), |
    .Clock ( Clock     ), |
    .Reset ( Reset     ) |
); |

// Value loaded into the instruction pointer: on a taken branch continue |
// after the branch target, otherwise start from the initial code address. |
wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress; |

// Instruction pointer. It is (re)loaded on a trigger or a taken branch and |
// counts up whenever wIncrementIP is set and no branch is being taken. |
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer |
( |
    .Clock   ( Clock                        ), |
    .Reset   ( iTrigger | iBranchTaken      ), |
    .Initial ( wIPEntryPoint                ), |
    .Enable  ( wIncrementIP & ~iBranchTaken ), |
    .Q       ( oIP                          ) |
); |
|
|
endmodule |
|
//------------------------------------------------------------------------------- |
/Module_VectorALU.v
0,0 → 1,1183
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
|
//-------------------------------------------------------------- |
module VectorALU |
( |
input wire Clock, |
input wire Reset, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation, |
input wire[`WIDTH-1:0] iChannel_Ax, |
input wire[`WIDTH-1:0] iChannel_Bx, |
input wire[`WIDTH-1:0] iChannel_Ay, |
input wire[`WIDTH-1:0] iChannel_By, |
input wire[`WIDTH-1:0] iChannel_Az, |
input wire[`WIDTH-1:0] iChannel_Bz, |
output wire [`WIDTH-1:0] oResultA, |
output wire [`WIDTH-1:0] oResultB, |
output wire [`WIDTH-1:0] oResultC, |
input wire iInputReady, |
output reg oBranchTaken, |
output reg oBranchNotTaken, |
output reg OutputReady |
|
); |
|
wire wMultiplcationUnscaled; |
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0; |
|
//-------------------------------------------------------------- |
|
reg [7:0] InputReadyA,InputReadyB,InputReadyC; |
|
//------------------------------------------------------ |
/* |
This is the block that takes care of all the arithmetic |
comparisons. Supported operations are <,>,<=,>=,==,!= |
|
*/ |
//------------------------------------------------------ |
reg [`WIDTH-1:0] wMultiplicationA_Ax; |
reg [`WIDTH-1:0] wMultiplicationA_Bx; |
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result; |
wire wMultiplicationA_InputReady; |
wire wMultiplicationA_OutputReady; |
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA, |
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD; |
|
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady; |
|
//-------------------------------------------------------------------- |
reg [`WIDTH-1:0] ResultA,ResultB,ResultC; |
|
//Output flip-flops: |
//these flip-flops control the outputs so that the |
//values of the outputs change ONLY when there is |
//a positive edge of OutputReady |
|
FFD32_POSEDGE ResultAFFD |
( |
.Clock( OutputReady ), |
.D( ResultA ), |
.Q( oResultA ) |
); |
|
FFD32_POSEDGE ResultBFFD |
( |
.Clock( OutputReady ), |
.D( ResultB ), |
.Q( oResultB ) |
); |
|
FFD32_POSEDGE ResultCFFD |
( |
.Clock( OutputReady ), |
.D( ResultC ), |
.Q( oResultC ) |
); |
//-------------------------------------------------------------------- |
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ; |
|
|
Swizzle3D Swizzle1 |
( |
.Source0_X( iChannel_Bx ), |
.Source0_Y( iChannel_By ), |
.Source0_Z( iChannel_Bz ), |
.iOperation( iChannel_Ax ), |
|
.SwizzleX( wSwizzleOutputX ), |
.SwizzleY( wSwizzleOutputY ), |
.SwizzleZ( wSwizzleOutputZ ) |
); |
//--------------------------------------------------------------------- |
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC; |
//wire wModulusOutputReadyA,wModulusOutputReadyB,wModulusOutputReadyC; |
|
/* |
Modulus2N MODA |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultA ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyA ) |
); |
|
Modulus2N MODB |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultB ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyB ) |
); |
|
Modulus2N MODC |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.oQuotient( wModulus2N_ResultC ), |
.iInputReady( iInputReady ), |
.oOutputReady( wModulusOutputReadyC ) |
); |
*/ |
//---------------------------------------------------------------------( |
|
|
|
|
/* |
This MUX selects the appropriate X, Y or Z channel depending on |
whether it is an X, Y or Z iOperation. This is defined by bits 3 and 4 |
of iOperation, and only applies to oBranchTaken and Store operations. |
*/ |
|
wire wArithmeticComparison_Result; |
wire ArithmeticComparison_InputReady; |
wire ArithmeticComparison_OutputReady; |
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B; |
|
|
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_A = iChannel_Ax; |
2'b10: ArithmeticComparison_A = iChannel_Ay; |
2'b11: ArithmeticComparison_A = iChannel_Az; |
default: ArithmeticComparison_A = 0; //Should never happen |
endcase |
end |
//--------------------------------------------------------------------- |
always @ ( * ) |
begin |
case ( {iOperation[4],iOperation[3]} ) |
2'b01: ArithmeticComparison_B = iChannel_Bx; |
2'b10: ArithmeticComparison_B = iChannel_By; |
2'b11: ArithmeticComparison_B = iChannel_Bz; |
default: ArithmeticComparison_B = 0; //Should never happen |
endcase |
end |
|
//--------------------------------------------------------------------- |
/* |
The only instance of ArithmeticComparison in the ALU. |
The ArithmeticComparison operation matches the 3 LSBs of the |
global ALU iOperation for the oBranchTaken instruction family |
*/ |
|
assign ArithmeticComparison_InputReady = iInputReady; |
|
wire wArithmeticComparisonResult; |
|
ArithmeticComparison ArithmeticComparison_1 |
( |
.Clock( Clock ), |
.X( ArithmeticComparison_A ), |
.Y( ArithmeticComparison_B ), |
.iOperation( iOperation[2:0] ), |
.iInputReady( ArithmeticComparison_InputReady ), |
.OutputReady( ArithmeticComparison_OutputReady ), |
.Result( wArithmeticComparisonResult ) |
); |
|
|
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady; |
//-------------------------------------------------------------------- |
// Multiplier channel A. Operand pairing selected by the muxes below:
//   `CROSS            -> Ay * Bz
//   `MAG              -> Ax * Ax
//   `MULP             -> Ax * Ay
//   `MUL, `IMUL, `DOT -> Ax * Bx
//   any other op      -> Ax * 0, and the start strobe is held low
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationA_Ax          ),
    .B(           wMultiplicationA_Bx          ),
    .R(           wMultiplicationA_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationA_InputReady  ),
    .OutputReady( wMultiplicationA_OutputReady )
);

// Start strobe: iInputReady is forwarded only for operations that use
// this multiplier.
assign wMultiplicationA_InputReady =
    (   iOperation == `CROSS
     || iOperation == `DOT
     || iOperation == `MUL
     || iOperation == `IMUL
     || iOperation == `MAG
     || iOperation == `MULP ) ? iInputReady : 1'b0;

// First operand: Ay for the cross product, Ax for everything else.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationA_Ax = iChannel_Ay;
        default: wMultiplicationA_Ax = iChannel_Ax;
    endcase
end

// Second operand.
always @ ( * )
begin
    case (iOperation)
        `MUL,`IMUL: wMultiplicationA_Bx = iChannel_Bx;  // Ax * Bx
        `MAG:       wMultiplicationA_Bx = iChannel_Ax;  // Ax * Ax
        `DOT:       wMultiplicationA_Bx = iChannel_Bx;  // Ax * Bx
        `CROSS:     wMultiplicationA_Bx = iChannel_Bz;  // Ay * Bz
        `MULP:      wMultiplicationA_Bx = iChannel_Ay;  // Ax * Ay
        default:    wMultiplicationA_Bx = 32'b0;
    endcase
end
//-------------------------------------------------------------------- |
|
//------------------------------------------------------ |
|
// Multiplier channel B. Operand pairing selected by the muxes below:
//   `CROSS            -> Az * By
//   `MAG              -> Ay * Ay
//   `MUL, `IMUL, `DOT -> Ay * By
//   any other op      -> Ay * 0, and the start strobe is held low
reg  [`WIDTH-1:0]      wMultiplicationB_Ay;
reg  [`WIDTH-1:0]      wMultiplicationB_By;
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result;
wire                   wMultiplicationB_InputReady;
wire                   wMultiplicationB_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationB_Ay          ),
    .B(           wMultiplicationB_By          ),
    .R(           wMultiplicationB_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationB_InputReady  ),
    .OutputReady( wMultiplicationB_OutputReady )
);

// Start strobe: iInputReady is forwarded only for operations that use
// this multiplier.
assign wMultiplicationB_InputReady =
    (   iOperation == `CROSS
     || iOperation == `DOT
     || iOperation == `MUL
     || iOperation == `IMUL
     || iOperation == `MAG ) ? iInputReady : 1'b0;

// First operand: Az for the cross product, Ay for everything else.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationB_Ay = iChannel_Az;
        default: wMultiplicationB_Ay = iChannel_Ay;
    endcase
end

// Second operand.
always @ ( * )
begin
    case (iOperation)
        `MUL,`IMUL: wMultiplicationB_By = iChannel_By;  // Ay * By
        `MAG:       wMultiplicationB_By = iChannel_Ay;  // Ay * Ay
        `DOT:       wMultiplicationB_By = iChannel_By;  // Ay * By
        `CROSS:     wMultiplicationB_By = iChannel_By;  // Az * By
        default:    wMultiplicationB_By = 32'b0;
    endcase
end
//---------------------------------------------------- |
|
//------------------------------------------------------ |
// Multiplier channel C. Operand pairing selected by the muxes below:
//   `CROSS            -> Az * Bx
//   `MAG              -> Az * Az
//   `MUL, `IMUL, `DOT -> Az * Bz
//   any other op      -> Az * 0, and the start strobe is held low
reg  [`WIDTH-1:0]      wMultiplicationC_Az;
reg  [`WIDTH-1:0]      wMultiplicationC_Bz;
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result;
wire                   wMultiplicationC_InputReady;
wire                   wMultiplicationC_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationC_Az          ),
    .B(           wMultiplicationC_Bz          ),
    .R(           wMultiplicationC_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationC_InputReady  ),
    .OutputReady( wMultiplicationC_OutputReady )
);

// Start strobe: iInputReady is forwarded only for operations that use
// this multiplier.
assign wMultiplicationC_InputReady =
    (   iOperation == `CROSS
     || iOperation == `DOT
     || iOperation == `MUL
     || iOperation == `IMUL
     || iOperation == `MAG ) ? iInputReady : 1'b0;

// First operand: Az for every operation. The case form is kept so the
// mux mirrors channels A and B.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationC_Az = iChannel_Az;  // Az * Bx
        default: wMultiplicationC_Az = iChannel_Az;  // Az * Bz, Az * Az
    endcase
end

// Second operand.
always @ ( * )
begin
    case (iOperation)
        `MUL,`IMUL: wMultiplicationC_Bz = iChannel_Bz;  // Az * Bz
        `MAG:       wMultiplicationC_Bz = iChannel_Az;  // Az * Az
        `DOT:       wMultiplicationC_Bz = iChannel_Bz;  // Az * Bz
        `CROSS:     wMultiplicationC_Bz = iChannel_Bx;  // Az * Bx
        default:    wMultiplicationC_Bz = 32'b0;
    endcase
end
//---------------------------------------------------- |
|
// Multiplier channel D: only used by `CROSS, where it computes Ax * Bz.
reg  [`WIDTH-1:0]      wMultiplicationD_Aw;
reg  [`WIDTH-1:0]      wMultiplicationD_Bw;
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result;
wire                   wMultiplicationD_InputReady;
wire                   wMultiplicationD_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationD_Aw          ),
    .B(           wMultiplicationD_Bw          ),
    .R(           wMultiplicationD_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationD_InputReady  ),
    .OutputReady( wMultiplicationD_OutputReady )
);

// Start strobe is forwarded for `CROSS only.
assign wMultiplicationD_InputReady =
    (iOperation == `CROSS) ? iInputReady : 1'b0;

// Both operands are zero unless a cross product is being computed.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationD_Aw = iChannel_Ax;  // Ax * Bz
        wMultiplicationD_Bw = iChannel_Bz;
    end
    else
    begin
        wMultiplicationD_Aw = 32'b0;
        wMultiplicationD_Bw = 32'b0;
    end
end
//---------------------------------------------------- |
// Multiplier channel E: only used by `CROSS, where it computes Ax * By.
reg  [`WIDTH-1:0]      wMultiplicationE_Ak;
reg  [`WIDTH-1:0]      wMultiplicationE_Bk;
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result;
wire                   wMultiplicationE_InputReady;
wire                   wMultiplicationE_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationE_Ak          ),
    .B(           wMultiplicationE_Bk          ),
    .R(           wMultiplicationE_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationE_InputReady  ),
    .OutputReady( wMultiplicationE_OutputReady )
);

// Start strobe is forwarded for `CROSS only.
assign wMultiplicationE_InputReady =
    (iOperation == `CROSS) ? iInputReady : 1'b0;

// Both operands are zero unless a cross product is being computed.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationE_Ak = iChannel_Ax;  // Ax * By
        wMultiplicationE_Bk = iChannel_By;
    end
    else
    begin
        wMultiplicationE_Ak = 32'b0;
        wMultiplicationE_Bk = 32'b0;
    end
end
|
//---------------------------------------------------- |
// Multiplier channel F: only used by `CROSS, where it computes Ay * Bx.
reg  [`WIDTH-1:0]      wMultiplicationF_Al;
reg  [`WIDTH-1:0]      wMultiplicationF_Bl;
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result;
wire                   wMultiplicationF_InputReady;
wire                   wMultiplicationF_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F
(
    .Clock(       Clock                        ),
    .Reset(       Reset                        ),
    .A(           wMultiplicationF_Al          ),
    .B(           wMultiplicationF_Bl          ),
    .R(           wMultiplicationF_Result      ),
    .iUnscaled(   wMultiplcationUnscaled       ),
    .iInputReady( wMultiplicationF_InputReady  ),
    .OutputReady( wMultiplicationF_OutputReady )
);

// Start strobe is forwarded for `CROSS only.
assign wMultiplicationF_InputReady =
    (iOperation == `CROSS) ? iInputReady : 1'b0;

// Both operands are zero unless a cross product is being computed.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationF_Al = iChannel_Ay;  // Ay * Bx
        wMultiplicationF_Bl = iChannel_Bx;
    end
    else
    begin
        wMultiplicationF_Al = 32'b0;
        wMultiplicationF_Bl = 32'b0;
    end
end
//------------------------------------------------------ |
// Three SignedIntegerDivision units, one per component:
//   channel A: Ax / Bx,  channel B: Ay / By,  channel C: Az / Bz.
// Each unit receives iInputReady only while iOperation is `DIV.

//---------------- Channel A (X) ----------------
wire [`WIDTH-1:0] wDivisionA_Result;
wire              wDivisionA_InputReady;
wire              wDivisionA_OutputReady;

assign wDivisionA_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;

SignedIntegerDivision DivisionChannel_A
(
    .Clock(       Clock                  ),
    .Reset(       Reset                  ),
    .iDividend(   iChannel_Ax            ),
    .iDivisor(    iChannel_Bx            ),
    .xQuotient(   wDivisionA_Result      ),
    .iInputReady( wDivisionA_InputReady  ),
    .OutputReady( wDivisionA_OutputReady )
);

//---------------- Channel B (Y) ----------------
wire [`WIDTH-1:0] wDivisionB_Result;
wire              wDivisionB_InputReady;
wire              wDivisionB_OutputReady;

assign wDivisionB_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;

SignedIntegerDivision DivisionChannel_B
(
    .Clock(       Clock                  ),
    .Reset(       Reset                  ),
    .iDividend(   iChannel_Ay            ),
    .iDivisor(    iChannel_By            ),
    .xQuotient(   wDivisionB_Result      ),
    .iInputReady( wDivisionB_InputReady  ),
    .OutputReady( wDivisionB_OutputReady )
);

//---------------- Channel C (Z) ----------------
wire [`WIDTH-1:0] wDivisionC_Result;
wire              wDivisionC_InputReady;
wire              wDivisionC_OutputReady;

assign wDivisionC_InputReady = (iOperation == `DIV) ? iInputReady : 1'b0;

SignedIntegerDivision DivisionChannel_C
(
    .Clock(       Clock                  ),
    .Reset(       Reset                  ),
    .iDividend(   iChannel_Az            ),
    .iDivisor(    iChannel_Bz            ),
    .xQuotient(   wDivisionC_Result      ),
    .iInputReady( wDivisionC_InputReady  ),
    .OutputReady( wDivisionC_OutputReady )
);
//-------------------------------------------------------------- |
/*
 First addition/subtraction block.
 All of its inputs and outputs are combinational signals; each of the
 two operands and the start strobe has its own MUX, driven by iOperation.
*/
reg  [`LONG_WIDTH-1:0] wAddSubA_Ax, wAddSubA_Bx;
wire [`LONG_WIDTH-1:0] wAddSubA_Result;
wire                   wAddSubA_Operation;   // Either addition or subtraction
reg                    wAddSubA_InputReady;
wire                   wAddSubA_OutputReady;

// Operation flag: set for `SUB, `CROSS, `DEC and `MOD, clear otherwise.
assign wAddSubA_Operation =
    (   iOperation == `SUB
     || iOperation == `CROSS
     || iOperation == `DEC
     || iOperation == `MOD ) ? 1'b1 : 1'b0;

FixedAddSub AddSubChannel_A
(
    .Clock(       Clock                ),
    .Reset(       Reset                ),
    .A(           wAddSubA_Ax          ),
    .B(           wAddSubA_Bx          ),
    .R(           wAddSubA_Result      ),
    .iOperation(  wAddSubA_Operation   ),
    .iInputReady( wAddSubA_InputReady  ),
    .OutputReady( wAddSubA_OutputReady )
);

// Start strobe MUX.
// Plain add/sub style operations start together with the ALU; `MAG, `DOT
// and `CROSS start once multipliers A and B both report a result.
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB,
        `INC,`INCX,`INCY,`INCZ,
        `DEC,
        `MOD:
            wAddSubA_InputReady = iInputReady;

        `MAG,
        `DOT,
        `CROSS:
            wAddSubA_InputReady = wMultiplicationOutputReadyA
                               && wMultiplicationOutputReadyB;

        default:
            wAddSubA_InputReady = 1'b0;
    endcase
end

// wAddSubA_Ax input MUX (first operand).
// Channel values are sign-extended from bit 31 into the upper 32 bits.
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB,
        `INC,`INCX,`INCY,`INCZ,
        `DEC:
            wAddSubA_Ax = { {32{iChannel_Ax[31]}}, iChannel_Ax };

        `MOD:
            wAddSubA_Ax = { {32{iChannel_Bx[31]}}, iChannel_Bx };

        `MAG,
        `DOT,
        `CROSS:
            wAddSubA_Ax = wMultiplicationA_Result;

        default:
            wAddSubA_Ax = 64'b0;
    endcase
end

// wAddSubA_Bx input MUX (second operand).
// `INC/`INCX/`DEC/`MOD use the constant (1 << `SCALE); `INCY/`INCZ use 0.
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB:
            wAddSubA_Bx = { {32{iChannel_Bx[31]}}, iChannel_Bx };

        `INC,`INCX:
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);

        `INCY,`INCZ:
            wAddSubA_Bx = `LONG_WIDTH'd0;

        `DEC,
        `MOD:
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);

        `MAG,
        `DOT,
        `CROSS:
            wAddSubA_Bx = wMultiplicationB_Result;

        default:
            wAddSubA_Bx = 64'b0;
    endcase
end
//-------------------------------------------------------------- |
/*
 Second addition/subtraction block.
 All of its inputs and outputs are combinational signals; each of the
 two operands and the start strobe has its own MUX, driven by iOperation.
*/
wire [`LONG_WIDTH-1:0] wAddSubB_Result;
wire                   wAddSubB_Operation;   // Either addition or subtraction
reg                    wAddSubB_InputReady;
wire                   wAddSubB_OutputReady;
reg  [`LONG_WIDTH-1:0] wAddSubB_Ay, wAddSubB_By;

// Operation flag: set for `SUB, `CROSS, `DEC and `MOD, clear otherwise.
assign wAddSubB_Operation =
    (   iOperation == `SUB
     || iOperation == `CROSS
     || iOperation == `DEC
     || iOperation == `MOD ) ? 1'b1 : 1'b0;

FixedAddSub AddSubChannel_B
(
    .Clock(       Clock                ),
    .Reset(       Reset                ),
    .A(           wAddSubB_Ay          ),
    .B(           wAddSubB_By          ),
    .R(           wAddSubB_Result      ),
    .iOperation(  wAddSubB_Operation   ),
    .iInputReady( wAddSubB_InputReady  ),
    .OutputReady( wAddSubB_OutputReady )
);

// Registered copy of wMultiplicationOutputReadyC, used below for the
// `MAG and `DOT start strobes.
// NOTE(review): presumably a one-clock delay - confirm against
// FFD_POSEDGE_ASYNC_RESET.
wire wMultiplicationOutputReadyC_Dealy1;
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1
(
    .Clock( Clock                              ),
    .Clear( Reset                              ),
    .D(     wMultiplicationOutputReadyC        ),
    .Q(     wMultiplicationOutputReadyC_Dealy1 )
);

// Start strobe MUX.
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB,
        `INC,`INCX,`INCY,`INCZ,
        `DEC,
        `MOD:
            wAddSubB_InputReady = iInputReady;

        // Second stage of the sum: needs adder A plus the registered
        // multiplier C flag.
        `MAG,
        `DOT:
            wAddSubB_InputReady = wAddSubAOutputReady
                               && wMultiplicationOutputReadyC_Dealy1;

        `CROSS:
            wAddSubB_InputReady = wMultiplicationOutputReadyC
                               && wMultiplicationOutputReadyD;

        default:
            wAddSubB_InputReady = 1'b0;
    endcase
end

// wAddSubB_Ay input MUX (first operand).
// For the add/sub style operations the operand comes straight from the
// ALU channels (sign-extended from bit 31). For `MAG and `DOT it is the
// result of adder A; for `CROSS it is the product of multiplier C.
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB,
        `INC,`INCX,`INCY,`INCZ,
        `DEC:
            wAddSubB_Ay = { {32{iChannel_Ay[31]}}, iChannel_Ay };

        `MOD:
            wAddSubB_Ay = { {32{iChannel_By[31]}}, iChannel_By };

        `MAG,                                   // Ax*Ax + Ay*Ay
        `DOT:                                   // Ax*Bx + Ay*By
            wAddSubB_Ay = wAddSubA_Result;

        `CROSS:
            wAddSubB_Ay = wMultiplicationC_Result;

        default:
            wAddSubB_Ay = 64'b0;
    endcase
end
//---------------------------- |
// wAddSubB_By input MUX (second operand of AddSubChannel_B).
//   `ADD, `SUB        -> By, sign-extended from bit 31
//   `INC, `INCY       -> (1 << `SCALE)
//   `INCX, `INCZ      -> 0
//   `DEC, `MOD        -> (1 << `SCALE)
//   `MAG, `DOT        -> product of multiplier C
//   `CROSS            -> product of multiplier D
//   any other op      -> 0
// The default literal is 64 bits wide, like every other assignment to
// this LONG_WIDTH register and like the defaults of the sibling muxes
// (it used to be a 32-bit literal that relied on implicit extension).
always @ ( * )
begin
    case (iOperation)
        `ADD,
        `SUB:        wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By} : {32'b0,iChannel_By};
        `INC,`INCY:  wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
        `INCX,`INCZ: wAddSubB_By = `LONG_WIDTH'd0;
        `DEC:        wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
        `MOD:        wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
        `MAG:        wAddSubB_By = wMultiplicationC_Result;  // Az * Az
        `DOT:        wAddSubB_By = wMultiplicationC_Result;  // Az * Bz
        `CROSS:      wAddSubB_By = wMultiplicationD_Result;
        default:     wAddSubB_By = 64'b0;
    endcase
end
//-------------------------------------------------------------- |
// Third addition/subtraction block, same structure as adders A and B.
wire [`LONG_WIDTH-1:0] wAddSubC_Result;
reg  [`LONG_WIDTH-1:0] wAddSubC_Az, wAddSubC_Bz;
wire                   wAddSubC_Operation;   // Either addition or subtraction
reg                    wAddSubC_InputReady;
wire                   wAddSubC_OutputReady;

// NOTE(review): these two registers are not referenced anywhere in the
// visible part of the module - confirm they are unused before removing.
reg  [`LONG_WIDTH-1:0] AddSubC_Az, AddSubB_Bz;

// Operation flag: set for `SUB, `CROSS, `DEC and `MOD, clear otherwise.
assign wAddSubC_Operation =
    (   iOperation == `SUB
     || iOperation == `CROSS
     || iOperation == `DEC
     || iOperation == `MOD ) ? 1'b1 : 1'b0;

FixedAddSub AddSubChannel_C
(
    .Clock(       Clock                ),
    .Reset(       Reset                ),
    .A(           wAddSubC_Az          ),
    .B(           wAddSubC_Bz          ),
    .R(           wAddSubC_Result      ),
    .iOperation(  wAddSubC_Operation   ),
    .iInputReady( wAddSubC_InputReady  ),
    .OutputReady( wAddSubC_OutputReady )
);

// Start strobe: `CROSS waits for multipliers E and F; every other
// operation starts this adder together with the ALU.
always @ ( * )
begin
    if (iOperation == `CROSS)
        wAddSubC_InputReady = wMultiplicationE_OutputReady
                           && wMultiplicationF_OutputReady;
    else
        wAddSubC_InputReady = iInputReady;
end

// First operand. Channel values are sign-extended from bit 31.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wAddSubC_Az = wMultiplicationE_Result;
        `MOD:    wAddSubC_Az = { {32{iChannel_Bz[31]}}, iChannel_Bz };
        default: wAddSubC_Az = { {32{iChannel_Az[31]}}, iChannel_Az };
    endcase
end

// Second operand.
always @ ( * )
begin
    case (iOperation)
        `CROSS:
            wAddSubC_Bz = wMultiplicationF_Result;

        `INC,`INCZ:
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);

        `INCX,`INCY:
            wAddSubC_Bz = `LONG_WIDTH'd0;

        `DEC,
        `MOD:
            wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);

        default:
            wAddSubC_Bz = { {32{iChannel_Bz[31]}}, iChannel_Bz };
    endcase
end
|
//------------------------------------------------------ |
// Square root unit. It takes the result of adder B as operand and is
// started when adder B reports a result while iOperation is `MAG.
wire [`WIDTH-1:0] wSquareRoot_Result;
wire              wSquareRoot_OutputReady;

FixedPointSquareRoot SQROOT1
(
    .Clock(       Clock                                          ),
    .Reset(       Reset                                          ),
    .Operand(     wAddSubB_Result                                ),
    .iInputReady( wAddSubBOutputReady && (iOperation == `MAG)    ),
    .Result(      wSquareRoot_Result                             ),
    .OutputReady( wSquareRoot_OutputReady                        )
);

// Modulus results: each A component is masked with the matching adder
// result.
// NOTE(review): for `MOD the adders are fed B and (1 << `SCALE) with the
// operation flag set, so the mask is presumably B - 1.0 - confirm
// against FixedAddSub.
assign wModulus2N_ResultA = iChannel_Ax & wAddSubA_Result;
assign wModulus2N_ResultB = iChannel_Ay & wAddSubB_Result;
assign wModulus2N_ResultC = iChannel_Az & wAddSubC_Result;
|
|
|
|
|
|
//----------------------------------------------------------------------
// Output MUX for ResultA.
// The dot product (`DOT) and the magnitude (`MAG) are delivered on
// ResultA (the ResultB and ResultC muxes carry the same value).
// Adder results are narrowed to 32 bits by taking bit 63 as the sign bit
// and bits 30:0 as the value.
//----------------------------------------------------------------------
always @ ( * )
begin
    case ( iOperation )
        `RETURN:
            ResultA = iChannel_Ax;

        `ADD,
        `SUB,
        `CROSS:
            ResultA = { wAddSubA_Result[63], wAddSubA_Result[30:0] };

        `DIV:
            ResultA = wDivisionA_Result;

        `MUL,
        `IMUL:
            ResultA = wMultiplicationA_Result[31:0];

        `DOT:
            ResultA = { wAddSubB_Result[63], wAddSubB_Result[30:0] };

        `MAG:
            ResultA = wSquareRoot_Result;

        `ZERO:
            ResultA = 32'b0;

        `COPY:
            ResultA = iChannel_Ax;

        `SWIZZLE3D:
            ResultA = wSwizzleOutputX;

        // Set operations
        `UNSCALE:
            ResultA = iChannel_Ax >> `SCALE;

        `SETX:
            ResultA = iChannel_Ax;

        `SETY,
        `SETZ:
            ResultA = iChannel_Bx;

        `INC,`INCX,`INCY,`INCZ,
        `DEC:
            ResultA = { wAddSubA_Result[63], wAddSubA_Result[30:0] };

        `MOD:
            ResultA = wModulus2N_ResultA;

        `FRAC:
            ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

        `MULP:
            ResultA = iChannel_Ax;

        `NEG:
            ResultA = ~iChannel_Ax + 1'b1;

        `XCHANGEX:
            ResultA = iChannel_Bx;

        default:
        begin
            // Unknown operation. The DEBUG-only report is disabled:
            // $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            // $stop();
            ResultA = 32'b0;
        end
    endcase
end
//------------------------------------------------------ |
//----------------------------------------------------------------------
// Output MUX for ResultB.
// Adder results are narrowed to 32 bits by taking bit 63 as the sign bit
// and bits 30:0 as the value. `DOT and `MAG deliver the same value as on
// ResultA.
//----------------------------------------------------------------------
always @ ( * )
begin
    case ( iOperation )
        `RETURN:
            ResultB = iChannel_Ax;

        `ADD,
        `SUB,
        `CROSS:
            ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] };

        `DIV:
            ResultB = wDivisionB_Result;

        `MUL,
        `IMUL:
            ResultB = wMultiplicationB_Result[31:0];

        `DOT:
            ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] };

        `MAG:
            ResultB = wSquareRoot_Result;

        `ZERO:
            ResultB = 32'b0;

        `COPY:
            ResultB = iChannel_Ay;

        // Set operations
        `UNSCALE:
            ResultB = iChannel_Ay >> `SCALE;

        `SETX:
            ResultB = iChannel_By;

        `SETY:
            ResultB = iChannel_Ax;

        `SETZ:
            ResultB = iChannel_By;

        `SWIZZLE3D:
            ResultB = wSwizzleOutputY;

        `INC,`INCX,`INCY,`INCZ,
        `DEC:
            ResultB = { wAddSubB_Result[63], wAddSubB_Result[30:0] };

        `MOD:
            ResultB = wModulus2N_ResultB;

        `FRAC:
            ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

        `MULP:
            ResultB = iChannel_Ay;

        `NEG:
            ResultB = ~iChannel_Ay + 1'b1;

        `XCHANGEX:
            ResultB = iChannel_Ay;

        default:
        begin
            // Unknown operation. The DEBUG-only report is disabled:
            // $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            // $stop();
            ResultB = 32'b0;
        end
    endcase
end
//------------------------------------------------------ |
//----------------------------------------------------------------------
// Output MUX for ResultC.
// Adder results are narrowed to 32 bits by taking bit 63 as the sign bit
// and bits 30:0 as the value. `DOT and `MAG deliver the same value as on
// ResultA; `MULP delivers the product of multiplier A here.
//----------------------------------------------------------------------
always @ ( * )
begin
    case ( iOperation )
        `RETURN:
            ResultC = iChannel_Ax;

        `ADD,
        `SUB,
        `CROSS:
            ResultC = { wAddSubC_Result[63], wAddSubC_Result[30:0] };

        `DIV:
            ResultC = wDivisionC_Result;

        `MUL,
        `IMUL:
            ResultC = wMultiplicationC_Result[31:0];

        `DOT:
            ResultC = { wAddSubB_Result[63], wAddSubB_Result[30:0] };

        `MAG:
            ResultC = wSquareRoot_Result;

        `ZERO:
            ResultC = 32'b0;

        `COPY:
            ResultC = iChannel_Az;

        `SWIZZLE3D:
            ResultC = wSwizzleOutputZ;

        // Set operations
        `UNSCALE:
            ResultC = iChannel_Az >> `SCALE;

        `SETX,
        `SETY:
            ResultC = iChannel_Bz;

        `SETZ:
            ResultC = iChannel_Ax;

        `INC,`INCX,`INCY,`INCZ,
        `DEC:
            ResultC = { wAddSubC_Result[63], wAddSubC_Result[30:0] };

        `MOD:
            ResultC = wModulus2N_ResultC;

        `FRAC:
            ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));

        `MULP:
            ResultC = wMultiplicationA_Result[31:0];

        `NEG:
            ResultC = ~iChannel_Az + 1'b1;

        `XCHANGEX:
            ResultC = iChannel_Az;

        default:
        begin
            // Unknown operation. The DEBUG-only report is disabled:
            // $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            // $stop();
            ResultC = 32'b0;
        end
    endcase
end
//------------------------------------------------------------------------ |
|
|
// Branch decision.
//   `JMP                  : always taken.
//   conditional jumps     : taken when the qualified comparator result
//                           (wArithmeticComparison_Result) is set.
//   any other operation   : never taken.
always @ ( * )
begin
    case (iOperation)
        `JMP:
            oBranchTaken = 1'b1;

        `JGX, `JGY, `JGZ,
        `JLX, `JLY, `JLZ,
        `JEQX,`JEQY,`JEQZ,
        `JNEX,`JNEY,`JNEZ,
        `JGEX,`JGEY,`JGEZ,
        `JLEX,`JLEY,`JLEZ:
            oBranchTaken = wArithmeticComparison_Result;

        default:
            oBranchTaken = 1'b0;
    endcase
end

// Complement of the decision above: for any jump instruction the branch
// is reported as not taken once OutputReady is asserted while
// oBranchTaken is low. Non-jump operations never assert it.
always @ ( * )
begin
    case (iOperation)
        `JMP,
        `JGX, `JGY, `JGZ,
        `JLX, `JLY, `JLZ,
        `JEQX,`JEQY,`JEQZ,
        `JNEX,`JNEY,`JNEZ,
        `JGEX,`JGEY,`JGEZ,
        `JLEX,`JLEY,`JLEZ:
            oBranchNotTaken = !oBranchTaken && OutputReady;

        default:
            oBranchNotTaken = 1'b0;
    endcase
end
//------------------------------------------------------------------------ |
// Completion flags of the multi-cycle units.
//
// Every FFT1 below has D tied high, is clocked by the OutputReady output
// of one unit and is reset by iInputReady.
// NOTE(review): presumably this stretches the unit's done pulse into a
// level that is cleared when the next operation is issued - confirm
// against the FFT1 definition.
wire wDivisionOutputReadyA, wDivisionOutputReadyB, wDivisionOutputReadyC;
wire wDivisionOutputReady;

// Adder flags are forwarded unchanged.
assign wAddSubAOutputReady = wAddSubA_OutputReady;
assign wAddSubBOutputReady = wAddSubB_OutputReady;
assign wAddSubCOutputReady = wAddSubC_OutputReady;

// Division: one flag per channel, combined below.
FFT1 FFT_DivisionA
(
    .Clock( wDivisionA_OutputReady ),
    .Reset( iInputReady            ),
    .D(     1'b1                   ),
    .Q(     wDivisionOutputReadyA  )
);

FFT1 FFT_DivisionB
(
    .Clock( wDivisionB_OutputReady ),
    .Reset( iInputReady            ),
    .D(     1'b1                   ),
    .Q(     wDivisionOutputReadyB  )
);

FFT1 FFT_DivisionC
(
    .Clock( wDivisionC_OutputReady ),
    .Reset( iInputReady            ),
    .D(     1'b1                   ),
    .Q(     wDivisionOutputReadyC  )
);

// The division is complete when all three channels have flagged.
assign wDivisionOutputReady =
    wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC;

// Multiplier flags are forwarded unchanged.
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady;
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady;
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady;
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady;

// A component-wise multiplication is complete when channels A, B and C
// all report a result.
assign wMultiplicationOutputReady =
    wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC;

// Square root flag, handled like the division flags.
wire wSquareRootOutputReady;
FFT1 FFT_Sqrt
(
    .Clock( wSquareRoot_OutputReady ),
    .Reset( iInputReady             ),
    .D(     1'b1                    ),
    .Q(     wSquareRootOutputReady  )
);
|
|
//------------------------------------------------------------------------ |
// Registered copy of iInputReady. The OutputReady mux uses it for the
// operations whose result does not come from a multi-cycle unit.
wire wOutputDelay1Cycle;

FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2
(
    .Clock( Clock              ),
    .Clear( Reset              ),
    .D(     iInputReady        ),
    .Q(     wOutputDelay1Cycle )
);

// iOperation registered with iInputReady as the enable; wOperation is the
// selector of the OutputReady mux.
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2
(
    .Clock(  Clock       ),
    .Reset(  Reset       ),
    .Enable( iInputReady ),
    .D(      iOperation  ),
    .Q(      wOperation  )
);
|
|
// MUX for the OutputReady signal.
// The selector is wOperation (the registered operation code), not
// iOperation. Each operation reports completion through the flag of the
// unit that produces its result:
//   - operations without a multi-cycle unit use wOutputDelay1Cycle,
//   - adder based operations wait for all three adders,
//   - `DOT waits for adder B, `MAG for the square root flag,
//   - conditional jumps wait for the comparator.
// Fixes over the previous version:
//   - the default arm now drives a 1-bit zero (it used a 32-bit literal),
//   - the error message reports wOperation, the value that actually
//     failed to match, instead of iOperation.
always @ ( * )
begin
    case ( wOperation )
        `UNSCALE:   OutputReady = wOutputDelay1Cycle;
        `RETURN:    OutputReady = wOutputDelay1Cycle;

        `NOP:       OutputReady = wOutputDelay1Cycle;
        `FRAC:      OutputReady = wOutputDelay1Cycle;
        `NEG:       OutputReady = wOutputDelay1Cycle;

`ifdef DEBUG
        // Debug Print behaves as a NOP in terms of ALU...
        `DEBUG_PRINT: OutputReady = wOutputDelay1Cycle;
`endif

        `ADD,`INC,`INCX,`INCY,`INCZ:
                    OutputReady = wAddSubAOutputReady &&
                                  wAddSubBOutputReady &&
                                  wAddSubCOutputReady;

        `SUB,`DEC:  OutputReady = wAddSubAOutputReady &&
                                  wAddSubBOutputReady &&
                                  wAddSubCOutputReady;

        `DIV:       OutputReady = wDivisionOutputReady;

        `MUL,`IMUL: OutputReady = wMultiplicationOutputReady;
        `MULP:      OutputReady = wMultiplicationOutputReadyA;

        `DOT:       OutputReady = wAddSubBOutputReady;

        `CROSS:     OutputReady = wAddSubAOutputReady &&
                                  wAddSubBOutputReady &&
                                  wAddSubCOutputReady;

        `MAG:       OutputReady = wSquareRootOutputReady;

        `ZERO:      OutputReady = wOutputDelay1Cycle;

        `COPY:      OutputReady = wOutputDelay1Cycle;

        `SWIZZLE3D: OutputReady = wOutputDelay1Cycle;

        `SETX,`SETY,`SETZ,`JMP:
                    OutputReady = wOutputDelay1Cycle;

        `JGX,`JGY,`JGZ:    OutputReady = ArithmeticComparison_OutputReady;
        `JLX,`JLY,`JLZ:    OutputReady = ArithmeticComparison_OutputReady;
        `JEQX,`JEQY,`JEQZ: OutputReady = ArithmeticComparison_OutputReady;
        `JNEX,`JNEY,`JNEZ: OutputReady = ArithmeticComparison_OutputReady;
        `JGEX,`JGEY,`JGEZ: OutputReady = ArithmeticComparison_OutputReady;
        `JLEX,`JLEY,`JLEZ: OutputReady = ArithmeticComparison_OutputReady;

        `MOD:       OutputReady = wAddSubAOutputReady &&   //TODO: wait 1 more cycle
                                  wAddSubBOutputReady &&
                                  wAddSubCOutputReady;

        `XCHANGEX:  OutputReady = wOutputDelay1Cycle;

        default:
        begin
            // Unknown registered operation: never report completion and
            // log the offending code (simulation only).
            OutputReady = 1'b0;
            $display("*** ALU ERROR: wOperation = %d ***",wOperation);
        end

    endcase
end
|
endmodule |
//------------------------------------------------------------------------ |
/Module_InstructionDecode.v
0,0 → 1,136
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
module InstructionDecode |
( |
input wire Clock, |
input wire Reset, |
input wire iInstructionAvailable, |
input wire[`INSTRUCTION_WIDTH-1:0] iEncodedInstruction, |
input wire[`DATA_ROW_WIDTH-1:0] iRamValue0, |
input wire[`DATA_ROW_WIDTH-1:0] iRamValue1, |
output wire[`DATA_ADDRESS_WIDTH-1:0] oRamAddress0,oRamAddress1, |
output wire[`INSTRUCTION_OP_LENGTH-1:0] oOperation, |
output wire [`DATA_ROW_WIDTH-1:0] oSource0,oSource1, |
output wire [`DATA_ADDRESS_WIDTH-1:0] oDestination, |
input wire [`DATA_ROW_WIDTH-1:0] iDataForward, |
input wire [`DATA_ADDRESS_WIDTH-1:0] iLastDestination, |
|
`ifdef DEBUG |
input wire [`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oDebug_CurrentIP, |
`endif |
output wire oDataReadyForExe |
|
); |
wire wInmediateOperand; |
wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1; |
wire wTriggerSource0DataForward,wTriggerSource1DataForward; |
wire wSource0AddrssEqualsLastDestination,wSource1AddrssEqualsLastDestination; |
|
`ifdef DEBUG |
assign oDebug_CurrentIP = iDebug_CurrentIP; |
`endif |
//See if operation takes scalar argument |
assign wInmediateOperand = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS]; |
|
//Has the value of the first argument fetched from IMEM |
assign wSource0 = iRamValue0; |
//Has the value of the second argument fetched from IMEM, or the value of the |
//destinatin register in case of scalar operation |
assign wSource1 = ( wInmediateOperand ) ? {oRamAddress1,iEncodedInstruction[15:0] ,32'b0,32'b0} : iRamValue1; //{oRamAddress1,oRamAddress0,32'b0,32'b0} : iRamValue1; |
|
//Data forwarding logic |
assign wSource0AddrssEqualsLastDestination = (oRamAddress0 == iLastDestination) ? 1'b1: 1'b0; |
assign wSource1AddrssEqualsLastDestination = (oRamAddress1 == iLastDestination) ? 1'b1: 1'b0; |
assign wTriggerSource0DataForward = wSource0AddrssEqualsLastDestination; |
assign wTriggerSource1DataForward = wSource1AddrssEqualsLastDestination && !wInmediateOperand; |
|
//The data address to fetch from IMEM |
assign oRamAddress1 = iEncodedInstruction[31:16]; |
|
//If operation takes a scalar value, then ask IMEM |
//for the previous value of the destination ([47:32]) |
//and have this value ready at oRamAddress0 |
MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX |
( |
.Sel( wInmediateOperand ), |
.I1( iEncodedInstruction[15:0] ), |
.I2( iEncodedInstruction[47:32] ), |
.O1( oRamAddress0 ) |
); |
|
|
//One clock cycle after the new instruction becomes |
//available to IDU, it should be decoded and ready |
//for execution |
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1 |
( |
.Clock( Clock ), |
.Reset( Reset ), |
.Enable(1'b1), |
.D( iInstructionAvailable ), |
.Q( oDataReadyForExe ) |
); |
|
|
//Latch the Operation |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3 |
( |
.Clock(Clock), |
.Reset(Reset), |
.Enable(iInstructionAvailable), |
.D(iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]), |
.Q(oOperation ) |
); |
//Latch the Destination |
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2 |
( |
.Clock(Clock), |
.Reset(Reset), |
.Enable(iInstructionAvailable), |
.D(iEncodedInstruction[47:32]), |
.Q(oDestination ) |
); |
|
|
//Once we made a decicions if the Sources must be forwarded or not, a series of muxes |
//are used to routed the correct data into the decoded Source outputs |
|
MUXFULLPARALELL_96bits_2SEL Source0_Mux |
( |
.Sel( wTriggerSource0DataForward ), |
.I1( wSource0 ), |
.I2( iDataForward ), |
.O1( oSource0 ) |
); |
|
MUXFULLPARALELL_96bits_2SEL Source1_Mux |
( |
.Sel( wTriggerSource1DataForward ), |
.I1( wSource1 ), |
.I2( iDataForward ), |
.O1( oSource1 ) |
); |
|
endmodule |
|
/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
module InstructionEntryPoint |
( |
input wire Clock, |
input wire Reset, |
input wire iTrigger, |
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress, |
input wire [`INSTRUCTION_WIDTH-1:0] iIMemInput, |
|
output wire oEPU_Busy, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oEntryPoint, |
output wire oTriggerIFU, |
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionAddr |
); |
|
assign oInstructionAddr = (oTriggerIFU) ? oEntryPoint : iInitialCodeAddress; |
assign oEPU_Busy = iTrigger | oTriggerIFU; |
|
|
|
FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1 |
( |
.Clock(Clock), |
.Clear( Reset ), |
.D(iTrigger), |
.Q(oTriggerIFU) |
); |
|
assign oEntryPoint = (oTriggerIFU) ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0] : `ROM_ADDRESS_WIDTH'b0; |
|
endmodule |