OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /theia_gpu/tags/Beta_0.2/rtl/EXE
    from Rev 82 to Rev 86
    Reverse comparison

Rev 82 → Rev 86

/Module_ExecutionFSM.v
0,0 → 1,533
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
// State encodings for the ExecutionFSM state machine.
// Only AFTER_RESET, WAIT_FOR_DECODE, WAIT_FOR_ALU_EXECUTION and
// WRITE_BACK_TO_RAM appear in the FSM case statement in this file;
// INITIAL_STATE, FETCH_DECODED_INST and HANDLE_JUMP are not referenced
// by the FSM code visible here. Value 6 is not assigned to any state.
`define EXEU_AFTER_RESET 0
`define EXEU_INITIAL_STATE 1
`define EXEU_WAIT_FOR_DECODE 2
`define EXEU_FETCH_DECODED_INST 3
`define EXEU_WAIT_FOR_ALU_EXECUTION 4
`define EXEU_WRITE_BACK_TO_RAM 5
`define EXEU_HANDLE_JUMP 7
 
 
 
// ExecutionFSM: takes a decoded instruction, triggers the ALU, and gates the
// write-back of the ALU result onto the shared RAM bus.
module ExecutionFSM
(
input wire Clock,
// Active-high; used as an asynchronous reset by the FSM state register.
input wire Reset,
 
// Decode strobe; ANDed with the internal latch enable to form the latch/trigger pulse.
input wire iDecodeDone,
// Opcode; forwarded unregistered to oALUOperation.
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
// Operand rows: iSource1 feeds the X1/Y1/Z1 ALU channels, iSource0 feeds X2/Y2/Z2.
input wire[`DATA_ROW_WIDTH-1:0] iSource0,iSource1,
// Destination address; registered into oRAMWriteAddress when the latch pulse is high.
input wire[`DATA_ADDRESS_WIDTH-1:0] iDestination,
// Driven with {X,Y,Z} ALU results while oRAMWriteEnable is set, high-Z otherwise.
inout wire[`DATA_ROW_WIDTH-1:0] RAMBus,
//output reg ReadyForNextInstruction,
// Mirrors iBranchTaken.
output wire oJumpFlag ,
// Mirrors oRAMWriteAddress (the jump target is the instruction destination).
output wire [`ROM_ADDRESS_WIDTH-1:0] oJumpIp ,
// High when the ALU result is ready and the operation is neither a branch nor a NOP
// (nor DEBUG_PRINT when DEBUG is defined).
output wire oRAMWriteEnable ,
output wire [`DATA_ADDRESS_WIDTH-1:0] oRAMWriteAddress ,
// Same pulse as oTriggerALU: iDecodeDone qualified by the internal latch enable.
output wire oExeLatchedValues,
// Set by the FSM while it waits for the ALU.
output reg oBusy ,
 
//ALU ports and control signals
output wire [`INSTRUCTION_OP_LENGTH-1:0] oALUOperation,
output wire [`WIDTH-1:0] oALUChannelX1,
output wire [`WIDTH-1:0] oALUChannelY1,
output wire [`WIDTH-1:0] oALUChannelZ1,
output wire [`WIDTH-1:0] oALUChannelX2,
output wire [`WIDTH-1:0] oALUChannelY2,
output wire [`WIDTH-1:0] oALUChannelZ2,
output wire oTriggerALU,
 
input wire [`WIDTH-1:0] iALUResultX,
input wire [`WIDTH-1:0] iALUResultY,
input wire [`WIDTH-1:0] iALUResultZ,
// ALU completion flag; returns the FSM to WAIT_FOR_DECODE and qualifies oRAMWriteEnable.
input wire iALUOutputReady,
// Either branch flag marks the operation as a jump, which suppresses write-back.
input wire iBranchTaken,
input wire iBranchNotTaken,
 
// Simulation-only ports, used by the trace logging at the end of the module.
`ifdef DEBUG
input wire[`ROM_ADDRESS_WIDTH-1:0] iDebug_CurrentIP,
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
//Data forward Signals
// Mirrors oRAMWriteAddress.
output wire [`DATA_ADDRESS_WIDTH-1:0] oLastDestination
 
 
);
 
// Latch enable is produced by the FSM; the latch strobe is the decode-done
// pulse qualified by that enable.
reg  rInputLatchesEnabled;
wire wLatchNow = iDecodeDone & rInputLatchesEnabled;

// The same strobe tells the decoder its values were taken and starts the ALU.
assign oExeLatchedValues = wLatchNow;
assign oTriggerALU       = wLatchNow;

// Branch outcome from the ALU is passed straight through; the jump target
// is the instruction destination (= oRAMWriteAddress).
assign oJumpFlag = iBranchTaken;
assign oJumpIp   = oRAMWriteAddress;

// Any resolved branch (taken or not) counts as a jump operation.
wire wOperationIsJump = iBranchTaken || iBranchNotTaken;

// Operations that never write a result back: NOP, plus DEBUG_PRINT in
// DEBUG builds.
`ifdef DEBUG
wire wOperationWritesBack = (oALUOperation != `NOP) && (oALUOperation != `DEBUG_PRINT);
`else
wire wOperationWritesBack = (oALUOperation != `NOP);
`endif

// Write back only when the ALU has a result, the operation is not a jump,
// and the operation is one that produces a value.
assign oRAMWriteEnable = iALUOutputReady && !wOperationIsJump && wOperationWritesBack;

// Drive the shared bus with the packed X/Y/Z result during write-back,
// release it (high-Z) otherwise.
assign RAMBus = oRAMWriteEnable
              ? {iALUResultX,iALUResultY,iALUResultZ}
              : {`DATA_ROW_WIDTH{1'bz}};

// Operand 1 of every ALU channel comes from iSource1 ...
assign oALUChannelX1 = iSource1[95:64];
assign oALUChannelY1 = iSource1[63:32];
assign oALUChannelZ1 = iSource1[31:0];

// ... and operand 2 comes from iSource0.
assign oALUChannelX2 = iSource0[95:64];
assign oALUChannelY2 = iSource0[63:32];
assign oALUChannelZ2 = iSource0[31:0];
/*
FF32_POSEDGE_SYNCRONOUS_RESET SourceX1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[95:64] ),
.Q( oALUChannelX1 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceY1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[63:32] ),
.Q( oALUChannelY1 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ1
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource1[31:0] ),
.Q( oALUChannelZ1 )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[95:64] ),
.Q(oALUChannelX1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[63:32] ),
.Q(oALUChannelY1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource1[31:0] ),
.Q(oALUChannelZ1)
);
*/
/*
FF32_POSEDGE_SYNCRONOUS_RESET SourceX2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[95:64] ),
.Q( oALUChannelX2 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceY2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[63:32] ),
.Q( oALUChannelY2 )
);
 
FF32_POSEDGE_SYNCRONOUS_RESET SourceZ2
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iSource0[31:0] ),
.Q( oALUChannelZ2 )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[95:64] ),
.Q(oALUChannelX2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[63:32] ),
.Q(oALUChannelY2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iSource0[31:0] ),
.Q(oALUChannelZ2)
);
*/
//The operation is not latched in the active code: iOperation is forwarded
//combinationally to the ALU. Only the destination is registered, by the
//PSRegDestination instance further down in this module.
 
 
assign oALUOperation = iOperation;
//assign oRAMWriteAddress = iDestination;
/*
FF_OPCODE_POSEDGE_SYNCRONOUS_RESET FFOperation
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iOperation ),
.Q( oALUOperation )
);
 
 
FF16_POSEDGE_SYNCRONOUS_RESET PSRegDestination
(
.Clock( wLatchNow ),
.Clear( Reset ),
.D( iDestination ),
.Q( oRAMWriteAddress )
);
*/
/*
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFOperation
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iOperation ),
.Q(oALUOperation)
);
*/
// Destination register: captures iDestination when wLatchNow is high and
// holds it on oRAMWriteAddress for the write-back / jump target.
// NOTE(review): FFD_POSEDGE_SYNCRONOUS_RESET is defined outside this file;
// presumably a clock-enabled D flip-flop with synchronous reset - confirm.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) PSRegDestination
(
.Clock( Clock ),//wLatchNow ),
.Reset( Reset),
.Enable( wLatchNow ),//1'b1 ),
.D( iDestination ),
.Q(oRAMWriteAddress)
);
 
//Data forwarding
// The registered destination is exported so the decoder can compare it with
// the addresses of the next instruction.
assign oLastDestination = oRAMWriteAddress;
 
//------------------------------------------------
// Execution state machine.
//
//   AFTER_RESET            -> WAIT_FOR_DECODE (unconditionally)
//   WAIT_FOR_DECODE        -> WAIT_FOR_ALU_EXECUTION when iDecodeDone
//   WAIT_FOR_ALU_EXECUTION -> WAIT_FOR_DECODE when iALUOutputReady
//
// Outputs per state:
//   oBusy                - 1 while an instruction is in flight
//   rInputLatchesEnabled - 1 while a new instruction may be latched
//                          (gates wLatchNow, which also triggers the ALU)
//------------------------------------------------
reg [7:0] CurrentState;
reg [7:0] NextState;


//------------------------------------------------
// State register. Reset is asynchronous and active high.
always @(posedge Clock or posedge Reset)
begin
    if (Reset)
        CurrentState <= `EXEU_AFTER_RESET;
    else
        CurrentState <= NextState;
end
//------------------------------------------------


// Next-state and output decode. This block is purely combinational, so it
// uses blocking assignments; every branch assigns all three outputs, so no
// latch is inferred.
always @( * )
begin
    case (CurrentState)
    //------------------------------------------
    `EXEU_AFTER_RESET:
    begin
        //ReadyForNextInstruction = 1;
        oBusy                = 0;
        rInputLatchesEnabled = 1;
        NextState            = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    /**
    At the same time iDecodeDone goes to 1, our Flops
    will store the value, so next clock cycle we can
    tell IDU to go ahead and decode the next instruction
    in the pipeline.
    */
    `EXEU_WAIT_FOR_DECODE:
    begin
        //ReadyForNextInstruction = 1;
        oBusy                = 0;
        rInputLatchesEnabled = 1;
        if ( iDecodeDone ) //This same thing triggers the ALU
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
        else
            NextState = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    /*
    The ALU was triggered by the same pulse that moved us here;
    hold the input latches closed until it reports completion.
    */
    `EXEU_WAIT_FOR_ALU_EXECUTION:
    begin
        //ReadyForNextInstruction = 0;
        oBusy                = 1;
        rInputLatchesEnabled = 0; //NO INTERRUPTIONS WHILE WE WAIT!!

        if ( iALUOutputReady ) //This same thing enables writing the results to RAM
            NextState = `EXEU_WAIT_FOR_DECODE;
        else
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
    end
    //------------------------------------------
    // NOTE: no transition in this FSM targets EXEU_WRITE_BACK_TO_RAM, so
    // this branch is unreachable in normal operation. It is kept as-is.
    `EXEU_WRITE_BACK_TO_RAM:
    begin
        //ReadyForNextInstruction = 0;
        oBusy                = 1;
        rInputLatchesEnabled = 1;
        if ( iDecodeDone )
            NextState = `EXEU_WAIT_FOR_ALU_EXECUTION;
        else
            NextState = `EXEU_WAIT_FOR_DECODE;
    end
    //------------------------------------------
    // Any unexpected state encoding recovers through the reset state.
    default:
    begin
        //ReadyForNextInstruction = 1;
        oBusy                = 0;
        rInputLatchesEnabled = 1;

        NextState            = `EXEU_AFTER_RESET;
    end
    //------------------------------------------
    endcase
end
 
//-----------------------------------------------------------------------
`ifdef DUMP_CODE
// Simulation-only: opens the micro-code dump and register log files at time 0.
// $fopen returns 0 when the file cannot be opened, so each handle is checked
// before it is used; a failure is reported but does not stop the simulation.
integer ucode_file;
integer reg_log;
initial
begin

    $display("Opening ucode dump file....\n");
    ucode_file = $fopen("Code.log","w");
    if (ucode_file == 0)
        $display("EXE: ERROR could not open Code.log for writing");
    else
        $fwrite(ucode_file,"\n\n************ Theia UCODE DUMP *******\n\n\n\n");

    $display("Opening Register log file...\n");
    reg_log = $fopen("Registers.log","w");
    if (reg_log == 0)
        $display("EXE: ERROR could not open Registers.log for writing");
end

`endif //Ucode dump
 
//-----------------------------------------------------------------------
`ifdef DEBUG
// Debug-only shadow copies of the six ALU operands. Each register samples its
// slice of iSource1 / iSource0 when wLatchNow is high; the outputs are used
// only by the trace logging below, not by the ALU.
// NOTE(review): FFD_POSEDGE_SYNCRONOUS_RESET is defined outside this file;
// presumably a clock-enabled D flip-flop with synchronous reset - confirm.
wire [`WIDTH-1:0] wALUChannelX1,wALUChannelY1,wALUChannelZ1;
wire [`WIDTH-1:0] wALUChannelX2,wALUChannelY2,wALUChannelZ2;
 
// Operand 1 (from iSource1): X, Y, Z
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[95:64] ),
.Q(wALUChannelX1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[63:32] ),
.Q(wALUChannelY1)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource1[31:0] ),
.Q(wALUChannelZ1)
);
 
 
// Operand 2 (from iSource0): X, Y, Z
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceX2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[95:64] ),
.Q(wALUChannelX2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceY2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[63:32] ),
.Q(wALUChannelY2)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) SourceZ2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( wLatchNow ),
.D( iSource0[31:0] ),
.Q(wALUChannelZ2)
);
 
 
// Debug trace (simulation only). Logging is restricted to the core whose ID
// equals `DEBUG_CORE. The core-ID test is done inside each block rather than
// in the event expression, so the blocks trigger on a plain signal edge.

// Log the instruction pointer each time the decoder hands over an instruction.
always @ (posedge iDecodeDone)
begin
    if (iDebug_CoreID == `DEBUG_CORE)
    begin
        `LOGME"[CORE %d] IP:%d", iDebug_CoreID,iDebug_CurrentIP);
    end
end

// Log one line per completed ALU operation: flags, timestamp, mnemonic,
// destination, both operand vectors and finally the result vector.
// Operands and result are written from a single block so that the result is
// always appended after the operand part of the same line.
always @ (negedge Clock)
begin
    if ( (iDebug_CoreID == `DEBUG_CORE) && iALUOutputReady )
    begin
        if (iBranchTaken)
            `LOGME"<BT>");
        if (iBranchNotTaken )
            `LOGME"<BNT>");
        if (oRAMWriteEnable)
            `LOGME"<WE>");
        `LOGME "(%dns ",$time);
        case ( oALUOperation )
            `RETURN: `LOGME"RETURN");
            `ADD: `LOGME"ADD");
            `SUB: `LOGME"SUB");
            `DIV: `LOGME"DIV");
            `MUL: `LOGME"MUL");
            `MAG: `LOGME"MAG");
            `JGX: `LOGME"JGX");
            `JLX: `LOGME"JLX");
            `JGEX: `LOGME"JGEX");
            `JGEY: `LOGME"JGEY");
            `JGEZ: `LOGME"JGEZ");
            `JLEX: `LOGME"JLEX");
            `JLEY: `LOGME"JLEY");
            `JLEZ: `LOGME"JLEZ");
            `JMP: `LOGME"JMP");
            `ZERO: `LOGME"ZERO");
            `JNEX: `LOGME"JNEX");
            `JNEY: `LOGME"JNEY");
            `JNEZ: `LOGME"JNEZ");
            `JEQX: `LOGME"JEQX");
            `JEQY: `LOGME"JEQY");
            `JEQZ: `LOGME"JEQZ");
            `CROSS: `LOGME"CROSS");
            `DOT: `LOGME"DOT");
            `SETX: `LOGME"SETX");
            `SETY: `LOGME"SETY");
            `SETZ: `LOGME"SETZ");
            `NOP: `LOGME"NOP");
            `COPY: `LOGME"COPY");
            `INC: `LOGME"INC");
            `DEC: `LOGME"DEC");
            `MOD: `LOGME"MOD");
            `FRAC: `LOGME"FRAC");
            `NEG: `LOGME"NEG");
            `SWIZZLE3D: `LOGME"SWIZZLE3D");
            `MULP: `LOGME"MULP");
            `XCHANGEX: `LOGME"XCHANGEX");
            `IMUL: `LOGME"IMUL");
            `UNSCALE: `LOGME"UNSCALE");
            `INCX: `LOGME"INCX");
            `INCY: `LOGME"INCY");
            `INCZ: `LOGME"INCZ");
            `DEBUG_PRINT:
            begin
                `LOGME"DEBUG_PRINT");
            end
            default:
            begin
                `LOGME"**********ERROR UNKNOWN OP*********");
                $display("%dns EXE: Error Unknown Instruction : %d", $time,oALUOperation);
                // $stop();
            end
        endcase
        // Destination followed by the operands captured by the debug shadow flops.
        `LOGME"\t %h [ %h %h %h ][ %h %h %h ] = ",
            oRAMWriteAddress,
            wALUChannelX1,wALUChannelY1,wALUChannelZ1,
            wALUChannelX2,wALUChannelY2,wALUChannelZ2
        );
        if (oALUOperation == `RETURN)
            `LOGME"\n\n\n");
        // Result vector closes the log line.
        `LOGME" [ %h %h %h ])\n",iALUResultX,iALUResultY,iALUResultZ);
    end
end //always
`endif
 
endmodule
/Unit_EXE.v
0,0 → 1,247
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------
// ExecutionUnit: structural top level of the execution pipeline. It wires
// together the entry-point unit (EPU), instruction fetch (IFU), instruction
// decode (IDU), the execution FSM (EXE) and the vector ALU. It contains no
// logic of its own apart from the instruction-pointer mux.
module ExecutionUnit
(
 
input wire Clock,
input wire Reset,
input wire [`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress,
// Instruction words read back for oInstructionPointer1 / oInstructionPointer2.
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction2,
 
 
// Data memory read ports; addresses are produced by the IDU.
input wire [`DATA_ROW_WIDTH-1:0] iDataRead0,
input wire [`DATA_ROW_WIDTH-1:0] iDataRead1,
// Starts the EPU and is also ORed into the reset of the EXE instance.
input wire iTrigger,
 
 
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer1,
output wire [`ROM_ADDRESS_WIDTH-1:0] oInstructionPointer2,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataReadAddress1,
// Data memory write port, driven by the EXE instance.
output wire oDataWriteEnable,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire oReturnCode,
 
`ifdef DEBUG
input wire [`MAX_CORES-1:0] iDebug_CoreID,
`endif
output wire oDone
 
 
 
 
);
 
 
`ifdef DEBUG
wire [`ROM_ADDRESS_WIDTH-1:0] wDEBUG_IDU2_EXE_InstructionPointer;
`endif
 
// NOTE(review): wEXE2__uCodeDone is declared but not connected anywhere in
// this module. wEXE2_IFU__EXEBusy, JumpIp and wEXE2_IDU_ExeLatchedValues are
// driven by the EXE instance but have no reader in this module.
wire wEXE2__uCodeDone;
wire wEXE2_IFU__EXEBusy;
wire [`DATA_ADDRESS_WIDTH-1:0] wEXE2_IDU_DataFordward_LastDestination;
wire wALU2_EXE__BranchTaken;
wire wALU2_IFU_BranchNotTaken;
wire [`INSTRUCTION_WIDTH-1:0] CurrentInstruction;
//wire wIDU2_IFU__IDUBusy;
 
 
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;
 
 
wire [`DATA_ROW_WIDTH-1:0] wSource0,wSource1;
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination;
wire wInstructionAvailable;
 
//ALU wires
// Channels A..F carry X1,X2,Y1,Y2,Z1,Z2 from EXE to Ax,Bx,Ay,By,Az,Bz of the ALU.
wire [`INSTRUCTION_OP_LENGTH-1:0] ALU2Operation;
wire [`WIDTH-1:0] ALU2ChannelA;
wire [`WIDTH-1:0] ALU2ChannelB;
wire [`WIDTH-1:0] ALU2ChannelC;
wire [`WIDTH-1:0] ALU2ChannelD;
wire [`WIDTH-1:0] ALU2ChannelE;
wire [`WIDTH-1:0] ALU2ChannelF;
wire [`WIDTH-1:0] ALU2ResultA;
wire [`WIDTH-1:0] ALU2ResultB;
wire [`WIDTH-1:0] ALU2ResultC;
wire wEXE2_ALU__TriggerALU;
wire ALU2OutputReady;
wire w2FIU__BranchTaken;
wire [`ROM_ADDRESS_WIDTH-1:0] JumpIp;
 
 
//wire wIDU2_IFU__InputsLatched;
 
wire wEPU_Busy,wTriggerIFU;
wire [`ROM_ADDRESS_WIDTH-1:0] wEPU_IP,wIFU_IP,wCodeEntryPoint;
 
// While the EPU is busy it owns instruction pointer 1; afterwards the IFU does.
assign oInstructionPointer1 = (wEPU_Busy) ? wEPU_IP : wIFU_IP;
 
 
// Entry-point unit: started by iTrigger, produces the code entry point and
// the trigger for the IFU.
InstructionEntryPoint EPU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( iTrigger ),
.iInitialCodeAddress( iInitialCodeAddress ),
.iIMemInput(iInstruction1),
 
.oEPU_Busy(wEPU_Busy),
.oEntryPoint( wCodeEntryPoint ),
.oTriggerIFU( wTriggerIFU ),
.oInstructionAddr( wEPU_IP )
 
);
 
// Instruction fetch: selects the current instruction, advances on
// ALU2OutputReady (iEXEDone) and reports end of execution.
InstructionFetch IFU
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wTriggerIFU ),
.iInstruction1( iInstruction1 ),
.iInstruction2( iInstruction2 ),
.iInitialCodeAddress( wCodeEntryPoint ),
.iBranchTaken( w2FIU__BranchTaken ),
.oCurrentInstruction( CurrentInstruction ),
.oInstructionAvalable( wInstructionAvailable ),
.oIP( wIFU_IP ),
.oIP2( oInstructionPointer2 ),
.iEXEDone( ALU2OutputReady ),
.oMicroCodeReturnValue( oReturnCode ),
.oExecutionDone( oDone )
);
 
////---------------------------------------------------------
wire wIDU2_EXE_DataReady;
wire wEXE2_IDU_ExeLatchedValues;
 
// Instruction decode: issues the two data-memory read addresses and hands
// operation, destination and both source rows to EXE. The ALU results and
// the last destination are fed back for data forwarding.
InstructionDecode IDU
(
.Clock( Clock ),
.Reset( Reset ),
.iEncodedInstruction( CurrentInstruction ),
.iInstructionAvailable( wInstructionAvailable ),
.oRamAddress0( oDataReadAddress0 ),
.oRamAddress1( oDataReadAddress1 ),
.iRamValue0( iDataRead0 ),
.iRamValue1( iDataRead1 ),
.iLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
.iDataForward( {ALU2ResultA,ALU2ResultB,ALU2ResultC} ),
//Outputs going to the ALU-FSM
.oOperation( wOperation ),
.oDestination( wDestination ),
.oSource0( wSource0 ),
.oSource1( wSource1 ),
`ifdef DEBUG
.iDebug_CurrentIP( oInstructionPointer1 ),
.oDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
`endif
.oDataReadyForExe( wIDU2_EXE_DataReady )
);
 
 
// Execution FSM: triggers the ALU and drives the data-memory write port.
// Its reset is Reset | iTrigger, i.e. it is also reset on every trigger.
ExecutionFSM EXE
(
.Clock( Clock ),
.Reset( Reset | iTrigger ), //New Sat Jun13
.iDecodeDone( wIDU2_EXE_DataReady ),
.iOperation( wOperation ),
.iDestination( wDestination ),
.iSource0( wSource0 ),
.iSource1( wSource1 ) ,
`ifdef DEBUG
.iDebug_CurrentIP( wDEBUG_IDU2_EXE_InstructionPointer ),
.iDebug_CoreID( iDebug_CoreID ),
`endif
//.iJumpResultFromALU( wALU2_EXE__BranchTaken ),
.iBranchTaken( wALU2_EXE__BranchTaken ),
.iBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.oJumpFlag( w2FIU__BranchTaken ),
.oJumpIp( JumpIp ),
.oRAMWriteEnable( oDataWriteEnable ),
.oRAMWriteAddress( oDataWriteAddress ),
.RAMBus( oDataBus ),
.oBusy( wEXE2_IFU__EXEBusy ),
 
.oExeLatchedValues( wEXE2_IDU_ExeLatchedValues ),
.oLastDestination( wEXE2_IDU_DataFordward_LastDestination ),
 
//ALU ports and control signals
.oTriggerALU( wEXE2_ALU__TriggerALU ),
.oALUOperation( ALU2Operation ),
.oALUChannelX1( ALU2ChannelA ),
.oALUChannelX2( ALU2ChannelB ),
.oALUChannelY1( ALU2ChannelC ),
.oALUChannelY2( ALU2ChannelD ),
.oALUChannelZ1( ALU2ChannelE ),
.oALUChannelZ2( ALU2ChannelF ),
.iALUResultX( ALU2ResultA ),
.iALUResultY( ALU2ResultB ),
.iALUResultZ( ALU2ResultC ),
.iALUOutputReady( ALU2OutputReady )
 
);
 
 
//--------------------------------------------------------
 
// Vector ALU. Note that it is reset by Reset only, not by iTrigger.
VectorALU ALU
(
.Clock(Clock),
.Reset(Reset),
.iOperation( ALU2Operation ),
.iChannel_Ax( ALU2ChannelA ),
.iChannel_Bx( ALU2ChannelB ),
.iChannel_Ay( ALU2ChannelC ),
.iChannel_By( ALU2ChannelD ),
.iChannel_Az( ALU2ChannelE ),
.iChannel_Bz( ALU2ChannelF ),
.oResultA( ALU2ResultA ),
.oResultB( ALU2ResultB ),
.oResultC( ALU2ResultC ),
.oBranchTaken( wALU2_EXE__BranchTaken ),
.oBranchNotTaken( wALU2_IFU_BranchNotTaken ),
.iInputReady( wEXE2_ALU__TriggerALU ),
.OutputReady( ALU2OutputReady )
);
 
 
endmodule
//---------------------------------------------------------------------
/Module_InstructionFetch.v
0,0 → 1,170
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
/**********************************************************************************
Description:
This is the instruction fetch unit (IFU).
It gets the next instruction from the IMEM module in the MEM unit.
It increments the instruction pointer (IP) in such a way that EXE always has
one instruction per clock cycle (best pipeline performance). In order to achieve this,
the IFU has 2 instruction pointers, so that in the case of 'branch' instructions
two instruction pointers are generated and two different instructions are simultaneously
fetched from IMEM: the branch-taken and the branch-not-taken instruction. Once the
branch outcome is calculated in EXE, both possible outcomes are already pre-fetched.
**********************************************************************************/
// InstructionFetch: selects the instruction handed to the decoder and
// maintains the instruction pointer(s) used to address instruction memory.
module InstructionFetch
(
input wire Clock,
input wire Reset,
// Start pulse; also reloads the instruction pointer counter.
input wire iTrigger,
input wire[`ROM_ADDRESS_WIDTH-1:0] iInitialCodeAddress,
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch not taken instruction
input wire[`INSTRUCTION_WIDTH-1:0] iInstruction2, //Branch taken instruction
input wire iBranchTaken,
output wire oInstructionAvalable,
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP,
output wire [`ROM_ADDRESS_WIDTH-1:0] oIP2, //calcule both decide later
output wire[`INSTRUCTION_WIDTH-1:0] oCurrentInstruction,
// EXE finished the current instruction.
input wire iEXEDone,
output wire oMicroCodeReturnValue,
output wire oExecutionDone
);
// Opcode field: the top INSTRUCTION_OP_LENGTH bits of the current instruction.
// NOTE(review): this `define is never undefined, so it remains visible to
// anything compiled after this file.
`define INSTRUCTION_OPCODE oCurrentInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]
//iInstruction1[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH]
 
// The return value is bit 0 of the current instruction.
assign oMicroCodeReturnValue = oCurrentInstruction[0];
// The second instruction pointer is taken from bits [47:32] of the current instruction.
assign oIP2 = oCurrentInstruction[47:32];//iInstruction1[47:32];
 
// NOTE(review): wIncrementIP_Delay1 and wIncrementIP_Delay2 are declared but
// not used anywhere in this module.
wire wTriggerDelay1,wTriggerDelay2,wIncrementIP_Delay1,wIncrementIP_Delay2,
wLastInst_Delay1,wLastInst_Delay2;
wire wIncrementIP,wLastInstruction;
 
 
// The current instruction is the last one when its opcode is RETURN.
assign wLastInstruction = (`INSTRUCTION_OPCODE == `RETURN);
 
//Increment IP 2 cycles after trigger or everytime EXE is done, but stop if we get to the RETURN
assign wIncrementIP = wTriggerDelay2 | (iEXEDone & ~wLastInstruction);
//It takes 1 clock cycle to read the instruction back from IMEM
assign oInstructionAvalable = wTriggerDelay2 | (iEXEDone & ~wLastInst_Delay2);
//Once we reach the last instruction, wait until EXE says he is done, then assert oExecutionDone
assign oExecutionDone = (wLastInstruction & iEXEDone);
 
// FFD2 and FFD3 form a two-stage register chain on iTrigger.
// NOTE(review): FFD_POSEDGE_SYNCRONOUS_RESET is defined outside this file;
// presumably a clock-enabled D flip-flop with synchronous reset - confirm.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( iTrigger ),
.Q( wTriggerDelay1 )
);
 
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( wTriggerDelay1 ),
.Q( wTriggerDelay2 )
);
 
 
// FFD4 samples oInstructionAvalable only while the RETURN instruction is
// current; FFD5 registers that value once more. The result masks
// oInstructionAvalable above.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(wLastInstruction),
.D( oInstructionAvalable ),
.Q( wLastInst_Delay1 )
);
 
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),//wLastInstruction),
.D( wLastInst_Delay1 ),
.Q( wLastInst_Delay2 )
);
 
wire [`ROM_ADDRESS_WIDTH-1:0] oIP2_Next;
 
/*
In case the branch is taken:
We point current instruction into the iInstruction2 (branch-taken) instruction
that corresponds to oIP2.
Then, in the next clock cycle we should use the oIP2 incremented by one,
so we need to load UPCOUNTER_POSEDGE with oIP2+1
*/
 
 
//If the branch was taken, then use the pre-fetched instruction (iInstruction2)
wire[`INSTRUCTION_WIDTH-1:0] wCurrentInstruction_Delay1,wCurrentInstruction_BranchTaken;
// FFDX stores the selected instruction whenever iBranchTaken is high.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_WIDTH ) FFDX
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(iBranchTaken),
.D( oCurrentInstruction ),
.Q( wCurrentInstruction_Delay1 )
);
 
// FFDY registers iBranchTaken; its output selects the stored instruction below.
wire wBranchTaken_Delay1;
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFDY
(
.Clock( Clock ),
.Reset( Reset ),
.Enable(1'b1),
.D( iBranchTaken ),
.Q( wBranchTaken_Delay1 )
);
 
 
// While the branch is being taken, use the branch-taken instruction.
assign wCurrentInstruction_BranchTaken = (iBranchTaken ) ? iInstruction2 : iInstruction1;
 
// When the registered branch flag is set, keep presenting the instruction
// stored by FFDX; otherwise pass the live selection through.
assign oCurrentInstruction = (wBranchTaken_Delay1) ?
wCurrentInstruction_Delay1 : wCurrentInstruction_BranchTaken;
 
// oIP2_Next is produced from oIP2 by the INCREMENT module (defined elsewhere;
// per the comment above it is expected to be oIP2+1).
INCREMENT # (`ROM_ADDRESS_WIDTH) INC1
(
.Clock( Clock ),
.Reset( Reset ),
.A( oIP2 ),
.R( oIP2_Next )
);
 
// Load value for the instruction pointer: oIP2_Next on a taken branch,
// the initial code address otherwise.
wire[`ROM_ADDRESS_WIDTH-1:0] wIPEntryPoint;
assign wIPEntryPoint = (iBranchTaken) ? oIP2_Next : iInitialCodeAddress;
 
// Instruction pointer counter. Its Reset input is asserted on iTrigger or on a
// taken branch, and counting is enabled by wIncrementIP except during a taken
// branch.
// NOTE(review): UPCOUNTER_POSEDGE is defined elsewhere; presumably Reset loads
// the Initial value - confirm.
UPCOUNTER_POSEDGE # (`ROM_ADDRESS_WIDTH) InstructionPointer
(
.Clock( Clock ),
.Reset(iTrigger | iBranchTaken),
.Enable(wIncrementIP & ~iBranchTaken ),
.Initial( wIPEntryPoint ),
.Q(oIP)
);
 
 
endmodule
 
//-------------------------------------------------------------------------------
/Module_VectorALU.v
0,0 → 1,1183
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
 
//--------------------------------------------------------------
module VectorALU
(
input wire Clock,
input wire Reset,
input wire[`INSTRUCTION_OP_LENGTH-1:0] iOperation,
input wire[`WIDTH-1:0] iChannel_Ax,
input wire[`WIDTH-1:0] iChannel_Bx,
input wire[`WIDTH-1:0] iChannel_Ay,
input wire[`WIDTH-1:0] iChannel_By,
input wire[`WIDTH-1:0] iChannel_Az,
input wire[`WIDTH-1:0] iChannel_Bz,
output wire [`WIDTH-1:0] oResultA,
output wire [`WIDTH-1:0] oResultB,
output wire [`WIDTH-1:0] oResultC,
input wire iInputReady,
output reg oBranchTaken,
output reg oBranchNotTaken,
output reg OutputReady
);
 
wire wMultiplcationUnscaled;
assign wMultiplcationUnscaled = (iOperation == `IMUL) ? 1'b1 : 1'b0;
 
//--------------------------------------------------------------
 
reg [7:0] InputReadyA,InputReadyB,InputReadyC;
 
//------------------------------------------------------
/*
This is the block that takes care of all tha arithmetic
comparisons. Supported operations are <,>,<=,>=,==,!=
*/
//------------------------------------------------------
reg [`WIDTH-1:0] wMultiplicationA_Ax;
reg [`WIDTH-1:0] wMultiplicationA_Bx;
wire [`LONG_WIDTH-1:0] wMultiplicationA_Result;
wire wMultiplicationA_InputReady;
wire wMultiplicationA_OutputReady;
wire wMultiplicationOutputReady, wMultiplicationOutputReadyA,
wMultiplicationOutputReadyB,wMultiplicationOutputReadyC,wMultiplicationOutputReadyD;
 
wire wAddSubAOutputReady,wAddSubBOutputReady,wAddSubCOutputReady;
 
//--------------------------------------------------------------------
reg [`WIDTH-1:0] ResultA,ResultB,ResultC;
 
//Output Flip Flops,
//This flip flop will control the outputs so that the
//values of the outputs change ONLY when when there is
//a positive edge of OutputReady
 
FFD32_POSEDGE ResultAFFD
(
.Clock( OutputReady ),
.D( ResultA ),
.Q( oResultA )
);
 
FFD32_POSEDGE ResultBFFD
(
.Clock( OutputReady ),
.D( ResultB ),
.Q( oResultB )
);
 
FFD32_POSEDGE ResultCFFD
(
.Clock( OutputReady ),
.D( ResultC ),
.Q( oResultC )
);
//--------------------------------------------------------------------
wire [`WIDTH-1:0] wSwizzleOutputX,wSwizzleOutputY,wSwizzleOutputZ;
 
 
Swizzle3D Swizzle1
(
.Source0_X( iChannel_Bx ),
.Source0_Y( iChannel_By ),
.Source0_Z( iChannel_Bz ),
.iOperation( iChannel_Ax ),
.SwizzleX( wSwizzleOutputX ),
.SwizzleY( wSwizzleOutputY ),
.SwizzleZ( wSwizzleOutputZ )
);
//---------------------------------------------------------------------
wire [`LONG_WIDTH-1:0] wModulus2N_ResultA,wModulus2N_ResultB,wModulus2N_ResultC;
//wire wModulusOutputReadyA,wModulusOutputReadyB,wModulusOutputReadyC;
 
/*
Modulus2N MODA
(
.Clock( Clock ),
.Reset( Reset ),
.oQuotient( wModulus2N_ResultA ),
.iInputReady( iInputReady ),
.oOutputReady( wModulusOutputReadyA )
);
 
Modulus2N MODB
(
.Clock( Clock ),
.Reset( Reset ),
.oQuotient( wModulus2N_ResultB ),
.iInputReady( iInputReady ),
.oOutputReady( wModulusOutputReadyB )
);
 
Modulus2N MODC
(
.Clock( Clock ),
.Reset( Reset ),
.oQuotient( wModulus2N_ResultC ),
.iInputReady( iInputReady ),
.oOutputReady( wModulusOutputReadyC )
);
*/
//---------------------------------------------------------------------(
 
 
 
 
/*
This MUX selects the appropriate X, Y or Z channel depending on
whether it is an X, Y or Z iOperation. This is defined by bits 3 and 4
of iOperation, and only applies to oBranchTaken and Store operations.
*/
 
wire wArithmeticComparison_Result;
wire ArithmeticComparison_InputReady;
wire ArithmeticComparison_OutputReady;
reg[`WIDTH-1:0] ArithmeticComparison_A,ArithmeticComparison_B;
 
 
always @ ( * )
begin
case ( {iOperation[4],iOperation[3]} )
2'b01: ArithmeticComparison_A = iChannel_Ax;
2'b10: ArithmeticComparison_A = iChannel_Ay;
2'b11: ArithmeticComparison_A = iChannel_Az;
default: ArithmeticComparison_A = 0; //Should never happen
endcase
end
//---------------------------------------------------------------------
always @ ( * )
begin
case ( {iOperation[4],iOperation[3]} )
2'b01: ArithmeticComparison_B = iChannel_Bx;
2'b10: ArithmeticComparison_B = iChannel_By;
2'b11: ArithmeticComparison_B = iChannel_Bz;
default: ArithmeticComparison_B = 0; //Should never happen
endcase
end
 
//---------------------------------------------------------------------
/*
The only instance of ArithmeticComparison in the ALU.
The ArithmeticComparison operation code matches the 3 LSBs of the
global ALU iOperation for the oBranchTaken instruction family.
*/
 
assign ArithmeticComparison_InputReady = iInputReady;
 
wire wArithmeticComparisonResult;
 
ArithmeticComparison ArithmeticComparison_1
(
.Clock( Clock ),
.X( ArithmeticComparison_A ),
.Y( ArithmeticComparison_B ),
.iOperation( iOperation[2:0] ),
.iInputReady( ArithmeticComparison_InputReady ),
.OutputReady( ArithmeticComparison_OutputReady ),
.Result( wArithmeticComparisonResult )
);
 
 
assign wArithmeticComparison_Result = wArithmeticComparisonResult && OutputReady;
//--------------------------------------------------------------------
RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_A
(
 
.Clock( Clock ),
.Reset( Reset ),
.A( wMultiplicationA_Ax ),
.B( wMultiplicationA_Bx ),
.R( wMultiplicationA_Result ),
.iUnscaled( wMultiplcationUnscaled ),
.iInputReady( wMultiplicationA_InputReady ),
.OutputReady( wMultiplicationA_OutputReady )
);
 
//--------------------------------------------------------------------
always @ ( * )
begin
case (iOperation)
`CROSS: wMultiplicationA_Ax = iChannel_Ay; // Ay * Bz
`MAG: wMultiplicationA_Ax = iChannel_Ax;
`MULP: wMultiplicationA_Ax = iChannel_Ax; //Az = Ax * Ay
default: wMultiplicationA_Ax = iChannel_Ax; // Ax * Bx
endcase
end
//--------------------------------------------------------------------
 
//assign wMultiplicationA_Ax = iChannel_Ax;
 
assign wMultiplicationA_InputReady
= (iOperation == `CROSS ||
iOperation == `DOT ||
iOperation == `MUL ||
iOperation == `IMUL ||
iOperation == `MAG ||
iOperation == `MULP
) ? iInputReady : 0;
//--------------------------------------------------------------------
always @ ( * )
begin
case (iOperation)
`MUL,`IMUL: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx
`MAG: wMultiplicationA_Bx = iChannel_Ax; //Ax^2
`DOT: wMultiplicationA_Bx = iChannel_Bx; //Ax*Bx
`CROSS: wMultiplicationA_Bx = iChannel_Bz; // Ay * Bz
`MULP: wMultiplicationA_Bx = iChannel_Ay; //Az = Ax * Ay
default: wMultiplicationA_Bx = 32'b0;
endcase
end
//--------------------------------------------------------------------
 
//------------------------------------------------------
 
// Multiplier channel B.
// Operand routing by opcode (A-input * B-input):
//   CROSS            : Az * By
//   MAG              : Ay * Ay
//   MUL / IMUL / DOT : Ay * By
reg  [`WIDTH-1:0]      wMultiplicationB_Ay, wMultiplicationB_By;
wire [`LONG_WIDTH-1:0] wMultiplicationB_Result;
wire                   wMultiplicationB_InputReady;
wire                   wMultiplicationB_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_B
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iUnscaled( wMultiplcationUnscaled ),
    .iInputReady( wMultiplicationB_InputReady ),
    .A( wMultiplicationB_Ay ),
    .B( wMultiplicationB_By ),
    .R( wMultiplicationB_Result ),
    .OutputReady( wMultiplicationB_OutputReady )
);

// A-input select: Az for the cross product, Ay otherwise.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wMultiplicationB_Ay = iChannel_Az;
        default: wMultiplicationB_Ay = iChannel_Ay;
    endcase
end

// Start strobe: forward iInputReady only for opcodes that use this multiplier.
assign wMultiplicationB_InputReady
    = ( iOperation == `CROSS ||
        iOperation == `DOT   ||
        iOperation == `MUL   ||
        iOperation == `IMUL  ||
        iOperation == `MAG
      ) ? iInputReady : 0;

// B-input select; opcodes that do not multiply on this channel see zero.
always @ ( * )
begin
    case (iOperation)
        `MUL, `IMUL:  wMultiplicationB_By = iChannel_By; // Ay * By
        `MAG:         wMultiplicationB_By = iChannel_Ay; // Ay squared
        `DOT, `CROSS: wMultiplicationB_By = iChannel_By; // Ay * By (DOT), Az * By (CROSS)
        default:      wMultiplicationB_By = 32'b0;
    endcase
end
//----------------------------------------------------
//------------------------------------------------------
// Multiplier channel C.
// Operand routing by opcode (A-input * B-input):
//   CROSS            : Az * Bx
//   MAG              : Az * Az
//   MUL / IMUL / DOT : Az * Bz
reg  [`WIDTH-1:0]      wMultiplicationC_Az, wMultiplicationC_Bz;
wire [`LONG_WIDTH-1:0] wMultiplicationC_Result;
wire                   wMultiplicationC_InputReady;
wire                   wMultiplicationC_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_C
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iUnscaled( wMultiplcationUnscaled ),
    .iInputReady( wMultiplicationC_InputReady ),
    .A( wMultiplicationC_Az ),
    .B( wMultiplicationC_Bz ),
    .R( wMultiplicationC_Result ),
    .OutputReady( wMultiplicationC_OutputReady )
);

// A-input select: every opcode feeds Az. The case on iOperation is kept so
// the block still re-evaluates whenever the opcode changes.
always @ ( * )
begin
    case (iOperation)
        `CROSS, `MAG: wMultiplicationC_Az = iChannel_Az;
        default:      wMultiplicationC_Az = iChannel_Az;
    endcase
end

// Start strobe: forward iInputReady only for opcodes that use this multiplier.
assign wMultiplicationC_InputReady
    = ( iOperation == `CROSS ||
        iOperation == `DOT   ||
        iOperation == `MUL   ||
        iOperation == `IMUL  ||
        iOperation == `MAG
      ) ? iInputReady : 0;

// B-input select; opcodes that do not multiply on this channel see zero.
always @ ( * )
begin
    case (iOperation)
        `MUL, `IMUL: wMultiplicationC_Bz = iChannel_Bz; // Az * Bz
        `MAG:        wMultiplicationC_Bz = iChannel_Az; // Az squared
        `DOT:        wMultiplicationC_Bz = iChannel_Bz; // Az * Bz
        `CROSS:      wMultiplicationC_Bz = iChannel_Bx; // Az * Bx
        default:     wMultiplicationC_Bz = 32'b0;
    endcase
end
//----------------------------------------------------
 
// Multiplier channel D: used only by CROSS, where it computes Ax * Bz.
reg  [`WIDTH-1:0]      wMultiplicationD_Aw, wMultiplicationD_Bw;
wire [`LONG_WIDTH-1:0] wMultiplicationD_Result;
wire                   wMultiplicationD_InputReady;
wire                   wMultiplicationD_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_D
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iUnscaled( wMultiplcationUnscaled ),
    .iInputReady( wMultiplicationD_InputReady ),
    .A( wMultiplicationD_Aw ),
    .B( wMultiplicationD_Bw ),
    .R( wMultiplicationD_Result ),
    .OutputReady( wMultiplicationD_OutputReady )
);

// Start strobe is gated so the multiplier only fires for CROSS.
assign wMultiplicationD_InputReady
    = ( iOperation == `CROSS ) ? iInputReady : 0;

// Both operands are driven to zero for every opcode other than CROSS.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationD_Aw = iChannel_Ax;
        wMultiplicationD_Bw = iChannel_Bz;
    end
    else
    begin
        wMultiplicationD_Aw = 32'b0;
        wMultiplicationD_Bw = 32'b0;
    end
end
//----------------------------------------------------
// Multiplier channel E: used only by CROSS, where it computes Ax * By.
reg  [`WIDTH-1:0]      wMultiplicationE_Ak, wMultiplicationE_Bk;
wire [`LONG_WIDTH-1:0] wMultiplicationE_Result;
wire                   wMultiplicationE_InputReady;
wire                   wMultiplicationE_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_E
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iUnscaled( wMultiplcationUnscaled ),
    .iInputReady( wMultiplicationE_InputReady ),
    .A( wMultiplicationE_Ak ),
    .B( wMultiplicationE_Bk ),
    .R( wMultiplicationE_Result ),
    .OutputReady( wMultiplicationE_OutputReady )
);

// Start strobe is gated so the multiplier only fires for CROSS.
assign wMultiplicationE_InputReady
    = ( iOperation == `CROSS ) ? iInputReady : 0;

// Both operands are driven to zero for every opcode other than CROSS.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationE_Ak = iChannel_Ax;
        wMultiplicationE_Bk = iChannel_By;
    end
    else
    begin
        wMultiplicationE_Ak = 32'b0;
        wMultiplicationE_Bk = 32'b0;
    end
end
//----------------------------------------------------
// Multiplier channel F: used only by CROSS, where it computes Ay * Bx.
reg  [`WIDTH-1:0]      wMultiplicationF_Al, wMultiplicationF_Bl;
wire [`LONG_WIDTH-1:0] wMultiplicationF_Result;
wire                   wMultiplicationF_InputReady;
wire                   wMultiplicationF_OutputReady;

RADIX_R_MUL_32_FULL_PARALLEL MultiplicationChannel_F
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iUnscaled( wMultiplcationUnscaled ),
    .iInputReady( wMultiplicationF_InputReady ),
    .A( wMultiplicationF_Al ),
    .B( wMultiplicationF_Bl ),
    .R( wMultiplicationF_Result ),
    .OutputReady( wMultiplicationF_OutputReady )
);

// Start strobe is gated so the multiplier only fires for CROSS.
assign wMultiplicationF_InputReady
    = ( iOperation == `CROSS ) ? iInputReady : 0;

// Both operands are driven to zero for every opcode other than CROSS.
always @ ( * )
begin
    if (iOperation == `CROSS)
    begin
        wMultiplicationF_Al = iChannel_Ay;
        wMultiplicationF_Bl = iChannel_Bx;
    end
    else
    begin
        wMultiplicationF_Al = 32'b0;
        wMultiplicationF_Bl = 32'b0;
    end
end
//------------------------------------------------------
// Divider channel A: Ax / Bx, started only when the opcode is DIV.
wire [`WIDTH-1:0] wDivisionA_Result;
wire              wDivisionA_InputReady;
wire              wDivisionA_OutputReady;

assign wDivisionA_InputReady
    = ( iOperation == `DIV ) ? iInputReady : 0;

SignedIntegerDivision DivisionChannel_A
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iInputReady( wDivisionA_InputReady ),
    .iDividend( iChannel_Ax ),
    .iDivisor( iChannel_Bx ),
    .xQuotient( wDivisionA_Result ),
    .OutputReady( wDivisionA_OutputReady )
);
//------------------------------------------------------
// Divider channel B: Ay / By, started only when the opcode is DIV.
wire [`WIDTH-1:0] wDivisionB_Result;
wire              wDivisionB_InputReady;
wire              wDivisionB_OutputReady;

assign wDivisionB_InputReady
    = ( iOperation == `DIV ) ? iInputReady : 0;

SignedIntegerDivision DivisionChannel_B
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iInputReady( wDivisionB_InputReady ),
    .iDividend( iChannel_Ay ),
    .iDivisor( iChannel_By ),
    .xQuotient( wDivisionB_Result ),
    .OutputReady( wDivisionB_OutputReady )
);
//------------------------------------------------------
// Divider channel C: Az / Bz, started only when the opcode is DIV.
wire [`WIDTH-1:0] wDivisionC_Result;
wire              wDivisionC_InputReady;
wire              wDivisionC_OutputReady;

assign wDivisionC_InputReady
    = ( iOperation == `DIV ) ? iInputReady : 0;

SignedIntegerDivision DivisionChannel_C
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iInputReady( wDivisionC_InputReady ),
    .iDividend( iChannel_Az ),
    .iDivisor( iChannel_Bz ),
    .xQuotient( wDivisionC_Result ),
    .OutputReady( wDivisionC_OutputReady )
);
//--------------------------------------------------------------
/*
    First adder/subtractor instance (channel A).
    Each operand, and the start strobe, is chosen by its own opcode mux below.
    Operands are LONG_WIDTH wide: channel values are sign-extended from bit 31,
    multiplier results are passed through at full width.
*/
reg  [`LONG_WIDTH-1:0] wAddSubA_Ax, wAddSubA_Bx;
wire [`LONG_WIDTH-1:0] wAddSubA_Result;
wire                   wAddSubA_Operation;   // 1 for SUB/CROSS/DEC/MOD (presumably "subtract"), 0 otherwise
reg                    wAddSubA_InputReady;
wire                   wAddSubA_OutputReady;

assign wAddSubA_Operation
    = ( iOperation == `SUB
     || iOperation == `CROSS
     || iOperation == `DEC
     || iOperation == `MOD
      ) ? 1 : 0;

FixedAddSub AddSubChannel_A
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iOperation( wAddSubA_Operation ),
    .iInputReady( wAddSubA_InputReady ),
    .A( wAddSubA_Ax ),
    .B( wAddSubA_Bx ),
    .R( wAddSubA_Result ),
    .OutputReady( wAddSubA_OutputReady )
);

// Start-strobe mux for adder A.
// Plain arithmetic opcodes start straight from iInputReady; MAG, DOT and
// CROSS start once multipliers A and B have both flagged their outputs ready.
always @ ( * )
begin
    case (iOperation)
        `ADD, `SUB, `INC, `INCX, `INCY, `INCZ, `DEC, `MOD:
            wAddSubA_InputReady = iInputReady;
        `MAG, `DOT, `CROSS:
            wAddSubA_InputReady = wMultiplicationOutputReadyA &&
                                  wMultiplicationOutputReadyB;
        default:
            wAddSubA_InputReady = 1'b0;
    endcase
end

// Operand A mux for adder A (wAddSubA_Ax).
always @ ( * )
begin
    case (iOperation)
        // Sign-extended Ax
        `ADD, `SUB, `INC, `INCX, `INCY, `INCZ, `DEC:
            wAddSubA_Ax = { {32{iChannel_Ax[31] == 1'b1}}, iChannel_Ax };
        // MOD works on the second source: sign-extended Bx
        `MOD:
            wAddSubA_Ax = { {32{iChannel_Bx[31] == 1'b1}}, iChannel_Bx };
        // Product from multiplier A
        `MAG, `DOT, `CROSS:
            wAddSubA_Ax = wMultiplicationA_Result;
        default:
            wAddSubA_Ax = 64'b0;
    endcase
end

// Operand B mux for adder A (wAddSubA_Bx).
always @ ( * )
begin
    case (iOperation)
        // Sign-extended Bx
        `ADD, `SUB:
            wAddSubA_Bx = { {32{iChannel_Bx[31] == 1'b1}}, iChannel_Bx };
        // INC and INCX step the X channel by the constant (1 << SCALE)
        `INC, `INCX:
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);
        // INCY and INCZ leave the X channel unchanged
        `INCY, `INCZ:
            wAddSubA_Bx = `LONG_WIDTH'd0;
        `DEC, `MOD:
            wAddSubA_Bx = (`LONG_WIDTH'd1 << `SCALE);
        // Product from multiplier B
        `MAG, `DOT, `CROSS:
            wAddSubA_Bx = wMultiplicationB_Result;
        default:
            wAddSubA_Bx = 64'b0;
    endcase
end
//--------------------------------------------------------------
/*
    Second adder/subtractor instance (channel B).
    Each operand, and the start strobe, is chosen by its own opcode mux below.
    For MAG and DOT this adder is chained after adder A: it adds adder A's
    result to the product from multiplier C.
*/

wire [`LONG_WIDTH-1:0] wAddSubB_Result;


wire wAddSubB_Operation;   // 1 for SUB/CROSS/DEC/MOD (presumably "subtract"), 0 otherwise
reg wAddSubB_InputReady;
wire wAddSubB_OutputReady;

reg [`LONG_WIDTH-1:0] wAddSubB_Ay,wAddSubB_By;

assign wAddSubB_Operation =
    ( iOperation == `SUB
    || iOperation == `CROSS
    || iOperation == `DEC
    || iOperation == `MOD
    ) ? 1 : 0;

FixedAddSub AddSubChannel_B
(
    .Clock( Clock ),
    .Reset( Reset ),
    .A( wAddSubB_Ay ),
    .B( wAddSubB_By ),
    .R( wAddSubB_Result ),
    .iOperation( wAddSubB_Operation ),
    .iInputReady( wAddSubB_InputReady ),
    .OutputReady( wAddSubB_OutputReady )
);
//----------------------------
// Multiplier C's ready flag, registered once on Clock. MAG and DOT use this
// registered copy, together with adder A's ready flag, to start adder B.
wire wMultiplicationOutputReadyC_Dealy1;
FFD_POSEDGE_ASYNC_RESET # (1) FFwMultiplicationOutputReadyC_Dealy1
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( wMultiplicationOutputReadyC ),
    .Q( wMultiplicationOutputReadyC_Dealy1 )
);

// Start-strobe mux for adder B
always @ ( * )
begin
    case (iOperation)
        `ADD: wAddSubB_InputReady = iInputReady;
        `SUB: wAddSubB_InputReady = iInputReady;
        `INC,`INCX,`INCY,`INCZ: wAddSubB_InputReady = iInputReady;
        `DEC: wAddSubB_InputReady = iInputReady;
        `MOD: wAddSubB_InputReady = iInputReady;
        // Chained after adder A, see note on the delayed flag above
        `MAG: wAddSubB_InputReady = wAddSubAOutputReady
                                    && wMultiplicationOutputReadyC_Dealy1;
        `DOT: wAddSubB_InputReady = wAddSubAOutputReady
                                    && wMultiplicationOutputReadyC_Dealy1;
        // CROSS consumes the products of multipliers C and D directly
        `CROSS: wAddSubB_InputReady = wMultiplicationOutputReadyC &&
                                      wMultiplicationOutputReadyD;
        default: wAddSubB_InputReady = 1'b0;
    endcase
end
//----------------------------
// Operand A mux for adder B (wAddSubB_Ay).
// ADD/SUB/INC*/DEC take the sign-extended Ay channel, MOD takes the
// sign-extended By channel, MAG and DOT take the result of adder A, and
// CROSS takes the product from multiplier C.
always @ ( * )
begin
    case (iOperation)
        `ADD: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
        `SUB: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
        `INC,`INCX,`INCY,`INCZ: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
        `DEC: wAddSubB_Ay = (iChannel_Ay[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_Ay} : {32'b0,iChannel_Ay}; //Ay
        `MOD: wAddSubB_Ay = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF, iChannel_By} : {32'b0,iChannel_By}; //By
        `MAG: wAddSubB_Ay = wAddSubA_Result; //Ax^2 + Ay^2
        `DOT: wAddSubB_Ay = wAddSubA_Result; //Ax*Bx + Ay*By
        `CROSS: wAddSubB_Ay = wMultiplicationC_Result;
        default: wAddSubB_Ay = 64'b0;
    endcase
end
//----------------------------
// Operand B mux for adder B (wAddSubB_By).
always @ ( * )
begin
    case (iOperation)
        `ADD: wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //By
        `SUB: wAddSubB_By = (iChannel_By[31] == 1'b1) ? {32'hFFFFFFFF,iChannel_By } : {32'b0,iChannel_By}; //By
        `INC,`INCY: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);  // step the Y channel
        `INCX,`INCZ: wAddSubB_By = `LONG_WIDTH'd0;             // Y channel unchanged
        `DEC: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
        `MOD: wAddSubB_By = (`LONG_WIDTH'd1 << `SCALE);
        `MAG: wAddSubB_By = wMultiplicationC_Result; //Az^2
        `DOT: wAddSubB_By = wMultiplicationC_Result; //Az * Bz
        `CROSS: wAddSubB_By = wMultiplicationD_Result;
        // Full-width zero, matching the other LONG_WIDTH operand muxes
        // (was 32'b0, which relied on implicit zero-extension).
        default: wAddSubB_By = 64'b0;
    endcase
end
//--------------------------------------------------------------
// Third adder/subtractor instance (channel C).
reg  [`LONG_WIDTH-1:0] wAddSubC_Az, wAddSubC_Bz;
wire [`LONG_WIDTH-1:0] wAddSubC_Result;

wire wAddSubC_Operation;   // 1 for SUB/CROSS/DEC/MOD (presumably "subtract"), 0 otherwise
reg  wAddSubC_InputReady;
wire wAddSubC_OutputReady;

// NOTE(review): these two regs are not referenced anywhere between this
// declaration and the end of the module - confirm before removing.
reg [`LONG_WIDTH-1:0] AddSubC_Az,AddSubB_Bz;

// Operand A mux for adder C: product E for CROSS, sign-extended Bz for MOD,
// sign-extended Az for every other opcode.
always @ ( * )
begin
    case (iOperation)
        `CROSS:  wAddSubC_Az = wMultiplicationE_Result;
        `MOD:    wAddSubC_Az = { {32{iChannel_Bz[31] == 1'b1}}, iChannel_Bz };
        default: wAddSubC_Az = { {32{iChannel_Az[31] == 1'b1}}, iChannel_Az };
    endcase
end

// Operand B mux for adder C.
always @ ( * )
begin
    case (iOperation)
        `CROSS:       wAddSubC_Bz = wMultiplicationF_Result;
        `INC, `INCZ:  wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);  // step the Z channel
        `INCX, `INCY: wAddSubC_Bz = `LONG_WIDTH'd0;              // Z channel unchanged
        `DEC, `MOD:   wAddSubC_Bz = (`LONG_WIDTH'd1 << `SCALE);
        default:      wAddSubC_Bz = { {32{iChannel_Bz[31] == 1'b1}}, iChannel_Bz };
    endcase
end

assign wAddSubC_Operation
    = ( iOperation == `SUB
     || iOperation == `CROSS
     || iOperation == `DEC
     || iOperation == `MOD
      ) ? 1 : 0;

FixedAddSub AddSubChannel_C
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iOperation( wAddSubC_Operation ),
    .iInputReady( wAddSubC_InputReady ),
    .A( wAddSubC_Az ),
    .B( wAddSubC_Bz ),
    .R( wAddSubC_Result ),
    .OutputReady( wAddSubC_OutputReady )
);

// Start strobe for adder C: CROSS waits for multipliers E and F; every
// other opcode starts the adder directly from iInputReady.
always @ ( * )
begin
    if (iOperation == `CROSS)
        wAddSubC_InputReady = wMultiplicationE_OutputReady &&
                              wMultiplicationF_OutputReady;
    else
        wAddSubC_InputReady = iInputReady;
end
 
//------------------------------------------------------
// Square-root unit. It takes adder B's result as operand and is started only
// while the opcode is MAG and adder B flags its output ready.
wire [`WIDTH-1:0] wSquareRoot_Result;
wire              wSquareRoot_OutputReady;

FixedPointSquareRoot SQROOT1
(
    .Clock( Clock ),
    .Reset( Reset ),
    .iInputReady( wAddSubBOutputReady && (iOperation == `MAG) ),
    .Operand( wAddSubB_Result ),
    .Result( wSquareRoot_Result ),
    .OutputReady( wSquareRoot_OutputReady )
);
//------------------------------------------------------
 
// MOD results: each first-source channel is bitwise-ANDed with the matching
// adder output (for MOD the adders are fed the B channel and (1 << SCALE),
// with the subtract select asserted).
assign wModulus2N_ResultA = ( wAddSubA_Result & iChannel_Ax );
assign wModulus2N_ResultB = ( wAddSubB_Result & iChannel_Ay );
assign wModulus2N_ResultC = ( wAddSubC_Result & iChannel_Az );
 
 
 
 
 
 
//&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&//
//**** Result mux, channel A ****
// Selects the value driven on ResultA for the current opcode.
// DOT takes its value from adder B and MAG from the square-root unit.
// Adder outputs are narrowed to 32 bits as { bit 63, bits [30:0] }.
always @ ( * )
begin
    case ( iOperation )
        `RETURN:     ResultA = iChannel_Ax;
        `ADD, `SUB, `CROSS:
                     ResultA = { (wAddSubA_Result[63] == 1'b1), wAddSubA_Result[30:0] };
        `DIV:        ResultA = wDivisionA_Result;
        `MUL, `IMUL: ResultA = wMultiplicationA_Result[31:0];
        `DOT:        ResultA = { (wAddSubB_Result[63] == 1'b1), wAddSubB_Result[30:0] };
        `MAG:        ResultA = wSquareRoot_Result;
        `ZERO:       ResultA = 32'b0;
        `COPY:       ResultA = iChannel_Ax;
        `SWIZZLE3D:  ResultA = wSwizzleOutputX;
        // Set operations
        `UNSCALE:    ResultA = iChannel_Ax >> `SCALE;
        `SETX:       ResultA = iChannel_Ax;
        `SETY, `SETZ:
                     ResultA = iChannel_Bx;
        `INC, `INCX, `INCY, `INCZ, `DEC:
                     ResultA = { (wAddSubA_Result[63] == 1'b1), wAddSubA_Result[30:0] };
        `MOD:        ResultA = wModulus2N_ResultA;
        // Keep only the low SCALE bits
        `FRAC:       ResultA = iChannel_Ax & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));
        `MULP:       ResultA = iChannel_Ax;
        // Two's-complement negation
        `NEG:        ResultA = ~iChannel_Ax + 1'b1;
        `XCHANGEX:   ResultA = iChannel_Bx;

        default:
        begin
            `ifdef DEBUG
            // $display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            // $stop();
            `endif
            ResultA = 32'b0;
        end
    endcase
end
//------------------------------------------------------
//**** Result mux, channel B ****
// Selects the value driven on ResultB for the current opcode.
// Adder outputs are narrowed to 32 bits as { bit 63, bits [30:0] }.
always @ ( * )
begin
    case ( iOperation )
        `RETURN:     ResultB = iChannel_Ax;
        `ADD, `SUB, `CROSS:
                     ResultB = { (wAddSubB_Result[63] == 1'b1), wAddSubB_Result[30:0] };
        `DIV:        ResultB = wDivisionB_Result;
        `MUL, `IMUL: ResultB = wMultiplicationB_Result[31:0];
        `DOT:        ResultB = { (wAddSubB_Result[63] == 1'b1), wAddSubB_Result[30:0] };
        `MAG:        ResultB = wSquareRoot_Result;
        `ZERO:       ResultB = 32'b0;
        `COPY:       ResultB = iChannel_Ay;
        // Set operations
        `UNSCALE:    ResultB = iChannel_Ay >> `SCALE;
        `SETX:       ResultB = iChannel_By;
        `SETY:       ResultB = iChannel_Ax;
        `SETZ:       ResultB = iChannel_By;
        `SWIZZLE3D:  ResultB = wSwizzleOutputY;
        `INC, `INCX, `INCY, `INCZ, `DEC:
                     ResultB = { (wAddSubB_Result[63] == 1'b1), wAddSubB_Result[30:0] };
        `MOD:        ResultB = wModulus2N_ResultB;
        // Keep only the low SCALE bits
        `FRAC:       ResultB = iChannel_Ay & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));
        `MULP:       ResultB = iChannel_Ay;
        // Two's-complement negation
        `NEG:        ResultB = ~iChannel_Ay + 1'b1;
        `XCHANGEX:   ResultB = iChannel_Ay;
        default:
        begin
            `ifdef DEBUG
            //$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            //$stop();
            `endif
            ResultB = 32'b0;
        end
    endcase
end
//------------------------------------------------------
//**** Result mux, channel C ****
// Selects the value driven on ResultC for the current opcode.
// MULP delivers the multiplier A product on this channel.
// Adder outputs are narrowed to 32 bits as { bit 63, bits [30:0] }.
always @ ( * )
begin
    case ( iOperation )
        `RETURN:     ResultC = iChannel_Ax;
        `ADD, `SUB, `CROSS:
                     ResultC = { (wAddSubC_Result[63] == 1'b1), wAddSubC_Result[30:0] };
        `DIV:        ResultC = wDivisionC_Result;
        `MUL, `IMUL: ResultC = wMultiplicationC_Result[31:0];
        `DOT:        ResultC = { (wAddSubB_Result[63] == 1'b1), wAddSubB_Result[30:0] };
        `MAG:        ResultC = wSquareRoot_Result;
        `ZERO:       ResultC = 32'b0;
        `COPY:       ResultC = iChannel_Az;
        `SWIZZLE3D:  ResultC = wSwizzleOutputZ;
        // Set operations
        `UNSCALE:    ResultC = iChannel_Az >> `SCALE;
        `SETX, `SETY:
                     ResultC = iChannel_Bz;
        `SETZ:       ResultC = iChannel_Ax;
        `INC, `INCX, `INCY, `INCZ, `DEC:
                     ResultC = { (wAddSubC_Result[63] == 1'b1), wAddSubC_Result[30:0] };
        `MOD:        ResultC = wModulus2N_ResultC;
        // Keep only the low SCALE bits
        `FRAC:       ResultC = iChannel_Az & (`WIDTH'hFFFFFFFF >> (`WIDTH - `SCALE));
        `MULP:       ResultC = wMultiplicationA_Result[31:0];
        // Two's-complement negation
        `NEG:        ResultC = ~iChannel_Az + 1'b1;
        `XCHANGEX:   ResultC = iChannel_Az;
        default:
        begin
            `ifdef DEBUG
            //$display("%dns ALU: Error Unknown Operation: %d",$time,iOperation);
            //$stop();
            `endif
            ResultC = 32'b0;
        end
    endcase
end
//------------------------------------------------------------------------
 
 
// Branch-taken flag: JMP is always taken, every conditional jump follows the
// arithmetic comparison result, and non-jump opcodes never take a branch.
always @ ( * )
begin
    case (iOperation)
        `JMP:
            oBranchTaken = 1;
        `JGX,  `JGY,  `JGZ,
        `JLX,  `JLY,  `JLZ,
        `JEQX, `JEQY, `JEQZ,
        `JNEX, `JNEY, `JNEZ,
        `JGEX, `JGEY, `JGEZ,
        `JLEX, `JLEY, `JLEZ:
            oBranchTaken = wArithmeticComparison_Result;
        default:
            oBranchTaken = 0;
    endcase
end
 
// Branch-not-taken flag: for any jump opcode it is raised when the ALU output
// is ready and the branch was not taken; it stays low for non-jump opcodes.
always @ ( * )
begin
    case (iOperation)
        `JMP,
        `JGX,  `JGY,  `JGZ,
        `JLX,  `JLY,  `JLZ,
        `JEQX, `JEQY, `JEQZ,
        `JNEX, `JNEY, `JNEZ,
        `JGEX, `JGEY, `JGEZ,
        `JLEX, `JLEY, `JLEZ:
            oBranchNotTaken = !oBranchTaken && OutputReady;
        default:
            oBranchNotTaken = 0;
    endcase
end
//------------------------------------------------------------------------
// Output-ready plumbing for the divider, multiplier and square-root units.
// Each FFT1 instance is clocked by a unit's OutputReady and reset by
// iInputReady, with D tied high. Presumably it holds the "done" flag from the
// unit's ready pulse until the next instruction starts - confirm against FFT1.

wire wDivisionOutputReadyA, wDivisionOutputReadyB, wDivisionOutputReadyC;
wire wDivisionOutputReady;

// Adder ready flags are passed through unchanged.
assign wAddSubAOutputReady = wAddSubA_OutputReady;
assign wAddSubBOutputReady = wAddSubB_OutputReady;
assign wAddSubCOutputReady = wAddSubC_OutputReady;

FFT1 FFT_DivisionA
(
    .Clock( wDivisionA_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyA )
);

FFT1 FFT_DivisionB
(
    .Clock( wDivisionB_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyB )
);

FFT1 FFT_DivisionC
(
    .Clock( wDivisionC_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wDivisionOutputReadyC )
);

// DIV is complete once all three dividers have reported.
assign wDivisionOutputReady =
    ( wDivisionOutputReadyA && wDivisionOutputReadyB && wDivisionOutputReadyC );

// Multiplier ready flags are passed through unchanged.
assign wMultiplicationOutputReadyA = wMultiplicationA_OutputReady;
assign wMultiplicationOutputReadyB = wMultiplicationB_OutputReady;
assign wMultiplicationOutputReadyC = wMultiplicationC_OutputReady;
assign wMultiplicationOutputReadyD = wMultiplicationD_OutputReady;

// Combined flag: multipliers A, B and C are all ready.
assign wMultiplicationOutputReady =
    ( wMultiplicationOutputReadyA && wMultiplicationOutputReadyB && wMultiplicationOutputReadyC );

wire wSquareRootOutputReady;
FFT1 FFT_Sqrt
(
    .Clock( wSquareRoot_OutputReady ),
    .Reset( iInputReady ),
    .D( 1'b1 ),
    .Q( wSquareRootOutputReady )
);
//------------------------------------------------------------------------
// iInputReady registered on Clock; used as the ready flag of the opcodes
// whose OutputReady arm selects wOutputDelay1Cycle.
wire wOutputDelay1Cycle;

FFD_POSEDGE_ASYNC_RESET # (1) FFOutputReadyDelay2
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iInputReady ),
    .Q( wOutputDelay1Cycle )
);

// Opcode register, enabled by iInputReady; the OutputReady mux selects on this
// copy rather than on iOperation.
wire [`INSTRUCTION_OP_LENGTH-1:0] wOperation;

FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) SourceZ2
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( iInputReady ),
    .D( iOperation ),
    .Q( wOperation )
);
 
 
// Mux for the ALU output-ready signal.
// Selects, from the latched opcode (wOperation), which completion flag drives
// OutputReady:
//   - pass-through style opcodes use the registered copy of iInputReady
//     (wOutputDelay1Cycle),
//   - arithmetic opcodes wait on the adder, multiplier, divider or
//     square-root ready flags they depend on,
//   - conditional jumps wait on the arithmetic comparison unit.
always @ ( * )
begin
    case ( wOperation )
        `UNSCALE: OutputReady = wOutputDelay1Cycle;
        `RETURN: OutputReady = wOutputDelay1Cycle;
        `NOP: OutputReady = wOutputDelay1Cycle;
        `FRAC: OutputReady = wOutputDelay1Cycle;
        `NEG: OutputReady = wOutputDelay1Cycle;
        `ifdef DEBUG
        //Debug Print behaves as a NOP in terms of ALU...
        `DEBUG_PRINT: OutputReady = wOutputDelay1Cycle;
        `endif
        `ADD,`INC,`INCX,`INCY,`INCZ: OutputReady = wAddSubAOutputReady &&
                                                   wAddSubBOutputReady &&
                                                   wAddSubCOutputReady;
        `SUB,`DEC: OutputReady = wAddSubAOutputReady &&
                                 wAddSubBOutputReady &&
                                 wAddSubCOutputReady;
        `DIV: OutputReady = wDivisionOutputReady;
        `MUL,`IMUL: OutputReady = wMultiplicationOutputReady;
        // MULP only uses multiplier A
        `MULP: OutputReady = wMultiplicationOutputReadyA;
        // DOT finishes on adder B, which is chained after adder A
        `DOT: OutputReady = wAddSubBOutputReady;
        `CROSS: OutputReady = wAddSubAOutputReady &&
                              wAddSubBOutputReady &&
                              wAddSubCOutputReady;
        `MAG: OutputReady = wSquareRootOutputReady;
        `ZERO: OutputReady = wOutputDelay1Cycle;
        `COPY: OutputReady = wOutputDelay1Cycle;
        `SWIZZLE3D: OutputReady = wOutputDelay1Cycle;
        `SETX,`SETY,`SETZ,`JMP: OutputReady = wOutputDelay1Cycle;

        `JGX,`JGY,`JGZ: OutputReady = ArithmeticComparison_OutputReady;
        `JLX,`JLY,`JLZ: OutputReady = ArithmeticComparison_OutputReady;
        `JEQX,`JEQY,`JEQZ: OutputReady = ArithmeticComparison_OutputReady;
        `JNEX,`JNEY,`JNEZ: OutputReady = ArithmeticComparison_OutputReady;
        `JGEX,`JGEY,`JGEZ: OutputReady = ArithmeticComparison_OutputReady;
        `JLEX,`JLEY,`JLEZ: OutputReady = ArithmeticComparison_OutputReady;
        `MOD: OutputReady = wAddSubAOutputReady && //TODO: wait 1 more cycle
                            wAddSubBOutputReady &&
                            wAddSubCOutputReady;
        `XCHANGEX: OutputReady = wOutputDelay1Cycle;
        default:
        begin
            // Single-bit zero for a flag signal (was 32'b0).
            OutputReady = 1'b0;
            // Report the opcode this case actually selected on (the latched
            // wOperation), not the live iOperation input, which may differ.
            $display("*** ALU ERROR: wOperation = %d ***",wOperation);
        end
    endcase
end
 
endmodule
//------------------------------------------------------------------------
/Module_InstructionDecode.v
0,0 → 1,136
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
// Instruction decode unit.
// Splits the encoded instruction into opcode, destination and the two source
// operands, fetching the sources through the two RAM address/value port pairs.
// A source whose address equals the previous destination is replaced by
// iDataForward instead of the RAM value.
module InstructionDecode
(
    input wire                                Clock,
    input wire                                Reset,
    input wire                                iInstructionAvailable,
    input wire[`INSTRUCTION_WIDTH-1:0]        iEncodedInstruction,
    input wire[`DATA_ROW_WIDTH-1:0]           iRamValue0,
    input wire[`DATA_ROW_WIDTH-1:0]           iRamValue1,
    output wire[`DATA_ADDRESS_WIDTH-1:0]      oRamAddress0,oRamAddress1,
    output wire[`INSTRUCTION_OP_LENGTH-1:0]   oOperation,
    output wire [`DATA_ROW_WIDTH-1:0]         oSource0,oSource1,
    output wire [`DATA_ADDRESS_WIDTH-1:0]     oDestination,
    input wire [`DATA_ROW_WIDTH-1:0]          iDataForward,
    input wire [`DATA_ADDRESS_WIDTH-1:0]      iLastDestination,

    `ifdef DEBUG
    input wire [`ROM_ADDRESS_WIDTH-1:0]       iDebug_CurrentIP,
    output wire [`ROM_ADDRESS_WIDTH-1:0]      oDebug_CurrentIP,
    `endif
    output wire                               oDataReadyForExe

);

wire                        wInmediateOperand;
wire [`DATA_ROW_WIDTH-1:0]  wSource0, wSource1;
wire                        wSource0AddrssEqualsLastDestination;
wire                        wSource1AddrssEqualsLastDestination;
wire                        wTriggerSource0DataForward;
wire                        wTriggerSource1DataForward;

`ifdef DEBUG
// The instruction pointer is passed straight through for debugging.
assign oDebug_CurrentIP = iDebug_CurrentIP;
`endif

// Immediate flag: set when the instruction carries an immediate operand.
assign wInmediateOperand = iEncodedInstruction[`INSTRUCTION_IMM_BITPOS];

//---------------------------------------------------------------------------
// RAM read addresses
//---------------------------------------------------------------------------

// Second read address is always instruction bits [31:16].
assign oRamAddress1 = iEncodedInstruction[31:16];

// First read address: instruction bits [15:0], or the destination field
// [47:32] for immediate instructions (presumably Sel = 1 selects I2 -
// confirm against the mux definition).
MUXFULLPARALELL_16bits_2SEL RAMAddr0MUX
(
    .Sel( wInmediateOperand ),
    .I1( iEncodedInstruction[15:0] ),
    .I2( iEncodedInstruction[47:32] ),
    .O1( oRamAddress0 )
);

//---------------------------------------------------------------------------
// Raw source values
//---------------------------------------------------------------------------

// Source 0 is whatever the first RAM port returned.
assign wSource0 = iRamValue0;

// Source 1 is the second RAM value, or, for immediate instructions, a row
// built from instruction bits [31:16] and [15:0] followed by 64 zero bits.
assign wSource1 = ( wInmediateOperand )
                ? { oRamAddress1, iEncodedInstruction[15:0], 32'b0, 32'b0 }
                : iRamValue1;

//---------------------------------------------------------------------------
// Data forwarding
//---------------------------------------------------------------------------

// A source is forwarded when its read address equals the last destination.
// Source 1 is never forwarded for immediate instructions.
assign wSource0AddrssEqualsLastDestination = (oRamAddress0 == iLastDestination);
assign wSource1AddrssEqualsLastDestination = (oRamAddress1 == iLastDestination);
assign wTriggerSource0DataForward = wSource0AddrssEqualsLastDestination;
assign wTriggerSource1DataForward = wSource1AddrssEqualsLastDestination && !wInmediateOperand;

// Final operand selection between the raw source and the forwarded data.
MUXFULLPARALELL_96bits_2SEL Source0_Mux
(
    .Sel( wTriggerSource0DataForward ),
    .I1( wSource0 ),
    .I2( iDataForward ),
    .O1( oSource0 )
);

MUXFULLPARALELL_96bits_2SEL Source1_Mux
(
    .Sel( wTriggerSource1DataForward ),
    .I1( wSource1 ),
    .I2( iDataForward ),
    .O1( oSource1 )
);

//---------------------------------------------------------------------------
// Registered outputs
//---------------------------------------------------------------------------

// iInstructionAvailable registered on Clock tells the execution unit that the
// decoded data is ready.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( 1'b1 ),
    .D( iInstructionAvailable ),
    .Q( oDataReadyForExe )
);

// Destination address register: instruction bits [47:32].
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FFD2
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( iInstructionAvailable ),
    .D( iEncodedInstruction[47:32] ),
    .Q( oDestination )
);

// Opcode register: the top INSTRUCTION_OP_LENGTH bits of the instruction.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `INSTRUCTION_OP_LENGTH ) FFD3
(
    .Clock( Clock ),
    .Reset( Reset ),
    .Enable( iInstructionAvailable ),
    .D( iEncodedInstruction[`INSTRUCTION_WIDTH-1:`INSTRUCTION_WIDTH-`INSTRUCTION_OP_LENGTH] ),
    .Q( oOperation )
);

endmodule
 
/Module_InstructionEntryPoint.v
0,0 → 1,32
`timescale 1ns / 1ps
`include "aDefinitions.v"
// Instruction entry point unit.
// One clock after iTrigger, oTriggerIFU is raised. While it is high the low
// ROM_ADDRESS_WIDTH bits of iIMemInput are presented as the entry point, and
// that entry point replaces iInitialCodeAddress on the instruction address
// output.
module InstructionEntryPoint
(
    input wire                             Clock,
    input wire                             Reset,
    input wire                             iTrigger,
    input wire[`ROM_ADDRESS_WIDTH-1:0]     iInitialCodeAddress,
    input wire [`INSTRUCTION_WIDTH-1:0]    iIMemInput,

    output wire                            oEPU_Busy,
    output wire [`ROM_ADDRESS_WIDTH-1:0]   oEntryPoint,
    output wire                            oTriggerIFU,
    output wire [`ROM_ADDRESS_WIDTH-1:0]   oInstructionAddr
);

// iTrigger registered on Clock.
FFD_POSEDGE_ASYNC_RESET # ( 1 ) FFD1
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iTrigger ),
    .Q( oTriggerIFU )
);

// Busy from the trigger itself through the cycle in which oTriggerIFU is high.
assign oEPU_Busy = iTrigger | oTriggerIFU;

// Entry point is taken from instruction memory only while oTriggerIFU is
// high; it is zero otherwise.
assign oEntryPoint = (oTriggerIFU) ? iIMemInput[`ROM_ADDRESS_WIDTH-1:0] : `ROM_ADDRESS_WIDTH'b0;

// Address driven towards instruction memory.
assign oInstructionAddr = (oTriggerIFU) ? oEntryPoint : iInitialCodeAddress;

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.