OpenCores
URL https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk

Subversion Repositories theia_gpu

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /
    from Rev 212 to Rev 213
    Reverse comparison

Rev 212 → Rev 213

/theia_gpu/branches/beta_2.0/rtl/Module_RadixRMul.v
0,0 → 1,340
`timescale 1ns / 1ps
`include "aDefinitions.v"
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 19:49:14 01/13/2009
// Design Name:
// Module Name: RadixRMul
// Project Name:
// Target Devices:
// Tool versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////
 
`default_nettype none
 
 
//---------------------------------------------------
module MUX_4_TO_1_32Bits_FullParallel
(
input wire [31:0] i1,i2,i3,i4,
output reg [31:0] O,
input wire [1:0] Sel
);

// Combinational 4:1 selector for 32-bit words.
// Sel = 0 -> i1, 1 -> i2, 2 -> i3, 3 -> i4.
// There is intentionally no default arm: all four 2-state values of Sel
// are covered, and an X/Z on Sel leaves O at its previous simulated value.
always @ ( * )
begin
	case (Sel)
	2'd0: O = i1;
	2'd1: O = i2;
	2'd2: O = i3;
	2'd3: O = i4;
	endcase
end

endmodule
//---------------------------------------------------
/*
module SHIFTER2_16_BITS
(
input wire C,
input wire[15:0] In,
output reg[15:0] Out
);
 
reg [15:0] Temp;
always @ (posedge C )
begin
Out = In << 2;
end
 
endmodule
*/
//---------------------------------------------------
//---------------------------------------------------
// Signed 32 x 32 -> 64 bit radix-4 multiplier.
//
// Operation:
//   * A and B are captured into registers when iInputReady is high.
//   * Both latched operands are converted to magnitudes; the 16 radix-4
//     digits of |B| each select 0, |A|, 2|A| or 3|A|; the 16 partial
//     products are shifted into place and summed by a balanced adder tree.
//   * When iUnscaled is 0 the magnitude of the product is shifted right
//     by `SCALE (fixed-point renormalisation); when 1 it is left as is.
//   * The sign (SignA ^ SignB) is applied last by two's complement.
//   * OutputReady is iInputReady delayed by two registers.
//
// Ports:
//   Clock, Reset  - clock / reset forwarded to the FFD registers
//   A, B          - signed two's complement operands
//   R             - 64-bit signed result (combinational from latched A/B)
//   iUnscaled     - 1: raw product, 0: product >> `SCALE
//   iInputReady   - operand capture enable / start pulse
//   OutputReady   - delayed copy of iInputReady
//
// The multiples of |A| are kept 34 bits wide. In 32 bits, 3|A| overflows
// for |A| >= 32'h55555556 and 2|A| overflows for |A| = 32'h80000000,
// which silently corrupted the product for large operands.
//---------------------------------------------------
module RADIX_R_MUL_32_FULL_PARALLEL
(
input wire Clock,
input wire Reset,
input wire[31:0] A,
input wire[31:0] B,
output wire[63:0] R,
input wire iUnscaled,
input wire iInputReady,
output wire OutputReady

);

// Selects the multiple of the multiplicand addressed by one radix-4 digit.
// All operands are passed as arguments so that every continuous assignment
// using this function is sensitive to each of them.
function [33:0] fSelectMultiple;
input [1:0] iDigit;
input [33:0] iA1;
input [33:0] iA2;
input [33:0] iA3;
begin
	case (iDigit)
	2'b00: fSelectMultiple = 34'b0;
	2'b01: fSelectMultiple = iA1;
	2'b10: fSelectMultiple = iA2;
	2'b11: fSelectMultiple = iA3;
	default: fSelectMultiple = 34'bx;	//simulation only: propagate unknown digits
	endcase
end
endfunction


wire wInputDelay1;
//-------------------
// Operand capture registers, enabled by iInputReady
wire [31:0] wALatched,wBLatched;
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( A ),
.Q( wALatched)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
.Clock( Clock ),
.Reset( Reset),
.Enable( iInputReady ),
.D( B ),
.Q( wBLatched )
);

//-------------------
// Two register stages turn iInputReady into OutputReady

FFD_POSEDGE_SYNCRONOUS_RESET #(1) FFOutputReadyDelay1
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( iInputReady ),
.Q( wInputDelay1 )
);

FFD_POSEDGE_SYNCRONOUS_RESET #(1) FFOutputReadyDelay2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( wInputDelay1 ),
.Q( OutputReady )
);

wire [31:0] wA, wB;
wire [33:0] w1A, w2A, w3A;
wire SignA,SignB;

assign SignA = wALatched[31];
assign SignB = wBLatched[31];

// Magnitudes of the operands (two's complement negate when negative)
assign wB = (SignB == 1) ? ~wBLatched + 1'b1 : wBLatched;
assign wA = (SignA == 1) ? ~wALatched + 1'b1 : wALatched;

// 1x, 2x and 3x multiples, zero extended so that none of them can overflow
// (3 * 2^31 < 2^34)
assign w1A = {2'b00, wA};
assign w2A = {1'b0, wA, 1'b0};
assign w3A = w2A + w1A;

// One partial product per radix-4 digit of |B|
wire [33:0] wPartialResult0,wPartialResult1,wPartialResult2,wPartialResult3,wPartialResult4,wPartialResult5;
wire [33:0] wPartialResult6,wPartialResult7,wPartialResult8,wPartialResult9,wPartialResult10,wPartialResult11;
wire [33:0] wPartialResult12,wPartialResult13,wPartialResult14,wPartialResult15;

assign wPartialResult0  = fSelectMultiple( {wB[1],wB[0]},   w1A, w2A, w3A );
assign wPartialResult1  = fSelectMultiple( {wB[3],wB[2]},   w1A, w2A, w3A );
assign wPartialResult2  = fSelectMultiple( {wB[5],wB[4]},   w1A, w2A, w3A );
assign wPartialResult3  = fSelectMultiple( {wB[7],wB[6]},   w1A, w2A, w3A );
assign wPartialResult4  = fSelectMultiple( {wB[9],wB[8]},   w1A, w2A, w3A );
assign wPartialResult5  = fSelectMultiple( {wB[11],wB[10]}, w1A, w2A, w3A );
assign wPartialResult6  = fSelectMultiple( {wB[13],wB[12]}, w1A, w2A, w3A );
assign wPartialResult7  = fSelectMultiple( {wB[15],wB[14]}, w1A, w2A, w3A );
assign wPartialResult8  = fSelectMultiple( {wB[17],wB[16]}, w1A, w2A, w3A );
assign wPartialResult9  = fSelectMultiple( {wB[19],wB[18]}, w1A, w2A, w3A );
assign wPartialResult10 = fSelectMultiple( {wB[21],wB[20]}, w1A, w2A, w3A );
assign wPartialResult11 = fSelectMultiple( {wB[23],wB[22]}, w1A, w2A, w3A );
assign wPartialResult12 = fSelectMultiple( {wB[25],wB[24]}, w1A, w2A, w3A );
assign wPartialResult13 = fSelectMultiple( {wB[27],wB[26]}, w1A, w2A, w3A );
assign wPartialResult14 = fSelectMultiple( {wB[29],wB[28]}, w1A, w2A, w3A );
assign wPartialResult15 = fSelectMultiple( {wB[31],wB[30]}, w1A, w2A, w3A );


// Adder tree, level 1: weight each partial product by 4^digit and add pairs.
// A 34-bit value shifted by at most 30 still fits in 64 bits.
wire[63:0] wPartialResult1_0,wPartialResult1_1,wPartialResult1_2,wPartialResult1_3,
wPartialResult1_4,wPartialResult1_5,wPartialResult1_6,wPartialResult1_7;

assign wPartialResult1_0 = ({30'b0,wPartialResult0}) + ({30'b0,wPartialResult1} << 2);
assign wPartialResult1_1 = ({30'b0,wPartialResult2} << 4) + ({30'b0,wPartialResult3} << 6);
assign wPartialResult1_2 = ({30'b0,wPartialResult4} << 8) + ({30'b0,wPartialResult5} << 10);
assign wPartialResult1_3 = ({30'b0,wPartialResult6} << 12) + ({30'b0,wPartialResult7} << 14);
assign wPartialResult1_4 = ({30'b0,wPartialResult8} << 16) + ({30'b0,wPartialResult9} << 18);
assign wPartialResult1_5 = ({30'b0,wPartialResult10} << 20) + ({30'b0,wPartialResult11} << 22);
assign wPartialResult1_6 = ({30'b0,wPartialResult12} << 24) + ({30'b0,wPartialResult13} << 26);
assign wPartialResult1_7 = ({30'b0,wPartialResult14} << 28) + ({30'b0,wPartialResult15} << 30);


// Adder tree, level 2
wire [63:0] wPartialResult2_0,wPartialResult2_1,wPartialResult2_2,wPartialResult2_3;

assign wPartialResult2_0 = wPartialResult1_0 + wPartialResult1_1;
assign wPartialResult2_1 = wPartialResult1_2 + wPartialResult1_3;
assign wPartialResult2_2 = wPartialResult1_4 + wPartialResult1_5;
assign wPartialResult2_3 = wPartialResult1_6 + wPartialResult1_7;

// Adder tree, level 3
wire [63:0] wPartialResult3_0,wPartialResult3_1;

assign wPartialResult3_0 = wPartialResult2_0 + wPartialResult2_1;
assign wPartialResult3_1 = wPartialResult2_2 + wPartialResult2_3;

wire [63:0] R_pre1,R_pre2;

// Final sum; the fixed-point rescale is applied to the magnitude
// (logical shift) before the sign is restored.
assign R_pre1 = (iUnscaled == 1) ? (wPartialResult3_0 + wPartialResult3_1) : ((wPartialResult3_0 + wPartialResult3_1) >> `SCALE);

// Result is negative when exactly one operand was negative
assign R_pre2 = ( (SignA ^ SignB) == 1) ? ~R_pre1 + 1'b1 : R_pre1;

assign R = R_pre2;

endmodule
/theia_gpu/branches/beta_2.0/rtl/aDefinitions.v
0,0 → 1,374
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2009 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
/*******************************************************************************
Module Description:
 
This module defines constants that are going to be used
all over the code. By now you may have noticed that all
constants are pre-compilation define directives. This is
mainly for simulation performance reasons.
*******************************************************************************/
 
//Hierarchical path prefixes. THEIA_TOP expands to the instance name 'uut';
//VP_TOP additionally requires an identifier named CVPID to be visible
//wherever the macro is expanded.
`define THEIA_TOP uut
`define CP_TOP `THEIA_TOP.CP
`define VP_TOP `THEIA_TOP.VPX[ CVPID ].VP
 
//Control processor opcodes, encoded as CONTROL_PROCESSOR_OP_WIDTH-bit constants
`define CONTROL_PROCESSOR_OP_WIDTH 5
`define CONTROL_PROCESSOR_OP_NOP `CONTROL_PROCESSOR_OP_WIDTH'd0
`define CONTROL_PROCESSOR_OP_DELIVER_COMMAND `CONTROL_PROCESSOR_OP_WIDTH'd1
`define CONTROL_PROCESSOR_OP_ADD `CONTROL_PROCESSOR_OP_WIDTH'd2
`define CONTROL_PROCESSOR_OP_SUB `CONTROL_PROCESSOR_OP_WIDTH'd3
`define CONTROL_PROCESSOR_OP_AND `CONTROL_PROCESSOR_OP_WIDTH'd4
`define CONTROL_PROCESSOR_OP_OR `CONTROL_PROCESSOR_OP_WIDTH'd5
`define CONTROL_PROCESSOR_OP_BRANCH `CONTROL_PROCESSOR_OP_WIDTH'd6
`define CONTROL_PROCESSOR_OP_BEQ `CONTROL_PROCESSOR_OP_WIDTH'd7
`define CONTROL_PROCESSOR_OP_BNE `CONTROL_PROCESSOR_OP_WIDTH'd8
`define CONTROL_PROCESSOR_OP_BG `CONTROL_PROCESSOR_OP_WIDTH'd9
`define CONTROL_PROCESSOR_OP_BL `CONTROL_PROCESSOR_OP_WIDTH'd10
`define CONTROL_PROCESSOR_OP_BGE `CONTROL_PROCESSOR_OP_WIDTH'd11
`define CONTROL_PROCESSOR_OP_BLE `CONTROL_PROCESSOR_OP_WIDTH'd12
`define CONTROL_PROCESSOR_ASSIGN `CONTROL_PROCESSOR_OP_WIDTH'd13
`define CONTROL_PROCESSOR_OP_COPYBLOCK `CONTROL_PROCESSOR_OP_WIDTH'd14
`define CONTROL_PROCESSOR_OP_EXIT `CONTROL_PROCESSOR_OP_WIDTH'd15
`define CONTROL_PROCESSOR_OP_NOT `CONTROL_PROCESSOR_OP_WIDTH'd16
`define CONTROL_PROCESSOR_OP_SHL `CONTROL_PROCESSOR_OP_WIDTH'd17
`define CONTROL_PROCESSOR_OP_SHR `CONTROL_PROCESSOR_OP_WIDTH'd18
 
//Special register addresses of the control processor.
//NOTE(review): these are sized with the opcode width (5 bits), not with an
//address width - comparisons against wider addresses rely on zero extension.
`define CONTROL_PROCESSOR_REG_STATUS `CONTROL_PROCESSOR_OP_WIDTH'd2
`define CONTROL_PROCESSOR_REG_BLOCK_DST `CONTROL_PROCESSOR_OP_WIDTH'd3
 
 
//Vector processor id width and the command codes carried in CP_MSG_OPERATION_RNG
`define VPID_WIDTH 7
`define VP_COMMAND_START_MAIN_THREAD 0
`define VP_COMMAND_STOP_MAIN_THREAD 1
 
//`define VERILATOR 1
//`define CONTROL_BUS_WIDTH 32
//Control bus message layout: [31:24] destination, [23:16] operation, [15:0] arguments.
//The broadcast flag (bit 31) is the most significant bit of the destination field.
`define CBC_BUS_WIDTH 32
`define CP_MSG_ARGS_RNG 15:0
`define CP_MSG_OPERATION_RNG 23:16
`define CP_MSG_DST_RNG 31:24
`define CP_MSG_BCAST 31
 
`define OMEM_SIZE 250000
 
`define APR06 1
`define MCU_REQUEST_SIZE 81 //[31:0] dst offset/length/tag + [63:32] src offset + [79:64] VP mask + bit 80 (see commented-out MCU_REQUEST_TYPE_BIT)
`define MCU_FIFO_DEPTH 8
`define MCU_COPYMEMBLOCKCMD_DSTOFF_RNG 19:0//23:0
`define MCU_COPYMEMBLOCKCMD_BLKLEN_RNG 30:20//31:24
`define MCU_COPYMEMBLOCK_TAG_BIT 31
`define MCU_COPYMEMBLOCKCMD_SRCOFF_RNG 63:32
`define MCU_COPYMEMBLOCKCMD_VPMASK_RNG 79:64
//`define MCU_REQUEST_TYPE_BIT 80 //See if it is CPBLOCKCOPY or VPCOMMAND
`define MCU_COPYMEMBLOCKCMD_DSTTYPE_VPCODEMEM 1'b1
`define MCU_COPYMEMBLOCKCMD_DSTTYPE_VPDATAMEM 1'b0
 
//Address-type tags (MCU_TAG_SIZE bits wide)
`define MCU_TAG_SIZE 2
`define TAG_NULL 2'b00
`define TAG_INSTRUCTION_ADDRESS_TYPE 2'b10
`define TAG_DATA_ADDRESS_TYPE 2'b01
 
`define MAX_THREADS 2
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
`define MAX_TMEM_BITS 2 //2 ^ MAX_TMEM_BITS = MAX_TMEM_BANKS
`define SELECT_ALL_CORES `MAX_CORES'b1111 //XXX: Change for more cores
 
//Definitions for the input file sizes (avoids nasty warnings about the size of a file being different from the
//size of the array which stores the file in verilog)
`define PARAMS_ARRAY_SIZE 43 //The maximum number of bytes in this input file
`define VERTEX_ARRAY_SIZE 7000 //The maximum number of bytes in this input file
`define TEXTURE_BUFFER_SIZE 196608 //The maximum number of bytes in this input file
//---------------------------------------------------------------------------------
//Verilog provides a `default_nettype none compiler directive. When
//this directive is set, implicit data types are disabled, which will make any
//undeclared signal name a syntax error. This is very useful to avoid annoying
//automatic 1 bit long wire declarations where you don't want them to be!
`default_nettype none
 
//The clock cycle
`define CLOCK_CYCLE 5
`define CLOCK_PERIOD 10
//---------------------------------------------------------------------------------
//Defines the Scale. This is very important because it sets the fixed point precision.
//The Scale defines the number of bits that are used as the fractional part of the number.
//The code has been written in such a way that allows you to change the value of the
//Scale, so that it is possible to experiment with different scenarios. SCALE can be
//no smaller than 1 and no bigger than WIDTH.
`define SCALE 17
 
//The next section defines the length of the registers, buses and other structures,
//do not change these values unless you really know what you are doing (seriously!)
`define WIDTH 32
`define WB_WIDTH 32 //width of wish-bone buses
`define LONG_WIDTH 64
 
`define WB_SIMPLE_READ_CYCLE 0
`define WB_SIMPLE_WRITE_CYCLE 1
//---------------------------------------------------------------------------------
 
//Operation classes (4-bit codes)
`define OPERATION_NOP 4'b0000
`define OPERATION_ADD 4'b0001
`define OPERATION_DIV 4'b0010
`define OPERATION_MUL 4'b0011
`define OPERATION_SQRT 4'b0100
`define OPERATION_LOGIC 4'b0101
`define OPERATION_OUT 4'b0110
 
 
//Reservation station identifiers
`define RS_ADD0 1 //001
`define RS_ADD1 2 //010
`define RS_DIV 3 //011
`define RS_MUL 4 //100
`define RS_SQRT 5 //101
`define RS_LOGIC 6 //110
`define RS_IO 7 //111
//----------------------------------------------------------------
//Issue bus packet structure
//The fields below tile bits 236..0 with no gaps; the SCALE_OP/SCALER/SCALE0/SCALE1
//bits are the individual bits of ISSUE_SCALE_RNG, and the SRCx_TAG ranges
//span the corresponding SIGN + SWZZ fields.
 
 
`define ISSUE_PACKET_SIZE 237 //The size of the packet
`define ISSUE_SRCTAG_SIZE 9
 
`define ISSUE_RSID_RNG 236:233 //4 bits
`define ISSUE_DST_RNG 232:225 //8 bits
`define ISSUE_WE_RNG 224:222 //3 bits
`define ISSUE_SCALE_OP 221
`define ISSUE_SCALER 220
`define ISSUE_SCALE0 219
`define ISSUE_SCALE1 218
`define SCALE_SIZE 4
`define ISSUE_SCALE_RNG 221:218 //4 bits
`define ISSUE_SRC1RS_RNG 217:214 //4 bits
`define ISSUE_SIGN1_RNG 213:211 //3 bits
`define ISSUE_SWZZ1_RNG 210:205 //6 bits
`define ISSUE_SRC1_DATA_RNG 204:109 //96 bits
 
`define ISSUE_SRC0RS_RNG 108:105 //4 bits
`define ISSUE_SIGN0_RNG 104:102 //3 bits
`define ISSUE_SWZZ0_RNG 101:96 //6 bits
`define ISSUE_SRC0_DATA_RNG 95:0 //96 bits
 
`define ISSUE_SRC1_TAG_RNG 213:205
`define ISSUE_SRC0_TAG_RNG 104:96
//Bit positions inside a 9-bit source tag: three sign bits, then three 2-bit swizzle selectors
`define TAG_SIGNX 8
`define TAG_SIGNY 7
`define TAG_SIGNZ 6
`define TAG_SWLX_RNG 5:4
`define TAG_SWLY_RNG 3:2
`define TAG_SWLZ_RNG 1:0
//----------------------------------------------------------------
//Modified issue packet: same leading fields as the issue packet but without
//the per-source sign/swizzle tags (219 bits in total)
`define MOD_ISSUE_PACKET_SIZE 219
`define MOD_ISSUE_RSID_RNG 218:215
`define MOD_ISSUE_DST_RNG 214:207
`define MOD_ISSUE_WE_RNG 206:204
`define MOD_ISSUE_SCALE_RNG 203:200
`define MOD_ISSUE_SRC1RS_RNG 199:196
`define MOD_ISSUE_SRC1_DATA_RNG 195:100
`define MOD_ISSUE_SRC0RS_RNG 99:96
`define MOD_ISSUE_SRC0_DATA_RNG 95:0
 
`define MOD_ISSUE_TAG1_RNG 8:0
`define MOD_ISSUE_TAG0_RNG 8:0
 
`define MOD_ISSUE_SRC_SIZE 87//`DATA_ROW_WIDTH-`ISSUE_SRCTAG_SIZE
//----------------------------------------------------------------
// Commit bus packet structure
 
`define COMMIT_PACKET_SIZE 111 // The size of the packet
`define COMMIT_RSID_RNG 110:107 //4 bits
`define COMMIT_WE_RNG 106:104 //3 bits
`define COMMIT_WE_X 106
`define COMMIT_WE_Y 105
`define COMMIT_WE_Z 104
`define COMMIT_DST_RNG 103:96 //8 bits
`define COMMIT_DATA_RNG 95:0 //96 bits
`define COMMIT_X_RNG 95:64 //32 bits
`define COMMIT_Y_RNG 63:32 //32 bits
`define COMMIT_Z_RNG 31:0 //32 bits
 
`define COMMIT_SIGN_X 95
`define COMMIT_SIGN_Y 63
`define COMMIT_SIGN_Z 31
//----------------------------------------------------------------
//Modified commit packet.
//NOTE(review): MOD_COMMIT_TAG_RNG (109:100) overlaps both MOD_SIGN_RNG and
//MOD_COMMIT_SWZ_RNG - presumably the tag is sign + swizzle taken together; confirm.
`define MOD_COMMIT_PACKET_SIZE 114
`define MOD_SCALE_RNG 113:110
`define MOD_SIGN_RNG 109:106
`define MOD_COMMIT_TAG_RNG 109:100
`define MOD_COMMIT_SWZ_RNG 105:100
`define MOD_COMMIT_RSID_RNG 99:96
`define MOD_COMMIT_DATA_RNG 95:0 //96 bits
//----------------------------------------------------------------
`define OP_SIZE 16 //Size of the operation part of the instruction
`define OP_RNG 63:48 //Range of the operation part of the instruction
`define OP_BIT_IMM 15
//`define OP_WE_RNG 14:12
`define OP_BREAK 11
`define OP_CODE_RNG 10:0
//----------------------------------------------------------------
// Source0 structure
`define SRC0_SIZE 17
`define SRC0_RNG 16:0
`define SRC0_ADDR_SIZE 8
`define SRC0_SIGN_RNG 16:14
`define SRC0_SWZX_RNG 13:8
`define SRC0_ADDR_RNG 7:0
//----------------------------------------------------------------
// Source1 structure
// NOTE(review): the SIGN/SWZX/ADDR ranges are identical to Source0's, so they are
// presumably relative to the extracted 17-bit SRC1 field, not to the instruction word; confirm.
`define SRC1_SIZE 17
`define SRC1_RNG 33:17
`define SRC1_ADDR_SIZE 8
`define SRC1_SIGN_RNG 16:14
`define SRC1_SWZX_RNG 13:8
`define SRC1_ADDR_RNG 7:0
//----------------------------------------------------------------
 
`define NUMBER_OF_RSVR_STATIONS 7
 
//---------------------------------------------------------------
//Instruction structure
//Bit layout of the 64-bit instruction word, from LSB to MSB:
// [16:0] source 0 (address, swizzle, sign), [33:17] source 1,
// [41:34] destination, [44:42] write enables, [47:45] addressing mode flags,
// [50:48] code, [53:51] reserved, [56:54] branch op, 57 branch bit,
// 58 end of flow, [62:59] scale op, 63 immediate flag.
//INST_IMM_RNG overlays bits [31:0] of the same word.
//Note that the source 1 address/swizzle macros are spelled 'SCR1'; the
//spelling is kept because other files may reference these names.
`define INST_IMM_RNG 31:0
`define INST_SRC0_ADDR_RNG 7:0
`define INST_SRC0_SWZL_RNG 13:8
`define INST_SRC0_SWLZ_RNG 9:8
`define INST_SRC0_SWLY_RNG 11:10
`define INST_SRC0_SWLX_RNG 13:12
`define INST_SRC0_SIGN_RNG 16:14
`define INST_SRC0_SIGNZ 14
`define INST_SRC0_SIGNY 15
`define INST_SRC0_SIGNX 16
`define INST_SCR1_ADDR_RNG 24:17
`define INST_SCR1_SWZL_RNG 30:25
`define INST_SRC1_SWLZ_RNG 26:25
`define INST_SRC1_SWLY_RNG 28:27
`define INST_SRC1_SWLX_RNG 30:29
`define INST_SRC1_SIGN_RNG 33:31
`define INST_SRC1_SIGNZ 31
`define INST_SRC1_SIGNY 32
`define INST_SRC1_SIGNX 33
`define INST_DST_RNG 41:34
`define INST_WE_Z 42
`define INST_WE_Y 43
`define INST_WE_X 44
/*
`define INST_RESERVED_RNG 46:42
*/
 
//The three addressing mode flags are the individual bits of INST_ADDRMODE_RNG
`define INST_SRC0_DISPLACED 45
`define INST_SRC1_DISPLACED 46
`define INST_DEST_ZERO 47
`define INST_ADDRMODE_RNG 47:45
`define INST_CODE_RNG 50:48
//`define INST_SCOP_RNG 53:51
//Written MSB:LSB like every other range in this file; the former 51:53 is
//an illegal (reversed) part-select on a [63:0] vector.
`define INST_RESERVED_RNG 53:51
`define INST_BRANCH_OP_RNG 56:54
`define INST_BRANCH_BIT 57
`define INST_EOF_RNG 58 //End of flow
`define INST_SCOP_RNG 62:59
`define INST_IMM 63
 
`define INST_WE_RNG 44:42
//Bit positions inside the 4-bit scale operation field
`define SCALE_SRC1_EN 0
`define SCALE_SRC0_EN 1
`define SCALE_SRCR_EN 2
`define SCALE_OP 3
//---------------------------------------------------------------
//Branch condition codes (3 bits).
//Compiler has to put the WE.x, WE.y and WE.z in zero (no write)
//for the branch instructions
`define BRANCH_ALWAYS 3'b000 //JMP
`define BRANCH_IF_ZERO 3'b001 //==
`define BRANCH_IF_NOT_ZERO 3'b010 //!=
`define BRANCH_IF_SIGN 3'b011 //<
`define BRANCH_IF_NOT_SIGN 3'b100 //>
`define BRANCH_IF_ZERO_OR_SIGN 3'b101 //<=
`define BRANCH_IF_ZERO_OR_NOT_SIGN 3'b110 //>=
//---------------------------------------------------------------
 
//Component ranges inside a 96-bit data row (X in the top 32 bits, Z in the bottom);
//SRC_RET_ADDR_RNG names the same bits as X_RNG.
`define SRC_RET_ADDR_RNG 95:64
`define X_RNG 95:64
`define Y_RNG 63:32
`define Z_RNG 31:0
 
 
`define ALU_BIT_ADD 0 //Bit 0 of operation - presumably the add bit (by analogy with the div bit)
`define ALU_BIT_ASSIGN 1 //Bit 1 of operation - presumably the assign bit (by analogy with the div bit)
`define ALU_BIT_DIV 2 //Bit 2 of operation is div bit
`define ALU_BIT_MUL 3
 
 
`define OPERAND_BIT_X 15
`define OPERAND_BIT_Y 14
`define OPERAND_BIT_Z 13
 
//NOTE(review): INSTRUCTION_OP_LENGTH is not defined anywhere in the text visible
//here; these five macros only expand correctly where that macro has been defined.
`define NOP `INSTRUCTION_OP_LENGTH'b0_000000000000000
`define ADD `INSTRUCTION_OP_LENGTH'b0_000000000000001
`define AND `INSTRUCTION_OP_LENGTH'b0_000000000000010
`define DIV `INSTRUCTION_OP_LENGTH'b0_000000000000100
`define MUL `INSTRUCTION_OP_LENGTH'b0_000000000001000
 
 
 
//You can play around with the size of instructions, but keep
//in mind that Bits 3 and 4 of the Operand have a special meaning
//that is used for the jump family of instructions (see Documentation).
//Also the MSB of Operand is used by the decoder to distinguish
//between Type I and Type II instructions.
 
 
`define INSTRUCTION_WIDTH 64
 
//Defines the Length of Memory blocks
//`define RESOURCE_VECTOR_SIZE 11
`define INSTRUCTION_ADDR_WIDTH 16
`define DATA_ROW_WIDTH 96
`define DATA_ADDRESS_WIDTH 8//7
`define ROM_ADDRESS_WIDTH 16
`define ROM_ADDRESS_SEL_MASK `ROM_ADDRESS_WIDTH'h8000
 
 
//Special purpose register addresses and the fields of control register 0
`define SPR_CONTROL0 `DATA_ADDRESS_WIDTH'd2
`define SPR_CONTROL1 `DATA_ADDRESS_WIDTH'd3
`define SPR_TCONTROL0_MT_ENABLED 0
`define SPR_TCONTROL0_T0_INST_OFFSET_RNG 16:1
 
//Named data memory addresses: C1..C7 occupy 64..70, R1..R12 occupy 71..82
`define C1 `DATA_ADDRESS_WIDTH'd64
`define C2 `DATA_ADDRESS_WIDTH'd65
`define C3 `DATA_ADDRESS_WIDTH'd66
`define C4 `DATA_ADDRESS_WIDTH'd67
`define C5 `DATA_ADDRESS_WIDTH'd68
`define C6 `DATA_ADDRESS_WIDTH'd69
`define C7 `DATA_ADDRESS_WIDTH'd70
`define R1 `DATA_ADDRESS_WIDTH'd71
`define R2 `DATA_ADDRESS_WIDTH'd72
`define R3 `DATA_ADDRESS_WIDTH'd73
`define R4 `DATA_ADDRESS_WIDTH'd74
`define R5 `DATA_ADDRESS_WIDTH'd75
`define R6 `DATA_ADDRESS_WIDTH'd76
`define R7 `DATA_ADDRESS_WIDTH'd77
`define R8 `DATA_ADDRESS_WIDTH'd78
`define R9 `DATA_ADDRESS_WIDTH'd79
`define R10 `DATA_ADDRESS_WIDTH'd80
`define R11 `DATA_ADDRESS_WIDTH'd81
`define R12 `DATA_ADDRESS_WIDTH'd82
 
/theia_gpu/branches/beta_2.0/rtl/Module_IO_Station.v
0,0 → 1,91
`include "aDefinitions.v"
 
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
// IO station: a reservation station whose "execution" is a write of one
// 96-bit data row to the external output memory (OMEM).
//
// The latched source 0 operand is presented as write data and the latched
// source 1 operand as write address. Nothing is ever written back to the
// register file, so the commit request/data outputs are tied to zero and
// the reservation station is released by an internally generated grant.
//
// Timing (assuming FFD_POSEDGE_SYNCRONOUS_RESET is a plain D register):
//   oOMEMWriteEnable is the registered OR of the trigger and its one- and
//   two-cycle delayed copies; wExeDone is the trigger delayed three cycles.
//
// NOTE(review): wResult is never driven and iCommitGranted is never read
// inside this module.
module IO_STATION
(
input wire Clock,
input wire Reset,
input wire [`MOD_ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
input wire [3:0] iId,
output wire [`COMMIT_PACKET_SIZE-1:0] oCommitData,
output wire oCommitResquest,
input wire iCommitGranted,
output wire oBusy,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEMWriteEnable
);
 
wire wExeDone;
wire wRS_OMWRITE_Trigger;
wire [`DATA_ROW_WIDTH-1:0] wRS1_OperandA;
wire [`DATA_ROW_WIDTH-1:0] wRS1_OperandB;
wire [`DATA_ROW_WIDTH-1:0] wResult;
wire wCommitGranted;
//Delayed copies of the trigger. Declared before their first use: with
//`default_nettype none a reference ahead of the declaration is an error.
wire wExeDone_pre1,wExeDone_pre2;
 
//ReservationStation_1Cycle RS
ReservationStation RS
(
.Clock( Clock ),
.Reset( Reset ),
.iIssueBus( iIssueBus ),
.iCommitBus( iCommitBus ),
.iMyId( iId ),
.iExecutionDone( wExeDone ),
.iResult( wResult ),
.iCommitGranted( wCommitGranted ),
.oSrc1Latched( wRS1_OperandB ),
.oSrc0Latched( wRS1_OperandA ),
.oBusy( oBusy ),
.oTrigger( wRS_OMWRITE_Trigger )
);
 
 
assign oCommitResquest = 1'b0; //This is always zero since we are not writing anything into the RF
assign oCommitData = `COMMIT_PACKET_SIZE'd0; //This is always zero since we are not writing anything into the RF
assign oOMEMWriteData = wRS1_OperandA; //Write 96 bits to external memory OMEM
assign oOMEMWriteAddress = wRS1_OperandB; //Each 32 bit word has the write address
 
//Write enable: registered OR of the trigger and its two delayed copies
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) DONE_FFD0
( Clock, Reset, 1'b1 , wRS_OMWRITE_Trigger | wExeDone_pre1 | wExeDone_pre2, oOMEMWriteEnable );
 
//It takes 3 clock cycles to write the 96 bits into OMEM
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) DONE_FFD1
( Clock, Reset, 1'b1 , wRS_OMWRITE_Trigger, wExeDone_pre1 );
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) DONE_FFD2
( Clock, Reset, 1'b1 , wExeDone_pre1, wExeDone_pre2 );
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) DONE_FFD3
( Clock, Reset, 1'b1 , wExeDone_pre2, wExeDone );
 
//The station grants itself the commit as soon as the write sequence is done
assign wCommitGranted = wExeDone;
 
endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_WishBoneSlave.v
0,0 → 1,159
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
 
//Address-type tag values compared against the latched TGA_I in WishBoneSlaveUnit.
//They carry the same values as TAG_INSTRUCTION_ADDRESS_TYPE / TAG_DATA_ADDRESS_TYPE
//in aDefinitions.v.
`define TAG_WBS_INSTRUCTION_ADDRESS_TYPE 2'b10
`define TAG_WBS_DATA_ADDRESS_TYPE 2'b01
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//------------------------------------------------------------------------------
// Wishbone slave that collects 32-bit words from the bus into up to three
// registers (Vx, Vy, Vz) and presents them as a 96-bit data row
// ({Vx,Vy,Vz}) and as a 64-bit instruction ({Vx,Vy}), together with a
// write address and write enables for the data and instruction memories.
//
// The address (ADR_I[15:0]) and the address tag (TGA_I) are captured by
// registers that are clocked by CYC_I rather than by CLK_I.
// The write enables are asserted while MST_I and WE_I are high, CYC_I is
// low, and the latched tag matches the respective address type.
module WishBoneSlaveUnit
(
//WB Input signals
input wire CLK_I,
input wire RST_I,
input wire STB_I,
input wire WE_I,
input wire[`WB_WIDTH-1:0] DAT_I,
input wire[`WB_WIDTH-1:0] ADR_I,
input wire [`MCU_TAG_SIZE-1:0] TGA_I,
output wire ACK_O,
input wire MST_I, //Master In!
input wire CYC_I,
output wire[`DATA_ADDRESS_WIDTH-1:0] oDataWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oDataBus,
output wire [`INSTRUCTION_ADDR_WIDTH-1:0] oInstructionWriteAddress,
output wire [`INSTRUCTION_WIDTH-1:0] oInstructionBus,
output wire oDataWriteEnable,
output wire oInstructionWriteEnable
 
);
 
//Address register: clocked by CYC_I, stores the low 16 bits of ADR_I
FFD_POSEDGE_SYNCRONOUS_RESET # (16) FFADR
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( ADR_I[15:0] ),
.Q( oInstructionWriteAddress )
);
 
//The data write address is the same register; the assignment keeps only the
//low `DATA_ADDRESS_WIDTH bits of the `INSTRUCTION_ADDR_WIDTH wide value
assign oDataWriteAddress = oInstructionWriteAddress;
 
wire[`MCU_TAG_SIZE-1:0] wTGA_Latched;
 
//Address tag register: clocked by CYC_I, stores TGA_I
FFD_POSEDGE_SYNCRONOUS_RESET # (2) FFADDRTYPE
(
.Clock( CYC_I ),
.Reset( RST_I ),
.Enable(1'b1),
.D( TGA_I ),
.Q( wTGA_Latched )
);
 
 
 
//Local aliases for the Wishbone clock and reset
wire Clock,Reset;
assign Clock = CLK_I;
assign Reset = RST_I;
 
 
//A write strobe is a strobe with write enable set
wire wLatchNow;
assign wLatchNow = STB_I & WE_I;
 
//1 Clock cycle after we assert the latch signal
//then the FF has the data ready to propagate
wire wDelay;
FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFOutputDelay
(
.Clock( Clock ),
.Enable( 1'b1 ),
.Reset( Reset ),
.D( wLatchNow ),
.Q( wDelay )
);
 
assign ACK_O = wDelay & STB_I; //make sure we set ACK_O back to zero when STB_I is zero
 
 
//Word selector: bit 0 enables Vx, bit 1 Vy, bit 2 Vz.
//NOTE(review): presumably SHIFTLEFT_POSEDGE is a shift register loaded with
//Initial (3'b1) on Reset and shifted left while Enable is high - confirm
//against its definition. It is held in reset whenever CYC_I is low and
//enabled while STB_I is high and ACK_O is low.
wire [2:0] wXYZSel;
 
SHIFTLEFT_POSEDGE #(3) SHL
(
.Clock(CLK_I),
.Enable(STB_I & ~ACK_O),
.Reset(~CYC_I),
.Initial(3'b1),
.O(wXYZSel)
);
 
 
//Flip Flop to Store Vx
wire [`WIDTH-1:0] wVx;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vx
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[0] & STB_I ),
.D( DAT_I ),
.Q( wVx )
);
 
 
//Flip Flop to Store Vy
wire [`WIDTH-1:0] wVy;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vy
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[1] & STB_I ),
.D( DAT_I ),
.Q( wVy )
);
 
//Flip Flop to Store Vz
wire [`WIDTH-1:0] wVz;
 
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFD32_WBS2MEM_Vz
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wXYZSel[2] & STB_I ),
.D( DAT_I ),
.Q( wVz )
);
 
//Data rows use all three words, instructions only the first two
assign oDataBus = {wVx,wVy,wVz};
assign oInstructionBus = {wVx,wVy};
//Decode the latched address tag
wire wIsInstructionAddress,wIsDataAddress;
assign wIsInstructionAddress = (wTGA_Latched == `TAG_WBS_INSTRUCTION_ADDRESS_TYPE) ? 1'b1 : 1'b0;
assign wIsDataAddress = (wTGA_Latched == `TAG_WBS_DATA_ADDRESS_TYPE ) ? 1'b1 : 1'b0;
 
//Write enables are only active while CYC_I is low, with MST_I and WE_I high
assign oDataWriteEnable = (MST_I & !CYC_I & wIsDataAddress & WE_I ) ? 1'b1 : 1'b0;
assign oInstructionWriteEnable = ( MST_I & !CYC_I & wIsInstructionAddress & WE_I) ? 1'b1 : 1'b0;
 
 
 
endmodule
//------------------------------------------------------------------------------
/theia_gpu/branches/beta_2.0/rtl/Unit_Control.v
0,0 → 1,133
`include "aDefinitions.v"
 
//State encoding of the ControlUnit state machine
`define CU_STATE_AFTER_RESET 0
`define CU_STATE_WAIT_FOR_CP 1
`define CU_STATE_HANDLE_CP_REQUEST 2
`define CU_STATE_START_MAIN_THREAD 3
`define CU_STATE_STOP_MAIN_THREAD 4

// Per-VP control unit.
//
// Commands seen on iCpCommand that are either broadcast or addressed to
// iVPID are written into an input FIFO. A small state machine decodes the
// operation field of the FIFO output and starts/stops the main thread by
// toggling the 1-bit counter that drives oVpEnabled. oBusy mirrors the
// FIFO 'full' flag.
//
// NOTE(review): rPopFifo is driven to 0 in every state, so the FIFO read
// enable is never asserted by this module - confirm whether that is intended.
module ControlUnit
(
input wire Clock,
input wire Reset,
input wire [`CBC_BUS_WIDTH-1:0] iCpCommand,
input wire [`VPID_WIDTH-1:0] iVPID,
output wire oVpEnabled,
output wire oBusy
);

reg [4:0] rCurrentState, rNextState;
reg rPopFifo;
reg rToggleVpEnabled;
wire wRequestDetected;
wire [`CBC_BUS_WIDTH-1:0] wCurrentRequest;

//A request is for us when it is a broadcast or when its destination matches our id
assign wRequestDetected = iCpCommand[`CP_MSG_BCAST] || (iCpCommand[`CP_MSG_DST_RNG] == iVPID);

//Incoming requests are stored in the FIFO
sync_fifo # (`CBC_BUS_WIDTH,8 ) IN_FIFO
(
	.clk( Clock ),
	.reset( Reset ),
	.din( iCpCommand ),
	.wr_en( wRequestDetected ),
	.rd_en( rPopFifo ),
	.dout( wCurrentRequest ),
	.full( oBusy )
);

//1-bit counter: every enabled clock flips oVpEnabled
UPCOUNTER_POSEDGE # (1) UP1
(
	.Clock( Clock ),
	.Reset( Reset ),
	.Initial( 1'b0 ),
	.Enable( rToggleVpEnabled ),
	.Q( oVpEnabled )
);

//State register with synchronous reset
always @(posedge Clock )
begin
	if (Reset )
		rCurrentState <= `CU_STATE_AFTER_RESET;
	else
		rCurrentState <= rNextState;
end

//Next state and output logic.
//Defaults are assigned first (they are also the values used for any
//unknown state); each state only overrides what differs.
always @ ( * )
begin
	rPopFifo = 1'b0;
	rToggleVpEnabled = 1'b0;
	rNextState = `CU_STATE_AFTER_RESET;

	case (rCurrentState)
	//--------------------------------------
	`CU_STATE_AFTER_RESET:
		rNextState = `CU_STATE_WAIT_FOR_CP;
	//--------------------------------------
	//Idle until a request for this VP shows up on the bus
	`CU_STATE_WAIT_FOR_CP:
	begin
		if ( wRequestDetected )
			rNextState = `CU_STATE_HANDLE_CP_REQUEST;
		else
			rNextState = `CU_STATE_WAIT_FOR_CP;
	end
	//--------------------------------------
	//Dispatch on the operation field of the FIFO output
	`CU_STATE_HANDLE_CP_REQUEST:
	begin
		case ( wCurrentRequest[`CP_MSG_OPERATION_RNG] )
		`VP_COMMAND_START_MAIN_THREAD: rNextState = `CU_STATE_START_MAIN_THREAD;
		`VP_COMMAND_STOP_MAIN_THREAD: rNextState = `CU_STATE_STOP_MAIN_THREAD;
		default: rNextState = `CU_STATE_WAIT_FOR_CP;
		endcase
	end
	//--------------------------------------
	//Toggle only when currently disabled, so the VP ends up enabled
	`CU_STATE_START_MAIN_THREAD:
	begin
		rToggleVpEnabled = ~oVpEnabled;
		rNextState = `CU_STATE_WAIT_FOR_CP;
	end
	//--------------------------------------
	//Toggle only when currently enabled, so the VP ends up disabled
	`CU_STATE_STOP_MAIN_THREAD:
	begin
		rToggleVpEnabled = oVpEnabled;
		rNextState = `CU_STATE_WAIT_FOR_CP;
	end
	//--------------------------------------
	default:
		rNextState = `CU_STATE_AFTER_RESET;
	//--------------------------------------
	endcase
end //always
endmodule
/theia_gpu/branches/beta_2.0/rtl/Unit_ControlProcessor.v
0,0 → 1,391
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//Control processor instruction format and opcodes.
//CONTROL_PROCESSOR_OP_WIDTH and all CONTROL_PROCESSOR_OP_* / CONTROL_PROCESSOR_ASSIGN
//macros below repeat, with identical values, the definitions in aDefinitions.v
//(included at the top of this file); keep both copies in sync.
`define CONTROL_PROCESSOR_OP_WIDTH 5
`define CONTROL_PROCESSOR_ADDR_WIDTH 8
`define CONTROL_PROCESSOR_ISSUE_CMD_RNG 24:0
`define CONTROL_PROCESSOR_INSTRUCTION_WIDTH 32
 
//32-bit instruction word: [31:24] operation, [23:16] destination,
//[15:8] source 1, [7:0] source 0.
//NOTE(review): the operation field is 8 bits wide while the opcodes are
//CONTROL_PROCESSOR_OP_WIDTH (5) bits wide.
`define CONTROL_PROCESSOR_INST_OP_RNG 31:24
`define CONTROL_PROCESSOR_INST_OP_DST_RNG 23:16
`define CONTROL_PROCESSOR_INST_OP_SRC1_RNG 15:8
`define CONTROL_PROCESSOR_INST_OP_SRC0_RNG 7:0
 
 
`define CONTROL_PROCESSOR_OP_NOP `CONTROL_PROCESSOR_OP_WIDTH'd0
`define CONTROL_PROCESSOR_OP_DELIVER_COMMAND `CONTROL_PROCESSOR_OP_WIDTH'd1
`define CONTROL_PROCESSOR_OP_ADD `CONTROL_PROCESSOR_OP_WIDTH'd2
`define CONTROL_PROCESSOR_OP_SUB `CONTROL_PROCESSOR_OP_WIDTH'd3
`define CONTROL_PROCESSOR_OP_AND `CONTROL_PROCESSOR_OP_WIDTH'd4
`define CONTROL_PROCESSOR_OP_OR `CONTROL_PROCESSOR_OP_WIDTH'd5
`define CONTROL_PROCESSOR_OP_BRANCH `CONTROL_PROCESSOR_OP_WIDTH'd6
`define CONTROL_PROCESSOR_OP_BEQ `CONTROL_PROCESSOR_OP_WIDTH'd7
`define CONTROL_PROCESSOR_OP_BNE `CONTROL_PROCESSOR_OP_WIDTH'd8
`define CONTROL_PROCESSOR_OP_BG `CONTROL_PROCESSOR_OP_WIDTH'd9
`define CONTROL_PROCESSOR_OP_BL `CONTROL_PROCESSOR_OP_WIDTH'd10
`define CONTROL_PROCESSOR_OP_BGE `CONTROL_PROCESSOR_OP_WIDTH'd11
`define CONTROL_PROCESSOR_OP_BLE `CONTROL_PROCESSOR_OP_WIDTH'd12
`define CONTROL_PROCESSOR_ASSIGN `CONTROL_PROCESSOR_OP_WIDTH'd13
`define CONTROL_PROCESSOR_OP_COPYBLOCK `CONTROL_PROCESSOR_OP_WIDTH'd14
`define CONTROL_PROCESSOR_OP_EXIT `CONTROL_PROCESSOR_OP_WIDTH'd15
`define CONTROL_PROCESSOR_OP_NOT `CONTROL_PROCESSOR_OP_WIDTH'd16
`define CONTROL_PROCESSOR_OP_SHL `CONTROL_PROCESSOR_OP_WIDTH'd17
`define CONTROL_PROCESSOR_OP_SHR `CONTROL_PROCESSOR_OP_WIDTH'd18
 
 
//-----------------------------------------------------------------------------
// ControlProcessor
//
// Small sequencer that fetches 32-bit instructions from InstructionRam,
// registers their fields (operation, destination, source1, source0) and
// decodes them combinationally.
//
// Ports:
//   Clock, Reset        - clock and synchronous reset for all registers.
//   oControlBus         - command word produced by DELIVER_COMMAND, else 0.
//   iMCUFifoEmpty       - status bit readable through the address
//                         `CONTROL_PROCESSOR_REG_STATUS (bit 0 of the word).
//   oCopyBlockCommand   - request word produced by COPYBLOCK, else 0.
//
// Data path notes:
//   * DataRam read addresses are taken from the un-registered instruction
//     word, while the decoded fields are registered; presumably the RAM has
//     a one-clock read latency so both line up -- TODO confirm against
//     RAM_DUAL_READ_PORT.
//   * The result of the previous instruction is forwarded to the current one
//     when the addresses match AND the previous instruction actually wrote a
//     result. Without the write qualifier, a non-writing instruction
//     (branch, DELIVER_COMMAND, COPYBLOCK, NOP) whose destination field
//     happened to equal a source address made the next instruction read the
//     forced-zero rResult instead of the stored value.
//   * All comparisons in the branch instructions are unsigned (the operands
//     are plain, unsigned wires).
//   * The operation field of the instruction is 8 bits wide but only
//     `CONTROL_PROCESSOR_OP_WIDTH bits are registered in FFD1.
//-----------------------------------------------------------------------------
module ControlProcessor
(
input wire Clock,
input wire Reset,
output wire[`CBC_BUS_WIDTH-1:0] oControlBus,
input wire iMCUFifoEmpty,
output reg [`MCU_REQUEST_SIZE-1:0] oCopyBlockCommand
);



wire [`CONTROL_PROCESSOR_ADDR_WIDTH-1:0] wIP,wIP_temp;
reg rWriteEnable,rBranchTaken;
reg [`CBC_BUS_WIDTH-1:0] rIssueCommand;
wire [`CONTROL_PROCESSOR_INSTRUCTION_WIDTH-1:0] wInstruction;
wire [`CONTROL_PROCESSOR_OP_WIDTH-1:0] wOperation;
reg [`WIDTH-1:0] rResult;
wire [`WIDTH-1:0] wPrevResult;
wire [`CONTROL_PROCESSOR_ADDR_WIDTH-1:0] wSourceAddr0,wSourceAddr1,wDestination,wPrevDestination;
wire [`WIDTH-1:0] wSourceData0,wSourceData1,wIPInitialValue,wImmediateValue;
wire wPrevWriteEnable; // rWriteEnable of the previous instruction


assign oControlBus = rIssueCommand;

// Program memory: read-only from this module (write enable tied low).
RAM_SINGLE_READ_PORT # (`CONTROL_PROCESSOR_INSTRUCTION_WIDTH, `CONTROL_PROCESSOR_ADDR_WIDTH, 256) InstructionRam
(
.Clock( Clock ),
.iWriteEnable( 1'b0 ),
.iReadAddress0( wIP ),
.oDataOut0( wInstruction )
);


// Data memory / register file: two read ports addressed by the source fields
// of the instruction being fetched, one write port for the current result.
wire [`WIDTH-1:0] wSourceData0_FromMem,wSourceData1_FromMem,wSourceData0_FromMem_Pre,wSourceData1_FromMem_Pre;
RAM_DUAL_READ_PORT # (`WIDTH,`CONTROL_PROCESSOR_ADDR_WIDTH) DataRam
(
.Clock( Clock ),
.iWriteEnable( rWriteEnable ),
.iReadAddress0( wInstruction[`CONTROL_PROCESSOR_INST_OP_SRC0_RNG] ),
.iReadAddress1( wInstruction[`CONTROL_PROCESSOR_INST_OP_SRC1_RNG] ),
.iWriteAddress( wDestination ),
.iDataIn( rResult ),
.oDataOut0( wSourceData0_FromMem_Pre ),
.oDataOut1( wSourceData1_FromMem_Pre )
);

// Shadow copy of whatever is written to `CONTROL_PROCESSOR_REG_BLOCK_DST;
// its low 16 bits form the top field of the COPYBLOCK request.
wire [`WIDTH-1:0] wSprBlockDestination;
FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH ) FFD_SPR_COREID
(
.Clock(Clock),
.Reset(Reset),
.Enable(rWriteEnable && (wDestination == `CONTROL_PROCESSOR_REG_BLOCK_DST)),
.D(rResult),
.Q(wSprBlockDestination)
);

// Reads of the status address return the FIFO-empty flag in bit 0 and zeros
// elsewhere. The zero padding is sized from `WIDTH so the word is exactly
// `WIDTH bits wide (the previous 30'b0 literal relied on implicit extension).
wire [`WIDTH-1:0] wStatusWord;
assign wStatusWord = { {(`WIDTH-1){1'b0}}, iMCUFifoEmpty };

assign wSourceData0_FromMem = (wSourceAddr0 == `CONTROL_PROCESSOR_REG_STATUS) ? wStatusWord : wSourceData0_FromMem_Pre;
assign wSourceData1_FromMem = (wSourceAddr1 == `CONTROL_PROCESSOR_REG_STATUS) ? wStatusWord : wSourceData1_FromMem_Pre;

// Result forwarding, qualified by the previous instruction's write enable.
wire wForwardSource0, wForwardSource1;
assign wForwardSource0 = wPrevWriteEnable && ( wSourceAddr0 == wPrevDestination );
assign wForwardSource1 = wPrevWriteEnable && ( wSourceAddr1 == wPrevDestination );
assign wSourceData0 = ( wForwardSource0 ) ? wPrevResult : wSourceData0_FromMem ;
assign wSourceData1 = ( wForwardSource1 ) ? wPrevResult : wSourceData1_FromMem ;

// Instruction pointer. On reset the counter is loaded with 0+1, on a taken
// branch with target+1, while wIP itself presents the target directly during
// the branch cycle; otherwise wIP follows the free-running counter.
assign wIPInitialValue = (Reset) ? `CONTROL_PROCESSOR_ADDR_WIDTH'b0 : wDestination;
UPCOUNTER_POSEDGE # (`CONTROL_PROCESSOR_ADDR_WIDTH) IP
(
.Clock( Clock ),
.Reset( Reset | rBranchTaken ),
.Initial( wIPInitialValue + 1 ),
.Enable( 1'b1 ),
.Q( wIP_temp )
);
assign wIP = (rBranchTaken) ? wIPInitialValue : wIP_temp;



// Registered instruction fields -------------------------------------------
FFD_POSEDGE_SYNCRONOUS_RESET # ( `CONTROL_PROCESSOR_OP_WIDTH ) FFD1
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wInstruction[`CONTROL_PROCESSOR_INST_OP_RNG]),
.Q(wOperation)
);

// Result of the instruction executed in the previous clock.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(rResult),
.Q(wPrevResult)
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `CONTROL_PROCESSOR_ADDR_WIDTH ) FFD255
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wInstruction[`CONTROL_PROCESSOR_INST_OP_SRC0_RNG]),
.Q(wSourceAddr0)
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `CONTROL_PROCESSOR_ADDR_WIDTH ) FFD3
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wInstruction[`CONTROL_PROCESSOR_INST_OP_SRC1_RNG]),
.Q(wSourceAddr1)
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `CONTROL_PROCESSOR_ADDR_WIDTH ) FFD4
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wInstruction[`CONTROL_PROCESSOR_INST_OP_DST_RNG]),
.Q(wDestination)
);

// Destination address of the instruction executed in the previous clock.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `CONTROL_PROCESSOR_ADDR_WIDTH ) FFD44
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(wDestination),
.Q(wPrevDestination)
);

// Write enable of the instruction executed in the previous clock; tells the
// forwarding logic whether wPrevResult is a real result.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD_PREV_WE
(
.Clock(Clock),
.Reset(Reset),
.Enable(1'b1),
.D(rWriteEnable),
.Q(wPrevWriteEnable)
);


// Immediate operand for ASSIGN: the two source fields concatenated
// (16 bits, zero-extended to `WIDTH by the assignment).
assign wImmediateValue = {wSourceAddr1,wSourceAddr0};



// Instruction decode / execute (purely combinational).
// Every output gets its "do nothing" value first; each opcode then overrides
// only what it drives. NOP and any opcode without an arm (including EXIT and
// NOT) keep the defaults.
always @ ( * )
begin
	oCopyBlockCommand = `MCU_REQUEST_SIZE'b0;
	rIssueCommand     = `CBC_BUS_WIDTH'b0;
	rWriteEnable      = 1'b0;
	rResult           = 0;
	rBranchTaken      = 1'b0;

	case (wOperation)
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_COPYBLOCK:
	begin
		oCopyBlockCommand =
		{wSprBlockDestination[15:0],wSourceData1,wSourceData0[`MCU_COPYMEMBLOCK_TAG_BIT],wSourceData0[`MCU_COPYMEMBLOCKCMD_BLKLEN_RNG],wSourceData0[`MCU_COPYMEMBLOCKCMD_DSTOFF_RNG]};
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_DELIVER_COMMAND:
	begin
		// The destination field is used as part of the command, not as an address.
		rIssueCommand = {wDestination[7:0],wSourceData1[7:0],wSourceData0[15:0]};
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_ADD:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 + wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_SUB:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 - wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_AND:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 & wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_SHL:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 << wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_SHR:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 >> wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_OR:
	begin
		rWriteEnable = 1'b1;
		rResult = wSourceData1 | wSourceData0;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BLE:
	begin
		if (wSourceData1 <= wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BL:
	begin
		if (wSourceData1 < wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BG:
	begin
		if (wSourceData1 > wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BGE:
	begin
		if (wSourceData1 >= wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BEQ:
	begin
		if (wSourceData1 == wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BNE:
	begin
		if (wSourceData1 != wSourceData0 )
			rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_OP_BRANCH:
	begin
		rBranchTaken = 1'b1;
	end
	//-------------------------------------
	`CONTROL_PROCESSOR_ASSIGN:
	begin
		rWriteEnable = 1'b1;
		rResult = wImmediateValue;
	end
	//-------------------------------------
	default:
	begin
		// `CONTROL_PROCESSOR_OP_NOP and unimplemented opcodes: keep defaults.
	end
	//-------------------------------------
	endcase
end





endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_MemoryController.v
0,0 → 1,314
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
 
// State encodings for the MemoryController state machine (rCurrentState).
// State entered while Reset is asserted; moves to MCU_WAIT_FOR_REQUEST.
`define MCU_STATE_AFTER_RESET 0
// Idle: waits until the request FIFO is not empty.
`define MCU_WAIT_FOR_REQUEST 1
// Transfer of one code-memory entry; left once two strobes were counted.
`define MCU_TRANSFER_BLOCK_TO_VPCODEMEM 2
// Transfer of one data-memory entry; left once three strobes were counted.
`define MCU_TRANSFER_BLOCK_TO_VPDATAMEM 3
// Advances the destination address, or pops the FIFO after the last entry.
`define MCU_INC_TRANSFER_BLOCK_ADDR 4
 
//-----------------------------------------------------------------------------
// MemoryController
//
// Queues copy-block requests in a FIFO and, for the request at the head of the
// FIFO, reads words from an external memory (oMEM_* / iMEM_*) and presents
// them on a Wishbone-style master interface (DAT_O, ADR_O, STB_O, WE_O, TAG_O,
// CYC_O, MST_O, ACK_I).
//
// A request is recognised whenever the VPMASK field of iRequest is non-zero;
// that same event pushes the request into the FIFO and reloads the source and
// destination address counters from the request fields.
//
// Per destination entry the state machine waits for 2 strobes (code memory)
// or 3 strobes (data memory), then either increments the destination address
// or, when the cycle counter equals the request's block length, pops the FIFO.
//
// Parameters:
//   CORE_COUNT - width of the per-core write-enable vector WE_O.
//
// Changes relative to the previous revision of this block:
//   * oPendingRequests was declared but never driven; it is now driven with
//     "FIFO not empty". NOTE(review): meaning inferred from the port name and
//     its "Connected to FIFO" comment -- confirm against the instantiating
//     module.
//   * The unused wire wStall was removed. Its comment described waiting for
//     ACK after STB_O; no such stall logic exists in this module.
//-----------------------------------------------------------------------------
module MemoryController # (parameter CORE_COUNT=`MAX_CORES )
(
input wire Clock,
input wire Reset,
input wire [`MCU_REQUEST_SIZE-1:0] iRequest,
output wire oMEM_ReadRequest,
output wire [`WB_WIDTH-1:0] oMEM_ReadAddress,
input wire [`WB_WIDTH-1:0] iMEM_ReadData,
output wire oPendingRequests, //Connected to FIFO
output wire oFifoFull,
output wire oFifoEmpty,
input wire iMEM_DataAvailable,
//Wishbone signals
output wire [`WB_WIDTH-1:0] DAT_O,
output wire [`WB_WIDTH-1:0] ADR_O,
output wire STB_O,
output wire [CORE_COUNT-1:0] WE_O,
output reg [1:0] TAG_O,
output reg CYC_O,
output reg MST_O,
input wire ACK_I
);


reg rPopFifo;
wire [`MCU_REQUEST_SIZE-1:0] wCurrentRequest;
wire wMEM_DataAvailable;
reg rIncrementAddress;
wire [10:0] wCycCount;
reg rResetCycCount;
reg rMEM_ReadRequest;
wire w64BisTransmitted,w96BisTransmitted;
wire wRequestDetected;
wire[2:0] wStbCount;
reg rResetStbCount;
wire wLastBlock;
wire wRequestType;

// Data read from the external memory is passed straight to the bus.
assign DAT_O = iMEM_ReadData;
// A request is present whenever its core mask is non-zero.
assign wRequestDetected = (iRequest[`MCU_COPYMEMBLOCKCMD_VPMASK_RNG] != 0) ? 1'b1 : 1'b0;
// Drop the read request as soon as the memory reports data available.
assign oMEM_ReadRequest = rMEM_ReadRequest & ~iMEM_DataAvailable ;
// At least one request is waiting in (or being served from) the FIFO.
assign oPendingRequests = ~oFifoEmpty;


// STB_O is a 1-bit counter, i.e. a toggle: it flips on the data-available
// pulse and flips again on ACK_I. It is cleared on reset or on a new request.
wire wSTB_O;
UPCOUNTER_POSEDGE # (1) STB_O_UP
(
.Clock( Clock ),
.Reset( Reset | wRequestDetected ),
.Initial( 1'b0 ),
.Enable( wMEM_DataAvailable | ACK_I ),
.Q( wSTB_O )
);

assign STB_O = (wSTB_O );

assign w64BisTransmitted = (wStbCount == 3'd2) ? 1'b1 : 1'b0;
assign w96BisTransmitted = (wStbCount == 3'd3) ? 1'b1 : 1'b0;
assign wLastBlock = (wCycCount == wCurrentRequest[`MCU_COPYMEMBLOCKCMD_BLKLEN_RNG]) ? 1'b1 : 1'b0;
assign wRequestType = wCurrentRequest[`MCU_COPYMEMBLOCK_TAG_BIT];



// Source (external memory) address: loaded from the incoming request,
// advanced on every ACK_I.
UPCOUNTER_POSEDGE # (`WB_WIDTH) OUT_MEM_ADR_UP
(
.Clock( Clock ),
.Reset( Reset | wRequestDetected ),
.Initial( iRequest[`MCU_COPYMEMBLOCKCMD_SRCOFF_RNG] ),
.Enable( ACK_I ),
.Q( oMEM_ReadAddress )
);


//Incoming requests are stored in the FIFO
sync_fifo # (`MCU_REQUEST_SIZE,`MCU_FIFO_DEPTH ) IN_FIFO
(
.clk( Clock ),
.reset( Reset ),
.din( iRequest ),
.wr_en( wRequestDetected ),
.rd_en( rPopFifo ),
.dout( wCurrentRequest ),
.empty( oFifoEmpty ),
.full( oFifoFull )
);



// Presumably a one-clock pulse derived from iMEM_DataAvailable -- the PULSE
// module is defined elsewhere; TODO confirm.
PULSE P1
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( iMEM_DataAvailable ),
.Q( wMEM_DataAvailable )
);



// Counts destination entries of the current request, starting at 1.
UPCOUNTER_POSEDGE # (11) UP_CYC
(
.Clock( Clock ),
.Reset( Reset | rResetCycCount ),
.Initial( 11'b1 ),
.Enable( rIncrementAddress ),
.Q( wCycCount )
);

wire wStbPulse;
PULSE P2
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( 1'b1 ),
.D( STB_O ),
.Q( wStbPulse )
);


// Counts strobes issued for the current destination entry.
UPCOUNTER_POSEDGE # (3) UP_STB
(
.Clock( Clock ),
.Reset( Reset | rResetStbCount ),
.Initial( 3'b0 ),
.Enable( wStbPulse ),
.Q( wStbCount )
);

// Destination address: loaded from the incoming request (zero-padded by 12
// bits), advanced once per completed entry.
UPCOUNTER_POSEDGE # (`WB_WIDTH) UP_VPADDR
(
.Clock( Clock ),
.Reset( Reset | wRequestDetected ),
.Initial( {12'b0,iRequest[`MCU_COPYMEMBLOCKCMD_DSTOFF_RNG]} ),
.Enable( rIncrementAddress ),
.Q( ADR_O )
);



// Per-core write enable derived from the core field of the current request.
// NOTE(review): the selector is sized with `MAX_CORES while WE_O is sized
// with the CORE_COUNT parameter; the two only agree when CORE_COUNT is left
// at its default.
SELECT_1_TO_N # ( $clog2(`MAX_CORES), `MAX_CORES ) WESEL
 (
 .Sel(wCurrentRequest[`MCU_COPYMEMBLOCKCMD_VPMASK_RNG]),
 .En( ~oFifoEmpty),
 .O( WE_O )
 );


reg [4:0] rCurrentState, rNextState;
//Next states logic and Reset sequence
always @(posedge Clock )
begin
	if (Reset )
		rCurrentState <= `MCU_STATE_AFTER_RESET;
	else
		rCurrentState <= rNextState;
end



// State machine outputs and next-state selection (combinational).
// The defaults below are exactly the values of the recovery ("default") arm;
// each state lists only the signals that differ from them.
always @ ( * )
begin
	rPopFifo          = 1'b0;
	rIncrementAddress = 1'b0;
	TAG_O             = `TAG_NULL;
	MST_O             = 1'b0;
	CYC_O             = 1'b0;
	rResetCycCount    = 1'b1;
	rMEM_ReadRequest  = 1'b0;
	rResetStbCount    = 1'b0;
	rNextState        = `MCU_STATE_AFTER_RESET;

	case (rCurrentState)
	//--------------------------------------
	`MCU_STATE_AFTER_RESET:
	begin
		rNextState = `MCU_WAIT_FOR_REQUEST;
	end
	//--------------------------------------
	/*
	Wait until a request becomes available
	*/
	`MCU_WAIT_FOR_REQUEST:
	begin
		rResetStbCount = 1'b1;
		if (~oFifoEmpty && wRequestType == `MCU_COPYMEMBLOCKCMD_DSTTYPE_VPCODEMEM)
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPCODEMEM;
		else if (~oFifoEmpty && wRequestType == `MCU_COPYMEMBLOCKCMD_DSTTYPE_VPDATAMEM)
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPDATAMEM;
		else
			rNextState = `MCU_WAIT_FOR_REQUEST;
	end
	//--------------------------------------
	//Code MEM is 64 bits
	`MCU_TRANSFER_BLOCK_TO_VPCODEMEM:
	begin
		TAG_O            = `TAG_INSTRUCTION_ADDRESS_TYPE;
		MST_O            = 1'b1;
		CYC_O            = 1'b1;
		rResetCycCount   = 1'b0;
		rMEM_ReadRequest = ~w64BisTransmitted;
		if (w64BisTransmitted)
			rNextState = `MCU_INC_TRANSFER_BLOCK_ADDR;
		else
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPCODEMEM;
	end
	//--------------------------------------
	// Data transfer: three strobes per entry.
	// NOTE(review): uses the same instruction tag as the code-memory state;
	// confirm this is intended for data-memory transfers.
	`MCU_TRANSFER_BLOCK_TO_VPDATAMEM:
	begin
		TAG_O            = `TAG_INSTRUCTION_ADDRESS_TYPE;
		MST_O            = 1'b1;
		CYC_O            = 1'b1;
		rResetCycCount   = 1'b0;
		rMEM_ReadRequest = ~w96BisTransmitted;
		if (w96BisTransmitted)
			rNextState = `MCU_INC_TRANSFER_BLOCK_ADDR;
		else
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPDATAMEM;
	end
	//--------------------------------------
	`MCU_INC_TRANSFER_BLOCK_ADDR:
	begin
		rPopFifo          = wLastBlock;
		rIncrementAddress = ~wLastBlock;
		MST_O             = 1'b1;
		rResetCycCount    = 1'b0;
		rResetStbCount    = 1'b1;
		if (wLastBlock)
			rNextState = `MCU_WAIT_FOR_REQUEST;
		else if (wRequestType == `MCU_COPYMEMBLOCKCMD_DSTTYPE_VPCODEMEM)
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPCODEMEM;
		else if (wRequestType == `MCU_COPYMEMBLOCKCMD_DSTTYPE_VPDATAMEM)
			rNextState = `MCU_TRANSFER_BLOCK_TO_VPDATAMEM;
		else
			rNextState = `MCU_WAIT_FOR_REQUEST; //Should never reach this!
	end
	//--------------------------------------
	default:
	begin
		// Unknown state: keep the defaults and restart from the reset state.
	end
	//--------------------------------------
	endcase
end


endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_OMemInterface.v
0,0 → 1,47
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
//-----------------------------------------------------------------------------
// Module_OMemInterface
//
// Serialises a 96-bit address row and a 96-bit data row into 32-bit words on
// ADR_O / DAT_O. A 3-bit selector, advanced while iWriteEnable is asserted,
// chooses which 32-bit slice of each row is presented. WE_O simply mirrors
// iWriteEnable.
//
// NOTE(review): the selector is presumably a rotating one-hot value starting
// at 3'b001 (module and mux names suggest "walking one"); which slice each
// selector value picks is defined by MUXFULLPARALELL_3SEL_WALKINGONE,
// which is not part of this file section.
//-----------------------------------------------------------------------------
module Module_OMemInterface
(
	input  wire                        Clock,
	input  wire                        Reset,
	input  wire                        iWriteEnable,
	input  wire [`DATA_ROW_WIDTH-1:0]  iData,
	input  wire [`DATA_ROW_WIDTH-1:0]  iAddress,
	output wire [`WB_WIDTH-1:0]        ADR_O,
	output wire [`WB_WIDTH-1:0]        DAT_O,
	output wire                        WE_O
);

	// 32-bit slices of the two input rows.
	wire [31:0] wAddrHigh, wAddrMid, wAddrLow;
	wire [31:0] wDataHigh, wDataMid, wDataLow;
	// Slice selector shared by both multiplexers.
	wire [2:0]  wWordSelect;

	assign wAddrHigh = iAddress[95:64];
	assign wAddrMid  = iAddress[63:32];
	assign wAddrLow  = iAddress[31:0];

	assign wDataHigh = iData[95:64];
	assign wDataMid  = iData[63:32];
	assign wDataLow  = iData[31:0];

	// Write strobe is passed through unchanged.
	assign WE_O = iWriteEnable;

	// Selector generator: starts at 3'b001, advances only while writing.
	CIRCULAR_SHIFTLEFT_POSEDGE #(3) SHL
	(
		.Clock   ( Clock ),
		.Reset   ( Reset ),
		.Enable  ( iWriteEnable ),
		.Initial ( 3'b001 ),
		.O       ( wWordSelect )
	);

	// Address word multiplexer.
	MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX1
	(
		.Sel ( wWordSelect ),
		.I1  ( wAddrHigh ),
		.I2  ( wAddrMid ),
		.I3  ( wAddrLow ),
		.O1  ( ADR_O )
	);

	// Data word multiplexer.
	MUXFULLPARALELL_3SEL_WALKINGONE # ( `WB_WIDTH ) MUX2
	(
		.Sel ( wWordSelect ),
		.I1  ( wDataHigh ),
		.I2  ( wDataMid ),
		.I3  ( wDataLow ),
		.O1  ( DAT_O )
	);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_FixedPointDivision.v
0,0 → 1,317
/*
Fixed point Division Module Qm.n
C = (A << n) / B
*/
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
//---------------------------------------------------------------------------
// serial_divide_uu.v -- Serial division module
//
//
// Description: See description below (which suffices for IP core
// specification document.)
//
// Copyright (C) 2002 John Clayton and OPENCORES.ORG (this Verilog version)
//
// This source file may be used and distributed without restriction provided
// that this copyright statement is not removed from the file and that any
// derivative work contains the original copyright notice and the associated
// disclaimer.
//
// This source file is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation; either version 2.1 of the License, or
// (at your option) any later version.
//
// This source is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
// License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this source.
// If not, download it from http://www.opencores.org/lgpl.shtml
//
//-----------------------------------------------------------------------------
//
// Author: John Clayton
// Date : Jan. 30, 2003
// Update: Jan. 30, 2003 Copied this file from "vga_crosshair.v"
// Stripped out extraneous stuff.
// Update: Mar. 14, 2003 Added S_PP parameter, made some simple changes to
// implement quotient leading zero "skip" feature.
// Update: Mar. 24, 2003 Updated comments to improve readability.
//
//-----------------------------------------------------------------------------
// Description:
//
// This module performs a division operation serially, producing one bit of the
// answer per clock cycle. The dividend and the divisor are both taken to be
// unsigned quantities. The divider is conceived as an integer divider (as
// opposed to a divider for fractional quantities) but the user can configure
// the divider to divide fractional quantities as long as the position of the
// binary point is carefully monitored.
//
// The widths of the signals are configurable by parameters, as follows:
//
// M_PP = Bit width of the dividend
// N_PP = Bit width of the divisor
// R_PP = Remainder bits desired
// S_PP = Skipped quotient bits
//
// The skipped quotient bits parameter provides a way to prevent the divider
// from calculating the full M_PP+R_PP output bits, in case some of the leading
// bits are already known to be zero. This is the case, for example, when
// dividing two quantities to obtain a result that is a fraction between 0 and 1
// (as when measuring PWM signals). In that case the integer portion of the
// quotient is always zero, and therefore it need not be calculated.
//
// The divide operation is begun by providing a pulse on the divide_i input.
// The quotient is provided (M_PP+R_PP-S_PP) clock cycles later.
// The divide_i pulse stores the input parameters in registers, so they do
// not need to be maintained at the inputs throughout the operation of the module.
// If a divide_i pulse is given to the serial_divide_uu module during the time
// when it is already working on a previous divide operation, it will abort the
// operation it was doing, and begin working on the new one.
//
// The user is responsible for treating the results correctly. The position
// of the binary point is not given, but it is understood that the integer part
// of the result is the M_PP most significant bits of the quotient output.
// The remaining R_PP least significant bits are the fractional part.
//
// This is illustrated graphically:
//
// [ M_PP bits ][ R_PP bits]
// [ S_PP bits ][quotient_o]
//
// The quotient will consist of whatever bits are left after removing the S_PP
// most significant bits from the (M_PP+R_PP) result bits.
//
// Attempting to divide by zero will simply produce a result of all ones.
// This core is so simple, that no checking for this condition is provided.
// If the user is concerned about a possible divide by zero condition, he should
// compare the divisor to zero and flag that condition himself!
//
// The COUNT_WIDTH_PP parameter must be sized so that 2^COUNT_WIDTH_PP-1 is >=
// M_PP+R_PP-S_PP-1. The unit terminates the divide operation when the count
// is equal to M_PP+R_PP-S_PP-1.
//
// The HELD_OUTPUT_PP parameter causes the unit to keep its output result in
// a register other than the one which it uses to compute the quotient. This
// is useful for applications where the divider is used repeatedly and the
// previous divide result (quotient) must be stable during the computation of the
// next divide result. Using the additional output register does incur some
// additional utilization of resources.
//
//-----------------------------------------------------------------------------
// serial_divide_uu -- unsigned serial (one quotient bit per clock) divider.
//
// Ports:
//   clk_i      - clock.
//   clk_en_i   - clock enable; the divider only advances while this is high.
//   rst_i      - synchronous reset; clears all state including the held output.
//   divide_i   - start pulse; latches the operands and restarts the divider.
//   dividend_i - unsigned dividend, M_PP bits.
//   divisor_i  - unsigned divisor, N_PP bits.
//   quotient_o - (M_PP+R_PP-S_PP)-bit quotient of (dividend << R_PP) / divisor.
//   done_o     - high for one enabled clock when the quotient is ready.
//
// Changes relative to the previous revision of this block:
//   * The initial divisor alignment used N_PP in the shift amount. The divisor
//     must be aligned with the most significant computed quotient bit, whose
//     weight is 2^(M_PP+R_PP-S_PP-1); using N_PP is only correct when
//     M_PP == N_PP. The shift now uses M_PP, which is identical for equal
//     operand widths and correct for unequal ones.
//   * quotient_reg (the held output) is now cleared on reset, so quotient_o is
//     defined before the first division completes when HELD_OUTPUT_PP != 0.
module serial_divide_uu (
clk_i,
clk_en_i,
rst_i,
divide_i,
dividend_i,
divisor_i,
quotient_o,
done_o
);
/*
M_PP => 21,
N_PP => 21,
R_PP => 0,
S_PP => 0,
HELD_OUTPUT_PP => 1
*/
parameter M_PP = 21; // Size of dividend
parameter N_PP = 21; // Size of divisor
parameter R_PP = 0; // Size of remainder
parameter S_PP = 0; // Skip this many bits (known leading zeros)
parameter COUNT_WIDTH_PP = 5; // 2^COUNT_WIDTH_PP-1 >= (M_PP+R_PP-S_PP-1)
parameter HELD_OUTPUT_PP = 1; // Set to 1 if stable output should be held
// from previous operation, during current
// operation. Using this option will increase
// the resource utilization (costs extra
// d-flip-flops.)
// I/O declarations
input clk_i; //
input clk_en_i;
input rst_i; // synchronous reset
input divide_i; // starts division operation
input [M_PP-1:0] dividend_i; //
input [N_PP-1:0] divisor_i; //
output [M_PP+R_PP-S_PP-1:0] quotient_o; //
output done_o; // indicates completion of operation
reg done_o;
// Internal signal declarations
reg [M_PP+R_PP-1:0] grand_dividend;
reg [M_PP+N_PP+R_PP-2:0] grand_divisor;
reg [M_PP+R_PP-S_PP-1:0] quotient;
reg [M_PP+R_PP-1:0] quotient_reg; // Used exclusively for the held output
reg [COUNT_WIDTH_PP-1:0] divide_count;
wire [M_PP+N_PP+R_PP-1:0] subtract_node; // Subtract node has extra "sign" bit
wire [M_PP+R_PP-1:0] quotient_node; // Shifted version of quotient
wire [M_PP+N_PP+R_PP-2:0] divisor_node; // Shifted version of grand divisor
//--------------------------------------------------------------------------
// Module code
// Serial dividing module
always @(posedge clk_i)
begin
  if (rst_i)
  begin
    grand_dividend <= 0;
    grand_divisor <= 0;
    divide_count <= 0;
    quotient <= 0;
    quotient_reg <= 0; // held output starts from a defined value
    done_o <= 0;
  end
  else if (clk_en_i)
  begin
    done_o <= 0;
    if (divide_i) // Start a new division
    begin
      quotient <= 0;
      divide_count <= 0;
      // dividend placed initially so that remainder bits are zero...
      grand_dividend <= dividend_i << R_PP;
      // divisor placed initially for a 1 bit overlap with dividend, i.e. its
      // LSB sits at the dividend's MSB position (M_PP+R_PP-1)...
      // But adjust it back by S_PP, to account for bits that are known
      // to be leading zeros in the quotient.
      /* verilator lint_off WIDTH */
      grand_divisor <= divisor_i << (M_PP+R_PP-S_PP-1);
      /* verilator lint_on WIDTH */
    end
    /* verilator lint_off WIDTH */
    else if (divide_count == M_PP+R_PP-S_PP-1)
    /* verilator lint_on WIDTH */
    begin
      if (~done_o) quotient <= quotient_node; // final shift...
      if (~done_o) quotient_reg <= quotient_node; // final shift (held output)
      done_o <= 1; // Indicate done, just sit
    end
    else // Division in progress
    begin
      // If the subtraction yields a positive result, then store that result
      /* verilator lint_off WIDTH */
      if (~subtract_node[M_PP+N_PP+R_PP-1]) grand_dividend <= subtract_node;
      /* verilator lint_on WIDTH */
      // If the subtraction yields a positive result, then a 1 bit goes into
      // the quotient, via a shift register
      quotient <= quotient_node;
      // shift the grand divisor to the right, to cut it in half next clock cycle
      grand_divisor <= divisor_node;
      // Advance the counter
      divide_count <= divide_count + 1;
    end
  end // End of else if clk_en_i
end // End of always block
// Trial subtraction; the top bit of the result acts as the borrow/sign flag.
/* verilator lint_off WIDTH */
assign subtract_node = {1'b0,grand_dividend} - {1'b0,grand_divisor};
/* verilator lint_on WIDTH */
// Next quotient value: shift left, inserting 1 when the subtraction did not borrow.
assign quotient_node =
{quotient[M_PP+R_PP-S_PP-2:0],~subtract_node[M_PP+N_PP+R_PP-1]};
// Divisor halved for the next step.
assign divisor_node = {1'b0,grand_divisor[M_PP+N_PP+R_PP-2:1]};
// Output either the working register or the held copy.
assign quotient_o = (HELD_OUTPUT_PP == 0)?quotient:quotient_reg;
endmodule
 
//-----------------------------------------------------------------------------
// SignedIntegerDivision
//
// Signed wrapper around the unsigned serial divider: both operands are
// converted to their magnitudes (two's complement negation when the top bit
// is set), divided, and the low `WIDTH bits of the quotient are negated again
// when the operand signs differ.
//
// Ports:
//   Clock, Reset  - clock and synchronous reset.
//   iDividend     - signed dividend, `LONG_WIDTH bits.
//   iDivisor      - signed divisor, `LONG_WIDTH bits.
//   iInputReady   - operands valid; its rising edge starts a division.
//   oQuotient     - signed result, `WIDTH bits.
//   OutputReady   - result-valid indication derived from the divider's done
//                   flag (see the completion logic below).
//
// The serial divider is instantiated with 64-bit operands and a 6-bit
// iteration counter.
//-----------------------------------------------------------------------------
module SignedIntegerDivision
(
	input  wire                     Clock,
	input  wire                     Reset,
	output wire [`WIDTH-1:0]        oQuotient,
	input  wire [`LONG_WIDTH-1:0]   iDividend,
	input  wire [`LONG_WIDTH-1:0]   iDivisor,
	input  wire                     iInputReady,
	output wire                     OutputReady
);

	//--------------------------------------------------------------------
	// Start pulse: high only in the first clock in which iInputReady is seen.
	//--------------------------------------------------------------------
	wire wReadyPrev;
	wire wStartPulse;

	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FF_DELAY1
	(
		.Clock  ( Clock ),
		.Reset  ( Reset ),
		.Enable ( 1'b1 ),
		.D      ( iInputReady ),
		.Q      ( wReadyPrev )
	);

	assign wStartPulse = iInputReady & ~wReadyPrev;

	//--------------------------------------------------------------------
	// Operand magnitudes and result sign.
	//--------------------------------------------------------------------
	wire wDividendIsNeg;
	wire wDivisorIsNeg;
	wire [`LONG_WIDTH-1:0] wDividendNegated;
	wire [`LONG_WIDTH-1:0] wDivisorNegated;
	wire [`LONG_WIDTH-1:0] wDividendMag;
	wire [`LONG_WIDTH-1:0] wDivisorMag;

	assign wDividendIsNeg   = iDividend[`LONG_WIDTH-1];
	assign wDivisorIsNeg    = iDivisor[`LONG_WIDTH-1];
	assign wDividendNegated = ~iDividend + 1'b1;
	assign wDivisorNegated  = ~iDivisor + 1'b1;
	assign wDividendMag     = wDividendIsNeg ? wDividendNegated : iDividend;
	assign wDivisorMag      = wDivisorIsNeg  ? wDivisorNegated  : iDivisor;

	// The quotient is negative when exactly one operand is negative. The
	// flag is captured while iInputReady is high so it stays available
	// until the division has finished.
	wire wSignDiffers;
	wire wSignDiffersHeld;

	assign wSignDiffers = wDividendIsNeg ^ wDivisorIsNeg;

	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FF_NEG
	(
		.Clock  ( Clock ),
		.Reset  ( Reset ),
		.Enable ( iInputReady ),
		.D      ( wSignDiffers ),
		.Q      ( wSignDiffersHeld )
	);

	//--------------------------------------------------------------------
	// "Division in flight" flag: a 1-bit counter (toggle) that flips when a
	// division is requested and again when its result is reported.
	//--------------------------------------------------------------------
	wire wBusy;

	UPCOUNTER_POSEDGE # (1) UP1
	(
		.Clock   ( Clock ),
		.Reset   ( Reset ),
		.Initial ( 1'b0 ),
		.Enable  ( OutputReady | iInputReady ),
		.Q       ( wBusy )
	);

	//--------------------------------------------------------------------
	// Completion: OutputReady is asserted when the divider's done flag
	// differs from its one-clock-delayed copy while a division is in flight.
	// The delayed copy is cleared whenever new operands are presented.
	//--------------------------------------------------------------------
	wire wDone;
	wire wDonePrev;

	FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FF_DELAY2
	(
		.Clock  ( Clock ),
		.Reset  ( Reset | iInputReady ),
		.Enable ( 1'b1 ),
		.D      ( wDone ),
		.Q      ( wDonePrev )
	);

	assign OutputReady = (wDone ^ wDonePrev) & wBusy;

	//--------------------------------------------------------------------
	// Unsigned serial divider (always clock-enabled) and sign restoration.
	//--------------------------------------------------------------------
	wire [`LONG_WIDTH-1:0] wUnsignedQuotient;

	serial_divide_uu # (
		.M_PP           ( 64 ),
		.N_PP           ( 64 ),
		.R_PP           ( 0 ),
		.S_PP           ( 0 ),
		.COUNT_WIDTH_PP ( 6 ),
		.HELD_OUTPUT_PP ( 1 )
	) uu_div (
		.clk_i      ( Clock ),
		.clk_en_i   ( 1'b1 ),
		.rst_i      ( Reset ),
		.divide_i   ( wStartPulse ),
		.dividend_i ( wDividendMag ),
		.divisor_i  ( wDivisorMag ),
		.quotient_o ( wUnsignedQuotient ),
		.done_o     ( wDone )
	);

	assign oQuotient = (wSignDiffersHeld) ? ~wUnsignedQuotient[`WIDTH-1:0]+1'b1
	                                      : wUnsignedQuotient[`WIDTH-1:0];

endmodule
/theia_gpu/branches/beta_2.0/rtl/FlowDumper.v
0,0 → 1,480
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//`define VP2_TOP( core ) `THEIA_TOP.\VPX[ core ].VP
// Number of OMEM entries written to the OMEM log at end of simulation.
`define MAX_OMEM_DUMP_SIZE 32
// Number of register-file rows (X, Y and Z lanes) written to the register log.
`define MAX_RF_MEM_DUMP_SIZE 128
//-----------------------------------------------------------------------------
// VectorProcessor_Dumper
//
// Simulation-only trace module (not synthesizable: it uses file I/O system
// tasks and hierarchical references into the design).
//
// Parameter:
//   CVPID - index of the vector processor being traced. It is used to build
//           the log file names and to index `THEIA_TOP.VPX[ CVPID ] when the
//           output memory and register file are dumped.
//
// On every rising edge of `VP_TOP.Clock it:
//   * detects the end-of-flow bit in the thread 0 instruction, writes the
//     result summary, dumps OMEM and the register file, then stops;
//   * logs every instruction issued by II0 (thread 0) and II1 (thread 1);
//   * logs taken branches and every commit granted to an execution station.
//
// Output files:
//   test_result.log      - result summary written at end of flow
//   vp.<CVPID>.log       - issue / commit trace
//   OMEM.vp.<CVPID>.log  - first `MAX_OMEM_DUMP_SIZE rows of the OMEM
//   rf.vp.<CVPID>.log    - first `MAX_RF_MEM_DUMP_SIZE rows of RF_X/RF_Y/RF_Z
//-----------------------------------------------------------------------------
module VectorProcessor_Dumper # (parameter CVPID = 2);


integer RESULT_FILE,VP_LOG,OMEM_LOG,VP_REG_LOG,i;
reg [255:1] VPLogFileName,OMEMLogFileName,RegLogFileName;


initial
begin
  //Build the per-VP file names, then open the files that are written
  //while the simulation runs. The OMEM and register file logs are only
  //opened at end of flow.
  $swrite(VPLogFileName,"vp.%01d.log",CVPID);
  $swrite(OMEMLogFileName,"OMEM.vp.%01d.log",CVPID);
  $swrite(RegLogFileName,"rf.vp.%01d.log",CVPID);
  RESULT_FILE = $fopen("test_result.log");
  VP_LOG = $fopen(VPLogFileName);
end


always @ (posedge `VP_TOP.Clock)
begin
  //-----------------------------------------------------------------
  // End of flow: write the summary, dump the memories and stop.
  if (`VP_TOP.EXE.II0.iInstruction0[`INST_EOF_RNG])
  begin
    //The message belongs in the VP log, so it is written with $fwrite.
    //$display would treat the descriptor as a value to print on stdout.
    $fwrite(VP_LOG,"\nEnd of flow instruction detected\n");
    $fwrite(RESULT_FILE,"Simulation ended at time %dns\n",$time);
    $fwrite(RESULT_FILE,"multithread = %d\n",`VP_TOP.EXE.wThreadControl[`SPR_TCONTROL0_MT_ENABLED]);
    $fwrite(RESULT_FILE,"Simulation RESULT %h\n",`VP_TOP.EXE.RF.RF_X.Ram[66]);
    $fclose(RESULT_FILE);
    $fclose( VP_LOG );
    //Now write the output log
    OMEM_LOG = $fopen(OMEMLogFileName);
    for (i = 0; i < `MAX_OMEM_DUMP_SIZE; i = i +1)
    begin
      $fwrite(OMEM_LOG,"@%d\t%h\n",i,`THEIA_TOP.VPX[ CVPID ].OMEM.Ram[i]);
    end
    $fclose(OMEM_LOG);
    //Dump the three register file channels side by side
    VP_REG_LOG = $fopen(RegLogFileName);
    for (i = 0; i < `MAX_RF_MEM_DUMP_SIZE; i = i +1)
    begin
      $fwrite(VP_REG_LOG,"r%01d\t%h %h %h\n",i,
        `THEIA_TOP.VPX[ CVPID ].VP.EXE.RF.RF_X.Ram[i],
        `THEIA_TOP.VPX[ CVPID ].VP.EXE.RF.RF_Y.Ram[i],
        `THEIA_TOP.VPX[ CVPID ].VP.EXE.RF.RF_Z.Ram[i]);
    end
    $fclose(VP_REG_LOG);
    $stop;
    $finish;
  end

  //-----------------------------------------------------------------
  // Thread 0 issue trace. Only logged when an instruction is issued
  // to a reservation station with a non-zero id.
  if (`VP_TOP.EXE.II0.rIssueNow && `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_RSID_RNG] != 0)
  begin
    //Issue state dump
    $fwrite(VP_LOG,"\n%dns VP[%d] IP %d ISSUE ",$time,`VP_TOP.iVPID-1,`VP_TOP.EXE.II0.oIP0-1);
    //Issue instruction undecoded
    $fwrite(VP_LOG," (%h) \t",`VP_TOP.EXE.II0.iInstruction0);
    if (`VP_TOP.EXE.II0.iInstruction0[`INST_BRANCH_BIT])
      $fwrite(VP_LOG," BRANCH ");
    //Target reservation station
    case ( `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_RSID_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_LOGIC:
      begin
        //For the logic station the scale field selects the operation
        $fwrite(VP_LOG," LOGIC( ");
        case (`VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SCALE_RNG])
          0: $fwrite(VP_LOG,"AND");
          1: $fwrite(VP_LOG,"OR");
          2: $fwrite(VP_LOG,"NOT");
          3: $fwrite(VP_LOG,"SHL");
          4: $fwrite(VP_LOG,"SHR");
          default:
            $fwrite(VP_LOG,"UNKNOWN");
        endcase
        $fwrite(VP_LOG,") ");
      end
      `RS_IO:$fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II0.oIssueBcast[`ISSUE_RSID_RNG]);
    endcase
    //Destination register
    if ( `VP_TOP.EXE.II0.iInstruction0[`INST_IMM] == 0)
    begin
      if (`VP_TOP.EXE.II0.iInstruction0[`INST_DEST_ZERO])
        $fwrite(VP_LOG, "R[%d + %d]", `VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
      else
        $fwrite(VP_LOG, "R[%d]", `VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG]);
    end
    else
    begin
      case (`VP_TOP.EXE.II0.iInstruction0[`INST_ADDRMODE_RNG])
        3'b000: $fwrite(VP_LOG,"R[%d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG]);
        3'b001: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
        3'b010: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
        3'b011: $fwrite(VP_LOG,"R[%d + %d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset,`VP_TOP.EXE.II0.wSource1_Temp[`X_RNG]);
        3'b100: $fwrite(VP_LOG,"R[%d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG]);
        3'b101: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
        3'b110: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
        3'b111: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
      endcase
    end
    //Write-enable mask for the x/y/z channels
    case ( `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II0.oIssueBcast[`ISSUE_WE_RNG]);
    endcase
    //Source operands
    if (`VP_TOP.EXE.II0.iInstruction0[`INST_IMM])
      /*if (`VP_TOP.EXE.II0.iInstruction0[`INST_SRC0_DISPLACED] && `VP_TOP.EXE.II0.iInstruction0[`INST_SRC1_DISPLACED])
      $fwrite(VP_LOG, "R[%d] 0 ",`VP_TOP.EXE.II0.oSourceAddress0);
      else
      $fwrite(VP_LOG, "I(%h)",`VP_TOP.EXE.II0.iInstruction0[`INST_IMM_RNG]);*/
      case (`VP_TOP.EXE.II0.iInstruction0[`INST_ADDRMODE_RNG])
        3'b000: $fwrite(VP_LOG,"I(%h) R[%d]",`VP_TOP.EXE.II0.iInstruction0[`INST_IMM_RNG], `VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG]);
        3'b001: $fwrite(VP_LOG,"**!!I(%h) R[%d + %d] ",`VP_TOP.EXE.II0.iInstruction0[`INST_IMM_RNG],`VP_TOP.EXE.II0.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II0.iFrameOffset);
        3'b010: $fwrite(VP_LOG,"R[%d+%d] R[%d+%d+%d]",`VP_TOP.EXE.II0.oSourceAddress1,`VP_TOP.EXE.II0.iFrameOffset,`VP_TOP.EXE.II0.oSourceAddress0,`VP_TOP.EXE.II0.iFrameOffset,`VP_TOP.EXE.II0.iIndexRegister);
        3'b011: $fwrite(VP_LOG,"0 R[%d + %d]",`VP_TOP.EXE.II0.oSourceAddress0,`VP_TOP.EXE.II0.iFrameOffset);
        3'b100: $fwrite(VP_LOG,"I(%h) 0",`VP_TOP.EXE.II0.iInstruction0[`INST_IMM_RNG]);
        3'b101: $fwrite(VP_LOG,"I(%h) 0",`VP_TOP.EXE.II0.iInstruction0[`INST_IMM_RNG]);
        3'b110: $fwrite(VP_LOG,"R[%d + %d + %d] 0", `VP_TOP.EXE.II0.oSourceAddress1,`VP_TOP.EXE.II0.iFrameOffset,`VP_TOP.EXE.II0.iIndexRegister);
        3'b111: $fwrite(VP_LOG,"R[%d + %d + %d] R[%d + %d]",`VP_TOP.EXE.II0.iInstruction0[`INST_SCR1_ADDR_RNG],`VP_TOP.EXE.II0.iFrameOffset,`VP_TOP.EXE.II0.iIndexRegister,`VP_TOP.EXE.II0.oSourceAddress0,`VP_TOP.EXE.II0.iFrameOffset);
      endcase
    else
    begin
      if (`VP_TOP.EXE.II0.iInstruction0[`INST_SRC1_DISPLACED] == 0)
        $fwrite(VP_LOG, "R[%d] ",`VP_TOP.EXE.II0.oSourceAddress1);
      else
        $fwrite(VP_LOG, "R[%d + %d] ", `VP_TOP.EXE.II0.iInstruction0[`INST_SCR1_ADDR_RNG],`VP_TOP.EXE.II0.iFrameOffset);
      if (`VP_TOP.EXE.II0.iInstruction0[`INST_SRC0_DISPLACED] == 0)
        $fwrite(VP_LOG, "R[%d] ",`VP_TOP.EXE.II0.oSourceAddress0);
      else
        $fwrite(VP_LOG, "R[%d + %d] ", `VP_TOP.EXE.II0.iInstruction0[`INST_SRC0_ADDR_RNG],`VP_TOP.EXE.II0.iFrameOffset);
    end
    $fwrite(VP_LOG,"\t\t\t\t");
    //Reservation station field for source 1
    case ( `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC1RS_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_IO: $fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC1RS_RNG]);
    endcase
    $fwrite(VP_LOG," | ");
    //Reservation station field for source 0
    case ( `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC0RS_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_IO: $fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC0RS_RNG]);
    endcase
    $fwrite(VP_LOG," | ");
    //Raw source data, swizzle and scale fields of the issue packet
    $fwrite(VP_LOG," %h.%b | %h.%b s(%b)| -> ",
      `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC1_DATA_RNG],
      `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SWZZ1_RNG],
      `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SRC0_DATA_RNG],
      `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SWZZ0_RNG],
      `VP_TOP.EXE.II0.oIssueBcast[`ISSUE_SCALE_RNG]);
    //Source data fields of the wModIssue bus
    $fwrite(VP_LOG," %h | %h",
      `VP_TOP.EXE.wModIssue[`MOD_ISSUE_SRC1_DATA_RNG],
      `VP_TOP.EXE.wModIssue[`MOD_ISSUE_SRC0_DATA_RNG]
      );
  end


  ////////////// Same for thread 1...

  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.II1.rIssueNow && `VP_TOP.EXE.II1.oIssueBcast[`ISSUE_RSID_RNG] != 0)
  begin
    //Issue state dump
    $fwrite(VP_LOG,"\n THREAD 1 %dns IP %d ISSUE ",$time,`VP_TOP.EXE.II1.oIP0-1);
    //Issue instruction undecoded
    $fwrite(VP_LOG," (%h) \t",`VP_TOP.EXE.II1.iInstruction0);
    if (`VP_TOP.EXE.II1.iInstruction0[`INST_BRANCH_BIT])
      $fwrite(VP_LOG," BRANCH ");
    //Target reservation station. LOGIC and IO are decoded here as well so
    //both threads produce the same trace for the same instruction.
    case ( `VP_TOP.EXE.II1.oIssueBcast[`ISSUE_RSID_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_LOGIC:
      begin
        $fwrite(VP_LOG," LOGIC( ");
        case (`VP_TOP.EXE.II1.oIssueBcast[`ISSUE_SCALE_RNG])
          0: $fwrite(VP_LOG,"AND");
          1: $fwrite(VP_LOG,"OR");
          2: $fwrite(VP_LOG,"NOT");
          3: $fwrite(VP_LOG,"SHL");
          4: $fwrite(VP_LOG,"SHR");
          default:
            $fwrite(VP_LOG,"UNKNOWN");
        endcase
        $fwrite(VP_LOG,") ");
      end
      `RS_IO:$fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II1.oIssueBcast[`ISSUE_RSID_RNG]);
    endcase
    //Destination register
    if ( `VP_TOP.EXE.II1.iInstruction0[`INST_IMM] == 0)
    begin
      if (`VP_TOP.EXE.II1.iInstruction0[`INST_DEST_ZERO])
        $fwrite(VP_LOG, "R[%d + %d]", `VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
      else
        $fwrite(VP_LOG, "R[%d]", `VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG]);
    end
    else
    begin
      case (`VP_TOP.EXE.II1.iInstruction0[`INST_ADDRMODE_RNG])
        3'b000: $fwrite(VP_LOG,"R[%d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG]);
        3'b001: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
        3'b010: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
        3'b011: $fwrite(VP_LOG,"R[%d + %d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset,`VP_TOP.EXE.II1.wSource1_Temp[`X_RNG]);
        3'b100: $fwrite(VP_LOG,"R[%d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG]);
        3'b101: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
        3'b110: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
        3'b111: $fwrite(VP_LOG,"R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
      endcase
    end
    //Write-enable mask for the x/y/z channels
    case ( `VP_TOP.EXE.II1.oIssueBcast[`ISSUE_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II1.oIssueBcast[`ISSUE_WE_RNG]);
    endcase
    //Source operands
    if (`VP_TOP.EXE.II1.iInstruction0[`INST_IMM])
      /*if (`VP_TOP.EXE.II1.iInstruction0[`INST_SRC0_DISPLACED] && `VP_TOP.EXE.II1.iInstruction0[`INST_SRC1_DISPLACED])
      $fwrite(VP_LOG, "R[%d] 0 ",`VP_TOP.EXE.II1.oSourceAddress0);
      else
      $fwrite(VP_LOG, "I(%h)",`VP_TOP.EXE.II1.iInstruction0[`INST_IMM_RNG]);*/
      case (`VP_TOP.EXE.II1.iInstruction0[`INST_ADDRMODE_RNG])
        3'b000: $fwrite(VP_LOG,"I(%h) R[%d]",`VP_TOP.EXE.II1.iInstruction0[`INST_IMM_RNG], `VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG]);
        //I(...) shows the immediate field and R[...] the destination plus
        //frame offset, using the same arguments as the thread 0 trace.
        3'b001: $fwrite(VP_LOG,"I(%h) R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_IMM_RNG],`VP_TOP.EXE.II1.iInstruction0[`INST_DST_RNG],`VP_TOP.EXE.II1.iFrameOffset);
        3'b010: $fwrite(VP_LOG,"R[%d+%d] R[%d+%d+%d]",`VP_TOP.EXE.II1.oSourceAddress1,`VP_TOP.EXE.II1.iFrameOffset,`VP_TOP.EXE.II1.oSourceAddress0,`VP_TOP.EXE.II1.iFrameOffset,`VP_TOP.EXE.II1.iIndexRegister);
        3'b011: $fwrite(VP_LOG,"0 R[%d + %d]",`VP_TOP.EXE.II1.oSourceAddress0,`VP_TOP.EXE.II1.iFrameOffset);
        3'b100: $fwrite(VP_LOG,"I(%h) 0",`VP_TOP.EXE.II1.iInstruction0[`INST_IMM_RNG]);
        3'b101: $fwrite(VP_LOG,"I(%h) 0",`VP_TOP.EXE.II1.iInstruction0[`INST_IMM_RNG]);
        3'b110: $fwrite(VP_LOG,"R[%d + %d + %d] 0", `VP_TOP.EXE.II1.oSourceAddress1,`VP_TOP.EXE.II1.iFrameOffset,`VP_TOP.EXE.II1.iIndexRegister);
        3'b111: $fwrite(VP_LOG,"R[%d + %d + %d] R[%d + %d]",`VP_TOP.EXE.II1.iInstruction0[`INST_SCR1_ADDR_RNG],`VP_TOP.EXE.II1.iFrameOffset,`VP_TOP.EXE.II1.iIndexRegister,`VP_TOP.EXE.II1.oSourceAddress0,`VP_TOP.EXE.II1.iFrameOffset);
      endcase
    else
    begin
      if (`VP_TOP.EXE.II1.iInstruction0[`INST_SRC1_DISPLACED] == 0)
        $fwrite(VP_LOG, "R[%d] ",`VP_TOP.EXE.II1.oSourceAddress1);
      else
        $fwrite(VP_LOG, "R[%d + %d] ", `VP_TOP.EXE.II1.iInstruction0[`INST_SCR1_ADDR_RNG],`VP_TOP.EXE.II1.iFrameOffset);
      if (`VP_TOP.EXE.II1.iInstruction0[`INST_SRC0_DISPLACED] == 0)
        $fwrite(VP_LOG, "R[%d] ",`VP_TOP.EXE.II1.oSourceAddress0);
      else
        $fwrite(VP_LOG, "R[%d + %d] ", `VP_TOP.EXE.II1.iInstruction0[`INST_SRC0_ADDR_RNG],`VP_TOP.EXE.II1.iFrameOffset);
    end
    $fwrite(VP_LOG,"\t\t\t\t");
    //Reservation station field for source 1
    case ( `VP_TOP.EXE.II1.oIssueBcast[`ISSUE_SRC1RS_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_IO: $fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II1.oIssueBcast[`ISSUE_SRC1RS_RNG]);
    endcase
    $fwrite(VP_LOG," | ");
    //Reservation station field for source 0
    case ( `VP_TOP.EXE.II1.oIssueBcast[`ISSUE_SRC0RS_RNG] )
      `RS_ADD0: $fwrite(VP_LOG," ADD_0 ");
      `RS_ADD1: $fwrite(VP_LOG," ADD_1 ");
      `RS_DIV: $fwrite(VP_LOG," DIV ");
      `RS_MUL: $fwrite(VP_LOG," MUL ");
      `RS_SQRT: $fwrite(VP_LOG," SQRT ");
      `RS_IO: $fwrite(VP_LOG," IO ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.II1.oIssueBcast[`ISSUE_SRC0RS_RNG]);
    endcase
    $fwrite(VP_LOG," | ");
    //Source data fields of the wModIssue bus
    $fwrite(VP_LOG," %h | %h",
      `VP_TOP.EXE.wModIssue[`MOD_ISSUE_SRC1_DATA_RNG],
      `VP_TOP.EXE.wModIssue[`MOD_ISSUE_SRC0_DATA_RNG]
      );
  end


  ////////////////////////// DUMP EXE UNITS!

  //Taken branches, reported when the commit comes from the pending station
  if (`VP_TOP.EXE.II0.wCommitFromPendingStation)
  begin
    if ( `VP_TOP.EXE.II0.wBranchTaken)
      $fwrite(VP_LOG,"\nTHREAD 0: BRANCH TAKEN ");
  end
  if (`VP_TOP.EXE.II1.wCommitFromPendingStation)
  begin
    if ( `VP_TOP.EXE.II1.wBranchTaken)
      $fwrite(VP_LOG,"\nTHREAD 1: BRANCH TAKEN ");
  end
  //-----------------------------------------------------------------
  // One trace line per station whose commit was granted this cycle:
  // destination register, write mask and the three result channels.
  if (`VP_TOP.EXE.LOGIC_STA.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns VP[%d]\t COMMIT LOGIC( ",$time,`VP_TOP.iVPID-1);
    case (`VP_TOP.EXE.LOGIC_STA.wResultSelector)
      0: $fwrite(VP_LOG,"AND");
      1: $fwrite(VP_LOG,"OR");
      2: $fwrite(VP_LOG,"NOT");
      3: $fwrite(VP_LOG,"SHL");
      4: $fwrite(VP_LOG,"SHR");
      default:
        $fwrite(VP_LOG,"UNKNOWN");
    endcase
    $fwrite(VP_LOG,") ");
    $fwrite(VP_LOG," R[%d]",`VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h\n",`VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_Y_RNG],`VP_TOP.EXE.LOGIC_STA.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.ADD_STA0.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns\t VP[%d] COMMIT ADD_0 R[%d]",$time,`VP_TOP.iVPID-1,`VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h\n",`VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_Y_RNG],`VP_TOP.EXE.ADD_STA0.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.ADD_STA1.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns\t VP[%d] COMMIT ADD_1 R[%d]",$time,`VP_TOP.iVPID-1,`VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h\n",`VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_Y_RNG],`VP_TOP.EXE.ADD_STA1.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.DIV_STA.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns\t VP[%d] COMMIT DIV R[%d]",$time,`VP_TOP.iVPID-1,`VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h\n",`VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_Y_RNG],`VP_TOP.EXE.DIV_STA.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.MUL_STA.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns\t VP[%d] COMMIT MUL R[%d]",$time,`VP_TOP.iVPID-1, `VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h\n",`VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_Y_RNG],`VP_TOP.EXE.MUL_STA.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------
  if (`VP_TOP.EXE.SQRT_STA.RS.iCommitGranted)
  begin
    $fwrite(VP_LOG,"\n%dns\t VP[%d] COMMIT SQRT R[%d]",$time,`VP_TOP.iVPID-1,`VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_DST_RNG]);
    case ( `VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_WE_RNG] )
      3'b000: $fwrite(VP_LOG,".nowrite ");
      3'b001: $fwrite(VP_LOG,".z ");
      3'b010: $fwrite(VP_LOG,".y ");
      3'b100: $fwrite(VP_LOG,".x ");
      3'b111: $fwrite(VP_LOG,".xyz ");
      default:
        $fwrite(VP_LOG," %b ",`VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_WE_RNG]);
    endcase
    $fwrite(VP_LOG," %h %h %h \n",`VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_X_RNG],`VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_Y_RNG], `VP_TOP.EXE.SQRT_STA.oCommitData[`COMMIT_Z_RNG]);
  end
  //-----------------------------------------------------------------

end //always


endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_RAM.v
0,0 → 1,84
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
//Dual port RAM.
 
 
//--------------------------------------------------------
// RAM with one write port and two read ports, all clocked by Clock.
//
// Parameters:
//   DATA_WIDTH - width of each row in bits
//   ADDR_WIDTH - width of the address buses; the array holds
//                DEPTH = 2**ADDR_WIDTH rows
//
// Both read outputs are registered. The write and the reads use
// nonblocking assignments in the same clocked block, so a read of the
// address being written in the same cycle returns the previous contents.
//
// The array is named Ram; the simulation dumper in this project reaches
// it by hierarchical reference, so the name must not change.
//--------------------------------------------------------
module RAM_DUAL_READ_PORT # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH )
(
  input wire                   Clock,
  input wire                   iWriteEnable,
  input wire [ADDR_WIDTH-1:0]  iReadAddress0,
  input wire [ADDR_WIDTH-1:0]  iReadAddress1,
  input wire [ADDR_WIDTH-1:0]  iWriteAddress,
  input wire [DATA_WIDTH-1:0]  iDataIn,
  output reg [DATA_WIDTH-1:0]  oDataOut0,
  output reg [DATA_WIDTH-1:0]  oDataOut1
);

  parameter DEPTH = 2**ADDR_WIDTH;

  reg [DATA_WIDTH-1:0] Ram [DEPTH-1:0];

  always @(posedge Clock)
  begin
    /* verilator lint_off WIDTH */
    // Registered reads: both ports sample the array as it was before
    // this clock edge.
    oDataOut1 <= Ram[iReadAddress1];
    oDataOut0 <= Ram[iReadAddress0];

    // Synchronous write, gated by the write enable.
    if (iWriteEnable)
    begin
      Ram[iWriteAddress] <= iDataIn;
    end
    /* verilator lint_on WIDTH */
  end

endmodule
//--------------------------------------------------------
 
//--------------------------------------------------------
// RAM with one write port and one registered read port.
//
// Parameters:
//   DATA_WIDTH - width of each row in bits
//   ADDR_WIDTH - width of the address buses
//   MEM_SIZE   - number of rows in the array (independent of ADDR_WIDTH)
//
// A read of the address being written in the same cycle returns the
// previous contents, because both assignments are nonblocking.
//--------------------------------------------------------
module RAM_SINGLE_READ_PORT # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH, parameter MEM_SIZE=128 )
(
  input wire                   Clock,
  input wire                   iWriteEnable,
  input wire [ADDR_WIDTH-1:0]  iReadAddress0,
  input wire [ADDR_WIDTH-1:0]  iWriteAddress,
  input wire [DATA_WIDTH-1:0]  iDataIn,
  output reg [DATA_WIDTH-1:0]  oDataOut0
);

  reg [DATA_WIDTH -1:0] Ram [MEM_SIZE-1:0];

  always @(posedge Clock)
  begin
    // Registered read of the pre-edge contents.
    oDataOut0 <= Ram[iReadAddress0];

    // Synchronous write, gated by the write enable.
    if (iWriteEnable)
    begin
      Ram[iWriteAddress] <= iDataIn;
    end
  end

endmodule
 
 
/theia_gpu/branches/beta_2.0/rtl/Module_Multiply_Station.v
0,0 → 1,116
`include "aDefinitions.v"
 
 
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//-----------------------------------------------------------------------------
// MUL_STATION
//
// Multiply execution station: one ReservationStation_1Cycle (instance RS)
// feeding three RADIX_R_MUL_32_FULL_PARALLEL multipliers, one per X/Y/Z
// channel of the operand row.
//
// Ports:
//   Clock, Reset     - clock and reset, passed to every sub-module
//   iIssueBus        - issue packet bus, passed to the reservation station
//   iCommitBus       - commit packet bus, passed to the reservation station
//   iId              - id of this station, passed to the reservation station
//   oCommitData      - commit packet: RS id, write enable, destination and
//                      the X/Y/Z result channels
//   oCommitResquest  - commit request raised by the reservation station
//   iCommitGranted   - commit grant for this station
//   oBusy            - busy flag from the reservation station
//
// Instance and wire names are kept as they are because the simulation
// dumpers in this project reach into the station by hierarchical reference.
//-----------------------------------------------------------------------------
module MUL_STATION
(
  input wire                                 Clock,
  input wire                                 Reset,
  input wire [`MOD_ISSUE_PACKET_SIZE-1:0]    iIssueBus,
  input wire [`MOD_COMMIT_PACKET_SIZE-1:0]   iCommitBus,
  input wire [3:0]                           iId,
  output wire [`COMMIT_PACKET_SIZE-1:0]      oCommitData,
  output wire                                oCommitResquest,
  input wire                                 iCommitGranted,
  output wire                                oBusy
);

  wire [`DATA_ROW_WIDTH-1:0]  wRS1_OperandA;
  wire [`DATA_ROW_WIDTH-1:0]  wRS1_OperandB;
  wire [`DATA_ROW_WIDTH-1:0]  wResult;
  wire [`SCALE_SIZE-1:0]      wScale;
  wire                        wRS1_2_ADD_Trigger;   // start pulse for the three multipliers
  wire [2:0]                  wExeDoneTmp;          // per-channel done flags
  wire                        wExeDone;

  // Execution is complete only when all three channels report done.
  assign wExeDone = &wExeDoneTmp;

  ReservationStation_1Cycle RS
  (
    // Inputs
    .Clock(          Clock ),
    .Reset(          Reset ),
    .iMyId(          iId ),
    .iIssueBus(      iIssueBus ),
    .iCommitBus(     iCommitBus ),
    .iCommitGranted( iCommitGranted ),
    .iExecutionDone( wExeDone ),
    .iResult(        wResult ),
    // Outputs towards the multipliers
    .oSource1(       wRS1_OperandA ),
    .oSource0(       wRS1_OperandB ),
    .oScale(         wScale ),
    .oTrigger(       wRS1_2_ADD_Trigger ),
    // Outputs towards the commit logic
    .oBusy(          oBusy ),
    .oCommitRequest( oCommitResquest ),
    .oId(            oCommitData[`COMMIT_RSID_RNG] ),
    .oWE(            oCommitData[`COMMIT_WE_RNG] ),
    .oDestination(   oCommitData[`COMMIT_DST_RNG] ),
    .oResult(        {oCommitData[`X_RNG],oCommitData[`Y_RNG],oCommitData[`Z_RNG]})
  );

  // X channel. iUnscaled is the inverted SCALE_SRCR_EN bit of the scale field.
  RADIX_R_MUL_32_FULL_PARALLEL MUL0
  (
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wRS1_2_ADD_Trigger ),
    .iUnscaled(   ~wScale[`SCALE_SRCR_EN] ),
    .A(           wRS1_OperandA[`X_RNG] ),
    .B(           wRS1_OperandB[`X_RNG] ),
    .R(           wResult[`X_RNG] ),
    .OutputReady( wExeDoneTmp[0] )
  );

  // Y channel
  RADIX_R_MUL_32_FULL_PARALLEL MUL1
  (
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wRS1_2_ADD_Trigger ),
    .iUnscaled(   ~wScale[`SCALE_SRCR_EN] ),
    .A(           wRS1_OperandA[`Y_RNG] ),
    .B(           wRS1_OperandB[`Y_RNG] ),
    .R(           wResult[`Y_RNG] ),
    .OutputReady( wExeDoneTmp[1] )
  );

  // Z channel
  RADIX_R_MUL_32_FULL_PARALLEL MUL2
  (
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wRS1_2_ADD_Trigger ),
    .iUnscaled(   ~wScale[`SCALE_SRCR_EN] ),
    .A(           wRS1_OperandA[`Z_RNG] ),
    .B(           wRS1_OperandB[`Z_RNG] ),
    .R(           wResult[`Z_RNG] ),
    .OutputReady( wExeDoneTmp[2] )
  );

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_Adder_Station.v
0,0 → 1,104
`include "aDefinitions.v"
 
 
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//-----------------------------------------------------------------------------
// ADDER_STATION
//
// Add execution station: one ReservationStation_1Cycle (instance RS)
// feeding three ADDER instances, one per X/Y/Z channel of the operand row.
//
// Ports:
//   Clock, Reset     - clock and reset, passed to every sub-module
//   iIssueBus        - issue packet bus, passed to the reservation station
//   iCommitBus       - commit packet bus, passed to the reservation station
//   iId              - id of this station, passed to the reservation station
//   oCommitData      - commit packet: RS id, write enable, destination and
//                      the X/Y/Z result channels
//   oCommitResquest  - commit request raised by the reservation station
//   iCommitGranted   - commit grant for this station
//   oBusy            - busy flag from the reservation station
//
// Instance and wire names are kept as they are because the simulation
// dumpers in this project reach into the station by hierarchical reference.
//-----------------------------------------------------------------------------
module ADDER_STATION
(
  input wire                                 Clock,
  input wire                                 Reset,
  input wire [`MOD_ISSUE_PACKET_SIZE-1:0]    iIssueBus,
  input wire [`MOD_COMMIT_PACKET_SIZE-1:0]   iCommitBus,
  input wire [3:0]                           iId,
  output wire [`COMMIT_PACKET_SIZE-1:0]      oCommitData,
  output wire                                oCommitResquest,
  input wire                                 iCommitGranted,
  output wire                                oBusy
);

  wire [`DATA_ROW_WIDTH-1:0]  wRS1_OperandA;
  wire [`DATA_ROW_WIDTH-1:0]  wRS1_OperandB;
  wire [`DATA_ROW_WIDTH-1:0]  wResult;
  wire                        wRS1_2_ADD_Trigger;   // start pulse for the three adders
  wire [2:0]                  wExeDoneTmp;          // per-channel done flags
  wire                        wExeDone;

  // Execution is complete only when all three channels report done.
  assign wExeDone = &wExeDoneTmp;

  ReservationStation_1Cycle RS
  (
    // Inputs
    .Clock(          Clock ),
    .Reset(          Reset ),
    .iMyId(          iId ),
    .iIssueBus(      iIssueBus ),
    .iCommitBus(     iCommitBus ),
    .iCommitGranted( iCommitGranted ),
    .iExecutionDone( wExeDone ),
    .iResult(        wResult ),
    // Outputs towards the adders
    .oSource1(       wRS1_OperandA ),
    .oSource0(       wRS1_OperandB ),
    .oTrigger(       wRS1_2_ADD_Trigger ),
    // Outputs towards the commit logic
    .oBusy(          oBusy ),
    .oCommitRequest( oCommitResquest ),
    .oId(            oCommitData[`COMMIT_RSID_RNG] ),
    .oWE(            oCommitData[`COMMIT_WE_RNG] ),
    .oDestination(   oCommitData[`COMMIT_DST_RNG] ),
    .oResult(        {oCommitData[`X_RNG],oCommitData[`Y_RNG],oCommitData[`Z_RNG]})
  );

  // X channel
  ADDER # (`WIDTH) ADD_0
  (
    .Clock(    Clock ),
    .Reset(    Reset ),
    .iTrigger( wRS1_2_ADD_Trigger ),
    .iA(       wRS1_OperandA[`X_RNG] ),
    .iB(       wRS1_OperandB[`X_RNG] ),
    .oR(       wResult[`X_RNG] ),
    .oDone(    wExeDoneTmp[0] )
  );

  // Y channel
  ADDER # (`WIDTH) ADD_1
  (
    .Clock(    Clock ),
    .Reset(    Reset ),
    .iTrigger( wRS1_2_ADD_Trigger ),
    .iA(       wRS1_OperandA[`Y_RNG] ),
    .iB(       wRS1_OperandB[`Y_RNG] ),
    .oR(       wResult[`Y_RNG] ),
    .oDone(    wExeDoneTmp[1] )
  );

  // Z channel
  ADDER # (`WIDTH) ADD_2
  (
    .Clock(    Clock ),
    .Reset(    Reset ),
    .iTrigger( wRS1_2_ADD_Trigger ),
    .iA(       wRS1_OperandA[`Z_RNG] ),
    .iB(       wRS1_OperandB[`Z_RNG] ),
    .oR(       wResult[`Z_RNG] ),
    .oDone(    wExeDoneTmp[2] )
  );

endmodule
/theia_gpu/branches/beta_2.0/rtl/ControlCodeDumper.v
0,0 → 1,150
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//-----------------------------------------------------------------------------
// ContolCode_Dumper
//
// Simulation-only trace module for the control processor. On every rising
// edge of `CP_TOP.Clock it decodes `CP_TOP.wOperation and prints one line
// describing the operation to the simulator console. Operations that are
// not listed print nothing.
//
// The module has no ports; everything is read through hierarchical
// references rooted at `CP_TOP.
//-----------------------------------------------------------------------------
module ContolCode_Dumper;
//wait( `CP_TOP.Reset == 0 );
always @ ( posedge `CP_TOP.Clock )
begin
  case (`CP_TOP.wOperation)
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_COPYBLOCK:
  begin
    $write("\n%dns CP: COPYBLOCK DSTID: %d BLKLEN: %d TAG: %d DSTOFF: %h SRCOFF: %h\n\n",$time,
      `CP_TOP.oCopyBlockCommand[`MCU_COPYMEMBLOCKCMD_VPMASK_RNG],
      `CP_TOP.oCopyBlockCommand[`MCU_COPYMEMBLOCKCMD_BLKLEN_RNG],
      `CP_TOP.oCopyBlockCommand[`MCU_COPYMEMBLOCK_TAG_BIT],
      `CP_TOP.oCopyBlockCommand[`MCU_COPYMEMBLOCKCMD_DSTOFF_RNG],
      `CP_TOP.oCopyBlockCommand[`MCU_COPYMEMBLOCKCMD_SRCOFF_RNG]);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_DELIVER_COMMAND:
  begin
    $write("%dns CP: DELIVER_COMMAND VP[%d] ",$time,
      `CP_TOP.wDestination);
    //The command code is carried in wSourceAddr1
    case (`CP_TOP.wSourceAddr1)
      `VP_COMMAND_START_MAIN_THREAD: $write( " START_MAIN_THREAD ");
      `VP_COMMAND_STOP_MAIN_THREAD: $write( " STOP_MAIN_THREAD ");
      //Show the raw code of any command without a name here
      default: $write( " UNKNOWN(%h) ",`CP_TOP.wSourceAddr1);
    endcase
    //Terminate the line so the next trace message starts on its own line
    $write("\n");
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_NOP:
  begin
    $write("%dns CP: NOP\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_EXIT:
  begin
    $write("%dns CP: EXIT\n",$time);
    //$stop;
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_ADD:
  begin
    //ADD is only traced on cycles where the result is written back
    if (`CP_TOP.rWriteEnable)
      $write("%dns CP: ADD R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_SUB:
  begin
    $write("%dns CP: SUB R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_AND:
  begin
    $write("%dns CP: AND R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_OR:
  begin
    $write("%dns CP: OR R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_SHL:
  begin
    $write("%dns CP: SHL R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_SHR:
  begin
    $write("%dns CP: SHR R[%d] R[%d]{%h} R[%d]{%h} = %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0,`CP_TOP.rResult);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BLE:
  begin
    $write("%dns CP: BLE\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BL:
  begin
    $write("%dns CP: BL\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BG:
  begin
    $write("%dns CP: BG\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BGE:
  begin
    $write("%dns CP: BGE\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BEQ:
  begin
    $write("%dns CP: BEQ %d, R[%d] {%h} R[%d] {%h}\n",$time,`CP_TOP.wDestination,`CP_TOP.wSourceAddr1,`CP_TOP.wSourceData1,`CP_TOP.wSourceAddr0,`CP_TOP.wSourceData0);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BNE:
  begin
    $write("%dns CP: BNE\n",$time);
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_OP_BRANCH:
  begin
    $write("%dns CP: BRANCH %h\n",$time,`CP_TOP.wDestination );
  end
  //-------------------------------------
  `CONTROL_PROCESSOR_ASSIGN:
  begin
    $write("%dns CP: ASSIGN R[%d] I(%h)= %h\n",$time,`CP_TOP.wDestination,`CP_TOP.wImmediateValue,`CP_TOP.rResult);
  end
  //-------------------------------------
  default:
  begin
    //Operations without a trace entry are ignored
  end
  //-------------------------------------
  endcase
end
endmodule
 
/theia_gpu/branches/beta_2.0/rtl/Module_RegisterFile.v
0,0 → 1,100
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//-----------------------------------------------------------------------------
// RegisterFile
//
// Register file built from three RAM_DUAL_READ_PORT instances (RF_X, RF_Y,
// RF_Z), one per channel of the data row, plus three special registers that
// shadow writes to the `SPR_CONTROL1 and `SPR_CONTROL0 addresses.
//
// Parameters:
//   DATA_WIDTH - width of a full row; each channel RAM is DATA_WIDTH / 3 wide
//   ADDR_WIDTH - width of the read and write addresses
//
// Ports:
//   iReadAddress0/1         - addresses for read ports 0 and 1
//   iWriteEnable            - per-channel write enable: bit 2 = X, 1 = Y, 0 = Z
//   iWriteAddress, iData    - write address and row to write
//   oFrameOffset            - flop loaded from the X channel on a write to
//                             `SPR_CONTROL1 with iWriteEnable[2] set
//   oIndexRegister          - Z channel of a write to `SPR_CONTROL1 with
//                             iWriteEnable[0] set; bypassed combinationally
//   oThreadControlRegister  - flop loaded from the Z channel on a write to
//                             `SPR_CONTROL0 with iWriteEnable[0] set
//   oData0, oData1          - rows read through ports 0 and 1
//
// Instance and wire names are kept as they are because the simulation
// dumper reaches the channel RAMs by hierarchical reference.
//-----------------------------------------------------------------------------
module RegisterFile # ( parameter DATA_WIDTH=`DATA_ROW_WIDTH, parameter ADDR_WIDTH=`DATA_ADDRESS_WIDTH )
(
  input wire                              Clock,
  input wire                              Reset,
  input wire [ADDR_WIDTH-1:0]             iReadAddress0,
  input wire [ADDR_WIDTH-1:0]             iReadAddress1,
  input wire [2:0]                        iWriteEnable,
  input wire [ADDR_WIDTH-1:0]             iWriteAddress,
  input wire [DATA_WIDTH-1:0]             iData,
  output wire [`DATA_ADDRESS_WIDTH-1:0]   oFrameOffset,oIndexRegister,
  output wire [`WIDTH-1:0]                oThreadControlRegister,
  output wire [DATA_WIDTH-1:0]            oData0,
  output wire [DATA_WIDTH-1:0]            oData1

);

  parameter DATA_CHANNEL_WIDTH = DATA_WIDTH / 3;

  wire wEnableFrameOffsetOverwrite,wEnableControlRegOverwrite;
  wire [`DATA_ADDRESS_WIDTH-1:0] wIndexRegister;

  // Address decode for the two special-purpose register addresses.
  assign wEnableFrameOffsetOverwrite = (iWriteAddress == `SPR_CONTROL1);
  assign wEnableControlRegOverwrite  = (iWriteAddress == `SPR_CONTROL0);

  // Frame offset register: X channel of a write to SPR_CONTROL1.
  FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FDD_FRAMEOFFSET
  (
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( (wEnableFrameOffsetOverwrite & iWriteEnable[2]) ),
    .D(      iData[`X_RNG] ),
    .Q(      oFrameOffset )
  );

  // Index register: Z channel of a write to SPR_CONTROL1.
  FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ADDRESS_WIDTH ) FDD_INDEXREGISTER
  (
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( (wEnableFrameOffsetOverwrite & iWriteEnable[0]) ),
    .D(      iData[`Z_RNG] ),
    .Q(      wIndexRegister )
  );

  // Thread control register: Z channel of a write to SPR_CONTROL0.
  FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FDD_CONTROLREGISTER
  (
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( (wEnableControlRegOverwrite & iWriteEnable[0]) ),
    .D(      iData[`Z_RNG] ),
    .Q(      oThreadControlRegister )
  );

  // Bypass: while the index register is being written, forward the incoming
  // value directly instead of waiting one clock cycle for the flop to update.
  assign oIndexRegister = (wEnableFrameOffsetOverwrite & iWriteEnable[0]) ? iData[`Z_RNG] : wIndexRegister;

  // X channel storage, written when iWriteEnable[2] is set.
  RAM_DUAL_READ_PORT # ( DATA_CHANNEL_WIDTH, ADDR_WIDTH ) RF_X
  (
    .Clock(         Clock ),
    .iWriteEnable(  iWriteEnable[2] ),
    .iWriteAddress( iWriteAddress ),
    .iDataIn(       iData[`X_RNG] ),
    .iReadAddress0( iReadAddress0 ),
    .iReadAddress1( iReadAddress1 ),
    .oDataOut0(     oData0[`X_RNG] ),
    .oDataOut1(     oData1[`X_RNG] )
  );

  // Y channel storage, written when iWriteEnable[1] is set.
  RAM_DUAL_READ_PORT # ( DATA_CHANNEL_WIDTH, ADDR_WIDTH ) RF_Y
  (
    .Clock(         Clock ),
    .iWriteEnable(  iWriteEnable[1] ),
    .iWriteAddress( iWriteAddress ),
    .iDataIn(       iData[`Y_RNG] ),
    .iReadAddress0( iReadAddress0 ),
    .iReadAddress1( iReadAddress1 ),
    .oDataOut0(     oData0[`Y_RNG] ),
    .oDataOut1(     oData1[`Y_RNG] )
  );

  // Z channel storage, written when iWriteEnable[0] is set.
  RAM_DUAL_READ_PORT # ( DATA_CHANNEL_WIDTH, ADDR_WIDTH ) RF_Z
  (
    .Clock(         Clock ),
    .iWriteEnable(  iWriteEnable[0] ),
    .iWriteAddress( iWriteAddress ),
    .iDataIn(       iData[`Z_RNG] ),
    .iReadAddress0( iReadAddress0 ),
    .iReadAddress1( iReadAddress1 ),
    .oDataOut0(     oData0[`Z_RNG] ),
    .oDataOut1(     oData1[`Z_RNG] )
  );

endmodule
/theia_gpu/branches/beta_2.0/rtl/Theia.v
0,0 → 1,142
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
 
// THEIA top level.
//
// Instantiates one ControlProcessor (CP), one MemoryController (MCU) and
// `MAX_CORES VectorProcessor instances, each paired with its own output
// memory (OMEM). The MCU drives a shared data/address/strobe bus towards all
// vector processors and a per-core write enable; the per-core acknowledges
// are OR-ed together into a single ACK back to the MCU.
//
// Ports:
//   Clock, Reset        - clock and reset distributed to every sub-block
//   iEnable             - forwarded to every vector processor
//   iMemReadData        - read data from external memory (to the MCU)
//   iMemDataAvailable   - qualifies iMemReadData (to the MCU)
//   oMemReadAddress     - external memory read address (from the MCU)
//   oMEM_ReadRequest    - external memory read request (from the MCU)
module THEIA
(
    input wire          Clock,
    input wire          Reset,
    input wire          iEnable,
    input wire [31:0]   iMemReadData,
    input wire          iMemDataAvailable,
    output wire [31:0]  oMemReadAddress,
    output wire         oMEM_ReadRequest

);


// MCU -> vector processor bus
wire [`WB_WIDTH-1:0]         wMCU_2_VP_InstructionWriteAddress;
wire [`WB_WIDTH-1:0]         wMCU_2_VP_InstructionWriteData;
wire [`MAX_CORES-1:0]        wMCU_2_VP_InstructionWriteEnable;   // one write enable per core
wire [`MCU_TAG_SIZE-1:0]     wMCU_2_VP_Tag;
wire                         wMCU_2_VP_STB;
wire                         wMCU_2_VP_Cyc;
wire                         wMCU_2_VP_Mst;
// Vector processor -> MCU acknowledge
wire [`MAX_CORES-1:0]        wVP_2_MCU_ACK;                      // one ACK per core
wire                         wVP_Slave_ACK;                      // combined ACK seen by the MCU
// Control processor outputs
wire [`MCU_REQUEST_SIZE-1:0] wCP_2MCU_BlockCopyCommand;
wire[`CBC_BUS_WIDTH-1:0]     wCP_VP__ControlCommandBus;
wire                         wMCU_2_CP__FIFOEmpty;
// Per-core output memory write port
wire                         wOMem_WE[`MAX_CORES-1:0];
wire [`WB_WIDTH-1:0]         wOMEM_Address[`MAX_CORES-1:0];
wire [`WB_WIDTH-1:0]         wOMEM_Dat[`MAX_CORES-1:0];


//////////////////////////////////////////////
//
// The control processor
//
//////////////////////////////////////////////
ControlProcessor CP
(
    .Clock(             Clock ),
    .Reset(             Reset ),
    .oControlBus(       wCP_VP__ControlCommandBus ),
    .iMCUFifoEmpty(     wMCU_2_CP__FIFOEmpty ),
    .oCopyBlockCommand( wCP_2MCU_BlockCopyCommand )
);

//////////////////////////////////////////////
//
// The memory controller
//
//////////////////////////////////////////////

// Any core acknowledging counts as a slave ACK. A reduction OR is used so the
// expression follows `MAX_CORES instead of assuming exactly four cores.
assign wVP_Slave_ACK = |wVP_2_MCU_ACK;

MemoryController #(`MAX_CORES) MCU
(
    .Clock(              Clock ),
    .Reset(              Reset ),
    .iRequest(           wCP_2MCU_BlockCopyCommand ),
    .oMEM_ReadAddress(   oMemReadAddress ),
    .oMEM_ReadRequest(   oMEM_ReadRequest ),
    .oFifoEmpty(         wMCU_2_CP__FIFOEmpty ),
    .iMEM_ReadData(      iMemReadData ),
    .iMEM_DataAvailable( iMemDataAvailable ),
    .DAT_O(              wMCU_2_VP_InstructionWriteData ),
    .ADR_O(              wMCU_2_VP_InstructionWriteAddress ),
    .STB_O(              wMCU_2_VP_STB ),
    .WE_O(               wMCU_2_VP_InstructionWriteEnable ),
    .TAG_O(              wMCU_2_VP_Tag ),
    .CYC_O(              wMCU_2_VP_Cyc ),
    .MST_O(              wMCU_2_VP_Mst ),
    .ACK_I(              wVP_Slave_ACK )
);

//////////////////////////////////////////////
//
// The vector processors
//
//////////////////////////////////////////////
genvar i;
generate
for (i = 0; i < `MAX_CORES; i = i +1)
begin : VPX
    // Core i is given the ID i+1, so IDs start at 1.
    VectorProcessor VP
    (
        .Clock(     Clock ),
        .Reset(     Reset ),
        .iEnable(   iEnable ),
        .iVPID(     i+1 ),
        .iCpCommand( wCP_VP__ControlCommandBus ),
        .MCU_STB_I( wMCU_2_VP_STB ),
        .MCU_WE_I(  wMCU_2_VP_InstructionWriteEnable[i] ),
        .MCU_DAT_I( wMCU_2_VP_InstructionWriteData ),
        .MCU_ADR_I( wMCU_2_VP_InstructionWriteAddress ),
        .MCU_TAG_I( wMCU_2_VP_Tag ),
        .MCU_ACK_O( wVP_2_MCU_ACK[i] ),
        .MCU_MST_I( wMCU_2_VP_Mst ),
        .MCU_CYC_I( wMCU_2_VP_Cyc ),
        .OMEM_WE(   wOMem_WE[i] ),
        .OMEM_ADDR( wOMEM_Address[i] ),
        .OMEM_DATA( wOMEM_Dat[i] )

    );

    // Output memory for core i. Only the write side is wired up here: the
    // read address is tied to the write address and the read data output is
    // left unconnected.
    RAM_SINGLE_READ_PORT # ( `WB_WIDTH, `WB_WIDTH, `OMEM_SIZE ) OMEM
    (
        .Clock(         Clock ),
        .iWriteEnable(  wOMem_WE[i] ),
        .iWriteAddress( wOMEM_Address[i] ),
        .iDataIn(       wOMEM_Dat[i] ),
        .iReadAddress0( wOMEM_Address[i] )
        //.oDataOut0( wOMEM_Dat[i] )
    );

end // for
endgenerate


endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_DivisionStation.v
0,0 → 1,131
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
// DIVISION_STATION
//
// Reservation station wrapped around three SignedIntegerDivision units, one
// per X/Y/Z channel. The ReservationStation instance (RS) supplies the two
// source operands and a trigger; each channel is sign-extended to `LONG_WIDTH,
// optionally shifted left by `SCALE, and divided. The three quotients are
// gathered into one result row which RS places on oCommitData.
//
// Ports:
//   iIssueBus       - issue packet bus, forwarded to RS; its scale field also
//                     selects which operand gets pre-shifted
//   iCommitBus      - commit packet bus, forwarded to RS
//   iId             - identifier handed to RS as iMyId
//   oCommitData     - commit packet assembled from the RS outputs
//   oCommitResquest - commit request from RS
//   iCommitGranted  - commit grant to RS
//   oBusy           - busy flag from RS
module DIVISION_STATION
(
    input wire                               Clock,
    input wire                               Reset,
    input wire [`MOD_ISSUE_PACKET_SIZE-1:0]  iIssueBus,
    input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
    input wire [3:0]                         iId,
    output wire [`COMMIT_PACKET_SIZE-1:0]    oCommitData,
    output wire                              oCommitResquest,
    input wire                               iCommitGranted,
    output wire                              oBusy
);

// Handshake between the reservation station and the dividers
wire       wStartDivide;      // trigger from RS, starts all three dividers
wire [2:0] wDividerDone;      // per-channel OutputReady
wire       wAllDividersDone;  // high only when every channel has finished

// Operands as delivered by RS: oSource1 feeds the divisor, oSource0 the dividend
wire [`DATA_ROW_WIDTH-1:0] wDivisorRow;
wire [`DATA_ROW_WIDTH-1:0] wDividendRow;
wire [`DATA_ROW_WIDTH-1:0] wQuotientRow;

// Sign-extended (Ext) and final (possibly scaled) per-channel operands
wire [`LONG_WIDTH-1:0] wDividendX_Ext, wDividendY_Ext, wDividendZ_Ext;
wire [`LONG_WIDTH-1:0] wDivisorX_Ext,  wDivisorY_Ext,  wDivisorZ_Ext;
wire [`LONG_WIDTH-1:0] wDividendX,     wDividendY,     wDividendZ;
wire [`LONG_WIDTH-1:0] wDivisorX,      wDivisorY,      wDivisorZ;

// Scale control
wire [2:0] wScaleSelect;
wire       wShiftDividend;
wire       wShiftDivisor;

ReservationStation RS
(
    .Clock(          Clock ),
    .Reset(          Reset ),
    .iMyId(          iId ),
    .iIssueBus(      iIssueBus ),
    .iCommitBus(     iCommitBus ),
    .iCommitGranted( iCommitGranted ),
    .iExecutionDone( wAllDividersDone ),
    .iResult(        wQuotientRow ),
    .oSource1(       wDivisorRow ),
    .oSource0(       wDividendRow ),
    .oTrigger(       wStartDivide ),
    .oBusy(          oBusy ),
    .oCommitRequest( oCommitResquest ),
    .oId(            oCommitData[`COMMIT_RSID_RNG] ),
    .oWE(            oCommitData[`COMMIT_WE_RNG] ),
    .oDestination(   oCommitData[`COMMIT_DST_RNG] ),
    .oResult(        {oCommitData[`X_RNG],oCommitData[`Y_RNG],oCommitData[`Z_RNG]})
);

// Execution is complete once all three channels report ready.
assign wAllDividersDone = &wDividerDone;

// Sign-extend each 32-bit channel by replicating bits 95, 63 and 31 of the row.
assign wDividendX_Ext = {{32{wDividendRow[95]}},wDividendRow[`X_RNG]};
assign wDividendY_Ext = {{32{wDividendRow[63]}},wDividendRow[`Y_RNG]};
assign wDividendZ_Ext = {{32{wDividendRow[31]}},wDividendRow[`Z_RNG]};

assign wDivisorX_Ext = {{32{wDivisorRow[95]}},wDivisorRow[`X_RNG]};
assign wDivisorY_Ext = {{32{wDivisorRow[63]}},wDivisorRow[`Y_RNG]};
assign wDivisorZ_Ext = {{32{wDivisorRow[31]}},wDivisorRow[`Z_RNG]};

// Scale-up is applied here when bit 2 of the scale field is clear: bit 1
// selects the dividend, bit 0 the divisor. Per the original author's note the
// un-scale part is done by the IIU.
// NOTE(review): the scale field is read from iIssueBus directly, not from a
// value held by RS - confirm it is still valid when the operands are used.
assign wScaleSelect   = iIssueBus[`MOD_ISSUE_SCALE_RNG];
assign wShiftDividend = ~wScaleSelect[2] & wScaleSelect[1];
assign wShiftDivisor  = ~wScaleSelect[2] & wScaleSelect[0];

assign wDividendX = (wShiftDividend) ? (wDividendX_Ext << `SCALE) : wDividendX_Ext;
assign wDividendY = (wShiftDividend) ? (wDividendY_Ext << `SCALE) : wDividendY_Ext;
assign wDividendZ = (wShiftDividend) ? (wDividendZ_Ext << `SCALE) : wDividendZ_Ext;

assign wDivisorX = (wShiftDivisor) ? (wDivisorX_Ext << `SCALE) : wDivisorX_Ext;
assign wDivisorY = (wShiftDivisor) ? (wDivisorY_Ext << `SCALE) : wDivisorY_Ext;
assign wDivisorZ = (wShiftDivisor) ? (wDivisorZ_Ext << `SCALE) : wDivisorZ_Ext;

// X channel divider
SignedIntegerDivision DIV_0
(
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wStartDivide ),
    .iDividend(   wDividendX ),
    .iDivisor(    wDivisorX ),
    .oQuotient(   wQuotientRow[`X_RNG] ),
    .OutputReady( wDividerDone[0] )
);

// Y channel divider
SignedIntegerDivision DIV_1
(
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wStartDivide ),
    .iDividend(   wDividendY ),
    .iDivisor(    wDivisorY ),
    .oQuotient(   wQuotientRow[`Y_RNG] ),
    .OutputReady( wDividerDone[1] )
);

// Z channel divider
SignedIntegerDivision DIV_2
(
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iInputReady( wStartDivide ),
    .iDividend(   wDividendZ ),
    .iDivisor(    wDivisorZ ),
    .oQuotient(   wQuotientRow[`Z_RNG] ),
    .OutputReady( wDividerDone[2] )
);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_OperandModifiers.v
0,0 → 1,438
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//-----------------------------------------------------------------------------------
// ModfierQueue
//
// One slot that remembers a pending source operand. When iKeep is asserted
// the slot latches the source tag (iTag), the producing reservation station
// ID (iRs) and the scale bits (iScale). While the slot is busy it compares
// the stored station ID against iKey; on a match it captures iData and raises
// oRequest until a grant is seen.
//
// oBusy and oRequest are each held in a 1-bit UPCOUNTER_POSEDGE, so every
// enable pulse presumably flips the flag (verify against UPCOUNTER_POSEDGE).
// iGranted is passed through a PULSE block before use; presumably this yields
// a single-cycle strobe per grant (verify against PULSE).
module ModfierQueue
(
    input wire                            Clock,
    input wire                            Reset,
    input wire                            iKeep,
    input wire                            iGranted,
    input wire [3:0]                      iRs,
    input wire [2:0]                      iScale,
    output wire [2:0]                     oScale,
    input wire[`ISSUE_SRCTAG_SIZE-1:0]    iTag,
    input wire[`COMMIT_PACKET_SIZE-1:0]   iData,
    output wire[`COMMIT_PACKET_SIZE-1:0]  oData,
    output wire[3:0]                      oRsID,
    input wire[3:0]                       iKey,
    output wire                           oRequest,
    output wire                           oBusy,
    output wire[`ISSUE_SRCTAG_SIZE-1:0]   oTag
);

wire wKeyHit;        // stored station ID matches iKey while the slot is busy
wire wGrantStrobe;   // iGranted after the PULSE block
wire wBusyEnable;    // enable for the busy flag counter
wire wRequestEnable; // enable for the request flag counter

assign wKeyHit        = (iKey == oRsID) && oBusy;
assign wBusyEnable    = iKeep | wGrantStrobe;
assign wRequestEnable = wKeyHit | (wGrantStrobe & oRequest);

PULSE P1
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( 1'b1 ),
    .D(      iGranted ),
    .Q(      wGrantStrobe )
);

// Busy flag: enabled when the slot is filled and again when it is granted.
UPCOUNTER_POSEDGE # (1) UPBUSY
(
    .Clock(   Clock ),
    .Reset(   Reset ),
    .Initial( 1'b0 ),
    .Enable(  wBusyEnable ),
    .Q(       oBusy )
);

// Request flag: enabled on a key hit and again on a grant while requesting.
UPCOUNTER_POSEDGE # (1) UPREQ
(
    .Clock(   Clock ),
    .Reset(   Reset ),
    .Initial( 1'b0 ),
    .Enable(  wRequestEnable ),
    .Q(       oRequest )
);

//20 DST, SWZZL 6 bits, SCALE 3 bits, SIGN 3 bits = 15

// Source tag, latched when the slot is filled
FFD_POSEDGE_SYNCRONOUS_RESET # ( `ISSUE_SRCTAG_SIZE ) FFD1
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( iKeep ),
    .D(      iTag ),
    .Q(      oTag )
);

// Data packet, captured on a key hit
FFD_POSEDGE_SYNCRONOUS_RESET # ( `COMMIT_PACKET_SIZE ) FFD2
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( wKeyHit ),
    .D(      iData ),
    .Q(      oData )
);

// Producing reservation station ID, latched when the slot is filled
FFD_POSEDGE_SYNCRONOUS_RESET # ( 4 ) FFD3
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( iKeep ),
    .D(      iRs ),
    .Q(      oRsID )
);

// Scale bits, latched when the slot is filled
FFD_POSEDGE_SYNCRONOUS_RESET # ( 3 ) FFD4
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( iKeep ),
    .D(      iScale ),
    .Q(      oScale )
);

endmodule
//-----------------------------------------------------------------------------------
 
// ModifierBlock
//
// Combinational operand modifier applied to one data row in three stages:
//   1. sign    - each channel is negated when its TAG_SIGN bit is set in iTag
//   2. scale   - when iScale[0] and iScale[1] are both set, each channel is
//                arithmetically shifted right by `SCALE; otherwise the data
//                passes through unchanged (the shift-left variant is disabled
//                in the source)
//   3. swizzle - each output channel is picked from the scaled channels by a
//                mux controlled by the TAG_SWL*_RNG field of iTag
// Each stage can be compiled out with its DISABLE_FEATURE_* macro.
// Clock and Reset are part of the port list but are not used in this block.
module ModifierBlock
(
    input wire                          Clock,
    input wire                          Reset,
    input wire [`ISSUE_SRCTAG_SIZE-1:0] iTag,
    input wire [1:0]                    iScale,
    input wire [`DATA_ROW_WIDTH-1:0]    iData,
    output wire [`DATA_ROW_WIDTH-1:0]   oData
);

wire [`DATA_ROW_WIDTH-1:0] wStageSign;      // output of the sign stage
wire [`DATA_ROW_WIDTH-1:0] wStageScale;     // output of the scale stage
wire [`DATA_ROW_WIDTH-1:0] wStageSwizzle;   // output of the swizzle stage

//---------------- sign stage ----------------
`ifdef DISABLE_FEATURE_SIGN_CONTROL
    assign wStageSign = iData;

`else
    assign wStageSign[`X_RNG] = (iTag[`TAG_SIGNX]) ? -iData[`X_RNG] : iData[`X_RNG];
    assign wStageSign[`Y_RNG] = (iTag[`TAG_SIGNY]) ? -iData[`Y_RNG] : iData[`Y_RNG];
    assign wStageSign[`Z_RNG] = (iTag[`TAG_SIGNZ]) ? -iData[`Z_RNG] : iData[`Z_RNG];

`endif

//---------------- scale stage ----------------
`ifdef DISABLE_FEATURE_SCALE_CONTROL

    assign wStageScale = wStageSign;

`else
    // Signed views of each channel so that >>> performs an arithmetic shift.
    wire signed [`WIDTH-1:0] wChannelX,wChannelY,wChannelZ;
    wire [`DATA_ROW_WIDTH-1:0] wShiftedUp,wShiftedDown;
    assign wChannelX = wStageSign[`X_RNG];
    assign wChannelY = wStageSign[`Y_RNG];
    assign wChannelZ = wStageSign[`Z_RNG];

    // Shift-left is currently a pass-through; the original expression was
    // {(X << `SCALE),(Y << `SCALE),(Z << `SCALE)}.
    assign wShiftedUp   = wStageSign;
    assign wShiftedDown = {(wChannelX >>> `SCALE),(wChannelY >>> `SCALE),(wChannelZ >>> `SCALE)};

    // iScale[0] enables scaling, iScale[1] picks the shifted-down data over
    // the shifted-up data.
    assign wStageScale = (~iScale[0]) ? wStageSign
                                      : ((iScale[1]) ? wShiftedDown : wShiftedUp);
`endif

//---------------- swizzle stage ----------------
`ifdef DISABLE_FEATURE_SWIZZLE_CONTROL
    assign wStageSwizzle = wStageScale;
`else
    // X output: candidates are X, Z, Y (mux inputs I1..I3)
    MUXFULLPARALELL_3SEL_EN SWIZZLE0X
    (
        .EN(  1'b1 ),
        .SEL( iTag[`TAG_SWLX_RNG] ),
        .I1(  wStageScale[`X_RNG] ),
        .I2(  wStageScale[`Z_RNG] ),
        .I3(  wStageScale[`Y_RNG] ),
        .O1(  wStageSwizzle[`X_RNG] )
    );

    // Y output: candidates are Y, Z, X (mux inputs I1..I3)
    MUXFULLPARALELL_3SEL_EN SWIZZLE0Y
    (
        .EN(  1'b1 ),
        .SEL( iTag[`TAG_SWLY_RNG] ),
        .I1(  wStageScale[`Y_RNG] ),
        .I2(  wStageScale[`Z_RNG] ),
        .I3(  wStageScale[`X_RNG] ),
        .O1(  wStageSwizzle[`Y_RNG] )
    );

    // Z output: candidates are Z, Y, X (mux inputs I1..I3)
    MUXFULLPARALELL_3SEL_EN SWIZZLE0Z
    (
        .EN(  1'b1 ),
        .SEL( iTag[`TAG_SWLZ_RNG] ),
        .I1(  wStageScale[`Z_RNG] ),
        .I2(  wStageScale[`Y_RNG] ),
        .I3(  wStageScale[`X_RNG] ),
        .O1(  wStageSwizzle[`Z_RNG] )
    );

`endif

assign oData = wStageSwizzle;
endmodule
//-----------------------------------------------------------------------------------
// OperandModifiers
//
// Sits between the instruction issue bus and the reservation stations.
// For every issue packet it:
//   * applies the ModifierBlock (sign / scale / swizzle) to the source operands
//     and builds the modified issue packet oModIssue;
//   * when a source still depends on a reservation station (non-zero SRCxRS
//     field), parks that source's tag, station ID and scale bits in one of four
//     ModfierQueue slots (Q0..Q3) until the matching result shows up on
//     iCommitBus;
//   * arbitrates between the current issue and the four queue slots, and sends
//     the winner through modifier MD1 to form oCommitBus.
//
// Ports:
//   iIssueBus   - issue packet from the instruction issue unit
//   iCommitBus  - commit packet broadcast by the reservation stations
//   oModIssue   - issue packet with modified source data
//   oCommitBus  - commit packet extended with scale bits, source tag and station ID
module OperandModifiers
(
input wire Clock,
input wire Reset,
input wire [`ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`COMMIT_PACKET_SIZE-1:0] iCommitBus,
output wire [`MOD_ISSUE_PACKET_SIZE-1:0] oModIssue,
output wire [`MOD_COMMIT_PACKET_SIZE-1:0] oCommitBus
);


wire [`ISSUE_PACKET_SIZE-1:0] wIssueBus;
// NOTE(review): wStationRequest, wStationGrant and wFifoEmpty are declared but
// never referenced anywhere in this module.
wire [2:0] wStationRequest;
wire [2:0] wStationGrant;
wire wIssue;
wire [3:0] wBusy;   // busy flag of Q0..Q3
wire [3:0] wKeep;   // load strobe of Q0..Q3
wire wFifoEmpty;
wire wDependencySrc0,wDependencySrc1;
wire [`ISSUE_SRCTAG_SIZE-1:0] wInTag0,wInTag1,wInTag2,wInTag3; //8+3+ISSUE_SRCTAG_SIZE(9) = 20
wire [`ISSUE_SRCTAG_SIZE-1:0] wOutTag0,wOutTag1,wOutTag2,wOutTag3; //8+3+ISSUE_SRCTAG_SIZE(9) = 20
// NOTE(review): wData0..3 are `DATA_ROW_WIDTH wide but are driven by
// ModfierQueue.oData, which is `COMMIT_PACKET_SIZE wide - if the packet is wider
// than a data row only the low bits are kept. Confirm this is intended.
wire [`DATA_ROW_WIDTH-1:0] wData0,wData1,wData2,wData3;
wire [(`ISSUE_SRCTAG_SIZE+`DATA_ROW_WIDTH)-1:0] wSrcA_Pre;
// Index 0 belongs to the issue path, indices 1..4 to Q0..Q3.
wire [4:0] wRequest,wGranted;
wire [3:0] wInRs0,wInRs1,wInRs2,wInRs3;
wire [3:0] wOutRs0,wOutRs1,wOutRs2,wOutRs3,wOutRsCommit;
wire [2:0] wOutScale0,wOutScale1,wOutScale2,wOutScale3,wSrcA_Scale;
wire [2:0] wInScale0,wInScale1,wInScale2,wInScale3;


assign wIssueBus = iIssueBus;

//If at least 1 bit of the RSID is 1 then IIU is currently Issuing a packet
assign wIssue = (iIssueBus[`ISSUE_RSID_RNG]) ? 1'b1 : 1'b0;

// A source has a pending dependency when its producing-station field is non-zero.
assign wDependencySrc0 = (iIssueBus[`ISSUE_SRC0RS_RNG] != 0) ? 1 : 0;
assign wDependencySrc1 = (iIssueBus[`ISSUE_SRC1RS_RNG] != 0) ? 1 : 0;

// Slot allocation.
// Q0 is loaded for a source-0 dependency when Q0 is free, or for a lone
// source-1 dependency when Q0 is free and Q1 is already busy.
assign wKeep[0] = wDependencySrc0 & ~wBusy[0] |
wDependencySrc1 & ~wDependencySrc0 & ~wBusy[0] & wBusy[1];
// Q1 is loaded for a source-1 dependency when Q1 is free.
// NOTE(review): the second term ANDs wDependencySrc0 with ~wDependencySrc0, so
// it can never be 1. By symmetry with wKeep[0] it may have been meant to read
// "wDependencySrc0 & ~wDependencySrc1 & wBusy[0] & ~wBusy[1]" - confirm with
// the author before changing, since wKeep[2] already covers that case.
assign wKeep[1] = wDependencySrc1 & ~wBusy[1] |
wDependencySrc0 & ~wDependencySrc0 & wBusy[0] & ~wBusy[1];

// Q2 is the overflow slot for source 0 (used when Q0 is busy).
assign wKeep[2] = wDependencySrc0 & wBusy[0] & ~wBusy[2]; //|
//wDependencySrc1 & ~wDependencySrc0 & wBusy[0] & wBusy[1] & ~wBusy[2];
// Q3 is the overflow slot for source 1 (used when Q1 is busy).
assign wKeep[3] = wDependencySrc1 & wBusy[1] & ~wBusy[3];// |
//wDependencySrc0 & ~wDependencySrc1 & wBusy[0] & wBusy[1] & wBusy[2] & ~wBusy[3];


// Tag presented to each slot: even slots prefer source 0, odd slots prefer
// source 1, each falling back to the other source when its own has no dependency.
assign wInTag0 = ( wDependencySrc0 ) ? iIssueBus[`ISSUE_SRC0_TAG_RNG] : iIssueBus[`ISSUE_SRC1_TAG_RNG];
assign wInTag1 = ( wDependencySrc1 ) ? iIssueBus[`ISSUE_SRC1_TAG_RNG] : iIssueBus[`ISSUE_SRC0_TAG_RNG];
assign wInTag2 = ( wDependencySrc0 ) ? iIssueBus[`ISSUE_SRC0_TAG_RNG] : iIssueBus[`ISSUE_SRC1_TAG_RNG];
assign wInTag3 = ( wDependencySrc1 ) ? iIssueBus[`ISSUE_SRC1_TAG_RNG] : iIssueBus[`ISSUE_SRC0_TAG_RNG];

// Producing-station ID presented to each slot, same selection rule as the tags.
assign wInRs0 = ( wDependencySrc0 ) ? iIssueBus[`ISSUE_SRC0RS_RNG] : iIssueBus[`ISSUE_SRC1RS_RNG];
assign wInRs1 = ( wDependencySrc1 ) ? iIssueBus[`ISSUE_SRC1RS_RNG] : iIssueBus[`ISSUE_SRC0RS_RNG];
assign wInRs2 = ( wDependencySrc0 ) ? iIssueBus[`ISSUE_SRC0RS_RNG] : iIssueBus[`ISSUE_SRC1RS_RNG];
assign wInRs3 = ( wDependencySrc1 ) ? iIssueBus[`ISSUE_SRC1RS_RNG] : iIssueBus[`ISSUE_SRC0RS_RNG];


// Scale bits presented to each slot: {SCALER, SCALE_OP, per-source scale enable},
// same selection rule as the tags.
assign wInScale0 = ( wDependencySrc0 ) ? {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE0]} : {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE1]};
assign wInScale1 = ( wDependencySrc1 ) ? {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE1]} : {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE0]};
assign wInScale2 = ( wDependencySrc0 ) ? {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE0]} : {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE1]};
assign wInScale3 = ( wDependencySrc1 ) ? {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE1]} : {iIssueBus[`ISSUE_SCALER],iIssueBus[`ISSUE_SCALE_OP],iIssueBus[`ISSUE_SCALE0]};
// Request bit 0 is unused; the issue path requests the arbiter through wIssue.
assign wRequest[0] = 1'b0;
// Queue slot 0 (primary slot for source 0); uses request/grant index 1.
ModfierQueue Q0
(
.Clock( Clock ),
.Reset( Reset ),
.iRs( wInRs0 ),
.oRsID( wOutRs0 ),
.iTag( wInTag0 ),
.iScale( wInScale0 ),
.oScale( wOutScale0 ),
.iKeep( wKeep[0] ),
.iKey( iCommitBus[`COMMIT_RSID_RNG] ),
.iData( iCommitBus ),
.oTag( wOutTag0 ),
.oData( wData0 ),
.oRequest( wRequest[1] ),
.iGranted( wGranted[1] ),
.oBusy( wBusy[0] )
);


// Queue slot 1 (primary slot for source 1); uses request/grant index 2.
ModfierQueue Q1
(
.Clock( Clock ),
.Reset( Reset ),
.iRs( wInRs1 ),
.oRsID( wOutRs1 ),
.iTag( wInTag1 ),
.iScale( wInScale1 ),
.oScale( wOutScale1 ),
.iKeep( wKeep[1] ),
.iKey( iCommitBus[`COMMIT_RSID_RNG] ),
.iData( iCommitBus ),
.oTag( wOutTag1 ),
.oData( wData1 ),
.oRequest( wRequest[2] ),
.iGranted( wGranted[2] ),
.oBusy( wBusy[1] )
);


// Queue slot 2 (overflow slot for source 0); uses request/grant index 3.
ModfierQueue Q2
(
.Clock( Clock ),
.Reset( Reset ),
.iRs( wInRs2 ),
.iTag( wInTag2 ),
.iScale( wInScale2 ),
.oScale( wOutScale2 ),
.oRsID( wOutRs2 ),
.iKeep( wKeep[2] ),
.iKey( iCommitBus[`COMMIT_RSID_RNG] ),
.iData( iCommitBus ),
.oTag( wOutTag2 ),
.oData( wData2 ),
.oRequest( wRequest[3] ),
.iGranted( wGranted[3] ),
.oBusy( wBusy[2] )
);

// Queue slot 3 (overflow slot for source 1); uses request/grant index 4.
ModfierQueue Q3
(
.Clock( Clock ),
.Reset( Reset ),
.iRs( wInRs3 ),
.oRsID( wOutRs3 ),
.iTag( wInTag3 ),
.iScale( wInScale3 ),
.oScale( wOutScale3 ),
.iKeep( wKeep[3] ),
.iKey( iCommitBus[`COMMIT_RSID_RNG] ),
.iData( iCommitBus ),
.oTag( wOutTag3 ),
.oData( wData3 ),
.oRequest( wRequest[4] ),
.iGranted( wGranted[4] ),
.oBusy( wBusy[3] )
);


// Arbiter between the issue path (request 0) and the four queue slots.
// Queue requests are masked while an issue is in progress.
ROUND_ROBIN_5_ENTRIES ARBXXX
(
.Clock( Clock ),
.Reset( Reset ),
.iRequest0( wIssue),
.iRequest1( wRequest[1] & ~wIssue ), //Issues from IIU have priority
.iRequest2( wRequest[2] & ~wIssue ), //Issues from IIU have priority
.iRequest3( wRequest[3] & ~wIssue ), //Issues from IIU have priority,
.iRequest4( wRequest[4] & ~wIssue ),

.oPriorityGrant( wGranted[0] ),
.oGrant1( wGranted[1] ),
.oGrant2( wGranted[2] ),
.oGrant3( wGranted[3] ),
.oGrant4( wGranted[4] )

);


// Convert the grant vector (zero-padded to 6 bits) into the select value
// shared by the three muxes below.
wire[3:0] wBusSelector;
DECODER_ONEHOT_2_BINARY DECODER
(
.iIn( {1'b0,wGranted} ),
.oOut( wBusSelector )
);

// Selects {tag, data} for source A: zero, the live issue packet's source 0,
// or one of the queue slots. Only inputs I1..I6 are connected here.
MUXFULLPARALELL_3SEL_GENERIC # (`ISSUE_SRCTAG_SIZE + `DATA_ROW_WIDTH ) MUX
(
.Sel(wBusSelector),
.I1( {`ISSUE_SRCTAG_SIZE'b0,`DATA_ROW_WIDTH'b0} ),
.I2( {wIssueBus[`ISSUE_SRC0_TAG_RNG],wIssueBus[`ISSUE_SRC0_DATA_RNG]} ),
.I3( {wOutTag0,wData0} ),
.I4( {wOutTag1,wData1} ),
.I5( {wOutTag2,wData2} ),
.I6( {wOutTag3,wData3} ),
.O1( wSrcA_Pre )
);
// Selects the station ID that accompanies the chosen queue slot
// (zero for the idle and issue selections).
MUXFULLPARALELL_3SEL_GENERIC # ( 4 ) MUX2
(
.Sel(wBusSelector),
.I1( 4'b0 ),
.I2( 4'b0 ),
.I3( wOutRs0 ),
.I4( wOutRs1 ),
.I5( wOutRs2 ),
.I6( wOutRs3 ),
.O1( wOutRsCommit )
);
// Selects the scale bits that accompany the chosen queue slot
// (zero for the idle and issue selections).
MUXFULLPARALELL_3SEL_GENERIC # ( 3 ) MUX3
(
.Sel(wBusSelector),
.I1( 3'b0 ),
.I2( 3'b0 ),
.I3( wOutScale0 ),
.I4( wOutScale1 ),
.I5( wOutScale2 ),
.I6( wOutScale3 ),
.O1( wSrcA_Scale )
);
wire [`DATA_ROW_WIDTH-1:0] wModIssueSource0, wModIssueSource1;
// Modifier for source A (issue source 0 or a queued operand).
// NOTE(review): wSrcA_Pre is indexed with the issue-packet range macros
// `ISSUE_SRC0_TAG_RNG / `ISSUE_SRC0_DATA_RNG; this presumably relies on those
// ranges matching the {tag, data} layout of the mux output - verify against
// aDefinitions.v.
ModifierBlock MD1
(
.Clock( Clock ),
.Reset( Reset ),
.iScale( {wSrcA_Scale[1:0]} ),
.iTag( wSrcA_Pre[`ISSUE_SRC0_TAG_RNG] ),
.iData( wSrcA_Pre[`ISSUE_SRC0_DATA_RNG] ),
.oData( wModIssueSource0 )
);
// Outgoing commit packet: {scale bits, source tag, station ID, source-0 data of oModIssue}.
assign oCommitBus = {wSrcA_Scale,wSrcA_Pre[`ISSUE_SRC0_TAG_RNG],wOutRsCommit,oModIssue[`MOD_ISSUE_SRC0_DATA_RNG]};
wire [3:0] wScale;
assign wScale = wIssueBus[`ISSUE_SCALE_RNG];
// Modifier for source 1, always fed straight from the issue packet.
ModifierBlock MD2
(
.Clock( Clock ),
.Reset( Reset ),
.iScale( {wScale[`SCALE_OP],wScale[`SCALE_SRC1_EN]} ),
.iTag( wIssueBus[`ISSUE_SRC1_TAG_RNG] ),
.iData( wIssueBus[`ISSUE_SRC1_DATA_RNG] ),
.oData( wModIssueSource1 )
);
// When a source has a pending dependency its data field carries the selected
// tag (zero-extended) instead of modified data.
assign oModIssue[`MOD_ISSUE_SRC1_DATA_RNG] = (wDependencySrc1) ? {`MOD_ISSUE_SRC_SIZE'b0,wInTag1} : wModIssueSource1;
assign oModIssue[`MOD_ISSUE_SRC0_DATA_RNG] = (wDependencySrc0) ? {`MOD_ISSUE_SRC_SIZE'b0,wInTag0} : wModIssueSource0;
// The remaining issue fields are forwarded unchanged.
assign oModIssue[`MOD_ISSUE_SRC0RS_RNG] = wIssueBus[`ISSUE_SRC0RS_RNG];
assign oModIssue[`MOD_ISSUE_SRC1RS_RNG] = wIssueBus[`ISSUE_SRC1RS_RNG];
assign oModIssue[`MOD_ISSUE_WE_RNG] = wIssueBus[`ISSUE_WE_RNG];
assign oModIssue[`MOD_ISSUE_SCALE_RNG] = wIssueBus[`ISSUE_SCALE_RNG];
assign oModIssue[`MOD_ISSUE_DST_RNG] = wIssueBus[`ISSUE_DST_RNG];
assign oModIssue[`MOD_ISSUE_RSID_RNG] = wIssueBus[`ISSUE_RSID_RNG];
endmodule
//-----------------------------------------------------------------------------------
/theia_gpu/branches/beta_2.0/rtl/Module_InstructionIssue.v
0,0 → 1,642
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//`define ADDRESSING_MODES_DISABLED 1
//`define NO_STALL_ON_BRANCH_DEPS 1
 
// State encodings for the InstructionIssue state machine
// (values held in rCurrentState / rNextState).
// Initial state, entered on Reset; all control outputs are cleared.
`define II_STATE_AFTER_RESET 0
// Waits for a free reservation station and decides how to proceed.
`define II_FETCH_INSTRUCTION 1
// Issues the instruction using data forwarded from the commit bus.
`define II_ISSUE_REQUEST_WITH_DATA_FWD 2
// Issues the instruction without forwarded data.
`define II_ISSUE_REQUEST 3
// Pops the commit FIFO and writes the tag memory on its behalf.
`define II_FIFO_UPDATE 4
// NOTE(review): no case item for this value exists in the state machine's
// case statement; it falls into the default branch if ever reached.
`define II_ISSUE_BRANCH_OPERATION 5
// Applies the branch decision to the program counter.
`define II_UPDATE_PC_BRANCH_OPERATION 6
 
// Values assigned to rTagMemOwner: who drives the tag memory write port.
`define TAGMEM_OWNER_ISSUE 1'b0
`define TAGMEM_OWNER_FIFO 1'b1
 
module InstructionIssue
(
input wire Clock,
input wire Reset,
input wire iEnable,
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction0, //Instruction fetched from IM
input wire [`INSTRUCTION_WIDTH-1:0] iInstruction1, //Branch taken instruction prefetch
input wire [`DATA_ROW_WIDTH-1:0] iSourceData0, //Source0 value from RF
input wire [`DATA_ROW_WIDTH-1:0] iSourceData1, //Source1 value from RF
input wire [`NUMBER_OF_RSVR_STATIONS-1:0] iRStationBusy,
input wire [`COMMIT_PACKET_SIZE-1:0] iResultBcast, //Contains DST and RsId from last commited operation
input wire iSignFlag,
input wire iZeroFlag,
input wire iMtEnabled,
input wire iIgnoreResultBcast,
output wire [`DATA_ADDRESS_WIDTH-1:0] oSourceAddress0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oSourceAddress1,
output wire [`ISSUE_PACKET_SIZE-1:0] oIssueBcast,
input wire [`DATA_ADDRESS_WIDTH -1:0] iFrameOffset,iIndexRegister,
input wire [`INSTRUCTION_ADDR_WIDTH-1:0] iCodeOffset,
output wire [`INSTRUCTION_ADDR_WIDTH -1:0] oIP0,
output wire [`INSTRUCTION_ADDR_WIDTH -1:0] oIP1
);
 
 
parameter SB_ENTRY_WIDTH = 4;
 
wire[SB_ENTRY_WIDTH-1:0] wSource0_Station; //Reservation Station that is currently calculationg Source0, zero means none
wire[SB_ENTRY_WIDTH-1:0] wSource1_Station; //Reservation Station that is currently calculationg Source1, zero means none
wire[SB_ENTRY_WIDTH-1:0] wSource0_RsSb;
wire[`DATA_ADDRESS_WIDTH-1:0] wSBWriteAddress;
wire [SB_ENTRY_WIDTH-1:0] wSBWriteData;
wire wStall;
wire [`DATA_ROW_WIDTH-1:0] wSourceData0;
wire [`DATA_ROW_WIDTH-1:0] wSourceData1;
wire wFIFO_ReadEnable;
wire [`DATA_ADDRESS_WIDTH-1:0] wFIFO_Dst;
wire [`DATA_ADDRESS_WIDTH-1:0] wIssue_Dst;
wire [`DATA_ADDRESS_WIDTH-1:0] wSource0Addr_Displaced,wSourceAddress0_Imm,wSource0Addr_Displaced_plus_Index;
wire [`DATA_ADDRESS_WIDTH-1:0] wSource1Addr_Displaced,wSourceAddress1_Imm,wSource1Addr_Displaced_plus_Index;
wire wSBWriteEnable;
wire[`DATA_ROW_WIDTH-1:0] wSignedSourceData0;
wire[`DATA_ROW_WIDTH-1:0] wSignedSourceData1;
wire[`DATA_ROW_WIDTH-1:0] wSwizzledSourceData0;
wire[`DATA_ROW_WIDTH-1:0] wSwizzledSourceData1;
wire [`DATA_ROW_WIDTH-1:0] wResultData;
wire [`DATA_ROW_WIDTH-1:0] wSourceData1Temp;
wire [`DATA_ROW_WIDTH-1:0] wScaledSourceData0;
wire [`DATA_ROW_WIDTH-1:0] wScaledSourceData1;
wire [`DATA_ROW_WIDTH-1:0] wScaledSourceData0_Pre;
wire [`DATA_ROW_WIDTH-1:0] wScaledSourceData1_Pre;
wire [`DATA_ROW_WIDTH-1:0] wUnscaleSourceData0_Pre;
wire [`DATA_ROW_WIDTH-1:0] wUnscaleSourceData1_Pre;
wire [6:0] wOp;
wire wBranchTaken;
wire wCommitBusInputFifo_Empty;
wire wCommitBusDataAvailabe;
wire wReservationStationBusy;
wire [`COMMIT_PACKET_SIZE-1:0] wResultFifoData;
reg rTagMemoryWE,rTagMemOwner,rIssueNow,rIncrementPC,rPopFifo,rBypassFifo,rUseForwardedData;
reg rSetPCBranchTaken;
wire wBranchWithDependency;
 
wire wMtHasOnceMoreTimeSlot,wEnabled_Delay;
wire wIO_Operation;
assign wIO_Operation = (~wOp[0] & wOp[1] & wOp[2] & ~wOp[3]);
 
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD123
( Clock, Reset, 1'b1 , iEnable , wEnabled_Delay );
 
assign wMtHasOnceMoreTimeSlot = ~wEnabled_Delay;
 
assign wStall = iInstruction0[`INST_EOF_RNG];
 
reg [4:0] rCurrentState, rNextState;
//Next states logic and Reset sequence
always @(posedge Clock )
begin
if (Reset )
rCurrentState <= `II_STATE_AFTER_RESET;
else
rCurrentState <= rNextState;
end
 
 
 
 
always @ ( * )
begin
case (rCurrentState)
//--------------------------------------
`II_STATE_AFTER_RESET:
begin
rTagMemoryWE = 1'b0;
rTagMemOwner = 1'b0;
rIssueNow = 1'b0;
rIncrementPC = 1'b0;
rPopFifo = 1'b0;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b0;
rSetPCBranchTaken = 1'b0;
rNextState = `II_FETCH_INSTRUCTION;
end
//--------------------------------------
/*The PC will be incremented except for the scenario where we need to wait
for reservation stations to become available. If we increment the PC, then the
value of PC will get update the next clock cycle, and another clock cycle
after that the instruction will get updated.
1- If there is data waiting on the commit bus input port this cycle,
then do not queue this data into the FIFO but instead set
set the score board write enable to 1, set the wSBWriteAddress
to the CommitPacket Destination range and update the score board
bit to zero, so than in the next state the score board bit associated
to the commit data has been updated.
2 - If there is no data waiting on the commit bus this clock cycle, but there
is data that has been queued into the input FIFO, then go to a state where this
data status on the scoreboard gets updated.
3 - If there are no available reservation stations left to handle this
instruction (structural hazard) then just stay in these same state to wait for
a reservation station to become availabe.
*/
`II_FETCH_INSTRUCTION:
begin
rTagMemoryWE = wCommitBusDataAvailabe;
rTagMemOwner = `TAGMEM_OWNER_ISSUE;
rIssueNow = 1'b0;
rIncrementPC = (( ~wReservationStationBusy & ~iInstruction0[`INST_BRANCH_BIT] & wCommitBusInputFifo_Empty) | (~wReservationStationBusy & ~iInstruction0[`INST_BRANCH_BIT] & wCommitBusDataAvailabe));
rPopFifo = 1'b0;
rBypassFifo = wCommitBusDataAvailabe; //Write iCommitBus data directly into tag mem
rUseForwardedData = 1'b0;
rSetPCBranchTaken = 1'b0;
if (wCommitBusDataAvailabe & ~wReservationStationBusy /**/& (iMtEnabled & wMtHasOnceMoreTimeSlot | ~iMtEnabled)/**/)
rNextState = `II_ISSUE_REQUEST_WITH_DATA_FWD;
else if (~wCommitBusInputFifo_Empty)
rNextState = `II_FIFO_UPDATE;
else if ( wReservationStationBusy | (iMtEnabled & ~wMtHasOnceMoreTimeSlot))
rNextState = `II_FETCH_INSTRUCTION;
else
rNextState = `II_ISSUE_REQUEST;
end
//--------------------------------------
//TODO: If the reservation station is Busy (static hazard)
//Then we shall stall the machine...
`II_ISSUE_REQUEST:
begin
rTagMemoryWE = ~iInstruction0[`INST_BRANCH_BIT] & ~wIO_Operation;
rTagMemOwner = `TAGMEM_OWNER_ISSUE;
rIssueNow = iEnable;
rIncrementPC = (iInstruction0[`INST_BRANCH_BIT] & ~wBranchWithDependency & iEnable);
rPopFifo = 1'b0;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b0;
rSetPCBranchTaken = 1'b0;
if (~iEnable & ~wCommitBusInputFifo_Empty)
rNextState = `II_FIFO_UPDATE;
else if (~iEnable & wCommitBusInputFifo_Empty)
rNextState = `II_ISSUE_REQUEST;///////////////////
else
if (iInstruction0[`INST_BRANCH_BIT])
rNextState = `II_UPDATE_PC_BRANCH_OPERATION;
else
rNextState = `II_FETCH_INSTRUCTION;
end
//--------------------------------------
/*
Here the instruction remains the same as in the
previous clock cycle.
*/
`II_ISSUE_REQUEST_WITH_DATA_FWD:
begin
rTagMemoryWE = ~iInstruction0[`INST_BRANCH_BIT] & ~wIO_Operation;
rTagMemOwner = `TAGMEM_OWNER_ISSUE;
rIssueNow = iEnable;
rIncrementPC = (iInstruction0[`INST_BRANCH_BIT] & ~wBranchWithDependency & iEnable);
rPopFifo = 1'b1;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b1;
rSetPCBranchTaken = 1'b0;//wBranchTaken;
if (~iEnable & ~wCommitBusInputFifo_Empty)
rNextState = `II_FIFO_UPDATE;
else if (~iEnable & wCommitBusInputFifo_Empty)
rNextState = `II_ISSUE_REQUEST_WITH_DATA_FWD;
else
if (iInstruction0[`INST_BRANCH_BIT])
rNextState = `II_UPDATE_PC_BRANCH_OPERATION;
else
rNextState = `II_FETCH_INSTRUCTION;
end
//--------------------------------------
`II_FIFO_UPDATE:
begin
rTagMemoryWE = 1'b1;
rTagMemOwner = `TAGMEM_OWNER_FIFO;
rIssueNow = 1'b0;
rIncrementPC = (iMtEnabled & wMtHasOnceMoreTimeSlot /*| ~iMtEnabled*/) & ~wBranchWithDependency & (( ~wReservationStationBusy & ~iInstruction0[`INST_BRANCH_BIT]));//1'b0;
rPopFifo = 1'b1;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b0;
rSetPCBranchTaken = 1'b0;
if (wBranchWithDependency & ~iMtEnabled)
rNextState = `II_UPDATE_PC_BRANCH_OPERATION;
else if ((~iMtEnabled | (iMtEnabled & wMtHasOnceMoreTimeSlot)) & ~wBranchWithDependency & (( ~wReservationStationBusy & ~iInstruction0[`INST_BRANCH_BIT])))
rNextState = `II_ISSUE_REQUEST;
else
rNextState = `II_FETCH_INSTRUCTION;
end
//--------------------------------------
//FIXME: You are assuming that the branch takes 1 cycle.
//This may noy always be the case..
`II_UPDATE_PC_BRANCH_OPERATION:
begin
rTagMemoryWE = 1'b0;
rTagMemOwner = `TAGMEM_OWNER_FIFO;
rIssueNow = 1'b0;
rIncrementPC = 1'b0;
rPopFifo = 1'b1;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b0;
rSetPCBranchTaken = wBranchTaken;
`ifdef NO_STALL_ON_BRANCH_DEPS
rNextState = `II_FETCH_INSTRUCTION;
`else
if (~wBranchWithDependency)
rNextState = `II_FETCH_INSTRUCTION;
else if (~wCommitBusInputFifo_Empty)
rNextState = `II_FIFO_UPDATE;
else
rNextState = `II_UPDATE_PC_BRANCH_OPERATION;
`endif
end
//--------------------------------------
default:
begin
rTagMemOwner = `TAGMEM_OWNER_ISSUE;
rTagMemoryWE = 1'b0;
rIssueNow = 1'b0;
rIncrementPC = 1'b0;
rPopFifo = 1'b0;
rBypassFifo = 1'b0;
rUseForwardedData = 1'b0;
rSetPCBranchTaken = 1'b0;
rNextState = `II_STATE_AFTER_RESET;
end
//--------------------------------------
endcase
end
 
wire [2:0] wInstructionBranchSelection;
assign wInstructionBranchSelection = iInstruction0[`INST_BRANCH_OP_RNG];
wire wCommitFromPendingStation;
assign wCommitFromPendingStation = (iResultBcast[`COMMIT_RSID_RNG] == wReservationStation) ? 1'b1 : 1'b0;
 
assign wBranchTaken =
wCommitFromPendingStation &
iInstruction0[`INST_BRANCH_BIT] &
(
~wInstructionBranchSelection[2] & ~wInstructionBranchSelection[1] & ~wInstructionBranchSelection[0] | //inconditional BRANCH
~wInstructionBranchSelection[2] & ~wInstructionBranchSelection[1] & wInstructionBranchSelection[0] & iZeroFlag | //==
~wInstructionBranchSelection[2] & wInstructionBranchSelection[1] & ~wInstructionBranchSelection[0] & ~iZeroFlag | //!=
~wInstructionBranchSelection[2] & wInstructionBranchSelection[1] & wInstructionBranchSelection[0] & iSignFlag | //<
wInstructionBranchSelection[2] & ~wInstructionBranchSelection[1] & ~wInstructionBranchSelection[0] & (~iSignFlag & ~iZeroFlag)| //>
wInstructionBranchSelection[2] & ~wInstructionBranchSelection[1] & wInstructionBranchSelection[0] & (iSignFlag | iZeroFlag) | //<=
wInstructionBranchSelection[2] & wInstructionBranchSelection[1] & ~wInstructionBranchSelection[0] & (~iSignFlag | iZeroFlag) //>=
);
 
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Latched;
FFD_POSEDGE_SYNCRONOUS_RESET # ( `COMMIT_PACKET_SIZE ) ICOMMIT_BYPASS_FFD
( Clock, Reset, 1'b1 ,iResultBcast , wCommitData_Latched );
 
 
//The Reservation Station scoreboard
wire [SB_ENTRY_WIDTH-1:0] wSBDataPort0;
wire [SB_ENTRY_WIDTH-1:0] wSBDataPort1;
wire[3:0] wReservationStation;
 
`ifdef ADDRESSING_MODES_DISABLED
assign wSBWriteAddress
= (rTagMemOwner == `TAGMEM_OWNER_ISSUE) ? ((rBypassFifo)?iResultBcast[`COMMIT_DST_RNG]:iInstruction0[`INST_DST_RNG])
: wResultFifoData[`COMMIT_DST_RNG];
`else
assign wSBWriteAddress
= (rTagMemOwner == `TAGMEM_OWNER_ISSUE) ? ((rBypassFifo)?iResultBcast[`COMMIT_DST_RNG]:wDestinationIndex)
: wResultFifoData[`COMMIT_DST_RNG];
`endif
assign wSBWriteData
= (rTagMemOwner == `TAGMEM_OWNER_ISSUE) ? ((rBypassFifo)?1'b0:wReservationStation) : 4'b0;
 
wire wTagMemoryWE;
assign wTagMemoryWE = rTagMemoryWE;//(rTagMemoryWE && (iInstruction0[`INST_CODE_RNG] != `OPERATION_OUT)); //Dont store dependencies for IO operations
 
RAM_DUAL_READ_PORT # ( SB_ENTRY_WIDTH, `DATA_ADDRESS_WIDTH ) SB
(
.Clock( Clock ),
.iWriteEnable( wTagMemoryWE ),
.iReadAddress0( oSourceAddress0 ),
.iReadAddress1( oSourceAddress1 ),
.iWriteAddress( wSBWriteAddress ),
.iDataIn( wSBWriteData ),
.oDataOut0( wSBDataPort0 ),
.oDataOut1( wSBDataPort1 )
);
 
 
wire [`INSTRUCTION_ADDR_WIDTH-1:0] wPCInitialValue;
wire [`INSTRUCTION_ADDR_WIDTH-1:0] wPCInitialTmp;
assign wPCInitialTmp = (iInstruction0[`INST_IMM])? wSourceData0[`SRC_RET_ADDR_RNG] : {2'b0,iInstruction0[`INST_DST_RNG]};
 
 
 
assign wPCInitialValue = (rSetPCBranchTaken & ~Reset) ? wPCInitialTmp : iCodeOffset;
 
 
 
//The program counter
UPCOUNTER_POSEDGE # (`INSTRUCTION_ADDR_WIDTH ) PC
(
.Clock( Clock ),
.Reset( Reset | rSetPCBranchTaken ),
.Enable( rIncrementPC & ~wStall ),
.Initial( wPCInitialValue ),
.Q( oIP0 )
);
 
assign oIP1 = iInstruction0[`INST_DST_RNG];
 
 
`ifdef ADDRESSING_MODES_DISABLED
assign oSourceAddress1 = iInstruction0[`INST_SCR1_ADDR_RNG];
 
 
`else
 
assign oSourceAddress1 = (iInstruction0[`INST_IMM]) ? wSourceAddress1_Imm :
((iInstruction0[`INST_SRC1_DISPLACED]) ? wSource1Addr_Displaced: iInstruction0[`INST_SCR1_ADDR_RNG]);
 
assign wSource1Addr_Displaced = iInstruction0[`INST_SCR1_ADDR_RNG] + iFrameOffset;
assign wSource1Addr_Displaced_plus_Index = wSource1Addr_Displaced + iIndexRegister;
 
MUXFULLPARALELL_3SEL_GENERIC # ( `DATA_ADDRESS_WIDTH ) SRC1ADDRMUX
(
.Sel(iInstruction0[`INST_ADDRMODE_RNG]),
.I1(`DATA_ADDRESS_WIDTH'b0),
.I2(`DATA_ADDRESS_WIDTH'b0),
.I3(iInstruction0[`INST_SCR1_ADDR_RNG]),
.I4(iInstruction0[`INST_SCR1_ADDR_RNG]),
.I5(`DATA_ADDRESS_WIDTH'b0),
.I6(`DATA_ADDRESS_WIDTH'b0),
.I7(wSource1Addr_Displaced_plus_Index),
.I8(wSource1Addr_Displaced_plus_Index),
.O1(wSourceAddress1_Imm)
);
`endif
 
 
`ifdef ADDRESSING_MODES_DISABLED
assign oSourceAddress0 = (iInstruction0[`INST_IMM] ) ? iInstruction0[`INST_DST_RNG] : iInstruction0[`INST_SRC0_ADDR_RNG];
`else
 
assign oSourceAddress0 = (iInstruction0[`INST_IMM]) ? wSourceAddress0_Imm :
((iInstruction0[`INST_SRC0_DISPLACED]) ? wSource0Addr_Displaced: iInstruction0[`INST_SRC0_ADDR_RNG]);
 
assign wSource0Addr_Displaced = iInstruction0[`INST_SRC0_ADDR_RNG] + iFrameOffset;
assign wSource0Addr_Displaced_plus_Index = wSource0Addr_Displaced + iIndexRegister;
 
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ADDRESS_WIDTH ) SRC0ADDRMUX
(
.Sel({iInstruction0[`INST_SRC1_DISPLACED],iInstruction0[`INST_SRC0_DISPLACED]}),
.I1(iInstruction0[`INST_DST_RNG]),
.I2(iInstruction0[`INST_DST_RNG]),
.I3(wSource0Addr_Displaced_plus_Index),
.I4(wSource0Addr_Displaced),
.O1(wSourceAddress0_Imm)
);
`endif
 
 
assign wCommitBusDataAvailabe = ((iResultBcast[`COMMIT_RSID_RNG] != `OPERATION_NOP) && (~iIgnoreResultBcast));
 
 
sync_fifo # (`COMMIT_PACKET_SIZE,2 ) RESULT_IN_FIFO
(
.clk( Clock ),
.reset( Reset ),
.din( iResultBcast ),
.wr_en( wCommitBusDataAvailabe ),
.rd_en( rPopFifo ),
.dout( wResultFifoData ),
.empty( wCommitBusInputFifo_Empty )
);
 
 
 
 
 
//Source 1 for IMM values is really DST
 
//Reservation station for SRC0 when handling IMM values is zero
 
wire wSB0FromInCommit,wSB0ForwardDetected;
wire wSB1FromInCommit,wSB1ForwardDetected;
 
assign wSB0FromInCommit = 1'b0;//(rIssueNow && (iResultBcast[`COMMIT_DST_RNG] == oSourceAddress0)) ? 1'b1 : 1'b0;
assign wSB1FromInCommit = 1'b0;//(rIssueNow && (iResultBcast[`COMMIT_DST_RNG] == oSourceAddress1)) ? 1'b1 : 1'b0;
 
`ifdef ADDRESSING_MODES_DISABLED
wire [`DATA_ADDRESS_WIDTH-1:0] wTmpAddr0;
assign wTmpAddr0 = (iInstruction0[`INST_IMM]) ? iInstruction0[`INST_DST_RNG] : iInstruction0[`INST_SRC0_ADDR_RNG];
 
assign wSB0ForwardDetected = (rUseForwardedData && (wCommitData_Latched[`COMMIT_DST_RNG] == wTmpAddr0) ) ? 1'b1 : 1'b0;
assign wSB1ForwardDetected = (rUseForwardedData && (wCommitData_Latched[`COMMIT_DST_RNG] == iInstruction0[`INST_SCR1_ADDR_RNG]) ) ? 1'b1 : 1'b0;
`else
wire [`DATA_ADDRESS_WIDTH-1:0] wTmpAddr0,wTmpAddr1;
assign wTmpAddr0 = oSourceAddress0;
assign wTmpAddr1 = oSourceAddress1;
 
assign wSB0ForwardDetected = (rUseForwardedData && (wCommitData_Latched[`COMMIT_DST_RNG] == wTmpAddr0) /*&& ( wSource0_Station == iResultBcast[`COMMIT_RSID_RNG])*/ ) ? 1'b1 : 1'b0;
assign wSB1ForwardDetected = (rUseForwardedData && (wCommitData_Latched[`COMMIT_DST_RNG] == wTmpAddr1) /*&& ( wSource1_Station == iResultBcast[`COMMIT_RSID_RNG])*/ ) ? 1'b1 : 1'b0;
`endif
 
//FIXME: Use the table to know when dependencies for SRC0 and SRC1 are don't care.
//The check should not be (iInstruction0[`INST_IMM] & iInstruction0[`INST_DEST_ZERO]); it should use the table instead.
assign wSource0_Station = (wSB0FromInCommit | wSB0ForwardDetected | (iInstruction0[`INST_IMM] & iInstruction0[`INST_DEST_ZERO])) ? 4'b0 : wSBDataPort0;
assign wSource1_Station = (iInstruction0[`INST_IMM] | wSB1FromInCommit | wSB1ForwardDetected) ? 4'b0: wSBDataPort1;
 
 
//Handle literal values for IMM. IMM is stored in SRC1.X
 
 
wire [`DATA_ROW_WIDTH-1:0] wImmValue,wSource1_Temp,wSource0_Temp,wSourceData1_Imm,wSourceData0_Imm;
assign wImmValue[`X_RNG] = (iInstruction0[`INST_WE_X]) ? iInstruction0[`INST_IMM_RNG] : `WIDTH'b0;
assign wImmValue[`Y_RNG] = (iInstruction0[`INST_WE_Y]) ? iInstruction0[`INST_IMM_RNG] : `WIDTH'b0;
assign wImmValue[`Z_RNG] = (iInstruction0[`INST_WE_Z]) ? iInstruction0[`INST_IMM_RNG] : `WIDTH'b0;
 
 
 
assign wSource1_Temp[`X_RNG] = (wSB1FromInCommit & iResultBcast[`COMMIT_WE_X]) ? iResultBcast[`COMMIT_X_RNG] :
( (wSB1ForwardDetected & wCommitData_Latched[`COMMIT_WE_X])? wCommitData_Latched[`X_RNG] : iSourceData1[`X_RNG]);
 
assign wSource1_Temp[`Y_RNG] = (wSB1FromInCommit & iResultBcast[`COMMIT_WE_Y]) ? iResultBcast[`COMMIT_Y_RNG] :
( (wSB1ForwardDetected & wCommitData_Latched[`COMMIT_WE_Y]) ? wCommitData_Latched[`Y_RNG] : iSourceData1[`Y_RNG]);
assign wSource1_Temp[`Z_RNG] = (wSB1FromInCommit & iResultBcast[`COMMIT_WE_Z]) ? iResultBcast[`COMMIT_Z_RNG] :
( (wSB1ForwardDetected & wCommitData_Latched[`COMMIT_WE_Z]) ? wCommitData_Latched[`Z_RNG] : iSourceData1[`Z_RNG]);
 
assign wSource0_Temp[`X_RNG] = (wSB0FromInCommit & iResultBcast[`COMMIT_WE_X]) ? iResultBcast[`COMMIT_X_RNG]:
( (wSB0ForwardDetected & & wCommitData_Latched[`COMMIT_WE_X] )? wCommitData_Latched[`X_RNG]:iSourceData0[`X_RNG]);
assign wSource0_Temp[`Y_RNG] = (wSB0FromInCommit & iResultBcast[`COMMIT_WE_Y]) ? iResultBcast[`COMMIT_Y_RNG]:
( (wSB0ForwardDetected & & wCommitData_Latched[`COMMIT_WE_Y])? wCommitData_Latched[`Y_RNG] : iSourceData0[`Y_RNG]);
 
assign wSource0_Temp[`Z_RNG] = (wSB0FromInCommit & iResultBcast[`COMMIT_WE_Z]) ? iResultBcast[`COMMIT_Z_RNG]:
( (wSB0ForwardDetected & & wCommitData_Latched[`COMMIT_WE_Z])? wCommitData_Latched[`Z_RNG] : iSourceData0[`Z_RNG]);
 
 
 
//If the data we are looking for just arrived at iResultBcast then use that,
//otherwise use the data from the Register file or the Immediate values
//assign wSourceData1 = (iInstruction0[`INST_IMM]) ? wImmValue : wSource1_Temp;
//assign wSourceData0 = (iInstruction0[`INST_IMM] && iInstruction0[`INST_DEST_ZERO]) ? `DATA_ROW_WIDTH'd0 : wSource0_Temp;
 
 
assign wSourceData1 = (iInstruction0[`INST_IMM]) ? wSourceData1_Imm : wSource1_Temp;
assign wSourceData0 = (iInstruction0[`INST_IMM]) ? wSourceData0_Imm : wSource0_Temp;
//assign wSourceData0 = (iInstruction0[`INST_IMM] && iInstruction0[`INST_DEST_ZERO]) ? `DATA_ROW_WIDTH'd0 : wSource0_Temp;
 
MUXFULLPARALELL_3SEL_GENERIC # ( `DATA_ROW_WIDTH ) SRC1MUX
(
.Sel({iInstruction0[`INST_DEST_ZERO],iInstruction0[`INST_SRC1_DISPLACED],iInstruction0[`INST_SRC0_DISPLACED]}),
.I1(wImmValue),
.I2(wImmValue),
.I3(wSource1_Temp),
.I4( `DATA_ROW_WIDTH'b0),
.I5( wImmValue ),
.I6( wImmValue ),
.I7( wSource1_Temp ),
.I8( wSource1_Temp ),
.O1(wSourceData1_Imm)
);
 
 
MUXFULLPARALELL_3SEL_GENERIC # ( `DATA_ROW_WIDTH ) SRC0MUX
(
.Sel({iInstruction0[`INST_DEST_ZERO],iInstruction0[`INST_SRC1_DISPLACED],iInstruction0[`INST_SRC0_DISPLACED]}),
.I1( wSource0_Temp ),
.I2( wSource0_Temp ),
.I3( wSource0_Temp ),
.I4( wSource0_Temp ),
.I5( `DATA_ROW_WIDTH'b0 ),
.I6( `DATA_ROW_WIDTH'b0 ),
.I7( `DATA_ROW_WIDTH'b0 ),
.I8( wSource0_Temp ),
.O1( wSourceData0_Imm )
);
 
 
assign wReservationStationBusy = (~iEnable) |
(
((iInstruction0[`INST_CODE_RNG] == `OPERATION_ADD ) && (iRStationBusy[ 0 ] && iRStationBusy[ 1 ])) ||
((iInstruction0[`INST_CODE_RNG] == `OPERATION_DIV ) && iRStationBusy[ 2 ]) ||
((iInstruction0[`INST_CODE_RNG] == `OPERATION_MUL ) && iRStationBusy[ 3 ]) ||
((iInstruction0[`INST_CODE_RNG] == `OPERATION_OUT ) && iRStationBusy[ 6 ])
);
 
assign wBranchWithDependency = (iInstruction0[`INST_BRANCH_BIT] && (wSource0_Station != 0 || wSource1_Station != 0));
 
 
assign wOp = iInstruction0[`INST_CODE_RNG];
 
assign wReservationStation[0] =
(wOp[0] & ~wOp[1] & ~wOp[2] & ~wOp[3] & ~iRStationBusy[ 0 ]) |
(~wOp[0] & wOp[1] & ~wOp[2] & ~wOp[3] & ~iRStationBusy[ 2 ]) |
(~wOp[0] & ~wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[ 4 ]) |
(~wOp[0] & wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[ 6 ]);
 
assign wReservationStation[1] =
(~wOp[0] & wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[ 6 ]) |
(wOp[0] & ~wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[5] ) |
(wOp[0] & ~wOp[1] & ~wOp[2] & ~wOp[3] & iRStationBusy[ 0 ] & ~iRStationBusy[1]) |
(~wOp[0] & wOp[1] & ~wOp[2] & ~wOp[3] & ~iRStationBusy[ 2 ]);
 
 
assign wReservationStation[2] =
(~wOp[0] & wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[ 6 ]) |
(wOp[0] & ~wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[5]) |
(wOp[0] & wOp[1] & ~wOp[2] & ~wOp[3] & ~iRStationBusy[3]) |
(~wOp[0] & ~wOp[1] & wOp[2] & ~wOp[3] & ~iRStationBusy[ 4 ]);
 
assign wReservationStation[3] = 1'b0;
 
//Sign control logic.
//Only works for non-literal operations (INST_IMM == 0)
wire [`ISSUE_SRCTAG_SIZE-1:0] wIssueTag0,wIssueTag1;
 
assign wIssueTag0 = (iInstruction0[`INST_IMM]) ? `ISSUE_SRCTAG_SIZE'b0 : {iInstruction0[`INST_SRC0_SIGN_RNG],iInstruction0[`INST_SRC0_SWZL_RNG] };
assign wIssueTag1 = (iInstruction0[`INST_IMM]) ? `ISSUE_SRCTAG_SIZE'b0 : {iInstruction0[`INST_SRC1_SIGN_RNG],iInstruction0[`INST_SCR1_SWZL_RNG] };
wire [`DATA_ADDRESS_WIDTH -1:0] wDestinationIndex;
 
 
`ifdef ADDRESSING_MODES_DISABLED
assign wDestinationIndex = iInstruction0[`INST_DST_RNG];
`else
 
wire [`DATA_ADDRESS_WIDTH -1:0] wDestIndexDisplaced,wDestinationIndex_NoIMM,wDestinationIndex_IMM;
 
assign wDestIndexDisplaced = (iInstruction0[`INST_DST_RNG] + iFrameOffset);
assign wDestinationIndex_NoIMM = (iInstruction0[`INST_DEST_ZERO]) ? wDestIndexDisplaced : iInstruction0[`INST_DST_RNG];
 
 
MUXFULLPARALELL_3SEL_GENERIC # ( `DATA_ADDRESS_WIDTH ) DSTMUX
(
.Sel({iInstruction0[`INST_DEST_ZERO],iInstruction0[`INST_SRC1_DISPLACED],iInstruction0[`INST_SRC0_DISPLACED]}),
.I1(iInstruction0[`INST_DST_RNG]),
.I2(wDestIndexDisplaced),
.I3(wDestIndexDisplaced),
.I4(wDestIndexDisplaced + wSource1_Temp[`X_RNG]),
.I5(iInstruction0[`INST_DST_RNG]),
.I6(wDestIndexDisplaced),
.I7(iInstruction0[`INST_DST_RNG]),
.I8(wDestIndexDisplaced),
.O1(wDestinationIndex_IMM)
);
 
 
assign wDestinationIndex = (iInstruction0[`INST_IMM]) ? wDestinationIndex_IMM : wDestinationIndex_NoIMM;
`endif
 
assign oIssueBcast = (Reset | ~rIssueNow | wStall ) ? `ISSUE_PACKET_SIZE'b0 :
{
wReservationStation,
wDestinationIndex,
iInstruction0[`INST_WE_RNG],
iInstruction0[`INST_SCOP_RNG],
wSource1_Station,
wIssueTag1,
wSourceData1,
wSource0_Station,
wIssueTag0,
wSourceData0
 
};
 
endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_Sqrt_Station.v
0,0 → 1,83
`include "aDefinitions.v"
 
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//---------------------------------------------------------------------------
// SQRT_STATION
// Pairs a single-cycle reservation station (RS) with the fixed point square
// root unit (SQRT). The station hands the X lane of its source-1 operand to
// the square root unit; the scalar result is copied into all three lanes of
// the result row that the station places on oCommitData.
//
//   iIssueBus / iCommitBus : modified issue and commit buses watched by RS
//   iId                    : identifier of this station (RS .iMyId)
//   oCommitData            : commit packet fields driven by RS
//   oCommitResquest        : commit request towards the arbiter
//   iCommitGranted         : grant coming back from the arbiter
//   oBusy                  : busy flag reported by RS
//---------------------------------------------------------------------------
module SQRT_STATION
(
input wire Clock,
input wire Reset,
input wire [`MOD_ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
input wire [3:0] iId,
output wire [`COMMIT_PACKET_SIZE-1:0] oCommitData,
output wire oCommitResquest,
input wire iCommitGranted,
output wire oBusy
);

	// Start / finish handshake between the station and the square root unit
	wire wSqrtStart;
	wire wSqrtDone;

	// Operand rows handed out by the reservation station
	wire [`DATA_ROW_WIDTH-1:0] wOperandSrc1;
	wire [`DATA_ROW_WIDTH-1:0] wOperandSrc0;	// connected to RS, not consumed here

	// Scalar result of the square root unit
	wire [`WIDTH-1:0] wSqrtResult;


	FixedPointSquareRoot SQRT
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.iInputReady( wSqrtStart ),
		.Operand( wOperandSrc1[`X_RNG] ),	// only the X lane is used
		.Result( wSqrtResult ),
		.OutputReady( wSqrtDone )
	);


	ReservationStation_1Cycle RS
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.iMyId( iId ),
		.iIssueBus( iIssueBus ),
		.iCommitBus( iCommitBus ),
		.iCommitGranted( iCommitGranted ),
		.iExecutionDone( wSqrtDone ),
		.iResult( {3{wSqrtResult}} ),		// same scalar in every lane
		.oTrigger( wSqrtStart ),
		.oSource1( wOperandSrc1 ),
		.oSource0( wOperandSrc0 ),
		.oBusy( oBusy ),
		.oCommitRequest( oCommitResquest ),
		.oId( oCommitData[`COMMIT_RSID_RNG] ),
		.oWE( oCommitData[`COMMIT_WE_RNG] ),
		.oDestination( oCommitData[`COMMIT_DST_RNG] ),
		.oResult( {oCommitData[`X_RNG],oCommitData[`Y_RNG],oCommitData[`Z_RNG]} )
	);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Unit_Execution.v
0,0 → 1,382
 
`include "aDefinitions.v"
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//---------------------------------------------------------------------------
// Unit_Execution
// Top level of the execution unit. It instantiates:
//   - the register file (RF) and the instruction memory (IM),
//   - two InstructionIssue units (II0, II1), one per thread, that share the
//     register file read ports and the issue bus,
//   - the operand modifier stage (SMU),
//   - seven reservation stations (2 adders, div, mul, sqrt, logic, IO),
//   - a round-robin arbiter (ARB) plus decoder and mux that select which
//     station drives the commit bus.
//
// Ports:
//   iEnable                        : gates the iEnable input of both issue units
//   iInstructionMem_Write*         : write port of the instruction memory
//   oOMEMWrite{Address,Data,Enable}: driven directly by the IO station
//
// Parameter:
//   MaxThreads : width of the free-running counter wDelay (only bit 0 of
//                that counter is used, to pace the thread selector).
//---------------------------------------------------------------------------
module Unit_Execution
(
input wire Clock,
input wire Reset,
input wire iEnable,
input wire [`INSTRUCTION_ADDR_WIDTH-1:0] iInstructionMem_WriteAddress,
input wire iInstructionMem_WriteEnable,
input wire [`INSTRUCTION_WIDTH-1:0] iInstructionMem_WriteData,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteAddress,
output wire [`DATA_ROW_WIDTH-1:0] oOMEMWriteData,
output wire oOMEMWriteEnable

);

//---------------------------------------------------------------------------
// Net declarations.
// Every net is declared here, ahead of its first use: the project compiles
// with `default_nettype none, and Verilog requires a net to be declared
// before it is referenced.
//---------------------------------------------------------------------------

// Nets that are declared but not referenced by any logic in this module.
// Kept so that existing hierarchical references keep resolving.
wire [`INSTRUCTION_ADDR_WIDTH -1:0] wII_2_IM_IP0;
wire [`INSTRUCTION_ADDR_WIDTH -1:0] wII_2_IM_IP1;
wire [`INSTRUCTION_WIDTH-1:0] wIM_2_II_Instruction0;
wire [`INSTRUCTION_WIDTH-1:0] wIM_2_II_Instruction1;

// Register file read addresses / data (shared by both issue units)
wire [`DATA_ADDRESS_WIDTH-1:0] wII_2_RF_Addr0;
wire [`DATA_ADDRESS_WIDTH-1:0] wII_2_RF_Addr1;
wire [`DATA_ROW_WIDTH-1:0] wRF_2_II_Data0;
wire [`DATA_ROW_WIDTH-1:0] wRF_2_II_Data1;

// Reservation station status, issue bus and commit arbitration
wire [`NUMBER_OF_RSVR_STATIONS-1:0] wRS_2_II_Busy;
wire [`ISSUE_PACKET_SIZE-1:0] wIssueBus,wModIssue;
wire [`NUMBER_OF_RSVR_STATIONS-1:0] wStationCommitRequest;
wire [`NUMBER_OF_RSVR_STATIONS-1:0] wStationCommitGrant;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitBus;
wire [`MOD_COMMIT_PACKET_SIZE-1:0] wModCommitBus;
wire [3:0] wBusSelector;

// Commit packets produced by each station
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Adder0;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Adder1;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Div;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Mul;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Sqrt;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_Logic;
wire [`COMMIT_PACKET_SIZE-1:0] wCommitData_IO;

// Flags derived from the commit bus, special registers of the register file
wire wZeroFlag;
wire wSignFlag;
wire [`DATA_ADDRESS_WIDTH-1:0] wFrameOffset,wIndexRegister;
wire [`WIDTH-1:0] wThreadControl;

// Per-thread nets of the two issue units (wII0_IP1 / wII1_IP1 are unused
// because the oIP1 ports are left unconnected below)
wire [`INSTRUCTION_ADDR_WIDTH -1:0] wII0_IP0,wII0_IP1;
wire [`INSTRUCTION_ADDR_WIDTH -1:0] wII1_IP0,wII1_IP1;
wire [`DATA_ADDRESS_WIDTH-1:0] wII0_RF_Addr0,wII0_RF_Addr1;
wire [`DATA_ADDRESS_WIDTH-1:0] wII1_RF_Addr0,wII1_RF_Addr1;
wire [`ISSUE_PACKET_SIZE-1:0] wII0_IBus,wII1_IBus;
wire [`INSTRUCTION_WIDTH-1:0] wInstrThread0;
wire [`INSTRUCTION_WIDTH-1:0] wInstrThread1;

// Thread selection
wire [`MAX_THREADS-1:0] wCurrentActiveThread,wCurrentActiveThread_Pre,wCurrentActiveThread_Pre2;

// Destination field of the packet currently on the commit bus
wire [`DATA_ADDRESS_WIDTH-1:0] wResultBCastDst;
assign wResultBCastDst = wCommitBus[`COMMIT_DST_RNG];


// The Register File
RegisterFile # ( `DATA_ROW_WIDTH,`DATA_ADDRESS_WIDTH ) RF
(
.Clock( Clock ),
.Reset( Reset ),
.iWriteEnable( wCommitBus[`COMMIT_WE_RNG] ),
.iReadAddress0( wII_2_RF_Addr0 ),
.iReadAddress1( wII_2_RF_Addr1 ),
.iWriteAddress( wCommitBus[`COMMIT_DST_RNG] ),
.oFrameOffset( wFrameOffset ),
.oIndexRegister( wIndexRegister ),
.oThreadControlRegister( wThreadControl ),
.iData( wCommitBus[`COMMIT_DATA_RNG] ),
.oData0( wRF_2_II_Data0 ),
.oData1( wRF_2_II_Data1 )
);


//Code bank 0: read port 0 serves thread 0 (II0), read port 1 serves thread 1 (II1)
RAM_DUAL_READ_PORT # (`INSTRUCTION_WIDTH, `INSTRUCTION_ADDR_WIDTH) IM
(
.Clock( Clock ),
.iWriteEnable( iInstructionMem_WriteEnable ),
.iReadAddress0( wII0_IP0 ),
.iReadAddress1( wII1_IP0 ),
.iWriteAddress( iInstructionMem_WriteAddress ),
.iDataIn( iInstructionMem_WriteData ),
.oDataOut0( wInstrThread0 ),
.oDataOut1( wInstrThread1 )
);


//**********************************************
// Free-running counter; bit 0 paces the thread selector below.
parameter MaxThreads = 3;
wire [MaxThreads-1:0] wDelay;


UPCOUNTER_POSEDGE # (MaxThreads) UP111
(
.Clock( Clock ), .Reset( Reset),
.Initial(0),
.Enable(1'b1),
.Q(wDelay)
);


// The active thread owns the register file read addresses and the issue bus.
assign wII_2_RF_Addr0 = (wCurrentActiveThread[0]) ? wII0_RF_Addr0 : wII1_RF_Addr0;

assign wII_2_RF_Addr1 = (wCurrentActiveThread[0]) ? wII0_RF_Addr1 : wII1_RF_Addr1;

assign wIssueBus = (wCurrentActiveThread[0]) ? wII0_IBus: wII1_IBus;


// One-hot thread selector: shifts only while multithreading is enabled and
// wDelay[0] is set.
CIRCULAR_SHIFTLEFT_POSEDGE_EX # ( `MAX_THREADS ) THREAD_SELECT
(
.Clock( Clock ),
.Reset( Reset ),
.Initial(`MAX_THREADS'b1),
.Enable( wDelay[0] /*& wDelay[1]*/ & wThreadControl[`SPR_TCONTROL0_MT_ENABLED]),
.O( wCurrentActiveThread_Pre )
);

// The selector output is registered once more before being used.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `MAX_THREADS ) FFD12
( Clock, Reset, 1'b1 , wCurrentActiveThread_Pre , wCurrentActiveThread_Pre2 );

// With multithreading disabled, thread 0 is permanently the active thread.
assign wCurrentActiveThread = (wThreadControl[`SPR_TCONTROL0_MT_ENABLED]) ? wCurrentActiveThread_Pre2 : `MAX_THREADS'b1;


//**********************************************
// NOTE(review): the original comments here talked about freezing the
// instruction input while a thread is inactive; no such logic exists in this
// module, the issue units read the instruction memory outputs directly.

// Issue unit of thread 0. While multithreading is enabled it ignores commit
// packets whose destination address has bit 7 set.
InstructionIssue II0
(
.Clock( Clock ),
.Reset( Reset ),
.iEnable( wCurrentActiveThread[0] & iEnable),
.iFrameOffset( wFrameOffset ),
/* New Apr 06*/.iCodeOffset( `INSTRUCTION_ADDR_WIDTH'b0 ),
.iMtEnabled(wThreadControl[`SPR_TCONTROL0_MT_ENABLED]),
.iIndexRegister( wIndexRegister ),
.iInstruction0( wInstrThread0 ),
.iSourceData0( wRF_2_II_Data0 ),
.iSourceData1( wRF_2_II_Data1 ),
.iRStationBusy( wRS_2_II_Busy ),
.iResultBcast( wCommitBus ),
.iSignFlag( wSignFlag ),
.iZeroFlag( wZeroFlag ),
.iIgnoreResultBcast( wResultBCastDst[7] & wThreadControl[`SPR_TCONTROL0_MT_ENABLED] ),
.oSourceAddress0( wII0_RF_Addr0 ),
.oSourceAddress1( wII0_RF_Addr1 ),
.oIssueBcast( wII0_IBus ),
.oIP0( wII0_IP0 )
//.oIP1( wII0_IP1 )
);


// Issue unit of thread 1. Held in reset while multithreading is disabled;
// it ignores commit packets whose destination address has bit 7 clear.
// Its code offset comes from the thread control register.
InstructionIssue II1
(
.Clock( Clock ),
.Reset( Reset || ~wThreadControl[`SPR_TCONTROL0_MT_ENABLED] ),
.iEnable( wCurrentActiveThread[1] & iEnable ),
.iFrameOffset( wFrameOffset ),
.iCodeOffset( wThreadControl[`SPR_TCONTROL0_T0_INST_OFFSET_RNG] ),
.iMtEnabled( wThreadControl[`SPR_TCONTROL0_MT_ENABLED] ),
.iIndexRegister( wIndexRegister ),
.iInstruction0( wInstrThread1 ),
.iSourceData0( wRF_2_II_Data0 ),
.iSourceData1( wRF_2_II_Data1 ),
.iRStationBusy( wRS_2_II_Busy ),
.iResultBcast( wCommitBus ),
.iSignFlag( wSignFlag ),
.iZeroFlag( wZeroFlag ),
.iIgnoreResultBcast( ~wResultBCastDst[7] ),
.oSourceAddress0( wII1_RF_Addr0 ),
.oSourceAddress1( wII1_RF_Addr1 ),
.oIssueBcast( wII1_IBus ),
.oIP0( wII1_IP0 )
//.oIP1( wII1_IP1 )
);


// Operand modifiers: turn the raw issue / commit buses into the "Mod"
// versions that the reservation stations listen to.
OperandModifiers SMU
(
.Clock( Clock ),
.Reset( Reset ),
.iIssueBus( wIssueBus ),
.iCommitBus( wCommitBus ),
.oModIssue( wModIssue ),
.oCommitBus( wModCommitBus )
);

// Branch flags taken from the packet on the commit bus:
// sign = all three lane sign bits set, zero = the whole data row is zero.
assign wSignFlag = wCommitBus[`COMMIT_SIGN_X] & wCommitBus[`COMMIT_SIGN_Y] & wCommitBus[`COMMIT_SIGN_Z];
assign wZeroFlag = (wCommitBus[`COMMIT_DATA_RNG] == `DATA_ROW_WIDTH'b0) ? 1'b1 : 1'b0;


//---------------------------------------------------------------------------
// Reservation stations. Station k uses bit k of wStationCommitRequest,
// wStationCommitGrant and wRS_2_II_Busy.
//---------------------------------------------------------------------------
ADDER_STATION ADD_STA0
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_ADD0 ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Adder0 ),
.oCommitResquest( wStationCommitRequest[0] ),
.iCommitGranted( wStationCommitGrant[0] ),
.oBusy( wRS_2_II_Busy[ 0 ] )
);

ADDER_STATION ADD_STA1
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_ADD1 ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Adder1 ),
.oCommitResquest( wStationCommitRequest[1] ),
.iCommitGranted( wStationCommitGrant[1] ),
.oBusy( wRS_2_II_Busy[ 1 ] )
);


DIVISION_STATION DIV_STA
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_DIV ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Div ),
.oCommitResquest( wStationCommitRequest[2] ),
.iCommitGranted( wStationCommitGrant[2] ),
.oBusy( wRS_2_II_Busy[2] )
);


MUL_STATION MUL_STA
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_MUL ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Mul ),
.oCommitResquest( wStationCommitRequest[3] ),
.iCommitGranted( wStationCommitGrant[3] ),
.oBusy( wRS_2_II_Busy[3] )
);


SQRT_STATION SQRT_STA
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_SQRT ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Sqrt ),
.oCommitResquest( wStationCommitRequest[4] ),
.iCommitGranted( wStationCommitGrant[4] ),
.oBusy( wRS_2_II_Busy[4] )
);



LOGIC_STATION LOGIC_STA
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_LOGIC ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_Logic ),
.oCommitResquest( wStationCommitRequest[5] ),
.iCommitGranted( wStationCommitGrant[5] ),
.oBusy( wRS_2_II_Busy[5] )
);

// The IO station additionally drives the output memory write port.
IO_STATION IO_STA
(
.Clock( Clock ),
.Reset( Reset ),
.iId( `RS_IO ),
.iIssueBus( wModIssue ),
.iCommitBus( wModCommitBus ),
.oCommitData( wCommitData_IO ),
.oCommitResquest( wStationCommitRequest[6] ),
.iCommitGranted( wStationCommitGrant[6] ),
.oBusy( wRS_2_II_Busy[6] ),
.oOMEMWriteAddress( oOMEMWriteAddress ),
.oOMEMWriteData( oOMEMWriteData ),
.oOMEMWriteEnable( oOMEMWriteEnable )
);

// Commit bus arbitration between the seven stations.
ROUND_ROBIN_7_ENTRIES ARB
//ROUND_ROBIN_6_ENTRIES ARB
(
.Clock( Clock ),
.Reset( Reset ),
.iRequest0( wStationCommitRequest[0] ),
.iRequest1( wStationCommitRequest[1] ),
.iRequest2( wStationCommitRequest[2] ),
.iRequest3( wStationCommitRequest[3] ),
.iRequest4( wStationCommitRequest[4] ),
.iRequest5( wStationCommitRequest[5] ),
.iRequest6( wStationCommitRequest[6] ),
.oGrant0( wStationCommitGrant[0] ),
.oGrant1( wStationCommitGrant[1] ),
.oGrant2( wStationCommitGrant[2] ),
.oGrant3( wStationCommitGrant[3] ),
.oGrant4( wStationCommitGrant[4] ),
.oGrant5( wStationCommitGrant[5] ),
.oGrant6( wStationCommitGrant[6] )

);


// Grant vector -> binary selector for the commit bus mux.
DECODER_ONEHOT_2_BINARY DECODER
(
.iIn( wStationCommitGrant ),
.oOut( wBusSelector )
);


// Commit bus mux. Input 1 is the idle (all zero) packet; inputs 2..8 follow
// the station order used for the request/grant bits.
//  - The select port of this mux is driven with 3-bit values elsewhere in the
//    design, so the low three bits of the 4-bit decoder output are selected
//    explicitly instead of relying on implicit port truncation.
//  - I8 was previously left unconnected, so the IO station commit packet
//    never reached the commit bus.
//    NOTE(review): this assumes DECODER_ONEHOT_2_BINARY maps grant bit 6 to
//    selector value 7, continuing the I2 = grant 0 ... I7 = grant 5 pattern;
//    confirm against the decoder.
MUXFULLPARALELL_3SEL_GENERIC # (`COMMIT_PACKET_SIZE ) MUX
(
.Sel(wBusSelector[2:0]),
.I1(`COMMIT_PACKET_SIZE'b0),
.I2(wCommitData_Adder0),
.I3(wCommitData_Adder1),
.I4(wCommitData_Div),
.I5(wCommitData_Mul),
.I6(wCommitData_Sqrt),
.I7(wCommitData_Logic),
.I8(wCommitData_IO),
.O1(wCommitBus)
);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_Logic_Station.v
0,0 → 1,151
`include "aDefinitions.v"
 
 
/**********************************************************************************
Theia, Ray Cast Programable graphic Processing Unit.
Copyright (C) 2012 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
 
//---------------------------------------------------------------------------
// LOGIC_STATION
// Reservation station for bitwise logic operations. A single-cycle
// reservation station (RS) feeds three AND units and three OR units (one per
// X/Y/Z lane); a mux driven by the station's oScale output picks which
// result row is written back (0 = AND, 1 = OR, 2 and 3 = zero).
//
//   iIssueBus / iCommitBus : modified issue and commit buses watched by RS
//   iId                    : identifier of this station (RS .iMyId)
//   oCommitData            : commit packet fields driven by RS
//   oCommitResquest        : commit request towards the arbiter
//   iCommitGranted         : grant coming back from the arbiter
//   oBusy                  : busy flag reported by RS
//---------------------------------------------------------------------------
module LOGIC_STATION
(
input wire Clock,
input wire Reset,
input wire [`MOD_ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
input wire [3:0] iId,
output wire [`COMMIT_PACKET_SIZE-1:0] oCommitData,
output wire oCommitResquest,
input wire iCommitGranted,
output wire oBusy
);

wire wExeDone;
wire [2:0] wExeDoneTmp;		// per-lane done flags of the AND units
wire [2:0] wOrDoneTmp;		// per-lane done flags of the OR units
wire wRS1_2_ADD_Trigger;
wire [`DATA_ROW_WIDTH-1:0] wRS1_OperandA;
wire [`DATA_ROW_WIDTH-1:0] wRS1_OperandB;
wire [`DATA_ROW_WIDTH-1:0] wAND,wOR,wResult;
wire [1:0] wResultSelector;

ReservationStation_1Cycle RS
(
.Clock( Clock ),
.Reset( Reset ),
.iIssueBus( iIssueBus ),
.iCommitBus( iCommitBus ),
.iMyId( iId ),
.iExecutionDone( wExeDone ),
.iResult( wResult ),
.iCommitGranted( iCommitGranted ),
.oSource1( wRS1_OperandA ),
.oSource0( wRS1_OperandB ),
.oBusy( oBusy ),
.oTrigger( wRS1_2_ADD_Trigger ),
.oCommitRequest( oCommitResquest ),
.oId( oCommitData[`COMMIT_RSID_RNG] ),
.oWE( oCommitData[`COMMIT_WE_RNG] ),
.oDestination( oCommitData[`COMMIT_DST_RNG] ),
.oScale(wResultSelector),
.oResult( {oCommitData[`X_RNG],oCommitData[`Y_RNG],oCommitData[`Z_RNG]})
);


// Result selection: 0 -> AND, 1 -> OR, 2 and 3 -> zero.
MUXFULLPARALELL_2SEL_GENERIC # ( `DATA_ROW_WIDTH ) MUX1
(
.Sel( wResultSelector ),
.I1( wAND ),
.I2( wOR ),
.I3(`DATA_ROW_WIDTH'b0),
.I4(`DATA_ROW_WIDTH'b0),
.O1(wResult)
);

// Execution is reported done when all three AND lanes are done.
// The OR units used to drive the very same wExeDoneTmp bits, which left each
// bit with two drivers; their done flags now go to wOrDoneTmp instead.
// NOTE(review): this assumes the OR units finish in the same cycle as the AND
// units (both are started by the same trigger) - confirm against the AND / OR
// module implementations.
assign wExeDone = wExeDoneTmp[0] & wExeDoneTmp[1] & wExeDoneTmp[2];

//TODO: Only AND and OR are implemented; further operations (NOT, etc.) can
//use the two free inputs of MUX1.
AND # (`WIDTH) AND_0
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`X_RNG] ),
.iB( wRS1_OperandB[`X_RNG] ),
.oDone( wExeDoneTmp[0] ),
.oR( wAND[`X_RNG] )
);

AND # (`WIDTH) AND_1
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`Y_RNG] ),
.iB( wRS1_OperandB[`Y_RNG] ),
.oDone( wExeDoneTmp[1] ),
.oR( wAND[`Y_RNG] )
);

AND # (`WIDTH) AND_2
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`Z_RNG] ),
.iB( wRS1_OperandB[`Z_RNG] ),
.oDone( wExeDoneTmp[2] ),
.oR( wAND[`Z_RNG] )
);

OR # (`WIDTH) OR_0
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`X_RNG] ),
.iB( wRS1_OperandB[`X_RNG] ),
.oDone( wOrDoneTmp[0] ),
.oR( wOR[`X_RNG] )
);

OR # (`WIDTH) OR_1
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`Y_RNG] ),
.iB( wRS1_OperandB[`Y_RNG] ),
.oDone( wOrDoneTmp[1] ),
.oR( wOR[`Y_RNG] )
);

OR # (`WIDTH) OR_2
(
.Clock( Clock ),
.Reset( Reset ),
.iTrigger( wRS1_2_ADD_Trigger ),
.iA( wRS1_OperandA[`Z_RNG] ),
.iB( wRS1_OperandB[`Z_RNG] ),
.oDone( wOrDoneTmp[2] ),
.oR( wOR[`Z_RNG] )
);
endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_FixedPointSquareRoot.v
0,0 → 1,212
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
//Lookup table of square roots for integer inputs up to 100, in fixed point with scale 17 (1.0 == 32'h20000)
module SQUAREROOT_LUT
(
input wire[`WIDTH-1:0] I,
output reg [`WIDTH-1:0] O
);
 
 
always @( I )
begin
case (I)
32'h0: O = 32'h0;
32'h20000: O = 32'h20000;
32'h40000: O = 32'h2d413;
32'h60000: O = 32'h376cf;
32'h80000: O = 32'h40000;
32'ha0000: O = 32'h478dd;
32'hc0000: O = 32'h4e623;
32'he0000: O = 32'h54a9f;
32'h100000: O = 32'h5a827;
32'h120000: O = 32'h60000;
32'h140000: O = 32'h65316;
32'h160000: O = 32'h6a21c;
32'h180000: O = 32'h6ed9e;
32'h1a0000: O = 32'h7360a;
32'h1c0000: O = 32'h77bba;
32'h1e0000: O = 32'h7bef7;
32'h200000: O = 32'h80000;
32'h220000: O = 32'h83f07;
32'h240000: O = 32'h87c3b;
32'h260000: O = 32'h8b7c1;
32'h280000: O = 32'h8f1bb;
32'h2a0000: O = 32'h92a47;
32'h2c0000: O = 32'h9617e;
32'h2e0000: O = 32'h99777;
32'h300000: O = 32'h9cc47;
32'h320000: O = 32'ha0000;
32'h340000: O = 32'ha32b2;
32'h360000: O = 32'ha646e;
32'h380000: O = 32'ha953f;
32'h3a0000: O = 32'hac534;
32'h3c0000: O = 32'haf456;
32'h3e0000: O = 32'hb22b2;
32'h400000: O = 32'hb504f;
32'h420000: O = 32'hb7d37;
32'h440000: O = 32'hba972;
32'h460000: O = 32'hbd508;
32'h480000: O = 32'hc0000;
32'h4a0000: O = 32'hc2a5f;
32'h4c0000: O = 32'hc542e;
32'h4e0000: O = 32'hc7d70;
32'h500000: O = 32'hca62c;
32'h520000: O = 32'hcce66;
32'h540000: O = 32'hcf623;
32'h560000: O = 32'hd1d68;
32'h580000: O = 32'hd4439;
32'h5a0000: O = 32'hd6a99;
32'h5c0000: O = 32'hd908d;
32'h5e0000: O = 32'hdb618;
32'h600000: O = 32'hddb3d;
32'h620000: O = 32'he0000;
32'h640000: O = 32'he2463;
32'h660000: O = 32'he4869;
32'h680000: O = 32'he6c15;
32'h6a0000: O = 32'he8f6a;
32'h6c0000: O = 32'heb26a;
32'h6e0000: O = 32'hed517;
32'h700000: O = 32'hef775;
32'h720000: O = 32'hf1983;
32'h740000: O = 32'hf3b46;
32'h760000: O = 32'hf5cbf;
32'h780000: O = 32'hf7def;
32'h7a0000: O = 32'hf9ed9;
32'h7c0000: O = 32'hfbf7d;
32'h7e0000: O = 32'hfdfdf;
32'h800000: O = 32'h100000;
32'h820000: O = 32'h101fe0;
32'h840000: O = 32'h103f81;
32'h860000: O = 32'h105ee6;
32'h880000: O = 32'h107e0f;
32'h8a0000: O = 32'h109cfd;
32'h8c0000: O = 32'h10bbb3;
32'h8e0000: O = 32'h10da30;
32'h900000: O = 32'h10f876;
32'h920000: O = 32'h111687;
32'h940000: O = 32'h113463;
32'h960000: O = 32'h11520c;
32'h980000: O = 32'h116f83;
32'h9a0000: O = 32'h118cc8;
32'h9c0000: O = 32'h11a9dc;
32'h9e0000: O = 32'h11c6c1;
32'ha00000: O = 32'h11e377;
32'ha20000: O = 32'h120000;
32'ha40000: O = 32'h121c5b;
32'ha60000: O = 32'h12388a;
32'ha80000: O = 32'h12548e;
32'haa0000: O = 32'h127068;
32'hac0000: O = 32'h128c17;
32'hae0000: O = 32'h12a79e;
32'hb00000: O = 32'h12c2fc;
32'hb20000: O = 32'h12de32;
32'hb40000: O = 32'h12f942;
32'hb60000: O = 32'h13142b;
32'hb80000: O = 32'h132eee;
32'hba0000: O = 32'h13498c;
32'hbc0000: O = 32'h136406;
32'hbe0000: O = 32'h137e5b;
32'hc00000: O = 32'h13988e;
32'hc20000: O = 32'h13b29d;
32'hc40000: O = 32'h13cc8a;
32'hc60000: O = 32'h13e655;
32'hc80000: O = 32'h140000;
32'hca0000: O = 32'h141989;
32'hcc0000: O = 32'h1432f2;
32'hce0000: O = 32'h144c3b;
32'hd00000: O = 32'h146565;
32'hd20000: O = 32'h147e70;
32'hd40000: O = 32'h14975c;
32'hd60000: O = 32'h14b02b;
32'hd80000: O = 32'h14c8dc;
32'hda0000: O = 32'h14e16f;
32'hdc0000: O = 32'h14f9e6;
32'hde0000: O = 32'h151241;
32'he00000: O = 32'h152a7f;
32'he20000: O = 32'h1542a2;
32'he40000: O = 32'h155aaa;
32'he60000: O = 32'h157296;
32'he80000: O = 32'h158a68;
32'hea0000: O = 32'h15a220;
32'hec0000: O = 32'h15b9be;
32'hee0000: O = 32'h15d142;
32'hf00000: O = 32'h15e8ad;
32'hf20000: O = 32'h160000;
32'hf40000: O = 32'h161739;
32'hf60000: O = 32'h162e5a;
32'hf80000: O = 32'h164564;
32'hfa0000: O = 32'h165c55;
32'hfc0000: O = 32'h16732f;
32'hfe0000: O = 32'h1689f2; //127 -> 1111111,00000000000000000
 
default:
begin
//$display("SQUARE ROOT SAYS: Shit, got %d\n",I << `SCALE);
O = 32'h00caca;
end
endcase
end //always
endmodule
 
//------------------------------------------------------------------------------
// FixedPointSquareRoot
// LUT-based square root of a fixed-point operand (scale `SCALE).
//
// Operand     : only bits [`WIDTH-1:0] are used; the fraction is truncated.
// iInputReady : input-valid strobe.
// OutputReady : iInputReady delayed by one clock.
// Result      : registered result, valid together with OutputReady.
//
// SQUAREROOT_LUT only has entries for integer values 0..127. Larger operands
// are divided by 64 before the lookup and the looked-up root is multiplied
// by 8 afterwards, since SQRT(x) = SQRT(64 * x/64) = 8 * SQRT(x/64).
// Operands whose integer part is >= 8192 still fall outside the table after
// the division and therefore yield the LUT's default marker value (shifted).
//------------------------------------------------------------------------------
module FixedPointSquareRoot
(
    input  wire                    Clock,
    input  wire                    Reset,
    input  wire [`LONG_WIDTH-1:0]  Operand,
    input  wire                    iInputReady,
    output wire                    OutputReady,
    output wire [`WIDTH-1:0]       Result
);

    // The valid strobe travels alongside the single result register stage.
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFDelay1
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( iInputReady ),
        .Q( OutputReady )
    );

    // The LUT covers integer parts 0..127 (7 bits). Any set bit above those
    // seven integer bits means the operand is out of the table's range.
    // (Testing only bit 7 would miss values such as 256 or 512.)
    wire wNotInLUT;
    assign wNotInLUT = |Operand[`WIDTH-1:7+`SCALE];

    wire [`WIDTH-1:0] wScaledOperand;

    assign wScaledOperand = (wNotInLUT == 1'b0 ) ?
        {Operand[`WIDTH-1:`SCALE],{`SCALE{1'b0}}} :            // drop the fraction, keep the integer part
        {6'b0,Operand[`WIDTH-1:`SCALE+6],{`SCALE{1'b0}}};      // integer part shifted right 6 bits (divide by 64)

    wire [`WIDTH-1:0] wResult,wScaleResult;
    SQUAREROOT_LUT SQRT
    (
        .I( wScaledOperand ),
        .O( wScaleResult )
    );

    // Undo the pre-scaling: SQRT(64) = 8, i.e. a left shift by 3 bits.
    assign wResult = (wNotInLUT == 1'b0 ) ? wScaleResult : {wScaleResult[`WIDTH-4:0],3'b0};

    FFD_POSEDGE_SYNCRONOUS_RESET # (`WIDTH) FFRESULT
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( wResult ),
        .Q( Result )
    );

//--------------------------------------------------------------------------------
endmodule
 
/theia_gpu/branches/beta_2.0/rtl/Collaterals.v
0,0 → 1,1478
`ifndef COLLATERALS_V
`define COLLATERALS_V
 
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programmable Graphics Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
 
//----------------------------------------------------
// D-type register, SIZE bits wide, rising-edge clocked.
//   Reset  : synchronous, clears Q to all zeros (takes priority over Enable)
//   Enable : Q captures D only while Enable is high, otherwise Q is held
module FFD_POSEDGE_SYNCRONOUS_RESET # ( parameter SIZE=`WIDTH )
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire             Enable,
    input  wire [SIZE-1:0]  D,
    output reg  [SIZE-1:0]  Q
);

    always @ (posedge Clock)
    begin
        if ( Reset )
            Q <= {SIZE{1'b0}};
        else if ( Enable )
            Q <= D;
    end

endmodule
//------------------------------------------------
// Q is high while Enable and D are high and the registered copy of D
// (captured on Clock edges where Enable is high) is still low.
// Q is forced low whenever Enable is low.
module PULSE
(
    input  wire Clock,
    input  wire Reset,
    input  wire Enable,
    input  wire D,
    output wire Q
);

    wire wPreviousD;   // D as sampled at the last enabled clock edge
    wire wRising;      // D is high and differs from the sampled value

    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( Enable ),
        .D( D ),
        .Q( wPreviousD )
    );

    assign wRising = (D ^ wPreviousD) & D;
    assign Q       = (Enable) ? wRising : 1'b0;

endmodule
//------------------------------------------------
// Registered adder.
//   oR    : iA + iB, captured on the clock edge where iTrigger is high
//   oDone : iTrigger delayed by one clock
module ADDER # (parameter SIZE=`WIDTH)
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire             iTrigger,
    input  wire [SIZE-1:0]  iA,iB,
    output wire [SIZE-1:0]  oR,
    output wire             oDone
);

    wire [SIZE-1:0] wSum;
    assign wSum = iA + iB;

    // Completion strobe: always enabled, simply follows iTrigger
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD0
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( iTrigger ),
        .Q( oDone )
    );

    // Result register: only updated when triggered
    FFD_POSEDGE_SYNCRONOUS_RESET # (SIZE) FFD
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( iTrigger ),
        .D( wSum ),
        .Q( oR )
    );

endmodule
//------------------------------------------------
// Registered bitwise OR.
//   oR    : iA | iB, captured on the clock edge where iTrigger is high
//   oDone : iTrigger delayed by one clock
module OR # (parameter SIZE=`WIDTH)
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire             iTrigger,
    input  wire [SIZE-1:0]  iA,iB,
    output wire [SIZE-1:0]  oR,
    output wire             oDone
);

    wire [SIZE-1:0] wOrValue;
    assign wOrValue = iA | iB;

    // Completion strobe: always enabled, simply follows iTrigger
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD0
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( iTrigger ),
        .Q( oDone )
    );

    // Result register: only updated when triggered
    FFD_POSEDGE_SYNCRONOUS_RESET # (SIZE) FFD
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( iTrigger ),
        .D( wOrValue ),
        .Q( oR )
    );

endmodule
//------------------------------------------------
// Registered bitwise AND.
//   oR    : iA & iB, captured on the clock edge where iTrigger is high
//   oDone : iTrigger delayed by one clock
module AND # (parameter SIZE=`WIDTH)
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire             iTrigger,
    input  wire [SIZE-1:0]  iA,iB,
    output wire [SIZE-1:0]  oR,
    output wire             oDone
);

    wire [SIZE-1:0] wAndValue;
    assign wAndValue = iA & iB;

    // Completion strobe: always enabled, simply follows iTrigger
    FFD_POSEDGE_SYNCRONOUS_RESET # (1) FFD0
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( 1'b1 ),
        .D( iTrigger ),
        .Q( oDone )
    );

    // Result register: only updated when triggered
    FFD_POSEDGE_SYNCRONOUS_RESET # (SIZE) FFD
    (
        .Clock( Clock ),
        .Reset( Reset ),
        .Enable( iTrigger ),
        .D( wAndValue ),
        .Q( oR )
    );

endmodule
//------------------------------------------------
// Up counter, SIZE bits wide.
//   Reset  : synchronous, loads Q with Initial
//   Enable : Q increments by one on each clock edge while high
module UPCOUNTER_POSEDGE # (parameter SIZE=`WIDTH)
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire [SIZE-1:0]  Initial,
    input  wire             Enable,
    output reg  [SIZE-1:0]  Q
);

    always @(posedge Clock )
    begin
        if (Reset)
            Q <= Initial;
        else if (Enable)
            Q <= Q + 1;
    end

endmodule
 
//----------------------------------------------------------------------
// Converts a 6-bit one-hot code into a number: bit N set -> N+1.
// An all-zero input, or any input with more than one bit set, gives 0.
module DECODER_ONEHOT_2_BINARY
(
    input  wire [5:0] iIn,
    output reg  [5:0] oOut
);

    always @ (*)
    begin
        case (iIn)
            6'b100000: oOut = 6'd6;
            6'b010000: oOut = 6'd5;
            6'b001000: oOut = 6'd4;
            6'b000100: oOut = 6'd3;
            6'b000010: oOut = 6'd2;
            6'b000001: oOut = 6'd1;
            6'b000000: oOut = 6'd0;
            default:   oOut = 6'd0;   // not a one-hot pattern
        endcase
    end

endmodule
//----------------------------------------------------------------------
 
// Binary to one-hot decoder with enable.
//   En = 1 : O has only bit number Sel set (1 << Sel)
//   En = 0 : O is all zeros
module SELECT_1_TO_N # ( parameter SEL_WIDTH=4, parameter OUTPUT_WIDTH=16 )
(
    input  wire [SEL_WIDTH-1:0]     Sel,
    input  wire                     En,
    output wire [OUTPUT_WIDTH-1:0]  O
);

    // One-hot image of Sel, sized to the output width
    wire [OUTPUT_WIDTH-1:0] wOneHot = (1 << Sel);

    assign O = ( En ) ? wOneHot : 0;

endmodule
 
//----------------------------------------------------------------------
// Generic CHANNELS-to-1 multiplexer.
//   in_bus : all channels concatenated; channel k occupies
//            in_bus[k*WIDTH +: WIDTH] (channel 0 in the least significant bits)
//   sel    : index of the channel driven onto out
module MUXFULLPARALELL_GENERIC #(parameter WIDTH = `WIDTH, parameter CHANNELS = 4, parameter SELBITS = 2)
(
    input  wire [(CHANNELS*WIDTH)-1:0]  in_bus,
    input  wire [SELBITS-1:0]           sel,
    output wire [WIDTH-1:0]             out
);

    // Slice the selected channel straight out of the flat bus
    assign out = in_bus[(sel*WIDTH)+:WIDTH];

endmodule
//----------------------------------------------------------------------
// 4-to-1 multiplexer, SIZE bits wide, binary-encoded select.
//   Sel = 0..3 routes I1..I4 to O1.
// The default branch (only reachable when Sel carries X/Z in simulation)
// drives the value of the SIZE parameter onto O1.
module MUXFULLPARALELL_2SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
    input  wire [1:0]       Sel,
    input  wire [SIZE-1:0]  I1,
    input  wire [SIZE-1:0]  I2,
    input  wire [SIZE-1:0]  I3,
    input  wire [SIZE-1:0]  I4,
    output reg  [SIZE-1:0]  O1
);

    always @( * )
    begin
        case (Sel)
            2'b11:   O1 = I4;
            2'b10:   O1 = I3;
            2'b01:   O1 = I2;
            2'b00:   O1 = I1;
            default: O1 = SIZE;
        endcase
    end

endmodule
//------------------------------------------------------------------------
// 8-to-1 multiplexer, SIZE bits wide, binary-encoded select.
//   Sel = 0..7 routes I1..I8 to O1.
// The default branch (only reachable when Sel carries X/Z in simulation)
// drives the value of the SIZE parameter onto O1.
module MUXFULLPARALELL_3SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
    input  wire [2:0]       Sel,
    input  wire [SIZE-1:0]  I1,
    input  wire [SIZE-1:0]  I2,
    input  wire [SIZE-1:0]  I3,
    input  wire [SIZE-1:0]  I4,
    input  wire [SIZE-1:0]  I5,
    input  wire [SIZE-1:0]  I6,
    input  wire [SIZE-1:0]  I7,
    input  wire [SIZE-1:0]  I8,
    output reg  [SIZE-1:0]  O1
);

    always @( * )
    begin
        case (Sel)
            3'b111:  O1 = I8;
            3'b110:  O1 = I7;
            3'b101:  O1 = I6;
            3'b100:  O1 = I5;
            3'b011:  O1 = I4;
            3'b010:  O1 = I3;
            3'b001:  O1 = I2;
            3'b000:  O1 = I1;
            default: O1 = SIZE;
        endcase
    end

endmodule
//------------------------------------------------------------------------
// Left-shifting register that reloads itself.
//   Reset  : synchronous, loads Initial
//   Enable : while high, shifts left by one each clock; once the top bit is
//            set, the next enabled clock reloads Initial instead of shifting
// The register holds its value while Enable is low.
module CIRCULAR_SHIFTLEFT_POSEDGE_EX # ( parameter SIZE=`WIDTH )
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire [SIZE-1:0]  Initial,
    input  wire             Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] rRing;

    assign O = rRing;

    always @(posedge Clock)
    begin
        if (Reset)
            rRing <= Initial;
        else if (Enable && rRing[SIZE-1])
            rRing <= Initial;          // top bit reached: wrap around
        else if (Enable)
            rRing <= rRing << 1;
    end

endmodule
//------------------------------------------------
// 3-to-1 multiplexer with a one-hot ("walking one") select.
//   Sel = 001 -> I1, 010 -> I2, 100 -> I3
// Any other select pattern drives the value of the SIZE parameter onto O1.
module MUXFULLPARALELL_3SEL_WALKINGONE # ( parameter SIZE=`WIDTH )
(
    input  wire [2:0]       Sel,
    input  wire [SIZE-1:0]  I1,
    input  wire [SIZE-1:0]  I2,
    input  wire [SIZE-1:0]  I3,
    output reg  [SIZE-1:0]  O1
);

    always @( * )
    begin
        case (Sel)
            3'b100:  O1 = I3;
            3'b010:  O1 = I2;
            3'b001:  O1 = I1;
            default: O1 = SIZE;
        endcase
    end

endmodule
//------------------------------------------------
// 3-to-1 multiplexer with enable.
//   EN = 0 : O1 = I1 regardless of SEL
//   EN = 1 : SEL = 0/1/2 routes I1/I2/I3; any other SEL value drives the
//            value of the SIZE parameter onto O1
module MUXFULLPARALELL_3SEL_EN # ( parameter SIZE=`WIDTH )
(
    input  wire [1:0]       SEL,
    input  wire [SIZE-1:0]  I1,
    input  wire [SIZE-1:0]  I2,
    input  wire [SIZE-1:0]  I3,
    input  wire             EN,
    output reg  [SIZE-1:0]  O1
);

    always @( * )
    begin
        // Pass-through of the first input unless the mux is enabled
        O1 = I1;

        if (EN)
        begin
            case (SEL)
                2'b00:   O1 = I1;
                2'b01:   O1 = I2;
                2'b10:   O1 = I3;
                default: O1 = SIZE;
            endcase
        end
    end

endmodule
//------------------------------------------------
// 4-to-1 multiplexer with a one-hot ("walking one") select.
//   Sel = 0001 -> I1, 0010 -> I2, 0100 -> I3, 1000 -> I4
// Any other select pattern drives the value of the SIZE parameter onto O1.
//
// Sel is 4 bits wide. It used to be declared [2:0], which made the 4'b1000
// case label unmatchable and I4 unreachable. A 3-bit signal connected to
// this port is zero-extended and therefore keeps selecting I1..I3 as before.
module MUXFULLPARALELL_4SEL_WALKINGONE # ( parameter SIZE=`WIDTH )
(
    input  wire [3:0]       Sel,
    input  wire [SIZE-1:0]  I1, I2, I3, I4,
    output reg  [SIZE-1:0]  O1
);

    always @( * )
    begin
        case (Sel)
            4'b0001: O1 = I1;
            4'b0010: O1 = I2;
            4'b0100: O1 = I3;
            4'b1000: O1 = I4;
            default: O1 = SIZE;   // not a valid one-hot select
        endcase
    end

endmodule
//------------------------------------------------
// Left-shift register, SIZE bits wide.
//   Reset  : synchronous, loads Initial
//   Enable : shifts left by one (zero shifted in) on each clock while high
module SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire [SIZE-1:0]  Initial,
    input  wire             Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] rShift;

    assign O = rShift;

    always @(posedge Clock)
    begin
        if (Reset)
            rShift <= Initial;
        else if (Enable)
            rShift <= rShift << 1;
    end

endmodule
//------------------------------------------------
//------------------------------------------------
// Left-shifting register that reloads itself.
//   Reload : on Reset, or whenever the top bit is set (independent of Enable),
//            the next clock edge loads Initial
//   Enable : otherwise shifts left by one on each clock while high
module CIRCULAR_SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input  wire             Clock,
    input  wire             Reset,
    input  wire [SIZE-1:0]  Initial,
    input  wire             Enable,
    output wire [SIZE-1:0]  O
);

    reg [SIZE-1:0] rRing;

    assign O = rRing;

    always @(posedge Clock)
    begin
        if (Reset || rRing[SIZE-1])
        begin
            rRing <= Initial;
        end
        else if (Enable)
        begin
            rRing <= rRing << 1;
        end
    end

endmodule
//-----------------------------------------------------------
/*
Set-once ("sticky") flop: once Q has gone to 1 it stays at 1 until Reset.
The truth table is:
Q S Q_next R
0 0 0 0
0 1 1 0
1 0 1 0
1 1 1 0
X X 0 1
The idea is that it toggles from 0 to 1 when S = 1, but if it
gets another S = 1, it keeps the output to 1.

Timing: Q_next is computed on the rising clock edge and copied into Q on
the following falling edge, so Q changes half a clock after S is sampled.
*/
module FFToggleOnce_1Bit
(
input wire Clock,
input wire Reset,
input wire Enable,
input wire S,
output reg Q
);


// Value that Q will take on the next falling clock edge
reg Q_next;

// Output stage: Q has no reset of its own, it simply follows Q_next
// (and therefore reads 0 from the first falling edge after a Reset).
always @ (negedge Clock)
begin
Q <= Q_next;
end

// Next-value stage: synchronous Reset clears, Enable applies the set-once
// rule, otherwise the current output is re-captured.
always @ ( posedge Clock )
begin
if (Reset)
Q_next <= 0;
else if (Enable)
Q_next <= (S && !Q) || Q; // set when S is high, keep once already set
else
Q_next <= Q;
end
endmodule
 
//-----------------------------------------------------------
 
 
// Plain 32-bit register: Q captures D on every rising edge of Clock.
// There is no reset and no enable.
module FFD32_POSEDGE
(
    input  wire         Clock,
    input  wire [31:0]  D,
    output reg  [31:0]  Q
);

    always @ (posedge Clock)
    begin
        Q <= D;
    end

endmodule
 
//------------------------------------------------
// 2-to-1 multiplexer, 96 bits wide: Sel = 0 selects I1, Sel = 1 selects I2.
module MUXFULLPARALELL_96bits_2SEL
(
    input  wire         Sel,
    input  wire [95:0]  I1,
    input  wire [95:0]  I2,
    output reg  [95:0]  O1
);

    always @( * )
    begin
        case (Sel)
            1'b1: O1 = I2;
            1'b0: O1 = I1;
        endcase
    end

endmodule
 
//------------------------------------------------
// 2-to-1 multiplexer, 16 bits wide: Sel = 0 selects I1, Sel = 1 selects I2.
module MUXFULLPARALELL_16bits_2SEL
(
    input  wire         Sel,
    input  wire [15:0]  I1,
    input  wire [15:0]  I2,
    output reg  [15:0]  O1
);

    always @( * )
    begin
        case (Sel)
            1'b1: O1 = I2;
            1'b0: O1 = I1;
        endcase
    end

endmodule
 
//--------------------------------------------------------------
 
// T flip-flop with asynchronous, active-high reset.
//   Reset : clears Q immediately
//   D     : when high at a rising clock edge, Q toggles; when low, Q is held
module FFT1
(
    input  wire D,
    input  wire Clock,
    input  wire Reset,
    output reg  Q
);

    always @ ( posedge Clock or posedge Reset )
    begin
        if (Reset)
            Q <= 1'b0;
        else if (D)
            Q <= ! Q;
    end

endmodule
//--------------------------------------------------------------
/*
module FIFO_SYNCHRNOUS_RESET # ( parameter SIZE=`WIDTH, parameter DEPTH=16 )
(
input wire Clock,
input wire Reset,
wr_cs , // Write chip select
rd_cs , // Read chipe select
input wire iData,
input wire iReadEnable,
input wire[SIZE-1:0] iWriteEnable,
output reg[SIZE-1:0] oData,
output wire oEmpy,
output wire oFull
);
// FIFO constants
parameter DATA_WIDTH = 8;
parameter ADDR_WIDTH = 8;
parameter RAM_DEPTH = (1 << ADDR_WIDTH);
// Port Declarations
input clk ;
input rst ;
input wr_cs ;
input rd_cs ;
input rd_en ;
input wr_en ;
input [DATA_WIDTH-1:0] data_in ;
output full ;
output empty ;
output [DATA_WIDTH-1:0] data_out ;
 
//-----------Internal variables-------------------
reg [ADDR_WIDTH-1:0] wr_pointer;
reg [ADDR_WIDTH-1:0] rd_pointer;
reg [ADDR_WIDTH :0] status_cnt;
reg [DATA_WIDTH-1:0] data_out ;
wire [DATA_WIDTH-1:0] data_ram ;
 
//-----------Variable assignments---------------
assign full = (status_cnt == (RAM_DEPTH-1));
assign empty = (status_cnt == 0);
 
//-----------Code Start---------------------------
always @ (posedge clk or posedge rst)
begin : WRITE_POINTER
if (rst) begin
wr_pointer <= 0;
end else if (wr_cs && wr_en ) begin
wr_pointer <= wr_pointer + 1;
end
end
 
always @ (posedge clk or posedge rst)
begin : READ_POINTER
if (rst) begin
rd_pointer <= 0;
end else if (rd_cs && rd_en ) begin
rd_pointer <= rd_pointer + 1;
end
end
 
always @ (posedge clk or posedge rst)
begin : READ_DATA
if (rst) begin
data_out <= 0;
end else if (rd_cs && rd_en ) begin
data_out <= data_ram;
end
end
 
always @ (posedge clk or posedge rst)
begin : STATUS_COUNTER
if (rst) begin
status_cnt <= 0;
// Read but no write.
end else if ((rd_cs && rd_en) && !(wr_cs && wr_en)
&& (status_cnt != 0)) begin
status_cnt <= status_cnt - 1;
// Write but no read.
end else if ((wr_cs && wr_en) && !(rd_cs && rd_en)
&& (status_cnt != RAM_DEPTH)) begin
status_cnt <= status_cnt + 1;
end
end
ram_dp_ar_aw #(DATA_WIDTH,ADDR_WIDTH)DP_RAM (
.address_0 (wr_pointer) , // address_0 input
.data_0 (data_in) , // data_0 bi-directional
.cs_0 (wr_cs) , // chip select
.we_0 (wr_en) , // write enable
.oe_0 (1'b0) , // output enable
.address_1 (rd_pointer) , // address_q input
.data_1 (data_ram) , // data_1 bi-directional
.cs_1 (rd_cs) , // chip select
.we_1 (1'b0) , // Read enable
.oe_1 (rd_en) // output enable
);
 
endmodule
*/
 
 
//------------------------------------------------------------------------------
// sync_fifo
// Single-clock FIFO with combinational (fall-through) read data.
//
//   din / wr_en : data is stored on the clock edge where wr_en is high and
//                 the FIFO is not full; writes while full are dropped
//   dout        : always shows the entry at the read pointer
//   rd_en       : advances the read pointer when the FIFO is not empty
//   full/empty  : combinational status flags
//   reset       : synchronous, clears both pointers (memory is not cleared)
//
// The pointers carry one extra MSB on top of the address bits so that
// "full" and "empty" can be told apart when the address bits are equal.
// NOTE(review): this wrap-bit scheme presumably requires DEPTH to be a power
// of two (and > 1); confirm before using other depths.
//------------------------------------------------------------------------------
module sync_fifo #( parameter DATA_WIDTH = 8, parameter DEPTH = 8 )
(
    input  wire [DATA_WIDTH-1:0]  din,
    input  wire                   wr_en,
    input  wire                   rd_en,
    output wire [DATA_WIDTH-1:0]  dout,
    output reg                    full,
    output reg                    empty,
    input  wire                   clk,
    input  wire                   reset
);

    parameter ADDR_WIDTH = $clog2(DEPTH);

    reg  [ADDR_WIDTH : 0]      rd_ptr;   // note MSB is not really address
    reg  [ADDR_WIDTH : 0]      wr_ptr;   // note MSB is not really address
    wire [ADDR_WIDTH-1 : 0]    wr_loc;
    wire [ADDR_WIDTH-1 : 0]    rd_loc;
    reg  [DATA_WIDTH-1 : 0]    mem[DEPTH-1 : 0];

    assign wr_loc = wr_ptr[ADDR_WIDTH-1 : 0];
    assign rd_loc = rd_ptr[ADDR_WIDTH-1 : 0];

    // Pointer management
    always @(posedge clk) begin
        if (reset) begin
            wr_ptr <= 'h0;
            rd_ptr <= 'h0;
        end
        else begin
            if (wr_en & (~full))
                wr_ptr <= wr_ptr + 1;
            if (rd_en & (~empty))
                rd_ptr <= rd_ptr + 1;
        end
    end

    // Status flags:
    //   empty - all bits of rd_ptr and wr_ptr are the same
    //   full  - all bits except the MSB are equal and the MSB differs
    // Blocking assignments are used because this block is combinational.
    always @(rd_ptr or wr_ptr) begin
        empty = 1'b0;
        full  = 1'b0;

        if (rd_ptr[ADDR_WIDTH-1:0] == wr_ptr[ADDR_WIDTH-1:0]) begin
            if (rd_ptr[ADDR_WIDTH] == wr_ptr[ADDR_WIDTH])
                empty = 1'b1;
            else
                full = 1'b1;
        end
    end

    // Storage. The write must be gated with ~full: when the FIFO is full,
    // wr_loc equals rd_loc, so an ungated write would overwrite the oldest
    // entry that has not been read yet.
    always @(posedge clk) begin
        if (wr_en & (~full))
            mem[wr_loc] <= din;
    end

    assign dout = mem[rd_loc];

endmodule
 
//---------------------------------------------------------------------
 
/*
Synchronous memory blocks have two independent address ports, allowing
for operations on two unique addresses simultaneously. A read operation and a write
operation can share the same port if they share the same address.
In the synchronous RAM block architecture, there is no priority between the two
ports. Therefore, if you write to the same location on both ports at the same time, the
result is indeterminate in the device architecture.
When a read and write operation occurs on the same port for
the same address, the new data being written to the memory is read. When a read and
write operation occurs on different ports for the same address, the old data in the
memory is read. Simultaneous writes to the same location on both ports results in
indeterminate behavior.
 
*/
// Dual-port synchronous RAM: 2**ADDR_WIDTH words of DATA_WIDTH bits.
// Each port (a, b) uses one address for both reading and writing:
//   we_x = 1 : ram[addr_x] is written with data_x and q_x shows the new data
//   we_x = 0 : q_x shows the stored word at addr_x
// q_a / q_b are registered, so read data appears one clock after the address.
// Both ports share the same storage array and the same clock.
module RAM_DUAL_READ_DUAL_WRITE_PORT # ( parameter DATA_WIDTH = 8, parameter ADDR_WIDTH = 6 )
(
input wire [(DATA_WIDTH-1):0] data_a, data_b,
input wire [(ADDR_WIDTH-1):0] addr_a, addr_b,
input wire we_a, we_b, clk,
output reg [(DATA_WIDTH-1):0] q_a, q_b
);


// Declare the RAM variable
reg [DATA_WIDTH-1:0] ram[2**ADDR_WIDTH-1:0];
always @ (posedge clk)
begin // Port A
if (we_a)
begin
ram[addr_a] <= data_a;
q_a <= data_a; // write-through: the written value is also presented on q_a
end
else
q_a <= ram[addr_a];
end
always @ (posedge clk)
begin // Port b
if (we_b)
begin
ram[addr_b] <= data_b;
q_b <= data_b; // write-through: the written value is also presented on q_b
end
else
q_b <= ram[addr_b];
end
endmodule
 
 
// Synchronous RAM with separate read and write addresses on each of its two
// ports: 2**ADDR_WIDTH words of DATA_WIDTH bits.
//   we_x = 1 : ram[waddr_x] is written with data_x and q_x shows the written
//              data (write-through, as before)
//   we_x = 0 : q_x shows the stored word at raddr_x
// q_a / q_b are registered, so read data appears one clock after the address.
//
// The read path previously indexed the array with waddr_x, which left the
// raddr_a / raddr_b inputs completely unused; it now uses the read address.
module RAM_QUAD_PORT # ( parameter DATA_WIDTH = 8, parameter ADDR_WIDTH = 6 )
(
    input  wire [(DATA_WIDTH-1):0]  data_a, data_b,
    input  wire [(ADDR_WIDTH-1):0]  waddr_a, waddr_b,
    input  wire [(ADDR_WIDTH-1):0]  raddr_a, raddr_b,
    input  wire                     we_a, we_b, clk,
    output reg  [(DATA_WIDTH-1):0]  q_a, q_b
);

    // Declare the RAM variable
    reg [DATA_WIDTH-1:0] ram[2**ADDR_WIDTH-1:0];

    always @ (posedge clk)
    begin // Port A
        if (we_a)
        begin
            ram[waddr_a] <= data_a;
            q_a <= data_a;
        end
        else
            q_a <= ram[raddr_a];
    end

    always @ (posedge clk)
    begin // Port B
        if (we_b)
        begin
            ram[waddr_b] <= data_b;
            q_b <= data_b;
        end
        else
            q_b <= ram[raddr_b];
    end

endmodule
//-------------------------------------------------------------------------------
//----------------------------------------------------
// A four level, round-robin arbiter. This was
// orginally coded by WD Peterson in VHDL.
//----------------------------------------------------
//----------------------------------------------------
// ROUND_ROBIN_ARBITER
// Five requesters (req0..req4), five registered grants (gnt0..gnt4).
//  - While the granted requester keeps its request asserted (lcomreq),
//    its grant is held and no new grant is issued.
//  - Otherwise a new grant is chosen from the requests and the 3-bit
//    mask lmask2..lmask0, which is reloaded from the encoded grant
//    whenever lasmask is set.
//
// Changes with respect to the previous revision:
//  - lgnt4 hold term used lgnt3, so gnt4 followed a held gnt3 and could
//    never hold itself; it now uses lgnt4.
//  - the lmask = 3'b100 new-grant terms of lgnt2/lgnt3/lgnt4 were
//    qualified with lcomreq instead of ~lcomreq (unlike the same row in
//    lgnt0/lgnt1), which allowed a second grant while one was held.
//
// NOTE(review): the per-mask priority ordering of the request terms is
// not uniform across lgnt0..lgnt4 and has been left as found - confirm
// against the intended arbitration order.
// NOTE(review): lasmask and ledge have no reset; they only settle once
// the beg signal has been low for a clock - confirm this is acceptable.
//----------------------------------------------------
module ROUND_ROBIN_ARBITER (
    clk,
    rst,
    req4,
    req3,
    req2,
    req1,
    req0,
    gnt4,
    gnt3,
    gnt2,
    gnt1,
    gnt0
);
// --------------Port Declaration-----------------------
input  clk;
input  rst;
input  req4;
input  req3;
input  req2;
input  req1;
input  req0;
output gnt4;
output gnt3;
output gnt2;
output gnt1;
output gnt0;
//--------------Internal Registers----------------------
wire [2:0] gnt ;
wire       comreq ;
wire       beg ;
wire [2:0] lgnt ;
wire       lcomreq ;
reg        lgnt0 ;
reg        lgnt1 ;
reg        lgnt2 ;
reg        lgnt3 ;
reg        lgnt4 ;
reg        lasmask ;
reg        lmask0 ;
reg        lmask1 ;
reg        lmask2 ;
reg        ledge ;
//--------------Code Starts Here-----------------------
// Grant registers. In every equation the last term holds an existing
// grant (lcomreq high); all other terms issue a new grant and are only
// active when no granted request is pending (~lcomreq).
always @ (posedge clk)
    if (rst) begin
        lgnt0 <= 0;
        lgnt1 <= 0;
        lgnt2 <= 0;
        lgnt3 <= 0;
        lgnt4 <= 0;
    end else begin
        lgnt0 <=(~lcomreq & ~lmask2 & ~lmask1 & ~lmask0 & ~req4 & ~req3 & ~req2 & ~req1 & req0)
              | (~lcomreq & ~lmask2 & ~lmask1 &  lmask0 & ~req4 & ~req3 & ~req2 & req0)
              | (~lcomreq & ~lmask2 &  lmask1 & ~lmask0 & ~req4 & ~req3 & req0)
              | (~lcomreq & ~lmask2 &  lmask1 &  lmask0 & ~req4 & req0 )
              | (~lcomreq &  lmask2 & ~lmask1 & ~lmask0 & req0 )
              | ( lcomreq & lgnt0 );
        lgnt1 <=(~lcomreq & ~lmask2 & ~lmask1 & ~lmask0 & req1)
              | (~lcomreq & ~lmask2 & ~lmask1 &  lmask0 & ~req4 & ~req3 & ~req2 & req1 & ~req0)
              | (~lcomreq & ~lmask2 &  lmask1 & ~lmask0 & ~req4 & ~req3 & req1 & ~req0)
              | (~lcomreq & ~lmask2 &  lmask1 &  lmask0 & ~req4 & req1 & ~req0)
              | (~lcomreq &  lmask2 & ~lmask1 & ~lmask0 & req1 & ~req0)
              | ( lcomreq & lgnt1);
        lgnt2 <=(~lcomreq & ~lmask2 & ~lmask1 & ~lmask0 & req2 & ~req1)
              | (~lcomreq & ~lmask2 & ~lmask1 &  lmask0 & req2)
              | (~lcomreq & ~lmask2 &  lmask1 & ~lmask0 & ~req4 & ~req3 & req2 & ~req1 & ~req0)
              | (~lcomreq & ~lmask2 &  lmask1 &  lmask0 & ~req4 & req2 & ~req1 & ~req0)
              | (~lcomreq &  lmask2 & ~lmask1 & ~lmask0 & req2 & ~req1 & ~req0)
              | ( lcomreq & lgnt2);
        lgnt3 <=(~lcomreq & ~lmask2 & ~lmask1 & ~lmask0 & ~req4 & req3 & ~req2 & ~req1)
              | (~lcomreq & ~lmask2 & ~lmask1 &  lmask0 & ~req4 & req3 & ~req2)
              | (~lcomreq & ~lmask2 &  lmask1 & ~lmask0 & ~req4 & req3)
              | (~lcomreq & ~lmask2 &  lmask1 &  lmask0 & req3)
              | (~lcomreq &  lmask2 & ~lmask1 & ~lmask0 & ~req4 & req3 & ~req2 & ~req1 & ~req0)
              | ( lcomreq & lgnt3);
        lgnt4 <=(~lcomreq & ~lmask2 & ~lmask1 & ~lmask0 & req4 & ~req3 & ~req2 & ~req1 & ~req0)
              | (~lcomreq & ~lmask2 & ~lmask1 &  lmask0 & req4 & ~req3 & ~req2 & ~req1 )
              | (~lcomreq & ~lmask2 &  lmask1 & ~lmask0 & req4 & ~req3 & ~req2 )
              | (~lcomreq & ~lmask2 &  lmask1 &  lmask0 & req4 & ~req3 )
              | (~lcomreq &  lmask2 & ~lmask1 & ~lmask0 & req4 )
              | ( lcomreq & lgnt4);
    end
//----------------------------------------------------
// lasmask state machine.
//----------------------------------------------------
// beg: some request is pending and no granted request is being served
assign beg = (req4 | req3 | req2 | req1 | req0) & ~lcomreq;
always @ (posedge clk)
begin
    lasmask <= (beg & ~ledge & ~lasmask);
    ledge   <= (beg & ~ledge &  lasmask)
             | (beg &  ledge & ~lasmask);
end
//----------------------------------------------------
// comreq logic.
//----------------------------------------------------
// High while a requester that currently owns a grant still requests
assign lcomreq =
      ( req4 & lgnt4 )
    | ( req3 & lgnt3 )
    | ( req2 & lgnt2 )
    | ( req1 & lgnt1 )
    | ( req0 & lgnt0 );
//----------------------------------------------------
// Encoder logic.
//----------------------------------------------------
// One-hot grant -> binary index (grant 0 and "no grant" both encode as 0)
assign lgnt = {lgnt4,(lgnt3 | lgnt2),(lgnt3 | lgnt1)};
//----------------------------------------------------
// lmask register.
//----------------------------------------------------
always @ (posedge clk )
    if( rst ) begin
        lmask2 <= 0;
        lmask1 <= 0;
        lmask0 <= 0;
    end else if(lasmask) begin
        lmask2 <= lgnt[2];
        lmask1 <= lgnt[1];
        lmask0 <= lgnt[0];
    end else begin
        lmask2 <= lmask2;
        lmask1 <= lmask1;
        lmask0 <= lmask0;
    end
assign comreq = lcomreq;
assign gnt = lgnt;
//----------------------------------------------------
// Drive the outputs
//----------------------------------------------------
assign gnt4 = lgnt4;
assign gnt3 = lgnt3;
assign gnt2 = lgnt2;
assign gnt1 = lgnt1;
assign gnt0 = lgnt0;
endmodule
//-------------------------------------------------------------------------------
//-------------------------------------------------------------------------------
// ROUND_ROBIN_5_ENTRIES
// Rotating-priority arbiter for five requesters.
//  - A mask value cycles 0,1,2,3,4,0,... advancing on every clock.
//  - With mask = m, requester m has the highest priority, followed by
//    m+1, m+2, ... wrapping around past requester 4.
//  - oGrant0..oGrant4 are the registered (one clock later) grants.
//  - oPriorityGrant is the unregistered grant of requester 0.
//
// The state decoder has a default branch so that it is fully combinational
// (no inferred latches) and an illegal state returns to state 0.
//-------------------------------------------------------------------------------
module ROUND_ROBIN_5_ENTRIES
(
    input  wire Clock,
    input  wire Reset,
    input  wire iRequest0,
    input  wire iRequest1,
    input  wire iRequest2,
    input  wire iRequest3,
    input  wire iRequest4,
    output wire oGrant0,
    output wire oGrant1,
    output wire oGrant2,
    output wire oGrant3,
    output wire oGrant4,
    output wire oPriorityGrant

);
wire wMaks2,wMaks1,wMaks0;
wire wGrant0,wGrant1,wGrant2,wGrant3,wGrant4;

// One product term per mask value; the number of "no higher-priority
// request" conditions grows with the distance from the mask position.
assign wGrant0 =
     ( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest0 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest0 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest0 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest0 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest0 );

assign wGrant1 =
     ( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0 & ~iRequest4)
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest1 & ~iRequest0 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0);

assign wGrant2 =
     ( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 );

assign wGrant3 =
     ( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 );

assign wGrant4 =
     ( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 );


assign oPriorityGrant = wGrant0;

// Grant output registers
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD0
( Clock, Reset, 1'b1 , wGrant0, oGrant0);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
( Clock, Reset, 1'b1 , wGrant1, oGrant1 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
( Clock, Reset, 1'b1 , wGrant2, oGrant2 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
( Clock, Reset, 1'b1 , wGrant3, oGrant3 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
( Clock, Reset, 1'b1 , wGrant4, oGrant4 );


reg [4:0] rCurrentState, rNextState;
//Next states logic and Reset sequence
always @(posedge Clock )
begin
    if (Reset )
        rCurrentState <= 0;
    else
        rCurrentState <= rNextState;
end
reg[2:0] rMask;

assign wMaks0 = rMask[0];
assign wMaks1 = rMask[1];
assign wMaks2 = rMask[2];

// Mask sequencer: state N outputs mask N and moves on to state N+1 (mod 5)
always @ ( * )
begin
    case (rCurrentState)
    //--------------------------------------
    0:
    begin
        rMask = 3'd0;
        rNextState = 1;
    end
    1:
    begin
        rMask = 3'd1;
        rNextState = 2;
    end
    2:
    begin
        rMask = 3'd2;
        rNextState = 3;
    end
    3:
    begin
        rMask = 3'd3;
        rNextState = 4;
    end
    4:
    begin
        rMask = 3'd4;
        rNextState = 0;
    end
    // Illegal state: behave like state 0's mask and restart the sequence
    default:
    begin
        rMask = 3'd0;
        rNextState = 0;
    end
    endcase
end
/*
UPCOUNTER_POSEDGE # (3) UP1
(
.Clock( Clock ),
.Reset( Reset ),
.Initial( 3'b0 ),
.Enable( 1'b1 ),
.Q({wMaks2,wMaks1,wMaks0})
);
*/
endmodule
//-------------------------------------------------------------------------------
//-------------------------------------------------------------------------------
// ROUND_ROBIN_6_ENTRIES
// Rotating-priority arbiter for six requesters.
//  - A mask value cycles 0,1,2,3,4,5,0,... advancing on every clock.
//  - With mask = m, requester m has the highest priority, followed by
//    m+1, m+2, ... wrapping around past requester 5.
//  - oGrant0..oGrant5 are the registered (one clock later) grants.
//  - oPriorityGrant is the unregistered grant of requester 0.
//
// The state decoder has a default branch so that it is fully combinational
// (no inferred latches) and an illegal state returns to state 0.
//-------------------------------------------------------------------------------
module ROUND_ROBIN_6_ENTRIES
(
    input  wire Clock,
    input  wire Reset,
    input  wire iRequest0,
    input  wire iRequest1,
    input  wire iRequest2,
    input  wire iRequest3,
    input  wire iRequest4,
    input  wire iRequest5,
    output wire oGrant0,
    output wire oGrant1,
    output wire oGrant2,
    output wire oGrant3,
    output wire oGrant4,
    output wire oGrant5,
    output wire oPriorityGrant

);
wire wMaks2,wMaks1,wMaks0;
wire wGrant0,wGrant1,wGrant2,wGrant3,wGrant4,wGrant5;

// One product term per mask value; the number of "no higher-priority
// request" conditions grows with the distance from the mask position.
assign wGrant0 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest0 & ~iRequest5 )
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest0 & ~iRequest5 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1)
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest0 );

assign wGrant1 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest1 & ~iRequest0 & ~iRequest5)
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2)
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest1 & ~iRequest0);

assign wGrant2 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5 )
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4 & ~iRequest3)
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest2 & ~iRequest1 & ~iRequest0 );

assign wGrant3 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5 )
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5 & ~iRequest4)
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 );

assign wGrant4 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest5)
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 );

assign wGrant5 =
     ( wMaks2 & ~wMaks1 &  wMaks0 & iRequest5 )
    |( wMaks2 & ~wMaks1 & ~wMaks0 & iRequest5 & ~iRequest4 )
    |(~wMaks2 &  wMaks1 &  wMaks0 & iRequest5 & ~iRequest4 & ~iRequest3 )
    |(~wMaks2 &  wMaks1 & ~wMaks0 & iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
    |(~wMaks2 & ~wMaks1 &  wMaks0 & iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
    |(~wMaks2 & ~wMaks1 & ~wMaks0 & iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 );


assign oPriorityGrant = wGrant0;

// Grant output registers
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD0
( Clock, Reset, 1'b1 , wGrant0, oGrant0);

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
( Clock, Reset, 1'b1 , wGrant1, oGrant1 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
( Clock, Reset, 1'b1 , wGrant2, oGrant2 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
( Clock, Reset, 1'b1 , wGrant3, oGrant3 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
( Clock, Reset, 1'b1 , wGrant4, oGrant4 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5
( Clock, Reset, 1'b1 , wGrant5, oGrant5 );


reg [4:0] rCurrentState, rNextState;
//Next states logic and Reset sequence
always @(posedge Clock )
begin
    if (Reset )
        rCurrentState <= 0;
    else
        rCurrentState <= rNextState;
end
reg[2:0] rMask;

assign wMaks0 = rMask[0];
assign wMaks1 = rMask[1];
assign wMaks2 = rMask[2];

// Mask sequencer: state N outputs mask N and moves on to state N+1 (mod 6)
always @ ( * )
begin
    case (rCurrentState)
    //--------------------------------------
    0:
    begin
        rMask = 3'd0;
        rNextState = 1;
    end
    1:
    begin
        rMask = 3'd1;
        rNextState = 2;
    end
    2:
    begin
        rMask = 3'd2;
        rNextState = 3;
    end
    3:
    begin
        rMask = 3'd3;
        rNextState = 4;
    end
    4:
    begin
        rMask = 3'd4;
        rNextState = 5;
    end
    5:
    begin
        rMask = 3'd5;
        rNextState = 0;
    end
    // Illegal state: behave like state 0's mask and restart the sequence
    default:
    begin
        rMask = 3'd0;
        rNextState = 0;
    end
    endcase
end
/*
UPCOUNTER_POSEDGE # (3) UP1
(
.Clock( Clock ),
.Reset( Reset ),
.Initial( 3'b0 ),
.Enable( 1'b1 ),
.Q({wMaks2,wMaks1,wMaks0})
);
*/
endmodule
//-------------------------------------------------------------------------------
//-------------------------------------------------------------------------------
//-------------------------------------------------------------------------------
// ROUND_ROBIN_7_ENTRIES
//
// Seven-requester arbiter with a rotating priority pointer.
//
//  * A small state machine steps the pointer 0,1,...,6,0,... on every clock,
//    independently of the requests. Reset returns it to 0.
//  * In a given cycle the requester whose index equals the pointer has the
//    highest priority; priority then decreases with increasing index,
//    wrapping from 6 back to 0.
//  * At most one wGrantN is asserted per cycle. Each wGrantN is passed through
//    an FFD_POSEDGE_SYNCRONOUS_RESET instance (always enabled) to produce
//    oGrantN.
//  * oPriorityGrant is the combinational (unregistered) wGrant0.
//
// Inputs : Clock, Reset, iRequest0..iRequest6
// Outputs: oGrant0..oGrant6, oPriorityGrant
//-------------------------------------------------------------------------------
module ROUND_ROBIN_7_ENTRIES
(
input wire Clock,
input wire Reset,
input wire iRequest0,
input wire iRequest1,
input wire iRequest2,
input wire iRequest3,
input wire iRequest4,
input wire iRequest5,
input wire iRequest6,
output wire oGrant0,
output wire oGrant1,
output wire oGrant2,
output wire oGrant3,
output wire oGrant4,
output wire oGrant5,
output wire oGrant6,
output wire oPriorityGrant

);

wire wMaks2,wMaks1,wMaks0;
wire wGrant0,wGrant1,wGrant2,wGrant3,wGrant4,wGrant5,wGrant6;

reg [4:0] rCurrentState, rNextState;
reg [2:0] rMask;

//-------------------------------------------------------------------------------
// Priority pointer state machine
//-------------------------------------------------------------------------------
//Next states logic and Reset sequence
always @(posedge Clock )
begin
    if (Reset )
        rCurrentState <= 0;
    else
        rCurrentState <= rNextState;
end

// The pointer value (rMask) simply mirrors the state number.
always @ ( * )
begin
    case (rCurrentState)
    //--------------------------------------
    0: begin rMask = 3'd0; rNextState = 1; end
    1: begin rMask = 3'd1; rNextState = 2; end
    2: begin rMask = 3'd2; rNextState = 3; end
    3: begin rMask = 3'd3; rNextState = 4; end
    4: begin rMask = 3'd4; rNextState = 5; end
    5: begin rMask = 3'd5; rNextState = 6; end
    6: begin rMask = 3'd6; rNextState = 0; end
    //--------------------------------------
    // States 7..31 are never entered in normal operation. The default arm
    // keeps this block purely combinational (no inferred latches) and
    // steers any stray state back to 0.
    default:
    begin
        rMask      = 3'd0;
        rNextState = 0;
    end
    endcase
end

assign wMaks0 = rMask[0];
assign wMaks1 = rMask[1];
assign wMaks2 = rMask[2];

// One-hot decode of the pointer: wPtrN is high when requester N currently
// has the highest priority. Pointer value 7 decodes to nothing, so no grant
// is produced for it.
wire wPtr0 = ~wMaks2 & ~wMaks1 & ~wMaks0;
wire wPtr1 = ~wMaks2 & ~wMaks1 &  wMaks0;
wire wPtr2 = ~wMaks2 &  wMaks1 & ~wMaks0;
wire wPtr3 = ~wMaks2 &  wMaks1 &  wMaks0;
wire wPtr4 =  wMaks2 & ~wMaks1 & ~wMaks0;
wire wPtr5 =  wMaks2 & ~wMaks1 &  wMaks0;
wire wPtr6 =  wMaks2 &  wMaks1 & ~wMaks0;

//-------------------------------------------------------------------------------
// Grant equations
//
// Requester N wins when it is requesting and no requester between the
// pointer (inclusive) and N (exclusive), walking upwards with wrap-around,
// is requesting.
//-------------------------------------------------------------------------------
assign wGrant0 = iRequest0 &
    (  wPtr0
     |(wPtr6 & ~iRequest6 )
     |(wPtr5 & ~iRequest6 & ~iRequest5 )
     |(wPtr4 & ~iRequest6 & ~iRequest5 & ~iRequest4 )
     |(wPtr3 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 )
     |(wPtr2 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
     |(wPtr1 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 ) );

assign wGrant1 = iRequest1 &
    (  wPtr1
     |(wPtr0 & ~iRequest0 )
     |(wPtr6 & ~iRequest0 & ~iRequest6 )
     |(wPtr5 & ~iRequest0 & ~iRequest6 & ~iRequest5 )
     |(wPtr4 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 )
     |(wPtr3 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 )
     |(wPtr2 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 ) );

assign wGrant2 = iRequest2 &
    (  wPtr2
     |(wPtr1 & ~iRequest1 )
     |(wPtr0 & ~iRequest1 & ~iRequest0 )
     |(wPtr6 & ~iRequest1 & ~iRequest0 & ~iRequest6 )
     |(wPtr5 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 )
     |(wPtr4 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 )
     |(wPtr3 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 & ~iRequest3 ) );

assign wGrant3 = iRequest3 &
    (  wPtr3
     |(wPtr2 & ~iRequest2 )
     |(wPtr1 & ~iRequest2 & ~iRequest1 )
     |(wPtr0 & ~iRequest2 & ~iRequest1 & ~iRequest0 )
     |(wPtr6 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 )
     |(wPtr5 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 )
     |(wPtr4 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 & ~iRequest4 ) );

assign wGrant4 = iRequest4 &
    (  wPtr4
     |(wPtr3 & ~iRequest3 )
     |(wPtr2 & ~iRequest3 & ~iRequest2 )
     |(wPtr1 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
     |(wPtr0 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 )
     |(wPtr6 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 )
     |(wPtr5 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 & ~iRequest5 ) );

// Requester 5 follows exactly the same rotation as its neighbours: it is
// unconditional when the pointer is 5 and must wait for requester 6 and
// requesters 0..4 when the pointer is 6. Keeping these terms aligned with
// the pointer is what guarantees that oGrant5 and oGrant6 are never
// asserted together.
assign wGrant5 = iRequest5 &
    (  wPtr5
     |(wPtr4 & ~iRequest4 )
     |(wPtr3 & ~iRequest4 & ~iRequest3 )
     |(wPtr2 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
     |(wPtr1 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
     |(wPtr0 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 )
     |(wPtr6 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 & ~iRequest6 ) );

assign wGrant6 = iRequest6 &
    (  wPtr6
     |(wPtr5 & ~iRequest5 )
     |(wPtr4 & ~iRequest5 & ~iRequest4 )
     |(wPtr3 & ~iRequest5 & ~iRequest4 & ~iRequest3 )
     |(wPtr2 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 )
     |(wPtr1 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 )
     |(wPtr0 & ~iRequest5 & ~iRequest4 & ~iRequest3 & ~iRequest2 & ~iRequest1 & ~iRequest0 ) );

// Combinational view of the requester-0 grant.
assign oPriorityGrant = wGrant0;

//-------------------------------------------------------------------------------
// Grant output stage (one always-enabled flip-flop per grant line)
//-------------------------------------------------------------------------------
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD0
( Clock, Reset, 1'b1 , wGrant0, oGrant0 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD1
( Clock, Reset, 1'b1 , wGrant1, oGrant1 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD2
( Clock, Reset, 1'b1 , wGrant2, oGrant2 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
( Clock, Reset, 1'b1 , wGrant3, oGrant3 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD4
( Clock, Reset, 1'b1 , wGrant4, oGrant4 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD5
( Clock, Reset, 1'b1 , wGrant5, oGrant5 );

FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD6
( Clock, Reset, 1'b1 , wGrant6, oGrant6 );

endmodule
`endif
/theia_gpu/branches/beta_2.0/rtl/Unit_IO.v
0,0 → 1,100
`timescale 1ns / 1ps
`include "aDefinitions.v"
 
 
 
`define TAG_WBS_INSTRUCTION_ADDRESS_TYPE 2'b10
`define TAG_WBS_DATA_ADDRESS_TYPE 2'b01
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//------------------------------------------------------------------------------
// Unit_IO
//
// Structural wrapper that groups the two bus-facing blocks of a vector
// processor:
//   * WBS (WishBoneSlaveUnit)    - receives the MCU_* bus signals and drives
//                                  the internal instruction/data write ports.
//   * OMI (Module_OMemInterface) - receives the internal output-memory write
//                                  port (iOMEM_*) and drives the OMEM_* bus.
// The module contains no logic of its own.
module Unit_IO
(
    //WB Input signals
    input  wire                                 CLK_I,
    input  wire                                 RST_I,
    input  wire                                 MCU_STB_I,
    input  wire                                 MCU_WE_I,
    input  wire [`WB_WIDTH-1:0]                 MCU_DAT_I,
    input  wire [`WB_WIDTH-1:0]                 MCU_ADR_I,
    input  wire [1:0]                           MCU_TGA_I,
    output wire                                 MCU_ACK_O,
    input  wire                                 MCU_MST_I,
    input  wire                                 MCU_CYC_I,

    //Internal Slave signals
    output wire [`DATA_ADDRESS_WIDTH-1:0]       oDataWriteAddress,
    output wire [`DATA_ROW_WIDTH-1:0]           oDataBus,
    output wire [`INSTRUCTION_ADDR_WIDTH-1:0]   oInstructionWriteAddress,
    output wire [`INSTRUCTION_WIDTH-1:0]        oInstructionBus,
    output wire                                 oDataWriteEnable,
    output wire                                 oInstructionWriteEnable,

    //Output memory
    input  wire [`DATA_ROW_WIDTH-1:0]           iOMEM_WriteAddress,
    input  wire [`DATA_ROW_WIDTH-1:0]           iOMEM_WriteData,
    input  wire                                 iOMEM_WriteEnable,
    output wire [`WB_WIDTH-1:0]                 OMEM_DAT_O,
    output wire [`WB_WIDTH-1:0]                 OMEM_ADR_O,
    output wire                                 OMEM_WE_O
);

//------------------------------------------------------------------------------
// Output-memory side: internal write port -> OMEM_* bus
//------------------------------------------------------------------------------
Module_OMemInterface OMI
(
    .Clock        ( CLK_I              ),
    .Reset        ( RST_I              ),

    // request coming from inside the processor
    .iAddress     ( iOMEM_WriteAddress ),
    .iData        ( iOMEM_WriteData    ),
    .iWriteEnable ( iOMEM_WriteEnable  ),

    // bus-side outputs
    .ADR_O        ( OMEM_ADR_O         ),
    .DAT_O        ( OMEM_DAT_O         ),
    .WE_O         ( OMEM_WE_O          )
);

//------------------------------------------------------------------------------
// MCU side: MCU_* bus -> internal instruction / data write ports
//------------------------------------------------------------------------------
WishBoneSlaveUnit WBS
(
    // clock and reset
    .CLK_I                    ( CLK_I                    ),
    .RST_I                    ( RST_I                    ),

    // bus handshake and qualifiers
    .CYC_I                    ( MCU_CYC_I                ),
    .STB_I                    ( MCU_STB_I                ),
    .WE_I                     ( MCU_WE_I                 ),
    .MST_I                    ( MCU_MST_I                ),
    .TGA_I                    ( MCU_TGA_I                ),
    .ACK_O                    ( MCU_ACK_O                ),

    // bus address and data
    .ADR_I                    ( MCU_ADR_I                ),
    .DAT_I                    ( MCU_DAT_I                ),

    // instruction write port
    .oInstructionWriteAddress ( oInstructionWriteAddress ),
    .oInstructionBus          ( oInstructionBus          ),
    .oInstructionWriteEnable  ( oInstructionWriteEnable  ),

    // data write port
    .oDataWriteAddress        ( oDataWriteAddress        ),
    .oDataBus                 ( oDataBus                 ),
    .oDataWriteEnable         ( oDataWriteEnable         )
);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_VectorProcessor.v
0,0 → 1,112
`timescale 1ns / 1ps
`include "aDefinitions.v"
/**********************************************************************************
Theia, Ray Cast Programmable graphic Processing Unit.
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
 
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
***********************************************************************************/
//--------------------------------------------------------
 
// VectorProcessor
//
// Structural top level of one vector processor. It instantiates and wires
// together three units:
//   CONTROL (ControlUnit)    - takes iCpCommand and iVPID, produces the
//                              enable for the execution unit.
//   IO      (Unit_IO)        - MCU bus in, instruction-memory write port out;
//                              output-memory write port in, OMEM_* bus out.
//   EXE     (Unit_Execution) - consumes the instruction-memory write port and
//                              produces the output-memory write port.
//
// Notes visible from this module alone:
//   * iEnable is declared but not referenced inside this module.
//   * The data-write outputs of Unit_IO (oDataWriteAddress, oDataBus,
//     oDataWriteEnable) are intentionally left unconnected here.
module VectorProcessor
(
    input  wire                         Clock,
    input  wire                         Reset,
    input  wire                         iEnable,
    input  wire [`CBC_BUS_WIDTH-1:0]    iCpCommand,
    input  wire [`VPID_WIDTH-1:0]       iVPID,
    input  wire                         MCU_WE_I,
    input  wire                         MCU_MST_I,
    input  wire                         MCU_STB_I,
    input  wire                         MCU_CYC_I,
    input  wire [`MCU_TAG_SIZE-1:0]     MCU_TAG_I,
    input  wire [`WB_WIDTH-1:0]         MCU_DAT_I,
    input  wire [`WB_WIDTH-1:0]         MCU_ADR_I,
    output wire                         MCU_ACK_O,
    output wire                         OMEM_WE,
    output wire [`WB_WIDTH-1:0]         OMEM_ADDR,
    output wire [`WB_WIDTH-1:0]         OMEM_DATA
);

// CONTROL -> EXE
wire                                    wControl_2_Exe_Enabled;

// IO -> EXE : instruction memory write port
wire                                    wIO_2_MEM__InstructionWriteEnable;
wire [`INSTRUCTION_ADDR_WIDTH-1:0]      wIO_2_MEM__InstructionWriteAddress;
wire [`INSTRUCTION_WIDTH-1:0]           wIO_2_MEM__Instruction;

// EXE -> IO : output memory write port
wire                                    wEXE_2_IO__OMEM_WriteEnable;
wire [`DATA_ROW_WIDTH-1:0]              wEXE_2_IO__OMEM_WriteAddress;
wire [`DATA_ROW_WIDTH-1:0]              wEXE_2_IO__OMEM_WriteData;

//--------------------------------------------------------
// Execution unit
//--------------------------------------------------------
Unit_Execution EXE
(
    .Clock                        ( Clock                              ),
    .Reset                        ( Reset                              ),
    .iEnable                      ( wControl_2_Exe_Enabled             ),

    .iInstructionMem_WriteEnable  ( wIO_2_MEM__InstructionWriteEnable  ),
    .iInstructionMem_WriteAddress ( wIO_2_MEM__InstructionWriteAddress ),
    .iInstructionMem_WriteData    ( wIO_2_MEM__Instruction             ),

    .oOMEMWriteEnable             ( wEXE_2_IO__OMEM_WriteEnable        ),
    .oOMEMWriteAddress            ( wEXE_2_IO__OMEM_WriteAddress       ),
    .oOMEMWriteData               ( wEXE_2_IO__OMEM_WriteData          )
);

//--------------------------------------------------------
// IO unit
//--------------------------------------------------------
Unit_IO IO
(
    .CLK_I                    ( Clock                              ),
    .RST_I                    ( Reset                              ),

    // MCU bus
    .MCU_CYC_I                ( MCU_CYC_I                          ),
    .MCU_STB_I                ( MCU_STB_I                          ),
    .MCU_WE_I                 ( MCU_WE_I                           ),
    .MCU_MST_I                ( MCU_MST_I                          ),
    .MCU_TGA_I                ( MCU_TAG_I                          ),
    .MCU_ADR_I                ( MCU_ADR_I                          ),
    .MCU_DAT_I                ( MCU_DAT_I                          ),
    .MCU_ACK_O                ( MCU_ACK_O                          ),

    // Instruction write port towards EXE.
    // (oDataWriteAddress, oDataBus and oDataWriteEnable are not connected.)
    .oInstructionWriteEnable  ( wIO_2_MEM__InstructionWriteEnable  ),
    .oInstructionWriteAddress ( wIO_2_MEM__InstructionWriteAddress ),
    .oInstructionBus          ( wIO_2_MEM__Instruction             ),

    // Output memory write port coming from EXE
    .iOMEM_WriteEnable        ( wEXE_2_IO__OMEM_WriteEnable        ),
    .iOMEM_WriteAddress       ( wEXE_2_IO__OMEM_WriteAddress       ),
    .iOMEM_WriteData          ( wEXE_2_IO__OMEM_WriteData          ),

    // Output memory bus
    .OMEM_WE_O                ( OMEM_WE                            ),
    .OMEM_ADR_O               ( OMEM_ADDR                          ),
    .OMEM_DAT_O               ( OMEM_DATA                          )
);

//--------------------------------------------------------
// Control unit
//--------------------------------------------------------
ControlUnit CONTROL
(
    .Clock      ( Clock                  ),
    .Reset      ( Reset                  ),
    .iVPID      ( iVPID                  ),
    .iCpCommand ( iCpCommand             ),
    .oVpEnabled ( wControl_2_Exe_Enabled )
);

endmodule
/theia_gpu/branches/beta_2.0/rtl/Module_ReservationStation.v
0,0 → 1,259
`include "aDefinitions.v"
 
// ReservationStation
//
// Reservation station that latches an issue packet addressed to it, forwards
// the operands to a functional unit and queues the result for commit.
//
//  * An issue packet is accepted (wLatchRequest) when the RSID field of
//    iIssueBus equals iMyId; the whole packet is then stored in ISSUE_FFD.
//  * wStall is asserted while such a packet carries a non-zero source
//    reservation-station id in either operand.
//  * While stalled, a commit-bus packet whose RSID matches a latched source
//    RS id raises the corresponding wLatchDataNFromCommitBus.
//  * oTrigger is raised when a packet is accepted, or a commit match is seen,
//    and wStall is low.
//  * Results ({wID,wWE,wDestination,iResult}) are written to COMMIT_OUT_FIFO
//    when iExecutionDone is high and read when iCommitGranted is high.
//  * oBusy = wBusy | (wCommitFifoFull & ~iCommitGranted), where wBusy is the
//    output of a 1-bit UPCOUNTER_POSEDGE enabled on accept or commit grant
//    (presumably toggling between idle and busy -- confirm against the
//    UPCOUNTER_POSEDGE definition).
module ReservationStation
(
input wire Clock,
input wire Reset,
input wire [`MOD_ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
input wire [3:0] iMyId,
input wire iExecutionDone,
input wire iCommitGranted,
input wire [`DATA_ROW_WIDTH-1:0] iResult,
output wire [`DATA_ROW_WIDTH-1:0] oSource1,
output wire [`DATA_ROW_WIDTH-1:0] oSource0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDestination,
output wire [`DATA_ROW_WIDTH-1:0] oResult,
output wire [2:0] oWE,
output wire [3:0] oId,
output wire oBusy,
output wire oTrigger,
output wire oCommitRequest,
output wire [`DATA_ROW_WIDTH-1:0] oSrc0Latched,oSrc1Latched
);

//-------------------------------------------------------------------------
// Net declarations.
// Every net is declared here, ahead of its first use, so the module also
// compiles when `default_nettype none is in effect.
//-------------------------------------------------------------------------
wire wStall;
wire wLatchRequest;
wire wLatchData0FromCommitBus;
wire wLatchData1FromCommitBus;
wire wTrigger;
wire wBusy;
wire wCommitFifoFull;

// Fields of the latched issue packet (see ISSUE_FFD below)
wire [3:0] wID;
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination;
wire [2:0] wWE;
wire [3:0] wScale;
wire [3:0] wSource1_RS;
wire [3:0] wSource0_RS;
wire [`DATA_ROW_WIDTH-1:0] wSrc1,wSrc0;
wire [`ISSUE_SRCTAG_SIZE-1:0] wTag0,wTag1;

//-------------------------------------------------------------------------
// Issue / trigger control
//-------------------------------------------------------------------------
// A commit is requested as soon as the functional unit reports completion.
assign oCommitRequest = iExecutionDone;

// The packet currently on the issue bus is addressed to this station.
assign wLatchRequest = ( iIssueBus[`MOD_ISSUE_RSID_RNG] == iMyId) ? 1'b1 : 1'b0;

// The incoming packet depends on at least one other reservation station.
assign wStall = (wLatchRequest && (iIssueBus[`MOD_ISSUE_SRC1RS_RNG] != 0 || iIssueBus[`MOD_ISSUE_SRC0RS_RNG] != 0)) ? 1'b1 : 1'b0;

// While stalled, watch the commit bus for the producers of the operands.
assign wLatchData0FromCommitBus = ((wStall == 1'b1) && (iCommitBus[`MOD_COMMIT_RSID_RNG] == wSource0_RS)) ? 1'b1 : 1'b0;
assign wLatchData1FromCommitBus = ((wStall == 1'b1) && (iCommitBus[`MOD_COMMIT_RSID_RNG] == wSource1_RS)) ? 1'b1 : 1'b0;

// Internal copy of the trigger condition without the stall qualifier.
assign wTrigger = ( wLatchRequest | wLatchData0FromCommitBus | wLatchData1FromCommitBus);

//If there are no dependencies then just trigger execution
assign oTrigger = ( (wLatchRequest | wLatchData0FromCommitBus | wLatchData1FromCommitBus) & ~wStall);

//-------------------------------------------------------------------------
// Busy tracking
//-------------------------------------------------------------------------
// '&' binds tighter than '|'; the parentheses only make that explicit.
assign oBusy = wBusy | (wCommitFifoFull & ~iCommitGranted);

UPCOUNTER_POSEDGE # ( 1 ) BUSY
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wLatchRequest | iCommitGranted ),
.Initial( 1'b0 ),
.Q( wBusy )
);

//-------------------------------------------------------------------------
// Operand selection
//-------------------------------------------------------------------------
// NOTE(review): oSource1 is built from the source-0 commit match and the
// SRC0 data field of the issue bus, and oSource0 from the source-1 match and
// the SRC1 field. ReservationStation_1Cycle pairs them the other way round.
// The wiring is kept exactly as it was; confirm against the consumers of
// these ports before changing it.
assign oSource1 = (wLatchData0FromCommitBus) ? iCommitBus[`MOD_COMMIT_DATA_RNG] : iIssueBus[`MOD_ISSUE_SRC0_DATA_RNG];
assign oSource0 = (wLatchData1FromCommitBus) ? iCommitBus[`MOD_COMMIT_DATA_RNG] : iIssueBus[`MOD_ISSUE_SRC1_DATA_RNG];

//-------------------------------------------------------------------------
// Issue packet register
//-------------------------------------------------------------------------
// Stores the full issue packet when it is addressed to this station and
// splits it into its fields.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `MOD_ISSUE_PACKET_SIZE ) ISSUE_FFD
( Clock, Reset, wLatchRequest , iIssueBus, {wID,wDestination,wWE,wScale,wSource1_RS,wSrc1,wSource0_RS,wSrc0} );

assign oSrc0Latched = wSrc0;
assign oSrc1Latched = wSrc1;

assign wTag0 = wSrc0[`MOD_ISSUE_TAG0_RNG];
assign wTag1 = wSrc1[`MOD_ISSUE_TAG0_RNG];

//-------------------------------------------------------------------------
// Commit output queue
//-------------------------------------------------------------------------
sync_fifo # (`COMMIT_PACKET_SIZE ) COMMIT_OUT_FIFO
(
.clk( Clock ),
.reset( Reset ),
.din( {wID,wWE,wDestination,iResult} ),
.wr_en( iExecutionDone ),
.rd_en( iCommitGranted ),
.dout( {oId,oWE,oDestination,oResult} ),
.full( wCommitFifoFull )
);

endmodule
 
//-------------------------------------------------------------------------------------------------
// ReservationStation_1Cycle
//
// Reservation station variant that passes iResult straight through to
// oResult (no commit FIFO) and tracks operand dependencies per source.
//
//  * wHandleCurrentIssue : the RSID field of iIssueBus equals iMyId.
//  * wSrcN_Dependency_Initial : the accepted packet has a non-zero value in
//    the four issue-bus bits checked below for source N.
//  * oTrigger : raised either for an accepted packet with no dependency, or,
//    while waiting, once every pending source dependency is flagged resolved.
//  * The state bits (wWaitingDependency, wSrcN_Dependency,
//    wSrcN_DependencyResolved, wWaitingForCommitGranted, oBusy,
//    wCommitRequest) are each the Q of a 1-bit UPCOUNTER_POSEDGE.
//    NOTE(review): presumably a 1-bit up-counter toggles on every enabled
//    clock, i.e. these act as set/clear toggles -- confirm against the
//    UPCOUNTER_POSEDGE definition.
module ReservationStation_1Cycle
(
input wire Clock,
input wire Reset,
input wire [`MOD_ISSUE_PACKET_SIZE-1:0] iIssueBus,
input wire [`MOD_COMMIT_PACKET_SIZE-1:0] iCommitBus,
input wire [3:0] iMyId,
input wire iExecutionDone,
input wire iCommitGranted,
input wire [`DATA_ROW_WIDTH-1:0] iResult,
output wire [`DATA_ROW_WIDTH-1:0] oSource1,
output wire [`DATA_ROW_WIDTH-1:0] oSource0,
output wire [`DATA_ADDRESS_WIDTH-1:0] oDestination,
output wire [`DATA_ROW_WIDTH-1:0] oResult,
output wire [`SCALE_SIZE-1:0] oScale,
output wire [2:0] oWE,
output wire [3:0] oId,
output wire oBusy,
output wire oTrigger,
output wire oCommitRequest
);


// Source reservation-station ids taken from the latched issue packet.
wire [3:0] wSource1_RS;
wire [3:0] wSource0_RS;
// NOTE(review): wMyId, wTrigger, wDestination, wID, wWE and wResult are
// declared but not referenced anywhere else in this module.
wire [3:0] wMyId;
wire wTrigger;
wire [`DATA_ADDRESS_WIDTH-1:0] wDestination;
wire [3:0] wID;
wire [2:0] wWE;
wire [`DATA_ROW_WIDTH-1:0] wSrc1,wSrc0,wResult;
//wire wDstZero;
// Operand values captured from the commit bus (forwarded data).
wire [`DATA_ROW_WIDTH-1:0] wSrc1_Fwd;
wire [`DATA_ROW_WIDTH-1:0] wSrc0_Fwd;

wire wSrc0_Dependency_Initial, wSrc0_Dependency;
wire wSrc1_Dependency_Initial, wSrc1_Dependency;
wire wSrc0_DependencyResolved, wSrc0_DependencyLatch;
wire wSrc1_DependencyResolved, wSrc1_DependencyLatch;
wire wWaitingDependency;
wire wHandleCurrentIssue;
wire wDependencyResolved;
wire [`ISSUE_SRCTAG_SIZE-1:0] wTag0,wTag1;
wire wSrc0_DependencyLatch_Pre,wSrc1_DependencyLatch_Pre;

// The packet on the issue bus is addressed to this station.
assign wHandleCurrentIssue = ( iIssueBus[`MOD_ISSUE_RSID_RNG] == iMyId) ? 1'b1 : 1'b0;
// Hard-coded bit positions: in the ISSUE_FFD concatenation at the bottom of
// this module, bits [99:96] and [199:196] line up with wSource0_RS and
// wSource1_RS if `DATA_ROW_WIDTH is 96 -- TODO confirm, and consider using
// the `MOD_ISSUE_SRC0RS_RNG / `MOD_ISSUE_SRC1RS_RNG ranges instead.
assign wSrc0_Dependency_Initial = wHandleCurrentIssue & (iIssueBus[96] | iIssueBus[97] | iIssueBus[98] | iIssueBus[99]);
assign wSrc1_Dependency_Initial = wHandleCurrentIssue & (iIssueBus[196] | iIssueBus[197] | iIssueBus[198] | iIssueBus[199]);


// Trigger terms, in order:
//  1. not waiting, packet accepted, neither source has a dependency;
//  2. waiting on source 0 only, and it is resolved;
//  3. waiting on source 1 only, and it is resolved;
//  4. waiting on both sources, and both are resolved.
assign oTrigger =
(~wWaitingDependency & wHandleCurrentIssue & ~wSrc0_Dependency_Initial & ~wSrc1_Dependency_Initial)
|(wWaitingDependency & ~wSrc1_Dependency & wSrc0_Dependency & wSrc0_DependencyResolved )
|(wWaitingDependency & wSrc1_Dependency & ~wSrc0_Dependency & wSrc1_DependencyResolved )
|(wWaitingDependency & wSrc1_Dependency & wSrc0_Dependency & wSrc1_DependencyResolved & wSrc0_DependencyResolved );
// Still flagged as waiting although neither per-source dependency bit is set.
assign wDependencyResolved = wWaitingDependency & ~wSrc1_Dependency & ~wSrc0_Dependency;

// A pending source matches the commit bus when both the producing RS id and
// the tag agree.
assign wSrc0_DependencyLatch_Pre = ( wSrc0_Dependency && (iCommitBus[`MOD_COMMIT_RSID_RNG] == wSource0_RS && iCommitBus[`MOD_COMMIT_TAG_RNG] == wTag0) ) ? 1'b1 : 1'b0;
assign wSrc1_DependencyLatch_Pre = ( wSrc1_Dependency && (iCommitBus[`MOD_COMMIT_RSID_RNG] == wSource1_RS && iCommitBus[`MOD_COMMIT_TAG_RNG] == wTag1) ) ? 1'b1 : 1'b0;

// NOTE(review): PULSE is defined elsewhere; presumably it turns the match
// level into a single-cycle pulse -- confirm.
PULSE P1 ( Clock,Reset, 1'b1, wSrc0_DependencyLatch_Pre, wSrc0_DependencyLatch);
PULSE P2 ( Clock,Reset, 1'b1, wSrc1_DependencyLatch_Pre, wSrc1_DependencyLatch);

wire wWaitingForCommitGranted;
// Enabled while busy when exactly one of iCommitGranted / wDependencyResolved
// is high.
UPCOUNTER_POSEDGE # ( 1 ) FFD_101
( Clock, Reset, 1'b0, (oBusy & (iCommitGranted ^ wDependencyResolved) ), wWaitingForCommitGranted );

// Per-source "resolved" flags: enabled by the commit-bus match pulse and
// again by wDependencyResolved.
UPCOUNTER_POSEDGE # ( 1 ) FFD_10
( Clock, Reset, 1'b0, wSrc0_DependencyLatch | wDependencyResolved, wSrc0_DependencyResolved );

UPCOUNTER_POSEDGE # ( 1 ) FFD_11
( Clock, Reset, 1'b0, wSrc1_DependencyLatch | wDependencyResolved, wSrc1_DependencyResolved );

// Capture the commit-bus data for each source on its match pulse.
// Note the instance names: FFD_DEP0 holds the source-1 value and FFD_DEP1
// holds the source-0 value.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD_DEP0
( Clock, Reset, wSrc1_DependencyLatch, iCommitBus[`MOD_COMMIT_DATA_RNG],wSrc1_Fwd );

FFD_POSEDGE_SYNCRONOUS_RESET # ( `DATA_ROW_WIDTH ) FFD_DEP1
( Clock, Reset, wSrc0_DependencyLatch, iCommitBus[`MOD_COMMIT_DATA_RNG],wSrc0_Fwd );


//assign oBusy = wWaitingDependency;
// oBusy state bit: enabled when a packet with a dependency is accepted, and
// when a commit is granted while waiting for the grant or while
// wDependencyResolved is high.
UPCOUNTER_POSEDGE # ( 1 ) BUSY
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wSrc0_Dependency_Initial | wSrc1_Dependency_Initial | ((wWaitingForCommitGranted|wDependencyResolved)/*WaitingDependency*/ & iCommitGranted) ),//***
.Initial( 1'b0 ),
.Q( oBusy )
);

wire wCommitRequest;
// Commit-request state bit: enabled by a trigger and by a commit grant.
UPCOUNTER_POSEDGE # ( 1 ) CRQ
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( oTrigger | iCommitGranted ),
.Initial( 1'b0 ),
.Q( wCommitRequest )
);
// Request a commit in the trigger cycle itself, then keep requesting from
// the state bit until the grant arrives.
assign oCommitRequest = oTrigger | (wCommitRequest & ~iCommitGranted);

// The result is passed through combinationally.
assign oResult = iResult;
// Operand selection: while waiting, use the forwarded commit-bus value for a
// source that has a dependency and the latched issue value otherwise; when
// not waiting, take the operand straight from the issue bus.
assign oSource1 = (wWaitingDependency) ? ((wSrc1_Dependency)? (wSrc1_Fwd):wSrc1) : iIssueBus[`MOD_ISSUE_SRC1_DATA_RNG];
assign oSource0 = (wWaitingDependency) ? ((wSrc0_Dependency)? (wSrc0_Fwd):wSrc0) : iIssueBus[`MOD_ISSUE_SRC0_DATA_RNG];


// "Waiting for dependencies" state bit: enabled when a packet with a
// dependency is accepted and again when wDependencyResolved is high.
UPCOUNTER_POSEDGE # ( 1 ) DEP
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wSrc0_Dependency_Initial | wSrc1_Dependency_Initial | wDependencyResolved ),//***
.Initial( 1'b0 ),
.Q( wWaitingDependency )
);
// Per-source "has a pending dependency" state bits: enabled when the
// dependency is first seen and again when its resolved flag is high.
UPCOUNTER_POSEDGE # ( 1 ) DEPA
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wSrc0_Dependency_Initial | wSrc0_DependencyResolved ),
.Initial( 1'b0 ),
.Q( wSrc0_Dependency )
);

UPCOUNTER_POSEDGE # ( 1 ) DEPB
(
.Clock( Clock ),
.Reset( Reset ),
.Enable( wSrc1_Dependency_Initial | wSrc1_DependencyResolved ),
.Initial( 1'b0 ),
.Q( wSrc1_Dependency )
);


// Issue packet register: stores the whole packet when it is addressed to
// this station and splits it into the output fields and internal operands.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `MOD_ISSUE_PACKET_SIZE ) ISSUE_FFD
( Clock, Reset, wHandleCurrentIssue , iIssueBus, {oId,oDestination,oWE,oScale,wSource1_RS,wSrc1,wSource0_RS,wSrc0} );

// Tags are extracted from the latched operand words using the same bit
// range (`MOD_ISSUE_TAG0_RNG) for both sources.
assign wTag0 = wSrc0[`MOD_ISSUE_TAG0_RNG];
assign wTag1 = wSrc1[`MOD_ISSUE_TAG0_RNG];

endmodule
 
//-------------------------------------------------------------------------------------------------

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.