URL
https://opencores.org/ocsvn/theia_gpu/theia_gpu/trunk
Subversion Repositories theia_gpu
Compare Revisions
- This comparison shows the changes necessary to convert path
/theia_gpu/tags/latest_stable/rtl/Collaterals
- from Rev 143 to Rev 150
- ↔ Reverse comparison
Rev 143 → Rev 150
/aDefinitions.v
0,0 → 1,371
/********************************************************************************** |
Theia, Ray Cast Programmable Graphic Processing Unit. |
Copyright (C) 2009 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
|
|
/******************************************************************************* |
Module Description: |
|
This module defines constants that are going to be used |
all over the code. By now you may have noticed that all |
constants are pre-compilation define directives. This is |
mainly for simulation performance reasons. |
*******************************************************************************/ |
|
// Multi-core configuration.
// MAX_CORE_BITS is still maintained by hand: 2 ^ MAX_CORE_BITS = MAX_CORES.
`define MAX_CORES 4 //The number of cores, make sure you update MAX_CORE_BITS!
`define MAX_CORE_BITS 2 // 2 ^ MAX_CORE_BITS = MAX_CORES
`define MAX_TMEM_BANKS 4 //The number of memory banks for TMEM
// One select bit per core, all of them set. Built with a replication so the
// mask follows MAX_CORES automatically; the previous hard-coded literal
// (MAX_CORES'b1111) only covered a four core configuration.
// For MAX_CORES = 4 this is still 4'b1111.
`define SELECT_ALL_CORES {`MAX_CORES{1'b1}}
//--------------------------------------------------------------------------------- |
//Verilog provides a `default_nettype none compiler directive. When |
//this directive is set, implicit net declarations are disabled, which makes any |
//undeclared signal name a compile error. This is very useful to avoid annoying |
//automatic 1 bit long wire declarations where you don't want them to be! |
//NOTE: the directive stays in effect for everything compiled after this file, |
//so every net in the design has to be declared explicitly. |
`default_nettype none |
|
//The clock cycle. CLOCK_CYCLE is half of CLOCK_PERIOD. |
//NOTE(review): presumably these are the half-period / full-period delays used |
//by the simulation clock generator, in timescale units - confirm in the testbench. |
`define CLOCK_CYCLE 5 |
`define CLOCK_PERIOD 10 |
//--------------------------------------------------------------------------------- |
//Defines the Scale. This is very important because it sets the fixed point precision. |
//The Scale defines the number of bits that are used as the fractional part of the number. |
//The code has been written in such a way that allows you to change the value of the |
//Scale, so that it is possible to experiment with different scenarios. SCALE can be |
//no smaller than 1 and no bigger than WIDTH. |
`define SCALE 17 |
|
//The next section defines the length of the registers, buses and other structures, |
//do not change these values unless you really know what you are doing (seriously!) |
`define WIDTH 32 |
`define WB_WIDTH 32 //width of wish-bone buses |
`define LONG_WIDTH 64 |
|
//Wishbone cycle type selectors (read = 0, write = 1). |
`define WB_SIMPLE_READ_CYCLE 0 |
`define WB_SIMPLE_WRITE_CYCLE 1 |
//--------------------------------------------------------------------------------- |
//Next are the constants that define the size of the instructions. |
//Instructions are formed like this: |
// Type I: |
// Operand (of size INSTRUCTION_OP_LENGTH ) |
// DestinationAddr (of size DATA_ADDRESS_WIDTH ) |
// SourceAddr1 (of size DATA_ADDRESS_WIDTH ) |
// SourceAddr2 (of size DATA_ADDRESS_WIDTH ) |
// Type II: |
// Operand (of size INSTRUCTION_OP_LENGTH ) |
// DestinationAddr (of size DATA_ADDRESS_WIDTH ) |
// ImmediateValue (of size WIDTH = DATA_ADDRESS_WIDTH * 2 ) |
// |
//With the values below both layouts add up to INSTRUCTION_WIDTH: |
// 16 + 16 + 16 + 16 = 16 + 16 + 32 = 64 bits. |
// |
//You can play around with the size of instructions, but keep |
//in mind that Bits 3 and 4 of the Operand have a special meaning |
//that is used for the jump family of instructions (see Documentation). |
//Also the MSB of the opcode pattern (bit INSTRUCTION_IMM_BIT of the Operand) |
//is used by the decoder to distinguish between Type I and Type II instructions. |
//NOTE(review): INSTRUCTION_IMM_BITPOS = 54 = 48 + INSTRUCTION_IMM_BIT, which |
//presumably is the position of that bit inside the 64 bit instruction word when |
//the Operand occupies the top 16 bits - confirm against the decoder. |
`define INSTRUCTION_WIDTH 64 |
`define INSTRUCTION_OP_LENGTH 16 |
`define INSTRUCTION_IMM_BITPOS 54 |
`define INSTRUCTION_IMM_BIT 6 //don't change this! |
|
//Defines the Length of Memory blocks |
//NOTE(review): DATA_ROW_WIDTH = 96 = 3 * WIDTH, presumably one x/y/z triple per row - confirm. |
`define DATA_ROW_WIDTH 96 |
`define DATA_ADDRESS_WIDTH 16 |
`define ROM_ADDRESS_WIDTH 16 |
//Bit 15 of a ROM address; every ENTRYPOINT_INDEX_* value in this file has it set. |
`define ROM_ADDRESS_SEL_MASK `ROM_ADDRESS_WIDTH'h8000 |
|
//--------------------------------------------------------------------------------- |
//The next section defines the code memory entry point for the various code routines |
//Please keep this syntax ENTRYPOINT_ADRR_* (sic - the macros really are spelled |
//with a doubled R) because the perl script that |
//parses the user code expects this pattern in order to read in the tokens |
|
//Internal Entry points (default ROM Address) |
//Note that the addresses are not listed in ascending order. |
`define ENTRYPOINT_ADRR_INITIAL `ROM_ADDRESS_WIDTH'd0 //0 - This should always be zero |
`define ENTRYPOINT_ADRR_CPPU `ROM_ADDRESS_WIDTH'd44 |
`define ENTRYPOINT_ADRR_RGU `ROM_ADDRESS_WIDTH'd47 |
`define ENTRYPOINT_ADRR_AABBIU `ROM_ADDRESS_WIDTH'd69 |
`define ENTRYPOINT_ADRR_BIU `ROM_ADDRESS_WIDTH'd157 |
`define ENTRYPOINT_ADRR_PSU `ROM_ADDRESS_WIDTH'd232 |
`define ENTRYPOINT_ADRR_PSU2 `ROM_ADDRESS_WIDTH'd248 |
`define ENTRYPOINT_ADRR_TCC `ROM_ADDRESS_WIDTH'd190 |
`define ENTRYPOINT_ADRR_NPG `ROM_ADDRESS_WIDTH'd55 |
//User Entry points (default ROM Address) |
`define ENTRYPOINT_ADRR_USERCONSTANTS `ROM_ADDRESS_WIDTH'd276 |
`define ENTRYPOINT_ADRR_PIXELSHADER `ROM_ADDRESS_WIDTH'd278 |
`define ENTRYPOINT_ADRR_MAIN `ROM_ADDRESS_WIDTH'd37 |
|
//Please keep this syntax ENTRYPOINT_INDEX_* because the perl script that |
//parses the user code expects this pattern in order to read in the tokens |
//Each index is 0x8000 (bit 15 set) plus a sequential number, in the same |
//order as the ENTRYPOINT_ADRR_* list above. |
//Internal subroutines |
`define ENTRYPOINT_INDEX_INITIAL `ROM_ADDRESS_WIDTH'h8000 |
`define ENTRYPOINT_INDEX_CPPU `ROM_ADDRESS_WIDTH'h8001 |
`define ENTRYPOINT_INDEX_RGU `ROM_ADDRESS_WIDTH'h8002 |
`define ENTRYPOINT_INDEX_AABBIU `ROM_ADDRESS_WIDTH'h8003 |
`define ENTRYPOINT_INDEX_BIU `ROM_ADDRESS_WIDTH'h8004 |
`define ENTRYPOINT_INDEX_PSU `ROM_ADDRESS_WIDTH'h8005 |
`define ENTRYPOINT_INDEX_PSU2 `ROM_ADDRESS_WIDTH'h8006 |
`define ENTRYPOINT_INDEX_TCC `ROM_ADDRESS_WIDTH'h8007 |
`define ENTRYPOINT_INDEX_NPG `ROM_ADDRESS_WIDTH'h8008 |
//User defined subroutines |
`define ENTRYPOINT_INDEX_USERCONSTANTS `ROM_ADDRESS_WIDTH'h8009 |
`define ENTRYPOINT_INDEX_PIXELSHADER `ROM_ADDRESS_WIDTH'h800A |
`define ENTRYPOINT_INDEX_MAIN `ROM_ADDRESS_WIDTH'h800B |
|
//Numerically 0x8000, i.e. the same value as ENTRYPOINT_INDEX_INITIAL. |
`define USER_AABBIU_UCODE_ADDRESS `ROM_ADDRESS_WIDTH'b1000000000000000 |
//--------------------------------------------------------------------------------- |
//This handy little macro allows me to print stuff either to STDOUT or a file. |
//Notice that the compilation variable DUMP_CODE must be set if you want to print |
//to a file. In XILINX right click 'Simulate Behavioral Model' -> Properties and |
//under 'Specify `define macro name and value' type 'DEBUG=1|DUMP_CODE=1|DEBUG_CORE=<core you want to dump>' |
//NOTE: LOGME expands to an unterminated call. The use site has to supply the |
//remaining arguments and the closing parenthesis and semicolon. When DUMP_CODE |
//is defined, an identifier named ucode_file must be visible at the use site. |
`ifdef DUMP_CODE |
|
`define LOGME $fwrite(ucode_file, |
`else |
`define LOGME $write( |
`endif |
//--------------------------------------------------------------------------------- |
//Boolean constants: 32 bit wide (TRUE/FALSE) and 48 bit wide (RT_TRUE/RT_FALSE). |
`define TRUE 32'h1 |
`define FALSE 32'h0 |
`define RT_TRUE 48'b1 |
`define RT_FALSE 48'b0 |
//--------------------------------------------------------------------------------- |
|
`define GENERAL_PURPOSE_REG_ADDR_MASK `DATA_ADDRESS_WIDTH'h1F |
//VOID shares address 0 with CREG_LIGHT_INFO below. |
`define VOID `DATA_ADDRESS_WIDTH'd0 //0000 |
//** Control register bits **// |
//These are bit positions, not register addresses. |
`define CR_EN_LIGHTS 0 |
`define CR_EN_TEXTURE 1 |
`define CR_USER_AABBIU 2 |
/** Swapping registers **/ |
//** Configuration Registers **// |
`define CREG_LIGHT_INFO `DATA_ADDRESS_WIDTH'd0 |
`define CREG_CAMERA_POSITION `DATA_ADDRESS_WIDTH'd1 |
`define CREG_PROJECTION_WINDOW_MIN `DATA_ADDRESS_WIDTH'd2 |
`define CREG_PROJECTION_WINDOW_MAX `DATA_ADDRESS_WIDTH'd3 |
`define CREG_RESOLUTION `DATA_ADDRESS_WIDTH'd4 |
`define CREG_TEXTURE_SIZE `DATA_ADDRESS_WIDTH'd5 |
`define CREG_PIXEL_2D_INITIAL_POSITION `DATA_ADDRESS_WIDTH'd6 |
`define CREG_PIXEL_2D_FINAL_POSITION `DATA_ADDRESS_WIDTH'd7 |
//The next two names are aliases for the same address (8). |
`define CREG_FIRST_LIGTH `DATA_ADDRESS_WIDTH'd8 |
`define CREG_FIRST_LIGTH_DIFFUSE `DATA_ADDRESS_WIDTH'd8 |
//OK, so from address 0x06 to 0x0F is where the lights are, watch out values are hardcoded |
//for now!! (look in ROM.v for hardcoded values!!!) |
//NOTE(review): per the definitions above addresses 6 and 7 hold the pixel 2D |
//positions and the first light sits at address 8, so the 0x06 start quoted |
//here looks stale - confirm against ROM.v. |
|
|
//Don't change the order of the registers. CREG_V* and CREG_UV* registers |
//need to be in that specific order for the triangle fetcher to work |
//correctly! |
|
`define CREG_AABBMIN `DATA_ADDRESS_WIDTH'd42 |
`define CREG_AABBMAX `DATA_ADDRESS_WIDTH'd43 |
`define CREG_V0 `DATA_ADDRESS_WIDTH'd44 |
`define CREG_UV0 `DATA_ADDRESS_WIDTH'd45 |
`define CREG_V1 `DATA_ADDRESS_WIDTH'd46 |
`define CREG_UV1 `DATA_ADDRESS_WIDTH'd47 |
`define CREG_V2 `DATA_ADDRESS_WIDTH'd48 |
`define CREG_UV2 `DATA_ADDRESS_WIDTH'd49 |
`define CREG_TRI_DIFFUSE `DATA_ADDRESS_WIDTH'd50 |
//Addresses 51 and 52 have no name in this file. |
`define CREG_TEX_COLOR1 `DATA_ADDRESS_WIDTH'd53 |
`define CREG_TEX_COLOR2 `DATA_ADDRESS_WIDTH'd54 |
`define CREG_TEX_COLOR3 `DATA_ADDRESS_WIDTH'd55 |
`define CREG_TEX_COLOR4 `DATA_ADDRESS_WIDTH'd56 |
`define CREG_TEX_COLOR5 `DATA_ADDRESS_WIDTH'd57 |
`define CREG_TEX_COLOR6 `DATA_ADDRESS_WIDTH'd58 |
`define CREG_TEX_COLOR7 `DATA_ADDRESS_WIDTH'd59 |
|
|
/** Non-Swapping registers **/ |
// ** User Registers **// |
//General Purpose registers, the user may put what ever he/she |
//wants in here... |
//C1..C7 occupy addresses 64-70 and R1..R12 occupy addresses 71-82. |
`define C1 `DATA_ADDRESS_WIDTH'd64 |
`define C2 `DATA_ADDRESS_WIDTH'd65 |
`define C3 `DATA_ADDRESS_WIDTH'd66 |
`define C4 `DATA_ADDRESS_WIDTH'd67 |
`define C5 `DATA_ADDRESS_WIDTH'd68 |
`define C6 `DATA_ADDRESS_WIDTH'd69 |
`define C7 `DATA_ADDRESS_WIDTH'd70 |
`define R1 `DATA_ADDRESS_WIDTH'd71 |
`define R2 `DATA_ADDRESS_WIDTH'd72 |
`define R3 `DATA_ADDRESS_WIDTH'd73 |
`define R4 `DATA_ADDRESS_WIDTH'd74 |
`define R5 `DATA_ADDRESS_WIDTH'd75 |
`define R6 `DATA_ADDRESS_WIDTH'd76 |
`define R7 `DATA_ADDRESS_WIDTH'd77 |
`define R8 `DATA_ADDRESS_WIDTH'd78 |
`define R9 `DATA_ADDRESS_WIDTH'd79 |
`define R10 `DATA_ADDRESS_WIDTH'd80 |
`define R11 `DATA_ADDRESS_WIDTH'd81 |
`define R12 `DATA_ADDRESS_WIDTH'd82 |
|
//** Internal Registers **// |
//Addresses 83-122, contiguous. |
`define CREG_PROJECTION_WINDOW_SCALE `DATA_ADDRESS_WIDTH'd83 |
`define CREG_UNORMALIZED_DIRECTION `DATA_ADDRESS_WIDTH'd84 |
`define CREG_RAY_DIRECTION `DATA_ADDRESS_WIDTH'd85 |
`define CREG_E1_LAST `DATA_ADDRESS_WIDTH'd86 |
`define CREG_E2_LAST `DATA_ADDRESS_WIDTH'd87 |
`define CREG_T `DATA_ADDRESS_WIDTH'd88 |
`define CREG_P `DATA_ADDRESS_WIDTH'd89 |
`define CREG_Q `DATA_ADDRESS_WIDTH'd90 |
`define CREG_UV0_LAST `DATA_ADDRESS_WIDTH'd91 |
`define CREG_UV1_LAST `DATA_ADDRESS_WIDTH'd92 |
`define CREG_UV2_LAST `DATA_ADDRESS_WIDTH'd93 |
`define CREG_TRI_DIFFUSE_LAST `DATA_ADDRESS_WIDTH'd94 |
`define CREG_LAST_t `DATA_ADDRESS_WIDTH'd95 |
`define CREG_LAST_u `DATA_ADDRESS_WIDTH'd96 |
`define CREG_LAST_v `DATA_ADDRESS_WIDTH'd97 |
`define CREG_COLOR_ACC `DATA_ADDRESS_WIDTH'd98 |
//Careful: CREG_t (99) and CREG_T (88) differ only in letter case. |
`define CREG_t `DATA_ADDRESS_WIDTH'd99 |
`define CREG_E1 `DATA_ADDRESS_WIDTH'd100 |
`define CREG_E2 `DATA_ADDRESS_WIDTH'd101 |
`define CREG_DELTA `DATA_ADDRESS_WIDTH'd102 |
`define CREG_u `DATA_ADDRESS_WIDTH'd103 |
`define CREG_v `DATA_ADDRESS_WIDTH'd104 |
`define CREG_H1 `DATA_ADDRESS_WIDTH'd105 |
`define CREG_H2 `DATA_ADDRESS_WIDTH'd106 |
`define CREG_H3 `DATA_ADDRESS_WIDTH'd107 |
`define CREG_PIXEL_PITCH `DATA_ADDRESS_WIDTH'd108 |
|
`define CREG_LAST_COL `DATA_ADDRESS_WIDTH'd109 //the last valid column, simply CREG_RESOLUTIONX - 1 |
`define CREG_TEXTURE_COLOR `DATA_ADDRESS_WIDTH'd110 |
`define CREG_PIXEL_2D_POSITION `DATA_ADDRESS_WIDTH'd111 |
`define CREG_TEXWEIGHT1 `DATA_ADDRESS_WIDTH'd112 |
`define CREG_TEXWEIGHT2 `DATA_ADDRESS_WIDTH'd113 |
`define CREG_TEXWEIGHT3 `DATA_ADDRESS_WIDTH'd114 |
`define CREG_TEXWEIGHT4 `DATA_ADDRESS_WIDTH'd115 |
`define CREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd116 |
`define CREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd117 |
//R99 lives inside the internal register range, not next to R1..R12. |
`define R99 `DATA_ADDRESS_WIDTH'd118 |
`define CREG_ZERO `DATA_ADDRESS_WIDTH'd119 |
`define CREG_CURRENT_OUTPUT_PIXEL `DATA_ADDRESS_WIDTH'd120 |
`define CREG_3 `DATA_ADDRESS_WIDTH'd121 |
`define CREG_012 `DATA_ADDRESS_WIDTH'd122 |
|
//** Output registers **// |
//Addresses 128-131. |
|
`define OREG_PIXEL_COLOR `DATA_ADDRESS_WIDTH'd128 |
`define OREG_TEX_COORD1 `DATA_ADDRESS_WIDTH'd129 |
`define OREG_TEX_COORD2 `DATA_ADDRESS_WIDTH'd130 |
`define OREG_ADDR_O `DATA_ADDRESS_WIDTH'd131 |
//------------------------------------------------------------- |
//*** Instruction Set *** |
//The order of the instructions is important here! Don't change |
//it unless you know what you are doing. For example all the 'SET' |
//family of instructions have the MSB bit in 1. This means that |
//if you add an instruction and the MSB=1, this instruction will be treated |
//as type II (see manual) meaning the second 32bit argument is expected to be |
//an immediate value instead of a register address! |
//Another example is that in the JUMP family Bits 3 and 4 have a special |
//meaning: b4b3 = 01 => X jump type, b4b3 = 10 => Y jump type, finally |
//b4b3 = 11 means Z jump type. |
//All this is just to tell you: Don't play with these values! |
//The number in each trailing comment is the decimal value of the opcode. |
//Opcodes 6 and 46 are not assigned in this file. |
|
// *** Type I Instructions (OP DST REG1 REG2) *** |
`define NOP `INSTRUCTION_OP_LENGTH'b0_000000 //0 |
`define ADD `INSTRUCTION_OP_LENGTH'b0_000001 //1 |
`define SUB `INSTRUCTION_OP_LENGTH'b0_000010 //2 |
`define DIV `INSTRUCTION_OP_LENGTH'b0_000011 //3 |
`define MUL `INSTRUCTION_OP_LENGTH'b0_000100 //4 |
`define MAG `INSTRUCTION_OP_LENGTH'b0_000101 //5 |
`define COPY `INSTRUCTION_OP_LENGTH'b0_000111 //7 |
`define JGX `INSTRUCTION_OP_LENGTH'b0_001_000 //8 |
`define JLX `INSTRUCTION_OP_LENGTH'b0_001_001 //9 |
`define JEQX `INSTRUCTION_OP_LENGTH'b0_001_010 //10 - A |
`define JNEX `INSTRUCTION_OP_LENGTH'b0_001_011 //11 - B |
`define JGEX `INSTRUCTION_OP_LENGTH'b0_001_100 //12 - C |
`define JLEX `INSTRUCTION_OP_LENGTH'b0_001_101 //13 - D |
`define INC `INSTRUCTION_OP_LENGTH'b0_001_110 //14 - E |
`define ZERO `INSTRUCTION_OP_LENGTH'b0_001_111 //15 - F |
`define JGY `INSTRUCTION_OP_LENGTH'b0_010_000 //16 |
`define JLY `INSTRUCTION_OP_LENGTH'b0_010_001 //17 |
`define JEQY `INSTRUCTION_OP_LENGTH'b0_010_010 //18 |
`define JNEY `INSTRUCTION_OP_LENGTH'b0_010_011 //19 |
`define JGEY `INSTRUCTION_OP_LENGTH'b0_010_100 //20 |
`define JLEY `INSTRUCTION_OP_LENGTH'b0_010_101 //21 |
`define CROSS `INSTRUCTION_OP_LENGTH'b0_010_110 //22 |
`define DOT `INSTRUCTION_OP_LENGTH'b0_010_111 //23 |
`define JGZ `INSTRUCTION_OP_LENGTH'b0_011_000 //24 |
`define JLZ `INSTRUCTION_OP_LENGTH'b0_011_001 //25 |
`define JEQZ `INSTRUCTION_OP_LENGTH'b0_011_010 //26 |
`define JNEZ `INSTRUCTION_OP_LENGTH'b0_011_011 //27 |
`define JGEZ `INSTRUCTION_OP_LENGTH'b0_011_100 //28 |
`define JLEZ `INSTRUCTION_OP_LENGTH'b0_011_101 //29 |
|
//The next instruction is for simulation debug only |
//not to be synthesized! Pretty much behaves the same |
//as a NOP, only that prints the register value to |
//a log file called 'Registers.log' |
//It only exists when DEBUG is defined. |
`ifdef DEBUG |
`define DEBUG_PRINT `INSTRUCTION_OP_LENGTH'b0_011_110 //30 |
`endif |
|
`define MULP `INSTRUCTION_OP_LENGTH'b0_011_111 //31 R1.z = S1.x * S1.y |
`define MOD `INSTRUCTION_OP_LENGTH'b0_100_000 //32 R = MODULO( S1,S2 ) |
`define FRAC `INSTRUCTION_OP_LENGTH'b0_100_001 //33 R =FractionalPart( S1 ) |
`define INTP `INSTRUCTION_OP_LENGTH'b0_100_010 //34 R =IntegerPart( S1 ) |
`define NEG `INSTRUCTION_OP_LENGTH'b0_100_011 //35 R = -S1 |
`define DEC `INSTRUCTION_OP_LENGTH'b0_100_100 //36 R = S1-- |
`define XCHANGEX `INSTRUCTION_OP_LENGTH'b0_100_101 //37 R.x = S2.x, R.y = S1.y, R.z = S1.z |
`define XCHANGEY `INSTRUCTION_OP_LENGTH'b0_100_110 //38 R.x = S1.x, R.y = S2.y, R.z = S1.z |
`define XCHANGEZ `INSTRUCTION_OP_LENGTH'b0_100_111 //39 R.x = S1.x, R.y = S1.y, R.z = S2.z |
`define IMUL `INSTRUCTION_OP_LENGTH'b0_101_000 //40 R = INTEGER( S1 * S2 ) |
`define UNSCALE `INSTRUCTION_OP_LENGTH'b0_101_001 //41 R = S1 >> SCALE |
`define RESCALE `INSTRUCTION_OP_LENGTH'b0_101_010 //42 R = S1 << SCALE |
`define INCX `INSTRUCTION_OP_LENGTH'b0_101_011 //43 R.X = S1.X + 1 |
`define INCY `INSTRUCTION_OP_LENGTH'b0_101_100 //44 R.Y = S1.Y + 1 |
`define INCZ `INSTRUCTION_OP_LENGTH'b0_101_101 //45 R.Z = S1.Z + 1 |
`define OMWRITE `INSTRUCTION_OP_LENGTH'b0_101_111 //47 IO write to O memory |
`define TMREAD `INSTRUCTION_OP_LENGTH'b0_110_000 //48 IO read from T memory |
`define LEA `INSTRUCTION_OP_LENGTH'b0_110_001 //49 Load effective address |
|
//*** Type II Instructions (OP DST REG1 IMM) *** |
//Note that RETURN (64) and RET (90) are two distinct opcodes. |
`define RETURN `INSTRUCTION_OP_LENGTH'b1_000000 //64 0x40 |
`define SETX `INSTRUCTION_OP_LENGTH'b1_000001 //65 0x41 |
`define SETY `INSTRUCTION_OP_LENGTH'b1_000010 //66 |
`define SETZ `INSTRUCTION_OP_LENGTH'b1_000011 //67 |
`define SWIZZLE3D `INSTRUCTION_OP_LENGTH'b1_000100 //68 |
`define JMP `INSTRUCTION_OP_LENGTH'b1_011000 //88 0x58 |
`define CALL `INSTRUCTION_OP_LENGTH'b1_011001 //89 0x59 |
`define RET `INSTRUCTION_OP_LENGTH'b1_011010 //90 0x5A |
//------------------------------------------------------------- |
|
//The selector values defined for the SWIZZLE3D instruction come next. |
//22 of the 27 possible three-component combinations have a name here; |
//XYZ, XZY, YZX, ZXY and ZYX are not defined in this file. |
`define SWIZZLE_XXX 32'd0 |
`define SWIZZLE_YYY 32'd1 |
`define SWIZZLE_ZZZ 32'd2 |
`define SWIZZLE_XYY 32'd3 |
`define SWIZZLE_XXY 32'd4 |
`define SWIZZLE_XZZ 32'd5 |
`define SWIZZLE_XXZ 32'd6 |
`define SWIZZLE_YXX 32'd7 |
`define SWIZZLE_YYX 32'd8 |
`define SWIZZLE_YZZ 32'd9 |
`define SWIZZLE_YYZ 32'd10 |
`define SWIZZLE_ZXX 32'd11 |
`define SWIZZLE_ZZX 32'd12 |
`define SWIZZLE_ZYY 32'd13 |
`define SWIZZLE_ZZY 32'd14 |
`define SWIZZLE_XZX 32'd15 |
`define SWIZZLE_XYX 32'd16 |
`define SWIZZLE_YXY 32'd17 |
`define SWIZZLE_YZY 32'd18 |
`define SWIZZLE_ZXZ 32'd19 |
`define SWIZZLE_ZYZ 32'd20 |
`define SWIZZLE_YXZ 32'd21 |
|
|
|
/Module_FixedPointDivision.v
0,0 → 1,328
/* |
Fixed point Division Module Qm.n |
C = (A << n) / B |
|
*/ |
|
|
//Division State Machine Constants |
//State encodings for the 6 bit state register of UnsignedIntegerDivision. |
//The idle state (FPS_AFTER_RESET_STATE, value 0) is defined a few lines below. |
`define INITIAL_DIVISION_STATE 6'd1 |
`define DIVISION_REVERSE_LAST_ITERATION 6'd2 |
`define PRE_CALCULATE_REMAINDER 6'd3 |
`define CALCULATE_REMAINDER 6'd4 |
`define WRITE_DIVISION_RESULT 6'd5 |
|
|
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//Idle state: entered on Reset and again after a result has been read. |
`define FPS_AFTER_RESET_STATE 0 |
//----------------------------------------------------------------- |
//This only works if the divisor is a power of 2: |
//x % 2^n == x & (2^n - 1). |
/* |
module Modulus2N |
( |
input wire Clock, |
input wire Reset, |
input wire [`WIDTH-1:0] iDividend,iDivisor, |
output reg [`WIDTH-1:0] oQuotient, |
input wire iInputReady, //Is the input data valid? |
output reg oOutputReady //Our output data is ready! |
); |
|
|
|
FF1_POSEDGE_SYNCRONOUS_RESET FFOutputReadyDelay2 |
( |
.Clock( Clock ), |
.Clear( Reset ), |
.D( iInputReady ), |
.Q( oOutputReady ) |
); |
|
assign oQuotient = (iDividend & (iDivisor-1'b1)); |
|
|
endmodule |
*/ |
//----------------------------------------------------------------- |
/* |
Be aware that the unsigned division algorithm doesn't know or care |
about the sign bit of the result (bit 31). So if your divisor is very |
small there is a chance that bit 31 of the unsigned division result is |
one even though the result should be positive. |
|
*/ |
//-----------------------------------------------------------------
// SignedIntegerDivision
//
// Signed wrapper around UnsignedIntegerDivision. The operands are
// captured while iInputReady is high, converted to their absolute
// values, divided by the unsigned core and the sign is re-applied to
// the result.
//
// Ports:
//   Clock, Reset  - clock and synchronous reset.
//   iDividend     - signed dividend (two's complement, `WIDTH bits).
//   iDivisor      - signed divisor  (two's complement, `WIDTH bits).
//   xQuotient     - signed result, updated on clock edges where the
//                   unsigned core reports its result as ready. It is
//                   not cleared by Reset.
//   iInputReady   - input data valid. The unsigned core keeps its result
//                   asserted until this goes back to 0.
//   OutputReady   - registered copy of the core's ready flag, forced
//                   to 0 while Reset is high.
//
// Changes with respect to the previous revision:
//   * the registered outputs use nonblocking assignments, so modules
//     sampling them on the same clock edge no longer race;
//   * the sign position and the "force positive" mask are derived
//     from `WIDTH instead of being hard-coded for 32 bits.
//-----------------------------------------------------------------
module SignedIntegerDivision
(
    input  wire              Clock, Reset,
    input  wire [`WIDTH-1:0] iDividend, iDivisor,
    output reg  [`WIDTH-1:0] xQuotient,
    input  wire              iInputReady,   //Is the input data valid?
    output reg               OutputReady    //Our output data is ready!
);

//Index of the sign bit (31 for the default 32 bit configuration).
parameter SIGN = `WIDTH-1;

wire              Sign;
wire [`WIDTH-1:0] wDividend, wDivisor;
wire              wInputReady;

//Operand registers: load only while iInputReady is high, so the values
//(and therefore Sign) stay stable while the division is running.
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( iInputReady ),
    .D(      iDividend ),
    .Q(      wDividend )
);

FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( iInputReady ),
    .D(      iDivisor ),
    .Q(      wDivisor )
);

//iInputReady delayed by one clock, so the unsigned core is started
//only after the operand registers above have been loaded.
FFD_POSEDGE_SYNCRONOUS_RESET # ( 1 ) FFD3
(
    .Clock(  Clock ),
    .Reset(  Reset ),
    .Enable( 1'b1 ),
    .D(      iInputReady ),
    .Q(      wInputReady )
);

wire [`WIDTH-1:0] wAbsDividend, wAbsDivisor;
wire [`WIDTH-1:0] wQuottientTemp;
wire [`WIDTH-1:0] wAbsQuotient;
wire              DivReady;

//The result is negative when exactly one operand is negative.
assign Sign = wDividend[SIGN] ^ wDivisor[SIGN];

//Two's complement absolute values of the operands.
assign wAbsDividend = ( wDividend[SIGN] == 1 ) ?
    ~wDividend + 1'b1 : wDividend;

assign wAbsDivisor = ( wDivisor[SIGN] == 1 ) ?
    ~wDivisor + 1'b1 : wDivisor;

UnsignedIntegerDivision UDIV
(
    .Clock(       Clock ),
    .Reset(       Reset ),
    .iDividend(   wAbsDividend ),
    .iDivisor(    wAbsDivisor ),
    .xQuotient(   wQuottientTemp ),
    .iInputReady( wInputReady ),
    .OutputReady( DivReady )
);

//Make sure the output from the 'unsigned' operation is really positive:
//clear the top bit (equals 32'h7FFFFFFF when `WIDTH is 32).
assign wAbsQuotient = wQuottientTemp & {1'b0, {(`WIDTH-1){1'b1}}};

//-----------------------------------------------
//Output registers.
always @ ( posedge Clock )
begin
    if ( DivReady )
    begin
        if ( Sign == 1 )
            xQuotient <= ~wAbsQuotient + 1'b1;   //negate the magnitude
        else
            xQuotient <= wAbsQuotient;
    end

    if ( Reset == 1 )
        OutputReady <= 1'b0;
    else
        OutputReady <= DivReady;
end
//-----------------------------------------------

endmodule
//----------------------------------------------------------------- |
/* |
|
Returns the integer part (Quotient) of a division. |
|
Division is the process of repeated subtraction. |
Like the long division we learned in grade school, |
a binary division algorithm works from the high |
order digits to the low order digits and generates |
a quotient (division result) with each step. |
The division algorithm is divided into two steps: |
* Shift the upper bits of the dividend (the number |
we are dividing into) into the remainder. |
* Subtract the divisor from the value in the remainder. |
The high order bit of the result become a bit of |
the quotient (division result). |
|
*/ |
|
//----------------------------------------------------------------- |
/* |
Try to implement the division as a FSM, |
this is basically because the behavioral division has a for loop |
with a variable loop limit counter, which I think is not friendly |
to the synthesizer (dumb dumb synthesizer :) ) |
*/ |
//-----------------------------------------------------------------
// UnsignedIntegerDivision
//
// Sequential unsigned fixed point divider implemented as a FSM:
//
//     xQuotient = ( iDividend << `SCALE ) / iDivisor
//
// Handshake:
//   * the FSM idles in FPS_AFTER_RESET_STATE until iInputReady is 1;
//   * iDividend / iDivisor are sampled in INITIAL_DIVISION_STATE, one
//     state after iInputReady was seen, so they have to be held;
//   * OutputReady goes to 1 in WRITE_DIVISION_RESULT and the FSM stays
//     there until iInputReady is 0, then it returns to idle.
//
// Special cases handled in INITIAL_DIVISION_STATE:
//   * divisor == 0          -> result 32'h0FFF_FFFF
//     (NOTE(review): this is not the largest positive value
//      32'h7FFF_FFFF - presumably deliberate, confirm);
//   * divisor >  dividend   -> result 0;
//   * divisor == dividend   -> result 1.
//
// Changes with respect to the previous revision:
//   * the result slice is `WIDTH bits wide (it used to be
//     Quotient[32:0], 33 bits assigned to a 32 bit register);
//   * a default state was added so that an unknown / illegal state
//     returns to idle instead of locking the FSM up;
//   * xQuotient and OutputReady use nonblocking assignments, because
//     other modules sample them on the same clock edge.
//   The working registers keep their blocking assignments: the
//   statements inside each state depend on their order.
//-----------------------------------------------------------------
module UnsignedIntegerDivision(
    input  wire              Clock, Reset,
    input  wire [`WIDTH-1:0] iDividend, iDivisor,
    output reg  [`WIDTH-1:0] xQuotient,
    input  wire              iInputReady,   //Is the input data valid?
    output reg               OutputReady    //Our output data is ready!
);

//The working registers are 64 bits wide.
reg [63:0]       Dividend, Divisor;
reg [`WIDTH-1:0] i, num_bits;
reg [63:0]       t, q, d, Bit;
reg [63:0]       Quotient, Remainder;

reg [5:0] CurrentState, NextState;

//----------------------------------------
//State register and Reset sequence.
//The state is updated on the falling edge, half a cycle after
//NextState was computed on the rising edge.
always @(negedge Clock)
begin
    if ( Reset != 1 )
        CurrentState <= NextState;
    else
        CurrentState <= `FPS_AFTER_RESET_STATE;
end
//----------------------------------------

always @ (posedge Clock)
begin
    case (CurrentState)
    //----------------------------------------
    //Idle: wait for the client to present valid operands.
    `FPS_AFTER_RESET_STATE:
    begin
        OutputReady <= 1'b0;
        NextState = ( iInputReady == 1 ) ?
            `INITIAL_DIVISION_STATE : `FPS_AFTER_RESET_STATE;
    end
    //----------------------------------------
    //Sample the operands, pre-scale the dividend and take care of the
    //trivial cases.
    `INITIAL_DIVISION_STATE:
    begin
        Dividend  = iDividend;
        Dividend  = Dividend << `SCALE;

        Divisor   = iDivisor;
        Remainder = 0;
        Quotient  = 0;

        if (Divisor == 0)
        begin
            //Division by zero: return a fixed value.
            Quotient[31:0] = 32'h0FFF_FFFF;
            NextState = `WRITE_DIVISION_RESULT;
        end
        else if (Divisor > Dividend)
        begin
            Remainder = Dividend;
            NextState = `WRITE_DIVISION_RESULT;
        end
        else if (Divisor == Dividend)
        begin
            Quotient  = 1;
            NextState = `WRITE_DIVISION_RESULT;
        end
        else
        begin
            NextState = `PRE_CALCULATE_REMAINDER;
        end

        num_bits = 64;
    end
    //----------------------------------------
    //Shift dividend bits (MSB first) into the remainder until the
    //remainder is no longer smaller than the divisor. num_bits counts
    //the dividend bits that are still left.
    `PRE_CALCULATE_REMAINDER:
    begin
        Bit       = (Dividend & 64'h8000000000000000 ) >> 63;
        Remainder = (Remainder << 1) | Bit;
        d         = Dividend;           //saved for the reverse step
        Dividend  = Dividend << 1;
        num_bits  = num_bits - 1;

        NextState = (Remainder < Divisor) ?
            `PRE_CALCULATE_REMAINDER : `DIVISION_REVERSE_LAST_ITERATION;
    end
    //----------------------------------------
    /*
    The loop above always goes one iteration too far.
    To avoid inserting an "if" statement inside the loop
    the last iteration is simply reversed.
    */
    `DIVISION_REVERSE_LAST_ITERATION:
    begin
        Dividend  = d;
        Remainder = Remainder >> 1;
        num_bits  = num_bits + 1;
        i         = 0;

        NextState = `CALCULATE_REMAINDER;
    end
    //----------------------------------------
    //One quotient bit per clock: shift in a dividend bit, try to
    //subtract the divisor and keep the difference when it is not
    //negative.
    `CALCULATE_REMAINDER:
    begin
        Bit       = (Dividend & 64'h8000000000000000 ) >> 63;
        Remainder = (Remainder << 1) | Bit;
        t         = Remainder - Divisor;
        //q is 1 when the subtraction did not go negative (bit 63 clear)
        q         = !((t & 64'h8000000000000000 ) >> 63);
        Dividend  = Dividend << 1;
        Quotient  = (Quotient << 1) | q;
        if ( q != 0 )
            Remainder = t;
        i = i + 1;

        if (i < num_bits)
            NextState = `CALCULATE_REMAINDER;
        else
            NextState = `WRITE_DIVISION_RESULT;
    end
    //----------------------------------------
    //Will go to the IDLE leaving the Result Registers
    //with the current results until next stuff comes
    //So, stay in this state until our client sets iInputReady
    //to 0 telling us he read the result
    `WRITE_DIVISION_RESULT:
    begin
        xQuotient   <= Quotient[`WIDTH-1:0];  //Simply chop to round
        OutputReady <= 1'b1;

        NextState = (iInputReady == 0) ?
            `FPS_AFTER_RESET_STATE : `WRITE_DIVISION_RESULT;
    end
    //----------------------------------------
    //Unknown or illegal state: recover by going back to idle.
    default:
    begin
        OutputReady <= 1'b0;
        NextState = `FPS_AFTER_RESET_STATE;
    end
    endcase

end //always
endmodule
//----------------------------------------------------------------- |
/Module_FixedPointSquareRoot.v
0,0 → 1,120
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//Square Root State Machine Constants |
//State encodings used by the FixedPointSquareRoot state machine. |
`define SQUARE_ROOT_LOOP 1 |
`define WRITE_SQUARE_ROOT_RESULT 2 |
|
|
//Idle state: entered on Reset and again after a result has been read. |
`define SR_AFTER_RESET_STATE 0 |
//----------------------------------------------------------------- |
/* |
|
Calculates the Square Root of a Fixed Point Number |
Input: Q32.32 |
Output: Q16.16 |
Notice that the result has half the precision of the operand!! |
*/ |
//-----------------------------------------------------------------
// FixedPointSquareRoot
//
// Sequential square root of a fixed point number, implemented as a FSM.
// The operand is shifted left by `SCALE and then consumed two bits per
// clock, from bit pair 'group' = 32 down to 0 (33 passes); each pass
// decides one bit of Result.
//
// Ports:
//   Clock, Reset - clock and reset (Reset forces the idle state).
//   Operand      - `LONG_WIDTH bit operand.
//   iInputReady  - operand valid; the FSM starts one clock after it
//                  is seen high and, once finished, waits for it to
//                  go low before returning to idle.
//   OutputReady  - set while Result holds a finished computation.
//   Result       - `WIDTH bit result, cleared in the idle state.
//
// Changes with respect to the previous revision:
//   * myInputReady is written with a nonblocking assignment: it is
//     written in one posedge block and read in another one, which
//     with a blocking assignment is a simulation race;
//   * a default state sends unknown / illegal states back to idle.
//   The datapath registers keep their blocking assignments because
//   the statements inside each state depend on their order.
//-----------------------------------------------------------------
module FixedPointSquareRoot
(
    input  wire                   Clock,
    input  wire                   Reset,
    input  wire [`LONG_WIDTH-1:0] Operand,
    input  wire                   iInputReady,
    output reg                    OutputReady,
    output reg  [`WIDTH-1:0]      Result
);

reg [63:0]       x;                 //operand being consumed
reg [0:`WIDTH-1] group, sum, diff;
reg [0:`WIDTH-1] temp1, temp2;
reg [5:0]        CurrentState, NextState;

reg myInputReady;                   //iInputReady delayed by one clock

//----------------------------------------
always @(posedge Clock)
begin
    myInputReady <= iInputReady;
end
//----------------------------------------
//State register: updated on the falling edge, half a cycle after
//NextState was computed on the rising edge.
always @(negedge Clock)
begin
    if ( Reset != 1 )
        CurrentState <= NextState;
    else
        CurrentState <= `SR_AFTER_RESET_STATE;
end
//----------------------------------------

always @ (posedge Clock)
begin
    case (CurrentState)
    //----------------------------------------
    //Idle: clear the working registers and wait for an operand.
    `SR_AFTER_RESET_STATE:
    begin
        OutputReady = 0;
        Result      = 0;
        sum         = 0;
        diff        = 0;
        group       = 32; //WAS 16
        x           = 0;
        if ( myInputReady == 1 )
        begin
            x = Operand;
            x = x << `SCALE;
            NextState = `SQUARE_ROOT_LOOP;
        end
        else
            NextState = `SR_AFTER_RESET_STATE;
    end
    //----------------------------------------
    //One result bit per pass.
    `SQUARE_ROOT_LOOP:
    begin
        sum   = sum << 1;
        sum   = sum + 1;
        temp1 = diff << 2;
        temp2 = group << 1;                 //group * 2
        //bring down the next two operand bits
        diff  = temp1 + ((x >> temp2) & 3);

        if (sum > diff)
        begin
            //trial value does not fit: this result bit stays 0
            sum = sum - 1;
        end
        else
        begin
            //trial value fits: set bit 'group' of the result
            Result = Result + (1 << group);
            diff   = diff - sum;
            sum    = sum + 1;
        end

        if ( group != 0 )
        begin
            group     = group - 1;
            NextState = `SQUARE_ROOT_LOOP;
        end
        else
        begin
            NextState = `WRITE_SQUARE_ROOT_RESULT;
        end
    end
    //----------------------------------------
    //Hold the result until the client drops iInputReady.
    `WRITE_SQUARE_ROOT_RESULT:
    begin
        OutputReady = 1;
        NextState = (iInputReady == 0) ?
            `SR_AFTER_RESET_STATE : `WRITE_SQUARE_ROOT_RESULT;
    end
    //----------------------------------------
    //Unknown or illegal state: recover by going back to idle.
    default:
    begin
        OutputReady = 0;
        NextState   = `SR_AFTER_RESET_STATE;
    end
    //----------------------------------------
    endcase
end //always
endmodule
//----------------------------------------------------------------- |
/Collaterals.v
0,0 → 1,653
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
/********************************************************************************** |
Theia, Ray Cast Programable graphic Processing Unit. |
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com) |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
***********************************************************************************/ |
//------------------------------------------------ |
// D flip-flop bank with asynchronous, active high clear.
//   SIZE  - width of the register (defaults to `WIDTH).
//   Clock - rising edge clock.
//   Clear - asynchronous clear: Q goes to zero as soon as it rises.
//   D / Q - data input / registered output.
// Nonblocking assignments are used so that logic sampling Q on the same
// clock edge always sees the previous value (no simulation race).
module FFD_POSEDGE_ASYNC_RESET # ( parameter SIZE=`WIDTH )
(
    input  wire            Clock,
    input  wire            Clear,
    input  wire [SIZE-1:0] D,
    output reg  [SIZE-1:0] Q
);

always @(posedge Clock or posedge Clear)
begin
    if (Clear)
        Q <= {SIZE{1'b0}};
    else
        Q <= D;
end

endmodule
//---------------------------------------------------- |
// D flip-flop bank with synchronous reset and clock enable.
//   SIZE   - width of the register (defaults to `WIDTH).
//   Clock  - rising edge clock.
//   Reset  - synchronous, active high; has priority over Enable.
//   Enable - Q captures D only while this is high, otherwise Q holds.
//   D / Q  - data input / registered output.
// The reset value is sized from SIZE (it used to be sized from `WIDTH,
// which mismatches the register whenever SIZE is overridden).
module FFD_POSEDGE_SYNCRONOUS_RESET # ( parameter SIZE=`WIDTH )
(
    input  wire            Clock,
    input  wire            Reset,
    input  wire            Enable,
    input  wire [SIZE-1:0] D,
    output reg  [SIZE-1:0] Q
);

always @ (posedge Clock)
begin
    if ( Reset )
        Q <= {SIZE{1'b0}};
    else
    begin
        if (Enable)
            Q <= D;
    end
end//always

endmodule
//------------------------------------------------ |
// Up counter with synchronous load.
//   SIZE    - width of the counter (defaults to `WIDTH).
//   Clock   - rising edge clock.
//   Reset   - synchronous, active high: loads Initial into Q.
//   Initial - value loaded while Reset is high.
//   Enable  - Q increments by one on each clock while this is high.
//   Q       - current count; wraps around on overflow.
// Nonblocking assignments are used so that logic sampling Q on the same
// clock edge always sees the previous count (no simulation race).
module UPCOUNTER_POSEDGE # (parameter SIZE=`WIDTH)
(
    input  wire            Clock, Reset,
    input  wire [SIZE-1:0] Initial,
    input  wire            Enable,
    output reg  [SIZE-1:0] Q
);

always @(posedge Clock )
begin
    if (Reset)
        Q <= Initial;
    else
    begin
        if (Enable)
            Q <= Q + 1;
    end
end

endmodule
|
//---------------------------------------------------------------------- |
|
// Binary to one-hot decoder with enable.
//   SEL_WIDTH    - width of the binary select input.
//   OUTPUT_WIDTH - width of the one-hot output.
//   Sel          - index of the output bit to set.
//   En           - when low, every output bit is 0.
//   O            - (1 << Sel) while En is high, truncated to OUTPUT_WIDTH.
module SELECT_1_TO_N # ( parameter SEL_WIDTH=4, parameter OUTPUT_WIDTH=16 )
(
    input  wire [SEL_WIDTH-1:0]    Sel,
    input  wire                    En,
    output wire [OUTPUT_WIDTH-1:0] O
);

// One-hot pattern for the current select value.
wire [OUTPUT_WIDTH-1:0] one_hot;
assign one_hot = (1 << Sel);

// Gate the pattern with the enable.
assign O = ( En ) ? one_hot : 0;

endmodule
|
//---------------------------------------------------------------------- |
|
// 4-to-1 multiplexer with a binary coded select.
//   SIZE      - width of the data buses (defaults to `WIDTH).
//   Sel       - 2'b00 -> I1, 2'b01 -> I2, 2'b10 -> I3, 2'b11 -> I4.
//   I1 .. I4  - data inputs.
//   O1        - selected input; all zeros when Sel contains X or Z bits.
// The default branch used to read "SIZE-1'b0", which evaluates to the
// number SIZE (SIZE minus a one bit zero) rather than a SIZE bit zero.
module MUXFULLPARALELL_2SEL_GENERIC # ( parameter SIZE=`WIDTH )
(
    input  wire [1:0]      Sel,
    input  wire [SIZE-1:0] I1, I2, I3, I4,
    output reg  [SIZE-1:0] O1
);

always @( * )
begin
    case (Sel)
        2'b00:   O1 = I1;
        2'b01:   O1 = I2;
        2'b10:   O1 = I3;
        2'b11:   O1 = I4;
        default: O1 = {SIZE{1'b0}};
    endcase
end

endmodule
|
/* |
module MUXFULLPARALELL_CORESELECT # ( parameter SIZE=`WIDTH ) |
( |
input wire [`MAX_CORES-1:0] Sel, |
input wire [SIZE-1:0] I1[`MAX_CORES-1:0], |
output reg [SIZE-1:0] O1 |
); |
|
always @( * ) |
|
begin |
|
case (Sel) |
genvar i; |
generate |
for (i = 0; i < `MAX_CORES; i = i +1) |
begin : CORE |
`MAX_CORES'di: O1 = I1[i]; |
|
|
end |
endgenerate |
default: O1 = SIZE-1'b0; |
endcase |
|
end |
|
endmodule |
*/ |
//-------- |
// Left-shifting register that reloads its initial pattern after the MSB
// has been reached. Unlike CIRCULAR_SHIFTLEFT_POSEDGE, the reload caused
// by the MSB only happens on an enabled clock edge.
//
// Parameters:
//   SIZE    - register width in bits (defaults to `WIDTH).
// Ports:
//   Clock   - state changes on the rising edge.
//   Reset   - synchronous, active high: loads Initial.
//   Initial - pattern loaded on Reset and on wrap-around.
//   Enable  - when high, either shifts left by one or, if the MSB is
//             currently set, reloads Initial; when low the state holds.
//   O       - current register contents.
module CIRCULAR_SHIFTLEFT_POSEDGE_EX # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire[SIZE-1:0]    Initial,
    input wire              Enable,
    output wire[SIZE-1:0]   O
);

reg [SIZE-1:0] rRing;

assign O = rRing;

always @(posedge Clock)
begin
    if (Reset)
        rRing <= Initial;               // explicit reload
    else if (Enable && rRing[SIZE-1])
        rRing <= Initial;               // MSB reached: wrap around
    else if (Enable)
        rRing <= rRing << 1;            // normal step
end

endmodule
//------------------------------------------------ |
// 3-to-1 multiplexer with a one-hot ("walking one") select.
//
// Parameters:
//   SIZE - data width in bits (defaults to `WIDTH).
// Ports:
//   Sel        - 3'b001 selects I1, 3'b010 selects I2, 3'b100 selects I3.
//   I1..I3     - data inputs.
//   O1         - selected input; all zeros for any other Sel value
//                (none or several bits set).
module MUXFULLPARALELL_3SEL_WALKINGONE # ( parameter SIZE=`WIDTH )
(
    input wire [2:0]        Sel,
    input wire [SIZE-1:0]   I1, I2, I3,
    output reg [SIZE-1:0]   O1
);

always @( * )
begin
    case (Sel)
        3'b001:  O1 = I1;
        3'b010:  O1 = I2;
        3'b100:  O1 = I3;
        // Replication builds a SIZE-bit zero. The former `SIZE-1'b0` was
        // parsed as the subtraction SIZE - 1'b0, so a non-one-hot Sel
        // produced the value SIZE instead of zero.
        default: O1 = {SIZE{1'b0}};
    endcase
end

endmodule
//------------------------------------------------ |
// Left-shifting register with synchronous load. Zeros are shifted in at
// the LSB; bits shifted past the MSB are lost.
//
// Parameters:
//   SIZE    - register width in bits (defaults to `WIDTH).
// Ports:
//   Clock   - state changes on the rising edge.
//   Reset   - synchronous, active high: loads Initial.
//   Initial - pattern loaded while Reset is high.
//   Enable  - when high (and Reset is low) the register shifts left by one.
//   O       - current register contents.
module SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire[SIZE-1:0]    Initial,
    input wire              Enable,
    output wire[SIZE-1:0]   O
);

reg [SIZE-1:0] rShift;

assign O = rShift;

always @(posedge Clock)
begin
    if (Reset)
        rShift <= Initial;
    else if (Enable)
        rShift <= rShift << 1;
end

endmodule
//------------------------------------------------ |
//------------------------------------------------ |
// Left-shifting register that reloads its initial pattern on the clock
// edge after the MSB becomes set. The MSB-triggered reload does not
// depend on Enable.
//
// Parameters:
//   SIZE    - register width in bits (defaults to `WIDTH).
// Ports:
//   Clock   - state changes on the rising edge.
//   Reset   - synchronous, active high: loads Initial.
//   Initial - pattern loaded on Reset and on wrap-around.
//   Enable  - when high (and no reload is pending) shifts left by one.
//   O       - current register contents.
module CIRCULAR_SHIFTLEFT_POSEDGE # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire[SIZE-1:0]    Initial,
    input wire              Enable,
    output wire[SIZE-1:0]   O
);

reg [SIZE-1:0] rRing;

assign O = rRing;

always @(posedge Clock)
begin
    // Reload on an explicit reset or as soon as the walking bit hits the MSB.
    if (Reset || rRing[SIZE-1])
        rRing <= Initial;
    else if (Enable)
        rRing <= rRing << 1;
end

endmodule
//----------------------------------------------------------- |
/*
	Set-once ("sticky") flop; its conventional name is not recorded here.
	Its truth table is:

	Q	S	Q_next	R
	0	0	0	0
	0	1	1	0
	1	0	1	0
	1	1	1	0
	X	X	0	1

	The idea is that it goes from 0 to 1 when S = 1, and if it then
	gets another S = 1 it keeps the output at 1. Only R = 1 brings
	it back to 0.
*/
// Single-bit set-once flop built from two stages.
//
// Ports:
//   Clock  - the next-state register updates on the rising edge, the
//            output Q on the falling edge.
//   Reset  - synchronous, active high: clears the next-state register
//            (Q follows on the next falling edge).
//   Enable - when high, S may set the flop; when low the next state is
//            simply the current Q.
//   S      - set request.
//   Q      - output; once set it stays set until Reset.
module FFToggleOnce_1Bit
(
    input wire  Clock,
    input wire  Reset,
    input wire  Enable,
    input wire  S,
    output reg  Q
);

// Value that Q will take on the next falling clock edge.
reg rNextQ;

// Rising edge: work out the next state.
always @ ( posedge Clock )
begin
    if (Reset)
        rNextQ <= 0;
    else if (Enable)
        rNextQ <= Q || (S && !Q);
    else
        rNextQ <= Q;
end

// Falling edge: publish the next state on the output.
always @ (negedge Clock)
begin
    Q <= rNextQ;
end

endmodule
|
//-------------------------------------------------------------- |
//************************OLD MODS***************************// |
//************************OLD MODS***************************// |
//************************OLD MODS***************************// |
//************************OLD MODS***************************// |
//----------------------------------------------------------- |
|
/* |
module UpCounterXXX_16 |
( |
input wire Clock, Reset, |
input wire [15:0] Initial, |
output reg [15:0] Q |
); |
|
|
always @(posedge Clock ) |
begin |
if (Reset) |
Q = Initial; |
else |
Q = Q + 1'b1; |
end |
|
endmodule |
*/ |
//----------------------------------------------------------- |
// 16-bit up-counter with asynchronous load and count enable.
//
// Ports:
//   Clock   - the counter advances on the rising edge.
//   Reset   - asynchronous, active high: loads Initial into the counter.
//   Initial - value loaded while Reset is high.
//   Enable  - when high (and Reset is low) the counter increments by one,
//             wrapping from 16'hFFFF to 0; when low it holds.
//   Q       - current count.
module UpCounter_16E
(
    input wire          Clock,
    input wire          Reset,
    input wire [15:0]   Initial,
    input wire          Enable,
    output wire [15:0]  Q
);

reg [15:0] rCount;

assign Q = rCount;

// Nonblocking assignments keep the counter race-free with respect to
// other processes clocked by the same edge.
always @(posedge Clock or posedge Reset)
begin
    if (Reset)
        rCount <= Initial;
    else if (Enable)
        rCount <= rCount + 1'b1;
end

endmodule
//----------------------------------------------------------- |
// 32-bit up-counter with asynchronous load and count enable.
//
// Ports:
//   Clock   - the counter advances on the rising edge.
//   Reset   - asynchronous, active high: loads Initial into the counter.
//   Initial - value loaded while Reset is high.
//   Enable  - when high (and Reset is low) the counter increments by one,
//             wrapping from 32'hFFFFFFFF to 0; when low it holds.
//   Q       - current count.
module UpCounter_32
(
    input wire          Clock,
    input wire          Reset,
    input wire [31:0]   Initial,
    input wire          Enable,
    output wire [31:0]  Q
);

reg [31:0] rCount;

assign Q = rCount;

// Nonblocking assignments keep the counter race-free with respect to
// other processes clocked by the same edge.
always @(posedge Clock or posedge Reset)
begin
    if (Reset)
        rCount <= Initial;
    else if (Enable)
        rCount <= rCount + 1'b1;
end

endmodule
//----------------------------------------------------------- |
// 3-bit up-counter with asynchronous load and count enable.
//
// Ports:
//   Clock   - the counter advances on the rising edge.
//   Reset   - asynchronous, active high: loads Initial into the counter.
//   Initial - value loaded while Reset is high.
//   Enable  - when high (and Reset is low) the counter increments by one,
//             wrapping from 7 to 0; when low it holds.
//   Q       - current count.
module UpCounter_3
(
    input wire          Clock,
    input wire          Reset,
    input wire [2:0]    Initial,
    input wire          Enable,
    output wire [2:0]   Q
);

reg [2:0] rCount;

assign Q = rCount;

// Nonblocking assignments keep the counter race-free with respect to
// other processes clocked by the same edge.
always @(posedge Clock or posedge Reset)
begin
    if (Reset)
        rCount <= Initial;
    else if (Enable)
        rCount <= rCount + 3'b1;
end

endmodule
|
|
// Plain 32-bit D register: Q takes the value of D on every rising edge
// of Clock. There is no reset and no enable.
module FFD32_POSEDGE
(
    input wire          Clock,
    input wire[31:0]    D,
    output reg[31:0]    Q
);

always @ (posedge Clock)
begin
    Q <= D;
end

endmodule
|
//------------------------------------------------ |
/* |
module FF_OPCODE_POSEDGE_SYNCRONOUS_RESET |
( |
input wire Clock, |
input wire Clear, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] D, |
output reg[`INSTRUCTION_OP_LENGTH-1:0] Q |
); |
|
always @(posedge Clock or posedge Clear) |
begin |
if (Clear) |
Q = `INSTRUCTION_OP_LENGTH'b0; |
else |
Q = D; |
end |
endmodule |
//------------------------------------------------ |
|
module FF32_POSEDGE_SYNCRONOUS_RESET |
( |
input wire Clock, |
input wire Clear, |
input wire[31:0] D, |
output reg[31:0] Q |
); |
|
always @(posedge Clock or posedge Clear) |
begin |
if (Clear) |
Q = 32'b0; |
else |
Q = D; |
end |
endmodule |
//------------------------------------------------ |
|
module FF16_POSEDGE_SYNCRONOUS_RESET |
( |
input wire Clock, |
input wire Clear, |
input wire[15:0] D, |
output reg[15:0] Q |
); |
|
always @(posedge Clock or posedge Clear) |
begin |
if (Clear) |
Q = 16'b0; |
else |
Q = D; |
end |
endmodule |
*/ |
//------------------------------------------------ |
// 2-to-1 multiplexer for 96-bit buses.
//
// Ports:
//   Sel    - 0 selects I1, 1 selects I2.
//   I1, I2 - data inputs.
//   O1     - selected input.
module MUXFULLPARALELL_96bits_2SEL
(
    input wire          Sel,
    input wire [95:0]   I1, I2,
    output reg [95:0]   O1
);

always @( * )
begin
    case (Sel)
        1'b1: O1 = I2;
        1'b0: O1 = I1;
    endcase
end

endmodule
//------------------------------------------------ |
|
// 3-to-1 multiplexer for 16-bit buses with a binary 2-bit select.
//
// Ports:
//   Sel        - 2'b00 selects I1, 2'b01 selects I2, 2'b10 selects I3.
//   I1..I3     - data inputs.
//   O1         - selected input; zero for any other Sel value.
module MUXFULLPARALELL_16bits_2SEL_X
(
    input wire [1:0]    Sel,
    input wire [15:0]   I1, I2, I3,
    output reg [15:0]   O1
);

always @( * )
begin
    if (Sel == 2'b00)
        O1 = I1;
    else if (Sel == 2'b01)
        O1 = I2;
    else if (Sel == 2'b10)
        O1 = I3;
    else
        O1 = 16'b0;     // unused encoding
end

endmodule
//------------------------------------------------ |
// 2-to-1 multiplexer for 16-bit buses.
//
// Ports:
//   Sel    - 0 selects I1, 1 selects I2.
//   I1, I2 - data inputs.
//   O1     - selected input.
module MUXFULLPARALELL_16bits_2SEL
(
    input wire          Sel,
    input wire [15:0]   I1, I2,
    output reg [15:0]   O1
);

always @( * )
begin
    case (Sel)
        1'b1: O1 = I2;
        1'b0: O1 = I1;
    endcase
end

endmodule
|
|
//------------------------------------------------ |
/* |
module MUXFULLPARALELL_1Bit_1SEL |
( |
input wire Sel, |
input wire I1, I2, |
output reg O1 |
); |
|
|
|
always @( * ) |
|
begin |
|
case (Sel) |
|
1'b0: O1 = I1; |
1'b1: O1 = I2; |
|
endcase |
|
end |
|
endmodule |
*/ |
//-------------------------------------------------------------- |
/* |
module FFD_OPCODE_POSEDGE |
( |
input wire Clock, |
input wire[`INSTRUCTION_OP_LENGTH-1:0] D, |
output reg[`INSTRUCTION_OP_LENGTH-1:0] Q |
); |
|
always @ (posedge Clock) |
Q <= D; |
|
endmodule |
*/ |
//-------------------------------------------------------------- |
/* |
module FFD16_POSEDGE |
( |
input wire Clock, |
input wire[15:0] D, |
output reg[15:0] Q |
); |
|
always @ (posedge Clock) |
Q <= D; |
|
endmodule |
*/ |
//-------------------------------------------------------------- |
|
// Single-bit toggle flip-flop with asynchronous reset.
//
// Ports:
//   D     - toggle request: when high at a rising clock edge, Q inverts;
//           when low, Q holds.
//   Clock - Q may change on the rising edge.
//   Reset - asynchronous, active high: forces Q to 0.
//   Q     - flop output.
module FFT1
(
    input wire  D,
    input wire  Clock,
    input wire  Reset,
    output reg  Q
);

always @ ( posedge Clock or posedge Reset )
begin
    if (Reset)
        Q <= 1'b0;
    else if (D)
        Q <= ~Q;
end//always

endmodule
//-------------------------------------------------------------- |
/Module_RadixRMul.v
0,0 → 1,337
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
////////////////////////////////////////////////////////////////////////////////// |
// Company: |
// Engineer: |
// |
// Create Date: 19:49:14 01/13/2009 |
// Design Name: |
// Module Name: RadixRMul |
// Project Name: |
// Target Devices: |
// Tool versions: |
// Description: |
// |
// Dependencies: |
// |
// Revision: |
// Revision 0.01 - File Created |
// Additional Comments: |
// |
////////////////////////////////////////////////////////////////////////////////// |
|
`default_nettype none |
|
|
//--------------------------------------------------- |
// 4-to-1 multiplexer for 32-bit buses with a binary 2-bit select.
//
// Ports:
//   i1..i4 - data inputs.
//   Sel    - 2'b00 selects i1, 2'b01 i2, 2'b10 i3, 2'b11 i4.
//   O      - selected input.
module MUX_4_TO_1_32Bits_FullParallel
(
    input wire [31:0]   i1, i2, i3, i4,
    output reg [31:0]   O,
    input wire [1:0]    Sel
);

always @ ( * )
begin
    case (Sel)
        2'b11: O = i4;
        2'b10: O = i3;
        2'b01: O = i2;
        2'b00: O = i1;
    endcase
end

endmodule
//--------------------------------------------------- |
/* |
module SHIFTER2_16_BITS |
( |
input wire C, |
input wire[15:0] In, |
output reg[15:0] Out |
); |
|
reg [15:0] Temp; |
always @ (posedge C ) |
begin |
Out = In << 2; |
|
end |
|
endmodule |
*/ |
//--------------------------------------------------- |
// Signed 32 x 32 -> 64 bit radix-4 multiplier with optional fixed-point
// rescaling.
//
// Operation:
//   1. A and B are captured into registers on a rising clock edge while
//      iInputReady is high.
//   2. Each latched operand is converted to its magnitude (two's
//      complement negation when bit 31 is set).
//   3. |B| is split into sixteen 2-bit digits. Each digit selects
//      0, |A|, 2|A| or 3|A|; the selections are weighted by 4^digit and
//      summed through a four-level adder tree.
//   4. Unless iUnscaled is high, the magnitude product is shifted right
//      by `SCALE bits.
//   5. The result is negated when the operand signs differ.
//
// Ports:
//   Clock, Reset - clock and reset for the operand and handshake flops.
//   A, B         - signed 32-bit operands.
//   R            - 64-bit result (combinational from the latched operands).
//   iUnscaled    - 1: return the raw product; 0: return product >> `SCALE.
//   iInputReady  - qualifies A and B; also feeds the OutputReady delay line.
//   OutputReady  - iInputReady passed through two FFD_POSEDGE_ASYNC_RESET
//                  stages.
module RADIX_R_MUL_32_FULL_PARALLEL
(
    input wire          Clock,
    input wire          Reset,
    input wire[31:0]    A,
    input wire[31:0]    B,
    output wire[63:0]   R,
    input wire          iUnscaled,
    input wire          iInputReady,
    output wire         OutputReady
);

wire        wInputDelay1;
wire [31:0] wALatched, wBLatched;

//------------------- operand registers
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD1
(
    .Clock( Clock ),
    .Reset( Reset),
    .Enable( iInputReady ),
    .D( A ),
    .Q( wALatched)
);
FFD_POSEDGE_SYNCRONOUS_RESET # ( `WIDTH ) FFD2
(
    .Clock( Clock ),
    .Reset( Reset),
    .Enable( iInputReady ),
    .D( B ),
    .Q( wBLatched )
);

//------------------- handshake delay line
FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay1
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iInputReady ),
    .Q( wInputDelay1 )
);

FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( wInputDelay1 ),
    .Q( OutputReady )
);

//------------------- sign / magnitude split
wire        SignA, SignB;
wire [31:0] wA, wB;

assign SignA = wALatched[31];
assign SignB = wBLatched[31];

assign wA = (SignA == 1) ? ~wALatched + 1'b1 : wALatched;
assign wB = (SignB == 1) ? ~wBLatched + 1'b1 : wBLatched;

//------------------- multiples of |A|
// These are 34 bits wide: |A| can be as large as 2^31, so 2|A| and 3|A|
// do not fit in 32 bits. Keeping them at 32 bits silently dropped the
// upper bits of the product for large operands.
wire [33:0] wA_x1, wA_x2, wA_x3;

assign wA_x1 = {2'b00, wA};
assign wA_x2 = {1'b0, wA, 1'b0};
assign wA_x3 = wA_x1 + wA_x2;

//------------------- partial products and adder tree
wire [63:0] wLevel0 [0:15];     // one weighted partial product per digit of |B|
wire [63:0] wLevel1 [0:7];
wire [63:0] wLevel2 [0:3];
wire [63:0] wLevel3 [0:1];

genvar i;
generate
    for (i = 0; i < 16; i = i + 1)
    begin : DIGIT
        wire [1:0]  wDigit;
        wire [33:0] wMultiple;

        assign wDigit    = wB[2*i +: 2];
        assign wMultiple = (wDigit == 2'b11) ? wA_x3 :
                           (wDigit == 2'b10) ? wA_x2 :
                           (wDigit == 2'b01) ? wA_x1 : 34'd0;

        // Weight the selected multiple by 4^i.
        assign wLevel0[i] = {30'd0, wMultiple} << (2*i);
    end

    for (i = 0; i < 8; i = i + 1)
    begin : SUM_L1
        assign wLevel1[i] = wLevel0[2*i] + wLevel0[2*i+1];
    end

    for (i = 0; i < 4; i = i + 1)
    begin : SUM_L2
        assign wLevel2[i] = wLevel1[2*i] + wLevel1[2*i+1];
    end

    for (i = 0; i < 2; i = i + 1)
    begin : SUM_L3
        assign wLevel3[i] = wLevel2[2*i] + wLevel2[2*i+1];
    end
endgenerate

//------------------- scaling and sign restoration
wire [63:0] wMagnitude;
wire [63:0] R_pre1, R_pre2;

assign wMagnitude = wLevel3[0] + wLevel3[1];

assign R_pre1 = (iUnscaled == 1) ? wMagnitude : (wMagnitude >> `SCALE);

assign R_pre2 = ( (SignA ^ SignB) == 1) ? ~R_pre1 + 1'b1 : R_pre1;

assign R = R_pre2;

endmodule
/Module_FixedPointAddtionSubstraction.v
0,0 → 1,67
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
|
//----------------------------------------------------------- |
// Registered incrementer: on every rising clock edge R takes A + 1
// (wrapping on overflow).
//
// Parameters:
//   SIZE  - data width in bits (defaults to `WIDTH).
// Ports:
//   Clock - R is updated on the rising edge.
//   Reset - not used by this module; kept so existing instantiations
//           continue to connect.
//   A     - value to increment.
//   R     - registered A + 1.
module INCREMENT # ( parameter SIZE=`WIDTH )
(
    input wire              Clock,
    input wire              Reset,
    input wire[SIZE-1:0]    A,
    output reg [SIZE-1:0]   R
);

// Nonblocking assignment: R is a clocked register, so readers on the same
// edge must see its previous value.
always @ (posedge Clock)
begin
    R <= A + 1'b1;
end

endmodule
//----------------------------------------------------------- |
// Registered fixed-point adder / subtractor.
//
// Ports:
//   Clock       - R and OutputReady are updated on the rising edge.
//   Reset       - clears the OutputReady flop.
//   A, B        - operands.
//   R           - A + B when iOperation is low, A - B (A plus the two's
//                 complement of B) when iOperation is high. While
//                 iInputReady is low R is loaded with 64'hFFFFFFFF.
//   iOperation  - 0: add, 1: subtract.
//   iInputReady - high when A and B are valid.
//   OutputReady - iInputReady registered through one
//                 FFD_POSEDGE_ASYNC_RESET stage.
module FixedAddSub
(
    input wire                      Clock,
    input wire                      Reset,
    input wire[`LONG_WIDTH-1:0]     A,
    input wire[`LONG_WIDTH-1:0]     B,
    output reg[`LONG_WIDTH-1:0]     R,
    input wire                      iOperation,
    input wire                      iInputReady,    //Is the input data valid?
    output wire                     OutputReady     //Our output data is ready!
);

// Second operand, negated for subtraction.
wire [`LONG_WIDTH-1:0] wB;

assign wB = ( iOperation ) ? ~B + 1'b1 : B;

//Output ready just takes 1 cycle
FFD_POSEDGE_ASYNC_RESET #(1) FFOutputReadyDelay2
(
    .Clock( Clock ),
    .Clear( Reset ),
    .D( iInputReady ),
    .Q( OutputReady )
);

//-------------------------------
// Nonblocking assignments: R is a clocked register, so readers on the
// same edge must see its previous value.
always @ (posedge Clock)
begin
    if (iInputReady == 1)
        R <= ( A + wB );
    else
        // NOTE(review): presumably an "invalid data" marker; only the low
        // 32 bits are set by this literal - confirm that is intended.
        R <= 64'hFFFFFFFF;
end // always

endmodule
/Module_ArithmeticComparison.v
0,0 → 1,64
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
|
//------------------------------------------------------------------ |
// Registered signed (two's complement) comparator.
//
// Ports:
//   Clock       - Result and OutputReady are updated on the rising edge.
//   X, Y        - signed operands.
//   iOperation  - comparison to perform:
//                   3'b000  X >  Y
//                   3'b001  X <  Y
//                   3'b010  X == Y
//                   3'b011  X != Y
//                   3'b100  X >= Y
//                   3'b101  X <= Y
//                   other   Result is 0
//   iInputReady - when high the comparison is evaluated and registered.
//   OutputReady - iInputReady delayed by one clock.
//   Result      - registered comparison outcome; holds its last value
//                 while iInputReady is low.
module ArithmeticComparison
(
    input wire                  Clock,
    input wire[`WIDTH-1:0]      X,Y,
    input wire[2:0]             iOperation,
    input wire                  iInputReady,
    output reg                  OutputReady,
    output reg                  Result
);

wire wGreater, wLess, wEqual;

// Signed comparisons on the full operands. Equality is tested on the raw
// values: comparing magnitudes would report X == Y for X = -Y.
assign wGreater = $signed(X) >  $signed(Y);
assign wLess    = $signed(X) <  $signed(Y);
assign wEqual   = (X == Y);

always @ ( posedge Clock )
begin
    if (iInputReady)
    begin
        case ( iOperation )
            3'b000:  Result <= wGreater;            // X >  Y
            3'b001:  Result <= wLess;               // X <  Y (strict: false when X == Y)
            3'b010:  Result <= wEqual;              // X == Y
            3'b011:  Result <= ~wEqual;             // X != Y
            3'b100:  Result <= wGreater | wEqual;   // X >= Y
            3'b101:  Result <= wLess | wEqual;      // X <= Y
            default: Result <= 1'b0;
        endcase
        OutputReady <= 1'b1;
    end
    else
        OutputReady <= 1'b0;
end

endmodule
//--------------------------------------------- |
/Module_Swizzle.v
0,0 → 1,53
`timescale 1ns / 1ps |
`include "aDefinitions.v" |
//--------------------------------------------------------------------------- |
// Combinational swizzle of a three-component vector.
//
// Ports:
//   Source0_X/Y/Z - input components.
//   iOperation    - swizzle selector:
//                     `SWIZZLE_XXX  -> (X, X, X)
//                     `SWIZZLE_YYY  -> (Y, Y, Y)
//                     `SWIZZLE_ZZZ  -> (Z, Z, Z)
//                     `SWIZZLE_YXZ  -> (Y, X, Z)
//                     anything else -> (0, 0, 0)
//   SwizzleX/Y/Z  - output components.
module Swizzle3D
(
    input wire [`WIDTH-1:0] Source0_X,
    input wire [`WIDTH-1:0] Source0_Y,
    input wire [`WIDTH-1:0] Source0_Z,
    input wire [`WIDTH-1:0] iOperation,

    output reg [`WIDTH-1:0] SwizzleX,
    output reg [`WIDTH-1:0] SwizzleY,
    output reg [`WIDTH-1:0] SwizzleZ
);

//-----------------------------------------------------
always @ ( * )
begin
    case (iOperation)
        `SWIZZLE_XXX:
        begin
            SwizzleX = Source0_X;
            SwizzleY = Source0_X;
            SwizzleZ = Source0_X;
        end
        `SWIZZLE_YYY:
        begin
            SwizzleX = Source0_Y;
            SwizzleY = Source0_Y;
            SwizzleZ = Source0_Y;
        end
        `SWIZZLE_ZZZ:
        begin
            SwizzleX = Source0_Z;
            SwizzleY = Source0_Z;
            SwizzleZ = Source0_Z;
        end
        `SWIZZLE_YXZ:
        begin
            SwizzleX = Source0_Y;
            SwizzleY = Source0_X;
            SwizzleZ = Source0_Z;
        end
        default:
        begin
            // Zero literals sized to the `WIDTH-wide outputs.
            SwizzleX = {`WIDTH{1'b0}};
            SwizzleY = {`WIDTH{1'b0}};
            SwizzleZ = {`WIDTH{1'b0}};
        end
    endcase
end
//-----------------------------------------------------
endmodule
//--------------------------------------------------------------------------- |