1 |
152 |
diegovalve |
|
2 |
|
|
|
3 |
|
|
/**********************************************************************************
|
4 |
|
|
Theia, Ray Cast Programmable graphic Processing Unit.
|
5 |
|
|
Copyright (C) 2010 Diego Valverde (diego.valverde.g@gmail.com)
|
6 |
|
|
|
7 |
|
|
This program is free software; you can redistribute it and/or
|
8 |
|
|
modify it under the terms of the GNU General Public License
|
9 |
|
|
as published by the Free Software Foundation; either version 2
|
10 |
|
|
of the License, or (at your option) any later version.
|
11 |
|
|
|
12 |
|
|
This program is distributed in the hope that it will be useful,
|
13 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15 |
|
|
GNU General Public License for more details.
|
16 |
|
|
|
17 |
|
|
You should have received a copy of the GNU General Public License
|
18 |
|
|
along with this program; if not, write to the Free Software
|
19 |
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
20 |
|
|
|
21 |
|
|
***********************************************************************************/
|
22 |
|
|
|
23 |
|
|
|
24 |
|
|
/*******************************************************************************
|
25 |
|
|
Module Description:
|
26 |
|
|
|
27 |
|
|
This is the Main test bench of the GPU. It simulates the behavior of
|
28 |
|
|
an external control unit or CPU that sends configuration information into DUT.
|
29 |
|
|
It also implements a second process that simulates a Wishbone slave that sends
|
30 |
|
|
data from an external memory. These blocks are just behavioral CTE and therefore
|
31 |
|
|
are not meant to be synthesized.
|
32 |
|
|
|
33 |
|
|
*******************************************************************************/
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
`timescale 1ns / 1ps
`include "aDefinitions.v"

// Rendered image size: words 13 and 14 of the scene parameter memory,
// shifted right by `SCALE (presumably the fixed-point fraction width
// defined in aDefinitions.v -- TODO confirm).
`define RESOLUTION_WIDTH    (rSceneParameters[13] >> `SCALE)
`define RESOLUTION_HEIGHT   (rSceneParameters[14] >> `SCALE)

// A value of one shifted left by `SCALE, used as the per-row / per-column step.
`define DELTA_ROW           (32'h1 << `SCALE)
`define DELTA_COL           (32'h1 << `SCALE)

// Number of words in the texture buffer: 256 x 256 texels, 3 words per texel.
`define TEXTURE_BUFFER_SIZE (256*256*3)

// Largest supported screen edge and the matching screen buffer size
// (MAX_WIDTH x MAX_WIDTH entries, 3 per pixel).
`define MAX_WIDTH           200
`define MAX_SCREENBUFFER    (`MAX_WIDTH*`MAX_WIDTH*3)
|
46 |
|
|
module TestBench_Theia;

//------------------------------------------------------------------------
// Main test bench of the Theia GPU.
//
//  * Loads the control register, scene parameters, geometry, textures and
//    shader code from .mem files.
//  * Pulses Reset, streams the texture buffer into the GPU through the
//    TM* ports, then enables the host model (Module_Host), which feeds the
//    GPU over the Wishbone-style bus.
//  * When the GPU raises wDone, reads back every output memory bank and
//    writes the pixels to Output.ppm, then stops the simulation.
//------------------------------------------------------------------------
// **WARNING: Declare all of your variables at the beginning of the file.
// Some Verilog simulators allow you to use regs that have not been
// previously declared, leading to crashes or unexpected behavior.
//------------------------------------------------------------------------

	// Clock and reset driven by this test bench
	reg                         Clock;
	reg                         Reset;

	// Bus between the host model and the GPU
	wire [`WB_WIDTH-1:0]        DAT_O;
	// NOTE(review): ACK_O is connected to the GPU's ACK_I port but is never
	// assigned anywhere in this module, so it stays X for the whole run.
	reg                         ACK_O;
	wire                        ACK_I;
	wire [`WB_WIDTH-1:0]        ADR_I,ADR_O;    // ADR_I is unused in this module
	wire                        WE_I,STB_I;     // unused in this module
	wire                        CYC_O,WE_O,TGC_O,STB_O; // TGC_O is unused in this module
	wire [1:0]                  TGA_O;
	wire [1:0]                  TGA_I;          // unused in this module

	// Texture memory load port (driven by the initial block below)
	reg [`WB_WIDTH-1:0]         TMADR_O,TMDAT_O;
	reg [`MAX_TMEM_BANKS-1:0]   TMSEL_O;
	reg                         TMWE_O;

	// Memories loaded from the .mem files
	reg [31:0]                  rControlRegister[2:0];
	integer                     file, log;      // 'file' is unused in this module
	reg [31:0]                  rSceneParameters[120:0];
	reg [31:0]                  rVertexBuffer[7000:0];
	reg [31:0]                  rInstructionBuffer[512:0];
	reg [31:0]                  rTextures[`TEXTURE_BUFFER_SIZE:0]; // Let's assume we use 256*256 textures
	reg [7:0]                   rScreen[`MAX_SCREENBUFFER-1:0];    // unused in this module

	wire [`MAX_CORES-1:0]       wCoreSelect;
	wire [3:0]                  CYC_I,GNT_O;    // unused in this module
	wire                        MST_O;
	wire                        wDone;
	wire [`MAX_CORES-1:0]       RENDREN_O;

	// Output memory read-back port (driven by the result dumper below)
	reg [`MAX_CORE_BITS-1:0]    wOMEMBankSelect;
	reg [`WB_WIDTH-1:0]         wOMEMReadAddr;  // Output address (relative to current bank)
	wire [`WB_WIDTH-1:0]        wOMEMData;      // Output data bus (Wishbone)

	reg                         rHostEnable;
	integer                     k,out2;

	// Host <-> GPU handshake
	wire                        GRDY_I;
	wire                        GACK_O;
	wire                        STDONE_O;
	wire                        wGPUCommitedResults;
	wire                        wHostDataAvailable;

	//---------------------------------------------
	// Device under test
	THEIA GPU
	(
		.CLK_I( Clock ),
		.RST_I( Reset ),
		.RENDREN_I( RENDREN_O ),
		.DAT_I( DAT_O ),
		.ACK_I( ACK_O ),

		.CYC_I( CYC_O ),
		.MST_I( MST_O ),
		.TGA_I( TGA_O ),
		.ACK_O( ACK_I ),
		.ADR_I( ADR_O ),
		.WE_I( WE_O ),
		.SEL_I( wCoreSelect ),
		.STB_I( STB_O ),

		//Output memory
		.OMBSEL_I( wOMEMBankSelect ),
		.OMADR_I( wOMEMReadAddr ),
		.OMEM_O( wOMEMData ),

		//Texture memory
		.TMDAT_I( TMDAT_O ),
		.TMADR_I( TMADR_O ),
		.TMWE_I( TMWE_O ),
		.TMSEL_I( TMSEL_O ),

		//Host handshake
		.HDL_O( GRDY_I ),
		.HDLACK_I( GACK_O ),
		.STDONE_I( STDONE_O ),
		.RCOMMIT_O( wGPUCommitedResults ),
		.HDA_I( wHostDataAvailable ),

		//Control register
		.CREG_I( rControlRegister[0][15:0] ),
		//Other stuff
		.DONE_O( wDone )
	);

	//---------------------------------------------
	// Host-side memory read path: wMemSelect picks which of the loaded
	// buffers the host model is currently reading.
	wire[`WB_WIDTH-1:0] wHostReadAddress;
	wire[`WB_WIDTH-1:0] wHostReadData;
	wire[`WB_WIDTH-1:0] wMemorySize;
	wire[1:0]           wMemSelect;

	// Word addressed by the host in the selected buffer
	MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX1
	(
		.Sel( wMemSelect ),
		.I1( rInstructionBuffer[wHostReadAddress] ),
		.I2( rSceneParameters[wHostReadAddress] ),
		.I3( rVertexBuffer[wHostReadAddress] ),
		.I4(0),
		.O1(wHostReadData)
	);

	// Word 0 of the selected buffer, handed to the host as iMemorySize
	MUXFULLPARALELL_2SEL_GENERIC # ( `WB_WIDTH ) MUX2
	(
		.Sel( wMemSelect ),
		.I1( rInstructionBuffer[0] ),
		.I2( rSceneParameters[0] ),
		.I3( rVertexBuffer[0] ),
		.I4(0),
		.O1(wMemorySize)
	);

	//---------------------------------------------
	// Behavioral model of the external host / control unit
	Module_Host HOST
	(
		.Clock( Clock ),
		.Reset( Reset ),
		.iEnable( rHostEnable ),
		.oHostDataAvailable( wHostDataAvailable ),
		.iHostDataReadConfirmed( GRDY_I ),
		.iMemorySize( wMemorySize ),
		.iPrimitiveCount( (rVertexBuffer[6]+1) *7 ), //This is wrong I think
		.iGPUCommitedResults( wGPUCommitedResults ),
		.STDONE_O( STDONE_O ),
		.iGPUDone( wDone ),

`ifndef NO_DISPLAY_STATS
		.iDebugWidth( `RESOLUTION_WIDTH ),
`endif

		//To Memory
		.oReadAddress( wHostReadAddress ),
		.iReadData( wHostReadData ),

		//To Hub/Switch
		.oCoreSelectMask( wCoreSelect ),
		.oMemSelect( wMemSelect ),
		.DAT_O( DAT_O),
		.ADR_O( ADR_O ),
		.TGA_O( TGA_O ),
		.RENDREN_O( RENDREN_O ),
		.CYC_O( CYC_O ),
		.STB_O( STB_O ),
		.MST_O( MST_O ),

		.GRDY_I( GRDY_I ),
		.GACK_O( GACK_O ),

		.WE_O( WE_O ),

		.ACK_I( ACK_I )
	);

	//---------------------------------------------
	//generate the clock signal here
	always begin
		#`CLOCK_CYCLE Clock = ! Clock;
	end
	//---------------------------------------------

	//-------------------------------------------------------------------------------------
	/*
	Result dumper.
	Once the GPU raises wDone, walk every output memory bank (one per core),
	read back PARTITION_SIZE rows of RESOLUTION_WIDTH*3 samples each, clamp
	each sample to 0..255 and append it to the PPM image file. Then close the
	files and stop the simulation.

	Triggered on the rising edge of wDone rather than with @(*): the block
	contains delays and closes its files, so it must run exactly once and must
	not be re-triggered by the signals it reads or drives.
	*/
	`define PARTITION_SIZE (`RESOLUTION_HEIGHT/`MAX_CORES)
	integer i,j,kk;
	reg [31:0] R;
	always @ ( posedge wDone )
	begin
		// Guard against 0->X transitions, which also count as a posedge
		if (wDone == 1'b1)
		begin

			$display("Partition Size = %d",`PARTITION_SIZE);
			for (kk = 0; kk < `MAX_CORES; kk = kk+1)
			begin
				wOMEMBankSelect = kk;
				$display("wOMEMBankSelect = %d\n",wOMEMBankSelect);
				for (j=0; j < `PARTITION_SIZE; j=j+1)
				begin
					// One row holds RESOLUTION_WIDTH pixels of 3 samples; the
					// bound must match the row stride used in the address below.
					for (i = 0; i < `RESOLUTION_WIDTH*3; i = i +1)
					begin
						wOMEMReadAddr = i+j*`RESOLUTION_WIDTH*3;
						// Give the output memory a clock period (plus margin)
						// to present the addressed word before sampling it.
						#`CLOCK_PERIOD;
						#1;
						// Drop (SCALE-8) low bits and saturate at 255
						R = ((wOMEMData >> (`SCALE-8)) > 255) ? 255 : (wOMEMData >> (`SCALE-8));
						$fwrite(out2,"%d " , R );

						// Emit a "# x y" comment line once per pixel
						if ((i %3) == 0)
							$fwrite(out2,"\n# %d %d\n",i/3,j);

					end
				end
			end

			$fclose(out2);
			$fwrite(log, "Simulation end time : %dns\n",$time);
			$fclose(log);

			$stop();

		end
	end
	//-------------------------------------------------------------------------------------

	reg [15:0] rTimeOut;    // only initialised below; otherwise unused in this module

	// `define MAX_INSTRUCTIONS 2

	//-------------------------------------------------------------------------------------
	// Stimulus: load memories, open the output files, reset the GPU,
	// load the texture memory and finally enable the host model.
	initial begin
		// Initialize Inputs
		Clock       = 0;
		Reset       = 0;
		rTimeOut    = 0;
		rHostEnable = 0;

		//Read Config register values
		$write("Loading control register.... ");
		$readmemh("Creg.mem",rControlRegister);
		$display("Done");

		//Read configuration Data
		$write("Loading scene parameters.... ");
		$readmemh("Params.mem", rSceneParameters );
		$display("Done");

		//Read Scene Data
		$write("Loading scene geometry.... ");
		$readmemh("Vertex.mem",rVertexBuffer);
		$display("Done");

		$display("Number of primitives(%d): %d",rVertexBuffer[6],(rVertexBuffer[6]+1) *7);

		//Read Texture Data
		$write("Loading scene texture.... ");
		$readmemh("Textures.mem",rTextures);
		$display("Done");

		//Read instruction data
		$write("Loading code allocation table and user shaders.... ");
		$readmemh("Instructions.mem",rInstructionBuffer);
		$display("Done");

		$display("Control Register : %b",rControlRegister[0]);
		$display("Resolution : %d X %d",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT );

		//Open the simulation log ($fopen returns 0 when the file cannot be opened)
		log = $fopen("Simulation.log");
		if (log == 0)
			$display("WARNING: could not open Simulation.log, no log will be written");
		$fwrite(log, "Simulation start time : %dns\n",$time);
		$fwrite(log, "Width : %d\n",`RESOLUTION_WIDTH);
		$fwrite(log, "Height : %d\n",`RESOLUTION_HEIGHT);

		//Open output file; without it the whole run would produce no image
		out2 = $fopen("Output.ppm");
		if (out2 == 0)
		begin
			$display("ERROR: could not open Output.ppm for writing");
			$finish;
		end

		//Plain-text PPM header: magic, comment, width height, max sample value
		$fwrite(out2,"P3\n");
		$fwrite(out2,"#This file was generated by Theia's RTL simulation\n");
		$fwrite(out2,"%d %d\n",`RESOLUTION_WIDTH, `RESOLUTION_HEIGHT );
		$fwrite(out2,"255\n");

		//Assert reset 10 time units into the simulation
		#10
		Reset = 1;

		//Hold reset for 100 time units; TMWE_O is raised for the texture load
		//(presumably the texture memory write enable -- TODO confirm)
		TMWE_O = 1;
		#100 Reset = 0;
		TMWE_O = 1;

		$display("Intilializing TMEM @ %dns",$time);
		//Stream the whole texture buffer, one word every 10 time units,
		//starting at word 0.
		//NOTE(review): the address drops MAX_CORE_BITS low bits while the bank
		//select masks with MAX_TMEM_BANKS-1; these only agree if
		//MAX_TMEM_BANKS == 2**MAX_CORE_BITS -- confirm against aDefinitions.v.
		for (k = 0;k < `TEXTURE_BUFFER_SIZE; k = k + 1)
		begin

			TMADR_O <= (k >> (`MAX_CORE_BITS));
			TMSEL_O <= (k & (`MAX_TMEM_BANKS-1)); //X mod 2^n == X & (2^n - 1)
			TMDAT_O <= rTextures[k];
			#10;
		end
		$display("Done Intilializing TMEM @ %dns",$time);
		TMWE_O = 0;

		//Texture load finished: let the host model start driving the GPU
		rHostEnable = 1;

	end

endmodule
|