OpenCores
URL https://opencores.org/ocsvn/eco32/eco32/trunk

Subversion Repositories eco32

[/] [eco32/] [trunk/] [fp/] [implementation/] [mmix/] [mmix-config.w] - Blame information for rev 15

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 15 hellwig
% This file is part of the MMIXware package (c) Donald E Knuth 1999
2
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
3
 
4
\def\title{MMIX-CONFIG}
5
\def\MMIX{\.{MMIX}}
6
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
7
@s bool int
8
@s cache int
9
@s func int
10
@s coroutine int
11
@s octa int
12
@s cacheset int
13
@s cacheblock int
14
@s fetch int
15
@s control int
16
@s write_node int
17
@s internal_opcode int
18
@s replace_policy int
19
@s PV TeX
20
@s mmix_opcode int
21
@s specnode int
22
\def\PV{\\{PV}} % use italics, not \tt
23
@s CPV TeX
24
\def\CPV{\\{CPV}}
25
@s OP TeX
26
\def\OP{\\{OP}}
27
@s and normal @q unreserve a C++ keyword @>
28
@s or normal @q unreserve a C++ keyword @>
29
@s xor normal @q unreserve a C++ keyword @>
30
 
31
@*Input format. Configuration files allow this simulator to adapt itself to
32
infinitely many possible combinations of hardware features. The purpose of the
33
present module is to read a configuration file, check it for validity, and
34
set up the relevant data structures.
35
 
36
All data in a configuration file consists simply of {\it tokens\/} separated
37
by one or more units of white space, where a ``token'' is any sequence of
38
nonspace characters that doesn't contain a percent sign. Percent signs
39
and anything following them on a line are ignored; this convention allows
40
a user to include comments in the file. Here's a simple (but weird) example:
41
$$\vbox{\halign{\tt#\hfil\cr
42
\% Silly configuration\cr
43
writebuffer 200\cr
44
memaddresstime 100\cr
45
Dcache associativity 4 lru\cr
46
Dcache blocksize 1024\cr
47
unit ODD 5555555555555555555555555555555555555555555555555555555555555555\cr
48
unit EVEN aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\cr
49
div 40 30 20\ \ \% three-stage divide\cr
50
}}$$
51
It means that (1) the write buffer has capacity for 200 octabytes;
52
(2)~the memory bus takes 100 cycles to process an address;
53
(3)~there's a D-cache, in which each set has 4 blocks and the replacement
54
policy is least-recently-used;
55
(4)~each block in the D-cache has 1024 bytes;
56
(5)~there are two functional units, one for all the odd-numbered opcodes
57
and one for all the rest;
58
(6)~the division instructions take three pipeline stages, spending 40 cycles
59
in the first stage, 30~in the second, and 20 in the last;
60
(7)~all other parameters have default values.
61
 
62
@ Four kinds of specifications can appear in a configuration file,
63
according to the following syntax:
64
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
65
$$\vbox{\halign{$#$\hfil\cr
66
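\<specification>\is\<PV spec>\mid\<cache spec>\mid\<pipe spec>\mid
67
  \<functional spec>\cr
68
\<PV spec>\is\<parameter>\<decimal value>\cr
69
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
70
\<pipe spec>\is\<operation>\<pipeline times>\cr
71
\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>\cr}}$$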
72
 
73
@ A \<PV spec> simply assigns a given value to a given parameter. The
74
possibilities for \<parameter> are as follows:
75
 
76
\def\bull#1 {\smallskip\hang\textindent{$\bullet$}\.{#1}\enspace}
77
\bull fetchbuffer (default 4), maximum instructions in the fetch buffer;
78
must be $\ge1$.
79
 
80
\bull writebuffer (default 2), maximum octabytes in the write buffer;
81
must be $\ge1$.
82
 
83
\bull reorderbuffer (default 5), maximum instructions issued but not
84
committed; must be $\ge1$.
85
 
86
\bull renameregs (default 5), maximum partial results in the reorder
87
buffer; must be $\ge1$.
88
 
89
\bull memslots (default 2), maximum store instructions in the reorder
90
buffer; must be $\ge1$.
91
 
92
\bull localregs (default 256), number of local registers in ring;
93
must be 256, 512, or 1024.
94
 
95
\bull fetchmax (default 2), maximum instructions fetched per cycle;
96
must be $\ge1$.
97
 
98
\bull dispatchmax (default 1), maximum instructions issued per cycle;
99
must be $\ge1$.
100
 
101
\bull peekahead (default 1), maximum lookahead for jumps per cycle.
102
 
103
\bull commitmax (default 1), maximum instructions committed per cycle;
104
must be $\ge1$.
105
 
106
\bull fremmax (default 1), maximum reductions in \.{FREM} computation per
107
cycle; must be $\ge1$.
108
 
109
\bull denin (default 1), extra cycles taken if a floating point input
110
is subnormal.
111
 
112
\bull denout (default 1), extra cycles taken if a floating point result
113
is subnormal.
114
 
115
\bull writeholdingtime (default 0), minimum number of cycles for data to
116
remain in the write buffer.
117
 
118
\bull memaddresstime (default 20), cycles to process memory address;
119
must be $\ge1$.
120
 
121
\bull memreadtime (default 20), cycles to read one memory busload;
122
must be $\ge1$.
123
 
124
\bull memwritetime (default 20), cycles to write one memory busload;
125
must be $\ge1$.
126
 
127
\bull membusbytes (default 8), number of bytes per memory busload; must be a
128
power of~2 that is 8~or~more.
129
 
130
\bull branchpredictbits (default 0), number of bits in each branch prediction
131
table entry; must be $\le8$.
132
 
133
\bull branchaddressbits (default 0), number of bits in instruction address
134
used to index the branch prediction table.
135
 
136
\bull branchhistorybits (default 0), number of bits in branch history used to
137
index the branch prediction table.
138
 
139
\bull branchdualbits (default 0), number of bits of
140
instruction-address-xor-branch-history used to index the branch prediction
141
table.
142
 
143
\bull hardwarepagetable (default 1), is zero if page table calculations
144
must be emulated by the operating system.
145
 
146
\bull disablesecurity (default 0), is 1 if the hot-seat security checks
147
are turned off. This option is used only for testing purposes; it means
148
that the `\.s' interrupt will not occur, and the `\.p' interrupt will
149
be signaled only when going from a nonnegative location to a negative one.
150
 
151
\bull memchunksmax (default 1000), maximum number of $2^{16}$-byte chunks of
152
simulated memory; must be $\ge1$.
153
 
154
\bull hashprime (default 2003), prime number used to address simulated memory;
155
must exceed \.{memchunksmax}, preferably by a factor of about~2.
156
 
157
\smallskip\noindent
158
The values of \.{memchunksmax} and \.{hashprime} affect only the speed of the
159
simulator, not its results---unless a very huge program is being simulated.
160
The stated defaults for \.{memchunksmax} and \.{hashprime}
161
should be adequate for almost all applications.
162
 
163
@ A \<cache spec> assigns a given value to a parameter affecting one of five
164
possible caches:
165
$$\vbox{\halign{$#$\hfil\cr
166
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
167
\<cache name>\is\.{ITcache}\mid\.{DTcache}\mid\.{Icache}\mid\.{Dcache}
168
  \mid\.{Scache}\cr
169
\<policy>\is\<empty>\mid\.{random}\mid\.{serial}
170
          \mid\.{pseudolru}\mid\.{lru}\cr}}$$
171
The possibilities for \<cache parameter> are as follows:
172
 
173
\bull associativity (default 1), number of cache blocks per cache set;
174
must be a power of~2. (A cache with associativity~1 is said to be
175
``direct-mapped.'')
176
 
177
\bull blocksize (default 8), number of bytes per cache block; must be a power
178
of~2, at least equal to the granularity, and at most equal to~8192.
179
The blocksize of \.{ITcache} and \.{DTcache} must be~8.
180
 
181
\bull setsize (default 1), number of sets of cache blocks; must be a power
182
of~2. (A cache with set size~1 is said to be ``fully associative.'')
183
 
184
\bull granularity (default 8), number of bytes per ``dirty bit,'' used to
185
remember which items of data have changed since they were read from memory;
186
must be a power of~2 and at least~8. The granularity must be~8 if
187
\.{writeallocate} is~0.
188
 
189
\bull victimsize (default 0), number of cache blocks in the victim buffer,
190
which holds blocks removed from the main cache sets; must be zero or a power
191
of~2.
192
 
193
\bull writeback (default 0), is 1 in a ``write-back'' cache, which holds dirty
194
data as long as possible; is 0 in a ``write-through'' cache, which cleans
195
all data as soon as possible.
196
 
197
\bull writeallocate (default 0), is 1 in a ``write-allocate'' cache,
198
which remembers all recently written data;
199
is 0 in a ``write-around'' cache, which doesn't make space for newly written
200
data that fails to hit an existing cache block.
201
 
202
\bull accesstime (default 1), number of cycles to query the cache;
203
must be $\ge1$. (Hits in the S-cache actually require {\it twice}
204
the accesstime, once to query the tag and once to transmit the data.)
205
 
206
\bull copyintime (default 1), number of cycles to move a cache block from
207
its input buffer into the cache proper; must be $\ge1$.
208
 
209
\bull copyouttime (default 1), number of cycles to move a cache block
210
from the cache proper to its output buffer; must be $\ge1$.
211
 
212
\bull ports (default 1), number of processes that can simultaneously
213
query the cache; must be $\ge1$.
214
 
215
\smallskip
216
The \<policy> parameter should be nonempty only on cache specifications
217
for parameters
218
\.{associativity} and \.{victimsize}. If no replacement policy is specified,
219
\.{random} is the default. All four policies are equivalent when the
220
\.{associativity} or \.{victimsize} is~1; \.{pseudolru} is equivalent
221
to \.{lru} when the \.{associativity} or \.{victimsize} is~2.
222
 
223
The \.{granularity}, \.{writeback}, \.{writeallocate}, and \.{copyouttime}
224
parameters affect the performance only of the D-cache and S-cache; the other
225
three caches are read-only, so they never need to write their data.
226
 
227
The \.{ports} parameter affects the performance of the D-cache and
228
DT-cache, and (if the \.{PREGO} command is used) the performance of the
229
I-cache and IT-cache. The S-cache accommodates only one process at a time,
230
regardless of the number of specified ports.
231
 
232
Only the translation caches (the IT-cache and DT-cache) are present by
233
default. But if any specifications are given for, say, an I-cache,
234
all of the unspecified I-cache parameters take their default values.
235
 
236
The existence of an S-cache (secondary cache) implies the existence of both
237
I-cache and D-cache (primary caches for instructions and data).
238
The block size of the secondary cache must not be less than the block
239
size of the primary caches. The secondary cache must have the
240
same granularity as the D-cache.
241
 
242
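@ A \<pipe spec> governs the execution time of potentially slow operations.
243
$$\vbox{\halign{$#$\hfil\cr
244
\<pipe spec>\is\<operation>\<pipeline times>\cr
245
\<pipeline times>\is\<decimal value>\mid\<pipeline times>\<decimal value>\cr}}$$
246
Here the \<operation> is one of the following: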
247
 
248
\bull mul0 through \.{mul8} (default 10); the values for \.{mul}$j$ refer
249
to products in which the second operand is less than $2^{8j}$, where $j$
250
is as small as possible. Thus, for example, \.{mul1} applies to
251
nonzero one-byte multipliers.
252
 
253
\bull div (default 60); this applies to integer division, signed and unsigned.
254
 
255
\bull sh (default 1); this applies to left and right shifts, signed and
256
unsigned.
257
 
258
\bull mux (default 1); the multiplex operator.
259
 
260
\bull sadd (default 1); the sideways addition operator.
261
 
262
\bull mor (default 1); the boolean matrix multiplication operators \.{MOR} and
263
\.{MXOR}.
264
 
265
\bull fadd (default 4); floating point addition and subtraction.
266
 
267
\bull fmul (default 4); floating point multiplication.
268
 
269
\bull fdiv (default 40); floating point division.
270
 
271
\bull fsqrt (default 40); floating point square root.
272
 
273
\bull fint (default 4); floating point integerization.
274
 
275
\bull fix (default 2); conversion from floating to fixed, signed and unsigned.
276
 
277
\bull flot (default 2); conversion from fixed to floating, signed and unsigned.
278
 
279
\bull feps (default 4); floating comparison with respect to epsilon.
280
 
281
\smallskip\noindent
282
In each case one can specify a sequence of pipeline stages, with a positive
283
number of cycles to be spent in each stage. For example, a specification like
284
`\.{fmul}~\.{3}~\.{1}' would say that a functional unit that supports
285
\.{FMUL} takes a total of four cycles to compute the floating point product
286
in two stages; it can start working on a second product after three cycles
287
have gone by.
288
 
289
If a floating point operation has a subnormal input, \.{denin} is added to
290
the time for the first stage. If a floating point operation has a subnormal
291
result, \.{denout} is added to the time for the last stage.
292
 
293
@ The fourth and final kind of specification defines a functional unit:
294
$$\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>$$
295
The symbolic name should be at most fifteen characters long.
296
The 64 hexadecimal digits contain 256 bits, with `1' for each supported
297
opcode; the most significant (leftmost) bit is for opcode 0 (\.{TRAP}),
298
and the least significant bit is for opcode 255 (\.{TRIP}).
299
 
300
For example, we can define a load/store unit (which handles register/memory
301
operations), a multiplication unit (which handles fixed and floating point
302
multiplication), a boolean unit (which handles only bitwise operations),
303
and a more general arithmetic-logical unit, as follows:
304
$$\vbox{\halign{\tt#\hfil\cr
305
unit LSU 00000000000000000000000000000000fffffffcfffffffc0000000000000000\cr
306
unit MUL 000080f000000000000000000000000000000000000000000000000000000000\cr
307
unit BIT 000000000000000000000000000000000000000000000000ffff00ff00ff0000\cr
308
unit ALU f0000000ffffffffffffffffffffffff0000000300000003ffffffffffffffff\cr
309
}}$$
310
 
311
The order in which units are specified is important, because \MMIX's dispatcher
312
will try to match each instruction with the first functional unit that
313
supports its opcode. Therefore it is best to list more specialized
314
units (like the \.{BIT} unit in this example) before more general ones;
315
this lets the specialized units have first chance at the instructions
316
they can handle.
317
 
318
There can be any number of functional units, having possibly identical
319
specifications. One should, however, give each unit a unique name
320
(e.g., \.{ALU1} and \.{ALU2} if there are two arithmetic-logical units),
321
since these names are used in diagnostic messages.
322
 
323
Opcodes that aren't supported by any specified unit will cause an
324
emulation trap.
325
@^emulation@>
326
 
327
@ Full details about the significance of all these parameters can be found
328
in the \.{mmix-pipe} module, which defines and discusses the data structures
329
that need to be configured and initialized.
330
 
331
Of course the specifications in a configuration file needn't make any sense,
332
nor need they be practically achievable. We could, for example, specify
333
a unit that handles only the two opcodes \.{NXOR} and \.{DIVUI};
334
we could specify 1-cycle division but pipelined 100-cycle shifts, or
335
1-cycle memory access but 100-cycle cache access. We could create
336
a thousand rename registers and issue a hundred instructions per cycle,
337
etc. Some combinations of parameters are clearly ridiculous.
338
 
339
But there remain a huge number of possibilities of interest, especially
340
as technology continues to evolve. By experimenting with configurations that
341
are extreme by present-day standards, we can see how much might be gained
342
if the corresponding hardware could be built economically.
343
 
344
@* Basic input/output. Let's get ready to program the |MMIX_config| subroutine
345
by building some simple infrastructure. First we need some macros to
346
print error messages.
347
 
348
@d errprint0(f) fprintf(stderr,f)
349
@d errprint1(f,a) fprintf(stderr,f,a)
350
@d errprint2(f,a,b) fprintf(stderr,f,a,b)
351
@d errprint3(f,a,b,c) fprintf(stderr,f,a,b,c)
352
@d panic(x)@+ {@+x;@+errprint0("!\n");@+exit(-1);@+}
353
 
354
@ And we need a place to look at the input.
355
 
356
@d BUF_SIZE 100 /* we don't need long lines */
357
 
358
@=
359
FILE *config_file; /* input comes from here */
360
char buffer[BUF_SIZE]; /* input lines go here */
361
char token[BUF_SIZE]; /* and tokens are copied to here */
362
char *buf_pointer=buffer; /* this is our current position */
363
bool token_prescanned; /* does |token| contain the next token already? */
364
 
365
@ The |get_token| routine copies the next token of input into the |token|
366
buffer. After the input has ended, a final `\.{end}' is appended.
367
 
368
@=
369
static void get_token @,@,@[ARGS((void))@];@+@t}\6{@>
370
static void get_token() /* set |token| to the next token of the configuration file */
371
{
372
  register char *p,*q;
373
  if (token_prescanned) {
374
    token_prescanned=false;@+ return;
375
  }
376
  while(1) { /* scan past white space */
377
    if (*buf_pointer=='\0' || *buf_pointer=='\n' || *buf_pointer=='%') {
378
      if (!fgets(buffer,BUF_SIZE,config_file)) {
379
        strcpy(token,"end");@+return;
380
      }
381
      if (strlen(buffer)==BUF_SIZE-1 && buffer[BUF_SIZE-2]!='\n')
382
        panic(errprint1("config file line too long: `%s...'",buffer));
383
@.config file line...@>
384
      buf_pointer=buffer;
385
    }@+else if (!isspace(*buf_pointer)) break;
386
    else buf_pointer++;
387
  }
388
  for (p=buf_pointer,q=token;!isspace(*p) && *p!='%';p++,q++) *q=*p;
389
  buf_pointer=p;@+ *q='\0';
390
  return;
391
}
392
 
393
@ The |get_int| routine is called when we wish to input a decimal value.
394
It returns $-1$ if the next token isn't a string of decimal digits.
395
 
396
@=
397
static int get_int @,@,@[ARGS((void))@];@+@t}\6{@>
398
static int get_int()
399
{@+ int v;
400
  char *p;
401
  get_token();
402
  for (p=token,v=0; *p>='0' && *p<='9'; p++) v=10*v+*p-'0';
403
  if (*p) return -1;
404
  return v;
405
}
406
 
407
@ A simple data structure makes it fairly easy to deal with
408
parameter/value specifications.
409
 
410
@=
411
typedef struct {
412
  char name[20]; /* symbolic name */
413
  int *v; /* internal name */
414
  int defval; /* default value */
415
  int minval, maxval; /* minimum and maximum legal values */
416
  bool power_of_two; /* must it be a power of two? */
417
} pv_spec;
418
 
419
@ Cache parameters are a bit more difficult, but still not bad.
420
 
421
@=
422
typedef enum {@!assoc,@!blksz,@!setsz,@!gran,@!vctsz,
423
  @!wrb,@!wra,@!acctm,@!citm,@!cotm,@!prts} c_param;
424
@#
425
typedef struct {
426
  char name[20]; /* symbolic name */
427
  c_param v; /* internal code */
428
  int defval; /* default value */
429
  int minval, maxval; /* minimum and maximum legal values */
430
  bool power_of_two; /* must it be a power of two? */
431
} cpv_spec;
432
 
433
@ Operation codes are the easiest of all.
434
 
435
@=
436
typedef struct {
437
  char name[8]; /* symbolic name */
438
  internal_opcode v; /* internal code */
439
  int defval; /* default value */
440
} op_spec;
441
 
442
@ Most of the parameters are external variables declared in the header
443
file \.{mmix-pipe.h}; but some are private to this module. Here we
444
define the main tables used below.
445
 
446
@=
447
int fetch_buf_size,write_buf_size,reorder_buf_size,mem_bus_bytes,hardware_PT;
448
int max_cycs=60;
449
pv_spec PV[]={@/
450
{"fetchbuffer", &fetch_buf_size, 4, 1, INT_MAX, false},@/
451
{"writebuffer", &write_buf_size, 2, 1, INT_MAX, false},@/
452
{"reorderbuffer", &reorder_buf_size, 5, 1, INT_MAX, false},@/
453
{"renameregs", &max_rename_regs, 5, 1, INT_MAX, false},@/
454
{"memslots", &max_mem_slots, 2, 1, INT_MAX, false},@/
455
{"localregs", &lring_size, 256, 256, 1024, true},@/
456
{"fetchmax", &fetch_max, 2, 1, INT_MAX, false},@/
457
{"dispatchmax", &dispatch_max, 1, 1, INT_MAX, false},@/
458
{"peekahead", &peekahead, 1, 0, INT_MAX, false},@/
459
{"commitmax", &commit_max, 1, 1, INT_MAX, false},@/
460
{"fremmax", &frem_max, 1, 1, INT_MAX, false},@/
461
{"denin",&denin_penalty, 1, 0, INT_MAX, false},@/
462
{"denout",&denout_penalty, 1, 0, INT_MAX, false},@/
463
{"writeholdingtime", &holding_time, 0, 0, INT_MAX, false},@/
464
{"memaddresstime", &mem_addr_time, 20, 1, INT_MAX, false},@/
465
{"memreadtime", &mem_read_time, 20, 1, INT_MAX, false},@/
466
{"memwritetime", &mem_write_time, 20, 1, INT_MAX, false},@/
467
{"membusbytes", &mem_bus_bytes, 8, 8, INT_MAX, true},@/
468
{"branchpredictbits", &bp_n, 0, 0, 8, false},@/
469
{"branchaddressbits", &bp_a, 0, 0, 32, false},@/
470
{"branchhistorybits", &bp_b, 0, 0, 32, false},@/
471
{"branchdualbits", &bp_c, 0, 0, 32, false},@/
472
{"hardwarepagetable", &hardware_PT, 1, 0, 1, false},@/
473
{"disablesecurity", (int*)&security_disabled, 0, 0, 1, false},@/
474
{"memchunksmax", &mem_chunks_max, 1000, 1, INT_MAX, false},@/
475
{"hashprime", &hash_prime, 2003, 2, INT_MAX, false}};
476
@#
477
cpv_spec CPV[]={
478
{"associativity", assoc, 1, 1, INT_MAX, true},@/
479
{"blocksize", blksz, 8, 8, 8192, true},@/
480
{"setsize", setsz, 1, 1, INT_MAX, true},@/
481
{"granularity", gran, 8, 8, 8192, true},@/
482
{"victimsize", vctsz, 0, 0, INT_MAX, true},@/
483
{"writeback", wrb, 0, 0, 1,false},@/
484
{"writeallocate", wra, 0, 0, 1,false},@/
485
{"accesstime", acctm, 1, 1, INT_MAX, false},@/
486
{"copyintime", citm, 1, 1, INT_MAX, false},@/
487
{"copyouttime", cotm, 1, 1, INT_MAX, false},@/
488
{"ports", prts, 1, 1, INT_MAX,false}};
489
@#
490
op_spec OP[]={
491
{"mul0", mul0, 10},
492
{"mul1", mul1, 10},
493
{"mul2", mul2, 10},
494
{"mul3", mul3, 10},
495
{"mul4", mul4, 10},
496
{"mul5", mul5, 10},
497
{"mul6", mul6, 10},
498
{"mul7", mul7, 10},
499
{"mul8", mul8, 10},@|
500
{"div", div, 60},
501
{"sh", sh, 1},
502
{"mux", mux, 1},
503
{"sadd", sadd, 1},
504
{"mor", mor, 1},@|
505
{"fadd", fadd, 4},
506
{"fmul", fmul, 4},
507
{"fdiv", fdiv, 40},
508
{"fsqrt", fsqrt, 40},
509
{"fint", fint, 4},@|
510
{"fix", fix, 2},
511
{"flot", flot, 2},
512
{"feps", feps, 4}};
513
int PV_size,CPV_size,OP_size; /* the number of entries in |PV|, |CPV|, |OP| */
514
 
515
@ The |new_cache| routine creates a \&{cache} structure with default values.
516
(These default values are ``hard-wired'' into the program, not actually
517
read from the |CPV| table.)
518
 
519
@=
520
static cache* new_cache @,@,@[ARGS((char*))@];@+@t}\6{@>
521
static cache* new_cache(name)
522
  char *name;
523
{@+register cache *c=(cache*)calloc(1,sizeof(cache));
524
  if (!c) panic(errprint1("Can't allocate %s",name));
525
@.Can't allocate...@>
526
  c->aa=1; /* default associativity, should equal |CPV[0].defval| */
527
  c->bb=8; /* default blocksize */
528
  c->cc=1; /* default setsize */
529
  c->gg=8; /* default granularity */
530
  c->vv=0; /* default victimsize */
531
  c->repl=random; /* default replacement policy */
532
  c->vrepl=random; /* default victim replacement policy */
533
  c->mode=0; /* default mode is write-through and write-around */
534
  c->access_time=c->copy_in_time=c->copy_out_time=1;
535
  c->filler.ctl=&(c->filler_ctl);
536
  c->filler_ctl.ptr_a=(void*)c;
537
  c->filler_ctl.go.o.l=4;
538
  c->flusher.ctl=&(c->flusher_ctl);
539
  c->flusher_ctl.ptr_a=(void*)c;
540
  c->flusher_ctl.go.o.l=4;
541
  c->ports=1;
542
  c->name=name;
543
  return c;
544
}
545
 
546
@ @=
547
PV_size=(sizeof PV)/sizeof(pv_spec);
548
CPV_size=(sizeof CPV)/sizeof(cpv_spec);
549
OP_size=(sizeof OP)/sizeof(op_spec);
550
ITcache=new_cache("ITcache");
551
DTcache=new_cache("DTcache");
552
Icache=Dcache=Scache=NULL;
553
for (j=0;j
554
for (j=0;j
555
  pipe_seq[OP[j].v][0]=OP[j].defval;
556
  pipe_seq[OP[j].v][1]=0; /* one stage */
557
}
558
 
559
@* Reading the specs. Before we're ready to process the configuration file,
560
we need to count the number of functional units, so that we know
561
how much space to allocate for them.
562
 
563
A special background unit is always provided, just to make sure that
564
\.{TRAP} and \.{TRIP} instructions are handled by somebody.
565
 
566
@=
567
funit_count=0;
568
while (strcmp(token,"end")!=0) {
569
  get_token();
570
  if (strcmp(token,"unit")==0) {
571
    funit_count++;
572
    get_token();@+get_token(); /* a unit might be named \.{unit} or \.{end} */
573
  }
574
}
575
funit=(func*)calloc(funit_count+1,sizeof(func));
576
if (!funit) panic(errprint0("Can't allocate the functional units"));
577
@.Can't allocate...@>
578
strcpy(funit[funit_count].name,"%%");
579
@.\%\%@>
580
funit[funit_count].ops[0]=0x80000000; /* \.{TRAP} */
581
funit[funit_count].ops[7]=0x1; /* \.{TRIP} */
582
 
583
@ Now we can read the specifications and obey them. This program doesn't
584
bother to be very tolerant of errors, nor does it try to be very efficient.
585
 
586
Incidentally, the specifications don't have to be broken into individual lines
587
in any meaningful way. We simply read them token by token.
588
 
589
@=
590
rewind(config_file);
591
funit_count=0;
592
token[0]='\0';
593
while (strcmp(token,"end")!=0) {
594
  get_token();
595
  if (strcmp(token,"end")==0) break;
596
  @;
597
  @;
598
  @;
599
  if (strcmp(token,"unit")==0) @;
600
  panic(errprint1(
601
   "Configuration syntax error: Specification can't start with `%s'",token));
602
@.Configuration syntax error...@>
603
}
604
 
605
@ @=
606
for (j=0;j
607
  n=get_int();
608
  if (n
609
@.Configuration error...@>
610
     "Configuration error: %s must be >= %d",PV[j].name,PV[j].minval));
611
  if (n>PV[j].maxval) panic(errprint2(
612
     "Configuration error: %s must be <= %d",PV[j].name,PV[j].maxval));
613
  if (PV[j].power_of_two && (n&(n-1))) panic(errprint1(
614
     "Configuration error: %s must be a power of 2",PV[j].name));
615
  *(PV[j].v)=n;
616
  break;
617
}
618
if (j
619
 
620
@ @=
621
if (strcmp(token,"ITcache")==0) {
622
  pcs(ITcache);@+continue;
623
}@+else if (strcmp(token,"DTcache")==0) {
624
  pcs(DTcache);@+continue;
625
}@+else if (strcmp(token,"Icache")==0) {
626
  if (!Icache) Icache=new_cache("Icache");
627
  pcs(Icache);@+continue;
628
}@+else if (strcmp(token,"Dcache")==0) {
629
  if (!Dcache) Dcache=new_cache("Dcache");
630
  pcs(Dcache);@+continue;
631
}@+else if (strcmp(token,"Scache")==0) {
632
  if (!Icache) Icache=new_cache("Icache");
633
  if (!Dcache) Dcache=new_cache("Dcache");
634
  if (!Scache) Scache=new_cache("Scache");
635
  pcs(Scache);@+continue;
636
}
637
 
638
@ @=
639
static void ppol @,@,@[ARGS((replace_policy*))@];@+@t}\6{@>
640
static void ppol(rr) /* subroutine to scan for a replacement policy */
641
  replace_policy *rr;
642
{
643
  get_token();
644
  if (strcmp(token,"random")==0) *rr=random;
645
  else if (strcmp(token,"serial")==0) *rr=serial;
646
  else if (strcmp(token,"pseudolru")==0) *rr=pseudo_lru;
647
  else if (strcmp(token,"lru")==0) *rr=lru;
648
  else token_prescanned=true; /* oops, we should rescan that token */
649
}
650
 
651
@ @=
652
static void pcs @,@,@[ARGS((cache*))@];@+@t}\6{@>
653
static void pcs(c) /* subroutine to process a cache spec */
654
  cache *c;
655
{
656
  register int j,n;
657
  get_token();
658
  for (j=0;j
659
  if (j==CPV_size) panic(errprint1(
660
     "Configuration syntax error: `%s' isn't a cache parameter name",token));
661
@.Configuration syntax error...@>
662
  n=get_int();
663
  if (n
664
     "Configuration error: %s must be >= %d",CPV[j].name,CPV[j].minval));
665
@.Configuration error...@>
666
  if (n>CPV[j].maxval) panic(errprint2(
667
     "Configuration error: %s must be <= %d",CPV[j].name,CPV[j].maxval));
668
  if (CPV[j].power_of_two && (n&(n-1))) panic(errprint1(
669
     "Configuration error: %s must be power of 2",CPV[j].name));
670
  switch (CPV[j].v) {
671
 case assoc: c->aa=n;@+ppol(&(c->repl));@+break;
672
 case blksz: c->bb=n;@+break;
673
 case setsz: c->cc=n;@+break;
674
 case gran: c->gg=n;@+break;
675
 case vctsz: c->vv=n;@+ppol(&(c->vrepl));@+break;
676
 case wrb: c->mode=(c->mode&~WRITE_BACK)+n*WRITE_BACK;@+break;
677
 case wra: c->mode=(c->mode&~WRITE_ALLOC)+n*WRITE_ALLOC;@+break;
678
 case acctm:@+ if (n>max_cycs) max_cycs=n;
679
   c->access_time=n;@+break;
680
 case citm:@+ if (n>max_cycs) max_cycs=n;
681
   c->copy_in_time=n;@+break;
682
 case cotm:@+ if (n>max_cycs) max_cycs=n;
683
   c->copy_out_time=n;@+break;
684
 case prts: c->ports=n;@+break;
685
  }
686
}
687
 
688
@ @=
689
for (j=0;j
690
  for (i=0;;i++) {
691
    n=get_int();
692
    if (n<0) break;
693
    if (n==0) panic(errprint0(
694
      "Configuration error: Pipeline cycles must be positive"));
695
@.Configuration error...@>
696
    if (n>255) panic(errprint0(
697
      "Configuration error: Pipeline cycles must be <= 255"));
698
    if (n>max_cycs) max_cycs=n;
699
    if (i>=pipe_limit) panic(errprint1(
700
      "Configuration error: More than %d pipeline stages",pipe_limit));
701
    pipe_seq[OP[j].v][i]=n;
702
  }
703
  token_prescanned=true;
704
  break;
705
}
706
if (j
707
 
708
@ @=
709
{
  unsigned int bits=0; /* the hex digits gathered for the current word of |ops| */
  get_token(); /* the symbolic name of the unit */
  if (strlen(token)>15) panic(errprint1(
       "Configuration error: `%s' is more than 15 characters long",token));
@.Configuration error...@>
  strcpy(funit[funit_count].name,token);
  get_token(); /* the 64 hexadecimal digits */
  if (strlen(token)!=64) panic(errprint1(
       "Configuration error: unit %s doesn't have 64 hex digit specs",
                   funit[funit_count].name));
  /* The digits are gathered in an unsigned variable, because shifting a
     signed |int| left is undefined once the leading digit of a word is 8 or
     more; |n| is still cleared here, as it always was. */
  for (i=j=n=0;j<64;j++) {
    if (token[j]>='0' && token[j]<='9')
      bits=(bits<<4)+(unsigned int)(token[j]-'0');
    else if (token[j]>='a' && token[j]<='f')
      bits=(bits<<4)+(unsigned int)(token[j]-'a'+10);
    else if (token[j]>='A' && token[j]<='F')
      bits=(bits<<4)+(unsigned int)(token[j]-'A'+10);
    else panic(errprint1(
        "Configuration error: `%c' is not a hex digit",token[j]));
    if ((j&0x7)==0x7) funit[funit_count].ops[i++]=bits, bits=0;
      /* eight digits complete one word */
  }
  funit_count++;
  continue;
}
730
 
731
@* Checking and allocating. The battle is only half over when we've
732
absorbed all the data of the configuration file. We still must check for
733
interactions between different quantities, and we must allocate
734
space for cache blocks, coroutines, etc.
735
 
736
One of the most difficult tasks facing us is to determine the maximum number
737
of pipeline stages needed by each functional unit. Let's tackle that first.
738
 
739
@=
740
@;
741
for (j=0;j<=funit_count;j++) {
742
  @;
743
  funit[j].k=n;
744
  funit[j].co=(coroutine*)calloc(n,sizeof(coroutine));
745
  for (i=0;i
746
    funit[j].co[i].name=funit[j].name;
747
    funit[j].co[i].stage=i+1;
748
  }
749
}
750
 
751
@ @=
752
for (j=div;j<=max_pipe_op;j++) int_stages[j]=strlen(pipe_seq[j]);
753
for (;j<=max_real_command;j++) int_stages[j]=1;
754
for (j=mul0,n=0;j<=mul8;j++)
755
  if (strlen(pipe_seq[j])>n) n=strlen(pipe_seq[j]);
756
int_stages[mul]=n;
757
int_stages[ld]=int_stages[st]=int_stages[frem]=2;
758
for (j=0;j<256;j++) stages[j]=int_stages[int_op[j]];
759
 
760
@ The |int_op| conversion table is similar to the |internal_op| array of
761
the \\{MMIX\_pipe} routine, but it replaces |divu| by |div|,
762
|fsub| by |fadd|, etc.
763
 
764
@=
765
/* |int_op| is indexed by the opcode byte; each row below holds eight
   consecutive opcodes, so row $r$ (counting from zero) covers
   opcodes $8r$ through $8r+7$ */
internal_opcode int_op[256]={@/
  trap,fcmp,funeq,funeq,fadd,fix,fadd,fix,@/
  flot,flot,flot,flot,flot,flot,flot,flot,@/
  fmul,feps,feps,feps,fdiv,fsqrt,frem,fint,@/
  mul,mul,mul,mul,div,div,div,div,@/
  add,add,addu,addu,sub,sub,subu,subu,@/
  addu,addu,addu,addu,addu,addu,addu,addu,@/
  cmp,cmp,cmpu,cmpu,sub,sub,subu,subu,@/
  sh,sh,sh,sh,sh,sh,sh,sh,@/
  br,br,br,br,br,br,br,br,@/
  br,br,br,br,br,br,br,br,@/
  pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
  pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
  cset,cset,cset,cset,cset,cset,cset,cset,@/
  cset,cset,cset,cset,cset,cset,cset,cset,@/
  zset,zset,zset,zset,zset,zset,zset,zset,@/
  zset,zset,zset,zset,zset,zset,zset,zset,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,prego,prego,go,go,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,pushgo,pushgo,@/
  or,or,orn,orn,nor,nor,xor,xor,@/
  and,and,andn,andn,nand,nand,nxor,nxor,@/
  bdif,bdif,wdif,wdif,tdif,tdif,odif,odif,@/
  mux,mux,sadd,sadd,mor,mor,mor,mor,@/
  set,set,set,set,addu,addu,addu,addu,@/
  or,or,or,or,andn,andn,andn,andn,@/
  noop,noop,pushj,pushj,set,set,put,put,@/
  pop,resume,save,unsave,sync,noop,get,trip};
int int_stages[max_real_command+1];
       /* stages as function of |internal_opcode| */
int stages[256]; /* stages as function of |mmix_opcode| */
801
 
802
@ @=
803
/* set |n| to the largest |stages[i]| over all opcodes |i| whose bit is
   set in the |ops| bitmap of |funit[j]|; bit |i| is found in word |i>>5|,
   counting bit positions from the most significant end */
n=0;
for (i=0;i<256;i++) {
  if (!((funit[j].ops[i>>5]<<(i&0x1f))&0x80000000)) continue;
      /* bit |i| is clear */
  if (stages[i]>n) n=stages[i];
}
/* |n| is still zero if no selected opcode has a positive stage count */
if (n==0) panic(errprint1(
       "Configuration error: unit %s doesn't do anything",funit[j].name));
@.Configuration error...@>
809
 
810
@ The next hardest thing on our agenda is to set up the cache structure
811
fields that depend on the parameters. For example, although we have defined
812
the parameter in the |bb| field (the block size), we also need to compute the
813
|b|~field (log of the block size), and we must create the cache blocks
814
themselves.
815
 
816
@=
817
static int lg @,@,@[ARGS((int))@];@+@t}\6{@>
818
static int lg(n) /* compute binary logarithm */
  int n;
{
  register unsigned int rest; /* the part of |n| not yet shifted away */
  register int bits; /* one less than the number of shifts so far */
  if (n<=0) return -1; /* zero gave $-1$ before; a negative |n| must not
      be shifted as a signed quantity, because the loop might never end */
  for (rest=(unsigned int)n,bits=-1;rest;rest>>=1) bits++;
  return bits; /* position of the leading one bit, i.e., the floor of
      the base-2 logarithm */
}
824
 
825
@ @=
826
static void alloc_cache @,@,@[ARGS((cache*,char*))@];@+@t}\6{@>
static void alloc_cache(c,name) /* derive the remaining fields of |c|
                                   and allocate its storage */
  cache *c; /* its |aa|, |bb|, |cc|, |gg|, |vv|, |mode|, |ports|
               fields are read here */
  char *name; /* appears in error messages; its first two characters
                 also select special treatment below */
{@+register int j,k;
  if (c->bb<c->gg) panic(errprint1(
      "Configuration error: blocksize of %s is less than granularity",name));
@.Configuration error...@>
  if (name[1]=='T' && c->bb!=8) panic(errprint1(
      "Configuration error: blocksize of %s must be 8",name));
  c->a=lg(c->aa); /* each single-letter field is the binary logarithm */
  c->b=lg(c->bb); /* of the double-letter field with the same letter */
  c->c=lg(c->cc);
  c->g=lg(c->gg);
  c->v=lg(c->vv);
  if (c->a+c->b+c->c>=32) panic(errprint1(
     "Configuration error: %s has >= 4 gigabytes of data",name));
  c->tagmask=-(1<<(c->b+c->c)); /* computed only after the size check,
      so that the shift count is known to be less than 32 */
  if (c->gg!=8 && !(c->mode&WRITE_ALLOC)) panic(errprint2(
     "Configuration error: %s does write-around with granularity %d",
        name,c->gg));
  @<Allocate the cache sets for cache |c|@>;
  if (c->vv) @<Allocate the victim cache for cache |c|@>;
  /* the input and output buffers each hold one block: one dirty flag
     per granule and |bb>>3| octabytes of data */
  c->inbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
  if (!c->inbuf.dirty) panic(errprint1(
     "Can't allocate dirty bits for inbuffer of %s",name));
@.Can't allocate...@>
  c->inbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
  if (!c->inbuf.data) panic(errprint1(
     "Can't allocate data for inbuffer of %s",name));
  c->outbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
  if (!c->outbuf.dirty) panic(errprint1(
     "Can't allocate dirty bits for outbuffer of %s",name));
  c->outbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
  if (!c->outbuf.data) panic(errprint1(
     "Can't allocate data for outbuffer of %s",name));
  if (name[0]!='S') @<Allocate reader coroutines for cache |c|@>;
}

@ Every block starts out with |sign_bit| in |tag.h|, which marks its
tag as invalid.

@d sign_bit 0x80000000

@<Allocate the cache sets for cache |c|@>=
c->set=(cacheset *)calloc(c->cc,sizeof(cacheset));
if (!c->set) panic(errprint1(
     "Can't allocate cache sets for %s",name));
@.Can't allocate...@>
for (j=0;j<c->cc;j++) { /* |cc| sets of |aa| blocks each */
  c->set[j]=(cacheblock *)calloc(c->aa,sizeof(cacheblock));
  if (!c->set[j]) panic(errprint2(
    "Can't allocate cache blocks for set %d of %s",j,name));
  for (k=0;k<c->aa;k++) {
    c->set[j][k].tag.h=sign_bit; /* invalid tag */
    c->set[j][k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));
    if (!c->set[j][k].dirty) panic(errprint3(
      "Can't allocate dirty bits for block %d of set %d of %s",k,j,name));
    c->set[j][k].data=(octa *)calloc(c->bb>>3,sizeof(octa));
    if (!c->set[j][k].data) panic(errprint3(
      "Can't allocate data for block %d of set %d of %s",k,j,name));
  }
}

@ @<Allocate the victim cache for cache |c|@>=
{
  c->victim=(cacheblock*)calloc(c->vv,sizeof(cacheblock));
  if (!c->victim) panic(errprint1(
      "Can't allocate blocks for victim cache of %s",name));
  for (k=0;k<c->vv;k++) { /* same per-block layout as in the sets */
    c->victim[k].tag.h=sign_bit; /* invalid tag */
    c->victim[k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));
    if (!c->victim[k].dirty) panic(errprint2(
      "Can't allocate dirty bits for block %d of victim cache of %s",
                       k,name));
@.Can't allocate...@>
    c->victim[k].data=(octa *)calloc(c->bb>>3,sizeof(octa));
    if (!c->victim[k].data) panic(errprint2(
      "Can't allocate data for block %d of victim cache of %s",k,name));
  }
}

@ @<Allocate reader coroutines for cache |c|@>=
{
  c->reader=(coroutine*)calloc(c->ports,sizeof(coroutine));
  if (!c->reader) panic(errprint1(
@.Can't allocate...@>
        "Can't allocate readers for %s",name));
  for (j=0;j<c->ports;j++) { /* one reader per port */
    c->reader[j].stage=vanish;
    c->reader[j].name=(name[0]=='D'? (name[1]=='T'? "DTreader": "Dreader"):
                                     (name[1]=='T'? "ITreader": "Ireader"));
  }
}
917
 
918
@ @=
919
/* the two translation caches are allocated unconditionally */
alloc_cache(ITcache,"ITcache");
ITcache->filler.name="ITfiller";@+ ITcache->filler.stage=fill_from_virt;
alloc_cache(DTcache,"DTcache");
DTcache->filler.name="DTfiller";@+ DTcache->filler.stage=fill_from_virt;
/* the remaining caches are set up only if their pointers are nonnull */
if (Icache) {
  alloc_cache(Icache,"Icache");
  Icache->filler.name="Ifiller";@+ Icache->filler.stage=fill_from_mem;
}
if (Dcache) {
  alloc_cache(Dcache,"Dcache");
  Dcache->filler.name="Dfiller";@+ Dcache->filler.stage=fill_from_mem;
  Dcache->flusher.name="Dflusher";@+ Dcache->flusher.stage=flush_to_mem;
}
if (Scache) {
  /* NOTE(review): |Icache| and |Dcache| are dereferenced below without a
     test; presumably an earlier check guarantees that both exist whenever
     |Scache| does --- confirm */
  alloc_cache(Scache,"Scache");
  if (Scache->bb<Icache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Icache blocks"));
@.Configuration error...@>
  if (Scache->bb<Dcache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Dcache blocks"));
  if (Scache->gg!=Dcache->gg) panic(errprint0(
     "Configuration error: Scache granularity differs from the Dcache"));
  /* with an S-cache present, the I- and D-caches fill from and flush
     to it, and only the S-cache deals with memory */
  Icache->filler.stage=fill_from_S;
  Dcache->filler.stage=fill_from_S;@+ Dcache->flusher.stage=flush_to_S;
  Scache->filler.name="Sfiller";@+ Scache->filler.stage=fill_from_mem;
  Scache->flusher.name="Sflusher";@+ Scache->flusher.stage=flush_to_mem;
}
946
 
947
@ Now we are nearly done. The only nontrivial task remaining is
948
to allocate the ring of queues for coroutine scheduling; for this we
949
need to determine the maximum waiting time that will occur between
950
scheduler and schedulee.
951
 
952
@=
953
bus_words=mem_bus_bytes>>3; /* bus width in units of eight bytes */
j=(mem_read_time<mem_write_time? mem_write_time: mem_read_time);
    /* the slower of the two kinds of memory access */
n=1;
if (Scache && Scache->bb>n) n=Scache->bb;
if (Icache && Icache->bb>n) n=Icache->bb;
if (Dcache && Dcache->bb>n) n=Dcache->bb;
    /* now |n| is the largest block size of any cache that is present */
n=mem_addr_time+((int)(n+bus_words-1)/bus_words)*j;
    /* address time plus one access per bus transfer, rounding up */
if (n>max_cycs) max_cycs=n; /* now |max_cycs| bounds the waiting time */
ring_size=max_cycs+1;
ring=(coroutine *)calloc(ring_size,sizeof(coroutine));
if (!ring) panic(errprint0("Can't allocate the scheduling ring"));
@.Can't allocate...@>
{@+register coroutine *p;
  for (p=ring;p<ring+ring_size;p++) {
    p->name=""; /* header nodes are nameless */
    p->stage=max_stage;
  }
}
971
 
972
@ @s chunknode int
973
 
974
@=
975
if (hash_prime<=mem_chunks_max) panic(errprint0(
  "Configuration error: hashprime must exceed memchunksmax"));
@.Configuration error...@>
/* the hash table has |hash_prime+1| entries; entries 0 and |hash_prime|
   each receive a chunk of |1<<13| octabytes right away */
mem_hash=(chunknode *)calloc(hash_prime+1,sizeof(chunknode));
if (!mem_hash) panic(errprint0("Can't allocate the hash table"));
@.Can't allocate...@>
mem_hash[0].chunk=(octa*)calloc(1<<13,sizeof(octa));
if (!mem_hash[0].chunk) panic(errprint0("Can't allocate chunk 0"));
mem_hash[hash_prime].chunk=(octa*)calloc(1<<13,sizeof(octa));
if (!mem_hash[hash_prime].chunk) panic(errprint0("Can't allocate 0 chunk"));
mem_chunks=1;
/* each of the next three buffers gets one entry more than its
   configured size; the |top| pointer addresses that final entry */
fetch_bot=(fetch*)calloc(fetch_buf_size+1,sizeof(fetch));
if (!fetch_bot) panic(errprint0("Can't allocate the fetch buffer"));
fetch_top=fetch_bot+fetch_buf_size;
reorder_bot=(control*)calloc(reorder_buf_size+1,sizeof(control));
if (!reorder_bot) panic(errprint0("Can't allocate the reorder buffer"));
reorder_top=reorder_bot+reorder_buf_size;
wbuf_bot=(write_node*)calloc(write_buf_size+1,sizeof(write_node));
if (!wbuf_bot) panic(errprint0("Can't allocate the write buffer"));
wbuf_top=wbuf_bot+write_buf_size;
if (bp_n) { /* a branch prediction table is desired */
  if (bp_a+bp_b+bp_c>=32) panic(errprint0(
     "Configuration error: Branch table has >= 4 gigabytes of data"));
  bp_table=(char*)calloc(1<<(bp_a+bp_b+bp_c),sizeof(char));
  if (!bp_table) panic(errprint0("Can't allocate the branch table"));
}
else bp_table=NULL;
l=(specnode*)calloc(lring_size,sizeof(specnode));
if (!l) panic(errprint0("Can't allocate local registers"));
/* the prefetch buffer is sized by the larger of |bus_words| and
   the I-cache block size, if there is an I-cache */
j=bus_words;
if (Icache && Icache->bb>j) j=Icache->bb;
fetched=(octa*)calloc(j,sizeof(octa));
if (!fetched) panic(errprint0("Can't allocate prefetch buffer"));
dispatch_stat=(int*)calloc(dispatch_max+1,sizeof(int));
if (!dispatch_stat) panic(errprint0("Can't allocate dispatch counts"));
no_hardware_PT=1-hardware_PT;
1011
 
1012
@* Putting it all together. Here then is the desired configuration
1013
subroutine.
1014
 
1015
@c
1016
#include <stdio.h> /* |fopen|, |fgets|, |sscanf|, |rewind| */
#include <stdlib.h> /* |calloc|, |exit| */
#include <ctype.h> /* |isspace| */
#include <string.h> /* |strcpy|, |strlen|, |strcmp| */
#include <limits.h> /* |INT_MAX| */
#include "mmix-pipe.h"
1022
@@;
1023
@@;
1024
@@;
1025
void MMIX_config(filename) /* open the configuration file, then carry
                              out the setup steps in order */
  char *filename; /* passed to |fopen| for reading */
{@+register int i,j,n;
  config_file=fopen(filename,"r");
  if (!config_file)
    panic(errprint1("Can't open configuration file %s",filename));
@.Can't open...@>
  /* NOTE(review): each of the next seven lines originally named a section
     to be expanded at this point; the names are missing from this copy
     and must be restored from a pristine source before tangling */
  @;
  @;
  @;
  @;
  @;
  @;
  @;
}
1040
 
1041
@*Index.

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.