% This file is part of the MMIXware package (c) Donald E Knuth 1999
|
% This file is part of the MMIXware package (c) Donald E Knuth 1999
|
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
|
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
|
|
|
\def\title{MMIX-CONFIG}
|
\def\title{MMIX-CONFIG}
|
\def\MMIX{\.{MMIX}}
|
\def\MMIX{\.{MMIX}}
|
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
|
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
|
@s bool int
|
@s bool int
|
@s cache int
|
@s cache int
|
@s func int
|
@s func int
|
@s coroutine int
|
@s coroutine int
|
@s octa int
|
@s octa int
|
@s cacheset int
|
@s cacheset int
|
@s cacheblock int
|
@s cacheblock int
|
@s fetch int
|
@s fetch int
|
@s control int
|
@s control int
|
@s write_node int
|
@s write_node int
|
@s internal_opcode int
|
@s internal_opcode int
|
@s replace_policy int
|
@s replace_policy int
|
@s PV TeX
|
@s PV TeX
|
@s mmix_opcode int
|
@s mmix_opcode int
|
@s specnode int
|
@s specnode int
|
\def\PV{\\{PV}} % use italics, not \tt
|
\def\PV{\\{PV}} % use italics, not \tt
|
@s CPV TeX
|
@s CPV TeX
|
\def\CPV{\\{CPV}}
|
\def\CPV{\\{CPV}}
|
@s OP TeX
|
@s OP TeX
|
\def\OP{\\{OP}}
|
\def\OP{\\{OP}}
|
@s and normal @q unreserve a C++ keyword @>
|
@s and normal @q unreserve a C++ keyword @>
|
@s or normal @q unreserve a C++ keyword @>
|
@s or normal @q unreserve a C++ keyword @>
|
@s xor normal @q unreserve a C++ keyword @>
|
@s xor normal @q unreserve a C++ keyword @>
|
|
|
@*Input format. Configuration files allow this simulator to adapt itself to
|
@*Input format. Configuration files allow this simulator to adapt itself to
|
infinitely many possible combinations of hardware features. The purpose of the
|
infinitely many possible combinations of hardware features. The purpose of the
|
present module is to read a configuration file, check it for validity, and
|
present module is to read a configuration file, check it for validity, and
|
set up the relevant data structures.
|
set up the relevant data structures.
|
|
|
All data in a configuration file consists simply of {\it tokens\/} separated
|
All data in a configuration file consists simply of {\it tokens\/} separated
|
by one or more units of white space, where a ``token'' is any sequence of
|
by one or more units of white space, where a ``token'' is any sequence of
|
nonspace characters that doesn't contain a percent sign. Percent signs
|
nonspace characters that doesn't contain a percent sign. Percent signs
|
and anything following them on a line are ignored; this convention allows
|
and anything following them on a line are ignored; this convention allows
|
a user to include comments in the file. Here's a simple (but weird) example:
|
a user to include comments in the file. Here's a simple (but weird) example:
|
$$\vbox{\halign{\tt#\hfil\cr
|
$$\vbox{\halign{\tt#\hfil\cr
|
\% Silly configuration\cr
|
\% Silly configuration\cr
|
writebuffer 200\cr
|
writebuffer 200\cr
|
memaddresstime 100\cr
|
memaddresstime 100\cr
|
Dcache associativity 4 lru\cr
|
Dcache associativity 4 lru\cr
|
Dcache blocksize 1024\cr
|
Dcache blocksize 1024\cr
|
unit ODD 5555555555555555555555555555555555555555555555555555555555555555\cr
|
unit ODD 5555555555555555555555555555555555555555555555555555555555555555\cr
|
unit EVEN aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\cr
|
unit EVEN aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\cr
|
div 40 30 20\ \ \% three-stage divide\cr
|
div 40 30 20\ \ \% three-stage divide\cr
|
}}$$
|
}}$$
|
It means that (1) the write buffer has capacity for 200 octabytes;
|
It means that (1) the write buffer has capacity for 200 octabytes;
|
(2)~the memory bus takes 100 cycles to process an address;
|
(2)~the memory bus takes 100 cycles to process an address;
|
(3)~there's a D-cache, in which each set has 4 blocks and the replacement
|
(3)~there's a D-cache, in which each set has 4 blocks and the replacement
|
policy is least-recently-used;
|
policy is least-recently-used;
|
(4)~each block in the D-cache has 1024 bytes;
|
(4)~each block in the D-cache has 1024 bytes;
|
(5)~there are two functional units, one for all the odd-numbered opcodes
|
(5)~there are two functional units, one for all the odd-numbered opcodes
|
and one for all the rest;
|
and one for all the rest;
|
(6)~the division instructions take three pipeline stages, spending 40 cycles
|
(6)~the division instructions take three pipeline stages, spending 40 cycles
|
in the first stage, 30~in the second, and 20 in the last;
|
in the first stage, 30~in the second, and 20 in the last;
|
(7)~all other parameters have default values.
|
(7)~all other parameters have default values.
|
|
|
@ Four kinds of specifications can appear in a configuration file,
|
@ Four kinds of specifications can appear in a configuration file,
|
according to the following syntax:
|
according to the following syntax:
|
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
|
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
|
$$\vbox{\halign{$#$\hfil\cr
\<specification>\is\<PV spec>\mid\<cache spec>\mid\<pipe spec>\mid
\<functional spec>\cr
\<PV spec>\is\<parameter>\<decimal value>\cr
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
\<pipe spec>\is\<operation>\<pipeline times>\cr
\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>\cr}}$$
|
|
|
@ A \<PV spec> simply assigns a given value to a given parameter. The
possibilities for \<parameter> are as follows:
|
|
|
\def\bull#1 {\smallskip\hang\textindent{$\bullet$}\.{#1}\enspace}
|
\def\bull#1 {\smallskip\hang\textindent{$\bullet$}\.{#1}\enspace}
|
\bull fetchbuffer (default 4), maximum instructions in the fetch buffer;
|
\bull fetchbuffer (default 4), maximum instructions in the fetch buffer;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull writebuffer (default 2), maximum octabytes in the write buffer;
|
\bull writebuffer (default 2), maximum octabytes in the write buffer;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull reorderbuffer (default 5), maximum instructions issued but not
|
\bull reorderbuffer (default 5), maximum instructions issued but not
|
committed; must be $\ge1$.
|
committed; must be $\ge1$.
|
|
|
\bull renameregs (default 5), maximum partial results in the reorder
|
\bull renameregs (default 5), maximum partial results in the reorder
|
buffer; must be $\ge1$.
|
buffer; must be $\ge1$.
|
|
|
\bull memslots (default 2), maximum store instructions in the reorder
|
\bull memslots (default 2), maximum store instructions in the reorder
|
buffer; must be $\ge1$.
|
buffer; must be $\ge1$.
|
|
|
\bull localregs (default 256), number of local registers in ring;
|
\bull localregs (default 256), number of local registers in ring;
|
must be 256, 512, or 1024.
|
must be 256, 512, or 1024.
|
|
|
\bull fetchmax (default 2), maximum instructions fetched per cycle;
|
\bull fetchmax (default 2), maximum instructions fetched per cycle;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull dispatchmax (default 1), maximum instructions issued per cycle;
|
\bull dispatchmax (default 1), maximum instructions issued per cycle;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull peekahead (default 1), maximum lookahead for jumps per cycle.
|
\bull peekahead (default 1), maximum lookahead for jumps per cycle.
|
|
|
\bull commitmax (default 1), maximum instructions committed per cycle;
|
\bull commitmax (default 1), maximum instructions committed per cycle;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull fremmax (default 1), maximum reductions in \.{FREM} computation per
|
\bull fremmax (default 1), maximum reductions in \.{FREM} computation per
|
cycle; must be $\ge1$.
|
cycle; must be $\ge1$.
|
|
|
\bull denin (default 1), extra cycles taken if a floating point input
|
\bull denin (default 1), extra cycles taken if a floating point input
|
is subnormal.
|
is subnormal.
|
|
|
\bull denout (default 1), extra cycles taken if a floating point result
|
\bull denout (default 1), extra cycles taken if a floating point result
|
is subnormal.
|
is subnormal.
|
|
|
\bull writeholdingtime (default 0), minimum number of cycles for data to
|
\bull writeholdingtime (default 0), minimum number of cycles for data to
|
remain in the write buffer.
|
remain in the write buffer.
|
|
|
\bull memaddresstime (default 20), cycles to process memory address;
|
\bull memaddresstime (default 20), cycles to process memory address;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull memreadtime (default 20), cycles to read one memory busload;
|
\bull memreadtime (default 20), cycles to read one memory busload;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull memwritetime (default 20), cycles to write one memory busload;
|
\bull memwritetime (default 20), cycles to write one memory busload;
|
must be $\ge1$.
|
must be $\ge1$.
|
|
|
\bull membusbytes (default 8), number of bytes per memory busload; must be a
|
\bull membusbytes (default 8), number of bytes per memory busload; must be a
|
power of~2 that is 8~or~more.
|
power of~2 that is 8~or~more.
|
|
|
\bull branchpredictbits (default 0), number of bits in each branch prediction
|
\bull branchpredictbits (default 0), number of bits in each branch prediction
|
table entry; must be $\le8$.
|
table entry; must be $\le8$.
|
|
|
\bull branchaddressbits (default 0), number of bits in instruction address
|
\bull branchaddressbits (default 0), number of bits in instruction address
|
used to index the branch prediction table.
|
used to index the branch prediction table.
|
|
|
\bull branchhistorybits (default 0), number of bits in branch history used to
|
\bull branchhistorybits (default 0), number of bits in branch history used to
|
index the branch prediction table.
|
index the branch prediction table.
|
|
|
\bull branchdualbits (default 0), number of bits of
|
\bull branchdualbits (default 0), number of bits of
|
instruction-address-xor-branch-history used to index the branch prediction
|
instruction-address-xor-branch-history used to index the branch prediction
|
table.
|
table.
|
|
|
\bull hardwarepagetable (default 1), is zero if page table calculations
|
\bull hardwarepagetable (default 1), is zero if page table calculations
|
must be emulated by the operating system.
|
must be emulated by the operating system.
|
|
|
\bull disablesecurity (default 0), is 1 if the hot-seat security checks
|
\bull disablesecurity (default 0), is 1 if the hot-seat security checks
|
are turned off. This option is used only for testing purposes; it means
|
are turned off. This option is used only for testing purposes; it means
|
that the `\.s' interrupt will not occur, and the `\.p' interrupt will
|
that the `\.s' interrupt will not occur, and the `\.p' interrupt will
|
be signaled only when going from a nonnegative location to a negative one.
|
be signaled only when going from a nonnegative location to a negative one.
|
|
|
\bull memchunksmax (default 1000), maximum number of $2^{16}$-byte chunks of
|
\bull memchunksmax (default 1000), maximum number of $2^{16}$-byte chunks of
|
simulated memory; must be $\ge1$.
|
simulated memory; must be $\ge1$.
|
|
|
\bull hashprime (default 2003), prime number used to address simulated memory;
|
\bull hashprime (default 2003), prime number used to address simulated memory;
|
must exceed \.{memchunksmax}, preferably by a factor of about~2.
|
must exceed \.{memchunksmax}, preferably by a factor of about~2.
|
|
|
\smallskip\noindent
|
\smallskip\noindent
|
The values of \.{memchunksmax} and \.{hashprime} affect only the speed of the
|
The values of \.{memchunksmax} and \.{hashprime} affect only the speed of the
|
simulator, not its results---unless a very huge program is being simulated.
|
simulator, not its results---unless a very huge program is being simulated.
|
The stated defaults for \.{memchunksmax} and \.{hashprime}
|
The stated defaults for \.{memchunksmax} and \.{hashprime}
|
should be adequate for almost all applications.
|
should be adequate for almost all applications.
|
|
|
@ A \<cache spec> assigns a given value to a parameter affecting one of five
possible caches:
$$\vbox{\halign{$#$\hfil\cr
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
\<cache name>\is\.{ITcache}\mid\.{DTcache}\mid\.{Icache}\mid\.{Dcache}
\mid\.{Scache}\cr
\<policy>\is\<empty>\mid\.{random}\mid\.{serial}
\mid\.{pseudolru}\mid\.{lru}\cr}}$$
The possibilities for \<cache parameter> are as follows:
|
|
|
\bull associativity (default 1), number of cache blocks per cache set;
|
\bull associativity (default 1), number of cache blocks per cache set;
|
must be a power of~2. (A cache with associativity~1 is said to be
|
must be a power of~2. (A cache with associativity~1 is said to be
|
``direct-mapped.'')
|
``direct-mapped.'')
|
|
|
\bull blocksize (default 8), number of bytes per cache block; must be a power
|
\bull blocksize (default 8), number of bytes per cache block; must be a power
|
of~2, at least equal to the granularity, and at most equal to~8192.
|
of~2, at least equal to the granularity, and at most equal to~8192.
|
The blocksize of \.{ITcache} and \.{DTcache} must be~8.
|
The blocksize of \.{ITcache} and \.{DTcache} must be~8.
|
|
|
\bull setsize (default 1), number of sets of cache blocks; must be a power
|
\bull setsize (default 1), number of sets of cache blocks; must be a power
|
of~2. (A cache with set size~1 is said to be ``fully associative.'')
|
of~2. (A cache with set size~1 is said to be ``fully associative.'')
|
|
|
\bull granularity (default 8), number of bytes per ``dirty bit,'' used to
|
\bull granularity (default 8), number of bytes per ``dirty bit,'' used to
|
remember which items of data have changed since they were read from memory;
|
remember which items of data have changed since they were read from memory;
|
must be a power of~2 and at least~8. The granularity must be~8 if
|
must be a power of~2 and at least~8. The granularity must be~8 if
|
\.{writeallocate} is~0.
|
\.{writeallocate} is~0.
|
|
|
\bull victimsize (default 0), number of cache blocks in the victim buffer,
|
\bull victimsize (default 0), number of cache blocks in the victim buffer,
|
which holds blocks removed from the main cache sets; must be zero or a power
|
which holds blocks removed from the main cache sets; must be zero or a power
|
of~2.
|
of~2.
|
|
|
\bull writeback (default 0), is 1 in a ``write-back'' cache, which holds dirty
|
\bull writeback (default 0), is 1 in a ``write-back'' cache, which holds dirty
|
data as long as possible; is 0 in a ``write-through'' cache, which cleans
|
data as long as possible; is 0 in a ``write-through'' cache, which cleans
|
all data as soon as possible.
|
all data as soon as possible.
|
|
|
\bull writeallocate (default 0), is 1 in a ``write-allocate'' cache,
|
\bull writeallocate (default 0), is 1 in a ``write-allocate'' cache,
|
which remembers all recently written data;
|
which remembers all recently written data;
|
is 0 in a ``write-around'' cache, which doesn't make space for newly written
|
is 0 in a ``write-around'' cache, which doesn't make space for newly written
|
data that fails to hit an existing cache block.
|
data that fails to hit an existing cache block.
|
|
|
\bull accesstime (default 1), number of cycles to query the cache;
|
\bull accesstime (default 1), number of cycles to query the cache;
|
must be $\ge1$. (Hits in the S-cache actually require {\it twice}
|
must be $\ge1$. (Hits in the S-cache actually require {\it twice}
|
the accesstime, once to query the tag and once to transmit the data.)
|
the accesstime, once to query the tag and once to transmit the data.)
|
|
|
\bull copyintime (default 1), number of cycles to move a cache block from
|
\bull copyintime (default 1), number of cycles to move a cache block from
|
its input buffer into the cache proper; must be $\ge1$.
|
its input buffer into the cache proper; must be $\ge1$.
|
|
|
\bull copyouttime (default 1), number of cycles to move a cache block
|
\bull copyouttime (default 1), number of cycles to move a cache block
|
from the cache proper to its output buffer; must be $\ge1$.
|
from the cache proper to its output buffer; must be $\ge1$.
|
|
|
\bull ports (default 1), number of processes that can simultaneously
|
query the cache; must be $\ge1$.
|
query the cache; must be $\ge1$.
|
|
|
\smallskip
|
\smallskip
|
The \<policy> parameter should be nonempty only on cache specifications
|
for parameters
|
for parameters
|
\.{associativity} and \.{victimsize}. If no replacement policy is specified,
|
\.{associativity} and \.{victimsize}. If no replacement policy is specified,
|
\.{random} is the default. All four policies are equivalent when the
|
\.{random} is the default. All four policies are equivalent when the
|
\.{associativity} or \.{victimsize} is~1; \.{pseudolru} is equivalent
|
\.{associativity} or \.{victimsize} is~1; \.{pseudolru} is equivalent
|
to \.{lru} when the \.{associativity} or \.{victimsize} is~2.
|
to \.{lru} when the \.{associativity} or \.{victimsize} is~2.
|
|
|
The \.{granularity}, \.{writeback}, \.{writeallocate}, and \.{copyouttime}
|
The \.{granularity}, \.{writeback}, \.{writeallocate}, and \.{copyouttime}
|
parameters affect the performance only of the D-cache and S-cache; the other
|
parameters affect the performance only of the D-cache and S-cache; the other
|
three caches are read-only, so they never need to write their data.
|
three caches are read-only, so they never need to write their data.
|
|
|
The \.{ports} parameter affects the performance of the D-cache and
|
The \.{ports} parameter affects the performance of the D-cache and
|
DT-cache, and (if the \.{PREGO} command is used) the performance of the
|
DT-cache, and (if the \.{PREGO} command is used) the performance of the
|
I-cache and IT-cache. The S-cache accommodates only one process at a time,
|
I-cache and IT-cache. The S-cache accommodates only one process at a time,
|
regardless of the number of specified ports.
|
regardless of the number of specified ports.
|
|
|
Only the translation caches (the IT-cache and DT-cache) are present by
|
Only the translation caches (the IT-cache and DT-cache) are present by
|
default. But if any specifications are given for, say, an I-cache,
|
default. But if any specifications are given for, say, an I-cache,
|
all of the unspecified I-cache parameters take their default values.
|
all of the unspecified I-cache parameters take their default values.
|
|
|
The existence of an S-cache (secondary cache) implies the existence of both
|
The existence of an S-cache (secondary cache) implies the existence of both
|
I-cache and D-cache (primary caches for instructions and data).
|
I-cache and D-cache (primary caches for instructions and data).
|
The block size of the secondary cache must not be less than the block
|
The block size of the secondary cache must not be less than the block
|
size of the primary caches. The secondary cache must have the
|
size of the primary caches. The secondary cache must have the
|
same granularity as the D-cache.
|
same granularity as the D-cache.
|
|
|
@ A \<pipe spec> governs the execution time of potentially slow operations.
$$\vbox{\halign{$#$\hfil\cr
\<pipe spec>\is\<operation>\<pipeline times>\cr
\<pipeline times>\is\<decimal value>\mid\<pipeline times>\<decimal value>\cr}}$$
Here the \<operation> is one of the following:
|
|
|
\bull mul0 through \.{mul8} (default 10); the values for \.{mul}$j$ refer
|
\bull mul0 through \.{mul8} (default 10); the values for \.{mul}$j$ refer
|
to products in which the second operand is less than $2^{8j}$, where $j$
|
to products in which the second operand is less than $2^{8j}$, where $j$
|
is as small as possible. Thus, for example, \.{mul1} applies to
|
is as small as possible. Thus, for example, \.{mul1} applies to
|
nonzero one-byte multipliers.
|
nonzero one-byte multipliers.
|
|
|
\bull div (default 60); this applies to integer division, signed and unsigned.
|
\bull div (default 60); this applies to integer division, signed and unsigned.
|
|
|
\bull sh (default 1); this applies to left and right shifts, signed and
|
\bull sh (default 1); this applies to left and right shifts, signed and
|
unsigned.
|
unsigned.
|
|
|
\bull mux (default 1); the multiplex operator.
|
\bull mux (default 1); the multiplex operator.
|
|
|
\bull sadd (default 1); the sideways addition operator.
|
\bull sadd (default 1); the sideways addition operator.
|
|
|
\bull mor (default 1); the boolean matrix multiplication operators \.{MOR} and
|
\bull mor (default 1); the boolean matrix multiplication operators \.{MOR} and
|
\.{MXOR}.
|
\.{MXOR}.
|
|
|
\bull fadd (default 4); floating point addition and subtraction.
|
\bull fadd (default 4); floating point addition and subtraction.
|
|
|
\bull fmul (default 4); floating point multiplication.
|
\bull fmul (default 4); floating point multiplication.
|
|
|
\bull fdiv (default 40); floating point division.
|
\bull fdiv (default 40); floating point division.
|
|
|
\bull fsqrt (default 40); floating point square root.
|
\bull fsqrt (default 40); floating point square root.
|
|
|
\bull fint (default 4); floating point integerization.
|
\bull fint (default 4); floating point integerization.
|
|
|
\bull fix (default 2); conversion from floating to fixed, signed and unsigned.
|
\bull fix (default 2); conversion from floating to fixed, signed and unsigned.
|
|
|
\bull flot (default 2); conversion from fixed to floating, signed and unsigned.
|
\bull flot (default 2); conversion from fixed to floating, signed and unsigned.
|
|
|
\bull feps (default 4); floating comparison with respect to epsilon.
|
\bull feps (default 4); floating comparison with respect to epsilon.
|
|
|
\smallskip\noindent
|
\smallskip\noindent
|
In each case one can specify a sequence of pipeline stages, with a positive
|
In each case one can specify a sequence of pipeline stages, with a positive
|
number of cycles to be spent in each stage. For example, a specification like
|
number of cycles to be spent in each stage. For example, a specification like
|
`\.{fmul}~\.{3}~\.{1}' would say that a functional unit that supports
|
`\.{fmul}~\.{3}~\.{1}' would say that a functional unit that supports
|
\.{FMUL} takes a total of four cycles to compute the floating point product
|
\.{FMUL} takes a total of four cycles to compute the floating point product
|
in two stages; it can start working on a second product after three cycles
|
in two stages; it can start working on a second product after three cycles
|
have gone by.
|
have gone by.
|
|
|
If a floating point operation has a subnormal input, \.{denin} is added to
|
If a floating point operation has a subnormal input, \.{denin} is added to
|
the time for the first stage. If a floating point operation has a subnormal
|
the time for the first stage. If a floating point operation has a subnormal
|
result, \.{denout} is added to the time for the last stage.
|
result, \.{denout} is added to the time for the last stage.
|
|
|
@ The fourth and final kind of specification defines a functional unit:
|
@ The fourth and final kind of specification defines a functional unit:
|
$$\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>$$
|
The symbolic name should be at most fifteen characters long.
|
The symbolic name should be at most fifteen characters long.
|
The 64 hexadecimal digits contain 256 bits, with `1' for each supported
|
The 64 hexadecimal digits contain 256 bits, with `1' for each supported
|
opcode; the most significant (leftmost) bit is for opcode 0 (\.{TRAP}),
|
opcode; the most significant (leftmost) bit is for opcode 0 (\.{TRAP}),
|
and the least significant bit is for opcode 255 (\.{TRIP}).
|
and the least significant bit is for opcode 255 (\.{TRIP}).
|
|
|
For example, we can define a load/store unit (which handles register/memory
|
For example, we can define a load/store unit (which handles register/memory
|
operations), a multiplication unit (which handles fixed and floating point
|
operations), a multiplication unit (which handles fixed and floating point
|
multiplication), a boolean unit (which handles only bitwise operations),
|
multiplication), a boolean unit (which handles only bitwise operations),
|
and a more general arithmetic-logical unit, as follows:
|
and a more general arithmetic-logical unit, as follows:
|
$$\vbox{\halign{\tt#\hfil\cr
|
$$\vbox{\halign{\tt#\hfil\cr
|
unit LSU 00000000000000000000000000000000fffffffcfffffffc0000000000000000\cr
|
unit LSU 00000000000000000000000000000000fffffffcfffffffc0000000000000000\cr
|
unit MUL 000080f000000000000000000000000000000000000000000000000000000000\cr
|
unit MUL 000080f000000000000000000000000000000000000000000000000000000000\cr
|
unit BIT 000000000000000000000000000000000000000000000000ffff00ff00ff0000\cr
|
unit BIT 000000000000000000000000000000000000000000000000ffff00ff00ff0000\cr
|
unit ALU f0000000ffffffffffffffffffffffff0000000300000003ffffffffffffffff\cr
|
unit ALU f0000000ffffffffffffffffffffffff0000000300000003ffffffffffffffff\cr
|
}}$$
|
}}$$
|
|
|
The order in which units are specified is important, because \MMIX's dispatcher
|
The order in which units are specified is important, because \MMIX's dispatcher
|
will try to match each instruction with the first functional unit that
|
will try to match each instruction with the first functional unit that
|
supports its opcode. Therefore it is best to list more specialized
|
supports its opcode. Therefore it is best to list more specialized
|
units (like the \.{BIT} unit in this example) before more general ones;
|
units (like the \.{BIT} unit in this example) before more general ones;
|
this lets the specialized units have first chance at the instructions
|
this lets the specialized units have first chance at the instructions
|
they can handle.
|
they can handle.
|
|
|
There can be any number of functional units, having possibly identical
|
There can be any number of functional units, having possibly identical
|
specifications. One should, however, give each unit a unique name
|
specifications. One should, however, give each unit a unique name
|
(e.g., \.{ALU1} and \.{ALU2} if there are two arithmetic-logical units),
|
(e.g., \.{ALU1} and \.{ALU2} if there are two arithmetic-logical units),
|
since these names are used in diagnostic messages.
|
since these names are used in diagnostic messages.
|
|
|
Opcodes that aren't supported by any specified unit will cause an
|
Opcodes that aren't supported by any specified unit will cause an
|
emulation trap.
|
emulation trap.
|
@^emulation@>
|
@^emulation@>
|
|
|
@ Full details about the significance of all these parameters can be found
|
@ Full details about the significance of all these parameters can be found
|
in the \.{mmix-pipe} module, which defines and discusses the data structures
|
in the \.{mmix-pipe} module, which defines and discusses the data structures
|
that need to be configured and initialized.
|
that need to be configured and initialized.
|
|
|
Of course the specifications in a configuration file needn't make any sense,
|
Of course the specifications in a configuration file needn't make any sense,
|
nor need they be practically achievable. We could, for example, specify
|
nor need they be practically achievable. We could, for example, specify
|
a unit that handles only the two opcodes \.{NXOR} and \.{DIVUI};
|
a unit that handles only the two opcodes \.{NXOR} and \.{DIVUI};
|
we could specify 1-cycle division but pipelined 100-cycle shifts, or
|
we could specify 1-cycle division but pipelined 100-cycle shifts, or
|
1-cycle memory access but 100-cycle cache access. We could create
|
1-cycle memory access but 100-cycle cache access. We could create
|
a thousand rename registers and issue a hundred instructions per cycle,
|
a thousand rename registers and issue a hundred instructions per cycle,
|
etc. Some combinations of parameters are clearly ridiculous.
|
etc. Some combinations of parameters are clearly ridiculous.
|
|
|
But there remain a huge number of possibilities of interest, especially
|
But there remain a huge number of possibilities of interest, especially
|
as technology continues to evolve. By experimenting with configurations that
|
as technology continues to evolve. By experimenting with configurations that
|
are extreme by present-day standards, we can see how much might be gained
|
are extreme by present-day standards, we can see how much might be gained
|
if the corresponding hardware could be built economically.
|
if the corresponding hardware could be built economically.
|
|
|
@* Basic input/output. Let's get ready to program the |MMIX_config| subroutine
|
@* Basic input/output. Let's get ready to program the |MMIX_config| subroutine
|
by building some simple infrastructure. First we need some macros to
|
by building some simple infrastructure. First we need some macros to
|
print error messages.
|
print error messages.
|
|
|
@d errprint0(f) fprintf(stderr,f) /* a message with no arguments */
@d errprint1(f,a) fprintf(stderr,f,a) /* a message with one argument */
@d errprint2(f,a,b) fprintf(stderr,f,a,b) /* a message with two arguments */
@d errprint3(f,a,b,c) fprintf(stderr,f,a,b,c) /* a message with three arguments */
@d panic(x)@+ {@+x;@+errprint0("!\n");@+exit(-1);@+} /* do |x|, end the line with an exclamation point, and quit */
|
|
|
@ And we need a place to look at the input.
|
@ And we need a place to look at the input.
|
|
|
@d BUF_SIZE 100 /* capacity of the line and token buffers; we don't need long lines */
|
|
|
@<Global variables@>=
|
FILE *config_file; /* the stream from which input lines are read */
char buffer[BUF_SIZE]; /* the input line currently being scanned */
char token[BUF_SIZE]; /* the token most recently copied out of |buffer| */
char *buf_pointer=buffer; /* the first character of |buffer| not yet scanned */
bool token_prescanned; /* is the next token already sitting in |token|? */
|
|
|
@ The |get_token| routine copies the next token of input into the |token|
|
@ The |get_token| routine copies the next token of input into the |token|
|
buffer. After the input has ended, a final `\.{end}' is appended.
|
buffer. After the input has ended, a final `\.{end}' is appended.
|
|
|
@<Subroutines@>=
|
static void get_token @,@,@[ARGS((void))@];@+@t}\6{@>
static void get_token() /* set |token| to the next token of the configuration file */
{
  register char *p,*q;
  if (token_prescanned) { /* |token| already holds the next token; just hand it over */
    token_prescanned=false;@+ return;
  }
  while(1) { /* scan past white space */
    if (*buf_pointer=='\0' || *buf_pointer=='\n' || *buf_pointer=='%') {
      /* the rest of this line is empty or a comment, so a new line is needed */
      if (!fgets(buffer,BUF_SIZE,config_file)) {
        strcpy(token,"end");@+return; /* no more input */
      }
      if (strlen(buffer)==BUF_SIZE-1 && buffer[BUF_SIZE-2]!='\n')
        panic(errprint1("config file line too long: `%s...'",buffer));
@.config file line...@>
      buf_pointer=buffer;
    }@+else if (!isspace((unsigned char)*buf_pointer)) break;
      /* |isspace| is defined only for values representable as |unsigned char| */
    else buf_pointer++;
  }
  /* copy the token; we must also stop at the null terminator, because
     the last line of a file need not end with a newline character */
  for (p=buf_pointer,q=token;
       *p!='\0' && !isspace((unsigned char)*p) && *p!='%';p++,q++) *q=*p;
  buf_pointer=p;@+ *q='\0';
  return;
}
|
|
|
@ The |get_int| routine is called when we wish to input a decimal value.
|
@ The |get_int| routine is called when we wish to input a decimal value.
|
It returns $-1$ if the next token isn't a string of decimal digits.
|
It returns $-1$ if the next token isn't a string of decimal digits.
|
|
|
@=
|
@=
|
static int get_int @,@,@[ARGS((void))@];@+@t}\6{@>
|
static int get_int @,@,@[ARGS((void))@];@+@t}\6{@>
|
static int get_int()
|
static int get_int()
|
{@+ int v;
|
{@+ int v;
|
char *p;
|
char *p;
|
get_token();
|
get_token();
|
for (p=token,v=0; *p>='0' && *p<='9'; p++) v=10*v+*p-'0';
|
for (p=token,v=0; *p>='0' && *p<='9'; p++) v=10*v+*p-'0';
|
if (*p) return -1;
|
if (*p) return -1;
|
return v;
|
return v;
|
}
|
}
|
|
|
@ A simple data structure makes it fairly easy to deal with
|
@ A simple data structure makes it fairly easy to deal with
|
parameter/value specifications.
|
parameter/value specifications.
|
|
|
@=
|
@=
|
typedef struct {
  char name[20]; /* symbolic name */
  int *v; /* internal name: the variable that receives the value */
  int defval; /* default value */
  int minval; /* minimum legal value */
  int maxval; /* maximum legal value */
  bool power_of_two; /* must it be a power of two? */
} pv_spec;
|
|
@ Cache parameters are a bit more difficult, but still not bad.
|
@ Cache parameters are a bit more difficult, but still not bad.
|
|
|
@=
|
@=
|
typedef enum {@!assoc,@!blksz,@!setsz,@!gran,@!vctsz,
  @!wrb,@!wra,@!acctm,@!citm,@!cotm,@!prts} c_param;
   /* one code for each kind of cache parameter */
@#
typedef struct {
  char name[20]; /* symbolic name */
  c_param v; /* internal code, tells which field of the cache to set */
  int defval; /* default value */
  int minval; /* minimum legal value */
  int maxval; /* maximum legal value */
  bool power_of_two; /* must it be a power of two? */
} cpv_spec;
|
|
@ Operation codes are the easiest of all.
|
@ Operation codes are the easiest of all.
|
|
|
@=
|
@=
|
typedef struct {
|
typedef struct {
|
char name[8]; /* symbolic name */
|
char name[8]; /* symbolic name */
|
internal_opcode v; /* internal code */
|
internal_opcode v; /* internal code */
|
int defval; /* default value */
|
int defval; /* default value */
|
} op_spec;
|
} op_spec;
|
|
|
@ Most of the parameters are external variables declared in the header
|
@ Most of the parameters are external variables declared in the header
|
file \.{mmix-pipe.h}; but some are private to this module. Here we
|
file \.{mmix-pipe.h}; but some are private to this module. Here we
|
define the main tables used below.
|
define the main tables used below.
|
|
|
@=
|
@=
|
int fetch_buf_size,write_buf_size,reorder_buf_size,mem_bus_bytes,hardware_PT;
|
int fetch_buf_size,write_buf_size,reorder_buf_size,mem_bus_bytes,hardware_PT;
|
int max_cycs=60;
|
int max_cycs=60;
|
pv_spec PV[]={@/
|
pv_spec PV[]={@/
|
{"fetchbuffer", &fetch_buf_size, 4, 1, INT_MAX, false},@/
|
{"fetchbuffer", &fetch_buf_size, 4, 1, INT_MAX, false},@/
|
{"writebuffer", &write_buf_size, 2, 1, INT_MAX, false},@/
|
{"writebuffer", &write_buf_size, 2, 1, INT_MAX, false},@/
|
{"reorderbuffer", &reorder_buf_size, 5, 1, INT_MAX, false},@/
|
{"reorderbuffer", &reorder_buf_size, 5, 1, INT_MAX, false},@/
|
{"renameregs", &max_rename_regs, 5, 1, INT_MAX, false},@/
|
{"renameregs", &max_rename_regs, 5, 1, INT_MAX, false},@/
|
{"memslots", &max_mem_slots, 2, 1, INT_MAX, false},@/
|
{"memslots", &max_mem_slots, 2, 1, INT_MAX, false},@/
|
{"localregs", &lring_size, 256, 256, 1024, true},@/
|
{"localregs", &lring_size, 256, 256, 1024, true},@/
|
{"fetchmax", &fetch_max, 2, 1, INT_MAX, false},@/
|
{"fetchmax", &fetch_max, 2, 1, INT_MAX, false},@/
|
{"dispatchmax", &dispatch_max, 1, 1, INT_MAX, false},@/
|
{"dispatchmax", &dispatch_max, 1, 1, INT_MAX, false},@/
|
{"peekahead", &peekahead, 1, 0, INT_MAX, false},@/
|
{"peekahead", &peekahead, 1, 0, INT_MAX, false},@/
|
{"commitmax", &commit_max, 1, 1, INT_MAX, false},@/
|
{"commitmax", &commit_max, 1, 1, INT_MAX, false},@/
|
{"fremmax", &frem_max, 1, 1, INT_MAX, false},@/
|
{"fremmax", &frem_max, 1, 1, INT_MAX, false},@/
|
{"denin",&denin_penalty, 1, 0, INT_MAX, false},@/
|
{"denin",&denin_penalty, 1, 0, INT_MAX, false},@/
|
{"denout",&denout_penalty, 1, 0, INT_MAX, false},@/
|
{"denout",&denout_penalty, 1, 0, INT_MAX, false},@/
|
{"writeholdingtime", &holding_time, 0, 0, INT_MAX, false},@/
|
{"writeholdingtime", &holding_time, 0, 0, INT_MAX, false},@/
|
{"memaddresstime", &mem_addr_time, 20, 1, INT_MAX, false},@/
|
{"memaddresstime", &mem_addr_time, 20, 1, INT_MAX, false},@/
|
{"memreadtime", &mem_read_time, 20, 1, INT_MAX, false},@/
|
{"memreadtime", &mem_read_time, 20, 1, INT_MAX, false},@/
|
{"memwritetime", &mem_write_time, 20, 1, INT_MAX, false},@/
|
{"memwritetime", &mem_write_time, 20, 1, INT_MAX, false},@/
|
{"membusbytes", &mem_bus_bytes, 8, 8, INT_MAX, true},@/
|
{"membusbytes", &mem_bus_bytes, 8, 8, INT_MAX, true},@/
|
{"branchpredictbits", &bp_n, 0, 0, 8, false},@/
|
{"branchpredictbits", &bp_n, 0, 0, 8, false},@/
|
{"branchaddressbits", &bp_a, 0, 0, 32, false},@/
|
{"branchaddressbits", &bp_a, 0, 0, 32, false},@/
|
{"branchhistorybits", &bp_b, 0, 0, 32, false},@/
|
{"branchhistorybits", &bp_b, 0, 0, 32, false},@/
|
{"branchdualbits", &bp_c, 0, 0, 32, false},@/
|
{"branchdualbits", &bp_c, 0, 0, 32, false},@/
|
{"hardwarepagetable", &hardware_PT, 1, 0, 1, false},@/
|
{"hardwarepagetable", &hardware_PT, 1, 0, 1, false},@/
|
{"disablesecurity", (int*)&security_disabled, 0, 0, 1, false},@/
|
{"disablesecurity", (int*)&security_disabled, 0, 0, 1, false},@/
|
{"memchunksmax", &mem_chunks_max, 1000, 1, INT_MAX, false},@/
|
{"memchunksmax", &mem_chunks_max, 1000, 1, INT_MAX, false},@/
|
{"hashprime", &hash_prime, 2003, 2, INT_MAX, false}};
|
{"hashprime", &hash_prime, 2003, 2, INT_MAX, false}};
|
@#
|
@#
|
cpv_spec CPV[]={
|
cpv_spec CPV[]={
|
{"associativity", assoc, 1, 1, INT_MAX, true},@/
|
{"associativity", assoc, 1, 1, INT_MAX, true},@/
|
{"blocksize", blksz, 8, 8, 8192, true},@/
|
{"blocksize", blksz, 8, 8, 8192, true},@/
|
{"setsize", setsz, 1, 1, INT_MAX, true},@/
|
{"setsize", setsz, 1, 1, INT_MAX, true},@/
|
{"granularity", gran, 8, 8, 8192, true},@/
|
{"granularity", gran, 8, 8, 8192, true},@/
|
{"victimsize", vctsz, 0, 0, INT_MAX, true},@/
|
{"victimsize", vctsz, 0, 0, INT_MAX, true},@/
|
{"writeback", wrb, 0, 0, 1,false},@/
|
{"writeback", wrb, 0, 0, 1,false},@/
|
{"writeallocate", wra, 0, 0, 1,false},@/
|
{"writeallocate", wra, 0, 0, 1,false},@/
|
{"accesstime", acctm, 1, 1, INT_MAX, false},@/
|
{"accesstime", acctm, 1, 1, INT_MAX, false},@/
|
{"copyintime", citm, 1, 1, INT_MAX, false},@/
|
{"copyintime", citm, 1, 1, INT_MAX, false},@/
|
{"copyouttime", cotm, 1, 1, INT_MAX, false},@/
|
{"copyouttime", cotm, 1, 1, INT_MAX, false},@/
|
{"ports", prts, 1, 1, INT_MAX,false}};
|
{"ports", prts, 1, 1, INT_MAX,false}};
|
@#
|
@#
|
op_spec OP[]={
|
op_spec OP[]={
|
{"mul0", mul0, 10},
|
{"mul0", mul0, 10},
|
{"mul1", mul1, 10},
|
{"mul1", mul1, 10},
|
{"mul2", mul2, 10},
|
{"mul2", mul2, 10},
|
{"mul3", mul3, 10},
|
{"mul3", mul3, 10},
|
{"mul4", mul4, 10},
|
{"mul4", mul4, 10},
|
{"mul5", mul5, 10},
|
{"mul5", mul5, 10},
|
{"mul6", mul6, 10},
|
{"mul6", mul6, 10},
|
{"mul7", mul7, 10},
|
{"mul7", mul7, 10},
|
{"mul8", mul8, 10},@|
|
{"mul8", mul8, 10},@|
|
{"div", div, 60},
|
{"div", div, 60},
|
{"sh", sh, 1},
|
{"sh", sh, 1},
|
{"mux", mux, 1},
|
{"mux", mux, 1},
|
{"sadd", sadd, 1},
|
{"sadd", sadd, 1},
|
{"mor", mor, 1},@|
|
{"mor", mor, 1},@|
|
{"fadd", fadd, 4},
|
{"fadd", fadd, 4},
|
{"fmul", fmul, 4},
|
{"fmul", fmul, 4},
|
{"fdiv", fdiv, 40},
|
{"fdiv", fdiv, 40},
|
{"fsqrt", fsqrt, 40},
|
{"fsqrt", fsqrt, 40},
|
{"fint", fint, 4},@|
|
{"fint", fint, 4},@|
|
{"fix", fix, 2},
|
{"fix", fix, 2},
|
{"flot", flot, 2},
|
{"flot", flot, 2},
|
{"feps", feps, 4}};
|
{"feps", feps, 4}};
|
int PV_size,CPV_size,OP_size; /* the number of entries in |PV|, |CPV|, |OP| */
|
int PV_size,CPV_size,OP_size; /* the number of entries in |PV|, |CPV|, |OP| */
|
|
|
@ The |new_cache| routine creates a \&{cache} structure with default values.
|
@ The |new_cache| routine creates a \&{cache} structure with default values.
|
(These default values are ``hard-wired'' into the program, not actually
|
(These default values are ``hard-wired'' into the program, not actually
|
read from the |CPV| table.)
|
read from the |CPV| table.)
|
|
|
@=
|
@=
|
static cache* new_cache @,@,@[ARGS((char*))@];@+@t}\6{@>
|
static cache* new_cache @,@,@[ARGS((char*))@];@+@t}\6{@>
|
static cache* new_cache(name)
|
static cache* new_cache(name)
|
char *name;
|
char *name;
|
{@+register cache *c=(cache*)calloc(1,sizeof(cache));
|
{@+register cache *c=(cache*)calloc(1,sizeof(cache));
|
if (!c) panic(errprint1("Can't allocate %s",name));
|
if (!c) panic(errprint1("Can't allocate %s",name));
|
@.Can't allocate...@>
|
@.Can't allocate...@>
|
c->aa=1; /* default associativity, should equal |CPV[0].defval| */
|
c->aa=1; /* default associativity, should equal |CPV[0].defval| */
|
c->bb=8; /* default blocksize */
|
c->bb=8; /* default blocksize */
|
c->cc=1; /* default setsize */
|
c->cc=1; /* default setsize */
|
c->gg=8; /* default granularity */
|
c->gg=8; /* default granularity */
|
c->vv=0; /* default victimsize */
|
c->vv=0; /* default victimsize */
|
c->repl=random; /* default replacement policy */
|
c->repl=random; /* default replacement policy */
|
c->vrepl=random; /* default victim replacement policy */
|
c->vrepl=random; /* default victim replacement policy */
|
c->mode=0; /* default mode is write-through and write-around */
|
c->mode=0; /* default mode is write-through and write-around */
|
c->access_time=c->copy_in_time=c->copy_out_time=1;
|
c->access_time=c->copy_in_time=c->copy_out_time=1;
|
c->filler.ctl=&(c->filler_ctl);
|
c->filler.ctl=&(c->filler_ctl);
|
c->filler_ctl.ptr_a=(void*)c;
|
c->filler_ctl.ptr_a=(void*)c;
|
c->filler_ctl.go.o.l=4;
|
c->filler_ctl.go.o.l=4;
|
c->flusher.ctl=&(c->flusher_ctl);
|
c->flusher.ctl=&(c->flusher_ctl);
|
c->flusher_ctl.ptr_a=(void*)c;
|
c->flusher_ctl.ptr_a=(void*)c;
|
c->flusher_ctl.go.o.l=4;
|
c->flusher_ctl.go.o.l=4;
|
c->ports=1;
|
c->ports=1;
|
c->name=name;
|
c->name=name;
|
return c;
|
return c;
|
}
|
}
|
|
|
@ @=
|
@ @=
|
PV_size=(sizeof PV)/sizeof(pv_spec);
|
PV_size=(sizeof PV)/sizeof(pv_spec);
|
CPV_size=(sizeof CPV)/sizeof(cpv_spec);
|
CPV_size=(sizeof CPV)/sizeof(cpv_spec);
|
OP_size=(sizeof OP)/sizeof(op_spec);
|
OP_size=(sizeof OP)/sizeof(op_spec);
|
ITcache=new_cache("ITcache");
|
ITcache=new_cache("ITcache");
|
DTcache=new_cache("DTcache");
|
DTcache=new_cache("DTcache");
|
Icache=Dcache=Scache=NULL;
|
Icache=Dcache=Scache=NULL;
|
for (j=0;j
|
for (j=0;j
|
for (j=0;j
|
for (j=0;j
|
pipe_seq[OP[j].v][0]=OP[j].defval;
|
pipe_seq[OP[j].v][0]=OP[j].defval;
|
pipe_seq[OP[j].v][1]=0; /* one stage */
|
pipe_seq[OP[j].v][1]=0; /* one stage */
|
}
|
}
|
|
|
@* Reading the specs. Before we're ready to process the configuration file,
|
@* Reading the specs. Before we're ready to process the configuration file,
|
we need to count the number of functional units, so that we know
|
we need to count the number of functional units, so that we know
|
how much space to allocate for them.
|
how much space to allocate for them.
|
|
|
A special background unit is always provided, just to make sure that
|
A special background unit is always provided, just to make sure that
|
\.{TRAP} and \.{TRIP} instructions are handled by somebody.
|
\.{TRAP} and \.{TRIP} instructions are handled by somebody.
|
|
|
@=
|
@=
|
funit_count=0;
|
funit_count=0;
|
while (strcmp(token,"end")!=0) {
|
while (strcmp(token,"end")!=0) {
|
get_token();
|
get_token();
|
if (strcmp(token,"unit")==0) {
|
if (strcmp(token,"unit")==0) {
|
funit_count++;
|
funit_count++;
|
get_token();@+get_token(); /* a unit might be named \.{unit} or \.{end} */
|
get_token();@+get_token(); /* a unit might be named \.{unit} or \.{end} */
|
}
|
}
|
}
|
}
|
funit=(func*)calloc(funit_count+1,sizeof(func));
|
funit=(func*)calloc(funit_count+1,sizeof(func));
|
if (!funit) panic(errprint0("Can't allocate the functional units"));
|
if (!funit) panic(errprint0("Can't allocate the functional units"));
|
@.Can't allocate...@>
|
@.Can't allocate...@>
|
strcpy(funit[funit_count].name,"%%");
|
strcpy(funit[funit_count].name,"%%");
|
@.\%\%@>
|
@.\%\%@>
|
funit[funit_count].ops[0]=0x80000000; /* \.{TRAP} */
|
funit[funit_count].ops[0]=0x80000000; /* \.{TRAP} */
|
funit[funit_count].ops[7]=0x1; /* \.{TRIP} */
|
funit[funit_count].ops[7]=0x1; /* \.{TRIP} */
|
|
|
@ Now we can read the specifications and obey them. This program doesn't
|
@ Now we can read the specifications and obey them. This program doesn't
|
bother to be very tolerant of errors, nor does it try to be very efficient.
|
bother to be very tolerant of errors, nor does it try to be very efficient.
|
|
|
Incidentally, the specifications don't have to be broken into individual lines
|
Incidentally, the specifications don't have to be broken into individual lines
|
in any meaningful way. We simply read them token by token.
|
in any meaningful way. We simply read them token by token.
|
|
|
@=
|
@=
|
rewind(config_file);
|
rewind(config_file);
|
funit_count=0;
|
funit_count=0;
|
token[0]='\0';
|
token[0]='\0';
|
while (strcmp(token,"end")!=0) {
|
while (strcmp(token,"end")!=0) {
|
get_token();
|
get_token();
|
if (strcmp(token,"end")==0) break;
|
if (strcmp(token,"end")==0) break;
|
@;
|
@;
|
@;
|
@;
|
@;
|
@;
|
if (strcmp(token,"unit")==0) @;
|
if (strcmp(token,"unit")==0) @;
|
panic(errprint1(
|
panic(errprint1(
|
"Configuration syntax error: Specification can't start with `%s'",token));
|
"Configuration syntax error: Specification can't start with `%s'",token));
|
@.Configuration syntax error...@>
|
@.Configuration syntax error...@>
|
}
|
}
|
|
|
@ @=
|
@ @=
|
for (j=0;j
|
for (j=0;j
|
n=get_int();
|
n=get_int();
|
if (n
|
if (n
|
@.Configuration error...@>
|
@.Configuration error...@>
|
"Configuration error: %s must be >= %d",PV[j].name,PV[j].minval));
|
"Configuration error: %s must be >= %d",PV[j].name,PV[j].minval));
|
if (n>PV[j].maxval) panic(errprint2(
|
if (n>PV[j].maxval) panic(errprint2(
|
"Configuration error: %s must be <= %d",PV[j].name,PV[j].maxval));
|
"Configuration error: %s must be <= %d",PV[j].name,PV[j].maxval));
|
if (PV[j].power_of_two && (n&(n-1))) panic(errprint1(
|
if (PV[j].power_of_two && (n&(n-1))) panic(errprint1(
|
"Configuration error: %s must be a power of 2",PV[j].name));
|
"Configuration error: %s must be a power of 2",PV[j].name));
|
*(PV[j].v)=n;
|
*(PV[j].v)=n;
|
break;
|
break;
|
}
|
}
|
if (j
|
if (j
|
|
|
@ @=
|
@ @=
|
if (strcmp(token,"ITcache")==0) {
|
if (strcmp(token,"ITcache")==0) {
|
pcs(ITcache);@+continue;
|
pcs(ITcache);@+continue;
|
}@+else if (strcmp(token,"DTcache")==0) {
|
}@+else if (strcmp(token,"DTcache")==0) {
|
pcs(DTcache);@+continue;
|
pcs(DTcache);@+continue;
|
}@+else if (strcmp(token,"Icache")==0) {
|
}@+else if (strcmp(token,"Icache")==0) {
|
if (!Icache) Icache=new_cache("Icache");
|
if (!Icache) Icache=new_cache("Icache");
|
pcs(Icache);@+continue;
|
pcs(Icache);@+continue;
|
}@+else if (strcmp(token,"Dcache")==0) {
|
}@+else if (strcmp(token,"Dcache")==0) {
|
if (!Dcache) Dcache=new_cache("Dcache");
|
if (!Dcache) Dcache=new_cache("Dcache");
|
pcs(Dcache);@+continue;
|
pcs(Dcache);@+continue;
|
}@+else if (strcmp(token,"Scache")==0) {
|
}@+else if (strcmp(token,"Scache")==0) {
|
if (!Icache) Icache=new_cache("Icache");
|
if (!Icache) Icache=new_cache("Icache");
|
if (!Dcache) Dcache=new_cache("Dcache");
|
if (!Dcache) Dcache=new_cache("Dcache");
|
if (!Scache) Scache=new_cache("Scache");
|
if (!Scache) Scache=new_cache("Scache");
|
pcs(Scache);@+continue;
|
pcs(Scache);@+continue;
|
}
|
}
|
|
|
@ @=
|
@ @=
|
static void ppol @,@,@[ARGS((replace_policy*))@];@+@t}\6{@>
|
static void ppol @,@,@[ARGS((replace_policy*))@];@+@t}\6{@>
|
static void ppol(rr) /* subroutine to scan for a replacement policy */
|
static void ppol(rr) /* subroutine to scan for a replacement policy */
|
replace_policy *rr;
|
replace_policy *rr;
|
{
|
{
|
get_token();
|
get_token();
|
if (strcmp(token,"random")==0) *rr=random;
|
if (strcmp(token,"random")==0) *rr=random;
|
else if (strcmp(token,"serial")==0) *rr=serial;
|
else if (strcmp(token,"serial")==0) *rr=serial;
|
else if (strcmp(token,"pseudolru")==0) *rr=pseudo_lru;
|
else if (strcmp(token,"pseudolru")==0) *rr=pseudo_lru;
|
else if (strcmp(token,"lru")==0) *rr=lru;
|
else if (strcmp(token,"lru")==0) *rr=lru;
|
else token_prescanned=true; /* oops, we should rescan that token */
|
else token_prescanned=true; /* oops, we should rescan that token */
|
}
|
}
|
|
|
@ @=
|
@ @=
|
static void pcs @,@,@[ARGS((cache*))@];@+@t}\6{@>
|
static void pcs @,@,@[ARGS((cache*))@];@+@t}\6{@>
|
static void pcs(c) /* subroutine to process a cache spec */
|
static void pcs(c) /* subroutine to process a cache spec */
|
cache *c;
|
cache *c;
|
{
|
{
|
register int j,n;
|
register int j,n;
|
get_token();
|
get_token();
|
for (j=0;j
|
for (j=0;j
|
if (j==CPV_size) panic(errprint1(
|
if (j==CPV_size) panic(errprint1(
|
"Configuration syntax error: `%s' isn't a cache parameter name",token));
|
"Configuration syntax error: `%s' isn't a cache parameter name",token));
|
@.Configuration syntax error...@>
|
@.Configuration syntax error...@>
|
n=get_int();
|
n=get_int();
|
if (n
|
if (n
|
"Configuration error: %s must be >= %d",CPV[j].name,CPV[j].minval));
|
"Configuration error: %s must be >= %d",CPV[j].name,CPV[j].minval));
|
@.Configuration error...@>
|
@.Configuration error...@>
|
if (n>CPV[j].maxval) panic(errprint2(
|
if (n>CPV[j].maxval) panic(errprint2(
|
"Configuration error: %s must be <= %d",CPV[j].name,CPV[j].maxval));
|
"Configuration error: %s must be <= %d",CPV[j].name,CPV[j].maxval));
|
if (CPV[j].power_of_two && (n&(n-1))) panic(errprint1(
|
if (CPV[j].power_of_two && (n&(n-1))) panic(errprint1(
|
"Configuration error: %s must be power of 2",CPV[j].name));
|
"Configuration error: %s must be power of 2",CPV[j].name));
|
switch (CPV[j].v) {
|
switch (CPV[j].v) {
|
case assoc: c->aa=n;@+ppol(&(c->repl));@+break;
|
case assoc: c->aa=n;@+ppol(&(c->repl));@+break;
|
case blksz: c->bb=n;@+break;
|
case blksz: c->bb=n;@+break;
|
case setsz: c->cc=n;@+break;
|
case setsz: c->cc=n;@+break;
|
case gran: c->gg=n;@+break;
|
case gran: c->gg=n;@+break;
|
case vctsz: c->vv=n;@+ppol(&(c->vrepl));@+break;
|
case vctsz: c->vv=n;@+ppol(&(c->vrepl));@+break;
|
case wrb: c->mode=(c->mode&~WRITE_BACK)+n*WRITE_BACK;@+break;
|
case wrb: c->mode=(c->mode&~WRITE_BACK)+n*WRITE_BACK;@+break;
|
case wra: c->mode=(c->mode&~WRITE_ALLOC)+n*WRITE_ALLOC;@+break;
|
case wra: c->mode=(c->mode&~WRITE_ALLOC)+n*WRITE_ALLOC;@+break;
|
case acctm:@+ if (n>max_cycs) max_cycs=n;
|
case acctm:@+ if (n>max_cycs) max_cycs=n;
|
c->access_time=n;@+break;
|
c->access_time=n;@+break;
|
case citm:@+ if (n>max_cycs) max_cycs=n;
|
case citm:@+ if (n>max_cycs) max_cycs=n;
|
c->copy_in_time=n;@+break;
|
c->copy_in_time=n;@+break;
|
case cotm:@+ if (n>max_cycs) max_cycs=n;
|
case cotm:@+ if (n>max_cycs) max_cycs=n;
|
c->copy_out_time=n;@+break;
|
c->copy_out_time=n;@+break;
|
case prts: c->ports=n;@+break;
|
case prts: c->ports=n;@+break;
|
}
|
}
|
}
|
}
|
|
|
@ @=
|
@ @=
|
for (j=0;j
|
for (j=0;j
|
for (i=0;;i++) {
|
for (i=0;;i++) {
|
n=get_int();
|
n=get_int();
|
if (n<0) break;
|
if (n<0) break;
|
if (n==0) panic(errprint0(
|
if (n==0) panic(errprint0(
|
"Configuration error: Pipeline cycles must be positive"));
|
"Configuration error: Pipeline cycles must be positive"));
|
@.Configuration error...@>
|
@.Configuration error...@>
|
if (n>255) panic(errprint0(
|
if (n>255) panic(errprint0(
|
"Configuration error: Pipeline cycles must be <= 255"));
|
"Configuration error: Pipeline cycles must be <= 255"));
|
if (n>max_cycs) max_cycs=n;
|
if (n>max_cycs) max_cycs=n;
|
if (i>=pipe_limit) panic(errprint1(
|
if (i>=pipe_limit) panic(errprint1(
|
"Configuration error: More than %d pipeline stages",pipe_limit));
|
"Configuration error: More than %d pipeline stages",pipe_limit));
|
pipe_seq[OP[j].v][i]=n;
|
pipe_seq[OP[j].v][i]=n;
|
}
|
}
|
token_prescanned=true;
|
token_prescanned=true;
|
break;
|
break;
|
}
|
}
|
if (j
|
if (j
|
|
|
@ @=
|
@ @=
|
{
  register unsigned long w=0; /* the hex digits of the current word, assembled so far */
  get_token(); /* the name of the unit */
  if (strlen(token)>15) panic(errprint1(
    "Configuration error: `%s' is more than 15 characters long",token));
@.Configuration error...@>
  strcpy(funit[funit_count].name,token);
  get_token(); /* the operation codes that the unit supports */
  if (strlen(token)!=64) panic(errprint1(
   "Configuration error: unit %s doesn't have 64 hex digit specs",
   funit[funit_count].name));
  for (i=j=n=0;j<64;j++) {
    register int d=0; /* the value of hex digit |j| */
    if (token[j]>='0' && token[j]<='9') d=token[j]-'0';
    else if (token[j]>='a' && token[j]<='f') d=token[j]-'a'+10;
    else if (token[j]>='A' && token[j]<='F') d=token[j]-'A'+10;
    else panic(errprint1(
       "Configuration error: `%c' is not a hex digit",token[j]));
    w=(w<<4)+(unsigned long)d;
       /* unsigned, because shifting a signed |int| into its sign bit is undefined */
    if ((j&0x7)==0x7) funit[funit_count].ops[i++]=w, w=0; /* eight digits make one word */
  }
  funit_count++;
  continue;
}
|
|
@* Checking and allocating. The battle is only half over when we've
|
@* Checking and allocating. The battle is only half over when we've
|
absorbed all the data of the configuration file. We still must check for
|
absorbed all the data of the configuration file. We still must check for
|
interactions between different quantities, and we must allocate
|
interactions between different quantities, and we must allocate
|
space for cache blocks, coroutines, etc.
|
space for cache blocks, coroutines, etc.
|
|
|
One of the most difficult tasks facing us is to determine the maximum number
of pipeline stages needed by each functional unit. Let's tackle that first.
|
|
@=
|
@=
|
@;
|
@;
|
for (j=0;j<=funit_count;j++) {
|
for (j=0;j<=funit_count;j++) {
|
@;
|
@;
|
funit[j].k=n;
|
funit[j].k=n;
|
funit[j].co=(coroutine*)calloc(n,sizeof(coroutine));
|
funit[j].co=(coroutine*)calloc(n,sizeof(coroutine));
|
for (i=0;i
|
for (i=0;i
|
funit[j].co[i].name=funit[j].name;
|
funit[j].co[i].name=funit[j].name;
|
funit[j].co[i].stage=i+1;
|
funit[j].co[i].stage=i+1;
|
}
|
}
|
}
|
}
|
|
|
@ @=
|
@ @=
|
for (j=div;j<=max_pipe_op;j++) int_stages[j]=strlen(pipe_seq[j]);
|
for (j=div;j<=max_pipe_op;j++) int_stages[j]=strlen(pipe_seq[j]);
|
for (;j<=max_real_command;j++) int_stages[j]=1;
|
for (;j<=max_real_command;j++) int_stages[j]=1;
|
for (j=mul0,n=0;j<=mul8;j++)
|
for (j=mul0,n=0;j<=mul8;j++)
|
if (strlen(pipe_seq[j])>n) n=strlen(pipe_seq[j]);
|
if (strlen(pipe_seq[j])>n) n=strlen(pipe_seq[j]);
|
int_stages[mul]=n;
|
int_stages[mul]=n;
|
int_stages[ld]=int_stages[st]=int_stages[frem]=2;
|
int_stages[ld]=int_stages[st]=int_stages[frem]=2;
|
for (j=0;j<256;j++) stages[j]=int_stages[int_op[j]];
|
for (j=0;j<256;j++) stages[j]=int_stages[int_op[j]];
|
|
|
@ The |int_op| conversion table is similar to the |internal_op| array of
|
@ The |int_op| conversion table is similar to the |internal_op| array of
|
the \\{MMIX\_pipe} routine, but it replaces |divu| by |div|,
|
the \\{MMIX\_pipe} routine, but it replaces |divu| by |div|,
|
|fsub| by |fadd|, etc.
|
|fsub| by |fadd|, etc.
|
|
|
@=
|
@=
|
internal_opcode int_op[256]={@/
|
internal_opcode int_op[256]={@/
|
trap,fcmp,funeq,funeq,fadd,fix,fadd,fix,@/
|
trap,fcmp,funeq,funeq,fadd,fix,fadd,fix,@/
|
flot,flot,flot,flot,flot,flot,flot,flot,@/
|
flot,flot,flot,flot,flot,flot,flot,flot,@/
|
fmul,feps,feps,feps,fdiv,fsqrt,frem,fint,@/
|
fmul,feps,feps,feps,fdiv,fsqrt,frem,fint,@/
|
mul,mul,mul,mul,div,div,div,div,@/
|
mul,mul,mul,mul,div,div,div,div,@/
|
add,add,addu,addu,sub,sub,subu,subu,@/
|
add,add,addu,addu,sub,sub,subu,subu,@/
|
addu,addu,addu,addu,addu,addu,addu,addu,@/
|
addu,addu,addu,addu,addu,addu,addu,addu,@/
|
cmp,cmp,cmpu,cmpu,sub,sub,subu,subu,@/
|
cmp,cmp,cmpu,cmpu,sub,sub,subu,subu,@/
|
sh,sh,sh,sh,sh,sh,sh,sh,@/
|
sh,sh,sh,sh,sh,sh,sh,sh,@/
|
br,br,br,br,br,br,br,br,@/
|
br,br,br,br,br,br,br,br,@/
|
br,br,br,br,br,br,br,br,@/
|
br,br,br,br,br,br,br,br,@/
|
pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
|
pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
|
pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
|
pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
|
cset,cset,cset,cset,cset,cset,cset,cset,@/
|
cset,cset,cset,cset,cset,cset,cset,cset,@/
|
cset,cset,cset,cset,cset,cset,cset,cset,@/
|
cset,cset,cset,cset,cset,cset,cset,cset,@/
|
zset,zset,zset,zset,zset,zset,zset,zset,@/
|
zset,zset,zset,zset,zset,zset,zset,zset,@/
|
zset,zset,zset,zset,zset,zset,zset,zset,@/
|
zset,zset,zset,zset,zset,zset,zset,zset,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,ld,ld,ld,ld,@/
|
ld,ld,ld,ld,prego,prego,go,go,@/
|
ld,ld,ld,ld,prego,prego,go,go,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,st,st,@/
|
st,st,st,st,st,st,pushgo,pushgo,@/
|
st,st,st,st,st,st,pushgo,pushgo,@/
|
or,or,orn,orn,nor,nor,xor,xor,@/
|
or,or,orn,orn,nor,nor,xor,xor,@/
|
and,and,andn,andn,nand,nand,nxor,nxor,@/
|
and,and,andn,andn,nand,nand,nxor,nxor,@/
|
bdif,bdif,wdif,wdif,tdif,tdif,odif,odif,@/
|
bdif,bdif,wdif,wdif,tdif,tdif,odif,odif,@/
|
mux,mux,sadd,sadd,mor,mor,mor,mor,@/
|
mux,mux,sadd,sadd,mor,mor,mor,mor,@/
|
set,set,set,set,addu,addu,addu,addu,@/
|
set,set,set,set,addu,addu,addu,addu,@/
|
or,or,or,or,andn,andn,andn,andn,@/
|
or,or,or,or,andn,andn,andn,andn,@/
|
noop,noop,pushj,pushj,set,set,put,put,@/
|
noop,noop,pushj,pushj,set,set,put,put,@/
|
pop,resume,save,unsave,sync,noop,get,trip};
|
pop,resume,save,unsave,sync,noop,get,trip};
|
int int_stages[max_real_command+1];
|
int int_stages[max_real_command+1];
|
/* stages as function of |internal_opcode| */
|
/* stages as function of |internal_opcode| */
|
int stages[256]; /* stages as function of |mmix_opcode| */
|
int stages[256]; /* stages as function of |mmix_opcode| */
|
|
|
@ @=
|
@ @=
|
for (i=n=0;i<256;i++)
|
for (i=n=0;i<256;i++)
|
if (((funit[j].ops[i>>5]<<(i&0x1f))&0x80000000) && stages[i]>n)
|
if (((funit[j].ops[i>>5]<<(i&0x1f))&0x80000000) && stages[i]>n)
|
n=stages[i];
|
n=stages[i];
|
if (n==0) panic(errprint1(
|
if (n==0) panic(errprint1(
|
"Configuration error: unit %s doesn't do anything",funit[j].name));
|
"Configuration error: unit %s doesn't do anything",funit[j].name));
|
@.Configuration error...@>
|
@.Configuration error...@>
|
|
|
@ The next hardest thing on our agenda is to set up the cache structure
|
@ The next hardest thing on our agenda is to set up the cache structure
|
fields that depend on the parameters. For example, although we have defined
|
fields that depend on the parameters. For example, although we have defined
|
the parameter in the |bb| field (the block size), we also need to compute the
|
the parameter in the |bb| field (the block size), we also need to compute the
|
|b|~field (log of the block size), and we must create the cache blocks
|
|b|~field (log of the block size), and we must create the cache blocks
|
themselves.
|
themselves.
|
|
|
@=
|
@=
|
static int lg @,@,@[ARGS((int))@];@+@t}\6{@>
|
static int lg @,@,@[ARGS((int))@];@+@t}\6{@>
|
static int lg(n) /* compute binary logarithm, rounded down; $-1$ if |n<=0| */
  int n;
{
  register int bits=0; /* the number of significant bits shifted out so far */
  if (n<=0) return -1; /* right shifts of a negative |n| might never reach zero */
  while (n) n>>=1, bits++;
  return bits-1;
}
|
|
@ @=
|
@ @=
|
static void alloc_cache @,@,@[ARGS((cache*,char*))@];@+@t}\6{@>
|
static void alloc_cache @,@,@[ARGS((cache*,char*))@];@+@t}\6{@>
|
static void alloc_cache(c,name)
|
static void alloc_cache(c,name)
|
cache *c;
|
cache *c;
|
char *name;
|
char *name;
|
{@+register int j,k;
|
{@+register int j,k;
|
if (c->bbgg) panic(errprint1(
|
if (c->bbgg) panic(errprint1(
|
"Configuration error: blocksize of %s is less than granularity",name));
|
"Configuration error: blocksize of %s is less than granularity",name));
|
@.Configuration error...@>
|
@.Configuration error...@>
|
if (name[1]=='T' && c->bb!=8) panic(errprint1(
|
if (name[1]=='T' && c->bb!=8) panic(errprint1(
|
"Configuration error: blocksize of %s must be 8",name));
|
"Configuration error: blocksize of %s must be 8",name));
|
c->a=lg(c->aa);
|
c->a=lg(c->aa);
|
c->b=lg(c->bb);
|
c->b=lg(c->bb);
|
c->c=lg(c->cc);
|
c->c=lg(c->cc);
|
c->g=lg(c->gg);
|
c->g=lg(c->gg);
|
c->v=lg(c->vv);
|
c->v=lg(c->vv);
|
c->tagmask=-(1<<(c->b+c->c));
|
c->tagmask=-(1<<(c->b+c->c));
|
if (c->a+c->b+c->c>=32) panic(errprint1(
|
if (c->a+c->b+c->c>=32) panic(errprint1(
|
"Configuration error: %s has >= 4 gigabytes of data",name));
|
"Configuration error: %s has >= 4 gigabytes of data",name));
|
if (c->gg!=8 && !(c->mode&WRITE_ALLOC)) panic(errprint2(
|
if (c->gg!=8 && !(c->mode&WRITE_ALLOC)) panic(errprint2(
|
"Configuration error: %s does write-around with granularity %d",
|
"Configuration error: %s does write-around with granularity %d",
|
name,c->gg));
|
name,c->gg));
|
@;
|
@;
|
if (c->vv) @;
|
if (c->vv) @;
|
c->inbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
|
c->inbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
|
if (!c->inbuf.dirty) panic(errprint1(
|
if (!c->inbuf.dirty) panic(errprint1(
|
"Can't allocate dirty bits for inbuffer of %s",name));
|
"Can't allocate dirty bits for inbuffer of %s",name));
|
@.Can't allocate...@>
|
@.Can't allocate...@>
|
c->inbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
|
c->inbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
|
if (!c->inbuf.data) panic(errprint1(
|
if (!c->inbuf.data) panic(errprint1(
|
"Can't allocate data for inbuffer of %s",name));
|
"Can't allocate data for inbuffer of %s",name));
|
c->outbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
|
c->outbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));
|
if (!c->outbuf.dirty) panic(errprint1(
|
if (!c->outbuf.dirty) panic(errprint1(
|
"Can't allocate dirty bits for outbuffer of %s",name));
|
"Can't allocate dirty bits for outbuffer of %s",name));
|
c->outbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
|
c->outbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));
|
if (!c->outbuf.data) panic(errprint1(
|
if (!c->outbuf.data) panic(errprint1(
|
"Can't allocate data for outbuffer of %s",name));
|
"Can't allocate data for outbuffer of %s",name));
|
if (name[0]!='S') @;
|
if (name[0]!='S') @;
|
}
|
}
|
|
|
@ @d sign_bit 0x80000000
|
@ @d sign_bit 0x80000000
|
|
|
@=
|
@=
|
/* Build the main storage of cache |c|: an array of |c->cc| sets, each
   holding |c->aa| blocks. Every block receives |c->bb>>c->g| dirty-bit
   bytes and |c->bb>>3| words of type |octa|, all zeroed by |calloc|; any
   failed allocation is reported through |panic|. The two loop bounds were
   restored from the element counts passed to |calloc|, because the
   comparison operators had been lost from this copy of the file. */
c->set=(cacheset *)calloc(c->cc,sizeof(cacheset));
if (!c->set) panic(errprint1(
               "Can't allocate cache sets for %s",name));
@.Can't allocate...@>
for (j=0;j<c->cc;j++) {
  c->set[j]=(cacheblock *)calloc(c->aa,sizeof(cacheblock));
  if (!c->set[j]) panic(errprint2(
               "Can't allocate cache blocks for set %d of %s",j,name));
  for (k=0;k<c->aa;k++) {
    c->set[j][k].tag.h=sign_bit; /* invalid tag */
    c->set[j][k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));
    if (!c->set[j][k].dirty) panic(errprint3(
          "Can't allocate dirty bits for block %d of set %d of %s",k,j,name));
    c->set[j][k].data=(octa *)calloc(c->bb>>3,sizeof(octa));
    if (!c->set[j][k].data) panic(errprint3(
          "Can't allocate data for block %d of set %d of %s",k,j,name));
  }
}
|
|
@ @=
|
@ @=
|
{ /* give cache |c| a victim cache of |c->vv| blocks */
  c->victim=(cacheblock*)calloc(c->vv,sizeof(cacheblock));
  if (!c->victim) panic(errprint1(
       "Can't allocate blocks for victim cache of %s",name));
  for (k=0;k<c->vv;k++) { /* bound restored from the allocation size above */
    c->victim[k].tag.h=sign_bit; /* invalid tag */
    c->victim[k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));
      /* same dirty-array size as an ordinary block of |c| */
    if (!c->victim[k].dirty) panic(errprint2(
         "Can't allocate dirty bits for block %d of victim cache of %s",
             k,name));
@.Can't allocate...@>
    c->victim[k].data=(octa *)calloc(c->bb>>3,sizeof(octa));
    if (!c->victim[k].data) panic(errprint2(
         "Can't allocate data for block %d of victim cache of %s",k,name));
  }
}
|
|
@ @=
|
@ @=
|
{ /* one reader coroutine for each of the |c->ports| ports of cache |c| */
  c->reader=(coroutine*)calloc(c->ports,sizeof(coroutine));
  if (!c->reader) panic(errprint1(
@.Can't allocate...@>
       "Can't allocate readers for %s",name));
  for (j=0;j<c->ports;j++) { /* bound restored from the allocation size above */
    c->reader[j].stage=vanish;
    /* the reader is labeled from the first two letters of the cache name:
       a leading D selects the D-side names, anything else the I-side names,
       and a following T selects the translation-cache variant */
    c->reader[j].name=(name[0]=='D'? (name[1]=='T'? "DTreader": "Dreader"):
                                     (name[1]=='T'? "ITreader": "Ireader"));
  }
}
|
|
@ @=
|
@ @=
|
/* Allocate every configured cache and name its filler and flusher
   coroutines. The two translation caches are allocated unconditionally and
   fill from |fill_from_virt|; the I, D and S caches are allocated only when
   their pointers are nonnull. */
alloc_cache(ITcache,"ITcache");
ITcache->filler.name="ITfiller";@+ ITcache->filler.stage=fill_from_virt;
alloc_cache(DTcache,"DTcache");
DTcache->filler.name="DTfiller";@+ DTcache->filler.stage=fill_from_virt;
if (Icache) {
  alloc_cache(Icache,"Icache");
  Icache->filler.name="Ifiller";@+ Icache->filler.stage=fill_from_mem;
}
if (Dcache) {
  alloc_cache(Dcache,"Dcache");
  Dcache->filler.name="Dfiller";@+ Dcache->filler.stage=fill_from_mem;
  Dcache->flusher.name="Dflusher";@+ Dcache->flusher.stage=flush_to_mem;
}
if (Scache) {
  /* NOTE(review): this branch uses |Icache| and |Dcache| without testing
     them; presumably the parser guarantees that both exist whenever an
     S-cache is configured. Please confirm. */
  alloc_cache(Scache,"Scache");
  /* the two block-size comparisons were restored to agree with the error
     messages they guard; their right-hand sides had been lost */
  if (Scache->bb<Icache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Icache blocks"));
@.Configuration error...@>
  if (Scache->bb<Dcache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Dcache blocks"));
  if (Scache->gg!=Dcache->gg) panic(errprint0(
     "Configuration error: Scache granularity differs from the Dcache"));
  /* with an S-cache present, the I and D caches talk to it instead of
     to memory, and only the S-cache talks to memory */
  Icache->filler.stage=fill_from_S;
  Dcache->filler.stage=fill_from_S;@+ Dcache->flusher.stage=flush_to_S;
  Scache->filler.name="Sfiller";@+ Scache->filler.stage=fill_from_mem;
  Scache->flusher.name="Sflusher";@+ Scache->flusher.stage=flush_to_mem;
}
|
|
@ Now we are nearly done. The only nontrivial task remaining is
to allocate the ring of queues for coroutine scheduling; for this we
need to determine the maximum waiting time that will occur between
scheduler and schedulee.
|
|
@=
|
@=
|
/* Size and allocate the scheduling ring. First |max_cycs| is raised, if
   necessary, to cover a memory transfer of the largest cache block; then
   |max_cycs+1| header nodes are allocated and initialized. */
bus_words=mem_bus_bytes>>3;
j=(mem_read_time<mem_write_time? mem_write_time: mem_read_time);
  /* NOTE(review): this line was cut off after |mem_read_time| in the copy
     under review; it is restored here as the larger of |mem_read_time| and
     |mem_write_time|. Please confirm against an intact copy. */
n=1;
if (Scache && Scache->bb>n) n=Scache->bb;
if (Icache && Icache->bb>n) n=Icache->bb;
if (Dcache && Dcache->bb>n) n=Dcache->bb;
  /* now |n| is the largest |bb| among the caches that exist, or 1 */
n=mem_addr_time+((int)(n+bus_words-1)/bus_words)*j;
  /* address time plus |j| for each bus transfer, rounding the count up */
if (n>max_cycs) max_cycs=n; /* now |max_cycs| bounds the waiting time */
ring_size=max_cycs+1;
ring=(coroutine *)calloc(ring_size,sizeof(coroutine));
if (!ring) panic(errprint0("Can't allocate the scheduling ring"));
@.Can't allocate...@>
{@+register coroutine *p;
  for (p=ring;p<ring+ring_size;p++) { /* loop header restored; visit each node */
    p->name=""; /* header nodes are nameless */
    p->stage=max_stage;
  }
}
|
|
@ @s chunknode int
|
@ @s chunknode int
|
|
|
@=
|
@=
|
/* Final checks and allocations. Every buffer comes zeroed from |calloc|,
   and every failure is reported through |panic|. Each statement appears
   exactly once here; in the copy under review each one was duplicated,
   so every buffer was allocated twice and the first copy was lost. */
if (hash_prime<=mem_chunks_max) panic(errprint0(
  "Configuration error: hashprime must exceed memchunksmax"));
@.Configuration error...@>
mem_hash=(chunknode *)calloc(hash_prime+1,sizeof(chunknode));
  /* entries 0 through |hash_prime| */
if (!mem_hash) panic(errprint0("Can't allocate the hash table"));
@.Can't allocate...@>
mem_hash[0].chunk=(octa*)calloc(1<<13,sizeof(octa));
if (!mem_hash[0].chunk) panic(errprint0("Can't allocate chunk 0"));
mem_hash[hash_prime].chunk=(octa*)calloc(1<<13,sizeof(octa));
if (!mem_hash[hash_prime].chunk) panic(errprint0("Can't allocate 0 chunk"));
mem_chunks=1;
/* each of the next three buffers has one element more than its configured
   size, so that the corresponding top pointer addresses the last element */
fetch_bot=(fetch*)calloc(fetch_buf_size+1,sizeof(fetch));
if (!fetch_bot) panic(errprint0("Can't allocate the fetch buffer"));
fetch_top=fetch_bot+fetch_buf_size;
reorder_bot=(control*)calloc(reorder_buf_size+1,sizeof(control));
if (!reorder_bot) panic(errprint0("Can't allocate the reorder buffer"));
reorder_top=reorder_bot+reorder_buf_size;
wbuf_bot=(write_node*)calloc(write_buf_size+1,sizeof(write_node));
if (!wbuf_bot) panic(errprint0("Can't allocate the write buffer"));
wbuf_top=wbuf_bot+write_buf_size;
if (bp_n==0) bp_table=NULL;
else { /* a branch prediction table is desired */
  if (bp_a+bp_b+bp_c>=32) panic(errprint0(
    "Configuration error: Branch table has >= 4 gigabytes of data"));
  bp_table=(char*)calloc(1<<(bp_a+bp_b+bp_c),sizeof(char));
  if (!bp_table) panic(errprint0("Can't allocate the branch table"));
}
l=(specnode*)calloc(lring_size,sizeof(specnode));
if (!l) panic(errprint0("Can't allocate local registers"));
j=bus_words;
if (Icache && Icache->bb>j) j=Icache->bb;
  /* |j| is now the larger of |bus_words| and the I-cache |bb|, if any */
fetched=(octa*)calloc(j,sizeof(octa));
if (!fetched) panic(errprint0("Can't allocate prefetch buffer"));
dispatch_stat=(int*)calloc(dispatch_max+1,sizeof(int));
  /* one counter for each value from 0 through |dispatch_max| */
if (!dispatch_stat) panic(errprint0("Can't allocate dispatch counts"));
no_hardware_PT=1-hardware_PT;
|
|
@* Putting it all together. Here then is the desired configuration
subroutine.
|
|
@c
|
@c
|
#include <stdio.h> /* |fopen|, |fgets|, |sscanf|, |rewind| */
#include <stdlib.h> /* |calloc|, |exit| */
#include <ctype.h> /* |isspace| */
#include <string.h> /* |strcpy|, |strlen|, |strcmp| */
#include <limits.h> /* |INT_MAX| */
#include "mmix-pipe.h"
@@;
|
@@;
|
@@;
|
@@;
|
@@;
|
@@;
|
/* Entry point of this module: open the configuration file named by
   |filename| for reading, and stop through |panic| if it cannot be opened.
   The file is opened exactly once here; the copy under review opened it
   twice and lost the first handle.
   NOTE(review): the seven lines below that hold only a pseudo-semicolon
   appear to have lost the section references they once carried, so as
   written nothing further is done with |config_file|. The references must
   be restored from an intact copy of this file. */
void MMIX_config(filename)
  char *filename;
{@+register int i,j,n;
  config_file=fopen(filename,"r");
  if (!config_file)
    panic(errprint1("Can't open configuration file %s",filename));
@.Can't open...@>
  @;
  @;
  @;
  @;
  @;
  @;
  @;
}
|
|
@*Index.
|
|