OpenCores

% This file is part of the MMIXware package (c) Donald E Knuth 1999

% This file is part of the MMIXware package (c) Donald E Knuth 1999

@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!

@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!

\def\title{MMIX-CONFIG}

\def\title{MMIX-CONFIG}

\def\MMIX{\.{MMIX}}

\def\MMIX{\.{MMIX}}

\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant

\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant

@s bool int

@s bool int

@s cache int

@s cache int

@s func int

@s func int

@s coroutine int

@s coroutine int

@s octa int

@s octa int

@s cacheset int

@s cacheset int

@s cacheblock int

@s cacheblock int

@s fetch int

@s fetch int

@s control int

@s control int

@s write_node int

@s write_node int

@s internal_opcode int

@s internal_opcode int

@s replace_policy int

@s replace_policy int

@s PV TeX

@s PV TeX

@s mmix_opcode int

@s mmix_opcode int

@s specnode int

@s specnode int

\def\PV{\\{PV}} % use italics, not \tt

\def\PV{\\{PV}} % use italics, not \tt

@s CPV TeX

@s CPV TeX

\def\CPV{\\{CPV}}

\def\CPV{\\{CPV}}

@s OP TeX

@s OP TeX

\def\OP{\\{OP}}

\def\OP{\\{OP}}

@s and normal @q unreserve a C++ keyword @>

@s and normal @q unreserve a C++ keyword @>

@s or normal @q unreserve a C++ keyword @>

@s or normal @q unreserve a C++ keyword @>

@s xor normal @q unreserve a C++ keyword @>

@s xor normal @q unreserve a C++ keyword @>

@*Input format. Configuration files allow this simulator to adapt itself to

@*Input format. Configuration files allow this simulator to adapt itself to

infinitely many possible combinations of hardware features. The purpose of the

infinitely many possible combinations of hardware features. The purpose of the

present module is to read a configuration file, check it for validity, and

present module is to read a configuration file, check it for validity, and

set up the relevant data structures.

set up the relevant data structures.

All data in a configuration file consists simply of {\it tokens\/} separated

All data in a configuration file consists simply of {\it tokens\/} separated

by one or more units of white space, where a ``token'' is any sequence of

by one or more units of white space, where a ``token'' is any sequence of

nonspace characters that doesn't contain a percent sign. Percent signs

nonspace characters that doesn't contain a percent sign. Percent signs

and anything following them on a line are ignored; this convention allows

and anything following them on a line are ignored; this convention allows

a user to include comments in the file. Here's a simple (but weird) example:

a user to include comments in the file. Here's a simple (but weird) example:

$$\vbox{\halign{\tt#\hfil\cr

$$\vbox{\halign{\tt#\hfil\cr

\% Silly configuration\cr

\% Silly configuration\cr

writebuffer 200\cr

writebuffer 200\cr

memaddresstime 100\cr

memaddresstime 100\cr

Dcache associativity 4 lru\cr

Dcache associativity 4 lru\cr

Dcache blocksize 1024\cr

Dcache blocksize 1024\cr

unit ODD 5555555555555555555555555555555555555555555555555555555555555555\cr

unit ODD 5555555555555555555555555555555555555555555555555555555555555555\cr

unit EVEN aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\cr

unit EVEN aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\cr

div 40 30 20\ \ \% three-stage divide\cr

div 40 30 20\ \ \% three-stage divide\cr

}}$$

}}$$

It means that (1) the write buffer has capacity for 200 octabytes;

It means that (1) the write buffer has capacity for 200 octabytes;

(2)~the memory bus takes 100 cycles to process an address;

(2)~the memory bus takes 100 cycles to process an address;

(3)~there's a D-cache, in which each set has 4 blocks and the replacement

(3)~there's a D-cache, in which each set has 4 blocks and the replacement

policy is least-recently-used;

policy is least-recently-used;

(4)~each block in the D-cache has 1024 bytes;

(4)~each block in the D-cache has 1024 bytes;

(5)~there are two functional units, one for all the odd-numbered opcodes

(5)~there are two functional units, one for all the odd-numbered opcodes

and one for all the rest;

and one for all the rest;

(6)~the division instructions take three pipeline stages, spending 40 cycles

(6)~the division instructions take three pipeline stages, spending 40 cycles

in the first stage, 30~in the second, and 20 in the last;

in the first stage, 30~in the second, and 20 in the last;

(7)~all other parameters have default values.

(7)~all other parameters have default values.

@ Four kinds of specifications can appear in a configuration file,

@ Four kinds of specifications can appear in a configuration file,

according to the following syntax:

according to the following syntax:

\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow

\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow

$$\vbox{\halign{$#$\hfil\cr
\<specification>\is\<PV spec>\mid\<cache spec>\mid\<pipe spec>\mid
  \<functional spec>\cr
\<PV spec>\is\<parameter>\<decimal value>\cr
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
\<pipe spec>\is\<operation>\<pipeline times>\cr
\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>\cr}}$$

@ A \<PV spec> simply assigns a given value to a given parameter. The
possibilities for \<parameter> are as follows:

\def\bull#1 {\smallskip\hang\textindent{$\bullet$}\.{#1}\enspace}

\def\bull#1 {\smallskip\hang\textindent{$\bullet$}\.{#1}\enspace}

\bull fetchbuffer (default 4), maximum instructions in the fetch buffer;

\bull fetchbuffer (default 4), maximum instructions in the fetch buffer;

must be $\ge1$.

must be $\ge1$.

\bull writebuffer (default 2), maximum octabytes in the write buffer;

\bull writebuffer (default 2), maximum octabytes in the write buffer;

must be $\ge1$.

must be $\ge1$.

\bull reorderbuffer (default 5), maximum instructions issued but not

\bull reorderbuffer (default 5), maximum instructions issued but not

committed; must be $\ge1$.

committed; must be $\ge1$.

\bull renameregs (default 5), maximum partial results in the reorder

\bull renameregs (default 5), maximum partial results in the reorder

buffer; must be $\ge1$.

buffer; must be $\ge1$.

\bull memslots (default 2), maximum store instructions in the reorder

\bull memslots (default 2), maximum store instructions in the reorder

buffer; must be $\ge1$.

buffer; must be $\ge1$.

\bull localregs (default 256), number of local registers in ring;

\bull localregs (default 256), number of local registers in ring;

must be 256, 512, or 1024.

must be 256, 512, or 1024.

\bull fetchmax (default 2), maximum instructions fetched per cycle;

\bull fetchmax (default 2), maximum instructions fetched per cycle;

must be $\ge1$.

must be $\ge1$.

\bull dispatchmax (default 1), maximum instructions issued per cycle;

\bull dispatchmax (default 1), maximum instructions issued per cycle;

must be $\ge1$.

must be $\ge1$.

\bull peekahead (default 1), maximum lookahead for jumps per cycle.

\bull peekahead (default 1), maximum lookahead for jumps per cycle.

\bull commitmax (default 1), maximum instructions committed per cycle;

\bull commitmax (default 1), maximum instructions committed per cycle;

must be $\ge1$.

must be $\ge1$.

\bull fremmax (default 1), maximum reductions in \.{FREM} computation per

\bull fremmax (default 1), maximum reductions in \.{FREM} computation per

cycle; must be $\ge1$.

cycle; must be $\ge1$.

\bull denin (default 1), extra cycles taken if a floating point input

\bull denin (default 1), extra cycles taken if a floating point input

is subnormal.

is subnormal.

\bull denout (default 1), extra cycles taken if a floating point result

\bull denout (default 1), extra cycles taken if a floating point result

is subnormal.

is subnormal.

\bull writeholdingtime (default 0), minimum number of cycles for data to

\bull writeholdingtime (default 0), minimum number of cycles for data to

remain in the write buffer.

remain in the write buffer.

\bull memaddresstime (default 20), cycles to process memory address;

\bull memaddresstime (default 20), cycles to process memory address;

must be $\ge1$.

must be $\ge1$.

\bull memreadtime (default 20), cycles to read one memory busload;

\bull memreadtime (default 20), cycles to read one memory busload;

must be $\ge1$.

must be $\ge1$.

\bull memwritetime (default 20), cycles to write one memory busload;

\bull memwritetime (default 20), cycles to write one memory busload;

must be $\ge1$.

must be $\ge1$.

\bull membusbytes (default 8), number of bytes per memory busload; must be a

\bull membusbytes (default 8), number of bytes per memory busload; must be a

power of~2 that is 8~or~more.

power of~2 that is 8~or~more.

\bull branchpredictbits (default 0), number of bits in each branch prediction

\bull branchpredictbits (default 0), number of bits in each branch prediction

table entry; must be $\le8$.

table entry; must be $\le8$.

\bull branchaddressbits (default 0), number of bits in instruction address

\bull branchaddressbits (default 0), number of bits in instruction address

used to index the branch prediction table.

used to index the branch prediction table.

\bull branchhistorybits (default 0), number of bits in branch history used to

\bull branchhistorybits (default 0), number of bits in branch history used to

index the branch prediction table.

index the branch prediction table.

\bull branchdualbits (default 0), number of bits of

\bull branchdualbits (default 0), number of bits of

instruction-address-xor-branch-history used to index the branch prediction

instruction-address-xor-branch-history used to index the branch prediction

table.

table.

\bull hardwarepagetable (default 1), is zero if page table calculations

\bull hardwarepagetable (default 1), is zero if page table calculations

must be emulated by the operating system.

must be emulated by the operating system.

\bull disablesecurity (default 0), is 1 if the hot-seat security checks

\bull disablesecurity (default 0), is 1 if the hot-seat security checks

are turned off. This option is used only for testing purposes; it means

are turned off. This option is used only for testing purposes; it means

that the `\.s' interrupt will not occur, and the `\.p' interrupt will

that the `\.s' interrupt will not occur, and the `\.p' interrupt will

be signaled only when going from a nonnegative location to a negative one.

be signaled only when going from a nonnegative location to a negative one.

\bull memchunksmax (default 1000), maximum number of $2^{16}$-byte chunks of

\bull memchunksmax (default 1000), maximum number of $2^{16}$-byte chunks of

simulated memory; must be $\ge1$.

simulated memory; must be $\ge1$.

\bull hashprime (default 2003), prime number used to address simulated memory;

\bull hashprime (default 2003), prime number used to address simulated memory;

must exceed \.{memchunksmax}, preferably by a factor of about~2.

must exceed \.{memchunksmax}, preferably by a factor of about~2.

\smallskip\noindent

\smallskip\noindent

The values of \.{memchunksmax} and \.{hashprime} affect only the speed of the

The values of \.{memchunksmax} and \.{hashprime} affect only the speed of the

simulator, not its results---unless a very huge program is being simulated.

simulator, not its results---unless a very huge program is being simulated.

The stated defaults for \.{memchunksmax} and \.{hashprime}

The stated defaults for \.{memchunksmax} and \.{hashprime}

should be adequate for almost all applications.

should be adequate for almost all applications.

@ A \<cache spec> assigns a given value to a parameter affecting one of five
possible caches:
$$\vbox{\halign{$#$\hfil\cr
\<cache spec>\is\<cache name>\<cache parameter>\<decimal value>\<policy>\cr
\<cache name>\is\.{ITcache}\mid\.{DTcache}\mid\.{Icache}\mid\.{Dcache}
  \mid\.{Scache}\cr
\<policy>\is\<empty>\mid\.{random}\mid\.{serial}
          \mid\.{pseudolru}\mid\.{lru}\cr}}$$
The possibilities for \<cache parameter> are as follows:

\bull associativity (default 1), number of cache blocks per cache set;

\bull associativity (default 1), number of cache blocks per cache set;

must be a power of~2. (A cache with associativity~1 is said to be

must be a power of~2. (A cache with associativity~1 is said to be

``direct-mapped.'')

``direct-mapped.'')

\bull blocksize (default 8), number of bytes per cache block; must be a power

\bull blocksize (default 8), number of bytes per cache block; must be a power

of~2, at least equal to the granularity, and at most equal to~8192.

of~2, at least equal to the granularity, and at most equal to~8192.

The blocksize of \.{ITcache} and \.{DTcache} must be~8.

The blocksize of \.{ITcache} and \.{DTcache} must be~8.

\bull setsize (default 1), number of sets of cache blocks; must be a power

\bull setsize (default 1), number of sets of cache blocks; must be a power

of~2. (A cache with set size~1 is said to be ``fully associative.'')

of~2. (A cache with set size~1 is said to be ``fully associative.'')

\bull granularity (default 8), number of bytes per ``dirty bit,'' used to

\bull granularity (default 8), number of bytes per ``dirty bit,'' used to

remember which items of data have changed since they were read from memory;

remember which items of data have changed since they were read from memory;

must be a power of~2 and at least~8. The granularity must be~8 if

must be a power of~2 and at least~8. The granularity must be~8 if

\.{writeallocate} is~0.

\.{writeallocate} is~0.

\bull victimsize (default 0), number of cache blocks in the victim buffer,

\bull victimsize (default 0), number of cache blocks in the victim buffer,

which holds blocks removed from the main cache sets; must be zero or a power

which holds blocks removed from the main cache sets; must be zero or a power

of~2.

of~2.

\bull writeback (default 0), is 1 in a ``write-back'' cache, which holds dirty

\bull writeback (default 0), is 1 in a ``write-back'' cache, which holds dirty

data as long as possible; is 0 in a ``write-through'' cache, which cleans

data as long as possible; is 0 in a ``write-through'' cache, which cleans

all data as soon as possible.

all data as soon as possible.

\bull writeallocate (default 0), is 1 in a ``write-allocate'' cache,

\bull writeallocate (default 0), is 1 in a ``write-allocate'' cache,

which remembers all recently written data;

which remembers all recently written data;

is 0 in a ``write-around'' cache, which doesn't make space for newly written

is 0 in a ``write-around'' cache, which doesn't make space for newly written

data that fails to hit an existing cache block.

data that fails to hit an existing cache block.

\bull accesstime (default 1), number of cycles to query the cache;

\bull accesstime (default 1), number of cycles to query the cache;

must be $\ge1$. (Hits in the S-cache actually require {\it twice}

must be $\ge1$. (Hits in the S-cache actually require {\it twice}

the accesstime, once to query the tag and once to transmit the data.)

the accesstime, once to query the tag and once to transmit the data.)

\bull copyintime (default 1), number of cycles to move a cache block from

\bull copyintime (default 1), number of cycles to move a cache block from

its input buffer into the cache proper; must be $\ge1$.

its input buffer into the cache proper; must be $\ge1$.

\bull copyouttime (default 1), number of cycles to move a cache block

\bull copyouttime (default 1), number of cycles to move a cache block

from the cache proper to its output buffer; must be $\ge1$.

from the cache proper to its output buffer; must be $\ge1$.

\bull ports (default 1), number of processes that can simultaneously
query the cache; must be $\ge1$.

\smallskip

\smallskip

The \<policy> parameter should be nonempty only on cache specifications
for parameters

\.{associativity} and \.{victimsize}. If no replacement policy is specified,

\.{associativity} and \.{victimsize}. If no replacement policy is specified,

\.{random} is the default. All four policies are equivalent when the

\.{random} is the default. All four policies are equivalent when the

\.{associativity} or \.{victimsize} is~1; \.{pseudolru} is equivalent

\.{associativity} or \.{victimsize} is~1; \.{pseudolru} is equivalent

to \.{lru} when the \.{associativity} or \.{victimsize} is~2.

to \.{lru} when the \.{associativity} or \.{victimsize} is~2.

The \.{granularity}, \.{writeback}, \.{writeallocate}, and \.{copyouttime}

The \.{granularity}, \.{writeback}, \.{writeallocate}, and \.{copyouttime}

parameters affect the performance only of the D-cache and S-cache; the other

parameters affect the performance only of the D-cache and S-cache; the other

three caches are read-only, so they never need to write their data.

three caches are read-only, so they never need to write their data.

The \.{ports} parameter affects the performance of the D-cache and

The \.{ports} parameter affects the performance of the D-cache and

DT-cache, and (if the \.{PREGO} command is used) the performance of the

DT-cache, and (if the \.{PREGO} command is used) the performance of the

I-cache and IT-cache. The S-cache accommodates only one process at a time,

I-cache and IT-cache. The S-cache accommodates only one process at a time,

regardless of the number of specified ports.

regardless of the number of specified ports.

Only the translation caches (the IT-cache and DT-cache) are present by

Only the translation caches (the IT-cache and DT-cache) are present by

default. But if any specifications are given for, say, an I-cache,

default. But if any specifications are given for, say, an I-cache,

all of the unspecified I-cache parameters take their default values.

all of the unspecified I-cache parameters take their default values.

The existence of an S-cache (secondary cache) implies the existence of both

The existence of an S-cache (secondary cache) implies the existence of both

I-cache and D-cache (primary caches for instructions and data).

I-cache and D-cache (primary caches for instructions and data).

The block size of the secondary cache must not be less than the block

The block size of the secondary cache must not be less than the block

size of the primary caches. The secondary cache must have the

size of the primary caches. The secondary cache must have the

same granularity as the D-cache.

same granularity as the D-cache.

@ A \<pipe spec> governs the execution time of potentially slow operations.
$$\vbox{\halign{$#$\hfil\cr
\<pipe spec>\is\<operation>\<pipeline times>\cr
\<pipeline times>\is\<decimal value>\mid\<pipeline times>\<decimal value>\cr}}$$
Here the \<operation> is one of the following:

\bull mul0 through \.{mul8} (default 10); the values for \.{mul}$j$ refer

\bull mul0 through \.{mul8} (default 10); the values for \.{mul}$j$ refer

to products in which the second operand is less than $2^{8j}$, where $j$

to products in which the second operand is less than $2^{8j}$, where $j$

is as small as possible. Thus, for example, \.{mul1} applies to

is as small as possible. Thus, for example, \.{mul1} applies to

nonzero one-byte multipliers.

nonzero one-byte multipliers.

\bull div (default 60); this applies to integer division, signed and unsigned.

\bull div (default 60); this applies to integer division, signed and unsigned.

\bull sh (default 1); this applies to left and right shifts, signed and

\bull sh (default 1); this applies to left and right shifts, signed and

unsigned.

unsigned.

\bull mux (default 1); the multiplex operator.

\bull mux (default 1); the multiplex operator.

\bull sadd (default 1); the sideways addition operator.

\bull sadd (default 1); the sideways addition operator.

\bull mor (default 1); the boolean matrix multiplication operators \.{MOR} and

\bull mor (default 1); the boolean matrix multiplication operators \.{MOR} and

\.{MXOR}.

\.{MXOR}.

\bull fadd (default 4); floating point addition and subtraction.

\bull fadd (default 4); floating point addition and subtraction.

\bull fmul (default 4); floating point multiplication.

\bull fmul (default 4); floating point multiplication.

\bull fdiv (default 40); floating point division.

\bull fdiv (default 40); floating point division.

\bull fsqrt (default 40); floating point square root.

\bull fsqrt (default 40); floating point square root.

\bull fint (default 4); floating point integerization.

\bull fint (default 4); floating point integerization.

\bull fix (default 2); conversion from floating to fixed, signed and unsigned.

\bull fix (default 2); conversion from floating to fixed, signed and unsigned.

\bull flot (default 2); conversion from fixed to floating, signed and unsigned.

\bull flot (default 2); conversion from fixed to floating, signed and unsigned.

\bull feps (default 4); floating comparison with respect to epsilon.

\bull feps (default 4); floating comparison with respect to epsilon.

\smallskip\noindent

\smallskip\noindent

In each case one can specify a sequence of pipeline stages, with a positive

In each case one can specify a sequence of pipeline stages, with a positive

number of cycles to be spent in each stage. For example, a specification like

number of cycles to be spent in each stage. For example, a specification like

`\.{fmul}~\.{3}~\.{1}' would say that a functional unit that supports

`\.{fmul}~\.{3}~\.{1}' would say that a functional unit that supports

\.{FMUL} takes a total of four cycles to compute the floating point product

\.{FMUL} takes a total of four cycles to compute the floating point product

in two stages; it can start working on a second product after three cycles

in two stages; it can start working on a second product after three cycles

have gone by.

have gone by.

If a floating point operation has a subnormal input, \.{denin} is added to

If a floating point operation has a subnormal input, \.{denin} is added to

the time for the first stage. If a floating point operation has a subnormal

the time for the first stage. If a floating point operation has a subnormal

result, \.{denout} is added to the time for the last stage.

result, \.{denout} is added to the time for the last stage.

@ The fourth and final kind of specification defines a functional unit:

@ The fourth and final kind of specification defines a functional unit:

$$\<functional spec>\is\.{unit}\ \<name>\<64 hexadecimal digits>$$

The symbolic name should be at most fifteen characters long.

The symbolic name should be at most fifteen characters long.

The 64 hexadecimal digits contain 256 bits, with `1' for each supported

The 64 hexadecimal digits contain 256 bits, with `1' for each supported

opcode; the most significant (leftmost) bit is for opcode 0 (\.{TRAP}),

opcode; the most significant (leftmost) bit is for opcode 0 (\.{TRAP}),

and the least significant bit is for opcode 255 (\.{TRIP}).

and the least significant bit is for opcode 255 (\.{TRIP}).

For example, we can define a load/store unit (which handles register/memory

For example, we can define a load/store unit (which handles register/memory

operations), a multiplication unit (which handles fixed and floating point

operations), a multiplication unit (which handles fixed and floating point

multiplication), a boolean unit (which handles only bitwise operations),

multiplication), a boolean unit (which handles only bitwise operations),

and a more general arithmetic-logical unit, as follows:

and a more general arithmetic-logical unit, as follows:

$$\vbox{\halign{\tt#\hfil\cr

$$\vbox{\halign{\tt#\hfil\cr

unit LSU 00000000000000000000000000000000fffffffcfffffffc0000000000000000\cr

unit LSU 00000000000000000000000000000000fffffffcfffffffc0000000000000000\cr

unit MUL 000080f000000000000000000000000000000000000000000000000000000000\cr

unit MUL 000080f000000000000000000000000000000000000000000000000000000000\cr

unit BIT 000000000000000000000000000000000000000000000000ffff00ff00ff0000\cr

unit BIT 000000000000000000000000000000000000000000000000ffff00ff00ff0000\cr

unit ALU f0000000ffffffffffffffffffffffff0000000300000003ffffffffffffffff\cr

unit ALU f0000000ffffffffffffffffffffffff0000000300000003ffffffffffffffff\cr

}}$$

}}$$

The order in which units are specified is important, because \MMIX's dispatcher

The order in which units are specified is important, because \MMIX's dispatcher

will try to match each instruction with the first functional unit that

will try to match each instruction with the first functional unit that

supports its opcode. Therefore it is best to list more specialized

supports its opcode. Therefore it is best to list more specialized

units (like the \.{BIT} unit in this example) before more general ones;

units (like the \.{BIT} unit in this example) before more general ones;

this lets the specialized units have first chance at the instructions

this lets the specialized units have first chance at the instructions

they can handle.

they can handle.

There can be any number of functional units, having possibly identical

There can be any number of functional units, having possibly identical

specifications. One should, however, give each unit a unique name

specifications. One should, however, give each unit a unique name

(e.g., \.{ALU1} and \.{ALU2} if there are two arithmetic-logical units),

(e.g., \.{ALU1} and \.{ALU2} if there are two arithmetic-logical units),

since these names are used in diagnostic messages.

since these names are used in diagnostic messages.

Opcodes that aren't supported by any specified unit will cause an

Opcodes that aren't supported by any specified unit will cause an

emulation trap.

emulation trap.

@^emulation@>

@^emulation@>

@ Full details about the significance of all these parameters can be found

@ Full details about the significance of all these parameters can be found

in the \.{mmix-pipe} module, which defines and discusses the data structures

in the \.{mmix-pipe} module, which defines and discusses the data structures

that need to be configured and initialized.

that need to be configured and initialized.

Of course the specifications in a configuration file needn't make any sense,

Of course the specifications in a configuration file needn't make any sense,

nor need they be practically achievable. We could, for example, specify

nor need they be practically achievable. We could, for example, specify

a unit that handles only the two opcodes \.{NXOR} and \.{DIVUI};

a unit that handles only the two opcodes \.{NXOR} and \.{DIVUI};

we could specify 1-cycle division but pipelined 100-cycle shifts, or

we could specify 1-cycle division but pipelined 100-cycle shifts, or

1-cycle memory access but 100-cycle cache access. We could create

1-cycle memory access but 100-cycle cache access. We could create

a thousand rename registers and issue a hundred instructions per cycle,

a thousand rename registers and issue a hundred instructions per cycle,

etc. Some combinations of parameters are clearly ridiculous.

etc. Some combinations of parameters are clearly ridiculous.

But there remain a huge number of possibilities of interest, especially

But there remain a huge number of possibilities of interest, especially

as technology continues to evolve. By experimenting with configurations that

as technology continues to evolve. By experimenting with configurations that

are extreme by present-day standards, we can see how much might be gained

are extreme by present-day standards, we can see how much might be gained

if the corresponding hardware could be built economically.

if the corresponding hardware could be built economically.

@* Basic input/output. Let's get ready to program the |MMIX_config| subroutine

@* Basic input/output. Let's get ready to program the |MMIX_config| subroutine

by building some simple infrastructure. First we need some macros to

by building some simple infrastructure. First we need some macros to

print error messages.

print error messages.

@d errprint0(f) fprintf(stderr,f) /* write format |f| (no arguments) to |stderr| */
@d errprint1(f,a) fprintf(stderr,f,a) /* likewise, with one argument */
@d errprint2(f,a,b) fprintf(stderr,f,a,b) /* likewise, with two arguments */
@d errprint3(f,a,b,c) fprintf(stderr,f,a,b,c) /* likewise, with three arguments */
@d panic(x)@+ {@+x;@+errprint0("!\n");@+exit(-1);@+}
  /* perform |x|, finish the line with an exclamation point, and exit with status $-1$ */

@ And we need a place to look at the input.

@ And we need a place to look at the input.

@d BUF_SIZE 100 /* we don't need long lines */

@<Global variables@>=
FILE *config_file; /* input comes from here */
char buffer[BUF_SIZE]; /* input lines go here */
char token[BUF_SIZE]; /* and tokens are copied to here */
char *buf_pointer=buffer; /* this is our current position */
bool token_prescanned; /* does |token| contain the next token already? */

@ The |get_token| routine copies the next token of input into the |token|

@ The |get_token| routine copies the next token of input into the |token|

buffer. After the input has ended, a final `\.{end}' is appended.

buffer. After the input has ended, a final `\.{end}' is appended.

@<Subroutines@>=
static void get_token @,@,@[ARGS((void))@];@+@t}\6{@>
static void get_token() /* set |token| to the next token of the configuration file */
{
  register char *p,*q;
  if (token_prescanned) { /* a previously scanned token is still waiting in |token| */
    token_prescanned=false;@+ return;
  }
  while(1) { /* scan past white space */
    if (*buf_pointer=='\0' || *buf_pointer=='\n' || *buf_pointer=='%') {
      /* the rest of this line is empty or a comment, so fetch another line */
      if (!fgets(buffer,BUF_SIZE,config_file)) {
        strcpy(token,"end");@+return; /* no more input: supply the final `\.{end}' */
      }
      if (strlen(buffer)==BUF_SIZE-1 && buffer[BUF_SIZE-2]!='\n')
        panic(errprint1("config file line too long: `%s...'",buffer));
@.config file line...@>
      buf_pointer=buffer;
    }@+else if (!isspace((unsigned char)*buf_pointer)) break;
      /* the cast keeps |isspace| well defined when |char| is signed */
    else buf_pointer++;
  }
  for (p=buf_pointer,q=token;
       *p!='\0' && !isspace((unsigned char)*p) && *p!='%';p++,q++) *q=*p;
    /* stopping at |'\0'| handles a last line that has no newline */
  buf_pointer=p;@+ *q='\0';
  return;
}

@ The |get_int| routine is called when we wish to input a decimal value.

@ The |get_int| routine is called when we wish to input a decimal value.

It returns $-1$ if the next token isn't a string of decimal digits.

It returns $-1$ if the next token isn't a string of decimal digits.

@=

@=

static int get_int @,@,@[ARGS((void))@];@+@t}\6{@>
static int get_int()
{@+ int v;
  char *p;
  get_token();
  for (p=token,v=0; *p>='0' && *p<='9'; p++) {
    if (v>(INT_MAX-(*p-'0'))/10) return -1; /* the value would not fit in an |int| */
    v=10*v+(*p-'0');
  }
  if (*p) return -1; /* something other than a digit was present */
  return v;
}

@ A simple data structure makes it fairly easy to deal with

@ A simple data structure makes it fairly easy to deal with

parameter/value specifications.

parameter/value specifications.

@=

@=

typedef struct {
  char name[20]; /* symbolic name, as written in the configuration file */
  int *v; /* internal name: where the value is stored */
  int defval; /* default value */
  int minval, maxval; /* minimum and maximum legal values */
  bool power_of_two; /* must it be a power of two? */
} pv_spec;

@ Cache parameters are a bit more difficult, but still not bad.

@ Cache parameters are a bit more difficult, but still not bad.

@=

@=

typedef enum {@!assoc,@!blksz,@!setsz,@!gran,@!vctsz,
  @!wrb,@!wra,@!acctm,@!citm,@!cotm,@!prts} c_param;

@#

@#

typedef struct {

typedef struct {

  char name[20]; /* symbolic name */

  char name[20]; /* symbolic name */

  c_param v; /* internal code */

  c_param v; /* internal code */

  int defval; /* default value */

  int defval; /* default value */

  int minval, maxval; /* minimum and maximum legal values */

  int minval, maxval; /* minimum and maximum legal values */

  bool power_of_two; /* must it be a power of two? */

  bool power_of_two; /* must it be a power of two? */

} cpv_spec;

} cpv_spec;

@ Operation codes are the easiest of all.

@ Operation codes are the easiest of all.

@=

@=

typedef struct {

typedef struct {

  char name[8]; /* symbolic name */

  char name[8]; /* symbolic name */

  internal_opcode v; /* internal code */

  internal_opcode v; /* internal code */

  int defval; /* default value */

  int defval; /* default value */

} op_spec;

} op_spec;

@ Most of the parameters are external variables declared in the header

@ Most of the parameters are external variables declared in the header

file \.{mmix-pipe.h}; but some are private to this module. Here we

file \.{mmix-pipe.h}; but some are private to this module. Here we

define the main tables used below.

define the main tables used below.

@=

@=

int fetch_buf_size,write_buf_size,reorder_buf_size,mem_bus_bytes,hardware_PT;
int max_cycs=60; /* raised whenever a larger cycle count is specified */

pv_spec PV[]={@/
{"fetchbuffer", &fetch_buf_size, 4, 1, INT_MAX, false},@/
{"writebuffer", &write_buf_size, 2, 1, INT_MAX, false},@/
{"reorderbuffer", &reorder_buf_size, 5, 1, INT_MAX, false},@/
{"renameregs", &max_rename_regs, 5, 1, INT_MAX, false},@/
{"memslots", &max_mem_slots, 2, 1, INT_MAX, false},@/
{"localregs", &lring_size, 256, 256, 1024, true},@/
{"fetchmax", &fetch_max, 2, 1, INT_MAX, false},@/
{"dispatchmax", &dispatch_max, 1, 1, INT_MAX, false},@/
{"peekahead", &peekahead, 1, 0, INT_MAX, false},@/
{"commitmax", &commit_max, 1, 1, INT_MAX, false},@/
{"fremmax", &frem_max, 1, 1, INT_MAX, false},@/
{"denin",&denin_penalty, 1, 0, INT_MAX, false},@/
{"denout",&denout_penalty, 1, 0, INT_MAX, false},@/
{"writeholdingtime", &holding_time, 0, 0, INT_MAX, false},@/
{"memaddresstime", &mem_addr_time, 20, 1, INT_MAX, false},@/
{"memreadtime", &mem_read_time, 20, 1, INT_MAX, false},@/
{"memwritetime", &mem_write_time, 20, 1, INT_MAX, false},@/
{"membusbytes", &mem_bus_bytes, 8, 8, INT_MAX, true},@/
{"branchpredictbits", &bp_n, 0, 0, 8, false},@/
{"branchaddressbits", &bp_a, 0, 0, 32, false},@/
{"branchhistorybits", &bp_b, 0, 0, 32, false},@/
{"branchdualbits", &bp_c, 0, 0, 32, false},@/
{"hardwarepagetable", &hardware_PT, 1, 0, 1, false},@/
{"disablesecurity", (int*)&security_disabled, 0, 0, 1, false},@/
{"memchunksmax", &mem_chunks_max, 1000, 1, INT_MAX, false},@/
{"hashprime", &hash_prime, 2003, 2, INT_MAX, false}};
  /* each entry: name, variable, default, minimum, maximum, power-of-two flag */

@#

@#

cpv_spec CPV[]={

cpv_spec CPV[]={

{"associativity", assoc, 1, 1, INT_MAX, true},@/

{"associativity", assoc, 1, 1, INT_MAX, true},@/

{"blocksize", blksz, 8, 8, 8192, true},@/

{"blocksize", blksz, 8, 8, 8192, true},@/

{"setsize", setsz, 1, 1, INT_MAX, true},@/

{"setsize", setsz, 1, 1, INT_MAX, true},@/

{"granularity", gran, 8, 8, 8192, true},@/

{"granularity", gran, 8, 8, 8192, true},@/

{"victimsize", vctsz, 0, 0, INT_MAX, true},@/

{"victimsize", vctsz, 0, 0, INT_MAX, true},@/

{"writeback", wrb, 0, 0, 1,false},@/

{"writeback", wrb, 0, 0, 1,false},@/

{"writeallocate", wra, 0, 0, 1,false},@/

{"writeallocate", wra, 0, 0, 1,false},@/

{"accesstime", acctm, 1, 1, INT_MAX, false},@/

{"accesstime", acctm, 1, 1, INT_MAX, false},@/

{"copyintime", citm, 1, 1, INT_MAX, false},@/

{"copyintime", citm, 1, 1, INT_MAX, false},@/

{"copyouttime", cotm, 1, 1, INT_MAX, false},@/

{"copyouttime", cotm, 1, 1, INT_MAX, false},@/

{"ports", prts, 1, 1, INT_MAX,false}};

{"ports", prts, 1, 1, INT_MAX,false}};

@#

@#

op_spec OP[]={

op_spec OP[]={

{"mul0", mul0, 10},

{"mul0", mul0, 10},

{"mul1", mul1, 10},

{"mul1", mul1, 10},

{"mul2", mul2, 10},

{"mul2", mul2, 10},

{"mul3", mul3, 10},

{"mul3", mul3, 10},

{"mul4", mul4, 10},

{"mul4", mul4, 10},

{"mul5", mul5, 10},

{"mul5", mul5, 10},

{"mul6", mul6, 10},

{"mul6", mul6, 10},

{"mul7", mul7, 10},

{"mul7", mul7, 10},

{"mul8", mul8, 10},@|

{"mul8", mul8, 10},@|

{"div", div, 60},

{"div", div, 60},

{"sh", sh, 1},

{"sh", sh, 1},

{"mux", mux, 1},

{"mux", mux, 1},

{"sadd", sadd, 1},

{"sadd", sadd, 1},

{"mor", mor, 1},@|

{"mor", mor, 1},@|

{"fadd", fadd, 4},

{"fadd", fadd, 4},

{"fmul", fmul, 4},

{"fmul", fmul, 4},

{"fdiv", fdiv, 40},

{"fdiv", fdiv, 40},

{"fsqrt", fsqrt, 40},

{"fsqrt", fsqrt, 40},

{"fint", fint, 4},@|

{"fint", fint, 4},@|

{"fix", fix, 2},

{"fix", fix, 2},

{"flot", flot, 2},

{"flot", flot, 2},

{"feps", feps, 4}};

{"feps", feps, 4}};

int PV_size,CPV_size,OP_size; /* the number of entries in |PV|, |CPV|, |OP| */

@ The |new_cache| routine creates a \&{cache} structure with default values.

@ The |new_cache| routine creates a \&{cache} structure with default values.

(These default values are ``hard-wired'' into the program, not actually

(These default values are ``hard-wired'' into the program, not actually

read from the |CPV| table.)

read from the |CPV| table.)

@=

@=

static cache* new_cache @,@,@[ARGS((char*))@];@+@t}\6{@>
static cache* new_cache(name)
  char *name; /* kept by reference in the new cache, not copied */
{@+register cache *c=(cache*)calloc(1,sizeof(cache));
  if (!c) panic(errprint1("Can't allocate %s",name));
@.Can't allocate...@>
  c->aa=1; /* default associativity, should equal |CPV[0].defval| */
  c->bb=8; /* default blocksize */
  c->cc=1; /* default setsize */
  c->gg=8; /* default granularity */
  c->vv=0; /* default victimsize */
  c->repl=random; /* default replacement policy */
  c->vrepl=random; /* default victim replacement policy */
  c->mode=0; /* default mode is write-through and write-around */
  c->access_time=c->copy_in_time=c->copy_out_time=1;
  c->filler.ctl=&(c->filler_ctl); /* the filler is tied to its own control block */
  c->filler_ctl.ptr_a=(void*)c; /* which in turn points back to the cache */
  c->filler_ctl.go.o.l=4;
  c->flusher.ctl=&(c->flusher_ctl); /* likewise for the flusher */
  c->flusher_ctl.ptr_a=(void*)c;
  c->flusher_ctl.go.o.l=4;
  c->ports=1;
  c->name=name;
  return c;
}

@ @=

@ @=

PV_size=(sizeof PV)/sizeof(pv_spec);

PV_size=(sizeof PV)/sizeof(pv_spec);

CPV_size=(sizeof CPV)/sizeof(cpv_spec);

CPV_size=(sizeof CPV)/sizeof(cpv_spec);

OP_size=(sizeof OP)/sizeof(op_spec);

OP_size=(sizeof OP)/sizeof(op_spec);

ITcache=new_cache("ITcache");

ITcache=new_cache("ITcache");

DTcache=new_cache("DTcache");

DTcache=new_cache("DTcache");

Icache=Dcache=Scache=NULL;

Icache=Dcache=Scache=NULL;

for (j=0;j

for (j=0;j

for (j=0;j

for (j=0;j

  pipe_seq[OP[j].v][0]=OP[j].defval;

  pipe_seq[OP[j].v][0]=OP[j].defval;

  pipe_seq[OP[j].v][1]=0; /* one stage */

  pipe_seq[OP[j].v][1]=0; /* one stage */

@* Reading the specs. Before we're ready to process the configuration file,

@* Reading the specs. Before we're ready to process the configuration file,

we need to count the number of functional units, so that we know

we need to count the number of functional units, so that we know

how much space to allocate for them.

how much space to allocate for them.

A special background unit is always provided, just to make sure that

A special background unit is always provided, just to make sure that

\.{TRAP} and \.{TRIP} instructions are handled by somebody.

\.{TRAP} and \.{TRIP} instructions are handled by somebody.

@=

@=

funit_count=0;

funit_count=0;

while (strcmp(token,"end")!=0) {

while (strcmp(token,"end")!=0) {

  get_token();

  get_token();

  if (strcmp(token,"unit")==0) {

  if (strcmp(token,"unit")==0) {

    funit_count++;

    funit_count++;

    get_token();@+get_token(); /* a unit might be named \.{unit} or \.{end} */

    get_token();@+get_token(); /* a unit might be named \.{unit} or \.{end} */

funit=(func*)calloc(funit_count+1,sizeof(func));

funit=(func*)calloc(funit_count+1,sizeof(func));

if (!funit) panic(errprint0("Can't allocate the functional units"));

if (!funit) panic(errprint0("Can't allocate the functional units"));

@.Can't allocate...@>

@.Can't allocate...@>

strcpy(funit[funit_count].name,"%%");

strcpy(funit[funit_count].name,"%%");

@.\%\%@>

@.\%\%@>

funit[funit_count].ops[0]=0x80000000; /* \.{TRAP} */

funit[funit_count].ops[0]=0x80000000; /* \.{TRAP} */

funit[funit_count].ops[7]=0x1; /* \.{TRIP} */

funit[funit_count].ops[7]=0x1; /* \.{TRIP} */

@ Now we can read the specifications and obey them. This program doesn't

@ Now we can read the specifications and obey them. This program doesn't

bother to be very tolerant of errors, nor does it try to be very efficient.

bother to be very tolerant of errors, nor does it try to be very efficient.

Incidentally, the specifications don't have to be broken into individual lines

Incidentally, the specifications don't have to be broken into individual lines

in any meaningful way. We simply read them token by token.

in any meaningful way. We simply read them token by token.

@=

@=

rewind(config_file);

rewind(config_file);

funit_count=0;

funit_count=0;

token[0]='\0';

token[0]='\0';

while (strcmp(token,"end")!=0) {

while (strcmp(token,"end")!=0) {

  get_token();

  get_token();

  if (strcmp(token,"end")==0) break;

  if (strcmp(token,"end")==0) break;

@;

@;

@;

@;

@;

@;

  if (strcmp(token,"unit")==0) @;

  if (strcmp(token,"unit")==0) @;

  panic(errprint1(

  panic(errprint1(

   "Configuration syntax error: Specification can't start with `%s'",token));

   "Configuration syntax error: Specification can't start with `%s'",token));

@.Configuration syntax error...@>

@.Configuration syntax error...@>

@ @=

@ @=

for (j=0;j

for (j=0;j

  n=get_int();

  n=get_int();

  if (n

  if (n

@.Configuration error...@>

@.Configuration error...@>

     "Configuration error: %s must be >= %d",PV[j].name,PV[j].minval));

     "Configuration error: %s must be >= %d",PV[j].name,PV[j].minval));

  if (n>PV[j].maxval) panic(errprint2(

  if (n>PV[j].maxval) panic(errprint2(

     "Configuration error: %s must be <= %d",PV[j].name,PV[j].maxval));

     "Configuration error: %s must be <= %d",PV[j].name,PV[j].maxval));

  if (PV[j].power_of_two && (n&(n-1))) panic(errprint1(

  if (PV[j].power_of_two && (n&(n-1))) panic(errprint1(

     "Configuration error: %s must be a power of 2",PV[j].name));

     "Configuration error: %s must be a power of 2",PV[j].name));

  *(PV[j].v)=n;

  *(PV[j].v)=n;

  break;

  break;

if (j

if (j

@ @=

@ @=

if (strcmp(token,"ITcache")==0) {

if (strcmp(token,"ITcache")==0) {

  pcs(ITcache);@+continue;

  pcs(ITcache);@+continue;

}@+else if (strcmp(token,"DTcache")==0) {

}@+else if (strcmp(token,"DTcache")==0) {

  pcs(DTcache);@+continue;

  pcs(DTcache);@+continue;

}@+else if (strcmp(token,"Icache")==0) {

}@+else if (strcmp(token,"Icache")==0) {

  if (!Icache) Icache=new_cache("Icache");

  if (!Icache) Icache=new_cache("Icache");

  pcs(Icache);@+continue;

  pcs(Icache);@+continue;

}@+else if (strcmp(token,"Dcache")==0) {

}@+else if (strcmp(token,"Dcache")==0) {

  if (!Dcache) Dcache=new_cache("Dcache");

  if (!Dcache) Dcache=new_cache("Dcache");

  pcs(Dcache);@+continue;

  pcs(Dcache);@+continue;

}@+else if (strcmp(token,"Scache")==0) {

}@+else if (strcmp(token,"Scache")==0) {

  if (!Icache) Icache=new_cache("Icache");

  if (!Icache) Icache=new_cache("Icache");

  if (!Dcache) Dcache=new_cache("Dcache");

  if (!Dcache) Dcache=new_cache("Dcache");

  if (!Scache) Scache=new_cache("Scache");

  if (!Scache) Scache=new_cache("Scache");

  pcs(Scache);@+continue;

  pcs(Scache);@+continue;

@ @=

@ @=

static void ppol @,@,@[ARGS((replace_policy*))@];@+@t}\6{@>
static void ppol(rr) /* subroutine to scan for a replacement policy */
  replace_policy *rr; /* set only if a policy name is present */
{
  get_token();
  if (strcmp(token,"random")==0) *rr=random;
  else if (strcmp(token,"serial")==0) *rr=serial;
  else if (strcmp(token,"pseudolru")==0) *rr=pseudo_lru;
  else if (strcmp(token,"lru")==0) *rr=lru;
  else token_prescanned=true; /* oops, we should rescan that token */
}

@ @=

@ @=

static void pcs @,@,@[ARGS((cache*))@];@+@t}\6{@>
static void pcs(c) /* subroutine to process a cache spec */
  cache *c; /* the cache whose parameter is being set */
{
  register int j,n;
  get_token(); /* the parameter name */
  for (j=0;j<CPV_size;j++) if (strcmp(token,CPV[j].name)==0) break;
  if (j==CPV_size) panic(errprint1(
     "Configuration syntax error: `%s' isn't a cache parameter name",token));
@.Configuration syntax error...@>
  n=get_int(); /* the parameter value */
  if (n<CPV[j].minval) panic(errprint2(
     "Configuration error: %s must be >= %d",CPV[j].name,CPV[j].minval));
@.Configuration error...@>
  if (n>CPV[j].maxval) panic(errprint2(
     "Configuration error: %s must be <= %d",CPV[j].name,CPV[j].maxval));
  if (CPV[j].power_of_two && (n&(n-1))) panic(errprint1(
     "Configuration error: %s must be power of 2",CPV[j].name));
  switch (CPV[j].v) {
 case assoc: c->aa=n;@+ppol(&(c->repl));@+break; /* a policy name may follow */
 case blksz: c->bb=n;@+break;
 case setsz: c->cc=n;@+break;
 case gran: c->gg=n;@+break;
 case vctsz: c->vv=n;@+ppol(&(c->vrepl));@+break; /* a policy name may follow */
 case wrb: c->mode=(c->mode&~WRITE_BACK)+n*WRITE_BACK;@+break;
 case wra: c->mode=(c->mode&~WRITE_ALLOC)+n*WRITE_ALLOC;@+break;
 case acctm:@+ if (n>max_cycs) max_cycs=n;
   c->access_time=n;@+break;
 case citm:@+ if (n>max_cycs) max_cycs=n;
   c->copy_in_time=n;@+break;
 case cotm:@+ if (n>max_cycs) max_cycs=n;
   c->copy_out_time=n;@+break;
 case prts: c->ports=n;@+break;
  }
}

@ @=

@ @=

for (j=0;j

for (j=0;j

  for (i=0;;i++) {

  for (i=0;;i++) {

    n=get_int();

    n=get_int();

    if (n<0) break;

    if (n<0) break;

    if (n==0) panic(errprint0(

    if (n==0) panic(errprint0(

      "Configuration error: Pipeline cycles must be positive"));

      "Configuration error: Pipeline cycles must be positive"));

@.Configuration error...@>

@.Configuration error...@>

    if (n>255) panic(errprint0(

    if (n>255) panic(errprint0(

      "Configuration error: Pipeline cycles must be <= 255"));

      "Configuration error: Pipeline cycles must be <= 255"));

    if (n>max_cycs) max_cycs=n;

    if (n>max_cycs) max_cycs=n;

    if (i>=pipe_limit) panic(errprint1(

    if (i>=pipe_limit) panic(errprint1(

      "Configuration error: More than %d pipeline stages",pipe_limit));

      "Configuration error: More than %d pipeline stages",pipe_limit));

    pipe_seq[OP[j].v][i]=n;

    pipe_seq[OP[j].v][i]=n;

  token_prescanned=true;

  token_prescanned=true;

  break;

  break;

if (j

if (j

@ @=

@ @=

  get_token();

  get_token();

  if (strlen(token)>15) panic(errprint1(

  if (strlen(token)>15) panic(errprint1(

       "Configuration error: `%s' is more than 15 characters long",token));

       "Configuration error: `%s' is more than 15 characters long",token));

@.Configuration error...@>

@.Configuration error...@>

  strcpy(funit[funit_count].name,token);

  strcpy(funit[funit_count].name,token);

  get_token();

  get_token();

  if (strlen(token)!=64) panic(errprint1(

  if (strlen(token)!=64) panic(errprint1(

       "Configuration error: unit %s doesn't have 64 hex digit specs",

       "Configuration error: unit %s doesn't have 64 hex digit specs",

                   funit[funit_count].name));

                   funit[funit_count].name));

  for (i=j=n=0;j<64;j++) {

  for (i=j=n=0;j<64;j++) {

    if (token[j]>='0' && token[j]<='9') n=(n<<4)+(token[j]-'0');

    if (token[j]>='0' && token[j]<='9') n=(n<<4)+(token[j]-'0');

    else if (token[j]>='a' && token[j]<='f') n=(n<<4)+(token[j]-'a'+10);

    else if (token[j]>='a' && token[j]<='f') n=(n<<4)+(token[j]-'a'+10);

    else if (token[j]>='A' && token[j]<='F') n=(n<<4)+(token[j]-'A'+10);

    else if (token[j]>='A' && token[j]<='F') n=(n<<4)+(token[j]-'A'+10);

    else panic(errprint1(

    else panic(errprint1(

        "Configuration error: `%c' is not a hex digit",token[j]));

        "Configuration error: `%c' is not a hex digit",token[j]));

    if ((j&0x7)==0x7) funit[funit_count].ops[i++]=n, n=0;

    if ((j&0x7)==0x7) funit[funit_count].ops[i++]=n, n=0;

  funit_count++;

  funit_count++;

  continue;

  continue;

@* Checking and allocating. The battle is only half over when we've

@* Checking and allocating. The battle is only half over when we've

absorbed all the data of the configuration file. We still must check for

absorbed all the data of the configuration file. We still must check for

interactions between different quantities, and we must allocate

interactions between different quantities, and we must allocate

space for cache blocks, coroutines, etc.

space for cache blocks, coroutines, etc.

One of the most difficult tasks facing us is to determine the maximum number

One of the most difficult tasks facing us is to determine the maximum number

of pipeline stages needed by each functional unit. Let's tackle that first.

of pipeline stages needed by each functional unit. Let's tackle that first.

@=

@=

@;

@;

for (j=0;j<=funit_count;j++) {

for (j=0;j<=funit_count;j++) {

@;

@;

  funit[j].k=n;

  funit[j].k=n;

  funit[j].co=(coroutine*)calloc(n,sizeof(coroutine));

  funit[j].co=(coroutine*)calloc(n,sizeof(coroutine));

  for (i=0;i

  for (i=0;i

    funit[j].co[i].name=funit[j].name;

    funit[j].co[i].name=funit[j].name;

    funit[j].co[i].stage=i+1;

    funit[j].co[i].stage=i+1;

@ @=

@ @=

for (j=div;j<=max_pipe_op;j++) int_stages[j]=strlen(pipe_seq[j]);
for (;j<=max_real_command;j++) int_stages[j]=1;
for (j=mul0,n=0;j<=mul8;j++) /* |mul| needs the longest of the nine multiply pipelines */
  if (strlen(pipe_seq[j])>n) n=strlen(pipe_seq[j]);
int_stages[mul]=n;
int_stages[ld]=int_stages[st]=int_stages[frem]=2;
for (j=0;j<256;j++) stages[j]=int_stages[int_op[j]];

@ The |int_op| conversion table is similar to the |internal_op| array of

@ The |int_op| conversion table is similar to the |internal_op| array of

the \\{MMIX\_pipe} routine, but it replaces |divu| by |div|,

the \\{MMIX\_pipe} routine, but it replaces |divu| by |div|,

|fsub| by |fadd|, etc.

|fsub| by |fadd|, etc.

@=

@=

internal_opcode int_op[256]={@/
  trap,fcmp,funeq,funeq,fadd,fix,fadd,fix,@/
  flot,flot,flot,flot,flot,flot,flot,flot,@/
  fmul,feps,feps,feps,fdiv,fsqrt,frem,fint,@/
  mul,mul,mul,mul,div,div,div,div,@/
  add,add,addu,addu,sub,sub,subu,subu,@/
  addu,addu,addu,addu,addu,addu,addu,addu,@/
  cmp,cmp,cmpu,cmpu,sub,sub,subu,subu,@/
  sh,sh,sh,sh,sh,sh,sh,sh,@/
  br,br,br,br,br,br,br,br,@/
  br,br,br,br,br,br,br,br,@/
  pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
  pbr,pbr,pbr,pbr,pbr,pbr,pbr,pbr,@/
  cset,cset,cset,cset,cset,cset,cset,cset,@/
  cset,cset,cset,cset,cset,cset,cset,cset,@/
  zset,zset,zset,zset,zset,zset,zset,zset,@/
  zset,zset,zset,zset,zset,zset,zset,zset,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,ld,ld,ld,ld,@/
  ld,ld,ld,ld,prego,prego,go,go,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,st,st,@/
  st,st,st,st,st,st,pushgo,pushgo,@/
  or,or,orn,orn,nor,nor,xor,xor,@/
  and,and,andn,andn,nand,nand,nxor,nxor,@/
  bdif,bdif,wdif,wdif,tdif,tdif,odif,odif,@/
  mux,mux,sadd,sadd,mor,mor,mor,mor,@/
  set,set,set,set,addu,addu,addu,addu,@/
  or,or,or,or,andn,andn,andn,andn,@/
  noop,noop,pushj,pushj,set,set,put,put,@/
  pop,resume,save,unsave,sync,noop,get,trip};

int int_stages[max_real_command+1];
       /* stages as function of |internal_opcode| */
int stages[256]; /* stages as function of |mmix_opcode| */

@ @=

@ @=

for (i=n=0;i<256;i++)

for (i=n=0;i<256;i++)

  if (((funit[j].ops[i>>5]<<(i&0x1f))&0x80000000) && stages[i]>n)

  if (((funit[j].ops[i>>5]<<(i&0x1f))&0x80000000) && stages[i]>n)

    n=stages[i];

    n=stages[i];

if (n==0) panic(errprint1(

if (n==0) panic(errprint1(

       "Configuration error: unit %s doesn't do anything",funit[j].name));

       "Configuration error: unit %s doesn't do anything",funit[j].name));

@.Configuration error...@>

@.Configuration error...@>

@ The next hardest thing on our agenda is to set up the cache structure

@ The next hardest thing on our agenda is to set up the cache structure

fields that depend on the parameters. For example, although we have defined

fields that depend on the parameters. For example, although we have defined

the parameter in the |bb| field (the block size), we also need to compute the

the parameter in the |bb| field (the block size), we also need to compute the

|b|~field (log of the block size), and we must create the cache blocks

|b|~field (log of the block size), and we must create the cache blocks

themselves.

themselves.

@=

@=

static int lg @,@,@[ARGS((int))@];@+@t}\6{@>
static int lg(n) /* compute binary logarithm */
  int n; /* the result is $-1$ when |n| is zero */
{@+register unsigned int j; /* unsigned, so that the shifting always reaches zero */
  register int l;
  for (j=(unsigned int)n,l=0;j;j>>=1) l++;
  return l-1;
}

@ @=

@ @=

static void alloc_cache @,@,@[ARGS((cache*,char*))@];@+@t}\6{@>

static void alloc_cache @,@,@[ARGS((cache*,char*))@];@+@t}\6{@>

static void alloc_cache(c,name)

static void alloc_cache(c,name)

  cache *c;

  cache *c;

  char *name;

  char *name;

{@+register int j,k;

{@+register int j,k;

  if (c->bbgg) panic(errprint1(

  if (c->bbgg) panic(errprint1(

      "Configuration error: blocksize of %s is less than granularity",name));

      "Configuration error: blocksize of %s is less than granularity",name));

@.Configuration error...@>

@.Configuration error...@>

  if (name[1]=='T' && c->bb!=8) panic(errprint1(

  if (name[1]=='T' && c->bb!=8) panic(errprint1(

      "Configuration error: blocksize of %s must be 8",name));

      "Configuration error: blocksize of %s must be 8",name));

  c->a=lg(c->aa);

  c->a=lg(c->aa);

  c->b=lg(c->bb);

  c->b=lg(c->bb);

  c->c=lg(c->cc);

  c->c=lg(c->cc);

  c->g=lg(c->gg);

  c->g=lg(c->gg);

  c->v=lg(c->vv);

  c->v=lg(c->vv);

  c->tagmask=-(1<<(c->b+c->c));

  c->tagmask=-(1<<(c->b+c->c));

  if (c->a+c->b+c->c>=32) panic(errprint1(

  if (c->a+c->b+c->c>=32) panic(errprint1(

     "Configuration error: %s has >= 4 gigabytes of data",name));

     "Configuration error: %s has >= 4 gigabytes of data",name));

  if (c->gg!=8 && !(c->mode&WRITE_ALLOC)) panic(errprint2(

  if (c->gg!=8 && !(c->mode&WRITE_ALLOC)) panic(errprint2(

     "Configuration error: %s does write-around with granularity %d",

     "Configuration error: %s does write-around with granularity %d",

        name,c->gg));

        name,c->gg));

@;

@;

  if (c->vv) @;

  if (c->vv) @;

  c->inbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));

  c->inbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));

  if (!c->inbuf.dirty) panic(errprint1(

  if (!c->inbuf.dirty) panic(errprint1(

     "Can't allocate dirty bits for inbuffer of %s",name));

     "Can't allocate dirty bits for inbuffer of %s",name));

@.Can't allocate...@>

@.Can't allocate...@>

  c->inbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));

  c->inbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));

    if (!c->inbuf.data) panic(errprint1(

    if (!c->inbuf.data) panic(errprint1(

     "Can't allocate data for inbuffer of %s",name));

     "Can't allocate data for inbuffer of %s",name));

  c->outbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));

  c->outbuf.dirty=(char*)calloc(c->bb>>c->g,sizeof(char));

  if (!c->outbuf.dirty) panic(errprint1(

  if (!c->outbuf.dirty) panic(errprint1(

     "Can't allocate dirty bits for outbuffer of %s",name));

     "Can't allocate dirty bits for outbuffer of %s",name));

  c->outbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));

  c->outbuf.data=(octa *)calloc(c->bb>>3,sizeof(octa));

    if (!c->outbuf.data) panic(errprint1(

    if (!c->outbuf.data) panic(errprint1(

     "Can't allocate data for outbuffer of %s",name));

     "Can't allocate data for outbuffer of %s",name));

  if (name[0]!='S') @;

  if (name[0]!='S') @;

@ @d sign_bit 0x80000000

@ @d sign_bit 0x80000000

@=

@=

c->set=(cacheset *)calloc(c->cc,sizeof(cacheset));

c->set=(cacheset *)calloc(c->cc,sizeof(cacheset));

if (!c->set) panic(errprint1(

if (!c->set) panic(errprint1(

     "Can't allocate cache sets for %s",name));

     "Can't allocate cache sets for %s",name));

@.Can't allocate...@>

@.Can't allocate...@>

for (j=0;jcc;j++) {

for (j=0;jcc;j++) {

  c->set[j]=(cacheblock *)calloc(c->aa,sizeof(cacheblock));

  c->set[j]=(cacheblock *)calloc(c->aa,sizeof(cacheblock));

  if (!c->set[j]) panic(errprint2(

  if (!c->set[j]) panic(errprint2(

    "Can't allocate cache blocks for set %d of %s",j,name));

    "Can't allocate cache blocks for set %d of %s",j,name));

  for (k=0;kaa;k++) {

  for (k=0;kaa;k++) {

    c->set[j][k].tag.h=sign_bit; /* invalid tag */

    c->set[j][k].tag.h=sign_bit; /* invalid tag */

    c->set[j][k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));

    c->set[j][k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));

    if (!c->set[j][k].dirty) panic(errprint3(

    if (!c->set[j][k].dirty) panic(errprint3(

      "Can't allocate dirty bits for block %d of set %d of %s",k,j,name));

      "Can't allocate dirty bits for block %d of set %d of %s",k,j,name));

    c->set[j][k].data=(octa *)calloc(c->bb>>3,sizeof(octa));

    c->set[j][k].data=(octa *)calloc(c->bb>>3,sizeof(octa));

    if (!c->set[j][k].data) panic(errprint3(

    if (!c->set[j][k].data) panic(errprint3(

      "Can't allocate data for block %d of set %d of %s",k,j,name));

      "Can't allocate data for block %d of set %d of %s",k,j,name));

@ @=

@ @=

  c->victim=(cacheblock*)calloc(c->vv,sizeof(cacheblock));

  c->victim=(cacheblock*)calloc(c->vv,sizeof(cacheblock));

  if (!c->victim) panic(errprint1(

  if (!c->victim) panic(errprint1(

      "Can't allocate blocks for victim cache of %s",name));

      "Can't allocate blocks for victim cache of %s",name));

  for (k=0;kvv;k++) {

  for (k=0;kvv;k++) {

    c->victim[k].tag.h=sign_bit; /* invalid tag */

    c->victim[k].tag.h=sign_bit; /* invalid tag */

    c->victim[k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));

    c->victim[k].dirty=(char *)calloc(c->bb>>c->g,sizeof(char));

    if (!c->victim[k].dirty) panic(errprint2(

    if (!c->victim[k].dirty) panic(errprint2(

      "Can't allocate dirty bits for block %d of victim cache of %s",

      "Can't allocate dirty bits for block %d of victim cache of %s",

                       k,name));

                       k,name));

@.Can't allocate...@>

@.Can't allocate...@>

    c->victim[k].data=(octa *)calloc(c->bb>>3,sizeof(octa));

    c->victim[k].data=(octa *)calloc(c->bb>>3,sizeof(octa));

    if (!c->victim[k].data) panic(errprint2(

    if (!c->victim[k].data) panic(errprint2(

      "Can't allocate data for block %d of victim cache of %s",k,name));

      "Can't allocate data for block %d of victim cache of %s",k,name));

@ @=

@ @=

  c->reader=(coroutine*)calloc(c->ports,sizeof(coroutine));

  c->reader=(coroutine*)calloc(c->ports,sizeof(coroutine));

  if (!c->reader) panic(errprint1(

  if (!c->reader) panic(errprint1(

@.Can't allocate...@>

@.Can't allocate...@>

        "Can't allocate readers for %s",name));

        "Can't allocate readers for %s",name));

  for (j=0;jports;j++) {

  for (j=0;jports;j++) {

    c->reader[j].stage=vanish;

    c->reader[j].stage=vanish;

    c->reader[j].name=(name[0]=='D'? (name[1]=='T'? "DTreader": "Dreader"):

    c->reader[j].name=(name[0]=='D'? (name[1]=='T'? "DTreader": "Dreader"):

                                     (name[1]=='T'? "ITreader": "Ireader"));

                                     (name[1]=='T'? "ITreader": "Ireader"));

@ @=

@ @=

/* Allocate every configured cache and name its filler and flusher
   coroutines. The two translation caches are allocated unconditionally;
   the I, D, and S caches are allocated only when present. */
alloc_cache(ITcache,"ITcache");
ITcache->filler.name="ITfiller";@+ ITcache->filler.stage=fill_from_virt;
alloc_cache(DTcache,"DTcache");
DTcache->filler.name="DTfiller";@+ DTcache->filler.stage=fill_from_virt;
if (Icache) {
  alloc_cache(Icache,"Icache");
  Icache->filler.name="Ifiller";@+ Icache->filler.stage=fill_from_mem;
}
if (Dcache) {
  alloc_cache(Dcache,"Dcache");
  Dcache->filler.name="Dfiller";@+ Dcache->filler.stage=fill_from_mem;
  Dcache->flusher.name="Dflusher";@+ Dcache->flusher.stage=flush_to_mem;
}
if (Scache) {
  /* NOTE(review): this branch dereferences |Icache| and |Dcache| without
     testing them, so it relies on both being present whenever |Scache|
     is; confirm that the configuration parser enforces this. */
  alloc_cache(Scache,"Scache");
  if (Scache->bb<Icache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Icache blocks"));
@.Configuration error...@>
  if (Scache->bb<Dcache->bb) panic(errprint0(
     "Configuration error: Scache blocks smaller than Dcache blocks"));
  if (Scache->gg!=Dcache->gg) panic(errprint0(
     "Configuration error: Scache granularity differs from the Dcache"));
  /* with a secondary cache present, the primary caches fill from it
     and flush to it, and only the S-cache talks to memory */
  Icache->filler.stage=fill_from_S;
  Dcache->filler.stage=fill_from_S;@+ Dcache->flusher.stage=flush_to_S;
  Scache->filler.name="Sfiller";@+ Scache->filler.stage=fill_from_mem;
  Scache->flusher.name="Sflusher";@+ Scache->flusher.stage=flush_to_mem;
}

@ Now we are nearly done. The only nontrivial task remaining is
to allocate the ring of queues for coroutine scheduling; for this we
need to determine the maximum waiting time that will occur between
scheduler and schedulee.

@=

@=

bus_words=mem_bus_bytes>>3;

bus_words=mem_bus_bytes>>3;

j=(mem_read_time

j=(mem_read_time

n=1;

n=1;

if (Scache && Scache->bb>n) n=Scache->bb;

if (Scache && Scache->bb>n) n=Scache->bb;

if (Icache && Icache->bb>n) n=Icache->bb;

if (Icache && Icache->bb>n) n=Icache->bb;

if (Dcache && Dcache->bb>n) n=Dcache->bb;

if (Dcache && Dcache->bb>n) n=Dcache->bb;

n=mem_addr_time+((int)(n+bus_words-1)/bus_words)*j;

n=mem_addr_time+((int)(n+bus_words-1)/bus_words)*j;

if (n>max_cycs) max_cycs=n; /* now |max_cycs| bounds the waiting time */

if (n>max_cycs) max_cycs=n; /* now |max_cycs| bounds the waiting time */

ring_size=max_cycs+1;

ring_size=max_cycs+1;

ring=(coroutine *)calloc(ring_size,sizeof(coroutine));

ring=(coroutine *)calloc(ring_size,sizeof(coroutine));

if (!ring) panic(errprint0("Can't allocate the scheduling ring"));

if (!ring) panic(errprint0("Can't allocate the scheduling ring"));

@.Can't allocate...@>

@.Can't allocate...@>

{@+register coroutine *p;

{@+register coroutine *p;

  for (p=ring;p

  for (p=ring;p

    p->name=""; /* header nodes are nameless */

    p->name=""; /* header nodes are nameless */

    p->stage=max_stage;

    p->stage=max_stage;

@ @s chunknode int

@ @s chunknode int

@=

@=

if (hash_prime<=mem_chunks_max) panic(errprint0(

if (hash_prime<=mem_chunks_max) panic(errprint0(

  "Configuration error: hashprime must exceed memchunksmax"));

  "Configuration error: hashprime must exceed memchunksmax"));

@.Configuration error...@>

@.Configuration error...@>

mem_hash=(chunknode *)calloc(hash_prime+1,sizeof(chunknode));

mem_hash=(chunknode *)calloc(hash_prime+1,sizeof(chunknode));

if (!mem_hash) panic(errprint0("Can't allocate the hash table"));

if (!mem_hash) panic(errprint0("Can't allocate the hash table"));

@.Can't allocate...@>

@.Can't allocate...@>

mem_hash[0].chunk=(octa*)calloc(1<<13,sizeof(octa));

mem_hash[0].chunk=(octa*)calloc(1<<13,sizeof(octa));

if (!mem_hash[0].chunk) panic(errprint0("Can't allocate chunk 0"));

if (!mem_hash[0].chunk) panic(errprint0("Can't allocate chunk 0"));

mem_hash[hash_prime].chunk=(octa*)calloc(1<<13,sizeof(octa));

mem_hash[hash_prime].chunk=(octa*)calloc(1<<13,sizeof(octa));

if (!mem_hash[hash_prime].chunk) panic(errprint0("Can't allocate 0 chunk"));

if (!mem_hash[hash_prime].chunk) panic(errprint0("Can't allocate 0 chunk"));

mem_chunks=1;

mem_chunks=1;

fetch_bot=(fetch*)calloc(fetch_buf_size+1,sizeof(fetch));

fetch_bot=(fetch*)calloc(fetch_buf_size+1,sizeof(fetch));

if (!fetch_bot) panic(errprint0("Can't allocate the fetch buffer"));

if (!fetch_bot) panic(errprint0("Can't allocate the fetch buffer"));

fetch_top=fetch_bot+fetch_buf_size;

fetch_top=fetch_bot+fetch_buf_size;

reorder_bot=(control*)calloc(reorder_buf_size+1,sizeof(control));

reorder_bot=(control*)calloc(reorder_buf_size+1,sizeof(control));

if (!reorder_bot) panic(errprint0("Can't allocate the reorder buffer"));

if (!reorder_bot) panic(errprint0("Can't allocate the reorder buffer"));

reorder_top=reorder_bot+reorder_buf_size;

reorder_top=reorder_bot+reorder_buf_size;

wbuf_bot=(write_node*)calloc(write_buf_size+1,sizeof(write_node));

wbuf_bot=(write_node*)calloc(write_buf_size+1,sizeof(write_node));

if (!wbuf_bot) panic(errprint0("Can't allocate the write buffer"));

if (!wbuf_bot) panic(errprint0("Can't allocate the write buffer"));

wbuf_top=wbuf_bot+write_buf_size;

wbuf_top=wbuf_bot+write_buf_size;

if (bp_n==0) bp_table=NULL;

if (bp_n==0) bp_table=NULL;

else { /* a branch prediction table is desired */

else { /* a branch prediction table is desired */

  if (bp_a+bp_b+bp_c>=32) panic(errprint0(

  if (bp_a+bp_b+bp_c>=32) panic(errprint0(

     "Configuration error: Branch table has >= 4 gigabytes of data"));

     "Configuration error: Branch table has >= 4 gigabytes of data"));

  bp_table=(char*)calloc(1<<(bp_a+bp_b+bp_c),sizeof(char));

  bp_table=(char*)calloc(1<<(bp_a+bp_b+bp_c),sizeof(char));

  if (!bp_table) panic(errprint0("Can't allocate the branch table"));

  if (!bp_table) panic(errprint0("Can't allocate the branch table"));

l=(specnode*)calloc(lring_size,sizeof(specnode));

l=(specnode*)calloc(lring_size,sizeof(specnode));

if (!l) panic(errprint0("Can't allocate local registers"));

if (!l) panic(errprint0("Can't allocate local registers"));

j=bus_words;

j=bus_words;

if (Icache && Icache->bb>j) j=Icache->bb;

if (Icache && Icache->bb>j) j=Icache->bb;

fetched=(octa*)calloc(j,sizeof(octa));

fetched=(octa*)calloc(j,sizeof(octa));

if (!fetched) panic(errprint0("Can't allocate prefetch buffer"));

if (!fetched) panic(errprint0("Can't allocate prefetch buffer"));

dispatch_stat=(int*)calloc(dispatch_max+1,sizeof(int));

dispatch_stat=(int*)calloc(dispatch_max+1,sizeof(int));

if (!dispatch_stat) panic(errprint0("Can't allocate dispatch counts"));

if (!dispatch_stat) panic(errprint0("Can't allocate dispatch counts"));

no_hardware_PT=1-hardware_PT;

no_hardware_PT=1-hardware_PT;

@* Putting it all together. Here then is the desired configuration
subroutine.

@c

@c

#include <stdio.h> /* |fopen|, |fgets|, |sscanf|, |rewind| */
#include <stdlib.h> /* |calloc|, |exit| */
#include <ctype.h> /* |isspace| */
#include <string.h> /* |strcpy|, |strlen|, |strcmp| */
#include <limits.h> /* |INT_MAX| */
#include "mmix-pipe.h"

@@;

@@;

@@;

@@;

@@;

@@;

/* |MMIX_config| opens the configuration file named by |filename| for
   reading, stores the stream in |config_file|, and reports a failure to
   open it through |panic|. */
void MMIX_config(filename)

void MMIX_config(filename)

  char *filename;

  char *filename;

{@+register int i,j,n;

{@+register int i,j,n;

  config_file=fopen(filename,"r");

  config_file=fopen(filename,"r");

  if (!config_file)

  if (!config_file)

    panic(errprint1("Can't open configuration file %s",filename));

    panic(errprint1("Can't open configuration file %s",filename));

@.Can't open...@>

@.Can't open...@>

/* NOTE(review): every line of this routine appears twice, and each of the
   bare markers below looks like a section reference whose name was lost
   when the file was extracted; the closing brace of the routine is also
   absent. Restore all of these from the upstream source. */
@;

@;

@;

@;

@;

@;

@;

@;

@;

@;

@;

@;

@;

@;

@*Index.

@*Index.

Browse

Tools

Subversion Repositories eco32

[/] [eco32/] [tags/] [eco32-0.22/] [fp/] [implementation/] [mmix/] [mmix-config.w] - Diff between revs 15 and 21