1 |
15 |
hellwig |
% This file is part of the MMIXware package (c) Donald E Knuth 1999
|
2 |
|
|
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
|
3 |
|
|
|
4 |
|
|
\def\title{MMIXAL}
|
5 |
|
|
|
6 |
|
|
\def\MMIX{\.{MMIX}}
|
7 |
|
|
\def\MMIXAL{\.{MMIXAL}}
|
8 |
|
|
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
|
9 |
|
|
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
|
10 |
|
|
\def\bull{\smallbreak\textindent{$\bullet$}}
|
11 |
|
|
@s and normal @q unreserve a C++ keyword @>
|
12 |
|
|
@s or normal @q unreserve a C++ keyword @>
|
13 |
|
|
@s xor normal @q unreserve a C++ keyword @>
|
14 |
|
|
|
15 |
|
|
\ifx\exotic+
|
16 |
|
|
\font\heb=heb8 at 10pt
|
17 |
|
|
\font\rus=lhwnr8
|
18 |
|
|
\input unicode
|
19 |
|
|
\unicodeptsize=8pt
|
20 |
|
|
\fi
|
21 |
|
|
|
22 |
|
|
@* Definition of MMIXAL. This program takes input written in \MMIXAL,
|
23 |
|
|
the \MMIX\ assembly language, and translates it
|
24 |
|
|
@^assembly language@>
|
25 |
|
|
into binary files that can be loaded and executed
|
26 |
|
|
on \MMIX\ simulators. \MMIXAL\ is much simpler than the ``industrial
|
27 |
|
|
strength'' assembly languages that computer manufacturers usually provide,
|
28 |
|
|
because it is primarily intended for the simple demonstration programs
|
29 |
|
|
in {\sl The Art of Computer Programming}. Yet it tries to have enough
|
30 |
|
|
features to serve also as the back end of compilers for \CEE/ and other
|
31 |
|
|
high-level languages.
|
32 |
|
|
|
33 |
|
|
Instructions for using the program appear at the end of this document.
|
34 |
|
|
First we will discuss the input and output languages in detail; then we'll
|
35 |
|
|
consider the translation process, step by step; then we'll put everything
|
36 |
|
|
together.
|
37 |
|
|
|
38 |
|
|
@ A program in \MMIXAL\ consists of a series of {\it lines}, each of which
|
39 |
|
|
usually contains a single instruction. However, lines with no instructions are
|
40 |
|
|
possible, and so are lines with two or more instructions.
|
41 |
|
|
|
42 |
|
|
Each instruction has
|
43 |
|
|
three parts called its label field, opcode field, and operand field; these
|
44 |
|
|
fields are separated from each other by one or more spaces.
|
45 |
|
|
The label field, which is often empty, consists of all characters up to the
|
46 |
|
|
first blank space. The opcode field, which is never empty, runs from the first
|
47 |
|
|
nonblank after the label to the next blank space. The operand field, which
|
48 |
|
|
again might be empty, runs from the next nonblank character (if any) to the
|
49 |
|
|
first blank or semicolon that isn't part of a string or character constant.
|
50 |
|
|
If the operand field is followed by a semicolon, possibly with intervening
|
51 |
|
|
blanks, a new instruction begins immediately after the semicolon; otherwise
|
52 |
|
|
the rest of the line is ignored. The end of a line is treated as a blank space
|
53 |
|
|
for the purposes of these rules, with the additional proviso that
|
54 |
|
|
string or character constants are not allowed to extend from one line to
|
55 |
|
|
another.
|
56 |
|
|
|
57 |
|
|
The label field must begin with a letter or a digit; otherwise the entire
|
58 |
|
|
line is treated as a comment. Popular ways to introduce comments,
|
59 |
|
|
either at the beginning of a line or after the operand field, are to
|
60 |
|
|
precede them by the character \.\% as in \TeX, or by \.{//} as in \CPLUSPLUS/;
|
61 |
|
|
\MMIXAL\ is not very particular. However, Lisp-style comments introduced
|
62 |
|
|
by single semicolons will fail if they follow an instruction, because
|
63 |
|
|
they will be assumed to introduce another instruction.
|
64 |
|
|
|
65 |
|
|
@ \MMIXAL\ has no built-in macro capability, nor does it know how to
|
66 |
|
|
include header files and such things. But users can run their files
|
67 |
|
|
through a standard \CEE/ preprocessor to obtain \MMIXAL\ programs in which
|
68 |
|
|
macros and such things have been expanded. (Caution: The preprocessor also
|
69 |
|
|
removes \CEE/-style comments, unless it is told not to do so.)
|
70 |
|
|
Literate programming tools could also be used for preprocessing.
|
71 |
|
|
@^C preprocessor@>
|
72 |
|
|
@^literate programming@>
|
73 |
|
|
|
74 |
|
|
If a line begins with the special form `\.\# \<integer> \<string>',
|
75 |
|
|
this program interprets it as a {\it line directive\/} emitted by a
|
76 |
|
|
preprocessor. For example,
|
77 |
|
|
$$\leftline{\indent\.{\# 13 "foo.mms"}}$$
|
78 |
|
|
means that the following line was line 13 in the user's source file
|
79 |
|
|
\.{foo.mms}. Line directives allow us to correlate errors with the
|
80 |
|
|
user's original file; we also pass them to the output, for use by
|
81 |
|
|
simulators and debuggers.
|
82 |
|
|
@^line directives@>
|
83 |
|
|
|
84 |
|
|
@ \MMIXAL\ deals primarily with {\it symbols\/} and {\it constants}, which it
|
85 |
|
|
interprets and combines to form machine language instructions and data.
|
86 |
|
|
Constants are simplest, so we will discuss them first.
|
87 |
|
|
|
88 |
|
|
A {\it decimal constant\/} is a sequence of digits, representing a number in
|
89 |
|
|
radix~10. A~{\it hexadecimal constant\/} is a sequence of hexadecimal digits,
|
90 |
|
|
preceded by~\.\#, representing a number in radix~16:
|
91 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
92 |
|
|
\<digit>\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid
|
93 |
|
|
\.5\mid\.6\mid\.7\mid\.8\mid\.9\cr
|
94 |
|
|
\<hex digit>\is\<digit>\mid\.A\mid\.B\mid\.C\mid\.D\mid\.E\mid\.F\mid
|
95 |
|
|
\.a\mid\.b\mid\.c\mid\.d\mid\.e\mid\.f\cr
|
96 |
|
|
\<decimal constant>\is\<digit>\mid\<decimal constant>\<digit>\cr
|
97 |
|
|
\<hex constant>\is\.\#\<hex digit>\mid\<hex constant>\<hex digit>\cr
|
98 |
|
|
}}$$
|
99 |
|
|
Constants whose value is $2^{64}$ or more are reduced modulo $2^{64}$.
|
100 |
|
|
|
101 |
|
|
@ A {\it character constant\/} is a single character enclosed in
|
102 |
|
|
single quote marks; it denotes the {\mc ASCII} or Unicode number
|
103 |
|
|
@^Unicode@>
|
104 |
|
|
corresponding to that character. For example, \.{'a'}
|
105 |
|
|
represents the constant \.{\#61}, also known as~\.{97}. The quoted character
|
106 |
|
|
can be
|
107 |
|
|
anything except the character that the \CEE/ library calls \.{\\n} or {\it
|
108 |
|
|
newline}; that character should be represented as \.{\#a}.
|
109 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
110 |
|
|
\<character constant>\is\.'\<single byte character except newline>\.'\cr
|
111 |
|
|
\<constant>\is\<decimal constant>\mid\<hex constant>\mid\<character constant>
|
112 |
|
|
\cr}}$$
|
113 |
|
|
Notice that \.{'''} represents a single quote, the code \.{\#27}; and
|
114 |
|
|
\.{'\\'} represents a backslash, the code \.{\#5c}. \MMIXAL~characters are
|
115 |
|
|
never ``quoted'' by backslashes as in the \CEE/~language.
|
116 |
|
|
|
117 |
|
|
In the present implementation
|
118 |
|
|
a character constant will always be at most 255, since wyde character
|
119 |
|
|
input is not supported.
|
120 |
|
|
\ifx\exotic+ But if the input were in Unicode one could write,
|
121 |
|
|
say, \.'{\heb\char"40}\.' or \.'{\rus ZH}\.' for \.{\#05d0} or
|
122 |
|
|
\.{\#0416}. \fi
|
123 |
|
|
The present program
|
124 |
|
|
does not support Unicode directly because basic software for inputting and
|
125 |
|
|
outputting 16-bit characters was still in a primitive state at the time of
|
126 |
|
|
writing. But the data structures below are designed so that a change to
|
127 |
|
|
Unicode will not be difficult when the time is ripe.
|
128 |
|
|
|
129 |
|
|
@ A {\it string constant\/} like \.{"Hello"} is an abbreviation for
|
130 |
|
|
a sequence of one or more character constants separated by commas:
|
131 |
|
|
\.{'H','e','l','l','o'}.
|
132 |
|
|
Any character except newline or the double quote mark~\."
|
133 |
|
|
can appear between the double quotes of a string constant.
|
134 |
|
|
\ifx\exotic+ Similarly,
|
135 |
|
|
\."\Uni1.08:24:24:-1:20% Unicode char "9ad8
|
136 |
|
|
<002000001800000806ffffff00000002004003ffe00300e00300c00300c003ffc0%
|
137 |
|
|
0300c02000043ffffe30000e31008c31ffcc3181cc31818c31818c31ff8c31818c3%
|
138 |
|
|
0007c300018>%
|
139 |
|
|
\thinspace\Uni1.08:24:24:-1:20% Unicode char "5fb7
|
140 |
|
|
<1c038018030018030631ffff30060067860446fffe86ccce0ccccc0ccccc18cccc%
|
141 |
|
|
18fffc38c00c38001878fffc58040098030818398618b18318b00b19b0081b300c1%
|
142 |
|
|
b3ffc181ff8>%
|
143 |
|
|
\thinspace\Uni1.08:24:24:-1:20% Unicode char "7eb3
|
144 |
|
|
<0601c00e01800c018018018018218231bfff61b187433186ff3186c631860c3186%
|
145 |
|
|
18334630332663b6367e341660380600300600300603b0061e3006f03006c030060%
|
146 |
|
|
0303e00300c>%
|
147 |
|
|
\kern.1em\." is an abbreviation for
|
148 |
|
|
\.'\Uni1.08:24:24:-1:20% Unicode char "9ad8
|
149 |
|
|
<002000001800000806ffffff00000002004003ffe00300e00300c00300c003ffc0%
|
150 |
|
|
0300c02000043ffffe30000e31008c31ffcc3181cc31818c31818c31ff8c31818c3%
|
151 |
|
|
0007c300018>%
|
152 |
|
|
\.{','}\Uni1.08:24:24:-1:20% Unicode char "5fb7
|
153 |
|
|
<1c038018030018030631ffff30060067860446fffe86ccce0ccccc0ccccc18cccc%
|
154 |
|
|
18fffc38c00c38001878fffc58040098030818398618b18318b00b19b0081b300c1%
|
155 |
|
|
b3ffc181ff8>%
|
156 |
|
|
\.{','}\Uni1.08:24:24:-1:20% Unicode char "7eb3
|
157 |
|
|
<0601c00e01800c018018018018218231bfff61b187433186ff3186c631860c3186%
|
158 |
|
|
18334630332663b6367e341660380600300600300603b0061e3006f03006c030060%
|
159 |
|
|
0303e00300c>%
|
160 |
|
|
\.' (namely \.{\#9ad8,\#5fb7,\#7eb3}) when Unicode is supported.
|
161 |
|
|
@^Unicode@>
|
162 |
|
|
\fi
|
163 |
|
|
|
164 |
|
|
@ A {\it symbol\/} in \MMIXAL\ is any sequence of letters and digits,
|
165 |
|
|
beginning with a letter. A~colon~`\.:' or underscore symbol `\.\_'
|
166 |
|
|
is regarded as a letter, for purposes of this definition.
|
167 |
|
|
All extended-ASCII characters like `{\tt \'e}',
|
168 |
|
|
whose 8-bit code exceeds 126, are also treated as letters.
|
169 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
170 |
|
|
\<letter>\is\.A\mid\.B\mid\cdots\mid\.Z\mid\.a\mid\.b\mid\cdots\mid\.z\mid
|
171 |
|
|
\.:\mid\.\_\mid\<{character with code value $>126$}>\cr
|
172 |
|
|
\<symbol>\is\<letter>\mid\<symbol>\<letter>\mid\<symbol>\<digit>\cr
|
173 |
|
|
}}$$
|
174 |
|
|
|
175 |
|
|
In future implementations, when \MMIXAL\ is used with Unicode,
|
176 |
|
|
@^Unicode@>
|
177 |
|
|
all wyde characters whose 16-bit code exceeds 126 will be regarded
|
178 |
|
|
as letters; thus \MMIXAL\ symbols will be able to involve Greek letters or
|
179 |
|
|
Chinese characters or thousands of other glyphs.
|
180 |
|
|
@ A symbol is said to
|
181 |
|
|
be {\it fully qualified\/} if it begins with a colon. Every symbol
|
182 |
|
|
that is not fully qualified is an abbreviation for the fully qualified
|
183 |
|
|
symbol obtained by placing the {\it current prefix\/} in front of it;
|
184 |
|
|
the current prefix is always fully qualified. At the beginning of an
|
185 |
|
|
\MMIXAL\ program the current prefix is simply the single character~`\.:',
|
186 |
|
|
but the user can change it with the \.{PREFIX} command. For example,
|
187 |
|
|
$$\vbox{\halign{&\quad\tt#\hfil\cr
|
188 |
|
|
ADD&x,y,z&\% means ADD :x,:y,:z\cr
|
189 |
|
|
PREFIX&Foo:&\% current prefix is :Foo:\cr
|
190 |
|
|
ADD&x,y,z&\% means ADD :Foo:x,:Foo:y,:Foo:z\cr
|
191 |
|
|
PREFIX&Bar:&\% current prefix is :Foo:Bar:\cr
|
192 |
|
|
ADD&:x,y,:z&\% means ADD :x,:Foo:Bar:y,:z\cr
|
193 |
|
|
PREFIX&:&\% current prefix reverts to :\cr
|
194 |
|
|
ADD&x,Foo:Bar:y,Foo:z&\% means ADD :x,:Foo:Bar:y,:Foo:z\cr
|
195 |
|
|
}}$$
|
196 |
|
|
This mechanism allows large programs to avoid conflicts between symbol names,
|
197 |
|
|
when parts of the program are independent and/or written by different users.
|
198 |
|
|
The current prefix conventionally ends with a colon, but this convention
|
199 |
|
|
need not be obeyed.
|
200 |
|
|
|
201 |
|
|
@ A {\it local symbol\/} is a decimal digit followed by one of the
|
202 |
|
|
letters \.B, \.F, or~\.H, meaning ``backward,'' ``forward,'' or ``here'':
|
203 |
|
|
$$\vbox{\halign{$#$\hfill\cr
|
204 |
|
|
\<local operand>\is\<digit>\,\.B\mid\<digit>\,\.F\cr
|
205 |
|
|
\<local label>\is\<digit>\,\.H\cr
|
206 |
|
|
}}$$
|
207 |
|
|
The \.B and \.F forms are permitted only in the operand field of \MMIXAL\
|
208 |
|
|
instructions; the \.H form is permitted only in the label field. A local
|
209 |
|
|
operand such as~\.{2B} stands for the last local label~\.{2H}
|
210 |
|
|
in instructions before the current one, or 0 if \.{2H} has not yet appeared
|
211 |
|
|
as a label. A~local operand such as~\.{2F} stands
|
212 |
|
|
for the first \.{2H} in instructions after the current one. Thus, in a
|
213 |
|
|
sequence such as
|
214 |
|
|
$$\vbox{\halign{\tt#\cr 2H JMP 2F\cr 2H JMP 2B\cr}}$$
|
215 |
|
|
the first instruction jumps to the second and the second jumps to the first.
|
216 |
|
|
|
217 |
|
|
Local symbols are useful for references to nearby points of a program, in
|
218 |
|
|
cases where no meaningful name is appropriate. They can also be useful
|
219 |
|
|
in special situations where a redefinable symbol is needed; for example,
|
220 |
|
|
an instruction like
|
221 |
|
|
$$\.{9H IS 9B+1}$$
|
222 |
|
|
will maintain a running counter.
|
223 |
|
|
|
224 |
|
|
@ Each symbol receives a value called its {\it equivalent\/} when it
|
225 |
|
|
appears in the label field of an instruction; it is said to be {\it defined\/}
|
226 |
|
|
after its equivalent has been established. A few symbols, like \.{rA}
|
227 |
|
|
and \.{ROUND\_OFF} and \.{Fopen},
|
228 |
|
|
are predefined because they refer to fixed constants
|
229 |
|
|
associated with the \MMIX\ hardware or its rudimentary operating system;
|
230 |
|
|
otherwise every symbol should be
|
231 |
|
|
defined exactly once. The two appearances of `\.{2H}' in the example
|
232 |
|
|
above do not violate this rule, because the second `\.{2H}' is not the
|
233 |
|
|
same symbol as the first.
|
234 |
|
|
|
235 |
|
|
A predefined symbol can be redefined (given a new equivalent). After it
|
236 |
|
|
has been redefined it acts like an ordinary symbol and cannot be
|
237 |
|
|
redefined again. A complete list of the predefined symbols appears
|
238 |
|
|
in the program listing below.
|
239 |
|
|
@^predefined symbols@>
|
240 |
|
|
|
241 |
|
|
Equivalents are either {\it pure\/} or {\it register numbers}. A pure
|
242 |
|
|
equivalent is an unsigned octabyte, but a register number
|
243 |
|
|
equivalent is a one-byte value, between 0 and~255.
|
244 |
|
|
A dollar sign is used to change a pure number into a register number;
|
245 |
|
|
for example, `\.{\$20}' means register number~20.
|
246 |
|
|
|
247 |
|
|
@ Constants and symbols are combined into {\it expressions\/} in a simple way:
|
248 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
249 |
|
|
\<primary expression>\is\<constant>\mid\<symbol>\mid\<local operand>\mid
|
250 |
|
|
\.{@@}\mid\cr
|
251 |
|
|
\hskip12pc\.(\<expression>\.)\mid\<unary operator>\<primary expression>\cr
|
252 |
|
|
\<term>\is\<primary expression>\mid
|
253 |
|
|
\<term>\<strong operator>\<primary expression>\cr
|
254 |
|
|
\<expression>\is\<term>\mid\<expression>\<weak operator>\<term>\cr
|
255 |
|
|
\<unary operator>\is\.+\mid\.-\mid\.\~\mid\.\$\mid\.\&\cr
|
256 |
|
|
\<strong operator>\is\.*\mid\./\mid\.{//}\mid\.\%\mid\.{<<}\mid\.{>>}
|
257 |
|
|
\mid\.\&\cr
|
258 |
|
|
\<weak operator>\is\.+\mid\.-\mid\.{\char'174}\mid\.\^\cr
|
259 |
|
|
}}$$
|
260 |
|
|
Each expression has a value that is either pure or a register number.
|
261 |
|
|
The character \.{@@} stands for the current location, which is always pure.
|
262 |
|
|
The unary operators
|
263 |
|
|
\.+, \.-, \.\~, \.\$, and \.\& mean, respectively, ``do nothing,''
|
264 |
|
|
``subtract from zero,'' ``complement the bits,'' ``change from pure value
|
265 |
|
|
to register number,'' and ``take the serial number.'' Only the first of these,
|
266 |
|
|
\.+, can be applied to a register number. The last unary operator, \.\&,
|
267 |
|
|
applies only to symbols, and it is of interest primarily to system programmers;
|
268 |
|
|
it converts a symbol to the unique positive integer that is used to identify
|
269 |
|
|
it in the binary file output by \MMIXAL.
|
270 |
|
|
@^serial number@>
|
271 |
|
|
|
272 |
|
|
Binary operators come in two flavors, strong and weak. The strong ones
|
273 |
|
|
are essentially concerned with multiplication or division: \.{x*y},
|
274 |
|
|
\.{x/y}, \.{x//y}, \.{x\%y}, \.{x<<y}, \.{x>>y}, and \.{x\&y}
|
275 |
|
|
stand respectively for
|
276 |
|
|
$(x\times y)\bmod2^{64}$ (multiplication), $\lfloor x/y\rfloor$ (division),
|
277 |
|
|
$\lfloor2^{64}x/y\rfloor$ (fractional division), $x\bmod y$ (remainder),
|
278 |
|
|
$(x\times2^y)\bmod2^{64}$ (left~shift), $\lfloor x/2^y\rfloor$
|
279 |
|
|
(right shift), and $x\land y$ (bitwise and) on unsigned octabytes.
|
280 |
|
|
Division is legal only if $y>0$; fractional division is
|
281 |
|
|
legal only if $x<y$. None of the strong binary operations can be
|
282 |
|
|
applied to register numbers.
|
283 |
|
|
|
284 |
|
|
The weak binary operations \.{x+y}, \.{x-y}, \.{x\char'174 y}, and
|
285 |
|
|
\.{x\^y} stand respectively for $(x+y)\bmod2^{64}$ (addition),
|
286 |
|
|
$(x-y)\bmod2^{64}$ (subtraction),
|
287 |
|
|
$x\lor y$ (bitwise or), and $x\oplus y$ (bitwise exclusive-or) on
|
288 |
|
|
unsigned octabytes. These operations can be applied to register
|
289 |
|
|
numbers only in four contexts: $\<register>+\<pure>$, $\<pure>+\<register>$,
|
290 |
|
|
$\<register>-\<pure>$
|
291 |
|
|
and $\<register>-\<register>$. For example, if \.{x} denotes \.{\$1} and
|
292 |
|
|
\.{y} denotes \.{\$10}, then \.{x+3} and \.{3+x} denote \.{\$4}, and
|
293 |
|
|
\.{y-x} denotes the pure value \.{9}.
|
294 |
|
|
|
295 |
|
|
Register numbers within expressions are allowed to be
|
296 |
|
|
arbitrary octabytes, but a register number assigned as the
|
297 |
|
|
equivalent of a symbol should not exceed 255.
|
298 |
|
|
|
299 |
|
|
(Incidentally, one might ask why the designer of \MMIXAL\ did not simply
|
300 |
|
|
adopt the existing rules of \CEE/ for expressions. The primary reason is that
|
301 |
|
|
the designers of \CEE/ chose to give \.{<<}, \.{>>}, and \.\& a lower
|
302 |
|
|
precedence than~\.+; but in \MMIXAL\ we want to be able to write things
|
303 |
|
|
like \.{o<<24+x<<16+y<<8+z} or \.{@@+yz<<2} or \.{@@+(\#100-@@)\&\#ff}.
|
304 |
|
|
Since the conventions of \CEE/ were inappropriate, it was better
|
305 |
|
|
to make a clean break, not pretending to have a close relationship
|
306 |
|
|
with that language. The new rules are quite easily memorized,
|
307 |
|
|
because \MMIXAL\ has just two levels of precedence, and the strong binary
|
308 |
|
|
operations are all essentially multiplicative by nature
|
309 |
|
|
while the weak binary operations are essentially additive.)
|
310 |
|
|
|
311 |
|
|
@ A symbol is called a {\it future reference\/} until it has been defined.
|
312 |
|
|
\MMIXAL\ restricts the use of future references, so that programs can
|
313 |
|
|
be assembled quickly in one pass over the input; therefore all
|
314 |
|
|
expressions can be evaluated when the \MMIXAL\ processor first sees them.
|
315 |
|
|
|
316 |
|
|
The restrictions are easily stated: Future references
|
317 |
|
|
cannot be used in expressions together with unary or binary operators (except
|
318 |
|
|
the unary~\.+, which does nothing); moreover, future references
|
319 |
|
|
can appear as operands only in instructions that have relative
|
320 |
|
|
addresses (namely branches, probable branches, \.{JMP}, \.{PUSHJ},
|
321 |
|
|
\.{GETA}) or in octabyte constants (the pseudo-operation \.{OCTA}).
|
322 |
|
|
Thus, for example, one can say \.{JMP}~\.{1F} or \.{JMP}~\.{1B-4}, but not
|
323 |
|
|
\.{JMP}~\.{1F-4}.
|
324 |
|
|
|
325 |
|
|
@ We noted earlier that each \MMIXAL\ instruction contains
|
326 |
|
|
a label field, an opcode field, and an operand field. The label field is
|
327 |
|
|
either empty or a symbol or local label; when it is nonempty, the
|
328 |
|
|
symbol or local label receives an equivalent. The operand field is
|
329 |
|
|
either empty or a sequence of expressions separated by commas; when
|
330 |
|
|
it is empty, it is equivalent to the simple operand field~`\.0'.
|
331 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
332 |
|
|
\<instruction>\is\<label>\<opcode>\<operand list>\cr
|
333 |
|
|
\<label>\is\<empty>\mid\<symbol>\mid\<local label>\cr
|
334 |
|
|
\<operand list>\is\<empty>\mid\<expression list>\cr
|
335 |
|
|
\<expression list>\is\<expression>\mid\<expression list>\.,\<expression>\cr
|
336 |
|
|
}}$$
|
337 |
|
|
|
338 |
|
|
The opcode field either contains a symbolic \MMIX\ operation name (like
|
339 |
|
|
\.{ADD}), or an {\it alias operation}, or a {\it pseudo-operation}.
|
340 |
|
|
Alias operations are alternate names for \MMIX\ operations whose standard
|
341 |
|
|
names are inappropriate in certain contexts.
|
342 |
|
|
Pseudo-operations do not correspond
|
343 |
|
|
directly to \MMIX\ commands, but they govern the assembly process in
|
344 |
|
|
important ways.
|
345 |
|
|
|
346 |
|
|
There are two alias operations:
|
347 |
|
|
|
348 |
|
|
\bull \.{SET} \.{\$X,\$Y} is equivalent to \.{OR} \.{\$X,\$Y,0}; it sets
|
349 |
|
|
register~X to register~Y. Similarly, \.{SET} \.{\$X,Y} (when \.Y is
|
350 |
|
|
not a register) is equivalent to \.{SETL} \.{\$X,Y}.
|
351 |
|
|
@.SET@>
|
352 |
|
|
|
353 |
|
|
\bull \.{LDA} \.{\$X,\$Y,\$Z} is equivalent to \.{ADDU} \.{\$X,\$Y,\$Z};
|
354 |
|
|
it loads the address of memory location $\rm \$Y+\$Z$ into register~X.
|
355 |
|
|
Similarly, \.{LDA} \.{\$X,\$Y,Z} is equivalent to \.{ADDU} \.{\$X,\$Y,Z}.
|
356 |
|
|
@.LDA@>
|
357 |
|
|
|
358 |
|
|
\smallskip
|
359 |
|
|
The symbolic operation names for genuine \MMIX\ operations
|
360 |
|
|
should not include the suffix~\.I for an immediate operation or the suffix~\.B
|
361 |
|
|
for a backward jump; \MMIXAL\ determines such things automatically.
|
362 |
|
|
Thus, one never writes \.{ADDI} or \.{JMPB} in the source input to
|
363 |
|
|
\MMIXAL, although such opcodes might appear when a simulator or
|
364 |
|
|
debugger or disassembler is presenting a numeric instruction in symbolic form.
|
365 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
366 |
|
|
\<opcode>\is\<symbolic \MMIX\ operation>\mid\<alias operation>\cr
|
367 |
|
|
\hskip12pc\mid\<pseudo-operation>\cr
|
368 |
|
|
\<symbolic \MMIX\ operation>\is\.{TRAP}\mid\.{FCMP}\mid\cdots\mid\.{TRIP}\cr
|
369 |
|
|
\<alias operation>\is\.{SET}\mid\.{LDA}\cr
|
370 |
|
|
\<pseudo-operation>\is\.{IS}\mid\.{LOC}\mid\.{PREFIX}\mid
|
371 |
|
|
\.{GREG}\mid\.{LOCAL}\mid\.{BSPEC}\mid\.{ESPEC}\cr
|
372 |
|
|
\hskip12pc\mid\.{BYTE}\mid\.{WYDE}\mid\.{TETRA}\mid\.{OCTA}\cr
|
373 |
|
|
}}$$
|
374 |
|
|
|
375 |
|
|
@ \MMIX\ operations like \.{ADD} require exactly three expressions as
|
376 |
|
|
operands. The first two must be register numbers. The third must be either a
|
377 |
|
|
register number or a pure number between 0 and~255; in the latter case,
|
378 |
|
|
\.{ADD} becomes \.{ADDI} in the assembled output. Thus, for example,
|
379 |
|
|
the command ``set register~1 to the sum of register~2 and register~3'' could be
|
380 |
|
|
expressed as
|
381 |
|
|
$$\.{ADD \$1,\$2,\$3}$$
|
382 |
|
|
or as, say,
|
383 |
|
|
$$\.{ADD x,y,y+1}$$
|
384 |
|
|
if the equivalent of \.x is \.{\$1} and the equivalent of \.y is \.{\$2}.
|
385 |
|
|
The command ``subtract 5 from register~1'' could be expressed as
|
386 |
|
|
$$\.{SUB \$1,\$1,5}$$
|
387 |
|
|
or as
|
388 |
|
|
$$\.{SUB x,x,5}$$
|
389 |
|
|
but not as `\.{SUBI} \.{\$1,\$1,5}' or `\.{SUBI} \.{x,x,5}'.
|
390 |
|
|
|
391 |
|
|
\MMIX\ operations like \.{FLOT} require either three operands
|
392 |
|
|
(register, pure, register/pure) or only two (register, register/pure).
|
393 |
|
|
In the first case the middle operand is the rounding mode, which is
|
394 |
|
|
best expressed in terms of the predefined symbolic values
|
395 |
|
|
\.{ROUND\_CURRENT}, \.{ROUND\_OFF}, \.{ROUND\_UP}, \.{ROUND\_DOWN},
|
396 |
|
|
\.{ROUND\_NEAR}, for $(0,1,2,3,4)$ respectively. In the second case
|
397 |
|
|
the middle operand is understood to be zero (namely,
|
398 |
|
|
\.{ROUND\_CURRENT}).
|
399 |
|
|
@:ROUND_OFF}\.{ROUND\_OFF@>
|
400 |
|
|
@:ROUND_UP}\.{ROUND\_UP@>
|
401 |
|
|
@:ROUND_DOWN}\.{ROUND\_DOWN@>
|
402 |
|
|
@:ROUND_NEAR}\.{ROUND\_NEAR@>
|
403 |
|
|
@:ROUND_CURRENT}\.{ROUND\_CURRENT@>
|
404 |
|
|
|
405 |
|
|
\MMIX\ operations like \.{SETL} or \.{INCH}, which involve a wyde
|
406 |
|
|
intermediate constant, require exactly two operands, (register, pure).
|
407 |
|
|
The value of the second operand should fit in two bytes.
|
408 |
|
|
|
409 |
|
|
\MMIX\ operations like \.{BNZ}, which mention a register and a
|
410 |
|
|
relative address, also require two operands. The first operand
|
411 |
|
|
should be a register number. The second operand should yield a result~$r$
|
412 |
|
|
in the range $-2^{16}\le r<2^{16}$ when the current location is subtracted
|
413 |
|
|
from it and the result is divided by~4. The second operand might also
|
414 |
|
|
be undefined; in that case, the eventual value must satisfy the
|
415 |
|
|
restriction stated for defined values. The opcodes \.{GETA} and
|
416 |
|
|
\.{PUSHJ} are similar, except that the first operand to \.{PUSHJ}
|
417 |
|
|
might also be pure (see below). The \.{JMP} operation is also
|
418 |
|
|
similar, but it has only one operand, and it allows the larger
|
419 |
|
|
address range $-2^{24}\le r<2^{24}$.
|
420 |
|
|
|
421 |
|
|
\MMIX\ operations that refer to memory, like \.{LDO} and \.{STHT} and \.{GO},
|
422 |
|
|
are treated like \.{ADD}
|
423 |
|
|
if they have three operands, except that the first operand should be
|
424 |
|
|
pure (not a register number) in the case of \.{PRELD}, \.{PREGO},
|
425 |
|
|
\.{PREST}, \.{STCO}, \.{SYNCD}, and \.{SYNCID}. These opcodes
|
426 |
|
|
also accept a special two-operand form, in which the second operand
|
427 |
|
|
stands for a {\it base address\/} and an immediate offset (see below).
|
428 |
|
|
|
429 |
|
|
The first operand of \.{PUSHJ} and \.{PUSHGO} can be either a pure
|
430 |
|
|
number or a register number. In the first case (`\.{PUSHJ}~\.{2,Sub}'
|
431 |
|
|
or `\.{PUSHGO}~\.{2,Sub}')
|
432 |
|
|
the programmer might be thinking ``let's push down two registers'';
|
433 |
|
|
in the second case (`\.{PUSHJ}~\.{\$2,Sub}' or `\.{PUSHGO}~\.{\$2,Sub}')
|
434 |
|
|
the programmer might be thinking ``let's make register~2 the hole
|
435 |
|
|
position for this subroutine call.'' Both cases result in the same
|
436 |
|
|
assembled output.
|
437 |
|
|
|
438 |
|
|
The remaining \MMIX\ opcodes are idiosyncratic:
|
439 |
|
|
$$\def\\{{\rm\quad or\quad}}
|
440 |
|
|
\vbox{\halign{\tt#\hfill\cr
|
441 |
|
|
NEG r,p,z;\cr
|
442 |
|
|
PUT s,z;\cr
|
443 |
|
|
GET r,s;\cr
|
444 |
|
|
POP p,yz;\cr
|
445 |
|
|
RESUME xyz;\cr
|
446 |
|
|
SAVE r,0;\cr
|
447 |
|
|
UNSAVE r;\cr
|
448 |
|
|
SYNC xyz;\cr
|
449 |
|
|
TRAP x,y,z\\TRAP x,yz\\TRAP xyz;\cr
|
450 |
|
|
}}$$
|
451 |
|
|
\.{SWYM} and \.{TRIP} are like \.{TRAP}. Here \.s is an integer
|
452 |
|
|
between 0 and~31, preferably given by one of the predefined
|
453 |
|
|
symbols \.{rA}, \.{rB}, \dots~for special register codes;
|
454 |
|
|
\.r is a register number; \.p is a pure byte; \.x, \.y, and \.z are
|
455 |
|
|
either register numbers or pure bytes; \.{yz} and \.{xyz} are pure
|
456 |
|
|
values that fit respectively in two and three bytes.
|
457 |
|
|
|
458 |
|
|
All of these rules can be summarized by saying that \MMIXAL\ treats each
|
459 |
|
|
\MMIX\ opcode in the most natural way. When there are three operands,
|
460 |
|
|
they affect fields X,~Y, and~Z of the assembled \MMIX\ instruction;
|
461 |
|
|
when there are two operands, they affect fields X and~YZ;
|
462 |
|
|
when there is just one operand, it affects field XYZ.
|
463 |
|
|
|
464 |
|
|
@ In all cases when the opcode corresponds to an \MMIX\ operation,
|
465 |
|
|
the \MMIXAL\ instruction tells the assembler to carry out four steps:
|
466 |
|
|
(1)~Align the current location
|
467 |
|
|
so that it is a multiple of~4, by adding 1, 2, or~3 if necessary;
|
468 |
|
|
(2)~Define the equivalent of the label field to be the
|
469 |
|
|
current location, if the label is nonempty;
|
470 |
|
|
(3)~Evaluate the operands and assemble the specified \MMIX\ instruction into
|
471 |
|
|
the current location;
|
472 |
|
|
(4)~Increase the current location by~4.
|
473 |
|
|
|
474 |
|
|
@ Now let's consider the pseudo-operations, starting with the simplest cases.
|
475 |
|
|
|
476 |
|
|
\bull\<label> \.{IS} \<expression>
|
477 |
|
|
defines the value of the label to be the value of the expression,
|
478 |
|
|
which must not be a future reference. The expression may be
|
479 |
|
|
either pure or a register number.
|
480 |
|
|
|
481 |
|
|
\bull\<label> \.{LOC} \<expression>
|
482 |
|
|
first defines the label to be the value of the current location, if the label
|
483 |
|
|
is nonempty. Then the current location is changed to the value of the
|
484 |
|
|
expression, which must be pure.
|
485 |
|
|
|
486 |
|
|
\smallskip For example, `\.{LOC} \.{\#1000}' will start assembling subsequent
|
487 |
|
|
instructions or data in location whose hexa\-decimal value is \Hex{1000}.
|
488 |
|
|
`\.X~\.{LOC}~\.{@@+500}' defines \.X to be the address of the first
|
489 |
|
|
of 500 bytes in memory; assembly will continue at location $\.X+500$.
|
490 |
|
|
The operation of aligning the current location to a multiple of~256,
|
491 |
|
|
if it is not already aligned in that way, can be expressed as
|
492 |
|
|
`\.{LOC}~\.{@@+(256-@@)\&255}'.
|
493 |
|
|
|
494 |
|
|
A less trivial example arises if we want to emit instructions and data into
|
495 |
|
|
two separate areas of memory, but we want to intermix them in the
|
496 |
|
|
\MMIXAL\ source file. We could start by defining \.{8H} and \.{9H}
|
497 |
|
|
to be the starting addresses of the instruction and data segments,
|
498 |
|
|
respectively. Then, a sequence of instructions could be enclosed
|
499 |
|
|
in `\.{LOC}~\.{8B}; \dots; \.{8H}~\.{IS}~\.{@@}'; a sequence of
|
500 |
|
|
data could be enclosed in `\.{LOC}~\.{9B}; \dots; \.{9H}~\.{IS}~\.{@@}'.
|
501 |
|
|
Any number of such sequences could then be combined.
|
502 |
|
|
Instead of the two pseudo-instructions `\.{8H}~\.{IS}~\.{@@;} \.{LOC}~\.{9B}'
|
503 |
|
|
one could in fact write simply `\.{8H}~\.{LOC}~\.{9B}' when
|
504 |
|
|
switching from instructions to data.
|
505 |
|
|
|
506 |
|
|
\bull \.{PREFIX} \<symbol>
|
507 |
|
|
redefines the current prefix to be the given symbol (fully qualified).
|
508 |
|
|
The label field should be blank.
|
509 |
|
|
|
510 |
|
|
@ The next pseudo-operations assemble bytes, wydes, tetrabytes, or
|
511 |
|
|
octabytes of data.
|
512 |
|
|
|
513 |
|
|
\bull \<label> \.{BYTE} \<expression list>
|
514 |
|
|
defines the label to be the current location, if the label field is nonempty;
|
515 |
|
|
then it assembles one byte for each expression in the expression list, and
|
516 |
|
|
advances the current location by the number of bytes. The expressions
|
517 |
|
|
should all be pure numbers that fit in one byte.
|
518 |
|
|
|
519 |
|
|
String constants are often used in such expression lists.
|
520 |
|
|
For example, if the current location is \Hex{1000}, the instruction
|
521 |
|
|
\.{BYTE}~\.{"Hello",0} assembles six bytes containing the constants
|
522 |
|
|
\.{'H'}, \.{'e'}, \.{'l'}, \.{'l'}, \.{'o'}, and~\.0 into locations
|
523 |
|
|
\Hex{1000}, \dots,~\Hex{1005}, and advances the current location
|
524 |
|
|
to \Hex{1006}.
|
525 |
|
|
|
526 |
|
|
\bull \<label> \.{WYDE} \<expression list>
|
527 |
|
|
is similar, but it first makes the current location even, by adding~1 to it
|
528 |
|
|
if necessary. Then it defines the label (if a nonempty label is present),
|
529 |
|
|
and assembles each expression as a two-byte value. The current location
|
530 |
|
|
is advanced by twice the number of expressions in the list. The
|
531 |
|
|
expressions should all be pure numbers that fit in two bytes.
|
532 |
|
|
|
533 |
|
|
\bull \<label> \.{TETRA} \<expression list>
|
534 |
|
|
is similar, but it aligns the current location to a multiple of~4
|
535 |
|
|
before defining the label; then it
|
536 |
|
|
assembles each expression as a four-byte value. The current location
|
537 |
|
|
is advanced by $4n$ if there are $n$~expressions in the list. Each
|
538 |
|
|
expression should be a pure number that fits in four bytes.
|
539 |
|
|
|
540 |
|
|
\bull \<label> \.{OCTA} \<expression list>
|
541 |
|
|
is similar, but it first aligns the current location to a multiple of~8;
|
542 |
|
|
it assembles each expression as an eight-byte value. The current location
|
543 |
|
|
is advanced by $8n$ if there are $n$~expressions in the list. Any or all
|
544 |
|
|
of the expressions may be future references, but they should all
|
545 |
|
|
be defined as pure numbers eventually.
|
546 |
|
|
|
547 |
|
|
@ Global registers are important for accessing memory in \MMIX\ programs.
|
548 |
|
|
They could be allocated by hand, and defined with \.{IS} instructions,
|
549 |
|
|
but \MMIXAL\ provides a mechanism that is usually much more convenient:
|
550 |
|
|
|
551 |
|
|
\bull \<label> \.{GREG} \<expression>
|
552 |
|
|
allocates a new global register, and assigns its number as the
|
553 |
|
|
equivalent of the label.
|
554 |
|
|
At the beginning of assembly, the current global threshold~G is~\$255.
|
555 |
|
|
Each distinct \.{GREG} instruction decreases~G by~1; the final value of~G will
|
556 |
|
|
be the initial value of~rG when the assembled program is loaded.
|
557 |
|
|
|
558 |
|
|
The value of the expression will be loaded into the global register
|
559 |
|
|
at the beginning of the program. {\it If this value is nonzero, it
|
560 |
|
|
should remain constant throughout the program execution\/}; such
|
561 |
|
|
global registers are considered to be {\it base addresses}. Two or
|
562 |
|
|
more base addresses with the same constant value are assigned to the
|
563 |
|
|
same global register number.
|
564 |
|
|
|
565 |
|
|
Base addresses can simplify memory accesses in an important way.
|
566 |
|
|
Suppose, for example, five octabyte values appear in a data segment,
|
567 |
|
|
and their addresses are called \.{AA}, \.{BB}, \.{CC}, \.{DD}, and
|
568 |
|
|
\.{EE}:
|
569 |
|
|
$$\.{AA LOC @@+8;BB LOC @@+8;CC LOC @@+8;DD LOC @@+8;EE LOC @@+8}$$
|
570 |
|
|
Then if you say \.{Base GREG AA}, you will be able to write simply
|
571 |
|
|
`\.{LDO}~\.{\$1,AA}' to bring \.{AA} into register~\.{\$1}, and
|
572 |
|
|
`\.{LDO}~\.{\$2,CC}' to bring \.{CC} into register~\.{\$2}.
|
573 |
|
|
|
574 |
|
|
Here's how it works: Whenever a memory operation such as
|
575 |
|
|
\.{LDO} or \.{STB} or \.{GO} has only two operands, the second
|
576 |
|
|
operand should be a pure number whose value can be expressed
|
577 |
|
|
as $b+\delta$, where $0\le\delta<256$ and $b$ is the value of
|
578 |
|
|
a base address in one of the preceding \.{GREG} commands. The \MMIXAL\
|
579 |
|
|
processor will find the closest base address and manufacture an
|
580 |
|
|
appropriate command. For example, the instruction `\.{LDO}~\.{\$2,CC}' in the
|
581 |
|
|
example of the preceding paragraph would be converted automatically to
|
582 |
|
|
`\.{LDO}~\.{\$2,Base,16}'.
|
583 |
|
|
|
584 |
|
|
If no base address is close enough, an error message will be
|
585 |
|
|
generated, unless this program is run with the \.{-x} option
|
586 |
|
|
on the command line. The \.{-x} option inserts additional instructions
|
587 |
|
|
if necessary, using global register~255, so that any address is
|
588 |
|
|
accessible. For example,
|
589 |
|
|
if there is no base address that allows \.{LDO}~\.{\$2,FF} to be
|
590 |
|
|
implemented in a single instruction, but if \.{FF} equals \.{Base+1000},
|
591 |
|
|
then the \.{-x} option would assemble two instructions,
|
592 |
|
|
$$\.{SETL \$255,1000; LDO \$2,Base,\$255}$$
|
593 |
|
|
in place of \.{LDO}~\.{\$2,FF}. Caution:~The \.{-x} feature makes the
|
594 |
|
|
number of actual \MMIX\ instructions hard to predict, so extreme care must
|
595 |
|
|
be used if your style of coding includes relative branch instructions
|
596 |
|
|
in dangerous forms like `\.{BNZ}~\.{x,@@+8}'.
|
597 |
|
|
|
598 |
|
|
This base address convention can be used also with the alias
|
599 |
|
|
operation~\.{LDA}. For example, `\.{LDA}~\.{\$3,CC}' loads the
|
600 |
|
|
@.LDA@>
|
601 |
|
|
address of \.{CC} into register~3, by assembling the instruction
|
602 |
|
|
`\.{ADDU}~\.{\$3,Base,16}'.
|
603 |
|
|
|
604 |
|
|
\MMIXAL\ also allows a two-operand form for memory operations such as
|
605 |
|
|
$$\hbox{\.{LDO} \.{\$1,\$2}}$$
|
606 |
|
|
to be an abbreviation for `\.{LDO} \.{\$1,\$2,0}'.
|
607 |
|
|
|
608 |
|
|
When \MMIXAL\ programs use subroutines with a memory stack in addition
|
609 |
|
|
to the built-in register stack, they usually begin with the
|
610 |
|
|
instructions `\.{sp}~\.{GREG}~\.{0;fp}~\.{GREG}~\.0'; these instructions
|
611 |
|
|
allocate a {\it stack pointer\/} \.{sp=\$254} and a {\it frame pointer\/}
|
612 |
|
|
\.{fp=\$253}. However, subroutine libraries are free to implement any
|
613 |
|
|
conventions for global registers and stacks that they like.
|
614 |
|
|
@^stack pointer@>
|
615 |
|
|
@^frame pointer@>
|
616 |
|
|
|
617 |
|
|
@ Short programs rarely run out of global registers, but long programs
|
618 |
|
|
need a mechanism to check that \.{GREG} hasn't been used too often.
|
619 |
|
|
The following pseudo-instruction provides the necessary safety valve:
|
620 |
|
|
|
621 |
|
|
\bull \.{LOCAL} \<expression>
|
622 |
|
|
ensures that the expression will be a local register in the program
|
623 |
|
|
being assembled. The expression should be a register number, and
|
624 |
|
|
the label field should be blank. At the close of
|
625 |
|
|
assembly, \MMIXAL\ will report an error if the final value of~G does
|
626 |
|
|
not exceed all register numbers that are declared local in this way.
|
627 |
|
|
|
628 |
|
|
A \.{LOCAL} instruction need not be given unless the register number
|
629 |
|
|
is 32 or~more. (\MMIX\ always considers \.{\$0} through \.{\$31} to be
|
630 |
|
|
local, so \MMIXAL\ implicitly acts as if the
|
631 |
|
|
instruction `\.{LOCAL}~\.{\$31}' were present.)
|
632 |
|
|
|
633 |
|
|
@ Finally, there are two pseudo-instructions to pass information
|
634 |
|
|
and hints to the loading routine and/or to debuggers that will be
|
635 |
|
|
using the assembled program.
|
636 |
|
|
|
637 |
|
|
\bull \.{BSPEC} \<expression>
|
638 |
|
|
begins ``special mode''; the \<expression> should have a value that
|
639 |
|
|
fits in two bytes, and the label field should be blank.
|
640 |
|
|
|
641 |
|
|
\bull \.{ESPEC}
|
642 |
|
|
ends ``special mode''; the operand field is ignored, and the label
|
643 |
|
|
field should be blank.
|
644 |
|
|
|
645 |
|
|
\smallskip\noindent
|
646 |
|
|
All material assembled between \.{BSPEC} and \.{ESPEC} is passed
|
647 |
|
|
directly to the output, but not loaded as part of the assembled program.
|
648 |
|
|
Ordinary \MMIX\ instructions cannot appear in special mode; only the
|
649 |
|
|
pseudo-operations \.{IS}, \.{PREFIX}, \.{BYTE}, \.{WYDE}, \.{TETRA},
|
650 |
|
|
\.{OCTA}, \.{GREG}, and \.{LOCAL} are allowed. The operand of
|
651 |
|
|
\.{BSPEC} should have a value that fits in two bytes; this value
|
652 |
|
|
identifies the kind of data that follows. (For example, \.{BSPEC}~\.0
|
653 |
|
|
might introduce information about subroutine calling conventions at the
|
654 |
|
|
current location, and \.{BSPEC}~\.1 might introduce line numbers from
|
655 |
|
|
a high-level-language program that was compiled into the code at
|
656 |
|
|
the current place.
|
657 |
|
|
System routines often need to pass such information through an assembler
|
658 |
|
|
to the operating system, hence \MMIXAL\ provides a general-purpose conduit.)
|
659 |
|
|
|
660 |
|
|
@ A program should begin at the special symbolic location \.{Main}
|
661 |
|
|
@.Main@>
|
662 |
|
|
(more precisely, at the address corresponding to
|
663 |
|
|
the fully qualified symbol \.{:Main}).
|
664 |
|
|
This symbol always has serial number~1, and it must always be defined.
|
665 |
|
|
@^serial number@>
|
666 |
|
|
|
667 |
|
|
Locations should not receive assembled data more than once.
|
668 |
|
|
(More precisely, the loader will load the bitwise~xor of all the
|
669 |
|
|
data assembled for each byte position; but the general rule ``do not load
|
670 |
|
|
two things into the same byte'' is safest.)
|
671 |
|
|
All locations that do not receive assembled data are initially zero,
|
672 |
|
|
except that the loading routine will put register stack data into
|
673 |
|
|
segment~3, and the operating system may put command-line data and
|
674 |
|
|
debugger data into segment~2.
|
675 |
|
|
(The rudimentary \MMIX\ operating system starts a program
|
676 |
|
|
with the number of command-line arguments in~\$0, and a pointer to
|
677 |
|
|
the beginning of an array of argument pointers in~\$1.)
|
678 |
|
|
Segments 2 and 3 should not get assembled data, unless the
|
679 |
|
|
user is a true hacker who is willing to take the risk that such data
|
680 |
|
|
might crash the system.
|
681 |
|
|
|
682 |
|
|
@* Binary MMO output. When the \MMIXAL\ processor assembles a file
|
683 |
|
|
called \.{foo.mms}, it produces a binary output file called \.{foo.mmo}.
|
684 |
|
|
(The suffix \.{mms} stands for ``\MMIX\ symbolic,'' and \.{mmo} stands
|
685 |
|
|
for ``\MMIX\ object.'') Such \.{mmo} files have a simple structure
|
686 |
|
|
consisting of a sequence of tetrabytes. Some of the tetrabytes are
|
687 |
|
|
instructions to a loading routine; others are data to be loaded.
|
688 |
|
|
@^object files@>
|
689 |
|
|
|
690 |
|
|
Loader instructions are distinguished from tetrabytes of data by their
|
691 |
|
|
first (most significant) byte, which has the special escape-code value
|
692 |
|
|
\Hex{98}, called |mm| in the program below. This code value corresponds
|
693 |
|
|
to \MMIX's opcode \.{LDVTS}, which is unlikely to occur in tetras of
|
694 |
|
|
data. The second byte~X of a loader instruction is the loader opcode,
|
695 |
|
|
called the {\it lopcode}. The third and fourth bytes, Y~and~Z, are
|
696 |
|
|
operands. Sometimes they are combined into a single 16-bit operand called~YZ.
|
697 |
|
|
@^lopcodes@>
|
698 |
|
|
|
699 |
|
|
@d mm 0x98 /* escape code: the first byte of every loader instruction */
|
700 |
|
|
|
701 |
|
|
@ A small, contrived example will help explain the basic ideas of \.{mmo}
|
702 |
|
|
format. Consider the following input file, called \.{test.mms}:
|
703 |
|
|
$$\obeyspaces\vbox{\halign{\tt#\hfil\cr
|
704 |
|
|
\% A peculiar example of MMIXAL\cr
|
705 |
|
|
\ LOC Data\_Segment \% location \#2000000000000000\cr
|
706 |
|
|
\ OCTA 1F \% a future reference\cr
|
707 |
|
|
a GREG @@ \% \$254 is base address for ABCD\cr
|
708 |
|
|
ABCD BYTE "ab" \% two bytes of data\cr
|
709 |
|
|
\ LOC \#123456789 \% switch to the instruction segment\cr
|
710 |
|
|
Main JMP 1F \% another future reference\cr
|
711 |
|
|
\ LOC @@+\#4000 \% skip past 16384 bytes\cr
|
712 |
|
|
2H LDB \$3,ABCD+1 \% use the base address\cr
|
713 |
|
|
\ BZ \$3,1F; TRAP \% and refer to the future again\cr
|
714 |
|
|
\# 3 "foo.mms" \% this comment is a line directive\cr
|
715 |
|
|
\ LOC 2B-4*10 \% move 10 tetras before previous location\cr
|
716 |
|
|
1H JMP 2B \% resolve previous references to 1F\cr
|
717 |
|
|
\ BSPEC 5 \% begin special data of type 5\cr
|
718 |
|
|
\ TETRA {\AM}a<<8 \% four bytes of special data\cr
|
719 |
|
|
\ WYDE a-\$0 \% two more bytes of special data\cr
|
720 |
|
|
\ ESPEC \% end a special data packet\cr
|
721 |
|
|
\ LOC ABCD+2 \% resume the data segment\cr
|
722 |
|
|
\ BYTE "cd",\#98 \% assemble three more bytes of data\cr
|
723 |
|
|
}}$$
|
724 |
|
|
It defines a silly program that essentially puts \.{'b'} into register~3;
|
725 |
|
|
the program halts when it gets to an all-zero \.{TRAP} instruction
|
726 |
|
|
following the~\.{BZ}. But the assembled output of this file illustrates most
|
727 |
|
|
of the features of \MMIX\ objects, and in fact \.{test.mms} was the
|
728 |
|
|
first test file tried by the author when the \MMIXAL\ processor was originally
|
729 |
|
|
written.
|
730 |
|
|
|
731 |
|
|
The binary output file \.{test.mmo} assembled from \.{test.mms} consists
|
732 |
|
|
of the following tetrabytes, shown in hexadecimal notation with brief
|
733 |
|
|
comments. Fuller explanations
|
734 |
|
|
appear with the descriptions of individual lopcodes below.
|
735 |
|
|
$$
|
736 |
|
|
\halign{\hskip.5in\tt#&\quad#\hfil\cr
|
737 |
|
|
98090101&|lop_pre| $1,1$ (preamble, version 1, 1 tetra)\cr
|
738 |
|
|
36f4a363&(the file creation time)\cr
|
739 |
|
|
% Sat Mar 20 23:44:35 1999
|
740 |
|
|
98012001&|lop_loc| $\Hex{20},1$ (data segment, 1 tetra)\cr
|
741 |
|
|
00000000&(low tetrabyte of address in data segment)\cr
|
742 |
|
|
00000000&(high tetrabyte of \.{OCTA} \.{1F})\cr
|
743 |
|
|
00000000&(low tetrabyte, will be fixed up later)\cr
|
744 |
|
|
61620000&(\.{"ab"}, padded with trailing zeros)\cr
|
745 |
|
|
\noalign{\penalty-200}
|
746 |
|
|
98010002&|lop_loc| $0,2$ (instruction segment, 2 tetras)\cr
|
747 |
|
|
00000001&(high tetrabyte of address in instruction segment)\cr
|
748 |
|
|
2345678c&(low tetrabyte of address, after alignment)\cr
|
749 |
|
|
98060002&|lop_file| $0,2$ (file name 0, 2 tetras)\cr
|
750 |
|
|
74657374&(\.{"test"})\cr
|
751 |
|
|
2e6d6d73&(\.{".mms"})\cr
|
752 |
|
|
98070007&|lop_line| 7 (line 7 of the current file)\cr
|
753 |
|
|
f0000000&(\.{JMP} \.{1F}, will be fixed up later)\cr
|
754 |
|
|
98024000&|lop_skip| \Hex{4000} (advance 16384 bytes)\cr
|
755 |
|
|
98070009&|lop_line| 9 (line 9 of the current file)\cr
|
756 |
|
|
8103fe01&(\.{LDB} \.{\$3,a,1}, uses base address \.a)\cr
|
757 |
|
|
42030000&(\.{BZ} \.{\$3,1F}, will be fixed later)\cr
|
758 |
|
|
9807000a&|lop_line| 10 (stay on line 10)\cr
|
759 |
|
|
00000000&(\.{TRAP})\cr
|
760 |
|
|
98010002&|lop_loc| $0,2$ (instruction segment, 2 tetras)\cr
|
761 |
|
|
00000001&(high tetrabyte of address in instruction segment)\cr
|
762 |
|
|
2345a768&(low tetrabyte of address \.{1H})\cr
|
763 |
|
|
98050010&|lop_fixrx| 16 (fix 16-bit relative address)\cr
|
764 |
|
|
0100fff5&(fixup for location \.{@@-4*-11})\cr
|
765 |
|
|
98040ff7&|lop_fixr| \Hex{ff7} (fix \.{@@-4*\#ff7})\cr
|
766 |
|
|
98032001&|lop_fixo| $\Hex{20},1$ (data segment, 1 tetra)\cr
|
767 |
|
|
00000000&(low tetrabyte of data segment address to fix)\cr
|
768 |
|
|
98060102&|lop_file| $1,2$ (file name 1, 2 tetras)\cr
|
769 |
|
|
666f6f2e&(\.{"foo."})\cr
|
770 |
|
|
6d6d7300&(\.{"mms",0})\cr
|
771 |
|
|
98070004&|lop_line| 4 (line 4 of the current file)\cr
|
772 |
|
|
f000000a&(\.{JMP} \.{2B})\cr
|
773 |
|
|
98080005&|lop_spec| 5 (begin special data of type 5)\cr
|
774 |
|
|
00000200&(\.{TETRA} \.{\&a<<8})\cr
|
775 |
|
|
00fe0000&(\.{WYDE} \.{a-\$0})\cr
|
776 |
|
|
98012001&|lop_loc| $\Hex{20},1$ (data segment, 1 tetra)\cr
|
777 |
|
|
0000000a&(low tetrabyte of address in data segment)\cr
|
778 |
|
|
00006364&(\.{"cd"} with leading zeros, because of alignment)\cr
|
779 |
|
|
98000001&|lop_quote| (don't treat next tetrabyte as a lopcode)\cr
|
780 |
|
|
98000000&(\.{BYTE} \.{\#98}, padded with trailing zeros)\cr
|
781 |
|
|
980a00fe&|lop_post| \$254 (begin postamble, G is 254)\cr
|
782 |
|
|
20000000&(high tetrabyte of the initial contents of \$254)\cr
|
783 |
|
|
00000008&(low tetrabyte of base address \$254)\cr
|
784 |
|
|
00000001&(high tetrabyte of the initial contents of \$255)\cr
|
785 |
|
|
2345678c&(low tetrabyte of \$255, is address of \.{Main})\cr
|
786 |
|
|
980b0000&|lop_stab| (begin symbol table)\cr
|
787 |
|
|
203a5040&(compressed form for symbol table as a ternary trie)\cr
|
788 |
|
|
50404020\cr
|
789 |
|
|
41204220\cr
|
790 |
|
|
43094408\cr
|
791 |
|
|
83404020&(\.{ABCD} = \Hex{2000000000000008}, serial 3)\cr
|
792 |
|
|
4d206120\cr
|
793 |
|
|
69056e01\cr
|
794 |
|
|
2345678c\cr
|
795 |
|
|
81400f61&(\.{Main} = \Hex{000000012345678c}, serial 1)\cr
|
796 |
|
|
fe820000&(\.{a} = \$254, serial 2)\cr
|
797 |
|
|
980c000a&|lop_end| (end symbol table, 10 tetras)\cr
|
798 |
|
|
}$$
|
799 |
|
|
|
800 |
|
|
@ When a tetrabyte of the \.{mmo} file does not begin with the escape code,
|
801 |
|
|
it is loaded into the current location~$\lambda$, and $\lambda$ is increased
|
802 |
|
|
to the next higher multiple of~4.
|
803 |
|
|
(If $\lambda$ is not a multiple of~4, the tetrabyte actually goes
|
804 |
|
|
into location $\lambda\land(-4)=4\lfloor\lambda/4\rfloor$, according
|
805 |
|
|
to \MMIX's usual conventions.) The current line number is also increased
|
806 |
|
|
by~1, if it is nonzero.
|
807 |
|
|
|
808 |
|
|
When a tetrabyte does begin with the escape code, its next byte
|
809 |
|
|
is the lopcode defining a loader instruction. There are thirteen lopcodes:
|
810 |
|
|
|
811 |
|
|
\bull |lop_quote|: $\rm X=\Hex{00}$, $\rm YZ=1$. Treat the next tetra as
|
812 |
|
|
an ordinary tetrabyte, even if it begins with the escape code.
|
813 |
|
|
|
814 |
|
|
\bull |lop_loc|: $\rm X=\Hex{01}$, $\rm Y=high$ byte, $\rm Z=tetra$ count
|
815 |
|
|
($\rm Z=1$~or~2). Set the current location to the 64-bit address defined
|
816 |
|
|
by the next Z tetras, plus $\rm 2^{56}Y$. Usually $\rm Y=0$ (for the
|
817 |
|
|
instruction segment) or $\rm Y=\Hex{20}$ (for the data segment).
|
818 |
|
|
If $\rm Z=2$, the high tetra appears first.
|
819 |
|
|
|
820 |
|
|
\bull |lop_skip|: $\rm X=\Hex{02}$, $\rm YZ=delta$. Increase the
|
821 |
|
|
current location by~YZ.
|
822 |
|
|
|
823 |
|
|
\bull |lop_fixo|: $\rm X=\Hex{03}$, $\rm Y=high$ byte, $\rm Z=tetra$ count
|
824 |
|
|
($\rm Z=1$~or~2). Load the value of the current location~$\lambda$ into
|
825 |
|
|
octabyte~P, where P~is the 64-bit address defined by the next Z tetras
|
826 |
|
|
plus $\rm2^{56}Y$ as in |lop_loc|. (The octabyte at~P was previously assembled
|
827 |
|
|
as zero because of a future reference.)
|
828 |
|
|
|
829 |
|
|
\bull |lop_fixr|: $\rm X=\Hex{04}$, $\rm YZ=delta$. Load YZ into the YZ~field
|
830 |
|
|
of the tetrabyte in location~P, where P~is
|
831 |
|
|
$\rm\lambda-4YZ$, namely the address that precedes the current location
|
832 |
|
|
by YZ~tetrabytes. (This tetrabyte was previously loaded with an \MMIX\
|
833 |
|
|
instruction that takes a relative address: a branch, probable branch,
|
834 |
|
|
\.{JMP}, \.{PUSHJ}, or~\.{GETA}. Its YZ~field was previously
|
835 |
|
|
assembled as zero because of a future reference.)
|
836 |
|
|
|
837 |
|
|
\bull |lop_fixrx|: $\rm X=\Hex{05}$, $\rm Y=0$, $\rm Z=16$ or 24.
|
838 |
|
|
Proceed as in |lop_fixr|,
|
839 |
|
|
but load $\delta$ into tetrabyte $\rm P=\lambda-4\delta$ instead of loading
|
840 |
|
|
YZ into $\rm P=\lambda-4YZ$. Here $\delta$ is the value of the tetrabyte
|
841 |
|
|
following the |lop_fixrx| instruction; its leading byte will either
|
842 |
|
|
|
843 |
|
|
be 0 or~1. If the leading byte is~1, $\delta$ should be treated as the
{\it negative\/} number $(\delta\land\Hex{ffffff})-2^{\rm Z}$ when
|
844 |
|
|
calculating the address~P. (The latter case arises only rarely,
|
845 |
|
|
but it is needed when fixing up a relative ``future'' reference that
|
846 |
|
|
ultimately leads to a ``backward'' instruction. The value of~$\delta$ that
|
847 |
|
|
is xored into location~P in such cases will change \.{BZ} to \.{BZB},
|
848 |
|
|
or \.{JMP} to \.{JMPB}, etc.; we have $\rm Z=24$ when fixing a~\.{JMP},
|
849 |
|
|
$\rm Z=16$ otherwise.)
|
850 |
|
|
|
851 |
|
|
\bull |lop_file|: $\rm X=\Hex{06}$, $\rm Y=file$ number, $\rm Z=tetra$ count.
|
852 |
|
|
Set the current file number to~Y and the current line number to~zero. If this
|
853 |
|
|
file number has occurred previously, Z~should be zero; otherwise Z~should be
|
854 |
|
|
positive, and the next Z tetrabytes are the characters of the file name in
|
855 |
|
|
big-endian order.
|
856 |
|
|
Trailing zeros follow the file name if its length is not a multiple of~4.
|
857 |
|
|
|
858 |
|
|
\bull |lop_line|: $\rm X=\Hex{07}$, $\rm YZ=line$ number. Set the current line
|
859 |
|
|
number to~YZ\null. If the line number is nonzero, the current file and current
|
860 |
|
|
line should correspond to the source location that generated the next data to
|
861 |
|
|
be loaded, for use in diagnostic messages. (The \MMIXAL\ processor gives
|
862 |
|
|
precise line numbers to the sources of tetrabytes in segment~0, which tend to
|
863 |
|
|
be instructions, but not to the sources of tetrabytes assembled in other
|
864 |
|
|
segments.)
|
865 |
|
|
|
866 |
|
|
\bull |lop_spec|: $\rm X=\Hex{08}$, $\rm YZ=type$. Begin special data of
|
867 |
|
|
type~YZ\null. The subsequent tetrabytes, continuing until the next loader
|
868 |
|
|
operation other than |lop_quote|, comprise the special data. A |lop_quote|
|
869 |
|
|
instruction allows tetrabytes of special data to begin with the escape code.
|
870 |
|
|
|
871 |
|
|
\bull |lop_pre|: $\rm X=\Hex{09}$, $\rm Y=1$, $\rm Z=tetra$ count. A~|lop_pre|
|
872 |
|
|
instruction, which defines the ``preamble,'' must be the first tetrabyte of
|
873 |
|
|
every \.{mmo} file. The Y~field specifies the version number of \.{mmo}
|
874 |
|
|
format, currently~1; other version numbers may be defined later, but
|
875 |
|
|
version~1 should always be supported as described in the present document.
|
876 |
|
|
The Z~tetrabytes following a |lop_pre| command provide additional information
|
877 |
|
|
that might be of interest to system routines. If $\rm Z>0$, the first tetra
|
878 |
|
|
of additional information records the time that this \.{mmo} file was
|
879 |
|
|
created, measured in seconds since 00:00:00 Greenwich Mean Time on
|
880 |
|
|
1~Jan~1970.
|
881 |
|
|
|
882 |
|
|
\bull |lop_post|: $\rm X=\Hex{0a}$, $\rm Y=0$, $\rm Z=G$ (must be 32~or~more).
|
883 |
|
|
This instruction begins the {\it postamble}, which follows all instructions
|
884 |
|
|
and data to be loaded. It causes the loaded program to begin with rG equal to
|
885 |
|
|
the stated value of~G, and with \$G, $\rm G+1$, \dots,~\$255 initially set to
|
886 |
|
|
the values of the next $\rm(256-G)*2$ tetrabytes. These tetrabytes specify
|
887 |
|
|
$\rm 256-G$ octabytes in big-endian fashion (high half first).
|
888 |
|
|
|
889 |
|
|
\bull |lop_stab|: $\rm X=\Hex{0b}$, $\rm YZ=0$. This instruction must appear
|
890 |
|
|
immediately after the $\rm(256-G)*2$ tetrabytes following~|lop_post|. It is
|
891 |
|
|
followed by the symbol table, which lists the equivalents of all user-defined
|
892 |
|
|
symbols in a compact form that will be described later.
|
893 |
|
|
|
894 |
|
|
\bull |lop_end|: $\rm X=\Hex{0c}$, $\rm YZ=tetra$ count. This instruction
|
895 |
|
|
must be the very last tetrabyte of each \.{mmo} file. Furthermore,
|
896 |
|
|
exactly YZ tetrabytes must appear between it and the |lop_stab| command.
|
897 |
|
|
(Therefore a program can easily find the symbol table without reading
|
898 |
|
|
forward through the entire \.{mmo} file.)
|
899 |
|
|
|
900 |
|
|
\smallskip
|
901 |
|
|
A separate routine called \.{MMOtype} is available to translate
|
902 |
|
|
binary \.{mmo} files into human-readable form.
|
903 |
|
|
|
904 |
|
|
@d lop_quote 0x0 /* the quotation lopcode: the next tetra is data, even if it begins with |mm| */
|
905 |
|
|
@d lop_loc 0x1 /* the location lopcode: sets the current location */
|
906 |
|
|
@d lop_skip 0x2 /* the skip lopcode: increases the current location by YZ */
|
907 |
|
|
@d lop_fixo 0x3 /* the octabyte-fix lopcode: stores the current location into an earlier octabyte */
|
908 |
|
|
@d lop_fixr 0x4 /* the relative-fix lopcode: fills in the YZ field of an earlier tetrabyte */
|
909 |
|
|
@d lop_fixrx 0x5 /* extended relative-fix lopcode: like |lop_fixr|, but $\delta$ is in the following tetra */
|
910 |
|
|
@d lop_file 0x6 /* the file name lopcode: sets the current file number, zeroes the line number */
|
911 |
|
|
@d lop_line 0x7 /* the file position lopcode: sets the current line number to YZ */
|
912 |
|
|
@d lop_spec 0x8 /* the special hook lopcode: begins special data of type YZ */
|
913 |
|
|
@d lop_pre 0x9 /* the preamble lopcode: must be the first tetrabyte of the file */
|
914 |
|
|
@d lop_post 0xa /* the postamble lopcode: gives G and the initial global register contents */
|
915 |
|
|
@d lop_stab 0xb /* the symbol table lopcode: the symbol table follows */
|
916 |
|
|
@d lop_end 0xc /* the end-it-all lopcode: must be the last tetrabyte of the file */
|
917 |
|
|
|
918 |
|
|
@ Many readers will have noticed that \MMIXAL\ has no facilities for
|
919 |
|
|
relocatable output, nor does \.{mmo} format support such features. The
|
920 |
|
|
author's first drafts of \MMIXAL\ and \.{mmo} did allow relocatable objects,
|
921 |
|
|
with external linkages, but the rules were substantially more complicated and
|
922 |
|
|
therefore inconsistent with the goals of {\sl The Art of Computer Programming}.
|
923 |
|
|
The present design might actually prove to be superior to the current
|
924 |
|
|
practice, now that computer memory is significantly cheaper than it
|
925 |
|
|
used to be, because one-pass assembly and loading are extremely fast when
|
926 |
|
|
relocatability and external linkages are disallowed. Different program modules
|
927 |
|
|
can be assembled together about as fast as they could be linked together under
|
928 |
|
|
a relocatable scheme, and they can communicate with each other in much more
|
929 |
|
|
flexible ways. Debugging tools are enhanced when open-source libraries are
|
930 |
|
|
combined with user programs, and such libraries will certainly improve in
|
931 |
|
|
quality when their source form is accessible to a larger community of users.
|
932 |
|
|
|
933 |
|
|
@* Basic data types.
|
934 |
|
|
This program for the 64-bit \MMIX\ architecture is based on 32-bit integer
|
935 |
|
|
arithmetic, because nearly every computer available to the author at the time
|
936 |
|
|
of writing was limited in that way.
|
937 |
|
|
Details of the basic arithmetic appear in a separate program module
|
938 |
|
|
called {\mc MMIX-ARITH}, because the same routines are needed also
|
939 |
|
|
for the simulators. The definition of type \&{tetra} should be changed, if
|
940 |
|
|
necessary, to conform with the definitions found in {\mc MMIX-ARITH}.
|
941 |
|
|
@^system dependencies@>
|
942 |
|
|
|
943 |
|
|
@<Type definitions@>=
|
944 |
|
|
typedef unsigned int tetra; /* one tetrabyte */
|
945 |
|
|
/* assumes that an int is exactly 32 bits wide */
|
946 |
|
|
typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte: |h| is the high half, |l| the low half */
|
947 |
|
|
typedef enum {@!false,@!true}@+@!bool; /* home-grown truth values: |false| is 0 and |true| is 1 */
|
948 |
|
|
|
949 |
|
|
@ @<Global variables@>=
|
950 |
|
|
extern octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */
|
951 |
|
|
extern octa neg_one; /* |neg_one.h=neg_one.l=-1| */
|
952 |
|
|
extern octa aux; /* auxiliary output of a subroutine */
|
953 |
|
|
extern bool overflow; /* set by certain subroutines for signed arithmetic */
|
954 |
|
|
|
955 |
|
|
@ Most of the subroutines in {\mc MMIX-ARITH} return an octabyte as
|
956 |
|
|
a function of two octabytes; for example, |oplus(y,z)| returns the
|
957 |
|
|
sum of octabytes |y| and~|z|. Division inputs the high
|
958 |
|
|
half of a dividend in the global variable~|aux| and returns
|
959 |
|
|
the remainder in~|aux|.
|
960 |
|
|
|
961 |
|
|
@<Subroutines@>=
|
962 |
|
|
extern octa oplus @,@,@[ARGS((octa y,octa z))@];
|
963 |
|
|
/* unsigned $y+z$ */
|
964 |
|
|
extern octa ominus @,@,@[ARGS((octa y,octa z))@];
|
965 |
|
|
/* unsigned $y-z$ */
|
966 |
|
|
extern octa incr @,@,@[ARGS((octa y,int delta))@];
|
967 |
|
|
/* unsigned $y+\delta$ ($\delta$ is signed) */
|
968 |
|
|
extern octa oand @,@,@[ARGS((octa y,octa z))@];
|
969 |
|
|
/* $y\land z$ */
|
970 |
|
|
extern octa shift_left @,@,@[ARGS((octa y,int s))@];
|
971 |
|
|
/* $y\LL s$, $0\le s\le64$ */
|
972 |
|
|
extern octa shift_right @,@,@[ARGS((octa y,int s,int uns))@];
|
973 |
|
|
/* $y\GG s$, signed if |!uns| */
|
974 |
|
|
extern octa omult @,@,@[ARGS((octa y,octa z))@];
|
975 |
|
|
/* unsigned $(|aux|,x)=y\times z$ */
|
976 |
|
|
extern octa odiv @,@,@[ARGS((octa x,octa y,octa z))@];
|
977 |
|
|
/* unsigned $(x,y)/z$; $|aux|=(x,y)\bmod z$ */
|
978 |
|
|
|
979 |
|
|
@ Here's a rudimentary check to see if arithmetic is in trouble.
|
980 |
|
|
|
981 |
|
|
@<Initialize everything@>=
|
982 |
|
|
/* sanity check: $-1$ shifted left by one bit must still have an all-ones high half */
/* (presumably this catches a |tetra| type that is not exactly 32 bits wide) */
acc=shift_left(neg_one,1);
|
983 |
|
|
if (acc.h!=0xffffffff) panic("Type tetra is not implemented correctly");
|
984 |
|
|
@.Type tetra...@>
|
985 |
|
|
|
986 |
|
|
@ Future versions of this program will work with symbols formed from Unicode
|
987 |
|
|
characters, but the present code limits itself to an 8-bit subset.
|
988 |
|
|
@^Unicode@>
|
989 |
|
|
The type \&{Char} is defined here in order to ease the later transition:
|
990 |
|
|
At present, \&{Char} is the same as \&{unsigned} \&{char}, but
|
991 |
|
|
\&{Char} can be changed to a 16-bit type in the Unicode version.
|
992 |
|
|
|
993 |
|
|
Other changes will also be necessary when the transition to Unicode is made;
|
994 |
|
|
for example, some calls of |fprintf| will become calls of |fwprintf|,
|
995 |
|
|
and some occurrences of \.{\%s} will become \.{\%ls} in print formats.
|
996 |
|
|
The switchable type name \&{Char} provides at least a first step
|
997 |
|
|
towards a brighter future with Unicode.
|
998 |
|
|
|
999 |
|
|
@<Type definitions@>=
|
1000 |
|
|
typedef unsigned char Char; /* bytes that will become wydes some day (a 16-bit type in the Unicode version) */
|
1001 |
|
|
|
1002 |
|
|
@ While we're talking about classic systems versus future systems, we
|
1003 |
|
|
might as well define the |ARGS| macro, which makes function prototypes
|
1004 |
|
|
available on {\mc ANSI \CEE/} systems without making them
|
1005 |
|
|
uncompilable on older systems. Each subroutine below is declared first
|
1006 |
|
|
with a prototype, then with an old-style definition.
|
1007 |
|
|
|
1008 |
|
|
@<Preprocessor definitions@>=
|
1009 |
|
|
/* |ARGS((a,b))| keeps the parameter list of a prototype when the compiler */
/* defines |__STDC__|, and reduces it to an empty |()| otherwise */
#ifdef __STDC__
|
1010 |
|
|
#define ARGS(list) list
|
1011 |
|
|
#else
|
1012 |
|
|
#define ARGS(list) ()
|
1013 |
|
|
#endif
|
1014 |
|
|
|
1015 |
|
|
@* Basic input and output. Input goes into a buffer that is normally
|
1016 |
|
|
limited to 72 characters. This limit can be raised, by using the
|
1017 |
|
|
\.{-b} option when invoking the assembler; but short buffers will keep listings
|
1018 |
|
|
from becoming unwieldy, because a symbolic listing adds 19 characters per~line.
|
1019 |
|
|
|
1020 |
|
|
@<Initialize everything@>=
|
1021 |
|
|
if (buf_size<72) buf_size=72; /* 72 is the smallest buffer size we accept */
|
1022 |
|
|
buffer=(Char*)calloc(buf_size+1,sizeof(Char)); /* |buf_size| characters plus a terminating null */
|
1023 |
|
|
lab_field=(Char*)calloc(buf_size+1,sizeof(Char));
|
1024 |
|
|
op_field=(Char*)calloc(buf_size,sizeof(Char));
|
1025 |
|
|
operand_list=(Char*)calloc(buf_size,sizeof(Char));
|
1026 |
|
|
err_buf=(Char*)calloc(buf_size+60,sizeof(Char)); /* 60 characters more than an input line */
|
1027 |
|
|
if (!buffer || !lab_field || !op_field || !operand_list || !err_buf) /* all five allocations are tested together */
|
1028 |
|
|
panic("No room for the buffers");
|
1029 |
|
|
@.No room...@>
|
1030 |
|
|
|
1031 |
|
|
@ @<Global variables@>=
|
1032 |
|
|
Char *buffer; /* raw input of the current line, at most |buf_size| characters plus a null */
|
1033 |
|
|
Char *buf_ptr; /* current position within |buffer| */
|
1034 |
|
|
Char *lab_field; /* copy of the label field of the current instruction */
|
1035 |
|
|
Char *op_field; /* copy of the opcode field of the current instruction */
|
1036 |
|
|
Char *operand_list; /* copy of the operand field of the current instruction */
|
1037 |
|
|
Char *err_buf; /* place where dynamic error messages are sprinted; it has room for |buf_size+60| characters */
|
1038 |
|
|
|
1039 |
|
|
@ @<Get the next line of input text, or |break| if the input has ended@>=
|
1040 |
|
|
if (!fgets(buffer,buf_size+1,src_file)) break;
|
1041 |
|
|
line_no++;
|
1042 |
|
|
line_listed=false;
|
1043 |
|
|
j=strlen(buffer);
|
1044 |
|
|
if (buffer[j-1]=='\n') buffer[j-1]='\0'; /* remove the newline */
|
1045 |
|
|
else if ((j=fgetc(src_file))!=EOF)
|
1046 |
|
|
@;
|
1047 |
|
|
if (buffer[0]=='#') @;
|
1048 |
|
|
buf_ptr=buffer;
|
1049 |
|
|
|
1050 |
|
|
@ @<Flush the excess part of an overlong line@>=
|
1051 |
|
|
{ /* on entry, |j| is the first character that did not fit into |buffer| */
  while(j!='\n' && j!=EOF) j=fgetc(src_file); /* discard the rest of the line */
  if (!long_warning_given) { /* the first time, also explain the remedy */
    long_warning_given=true;
    err("*trailing characters of long input line have been dropped");
@.trailing characters...@>
    fprintf(stderr,
       "(say `-b <number>' to increase the length of my input buffer)\n");
  }@+else err("*trailing characters dropped");
}
|
1061 |
|
|
|
1062 |
|
|
@ @<Global variables@>=
|
1063 |
|
|
int cur_file; /* index of the current file in |filename| */
|
1064 |
|
|
int line_no; /* current position in the file: the number of the line most recently read */
|
1065 |
|
|
bool line_listed; /* have we listed the buffer contents? */
|
1066 |
|
|
bool long_warning_given; /* have we given the hint about \.{-b}? */
|
1067 |
|
|
|
1068 |
|
|
@ We keep track of source file name and line number at all times, for
|
1069 |
|
|
error reporting and for synchronization data in the object file.
|
1070 |
|
|
Up to 256 different source file names can be remembered.
|
1071 |
|
|
|
1072 |
|
|
@<Global variables@>=
|
1073 |
|
|
Char *filename[257];
|
1074 |
|
|
/* source file names, including those in line directives; the extra slot |filename[filename_count]| holds a name while it is compared with the known ones */
|
1075 |
|
|
int filename_count; /* how many |filename| entries have we filled? */
|
1076 |
|
|
|
1077 |
|
|
@ If the current line is a line directive, it will also be treated
|
1078 |
|
|
as a comment by the assembler.
|
1079 |
|
|
|
1080 |
|
|
@<Check for a line directive@>=
|
1081 |
|
|
{ /* a line directive has the form `\.\#' line number, then a file name in double-quotes */
  for (p=buffer+1;isspace(*p);p++);
  if (isdigit(*p)) { /* without a line number this is an ordinary comment;
      we must not step past the terminating null into stale buffer contents */
    for (j=*p++-'0';isdigit(*p);p++) j=10*j+*p-'0';
    for (;isspace(*p);p++);
    if (*p=='\"') {
      if (!filename[filename_count]) { /* allocate the slot that receives the candidate name */
        filename[filename_count]=(Char*)calloc(FILENAME_MAX+1,sizeof(Char));
        if (!filename[filename_count])
          panic("Capacity exceeded: Out of filename memory");
@.Capacity exceeded...@>
      }
      for (p++,q=filename[filename_count];
           *p && *p!='\"' && q<filename[filename_count]+FILENAME_MAX;
           p++,q++) *q=*p;
        /* at most |FILENAME_MAX| characters are copied, so the null stored below
           always fits; a longer name fails the next test and the line stays a comment */
      if (*p=='\"' && *(p-1)!='\"') { /* yes, it's a line directive */
        *q='\0';
        for (k=0;strcmp(filename[k],filename[filename_count])!=0;k++);
          /* the search stops at |k==filename_count| at the latest, where the name matches itself */
        if (k==filename_count) { /* a name we have not seen before */
          if (filename_count==256) /* |filename| has no room for another scratch slot */
            panic("Capacity exceeded: More than 256 file names");
          filename_count++;
        }
        cur_file=k;
        line_no=j-1; /* reading the next line will advance |line_no| to |j| */
      }
    }
  }
}
|
1102 |
|
|
|
1103 |
|
|
@ Archaic versions of the \CEE/ library do not define |FILENAME_MAX|.
|
1104 |
|
|
|
1105 |
|
|
@=
|
1106 |
|
|
#ifndef FILENAME_MAX
|
1107 |
|
|
#define FILENAME_MAX 256
|
1108 |
|
|
#endif
|
1109 |
|
|
|
1110 |
|
|
@ @=
|
1111 |
|
|
register Char *p,*q; /* the place where we're currently scanning */
|
1112 |
|
|
|
1113 |
|
|
@ The next several subroutines are useful for preparing a listing of
|
1114 |
|
|
the assembled results. In such a listing, which the user can request
|
1115 |
|
|
with a command-line option, we fill the leftmost 19 columns with
|
1116 |
|
|
a representation of the output that has been assembled from the
|
1117 |
|
|
input in the buffer. Sometimes the assembled output requires
|
1118 |
|
|
more than one line, because we have room to output only a tetrabyte per line.
|
1119 |
|
|
|
1120 |
|
|
The |flush_listing_line| subroutine is called when we have finished
|
1121 |
|
|
generating one line's worth of assembled material. Its parameter is
|
1122 |
|
|
a string to be printed between the assembled material and the
|
1123 |
|
|
buffer contents, if the input line hasn't yet been echoed. The length
|
1124 |
|
|
of this string should be 19 minus the number of characters already printed
|
1125 |
|
|
on the current line of the listing.
|
1126 |
|
|
|
1127 |
|
|
@=
|
1128 |
|
|
void flush_listing_line @,@,@[ARGS((char*))@];@+@t}\6{@>
void flush_listing_line(s)
  char *s; /* filler to print ahead of the echoed input line */
{
  if (!line_listed) { /* first flush for this input line: echo the buffer */
    fprintf(listing_file,"%s%s\n",s,buffer);
    line_listed=true;
    return;
  }
  fprintf(listing_file,"\n"); /* input already echoed; just end the line */
}
|
1138 |
|
|
|
1139 |
|
|
@ Only the three least significant hex digits of a location are shown on
|
1140 |
|
|
the listing, unless the other digits have changed. The following subroutine
|
1141 |
|
|
prints an extra line when a change needs to be shown.
|
1142 |
|
|
|
1143 |
|
|
@=
|
1144 |
|
|
void update_listing_loc @,@,@[ARGS((int))@];@+@t}\6{@>
void update_listing_loc(k)
  int k; /* the location to display, mod 4 */
{
  /* an extra line is needed when anything above the low three hex digits
     differs from what the listing last showed */
  if (cur_loc.h!=listing_loc.h || ((cur_loc.l^listing_loc.l)&0xfffff000)) {
    fprintf(listing_file,"%08x%08x:",cur_loc.h,(cur_loc.l&-4)|k);
      /* that was 17 columns */
    flush_listing_line("  "); /* two blanks complete the 19-column field */
  }
  listing_loc.h=cur_loc.h;@+
  listing_loc.l=(cur_loc.l&-4)|k;
}
|
1155 |
|
|
|
1156 |
|
|
@ @=
|
1157 |
|
|
octa cur_loc; /* current location of assembled output */
|
1158 |
|
|
octa listing_loc; /* current location on the listing */
|
1159 |
|
|
unsigned char hold_buf[4]; /* assembled bytes */
|
1160 |
|
|
unsigned char held_bits; /* which bytes of |hold_buf| are active? */
|
1161 |
|
|
unsigned char listing_bits; /* which of them haven't been listed yet? */
|
1162 |
|
|
bool spec_mode; /* are we between |BSPEC| and |ESPEC|? */
|
1163 |
|
|
tetra spec_mode_loc; /* number of bytes in the current special output */
|
1164 |
|
|
|
1165 |
|
|
@ When bytes are assembled, they are placed into the |hold_buf|.
|
1166 |
|
|
More precisely, a byte assembled for a location that is |j|~plus a
|
1167 |
|
|
multiple of~4 is placed into |hold_buf[j]|; two auxiliary variables,
|
1168 |
|
|
|held_bits| and |listing_bits|, are then increased by |1<<j|.
|
1169 |
|
|
Furthermore, |listing_bits|
|
1170 |
|
|
is increased by |0x10<<j| if that byte is a future reference to be
|
1171 |
|
|
resolved later.
|
1172 |
|
|
|
1173 |
|
|
The bytes are held until we need to output them.
|
1174 |
|
|
The |listing_clear| routine lists any that have been held
|
1175 |
|
|
but not yet shown. It should be called only when |listing_bits!=0|.
|
1176 |
|
|
|
1177 |
|
|
@=
|
1178 |
|
|
void listing_clear @,@,@[ARGS((void))@];@+@t}\6{@>
void listing_clear()
{
  register int j,k;
  for (k=0;k<4;k++) if (listing_bits&(1<<k)) break;
    /* now |k| is the first byte position that is waiting to be listed */
  if (spec_mode) fprintf(listing_file,"         "); /* nine blanks */
  else {
    update_listing_loc(k);
    fprintf(listing_file," ...%03x: ",(listing_loc.l&0xffc)|k);
      /* nine columns here too */
  }
  for (j=0;j<4;j++) /* two columns per byte, eight in all */
    if (listing_bits&(0x10<<j)) fprintf(listing_file,"xx");
      /* a future reference, not yet known */
    else if (listing_bits&(1<<j)) fprintf(listing_file,"%02x",hold_buf[j]);
    else fprintf(listing_file,"  ");
  flush_listing_line("  "); /* 17 columns were printed; two more make 19 */
  listing_bits=0;
}
|
1195 |
|
|
|
1196 |
|
|
@ Error messages are written to |stderr|. If the message begins with
|
1197 |
|
|
`\.*' it is merely a warning; if it begins with `\.!' it is fatal;
|
1198 |
|
|
otherwise the error is probably serious enough to make manual correction
|
1199 |
|
|
necessary, yet it is not tragic. Errors and warnings appear
|
1200 |
|
|
also on the optional listing file.
|
1201 |
|
|
|
1202 |
|
|
@d err(m) {@+report_error(m);@+if (m[0]!='*') goto bypass;@+}
|
1203 |
|
|
@d derr(m,p) {@+sprintf(err_buf,m,p);
|
1204 |
|
|
report_error(err_buf);@+if (err_buf[0]!='*') goto bypass;@+}
|
1205 |
|
|
@d dderr(m,p,q) {@+sprintf(err_buf,m,p,q);
|
1206 |
|
|
report_error(err_buf);@+if (err_buf[0]!='*') goto bypass;@+}
|
1207 |
|
|
@d panic(m) {@+sprintf(err_buf,"!%s",m);@+report_error(err_buf);@+}
|
1208 |
|
|
@d dpanic(m,p) {@+err_buf[0]='!';@+sprintf(err_buf+1,m,p);@+
|
1209 |
|
|
report_error(err_buf);@+}
|
1210 |
|
|
|
1211 |
|
|
@=
|
1212 |
|
|
void report_error @,@,@[ARGS((char*))@];@+@t}\6{@>
|
1213 |
|
|
void report_error(message)
|
1214 |
|
|
char *message;
|
1215 |
|
|
{
|
1216 |
|
|
if (!filename[cur_file]) filename[cur_file]="(nofile)";
|
1217 |
|
|
if (message[0]=='*')
|
1218 |
|
|
fprintf(stderr,"\"%s\", line %d warning: %s\n",
|
1219 |
|
|
filename[cur_file],line_no,message+1);
|
1220 |
|
|
else if (message[0]=='!')
|
1221 |
|
|
fprintf(stderr,"\"%s\", line %d fatal error: %s\n",
|
1222 |
|
|
filename[cur_file],line_no,message+1);
|
1223 |
|
|
else {
|
1224 |
|
|
fprintf(stderr,"\"%s\", line %d: %s!\n",
|
1225 |
|
|
filename[cur_file],line_no,message);
|
1226 |
|
|
err_count++;
|
1227 |
|
|
}
|
1228 |
|
|
if (listing_file) {
|
1229 |
|
|
if (!line_listed) flush_listing_line("****************** ");
|
1230 |
|
|
if (message[0]=='*') fprintf(listing_file,
|
1231 |
|
|
"************ warning: %s\n",message+1);
|
1232 |
|
|
else if (message[0]=='!') fprintf(listing_file,
|
1233 |
|
|
"******** fatal error: %s!\n",message+1);
|
1234 |
|
|
else fprintf(listing_file,
|
1235 |
|
|
"********** error: %s!\n",message);
|
1236 |
|
|
}
|
1237 |
|
|
if (message[0]=='!') exit(-2);
|
1238 |
|
|
}
|
1239 |
|
|
|
1240 |
|
|
@ @=
|
1241 |
|
|
int err_count; /* this many errors were found */
|
1242 |
|
|
|
1243 |
|
|
@ Output to the binary |obj_file| occurs four bytes at a time. The
|
1244 |
|
|
bytes are assembled in small buffers, not output as single tetrabytes,
|
1245 |
|
|
because we want the output to be big-endian even when the assembler
|
1246 |
|
|
is running on a little-endian machine.
|
1247 |
|
|
@^big-endian versus little-endian@>
|
1248 |
|
|
@^little-endian versus big-endian@>
|
1249 |
|
|
|
1250 |
|
|
@d mmo_write(buf) if (fwrite(buf,1,4,obj_file)!=4)
|
1251 |
|
|
dpanic("Can't write on %s",obj_file_name)
|
1252 |
|
|
@.Can't write...@>
|
1253 |
|
|
|
1254 |
|
|
@=
|
1255 |
|
|
void mmo_clear @,@,@[ARGS((void))@];
|
1256 |
|
|
void mmo_out @,@,@[ARGS((void))@];
|
1257 |
|
|
unsigned char lop_quote_command[4]={mm,lop_quote,0,1};
|
1258 |
|
|
void mmo_clear() /* clears |hold_buf|, when |held_bits!=0| */
|
1259 |
|
|
{
|
1260 |
|
|
if (hold_buf[0]==mm) mmo_write(lop_quote_command);
|
1261 |
|
|
mmo_write(hold_buf);
|
1262 |
|
|
if (listing_file && listing_bits) listing_clear();
|
1263 |
|
|
held_bits=0;
|
1264 |
|
|
hold_buf[0]=hold_buf[1]=hold_buf[2]=hold_buf[3]=0;
|
1265 |
|
|
mmo_cur_loc=incr(mmo_cur_loc,4);@+ mmo_cur_loc.l&=-4;
|
1266 |
|
|
if (mmo_line_no) mmo_line_no++;
|
1267 |
|
|
}
|
1268 |
|
|
@#
|
1269 |
|
|
unsigned char mmo_buf[4];
|
1270 |
|
|
int mmo_ptr;
|
1271 |
|
|
void mmo_out() /* output the contents of |mmo_buf| */
|
1272 |
|
|
{
|
1273 |
|
|
if (held_bits) mmo_clear();
|
1274 |
|
|
mmo_write(mmo_buf);
|
1275 |
|
|
}
|
1276 |
|
|
|
1277 |
|
|
@ @=
|
1278 |
|
|
void mmo_tetra @,@,@[ARGS((tetra))@];
|
1279 |
|
|
void mmo_byte @,@,@[ARGS((unsigned char))@];
|
1280 |
|
|
void mmo_lop @,@,@[ARGS((char,unsigned char,unsigned char))@];
|
1281 |
|
|
void mmo_lopp @,@,@[ARGS((char,unsigned short))@];
|
1282 |
|
|
void mmo_tetra(t) /* output a tetrabyte */
|
1283 |
|
|
tetra t;
|
1284 |
|
|
{
|
1285 |
|
|
mmo_buf[0]=t>>24;@+ mmo_buf[1]=(t>>16)&0xff;
|
1286 |
|
|
mmo_buf[2]=(t>>8)&0xff;@+ mmo_buf[3]=t&0xff;
|
1287 |
|
|
mmo_out();
|
1288 |
|
|
}
|
1289 |
|
|
@#
|
1290 |
|
|
void mmo_byte(b)
|
1291 |
|
|
unsigned char b;
|
1292 |
|
|
{
|
1293 |
|
|
mmo_buf[(mmo_ptr++)&3]=b;
|
1294 |
|
|
if (!(mmo_ptr&3)) mmo_out();
|
1295 |
|
|
}
|
1296 |
|
|
@#
|
1297 |
|
|
void mmo_lop(x,y,z) /* output a loader operation */
|
1298 |
|
|
char x;
|
1299 |
|
|
unsigned char y,z;
|
1300 |
|
|
{
|
1301 |
|
|
mmo_buf[0]=mm;@+ mmo_buf[1]=x;@+ mmo_buf[2]=y;@+ mmo_buf[3]=z;
|
1302 |
|
|
mmo_out();
|
1303 |
|
|
}
|
1304 |
|
|
@#
|
1305 |
|
|
void mmo_lopp(x,yz) /* output a loader operation with two-byte operand */
|
1306 |
|
|
char x;
|
1307 |
|
|
unsigned short yz;
|
1308 |
|
|
{
|
1309 |
|
|
mmo_buf[0]=mm;@+ mmo_buf[1]=x;@+
|
1310 |
|
|
mmo_buf[2]=yz>>8;@+ mmo_buf[3]=yz&0xff;
|
1311 |
|
|
mmo_out();
|
1312 |
|
|
}
|
1313 |
|
|
|
1314 |
|
|
@ The |mmo_loc| subroutine makes the current location in the object file
|
1315 |
|
|
equal to |cur_loc|.
|
1316 |
|
|
|
1317 |
|
|
@=
|
1318 |
|
|
void mmo_loc @,@,@[ARGS((void))@];@+@t}\6{@>
void mmo_loc()
{
  octa gap; /* |cur_loc| minus |mmo_cur_loc| */
  if (held_bits) mmo_clear();
  gap=ominus(cur_loc,mmo_cur_loc);
  if (gap.h!=0 || gap.l>=0x10000) {
    /* the difference does not fit the two-byte operand of |lop_skip| */
    if ((cur_loc.h&0xffffff)==0) mmo_lop(lop_loc,cur_loc.h>>24,1);
    else {
      mmo_lop(lop_loc,0,2);
      mmo_tetra(cur_loc.h);
    }
    mmo_tetra(cur_loc.l);
  }@+else if (gap.l!=0) mmo_lopp(lop_skip,gap.l);
  mmo_cur_loc=cur_loc;
}
|
1335 |
|
|
|
1336 |
|
|
@ Similarly, the |mmo_sync| subroutine makes sure that the current file and
|
1337 |
|
|
line number in the output file agree with |cur_file| and |line_no|.
|
1338 |
|
|
|
1339 |
|
|
@=
|
1340 |
|
|
void mmo_sync @,@,@[ARGS((void))@];@+@t}\6{@>
void mmo_sync()
{
  register int n; register unsigned char *s;
  if (cur_file!=mmo_cur_file) {
    if (!filename_passed[cur_file]) {
      /* first mention of this file: its name follows the |lop_file|
         command, packed four bytes to a tetra */
      mmo_lop(lop_file,cur_file,(strlen(filename[cur_file])+3)>>2);
      n=0;
      for (s=filename[cur_file];*s;s++) {
        mmo_buf[n]=*s;
        if (n==3) mmo_out();
        n=(n+1)&3;
      }
      if (n) { /* pad the final partial tetra with zeros */
        while (n<4) mmo_buf[n++]=0;
        mmo_out();
      }
      filename_passed[cur_file]=1;
    }@+else mmo_lop(lop_file,cur_file,0);
    mmo_cur_file=cur_file;
    mmo_line_no=0;
  }
  if (line_no==mmo_line_no) return; /* line number already agrees */
  if (line_no>=0x10000)
    panic("I can't deal with line numbers exceeding 65535");
@.I can't deal with...@>
  mmo_lopp(lop_line,line_no);
  mmo_line_no=line_no;
}
|
1369 |
|
|
|
1370 |
|
|
@ @=
|
1371 |
|
|
octa mmo_cur_loc; /* current location in the object file */
|
1372 |
|
|
int mmo_line_no; /* current line number in the \.{mmo} output so far */
|
1373 |
|
|
int mmo_cur_file; /* index of the current file in the \.{mmo} output so far */
|
1374 |
|
|
char filename_passed[256]; /* has a filename been recorded in the output? */
|
1375 |
|
|
|
1376 |
|
|
@ Here is a basic subroutine that assembles |k| bytes starting at |cur_loc|.
|
1377 |
|
|
The value of |k| should be 1, 2, or~4, and |cur_loc| should be a multiple
|
1378 |
|
|
of~|k|. The |x_bits| parameter tells which bytes, if any, are part of
|
1379 |
|
|
a future reference.
|
1380 |
|
|
|
1381 |
|
|
@=
|
1382 |
|
|
void assemble @,@,@[ARGS((char,tetra,unsigned char))@];@+@t}\6{@>
void assemble(k,dat,x_bits)
  char k; /* number of bytes to assemble: 1, 2, or 4 */
  tetra dat; /* the data, right-justified */
  unsigned char x_bits; /* flags to be merged into |listing_bits| */
{
  register int j,jj,l;
  if (spec_mode) l=spec_mode_loc;
  else {
    l=cur_loc.l;
    /* make sure |cur_loc| and |mmo_cur_loc| refer to the same tetrabyte;
       this test is written out here so that the routine is self-contained */
    if (cur_loc.h!=mmo_cur_loc.h || ((cur_loc.l^mmo_cur_loc.l)&0xfffffffc))
      mmo_loc();
    if (!held_bits && !(cur_loc.h&0xe0000000)) mmo_sync();
  }
  for (j=0;j<k;j++) { /* most significant byte first */
    jj=(l+j)&3; /* position of this byte within its tetrabyte */
    hold_buf[jj]=(dat>>(8*(k-1-j)))&0xff;
    held_bits|=1<<jj;
    listing_bits|=1<<jj;
  }
  listing_bits|=x_bits;
  if (((l+k)&3)==0) { /* the tetrabyte is now complete */
    if (listing_file) listing_clear();
    mmo_clear();
  }
  if (spec_mode) spec_mode_loc+=k; else cur_loc=incr(cur_loc,k);
}
|
1408 |
|
|
|
1409 |
|
|
@ @=
|
1410 |
|
|
if (cur_loc.h!=mmo_cur_loc.h || ((cur_loc.l^mmo_cur_loc.l)&0xfffffffc))
|
1411 |
|
|
mmo_loc();
|
1412 |
|
|
|
1413 |
|
|
@* The symbol table. Symbols are stored and retrieved by means of
|
1414 |
|
|
a {\it ternary search trie}, following ideas of Bentley and
|
1415 |
|
|
Sedgewick. (See {\sl ACM--SIAM Symp.\ on Discrete Algorithms\/ \bf8} (1997),
|
1416 |
|
|
360--369; R.~Sedgewick, {\sl Algorithms in C\/} (Reading, Mass.:\
|
1417 |
|
|
Addison--Wesley, 1998), \S15.4.) Each trie node stores a character,
|
1418 |
|
|
@^Bentley, Jon Louis@>
|
1419 |
|
|
@^Sedgewick, Robert@>
|
1420 |
|
|
and there are branches to subtries for the cases where a given character
|
1421 |
|
|
is less than, equal to, or greater than the character in the trie.
|
1422 |
|
|
There also is a pointer to a symbol table entry if a symbol ends at
|
1423 |
|
|
the current node.
|
1424 |
|
|
|
1425 |
|
|
@s sym_tab_struct int
|
1426 |
|
|
|
1427 |
|
|
@=
|
1428 |
|
|
typedef struct ternary_trie_struct {
|
1429 |
|
|
unsigned short ch; /* the (possibly wyde) character stored here */
|
1430 |
|
|
struct ternary_trie_struct *left, *mid, *right; /* downward
|
1431 |
|
|
in the ternary trie */
|
1432 |
|
|
struct sym_tab_struct *sym; /* equivalents of symbols */
|
1433 |
|
|
} trie_node;
|
1434 |
|
|
|
1435 |
|
|
@ We allocate trie nodes in chunks of 1000 at a time.
|
1436 |
|
|
|
1437 |
|
|
@=
|
1438 |
|
|
trie_node* new_trie_node @,@,@[ARGS((void))@];@+@t}\6{@>
trie_node* new_trie_node()
{
  register trie_node *node;
  if (next_trie_node==last_trie_node) { /* no unused node is on hand */
    next_trie_node=(trie_node*)calloc(1000,sizeof(trie_node));
    if (!next_trie_node) panic("Capacity exceeded: Out of trie memory");
@.Capacity exceeded...@>
    last_trie_node=next_trie_node+1000;
  }
  node=next_trie_node++; /* hand out the next node of the current chunk */
  return node;
}
|
1451 |
|
|
|
1452 |
|
|
@ @=
|
1453 |
|
|
trie_node *trie_root; /* root of the trie */
|
1454 |
|
|
trie_node *op_root; /* root of subtrie for opcodes */
|
1455 |
|
|
trie_node *next_trie_node, *last_trie_node; /* allocation control */
|
1456 |
|
|
trie_node *cur_prefix; /* root of subtrie for unqualified symbols */
|
1457 |
|
|
|
1458 |
|
|
@ The |trie_search| subroutine starts at a given node of the trie and finds
|
1459 |
|
|
a given string in its middle subtrie, inserting new nodes if necessary.
|
1460 |
|
|
The string ends with the first nonletter or nondigit; the location
|
1461 |
|
|
of the terminating character is stored in global variable~|terminator|.
|
1462 |
|
|
|
1463 |
|
|
@d isletter(c) (isalpha(c)||c=='_'||c==':'||c>126)
|
1464 |
|
|
|
1465 |
|
|
@=
|
1466 |
|
|
trie_node *trie_search @,@,@[ARGS((trie_node*,Char*))@];
|
1467 |
|
|
Char *terminator; /* where the search ended */
|
1468 |
|
|
trie_node *trie_search(t,s)
|
1469 |
|
|
trie_node *t;
|
1470 |
|
|
Char *s;
|
1471 |
|
|
{
|
1472 |
|
|
register trie_node *tt=t;
|
1473 |
|
|
register Char *p=s;
|
1474 |
|
|
while (1) {
|
1475 |
|
|
if (!isletter(*p) && !isdigit(*p)) {
|
1476 |
|
|
terminator=p;@+return tt;
|
1477 |
|
|
}
|
1478 |
|
|
if (tt->mid) {
|
1479 |
|
|
tt=tt->mid;
|
1480 |
|
|
while (*p!=tt->ch) {
|
1481 |
|
|
if (*pch) {
|
1482 |
|
|
if (tt->left) tt=tt->left;
|
1483 |
|
|
else {
|
1484 |
|
|
tt->left=new_trie_node();@+tt=tt->left;@+goto store_new_char;
|
1485 |
|
|
}
|
1486 |
|
|
}@+else {
|
1487 |
|
|
if (tt->right) tt=tt->right;
|
1488 |
|
|
else {
|
1489 |
|
|
tt->right=new_trie_node();@+tt=tt->right;@+goto store_new_char;
|
1490 |
|
|
}
|
1491 |
|
|
}
|
1492 |
|
|
}
|
1493 |
|
|
p++;
|
1494 |
|
|
}@+else {
|
1495 |
|
|
tt->mid=new_trie_node();@+tt=tt->mid;
|
1496 |
|
|
store_new_char: tt->ch=*p++;
|
1497 |
|
|
}
|
1498 |
|
|
}
|
1499 |
|
|
}
|
1500 |
|
|
|
1501 |
|
|
@ Symbol table nodes hold the serial numbers and
|
1502 |
|
|
equivalents of defined symbols. They also
|
1503 |
|
|
hold ``fixup information'' for undefined symbols; this will allow the
|
1504 |
|
|
loader to correct any previously assembled instructions that refer to such
|
1505 |
|
|
symbols when they are eventually defined.
|
1506 |
|
|
|
1507 |
|
|
In the symbol table node for a defined symbol, the |link| field
|
1508 |
|
|
has one of the special codes |DEFINED| or |REGISTER| or |PREDEFINED|, and the
|
1509 |
|
|
|equiv| field holds the defined value. The |serial| number
|
1510 |
|
|
is a unique identifier for all user-defined symbols.
|
1511 |
|
|
|
1512 |
|
|
In the symbol table node for an undefined symbol, the |equiv| field
|
1513 |
|
|
is ignored. The |link| field
|
1514 |
|
|
points to the first node of fixup information; that node is, in turn,
|
1515 |
|
|
a symbol table node that might link to other fixups. The |serial| number
|
1516 |
|
|
in a fixup node is either 0 or 1 or 2, meaning respectively ``fixup the
|
1517 |
|
|
octabyte pointed to by |equiv|'' or ``fixup the relative address in the YZ
|
1518 |
|
|
field of the instruction pointed to by |equiv|'' or ``fixup the relative
|
1519 |
|
|
address in the XYZ field of the instruction pointed to by |equiv|.''
|
1520 |
|
|
|
1521 |
|
|
@s sym_node int
|
1522 |
|
|
@s bool int
|
1523 |
|
|
|
1524 |
|
|
@d DEFINED (sym_node*)1 /* code value for octabyte equivalents */
|
1525 |
|
|
@d REGISTER (sym_node*)2 /* code value for register-number equivalents */
|
1526 |
|
|
@d PREDEFINED (sym_node*)3 /* code value for not-yet-used equivalents */
|
1527 |
|
|
@d fix_o 0 /* |serial| code for octabyte fixup */
|
1528 |
|
|
@d fix_yz 1 /* |serial| code for relative fixup */
|
1529 |
|
|
@d fix_xyz 2 /* |serial| code for \.{JMP} fixup */
|
1530 |
|
|
|
1531 |
|
|
@=
|
1532 |
|
|
typedef struct sym_tab_struct {
|
1533 |
|
|
int serial; /* serial number of symbol; type number for fixups */
|
1534 |
|
|
struct sym_tab_struct *link; /* |DEFINED| status or link to fixup */
|
1535 |
|
|
octa equiv; /* the equivalent value */
|
1536 |
|
|
} sym_node;
|
1537 |
|
|
|
1538 |
|
|
@ The allocation of new symbol table nodes proceeds in chunks, like the
|
1539 |
|
|
allocation of trie nodes. But in this case we also have the possibility
|
1540 |
|
|
of reusing old fixup nodes that are no longer needed.
|
1541 |
|
|
|
1542 |
|
|
@d recycle_fixup(pp) pp->link=sym_avail, sym_avail=pp
|
1543 |
|
|
|
1544 |
|
|
@=
|
1545 |
|
|
sym_node* new_sym_node @,@,@[ARGS((bool))@];@+@t}\6{@>
sym_node* new_sym_node(serialize)
  bool serialize; /* should the new node receive a unique serial number? */
{
  register sym_node *node=sym_avail;
  if (!node) { /* nothing to recycle; take a node from the current chunk */
    node=next_sym_node;
    if (node==last_sym_node) {
      node=(sym_node*)calloc(1000,sizeof(sym_node));
      if (!node) panic("Capacity exceeded: Out of symbol memory");
@.Capacity exceeded...@>
      last_sym_node=node+1000;
    }
    next_sym_node=node+1;
  }@+else { /* pop a recycled fixup node and wipe its old contents */
    sym_avail=node->link;
    node->link=NULL;
    node->serial=0;
    node->equiv=zero_octa;
  }
  if (serialize) node->serial=++serial_number;
  return node;
}
|
1565 |
|
|
|
1566 |
|
|
@ @=
|
1567 |
|
|
int serial_number;
|
1568 |
|
|
sym_node *sym_root; /* root of the sym */
|
1569 |
|
|
sym_node *next_sym_node, *last_sym_node; /* allocation control */
|
1570 |
|
|
sym_node *sym_avail; /* stack of recycled symbol table nodes */
|
1571 |
|
|
|
1572 |
|
|
@ We initialize the trie by inserting all the predefined symbols.
|
1573 |
|
|
Opcodes are given the prefix \.{\^}, to distinguish them from
|
1574 |
|
|
ordinary symbols; this character nicely divides uppercase letters from
|
1575 |
|
|
lowercase letters.
|
1576 |
|
|
|
1577 |
|
|
@=
|
1578 |
|
|
trie_root=new_trie_node();
|
1579 |
|
|
cur_prefix=trie_root;
|
1580 |
|
|
op_root=new_trie_node();
|
1581 |
|
|
trie_root->mid=op_root;
|
1582 |
|
|
trie_root->ch=':';
|
1583 |
|
|
op_root->ch='^';
|
1584 |
|
|
@;
|
1585 |
|
|
@;
|
1586 |
|
|
@;
|
1587 |
|
|
|
1588 |
|
|
@ Most of the assembly work can be table driven, based on bits that
|
1589 |
|
|
are stored as the ``equivalents'' of opcode symbols like \.{\^ADD}.
|
1590 |
|
|
|
1591 |
|
|
@d rel_addr_bit 0x1 /* is YZ or XYZ relative? */
|
1592 |
|
|
@d immed_bit 0x2 /* should opcode be immediate if Z or YZ not register? */
|
1593 |
|
|
@d zar_bit 0x4 /* should register status of Z be ignored? */
|
1594 |
|
|
@d zr_bit 0x8 /* must Z be a register? */
|
1595 |
|
|
@d yar_bit 0x10 /* should register status of Y be ignored? */
|
1596 |
|
|
@d yr_bit 0x20 /* must Y be a register? */
|
1597 |
|
|
@d xar_bit 0x40 /* should register status of X be ignored? */
|
1598 |
|
|
@d xr_bit 0x80 /* must X be a register? */
|
1599 |
|
|
@d yzar_bit 0x100 /* should register status of YZ be ignored? */
|
1600 |
|
|
@d yzr_bit 0x200 /* must YZ be a register? */
|
1601 |
|
|
@d xyzar_bit 0x400 /* should register status of XYZ be ignored? */
|
1602 |
|
|
@d xyzr_bit 0x800 /* must XYZ be a register? */
|
1603 |
|
|
@d one_arg_bit 0x1000 /* is it OK to have zero or one operand? */
|
1604 |
|
|
@d two_arg_bit 0x2000 /* is it OK to have exactly two operands? */
|
1605 |
|
|
@d three_arg_bit 0x4000 /* is it OK to have exactly three operands? */
|
1606 |
|
|
@d many_arg_bit 0x8000 /* is it OK to have more than three operands? */
|
1607 |
|
|
@d align_bits 0x30000 /* how much alignment: byte, wyde, tetra, or octa? */
|
1608 |
|
|
@d no_label_bit 0x40000 /* should the label be blank? */
|
1609 |
|
|
@d mem_bit 0x80000 /* must YZ be a memory reference? */
|
1610 |
|
|
@d spec_bit 0x100000 /* is this opcode allowed in \.{SPEC} mode? */
|
1611 |
|
|
|
1612 |
|
|
@=
|
1613 |
|
|
typedef struct {
|
1614 |
|
|
Char *name; /* symbolic opcode */
|
1615 |
|
|
short code; /* numeric opcode */
|
1616 |
|
|
int bits; /* treatment of operands */
|
1617 |
|
|
} op_spec;
|
1618 |
|
|
@#
|
1619 |
|
|
typedef enum {
|
1620 |
|
|
@!SET=0x100,@!IS,@!LOC,@!PREFIX,@!BSPEC,@!ESPEC,@!GREG,@!LOCAL,@/
|
1621 |
|
|
@!BYTE,@!WYDE,@!TETRA,@!OCTA}@+@!pseudo_op;
|
1622 |
|
|
|
1623 |
|
|
@ @=
|
1624 |
|
|
op_spec op_init_table[]={@/
|
1625 |
|
|
{"TRAP", 0x00, 0x27554},
|
1626 |
|
|
@.TRAP@>
|
1627 |
|
|
{"FCMP", 0x01, 0x240a8},
|
1628 |
|
|
@.FCMP@>
|
1629 |
|
|
{"FUN", 0x02, 0x240a8},
|
1630 |
|
|
@.FUN@>
|
1631 |
|
|
{"FEQL", 0x03, 0x240a8},@/
|
1632 |
|
|
@.FEQL@>
|
1633 |
|
|
{"FADD", 0x04, 0x240a8},
|
1634 |
|
|
@.FADD@>
|
1635 |
|
|
{"FIX", 0x05, 0x26288},
|
1636 |
|
|
@.FIX@>
|
1637 |
|
|
{"FSUB", 0x06, 0x240a8},
|
1638 |
|
|
@.FSUB@>
|
1639 |
|
|
{"FIXU", 0x07, 0x26288},@/
|
1640 |
|
|
@.FIXU@>
|
1641 |
|
|
{"FLOT", 0x08, 0x26282},
|
1642 |
|
|
@.FLOT@>
|
1643 |
|
|
{"FLOTU", 0x0a, 0x26282},
|
1644 |
|
|
@.FLOTU@>
|
1645 |
|
|
{"SFLOT", 0x0c, 0x26282},
|
1646 |
|
|
@.SFLOT@>
|
1647 |
|
|
{"SFLOTU", 0x0e, 0x26282},@/
|
1648 |
|
|
@.SFLOTU@>
|
1649 |
|
|
{"FMUL", 0x10, 0x240a8},
|
1650 |
|
|
@.FMUL@>
|
1651 |
|
|
{"FCMPE", 0x11, 0x240a8},
|
1652 |
|
|
@.FCMPE@>
|
1653 |
|
|
{"FUNE", 0x12, 0x240a8},
|
1654 |
|
|
@.FUNE@>
|
1655 |
|
|
{"FEQLE", 0x13, 0x240a8},@/
|
1656 |
|
|
@.FEQLE@>
|
1657 |
|
|
{"FDIV", 0x14, 0x240a8},
|
1658 |
|
|
@.FDIV@>
|
1659 |
|
|
{"FSQRT", 0x15, 0x26288},
|
1660 |
|
|
@.FSQRT@>
|
1661 |
|
|
{"FREM", 0x16, 0x240a8},
|
1662 |
|
|
@.FREM@>
|
1663 |
|
|
{"FINT", 0x17, 0x26288},@/
|
1664 |
|
|
@.FINT@>
|
1665 |
|
|
{"MUL", 0x18, 0x240a2},
|
1666 |
|
|
@.MUL@>
|
1667 |
|
|
{"MULU", 0x1a, 0x240a2},
|
1668 |
|
|
@.MULU@>
|
1669 |
|
|
{"DIV", 0x1c, 0x240a2},
|
1670 |
|
|
@.DIV@>
|
1671 |
|
|
{"DIVU", 0x1e, 0x240a2},@/
|
1672 |
|
|
@.DIVU@>
|
1673 |
|
|
{"ADD", 0x20, 0x240a2},
|
1674 |
|
|
@.ADD@>
|
1675 |
|
|
{"ADDU", 0x22, 0x240a2},
|
1676 |
|
|
@.ADDU@>
|
1677 |
|
|
{"SUB", 0x24, 0x240a2},
|
1678 |
|
|
@.SUB@>
|
1679 |
|
|
{"SUBU", 0x26, 0x240a2},@/
|
1680 |
|
|
@.SUBU@>
|
1681 |
|
|
{"2ADDU", 0x28, 0x240a2},
|
1682 |
|
|
@.2ADDU@>
|
1683 |
|
|
{"4ADDU", 0x2a, 0x240a2},
|
1684 |
|
|
@.4ADDU@>
|
1685 |
|
|
{"8ADDU", 0x2c, 0x240a2},
|
1686 |
|
|
@.8ADDU@>
|
1687 |
|
|
{"16ADDU", 0x2e, 0x240a2},@/
|
1688 |
|
|
@.16ADDU@>
|
1689 |
|
|
{"CMP", 0x30, 0x240a2},
|
1690 |
|
|
@.CMP@>
|
1691 |
|
|
{"CMPU", 0x32, 0x240a2},
|
1692 |
|
|
@.CMPU@>
|
1693 |
|
|
{"NEG", 0x34, 0x26082},
|
1694 |
|
|
@.NEG@>
|
1695 |
|
|
{"NEGU", 0x36, 0x26082},@/
|
1696 |
|
|
@.NEGU@>
|
1697 |
|
|
{"SL", 0x38, 0x240a2},
|
1698 |
|
|
@.SL@>
|
1699 |
|
|
{"SLU", 0x3a, 0x240a2},
|
1700 |
|
|
@.SLU@>
|
1701 |
|
|
{"SR", 0x3c, 0x240a2},
|
1702 |
|
|
@.SR@>
|
1703 |
|
|
{"SRU", 0x3e, 0x240a2},@/
|
1704 |
|
|
@.SRU@>
|
1705 |
|
|
{"BN", 0x40, 0x22081},
|
1706 |
|
|
@.BN@>
|
1707 |
|
|
{"BZ", 0x42, 0x22081},
|
1708 |
|
|
@.BZ@>
|
1709 |
|
|
{"BP", 0x44, 0x22081},
|
1710 |
|
|
@.BP@>
|
1711 |
|
|
{"BOD", 0x46, 0x22081},@/
|
1712 |
|
|
@.BOD@>
|
1713 |
|
|
{"BNN", 0x48, 0x22081},
|
1714 |
|
|
@.BNN@>
|
1715 |
|
|
{"BNZ", 0x4a, 0x22081},
|
1716 |
|
|
@.BNZ@>
|
1717 |
|
|
{"BNP", 0x4c, 0x22081},
|
1718 |
|
|
@.BNP@>
|
1719 |
|
|
{"BEV", 0x4e, 0x22081},@/
|
1720 |
|
|
@.BEV@>
|
1721 |
|
|
{"PBN", 0x50, 0x22081},
|
1722 |
|
|
@.PBN@>
|
1723 |
|
|
{"PBZ", 0x52, 0x22081},
|
1724 |
|
|
@.PBZ@>
|
1725 |
|
|
{"PBP", 0x54, 0x22081},
|
1726 |
|
|
@.PBP@>
|
1727 |
|
|
{"PBOD", 0x56, 0x22081},@/
|
1728 |
|
|
@.PBOD@>
|
1729 |
|
|
{"PBNN", 0x58, 0x22081},
|
1730 |
|
|
@.PBNN@>
|
1731 |
|
|
{"PBNZ", 0x5a, 0x22081},
|
1732 |
|
|
@.PBNZ@>
|
1733 |
|
|
{"PBNP", 0x5c, 0x22081},
|
1734 |
|
|
@.PBNP@>
|
1735 |
|
|
{"PBEV", 0x5e, 0x22081},@/
|
1736 |
|
|
@.PBEV@>
|
1737 |
|
|
{"CSN", 0x60, 0x240a2},
|
1738 |
|
|
@.CSN@>
|
1739 |
|
|
{"CSZ", 0x62, 0x240a2},
|
1740 |
|
|
@.CSZ@>
|
1741 |
|
|
{"CSP", 0x64, 0x240a2},
|
1742 |
|
|
@.CSP@>
|
1743 |
|
|
{"CSOD", 0x66, 0x240a2},@/
|
1744 |
|
|
@.CSOD@>
|
1745 |
|
|
{"CSNN", 0x68, 0x240a2},
|
1746 |
|
|
@.CSNN@>
|
1747 |
|
|
{"CSNZ", 0x6a, 0x240a2},
|
1748 |
|
|
@.CSNZ@>
|
1749 |
|
|
{"CSNP", 0x6c, 0x240a2},
|
1750 |
|
|
@.CSNP@>
|
1751 |
|
|
{"CSEV", 0x6e, 0x240a2},@/
|
1752 |
|
|
@.CSEV@>
|
1753 |
|
|
{"ZSN", 0x70, 0x240a2},
|
1754 |
|
|
@.ZSN@>
|
1755 |
|
|
{"ZSZ", 0x72, 0x240a2},
|
1756 |
|
|
@.ZSZ@>
|
1757 |
|
|
{"ZSP", 0x74, 0x240a2},
|
1758 |
|
|
@.ZSP@>
|
1759 |
|
|
{"ZSOD", 0x76, 0x240a2},@/
|
1760 |
|
|
@.ZSOD@>
|
1761 |
|
|
{"ZSNN", 0x78, 0x240a2},
|
1762 |
|
|
@.ZSNN@>
|
1763 |
|
|
{"ZSNZ", 0x7a, 0x240a2},
|
1764 |
|
|
@.ZSNZ@>
|
1765 |
|
|
{"ZSNP", 0x7c, 0x240a2},
|
1766 |
|
|
@.ZSNP@>
|
1767 |
|
|
{"ZSEV", 0x7e, 0x240a2},@/
|
1768 |
|
|
@.ZSEV@>
|
1769 |
|
|
{"LDB", 0x80, 0xa60a2},
|
1770 |
|
|
@.LDB@>
|
1771 |
|
|
{"LDBU", 0x82, 0xa60a2},
|
1772 |
|
|
@.LDBU@>
|
1773 |
|
|
{"LDW", 0x84, 0xa60a2},
|
1774 |
|
|
@.LDW@>
|
1775 |
|
|
{"LDWU", 0x86, 0xa60a2},@/
|
1776 |
|
|
@.LDWU@>
|
1777 |
|
|
{"LDT", 0x88, 0xa60a2},
|
1778 |
|
|
@.LDT@>
|
1779 |
|
|
{"LDTU", 0x8a, 0xa60a2},
|
1780 |
|
|
@.LDTU@>
|
1781 |
|
|
{"LDO", 0x8c, 0xa60a2},
|
1782 |
|
|
@.LDO@>
|
1783 |
|
|
{"LDOU", 0x8e, 0xa60a2},@/
|
1784 |
|
|
@.LDOU@>
|
1785 |
|
|
{"LDSF", 0x90, 0xa60a2},
|
1786 |
|
|
@.LDSF@>
|
1787 |
|
|
{"LDHT", 0x92, 0xa60a2},
|
1788 |
|
|
@.LDHT@>
|
1789 |
|
|
{"CSWAP", 0x94, 0xa60a2},
|
1790 |
|
|
@.CSWAP@>
|
1791 |
|
|
{"LDUNC", 0x96, 0xa60a2},@/
|
1792 |
|
|
@.LDUNC@>
|
1793 |
|
|
{"LDVTS", 0x98, 0xa60a2},
|
1794 |
|
|
@.LDVTS@>
|
1795 |
|
|
{"PRELD", 0x9a, 0xa6022},
|
1796 |
|
|
@.PRELD@>
|
1797 |
|
|
{"PREGO", 0x9c, 0xa6022},
|
1798 |
|
|
@.PREGO@>
|
1799 |
|
|
{"GO", 0x9e, 0xa60a2},@/
|
1800 |
|
|
@.GO@>
|
1801 |
|
|
{"STB", 0xa0, 0xa60a2},
|
1802 |
|
|
@.STB@>
|
1803 |
|
|
{"STBU", 0xa2, 0xa60a2},
|
1804 |
|
|
@.STBU@>
|
1805 |
|
|
{"STW", 0xa4, 0xa60a2},
|
1806 |
|
|
@.STW@>
|
1807 |
|
|
{"STWU", 0xa6, 0xa60a2},@/
|
1808 |
|
|
@.STWU@>
|
1809 |
|
|
{"STT", 0xa8, 0xa60a2},
|
1810 |
|
|
@.STT@>
|
1811 |
|
|
{"STTU", 0xaa, 0xa60a2},
|
1812 |
|
|
@.STTU@>
|
1813 |
|
|
{"STO", 0xac, 0xa60a2},
|
1814 |
|
|
@.STO@>
|
1815 |
|
|
{"STOU", 0xae, 0xa60a2},@/
|
1816 |
|
|
@.STOU@>
|
1817 |
|
|
{"STSF", 0xb0, 0xa60a2},
|
1818 |
|
|
@.STSF@>
|
1819 |
|
|
{"STHT", 0xb2, 0xa60a2},
|
1820 |
|
|
@.STHT@>
|
1821 |
|
|
{"STCO", 0xb4, 0xa6022},
|
1822 |
|
|
@.STCO@>
|
1823 |
|
|
{"STUNC", 0xb6, 0xa60a2},@/
|
1824 |
|
|
@.STUNC@>
|
1825 |
|
|
{"SYNCD", 0xb8, 0xa6022},
|
1826 |
|
|
@.SYNCD@>
|
1827 |
|
|
{"PREST", 0xba, 0xa6022},
|
1828 |
|
|
@.PREST@>
|
1829 |
|
|
{"SYNCID", 0xbc, 0xa6022},
|
1830 |
|
|
@.SYNCID@>
|
1831 |
|
|
{"PUSHGO", 0xbe, 0xa6062},@/
|
1832 |
|
|
@.PUSHGO@>
|
1833 |
|
|
{"OR", 0xc0, 0x240a2},
|
1834 |
|
|
@.OR@>
|
1835 |
|
|
{"ORN", 0xc2, 0x240a2},
|
1836 |
|
|
@.ORN@>
|
1837 |
|
|
{"NOR", 0xc4, 0x240a2},
|
1838 |
|
|
@.NOR@>
|
1839 |
|
|
{"XOR", 0xc6, 0x240a2},@/
|
1840 |
|
|
@.XOR@>
|
1841 |
|
|
{"AND", 0xc8, 0x240a2},
|
1842 |
|
|
@.AND@>
|
1843 |
|
|
{"ANDN", 0xca, 0x240a2},
|
1844 |
|
|
@.ANDN@>
|
1845 |
|
|
{"NAND", 0xcc, 0x240a2},
|
1846 |
|
|
@.NAND@>
|
1847 |
|
|
{"NXOR", 0xce, 0x240a2},@/
|
1848 |
|
|
@.NXOR@>
|
1849 |
|
|
{"BDIF", 0xd0, 0x240a2},
|
1850 |
|
|
@.BDIF@>
|
1851 |
|
|
{"WDIF", 0xd2, 0x240a2},
|
1852 |
|
|
@.WDIF@>
|
1853 |
|
|
{"TDIF", 0xd4, 0x240a2},
|
1854 |
|
|
@.TDIF@>
|
1855 |
|
|
{"ODIF", 0xd6, 0x240a2},@/
|
1856 |
|
|
@.ODIF@>
|
1857 |
|
|
{"MUX", 0xd8, 0x240a2},
|
1858 |
|
|
@.MUX@>
|
1859 |
|
|
{"SADD", 0xda, 0x240a2},
|
1860 |
|
|
@.SADD@>
|
1861 |
|
|
{"MOR", 0xdc, 0x240a2},
|
1862 |
|
|
@.MOR@>
|
1863 |
|
|
{"MXOR", 0xde, 0x240a2},@/
|
1864 |
|
|
@.MXOR@>
|
1865 |
|
|
{"SETH", 0xe0, 0x22080},
|
1866 |
|
|
@.SETH@>
|
1867 |
|
|
{"SETMH", 0xe1, 0x22080},
|
1868 |
|
|
@.SETMH@>
|
1869 |
|
|
{"SETML", 0xe2, 0x22080},
|
1870 |
|
|
@.SETML@>
|
1871 |
|
|
{"SETL", 0xe3, 0x22080},@/
|
1872 |
|
|
@.SETL@>
|
1873 |
|
|
{"INCH", 0xe4, 0x22080},
|
1874 |
|
|
@.INCH@>
|
1875 |
|
|
{"INCMH", 0xe5, 0x22080},
|
1876 |
|
|
@.INCMH@>
|
1877 |
|
|
{"INCML", 0xe6, 0x22080},
|
1878 |
|
|
@.INCML@>
|
1879 |
|
|
{"INCL", 0xe7, 0x22080},@/
|
1880 |
|
|
@.INCL@>
|
1881 |
|
|
{"ORH", 0xe8, 0x22080},
|
1882 |
|
|
@.ORH@>
|
1883 |
|
|
{"ORMH", 0xe9, 0x22080},
|
1884 |
|
|
@.ORMH@>
|
1885 |
|
|
{"ORML", 0xea, 0x22080},
|
1886 |
|
|
@.ORML@>
|
1887 |
|
|
{"ORL", 0xeb, 0x22080},@/
|
1888 |
|
|
@.ORL@>
|
1889 |
|
|
{"ANDNH", 0xec, 0x22080},
|
1890 |
|
|
@.ANDNH@>
|
1891 |
|
|
{"ANDNMH", 0xed, 0x22080},
|
1892 |
|
|
@.ANDNMH@>
|
1893 |
|
|
{"ANDNML", 0xee, 0x22080},
|
1894 |
|
|
@.ANDNML@>
|
1895 |
|
|
{"ANDNL", 0xef, 0x22080},@/
|
1896 |
|
|
@.ANDNL@>
|
1897 |
|
|
{"JMP", 0xf0, 0x21001},
|
1898 |
|
|
@.JMP@>
|
1899 |
|
|
{"PUSHJ", 0xf2, 0x22041},
|
1900 |
|
|
@.PUSHJ@>
|
1901 |
|
|
{"GETA", 0xf4, 0x22081},
|
1902 |
|
|
@.GETA@>
|
1903 |
|
|
{"PUT", 0xf6, 0x22002},@/
|
1904 |
|
|
@.PUT@>
|
1905 |
|
|
{"POP", 0xf8, 0x23000},
|
1906 |
|
|
@.POP@>
|
1907 |
|
|
{"RESUME", 0xf9, 0x21000},
|
1908 |
|
|
@.RESUME@>
|
1909 |
|
|
{"SAVE", 0xfa, 0x22080},
|
1910 |
|
|
@.SAVE@>
|
1911 |
|
|
{"UNSAVE", 0xfb, 0x23a00},@/
|
1912 |
|
|
@.UNSAVE@>
|
1913 |
|
|
{"SYNC", 0xfc, 0x21000},
|
1914 |
|
|
@.SYNC@>
|
1915 |
|
|
{"SWYM", 0xfd, 0x27554},
|
1916 |
|
|
@.SWYM@>
|
1917 |
|
|
{"GET", 0xfe, 0x22080},
|
1918 |
|
|
@.GET@>
|
1919 |
|
|
{"TRIP", 0xff, 0x27554},@/
|
1920 |
|
|
@.TRIP@>
|
1921 |
|
|
{"SET",SET, 0x22180},
|
1922 |
|
|
@.SET@>
|
1923 |
|
|
{"LDA", 0x22, 0xa60a2},@/
|
1924 |
|
|
@.LDA@>
|
1925 |
|
|
{"IS", IS, 0x101400},
|
1926 |
|
|
@.IS@>
|
1927 |
|
|
{"LOC", LOC, 0x1400},
|
1928 |
|
|
@.LOC@>
|
1929 |
|
|
{"PREFIX", PREFIX, 0x141000},@/
|
1930 |
|
|
@.PREFIX@>
|
1931 |
|
|
{"BYTE", BYTE, 0x10f000},
|
1932 |
|
|
@.BYTE@>
|
1933 |
|
|
{"WYDE", WYDE, 0x11f000},
|
1934 |
|
|
@.WYDE@>
|
1935 |
|
|
{"TETRA", TETRA, 0x12f000},
|
1936 |
|
|
@.TETRA@>
|
1937 |
|
|
{"OCTA", OCTA, 0x13f000},@/
|
1938 |
|
|
@.OCTA@>
|
1939 |
|
|
{"BSPEC", BSPEC, 0x41400},
|
1940 |
|
|
@.BSPEC@>
|
1941 |
|
|
{"ESPEC", ESPEC, 0x141000},@/
|
1942 |
|
|
@.ESPEC@>
|
1943 |
|
|
{"GREG", GREG, 0x101000},
|
1944 |
|
|
@.GREG@>
|
1945 |
|
|
{"LOCAL", LOCAL, 0x141800}};
|
1946 |
|
|
@.LOCAL@>
|
1947 |
|
|
int op_init_size; /* the number of items in |op_init_table| */
|
1948 |
|
|
|
1949 |
|
|
@ @=
|
1950 |
|
|
op_init_size=(sizeof op_init_table)/sizeof(op_spec);
|
1951 |
|
|
for (j=0;j
|
1952 |
|
|
tt=trie_search(op_root,op_init_table[j].name);
|
1953 |
|
|
pp=tt->sym=new_sym_node(false);
|
1954 |
|
|
pp->link=PREDEFINED;
|
1955 |
|
|
pp->equiv.h=op_init_table[j].code, pp->equiv.l=op_init_table[j].bits;
|
1956 |
|
|
}
|
1957 |
|
|
|
1958 |
|
|
@ @=
|
1959 |
|
|
register trie_node *tt;
|
1960 |
|
|
register sym_node *pp,*qq;
|
1961 |
|
|
|
1962 |
|
|
@ @=
|
1963 |
|
|
for (j=0;j<32;j++) { /* the index |j| is also the value stored for the name */
  tt=trie_search(trie_root,special_name[j]);
  pp=new_sym_node(false);
  tt->sym=pp;
  pp->link=PREDEFINED;
  pp->equiv.l=j;
}
|
1969 |
|
|
|
1970 |
|
|
@ @=
|
1971 |
|
|
Char *special_name[32]={"rB","rD","rE","rH","rJ","rM","rR","rBB",
|
1972 |
|
|
"rC","rN","rO","rS","rI","rT","rTT","rK","rQ","rU","rV","rG","rL",
|
1973 |
|
|
"rA","rF","rP","rW","rX","rY","rZ","rWW","rXX","rYY","rZZ"};
|
1974 |
|
|
@^predefined symbols@>
|
1975 |
|
|
|
1976 |
|
|
@ @=
|
1977 |
|
|
typedef struct {
|
1978 |
|
|
Char* name;
|
1979 |
|
|
tetra h,l;
|
1980 |
|
|
}@+predef_spec;
|
1981 |
|
|
|
1982 |
|
|
@ @=
|
1983 |
|
|
predef_spec predefs[]={
|
1984 |
|
|
{"ROUND_CURRENT",0,0},
|
1985 |
|
|
@:ROUND_CURRENT}\.{ROUND\_CURRENT@>
|
1986 |
|
|
{"ROUND_OFF",0,1},
|
1987 |
|
|
@:ROUND_OFF}\.{ROUND\_OFF@>
|
1988 |
|
|
{"ROUND_UP",0,2},
|
1989 |
|
|
@:ROUND_UP}\.{ROUND\_UP@>
|
1990 |
|
|
{"ROUND_DOWN",0,3},
|
1991 |
|
|
@:ROUND_DOWN}\.{ROUND\_DOWN@>
|
1992 |
|
|
{"ROUND_NEAR",0,4},@/
|
1993 |
|
|
@:ROUND_NEAR}\.{ROUND\_NEAR@>
|
1994 |
|
|
{"Inf",0x7ff00000,0},@/
|
1995 |
|
|
@.Inf@>
|
1996 |
|
|
{"Data_Segment",0x20000000,0},
|
1997 |
|
|
@:Data_Segment}\.{Data\_Segment@>
|
1998 |
|
|
{"Pool_Segment",0x40000000,0},
|
1999 |
|
|
@:Pool_Segment}\.{Pool\_Segment@>
|
2000 |
|
|
{"Stack_Segment",0x60000000,0},@/
|
2001 |
|
|
@:Stack_Segment}\.{Stack\_Segment@>
|
2002 |
|
|
{"D_BIT",0,0x80},
|
2003 |
|
|
@:D_BIT}\.{D\_BIT@>
|
2004 |
|
|
{"V_BIT",0,0x40},
|
2005 |
|
|
@:V_BIT}\.{V\_BIT@>
|
2006 |
|
|
{"W_BIT",0,0x20},
|
2007 |
|
|
@:W_BIT}\.{W\_BIT@>
|
2008 |
|
|
{"I_BIT",0,0x10},
|
2009 |
|
|
@:I_BIT}\.{I\_BIT@>
|
2010 |
|
|
{"O_BIT",0,0x08},
|
2011 |
|
|
@:O_BIT}\.{O\_BIT@>
|
2012 |
|
|
{"U_BIT",0,0x04},
|
2013 |
|
|
@:U_BIT}\.{U\_BIT@>
|
2014 |
|
|
{"Z_BIT",0,0x02},
|
2015 |
|
|
@:Z_BIT}\.{Z\_BIT@>
|
2016 |
|
|
{"X_BIT",0,0x01},@/
|
2017 |
|
|
@:X_BIT}\.{X\_BIT@>
|
2018 |
|
|
{"D_Handler",0,0x10},
|
2019 |
|
|
@:D_Handler}\.{D\_Handler@>
|
2020 |
|
|
{"V_Handler",0,0x20},
|
2021 |
|
|
@:V_Handler}\.{V\_Handler@>
|
2022 |
|
|
{"W_Handler",0,0x30},
|
2023 |
|
|
@:W_Handler}\.{W\_Handler@>
|
2024 |
|
|
{"I_Handler",0,0x40},
|
2025 |
|
|
@:I_Handler}\.{I\_Handler@>
|
2026 |
|
|
{"O_Handler",0,0x50},
|
2027 |
|
|
@:O_Handler}\.{O\_Handler@>
|
2028 |
|
|
{"U_Handler",0,0x60},
|
2029 |
|
|
@:U_Handler}\.{U\_Handler@>
|
2030 |
|
|
{"Z_Handler",0,0x70},
|
2031 |
|
|
@:Z_Handler}\.{Z\_Handler@>
|
2032 |
|
|
{"X_Handler",0,0x80},@/
|
2033 |
|
|
@:X_Handler}\.{X\_Handler@>
|
2034 |
|
|
{"StdIn",0,0},
|
2035 |
|
|
@.StdIn@>
|
2036 |
|
|
{"StdOut",0,1},
|
2037 |
|
|
@.StdOut@>
|
2038 |
|
|
{"StdErr",0,2},@/
|
2039 |
|
|
@.StdErr@>
|
2040 |
|
|
{"TextRead",0,0},
|
2041 |
|
|
@.TextRead@>
|
2042 |
|
|
{"TextWrite",0,1},
|
2043 |
|
|
@.TextWrite@>
|
2044 |
|
|
{"BinaryRead",0,2},
|
2045 |
|
|
@.BinaryRead@>
|
2046 |
|
|
{"BinaryWrite",0,3},
|
2047 |
|
|
@.BinaryWrite@>
|
2048 |
|
|
{"BinaryReadWrite",0,4},@/
|
2049 |
|
|
@.BinaryReadWrite@>
|
2050 |
|
|
{"Halt",0,0},
|
2051 |
|
|
@.Halt@>
|
2052 |
|
|
{"Fopen",0,1},
|
2053 |
|
|
@.Fopen@>
|
2054 |
|
|
{"Fclose",0,2},
|
2055 |
|
|
@.Fclose@>
|
2056 |
|
|
{"Fread",0,3},
|
2057 |
|
|
@.Fread@>
|
2058 |
|
|
{"Fgets",0,4},
|
2059 |
|
|
@.Fgets@>
|
2060 |
|
|
{"Fgetws",0,5},
|
2061 |
|
|
@.Fgetws@>
|
2062 |
|
|
{"Fwrite",0,6},
|
2063 |
|
|
@.Fwrite@>
|
2064 |
|
|
{"Fputs",0,7},
|
2065 |
|
|
@.Fputs@>
|
2066 |
|
|
{"Fputws",0,8},
|
2067 |
|
|
@.Fputws@>
|
2068 |
|
|
{"Fseek",0,9},
|
2069 |
|
|
@.Fseek@>
|
2070 |
|
|
{"Ftell",0,10}};
|
2071 |
|
|
@.Ftell@>
|
2072 |
|
|
int predef_size;
|
2073 |
|
|
@^predefined symbols@>
|
2074 |
|
|
|
2075 |
|
|
@ @=
|
2076 |
|
|
predef_size=(sizeof predefs)/sizeof(predef_spec);
|
2077 |
|
|
for (j=0;j
|
2078 |
|
|
tt=trie_search(trie_root,predefs[j].name);
|
2079 |
|
|
pp=tt->sym=new_sym_node(false);
|
2080 |
|
|
pp->link=PREDEFINED;
|
2081 |
|
|
pp->equiv.h=predefs[j].h, pp->equiv.l=predefs[j].l;
|
2082 |
|
|
}
|
2083 |
|
|
|
2084 |
|
|
@ We place \.{Main} into the trie at the beginning of assembly,
|
2085 |
|
|
so that it will show up as an undefined symbol if the user
|
2086 |
|
|
specifies no starting point.
|
2087 |
|
|
@.Main@>
|
2088 |
|
|
|
2089 |
|
|
@=
|
2090 |
|
|
trie_search(trie_root,"Main")->sym=new_sym_node(true);
|
2091 |
|
|
|
2092 |
|
|
@ At the end of assembly we traverse the entire symbol table, visiting each
|
2093 |
|
|
symbol in lexicographic order and transmitting the trie structure to the
|
2094 |
|
|
output file. We detect any undefined future references at this time.
|
2095 |
|
|
|
2096 |
|
|
The order of traversal has a simple recursive pattern: To traverse the subtrie
|
2097 |
|
|
rooted at~|t|, we
|
2098 |
|
|
$$\vbox{\halign{#\hfil\cr
|
2099 |
|
|
traverse |t->left|, if the left subtrie is nonempty;\cr
|
2100 |
|
|
visit |t->sym|, if this symbol table entry is present;\cr
|
2101 |
|
|
traverse |t->mid|, if the middle subtrie is nonempty;\cr
|
2102 |
|
|
traverse |t->right|, if the right subtrie is nonempty.\cr
|
2103 |
|
|
}}$$
|
2104 |
|
|
This pattern leads to a compact representation in the \.{mmo} file, usually
|
2105 |
|
|
requiring fewer than two bytes per trie node plus the bytes needed to encode
|
2106 |
|
|
the equivalents and serial numbers. Each node of the trie is encoded as a
|
2107 |
|
|
``master byte'' followed by the encodings of the left subtrie,
|
2108 |
|
|
character, equivalent, middle subtrie, and right subtrie.
|
2109 |
|
|
The master byte is the sum of
|
2110 |
|
|
$$\vbox{\halign{#\hfil\cr
|
2111 |
|
|
\Hex{80}, if the character occupies two bytes instead of one;\cr
|
2112 |
|
|
\Hex{40}, if the left subtrie is nonempty;\cr
|
2113 |
|
|
\Hex{20}, if the middle subtrie is nonempty;\cr
|
2114 |
|
|
\Hex{10}, if the right subtrie is nonempty;\cr
|
2115 |
|
|
\Hex{01} to \Hex{08}, if the symbol's equivalent is one to eight bytes long;\cr
|
2116 |
|
|
\Hex{09} to \Hex{0e}, if the symbol's equivalent is $2^{61}$ plus one
|
2117 |
|
|
to six bytes;\cr
|
2118 |
|
|
\Hex{0f}, if the symbol's equivalent is \$0 plus one byte;\cr}}$$
|
2119 |
|
|
the character is omitted if the middle subtrie and the equivalent are
|
2120 |
|
|
both empty. The ``equivalent'' of an undefined symbol is zero, but
|
2121 |
|
|
stated as two bytes long.
|
2122 |
|
|
Symbol equivalents are followed by the serial number, represented as a
|
2123 |
|
|
sequence of one or more bytes in radix~128; the final byte of the serial
|
2124 |
|
|
number is tagged by adding~128. (Thus, serial number $2^{14}-1$ is
|
2125 |
|
|
encoded as \Hex{7fff}; serial number $2^{14}$ is \Hex{010080}.)
|
2126 |
|
|
|
2127 |
|
|
@ First we prune the trie by removing all predefined symbols that the
|
2128 |
|
|
user did not redefine.
|
2129 |
|
|
|
2130 |
|
|
@=
|
2131 |
|
|
trie_node* prune @,@,@[ARGS((trie_node*))@];@+@t}\6{@>
trie_node* prune(t)
  trie_node* t;
{
  register int keep; /* nonzero if node |t| must remain in the trie */
  if (t->sym && !t->sym->serial) t->sym=NULL;
    /* drop a symbol entry whose serial number is zero */
  keep=(t->sym!=NULL);
  if (t->left && (t->left=prune(t->left))!=NULL) keep=1;
  if (t->mid && (t->mid=prune(t->mid))!=NULL) keep=1;
  if (t->right && (t->right=prune(t->right))!=NULL) keep=1;
    /* each subtrie is replaced by its pruned version, possibly |NULL| */
  return keep? t: NULL;
}
|
2155 |
|
|
|
2156 |
|
|
@ Then we output the trie by following the recursive traversal pattern.
|
2157 |
|
|
|
2158 |
|
|
@=
|
2159 |
|
|
void out_stab @,@,@[ARGS((trie_node*))@];@+@t}\6{@>
|
2160 |
|
|
void out_stab(t)
|
2161 |
|
|
trie_node* t;
|
2162 |
|
|
{
|
2163 |
|
|
register int m=0,j;
|
2164 |
|
|
register sym_node *pp;
|
2165 |
|
|
if (t->ch>0xff) m+=0x80;
|
2166 |
|
|
if (t->left) m+=0x40;
|
2167 |
|
|
if (t->mid) m+=0x20;
|
2168 |
|
|
if (t->right) m+=0x10;
|
2169 |
|
|
if (t->sym) {
|
2170 |
|
|
if (t->sym->link==REGISTER) m+=0xf;
|
2171 |
|
|
else if (t->sym->link==DEFINED)
|
2172 |
|
|
@sym->equiv|@>@;
|
2173 |
|
|
else if (t->sym->link || t->sym->serial==1) @;
|
2174 |
|
|
}
|
2175 |
|
|
mmo_byte(m);
|
2176 |
|
|
if (t->left) out_stab(t->left);
|
2177 |
|
|
if (m&0x2f) @mid|@>;
|
2178 |
|
|
if (t->right) out_stab(t->right);
|
2179 |
|
|
}
|
2180 |
|
|
|
2181 |
|
|
@ A global variable called |sym_buf| holds all characters on middle branches to
|
2182 |
|
|
the current trie node; |sym_ptr| is the first currently unused
|
2183 |
|
|
character in |sym_buf|.
|
2184 |
|
|
@^Unicode@>
|
2185 |
|
|
|
2186 |
|
|
@mid|@>=
|
2187 |
|
|
{
|
2188 |
|
|
if (m&0x80) mmo_byte(t->ch>>8);
|
2189 |
|
|
mmo_byte(t->ch&0xff);
|
2190 |
|
|
*sym_ptr++=(m&0x80? '?': t->ch); /* Unicode? not yet */
|
2191 |
|
|
m&=0xf;@+ if (m && t->sym->link) {
|
2192 |
|
|
if (listing_file) @;
|
2193 |
|
|
if (m==15) m=1;
|
2194 |
|
|
else if (m>8) m-=8;
|
2195 |
|
|
for (;m>0;m--)
|
2196 |
|
|
if (m>4) mmo_byte((t->sym->equiv.h>>(8*(m-5)))&0xff);
|
2197 |
|
|
else mmo_byte((t->sym->equiv.l>>(8*(m-1)))&0xff);
|
2198 |
|
|
for (m=0;m<4;m++) if (t->sym->serial<(1<<(7*(m+1)))) break;
|
2199 |
|
|
for (;m>=0;m--)
|
2200 |
|
|
mmo_byte(((t->sym->serial>>(7*m))&0x7f)+(m? 0: 0x80));
|
2201 |
|
|
}
|
2202 |
|
|
if (t->mid) out_stab(t->mid);
|
2203 |
|
|
sym_ptr--;
|
2204 |
|
|
}
|
2205 |
|
|
|
2206 |
|
|
@ @sym->equiv|@>=
|
2207 |
|
|
{@+register tetra x;
|
2208 |
|
|
if ((t->sym->equiv.h&0xffff0000)==0x20000000)
|
2209 |
|
|
m+=8, x=t->sym->equiv.h-0x20000000; /* data segment */
|
2210 |
|
|
else x=t->sym->equiv.h;
|
2211 |
|
|
if (x) m+=4;@+ else x=t->sym->equiv.l;
|
2212 |
|
|
for (j=1;j<4;j++) if (x<(1<<(8*j))) break;
|
2213 |
|
|
m+=j;
|
2214 |
|
|
}
|
2215 |
|
|
|
2216 |
|
|
@ We make room for symbols up to 999 bytes long. Strictly speaking,
|
2217 |
|
|
the program should check if this limit is exceeded; but really!
|
2218 |
|
|
|
2219 |
|
|
@=
|
2220 |
|
|
Char sym_buf[1000];
|
2221 |
|
|
Char *sym_ptr;
|
2222 |
|
|
|
2223 |
|
|
@ The initial `\.:' of each fully qualified symbol is omitted here, since most
|
2224 |
|
|
users of \MMIXAL\ will probably not need the \.{PREFIX} feature. One
|
2225 |
|
|
consequence of this omission is that the one-character symbol~`\.:'
|
2226 |
|
|
itself, which is allowed by the rules of \MMIXAL, is printed as the null
|
2227 |
|
|
string.
|
2228 |
|
|
|
2229 |
|
|
@=
|
2230 |
|
|
{
|
2231 |
|
|
*sym_ptr='\0';
|
2232 |
|
|
fprintf(listing_file," %s = ",sym_buf+1);
|
2233 |
|
|
pp=t->sym;
|
2234 |
|
|
if (pp->link==DEFINED)
|
2235 |
|
|
fprintf(listing_file,"#%08x%08x",pp->equiv.h,pp->equiv.l);
|
2236 |
|
|
else if (pp->link==REGISTER)
|
2237 |
|
|
fprintf(listing_file,"$%03d",pp->equiv.l);
|
2238 |
|
|
else fprintf(listing_file,"?");
|
2239 |
|
|
fprintf(listing_file," (%d)\n",pp->serial);
|
2240 |
|
|
}
|
2241 |
|
|
|
2242 |
|
|
@ @=
|
2243 |
|
|
{
|
2244 |
|
|
*sym_ptr=(m&0x80? '?': t->ch); /* Unicode? not yet */
|
2245 |
|
|
*(sym_ptr+1)='\0';
|
2246 |
|
|
fprintf(stderr,"undefined symbol: %s\n",sym_buf+1);
|
2247 |
|
|
@.undefined symbol@>
|
2248 |
|
|
err_count++;
|
2249 |
|
|
m+=2;
|
2250 |
|
|
}
|
2251 |
|
|
|
2252 |
|
|
@ @=
|
2253 |
|
|
op_root->mid=NULL; /* annihilate all the opcodes */
|
2254 |
|
|
prune(trie_root);
|
2255 |
|
|
sym_ptr=sym_buf;
|
2256 |
|
|
if (listing_file) fprintf(listing_file,"\nSymbol table:\n");
|
2257 |
|
|
mmo_lop(lop_stab,0,0);
|
2258 |
|
|
out_stab(trie_root);
|
2259 |
|
|
while (mmo_ptr&3) mmo_byte(0);
|
2260 |
|
|
mmo_lopp(lop_end,mmo_ptr>>2);
|
2261 |
|
|
|
2262 |
|
|
@* Expressions. The most intricate part of the assembly process is
|
2263 |
|
|
the task of scanning and evaluating expressions in the operand field.
|
2264 |
|
|
Fortunately, \MMIXAL's expressions have a simple structure that can
|
2265 |
|
|
be handled easily with a stack-based approach.
|
2266 |
|
|
|
2267 |
|
|
Two stacks hold pending data as the operand field is scanned and evaluated.
|
2268 |
|
|
The |op_stack| contains operators that have not yet been performed; the
|
2269 |
|
|
|val_stack| contains values that have not yet been used. After an entire
|
2270 |
|
|
operand list has been scanned, the |op_stack| will be empty and the
|
2271 |
|
|
|val_stack| will hold the operand values needed to assemble the current
|
2272 |
|
|
instruction.
|
2273 |
|
|
|
2274 |
|
|
@ Entries on |op_stack| have one of the constant values defined here, and they
|
2275 |
|
|
have one of the precedence levels defined here.
|
2276 |
|
|
|
2277 |
|
|
Entries on |val_stack| have |equiv|, |link|, and |status| fields; the |link|
|
2278 |
|
|
points to a trie node if the expression is a symbol that has not yet
|
2279 |
|
|
been subjected to any operations.
|
2280 |
|
|
|
2281 |
|
|
@=
|
2282 |
|
|
typedef enum {@!negate,@!serialize,@!complement,@!registerize,@!inner_lp,@|
|
2283 |
|
|
@!plus,@!minus,@!times,@!over,@!frac,@!mod,@!shl,@!shr,@!and,@!or,@!xor,@|
|
2284 |
|
|
@!outer_lp,@!outer_rp,@!inner_rp} @!stack_op;
|
2285 |
|
|
typedef enum {@!zero,@!weak,@!strong,@!unary} @!prec;
|
2286 |
|
|
typedef enum {@!pure,@!reg_val,@!undefined} @!stat;
|
2287 |
|
|
typedef struct {
|
2288 |
|
|
octa equiv; /* current value */
|
2289 |
|
|
trie_node *link; /* trie reference for symbol */
|
2290 |
|
|
stat status; /* |pure|, |reg_val|, or |undefined| */
|
2291 |
|
|
} val_node;
|
2292 |
|
|
|
2293 |
|
|
@ @d top_op op_stack[op_ptr-1] /* top entry on the operator stack */
|
2294 |
|
|
@d top_val val_stack[val_ptr-1] /* top entry on the value stack */
|
2295 |
|
|
@d next_val val_stack[val_ptr-2] /* next-to-top entry of the value stack */
|
2296 |
|
|
|
2297 |
|
|
@=
|
2298 |
|
|
stack_op *op_stack; /* stack for pending operators */
|
2299 |
|
|
int op_ptr; /* number of items on |op_stack| */
|
2300 |
|
|
val_node *val_stack; /* stack for pending operands */
|
2301 |
|
|
int val_ptr; /* number of items on |val_stack| */
|
2302 |
|
|
prec precedence[]={unary,unary,unary,unary,zero,@|
|
2303 |
|
|
weak,weak,strong,strong,strong,strong,strong,strong,strong,weak,weak,@|
|
2304 |
|
|
zero,zero,zero}; /* precedences of the respective |stack_op| values */
|
2305 |
|
|
stack_op rt_op; /* newly scanned operator */
|
2306 |
|
|
octa acc; /* temporary accumulator */
|
2307 |
|
|
|
2308 |
|
|
@ @=
|
2309 |
|
|
op_stack=(stack_op*)calloc(buf_size,sizeof(stack_op));
|
2310 |
|
|
val_stack=(val_node*)calloc(buf_size,sizeof(val_node));
|
2311 |
|
|
if (!op_stack || !val_stack) panic("No room for the stacks");
|
2312 |
|
|
@.No room...@>
|
2313 |
|
|
|
2314 |
|
|
@ The operand field of an instruction will have been copied into a separate
|
2315 |
|
|
\&{Char} array called |operand_list| when we reach this part of the program.
|
2316 |
|
|
|
2317 |
|
|
@=
|
2318 |
|
|
p=operand_list;
|
2319 |
|
|
val_ptr=0; /* |val_stack| is empty */
|
2320 |
|
|
op_stack[0]=outer_lp, op_ptr=1;
|
2321 |
|
|
/* |op_stack| contains an ``outer left parenthesis'' */
|
2322 |
|
|
while (1) {
|
2323 |
|
|
@;
|
2324 |
|
|
scan_close: @;
|
2325 |
|
|
while (precedence[top_op]>=precedence[rt_op])
|
2326 |
|
|
@;
|
2327 |
|
|
hold_op: op_stack[op_ptr++]=rt_op;
|
2328 |
|
|
}
|
2329 |
|
|
operands_done:@;
|
2330 |
|
|
|
2331 |
|
|
@ A comment that follows an empty operand list needs to be detected here.
|
2332 |
|
|
|
2333 |
|
|
@=
|
2334 |
|
|
scan_open:@+if (isletter(*p)) @@;
|
2335 |
|
|
else if (isdigit(*p)) {
|
2336 |
|
|
if (*(p+1)=='F') @@;
|
2337 |
|
|
else if (*(p+1)=='B') @@;
|
2338 |
|
|
else @;
|
2339 |
|
|
}@+else@+ switch(*p++) {
|
2340 |
|
|
case '#': @;@+break;
|
2341 |
|
|
case '\'': @;@+break;
|
2342 |
|
|
case '\"': @;@+break;
|
2343 |
|
|
case '@@': @;@+break;
|
2344 |
|
|
case '-': op_stack[op_ptr++]=negate;
|
2345 |
|
|
case '+': goto scan_open;
|
2346 |
|
|
case '&': op_stack[op_ptr++]=serialize;@+goto scan_open;
|
2347 |
|
|
case '~': op_stack[op_ptr++]=complement;@+goto scan_open;
|
2348 |
|
|
case '$': op_stack[op_ptr++]=registerize;@+goto scan_open;
|
2349 |
|
|
case '(': op_stack[op_ptr++]=inner_lp;@+goto scan_open;
|
2350 |
|
|
default: if (p==operand_list+1) { /* treat operand list as empty */
|
2351 |
|
|
operand_list[0]='0', operand_list[1]='\0', p=operand_list;
|
2352 |
|
|
goto scan_open;
|
2353 |
|
|
}
|
2354 |
|
|
if (*(p-1)) derr("syntax error at character `%c'",*(p-1));
|
2355 |
|
|
derr("syntax error after character `%c'",*(p-2));
|
2356 |
|
|
@.syntax error...@>
|
2357 |
|
|
}
|
2358 |
|
|
|
2359 |
|
|
@ @=
|
2360 |
|
|
{
|
2361 |
|
|
if (*p==':') tt=trie_search(trie_root,p+1);
|
2362 |
|
|
else tt=trie_search(cur_prefix,p);
|
2363 |
|
|
p=terminator;
|
2364 |
|
|
symbol_found: val_ptr++;
|
2365 |
|
|
pp=tt->sym;
|
2366 |
|
|
if (!pp) pp=tt->sym=new_sym_node(true);
|
2367 |
|
|
top_val.link=tt, top_val.equiv=pp->equiv;
|
2368 |
|
|
if (pp->link==PREDEFINED) pp->link=DEFINED;
|
2369 |
|
|
top_val.status=(pp->link==DEFINED? pure: pp->link==REGISTER? reg_val:
|
2370 |
|
|
undefined);
|
2371 |
|
|
}
|
2372 |
|
|
|
2373 |
|
|
@ @=
|
2374 |
|
|
{
|
2375 |
|
|
tt=&forward_local_host[*p-'0'];@+ p+=2;@+ goto symbol_found;
|
2376 |
|
|
}
|
2377 |
|
|
|
2378 |
|
|
@ @=
|
2379 |
|
|
{
|
2380 |
|
|
tt=&backward_local_host[*p-'0'];@+ p+=2;@+ goto symbol_found;
|
2381 |
|
|
}
|
2382 |
|
|
|
2383 |
|
|
@ Statically allocated variables |forward_local_host[j]| and
|
2384 |
|
|
|backward_local_host[j]| masquerade as nodes of the trie.
|
2385 |
|
|
|
2386 |
|
|
@=
|
2387 |
|
|
trie_node forward_local_host[10], backward_local_host[10];
|
2388 |
|
|
sym_node forward_local[10], backward_local[10];
|
2389 |
|
|
|
2390 |
|
|
@ Initially \.{0H}, \.{1H}, \dots, \.{9H} are defined to be zero.
|
2391 |
|
|
|
2392 |
|
|
@=
|
2393 |
|
|
for (j=0;j<10;j++) {
|
2394 |
|
|
forward_local_host[j].sym=&forward_local[j];
|
2395 |
|
|
backward_local_host[j].sym=&backward_local[j];
|
2396 |
|
|
backward_local[j].link=DEFINED;
|
2397 |
|
|
}
|
2398 |
|
|
|
2399 |
|
|
@ We have already checked to make sure that the character constant is legal.
|
2400 |
|
|
|
2401 |
|
|
@=
|
2402 |
|
|
acc.h=0, acc.l=*p;
|
2403 |
|
|
p+=2;
|
2404 |
|
|
goto constant_found;
|
2405 |
|
|
|
2406 |
|
|
@ @=
|
2407 |
|
|
acc.h=0, acc.l=*p;
|
2408 |
|
|
if (*p=='\"') {
|
2409 |
|
|
p++; acc.l=0; err("*null string is treated as zero");
|
2410 |
|
|
@.null string...@>
|
2411 |
|
|
}@+else if (*(p+1)=='\"') p+=2;
|
2412 |
|
|
else *p='\"', *--p=',';
|
2413 |
|
|
goto constant_found;
|
2414 |
|
|
|
2415 |
|
|
@ @=
|
2416 |
|
|
acc.h=0, acc.l=*p-'0';
|
2417 |
|
|
for (p++;isdigit(*p);p++) {
|
2418 |
|
|
acc=oplus(acc,shift_left(acc,2));
|
2419 |
|
|
acc=incr(shift_left(acc,1),*p-'0');
|
2420 |
|
|
}
|
2421 |
|
|
constant_found: val_ptr++;
|
2422 |
|
|
top_val.link=NULL;
|
2423 |
|
|
top_val.equiv=acc;
|
2424 |
|
|
top_val.status=pure;
|
2425 |
|
|
|
2426 |
|
|
@ @=
|
2427 |
|
|
if (!isxdigit(*p)) err("illegal hexadecimal constant");
|
2428 |
|
|
@.illegal hexadecimal constant@>
|
2429 |
|
|
acc.h=acc.l=0;
|
2430 |
|
|
for (;isxdigit(*p);p++) {
|
2431 |
|
|
acc=incr(shift_left(acc,4),*p-'0');
|
2432 |
|
|
if (*p>='a') acc=incr(acc,'0'-'a'+10);
|
2433 |
|
|
else if (*p>='A') acc=incr(acc,'0'-'A'+10);
|
2434 |
|
|
}
|
2435 |
|
|
goto constant_found;
|
2436 |
|
|
|
2437 |
|
|
@ @=
|
2438 |
|
|
acc=cur_loc;
|
2439 |
|
|
goto constant_found;
|
2440 |
|
|
|
2441 |
|
|
@ @=
|
2442 |
|
|
switch(*p++) { /* |p| now points just past the character being examined */
 case '+': rt_op=plus;@+break;
 case '-': rt_op=minus;@+break;
 case '*': rt_op=times;@+break;
 case '/':@+if (*p!='/') rt_op=over;
   else p++,rt_op=frac;@+break;
 case '%': rt_op=mod;@+break;
 case '<': rt_op=shl;@+goto sh_check;
 case '>': rt_op=shr;
 sh_check: p++; /* advance first, so that |p| is never modified and read in one expression */
   if (*(p-1)==*(p-2)) break; /* the shift character was doubled, as required */
   derr("syntax error at `%c'",*(p-2));
@.syntax error...@>
 case '&': rt_op=and;@+break;
 case '|': rt_op=or;@+break;
 case '^': rt_op=xor;@+break;
 case ')': rt_op=inner_rp;@+break;
 case '\0': case ',': rt_op=outer_rp;@+break;
 default: derr("syntax error at `%c'",*(p-1));
}
|
2461 |
|
|
|
2462 |
|
|
@ @=
|
2463 |
|
|
switch(op_stack[--op_ptr]) {
|
2464 |
|
|
case inner_lp:@+if (rt_op==inner_rp) goto scan_close;
|
2465 |
|
|
err("*missing right parenthesis");@+break;
|
2466 |
|
|
@.missing right parenthesis@>
|
2467 |
|
|
case outer_lp:@+if (rt_op==outer_rp) {
|
2468 |
|
|
if (top_val.status==reg_val && (top_val.equiv.l>0xff||top_val.equiv.h)) {
|
2469 |
|
|
err("*register number too large, will be reduced mod 256");
|
2470 |
|
|
@.register number...@>
|
2471 |
|
|
top_val.equiv.h=0, top_val.equiv.l &= 0xff;
|
2472 |
|
|
}
|
2473 |
|
|
if (!*(p-1)) goto operands_done;
|
2474 |
|
|
else rt_op=outer_lp;@+goto hold_op; /* comma */
|
2475 |
|
|
}@+else {
|
2476 |
|
|
op_ptr++;
|
2477 |
|
|
err("*missing left parenthesis");
|
2478 |
|
|
@.missing left parenthesis@>
|
2479 |
|
|
goto scan_close;
|
2480 |
|
|
}
|
2481 |
|
|
@t\4@>@@;
|
2482 |
|
|
@t\4@>@@;
|
2483 |
|
|
}
|
2484 |
|
|
|
2485 |
|
|
@ Now we come to the part where equivalents are changed by unary
|
2486 |
|
|
or binary operators found in the expression being scanned.
|
2487 |
|
|
|
2488 |
|
|
The most typical operator, and in some ways the fussiest one
|
2489 |
|
|
to deal with, is binary addition. Once we've written the code for
|
2490 |
|
|
this case, the other cases almost take care of themselves.
|
2491 |
|
|
|
2492 |
|
|
@=
|
2493 |
|
|
case plus:@+if (top_val.status==undefined)
|
2494 |
|
|
err("cannot add an undefined quantity");
|
2495 |
|
|
@.cannot add...@>
|
2496 |
|
|
if (next_val.status==undefined)
|
2497 |
|
|
err("cannot add to an undefined quantity");
|
2498 |
|
|
if (top_val.status==reg_val && next_val.status==reg_val)
|
2499 |
|
|
err("cannot add two register numbers");
|
2500 |
|
|
next_val.equiv=oplus(next_val.equiv,top_val.equiv);
|
2501 |
|
|
fin_bin: next_val.status=(top_val.status==next_val.status? pure: reg_val);
|
2502 |
|
|
val_ptr--;
|
2503 |
|
|
delink: top_val.link=NULL;@+break;
|
2504 |
|
|
|
2505 |
|
|
@ @d unary_check(verb) if (top_val.status!=pure)
|
2506 |
|
|
derr("can %s pure values only",verb)
|
2507 |
|
|
|
2508 |
|
|
@=
|
2509 |
|
|
case negate: unary_check("negate");
|
2510 |
|
|
@.can negate...@>
|
2511 |
|
|
top_val.equiv=ominus(zero_octa,top_val.equiv);@+goto delink;
|
2512 |
|
|
case complement: unary_check("complement");
|
2513 |
|
|
@.can complement...@>
|
2514 |
|
|
top_val.equiv.h=~top_val.equiv.h, top_val.equiv.l=~top_val.equiv.l;
|
2515 |
|
|
goto delink;
|
2516 |
|
|
case registerize: unary_check("registerize");
|
2517 |
|
|
@.can registerize...@>
|
2518 |
|
|
top_val.status=reg_val;@+goto delink;
|
2519 |
|
|
case serialize:@+if (!top_val.link)
|
2520 |
|
|
err("can take serial number of symbol only");
|
2521 |
|
|
@.can take serial number...@>
|
2522 |
|
|
top_val.equiv.h=0, top_val.equiv.l=top_val.link->sym->serial;
|
2523 |
|
|
top_val.status=pure;@+goto delink;
|
2524 |
|
|
|
2525 |
|
|
@ @d binary_check(verb)
|
2526 |
|
|
if (top_val.status!=pure || next_val.status!=pure)
|
2527 |
|
|
derr("can %s pure values only",verb)
|
2528 |
|
|
|
2529 |
|
|
@=
|
2530 |
|
|
case minus:@+if (top_val.status==undefined)
|
2531 |
|
|
err("cannot subtract an undefined quantity");
|
2532 |
|
|
@.cannot subtract...@>
|
2533 |
|
|
if (next_val.status==undefined)
|
2534 |
|
|
err("cannot subtract from an undefined quantity");
|
2535 |
|
|
if (top_val.status==reg_val && next_val.status!=reg_val)
|
2536 |
|
|
err("cannot subtract register number from pure value");
|
2537 |
|
|
next_val.equiv=ominus(next_val.equiv,top_val.equiv);@+goto fin_bin;
|
2538 |
|
|
case times: binary_check("multiply");
|
2539 |
|
|
@.can multiply...@>
|
2540 |
|
|
next_val.equiv=omult(next_val.equiv,top_val.equiv);@+goto fin_bin;
|
2541 |
|
|
case over: case mod: binary_check("divide");
|
2542 |
|
|
@.can divide...@>
|
2543 |
|
|
if (top_val.equiv.l==0 && top_val.equiv.h==0)
|
2544 |
|
|
err("*division by zero");
|
2545 |
|
|
@.division by zero@>
|
2546 |
|
|
next_val.equiv=odiv(zero_octa,next_val.equiv,top_val.equiv);
|
2547 |
|
|
if (op_stack[op_ptr]==mod) next_val.equiv=aux;
|
2548 |
|
|
goto fin_bin;
|
2549 |
|
|
case frac: binary_check("compute a ratio of");
|
2550 |
|
|
@.can compute...@>
|
2551 |
|
|
if (next_val.equiv.h>=top_val.equiv.h &&
|
2552 |
|
|
(next_val.equiv.l>=top_val.equiv.l || next_val.equiv.h>top_val.equiv.h))
|
2553 |
|
|
err("*illegal fraction");
|
2554 |
|
|
@.illegal fraction@>
|
2555 |
|
|
next_val.equiv=odiv(next_val.equiv,zero_octa,top_val.equiv);@+goto fin_bin;
|
2556 |
|
|
case shl: case shr: binary_check("compute a bitwise shift of");
|
2557 |
|
|
if (top_val.equiv.h || top_val.equiv.l>63) next_val.equiv=zero_octa;
|
2558 |
|
|
else if (op_stack[op_ptr]==shl)
|
2559 |
|
|
next_val.equiv=shift_left(next_val.equiv,top_val.equiv.l);
|
2560 |
|
|
else next_val.equiv=shift_right(next_val.equiv,top_val.equiv.l,true);
|
2561 |
|
|
goto fin_bin;
|
2562 |
|
|
case and: binary_check("compute bitwise and of");
|
2563 |
|
|
next_val.equiv.h&=top_val.equiv.h, next_val.equiv.l&=top_val.equiv.l;
|
2564 |
|
|
goto fin_bin;
|
2565 |
|
|
case or: binary_check("compute bitwise or of");
|
2566 |
|
|
next_val.equiv.h|=top_val.equiv.h, next_val.equiv.l|=top_val.equiv.l;
|
2567 |
|
|
goto fin_bin;
|
2568 |
|
|
case xor: binary_check("compute bitwise xor of");
|
2569 |
|
|
next_val.equiv.h^=top_val.equiv.h, next_val.equiv.l^=top_val.equiv.l;
|
2570 |
|
|
goto fin_bin;
|
2571 |
|
|
|
2572 |
|
|
@* Assembling an instruction.
|
2573 |
|
|
Now let's move up from the expression level to the instruction level. We get to
|
2574 |
|
|
this part of the program at the beginning of a line, or after a
|
2575 |
|
|
semicolon at the end of an instruction earlier on the current line.
|
2576 |
|
|
Our current position in the buffer is the value of |buf_ptr|.
|
2577 |
|
|
|
2578 |
|
|
@=
|
2579 |
|
|
p=buf_ptr;@+ buf_ptr="";
|
2580 |
|
|
@;
|
2581 |
|
|
@;
|
2582 |
|
|
@;
|
2583 |
|
|
buf_ptr=p;
|
2584 |
|
|
if (spec_mode && !(op_bits&spec_bit))
|
2585 |
|
|
derr("cannot use `%s' in special mode",op_field);
|
2586 |
|
|
@.cannot use...@>
|
2587 |
|
|
if ((op_bits&no_label_bit) && lab_field[0]) {
|
2588 |
|
|
derr("*label field of `%s' instruction is ignored",op_field);
|
2589 |
|
|
lab_field[0]='\0';
|
2590 |
|
|
}
|
2591 |
|
|
@.label field...ignored@>
|
2592 |
|
|
if (op_bits&align_bits) @;
|
2593 |
|
|
@;
|
2594 |
|
|
if (opcode==GREG) @;
|
2595 |
|
|
if (lab_field[0]) @;
|
2596 |
|
|
@;
|
2597 |
|
|
bypass:@;
|
2598 |
|
|
|
2599 |
|
|
@ @=
|
2600 |
|
|
if (!*p) goto bypass;
|
2601 |
|
|
q=lab_field;
|
2602 |
|
|
if (!isspace(*p)) {
|
2603 |
|
|
if (!isdigit(*p)&&!isletter(*p)) goto bypass; /* comment */
|
2604 |
|
|
for (*q++=*p++;isdigit(*p)||isletter(*p);p++,q++) *q=*p;
|
2605 |
|
|
if (*p && !isspace(*p)) derr("label syntax error at `%c'",*p);
|
2606 |
|
|
@.label syntax error...@>
|
2607 |
|
|
}
|
2608 |
|
|
*q='\0';
|
2609 |
|
|
if (isdigit(lab_field[0]) && (lab_field[1]!='H' || lab_field[2]))
|
2610 |
|
|
derr("improper local label `%s'",lab_field);
|
2611 |
|
|
@.improper local label...@>
|
2612 |
|
|
for (p++;isspace(*p);p++);
|
2613 |
|
|
|
2614 |
|
|
@ We copy the opcode field to a special buffer because we might
|
2615 |
|
|
want to refer to the symbolic opcode in error messages.
|
2616 |
|
|
|
2617 |
|
|
@=
|
2618 |
|
|
q=op_field;@+
|
2619 |
|
|
while (isletter(*p)||isdigit(*p)) *q++=*p++;
|
2620 |
|
|
*q='\0';
|
2621 |
|
|
if (!isspace(*p) && *p && op_field[0]) derr("opcode syntax error at `%c'",*p);
|
2622 |
|
|
@.opcode syntax error...@>
|
2623 |
|
|
pp=trie_search(op_root,op_field)->sym;
|
2624 |
|
|
if (!pp) {
|
2625 |
|
|
if (op_field[0]) derr("unknown operation code `%s'",op_field);
|
2626 |
|
|
@.unknown operation code@>
|
2627 |
|
|
if (lab_field[0]) derr("*no opcode; label `%s' will be ignored",lab_field);
|
2628 |
|
|
@.no opcode...@>
|
2629 |
|
|
goto bypass;
|
2630 |
|
|
}
|
2631 |
|
|
opcode=pp->equiv.h, op_bits=pp->equiv.l;
|
2632 |
|
|
while (isspace(*p)) p++;
|
2633 |
|
|
|
2634 |
|
|
@ @=
|
2635 |
|
|
tetra opcode; /* numeric code for \MMIX\ operation or \MMIXAL\ pseudo-op */
|
2636 |
|
|
tetra op_bits; /* flags describing an operator's special characteristics */
|
2637 |
|
|
|
2638 |
|
|
@ We copy the operand field to a special buffer so that we can
|
2639 |
|
|
change string constants while scanning them later.
|
2640 |
|
|
|
2641 |
|
|
@=
|
2642 |
|
|
q=operand_list;
|
2643 |
|
|
while (*p) {
|
2644 |
|
|
if (*p==';') break;
|
2645 |
|
|
if (*p=='\'') {
|
2646 |
|
|
*q++=*p++;
|
2647 |
|
|
if (!*p) err("incomplete character constant");
|
2648 |
|
|
@.incomplete...constant@>
|
2649 |
|
|
*q++=*p++;
|
2650 |
|
|
if (*p!='\'') err("illegal character constant");
|
2651 |
|
|
@.illegal character constant@>
|
2652 |
|
|
}@+else if (*p=='\"') {
|
2653 |
|
|
for (*q++=*p++;*p && *p!='\"';p++,q++) *q=*p;
|
2654 |
|
|
if (!*p) err("incomplete string constant");
|
2655 |
|
|
}
|
2656 |
|
|
*q++=*p++;
|
2657 |
|
|
if (isspace(*p)) break;
|
2658 |
|
|
}
|
2659 |
|
|
while (isspace(*p)) p++;
|
2660 |
|
|
if (*p==';') p++;
|
2661 |
|
|
else p=""; /* if not followed by semicolon, rest of the line is a comment */
|
2662 |
|
|
if (q==operand_list) *q++='0'; /* change empty operand field to `\.0' */
|
2663 |
|
|
*q='\0';
|
2664 |
|
|
|
2665 |
|
|
@ It is important to do the alignment in this step before defining
|
2666 |
|
|
the label or evaluating the operand field.
|
2667 |
|
|
|
2668 |
|
|
@=
|
2669 |
|
|
{
|
2670 |
|
|
j=(op_bits&align_bits)>>16;
|
2671 |
|
|
acc.h=-1, acc.l=-(1<
|
2672 |
|
|
cur_loc=oand(incr(cur_loc,(1<
|
2673 |
|
|
}
|
2674 |
|
|
|
2675 |
|
|
@ @=
|
2676 |
|
|
{
|
2677 |
|
|
if (val_stack[0].equiv.l || val_stack[0].equiv.h) {
|
2678 |
|
|
for (j=greg;j<255;j++)
|
2679 |
|
|
if (greg_val[j].l==val_stack[0].equiv.l &&
|
2680 |
|
|
greg_val[j].h==val_stack[0].equiv.h) {
|
2681 |
|
|
cur_greg=j; goto got_greg;
|
2682 |
|
|
}
|
2683 |
|
|
}
|
2684 |
|
|
if (greg==32) err("too many global registers");
|
2685 |
|
|
@.too many global registers@>
|
2686 |
|
|
greg--;
|
2687 |
|
|
greg_val[greg]=val_stack[0].equiv;@+ cur_greg=greg;
|
2688 |
|
|
got_greg:;
|
2689 |
|
|
}
|
2690 |
|
|
|
2691 |
|
|
@ If the label is, say \.{2H}, we will already have used the old
|
2692 |
|
|
value of \.{2B} when evaluating the operands. Furthermore, an
|
2693 |
|
|
operand of \.{2F} will have been treated as undefined, which it
|
2694 |
|
|
still is.
|
2695 |
|
|
|
2696 |
|
|
Symbols can be defined more than once, but only if each definition
|
2697 |
|
|
gives them the same equivalent value.
|
2698 |
|
|
|
2699 |
|
|
A warning message is given when a predefined symbol is being redefined,
|
2700 |
|
|
if its predefined value has already been used.
|
2701 |
|
|
|
2702 |
|
|
@=
|
2703 |
|
|
{
|
2704 |
|
|
sym_node *new_link=DEFINED;
|
2705 |
|
|
acc=cur_loc;
|
2706 |
|
|
if (opcode==IS) {
|
2707 |
|
|
cur_loc=val_stack[0].equiv;
|
2708 |
|
|
if (val_stack[0].status==reg_val) new_link=REGISTER;
|
2709 |
|
|
}@+else if (opcode==GREG) cur_loc.h=0, cur_loc.l=cur_greg, new_link=REGISTER;
|
2710 |
|
|
@;
|
2711 |
|
|
if (pp->link==DEFINED || pp->link==REGISTER) {
|
2712 |
|
|
if (pp->equiv.l!=cur_loc.l||pp->equiv.h!=cur_loc.h || pp->link!=new_link) {
|
2713 |
|
|
if (pp->serial) derr("symbol `%s' is already defined",lab_field);
|
2714 |
|
|
@.symbol...already defined@>
|
2715 |
|
|
pp->serial=++serial_number;
|
2716 |
|
|
derr("*redefinition of predefined symbol `%s'",lab_field);
|
2717 |
|
|
@.redefinition...@>
|
2718 |
|
|
}
|
2719 |
|
|
}@+ else if (pp->link==PREDEFINED) pp->serial=++serial_number;
|
2720 |
|
|
else if (pp->link) {
|
2721 |
|
|
if (new_link==REGISTER) err("future reference cannot be to a register");
|
2722 |
|
|
@.future reference cannot...@>
|
2723 |
|
|
do @@;@+while (pp->link);
|
2724 |
|
|
}
|
2725 |
|
|
if (isdigit(lab_field[0])) pp=&backward_local[lab_field[0]-'0'];
|
2726 |
|
|
pp->equiv=cur_loc;@+ pp->link=new_link;
|
2727 |
|
|
@;
|
2728 |
|
|
if (listing_file && (opcode==IS || opcode==LOC))
|
2729 |
|
|
@;
|
2730 |
|
|
cur_loc=acc;
|
2731 |
|
|
}
|
2732 |
|
|
|
2733 |
|
|
@ @=
|
2734 |
|
|
if (!isdigit(lab_field[0]))
|
2735 |
|
|
for (j=0;j
|
2736 |
|
|
if (val_stack[j].status==undefined && val_stack[j].link->sym==pp) {
|
2737 |
|
|
val_stack[j].status=(new_link==REGISTER? reg_val: pure);
|
2738 |
|
|
val_stack[j].equiv=cur_loc;
|
2739 |
|
|
}
|
2740 |
|
|
|
2741 |
|
|
@ @=
|
2742 |
|
|
if (isdigit(lab_field[0])) pp=&forward_local[lab_field[0]-'0'];
|
2743 |
|
|
else {
|
2744 |
|
|
if (lab_field[0]==':') tt=trie_search(trie_root,lab_field+1);
|
2745 |
|
|
else tt=trie_search(cur_prefix,lab_field);
|
2746 |
|
|
pp=tt->sym;
|
2747 |
|
|
if (!pp) pp=tt->sym=new_sym_node(true);
|
2748 |
|
|
}
|
2749 |
|
|
|
2750 |
|
|
@ @=
|
2751 |
|
|
{
|
2752 |
|
|
qq=pp->link;
|
2753 |
|
|
pp->link=qq->link;
|
2754 |
|
|
mmo_loc();
|
2755 |
|
|
if (qq->serial==fix_o) @@;
|
2756 |
|
|
else @;
|
2757 |
|
|
recycle_fixup(qq);
|
2758 |
|
|
}
|
2759 |
|
|
|
2760 |
|
|
@ @=
|
2761 |
|
|
{
  tetra hi=qq->equiv.h; /* high half of the location recorded in the fixup node */
  if ((hi&0xffffff)==0) mmo_lop(lop_fixo,hi>>24,1);
    /* only the top byte is nonzero, so it travels inside the |lop_fixo| itself */
  else {
    mmo_lop(lop_fixo,0,2);@+mmo_tetra(hi); /* otherwise the high half gets its own tetra */
  }
  mmo_tetra(qq->equiv.l); /* the low half is output in both cases */
}
|
2768 |
|
|
|
2769 |
|
|
@ @=
|
2770 |
|
|
{
|
2771 |
|
|
octa o;
|
2772 |
|
|
o=ominus(cur_loc,qq->equiv);
|
2773 |
|
|
if (o.l&3)
|
2774 |
|
|
dderr("*relative address in location #%08x%08x not divisible by 4",
|
2775 |
|
|
@.relative address...@>
|
2776 |
|
|
qq->equiv.h,qq->equiv.l);
|
2777 |
|
|
o=shift_right(o,2,0);@+
|
2778 |
|
|
k=0;
|
2779 |
|
|
if (o.h==0)
|
2780 |
|
|
if (o.l<0x10000) mmo_lopp(lop_fixr,o.l);
|
2781 |
|
|
else if (qq->serial==fix_xyz && o.l<0x1000000) {
|
2782 |
|
|
mmo_lop(lop_fixrx,0,24);@+mmo_tetra(o.l);
|
2783 |
|
|
}@+else k=1;
|
2784 |
|
|
else if (o.h==0xffffffff)
|
2785 |
|
|
if (qq->serial==fix_xyz && o.l>=0xff000000) {
|
2786 |
|
|
mmo_lop(lop_fixrx,0,24);@+mmo_tetra(o.l&0x1ffffff);
|
2787 |
|
|
}@+else if (qq->serial==fix_yz && o.l>=0xffff0000) {
|
2788 |
|
|
mmo_lop(lop_fixrx,0,16);@+mmo_tetra(o.l&0x100ffff);
|
2789 |
|
|
}@+else k=1;
|
2790 |
|
|
else k=1;
|
2791 |
|
|
if (k) dderr("relative address in location #%08x%08x is too far away",
|
2792 |
|
|
qq->equiv.h,qq->equiv.l);
|
2793 |
|
|
}
|
2794 |
|
|
|
2795 |
|
|
@ @=
|
2796 |
|
|
if (new_link==DEFINED) {
|
2797 |
|
|
fprintf(listing_file,"(%08x%08x)",cur_loc.h,cur_loc.l);
|
2798 |
|
|
flush_listing_line(" ");
|
2799 |
|
|
}@+else {
|
2800 |
|
|
fprintf(listing_file,"($%03d)",cur_loc.l&0xff);
|
2801 |
|
|
flush_listing_line(" ");
|
2802 |
|
|
}
|
2803 |
|
|
|
2804 |
|
|
@ @=
|
2805 |
|
|
future_bits=0;
|
2806 |
|
|
if (op_bits&many_arg_bit) @@;
|
2807 |
|
|
else@+switch (val_ptr) {
|
2808 |
|
|
case 1:@+if (!(op_bits&one_arg_bit))
|
2809 |
|
|
derr("opcode `%s' needs more than one operand",op_field);
|
2810 |
|
|
@.opcode...operand(s)@>
|
2811 |
|
|
@;
|
2812 |
|
|
case 2:@+if (!(op_bits&two_arg_bit))
|
2813 |
|
|
if (op_bits&one_arg_bit)
|
2814 |
|
|
derr("opcode `%s' must not have two operands",op_field)@;
|
2815 |
|
|
else derr("opcode `%s' must have more than two operands",op_field);
|
2816 |
|
|
@;
|
2817 |
|
|
case 3:@+if (!(op_bits&three_arg_bit))
|
2818 |
|
|
derr("opcode `%s' must not have three operands",op_field);
|
2819 |
|
|
@;
|
2820 |
|
|
default: derr("too many operands for opcode `%s'",op_field);
|
2821 |
|
|
@.too many operands...@>
|
2822 |
|
|
}
|
2823 |
|
|
|
2824 |
|
|
@ The many-operand operators are |BYTE|, |WYDE|, |TETRA|, and |OCTA|.
|
2825 |
|
|
|
2826 |
|
|
@=
|
2827 |
|
|
for (j=0;j
|
2828 |
|
|
@;
|
2829 |
|
|
k=1<<(opcode-BYTE);
|
2830 |
|
|
if ((val_stack[j].equiv.h && opcode
|
2831 |
|
|
(val_stack[j].equiv.l>0xffff && opcode
|
2832 |
|
|
(val_stack[j].equiv.l>0xff && opcode
|
2833 |
|
|
if (k==1) err("*constant doesn't fit in one byte")@;
|
2834 |
|
|
@.constant doesn't fit...@>
|
2835 |
|
|
else derr("*constant doesn't fit in %d bytes",k);
|
2836 |
|
|
if (k<8) assemble(k,val_stack[j].equiv.l,0);
|
2837 |
|
|
else if (val_stack[j].status==undefined)
|
2838 |
|
|
assemble(4,0,0xf0), assemble(4,0,0xf0);
|
2839 |
|
|
else assemble(4,val_stack[j].equiv.h,0), assemble(4,val_stack[j].equiv.l,0);
|
2840 |
|
|
}
|
2841 |
|
|
|
2842 |
|
|
@ @=
|
2843 |
|
|
if (val_stack[j].status==reg_val)
|
2844 |
|
|
err("*register number used as a constant")@;
|
2845 |
|
|
@.register number...@>
|
2846 |
|
|
else if (val_stack[j].status==undefined) {
|
2847 |
|
|
if (opcode!=OCTA) err("undefined constant");
|
2848 |
|
|
@.undefined constant@>
|
2849 |
|
|
pp=val_stack[j].link->sym;
|
2850 |
|
|
qq=new_sym_node(false);
|
2851 |
|
|
qq->link=pp->link;
|
2852 |
|
|
pp->link=qq;
|
2853 |
|
|
qq->serial=fix_o;
|
2854 |
|
|
qq->equiv=cur_loc;
|
2855 |
|
|
}
|
2856 |
|
|
|
2857 |
|
|
@ @=
|
2858 |
|
|
@;
|
2859 |
|
|
@;
|
2860 |
|
|
assemble_X: @;
|
2861 |
|
|
assemble_inst: assemble(4,(opcode<<24)+xyz,future_bits);
|
2862 |
|
|
break;
|
2863 |
|
|
|
2864 |
|
|
@ Individual fields of an instruction are placed into
|
2865 |
|
|
global variables |z|, |y|, |x|, |yz|, and/or |xyz|.
|
2866 |
|
|
|
2867 |
|
|
@=
|
2868 |
|
|
tetra z,y,x,yz,xyz; /* pieces for assembly */
|
2869 |
|
|
int future_bits; /* places where there are future references */
|
2870 |
|
|
|
2871 |
|
|
@ @=
|
2872 |
|
|
if (val_stack[2].status==undefined) err("Z field is undefined");
|
2873 |
|
|
@.Z field is undefined@>
|
2874 |
|
|
if (val_stack[2].status==reg_val) {
|
2875 |
|
|
if (!(op_bits&(immed_bit+zr_bit+zar_bit)))
|
2876 |
|
|
derr("*Z field of `%s' should not be a register number",op_field);
|
2877 |
|
|
@.Z field...register number@>
|
2878 |
|
|
}@+ else if (op_bits&immed_bit) opcode++; /* immediate */
|
2879 |
|
|
else if (op_bits&zr_bit)
|
2880 |
|
|
derr("*Z field of `%s' should be a register number",op_field);
|
2881 |
|
|
if (val_stack[2].equiv.h || val_stack[2].equiv.l>0xff)
|
2882 |
|
|
err("*Z field doesn't fit in one byte");
|
2883 |
|
|
@.Z field doesn't fit...@>
|
2884 |
|
|
z=val_stack[2].equiv.l&0xff;
|
2885 |
|
|
|
2886 |
|
|
@ @=
|
2887 |
|
|
if (val_stack[1].status==undefined) err("Y field is undefined");
|
2888 |
|
|
@.Y field is undefined@>
|
2889 |
|
|
if (val_stack[1].status==reg_val) {
|
2890 |
|
|
if (!(op_bits&(yr_bit+yar_bit)))
|
2891 |
|
|
derr("*Y field of `%s' should not be a register number",op_field);
|
2892 |
|
|
@.Y field...register number@>
|
2893 |
|
|
}@+ else if (op_bits&yr_bit)
|
2894 |
|
|
derr("*Y field of `%s' should be a register number",op_field);
|
2895 |
|
|
if (val_stack[1].equiv.h || val_stack[1].equiv.l>0xff)
|
2896 |
|
|
err("*Y field doesn't fit in one byte");
|
2897 |
|
|
@.Y field doesn't fit...@>
|
2898 |
|
|
y=val_stack[1].equiv.l&0xff;@+
|
2899 |
|
|
yz=(y<<8)+z;
|
2900 |
|
|
|
2901 |
|
|
@ @=
|
2902 |
|
|
if (val_stack[0].status==undefined) err("X field is undefined");
|
2903 |
|
|
@.X field is undefined@>
|
2904 |
|
|
if (val_stack[0].status==reg_val) {
|
2905 |
|
|
if (!(op_bits&(xr_bit+xar_bit)))
|
2906 |
|
|
derr("*X field of `%s' should not be a register number",op_field);
|
2907 |
|
|
@.X field...register number@>
|
2908 |
|
|
}@+ else if (op_bits&xr_bit)
|
2909 |
|
|
derr("*X field of `%s' should be a register number",op_field);
|
2910 |
|
|
if (val_stack[0].equiv.h || val_stack[0].equiv.l>0xff)
|
2911 |
|
|
err("*X field doesn't fit in one byte");
|
2912 |
|
|
@.X field doesn't fit...@>
|
2913 |
|
|
x=val_stack[0].equiv.l&0xff;@+
|
2914 |
|
|
xyz=(x<<16)+yz;
|
2915 |
|
|
|
2916 |
|
|
@ @=
|
2917 |
|
|
if (val_stack[1].status==undefined) {
|
2918 |
|
|
if (op_bits&rel_addr_bit)
|
2919 |
|
|
@@;
|
2920 |
|
|
else err("YZ field is undefined");
|
2921 |
|
|
@.YZ field is undefined@>
|
2922 |
|
|
}@+else if (val_stack[1].status==reg_val) {
|
2923 |
|
|
if (!(op_bits&(immed_bit+yzr_bit+yzar_bit)))
|
2924 |
|
|
derr("*YZ field of `%s' should not be a register number",op_field);
|
2925 |
|
|
@.YZ field...register number@>
|
2926 |
|
|
if (opcode==SET) val_stack[1].equiv.l<<=8,opcode=0xc1; /* change to \.{OR} */
|
2927 |
|
|
else if (op_bits&mem_bit)
|
2928 |
|
|
val_stack[1].equiv.l<<=8,opcode++; /* silently append \.{,0} */
|
2929 |
|
|
}@+ else { /* |val_stack[1].status==pure| */
|
2930 |
|
|
if (op_bits&mem_bit)
|
2931 |
|
|
@;
|
2932 |
|
|
if (opcode==SET) opcode=0xe3; /* change to \.{SETL} */
|
2933 |
|
|
else if (op_bits&immed_bit) opcode++; /* immediate */
|
2934 |
|
|
else if (op_bits&yzr_bit) {
|
2935 |
|
|
derr("*YZ field of `%s' should be a register number",op_field);
|
2936 |
|
|
}
|
2937 |
|
|
if (op_bits&rel_addr_bit)
|
2938 |
|
|
@;
|
2939 |
|
|
}
|
2940 |
|
|
if (val_stack[1].equiv.h || val_stack[1].equiv.l>0xffff)
|
2941 |
|
|
err("*YZ field doesn't fit in two bytes");
|
2942 |
|
|
@.YZ field doesn't fit...@>
|
2943 |
|
|
yz=val_stack[1].equiv.l&0xffff;
|
2944 |
|
|
goto assemble_X;
|
2945 |
|
|
|
2946 |
|
|
@ @=
|
2947 |
|
|
{
|
2948 |
|
|
pp=val_stack[1].link->sym;
|
2949 |
|
|
qq=new_sym_node(false);
|
2950 |
|
|
qq->link=pp->link;
|
2951 |
|
|
pp->link=qq;
|
2952 |
|
|
qq->serial=fix_yz;
|
2953 |
|
|
qq->equiv=cur_loc;
|
2954 |
|
|
yz=0;
|
2955 |
|
|
future_bits=0xc0;
|
2956 |
|
|
goto assemble_X;
|
2957 |
|
|
}
|
2958 |
|
|
|
2959 |
|
|
@ @=
|
2960 |
|
|
{
|
2961 |
|
|
octa source, dest;
|
2962 |
|
|
if (val_stack[1].equiv.l&3)
|
2963 |
|
|
err("*relative address is not divisible by 4");
|
2964 |
|
|
@.relative address...@>
|
2965 |
|
|
source=shift_right(cur_loc,2,0);
|
2966 |
|
|
dest=shift_right(val_stack[1].equiv,2,0);
|
2967 |
|
|
acc=ominus(dest,source);
|
2968 |
|
|
if (!(acc.h&0x80000000)) {
|
2969 |
|
|
if (acc.l>0xffff || acc.h)
|
2970 |
|
|
err("relative address is more than #ffff tetrabytes forward");
|
2971 |
|
|
}@+else {
|
2972 |
|
|
acc=incr(acc,0x10000);
|
2973 |
|
|
opcode++;
|
2974 |
|
|
if (acc.l>0xffff || acc.h)
|
2975 |
|
|
err("relative address is more than #10000 tetrabytes backward");
|
2976 |
|
|
}
|
2977 |
|
|
yz=acc.l;
|
2978 |
|
|
goto assemble_X;
|
2979 |
|
|
}
|
2980 |
|
|
|
2981 |
|
|
@ @=
|
2982 |
|
|
{
  octa o; /* the smallest offset found so far */
  o=val_stack[1].equiv, k=0; /* |k==0| means that no base register has been chosen */
  for (j=greg;j<255;j++) if (greg_val[j].h || greg_val[j].l) {
      /* registers whose recorded value is zero are skipped */
    acc=ominus(val_stack[1].equiv,greg_val[j]);
    if (acc.h<=o.h && (acc.l<=o.l || acc.h<o.h)) o=acc, k=j;
      /* unsigned 64-bit test |acc<=o|: register |j| is at least as close as the best so far */
  }
  if (o.l<=0xff && !o.h && k) yz=(k<<8)+o.l, opcode++;
    /* the offset fits in one byte: Y is the base register and Z the offset */
  else if (!expanding) err("no base address is close enough to the address A")@;
@.no base address...@>
  else @;
  goto assemble_X;
}
|
2995 |
|
|
|
2996 |
|
|
@ @d SETH 0xe0
|
2997 |
|
|
@d ORH 0xe8
|
2998 |
|
|
@d ORL 0xeb
|
2999 |
|
|
|
3000 |
|
|
@=
|
3001 |
|
|
{
|
3002 |
|
|
for (j=SETH;j<=ORL;j++) {
|
3003 |
|
|
switch (j&3) {
|
3004 |
|
|
case 0: yz=o.h>>16;@+break; /* \.{SETH} */
|
3005 |
|
|
case 1: yz=o.h&0xffff;@+break; /* \.{SETMH} or \.{ORMH} */
|
3006 |
|
|
case 2: yz=o.l>>16;@+break; /* \.{SETML} or \.{ORML} */
|
3007 |
|
|
case 3: yz=o.l&0xffff;@+break; /* \.{SETL} or \.{ORL} */
|
3008 |
|
|
}
|
3009 |
|
|
if (yz) {
|
3010 |
|
|
assemble(4,(j<<24)+(255<<16)+yz,0);
|
3011 |
|
|
j |= ORH;
|
3012 |
|
|
}
|
3013 |
|
|
}
|
3014 |
|
|
if (k) yz=(k<<8)+255; /* Y = \$$k$, Z = \$255 */
|
3015 |
|
|
else yz=255<<8, opcode++; /* Y = \$255, Z = 0 */
|
3016 |
|
|
}
|
3017 |
|
|
|
3018 |
|
|
@ @=
|
3019 |
|
|
if (val_stack[0].status==undefined) {
|
3020 |
|
|
if (op_bits&rel_addr_bit)
|
3021 |
|
|
@@;
|
3022 |
|
|
else if (opcode!=PREFIX) err("the operand is undefined");
|
3023 |
|
|
@.the operand is undefined@>
|
3024 |
|
|
}@+else if (val_stack[0].status==reg_val) {
|
3025 |
|
|
if (!(op_bits&(xyzr_bit+xyzar_bit)))
|
3026 |
|
|
derr("*operand of `%s' should not be a register number",op_field);
|
3027 |
|
|
@.operand...register number@>
|
3028 |
|
|
}@+ else { /* |val_stack[0].status==pure| */
|
3029 |
|
|
if (op_bits&xyzr_bit)
|
3030 |
|
|
derr("*operand of `%s' should be a register number",op_field);
|
3031 |
|
|
if (op_bits&rel_addr_bit)
|
3032 |
|
|
@;
|
3033 |
|
|
}
|
3034 |
|
|
if (opcode>0xff) @;
|
3035 |
|
|
if (val_stack[0].equiv.h || val_stack[0].equiv.l>0xffffff)
|
3036 |
|
|
err("*XYZ field doesn't fit in three bytes");
|
3037 |
|
|
@.XYZ field doesn't fit...@>
|
3038 |
|
|
xyz=val_stack[0].equiv.l&0xffffff;
|
3039 |
|
|
goto assemble_inst;
|
3040 |
|
|
|
3041 |
|
|
@ @=
|
3042 |
|
|
{
|
3043 |
|
|
pp=val_stack[0].link->sym;
|
3044 |
|
|
qq=new_sym_node(false);
|
3045 |
|
|
qq->link=pp->link;
|
3046 |
|
|
pp->link=qq;
|
3047 |
|
|
qq->serial=fix_xyz;
|
3048 |
|
|
qq->equiv=cur_loc;
|
3049 |
|
|
xyz=0;
|
3050 |
|
|
future_bits=0xe0;
|
3051 |
|
|
goto assemble_inst;
|
3052 |
|
|
}
|
3053 |
|
|
|
3054 |
|
|
@ @=
|
3055 |
|
|
{
|
3056 |
|
|
octa source, dest;
|
3057 |
|
|
if (val_stack[0].equiv.l&3)
|
3058 |
|
|
err("*relative address is not divisible by 4");
|
3059 |
|
|
@.relative address...@>
|
3060 |
|
|
source=shift_right(cur_loc,2,0);
|
3061 |
|
|
dest=shift_right(val_stack[0].equiv,2,0);
|
3062 |
|
|
acc=ominus(dest,source);
|
3063 |
|
|
if (!(acc.h&0x80000000)) {
|
3064 |
|
|
if (acc.l>0xffffff || acc.h)
|
3065 |
|
|
err("relative address is more than #ffffff tetrabytes forward");
|
3066 |
|
|
}@+else {
|
3067 |
|
|
acc=incr(acc,0x1000000);
|
3068 |
|
|
opcode++;
|
3069 |
|
|
if (acc.l>0xffffff || acc.h)
|
3070 |
|
|
err("relative address is more than #1000000 tetrabytes backward");
|
3071 |
|
|
}
|
3072 |
|
|
xyz=acc.l;
|
3073 |
|
|
goto assemble_inst;
|
3074 |
|
|
}
|
3075 |
|
|
|
3076 |
|
|
@ @=
|
3077 |
|
|
switch(opcode) {
|
3078 |
|
|
case LOC: cur_loc=val_stack[0].equiv;
|
3079 |
|
|
case IS: goto bypass;
|
3080 |
|
|
case PREFIX:@+if (!val_stack[0].link) err("not a valid prefix");
|
3081 |
|
|
@.not a valid prefix@>
|
3082 |
|
|
cur_prefix=val_stack[0].link;@+goto bypass;
|
3083 |
|
|
case GREG:@+if (listing_file) @;
|
3084 |
|
|
goto bypass;
|
3085 |
|
|
case LOCAL:@+if (val_stack[0].equiv.l>lreg) lreg=val_stack[0].equiv.l;
|
3086 |
|
|
if (listing_file) {
|
3087 |
|
|
fprintf(listing_file,"($%03d)",val_stack[0].equiv.l);
|
3088 |
|
|
flush_listing_line(" ");
|
3089 |
|
|
}
|
3090 |
|
|
goto bypass;
|
3091 |
|
|
case BSPEC:@+if (val_stack[0].equiv.l>0xffff || val_stack[0].equiv.h)
|
3092 |
|
|
err("*operand of `BSPEC' doesn't fit in two bytes");
|
3093 |
|
|
@.operand of `BSPEC'...@>
|
3094 |
|
|
mmo_loc();@+mmo_sync();
|
3095 |
|
|
mmo_lopp(lop_spec,val_stack[0].equiv.l);
|
3096 |
|
|
spec_mode=true;@+spec_mode_loc=0;@+ goto bypass;
|
3097 |
|
|
case ESPEC: spec_mode=false;@+goto bypass;
|
3098 |
|
|
}
|
3099 |
|
|
|
3100 |
|
|
@ @=
|
3101 |
|
|
octa greg_val[256]; /* initial values of global registers */
|
3102 |
|
|
|
3103 |
|
|
@ @=
|
3104 |
|
|
if (val_stack[0].equiv.l || val_stack[0].equiv.h) {
|
3105 |
|
|
fprintf(listing_file,"($%03d=#%08x",cur_greg,val_stack[0].equiv.h);
|
3106 |
|
|
flush_listing_line(" ");
|
3107 |
|
|
fprintf(listing_file," %08x)",val_stack[0].equiv.l);
|
3108 |
|
|
flush_listing_line(" ");
|
3109 |
|
|
}@+else {
|
3110 |
|
|
fprintf(listing_file,"($%03d)",cur_greg);
|
3111 |
|
|
flush_listing_line(" ");
|
3112 |
|
|
}
|
3113 |
|
|
|
3114 |
|
|
@* Running the program. On a \UNIX/-like system, the command
|
3115 |
|
|
$$\.{mmixal [options] sourcefilename}$$
|
3116 |
|
|
will assemble the \MMIXAL\ program in file \.{sourcefilename},
|
3117 |
|
|
writing any error messages on the standard error file. (Nothing is written to
|
3118 |
|
|
the standard output.) The options, which may appear in any order, are:
|
3119 |
|
|
|
3120 |
|
|
\bull\.{-o objectfilename}\quad Send the output to a binary file called
|
3121 |
|
|
\.{objectfilename}.
|
3122 |
|
|
If no \.{-o} specification is given, the object file name is obtained from the
|
3123 |
|
|
input file name by changing the final letter from `\.s' to~`\.o', or by
|
3124 |
|
|
appending `\.{.mmo}' if \.{sourcefilename} doesn't end with~\.s.
|
3125 |
|
|
|
3126 |
|
|
\bull\.{-l listingname}\quad Output a listing of the assembled input and
|
3127 |
|
|
output to a text file called \.{listingname}.
|
3128 |
|
|
|
3129 |
|
|
\bull\.{-x}\quad Expand memory-oriented commands that cannot be assembled
|
3130 |
|
|
as single instructions, by assembling auxiliary instructions that make
|
3131 |
|
|
temporary use of global register~\$255.
|
3132 |
|
|
|
3133 |
|
|
\bull\.{-b bufsize}\quad Allow up to \.{bufsize} characters per line of input.
|
3134 |
|
|
|
3135 |
|
|
@ Here, finally, is the overall structure of this program.
|
3136 |
|
|
|
3137 |
|
|
@c
|
3138 |
|
|
#include
|
3139 |
|
|
#include
|
3140 |
|
|
#include
|
3141 |
|
|
#include
|
3142 |
|
|
#include
|
3143 |
|
|
@#
|
3144 |
|
|
@@;
|
3145 |
|
|
@@;
|
3146 |
|
|
@@;
|
3147 |
|
|
@@;
|
3148 |
|
|
@#
|
3149 |
|
|
int main(argc,argv)
|
3150 |
|
|
int argc;@+
|
3151 |
|
|
char *argv[];
|
3152 |
|
|
{
|
3153 |
|
|
register int j,k; /* all-purpose integers */
|
3154 |
|
|
@;
|
3155 |
|
|
@;
|
3156 |
|
|
@;
|
3157 |
|
|
while(1) {
|
3158 |
|
|
@;
|
3159 |
|
|
while(1) {
|
3160 |
|
|
@;
|
3161 |
|
|
if (!*buf_ptr) break;
|
3162 |
|
|
}
|
3163 |
|
|
if (listing_file) {
|
3164 |
|
|
if (listing_bits) listing_clear();
|
3165 |
|
|
else if (!line_listed) flush_listing_line(" ");
|
3166 |
|
|
}
|
3167 |
|
|
}
|
3168 |
|
|
@;
|
3169 |
|
|
}
|
3170 |
|
|
|
3171 |
|
|
@ The space after |"-b"| is optional, because
|
3172 |
|
|
{\mc MMIX-SIM} does not use a space in this context.
|
3173 |
|
|
|
3174 |
|
|
@=
|
3175 |
|
|
for (j=1;j
|
3176 |
|
|
if (argv[j][1]=='x') expanding=1;
|
3177 |
|
|
else if (argv[j][1]=='o') j++,strcpy(obj_file_name,argv[j]);
|
3178 |
|
|
else if (argv[j][1]=='l') j++,strcpy(listing_name,argv[j]);
|
3179 |
|
|
else if (argv[j][1]=='b' && sscanf(argv[j+1],"%d",&buf_size)==1) j++;
|
3180 |
|
|
else break;
|
3181 |
|
|
}@+else if (argv[j][1]!='b' || sscanf(argv[j]+1,"%d",&buf_size)!=1) break;
|
3182 |
|
|
if (j!=argc-1) {
|
3183 |
|
|
fprintf(stderr,"Usage: %s %s sourcefilename\n",
|
3184 |
|
|
@.Usage: ...@>
|
3185 |
|
|
argv[0],"[-x] [-l listingname] [-b buffersize] [-o objectfilename]");
|
3186 |
|
|
exit(-1);
|
3187 |
|
|
}
|
3188 |
|
|
src_file_name=argv[j];
|
3189 |
|
|
|
3190 |
|
|
@ @=
|
3191 |
|
|
src_file=fopen(src_file_name,"r");
|
3192 |
|
|
if (!src_file) dpanic("Can't open the source file %s",src_file_name);
|
3193 |
|
|
@.Can't open...@>
|
3194 |
|
|
if (!obj_file_name[0]) {
|
3195 |
|
|
j=strlen(src_file_name);
|
3196 |
|
|
if (src_file_name[j-1]=='s') {
|
3197 |
|
|
strcpy(obj_file_name,src_file_name);@+ obj_file_name[j-1]='o';
|
3198 |
|
|
} else sprintf(obj_file_name,"%s.mmo",src_file_name);
|
3199 |
|
|
}
|
3200 |
|
|
obj_file=fopen(obj_file_name,"wb");
|
3201 |
|
|
if (!obj_file) dpanic("Can't open the object file %s",obj_file_name);
|
3202 |
|
|
if (listing_name[0]) {
|
3203 |
|
|
listing_file=fopen(listing_name,"w");
|
3204 |
|
|
if (!listing_file) dpanic("Can't open the listing file %s",listing_name);
|
3205 |
|
|
}
|
3206 |
|
|
|
3207 |
|
|
@ @=
|
3208 |
|
|
char *src_file_name; /* name of the \MMIXAL\ input file */
|
3209 |
|
|
char obj_file_name[FILENAME_MAX+1]; /* name of the binary output file */
|
3210 |
|
|
char listing_name[FILENAME_MAX+1]; /* name of the optional listing file */
|
3211 |
|
|
FILE *src_file, *obj_file, *listing_file;
|
3212 |
|
|
int expanding; /* are we expanding instructions when no base address is close enough? */
|
3213 |
|
|
int buf_size; /* maximum number of characters per line of input */
|
3214 |
|
|
|
3215 |
|
|
@ @=
|
3216 |
|
|
@;
|
3217 |
|
|
filename[0]=src_file_name;
|
3218 |
|
|
filename_count=1;
|
3219 |
|
|
@
|
3220 |
|
|
|
3221 |
|
|
@ @
|
3222 |
|
|
mmo_lop(lop_pre,1,1);
|
3223 |
|
|
mmo_tetra(time(NULL));
|
3224 |
|
|
mmo_cur_file=-1;
|
3225 |
|
|
|
3226 |
|
|
@ @=
|
3227 |
|
|
if (lreg>=greg)
|
3228 |
|
|
dpanic("Danger: Must reduce the number of GREGs by %d",lreg-greg+1);
|
3229 |
|
|
@.Danger@>
|
3230 |
|
|
@
|
3231 |
|
|
@;
|
3232 |
|
|
@;
|
3233 |
|
|
if (err_count) {
|
3234 |
|
|
if (err_count>1) fprintf(stderr,"(%d errors were found.)\n",err_count);
|
3235 |
|
|
else fprintf(stderr,"(One error was found.)\n");
|
3236 |
|
|
}
|
3237 |
|
|
exit(err_count);
|
3238 |
|
|
|
3239 |
|
|
@ @=
|
3240 |
|
|
int greg=255; /* global register allocator */
|
3241 |
|
|
int cur_greg; /* global register just allocated */
|
3242 |
|
|
int lreg=32; /* local register allocator */
|
3243 |
|
|
|
3244 |
|
|
@ @
|
3245 |
|
|
mmo_lop(lop_post,0,greg);
|
3246 |
|
|
greg_val[255]=trie_search(trie_root,"Main")->sym->equiv;
|
3247 |
|
|
for (j=greg;j<256;j++) {
|
3248 |
|
|
mmo_tetra(greg_val[j].h);
|
3249 |
|
|
mmo_tetra(greg_val[j].l);
|
3250 |
|
|
}
|
3251 |
|
|
|
3252 |
|
|
@ @=
|
3253 |
|
|
for (j=0;j<10;j++) if (forward_local[j].link)
|
3254 |
|
|
err_count++,fprintf(stderr,"undefined local symbol %dF\n",j);
|
3255 |
|
|
@.undefined local symbol@>
|
3256 |
|
|
|
3257 |
|
|
@* Index.
|
3258 |
|
|
|