1 |
3 |
howe.r.j.8 |
|
2 |
|
|
\ ========================================================================== \
|
3 |
|
|
\ This file is currently not used!
|
4 |
|
|
\ ========================================================================== \
|
5 |
|
|
|
6 |
|
|
\ NOTE: This file was originally from ,
|
7 |
|
|
\ and is a metacompiler for the 'embed' virtual machine, it will need porting
|
8 |
|
|
\ so it works with the H2, however the embed VM is derived from the simulator
|
9 |
|
|
\ for the H2, so porting should be fairly easy.
|
10 |
|
|
\ @todo Reformat this file for a column width of 64, into Forth blocks.
|
11 |
|
|
|
12 |
|
|
\ # meta.fth
|
13 |
|
|
\
|
14 |
|
|
\ | Project | A Small Forth VM/Implementation |
|
15 |
|
|
\ | ---------- | --------------------------------- |
|
16 |
|
|
\ | Author | Richard James Howe |
|
17 |
|
|
\ | Copyright | 2017 Richard James Howe |
|
18 |
|
|
\ | License | MIT |
|
19 |
|
|
\ | Email | howe.r.j.89@gmail.com |
|
20 |
|
|
\ | Repository | |
|
21 |
|
|
|
22 |
|
|
\ ## A Meta-compiler, an implementation of eForth, and a tutorial on both.
|
23 |
|
|
|
24 |
|
|
\ ## Introduction
|
25 |
|
|
\ In this file a meta-compiler (or a cross compiler written in Forth) is
|
26 |
|
|
\ described and implemented, and after that a working Forth interpreter
|
27 |
|
|
\ is both described and implemented. This new interpreter can be used in turn
|
28 |
|
|
\ to meta-compile the original program, ad infinitum. The design decisions
|
29 |
|
|
\ and the philosophy behind Forth and the system will also be elucidated.
|
30 |
|
|
\
|
31 |
|
|
\ ### What is Forth?
|
32 |
|
|
\
|
33 |
|
|
\ Forth is a stack based procedural language, which uses Reverse Polish
|
34 |
|
|
\ Notation (RPN) to enter expressions. It is a minimal language with no type
|
35 |
|
|
\ checking and little to no error handling depending on the implementation.
|
36 |
|
|
\ Despite its small size and simplicity it has various features usually found
|
37 |
|
|
\ in higher level languages, such as reflection, incremental compilation and
|
38 |
|
|
\ an interactive read-evaluate-print loop.
|
39 |
|
|
\
|
40 |
|
|
\ It is still at heart a language that is close to the machine with low
|
41 |
|
|
\ level capabilities and direct access to memory. Memory manage itself is
|
42 |
|
|
\ mostly manual, with preallocation of all needed memory the preferred method
|
43 |
|
|
\ of program writing.
|
44 |
|
|
\
|
45 |
|
|
\ Forth has mostly fallen out of favor in recent years, it performed admirably
|
46 |
|
|
\ on the microcomputer systems available in the 1980s and is still suitable
|
47 |
|
|
\ for very memory constrained embed systems (having on a few kilobytes of
|
48 |
|
|
\ memory available to them), but lacks a lot of features modern languages
|
49 |
|
|
\ provide.
|
50 |
|
|
\
|
51 |
|
|
\ A catalogue of deficiencies hamper Forth adoption; poor string handling,
|
52 |
|
|
\ lack of libraries and poor integration with the operating system (on hosted
|
53 |
|
|
\ platforms), mutually incompatible and wildly different Forth implementations
|
54 |
|
|
\ and as mentioned - little error detection and handling.
|
55 |
|
|
\
|
56 |
|
|
\ Despite this fact it has a core of adherents who find uses for the language,
|
57 |
|
|
\ in fact some of its deficiencies are actually trade-offs. Having no type
|
58 |
|
|
\ checking means there is no type checking to do, having very little in the way
|
59 |
|
|
\ of error detection means errors do not have to be detected. This off loads
|
60 |
|
|
\ the complexity of the problem to the programmer and means a Forth
|
61 |
|
|
\ implementation can be minimal and terse.
|
62 |
|
|
\
|
63 |
|
|
\ The saying "Once you have seen one Forth implementation, you have seen
|
64 |
|
|
\ one Forth implementation." comes about because of how easy it is to implement
|
65 |
|
|
\ a Forth, which is a double edged sword. It is possible to completely
|
66 |
|
|
\ understand a Forth system, the software, the hardware and the problems you
|
67 |
|
|
\ are trying to solve and optimize everything towards this goal. This is oft
|
68 |
|
|
\ not possible with modern systems, a single person cannot totally understand
|
69 |
|
|
\ even subcomponents of modern systems in its entirety (such as compilers
|
70 |
|
|
\ or the operating system kernels we use).
|
71 |
|
|
\
|
72 |
|
|
\ Another saying from the creator of Forth, Charles Moore,
|
73 |
|
|
\ "Forth is Sudoku for programmers". The reason the author uses Forth is
|
74 |
|
|
\ because it is fun, no more justification is needed.
|
75 |
|
|
\
|
76 |
|
|
\ ### Project Origins
|
77 |
|
|
\
|
78 |
|
|
\ This project derives from a simulator for a CPU written in VHDL, designed
|
79 |
|
|
\ to execute Forth primitives directly, available on GitHub at,
|
80 |
|
|
\ . The CPU and Forth interpreter
|
81 |
|
|
\ themselves have their own sources, which all makes for a confusing pedigree.
|
82 |
|
|
\ The CPU, called the H2, was derived from a well known Forth CPU written
|
83 |
|
|
\ in Verilog, called the J1 , and
|
84 |
|
|
\ the Forth running on the H2 comes from an adaption of eForth written
|
85 |
|
|
\ for the J1 which itself was derived
|
86 |
|
|
\ from 'The Zen of eForth' by C. H. Ting.
|
87 |
|
|
\
|
88 |
|
|
\ Instead of a metacompiler written in Forth a cross compiler for a Forth like
|
89 |
|
|
\ language was made, which could create an image readable by both the
|
90 |
|
|
\ simulator, and the FPGA development tools. The simulator was cut down and
|
91 |
|
|
\ modified for use on a computer, with new instructions for input and output.
|
92 |
|
|
\
|
93 |
|
|
\ This system, with a cross compiler and virtual machine written in C, was
|
94 |
|
|
\ used to develop the present system which consists of only the virtual
|
95 |
|
|
\ machine, a binary image containing a Forth interpreter, and this metacompiler
|
96 |
|
|
\ with the metacompiled Forth. These changes and the discarding of the cross
|
97 |
|
|
\ compiler written in C can be seen in the Git repository this project comes
|
98 |
|
|
\ in ().
|
99 |
|
|
\
|
100 |
|
|
\ ### The Virtual Machine
|
101 |
|
|
\
|
102 |
|
|
\ The virtual machine is incredibly simple and cut down at around 200 lines of
|
103 |
|
|
\ C code, with most of the code being not being the virtual machine itself,
|
104 |
|
|
\ but code to get data in and out of the system correctly, or setting the
|
105 |
|
|
\ machine up. It is described in the appendix (at the end of this file), which
|
106 |
|
|
\ also contains an example implementation of the virtual machine.
|
107 |
|
|
\
|
108 |
|
|
\ The virtual machine is 16-bit dual stack machine with an instruction set
|
109 |
|
|
\ encoding which allows for many Forth words to be implemented in a single
|
110 |
|
|
\ instruction. As the CPU is designed to execute Forth, Subroutine Threaded
|
111 |
|
|
\ Code (STC) is the most efficient method of running Forth upon it.
|
112 |
|
|
\
|
113 |
|
|
\ @todo Complete the introduction
|
114 |
|
|
\ - Describe where the Forth came from (from a VHDL CPU project, eForth, ...)
|
115 |
|
|
\ - Philosophy of Forth
|
116 |
|
|
\ - Simplicity, Factoring, analyzing the problem from all angles, ...
|
117 |
|
|
\ - What Forth is good for, and what it is not.
|
118 |
|
|
\ - What a meta compiler is
|
119 |
|
|
\ - Purpose of this document
|
120 |
|
|
\ - A little bit about Forth, a simple introduction
|
121 |
|
|
\ - How Vocabularies work
|
122 |
|
|
\ - Stack comments, also standardize stack comments
|
123 |
|
|
\ - Conventions within Forth, Forth blocks, naming of words (for example
|
124 |
|
|
\ using '@' or '!' within word names).
|
125 |
|
|
\ - Design tradeoffs and constraints
|
126 |
|
|
\ - For example: having a separate string storage area
|
127 |
|
|
\ - Limitations of the Virtual Machines code space
|
128 |
|
|
\ - Compiler security (depth checking, compile-only words, ... )
|
129 |
|
|
\ Some Forths do not bother with it
|
130 |
|
|
\ - More modern Forths which optimize more but lose the simplicity
|
131 |
|
|
\ of Forth
|
132 |
|
|
\ - Lack of user variables, making a ROMable Forth, compression
|
133 |
|
|
\ - Making the metacompiler use look like ordinary Forth, so understanding
|
134 |
|
|
\ how to put together a Forth interpreter can be done by an ordinary
|
135 |
|
|
\ Forth programmer (which complicates the metacompiler, but only a little).
|
136 |
|
|
\ - The document should describe both how, and why, things are the
|
137 |
|
|
\ way they are. The design decisions are just as important as the decision
|
138 |
|
|
\ itself, more so even, as understand the why a decision was made allows
|
139 |
|
|
\ you to change or challenge the implementation.
|
140 |
|
|
\
|
141 |
|
|
\ The project, documentation and Forth images are under an MIT license,
|
142 |
|
|
\ and the
|
143 |
|
|
\ repository is available at .
|
144 |
|
|
|
145 |
|
|
\ The document is structured in roughly the following order:
|
146 |
|
|
\ 1. The metacompiler
|
147 |
|
|
\ 2. The assembler
|
148 |
|
|
\ 3. Image header generation
|
149 |
|
|
\ 4. Basic Setup, Variables and Special cases
|
150 |
|
|
\ 5. Simple Forth Words, Numeric I/O
|
151 |
|
|
\ 6. Interpreter
|
152 |
|
|
\ 7. Control Words
|
153 |
|
|
\ 8. I/O Control, Boot Words
|
154 |
|
|
\ 9. 'See', the Disassembler
|
155 |
|
|
\ 10. Block Editor
|
156 |
|
|
\ 11. Finishing
|
157 |
|
|
\ 12. APPENDIX
|
158 |
|
|
|
159 |
|
|
\ What you are reading is itself a Forth program, all the explanatory text is
|
160 |
|
|
\ are Forth comments. The file should eventually be fed through a preprocessor
|
161 |
|
|
\ to turn it into a Markdown file for further processing.
|
162 |
|
|
\ See for more information
|
163 |
|
|
\ about Markdown.
|
164 |
|
|
|
165 |
|
|
\ Many Forths are written in an assembly language, especially the ones geared
|
166 |
|
|
\ towards microcontrollers, although it is more common for new Forth
|
167 |
|
|
\ interpreters to be written in C. A metacompiler is a Cross Compiler
|
168 |
|
|
\ written in Forth.
|
169 |
|
|
|
170 |
|
|
\ References
|
171 |
|
|
\ * 'The Zen of eForth' by C. H. Ting
|
172 |
|
|
\ * (This project)
|
173 |
|
|
\ *
|
174 |
|
|
\ *
|
175 |
|
|
\ Jones Forth:
|
176 |
|
|
\ *
|
177 |
|
|
\ *
|
178 |
|
|
\ J1 CPU
|
179 |
|
|
\ *
|
180 |
|
|
\ *
|
181 |
|
|
\ *
|
182 |
|
|
\ *
|
183 |
|
|
|
184 |
|
|
\ The Virtual Machine is specifically designed to execute Forth, it is a stack
|
185 |
|
|
\ machine that allows many Forth words to be encoded in one instruction but
|
186 |
|
|
\ does not contain any high level Forth words, just words like '@', 'r>' and
|
187 |
|
|
\ a few basic words for I/O. A full description of the virtual machine is
|
188 |
|
|
\ in the appendix.
|
189 |
|
|
|
190 |
|
|
\ ## Metacompilation wordset
|
191 |
|
|
\ This section defines the metacompilation wordset as well as the
|
192 |
|
|
\ assembler. The metacompiler, or cross compiler, requires some assembly
|
193 |
|
|
\ instructions to be defined so the two word sets are interlinked.
|
194 |
|
|
\
|
195 |
|
|
\ A clear understanding of how Forth vocabularies work is needed before
|
196 |
|
|
\ proceeding with the tutorial. Vocabularies are the way Forth manages
|
197 |
|
|
\ namespaces and are generally talked about that much, they are especially
|
198 |
|
|
\ useful (in fact pretty much required) for writing a metacompiler.
|
199 |
|
|
|
200 |
|
|
only forth definitions hex
|
201 |
|
|
variable meta ( Metacompilation vocabulary )
|
202 |
|
|
meta +order definitions
|
203 |
|
|
|
204 |
|
|
variable assembler.1 ( Target assembler vocabulary )
|
205 |
|
|
variable target.1 ( Target dictionary )
|
206 |
|
|
variable tcp ( Target dictionary pointer )
|
207 |
|
|
variable tlast ( Last defined word in target )
|
208 |
|
|
variable tdoVar ( Location of doVar in target )
|
209 |
|
|
variable tdoConst ( Location of doConst in target )
|
210 |
|
|
variable tdoNext ( Location of doNext in target )
|
211 |
|
|
variable tdoPrintString ( Location of .string in target )
|
212 |
|
|
variable tdoStringLit ( Location of string-literal in target )
|
213 |
|
|
variable fence ( Do not peephole optimize before this point )
|
214 |
|
|
1984 constant #version ( Version number )
|
215 |
|
|
5000 constant #target ( Memory location where the target image will be built )
|
216 |
|
|
2000 constant #max ( Max number of cells in generated image )
|
217 |
|
|
2 constant =cell ( Target cell size )
|
218 |
|
|
-1 constant optimize ( Turn optimizations on [-1] or off [0] )
|
219 |
|
|
|
220 |
|
|
$4280 constant pad-area ( area for pad storage )
|
221 |
|
|
variable header -1 header ! ( If true Headers in the target will be generated )
|
222 |
|
|
|
223 |
|
|
1 constant verbose ( verbosity level, higher is more verbose )
|
224 |
|
|
#target #max 0 fill ( Erase the target memory location )
|
225 |
|
|
|
226 |
|
|
: ]asm assembler.1 +order ; immediate ( -- )
|
227 |
|
|
: a: get-current assembler.1 set-current : ; ( "name" -- wid link )
|
228 |
|
|
: a; [compile] ; set-current ; immediate ( wid link -- )
|
229 |
|
|
|
230 |
|
|
: ( [char] ) parse 2drop ; immediate
|
231 |
|
|
: \ source drop @ >in ! ; immediate
|
232 |
|
|
: there tcp @ ; ( -- a : target dictionary pointer value )
|
233 |
|
|
: tc! #target + c! ; ( u a -- )
|
234 |
|
|
: tc@ #target + c@ ; ( a -- u )
|
235 |
|
|
: [last] tlast @ ; ( -- a )
|
236 |
|
|
: low swap-endianess 0= if 1+ then ; ( b -- b )
|
237 |
|
|
: high swap-endianess if 1+ then ; ( b -- b )
|
238 |
|
|
: t! over ff and over high tc! swap 8 rshift swap low tc! ; ( u a -- )
|
239 |
|
|
: t@ dup high tc@ swap low tc@ 8 lshift or ; ( a -- u )
|
240 |
|
|
: 2/ 1 rshift ; ( u -- u )
|
241 |
|
|
: talign there 1 and tcp +! ; ( -- )
|
242 |
|
|
: tc, there tc! 1 tcp +! ; ( c -- )
|
243 |
|
|
: t, there t! =cell tcp +! ; ( u -- )
|
244 |
|
|
: tallot tcp +! ; ( n -- )
|
245 |
|
|
: update-fence there fence ! ; ( -- )
|
246 |
|
|
: $literal ( , -- )
|
247 |
|
|
[char] " word count dup tc, 1- for count tc, next drop talign update-fence ;
|
248 |
|
|
: tcells =cell * ; ( u -- a )
|
249 |
|
|
: tbody 1 tcells + ; ( a -- a )
|
250 |
|
|
: meta! ! ; ( u a -- )
|
251 |
|
|
: dump-hex #target there 16 + dump ; ( -- )
|
252 |
|
|
: locations ( -- : list all words and locations in target dictionary )
|
253 |
|
|
target.1 @
|
254 |
|
|
begin
|
255 |
|
|
dup
|
256 |
|
|
while
|
257 |
|
|
dup
|
258 |
|
|
nfa count type space dup
|
259 |
|
|
cfa >body @ u. cr
|
260 |
|
|
$3fff and @
|
261 |
|
|
repeat drop ;
|
262 |
|
|
|
263 |
|
|
: display ( -- : display metacompilation and target information )
|
264 |
|
|
verbose 0= if exit then
|
265 |
|
|
hex
|
266 |
|
|
." COMPILATION COMPLETE" cr
|
267 |
|
|
verbose 1 u> if
|
268 |
|
|
dump-hex cr
|
269 |
|
|
." TARGET DICTIONARY: " cr
|
270 |
|
|
locations
|
271 |
|
|
then
|
272 |
|
|
." HOST: " here . cr
|
273 |
|
|
." TARGET: " there . cr
|
274 |
|
|
." HEADER: " #target 20 dump cr ;
|
275 |
|
|
|
276 |
|
|
: checksum #target there crc ; ( -- u : calculate CRC of target image )
|
277 |
|
|
|
278 |
|
|
: save-hex ( -- : save target binary to file )
|
279 |
|
|
#target #target there + (save) throw ;
|
280 |
|
|
|
281 |
|
|
: finished ( -- : save target image and display statistics )
|
282 |
|
|
display
|
283 |
|
|
only forth definitions hex
|
284 |
|
|
." SAVING... " save-hex ." DONE! " cr
|
285 |
|
|
." STACK> " .s cr ;
|
286 |
|
|
|
287 |
|
|
: [a] ( "name" -- : find word and compile an assembler word )
|
288 |
|
|
token assembler.1 search-wordlist 0= abort" [a]? "
|
289 |
|
|
cfa compile, ; immediate
|
290 |
|
|
|
291 |
|
|
: asm[ assembler.1 -order ; immediate ( -- )
|
292 |
|
|
|
293 |
|
|
\ There are five types of instructions, which are differentiated from each
|
294 |
|
|
\ other by the top bits of the instruction.
|
295 |
|
|
|
296 |
|
|
a: #literal $8000 a; ( literal instruction - top bit set )
|
297 |
|
|
a: #alu $6000 a; ( ALU instruction, further encoding below... )
|
298 |
|
|
a: #call $4000 a; ( function call instruction )
|
299 |
|
|
a: #?branch $2000 a; ( branch if zero instruction )
|
300 |
|
|
a: #branch $0000 a; ( unconditional branch )
|
301 |
|
|
|
302 |
|
|
\ An ALU instruction has a more complex encoding which can be seen in the table
|
303 |
|
|
\ in the appendix, it consists of a few flags for moving values to different
|
304 |
|
|
\ registers before and after the ALU operation to perform, an ALU operation,
|
305 |
|
|
\ and a return and variable stack increment/decrement.
|
306 |
|
|
\
|
307 |
|
|
\ Some of these operations are more complex than they first appear, either
|
308 |
|
|
\ because they do more than a single line explanation allows for, or because
|
309 |
|
|
\ they are not typical instructions that you would find in an actual processors
|
310 |
|
|
\ ALU and are only possible within the context of a virtual machine. Operations
|
311 |
|
|
\ like '#u/mod' are an example of the former, '#save' is an example of the
|
312 |
|
|
\ later.
|
313 |
|
|
\
|
314 |
|
|
\ The most succinct description of these operations, and the virtual machine,
|
315 |
|
|
\ is the source code for it which weighs in at under two hundred lines of
|
316 |
|
|
\ C code. Unfortunately this would not include that rationale that led to
|
317 |
|
|
\ the virtual machine being the way it is.
|
318 |
|
|
|
319 |
|
|
\ ALU Operations
|
320 |
|
|
a: #t 0000 a; ( T = t )
|
321 |
|
|
a: #n 0100 a; ( T = n )
|
322 |
|
|
a: #r 0200 a; ( T = Top of Return Stack )
|
323 |
|
|
a: #[t] 0300 a; ( T = memory[t] )
|
324 |
|
|
a: #n->[t] 0400 a; ( memory[t] = n )
|
325 |
|
|
a: #t+n 0500 a; ( n = n+t, T = carry )
|
326 |
|
|
a: #t*n 0600 a; ( n = n*t, T = upper bits of multiplication )
|
327 |
|
|
a: #t&n 0700 a; ( T = T and N )
|
328 |
|
|
a: #t|n 0800 a; ( T = T or N )
|
329 |
|
|
a: #t^n 0900 a; ( T = T xor N )
|
330 |
|
|
a: #~t 0a00 a; ( Invert T )
|
331 |
|
|
a: #t-1 0b00 a; ( T == t - 1 )
|
332 |
|
|
a: #t==0 0c00 a; ( T == 0? )
|
333 |
|
|
a: #t==n 0d00 a; ( T = n == t? )
|
334 |
|
|
a: #nu
|
335 |
|
|
a: #n
|
336 |
|
|
a: #n>>t 1000 a; ( T = n right shift by t places )
|
337 |
|
|
a: #n<
|
338 |
|
|
a: #sp@ 1200 a; ( T = variable stack depth )
|
339 |
|
|
a: #rp@ 1300 a; ( T = return stack depth )
|
340 |
|
|
a: #sp! 1400 a; ( set variable stack depth )
|
341 |
|
|
a: #rp! 1500 a; ( set return stack depth )
|
342 |
|
|
a: #save 1600 a; ( Save memory disk: n = start, T = end, T' = error )
|
343 |
|
|
a: #tx 1700 a; ( Transmit Byte: t = byte, T' = error )
|
344 |
|
|
a: #rx 1800 a; ( Block until byte received, T = byte/error )
|
345 |
|
|
a: #u/mod 1900 a; ( Remainder/Divide: )
|
346 |
|
|
a: #/mod 1a00 a; ( Signed Remainder/Divide: )
|
347 |
|
|
a: #bye 1b00 a; ( Exit Interpreter )
|
348 |
|
|
|
349 |
|
|
\ The Stack Delta Operations occur after the ALU operations have been executed.
|
350 |
|
|
\ They affect either the Return or the Variable Stack. An ALU instruction
|
351 |
|
|
\ without one of these operations (generally) do not affect the stacks.
|
352 |
|
|
a: d+1 0001 or a; ( increment variable stack by one )
|
353 |
|
|
a: d-1 0003 or a; ( decrement variable stack by one )
|
354 |
|
|
a: d-2 0002 or a; ( decrement variable stack by two )
|
355 |
|
|
a: r+1 0004 or a; ( increment variable stack by one )
|
356 |
|
|
a: r-1 000c or a; ( decrement variable stack by one )
|
357 |
|
|
a: r-2 0008 or a; ( decrement variable stack by two )
|
358 |
|
|
|
359 |
|
|
\ All of these instructions execute after the ALU and stack delta operations
|
360 |
|
|
\ have been performed except r->pc, which occurs before. They form part of
|
361 |
|
|
\ an ALU operation.
|
362 |
|
|
a: r->pc 0010 or a; ( Set Program Counter to Top of Return Stack )
|
363 |
|
|
a: n->t 0020 or a; ( Set Top of Variable Stack to Next on Variable Stack )
|
364 |
|
|
a: t->r 0040 or a; ( Set Top of Return Stack to Top on Variable Stack )
|
365 |
|
|
a: t->n 0080 or a; ( Set Next on Variable Stack to Top on Variable Stack )
|
366 |
|
|
|
367 |
|
|
\ There are five types of instructions; ALU operations, branches,
|
368 |
|
|
\ conditional branches, function calls and literals. ALU instructions
|
369 |
|
|
\ comprise of an ALU operation, stack effects and register move bits. Function
|
370 |
|
|
\ returns are part of the ALU operation instruction set.
|
371 |
|
|
|
372 |
|
|
: ?set dup $e000 and abort" argument too large " ;
|
373 |
|
|
a: branch 2/ ?set [a] #branch or t, a; ( a -- : an Unconditional branch )
|
374 |
|
|
a: ?branch 2/ ?set [a] #?branch or t, a; ( a -- : Conditional branch )
|
375 |
|
|
a: call 2/ ?set [a] #call or t, a; ( a -- : Function call )
|
376 |
|
|
a: ALU ?set [a] #alu or a; ( u -- : Make ALU instruction )
|
377 |
|
|
a: alu [a] ALU t, a; ( u -- : ALU operation )
|
378 |
|
|
a: literal ( n -- : compile a number into target )
|
379 |
|
|
dup [a] #literal and if ( numbers above $7fff take up two instructions )
|
380 |
|
|
invert recurse ( the number is inverted, an literal is called again )
|
381 |
|
|
[a] #~t [a] alu ( then an invert instruction is compiled into the target )
|
382 |
|
|
else
|
383 |
|
|
[a] #literal or t, ( numbers below $8000 are single instructions )
|
384 |
|
|
then a;
|
385 |
|
|
a: return ( -- : Compile a return into the target )
|
386 |
|
|
[a] #t [a] r->pc [a] r-1 [a] alu a;
|
387 |
|
|
|
388 |
|
|
\ The following words implement a primitive peephole optimizer, which is not
|
389 |
|
|
\ the only optimization done, but is the major one. It performs tail call
|
390 |
|
|
\ optimizations and merges the return instruction with the previous instruction
|
391 |
|
|
\ if possible. These simple optimizations really make a lot of difference
|
392 |
|
|
\ in the size of metacompiled program. It means proper tail recursive
|
393 |
|
|
\ procedures can be constructed.
|
394 |
|
|
\
|
395 |
|
|
\ The optimizer is wrapped up in the "exit," word, it checks a fence variable
|
396 |
|
|
\ first, then the previously compiled cell to see if it can replace the last
|
397 |
|
|
\ compiled cell.
|
398 |
|
|
\
|
399 |
|
|
\ The fence variable is an address below which the peephole optimizer should
|
400 |
|
|
\ not look, this is to prevent the optimizer looking at data and merging with
|
401 |
|
|
\ it, or breaking control structures.
|
402 |
|
|
\
|
403 |
|
|
\ An exit can be merged into an ALU instruction if it does not contain
|
404 |
|
|
\ any return stack manipulation, or information from the return stack. This
|
405 |
|
|
\ includes operations such as "r->pc", or "r+1".
|
406 |
|
|
\
|
407 |
|
|
\ A call then an exit can be replaced with an unconditional branch to the
|
408 |
|
|
\ call.
|
409 |
|
|
\
|
410 |
|
|
\ If no optimization can be performed an 'exit' instruction is written into
|
411 |
|
|
\ the target.
|
412 |
|
|
\
|
413 |
|
|
\ The optimizer can be held off manually be inserting a "nop", or a call
|
414 |
|
|
\ or instruction which does nothing, before the 'exit'.
|
415 |
|
|
\
|
416 |
|
|
\ Other optimizations performed by the metacompiler, but not this optimizer,
|
417 |
|
|
\ include; inlining constant values and addresses, allowing the creation of
|
418 |
|
|
\ headerless words which are named only in the metacompiler and not in the
|
419 |
|
|
\ target, and the 'fallthrough;' word which allows for greater code sharing.
|
420 |
|
|
\ Some of these optimizations have a manual element to them, such as
|
421 |
|
|
\ 'fallthrough;'.
|
422 |
|
|
|
423 |
|
|
: previous there =cell - ; ( -- a )
|
424 |
|
|
: lookback previous t@ ; ( -- u )
|
425 |
|
|
: call? lookback $e000 and [a] #call = ; ( -- t )
|
426 |
|
|
: call>goto previous dup t@ $1fff and swap t! ; ( -- )
|
427 |
|
|
: fence? fence @ previous u> ; ( -- t )
|
428 |
|
|
: safe? lookback $e000 and [a] #alu = lookback $001c and 0= and ; ( -- t )
|
429 |
|
|
: alu>return previous dup t@ [a] r->pc [a] r-1 swap t! ; ( -- )
|
430 |
|
|
: exit-optimize ( -- )
|
431 |
|
|
fence? if [a] return exit then
|
432 |
|
|
call? if call>goto exit then
|
433 |
|
|
safe? if alu>return exit then
|
434 |
|
|
[a] return ;
|
435 |
|
|
: exit, exit-optimize update-fence ; ( -- )
|
436 |
|
|
|
437 |
|
|
: compile-only tlast @ t@ $8000 or tlast @ t! ; ( -- )
|
438 |
|
|
: immediate tlast @ t@ $4000 or tlast @ t! ; ( -- )
|
439 |
|
|
|
440 |
|
|
\ create a word in the metacompilers dictionary, not the targets
|
441 |
|
|
: tcreate get-current >r target.1 set-current create r> set-current ;
|
442 |
|
|
|
443 |
|
|
: thead ( b u -- : compile word header into target dictionary )
|
444 |
|
|
header @ 0= if 2drop exit then
|
445 |
|
|
talign
|
446 |
|
|
there [last] t, tlast !
|
447 |
|
|
there #target + pack$ c@ 1+ aligned tcp +! talign ;
|
448 |
|
|
|
449 |
|
|
: lookahead ( -- b u : parse a word, but leave it in the input stream )
|
450 |
|
|
>in @ >r bl parse r> >in ! ;
|
451 |
|
|
|
452 |
|
|
\ The word 'h:' creates a headerless word in the target dictionary for
|
453 |
|
|
\ space saving reasons and to declutter the target search order. Ideally
|
454 |
|
|
\ it would instead add the word to a different vocabulary, so it is still
|
455 |
|
|
\ accessible to the programmer, but there is already very little room on the
|
456 |
|
|
\ target.
|
457 |
|
|
|
458 |
|
|
: literal [a] literal ; ( u -- )
|
459 |
|
|
: h: ( -- : create a word with no name in the target dictionary )
|
460 |
|
|
' literal !
|
461 |
|
|
$f00d tcreate there , update-fence does> @ [a] call ;
|
462 |
|
|
|
463 |
|
|
: t: ( "name", -- : creates a word in the target dictionary )
|
464 |
|
|
lookahead thead h: ;
|
465 |
|
|
|
466 |
|
|
\ @warning: Only use 'fallthrough' to fallthrough to words defined with 'h:'.
|
467 |
|
|
: fallthrough;
|
468 |
|
|
' (literal) !
|
469 |
|
|
$f00d <> if source type cr 1 abort" unstructured! " then ;
|
470 |
|
|
: t;
|
471 |
|
|
fallthrough; optimize if exit, else [a] return then ;
|
472 |
|
|
|
473 |
|
|
: fetch-xt @ dup 0= abort" (null) " ; ( a -- xt )
|
474 |
|
|
|
475 |
|
|
: tconstant ( "name", n -- , Run Time: -- n )
|
476 |
|
|
>r
|
477 |
|
|
lookahead
|
478 |
|
|
thead
|
479 |
|
|
there tdoConst fetch-xt [a] call r> t, >r
|
480 |
|
|
tcreate r> ,
|
481 |
|
|
does> @ tbody t@ [a] literal ;
|
482 |
|
|
|
483 |
|
|
: tvariable ( "name", n -- , Run Time: -- a )
|
484 |
|
|
>r
|
485 |
|
|
lookahead
|
486 |
|
|
thead
|
487 |
|
|
there tdoVar fetch-xt [a] call r> t, >r
|
488 |
|
|
tcreate r> ,
|
489 |
|
|
does> @ tbody [a] literal ;
|
490 |
|
|
|
491 |
|
|
: tlocation ( "name", n -- : Reserve space in target for a memory location )
|
492 |
|
|
there swap t, tcreate , does> @ [a] literal ;
|
493 |
|
|
|
494 |
|
|
: [t] ( "name", -- a : get the address of a target word )
|
495 |
|
|
token target.1 search-wordlist 0= abort" [t]? "
|
496 |
|
|
cfa >body @ ;
|
497 |
|
|
|
498 |
|
|
\ @warning only use "[v]" on variables, not tlocations
|
499 |
|
|
: [v] [t] =cell + ; ( "name", -- a )
|
500 |
|
|
|
501 |
|
|
\ xchange takes two vocabularies defined in the target by their variable
|
502 |
|
|
\ names, "name1" and "name2", and updates "name1" so it contains the previously
|
503 |
|
|
\ defined words, and makes "name2" the vocabulary which subsequent definitions
|
504 |
|
|
\ are added to.
|
505 |
|
|
: xchange ( "name1", "name2", -- : exchange target vocabularies )
|
506 |
|
|
[last] [t] t! [t] t@ tlast meta! ;
|
507 |
|
|
|
508 |
|
|
\ These words implement the basic control structures needed to make
|
509 |
|
|
\ applications in the metacompiled program, they are no immediate words
|
510 |
|
|
\ and they do not need to be, 't:' and 't;' do not change the interpreter
|
511 |
|
|
\ state, once the actual metacompilation begins everything is command mode.
|
512 |
|
|
: begin there update-fence ; ( -- a )
|
513 |
|
|
: until [a] ?branch ; ( a -- )
|
514 |
|
|
: if there update-fence 0 [a] ?branch ; ( -- a )
|
515 |
|
|
: skip there update-fence 0 [a] branch ; ( -- a )
|
516 |
|
|
: then begin 2/ over t@ or swap t! ; ( a -- )
|
517 |
|
|
: else skip swap then ; ( a -- a )
|
518 |
|
|
: while if swap ; ( a -- a a )
|
519 |
|
|
: repeat [a] branch then update-fence ; ( a -- )
|
520 |
|
|
: again [a] branch update-fence ; ( a -- )
|
521 |
|
|
: aft drop skip begin swap ; ( a -- a )
|
522 |
|
|
: constant tcreate , does> @ literal ; ( "name", a -- )
|
523 |
|
|
: [char] char literal ; ( "name" )
|
524 |
|
|
: postpone [t] [a] call ; ( "name", -- )
|
525 |
|
|
: next tdoNext fetch-xt [a] call t, update-fence ; ( a -- )
|
526 |
|
|
: exit exit, ; ( -- )
|
527 |
|
|
: ' [t] literal ; ( "name", -- )
|
528 |
|
|
: ." tdoPrintString fetch-xt [a] call $literal ; ( "string", -- )
|
529 |
|
|
: $" tdoStringLit fetch-xt [a] call $literal ; ( "string", -- )
|
530 |
|
|
|
531 |
|
|
\ The following section adds the words implementable in assembly to the
|
532 |
|
|
\ metacompiler, when one of these words is used in the metacompiled program
|
533 |
|
|
\ it will be implemented in assembly.
|
534 |
|
|
|
535 |
|
|
\ @todo implement 'd/mod', 'ud/mod', and 'ud*'?
|
536 |
|
|
|
537 |
|
|
: nop ]asm #t alu asm[ ;
|
538 |
|
|
: dup ]asm #t t->n d+1 alu asm[ ;
|
539 |
|
|
: over ]asm #n t->n d+1 alu asm[ ;
|
540 |
|
|
: invert ]asm #~t alu asm[ ;
|
541 |
|
|
: um+ ]asm #t+n alu asm[ ;
|
542 |
|
|
: + ]asm #t+n n->t d-1 alu asm[ ;
|
543 |
|
|
: um* ]asm #t*n alu asm[ ;
|
544 |
|
|
: * ]asm #t*n n->t d-1 alu asm[ ;
|
545 |
|
|
: swap ]asm #n t->n alu asm[ ;
|
546 |
|
|
: nip ]asm #t d-1 alu asm[ ;
|
547 |
|
|
: drop ]asm #n d-1 alu asm[ ;
|
548 |
|
|
: >r ]asm #n t->r d-1 r+1 alu asm[ ;
|
549 |
|
|
: r> ]asm #r t->n d+1 r-1 alu asm[ ;
|
550 |
|
|
: r@ ]asm #r t->n d+1 alu asm[ ;
|
551 |
|
|
: @ ]asm #[t] alu asm[ ;
|
552 |
|
|
: ! ]asm #n->[t] d-1 alu asm[ ;
|
553 |
|
|
: rshift ]asm #n>>t d-1 alu asm[ ;
|
554 |
|
|
: lshift ]asm #n<
|
555 |
|
|
: = ]asm #t==n d-1 alu asm[ ;
|
556 |
|
|
: u< ]asm #nu
|
557 |
|
|
: < ]asm #n
|
558 |
|
|
: and ]asm #t&n d-1 alu asm[ ;
|
559 |
|
|
: xor ]asm #t^n d-1 alu asm[ ;
|
560 |
|
|
: or ]asm #t|n d-1 alu asm[ ;
|
561 |
|
|
: sp@ ]asm #sp@ t->n d+1 alu asm[ ;
|
562 |
|
|
: sp! ]asm #sp! alu asm[ ;
|
563 |
|
|
: 1- ]asm #t-1 alu asm[ ;
|
564 |
|
|
: rp@ ]asm #rp@ t->n d+1 alu asm[ ;
|
565 |
|
|
: rp! ]asm #rp! d-1 alu asm[ ;
|
566 |
|
|
: 0= ]asm #t==0 alu asm[ ;
|
567 |
|
|
: (bye) ]asm #bye alu asm[ ;
|
568 |
|
|
: rx? ]asm #rx t->n d+1 alu asm[ ;
|
569 |
|
|
: tx! ]asm #tx n->t d-1 alu asm[ ;
|
570 |
|
|
: (save) ]asm #save d-1 alu asm[ ;
|
571 |
|
|
: u/mod ]asm #u/mod t->n alu asm[ ;
|
572 |
|
|
\ : u/ ]asm #u/mod d-1 alu asm[ ;
|
573 |
|
|
\ : umod ]asm #u/mod n->t d-1 alu asm[ ;
|
574 |
|
|
: /mod ]asm #/mod t->n alu asm[ ;
|
575 |
|
|
: / ]asm #/mod d-1 alu asm[ ;
|
576 |
|
|
: mod ]asm #/mod n->t d-1 alu asm[ ;
|
577 |
|
|
: rdrop ]asm #t r-1 alu asm[ ;
|
578 |
|
|
\ Some words can be implemented in a single instruction which have no
|
579 |
|
|
\ analogue within Forth.
|
580 |
|
|
: dup-@ ]asm #[t] t->n d+1 alu asm[ ;
|
581 |
|
|
: dup>r ]asm #t t->r r+1 alu asm[ ;
|
582 |
|
|
: 2dup= ]asm #t==n t->n d+1 alu asm[ ;
|
583 |
|
|
: 2dupxor ]asm #t^n t->n d+1 alu asm[ ;
|
584 |
|
|
: rxchg ]asm #r t->r alu asm[ ;
|
585 |
|
|
|
586 |
|
|
\ 'for' needs the new definition of '>r' to work correctly.
|
587 |
|
|
: for >r begin ;
|
588 |
|
|
|
589 |
|
|
: meta: : ;
|
590 |
|
|
\ : :noname h: ;
|
591 |
|
|
: : t: ;
|
592 |
|
|
meta: ; t; ;
|
593 |
|
|
hide meta:
|
594 |
|
|
hide t:
|
595 |
|
|
hide t;
|
596 |
|
|
|
597 |
|
|
]asm #~t ALU asm[ constant =invert ( invert instruction )
|
598 |
|
|
]asm #t r->pc r-1 ALU asm[ constant =exit ( return/exit instruction )
|
599 |
|
|
]asm #n t->r d-1 r+1 ALU asm[ constant =>r ( to r. stk. instruction )
|
600 |
|
|
$20 constant =bl ( blank, or space )
|
601 |
|
|
$d constant =cr ( carriage return )
|
602 |
|
|
$a constant =lf ( line feed )
|
603 |
|
|
$8 constant =bs ( back space )
|
604 |
|
|
$1b constant =escape ( escape character )
|
605 |
|
|
|
606 |
|
|
$10 constant dump-width ( number of columns for 'dump' )
|
607 |
|
|
$50 constant tib-length ( size of terminal input buffer )
|
608 |
|
|
$1f constant word-length ( maximum length of a word )
|
609 |
|
|
|
610 |
|
|
$40 constant c/l ( characters per line in a block )
|
611 |
|
|
$10 constant l/b ( lines in a block )
|
612 |
|
|
$4400 constant sp0 ( start of variable stack )
|
613 |
|
|
$7fff constant rp0 ( start of return stack )
|
614 |
|
|
$2bad constant magic ( magic number for compiler security )
|
615 |
|
|
$f constant #highest ( highest bit in cell )
|
616 |
|
|
|
617 |
|
|
( Volatile variables )
|
618 |
|
|
$4000 constant ( used in skip/test )
|
619 |
|
|
$4002 constant last-def ( last, possibly unlinked, word definition )
|
620 |
|
|
$4006 constant id ( used for source id )
|
621 |
|
|
$4008 constant seed ( seed used for the PRNG )
|
622 |
|
|
$400A constant handler ( current handler for throw/catch )
|
623 |
|
|
$400C constant block-dirty ( -1 if loaded block buffer is modified )
|
624 |
|
|
$4010 constant ( -- c : new character, blocking input )
|
625 |
|
|
$4012 constant ( c -- : emit character )
|
626 |
|
|
$4014 constant ( "accept" vector )
|
627 |
|
|
\ $4016 constant ( "tap" vector, for terminal handling )
|
628 |
|
|
\ $4018 constant ( c -- : emit character )
|
629 |
|
|
\ $4020 constant ( -- : display prompt )
|
630 |
|
|
\ $4022 constant _literal ( u -- u | : handles literals )
|
631 |
|
|
$4110 constant context ( holds current context for search order )
|
632 |
|
|
$4122 constant #tib ( Current count of terminal input buffer )
|
633 |
|
|
$4124 constant tib-buf ( ... and address )
|
634 |
|
|
$4126 constant tib-start ( backup tib-buf value )
|
635 |
|
|
\ $4280 == pad-area
|
636 |
|
|
|
637 |
|
|
$c constant header-length ( location of length in header )
|
638 |
|
|
$e constant header-crc ( location of CRC in header )
|
639 |
|
|
$14 constant header-options ( location of options bits in header )
|
640 |
|
|
|
641 |
|
|
target.1 +order ( Add target word dictionary to search order )
|
642 |
|
|
meta -order meta +order ( Reorder so 'meta' has a higher priority )
|
643 |
|
|
forth-wordlist -order ( Remove normal Forth words to prevent accidents )
|
644 |
|
|
|
645 |
|
|
\ # The Target Forth
|
646 |
|
|
\ With the assembler and meta compiler complete, we can now make our target
|
647 |
|
|
\ application, a Forth interpreter which will be able to read in this file
|
648 |
|
|
\ and create new, possibly modified, images for the Forth virtual machine
|
649 |
|
|
\ to run.
|
650 |
|
|
|
651 |
|
|
\ ## The Image Header
|
652 |
|
|
\ The following 't,' sequence reserves space and partially populates the
|
653 |
|
|
\ image header with file format information, based upon the PNG specification.
|
654 |
|
|
\ See and
|
655 |
|
|
\ for more information about
|
656 |
|
|
\ how to design binary formats.
|
657 |
|
|
\
|
658 |
|
|
\ The header contains enough information to identify the format, the
|
659 |
|
|
\ version of the format, and to detect corruption of data, as well as
|
660 |
|
|
\ having a few other nice properties - some having to do with how other
|
661 |
|
|
\ systems and programs may deal with the binary (such as have a string literal
|
662 |
|
|
\ 'FTH' to help identify the binary format, and the first byte being outside
|
663 |
|
|
\ the ASCII range of characters so it is obvious that the file is meant to
|
664 |
|
|
\ be treated as a binary and not as text).
|
665 |
|
|
\
|
666 |
|
|
|
667 |
|
|
|
668 |
|
|
|
669 |
|
|
$4689 t, \ $4: 0x89 'F'
|
670 |
|
|
$4854 t, \ $6: 'T' 'H'
|
671 |
|
|
$0a0d t, \ $8: '\r' '\n'
|
672 |
|
|
$0a1a t, \ $A: ^Z '\n'
|
673 |
|
|
|
674 |
|
|
|
675 |
|
|
$0001 t, \ $10: Endianess check
|
676 |
|
|
#version t, \ $12: Version information
|
677 |
|
|
$0001 t, \ $14: Header options
|
678 |
|
|
|
679 |
|
|
\ ## First Word Definitions
|
680 |
|
|
\
|
681 |
|
|
\ After the header two short words are defined, visible only to the meta
|
682 |
|
|
\ compiler and used by its internal machinery. The words are needed by
|
683 |
|
|
\ 'tvariable' and 'tconstant', and these constructs cannot be used without
|
684 |
|
|
\ them. This is an example of the metacompiler and the metacompiled program
|
685 |
|
|
\ being intermingled, which should be kept to a minimum.
|
686 |
|
|
|
687 |
|
|
h: doVar r> ; ( -- a : push return address and exit to caller )
|
688 |
|
|
h: doConst r> @ ; ( -- u : push value at return address and exit to caller )
|
689 |
|
|
|
690 |
|
|
\ Here the address of 'doVar' and 'doConst' in the target is stored in
|
691 |
|
|
\ variables accessible by the metacompiler, so 'tconstant' and 'tvariable' can
|
692 |
|
|
\ compile references to them in the target.
|
693 |
|
|
|
694 |
|
|
[t] doVar tdoVar meta!
|
695 |
|
|
[t] doConst tdoConst meta!
|
696 |
|
|
|
697 |
|
|
\ Next some space is reserved for variables which will have no name in the
|
698 |
|
|
\ target and are not on the target Forths search order. We do this with
|
699 |
|
|
\ 'tlocation'. These variables are needed for the internal working of the
|
700 |
|
|
\ interpreter but the application programmer using the target Forth can make
|
701 |
|
|
\ do without them, so they do not have names within the target.
|
702 |
|
|
\
|
703 |
|
|
\ A short description of the variables and their uses:
|
704 |
|
|
\
|
705 |
|
|
\ 'cp' is the dictionary pointer, which usually is only incremented in order
|
706 |
|
|
\ to reserve space in this dictionary. Words like "," and ":" advance this
|
707 |
|
|
\ variable.
|
708 |
|
|
\
|
709 |
|
|
\ 'root-voc', 'editor-voc', 'assembler-voc', and '_forth-wordlist' are
|
710 |
|
|
\ variables which point to word lists, they can be used with 'set-order'
|
711 |
|
|
\ and pointers to them may be returned by 'get-order'. By default the only
|
712 |
|
|
\ vocabularies loaded are the root vocabulary (which contains only a few
|
713 |
|
|
\ vocabulary manipulation words) and the forth vocabulary are loaded (which
|
714 |
|
|
\ contains most of the words in a standard Forth).
|
715 |
|
|
\
|
716 |
|
|
\ 'current' contains a pointer to the vocabulary which new words will be
|
717 |
|
|
\ added to when the target is up and running, this will be the forth
|
718 |
|
|
\ vocabulary, or '_forth-wordlist'.
|
719 |
|
|
\
|
720 |
|
|
\ None of these variables are set to any meaningful values here and will be
|
721 |
|
|
\ updated during the metacompilation process.
|
722 |
|
|
\
|
723 |
|
|
|
724 |
|
|
|
725 |
|
|
|
726 |
|
|
|
727 |
|
|
\ 0 tlocation assembler-voc ( assembler vocabulary )
|
728 |
|
|
|
729 |
|
|
|
730 |
|
|
|
731 |
|
|
\ ## Target Assembly Words
|
732 |
|
|
|
733 |
|
|
\ The first words added to the target Forths dictionary are all based on
|
734 |
|
|
\ assembly instructions. The definitions may seem like nonsense, how does the
|
735 |
|
|
\ definition of '+' work? It appears that the definition calls itself, which
|
736 |
|
|
\ obviously would not work. The answer is in the order new words are added
|
737 |
|
|
\ into the dictionary. In Forth, a word definition is not placed in the
|
738 |
|
|
\ search order until the definition of that word is complete, this allows
|
739 |
|
|
\ the previous definition of a word to be use within that definition, and
|
740 |
|
|
\ requires a separate word ("recurse") to implement recursion.
|
741 |
|
|
\
|
742 |
|
|
\ However, the words ':' and ';' are not the normal Forth define and end
|
743 |
|
|
\ definitions words, they are the metacompilers and they behave differently,
|
744 |
|
|
\ ':' is implemented with 't:' and ';' with 't;'.
|
745 |
|
|
\
|
746 |
|
|
\ 't:' uses 'create' to make a new variable in the metacompilers
|
747 |
|
|
\ dictionary that points to a word definition in the target, it also creates
|
748 |
|
|
\ the words header in the target ('h:' is the same, but without a header
|
749 |
|
|
\ being made in the target). The word is compilable into the target as soon
|
750 |
|
|
\ as it is defined, yet the definition of '+' is not recursive because the
|
751 |
|
|
\ metacompilers search order, "meta", is higher that the search order for
|
752 |
|
|
\ the words containing the metacompiled target addresses, "target.1", so the
|
753 |
|
|
\ assembly for '+' gets compiled into the definition of '+'.
|
754 |
|
|
\
|
755 |
|
|
\ Manipulation of the word search order is key in understanding how the
|
756 |
|
|
\ metacompiler works.
|
757 |
|
|
\
|
758 |
|
|
\ The following words will be part of the main search order, in
|
759 |
|
|
\ 'forth-wordlist' and in the assembly search order.
|
760 |
|
|
\
|
761 |
|
|
|
762 |
|
|
\ : nop nop ; ( -- : do nothing )
|
763 |
|
|
: dup dup ; ( n -- n n : duplicate value on top of stack )
|
764 |
|
|
: over over ; ( n1 n2 -- n1 n2 n1 : duplicate second value on stack )
|
765 |
|
|
: invert invert ; ( u -- u : bitwise invert of value on top of stack )
|
766 |
|
|
: um+ um+ ; ( u u -- u carry : addition with carry )
|
767 |
|
|
: + + ; ( u u -- u : addition without carry )
|
768 |
|
|
: um* um* ; ( u u -- ud : multiplication )
|
769 |
|
|
: * * ; ( u u -- u : multiplication )
|
770 |
|
|
: swap swap ; ( n1 n2 -- n2 n1 : swap two values on stack )
|
771 |
|
|
: nip nip ; ( n1 n2 -- n2 : remove second item on stack )
|
772 |
|
|
: drop drop ; ( n -- : remove item on stack )
|
773 |
|
|
: @ @ ; ( a -- u : load value at address )
|
774 |
|
|
: ! ! ; ( u a -- : store 'u' at address 'a' )
|
775 |
|
|
: rshift rshift ; ( u1 u2 -- u : shift u2 by u1 places to the right )
|
776 |
|
|
: lshift lshift ; ( u1 u2 -- u : shift u2 by u1 places to the left )
|
777 |
|
|
: = = ; ( u1 u2 -- t : does u2 equal u1? )
|
778 |
|
|
: u< u< ; ( u1 u2 -- t : is u2 less than u1 )
|
779 |
|
|
: < < ; ( u1 u2 -- t : is u2 less than u1, signed version )
|
780 |
|
|
: and and ; ( u u -- u : bitwise and )
|
781 |
|
|
: xor xor ; ( u u -- u : bitwise exclusive or )
|
782 |
|
|
: or or ; ( u u -- u : bitwise or )
|
783 |
|
|
\ : sp@ sp@ ; ( ??? -- u : get stack depth )
|
784 |
|
|
\ : sp! sp! ; ( u -- ??? : set stack depth )
|
785 |
|
|
: 1- 1- ; ( u -- u : decrement top of stack )
|
786 |
|
|
: 0= 0= ; ( u -- t : if top of stack equal to zero )
|
787 |
|
|
: (bye) (bye) ; ( u -- !!! : exit VM with 'u' as return value )
|
788 |
|
|
: rx? rx? ; ( -- c | -1 : fetch a single character, or EOF )
|
789 |
|
|
: tx! tx! ; ( c -- : transmit single character )
|
790 |
|
|
: (save) (save) ; ( u1 u2 -- u : save memory from u1 to u2 inclusive )
|
791 |
|
|
: u/mod u/mod ; ( u1 u2 -- rem div : unsigned divide/modulo )
|
792 |
|
|
: /mod /mod ; ( u1 u2 -- rem div : signed divide/modulo )
|
793 |
|
|
: / / ; ( u1 u2 -- u : u1 divided by u2 )
|
794 |
|
|
: mod mod ; ( u1 u2 -- u : remainder of u1 divided by u2 )
|
795 |
|
|
|
796 |
|
|
\ ### Forth Implementation of Arithmetic Functions
|
797 |
|
|
\
|
798 |
|
|
\ As an aside, the basic arithmetic functions of Forth can be implemented
|
799 |
|
|
\ in terms of simple addition, some tests and bit manipulation, if they
|
800 |
|
|
\ are not available for your system. Division, the remainder operation and
|
801 |
|
|
\ multiplication are provided by the virtual machine in this case, but it
|
802 |
|
|
\ is interesting to see how these words are put together. The first task it
|
803 |
|
|
\ to implement an add with carry, or 'um+'. Once this is available, 'um/mod'
|
804 |
|
|
\ and 'um*' are coded.
|
805 |
|
|
\
|
806 |
|
|
\ : s>d dup 0< ; ( n -- d : single to double )
|
807 |
|
|
\
|
808 |
|
|
\ : um+ ( w w -- w carry )
|
809 |
|
|
\ over over + >r
|
810 |
|
|
\ r@ 0 < invert >r
|
811 |
|
|
\ over over and
|
812 |
|
|
\ 0 < r> or >r
|
813 |
|
|
\ or 0 < r> and invert 1 +
|
814 |
|
|
\ r> swap ;
|
815 |
|
|
\
|
816 |
|
|
\ $f constant #highest
|
817 |
|
|
\ : um/mod ( ud u -- r q )
|
818 |
|
|
\ ?dup 0= if $a -throw exit then
|
819 |
|
|
\ 2dup u<
|
820 |
|
|
\ if negate #highest
|
821 |
|
|
\ for >r dup um+ >r >r dup um+ r> + dup
|
822 |
|
|
\ r> r@ swap >r um+ r> or
|
823 |
|
|
\ if >r drop 1+ r> else drop then r>
|
824 |
|
|
\ next
|
825 |
|
|
\ drop swap exit
|
826 |
|
|
\ then drop 2drop [-1] dup ;
|
827 |
|
|
\
|
828 |
|
|
\ : m/mod ( d n -- r q ) \ floored division
|
829 |
|
|
\ dup 0< dup>r
|
830 |
|
|
\ if
|
831 |
|
|
\ negate >r dnegate r>
|
832 |
|
|
\ then
|
833 |
|
|
\ >r dup 0< if r@ + then r> um/mod r>
|
834 |
|
|
\ if swap negate swap exit then ;
|
835 |
|
|
\
|
836 |
|
|
\ : um* ( u u -- ud )
|
837 |
|
|
\ 0 swap ( u1 0 u2 ) #highest
|
838 |
|
|
\ for dup um+ >r >r dup um+ r> + r>
|
839 |
|
|
\ if >r over um+ r> + then
|
840 |
|
|
\ next rot drop ;
|
841 |
|
|
\
|
842 |
|
|
\ The other arithmetic operations follow from the previous definitions almost
|
843 |
|
|
\ trivially:
|
844 |
|
|
\
|
845 |
|
|
\ : /mod over 0< swap m/mod ; ( n n -- r q )
|
846 |
|
|
\ : mod /mod drop ; ( n n -- r )
|
847 |
|
|
\ : / /mod nip ; ( n n -- q )
|
848 |
|
|
\ : * um* drop ; ( n n -- n )
|
849 |
|
|
\ : m* 2dup xor 0< >r abs swap abs um* r> if dnegate then ; ( n n -- d )
|
850 |
|
|
\ : */mod >r m* r> m/mod ; ( n n n -- r q )
|
851 |
|
|
\ : */ */mod nip ; ( n n n -- q )
|
852 |
|
|
\
|
853 |
|
|
|
854 |
|
|
\ ### Inline Words
|
855 |
|
|
\ These words can also be implemented in a single instruction, yet their
|
856 |
|
|
\ definition is different for multiple reasons. These words should only be
|
857 |
|
|
\ use within a word definition begin defined within the running target Forth,
|
858 |
|
|
\ so they have a bit set in their header indicating as such.
|
859 |
|
|
\
|
860 |
|
|
\ Another difference is how these words are compiled into a word definition
|
861 |
|
|
\ in the target, which is due to the fact they manipulate the return stack.
|
862 |
|
|
\ These words are 'inlined', which means the instruction they contain is
|
863 |
|
|
\ written directly into a definition being defined in the running target
|
864 |
|
|
\ Forth instead of a call to a word that contains the assembly instruction,
|
865 |
|
|
\ calls obviously change the return stack, so these words would either have
|
866 |
|
|
\ to take that into account or the assembly instruction could be inlined,
|
867 |
|
|
\ the later option has been taken.
|
868 |
|
|
\
|
869 |
|
|
\ As these words are never actually called, as they are only of use in
|
870 |
|
|
\ compile mode, and then they are inlined instead of being called, we can
|
871 |
|
|
\ leave off ';' which would write an exit instruction on the end of the
|
872 |
|
|
\ definition.
|
873 |
|
|
\
|
874 |
|
|
|
875 |
|
|
there constant inline-start
|
876 |
|
|
\ : rp@ rp@ fallthrough; compile-only ( -- u )
|
877 |
|
|
\ : rp! rp! fallthrough; compile-only ( u --, R: --- ??? )
|
878 |
|
|
: exit exit fallthrough; compile-only ( -- )
|
879 |
|
|
: >r >r fallthrough; compile-only ( u --, R: -- u )
|
880 |
|
|
: r> r> fallthrough; compile-only ( -- u, R: u -- )
|
881 |
|
|
: r@ r@ fallthrough; compile-only ( -- u )
|
882 |
|
|
: rdrop rdrop fallthrough; compile-only ( --, R: u -- )
|
883 |
|
|
there constant inline-end
|
884 |
|
|
|
885 |
|
|
\ Finally we can set the 'assembler-voc' to variable, we will add to the
|
886 |
|
|
\ assembly vocabulary later, but all of the words defined so far belong in
|
887 |
|
|
\ the assembly vocabulary. Unfortunately, the assembler when run in the
|
888 |
|
|
\ target Forth interpreter will compile calls to the instructions like '+'
|
889 |
|
|
\ or 'xor', only a few words will be inlined. There are potential solutions
|
890 |
|
|
\ to this problem, but they are not worth further complicating the Forth just
|
891 |
|
|
\ yet.
|
892 |
|
|
|
893 |
|
|
\ [last] [t] assembler-voc t!
|
894 |
|
|
|
895 |
|
|
$2 tconstant cell ( size of a cell in bytes )
|
896 |
|
|
$0 tvariable >in ( Hold character pointer when parsing input )
|
897 |
|
|
$0 tvariable state ( compiler state variable )
|
898 |
|
|
$0 tvariable hld ( Pointer into hold area for numeric output )
|
899 |
|
|
$10 tvariable base ( Current output radix )
|
900 |
|
|
$0 tvariable span ( Hold character count received by expect )
|
901 |
|
|
$8 tconstant #vocs ( number of vocabularies in allowed )
|
902 |
|
|
$400 tconstant b/buf ( size of a block )
|
903 |
|
|
|
904 |
|
|
#version constant ver ( eForth version )
|
905 |
|
|
pad-area tconstant pad ( pad variable - offset into temporary storage )
|
906 |
|
|
|
907 |
|
|
|
908 |
|
|
|
909 |
|
|
|
910 |
|
|
\ The following execution vectors would/will be added if there is enough
|
911 |
|
|
\ space, it is very useful to have hooks into the system to change how
|
912 |
|
|
\ the interpreter behaviour works. Being able to change how the Forth
|
913 |
|
|
\ interpreter handles number parsing allows the processing of double or
|
914 |
|
|
\ floating point numbers in a system that could otherwise not handle them.
|
915 |
|
|
|
916 |
|
|
\ 0 tvariable ( execution vector for interpreter error )
|
917 |
|
|
\ 0 tvariable ( execution vector for interpreter )
|
918 |
|
|
\ 0 tvariable ( execution vector for abort handler )
|
919 |
|
|
\ 0 tvariable ( execution vector for at-xy )
|
920 |
|
|
\ 0 tvariable ( execution vector for page )
|
921 |
|
|
\ 0 tvariable ( execution vector for >number )
|
922 |
|
|
\ 0 tvariable hidden ( vocabulary for hidden words )
|
923 |
|
|
|
924 |
|
|
|
925 |
|
|
\ ### Basic Word Set
|
926 |
|
|
\
|
927 |
|
|
\ The following section of words is purely a space saving measure, or
|
928 |
|
|
\ they allow for other optimizations which also save space. Examples
|
929 |
|
|
\ of this include "[-1]"; any number about $7fff requires two instructions
|
930 |
|
|
\ to encode, numbers below only one, -1 is a commonly used number so this
|
931 |
|
|
\ allows us to save on space.
|
932 |
|
|
\
|
933 |
|
|
\ This does not explain the creation of a word to push the number zero
|
934 |
|
|
\ though, this only takes up one instruction. This is instead explained
|
935 |
|
|
\ by the interaction of the peephole optimizer with function calls, calls
|
936 |
|
|
\ to function can be turned into a branch if that instruction were to be
|
937 |
|
|
\ followed by an exit instruction because it is at the end of a word
|
938 |
|
|
\ definition. This cannot be said of literals. This allows us to save
|
939 |
|
|
\ space under special circumstances.
|
940 |
|
|
\
|
941 |
|
|
\ The following example illustrates this:
|
942 |
|
|
\
|
943 |
|
|
\ | FORTH CODE | PSEUDO ASSEMBLER |
|
944 |
|
|
\ | ---------------------------- | ------------------------ |
|
945 |
|
|
\ | : push-zero 0 literal ; | LITERAL(0) EXIT |
|
946 |
|
|
\ | : example-1 drop 0 literal ; | DROP LITERAL(0) EXIT |
|
947 |
|
|
\ | : example-2 drop 0 literal ; | DROP BRANCH(push-zero) |
|
948 |
|
|
\
|
949 |
|
|
\ Where "example-1" being unoptimized requires three instructions, whereas
|
950 |
|
|
\ "example-2" requires only two, with the two instruction overhead of
|
951 |
|
|
\ "push-zero".
|
952 |
|
|
\
|
953 |
|
|
\ Optimizations like this explain some of the structure of the Forth
|
954 |
|
|
\ code, it is better to exit early and heavily factorize code if space is at
|
955 |
|
|
\ a premium, which it is due to the way the virtual machine works (both it
|
956 |
|
|
\ being 16-bit only, and only allowing the first 16KiB to be used for program
|
957 |
|
|
\ storage). Factoring code like this is similar to performing LZW compression,
|
958 |
|
|
\ or similar dictionary related compression schemes.
|
959 |
|
|
\
|
960 |
|
|
\
|
961 |
|
|
\
|
962 |
|
|
\ Whilst factoring words into smaller, cleaner, definitions is highly
|
963 |
|
|
\ encouraged for Forth code (it is often an art coming up with the right
|
964 |
|
|
\ word name and associated concept it encapsulates), making words like
|
965 |
|
|
\ "2drop-0" is not. It hurts readability as there is no reason or idea backing
|
966 |
|
|
\ a word like "2drop-0", even if it is fairly clear what it does from its
|
967 |
|
|
\ name.
|
968 |
|
|
\
|
969 |
|
|
h: [-1] -1 ; ( -- -1 : space saving measure, push -1 )
|
970 |
|
|
h: 0x8000 $8000 ; ( -- $8000 : space saving measure, push $8000 )
|
971 |
|
|
h: 2drop-0 drop fallthrough; ( n n -- 0 )
|
972 |
|
|
h: drop-0 drop fallthrough; ( n -- 0 )
|
973 |
|
|
h: 0x0000 $0000 ; ( -- $0000 : space/optimization, push $0000 )
|
974 |
|
|
h: state@ state @ ; ( -- u )
|
975 |
|
|
h: first-bit 1 and ; ( u -- u )
|
976 |
|
|
h: in! >in ! ; ( u -- )
|
977 |
|
|
h: in@ >in @ ; ( -- u )
|
978 |
|
|
|
979 |
|
|
\ Now the implementation of the Forth interpreter without the apologies
|
980 |
|
|
\ for the words in the prior section. This group of words implement some
|
981 |
|
|
\ of the basic words expected in Forth; simple stack manipulation, tests,
|
982 |
|
|
\ and other one, or two line definitions that do not really require an
|
983 |
|
|
\ explanation of how they work - only why they are useful. Some of the words
|
984 |
|
|
\ are described by their stack comment entirely, like "2drop", other like
|
985 |
|
|
\ "cell+" require a reason for such a simple word (they embody a concept or
|
986 |
|
|
\ they help hide implementation details).
|
987 |
|
|
|
988 |
|
|
: 2drop drop drop ; ( n n -- )
|
989 |
|
|
: 1+ 1 + ; ( n -- n : increment a value )
|
990 |
|
|
: negate invert 1+ ; ( n -- n : negate a number )
|
991 |
|
|
: - negate + ; ( n1 n2 -- n : subtract n1 from n2 )
|
992 |
|
|
h: over- over - ; ( u u -- u u )
|
993 |
|
|
h: over+ over + ; ( u1 u2 -- u1 u1+2 )
|
994 |
|
|
: aligned dup first-bit + ; ( b -- a )
|
995 |
|
|
: bye 0 (bye) ; ( -- : leave the interpreter )
|
996 |
|
|
h: cell- cell - ; ( a -- a : adjust address to previous cell )
|
997 |
|
|
: cell+ cell + ; ( a -- a : move address forward to next cell )
|
998 |
|
|
: cells 1 lshift ; ( n -- n : convert cells count to address count )
|
999 |
|
|
: chars 1 rshift ; ( n -- n : convert bytes to number of cells )
|
1000 |
|
|
: ?dup dup if dup exit then ; ( n -- 0 | n n : duplicate non zero value )
|
1001 |
|
|
: > swap < ; ( n1 n2 -- t : signed greater than, n1 > n2 )
|
1002 |
|
|
: u> swap u< ; ( u1 u2 -- t : unsigned greater than, u1 > u2 )
|
1003 |
|
|
h: u>= u< invert ; ( u1 u2 -- t : unsigned greater/equal )
|
1004 |
|
|
: <> = invert ; ( n n -- t : not equal )
|
1005 |
|
|
: 0<> 0= invert ; ( n n -- t : not equal to zero )
|
1006 |
|
|
: 0> 0 > ; ( n -- t : greater than zero? )
|
1007 |
|
|
: 0< 0 < ; ( n -- t : less than zero? )
|
1008 |
|
|
: 2dup over over ; ( n1 n2 -- n1 n2 n1 n2 )
|
1009 |
|
|
: tuck swap over ; ( n1 n2 -- n2 n1 n2 )
|
1010 |
|
|
: +! tuck @ + fallthrough; ( n a -- : increment value at 'a' by 'n' )
|
1011 |
|
|
h: swap! swap ! ; ( a u -- )
|
1012 |
|
|
: 1+! 1 swap +! ; ( a -- : increment value at address by 1 )
|
1013 |
|
|
: 1-! [-1] swap +! ; ( a -- : decrement value at address by 1 )
|
1014 |
|
|
: 2! ( d a -- ) tuck ! cell+ ! ; ( n n a -- )
|
1015 |
|
|
: 2@ ( a -- d ) dup cell+ @ swap @ ; ( a -- n n )
|
1016 |
|
|
: get-current current @ ; ( -- wid )
|
1017 |
|
|
: set-current current ! ; ( wid -- )
|
1018 |
|
|
: bl =bl ; ( -- c )
|
1019 |
|
|
: within over- >r - r> u< ; ( u lo hi -- t )
|
1020 |
|
|
: abs dup 0< if negate exit then ; ( n -- u )
|
1021 |
|
|
: tib #tib cell+ @ ; ( -- a )
|
1022 |
|
|
: source #tib 2@ ; ( -- a u )
|
1023 |
|
|
: source-id id @ ; ( -- 0 | -1 )
|
1024 |
|
|
: d0= 0= swap 0= and ; ( d -- t )
|
1025 |
|
|
: dnegate invert >r invert 1 um+ r> + ; ( d -- d )
|
1026 |
|
|
\ : even first-bit 0= ; ( u -- t )
|
1027 |
|
|
\ : odd even 0= ; ( u -- t )
|
1028 |
|
|
|
1029 |
|
|
|
1030 |
|
|
\ 'execute' requires an understanding of the return stack, much like
|
1031 |
|
|
\ 'doConst' and 'doVar', when given an execution token of a word, a pointer
|
1032 |
|
|
\ to its Code Field Address (or CFA), 'execute' will call that word. This
|
1033 |
|
|
\ allows us to call arbitrary function and change, or vector, execution at
|
1034 |
|
|
\ run time. All 'execute' needs to do is push the address onto the return
|
1035 |
|
|
\ stack and when 'execute' exits it will jump to the desired word, the callers
|
1036 |
|
|
\ address is still on the return stack, so when the called word exit it will
|
1037 |
|
|
\ jump back to 'executes' caller.
|
1038 |
|
|
\
|
1039 |
|
|
\ '@execute' is similar but it only executes the token if it is non-zero.
|
1040 |
|
|
\
|
1041 |
|
|
|
1042 |
|
|
: execute >r ; ( cfa -- : execute a function )
|
1043 |
|
|
h: @execute @ ?dup if >r then ; ( cfa -- )
|
1044 |
|
|
\
|
1045 |
|
|
\ As the virtual machine is only addressable by cells, and not by characters,
|
1046 |
|
|
\ the words 'c@' and 'c!' cannot be defined as simple assembly primitives,
|
1047 |
|
|
\ they must be defined in terms of '@' and '!'. It is not difficult to do,
|
1048 |
|
|
\ but does mean these two primitives are slower than might be first thought.
|
1049 |
|
|
\
|
1050 |
|
|
|
1051 |
|
|
: c@ dup-@ swap first-bit ( b -- c : load character from address )
|
1052 |
|
|
if
|
1053 |
|
|
8 rshift exit
|
1054 |
|
|
then
|
1055 |
|
|
$ff and ;
|
1056 |
|
|
: c! ( c b -- : store character at address )
|
1057 |
|
|
swap $ff and dup 8 lshift or swap
|
1058 |
|
|
swap over dup @ swap first-bit 0= $ff xor
|
1059 |
|
|
>r over xor r> and xor swap ! ;
|
1060 |
|
|
|
1061 |
|
|
\ 'command?' will be used later for words that are state away. State awareness
|
1062 |
|
|
\ and whether the interpreter is in command mode, or compile mode, as well as
|
1063 |
|
|
\ immediate words, will require a lot of explanation for the beginner until
|
1064 |
|
|
\ they are understood. This is best done elsewhere. 'command?' is used to
|
1065 |
|
|
\ determine if the interpreter is in command mode or not, it returns true
|
1066 |
|
|
\ if it is.
|
1067 |
|
|
\
|
1068 |
|
|
|
1069 |
|
|
h: command? state@ 0= ; ( -- t )
|
1070 |
|
|
|
1071 |
|
|
\ 'here', 'align', 'cp!' and 'allow' all manipulate the dictionary pointer,
|
1072 |
|
|
\ which is a common operation. 'align' aligns the pointer up to the next
|
1073 |
|
|
\ cell boundary, and 'cp!' sets the dictionary pointer to a value whilst
|
1074 |
|
|
\ enforcing that the value written is aligned.
|
1075 |
|
|
\
|
1076 |
|
|
\ 'here' retrieves the current dictionary pointer, which is needed for
|
1077 |
|
|
\ compiling control structures into words, so is used by words like 'if',
|
1078 |
|
|
\ and has other uses as well.
|
1079 |
|
|
\
|
1080 |
|
|
|
1081 |
|
|
: here cp @ ; ( -- a )
|
1082 |
|
|
: align here fallthrough; ( -- )
|
1083 |
|
|
h: cp! aligned cp ! ; ( n -- )
|
1084 |
|
|
|
1085 |
|
|
\ 'allot' is used in Forth to allocate memory after using 'create' to make
|
1086 |
|
|
\ a new word. It reserves space in the dictionary, space can be deallocated
|
1087 |
|
|
\ by giving it a negative number, however this may trash whatever data is
|
1088 |
|
|
\ written there and should not generally be done.
|
1089 |
|
|
\
|
1090 |
|
|
\ Typical usage:
|
1091 |
|
|
\
|
1092 |
|
|
\ create x $20 allot ( Create an array of 32 values )
|
1093 |
|
|
\ $cafe x $10 ! ( Store '$cafe' at element 16 in array )
|
1094 |
|
|
\
|
1095 |
|
|
|
1096 |
|
|
: allot cp +! ; ( n -- )
|
1097 |
|
|
|
1098 |
|
|
\ 'rot' and '-rot' manipulate three items on the stack, rotating them, if you
|
1099 |
|
|
\ find yourself using them too much it is best to refactor the code as code
|
1100 |
|
|
\ that uses them tends to be very confusing.
|
1101 |
|
|
|
1102 |
|
|
: rot >r swap r> swap ; ( n1 n2 n3 -- n2 n3 n1 )
|
1103 |
|
|
: -rot swap >r swap r> ; ( n1 n2 n3 -- n3 n1 n2 )
|
1104 |
|
|
|
1105 |
|
|
\ '2>r' and '2r>' are like 'rot' and '-rot', useful but they should not
|
1106 |
|
|
\ be overused. The words move two values to and from the return stack. Care
|
1107 |
|
|
\ should exercised when using them as the return stack can easily be corrupted,
|
1108 |
|
|
\ leading to unpredictable behavior, much like all the return stack words.
|
1109 |
|
|
\ The optimizer might also change a call to one of these words into a jump,
|
1110 |
|
|
\ which should be avoided as it could cause problems in edge cases, so do not
|
1111 |
|
|
\ use this word directly before an 'exit' or ';'.
|
1112 |
|
|
\
|
1113 |
|
|
|
1114 |
|
|
h: 2>r rxchg swap >r >r ; ( u1 u2 --, R: -- u1 u2 )
|
1115 |
|
|
h: 2r> r> r> swap rxchg nop ; ( -- u1 u2, R: u1 u2 -- )
|
1116 |
|
|
|
1117 |
|
|
\ 'doNext' needs to be defined before we can use 'for...next' loops, the
|
1118 |
|
|
\ metacompiler compiles a reference to this word when 'next' is encountered.
|
1119 |
|
|
\
|
1120 |
|
|
\ It is worth explaining how this word works, it is a complex word that
|
1121 |
|
|
\ requires an understanding of how the return stack words, as well as how
|
1122 |
|
|
\ both 'for' and 'next work.
|
1123 |
|
|
\
|
1124 |
|
|
\ The 'for...next' loop accepts a value, 'u' and runs for 'u+1' times.
|
1125 |
|
|
\ 'for' puts the loop counter onto the return stack, meaning the
|
1126 |
|
|
\ loop counter value is available to us as the first stack element, but
|
1127 |
|
|
\ also meaning if we want to exit from within a 'for...next' loop we must
|
1128 |
|
|
\ pop off the value from the return stack first.
|
1129 |
|
|
\
|
1130 |
|
|
\ The 'next' word compiles a 'doNext' into the dictionary, and then the
|
1131 |
|
|
\ address to jump back to, just after the 'for' has compiled a '>r' into the
|
1132 |
|
|
\ dictionary.
|
1133 |
|
|
\
|
1134 |
|
|
\ 'doNext' has two possible actions, it either takes the branch back to
|
1135 |
|
|
\ the place after 'for' if the loop counter non zero, being careful to
|
1136 |
|
|
\ decrement the loop counter and restoring it the correct place, or
|
1137 |
|
|
\ it jumps over the place where the back jump address is stored in the
|
1138 |
|
|
\ case when the loop counter is zero, removing the loop counter from the
|
1139 |
|
|
\ return stack.
|
1140 |
|
|
\
|
1141 |
|
|
\ This is all possible, because when 'doNext' is called (and it must be
|
1142 |
|
|
\ called, not jumped to), the return address points to the cell after
|
1143 |
|
|
\ 'doNext' is compiled into. By manipulating the return stack correctly it
|
1144 |
|
|
\ can change the program flow to either jump over the next cell, or jump
|
1145 |
|
|
\ to the address contained in the next cell. Understanding the simpler
|
1146 |
|
|
\ 'doConst' and 'doVar' helps in understanding this word.
|
1147 |
|
|
\
|
1148 |
|
|
|
1149 |
|
|
h: doNext 2r> ?dup if 1- >r @ >r exit then cell+ >r ;
|
1150 |
|
|
[t] doNext tdoNext meta!
|
1151 |
|
|
|
1152 |
|
|
\ 'min' and 'max' are standard operations in many languages, they operate
|
1153 |
|
|
\ on signed values. Note how they are factored to use the 'mux' word, with
|
1154 |
|
|
\ 'min' falling through into it, and 'max' calling 'mux', all to save on space.
|
1155 |
|
|
\
|
1156 |
|
|
|
1157 |
|
|
: min 2dup < fallthrough; ( n n -- n )
|
1158 |
|
|
h: mux if drop exit then nip ; ( n1 n2 b -- n : multiplex operation )
|
1159 |
|
|
: max 2dup > mux ; ( n n -- n )
|
1160 |
|
|
|
1161 |
|
|
|
1162 |
|
|
\ 'key' retrieves a single character of input, it is a vectored word so the
|
1163 |
|
|
\ method used to get data can be changed.
|
1164 |
|
|
\
|
1165 |
|
|
\ It calls 'bye' if the End of File character is returned (-1, which is
|
1166 |
|
|
\ outside the normal byte range).
|
1167 |
|
|
\
|
1168 |
|
|
|
1169 |
|
|
: key @execute dup [-1] = if bye 0x0000 exit then ; ( -- c )
|
1170 |
|
|
|
1171 |
|
|
\ '/string', '+string' and 'count' are for manipulating strings, 'count'
|
1172 |
|
|
\ words best on counted strings which have a length prefix, but can be used
|
1173 |
|
|
\ to advance through an array of byte data, whereas '/string' is used with
|
1174 |
|
|
\ a address and length pair that is already on the stack. '/string' will not
|
1175 |
|
|
\ advance the pair beyond the end of the string.
|
1176 |
|
|
\
|
1177 |
|
|
|
1178 |
|
|
: /string over min rot over+ -rot - ; ( b u1 u2 -- b u : advance string u2 )
|
1179 |
|
|
h: +string 1 /string ; ( b u -- b u : )
|
1180 |
|
|
: count dup 1+ swap c@ ; ( b -- b u )
|
1181 |
|
|
h: string@ over c@ ; ( b u -- b u c )
|
1182 |
|
|
|
1183 |
|
|
\ 'crc' computes the 16-bit CCITT CRC over a segment of memory, and 'ccitt'
|
1184 |
|
|
\ is the word that does the polynomial checking. It can be also be used as a
|
1185 |
|
|
\ crude Pseudo Random Number Generator. CRC routines are useful for detecting
|
1186 |
|
|
\ memory corruption in the Forth image.
|
1187 |
|
|
\
|
1188 |
|
|
|
1189 |
|
|
h: ccitt ( crc c -- crc : crc polynomial $1021 AKA "x16 + x12 + x5 + 1" )
|
1190 |
|
|
over $8 rshift xor ( crc x )
|
1191 |
|
|
dup $4 rshift xor ( crc x )
|
1192 |
|
|
dup $5 lshift xor ( crc x )
|
1193 |
|
|
dup $c lshift xor ( crc x )
|
1194 |
|
|
swap $8 lshift xor ; ( crc )
|
1195 |
|
|
|
1196 |
|
|
: crc ( b u -- u : calculate ccitt-ffff CRC )
|
1197 |
|
|
$ffff >r
|
1198 |
|
|
begin
|
1199 |
|
|
dup
|
1200 |
|
|
while
|
1201 |
|
|
string@ r> swap ccitt >r 1 /string
|
1202 |
|
|
repeat 2drop r> ;
|
1203 |
|
|
|
1204 |
|
|
|
1205 |
|
|
|
1206 |
|
|
\ : random ( -- u : pseudo random number )
|
1207 |
|
|
\ seed @ 0= seed toggle seed @ 0 ccitt dup seed ! ;
|
1208 |
|
|
|
1209 |
|
|
\ 'address' and '@address' are for use with the previous word point in the word
|
1210 |
|
|
\ header, the top two bits for other purposes (a 'compile-only' and an
|
1211 |
|
|
\ 'immediate' word bit). This makes traversing the dictionary a little more
|
1212 |
|
|
\ tricker than normal, and affects functions like 'words' and 'search-wordlist'
|
1213 |
|
|
\ Code can only exist in the first 16KiB of space anyway, so the top two bits
|
1214 |
|
|
\ cannot be used for anything else. It is debatable as to what the best way
|
1215 |
|
|
\ of marking words as immediate is, to use unused bits in a word header, or to
|
1216 |
|
|
\ place 'immediate' words in a special vocabulary which a minority of Forths
|
1217 |
|
|
\ do. Most Forths use the 'unused-bits-in-word-header' approach.
|
1218 |
|
|
\
|
1219 |
|
|
|
1220 |
|
|
h: @address @ fallthrough; ( a -- a )
|
1221 |
|
|
h: address $3fff and ; ( a -- a : mask off address bits )
|
1222 |
|
|
|
1223 |
|
|
\ 'last' gets a pointer to the most recently defined word, which is used to
|
1224 |
|
|
\ implement words like 'recurse', as well as in words which must traverse the
|
1225 |
|
|
\ current word list.
|
1226 |
|
|
|
1227 |
|
|
h: last get-current @address ; ( -- pwd )
|
1228 |
|
|
|
1229 |
|
|
\ A few character emitting words will now be defined, it should be obvious
|
1230 |
|
|
\ what these words do, 'emit' is the word that forms the basis of all these
|
1231 |
|
|
\ words. By default it is set to the primitive virtual machine instruction
|
1232 |
|
|
\ 'tx!'. In eForth another character emitting primitive was defined alongside
|
1233 |
|
|
\ 'emit', which was 'echo', which allowed for more control in how the
|
1234 |
|
|
\ interpreter interacts with the programmer and other programs. It is not
|
1235 |
|
|
\ necessary in a hosted Forth to have such a mechanism, but it can be added
|
1236 |
|
|
\ back in as needed, we will see commented out relics of this features later,
|
1237 |
|
|
\ when we see '^h' and 'ktap'.
|
1238 |
|
|
\
|
1239 |
|
|
|
1240 |
|
|
\ h: echo @execute ; ( c -- )
|
1241 |
|
|
: emit @execute ; ( c -- : write out a char )
|
1242 |
|
|
: cr =cr emit =lf emit ; ( -- : emit a newline )
|
1243 |
|
|
h: colon [char] : emit ; ( -- )
|
1244 |
|
|
: space =bl emit ; ( -- : emit a space )
|
1245 |
|
|
h: spaces =bl fallthrough; ( +n -- )
|
1246 |
|
|
h: nchars ( +n c -- : emit c n times )
|
1247 |
|
|
swap 0 max for aft dup emit then next drop ;
|
1248 |
|
|
|
1249 |
|
|
\ 'depth' and 'pick' require knowledge of how this Forth implements its
|
1250 |
|
|
\ stacks. 'sp0' contains the location of the stack pointer when there is
|
1251 |
|
|
\ nothing on the stack, and 'sp@' contains the current position. Using this
|
1252 |
|
|
\ it is possible to work out how many items, or how deep it is. 'pick' is
|
1253 |
|
|
\ used to pick an item at an arbitrary depth from the return stack. This
|
1254 |
|
|
\ version of 'pick' does this by indexing into the correct position and using
|
1255 |
|
|
\ a memory load operation to do this.
|
1256 |
|
|
\
|
1257 |
|
|
\ In some systems Forth is implemented on this is not possible to do, for
|
1258 |
|
|
\ example some Forths running on stack CPUs specifically designed to run Forth
|
1259 |
|
|
\ have stacks made in hardware which are not memory mapped. This is not just
|
1260 |
|
|
\ a hypothetical, the H2 Forth CPU (based on the J1 CPU) available at
|
1261 |
|
|
\ has stacks whose only operations are
|
1262 |
|
|
\ to increment and decrement them by a small number, and to get the current
|
1263 |
|
|
\ stack depth. On a platform like this, 'pick' can still be implemented,
|
1264 |
|
|
\ but it is more complex and can be done like this:
|
1265 |
|
|
\
|
1266 |
|
|
\ : pick ?dup if swap >r 1- pick r> swap exit then dup ;
|
1267 |
|
|
\
|
1268 |
|
|
|
1269 |
|
|
: depth sp@ sp0 - chars ; ( -- u : get current depth )
|
1270 |
|
|
: pick cells sp@ swap - @ ; ( vn...v0 u -- vn...v0 vu )
|
1271 |
|
|
|
1272 |
|
|
\ '>char' takes a character and converts it to an underscore if it is not
|
1273 |
|
|
\ printable, which is useful for printing out arbitrary sections of memory
|
1274 |
|
|
\ which may contain spaces, tabs, or non-printable. 'list' and 'dump' use
|
1275 |
|
|
\ this word. 'typist' can either use '>char' to print out a memory range
|
1276 |
|
|
\ or it can print out the value regardless if it is printable.
|
1277 |
|
|
\
|
1278 |
|
|
|
1279 |
|
|
h: >char $7f and dup $7f =bl within if drop [char] _ then ; ( c -- c )
|
1280 |
|
|
: type 0 fallthrough; ( b u -- )
|
1281 |
|
|
h: typist ( b u f -- : print a string )
|
1282 |
|
|
>r begin dup while
|
1283 |
|
|
swap count r@
|
1284 |
|
|
if
|
1285 |
|
|
>char
|
1286 |
|
|
then
|
1287 |
|
|
emit
|
1288 |
|
|
swap 1-
|
1289 |
|
|
repeat
|
1290 |
|
|
rdrop 2drop ;
|
1291 |
|
|
h: print count type ; ( b -- )
|
1292 |
|
|
h: $type [-1] typist ; ( b u -- )
|
1293 |
|
|
|
1294 |
|
|
\ 'cmove' and 'fill' are generic memory related functions for moving blocks
|
1295 |
|
|
\ of memory around and setting blocks of memory to a specific value
|
1296 |
|
|
\ respectively.
|
1297 |
|
|
|
1298 |
|
|
: cmove for aft >r dup c@ r@ c! 1+ r> 1+ then next 2drop ; ( b b u -- )
|
1299 |
|
|
: fill swap for swap aft 2dup c! 1+ then next 2drop ; ( b u c -- )
|
1300 |
|
|
|
1301 |
|
|
\ 'ndrop' removes a variable number of items off the variable stack, which
|
1302 |
|
|
\ is sometimes needed for cleaning things up before exiting a word.
|
1303 |
|
|
\
|
1304 |
|
|
|
1305 |
|
|
h: ndrop for aft drop then next ; ( 0u....nu n -- : drop n cells )
|
1306 |
|
|
|
1307 |
|
|
\ ### Exception Handling
|
1308 |
|
|
\
|
1309 |
|
|
\ 'catch' and 'throw' are complex and very useful words, these words are
|
1310 |
|
|
\ minor modifications to the ones provided in the ANS Forth standard,
|
1311 |
|
|
\ See and
|
1312 |
|
|
\ http://forth.sourceforge.net/standard/dpans/dpans9.htm
|
1313 |
|
|
\
|
1314 |
|
|
\ The standard describes the stack effects of 'catch' and 'throw' as so:
|
1315 |
|
|
\
|
1316 |
|
|
\ catch ( i*x xt -- j*x 0 | i*x n )
|
1317 |
|
|
\ throw ( k*x n -- k*x | i*x n )
|
1318 |
|
|
\
|
1319 |
|
|
\ Which is apparently meant to mean something, to someone. Either way, these
|
1320 |
|
|
\ words are how Forth implements exception handling, as many modern languages
|
1321 |
|
|
\ do. Their use is quite simple.
|
1322 |
|
|
\
|
1323 |
|
|
\ 'catch' accepts an execution token, which it executes, if throw is somehow
|
1324 |
|
|
\ invoked when the execution token is run, then 'catch' catches the exception
|
1325 |
|
|
\ and returns the exception number thrown. If no exception was thrown then
|
1326 |
|
|
\ 'catch' returns zero.
|
1327 |
|
|
\
|
1328 |
|
|
\ 'throw' is used to throw exceptions, it can be used anyway to indicate
|
1329 |
|
|
\ something has gone wrong, or to affect the control throw of a program in
|
1330 |
|
|
\ a portable way (instead of messing around with the return stack using '>r'
|
1331 |
|
|
\ and 'r>', which is non-portable), although it is ill advised to use
|
1332 |
|
|
\ exceptions for control flow purposes. 'throw' only throws an exception if
|
1333 |
|
|
\ the value provided to it was non-zero, otherwise execution continues as
|
1334 |
|
|
\ normal.
|
1335 |
|
|
\
|
1336 |
|
|
\ Example usage:
|
1337 |
|
|
\
|
1338 |
|
|
\ : word-the-might-fail 2 2 + 5 = if -9001 throw then ;
|
1339 |
|
|
\ : foo
|
1340 |
|
|
\ ' word-the-might-fail catch
|
1341 |
|
|
\ ?dup if abort" Something has gone terribly wrong..." then
|
1342 |
|
|
\ ." Everything went peachy" cr ;
|
1343 |
|
|
\
|
1344 |
|
|
|
1345 |
|
|
: catch ( i*x xt -- j*x 0 | i*x n )
|
1346 |
|
|
sp@ >r
|
1347 |
|
|
handler @ >r
|
1348 |
|
|
rp@ handler !
|
1349 |
|
|
execute
|
1350 |
|
|
r> handler !
|
1351 |
|
|
r> drop-0 ;
|
1352 |
|
|
|
1353 |
|
|
: throw ( k*x n -- k*x | i*x n )
|
1354 |
|
|
?dup if
|
1355 |
|
|
handler @ rp!
|
1356 |
|
|
r> handler !
|
1357 |
|
|
rxchg ( 'rxchg' is equivalent to 'r> swap >r' )
|
1358 |
|
|
sp! drop r>
|
1359 |
|
|
then ;
|
1360 |
|
|
|
1361 |
|
|
\ Negative numbers take up two cells in a word definition when compiled into
|
1362 |
|
|
\ one, whilst positive words only take up one cell, as a space saving
|
1363 |
|
|
\ measure we define '-throw'. Which negates a number before calling 'throw'.
|
1364 |
|
|
\
|
1365 |
|
|
\ We then do something curious, we set the second cell in the target virtual
|
1366 |
|
|
\ machine image to a branch to '-throw'. This is because it is the
|
1367 |
|
|
\ the virtual machine sets the program counter to the second cell (at address
|
1368 |
|
|
\ $2, or 1 cell in) whenever an exception is raised when executing an
|
1369 |
|
|
\ instruction, such as a division by zero. By setting the cell to the
|
1370 |
|
|
\ execution token divided by two we are setting that cell to an unconditional
|
1371 |
|
|
\ branch to '-throw'. The virtual machine also puts a number indicating what
|
1372 |
|
|
\ the exception was on the top of the stack so it is possible to determine
|
1373 |
|
|
\ what went wrong.
|
1374 |
|
|
\
|
1375 |
|
|
|
1376 |
|
|
h: -throw negate throw ; ( u -- : negate and throw )
|
1377 |
|
|
[t] -throw 2/ 1 tcells t!
|
1378 |
|
|
|
1379 |
|
|
\ '?ndepth' throws an exception if a certain number of items on the stack
|
1380 |
|
|
\ do not exist. It is possible to use this primitive to implement some basic
|
1381 |
|
|
\ checks to make sure that words are passed the correct number of arguments.
|
1382 |
|
|
\
|
1383 |
|
|
\ By using '?ndepth' strategically it is possible to catch errors quite
|
1384 |
|
|
\ quickly with minimal overhead in speed and size by selecting only a few
|
1385 |
|
|
\ words to put depth checking in.
|
1386 |
|
|
|
1387 |
|
|
h: 1depth 1 fallthrough; ( ??? -- : check depth is at least one )
|
1388 |
|
|
h: ?ndepth depth 1- u> if 4 -throw exit then ; ( ??? n -- check depth )
|
1389 |
|
|
h: 2depth 2 ?ndepth ; ( ??? -- : check depth is at least two )
|
1390 |
|
|
|
1391 |
|
|
\ @todo implement a more efficient version of 'um/mod' using built in division
|
1392 |
|
|
: um/mod ( ud u -- r q )
|
1393 |
|
|
?dup 0= if $a -throw exit then
|
1394 |
|
|
2dup u<
|
1395 |
|
|
if negate #highest
|
1396 |
|
|
for >r dup um+ >r >r dup um+ r> + dup
|
1397 |
|
|
r> r@ swap >r um+ r> or
|
1398 |
|
|
if >r drop 1+ r> else drop then r>
|
1399 |
|
|
next
|
1400 |
|
|
drop swap exit
|
1401 |
|
|
then drop 2drop [-1] dup ;
|
1402 |
|
|
|
1403 |
|
|
\ ## Numeric Output
|
1404 |
|
|
\ With the basic word set in place and exception handling, as well as some
|
1405 |
|
|
\ variables being defined, it is possible to define the numeric output wordset,
|
1406 |
|
|
\ this word set will allow numbers to be printed or strings to the numeric
|
1407 |
|
|
\ representation of a number defined.
|
1408 |
|
|
\
|
1409 |
|
|
\ Numeric output in Forth, as well as input, is controlled by the 'base'
|
1410 |
|
|
\ variable, it is possible to enter numbers and print them out in binary,
|
1411 |
|
|
\ octal, decimal, hexadecimal or any base from base 2 to base 36 inclusive.
|
1412 |
|
|
\ This section only deals with numeric output. The numeric output string
|
1413 |
|
|
\ is formed within a temporary buffer which can either be printed out or
|
1414 |
|
|
\ copied to another location as needed, the method for doing this is known
|
1415 |
|
|
\ as Pictured Numeric Output. The words '<#', '#', '#>' and 'hold' form the
|
1416 |
|
|
\ kernel from which the other numeric output words are formed.
|
1417 |
|
|
\
|
1418 |
|
|
\ As a note, Base 36 can be used as a fairly compact textual encoding of binary
|
1419 |
|
|
\ data if needed, like Base-64 encoding but without the need for a special
|
1420 |
|
|
\ encoding and decoding scheme (See: ).
|
1421 |
|
|
\
|
1422 |
|
|
\ First, a few utility words are formed, 'decimal' and 'hex' set the base
|
1423 |
|
|
\ to known values. These are needed so the programmer can get the interpret
|
1424 |
|
|
\ back to a known state when they have set the radix to a value (remember,
|
1425 |
|
|
\ '10' is valid and different value in every base!), as well as a quick
|
1426 |
|
|
\ shorthand.
|
1427 |
|
|
\
|
1428 |
|
|
\ The word 'radix' is then defined, which is used to check that the 'base'
|
1429 |
|
|
\ variable is set to a meaningful value before it is used, between 2 and 36,
|
1430 |
|
|
\ inclusive as previously mentioned. Radixes or bases higher than 10 uses the
|
1431 |
|
|
\ alphabet after 0-9 to represent higher numbers, so 'a' corresponds to '10',
|
1432 |
|
|
\ there are only 36 alphanumeric characters (ignoring character case) meaning
|
1433 |
|
|
\ higher numbers cannot be represented. If the radix is outside of this range,
|
1434 |
|
|
\ then base is set back to its default, hexadecimal, and an exception is
|
1435 |
|
|
\ thrown.
|
1436 |
|
|
\
|
1437 |
|
|
|
1438 |
|
|
: decimal $a base ! ; ( -- )
|
1439 |
|
|
: hex $10 base ! ; ( -- )
|
1440 |
|
|
h: radix base @ dup 2 - $22 u> if hex $28 -throw exit then ; ( -- u )
|
1441 |
|
|
|
1442 |
|
|
\ 'digit' converts a number to its character representation, but it only
|
1443 |
|
|
\ deals with numbers less than 36, it does no checking for the output base,
|
1444 |
|
|
\ but is a straight forward conversion utility. It assumes that the ASCII
|
1445 |
|
|
\ character set is being used (See: ).
|
1446 |
|
|
\
|
1447 |
|
|
\ 'extract' is used to divide the number being converted by the current
|
1448 |
|
|
\ output radix, extracting a single digit which can be passed to 'digit'
|
1449 |
|
|
\ for conversion.
|
1450 |
|
|
\
|
1451 |
|
|
\ 'hold' then takes this extracted and converted digit and places in the
|
1452 |
|
|
\ hold space, this is where the string representing the number to be output
|
1453 |
|
|
\ is held. It adds characters in reverse order to the hold space, which is
|
1454 |
|
|
\ due to how 'extract' works, 'extract' gets the lowest digit first.
|
1455 |
|
|
\
|
1456 |
|
|
\ Lets look at how conversion would work for the number 1234 in base 10. We
|
1457 |
|
|
\ start of the number to convert and extract the first digit:
|
1458 |
|
|
\
|
1459 |
|
|
\ 1234 / 10 = 123, remainder 4 (hold ASCII 4)
|
1460 |
|
|
\
|
1461 |
|
|
\ We then add '4' to the hold space, which is the last digit that needs to
|
1462 |
|
|
\ be output, hence storage in reverse order. We then continue on with the
|
1463 |
|
|
\ output conversion:
|
1464 |
|
|
\
|
1465 |
|
|
\ 123 / 10 = 12, remainder 3 (hold ASCII 3)
|
1466 |
|
|
\ 12 / 10 = 1, remainder 2 (hold ASCII 2)
|
1467 |
|
|
\ 1 / 10 = 0, remainder 1 (hold ASCII 1)
|
1468 |
|
|
\ 0 <- conversion complete!
|
1469 |
|
|
\
|
1470 |
|
|
\ If we did not store the string in reverse order, we would end up printing
|
1471 |
|
|
\ '4321' which is the exact opposite of what we want!
|
1472 |
|
|
\
|
1473 |
|
|
\ The words '<#', '#', and '#>' call these words to do the work of numeric
|
1474 |
|
|
\ conversion, but before this is described, Notice the checking that is done
|
1475 |
|
|
\ within each of these words, 'hold' makes sure that too many characters are
|
1476 |
|
|
\ not stored in the hold space. '#' checks the depth of the stack before it
|
1477 |
|
|
\ is called and the base variable is only accessed with the 'radix' word.
|
1478 |
|
|
\ This combination of checking catches most errors that occur and makes sure
|
1479 |
|
|
\ they do not propagate.
|
1480 |
|
|
|
1481 |
|
|
\ @todo Check '?hold' works correctly
|
1482 |
|
|
: hold hld @ 1- dup hld ! c! fallthrough; ( c -- )
|
1483 |
|
|
h: ?hold hld @ pad $100 + u> if $11 -throw exit then ; ( -- )
|
1484 |
|
|
h: extract dup >r um/mod r> swap >r um/mod r> rot ; ( ud ud -- ud u )
|
1485 |
|
|
h: digit 9 over < 7 and + [char] 0 + ; ( u -- c )
|
1486 |
|
|
|
1487 |
|
|
\ The quartet formed by "<# # #s #>" look intimidating to the new comer, but
|
1488 |
|
|
\ are quite simple. They allow the format of numeric output to be controlled
|
1489 |
|
|
\ quite easily by the programmer. A description of these words:
|
1490 |
|
|
\
|
1491 |
|
|
\ * '<#' resets the hold space and initializes the conversion.
|
1492 |
|
|
\ * '#' extracts, converts and holds a single character.
|
1493 |
|
|
\ * '#s' repeatedly calls '#' until the number has been fully converted.
|
1494 |
|
|
\ * '#>' finalizes the conversion and pushes a string.
|
1495 |
|
|
\
|
1496 |
|
|
\ Anywhere within this conversion arbitrary characters can be added to
|
1497 |
|
|
\ the hold space with 'hold'. These words should always be used together,
|
1498 |
|
|
\ whenever you see '<#', you should see an enclosing '#>'. Once the '#>' has
|
1499 |
|
|
\ been called the number provided has been fully converted to a number string
|
1500 |
|
|
\ in the current output base, and can be printed or copied.
|
1501 |
|
|
\
|
1502 |
|
|
\ These words will go on to form more convenient words for numeric output,
|
1503 |
|
|
\ like '.', or 'u.r'.
|
1504 |
|
|
\
|
1505 |
|
|
|
1506 |
|
|
: #> 2drop hld @ pad over - ; ( w -- b u )
|
1507 |
|
|
: # 2depth 0 base @ extract digit hold ; ( d -- d )
|
1508 |
|
|
: #s begin # 2dup d0= until ; ( d -- 0 )
|
1509 |
|
|
: <# pad hld ! ; ( -- )
|
1510 |
|
|
|
1511 |
|
|
\ 'sign' is used with the Pictured Numeric Output words to add a sign character
|
1512 |
|
|
\ if the number is negative, 'str' is then defined to convert a number in any
|
1513 |
|
|
\ base to its signed representation, in actual use however we will only use
|
1514 |
|
|
\ 'str' for base 10 numeric output, we usually do not want to print the sign
|
1515 |
|
|
\ of a number when operating with a hexadecimal base.
|
1516 |
|
|
\
|
1517 |
|
|
|
1518 |
|
|
: sign 0< if [char] - hold exit then ; ( n -- )
|
1519 |
|
|
h: str ( n -- b u : convert a signed integer to a numeric string )
|
1520 |
|
|
dup>r abs 0 <# #s r> sign #> ;
|
1521 |
|
|
|
1522 |
|
|
\ '.r', 'u.r' are used as the basis of further words with a more controlled
|
1523 |
|
|
\ output format. They implement right justification of a number (signed for
|
1524 |
|
|
\ '.r', unsigned for 'u.r') by the number of spaces specified by the first
|
1525 |
|
|
\ argument provided to them. This is useful for printing out columns of
|
1526 |
|
|
\ data that is for human consumption.
|
1527 |
|
|
\
|
1528 |
|
|
\ 'u.' prints out an unsigned number with a trailing space, and '.' prints out
|
1529 |
|
|
\ a signed number when operating in decimal, and an unsigned one otherwise,
|
1530 |
|
|
\ again with a trailing space. Neither adds leading spaces.
|
1531 |
|
|
\
|
1532 |
|
|
|
1533 |
|
|
h: (u.) 0 <# #s #> ; ( u -- b u : turn 'u' into number string )
|
1534 |
|
|
: u.r >r (u.) r> fallthrough; ( u +n -- : print u right justified by +n)
|
1535 |
|
|
h: adjust over- spaces type ; ( b n n -- )
|
1536 |
|
|
h: 5u.r 5 u.r ; ( u -- )
|
1537 |
|
|
\ : .r >r str r> adjust ; ( n n -- : print n, right justified by +n )
|
1538 |
|
|
: u. (u.) space type ; ( u -- : print unsigned number )
|
1539 |
|
|
: . radix $a xor if u. exit then str space type ; ( n -- print number )
|
1540 |
|
|
\ : d. base @ >r decimal . r> base ! ;
|
1541 |
|
|
\ : h. base @ >r hex u. r> base ! ;
|
1542 |
|
|
|
1543 |
|
|
\ 'holds' and '.base' can be defined as so:
|
1544 |
|
|
\
|
1545 |
|
|
\ : holds begin dup while 1- 2dup + c@ hold repeat 2drop ;
|
1546 |
|
|
\ : .base base @ dup decimal base ! ; ( -- )
|
1547 |
|
|
\
|
1548 |
|
|
\ If needed. '?' is another common utility for printing out the contents at an
|
1549 |
|
|
\ address:
|
1550 |
|
|
\
|
1551 |
|
|
\ : ? @ . ; ( a -- : display the contents in a memory cell )
|
1552 |
|
|
\
|
1553 |
|
|
|
1554 |
|
|
\ Words that use the numeric output words can be defined, '.free' can now
|
1555 |
|
|
\ be defined which prints out the amount of space left in memory for programs
|
1556 |
|
|
\
|
1557 |
|
|
|
1558 |
|
|
h: unused $4000 here - ; ( -- u : unused program space )
|
1559 |
|
|
h: .free unused u. ; ( -- : print unused program space )
|
1560 |
|
|
|
1561 |
|
|
\ ### String Handling and Input
|
1562 |
|
|
\ 'pack$' and '=string' are more advanced string handling words for copying
|
1563 |
|
|
\ strings to a new location, turning it into a counted string, which 'pack$'
|
1564 |
|
|
\ does, or for comparing two string, which '=string' does.
|
1565 |
|
|
\
|
1566 |
|
|
\ 'expect', 'query' and 'accept' are used for fetching a line of input which
|
1567 |
|
|
\ can be further processed. Input in Forth is fundamentally line based, a
|
1568 |
|
|
\ line is fetch from the terminal, which is then parsed into tokens delimited
|
1569 |
|
|
\ by whitespace called words, which are then processed by the Forth
|
1570 |
|
|
\ interpreter. On hosted systems the input typed into a terminal is usually
|
1571 |
|
|
\ line buffered, you type in a line and hit return, then the line is passed
|
1572 |
|
|
\ to the program reading from the terminal. This is not the case on all
|
1573 |
|
|
\ systems, often it is the case that the Forth will be on a different target
|
1574 |
|
|
\ and the programmer is talking to it via a serial port, the programmer sends
|
1575 |
|
|
\ a character - not an entire line - and the Forth interpreter echos back a
|
1576 |
|
|
\ character, or not. This means that the Forth interpreter has to mimic the
|
1577 |
|
|
\ line handling normally provided by the terminal emulator. As this is a hosted
|
1578 |
|
|
\ Forth it does not have to do this, however the capability to handle input
|
1579 |
|
|
\ in this way is left commented out so the system is easier to port across
|
1580 |
|
|
\ to such platforms. Either way, a line of text needs to be fetched from an
|
1581 |
|
|
\ input stream.
|
1582 |
|
|
\
|
1583 |
|
|
\ First to describe 'pack$' and '=string'.
|
1584 |
|
|
\
|
1585 |
|
|
\ 'pack$' takes a string, its length, and a place which is assumed to be
|
1586 |
|
|
\ large enough to store the string in as a final argument. Creates a counted
|
1587 |
|
|
\ string and places it in the target location, this word is especially useful
|
1588 |
|
|
\ for the creation of word headers which will be needed later.
|
1589 |
|
|
\
|
1590 |
|
|
\ '=string' checks for string equality, as it has the length of both strings
|
1591 |
|
|
\ it checks their lengths first before proceeding to check all of the string,
|
1592 |
|
|
\ this saves a little time. Only a yes/no to the comparison is returned as
|
1593 |
|
|
\ a boolean answer, unlike 'strcmp' in C
|
1594 |
|
|
\ (See: )
|
1595 |
|
|
|
1596 |
|
|
: pack$ ( b u a -- a ) \ null fill
|
1597 |
|
|
aligned dup>r over
|
1598 |
|
|
dup cell negate and ( align down )
|
1599 |
|
|
- over+ 0 swap! 2dup c! 1+ swap cmove r> ;
|
1600 |
|
|
|
1601 |
|
|
: =string ( a1 u2 a1 u2 -- t : string equality )
|
1602 |
|
|
>r swap r> ( a1 a2 u1 u2 )
|
1603 |
|
|
over xor if drop 2drop-0 exit then
|
1604 |
|
|
for ( a1 a2 )
|
1605 |
|
|
aft
|
1606 |
|
|
count >r swap count r> xor
|
1607 |
|
|
if rdrop 2drop-0 exit then
|
1608 |
|
|
then
|
1609 |
|
|
next 2drop [-1] ;
|
1610 |
|
|
|
1611 |
|
|
\ '^h' and 'ktap' are commented out as they are not needed, they deal with
|
1612 |
|
|
\ line based input, some sections of 'tap' and 'accept' are commented out
|
1613 |
|
|
\ as well as they are not needed on a hosted system, thus the job of 'accept'
|
1614 |
|
|
\ is simplified. It is still worth talking about what these words do however,
|
1615 |
|
|
\ in case they need to be added back in.
|
1616 |
|
|
\
|
1617 |
|
|
\ 'accept' is the word that does all of the work and calls '^h' and 'ktap' when
|
1618 |
|
|
\ needed, 'query' and 'expect' a wrappers around it. Image we are talking to
|
1619 |
|
|
\ the Forth interpreter over a serial line, if the programmer sends a character
|
1620 |
|
|
\ to the Forth interpreter it is up to the Forth interpreter how to respond,
|
1621 |
|
|
\ the remote interpreter needs to echo back characters to the programmer
|
1622 |
|
|
\ otherwise they will be greeted with a blank terminal. Likewise the remote
|
1623 |
|
|
\ Forth interpreter needs to process not only new lines, indicating a new
|
1624 |
|
|
\ line of input should be accepted for processing, but it also needs to deal
|
1625 |
|
|
\ with backspace characters, which are used to delete characters in the current
|
1626 |
|
|
\ input line. It also needs to make sure it does not overflow the input buffer,
|
1627 |
|
|
\ or when deleting characters go beyond the beginning of the input buffer.
|
1628 |
|
|
\
|
1629 |
|
|
\ 'accept' takes an address of an input buffer and a length of it, the words
|
1630 |
|
|
\ '^h', 'tap' and 'ktap' are free to move around inside that buffer and do
|
1631 |
|
|
\ so based on the latest character received.
|
1632 |
|
|
\
|
1633 |
|
|
\ '^h' takes a pointer to the bottom of the input buffer, one to the end of
|
1634 |
|
|
\ the input buffer and the current position. It decrements the current position
|
1635 |
|
|
\ within the buffer and uses 'echo' - not 'emit' - to output a backspace to
|
1636 |
|
|
\ move the terminal cursor back one place, a space to erase the character and
|
1637 |
|
|
\ a backspace to move the cursor back one space again. It only does this if it
|
1638 |
|
|
\ will not go below the bottom of the input buffer.
|
1639 |
|
|
\
|
1640 |
|
|
\ : ^h ( bot eot cur -- bot eot cur )
|
1641 |
|
|
\ >r over r@ < dup
|
1642 |
|
|
\ if
|
1643 |
|
|
\ =bs dup echo =bl echo echo
|
1644 |
|
|
\ then r> + ;
|
1645 |
|
|
\
|
1646 |
|
|
\ 'ktap' processes the current character received, and has the same buffer
|
1647 |
|
|
\ information that '^h' does, in fact it passes that information to '^h' to
|
1648 |
|
|
\ process if the character is a backspace. It also handles the case where a
|
1649 |
|
|
\ line feed, indicating a new line, has been entered. It returns a modified
|
1650 |
|
|
\ buffer.
|
1651 |
|
|
\
|
1652 |
|
|
\ : ktap ( bot eot cur c -- bot eot cur )
|
1653 |
|
|
\ dup =lf ( <-- was =cr ) xor
|
1654 |
|
|
\ if =bs xor
|
1655 |
|
|
\ if =bl tap else ^h then
|
1656 |
|
|
\ exit
|
1657 |
|
|
\ then drop nip dup ;
|
1658 |
|
|
\
|
1659 |
|
|
\ 'tap' is used to store a character in the current line, 'ktap' can also
|
1660 |
|
|
\ call this word, 'tap' uses echo to echo back the character - it is currently
|
1661 |
|
|
\ commented out as this is handled by the terminal emulator but would be
|
1662 |
|
|
\ needed for serial communication.
|
1663 |
|
|
|
1664 |
|
|
h: tap ( dup echo ) over c! 1+ ; ( bot eot cur c -- bot eot cur )
|
1665 |
|
|
|
1666 |
|
|
\ 'accept' takes an buffer input buffer and returns a pointer and line length
|
1667 |
|
|
\ once a line of text has been fully entered. A line is considered fully
|
1668 |
|
|
\ entered when either a new line is received or the maximum number of input
|
1669 |
|
|
\ characters in a line is received, 'accept' checks for both of these
|
1670 |
|
|
\ conditions. It calls 'tap to do the work. In an alternate version it calls
|
1671 |
|
|
\ the execution vector '', which is usually set to 'ktap', which is shown
|
1672 |
|
|
\ as a commented out line.
|
1673 |
|
|
\
|
1674 |
|
|
|
1675 |
|
|
: accept ( b u -- b u )
|
1676 |
|
|
over+ over
|
1677 |
|
|
begin
|
1678 |
|
|
2dupxor
|
1679 |
|
|
while
|
1680 |
|
|
key dup =lf xor if tap else drop nip dup then
|
1681 |
|
|
\ The alternative 'accept' code replaces the line above:
|
1682 |
|
|
\
|
1683 |
|
|
\ key dup =bl - 95 u< if tap else @execute then
|
1684 |
|
|
\
|
1685 |
|
|
repeat drop over- ;
|
1686 |
|
|
|
1687 |
|
|
\ '' is set to 'accept' later on in this file, but can be changed if
|
1688 |
|
|
\ needed. 'expect' and 'query' both call 'accept' this way. 'query' uses the
|
1689 |
|
|
\ systems input buffer and stores the result in a know location so that the
|
1690 |
|
|
\ rest of the interpreter can use the results. 'expect' gets a buffer from the
|
1691 |
|
|
\ use and stores the length of the resulting string in 'span'.
|
1692 |
|
|
|
1693 |
|
|
: expect @execute span ! drop ; ( b u -- )
|
1694 |
|
|
: query tib tib-length @execute #tib ! drop-0 in! ; ( -- )
|
1695 |
|
|
|
1696 |
|
|
\ 'query' stores its results in the Terminal Input Buffer (TIB), which is way
|
1697 |
|
|
\ the word 'tib' gets its name. The TIB is a simple data structure which
|
1698 |
|
|
\ contains a pointer to the buffer itself and the length of the most current
|
1699 |
|
|
\ input line.
|
1700 |
|
|
|
1701 |
|
|
\ Now we have a line based input system, and from the previous chapters we
|
1702 |
|
|
\ also have numeric output, the Forth interpreter is starting to take shape.
|
1703 |
|
|
|
1704 |
|
|
\ ## Dictionary Words
|
1705 |
|
|
\ These words either navigate around the word header, or search through the
|
1706 |
|
|
\ dictionary to find a word, both word sets are related. This section now
|
1707 |
|
|
\ requires an understanding on how this Forth lays out its word headers, each
|
1708 |
|
|
\ Forth tends to do this in a slightly different way and more modern Forths
|
1709 |
|
|
\ use more advanced (and more complex, perhaps un-forth-like) techniques.
|
1710 |
|
|
\
|
1711 |
|
|
\ The dictionary is organized into word lists, and these words lists are simply
|
1712 |
|
|
\ linked lists of all the words in that list. To find a definition by name
|
1713 |
|
|
\ we search through all of the word lists in the current search order following
|
1714 |
|
|
\ the linked lists until they terminate.
|
1715 |
|
|
\
|
1716 |
|
|
\ This Forth, like Jones Forth (another literate Forth designed to teach how
|
1717 |
|
|
\ a Forth interpreter works, see
|
1718 |
|
|
\ ), lays
|
1719 |
|
|
\ out its dictionary in a way that is simpler than having a separate storage
|
1720 |
|
|
\ area for word names, but complicates other parts of the system. Most other
|
1721 |
|
|
\ Forths have a separate space for string storage where names of words and
|
1722 |
|
|
\ pointers to their code field are kept. This is how things were done in the
|
1723 |
|
|
\ original eForth model.
|
1724 |
|
|
\
|
1725 |
|
|
\ A word list looks like this..
|
1726 |
|
|
\
|
1727 |
|
|
\ ^
|
1728 |
|
|
\ |
|
1729 |
|
|
\ .-----.-----.-----.----.----
|
1730 |
|
|
\ | PWD | NFA | CFA | ... <--- Word definition
|
1731 |
|
|
\ .-----.-----.-----.----.----
|
1732 |
|
|
\ ^ ^ ^
|
1733 |
|
|
\ | |--Word Body... |
|
1734 |
|
|
\ |
|
1735 |
|
|
\ .-----.-----.-----.----.----
|
1736 |
|
|
\ | PWD | NFA | CFA | ...
|
1737 |
|
|
\ .-----.-----.-----.----.----
|
1738 |
|
|
\ ^ ^
|
1739 |
|
|
\ |---Word Header---|
|
1740 |
|
|
\
|
1741 |
|
|
\
|
1742 |
|
|
\
|
1743 |
|
|
\ We 'PWD' is the Previous Word pointer, 'NFA' is the Name Field Address, and
|
1744 |
|
|
\ the 'CFA' is the first code word in the Forth word. In other Forths this
|
1745 |
|
|
\ is a special field, but as this is a Subroutine Threaded forth
|
1746 |
|
|
\ (see https://en.wikipedia.org/wiki/Threaded_code), designed to execute
|
1747 |
|
|
\ on a Forth machine, this field does not need to do anything. It is simply
|
1748 |
|
|
\ the first bit of code that gets executed in a Forth definition, and things
|
1749 |
|
|
\ point to the 'CFA'. The rest of the word definition follows and includes the
|
1750 |
|
|
\ 'CFA'. The 'NFA' is a variable length field containing the name of the
|
1751 |
|
|
\ Forth word, it is stored as a counted string, which is what 'pack$' was
|
1752 |
|
|
\ defined for.
|
1753 |
|
|
\
|
1754 |
|
|
\ The 'PWD' field is not just a simple pointer, as already mentioned, it is
|
1755 |
|
|
\ also used to store information about the word, the top two most bits store
|
1756 |
|
|
\ whether a word is compile only (the top most bit) meaning the word should
|
1757 |
|
|
\ only be used within a word definition, being compiled into it, and whether
|
1758 |
|
|
\ or not the word is an immediate word or not (the second highest bit in the
|
1759 |
|
|
\ 'PWD' field). Another property of a word is whether it is an inline word
|
1760 |
|
|
\ or not, which is a property of the location of where the word is defined,
|
1761 |
|
|
\ and applies to words like 'r@' and 'r>', this is done by checking to see
|
1762 |
|
|
\ if the word is between two sentinel values.
|
1763 |
|
|
\
|
1764 |
|
|
\ Immediate and compiling words are a very important concept in Forth, and it
|
1765 |
|
|
\ is possible for the programmer to define their own immediate words. The
|
1766 |
|
|
\ Forth Read-Evaluate loop is quite simple, as this diagram shows:
|
1767 |
|
|
\
|
1768 |
|
|
\ .--------------------------. .----------------------------.
|
1769 |
|
|
\ .->| Get Next Word From Input |<-------| Error: Throw an Exception! |
|
1770 |
|
|
\ | .--------------------------. .----------------------------.
|
1771 |
|
|
\ | | ^
|
1772 |
|
|
\ | \|/ | (No)
|
1773 |
|
|
\ | . |
|
1774 |
|
|
\ ^ .-----------------. (Not Found) .--------------------.
|
1775 |
|
|
\ | | Search For Word |------------------------>| Is token a number? |
|
1776 |
|
|
\ | .-----------------. .--------------------.
|
1777 |
|
|
\ | | |
|
1778 |
|
|
\ ^ \|/ (Found) \|/ (Yes)
|
1779 |
|
|
\ | . .
|
1780 |
|
|
\ | .-------------------------. .-------------------------.
|
1781 |
|
|
\ | | Are we in command mode? | | Are we in command mode? |
|
1782 |
|
|
\ ^ .-------------------------. .-------------------------.
|
1783 |
|
|
\ | | | | |
|
1784 |
|
|
\ | \|/ (Yes) | (No) \|/ (Yes) | (No)
|
1785 |
|
|
\ | . | . |
|
1786 |
|
|
\ | .--------------. | .------------------------. |
|
1787 |
|
|
\ .--| Execute Word | | | Push Number onto Stack | |
|
1788 |
|
|
\ | .--------------. | .------------------------. |
|
1789 |
|
|
\ | ^ \|/ | |
|
1790 |
|
|
\ ^ | . | \|/
|
1791 |
|
|
\ | | (Yes) .--------------------. | .
|
1792 |
|
|
\ | ----------| Is word immediate? | | .------------------------.
|
1793 |
|
|
\ | .--------------------. | | Compile number literal |
|
1794 |
|
|
\ ^ | | | into next available |
|
1795 |
|
|
\ | \|/ (No) | | location in the |
|
1796 |
|
|
\ | . | | dictionary |
|
1797 |
|
|
\ | .--------------------------------. | .------------------------.
|
1798 |
|
|
\ | | Compile Pointer to word | | |
|
1799 |
|
|
\ .--| in next available location in | | |
|
1800 |
|
|
\ | | the dictionary | \|/ \|/
|
1801 |
|
|
\ | .--------------------------------. . .
|
1802 |
|
|
\ | | |
|
1803 |
|
|
\ .----<-------<-------<-------<-------<------.------<-------<------.
|
1804 |
|
|
\
|
1805 |
|
|
\ No matter how complex the Forth system may appear, this loop forms the heart
|
1806 |
|
|
\ of it: parse a word and execute it or compile it, with numbers handled as
|
1807 |
|
|
\ a special case. Immediate words are always executed, whereas compiling words
|
1808 |
|
|
\ may be executed depending on whether we are in command mode, or in compile
|
1809 |
|
|
\ mode, which is stored in the 'state' variable. There are several words that
|
1810 |
|
|
\ affect the interpreter state, such as ':', ';', '[', and ']'.
|
1811 |
|
|
\
|
1812 |
|
|
\ We should now talk about some examples of immediate and compiling words,
|
1813 |
|
|
\ ':' is a compiling word that when executed reads in a single token and
|
1814 |
|
|
\ creates a new word header with this token. It does not add the new word
|
1815 |
|
|
\ to the definition just yet. It also does one other things, it switches the
|
1816 |
|
|
\ interpreter to compile mode, words that are not immediate are instead
|
1817 |
|
|
\ compiled into the dictionary, as are numbers. This allows us to define
|
1818 |
|
|
\ new words. However, we need immediate words so that we can break out of
|
1819 |
|
|
\ compile mode, and enter back into command word so we can actually execute
|
1820 |
|
|
\ something. ';' is an example of an immediate word, it is executed instead
|
1821 |
|
|
\ of compiled into the dictionary, it switches the interpreter back into
|
1822 |
|
|
\ command mode, as well as finishing off the word definition by compiling
|
1823 |
|
|
\ an exit instructions into the dictionary, and adding the word into the
|
1824 |
|
|
\ current definitions word list.
|
1825 |
|
|
\
|
1826 |
|
|
\ The control structure words, like 'if', 'for', and 'begin', are just words
|
1827 |
|
|
\ as well, they are immediate words and will be explained later.
|
1828 |
|
|
\
|
1829 |
|
|
\ 'nfa' and 'cfa' both take a pointer to the 'PWD' field and adjust it to
|
1830 |
|
|
\ point to different sections of the word.
|
1831 |
|
|
|
1832 |
|
|
: nfa address cell+ ; ( pwd -- nfa : move to name field address)
|
1833 |
|
|
: cfa nfa dup c@ + cell+ $fffe and ; ( pwd -- cfa )
|
1834 |
|
|
|
1835 |
|
|
\ '.id' prints out a words name field.
|
1836 |
|
|
|
1837 |
|
|
h: .id nfa print ; ( pwd -- : print out a word )
|
1838 |
|
|
|
1839 |
|
|
\ 'immediate?', 'compile-only?' and 'inline?' are the tests words that
|
1840 |
|
|
\ all take a pointer to the 'PWD' field.
|
1841 |
|
|
|
1842 |
|
|
h: immediate? @ $4000 and fallthrough; ( pwd -- t : immediate word? )
|
1843 |
|
|
h: logical 0= 0= ; ( n -- t )
|
1844 |
|
|
h: compile-only? @ 0x8000 and logical ; ( pwd -- t : is compile only? )
|
1845 |
|
|
h: inline? inline-start inline-end within ; ( pwd -- t : is word inline? )
|
1846 |
|
|
|
1847 |
|
|
\ Now we know the structure of the dictionary we can define some words that
|
1848 |
|
|
\ work with it. We will define 'find' and 'search-wordlist', which will
|
1849 |
|
|
\ be derived from 'finder' and 'searcher'. 'searcher' will attempt to
|
1850 |
|
|
\ find a word in a specific word list, and 'find' will attempt to find a
|
1851 |
|
|
\ word in all the word lists in the current order.
|
1852 |
|
|
\
|
1853 |
|
|
\ 'searcher' and 'finder' both return as much information about the words
|
1854 |
|
|
\ they find as possible, if they find them. 'searcher' returns
|
1855 |
|
|
\ the 'PWD' of the word that points to the found word, the 'PWD' of the
|
1856 |
|
|
\ found word and a number indicating whether the found word is immediate
|
1857 |
|
|
\ (1) or a compiling word (-1). It returns zero, the original counted string,
|
1858 |
|
|
\ and another zero if the word could not be found in its first argument, a word
|
1859 |
|
|
\ list (wid). The word is provided as a counted string in the second argument
|
1860 |
|
|
\ to 'searcher'.
|
1861 |
|
|
\
|
1862 |
|
|
\ 'finder' wraps up 'searcher' and applies it to all the words in the
|
1863 |
|
|
\ search order. It returns the same values as 'searcher' if a word is found,
|
1864 |
|
|
\ returns the original counted word string address if it was not found, as
|
1865 |
|
|
\ well as zeros.
|
1866 |
|
|
\
|
1867 |
|
|
|
1868 |
|
|
h: searcher ( a wid -- pwd pwd 1 | pwd pwd -1 | 0 a 0 : find a word in a WID )
|
1869 |
|
|
swap >r dup
|
1870 |
|
|
begin
|
1871 |
|
|
dup
|
1872 |
|
|
while
|
1873 |
|
|
dup nfa count r@ count =string
|
1874 |
|
|
if ( found! )
|
1875 |
|
|
dup immediate? if 1 else [-1] then
|
1876 |
|
|
rdrop exit
|
1877 |
|
|
then
|
1878 |
|
|
nip dup @address
|
1879 |
|
|
repeat
|
1880 |
|
|
rdrop 2drop-0 ;
|
1881 |
|
|
|
1882 |
|
|
h: finder ( a -- pwd pwd 1 | pwd pwd -1 | 0 a 0 : find a word dictionary )
|
1883 |
|
|
>r
|
1884 |
|
|
context
|
1885 |
|
|
begin
|
1886 |
|
|
dup-@
|
1887 |
|
|
while
|
1888 |
|
|
dup-@ @ r@ swap searcher ?dup
|
1889 |
|
|
if
|
1890 |
|
|
>r rot drop r> rdrop exit
|
1891 |
|
|
then
|
1892 |
|
|
cell+
|
1893 |
|
|
repeat drop-0 r> 0x0000 ;
|
1894 |
|
|
|
1895 |
|
|
\ 'search-wordlist' and 'find' are simple applications of 'searcher' and
|
1896 |
|
|
\ 'finder', there is a difference between this Forths version of 'find' and
|
1897 |
|
|
\ the standard one, this version of 'find' does not return an execution token
|
1898 |
|
|
\ but a pointer in the word header which can be turned into an execution token
|
1899 |
|
|
\ with 'cfa'.
|
1900 |
|
|
|
1901 |
|
|
: search-wordlist searcher rot drop ; ( a wid -- pwd 1 | pwd -1 | a 0 )
|
1902 |
|
|
: find ( a -- pwd 1 | pwd -1 | a 0 : find a word in the dictionary )
|
1903 |
|
|
finder rot drop ;
|
1904 |
|
|
|
1905 |
|
|
\ ## Numeric Input
|
1906 |
|
|
\ Numeric input is handled next, converting a string into a number, which is
|
1907 |
|
|
\ similar to numeric output. First we define a series of words for checking
|
1908 |
|
|
\ whether a character belongs to a certain character class, whether it is
|
1909 |
|
|
\ a decimal number, or whether it is lowercase, and a word for converting
|
1910 |
|
|
\ uppercase letters to lower case, as numbers above base ten use the alphabet
|
1911 |
|
|
\ to represent numbers and can be input in either case.
|
1912 |
|
|
\
|
1913 |
|
|
|
1914 |
|
|
h: decimal? [char] 0 [char] : within ; ( c -- t : decimal char? )
|
1915 |
|
|
h: lowercase? [char] a [char] { within ; ( c -- t )
|
1916 |
|
|
h: uppercase? [char] A [char] [ within ; ( c -- t )
|
1917 |
|
|
h: >lower ( c -- c : convert to lower case )
|
1918 |
|
|
dup uppercase? if =bl xor exit then ;
|
1919 |
|
|
|
1920 |
|
|
\ 'numeric?' determines whether a character is possibly a number character
|
1921 |
|
|
\ in any base, from base 2 to base 36, and converts it to a numeric value
|
1922 |
|
|
\ if it is, or returns -1 if it is a non-numeric character.
|
1923 |
|
|
|
1924 |
|
|
h: numeric? ( char -- n|-1 : convert character in 0-9 a-z range to number )
|
1925 |
|
|
>lower
|
1926 |
|
|
dup lowercase? if $57 - exit then ( 97 = 'a', +10 as 'a' == 10 )
|
1927 |
|
|
dup decimal? if [char] 0 - exit then
|
1928 |
|
|
drop [-1] ;
|
1929 |
|
|
|
1930 |
|
|
\ 'digit?' restricts the output of 'numeric?' to numbers within the current
|
1931 |
|
|
\ numeric radix, so if the current base is 16, characters '0-9', 'a-f' and
|
1932 |
|
|
\ 'A-F' all return the boolean value as true for when passed to 'digit?', but
|
1933 |
|
|
\ characters outside this range return false.
|
1934 |
|
|
\
|
1935 |
|
|
|
1936 |
|
|
h: digit? >lower numeric? base @ u< ; ( c -- t : is char a digit given base )
|
1937 |
|
|
|
1938 |
|
|
\ (number) does the work of the numeric conversion, getting a character
|
1939 |
|
|
\ from an input array, converting the character to a number, multiplying it
|
1940 |
|
|
\ by the current input base and adding in to the number being converted. It
|
1941 |
|
|
\ stops on the first non-numeric character.
|
1942 |
|
|
\
|
1943 |
|
|
\ (number) accepts a string as an address-length pair which are the first
|
1944 |
|
|
\ two arguments, and a starting number for the number conversion (which is
|
1945 |
|
|
\ usually zero). (number) returns a string containing the unconverted
|
1946 |
|
|
\ characters, if any, as well as the converted number.
|
1947 |
|
|
\
|
1948 |
|
|
|
1949 |
|
|
\ @todo Fix (number) to work with double cell numbers
|
1950 |
|
|
h: (number) ( n b u -- n b u : convert string to number )
|
1951 |
|
|
begin
|
1952 |
|
|
( get next character )
|
1953 |
|
|
2dup 2>r drop c@ dup digit? ( n char bool, Rt: b u )
|
1954 |
|
|
if ( n char )
|
1955 |
|
|
swap base @ * swap numeric? + ( accumulate number )
|
1956 |
|
|
else ( n char )
|
1957 |
|
|
drop
|
1958 |
|
|
2r> ( restore string )
|
1959 |
|
|
nop exit
|
1960 |
|
|
then
|
1961 |
|
|
2r> ( restore string )
|
1962 |
|
|
+string dup 0= ( advance string and test for end )
|
1963 |
|
|
until ;
|
1964 |
|
|
|
1965 |
|
|
\ 'negative?' and 'base?' should be thought of as working in conjunction
|
1966 |
|
|
\ with '>number' only. Numbers can have certain prefixes which change the
|
1967 |
|
|
\ interpretation of the number, for example a prefix of '-' means the number
|
1968 |
|
|
\ is negative, a '$' prefix means the number is hexadecimal regardless of the
|
1969 |
|
|
\ current input base, and a '#' prefix (which is a less common prefix) means
|
1970 |
|
|
\ the number is decimal. 'negative?' handles the negative case, and could
|
1971 |
|
|
\ potentially be used as standalone word, however using 'base?' comes with
|
1972 |
|
|
\ caveats, it changes the current base if one of the base changing prefixes
|
1973 |
|
|
\ is encountered, '>number' is careful to restore the base back to what it
|
1974 |
|
|
\ was when it uses 'base?'.
|
1975 |
|
|
\
|
1976 |
|
|
|
1977 |
|
|
h: negative? ( b u -- t : is >number negative? )
|
1978 |
|
|
string@ [char] - = if +string [-1] exit then 0x0000 ;
|
1979 |
|
|
|
1980 |
|
|
h: base? ( b u -- )
|
1981 |
|
|
string@ [char] $ = ( $hex )
|
1982 |
|
|
if
|
1983 |
|
|
+string hex exit
|
1984 |
|
|
then ( #decimal )
|
1985 |
|
|
string@ [char] # = if +string decimal exit then ;
|
1986 |
|
|
|
1987 |
|
|
\ '>number' converts a string in its entirety, it takes all the same arguments
|
1988 |
|
|
\ as (number) and passes them to it to do the work, but not before doing
|
1989 |
|
|
\ the prefix handling. After it does the base restoration. It returns the
|
1990 |
|
|
\ same arguments as (number)
|
1991 |
|
|
|
1992 |
|
|
\ @todo '>number' should accept a double cell number and return one
|
1993 |
|
|
|
1994 |
|
|
\ : digit? ( c base -- u f )
|
1995 |
|
|
\ >r [char] 0 - 9 over <
|
1996 |
|
|
\ if 7 - dup 10 < or then dup r> u< ;
|
1997 |
|
|
|
1998 |
|
|
\ : >number ( ud a u -- ud a u )
|
1999 |
|
|
\ begin dup
|
2000 |
|
|
\ while >r dup >r c@ base @ digit?
|
2001 |
|
|
\ while swap base @ um* drop rot base @ um* d+ r> char+ r> 1 -
|
2002 |
|
|
\ repeat drop r> r> then ;
|
2003 |
|
|
|
2004 |
|
|
: >number ( n b u -- n b u : convert string )
|
2005 |
|
|
radix >r
|
2006 |
|
|
negative? >r
|
2007 |
|
|
base?
|
2008 |
|
|
(number)
|
2009 |
|
|
r> if rot negate -rot then
|
2010 |
|
|
r> base ! ;
|
2011 |
|
|
|
2012 |
|
|
\ '>number' is a generic word, but awkward to use, it contains information
|
2013 |
|
|
\ that the programmer probably does not need to know, 'number?' does some
|
2014 |
|
|
\ processing of the results to '>number', a string containing a number to
|
2015 |
|
|
\ be converted it passed in, and a boolean is returned as well as the
|
2016 |
|
|
\ converted number. The boolean indicates if the entire string was numeric
|
2017 |
|
|
\ only
|
2018 |
|
|
\
|
2019 |
|
|
|
2020 |
|
|
h: number? 0 -rot >number nip 0= ; ( b u -- n f : is number? )
|
2021 |
|
|
|
2022 |
|
|
\ ## Parsing
|
2023 |
|
|
\ After a line of text has been fetched the line needs to be tokenized into
|
2024 |
|
|
\ space delimited words, which is as complex as parsing gets in Forth.
|
2025 |
|
|
\ Technically Forth has no fixed grammar as any Forth word is free to parse
|
2026 |
|
|
\ the following input stream however it likes, but it is rare that to deviate
|
2027 |
|
|
\ from the norm and words which do complex processing are highly discouraged.
|
2028 |
|
|
\
|
2029 |
|
|
|
2030 |
|
|
h: -trailing ( b u -- b u : remove trailing spaces )
|
2031 |
|
|
for
|
2032 |
|
|
aft =bl over r@ + c@ <
|
2033 |
|
|
if r> 1+ exit then
|
2034 |
|
|
then
|
2035 |
|
|
next 0x0000 ;
|
2036 |
|
|
|
2037 |
|
|
h: lookfor ( b u c -- b u : skip until succeeds )
|
2038 |
|
|
>r
|
2039 |
|
|
begin
|
2040 |
|
|
dup
|
2041 |
|
|
while
|
2042 |
|
|
string@ r@ - r@ =bl = @execute if rdrop exit then
|
2043 |
|
|
+string
|
2044 |
|
|
repeat rdrop ;
|
2045 |
|
|
|
2046 |
|
|
h: skipTest if 0> exit then 0<> ; ( n f -- t )
|
2047 |
|
|
h: scanTest skipTest invert ; ( n f -- t )
|
2048 |
|
|
h: skipper ' skipTest ! lookfor ; ( b u c -- u c )
|
2049 |
|
|
h: scanner ' scanTest ! lookfor ; ( b u c -- u c )
|
2050 |
|
|
|
2051 |
|
|
h: parser ( b u c -- b u delta )
|
2052 |
|
|
>r over r> swap 2>r
|
2053 |
|
|
r@ skipper 2dup
|
2054 |
|
|
r> scanner swap r> - >r - r> 1+ ;
|
2055 |
|
|
|
2056 |
|
|
: parse ( c -- b u ; )
|
2057 |
|
|
>r tib in@ + #tib @ in@ - r> parser >in +! -trailing 0 max ;
|
2058 |
|
|
: ) ; immediate ( -- : do nothing )
|
2059 |
|
|
: ( [char] ) parse 2drop ; immediate \ ) ( parse until matching paren )
|
2060 |
|
|
: .( [char] ) parse type ; ( print out text until matching parenthesis )
|
2061 |
|
|
: \ #tib @ in! ; immediate ( comment until new line )
|
2062 |
|
|
h: ?length dup word-length u> if $13 -throw exit then ;
|
2063 |
|
|
: word 1depth parse ?length here pack$ ; ( c -- a ; )
|
2064 |
|
|
: token =bl word ; ( -- a )
|
2065 |
|
|
: char token count drop c@ ; ( -- c; )
|
2066 |
|
|
|
2067 |
|
|
\ ## The Interpreter
|
2068 |
|
|
|
2069 |
|
|
h: ?dictionary dup $3f00 u> if 8 -throw exit then ;
|
2070 |
|
|
: , here dup cell+ ?dictionary cp! ! ; ( u -- : store 'u' in dictionary )
|
2071 |
|
|
: c, here ?dictionary c! cp 1+! ; ( c -- : store 'c' in the dictionary )
|
2072 |
|
|
h: doLit 0x8000 or , ; ( n+ -- : compile literal )
|
2073 |
|
|
: literal ( n -- : write a literal into the dictionary )
|
2074 |
|
|
dup 0x8000 and ( n > $7fff ? )
|
2075 |
|
|
if
|
2076 |
|
|
invert doLit =invert , exit ( store inversion of n the invert it )
|
2077 |
|
|
then
|
2078 |
|
|
doLit ( turn into literal, write into dictionary )
|
2079 |
|
|
; compile-only immediate
|
2080 |
|
|
|
2081 |
|
|
h: make-callable chars $4000 or ; ( cfa -- instruction )
|
2082 |
|
|
: compile, make-callable , ; ( cfa -- : compile a code field address )
|
2083 |
|
|
h: $compile dup inline? if cfa @ , exit then cfa compile, ; ( pwd -- )
|
2084 |
|
|
h: not-found source type $d -throw ; ( -- : throw 'word not found' )
|
2085 |
|
|
|
2086 |
|
|
h: ?compile dup compile-only? if source type $e -throw exit then ;
|
2087 |
|
|
: (literal) state@ if postpone literal exit then ; ( u -- u | )
|
2088 |
|
|
: interpret ( ??? a -- ??? : The command/compiler loop )
|
2089 |
|
|
find ?dup if
|
2090 |
|
|
state@
|
2091 |
|
|
if
|
2092 |
|
|
0> if cfa execute exit then ( <- immediate word )
|
2093 |
|
|
$compile exit ( <- compiling word )
|
2094 |
|
|
then
|
2095 |
|
|
drop ?compile cfa execute exit
|
2096 |
|
|
then
|
2097 |
|
|
\ not a word
|
2098 |
|
|
dup count number? if
|
2099 |
|
|
nip @execute exit
|
2100 |
|
|
then
|
2101 |
|
|
not-found ;
|
2102 |
|
|
|
2103 |
|
|
\ NB. 'compile' only works for words, instructions, and numbers below $8000
|
2104 |
|
|
: compile r> dup-@ , cell+ >r ; compile-only ( --:Compile next compiled word )
|
2105 |
|
|
: immediate $4000 last fallthrough; ( -- : previous word immediate )
|
2106 |
|
|
h: toggle tuck @ xor swap! ; ( u a -- : xor value at addr with u )
|
2107 |
|
|
\ : compile-only $8000 last toggle ;
|
2108 |
|
|
|
2109 |
|
|
: smudge last fallthrough;
|
2110 |
|
|
h: (smudge) nfa $80 swap toggle ; ( pwd -- )
|
2111 |
|
|
|
2112 |
|
|
\ ## Strings
|
2113 |
|
|
\ The string word set is quite small, there are words already defined for
|
2114 |
|
|
\ manipulating strings such 'c@', and 'count', but they are not exclusively
|
2115 |
|
|
\ used for strings. These woulds will allow string literals to be embedded
|
2116 |
|
|
\ within word definitions.
|
2117 |
|
|
\
|
2118 |
|
|
\ Forth uses counted strings, at least traditionally, which contain the string
|
2119 |
|
|
\ length as the first byte of the string. This limits the string length to
|
2120 |
|
|
\ 255 characters, which is enough for our small Forth but is quite limiting.
|
2121 |
|
|
\
|
2122 |
|
|
\ More modern Forths either use NUL terminated strings, or larger counts
|
2123 |
|
|
\ for their counted strings. Both methods have trade-offs.
|
2124 |
|
|
|
2125 |
|
|
\ NUL terminated strings allow for arbitrary lengths, and are often used by
|
2126 |
|
|
\ programs written in C, or built upon the C runtime, along with their
|
2127 |
|
|
\ libraries. NUL terminated strings cannot hold binary data however, and have
|
2128 |
|
|
\ an overhead for string length related operations.
|
2129 |
|
|
\
|
2130 |
|
|
\ Using a larger count prefix obviously allows for larger strings, but it
|
2131 |
|
|
\ is not standard and new words would have to be written, or new conventions
|
2132 |
|
|
\ followed, when dealing with these strings. For example the 'count' word can
|
2133 |
|
|
\ be used on the entire string if the string size is a single byte in size.
|
2134 |
|
|
\ Another complication is how big should the length prefix be? 16, 32, or
|
2135 |
|
|
\ 64-bit? This might depend on the intended use, the preferences of the
|
2136 |
|
|
\ programmer, or what is most natural on the platform.
|
2137 |
|
|
\
|
2138 |
|
|
\ Another complication in modern string handling is UTF-8,
|
2139 |
|
|
\ and other character encoding schemes,
|
2140 |
|
|
\ which is something to be aware of, but not a subject we will go in to.
|
2141 |
|
|
\
|
2142 |
|
|
\ The issues described only talk about problems with Forths representation
|
2143 |
|
|
\ of strings, nothing has even been said about the difficulty of using them
|
2144 |
|
|
\ for string heavy applications!
|
2145 |
|
|
\
|
2146 |
|
|
\ Only a few string specific words will be defined, for compiling string
|
2147 |
|
|
\ literals into a word definition, and for returning an address to a compiled
|
2148 |
|
|
\ string. There are two things the string words will have to do; parse a
|
2149 |
|
|
\ string from the input stream and compile the string and an action into
|
2150 |
|
|
\ the dictionary. The action will be more complicated than it might seem as
|
2151 |
|
|
\ the string literal will be compiled into a word definition, like code is, so
|
2152 |
|
|
\ the action will have also manipulate the return stack so the string literal
|
2153 |
|
|
\ is jumped over.
|
2154 |
|
|
\
|
2155 |
|
|
|
2156 |
|
|
h: do$ r> r@ r> count + aligned >r swap >r ; ( -- a )
|
2157 |
|
|
h: string-literal do$ nop ; ( -- a : do string NB. nop to fool optimizer )
|
2158 |
|
|
h: .string do$ print ; ( -- : print string )
|
2159 |
|
|
|
2160 |
|
|
\ To allow the metacompilers version of '."' and '$"' to work we will need
|
2161 |
|
|
\ to populate two variables in the metacompiler with the correct actions.
|
2162 |
|
|
[t] .string tdoPrintString meta!
|
2163 |
|
|
[t] string-literal tdoStringLit meta!
|
2164 |
|
|
|
2165 |
|
|
h: parse-string [char] " word count + cp! ; ( -- )
|
2166 |
|
|
( , --, Run: -- b )
|
2167 |
|
|
: $" compile string-literal parse-string ; immediate compile-only
|
2168 |
|
|
: ." compile .string parse-string ; immediate compile-only ( , -- )
|
2169 |
|
|
: abort [-1] (bye) ; ( -- )
|
2170 |
|
|
h: ?abort swap if print cr abort else drop then ; ( u a -- )
|
2171 |
|
|
h: (abort) do$ ?abort ; ( -- )
|
2172 |
|
|
: abort" compile (abort) parse-string ; immediate compile-only ( u -- )
|
2173 |
|
|
|
2174 |
|
|
\ ## Evaluator
|
2175 |
|
|
\ With the following few words extra words we will have a working Forth
|
2176 |
|
|
\ interpreter, capable of reading in a line of text and executing it. More
|
2177 |
|
|
\ words will need to be defined to make the system usable, as well as some
|
2178 |
|
|
\ house keeping.
|
2179 |
|
|
\
|
2180 |
|
|
\ 'quit' is the name for the word which implements the interpreter loop,
|
2181 |
|
|
\ which is an odd name for its function, but this is what it is traditionally
|
2182 |
|
|
\ called. 'quit' calls a few minor functions:
|
2183 |
|
|
\
|
2184 |
|
|
\ * '', which is the execution vector for the Forth prompt, its contents
|
2185 |
|
|
\ are executed by 'eval'.
|
2186 |
|
|
\ * '(ok)' defines the default prompt, it only prints 'ok' if we are in
|
2187 |
|
|
\ command mode.
|
2188 |
|
|
\ * '[' and ']' for changing the interpreter state to command and compile mode
|
2189 |
|
|
\ respectively. Note that '[' must be an immediate word.
|
2190 |
|
|
\ * 'preset' which resets the input line variables
|
2191 |
|
|
\ * '?error' handles an error condition from 'throw', it forms the error
|
2192 |
|
|
\ handling part of the error handler of last resort. It prints out the
|
2193 |
|
|
\ non-zero error number that occurred and attempts to return the interpreter
|
2194 |
|
|
\ into a sensible state.
|
2195 |
|
|
\ * '?depth' checks for a stack underflow
|
2196 |
|
|
\ * 'eval' tokenizes the current input line which 'quit' has fetched and
|
2197 |
|
|
\ calls 'interpret' on each token until there is no more. Any errors that
|
2198 |
|
|
\ occur within 'interpreter' or 'eval' will be caught by 'quit'.
|
2199 |
|
|
|
2200 |
|
|
h: preset tib-start #tib cell+ ! 0 in! 0 id ! ;
|
2201 |
|
|
: ] [-1] state ! ;
|
2202 |
|
|
: [ 0 state ! ; immediate
|
2203 |
|
|
|
2204 |
|
|
h: ?error ( n -- : perform actions on error )
|
2205 |
|
|
?dup if
|
2206 |
|
|
. ( print error number )
|
2207 |
|
|
[char] ? emit ( print '?' )
|
2208 |
|
|
cr
|
2209 |
|
|
sp0 sp! ( empty stack )
|
2210 |
|
|
preset ( reset I/O streams )
|
2211 |
|
|
[ ( back into interpret mode )
|
2212 |
|
|
exit
|
2213 |
|
|
then ;
|
2214 |
|
|
|
2215 |
|
|
h: (ok) command? if ." ok " cr exit then ; ( -- )
|
2216 |
|
|
h: ?depth sp@ sp0 u< if 4 -throw exit then ; ( u -- : depth check )
|
2217 |
|
|
h: eval ( -- )
|
2218 |
|
|
begin
|
2219 |
|
|
token dup c@
|
2220 |
|
|
while
|
2221 |
|
|
interpret ?depth
|
2222 |
|
|
repeat drop @execute ;
|
2223 |
|
|
|
2224 |
|
|
\ 'quit' can now be defined, it sets up the input line variables, sets the
|
2225 |
|
|
\ interpreter into command mode, enters an infinite loop it does not exit
|
2226 |
|
|
\ from, and within that loop it reads in a line of input, evaluates it and
|
2227 |
|
|
\ processes any errors that occur, if any.
|
2228 |
|
|
|
2229 |
|
|
: quit preset [ begin query ' eval catch ?error again ; ( -- )
|
2230 |
|
|
|
2231 |
|
|
\ 'evaluate' is used to evaluate a string of text as if it were typed in by
|
2232 |
|
|
\ the programmer. It takes an address and its length, this is used in the
|
2233 |
|
|
\ word 'load'. It needs two new words apart from 'eval', which are 'get-input'
|
2234 |
|
|
\ and 'set-input', these two words are used to retrieve and set the input
|
2235 |
|
|
\ input stream, which 'evaluate' needs to manipulate. 'evaluate' saves the
|
2236 |
|
|
\ current input stream specification and changes it to the new string,
|
2237 |
|
|
\ restoring to what it was before the evaluation took place. It is careful
|
2238 |
|
|
\ to catch errors and always perform the restore, any errors that thrown are
|
2239 |
|
|
\ rethrown after the input is restored.
|
2240 |
|
|
\
|
2241 |
|
|
|
2242 |
|
|
h: get-input source in@ id @ @ ; ( -- n1...n5 )
|
2243 |
|
|
h: set-input ! id ! in! #tib 2! ; ( n1...n5 -- )
|
2244 |
|
|
: evaluate ( a u -- )
|
2245 |
|
|
get-input 2>r 2>r >r
|
2246 |
|
|
|
2247 |
|
|
' eval catch
|
2248 |
|
|
r> 2r> 2r> set-input
|
2249 |
|
|
throw ;
|
2250 |
|
|
|
2251 |
|
|
\ ## I/O Control
|
2252 |
|
|
\ The I/O control section is a relic from eForth that is not really needed
|
2253 |
|
|
\ in a hosted Forth, at least one where the terminal emulator used handles
|
2254 |
|
|
\ things like line editing. It is left in here so it can be quickly be added
|
2255 |
|
|
\ back in if this Forth were to be ported to an embed environment, one in
|
2256 |
|
|
\ which communications with the Forth took place over a UART.
|
2257 |
|
|
\
|
2258 |
|
|
\ Open and reading from different files is also not needed, it is handled
|
2259 |
|
|
\ by the virtual machine.
|
2260 |
|
|
|
2261 |
|
|
h: io! preset fallthrough; ( -- : initialize I/O )
|
2262 |
|
|
h: console ' rx? ! ' tx! ! fallthrough;
|
2263 |
|
|
h: hand ' (ok) ( ' drop <-- was emit ) ( ' ktap ) fallthrough;
|
2264 |
|
|
h: xio ' accept ! ( ! ) ( ! ) ! ;
|
2265 |
|
|
\ h: pace 11 emit ;
|
2266 |
|
|
\ t: file ' pace ' drop ' ktap xio ;
|
2267 |
|
|
|
2268 |
|
|
\ ## Control Structures and defining words
|
2269 |
|
|
\ The next set of words defines relating to control structures and defining
|
2270 |
|
|
\ new words, along with some basic error checking for the control structures
|
2271 |
|
|
\ (which is known as 'compiler security' in the Forth community). Some of the
|
2272 |
|
|
\ words defined here are quite complex, even if their definitions are only
|
2273 |
|
|
\ a few lines long.
|
2274 |
|
|
\
|
2275 |
|
|
\ Control structure words include 'if', 'else', 'then', 'begin', 'again',
|
2276 |
|
|
\ 'until', 'for', 'next', 'recurse' and 'tail'. These words are all immediate
|
2277 |
|
|
\ words and are also compile only.
|
2278 |
|
|
\
|
2279 |
|
|
\ New control structure words can be defined by the user quite easily, for
|
2280 |
|
|
\ example a word called 'unless' can be made which executes a clause if the
|
2281 |
|
|
\ test provided is false, which is the opposite of 'if'. This is one of the
|
2282 |
|
|
\ many unique features of Forth, that new words can be defined.
|
2283 |
|
|
\
|
2284 |
|
|
\
|
2285 |
|
|
|
2286 |
|
|
h: ?check ( magic-number -- : check for magic number on the stack )
|
2287 |
|
|
magic <> if $16 -throw exit then ;
|
2288 |
|
|
h: ?unique ( a -- a : print a message if a word definition is not unique )
|
2289 |
|
|
dup last @ searcher
|
2290 |
|
|
if
|
2291 |
|
|
( source type )
|
2292 |
|
|
space
|
2293 |
|
|
2drop last-def @ nfa print ." redefined " cr exit
|
2294 |
|
|
then ;
|
2295 |
|
|
h: ?nul ( b -- : check for zero length strings )
|
2296 |
|
|
count 0= if $a -throw exit then 1- ;
|
2297 |
|
|
|
2298 |
|
|
h: find-token token find 0= if not-found exit then ; ( -- pwd, )
|
2299 |
|
|
h: find-cfa find-token cfa ; ( -- xt, )
|
2300 |
|
|
: ' find-cfa state@ if postpone literal exit then ; immediate
|
2301 |
|
|
: [compile] find-cfa compile, ; immediate compile-only ( --, )
|
2302 |
|
|
: [char] char postpone literal ; immediate compile-only ( --, : )
|
2303 |
|
|
\ h: ?quit command? if $38 -throw exit then ;
|
2304 |
|
|
: ; ( ?quit ) ?check =exit , [ fallthrough; immediate compile-only
|
2305 |
|
|
h: get-current! ?dup if get-current ! exit then ; ( -- wid )
|
2306 |
|
|
: : align here dup last-def ! ( "name", -- colon-sys )
|
2307 |
|
|
last , token ?nul ?unique count + cp! magic ] ;
|
2308 |
|
|
: begin here ; immediate compile-only ( -- a )
|
2309 |
|
|
: until chars $2000 or , ; immediate compile-only ( a -- )
|
2310 |
|
|
: again chars , ; immediate compile-only ( a -- )
|
2311 |
|
|
h: here-0 here 0x0000 ;
|
2312 |
|
|
h: >mark here-0 postpone again ;
|
2313 |
|
|
: if here-0 postpone until ; immediate compile-only
|
2314 |
|
|
: then fallthrough; immediate compile-only
|
2315 |
|
|
h: >resolve here chars over @ or swap! ;
|
2316 |
|
|
: else >mark swap >resolve ; immediate compile-only
|
2317 |
|
|
: while postpone if ; immediate compile-only
|
2318 |
|
|
: repeat swap postpone again postpone then ; immediate compile-only
|
2319 |
|
|
h: last-cfa last-def @ cfa ; ( -- u )
|
2320 |
|
|
: recurse last-cfa compile, ; immediate compile-only
|
2321 |
|
|
: tail last-cfa postpone again ; immediate compile-only
|
2322 |
|
|
: create postpone : drop compile doVar get-current ! [ ;
|
2323 |
|
|
: >body cell+ ; ( a -- a )
|
2324 |
|
|
h: doDoes r> chars here chars last-cfa dup cell+ doLit ! , ;
|
2325 |
|
|
: does> compile doDoes nop ; immediate compile-only
|
2326 |
|
|
: variable create 0 , ;
|
2327 |
|
|
: constant create ' doConst make-callable here cell- ! , ;
|
2328 |
|
|
: :noname here-0 magic ] ;
|
2329 |
|
|
: for =>r , here ; immediate compile-only
|
2330 |
|
|
: next compile doNext , ; immediate compile-only
|
2331 |
|
|
: aft drop >mark postpone begin swap ; immediate compile-only
|
2332 |
|
|
\ : doer create =exit last-cfa ! =exit , ;
|
2333 |
|
|
\ : make ( "name1", "name2", -- : make name1 do name2 )
|
2334 |
|
|
\ find-cfa find-cfa make-callable
|
2335 |
|
|
\ state@
|
2336 |
|
|
\ if
|
2337 |
|
|
\ postpone literal postpone literal compile ! nop exit
|
2338 |
|
|
\ then
|
2339 |
|
|
\ swap! ; immediate
|
2340 |
|
|
|
2341 |
|
|
\ @todo Use a 'SMUDGE' system to hide the current word from the search order
|
2342 |
|
|
\ @todo improve 'hide' so it unlinks a word from the linked list
|
2343 |
|
|
: hide find-token (smudge) ; ( --, : hide word by name )
|
2344 |
|
|
|
2345 |
|
|
\ ## Vocabulary Words
|
2346 |
|
|
\ The vocabulary word set should already be well understood, if the
|
2347 |
|
|
\ metacompiler has been, the vocabulary word set is how Forth organizes words
|
2348 |
|
|
\ and controls visibility of words.
|
2349 |
|
|
\
|
2350 |
|
|
|
2351 |
|
|
h: find-empty-cell 0 fallthrough; ( a -- )
|
2352 |
|
|
h: find-cell >r begin dup-@ r@ <> while cell+ repeat rdrop ; ( u a -- a )
|
2353 |
|
|
|
2354 |
|
|
: get-order ( -- widn ... wid1 n : get the current search order )
|
2355 |
|
|
context
|
2356 |
|
|
find-empty-cell
|
2357 |
|
|
dup cell- swap
|
2358 |
|
|
context - chars dup>r 1- dup 0< if $32 -throw exit then
|
2359 |
|
|
for aft dup-@ swap cell- then next @ r> ;
|
2360 |
|
|
|
2361 |
|
|
xchange _forth-wordlist root-voc
|
2362 |
|
|
: forth-wordlist _forth-wordlist ;
|
2363 |
|
|
|
2364 |
|
|
: set-order ( widn ... wid1 n -- : set the current search order )
|
2365 |
|
|
dup [-1] = if drop root-voc 1 set-order exit then
|
2366 |
|
|
dup #vocs > if $31 -throw exit then
|
2367 |
|
|
context swap for aft tuck ! cell+ then next 0 swap! ;
|
2368 |
|
|
|
2369 |
|
|
: forth root-voc forth-wordlist 2 set-order ; ( -- )
|
2370 |
|
|
|
2371 |
|
|
\ The name fields length in a counted string is used to store a bit
|
2372 |
|
|
\ indicating the word is hidden. This is the highest bit in the count byte.
|
2373 |
|
|
h: not-hidden? nfa c@ $80 and 0= ; ( pwd -- )
|
2374 |
|
|
h: .words space
|
2375 |
|
|
begin
|
2376 |
|
|
dup
|
2377 |
|
|
while dup not-hidden? if dup .id space then @address repeat drop cr ;
|
2378 |
|
|
: words
|
2379 |
|
|
get-order begin ?dup while swap dup cr u. colon @ .words 1- repeat ;
|
2380 |
|
|
|
2381 |
|
|
xchange root-voc _forth-wordlist
|
2382 |
|
|
|
2383 |
|
|
\ : previous get-order swap drop 1- set-order ; ( -- )
|
2384 |
|
|
\ : also get-order over swap 1+ set-order ; ( wid -- )
|
2385 |
|
|
: only [-1] set-order ; ( -- )
|
2386 |
|
|
\ : order get-order for aft . then next cr ; ( -- )
|
2387 |
|
|
\ : anonymous get-order 1+ here 1 cells allot swap set-order ; ( -- )
|
2388 |
|
|
: definitions context @ set-current ; ( -- )
|
2389 |
|
|
h: (order) ( w wid*n n -- wid*n w n )
|
2390 |
|
|
dup if
|
2391 |
|
|
1- swap >r (order) over r@ xor
|
2392 |
|
|
if
|
2393 |
|
|
1+ r> -rot exit
|
2394 |
|
|
then rdrop
|
2395 |
|
|
then ;
|
2396 |
|
|
: -order get-order (order) nip set-order ; ( wid -- )
|
2397 |
|
|
: +order dup>r -order get-order r> swap 1+ set-order ; ( wid -- )
|
2398 |
|
|
|
2399 |
|
|
: editor decimal editor-voc +order ; ( -- )
|
2400 |
|
|
\ : assembler root-voc assembler-voc 2 set-order ; ( -- )
|
2401 |
|
|
\ : ;code assembler ; immediate ( -- )
|
2402 |
|
|
\ : code postpone : assembler ; ( -- )
|
2403 |
|
|
|
2404 |
|
|
\ xchange _forth-wordlist assembler-voc
|
2405 |
|
|
\ : end-code forth postpone ; ; immediate ( -- )
|
2406 |
|
|
\ xchange assembler-voc _forth-wordlist
|
2407 |
|
|
|
2408 |
|
|
\ ## Block Word Set
|
2409 |
|
|
\ The block word set abstracts out how access to mass storage works in just
|
2410 |
|
|
\ a handful of words. The main word is 'block', with the words 'update',
|
2411 |
|
|
\ 'flush' and the variable 'blk' also integral to the working of the block
|
2412 |
|
|
\ word set. All of the other words can be implemented upon these.
|
2413 |
|
|
\
|
2414 |
|
|
\ Block storage is an outdated, but simple, method of accessing
|
2415 |
|
|
\ mass storage that demands little from the hardware or the system it is
|
2416 |
|
|
\ implemented under, just that data can be transfered from memory to disk
|
2417 |
|
|
\ somehow. It has no requirements that there be a file system, which is
|
2418 |
|
|
\ perfect for embedded devices as well as upon the microcomputers it
|
2419 |
|
|
\ originated on.
|
2420 |
|
|
\
|
2421 |
|
|
\ A 'Forth block' is 1024 byte long buffer which is backed by a mass
|
2422 |
|
|
\ storage device, which we will refer to as 'disk'. Very compact programs
|
2423 |
|
|
\ can be written that have their data stored persistently. Source can
|
2424 |
|
|
\ and data can be stored in blocks and evaluated, which will be described
|
2425 |
|
|
\ more in the 'Block editor' section of this document.
|
2426 |
|
|
\
|
2427 |
|
|
\ The 'block' word does most of the work. The way it is usually implemented
|
2428 |
|
|
\ is as follows:
|
2429 |
|
|
\
|
2430 |
|
|
\ 1. A user provides a block number to the 'block' word. The block
|
2431 |
|
|
\ number is checked to make sure it is valid, and an exception is thrown
|
2432 |
|
|
\ if it is not.
|
2433 |
|
|
\ 2. If the block if already loaded into a block buffer from disk, the
|
2434 |
|
|
\ address of the memory it is loaded into is returned.
|
2435 |
|
|
\ 3. If it was not, then 'block' looks for a free block buffer, loads
|
2436 |
|
|
\ the 1024 byte section off disk into the block buffer and returns an
|
2437 |
|
|
\ address to that.
|
2438 |
|
|
\ 4. If there are no free block buffers then it looks for a block buffer
|
2439 |
|
|
\ that is marked as being dirty with 'update' (which marks the previously
|
2440 |
|
|
\ loaded block as being dirty when called), then transfers that dirty block
|
2441 |
|
|
\ to disk. Now that there is a free block buffer, it loads the data that
|
2442 |
|
|
\ the user wants off of disk and returns a pointer to that, as in the
|
2443 |
|
|
\ previous bullet point. If none of the buffers are marked as dirty then
|
2444 |
|
|
\ then any one of them could be reused - they have not been marked as being
|
2445 |
|
|
\ modified so their contents could be retrieved off of disk if needed.
|
2446 |
|
|
\ 5. Under all cases, before the address of the loaded block has been
|
2447 |
|
|
\ returned, the variable 'blk' is updated to contain the latest loaded
|
2448 |
|
|
\ block.
|
2449 |
|
|
\
|
2450 |
|
|
\ This word does a lot, but is quite simple to use. It implements a simple
|
2451 |
|
|
\ cache where data is transfered back to disk only if needed, and multiple
|
2452 |
|
|
\ sections of memory from disk can be loaded into memory at the same time.
|
2453 |
|
|
\ The mechanism by which this happens is entirely hidden from the user.
|
2454 |
|
|
\
|
2455 |
|
|
\ This Forth implements 'block' in a slightly different way. The entire
|
2456 |
|
|
\ virtual machine image is loaded at start up, and can be saved back to
|
2457 |
|
|
\ disk (or small sections of it) with the "(save)" instruction. 'update'
|
2458 |
|
|
\ marks the entire image as needing to be saved back to disk, whilst
|
2459 |
|
|
\ 'flush' calls "(save)". 'block' then only has to check the block number
|
2460 |
|
|
\ is within range, and return a pointer to the block number multiplied by
|
2461 |
|
|
\ the size of a block - so this means that this version of 'block' is just
|
2462 |
|
|
\ an index into main memory. This is similar to how 'colorForth' implements
|
2463 |
|
|
\ its block word.
|
2464 |
|
|
\
|
2465 |
|
|
: update [-1] block-dirty ! ; ( -- )
|
2466 |
|
|
h: blk-@ blk @ ; ( -- k : retrieve current loaded block )
|
2467 |
|
|
h: +block blk-@ + ; ( -- )
|
2468 |
|
|
: save 0 here (save) throw ; ( -- : save blocks )
|
2469 |
|
|
: flush block-dirty @ if 0 [-1] (save) throw exit then ; ( -- )
|
2470 |
|
|
|
2471 |
|
|
: block ( k -- a )
|
2472 |
|
|
1depth
|
2473 |
|
|
dup $3f u> if $23 -throw exit then
|
2474 |
|
|
dup blk !
|
2475 |
|
|
$a lshift ( <-- b/buf * ) ;
|
2476 |
|
|
|
2477 |
|
|
\ The block word set has the following additional words, which augment the
|
2478 |
|
|
\ set nicely, they are 'list', 'load' and 'thru'. The 'list' word is used
|
2479 |
|
|
\ for displaying the contents of a block, it does this by splitting the
|
2480 |
|
|
\ block into 16 lines each, 64 characters long. 'load' evaluates a given
|
2481 |
|
|
\ block, and 'thru' evaluates a range of blocks.
|
2482 |
|
|
\
|
2483 |
|
|
\ This is how source code was stored and evaluated during the microcomputer
|
2484 |
|
|
\ era, as opposed to storing the source in named byte stream oriented files
|
2485 |
|
|
\ as is common nowadays.
|
2486 |
|
|
\
|
2487 |
|
|
\ It is more difficult, but possible, to store and edit source code in this
|
2488 |
|
|
\ manner, but it requires that the programmer(s) follow certain conventions
|
2489 |
|
|
\ when editing blocks, both in how programs are split up, and how they are
|
2490 |
|
|
\ formatted.
|
2491 |
|
|
\
|
2492 |
|
|
\ A block shown with 'list' might look like the following:
|
2493 |
|
|
\
|
2494 |
|
|
\ ----------------------------------------------------------------
|
2495 |
|
|
\ 0|( Simple Math Routines 31/12/1989 RJH 3/4 #20 ) |
|
2496 |
|
|
\ 1| |
|
2497 |
|
|
\ 2|: square dup * ; ( u -- u ) |
|
2498 |
|
|
\ 3|: sum-of-squares square swap square + ; ( u u -- u ) |
|
2499 |
|
|
\ 4|: even 1 and 0= ; ( u -- b ) |
|
2500 |
|
|
\ 5|: odd even 0= ; ( u -- b ) |
|
2501 |
|
|
\ 6|: log >r 0 swap ( u base -- u ) |
|
2502 |
|
|
\ 7| begin swap 1+ swap r@ / dup 0= until |
|
2503 |
|
|
\ 8| drop 1- rdrop ; |
|
2504 |
|
|
\ 9|: signum ( n -- -1 | 0 | 1 ) |
|
2505 |
|
|
\ 10| dup 0> if drop 1 exit then |
|
2506 |
|
|
\ 11| 0< if -1 exit then |
|
2507 |
|
|
\ 12| 0 ; |
|
2508 |
|
|
\ 13|: >< dup 8 rshift swap 8 lshift or ; ( u -- u : swap bytes ) |
|
2509 |
|
|
\ 14|: #digits dup 0= if 1+ exit then base @ log 1+ ; |
|
2510 |
|
|
\ 15| |
|
2511 |
|
|
\ ----------------------------------------------------------------
|
2512 |
|
|
\
|
2513 |
|
|
\ Longer comments for a source code block were stored in a 'shadow block',
|
2514 |
|
|
\ which is only enforced by convention. One possible convention is to store
|
2515 |
|
|
\ the source in even numbered blocks, and the comments in the odd numbered
|
2516 |
|
|
\ blocks.
|
2517 |
|
|
\
|
2518 |
|
|
\ By storing a comment in the first line of a block a word called 'index'
|
2519 |
|
|
\ could be used to make a table of contents all of the blocks available on
|
2520 |
|
|
\ disk, 'index' simply displays the first line of each block within a block
|
2521 |
|
|
\ range.
|
2522 |
|
|
|
2523 |
|
|
\ The common theme is that convention is key to successfully using blocks.
|
2524 |
|
|
\
|
2525 |
|
|
\ An example of using blocks to store data is how some old Forths used to
|
2526 |
|
|
\ store their error messages. Instead of storing strings of error messages
|
2527 |
|
|
\ in memory (either in RAM or ROM) they were stored in block storage, with
|
2528 |
|
|
\ one error message per line. The error messages were stored in order, and
|
2529 |
|
|
\ a word 'message' is used to convert an error code like '-4' to its error
|
2530 |
|
|
\ string (the numbers and what they mean are standardized, there is a table
|
2531 |
|
|
\ in the appendix), which was then printed out. The full list of the
|
2532 |
|
|
\ standardized error messages can be stored in four blocks, and form a neat
|
2533 |
|
|
\ and easy to use database.
|
2534 |
|
|
\
|
2535 |
|
|
|
2536 |
|
|
h: c/l* ( c/l * ) 6 lshift ; ( u -- u )
|
2537 |
|
|
h: c/l/ ( c/l / ) 6 rshift ; ( u -- u )
|
2538 |
|
|
h: line swap block swap c/l* + c/l ; ( k u -- a u )
|
2539 |
|
|
h: loadline line evaluate ; ( k u -- )
|
2540 |
|
|
: load 0 l/b 1- for 2dup 2>r loadline 2r> 1+ next 2drop ; ( k -- )
|
2541 |
|
|
h: pipe $7c emit ; ( -- )
|
2542 |
|
|
\ h: .line line -trailing $type ; ( k u -- )
|
2543 |
|
|
h: .border 3 spaces c/l $2d nchars cr ; ( -- )
|
2544 |
|
|
h: #line dup 2 u.r ; ( u -- u : print line number )
|
2545 |
|
|
\ : thru over- for dup load 1+ next drop ; ( k1 k2 -- )
|
2546 |
|
|
h: blank =bl fill ; ( b u -- )
|
2547 |
|
|
\ : message l/b extract .line cr ; ( u -- )
|
2548 |
|
|
h: retrieve block drop ; ( k -- )
|
2549 |
|
|
: list ( k -- )
|
2550 |
|
|
dup retrieve
|
2551 |
|
|
cr
|
2552 |
|
|
.border
|
2553 |
|
|
|
2554 |
|
|
dup l/b <
|
2555 |
|
|
while
|
2556 |
|
|
2dup #line pipe line $type pipe cr 1+
|
2557 |
|
|
repeat .border 2drop ;
|
2558 |
|
|
|
2559 |
|
|
\ t: index ( k1 k2 -- : show titles for block k1 to k2 )
|
2560 |
|
|
\ over- cr
|
2561 |
|
|
\ for
|
2562 |
|
|
\ dup 5u.r space pipe space dup 0 .line cr 1+
|
2563 |
|
|
\ next drop ;
|
2564 |
|
|
|
2565 |
|
|
\ ## Booting
|
2566 |
|
|
\ We are now nearing the end of this tutorial, after the boot sequence
|
2567 |
|
|
\ word set has been completed we will have a working Forth system. The
|
2568 |
|
|
\ boot sequence consists of getting the Forth system into a known working
|
2569 |
|
|
\ state, checking for corruption in the image, and printing out a welcome
|
2570 |
|
|
\ message. This behaviour can be changed if needed.
|
2571 |
|
|
\
|
2572 |
|
|
\ The boot sequence is as follows:
|
2573 |
|
|
\
|
2574 |
|
|
\ 1. The virtual machine starts execution at address 0 which will be set
|
2575 |
|
|
\ to point to the word 'boot-sequence'.
|
2576 |
|
|
\ 2. The word 'boot-sequence' is executed, which will run the word 'cold'
|
2577 |
|
|
\ to perform the system setup.
|
2578 |
|
|
\ 3. The word 'cold' checks that the image length and CRC in the image header
|
2579 |
|
|
\ match the values it calculates, zeros blocks of memory, and initializes the
|
2580 |
|
|
\ systems I/O.
|
2581 |
|
|
\ 4. 'boot-sequence' continues execution by executing the execution token
|
2582 |
|
|
\ stored in the variable ''. This is set to 'normal-running' by default.
|
2583 |
|
|
\ 5. 'normal-running' prints out the welcome message by calling the word 'hi',
|
2584 |
|
|
\ and then entering the Forth Read-Evaluate Loop, known as 'quit'. This should
|
2585 |
|
|
\ not normally return.
|
2586 |
|
|
\ 6. If the function returns, 'bye' is called, halting the virtual machine.
|
2587 |
|
|
\
|
2588 |
|
|
\ The boot sequence is modifiable by the user by either writing an execution
|
2589 |
|
|
\ token to the '' variable, or by writing to a jump to a word into memory
|
2590 |
|
|
\ location zero, if the image is saved, the next time it is run execution will
|
2591 |
|
|
\ take place at the new location.
|
2592 |
|
|
\
|
2593 |
|
|
\ It should be noted that the self-check routine, 'bist', disables checking
|
2594 |
|
|
\ in generated images by manipulating the word header once the check has
|
2595 |
|
|
\ succeeded. This is because after the system boots up the image is going
|
2596 |
|
|
\ to change in RAM soon after 'cold' has finished, this could be organized
|
2597 |
|
|
\ better so only sections of memory that do not (or should not change) get
|
2598 |
|
|
\ checked, one way this could be achieved is by locating all variables
|
2599 |
|
|
\ in specific sections, and checking only code. This has the advantage that
|
2600 |
|
|
\ the system image could be stored in Read Only Memory (ROM), which would
|
2601 |
|
|
\ facilitate porting the system to low memory systems, such as a
|
2602 |
|
|
\ microcontroller as they only have a few kilobytes of RAM to work with. The
|
2603 |
|
|
\ virtual machine primitives for load and store could be changed so that
|
2604 |
|
|
\ reads get mapped to RAM or ROM based on their address, and writes to RAM
|
2605 |
|
|
\ also (with attempted writes to ROM throwing an exception). The image is
|
2606 |
|
|
\ only 6KiB in size, and only a few kilobytes are needed in RAM for a working
|
2607 |
|
|
\ forth image, perhaps as little as ~2-4 KiB.
|
2608 |
|
|
\
|
2609 |
|
|
\ A few other interesting things could be done during the execution of 'cold',
|
2610 |
|
|
\ the image could be compressed by the metacompiler and the boot sequence
|
2611 |
|
|
\ could decompress it (which would require the decompressor to be one of the
|
2612 |
|
|
\ first things to run). Experimenting with various schemes it appears
|
2613 |
|
|
\ Adaptive Huffman Coding
|
2614 |
|
|
\ (see ) produces
|
2615 |
|
|
\ the best results, followed by LZSS (see
|
2616 |
|
|
\ ). The schemes
|
2617 |
|
|
\ are also simple and small enough (both in size and memory requirements)
|
2618 |
|
|
\ that they could be implemented in Forth.
|
2619 |
|
|
\
|
2620 |
|
|
\ Another possibility is to obfuscate the image by exclusive or'ing it with
|
2621 |
|
|
\ a value to frustrate the reserve engineering of binaries, or even to
|
2622 |
|
|
\ encrypt it and ask for the decryption key on startup.
|
2623 |
|
|
\
|
2624 |
|
|
\ Obfuscation and compression are more difficult to implement than a simple
|
2625 |
|
|
\ CRC check and require a more careful organization and control of the
|
2626 |
|
|
\ boot sequence than is provided. It is possible to make 'turn-key'
|
2627 |
|
|
\ applications that start execution at an arbitrary point (call 'turn-key'
|
2628 |
|
|
\ because all the user has to do is 'turn the key' and the program is ready
|
2629 |
|
|
\ to go) by setting the boot variable to the desired word and saving the
|
2630 |
|
|
\ image with 'save'.
|
2631 |
|
|
\
|
2632 |
|
|
|
2633 |
|
|
h: check-header? header-options @ first-bit 0= ; ( -- t )
|
2634 |
|
|
h: disable-check $1 header-options toggle ; ( -- )
|
2635 |
|
|
|
2636 |
|
|
\ 'bist' checks the length field in the header matches 'here' and that the
|
2637 |
|
|
\ CRC in the header matches the CRC it calculates in the image, it has to
|
2638 |
|
|
\ zero the CRC field out first.
|
2639 |
|
|
|
2640 |
|
|
h: bist ( -- u : built in self test )
|
2641 |
|
|
check-header? if 0x0000 exit then ( is checking disabled? Success? )
|
2642 |
|
|
header-length @ here xor if 2 exit then ( length check )
|
2643 |
|
|
header-crc @ 0 header-crc ! ( retrieve and zero CRC )
|
2644 |
|
|
|
2645 |
|
|
disable-check 0x0000 ; ( disable check, success )
|
2646 |
|
|
|
2647 |
|
|
\ 'cold' performs the self check, and exits if it fails. It then
|
2648 |
|
|
\ goes on to zero memory, set the initial value of 'blk', set the I/O
|
2649 |
|
|
\ vectors to sensible values, set the vocabularies to the default search
|
2650 |
|
|
\ order and reset the variable stack.
|
2651 |
|
|
|
2652 |
|
|
: cold ( -- : performs a cold boot )
|
2653 |
|
|
bist ?dup if negate (bye) exit then
|
2654 |
|
|
$10 block b/buf 0 fill
|
2655 |
|
|
$12 retrieve io!
|
2656 |
|
|
forth sp0 sp!
|
2657 |
|
|
@execute bye ;
|
2658 |
|
|
|
2659 |
|
|
\ 'hi' prints out the welcome message,
|
2660 |
|
|
|
2661 |
|
|
h: hi hex cr ." eFORTH V " ver 0 u.r cr here . .free cr [ ; ( -- )
|
2662 |
|
|
h: normal-running hi quit ; ( -- : boot word )
|
2663 |
|
|
|
2664 |
|
|
\ ## See : The Forth Disassembler
|
2665 |
|
|
|
2666 |
|
|
\ @warning This disassembler is experimental, and not liable to work
|
2667 |
|
|
\ @todo improve this with better output and exit detection.
|
2668 |
|
|
\ 'see' could be improved with a word that detects when the end of a
|
2669 |
|
|
\ word actually occurs, and with a disassembler for instructions. The output
|
2670 |
|
|
\ could also be better formatted, or optionally made to be more or less
|
2671 |
|
|
\ verbose. Another improvement would be to do the word lookup for branches
|
2672 |
|
|
\ that occur outside of the word definition, or prior to it, as there
|
2673 |
|
|
\ are many words which have been turned into tail calls - or a single
|
2674 |
|
|
\ branch.
|
2675 |
|
|
|
2676 |
|
|
\ @todo handle various special cases in the decompiler
|
2677 |
|
|
\ Such as literals spanning two instructions, merged exits, tail calls,
|
2678 |
|
|
\ variables, constants, created words, strings and possibly more.
|
2679 |
|
|
|
2680 |
|
|
h: validate tuck cfa <> if drop-0 exit then nfa ; ( cfa pwd -- nfa | 0 )
|
2681 |
|
|
|
2682 |
|
|
h: search-for-cfa ( wid cfa -- nfa : search for CFA in a word list )
|
2683 |
|
|
address cells >r
|
2684 |
|
|
begin
|
2685 |
|
|
dup
|
2686 |
|
|
while
|
2687 |
|
|
address dup r@ swap dup-@ address swap within ( simplify? )
|
2688 |
|
|
( @bug does not continue with search if validate fails )
|
2689 |
|
|
if @address r> swap validate exit then
|
2690 |
|
|
address @
|
2691 |
|
|
repeat rdrop ;
|
2692 |
|
|
|
2693 |
|
|
h: name ( cwf -- a | 0 )
|
2694 |
|
|
>r
|
2695 |
|
|
get-order
|
2696 |
|
|
begin
|
2697 |
|
|
dup
|
2698 |
|
|
while
|
2699 |
|
|
swap r@ search-for-cfa ?dup if >r 1- ndrop r> rdrop exit then
|
2700 |
|
|
1- repeat rdrop ;
|
2701 |
|
|
|
2702 |
|
|
h: .name name ?dup 0= if $" ???" then print ;
|
2703 |
|
|
h: ?instruction ( i m e -- i 0 | e -1 )
|
2704 |
|
|
>r over and r> tuck = if nip [-1] exit then drop-0 ;
|
2705 |
|
|
|
2706 |
|
|
h: .instruction ( u -- u )
|
2707 |
|
|
0x8000 0x8000 ?instruction if ." LIT " exit then
|
2708 |
|
|
$6000 $6000 ?instruction if ." ALU " exit then
|
2709 |
|
|
$6000 $4000 ?instruction if ." CAL " exit then
|
2710 |
|
|
$6000 $2000 ?instruction if ." BRZ " exit then
|
2711 |
|
|
drop-0 ." BRN " ;
|
2712 |
|
|
|
2713 |
|
|
: decompile ( u -- : decompile instruction )
|
2714 |
|
|
dup .instruction $4000 =
|
2715 |
|
|
if space .name exit then drop ;
|
2716 |
|
|
|
2717 |
|
|
h: decompiler ( previous current -- : decompile starting at address )
|
2718 |
|
|
>r
|
2719 |
|
|
begin dup r@ u< while
|
2720 |
|
|
dup 5u.r colon space
|
2721 |
|
|
dup-@
|
2722 |
|
|
dup 5u.r space decompile cr cell+
|
2723 |
|
|
repeat rdrop drop ;
|
2724 |
|
|
|
2725 |
|
|
\ 'see' is the Forth disassembler, it takes a word and (attempts) to
|
2726 |
|
|
\ turn it back into readable Forth source code. The disassembler is only
|
2727 |
|
|
\ a few hundred bytes in size, which is a testament to the brevity achievable
|
2728 |
|
|
\ with Forth.
|
2729 |
|
|
\
|
2730 |
|
|
\ If the word 'see' was good enough we could potentially dispense with the
|
2731 |
|
|
\ source code entirely: the entire dictionary could be disassembled and saved
|
2732 |
|
|
\ to disk, modified, then recompiled yielding a modified Forth. Although
|
2733 |
|
|
\ comments would not be present, meaning this would be more of an intellectual
|
2734 |
|
|
\ exercise than of any utility.
|
2735 |
|
|
|
2736 |
|
|
: see ( --, : decompile a word )
|
2737 |
|
|
token finder 0= if not-found exit then
|
2738 |
|
|
swap 2dup= if drop here then >r
|
2739 |
|
|
cr colon space dup .id space dup cr
|
2740 |
|
|
cfa r> decompiler space $3b emit
|
2741 |
|
|
dup compile-only? if ." compile-only " then
|
2742 |
|
|
dup inline? if ." inline " then
|
2743 |
|
|
immediate? if ." immediate " then cr ;
|
2744 |
|
|
|
2745 |
|
|
\ A few useful utility words will be added next, which are not strictly
|
2746 |
|
|
\ necessary but are useful. Those are '.s' for examining the contents of the
|
2747 |
|
|
\ variable stack, and 'dump' for showing the contents of a section of memory
|
2748 |
|
|
\
|
2749 |
|
|
\ The '.s' word must be careful not to alter that variable stack whilst
|
2750 |
|
|
\ trying to print it out. It uses the word 'pick' to achieve this, otherwise
|
2751 |
|
|
\ there is nothing special about this word, and it is very useful for
|
2752 |
|
|
\ debugging code interactively to see what its stack effects are.
|
2753 |
|
|
\
|
2754 |
|
|
\ The dump keyword is fairly useful for the implementer so that they can
|
2755 |
|
|
\ use Forth to debug if the compilation is working, or if a new word
|
2756 |
|
|
\ is producing the correct assembly. It can also be used as a utility to
|
2757 |
|
|
\ export binary sections of memory as text.
|
2758 |
|
|
\
|
2759 |
|
|
\ The programmer might want to edit the 'dump' word to customize its output,
|
2760 |
|
|
\ the addition of the 'dc+' word is one way it could be extended, which is
|
2761 |
|
|
\ commented out below. Like 'dm+', 'dc+' operates on a single line to
|
2762 |
|
|
\ be displayed, however 'dc+' decompiles the memory into human readable
|
2763 |
|
|
\ instructions instead of numbers, unfortunately the lines it produces are
|
2764 |
|
|
\ too long.
|
2765 |
|
|
\
|
2766 |
|
|
\ Normally the word 'dump' outputs the memory contents in hexadecimal,
|
2767 |
|
|
\ however a design decision was taken to output the contents of memory in
|
2768 |
|
|
\ what the current numeric output base is instead. This makes the word
|
2769 |
|
|
\ more flexible, more consistent and shorter, than it otherwise would be as
|
2770 |
|
|
\ the current output base would have to be saved and then restored.
|
2771 |
|
|
\
|
2772 |
|
|
|
2773 |
|
|
: .s cr depth for aft r@ pick . then next ."
|
2774 |
|
|
h: dm+ chars for aft dup-@ space 5u.r cell+ then next ; ( a u -- a )
|
2775 |
|
|
\ h: dc+ chars for aft dup-@ space decompile cell+ then next ; ( a u -- a )
|
2776 |
|
|
|
2777 |
|
|
: dump ( a u -- )
|
2778 |
|
|
$10 + \ align up by dump-width
|
2779 |
|
|
4 rshift ( <-- equivalent to "dump-width /" )
|
2780 |
|
|
for
|
2781 |
|
|
aft
|
2782 |
|
|
cr dump-width 2dup
|
2783 |
|
|
over 5u.r colon space
|
2784 |
|
|
dm+ ( dump-width dc+ ) \ <-- dc+ is optional
|
2785 |
|
|
-rot
|
2786 |
|
|
2 spaces $type
|
2787 |
|
|
then
|
2788 |
|
|
next drop ;
|
2789 |
|
|
|
2790 |
|
|
\ The standard Forth dictionary is now complete, but the variables containing
|
2791 |
|
|
\ the word list need to be updated a final time. The next section implements
|
2792 |
|
|
\ the block editor, which is in the 'editor' word set. Their are two variables
|
2793 |
|
|
\ that need updating, '_forth-wordlist', a vocabulary we have already
|
2794 |
|
|
\ encountered. An 'current', which contains a pointer to a word list, this
|
2795 |
|
|
\ word list is the one new definitions (defined by ':', or 'create') are
|
2796 |
|
|
\ added to. It will be set to '_forth-wordlist' so new definitions are added
|
2797 |
|
|
\ to the default vocabulary.
|
2798 |
|
|
[last] [t] _forth-wordlist t!
|
2799 |
|
|
[t] _forth-wordlist [t] current t!
|
2800 |
|
|
|
2801 |
|
|
\ ## Block Editor
|
2802 |
|
|
\ This block editor is an excellent example of a Forth application; it is
|
2803 |
|
|
\ small, terse, and uses the facilities already built into Forth to do all
|
2804 |
|
|
\ of the heavy lifting, specifically vocabularies, the block word set and
|
2805 |
|
|
\ the text interpreter.
|
2806 |
|
|
\
|
2807 |
|
|
\ Forth blocks used to be the canonical way of storing both source code and
|
2808 |
|
|
\ data within a Forth system, it is a simply way of abstracting out how mass
|
2809 |
|
|
\ storage works and worked well on the microcomputers available in the 1980s.
|
2810 |
|
|
\ With the rise of computers with a more capable operating system the Block
|
2811 |
|
|
\ Word Set (See ) fell out of
|
2812 |
|
|
\ favour, being replaced instead by the File Access Word Set
|
2813 |
|
|
\ (See ), allowing named files
|
2814 |
|
|
\ to be accessed as a byte stream.
|
2815 |
|
|
\
|
2816 |
|
|
\ To keep things simple this editor uses the block word set and in typical
|
2817 |
|
|
\ Forth fashion simplifies the problem to the extreme, whilst also sacrificing
|
2818 |
|
|
\ usability and functionally - the block editor allows for the editing of
|
2819 |
|
|
\ programs but it is more difficult and more limited than traditional editors.
|
2820 |
|
|
\ It has no spell checking, or syntax highlighting, and does little in the
|
2821 |
|
|
\ way of error checking. But it is very small, compact, easy to understand, and
|
2822 |
|
|
\ if needed could be extended.
|
2823 |
|
|
\
|
2824 |
|
|
\ The way this editor works is by replacing the current search order with
|
2825 |
|
|
\ a set of words that implement text editing on the currently loaded block,
|
2826 |
|
|
\ as well as managing what block is loaded. The defined words are short,
|
2827 |
|
|
\ often just a single letter long.
|
2828 |
|
|
\
|
2829 |
|
|
\ The act of editing text is simplified as well, instead of keeping track of
|
2830 |
|
|
\ variable width lines of text and files, a single block (1024 characters) is
|
2831 |
|
|
\ divided up into 16 lines, each 64 characters in length. This is the essence
|
2832 |
|
|
\ of Forth, radically simplifying the problem from all possible angels; the
|
2833 |
|
|
\ algorithms used, the software itself and where possible the hardware. Not
|
2834 |
|
|
\ every task can be approached this way, nor would everyone be happy with
|
2835 |
|
|
\ the results, the editor being presented more as a curiosity than anything
|
2836 |
|
|
\ else.
|
2837 |
|
|
\
|
2838 |
|
|
\ We have a way of loading and saving data from disks (the 'block', 'update'
|
2839 |
|
|
\ and 'flush' words) as well as a way of viewing the data in a block (the
|
2840 |
|
|
\ 'list' word) and evaluating the text within a block (with the 'load' word).
|
2841 |
|
|
\ The variable 'blk' is also of use as it holds the latest block we have
|
2842 |
|
|
\ retrieved from disk. By defining a new word set we can skip the part of
|
2843 |
|
|
\ reading in a parsing commands and numbers, we can use text interpreter and
|
2844 |
|
|
\ line oriented input to do the work for us, as discussed.
|
2845 |
|
|
\
|
2846 |
|
|
\ Only one extra word is actually need given the words we already have, one
|
2847 |
|
|
\ which can destructively replace a line starting at a given column in the
|
2848 |
|
|
\ currently loaded block. All of the other commands are simple derivations
|
2849 |
|
|
\ of existing words. This word is called 'ia', short for 'insert at', which
|
2850 |
|
|
\ takes two numeric arguments (starting line as the first, and column as the
|
2851 |
|
|
\ second) and reads all text on the line after the 'ia' and places it at the
|
2852 |
|
|
\ specified line/column.
|
2853 |
|
|
\
|
2854 |
|
|
\ The command description and their definitions are the best descriptions
|
2855 |
|
|
\ of how this editor works. Try to use the word set interactively to get
|
2856 |
|
|
\ a feel for it:
|
2857 |
|
|
\
|
2858 |
|
|
\ | A1 | A2 | Command | Description |
|
2859 |
|
|
\ | -- | -- | ------- | ------------------------------------------ |
|
2860 |
|
|
\ | #2 | #1 | ia | insert text into column #1 on line #2 |
|
2861 |
|
|
\ | | #1 | i | insert text into column 0 on line #1 |
|
2862 |
|
|
\ | | #1 | b | load block number #1 |
|
2863 |
|
|
\ | | #1 | d | blank line number #1 |
|
2864 |
|
|
\ | | | x | blank currently loaded block |
|
2865 |
|
|
\ | | | l | redisplay currently loaded block |
|
2866 |
|
|
\ | | | q | remove editor word set from search order |
|
2867 |
|
|
\ | | | n | load next block |
|
2868 |
|
|
\ | | | p | load previous block |
|
2869 |
|
|
\ | | | s | save changes to disk |
|
2870 |
|
|
\ | | | e | evaluate block |
|
2871 |
|
|
\
|
2872 |
|
|
\ An example command session might be:
|
2873 |
|
|
\
|
2874 |
|
|
\ | Command Sequence | Description |
|
2875 |
|
|
\ | ------------------------ | --------------------------------------- |
|
2876 |
|
|
\ | editor | add the editor word set to search order |
|
2877 |
|
|
\ | $20 b l | load block $20 (hex) and display it |
|
2878 |
|
|
\ | x | blank block $20 |
|
2879 |
|
|
\ | 0 i .( Hello, World ) cr | Put ".( Hello, World ) cr" on line 0 |
|
2880 |
|
|
\ | 1 i 2 2 + . cr | Put "2 2 + . cr" on line 1 |
|
2881 |
|
|
\ | l | list block $20 again |
|
2882 |
|
|
\ | e | evaluate block $20 |
|
2883 |
|
|
\ | s | save contents |
|
2884 |
|
|
\ | q | unload block word set |
|
2885 |
|
|
\
|
2886 |
|
|
\ See: for the origin of
|
2887 |
|
|
\ this block editor, and for different implementations.
|
2888 |
|
|
|
2889 |
|
|
|
2890 |
|
|
h: [block] blk-@ block ; ( k -- a : loaded block address )
|
2891 |
|
|
h: [check] dup b/buf c/l/ u>= if $18 -throw exit then ;
|
2892 |
|
|
h: [line] [check] c/l* [block] + ; ( u -- a )
|
2893 |
|
|
: b retrieve ; ( k -- : load a block )
|
2894 |
|
|
: l blk-@ list ; ( -- : list current block )
|
2895 |
|
|
: n 1 +block b l ; ( -- : load and list next block )
|
2896 |
|
|
: p [-1] +block b l ; ( -- : load and list previous block )
|
2897 |
|
|
: d [line] c/l blank ; ( u -- : delete line )
|
2898 |
|
|
: x [block] b/buf blank ; ( -- : erase loaded block )
|
2899 |
|
|
: s update flush ; ( -- : flush changes to disk )
|
2900 |
|
|
: q editor-voc -order ; ( -- : quit editor )
|
2901 |
|
|
: e q blk-@ load editor ; ( -- : evaluate block )
|
2902 |
|
|
: ia c/l* + [block] + source drop in@ + ( u u -- )
|
2903 |
|
|
swap source nip in@ - cmove postpone \ ;
|
2904 |
|
|
: i 0 swap ia ; ( u -- )
|
2905 |
|
|
\ : u update ; ( -- : set block set as dirty )
|
2906 |
|
|
\ : w words ;
|
2907 |
|
|
\ : yank pad c/l ;
|
2908 |
|
|
\ : c [line] yank >r swap r> cmove ;
|
2909 |
|
|
\ : y [line] yank cmove ;
|
2910 |
|
|
\ : ct swap y c ;
|
2911 |
|
|
\ : ea [line] c/l evaluate ;
|
2912 |
|
|
\ : sw 2dup y [line] swap [line] swap c/l cmove c ;
|
2913 |
|
|
[last] [t] editor-voc t! 0 tlast meta!
|
2914 |
|
|
|
2915 |
|
|
\ ## Final Touches
|
2916 |
|
|
|
2917 |
|
|
there [t] cp t!
|
2918 |
|
|
[t] (literal) [v] t! ( set literal execution vector )
|
2919 |
|
|
[t] cold 2/ 0 t! ( set starting word )
|
2920 |
|
|
[t] normal-running [v] t!
|
2921 |
|
|
|
2922 |
|
|
there 6 tcells t! \ Set Length First!
|
2923 |
|
|
checksum 7 tcells t! \ Calculate image CRC
|
2924 |
|
|
|
2925 |
|
|
finished
|
2926 |
|
|
bye
|
2927 |
|
|
|
2928 |
|
|
# APPENDIX
|
2929 |
|
|
|
2930 |
|
|
## The Virtual Machine
|
2931 |
|
|
|
2932 |
|
|
The Virtual Machine is a 16-bit stack machine based on the [H2 CPU][], a
|
2933 |
|
|
derivative of the [J1 CPU][], but adapted for use on a computer.
|
2934 |
|
|
|
2935 |
|
|
Its instruction set allows for a fairly dense encoding, and the project
|
2936 |
|
|
goal is to be fairly small whilst still being useful. It is small enough
|
2937 |
|
|
that is should be easily understandable with little explanation, and it
|
2938 |
|
|
is hackable and extensible by modification of the source code.
|
2939 |
|
|
|
2940 |
|
|
## Virtual Machine Memory Map
|
2941 |
|
|
|
2942 |
|
|
There is 64KiB of memory available to the Forth virtual machine, of which only
|
2943 |
|
|
the first 16KiB can contain program instructions (or more accurately branch
|
2944 |
|
|
locations can only be in the first 16KiB of memory). The virtual machine memory
|
2945 |
|
|
can divided into three regions of memory, the applications further divide the
|
2946 |
|
|
memory into different sections.
|
2947 |
|
|
|
2948 |
|
|
| Block | Region |
|
2949 |
|
|
| ------- | ---------------- |
|
2950 |
|
|
| 0 - 15 | Program Storage |
|
2951 |
|
|
| 16 | User Data |
|
2952 |
|
|
| 17 | Variable Stack |
|
2953 |
|
|
| 18 - 62 | User data |
|
2954 |
|
|
| 63 | Return Stack |
|
2955 |
|
|
|
2956 |
|
|
Program execution begins at address zero. The variable stack starts at the
|
2957 |
|
|
beginning of block 17 and grows upwards, the return stack starts at the end of
|
2958 |
|
|
block 63 and grows downward.
|
2959 |
|
|
|
2960 |
|
|
## Instruction Set Encoding
|
2961 |
|
|
|
2962 |
|
|
For a detailed look at how the instructions are encoded the source code is the
|
2963 |
|
|
definitive guide, available in the file [forth.c][].
|
2964 |
|
|
|
2965 |
|
|
A quick overview:
|
2966 |
|
|
|
2967 |
|
|
+---------------------------------------------------------------+
|
2968 |
|
|
| F | E | D | C | B | A | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
|
2969 |
|
|
+---------------------------------------------------------------+
|
2970 |
|
|
| 1 | LITERAL VALUE |
|
2971 |
|
|
+---------------------------------------------------------------+
|
2972 |
|
|
| 0 | 0 | 0 | BRANCH TARGET ADDRESS |
|
2973 |
|
|
+---------------------------------------------------------------+
|
2974 |
|
|
| 0 | 0 | 1 | CONDITIONAL BRANCH TARGET ADDRESS |
|
2975 |
|
|
+---------------------------------------------------------------+
|
2976 |
|
|
| 0 | 1 | 0 | CALL TARGET ADDRESS |
|
2977 |
|
|
+---------------------------------------------------------------+
|
2978 |
|
|
| 0 | 1 | 1 | ALU OPERATION |T2N|T2R|N2T|R2P| RSTACK| DSTACK|
|
2979 |
|
|
+---------------------------------------------------------------+
|
2980 |
|
|
| F | E | D | C | B | A | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
|
2981 |
|
|
+---------------------------------------------------------------+
|
2982 |
|
|
|
2983 |
|
|
T : Top of data stack
|
2984 |
|
|
N : Next on data stack
|
2985 |
|
|
PC : Program Counter
|
2986 |
|
|
|
2987 |
|
|
LITERAL VALUES : push a value onto the data stack
|
2988 |
|
|
CONDITIONAL : BRANCHS pop and test the T
|
2989 |
|
|
CALLS : PC+1 onto the return stack
|
2990 |
|
|
|
2991 |
|
|
T2N : Move T to N
|
2992 |
|
|
T2R : Move T to top of return stack
|
2993 |
|
|
N2T : Move the new value of T (or D) to N
|
2994 |
|
|
R2P : Move top of return stack to PC
|
2995 |
|
|
|
2996 |
|
|
RSTACK and DSTACK are signed values (twos compliment) that are
|
2997 |
|
|
the stack delta (the amount to increment or decrement the stack
|
2998 |
|
|
by for their respective stacks: return and data)
|
2999 |
|
|
|
3000 |
|
|
### ALU Operations
|
3001 |
|
|
|
3002 |
|
|
The ALU can be programmed to do the following operations on an ALU instruction,
|
3003 |
|
|
some operations trap on error (U/MOD, /MOD).
|
3004 |
|
|
|
3005 |
|
|
| # | Mnemonic | Description |
|
3006 |
|
|
| --- | -------- | -------------------- |
|
3007 |
|
|
| 0 | T | Top of Stack |
|
3008 |
|
|
| 1 | N | Copy T to N |
|
3009 |
|
|
| 2 | R | Top of return stack |
|
3010 |
|
|
| 3 | T@ | Load from address |
|
3011 |
|
|
| 4 | NtoT | Store to address |
|
3012 |
|
|
| 5 | T+N | Double cell addition |
|
3013 |
|
|
| 6 | T\*N | Double cell multiply |
|
3014 |
|
|
| 7 | T&N | Bitwise AND |
|
3015 |
|
|
| 8 | TorN | Bitwise OR |
|
3016 |
|
|
| 9 | T^N | Bitwise XOR |
|
3017 |
|
|
| 10 | ~T | Bitwise Inversion |
|
3018 |
|
|
| 11 | T-- | Decrement |
|
3019 |
|
|
| 12 | T=0 | Equal to zero |
|
3020 |
|
|
| 13 | T=N | Equality test |
|
3021 |
|
|
| 14 | Nu<T | Unsigned comparison |
|
3022 |
|
|
| 15 | N<T | Signed comparison |
|
3023 |
|
|
| 16 | NrshiftT | Logical Right Shift |
|
3024 |
|
|
| 17 | NlshiftT | Logical Left Shift |
|
3025 |
|
|
| 18 | SP@ | Depth of stack |
|
3026 |
|
|
| 19 | RP@ | R Stack Depth |
|
3027 |
|
|
| 20 | SP! | Set Stack Depth |
|
3028 |
|
|
| 21 | RP! | Set R Stack Depth |
|
3029 |
|
|
| 22 | SAVE | Save Image |
|
3030 |
|
|
| 23 | TX | Get byte |
|
3031 |
|
|
| 24 | RX | Send byte |
|
3032 |
|
|
| 25 | U/MOD | u/mod |
|
3033 |
|
|
| 26 | /MOD | /mod |
|
3034 |
|
|
| 27 | BYE | Return |
|
3035 |
|
|
|
3036 |
|
|
### Encoding of Forth Words
|
3037 |
|
|
|
3038 |
|
|
Many Forth words can be encoded directly in the instruction set, some of the
|
3039 |
|
|
ALU operations have extra stack and register effects as well, which although
|
3040 |
|
|
would be difficult to achieve in hardware is easy enough to do in software.
|
3041 |
|
|
|
3042 |
|
|
| Word | Mnemonic | T2N | T2R | N2T | R2P | RP | SP |
|
3043 |
|
|
| ------ | -------- | --- | --- | --- | --- | --- | --- |
|
3044 |
|
|
| dup | T | T2N | | | | | +1 |
|
3045 |
|
|
| over | N | T2N | | | | | +1 |
|
3046 |
|
|
| invert | ~T | | | | | | |
|
3047 |
|
|
| um+ | T+N | | | | | | |
|
3048 |
|
|
| \+ | T+N | | | N2T | | | -1 |
|
3049 |
|
|
| um\* | T\*N | | | | | | |
|
3050 |
|
|
| \* | T\*N | | | N2T | | | -1 |
|
3051 |
|
|
| swap | N | T2N | | | | | |
|
3052 |
|
|
| nip | T | | | | | | -1 |
|
3053 |
|
|
| drop | N | | | | | | -1 |
|
3054 |
|
|
| exit | T | | | | R2P | -1 | |
|
3055 |
|
|
| >r | N | | T2R | | | 1 | -1 |
|
3056 |
|
|
| r> | R | T2N | | | | -1 | 1 |
|
3057 |
|
|
| r@ | R | T2N | | | | | 1 |
|
3058 |
|
|
| @ | T@ | | | | | | |
|
3059 |
|
|
| ! | NtoT | | | | | | -1 |
|
3060 |
|
|
| rshift | NrshiftT | | | | | | -1 |
|
3061 |
|
|
| lshift | NlshiftT | | | | | | -1 |
|
3062 |
|
|
| = | T=N | | | | | | -1 |
|
3063 |
|
|
| u< | Nu<T | | | | | | -1 |
|
3064 |
|
|
| < | N<T | | | | | | -1 |
|
3065 |
|
|
| and | T&N | | | | | | -1 |
|
3066 |
|
|
| xor | T^N | | | | | | -1 |
|
3067 |
|
|
| or | T|N | | | | | | -1 |
|
3068 |
|
|
| sp@ | SP@ | T2N | | | | | 1 |
|
3069 |
|
|
| sp! | SP! | | | | | | |
|
3070 |
|
|
| 1- | T-- | | | | | | |
|
3071 |
|
|
| rp@ | RP@ | T2N | | | | | 1 |
|
3072 |
|
|
| rp! | RP! | | | | | | -1 |
|
3073 |
|
|
| 0= | T=0 | | | | | | |
|
3074 |
|
|
| nop | T | | | | | | |
|
3075 |
|
|
| (bye) | BYE | | | | | | |
|
3076 |
|
|
| rx? | RX | T2N | | | | | 1 |
|
3077 |
|
|
| tx! | TX | | | N2T | | | -1 |
|
3078 |
|
|
| (save) | SAVE | | | | | | -1 |
|
3079 |
|
|
| u/mod | U/MOD | T2N | | | | | |
|
3080 |
|
|
| /mod | /MOD | T2N | | | | | |
|
3081 |
|
|
| / | /MOD | | | | | | -1 |
|
3082 |
|
|
| mod | /MOD | | | N2T | | | -1 |
|
3083 |
|
|
| rdrop | T | | | | | -1 | |
|
3084 |
|
|
|
3085 |
|
|
## Interaction
|
3086 |
|
|
|
3087 |
|
|
The outside world can be interacted with in two ways, with single character
|
3088 |
|
|
input and output, or by saving the current Forth image. The interaction is
|
3089 |
|
|
performed by three instructions.
|
3090 |
|
|
|
3091 |
|
|
## eForth
|
3092 |
|
|
|
3093 |
|
|
The interpreter is based on eForth by C. H. Ting, with some modifications
|
3094 |
|
|
to the model.
|
3095 |
|
|
|
3096 |
|
|
## eForth Memory model
|
3097 |
|
|
|
3098 |
|
|
The eForth model imposes extra semantics to certain areas of memory.
|
3099 |
|
|
|
3100 |
|
|
| Address | Block | Meaning |
|
3101 |
|
|
| ------------- | ------ | ------------------------------ |
|
3102 |
|
|
| $0000 | 0 | Start of execution |
|
3103 |
|
|
| $0002 | 0 | Trap Handler |
|
3104 |
|
|
| $0004-EOD | 0 | The dictionary |
|
3105 |
|
|
| EOD-PAD1 | ? | Compilation and Numeric Output |
|
3106 |
|
|
| PAD1-PAD2 | ? | Pad Area |
|
3107 |
|
|
| PAD2-$3FFF | 15 | End of dictionary |
|
3108 |
|
|
| $4000 | 16 | Interpreter variable storage |
|
3109 |
|
|
| $4400 | 17 | Start of variable stack |
|
3110 |
|
|
| $4800-$FBFF | 18-63 | Empty blocks for user data |
|
3111 |
|
|
| $FC00-$FFFF | 0 | Return stack block |
|
3112 |
|
|
|
3113 |
|
|
## Error Codes
|
3114 |
|
|
|
3115 |
|
|
This is a list of Error codes, not all of which are used by the application.
|
3116 |
|
|
|
3117 |
|
|
| Hex | Dec | Message |
|
3118 |
|
|
| ---- | ---- | --------------------------------------------- |
|
3119 |
|
|
| FFFF | -1 | ABORT |
|
3120 |
|
|
| FFFE | -2 | ABORT" |
|
3121 |
|
|
| FFFD | -3 | stack overflow |
|
3122 |
|
|
| FFFC | -4 | stack underflow |
|
3123 |
|
|
| FFFB | -5 | return stack overflow |
|
3124 |
|
|
| FFFA | -6 | return stack underflow |
|
3125 |
|
|
| FFF9 | -7 | do-loops nested too deeply during execution |
|
3126 |
|
|
| FFF8 | -8 | dictionary overflow |
|
3127 |
|
|
| FFF7 | -9 | invalid memory address |
|
3128 |
|
|
| FFF6 | -10 | division by zero |
|
3129 |
|
|
| FFF5 | -11 | result out of range |
|
3130 |
|
|
| FFF4 | -12 | argument type mismatch |
|
3131 |
|
|
| FFF3 | -13 | undefined word |
|
3132 |
|
|
| FFF2 | -14 | interpreting a compile-only word |
|
3133 |
|
|
| FFF1 | -15 | invalid FORGET |
|
3134 |
|
|
| FFF0 | -16 | attempt to use zero-length string as a name |
|
3135 |
|
|
| FFEF | -17 | pictured numeric output string overflow |
|
3136 |
|
|
| FFEE | -18 | parsed string overflow |
|
3137 |
|
|
| FFED | -19 | definition name too long |
|
3138 |
|
|
| FFEC | -20 | write to a read-only location |
|
3139 |
|
|
| FFEB | -21 | unsupported operation |
|
3140 |
|
|
| FFEA | -22 | control structure mismatch |
|
3141 |
|
|
| FFE9 | -23 | address alignment exception |
|
3142 |
|
|
| FFE8 | -24 | invalid numeric argument |
|
3143 |
|
|
| FFE7 | -25 | return stack imbalance |
|
3144 |
|
|
| FFE6 | -26 | loop parameters unavailable |
|
3145 |
|
|
| FFE5 | -27 | invalid recursion |
|
3146 |
|
|
| FFE4 | -28 | user interrupt |
|
3147 |
|
|
| FFE3 | -29 | compiler nesting |
|
3148 |
|
|
| FFE2 | -30 | obsolescent feature |
|
3149 |
|
|
| FFE1 | -31 | >BODY used on non-CREATEd definition |
|
3150 |
|
|
| FFE0 | -32 | invalid name argument (e.g., TO xxx) |
|
3151 |
|
|
| FFDF | -33 | block read exception |
|
3152 |
|
|
| FFDE | -34 | block write exception |
|
3153 |
|
|
| FFDD | -35 | invalid block number |
|
3154 |
|
|
| FFDC | -36 | invalid file position |
|
3155 |
|
|
| FFDB | -37 | file I/O exception |
|
3156 |
|
|
| FFDA | -38 | non-existent file |
|
3157 |
|
|
| FFD9 | -39 | unexpected end of file |
|
3158 |
|
|
| FFD8 | -40 | invalid BASE for floating point conversion |
|
3159 |
|
|
| FFD7 | -41 | loss of precision |
|
3160 |
|
|
| FFD6 | -42 | floating-point divide by zero |
|
3161 |
|
|
| FFD5 | -43 | floating-point result out of range |
|
3162 |
|
|
| FFD4 | -44 | floating-point stack overflow |
|
3163 |
|
|
| FFD3 | -45 | floating-point stack underflow |
|
3164 |
|
|
| FFD2 | -46 | floating-point invalid argument |
|
3165 |
|
|
| FFD1 | -47 | compilation word list deleted |
|
3166 |
|
|
| FFD0 | -48 | invalid POSTPONE |
|
3167 |
|
|
| FFCF | -49 | search-order overflow |
|
3168 |
|
|
| FFCE | -50 | search-order underflow |
|
3169 |
|
|
| FFCD | -51 | compilation word list changed |
|
3170 |
|
|
| FFCC | -52 | control-flow stack overflow |
|
3171 |
|
|
| FFCB | -53 | exception stack overflow |
|
3172 |
|
|
| FFCA | -54 | floating-point underflow |
|
3173 |
|
|
| FFC9 | -55 | floating-point unidentified fault |
|
3174 |
|
|
| FFC8 | -56 | QUIT |
|
3175 |
|
|
| FFC7 | -57 | exception in sending or receiving a character |
|
3176 |
|
|
| FFC6 | -58 | [IF], [ELSE], or [THEN] exception |
|
3177 |
|
|
|
3178 |
|
|
|
3179 |
|
|
|
3180 |
|
|
| Hex | Dec | Message |
|
3181 |
|
|
| ---- | ---- | --------------------------------------------- |
|
3182 |
|
|
| FFC5 | -59 | ALLOCATE |
|
3183 |
|
|
| FFC4 | -60 | FREE |
|
3184 |
|
|
| FFC3 | -61 | RESIZE |
|
3185 |
|
|
| FFC2 | -62 | CLOSE-FILE |
|
3186 |
|
|
| FFC1 | -63 | CREATE-FILE |
|
3187 |
|
|
| FFC0 | -64 | DELETE-FILE |
|
3188 |
|
|
| FFBF | -65 | FILE-POSITION |
|
3189 |
|
|
| FFBE | -66 | FILE-SIZE |
|
3190 |
|
|
| FFBD | -67 | FILE-STATUS |
|
3191 |
|
|
| FFBC | -68 | FLUSH-FILE |
|
3192 |
|
|
| FFBB | -69 | OPEN-FILE |
|
3193 |
|
|
| FFBA | -70 | READ-FILE |
|
3194 |
|
|
| FFB9 | -71 | READ-LINE |
|
3195 |
|
|
| FFB8 | -72 | RENAME-FILE |
|
3196 |
|
|
| FFB7 | -73 | REPOSITION-FILE |
|
3197 |
|
|
| FFB6 | -74 | RESIZE-FILE |
|
3198 |
|
|
| FFB5 | -75 | WRITE-FILE |
|
3199 |
|
|
| FFB4 | -76 | WRITE-LINE |
|
3200 |
|
|
|
3201 |
|
|
## To Do / Wish List
|
3202 |
|
|
|
3203 |
|
|
* Relative jumps could be used instead of absolute jumps in the code, this
|
3204 |
|
|
would make relocation easier, and could make all code position independent. It
|
3205 |
|
|
may also make the resulting code easier to compress, especially if the
|
3206 |
|
|
majority of jumps are to near locations. Perhaps relative addressing should
|
3207 |
|
|
only be used for branches and not calls, or vice versa. Absolute jumps could
|
3208 |
|
|
be faked if needed with the correct wordset, self modifying code, or the
|
3209 |
|
|
correct compliation methods.
|
3210 |
|
|
* More assertions and range checks should be added to the interpreter, for
|
3211 |
|
|
example the **save** function needs checks for bounds.
|
3212 |
|
|
* The forth virtual machine in [forth.c][] should be made to be crash proof,
|
3213 |
|
|
with checks to make sure indices never go out of bounds.
|
3214 |
|
|
* Documentation could be extracted from the [meta.fth][] file, which should
|
3215 |
|
|
describe the entire system: The metacompiler, the target virtual machine,
|
3216 |
|
|
and how Forth works.
|
3217 |
|
|
* Add more references, and turn this program into a literate file.
|
3218 |
|
|
- Compression routines would be a nice to have feature for reducing
|
3219 |
|
|
the saved image size. LZSS could be used, see:
|
3220 |
|
|
|
3221 |
|
|
Adaptive Huffman encoding performs even better.
|
3222 |
|
|
- Talk about and write about:
|
3223 |
|
|
- Potential additions
|
3224 |
|
|
- The philosophy of Forth
|
3225 |
|
|
- How the meta compiler words
|
3226 |
|
|
- Implementing allocation routines, and floating point routines
|
3227 |
|
|
- Compression and the similarity of Forth Factoring and LZW compression
|
3228 |
|
|
* This Forth needs a series of unit tests to make sure the basic functionality
|
3229 |
|
|
of all the words is correct
|
3230 |
|
|
* This Forth lacks a version of 'FORGET', as well as 'MARKER', which is
|
3231 |
|
|
unfortunate, as they are useful. This is due to how word lists are
|
3232 |
|
|
implemented.
|
3233 |
|
|
* For Floating Point Routines, written in portable Forth for a 16-bit platform,
|
3234 |
|
|
look at the Forth Dimensions magazine, volume 4, issue 1
|
3235 |
|
|
* One possible exercise would be to reduce the image size to its absoluate
|
3236 |
|
|
minimum, by removing unneeded functionality for the metacompilation process,
|
3237 |
|
|
such as the block editor, and 'see', as well as any words not actually used
|
3238 |
|
|
in the metacompilation process.
|
3239 |
|
|
* An image could be prepared with the smallest possible Forth interpreter,
|
3240 |
|
|
it would not necessarily have to be able to meta-compile.
|
3241 |
|
|
* A more traditional block storage method would be more useful, instead of
|
3242 |
|
|
saving sections of the virtual machine image a 'block transfer' instruction
|
3243 |
|
|
could be made, which would index into a file and retrieve/create blocks, which
|
3244 |
|
|
would allow much more memory to be used as mass storage (65536*1024 Bytes).
|
3245 |
|
|
* Look at the libforth test bench and reimplement it
|
3246 |
|
|
* Talk about making 'state' an execution token, with '[' and ']' just changing
|
3247 |
|
|
the value of state.
|
3248 |
|
|
* Allow an arbitrary character to be used for numeric output alignment, not
|
3249 |
|
|
just spaces. This would allow leading zeros to be added to numbers.
|
3250 |
|
|
* Some more words need adding in, like "postpone", "[']", "[if]", "[else]",
|
3251 |
|
|
"[then]", "T{", "}T", ... Of note is that "postpone" would have to deal
|
3252 |
|
|
with inlineable words.
|
3253 |
|
|
: [[ ; \ Stop postponing
|
3254 |
|
|
: ]]
|
3255 |
|
|
begin
|
3256 |
|
|
>in @ ' ['] [[ <>
|
3257 |
|
|
while
|
3258 |
|
|
>in ! postpone postpone
|
3259 |
|
|
repeat
|
3260 |
|
|
drop ; immediate
|
3261 |
|
|
* For the words with vectored exection, it might be a good idea to create
|
3262 |
|
|
default actions if the vector is null, currently the default action is to
|
3263 |
|
|
do nothing. For example:
|
3264 |
|
|
|
3265 |
|
|
: emit ?dup if @execute exit then drop ;
|
3266 |
|
|
: key ?dup if @execute exit then -1 ( -1 = end of file ) ;
|
3267 |
|
|
( An alternative version of 'key' would be: )
|
3268 |
|
|
: key ?dup if @execute exit then rx? ;
|
3269 |
|
|
|
3270 |
|
|
|
3271 |
|
|
## Virtual Machine Implementation in C
|
3272 |
|
|
|
3273 |
|
|
/** @file forth.c
|
3274 |
|
|
* @brief Forth Virtual Machine
|
3275 |
|
|
* @copyright Richard James Howe (2017)
|
3276 |
|
|
* @license MIT */
|
3277 |
|
|
|
3278 |
|
|
#include
|
3279 |
|
|
#include
|
3280 |
|
|
#include
|
3281 |
|
|
#include
|
3282 |
|
|
#include
|
3283 |
|
|
#include
|
3284 |
|
|
|
3285 |
|
|
#define CORE (65536u) /* core size in bytes */
|
3286 |
|
|
#define SP0 (8704u) /* Variable Stack Start: 8192 + 512 */
|
3287 |
|
|
#define RP0 (32767u) /* Return Stack Start: end of CORE in words */
|
3288 |
|
|
|
3289 |
|
|
#ifdef TRON
|
3290 |
|
|
#define TRACE(PC,I,SP,RP) \
|
3291 |
|
|
fprintf(stderr, "%04x %04x %04x %04x\n", (unsigned)(PC), (unsigned)(I),\
|
3292 |
|
|
(unsigned)(SP), (unsigned)(RP));
|
3293 |
|
|
#else
|
3294 |
|
|
#define TRACE(PC, I, SP, RP)
|
3295 |
|
|
#endif
|
3296 |
|
|
|
3297 |
|
|
typedef uint16_t uw_t;
|
3298 |
|
|
typedef int16_t sw_t;
|
3299 |
|
|
typedef uint32_t ud_t;
|
3300 |
|
|
|
3301 |
|
|
typedef struct {
|
3302 |
|
|
uw_t pc, t, rp, sp, core[CORE/sizeof(uw_t)];
|
3303 |
|
|
} forth_t;
|
3304 |
|
|
|
3305 |
|
|
static FILE *fopen_or_die(const char *file, const char *mode)
|
3306 |
|
|
{
|
3307 |
|
|
FILE *f = NULL;
|
3308 |
|
|
errno = 0;
|
3309 |
|
|
assert(file && mode);
|
3310 |
|
|
if(!(f = fopen(file, mode))) {
|
3311 |
|
|
fprintf(stderr, "failed to open file '%s' (mode %s): %s\n",
|
3312 |
|
|
file, mode, strerror(errno));
|
3313 |
|
|
exit(EXIT_FAILURE);
|
3314 |
|
|
}
|
3315 |
|
|
return f;
|
3316 |
|
|
}
|
3317 |
|
|
|
3318 |
|
|
static int binary_memory_load(FILE *input, uw_t *p, const size_t length)
|
3319 |
|
|
{
|
3320 |
|
|
assert(input && p && length <= 0x8000);
|
3321 |
|
|
for(size_t i = 0; i < length; i++) {
|
3322 |
|
|
const int r1 = fgetc(input);
|
3323 |
|
|
const int r2 = fgetc(input);
|
3324 |
|
|
if(r1 < 0 || r2 < 0)
|
3325 |
|
|
return -1;
|
3326 |
|
|
p[i] = (((unsigned)r1 & 0xffu))|(((unsigned)r2 & 0xffu) << 8u);
|
3327 |
|
|
}
|
3328 |
|
|
return 0;
|
3329 |
|
|
}
|
3330 |
|
|
|
3331 |
|
|
static int binary_memory_save(FILE *output, uw_t *p, const size_t start,
|
3332 |
|
|
const size_t length)
|
3333 |
|
|
{
|
3334 |
|
|
assert(output && p /* && ((start + length) < 0x8000 || (start > length))*/);
|
3335 |
|
|
for(size_t i = start; i < length; i++) {
|
3336 |
|
|
errno = 0;
|
3337 |
|
|
const int r1 = fputc((p[i]) & 0xff, output);
|
3338 |
|
|
const int r2 = fputc((p[i] >> 8u) & 0xff, output);
|
3339 |
|
|
if(r1 < 0 || r2 < 0) {
|
3340 |
|
|
fprintf(stderr, "write failed: %s\n", strerror(errno));
|
3341 |
|
|
return -1;
|
3342 |
|
|
}
|
3343 |
|
|
}
|
3344 |
|
|
return 0;
|
3345 |
|
|
}
|
3346 |
|
|
|
3347 |
|
|
int load(forth_t *h, const char *name)
|
3348 |
|
|
{
|
3349 |
|
|
assert(h && name);
|
3350 |
|
|
FILE *input = fopen_or_die(name, "rb");
|
3351 |
|
|
const int r = binary_memory_load(input, h->core, CORE/sizeof(uw_t));
|
3352 |
|
|
fclose(input);
|
3353 |
|
|
h->pc = 0; h->t = 0; h->rp = RP0; h->sp = SP0;
|
3354 |
|
|
return r;
|
3355 |
|
|
}
|
3356 |
|
|
|
3357 |
|
|
int save(forth_t *h, const char *name, size_t start, size_t length)
|
3358 |
|
|
{
|
3359 |
|
|
assert(h);
|
3360 |
|
|
if(!name)
|
3361 |
|
|
return -1;
|
3362 |
|
|
FILE *output = fopen_or_die(name, "wb");
|
3363 |
|
|
const int r = binary_memory_save(output, h->core, start, length);
|
3364 |
|
|
fclose(output);
|
3365 |
|
|
return r;
|
3366 |
|
|
}
|
3367 |
|
|
|
3368 |
|
|
int forth(forth_t *h, FILE *in, FILE *out, const char *block)
|
3369 |
|
|
{
|
3370 |
|
|
static const uw_t delta[] = { 0x0000, 0x0001, 0xFFFE, 0xFFFF };
|
3371 |
|
|
assert(h && in && out);
|
3372 |
|
|
uw_t pc = h->pc, t = h->t, rp = h->rp, sp = h->sp, *m = h->core;
|
3373 |
|
|
ud_t d;
|
3374 |
|
|
for(;;) {
|
3375 |
|
|
const uw_t instruction = m[pc];
|
3376 |
|
|
TRACE(pc, instruction, sp, rp);
|
3377 |
|
|
assert(!(sp & 0x8000) && !(rp & 0x8000));
|
3378 |
|
|
|
3379 |
|
|
if(0x8000 & instruction) { /* literal */
|
3380 |
|
|
m[++sp] = t;
|
3381 |
|
|
t = instruction & 0x7FFF;
|
3382 |
|
|
pc++;
|
3383 |
|
|
} else if ((0xE000 & instruction) == 0x6000) { /* ALU */
|
3384 |
|
|
uw_t n = m[sp], T = t;
|
3385 |
|
|
|
3386 |
|
|
pc = instruction & 0x10 ? m[rp] >> 1 : pc + 1;
|
3387 |
|
|
|
3388 |
|
|
switch((instruction >> 8u) & 0x1f) {
|
3389 |
|
|
case 0: /*T = t;*/ break;
|
3390 |
|
|
case 1: T = n; break;
|
3391 |
|
|
case 2: T = m[rp]; break;
|
3392 |
|
|
case 3: T = m[t>>1]; break;
|
3393 |
|
|
case 4: m[t>>1] = n; T = m[--sp]; break;
|
3394 |
|
|
case 5: d = (ud_t)t + (ud_t)n; T = d >> 16; m[sp] = d; n = d; break;
|
3395 |
|
|
case 6: d = (ud_t)t * (ud_t)n; T = d >> 16; m[sp] = d; n = d; break;
|
3396 |
|
|
case 7: T &= n; break;
|
3397 |
|
|
case 8: T |= n; break;
|
3398 |
|
|
case 9: T ^= n; break;
|
3399 |
|
|
case 10: T = ~t; break;
|
3400 |
|
|
case 11: T--; break;
|
3401 |
|
|
case 12: T = -(t == 0); break;
|
3402 |
|
|
case 13: T = -(t == n); break;
|
3403 |
|
|
case 14: T = -(n < t); break;
|
3404 |
|
|
case 15: T = -((sw_t)n < (sw_t)t); break;
|
3405 |
|
|
case 16: T = n >> t; break;
|
3406 |
|
|
case 17: T = n << t; break;
|
3407 |
|
|
case 18: T = sp << 1; break;
|
3408 |
|
|
case 19: T = rp << 1; break;
|
3409 |
|
|
case 20: sp = t >> 1; break;
|
3410 |
|
|
case 21: rp = t >> 1; T = n; break;
|
3411 |
|
|
case 22: T = save(h, block, n>>1, ((ud_t)T+1)>>1); break;
|
3412 |
|
|
case 23: T = fputc(t, out); break;
|
3413 |
|
|
case 24: T = fgetc(in); break;
|
3414 |
|
|
case 25: if(t) { T=n/t; t=n%t; n=t; }
|
3415 |
|
|
else { pc=1; T=10; n=T; t=n; } break;
|
3416 |
|
|
case 26: if(t) { T=(sw_t)n/(sw_t)t; t=(sw_t)n%(sw_t)t; n=t; }
|
3417 |
|
|
else { pc=1; T=10; n=T; t=n; } break;
|
3418 |
|
|
case 27: goto finished;
|
3419 |
|
|
}
|
3420 |
|
|
sp += delta[ instruction & 0x3];
|
3421 |
|
|
rp -= delta[(instruction >> 2) & 0x3];
|
3422 |
|
|
if(instruction & 0x20)
|
3423 |
|
|
T = n;
|
3424 |
|
|
if(instruction & 0x40)
|
3425 |
|
|
m[rp] = t;
|
3426 |
|
|
if(instruction & 0x80)
|
3427 |
|
|
m[sp] = t;
|
3428 |
|
|
t = T;
|
3429 |
|
|
} else if (0x4000 & instruction) { /* call */
|
3430 |
|
|
m[--rp] = (pc + 1) << 1;
|
3431 |
|
|
pc = instruction & 0x1FFF;
|
3432 |
|
|
} else if (0x2000 & instruction) { /* 0branch */
|
3433 |
|
|
pc = !t ? instruction & 0x1FFF : pc + 1;
|
3434 |
|
|
t = m[sp--];
|
3435 |
|
|
} else { /* branch */
|
3436 |
|
|
pc = instruction & 0x1FFF;
|
3437 |
|
|
}
|
3438 |
|
|
}
|
3439 |
|
|
finished:
|
3440 |
|
|
h->pc = pc; h->sp = sp; h->rp = rp; h->t = t;
|
3441 |
|
|
return (int16_t)t;
|
3442 |
|
|
}
|
3443 |
|
|
|
3444 |
|
|
int main(int argc, char **argv)
|
3445 |
|
|
{
|
3446 |
|
|
static forth_t h;
|
3447 |
|
|
int interactive = 0;
|
3448 |
|
|
if(argc < 4)
|
3449 |
|
|
goto fail;
|
3450 |
|
|
if(!strcmp(argv[1], "i"))
|
3451 |
|
|
interactive = 1;
|
3452 |
|
|
else if(strcmp(argv[1], "f"))
|
3453 |
|
|
goto fail;
|
3454 |
|
|
load(&h, argv[2]);
|
3455 |
|
|
for(int i = 4; i < argc; i++) {
|
3456 |
|
|
FILE *in = fopen_or_die(argv[i], "rb");
|
3457 |
|
|
const int r = forth(&h, in, stdout, argv[3]);
|
3458 |
|
|
fclose(in);
|
3459 |
|
|
if(r != 0) {
|
3460 |
|
|
fprintf(stderr, "run failed: %d\n", r);
|
3461 |
|
|
return r;
|
3462 |
|
|
}
|
3463 |
|
|
}
|
3464 |
|
|
if(interactive)
|
3465 |
|
|
return forth(&h, stdin, stdout, argv[3]);
|
3466 |
|
|
return 0;
|
3467 |
|
|
fail:
|
3468 |
|
|
fprintf(stderr, "usage: %s f|i input.blk output.blk file.fth\n", argv[0]);
|
3469 |
|
|
return -1;
|
3470 |
|
|
}
|
3471 |
|
|
|
3472 |
|
|
|
3473 |
|
|
### References
|
3474 |
|
|
|
3475 |
|
|
[H2 CPU]: https://github.com/howerj/forth-cpu
|
3476 |
|
|
[J1 CPU]: http://excamera.com/sphinx/fpga-j1.html
|
3477 |
|
|
[forth.c]: forth.c
|
3478 |
|
|
[compiler.c]: compiler.c
|
3479 |
|
|
[eforth.fth]: eforth.fth
|
3480 |
|
|
[C compiler]: https://gcc.gnu.org/
|
3481 |
|
|
[make]: https://www.gnu.org/software/make/
|
3482 |
|
|
[Windows]: https://en.wikipedia.org/wiki/Microsoft_Windows
|
3483 |
|
|
[Linux]: https://en.wikipedia.org/wiki/Linux
|
3484 |
|
|
[C99]: https://en.wikipedia.org/wiki/C99
|
3485 |
|
|
[meta.fth]: meta.fth
|
3486 |
|
|
[DOS]: https://en.wikipedia.org/wiki/DOS
|
3487 |
|
|
[8086]: https://en.wikipedia.org/wiki/Intel_8086
|
3488 |
|
|
[LZSS]: https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Storer%E2%80%93Szymanski
|
3489 |
|
|
[Run Length Encoding]: https://en.wikipedia.org/wiki/Run-length_encoding
|
3490 |
|
|
[Huffman]: https://en.wikipedia.org/wiki/Huffman_coding
|
3491 |
|
|
[Adaptive Huffman]: https://en.wikipedia.org/wiki/Adaptive_Huffman_coding
|