1 |
2 |
howe.r.j.8 |
-- File: bit.vhd
|
2 |
|
|
-- Author: Richard James Howe
|
3 |
|
|
-- Repository: https://github.com/howerj/bit-serial
|
4 |
|
|
-- License: MIT
|
5 |
|
|
-- Description: An N-bit, simple and small bit serial CPU
|
6 |
|
|
|
7 |
|
|
library ieee, work, std;
|
8 |
|
|
use ieee.std_logic_1164.all;
|
9 |
|
|
use ieee.numeric_std.all;
|
10 |
|
|
use std.textio.all; -- for debug only, not needed for synthesis
|
11 |
|
|
|
12 |
|
|
-- The bit-serial CPU itself, the interface is bit-serial as well as the
|
13 |
|
|
-- CPU itself, address and data are N bits wide. The enable lines are held
|
14 |
|
|
-- high for N cycles when data is clocked in or out, and a complete read or
|
15 |
|
|
-- write consists of N cycles (although those cycles may not be contiguous,
|
16 |
|
|
-- it is up to the BCPU). The three enable lines are mutually exclusive,
|
17 |
|
|
-- only one will be active at any time.
|
18 |
|
|
--
|
19 |
|
|
-- There are a few configurable items, but the defaults should work fine.
|
20 |
|
|
entity bcpu is
|
21 |
|
|
generic (
|
22 |
|
|
asynchronous_reset: boolean := true; -- use asynchronous reset if true, synchronous if false
|
23 |
|
|
delay: time := 0 ns; -- simulation only, gate delay
|
24 |
|
|
N: positive := 16; -- size the CPU, minimum is 8
|
25 |
|
|
parity: std_ulogic := '0'; -- set parity (even = '0'/odd = '1') of parity flag
|
26 |
|
|
jumpz: std_ulogic := '1'; -- jump on zero = '1', jump on non-zero = '0'
|
27 |
|
|
debug: natural := 0); -- debug level, 0 = off
|
28 |
|
|
port (
|
29 |
|
|
clk, rst: in std_ulogic;
|
30 |
|
|
i: in std_ulogic;
|
31 |
|
|
o, a: out std_ulogic;
|
32 |
|
|
oe, ie, ae: buffer std_ulogic;
|
33 |
|
|
stop: out std_ulogic);
|
34 |
|
|
end;
|
35 |
|
|
|
36 |
|
|
architecture rtl of bcpu is
|
37 |
|
|
type state_t is (RESET, FETCH, INDIRECT, OPERAND, EXECUTE, STORE, LOAD, ADVANCE, HALT);
|
38 |
|
|
type cmd_t is (
|
39 |
|
|
iOR, iAND, iXOR, iADD,
|
40 |
|
|
iLSHIFT, iRSHIFT, iLOAD, iSTORE,
|
41 |
|
|
iLOADC, iSTOREC, iLITERAL, iUNUSED,
|
42 |
|
|
iJUMP, iJUMPZ, iSET, iGET);
|
43 |
|
|
|
44 |
|
|
constant Cy: integer := 0; -- Carry; set by addition
|
45 |
|
|
constant Z: integer := 1; -- Accumulator is zero
|
46 |
|
|
constant Ng: integer := 2; -- Accumulator is negative
|
47 |
|
|
constant R: integer := 3; -- Reset CPU
|
48 |
|
|
constant HLT: integer := 4; -- Halt CPU
|
49 |
|
|
|
50 |
|
|
type registers_t is record
|
51 |
|
|
state: state_t; -- state machine register
|
52 |
|
|
choice: state_t; -- computed next state
|
53 |
|
|
first: boolean; -- First flag, for setting up an instruction
|
54 |
|
|
last4: boolean; -- Are we processing the last 4 bits of the instruction?
|
55 |
|
|
indir: boolean; -- does the instruction require indirection of the operand?
|
56 |
|
|
tcarry: std_ulogic; -- temporary carry flag
|
57 |
|
|
dline: std_ulogic_vector(N - 1 downto 0); -- delay line, 16 cycles, our timer
|
58 |
|
|
acc: std_ulogic_vector(N - 1 downto 0); -- accumulator
|
59 |
|
|
pc: std_ulogic_vector(N - 1 downto 0); -- program counter
|
60 |
|
|
op: std_ulogic_vector(N - 1 downto 0); -- operand to instruction
|
61 |
|
|
flags: std_ulogic_vector(N - 1 downto 0); -- flags register
|
62 |
|
|
cmd: std_ulogic_vector(3 downto 0); -- instruction
|
63 |
|
|
end record;
|
64 |
|
|
|
65 |
|
|
constant registers_default: registers_t := (
|
66 |
|
|
state => RESET,
|
67 |
|
|
choice => RESET,
|
68 |
|
|
first => true,
|
69 |
|
|
last4 => false,
|
70 |
|
|
indir => false,
|
71 |
|
|
tcarry => 'X',
|
72 |
|
|
dline => (others => '0'),
|
73 |
|
|
acc => (others => 'X'),
|
74 |
|
|
pc => (others => 'X'),
|
75 |
|
|
op => (others => 'X'),
|
76 |
|
|
flags => (others => 'X'),
|
77 |
|
|
cmd => (others => 'X'));
|
78 |
|
|
|
79 |
|
|
signal c, f: registers_t := registers_default; -- BCPU registers, all of them.
|
80 |
|
|
-- These signals are not used to hold state. The 'c' and 'f' registers
|
81 |
|
|
-- do that.
|
82 |
|
|
signal cmd: cmd_t; -- Shows up nicely in traces as an enumerated value
|
83 |
|
|
signal add1, add2, acin, ares, acout: std_ulogic; -- shared adder signals
|
84 |
|
|
signal last4, last: std_ulogic; -- state sequence signals
|
85 |
|
|
|
86 |
|
|
-- 'adder' implements a full adder, which is all we need to implement
|
87 |
|
|
-- N-bit addition in a bit serial architecture. It is used in the instruction
|
88 |
|
|
-- and to increment the program counter.
|
89 |
|
|
procedure adder (x, y, cin: in std_ulogic; signal sum, cout: out std_ulogic) is
|
90 |
|
|
begin
|
91 |
|
|
sum <= x xor y xor cin after delay;
|
92 |
|
|
cout <= (x and y) or (cin and (x xor y)) after delay;
|
93 |
|
|
end procedure;
|
94 |
|
|
|
95 |
|
|
-- 'bit_count' is used for assertions and nothing else. It counts the
|
96 |
|
|
-- number of bits in a 'std_ulogic_vector'.
|
97 |
|
|
function bit_count(bc: in std_ulogic_vector) return natural is
|
98 |
|
|
variable count: natural := 0;
|
99 |
|
|
begin
|
100 |
|
|
for index in bc'range loop
|
101 |
|
|
if bc(index) = '1' then
|
102 |
|
|
count := count + 1;
|
103 |
|
|
end if;
|
104 |
|
|
end loop;
|
105 |
|
|
return count;
|
106 |
|
|
end function;
|
107 |
|
|
|
108 |
|
|
-- Obviously this does not synthesis, which is why synthesis is turned
|
109 |
|
|
-- off for the body of this function, it does make debugging much easier
|
110 |
|
|
-- though, we will be able to see which instructions are executed and do so
|
111 |
|
|
-- by name.
|
112 |
|
|
procedure print_debug_info is
|
113 |
|
|
variable ll: line;
|
114 |
|
|
|
115 |
|
|
function hx(slv: in std_ulogic_vector) return string is -- std_ulogic_vector to hex string
|
116 |
|
|
constant cv: string := "0123456789ABCDEF";
|
117 |
|
|
constant qu: integer := slv'length / 4;
|
118 |
|
|
constant rm: integer := slv'length mod 4;
|
119 |
|
|
variable rs: string(1 to qu);
|
120 |
|
|
variable sl: std_ulogic_vector(3 downto 0);
|
121 |
|
|
begin
|
122 |
|
|
assert rm = 0 severity failure;
|
123 |
|
|
for l in 0 to qu - 1 loop
|
124 |
|
|
sl := slv((l * 4) + 3 downto (l * 4));
|
125 |
|
|
rs(qu - l) := cv(to_integer(unsigned(sl)) + 1);
|
126 |
|
|
end loop;
|
127 |
|
|
return rs;
|
128 |
|
|
end function;
|
129 |
|
|
|
130 |
|
|
function yn(sl: std_ulogic; ch: character) return string is -- print a flag
|
131 |
|
|
variable rs: string(1 to 2) := "- ";
|
132 |
|
|
begin
|
133 |
|
|
if sl = '1' then
|
134 |
|
|
rs(1) := ch;
|
135 |
|
|
end if;
|
136 |
|
|
return rs;
|
137 |
|
|
end function;
|
138 |
|
|
begin
|
139 |
|
|
-- synthesis translate_off
|
140 |
|
|
if debug > 0 then
|
141 |
|
|
if c.state = EXECUTE and c.first then
|
142 |
|
|
write(ll, hx(c.pc) & ": ");
|
143 |
|
|
write(ll, cmd_t'image(cmd) & HT);
|
144 |
|
|
write(ll, hx(c.acc) & " ");
|
145 |
|
|
write(ll, hx(c.op) & " ");
|
146 |
|
|
write(ll, hx(c.flags) & " ");
|
147 |
|
|
write(ll, yn(c.flags(Cy), 'C'));
|
148 |
|
|
write(ll, yn(c.flags(Z), 'Z'));
|
149 |
|
|
write(ll, yn(c.flags(Ng), 'N'));
|
150 |
|
|
write(ll, yn(c.flags(R), 'R'));
|
151 |
|
|
write(ll, yn(c.flags(HLT), 'H'));
|
152 |
|
|
writeline(OUTPUT, ll);
|
153 |
|
|
end if;
|
154 |
|
|
if debug > 1 and last = '1' then
|
155 |
|
|
write(ll, state_t'image(c.state) & " => ");
|
156 |
|
|
write(ll, state_t'image(f.state));
|
157 |
|
|
writeline(OUTPUT, ll);
|
158 |
|
|
end if;
|
159 |
|
|
end if;
|
160 |
|
|
-- synthesis translate_on
|
161 |
|
|
end procedure;
|
162 |
|
|
begin
|
163 |
|
|
assert N >= 8 report "CPU Width too small: N >= 8" severity failure;
|
164 |
|
|
assert not (ie = '1' and oe = '1') report "input/output at the same time" severity failure;
|
165 |
|
|
assert not (ie = '1' and ae = '1') report "input whilst changing address" severity failure;
|
166 |
|
|
assert not (oe = '1' and ae = '1') report "output whilst changing address" severity failure;
|
167 |
|
|
|
168 |
|
|
adder (add1, add2, acin, ares, acout); -- shared adder
|
169 |
|
|
cmd <= cmd_t'val(to_integer(unsigned(c.cmd))); -- used for debug purposes
|
170 |
|
|
last4 <= c.dline(c.dline'high - 4) after delay; -- processing last four bits?
|
171 |
|
|
last <= c.dline(c.dline'high) after delay; -- processing last bit?
|
172 |
|
|
|
173 |
|
|
process (clk, rst) begin
|
174 |
|
|
if rst = '1' and asynchronous_reset then
|
175 |
|
|
c.dline <= (others => '0') after delay; -- parallel reset!
|
176 |
|
|
c.state <= RESET after delay;
|
177 |
|
|
elsif rising_edge(clk) then
|
178 |
|
|
c <= f after delay;
|
179 |
|
|
if rst = '1' and not asynchronous_reset then
|
180 |
|
|
c.dline <= (others => '0') after delay;
|
181 |
|
|
c.state <= RESET after delay;
|
182 |
|
|
else
|
183 |
|
|
-- These are just assertions/debug logging, they are not required for
|
184 |
|
|
-- running, but we can make sure there are no unexpected state transitions,
|
185 |
|
|
-- and report on the internal state.
|
186 |
|
|
print_debug_info;
|
187 |
|
|
if c.state = RESET and last = '1' then assert f.state = FETCH; end if;
|
188 |
|
|
if c.state = LOAD and last = '1' then assert f.state = ADVANCE; end if;
|
189 |
|
|
if c.state = STORE and last = '1' then assert f.state = ADVANCE; end if;
|
190 |
|
|
if c.state = ADVANCE and last = '1' then assert f.state = FETCH; end if;
|
191 |
|
|
if c.state = HALT then assert f.state = HALT; end if;
|
192 |
|
|
if c.state = EXECUTE and last = '1' then
|
193 |
|
|
assert f.state = ADVANCE or f.state = LOAD or f.state = STORE or f.state = FETCH;
|
194 |
|
|
end if;
|
195 |
|
|
end if;
|
196 |
|
|
assert not (c.first xor f.dline(0) = '1') report "first/dline";
|
197 |
|
|
end if;
|
198 |
|
|
end process;
|
199 |
|
|
|
200 |
|
|
process (i, c, cmd, ares, acout, last, last4) begin
|
201 |
|
|
o <= '0' after delay;
|
202 |
|
|
a <= '0' after delay;
|
203 |
|
|
ie <= '0' after delay;
|
204 |
|
|
ae <= '0' after delay;
|
205 |
|
|
oe <= '0' after delay;
|
206 |
|
|
stop <= '0' after delay;
|
207 |
|
|
add1 <= '0' after delay;
|
208 |
|
|
add2 <= '0' after delay;
|
209 |
|
|
acin <= '0' after delay;
|
210 |
|
|
f <= c after delay;
|
211 |
|
|
f.dline <= c.dline(c.dline'high - 1 downto 0) & "0" after delay;
|
212 |
|
|
|
213 |
|
|
-- Our delay line should only contain zero on one bit at a time
|
214 |
|
|
if c.first then
|
215 |
|
|
assert bit_count(c.dline) = 0 report "too many dline bits";
|
216 |
|
|
else
|
217 |
|
|
assert bit_count(c.dline) = 1 report "missing dline bit";
|
218 |
|
|
end if;
|
219 |
|
|
|
220 |
|
|
-- The processor works by using a delay line (shift register) to
|
221 |
|
|
-- sequence actions, the top four bits are used for the
|
222 |
|
|
-- instruction (and if the highest bit is set indirection
|
223 |
|
|
-- is _not_ allowed), with the lowest twelve bits as an operand
|
224 |
|
|
-- to use as a literal value or an address.
|
225 |
|
|
--
|
226 |
|
|
-- As such, we will want to trigger actions when processing the first
|
227 |
|
|
-- bit, the last four bits and the last bit.
|
228 |
|
|
--
|
229 |
|
|
-- This delay line is used to save gates as opposed using a counter,
|
230 |
|
|
-- which would require an adder (but not a comparator - we could check
|
231 |
|
|
-- whether individual bits are set because all the comparisons are
|
232 |
|
|
-- against power of two values).
|
233 |
|
|
--
|
234 |
|
|
if last = '1' then
|
235 |
|
|
f.state <= c.choice after delay;
|
236 |
|
|
f.first <= true after delay;
|
237 |
|
|
f.last4 <= false after delay;
|
238 |
|
|
-- This is a bit of a hack, in order to place it in its proper
|
239 |
|
|
-- place within the 'FETCH' state we would need to move the
|
240 |
|
|
-- 'indirection allowed on instruction' bit from the highest
|
241 |
|
|
-- bit to a lower bit so we can perform the state decision before
|
242 |
|
|
-- the bit is being processed.
|
243 |
|
|
if i = '0' and c.state = FETCH then
|
244 |
|
|
f.indir <= true;
|
245 |
|
|
f.state <= INDIRECT; -- Override FETCH Choice!
|
246 |
|
|
end if;
|
247 |
|
|
elsif last4 = '1' then
|
248 |
|
|
f.last4 <= true after delay;
|
249 |
|
|
end if;
|
250 |
|
|
|
251 |
|
|
-- Each state lasts N (which defaults to 16) + 1 cycles.
|
252 |
|
|
-- Of note: we could make the FETCH state last only 4 + 1 cycles
|
253 |
|
|
-- and merge the operand fetching in FETCH (and OPERAND state) into
|
254 |
|
|
-- the 'EXECUTE' state.
|
255 |
|
|
case c.state is
|
256 |
|
|
when RESET =>
|
257 |
|
|
f.choice <= FETCH;
|
258 |
|
|
if c.first then
|
259 |
|
|
f.dline(0) <= '1' after delay;
|
260 |
|
|
f.first <= false after delay;
|
261 |
|
|
else
|
262 |
|
|
ae <= '1' after delay;
|
263 |
|
|
f.acc <= "0" & c.acc(c.acc'high downto 1) after delay;
|
264 |
|
|
f.pc <= "0" & c.pc (c.pc'high downto 1) after delay;
|
265 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
266 |
|
|
f.flags <= "0" & c.flags(c.flags'high downto 1) after delay;
|
267 |
|
|
end if;
|
268 |
|
|
-- When in the running state all state transitions pass through FETCH.
|
269 |
|
|
-- FETCH does what you expect from it, it fetches the instruction. It also
|
270 |
|
|
-- partially decodes it and sets flags that the accumulator depends on.
|
271 |
|
|
--
|
272 |
|
|
-- What is meant by partially decoding is this; it is determined if we
|
273 |
|
|
-- should go to the INDIRECT state next or to the EXECUTE state, also
|
274 |
|
|
-- it is determined whether an I/O operation should be performed for those
|
275 |
|
|
-- instructions capable of doing I/O.
|
276 |
|
|
when FETCH =>
|
277 |
|
|
if c.first then
|
278 |
|
|
f.dline(0) <= '1' after delay;
|
279 |
|
|
f.first <= false after delay;
|
280 |
|
|
f.indir <= false after delay;
|
281 |
|
|
f.flags(Z) <= '1' after delay;
|
282 |
|
|
else
|
283 |
|
|
ie <= '1' after delay;
|
284 |
|
|
|
285 |
|
|
if c.acc(0) = '1' then -- determine flag status before EXECUTE
|
286 |
|
|
f.flags(Z) <= '0' after delay;
|
287 |
|
|
end if;
|
288 |
|
|
f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
|
289 |
|
|
|
290 |
|
|
if not c.last4 then
|
291 |
|
|
f.op <= i & c.op(c.op'high downto 1) after delay;
|
292 |
|
|
else
|
293 |
|
|
f.cmd <= i & c.cmd(c.cmd'high downto 1) after delay;
|
294 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
295 |
|
|
end if;
|
296 |
|
|
end if;
|
297 |
|
|
|
298 |
|
|
f.flags(Ng) <= c.acc(0) after delay; -- contains highest bit when 'last' is true
|
299 |
|
|
|
300 |
|
|
-- NB. 'f.choice' may be overwritten for INDIRECT.
|
301 |
|
|
if c.flags(HLT) = '1' then
|
302 |
|
|
f.choice <= HALT after delay;
|
303 |
|
|
elsif c.flags(R) = '1' then
|
304 |
|
|
f.choice <= RESET after delay;
|
305 |
|
|
else
|
306 |
|
|
f.choice <= EXECUTE after delay;
|
307 |
|
|
end if;
|
308 |
|
|
-- INDIRECT is only used instruction allows for indirection
|
309 |
|
|
-- (ie. All those instructions in which the top bit is not set).
|
310 |
|
|
-- The indirection add 2*(N+1) cycles to the instruction so is quite expensive.
|
311 |
|
|
--
|
312 |
|
|
-- We could avoid having this state and CPU functionality if we were to
|
313 |
|
|
-- make use of self-modifying code, however that would make programming the CPU
|
314 |
|
|
-- more difficult.
|
315 |
|
|
--
|
316 |
|
|
when INDIRECT =>
|
317 |
|
|
assert c.cmd(c.cmd'high) = '0' severity error;
|
318 |
|
|
f.choice <= EXECUTE after delay;
|
319 |
|
|
if c.first then
|
320 |
|
|
f.dline(0) <= '1' after delay;
|
321 |
|
|
f.first <= false after delay;
|
322 |
|
|
else
|
323 |
|
|
ae <= '1' after delay;
|
324 |
|
|
a <= c.op(0) after delay;
|
325 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
326 |
|
|
f.choice <= OPERAND after delay;
|
327 |
|
|
end if;
|
328 |
|
|
-- OPERAND fetches the operand *again*, this time using the operand
|
329 |
|
|
-- acquired in EXECUTE, the address being set in the previous INDIRECT state.
|
330 |
|
|
when OPERAND =>
|
331 |
|
|
f.choice <= EXECUTE after delay;
|
332 |
|
|
if c.first then
|
333 |
|
|
f.dline(0) <= '1' after delay;
|
334 |
|
|
f.first <= false after delay;
|
335 |
|
|
else
|
336 |
|
|
ie <= '1' after delay;
|
337 |
|
|
f.op <= i & c.op(c.op'high downto 1) after delay;
|
338 |
|
|
end if;
|
339 |
|
|
-- The EXECUTE state implements the ALU. It is the most seemingly the
|
340 |
|
|
-- most complex state, but it is not (FETCH is more difficult to
|
341 |
|
|
-- understand).
|
342 |
|
|
when EXECUTE =>
|
343 |
|
|
assert not (c.flags(Z) = '1' and c.flags(Ng) = '1') report "zero and negative?";
|
344 |
|
|
f.choice <= ADVANCE after delay;
|
345 |
|
|
if c.first then
|
346 |
|
|
f.dline(0) <= '1' after delay;
|
347 |
|
|
f.first <= false after delay;
|
348 |
|
|
if cmd = iADD then f.flags(Cy) <= '0' after delay; end if;
|
349 |
|
|
-- 'tcarry' is added to the program counter in the ADVANCE
|
350 |
|
|
-- state, instructions that affect the program counter clear
|
351 |
|
|
-- it (such as iJUMP, and iJUMPZ/iSET (conditionally).
|
352 |
|
|
f.tcarry <= '1' after delay;
|
353 |
|
|
else
|
354 |
|
|
case cmd is -- ALU
|
355 |
|
|
when iOR =>
|
356 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
357 |
|
|
f.acc <= (c.op(0) or c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
|
358 |
|
|
when iAND =>
|
359 |
|
|
f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
|
360 |
|
|
if (not c.last4) or c.indir then
|
361 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
362 |
|
|
f.acc <= (c.op(0) and c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
|
363 |
|
|
end if;
|
364 |
|
|
when iXOR =>
|
365 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
366 |
|
|
f.acc <= (c.op(0) xor c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
|
367 |
|
|
when iADD =>
|
368 |
|
|
f.acc <= "0" & c.acc(c.acc'high downto 1) after delay;
|
369 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
370 |
|
|
add1 <= c.acc(0) after delay;
|
371 |
|
|
add2 <= c.op(0) after delay;
|
372 |
|
|
acin <= c.flags(Cy) after delay;
|
373 |
|
|
f.acc(f.acc'high) <= ares after delay;
|
374 |
|
|
f.flags(Cy) <= acout after delay;
|
375 |
|
|
-- A barrel shifter is usually quite an expensive piece of hardware,
|
376 |
|
|
-- but it ends up being quite cheap for obvious reasons. If we really
|
377 |
|
|
-- needed to we could dispense with the right shift, we could mask off
|
378 |
|
|
-- low bits and rotate (either way) to emulate it.
|
379 |
|
|
when iLSHIFT =>
|
380 |
|
|
if c.op(0) = '1' then
|
381 |
|
|
f.acc <= c.acc(c.acc'high - 1 downto 0) & "0" after delay;
|
382 |
|
|
end if;
|
383 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
384 |
|
|
when iRSHIFT =>
|
385 |
|
|
if c.op(0) = '1' then
|
386 |
|
|
f.acc <= "0" & c.acc(c.acc'high downto 1) after delay;
|
387 |
|
|
end if;
|
388 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
389 |
|
|
-- We have two sets of LOAD/STORE instructions, one set which
|
390 |
|
|
-- optionally respects the indirect flag, and one set (the latter)
|
391 |
|
|
-- which never does. This allows us to perform direct LOAD/STORES
|
392 |
|
|
-- when the indirect flag is on.
|
393 |
|
|
when iLOAD =>
|
394 |
|
|
ae <= '1' after delay;
|
395 |
|
|
a <= c.op(0) after delay;
|
396 |
|
|
f.op <= c.op(0) & c.op(c.op'high downto 1) after delay;
|
397 |
|
|
f.choice <= LOAD after delay;
|
398 |
|
|
when iSTORE =>
|
399 |
|
|
ae <= '1' after delay;
|
400 |
|
|
a <= c.op(0) after delay;
|
401 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
402 |
|
|
f.choice <= STORE after delay;
|
403 |
|
|
when iLOADC =>
|
404 |
|
|
ae <= '1' after delay;
|
405 |
|
|
a <= c.op(0) after delay;
|
406 |
|
|
f.op <= c.op(0) & c.op(c.op'high downto 1) after delay;
|
407 |
|
|
f.choice <= LOAD after delay;
|
408 |
|
|
when iSTOREC =>
|
409 |
|
|
ae <= '1' after delay;
|
410 |
|
|
a <= c.op(0) after delay;
|
411 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
412 |
|
|
f.choice <= STORE after delay;
|
413 |
|
|
when iLITERAL =>
|
414 |
|
|
f.acc <= c.op(0) & c.acc(c.acc'high downto 1) after delay;
|
415 |
|
|
f.op <= "0" & c.op (c.op'high downto 1) after delay;
|
416 |
|
|
when iUNUSED =>
|
417 |
|
|
-- We could use this if we need to extend the instruction set
|
418 |
|
|
-- for any reason. I cannot think of a good one that justifies the
|
419 |
|
|
-- cost of a new instruction. So this will remain blank for now.
|
420 |
|
|
--
|
421 |
|
|
-- Candidates for an instruction include:
|
422 |
|
|
--
|
423 |
|
|
-- * Arithmetic Right Shift
|
424 |
|
|
-- * Subtraction
|
425 |
|
|
-- * Swap Low/High Byte (may be difficult to implement)
|
426 |
|
|
--
|
427 |
|
|
-- However, this instruction may not have its indirection bit set,
|
428 |
|
|
-- This would not be a problem for the swap instruction. Alternatively
|
429 |
|
|
-- an 'add-constant' could be added.
|
430 |
|
|
--
|
431 |
|
|
when iJUMP =>
|
432 |
|
|
ae <= '1' after delay;
|
433 |
|
|
a <= c.op(0) after delay;
|
434 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
435 |
|
|
f.pc <= c.op(0) & c.pc(c.pc'high downto 1) after delay;
|
436 |
|
|
f.choice <= FETCH after delay;
|
437 |
|
|
when iJUMPZ =>
|
438 |
|
|
if c.flags(Z) = jumpz then
|
439 |
|
|
ae <= '1' after delay;
|
440 |
|
|
a <= c.op(0) after delay;
|
441 |
|
|
f.op <= "0" & c.op(c.op'high downto 1) after delay;
|
442 |
|
|
f.pc <= c.op(0) & c.pc(c.pc'high downto 1) after delay;
|
443 |
|
|
f.choice <= FETCH after delay;
|
444 |
|
|
end if;
|
445 |
|
|
-- NB. We could probably eliminate these instructions by mapping
|
446 |
|
|
-- the registers into the memory address space, this would free
|
447 |
|
|
-- up another two instructions, and potentially simplify the CPU.
|
448 |
|
|
--
|
449 |
|
|
when iSET =>
|
450 |
|
|
if c.op(0) = '0' then
|
451 |
|
|
-- NB. We could set the address directly here and
|
452 |
|
|
-- go to FETCH but that costs us too much time and gates.
|
453 |
|
|
f.pc <= c.acc(0) & c.pc(c.pc'high downto 1) after delay;
|
454 |
|
|
f.tcarry <= '0' after delay;
|
455 |
|
|
else
|
456 |
|
|
f.flags <= c.acc(0) & c.flags(c.flags'high downto 1) after delay;
|
457 |
|
|
end if;
|
458 |
|
|
f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
|
459 |
|
|
when iGET =>
|
460 |
|
|
if c.op(0) = '0' then
|
461 |
|
|
f.acc <= c.pc(0) & c.acc(c.acc'high downto 1) after delay;
|
462 |
|
|
f.pc <= c.pc(0) & c.pc(c.pc'high downto 1) after delay;
|
463 |
|
|
else
|
464 |
|
|
f.acc <= c.flags(0) & c.acc(c.acc'high downto 1) after delay;
|
465 |
|
|
f.flags <= c.flags(0) & c.flags(c.flags'high downto 1) after delay;
|
466 |
|
|
end if;
|
467 |
|
|
end case;
|
468 |
|
|
end if;
|
469 |
|
|
-- Unfortunately we cannot perform a load or a store whilst we are
|
470 |
|
|
-- performing an EXECUTE, so we require STORE and LOAD states to do
|
471 |
|
|
-- more work after a LOAD or STORE instruction.
|
472 |
|
|
when STORE =>
|
473 |
|
|
f.choice <= ADVANCE after delay;
|
474 |
|
|
if c.first then
|
475 |
|
|
f.dline(0) <= '1' after delay;
|
476 |
|
|
f.first <= false after delay;
|
477 |
|
|
else
|
478 |
|
|
o <= c.acc(0) after delay;
|
479 |
|
|
oe <= '1' after delay;
|
480 |
|
|
f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
|
481 |
|
|
end if;
|
482 |
|
|
when LOAD =>
|
483 |
|
|
f.choice <= ADVANCE after delay;
|
484 |
|
|
if c.first then
|
485 |
|
|
f.dline(0) <= '1' after delay;
|
486 |
|
|
f.first <= false after delay;
|
487 |
|
|
else
|
488 |
|
|
ie <= '1' after delay;
|
489 |
|
|
f.acc <= i & c.acc(c.acc'high downto 1) after delay;
|
490 |
|
|
end if;
|
491 |
|
|
-- ADVANCE reuses our adder in iADD to add one to the program counter
|
492 |
|
|
-- this state *is* reached when we do a iJUMP, iJUMPZ or an iSET on the
|
493 |
|
|
-- program counter, those instructions clear the 'tcarry', which is
|
494 |
|
|
-- normally '1'.
|
495 |
|
|
when ADVANCE =>
|
496 |
|
|
f.choice <= FETCH after delay;
|
497 |
|
|
if c.first then
|
498 |
|
|
f.dline(0) <= '1' after delay;
|
499 |
|
|
f.first <= false after delay;
|
500 |
|
|
else
|
501 |
|
|
f.pc <= "0" & c.pc(c.pc'high downto 1) after delay;
|
502 |
|
|
add1 <= c.pc(0) after delay;
|
503 |
|
|
-- A 'skip' facility could be made by optionally setting this to '1'
|
504 |
|
|
-- for the first cycle, incrementing the program counter by 2.
|
505 |
|
|
add2 <= '0' after delay;
|
506 |
|
|
acin <= c.tcarry after delay;
|
507 |
|
|
f.pc(f.pc'high) <= ares after delay;
|
508 |
|
|
a <= ares after delay;
|
509 |
|
|
f.tcarry <= acout after delay;
|
510 |
|
|
ae <= '1' after delay;
|
511 |
|
|
end if;
|
512 |
|
|
when HALT => stop <= '1' after delay;
|
513 |
|
|
end case;
|
514 |
|
|
end process;
|
515 |
|
|
end architecture;
|
516 |
|
|
|