OpenCores
URL https://opencores.org/ocsvn/bitserial/bitserial/trunk

Subversion Repositories bitserial

[/] [bitserial/] [trunk/] [bit.vhd] - Rev 2

Compare with Previous | Blame | View Log

-- File:        bit.vhd
-- Author:      Richard James Howe
-- Repository:  https://github.com/howerj/bit-serial
-- License:     MIT
-- Description: An N-bit, simple and small bit serial CPU
 
library ieee, work, std;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all; -- for debug only, not needed for synthesis
 
-- The bit-serial CPU itself, the interface is bit-serial as well as the
-- CPU itself, address and data are N bits wide. The enable lines are held
-- high for N cycles when data is clocked in or out, and a complete read or
-- write consists of N cycles (although those cycles may not be contiguous,
-- it is up to the BCPU). The three enable lines are mutually exclusive,
-- only one will be active at any time.
--
-- There are a few configurable items, but the defaults should work fine.
entity bcpu is
	generic (
		-- Reset style: true selects an asynchronous reset, false a synchronous one.
		asynchronous_reset: boolean    := true;
		-- Simulation only: delay applied to the signal assignments (gate delay).
		delay:              time       := 0 ns;
		-- Width of the CPU in bits; the architecture asserts that N >= 8.
		N:                  positive   := 16;
		-- Parity of the parity flag: even = '0', odd = '1'.
		parity:             std_ulogic := '0';
		-- Polarity of the conditional jump: '1' = jump on zero, '0' = jump on non-zero.
		jumpz:              std_ulogic := '1';
		-- Debug verbosity; 0 turns all debug output off.
		debug:              natural    := 0);
	port (
		clk:      in std_ulogic; -- clock, registers update on the rising edge
		rst:      in std_ulogic; -- reset, active high
		i:        in std_ulogic; -- serial data input, consumed while 'ie' is high
		o:       out std_ulogic; -- serial data output, driven while 'oe' is high
		a:       out std_ulogic; -- serial address output, driven while 'ae' is high
		oe:   buffer std_ulogic; -- output enable
		ie:   buffer std_ulogic; -- input enable
		ae:   buffer std_ulogic; -- address enable
		stop:    out std_ulogic); -- high while the CPU sits in its HALT state
end entity;
 
architecture rtl of bcpu is
	-- States of the CPU state machine held in 'c.state'.
	type state_t is (RESET, FETCH, INDIRECT, OPERAND, EXECUTE, STORE, LOAD, ADVANCE, HALT);
	-- Instruction names. The position of each literal is significant: the
	-- four bit 'c.cmd' field is converted to this type with cmd_t'val, so
	-- iOR is opcode 0 and iGET is opcode 15.
	type cmd_t is (
		iOR,     iAND,    iXOR,     iADD,
		iLSHIFT, iRSHIFT, iLOAD,    iSTORE,
		iLOADC,  iSTOREC, iLITERAL, iUNUSED,
		iJUMP,   iJUMPZ,  iSET,     iGET);
 
	-- Bit positions within the flags register.
	constant Cy:  integer :=  0; -- Carry; cleared at the start of iADD, then carries between its bits
	constant Z:   integer :=  1; -- Accumulator is zero; computed during FETCH
	constant Ng:  integer :=  2; -- Accumulator is negative; top accumulator bit, captured during FETCH
	constant R:   integer :=  3; -- Reset CPU; when set FETCH selects the RESET state
	constant HLT: integer :=  4; -- Halt CPU; when set FETCH selects the HALT state
 
	-- All of the state held by the CPU, kept in one record so that the
	-- registered copy and the computed next value can each be one signal.
	type registers_t is record
		state:  state_t;    -- state machine register
		choice: state_t;    -- state to enter once the current state has finished
		first:  boolean;    -- First flag, true for the first cycle of a state
		last4:  boolean;    -- Are we processing the last 4 bits of the instruction?
		indir:  boolean;    -- does the instruction require indirection of the operand?
		tcarry: std_ulogic; -- temporary carry flag, used when advancing the program counter
		dline:  std_ulogic_vector(N - 1 downto 0); -- delay line, N bits (16 by default), our timer
		acc:    std_ulogic_vector(N - 1 downto 0); -- accumulator
		pc:     std_ulogic_vector(N - 1 downto 0); -- program counter
		op:     std_ulogic_vector(N - 1 downto 0); -- operand to instruction
		flags:  std_ulogic_vector(N - 1 downto 0); -- flags register
		cmd:    std_ulogic_vector(3 downto 0);     -- instruction
	end record;
 
	-- Initial value of the registers. Only the sequencing fields have a
	-- defined value; the data registers start as 'X'.
	constant registers_default: registers_t := (
		state  => RESET,
		choice => RESET,
		first  => true,
		last4  => false,
		indir  => false,
		tcarry => 'X',
		dline  => (others => '0'),
		acc    => (others => 'X'),
		pc     => (others => 'X'),
		op     => (others => 'X'),
		flags  => (others => 'X'),
		cmd    => (others => 'X'));
 
	-- 'c' is the current (clocked) register contents and 'f' is the next
	-- value, computed combinationally and copied into 'c' on the clock edge.
	signal c, f: registers_t := registers_default; -- BCPU registers, all of them.
	-- These signals are not used to hold state. The 'c' and 'f' registers
	-- do that.
	signal cmd: cmd_t; -- Shows up nicely in traces as an enumerated value
	signal add1, add2, acin, ares, acout: std_ulogic; -- shared adder signals
	signal last4, last:                   std_ulogic; -- state sequence signals, taps on 'c.dline'
 
	-- 'adder' is a single bit full adder. Because the CPU is bit serial one
	-- of these is enough to add N-bit numbers, a bit per clock cycle. The
	-- same adder serves the add instruction and the program counter increment.
	--
	-- x, y: the two bits to add
	-- cin:  carry in
	-- sum:  sum bit (signal)
	-- cout: carry out (signal)
	procedure adder (x, y, cin: in std_ulogic; signal sum, cout: out std_ulogic) is
		variable half: std_ulogic; -- sum of the two operand bits, without the carry
	begin
		half := x xor y;
		cout <= (x and y) or (cin and half) after delay;
		sum  <= half xor cin after delay;
	end procedure;
 
	-- 'bit_count' returns how many elements of a 'std_ulogic_vector' are
	-- exactly '1'; every other value (including 'H' and 'X') is ignored.
	-- It exists purely to support assertions.
	function bit_count(bc: in std_ulogic_vector) return natural is
		variable ones: natural := 0;
	begin
		for position in bc'low to bc'high loop
			case bc(position) is
				when '1'    => ones := ones + 1;
				when others => null;
			end case;
		end loop;
		return ones;
	end function;
 
	-- 'print_debug_info' writes a trace of the CPU to the simulator's standard
	-- output. It cannot be synthesized, which is why the body of the procedure
	-- is wrapped in 'synthesis translate_off/on' pragmas. It makes debugging
	-- much easier: instructions are printed by name as they are executed.
	--
	-- Nothing is printed unless the 'debug' generic is greater than zero:
	--
	-- * debug > 0: one line per instruction, printed on the first cycle of
	--   EXECUTE, containing the program counter, instruction name,
	--   accumulator, operand, flags register (all in hex) and the decoded flags.
	-- * debug > 1: additionally prints each state transition.
	procedure print_debug_info is
		variable ll: line;

		-- Convert a 'std_ulogic_vector' to an upper case hex string, most
		-- significant digit first. A vector whose length is not a multiple
		-- of four is zero extended at its most significant end, so any CPU
		-- width can be printed. The index range of 'slv' does not matter.
		function hx(slv: in std_ulogic_vector) return string is
			constant cv: string := "0123456789ABCDEF";
			constant qu: integer := (slv'length + 3) / 4; -- number of hex digits, rounded up
			variable pd: std_ulogic_vector((qu * 4) - 1 downto 0) := (others => '0');
			variable rs: string(1 to qu);
			variable sl: std_ulogic_vector(3 downto 0);
		begin
			pd(slv'length - 1 downto 0) := slv; -- normalise the range and pad with zeros
			for nibble in 0 to qu - 1 loop
				sl := pd((nibble * 4) + 3 downto (nibble * 4));
				rs(qu - nibble) := cv(to_integer(unsigned(sl)) + 1); -- 'cv' is indexed from 1
			end loop;
			return rs;
		end function;

		-- Print a flag: the character 'ch' if the flag is set, '-' if it is
		-- not, followed by a space.
		function yn(sl: std_ulogic; ch: character) return string is
			variable rs: string(1 to 2) := "- ";
		begin
			if sl = '1' then
				rs(1) := ch;
			end if;
			return rs;
		end function;
	begin
		-- synthesis translate_off
		if debug > 0 then
			if c.state = EXECUTE and c.first then
				write(ll, hx(c.pc)    & ": ");
				write(ll, cmd_t'image(cmd)   & HT);
				write(ll, hx(c.acc)   & " ");
				write(ll, hx(c.op)    & " ");
				write(ll, hx(c.flags) & " ");
				write(ll, yn(c.flags(Cy),  'C'));
				write(ll, yn(c.flags(Z),   'Z'));
				write(ll, yn(c.flags(Ng),  'N'));
				write(ll, yn(c.flags(R),   'R'));
				write(ll, yn(c.flags(HLT), 'H'));
				writeline(OUTPUT, ll);
			end if;
			if debug > 1 and last = '1' then
				write(ll, state_t'image(c.state) & " => ");
				write(ll, state_t'image(f.state));
				writeline(OUTPUT, ll);
			end if;
		end if;
		-- synthesis translate_on
	end procedure;
begin
	-- Sequencing taps on the delay line and the decoded instruction.
	last  <= c.dline(c.dline'high)     after delay;  -- set while the last bit of a state is processed
	last4 <= c.dline(c.dline'high - 4) after delay;  -- marks the start of the last four bits
	cmd   <= cmd_t'val(to_integer(unsigned(c.cmd))); -- enumerated view of the instruction, for debugging

	-- The one full adder, shared by the add instruction and the program
	-- counter increment.
	adder (add1, add2, acin, ares, acout);

	-- Sanity checks: the CPU must be wide enough, and at most one of the
	-- three enable lines may be active at any time.
	assert N >= 8                      report "CPU Width too small: N >= 8"    severity failure;
	assert not (oe = '1' and ae = '1') report "output whilst changing address" severity failure;
	assert not (ie = '1' and ae = '1') report "input whilst changing address"  severity failure;
	assert not (ie = '1' and oe = '1') report "input/output at the same time"  severity failure;
 
	-- Register process: copies the computed next state 'f' into the
	-- registers 'c' on each rising clock edge, and handles reset. Whether the
	-- reset is asynchronous or synchronous is selected by the
	-- 'asynchronous_reset' generic.
	--
	-- A reset puts the sequencing registers into the same condition that a
	-- state is normally entered with: an empty delay line, 'first' set and
	-- 'last4' clear. 'first' has to be part of the reset, as it is the first
	-- cycle of the RESET state that injects the single bit into the delay
	-- line; were the CPU reset in the middle of a state with 'first' left
	-- false the delay line would stay empty and the CPU would never leave the
	-- RESET state. The data registers are not reset here, the RESET state
	-- clears them by shifting zeros in.
	process (clk, rst) begin
		if rst = '1' and asynchronous_reset then
			c.dline <= (others => '0') after delay; -- parallel reset!
			c.state <= RESET after delay;
			c.first <= true  after delay;
			c.last4 <= false after delay;
		elsif rising_edge(clk) then
			c <= f after delay;
			if rst = '1' and not asynchronous_reset then
				-- These later assignments override the fields copied from 'f' above.
				c.dline <= (others => '0') after delay;
				c.state <= RESET after delay;
				c.first <= true  after delay;
				c.last4 <= false after delay;
			else
				-- These are just assertions/debug logging, they are not required for
				-- running, but we can make sure there are no unexpected state transitions,
				-- and report on the internal state.
				print_debug_info;
				if c.state = RESET   and last = '1' then assert f.state = FETCH;   end if;
				if c.state = LOAD    and last = '1' then assert f.state = ADVANCE; end if;
				if c.state = STORE   and last = '1' then assert f.state = ADVANCE; end if;
				if c.state = ADVANCE and last = '1' then assert f.state = FETCH;   end if;
				if c.state = HALT then assert f.state = HALT; end if;
				if c.state = EXECUTE and last = '1' then
					assert f.state = ADVANCE or f.state = LOAD or f.state = STORE or f.state = FETCH;
				end if;
			end if;
			-- The first cycle of a state is exactly the cycle in which the bit
			-- is injected into the delay line.
			assert not (c.first xor f.dline(0) = '1') report "first/dline";
		end if;
	end process;
 
	-- Combinational process: computes the next register contents 'f' and all
	-- of the outputs from the current registers 'c' and the serial input 'i'.
	-- This is the state machine and the ALU of the CPU.
	process (i, c, cmd, ares, acout, last, last4) begin
		-- Defaults: all outputs and adder inputs are inactive, the registers
		-- keep their values and the delay line shifts up by one place.
		o       <= '0' after delay;
		a       <= '0' after delay;
		ie      <= '0' after delay;
		ae      <= '0' after delay;
		oe      <= '0' after delay;
		stop    <= '0' after delay;
		add1    <= '0' after delay;
		add2    <= '0' after delay;
		acin    <= '0' after delay;
		f       <= c after delay;
		f.dline <= c.dline(c.dline'high - 1 downto 0) & "0" after delay;

		-- Our delay line should be empty during the first cycle of a state
		-- and contain exactly one set bit during every other cycle.
		if c.first then
			assert bit_count(c.dline) = 0 report "too many dline bits";
		else
			assert bit_count(c.dline) = 1 report "missing dline bit";
		end if;

		-- The processor works by using a delay line (shift register) to
		-- sequence actions, the top four bits are used for the
		-- instruction (and if the highest bit is set indirection
		-- is _not_ allowed), with the remaining low bits (twelve for the
		-- default N of 16) as an operand to use as a literal value or an
		-- address.
		--
		-- As such, we will want to trigger actions when processing the first
		-- bit, the last four bits and the last bit.
		--
		-- This delay line is used to save gates as opposed using a counter,
		-- which would require an adder (but not a comparator - we could check
		-- whether individual bits are set because all the comparisons are
		-- against power of two values).
		--
		if last = '1' then
			f.state <= c.choice after delay;
			f.first <= true     after delay;
			f.last4 <= false    after delay;
			-- This is a bit of a hack, in order to place it in its proper
			-- place within the 'FETCH' state we would need to move the
			-- 'indirection allowed on instruction' bit from the highest
			-- bit to a lower bit so we can perform the state decision before
			-- the bit is being processed.
			--
			-- Only a choice of EXECUTE is overridden. If FETCH chose HALT
			-- or RESET because of the flags then that choice must stand, or
			-- the halt/reset request would be ignored whenever the fetched
			-- word happened to have its top bit clear.
			if i = '0' and c.state = FETCH and c.choice = EXECUTE then
				f.indir <= true     after delay;
				f.state <= INDIRECT after delay; -- Override FETCH Choice!
			end if;
		elsif last4 = '1' then
			f.last4 <= true after delay;
		end if;

		-- Each state lasts N (which defaults to 16) + 1 cycles: one 'first'
		-- cycle that starts the delay line, then one cycle per bit.
		-- Of note: we could make the FETCH state last only 4 + 1 cycles
		-- and merge the operand fetching in FETCH (and OPERAND state) into
		-- the 'EXECUTE' state.
		case c.state is
		-- RESET clears the accumulator, program counter, operand and flags by
		-- shifting zeros into them, whilst clocking out an address of zero.
		when RESET   =>
			f.choice <= FETCH after delay;
			if c.first then
				f.dline(0)  <= '1'   after delay;
				f.first     <= false after delay;
			else
				ae      <= '1' after delay;
				f.acc   <= "0" & c.acc(c.acc'high downto 1) after delay;
				f.pc    <= "0" & c.pc (c.pc'high  downto 1) after delay;
				f.op    <= "0" & c.op (c.op'high  downto 1) after delay;
				f.flags <= "0" & c.flags(c.flags'high downto 1) after delay;
			end if;
		-- When in the running state all state transitions pass through FETCH.
		-- FETCH does what you expect from it, it fetches the instruction. It also
		-- partially decodes it and sets flags that depend on the accumulator.
		--
		-- What is meant by partially decoding is this; it is determined if we
		-- should go to the INDIRECT state next or to the EXECUTE state, also
		-- it is determined whether an I/O operation should be performed for those
		-- instructions capable of doing I/O.
		when FETCH   =>
			if c.first then
				f.dline(0)   <= '1'    after delay;
				f.first      <= false  after delay;
				f.indir      <= false  after delay;
				f.flags(Z)   <= '1'    after delay; -- assume zero until a set bit is seen
			else
				ie           <= '1' after delay;

				if c.acc(0) = '1' then -- determine flag status before EXECUTE
					f.flags(Z) <= '0' after delay;
				end if;
				-- Rotate the accumulator, after N cycles it is back where it started.
				f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;

				if not c.last4 then
					f.op    <= i & c.op(c.op'high downto 1) after delay;
				else
					-- The last four bits are the instruction, the operand is
					-- padded with zeros in their place.
					f.cmd   <= i   & c.cmd(c.cmd'high downto 1) after delay;
					f.op    <= "0" & c.op (c.op'high  downto 1) after delay;
				end if;
			end if;

			f.flags(Ng)  <= c.acc(0) after delay; -- contains highest bit when 'last' is true

			-- NB. A choice of EXECUTE may be overridden with INDIRECT on the
			-- last cycle, see above.
			if c.flags(HLT) = '1' then
				f.choice <= HALT after delay;
			elsif c.flags(R) = '1' then
				f.choice <= RESET after delay;
			else
				f.choice <= EXECUTE after delay;
			end if;
		-- INDIRECT is only used if the instruction allows for indirection
		-- (ie. All those instructions in which the top bit is not set).
		-- The indirection adds 2*(N+1) cycles to the instruction so is quite
		-- expensive. This state clocks the operand out as an address.
		--
		-- We could avoid having this state and CPU functionality if we were to
		-- make use of self-modifying code, however that would make programming the CPU
		-- more difficult.
		--
		when INDIRECT =>
			assert c.cmd(c.cmd'high) = '0' severity error;
			f.choice <= EXECUTE after delay;
			if c.first then
				f.dline(0)  <= '1'   after delay;
				f.first     <= false after delay;
			else
				ae       <=     '1' after delay;
				a        <= c.op(0) after delay;
				f.op     <=     "0" & c.op(c.op'high downto 1) after delay;
				f.choice <= OPERAND after delay;
			end if;
		-- OPERAND fetches the operand *again*, this time reading it from the
		-- address that was acquired in FETCH and clocked out in the previous
		-- INDIRECT state.
		when OPERAND =>
			f.choice <= EXECUTE after delay;
			if c.first then
				f.dline(0) <= '1'   after delay;
				f.first    <= false after delay;
			else
				ie      <= '1' after delay;
				f.op    <= i & c.op(c.op'high downto 1) after delay;
			end if;
		-- The EXECUTE state implements the ALU. It is seemingly the most
		-- complex state, but it is not (FETCH is more difficult to
		-- understand).
		when EXECUTE =>
			assert not (c.flags(Z) = '1' and c.flags(Ng) = '1') report "zero and negative?";
			f.choice     <= ADVANCE after delay;
			if c.first then
				f.dline(0)  <= '1'   after delay;
				f.first     <= false after delay;
				if cmd = iADD then f.flags(Cy) <= '0' after delay; end if;
				-- 'tcarry' is added to the program counter in the ADVANCE
				-- state, so it normally starts as '1'. An iSET of the
				-- program counter clears it so the new value is not
				-- incremented. (iJUMP and a taken iJUMPZ do not need to,
				-- they go straight to FETCH without passing through ADVANCE).
				f.tcarry    <= '1'   after delay;
			else
				case cmd is -- ALU
				when iOR =>
					f.op  <= "0" & c.op (c.op'high  downto 1) after delay;
					f.acc <= (c.op(0) or c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
				when iAND =>
					-- For a literal operand the top four bits of the
					-- accumulator are left alone, the operand only has zeros
					-- there as the instruction took their place.
					f.acc <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
					if (not c.last4) or c.indir then
						f.op  <= "0" & c.op (c.op'high downto 1) after delay;
						f.acc <= (c.op(0) and c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
					end if;
				when iXOR =>
					f.op  <= "0" & c.op (c.op'high downto 1) after delay;
					f.acc <= (c.op(0) xor c.acc(0)) & c.acc(c.acc'high downto 1) after delay;
				when iADD =>
					f.acc <= "0" & c.acc(c.acc'high downto 1) after delay;
					f.op  <= "0" & c.op(c.op'high downto 1)   after delay;
					add1  <=    c.acc(0) after delay;
					add2  <=     c.op(0) after delay;
					acin  <= c.flags(Cy) after delay;
					f.acc(f.acc'high) <= ares after delay; -- replaces the "0" shifted in above
					f.flags(Cy) <= acout after delay;
				-- A barrel shifter is usually quite an expensive piece of hardware,
				-- but it ends up being quite cheap for obvious reasons. If we really
				-- needed to we could dispense with the right shift, we could mask off
				-- low bits and rotate (either way) to emulate it.
				--
				-- The accumulator is shifted by one place for each bit that is
				-- set in the operand.
				when iLSHIFT =>
					if c.op(0) = '1' then
						f.acc  <= c.acc(c.acc'high - 1 downto 0) & "0" after delay;
					end if;
					f.op   <= "0" & c.op (c.op'high downto 1) after delay;
				when iRSHIFT =>
					if c.op(0) = '1' then
						f.acc  <= "0" & c.acc(c.acc'high downto 1) after delay;
					end if;
					f.op   <= "0" & c.op (c.op'high downto 1) after delay;
				-- We have two sets of LOAD/STORE instructions, one set which
				-- optionally respects the indirect flag, and one set (the latter)
				-- which never does. This allows us to perform direct LOAD/STORES
				-- when the indirect flag is on. Both sets behave identically
				-- here, EXECUTE clocks out the address and the LOAD or STORE
				-- state then moves the data.
				when iLOAD | iLOADC =>
					ae       <=     '1' after delay;
					a        <= c.op(0) after delay;
					f.op     <= c.op(0) & c.op(c.op'high downto 1) after delay;
					f.choice <= LOAD after delay;
				when iSTORE | iSTOREC =>
					ae       <=     '1' after delay;
					a        <= c.op(0) after delay;
					f.op     <=     "0" & c.op(c.op'high downto 1) after delay;
					f.choice <= STORE after delay;
				when iLITERAL =>
					f.acc  <= c.op(0) & c.acc(c.acc'high downto 1) after delay;
					f.op   <=     "0" & c.op (c.op'high downto 1)  after delay;
				when iUNUSED =>
					-- We could use this if we need to extend the instruction set
					-- for any reason. I cannot think of a good one that justifies the
					-- cost of a new instruction. So this will remain blank for now.
					--
					-- Candidates for an instruction include:
					--
					-- * Arithmetic Right Shift
					-- * Subtraction
					-- * Swap Low/High Byte (may be difficult to implement)
					--
					-- However, this instruction may not have its indirection bit set,
					-- This would not be a problem for the swap instruction. Alternatively
					-- an 'add-constant' could be added.
					--
					null;
				-- Jumps clock the operand out as the new address and copy it
				-- into the program counter, then go straight to FETCH.
				when iJUMP =>
					ae       <=     '1' after delay;
					a        <= c.op(0) after delay;
					f.op     <=     "0" & c.op(c.op'high downto 1) after delay;
					f.pc     <= c.op(0) & c.pc(c.pc'high downto 1) after delay;
					f.choice <= FETCH after delay;
				when iJUMPZ =>
					-- If the jump is not taken nothing is done and the
					-- program counter is incremented in ADVANCE as usual.
					if c.flags(Z) = jumpz then
						ae     <=     '1' after delay;
						a      <= c.op(0) after delay;
						f.op   <=     "0" & c.op(c.op'high downto 1) after delay;
						f.pc   <= c.op(0) & c.pc(c.pc'high downto 1) after delay;
						f.choice <= FETCH after delay;
					end if;
				-- NB. We could probably eliminate these instructions by mapping
				-- the registers into the memory address space, this would free
				-- up another two instructions, and potentially simplify the CPU.
				--
				-- The lowest bit of the operand selects the register; '0'
				-- for the program counter and '1' for the flags.
				when iSET =>
					if c.op(0) = '0' then
						-- NB. We could set the address directly here and
						-- go to FETCH but that costs us too much time and gates.
						f.pc     <= c.acc(0) & c.pc(c.pc'high downto 1) after delay;
						f.tcarry <= '0' after delay;
					else
						f.flags  <= c.acc(0) & c.flags(c.flags'high downto 1) after delay;
					end if;
					f.acc    <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
				when iGET =>
					if c.op(0) = '0' then
						f.acc    <= c.pc(0) & c.acc(c.acc'high downto 1) after delay;
						f.pc     <= c.pc(0) & c.pc(c.pc'high downto 1)   after delay;
					else
						f.acc    <= c.flags(0) & c.acc(c.acc'high downto 1)     after delay;
						f.flags  <= c.flags(0) & c.flags(c.flags'high downto 1) after delay;
					end if;
				end case;
			end if;
		-- Unfortunately we cannot perform a load or a store whilst we are
		-- performing an EXECUTE, so we require STORE and LOAD states to do
		-- more work after a LOAD or STORE instruction.
		when STORE   =>
			f.choice <= ADVANCE after delay;
			if c.first then
				f.dline(0) <= '1'   after delay;
				f.first    <= false after delay;
			else
				o      <= c.acc(0) after delay;
				oe     <= '1'      after delay;
				f.acc  <= c.acc(0) & c.acc(c.acc'high downto 1) after delay;
			end if;
		when LOAD    =>
			f.choice <= ADVANCE after delay;
			if c.first then
				f.dline(0) <= '1'   after delay;
				f.first    <= false after delay;
			else
				ie    <= '1' after delay;
				f.acc <= i & c.acc(c.acc'high downto 1) after delay;
			end if;
		-- ADVANCE reuses our adder in iADD to add 'tcarry' to the program
		-- counter, clocking the result out as the next address. 'tcarry' is
		-- normally '1', which increments the program counter. After an iSET
		-- on the program counter it is '0' and the program counter is clocked
		-- out unchanged. This state is *not* reached by iJUMP or by a taken
		-- iJUMPZ, which go directly to FETCH.
		when ADVANCE =>
			f.choice <= FETCH after delay;
			if c.first then
				f.dline(0) <= '1'   after delay;
				f.first    <= false after delay;
			else
				f.pc <= "0" & c.pc(c.pc'high downto 1) after delay;
				add1 <= c.pc(0)  after delay;
				-- A 'skip' facility could be made by optionally setting this to '1'
				-- for the first cycle, incrementing the program counter by 2.
				add2 <= '0'      after delay;
				acin <= c.tcarry after delay;
				f.pc(f.pc'high) <= ares  after delay; -- replaces the "0" shifted in above
				a               <= ares  after delay;
				f.tcarry        <= acout after delay;
				ae   <= '1' after delay;
			end if;
		-- HALT is never left: nothing restarts the delay line here, so
		-- 'last' is never raised again and the state cannot change.
		when HALT => stop <= '1' after delay;
		end case;
	end process;
end architecture;
 
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.