1 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
2 |
|
|
-- ion_cpu.vhdl -- MIPS-I(tm) compatible CPU core
|
3 |
|
|
--------------------------------------------------------------------------------
|
4 |
|
|
-- project: ION (http://www.opencores.org/project,ion_cpu)
|
5 |
|
|
-- author: Jose A. Ruiz (ja_rd@hotmail.com)
|
6 |
|
|
-- created: Jan/11/2011
|
7 |
|
|
-- last modified: Jan/25/2011 (ja_rd@hotmail.com)
|
8 |
|
|
--------------------------------------------------------------------------------
|
9 |
|
|
-- Software placed into the public domain by the author. Use under the terms of
|
10 |
|
|
-- the GPL.
|
11 |
|
|
-- Software 'as is' without warranty. Author liable for nothing.
|
12 |
|
|
--------------------------------------------------------------------------------
|
13 |
|
|
-- NOTE: exceptions only partially implemented; jumps, loads and stores are
|
14 |
|
|
-- not aborted.
|
15 |
|
|
--
|
16 |
|
|
--
|
17 |
|
|
--------------------------------------------------------------------------------
|
18 |
|
|
--### PLASMA features not implemented yet
|
19 |
|
|
-- # MUL/DIV
|
20 |
|
|
--
|
21 |
|
|
--### MIPS-I things not implemented
|
22 |
|
|
-- # Invalid instruction detection
|
23 |
|
|
-- # Kernel/user status
|
24 |
|
|
-- # RTE instruction
|
25 |
|
|
-- # Most of the CP0 registers and of course all of the CP1
|
26 |
|
|
-- # External interrupts
|
27 |
|
|
--
|
28 |
|
|
--### Things implemented but not tested
|
29 |
|
|
-- # Syscall instruction (does a jal to 0x3c and that's it)
|
30 |
|
|
-- # Memory pause input
|
31 |
|
|
--
|
32 |
|
|
--### Things with provisional implementation
|
33 |
|
|
--
|
34 |
|
|
-- 1.- Load interlocks: the pipeline is stalled for every load instruction, even
|
35 |
|
|
-- if the target register is not used in the following instruction. So that
|
36 |
|
|
-- every load takes two cycles.
|
37 |
|
|
-- The interlock logic should check register indices.
|
38 |
|
|
--
|
39 |
|
|
-- 2.- Invalid instructions are not detected as such. Their behaviour is
|
40 |
|
|
-- undefined and unpredictable.
|
41 |
|
|
-- Invalid instructions should trigger an exception or at least just NOP.
|
42 |
|
|
-- This is closely related to privilege level so it will have to wait.
|
43 |
|
|
--------------------------------------------------------------------------------
|
44 |
|
|
|
45 |
|
|
library ieee;
|
46 |
|
|
use ieee.std_logic_1164.all;
|
47 |
|
|
use ieee.std_logic_arith.all;
|
48 |
|
|
use ieee.std_logic_unsigned.all;
|
49 |
|
|
use work.mips_pkg.all;
|
50 |
|
|
|
51 |
|
|
entity mips_cpu is
    generic (
        -- Register bank implementation style for Xilinx tools
        XILINX_REGBANK  : string := "distributed" -- {distributed|block}
    );
    port (
        -- Clock; all registers in this core load on the rising edge
        clk             : in  std_logic;
        -- Reset; sampled inside the clocked processes (synchronous)
        reset           : in  std_logic;
        -- Interrupt request; not referenced by the rtl architecture yet
        interrupt       : in  std_logic;

        -- Data read port
        data_rd_addr    : out std_logic_vector(31 downto 0);
        data_rd         : in  std_logic_vector(31 downto 0);
        data_rd_vma     : out std_logic;

        -- Code (instruction fetch) read port; word address only
        code_rd_addr    : out std_logic_vector(31 downto 2);
        code_rd         : in  std_logic_vector(31 downto 0);
        code_rd_vma     : out std_logic;

        -- Data write port; word address plus one write enable per byte lane
        data_wr_addr    : out std_logic_vector(31 downto 2);
        byte_we         : out std_logic_vector(3 downto 0);
        data_wr         : out std_logic_vector(31 downto 0);

        -- Memory wait request; stalls the pipeline while high.
        -- NOTE: needs to be synchronous to clk
        mem_wait        : in  std_logic
    );
end entity mips_cpu;
|
76 |
|
|
|
77 |
|
|
architecture rtl of mips_cpu is
|
78 |
|
|
|
79 |
|
|
--------------------------------------------------------------------------------
|
80 |
|
|
-- Pipeline stage 0
|
81 |
|
|
|
82 |
|
|
signal p0_pc_reg : t_pc;
|
83 |
|
|
signal p0_pc_incremented : t_pc;
|
84 |
|
|
signal p0_pc_jump : t_pc;
|
85 |
|
|
signal p0_pc_branch : t_pc;
|
86 |
|
|
signal p0_pc_target : t_pc;
|
87 |
|
|
signal p0_pc_next : t_pc;
|
88 |
|
|
signal p0_rs_num : t_regnum;
|
89 |
|
|
signal p0_rt_num : t_regnum;
|
90 |
|
|
signal p0_jump_cond_value : std_logic;
|
91 |
|
|
signal p0_rbank_rs_hazard : std_logic;
|
92 |
|
|
signal p0_rbank_rt_hazard : std_logic;
|
93 |
|
|
|
94 |
|
|
--------------------------------------------------------------------------------
|
95 |
|
|
-- Pipeline stage 1
|
96 |
|
|
|
97 |
|
|
|
98 |
|
|
signal p1_rbank : t_rbank := (others => X"00000000");
|
99 |
|
|
|
100 |
|
|
-- IMPORTANT: This attribute is used by Xilinx tools to select how to implement
|
101 |
|
|
-- the register bank. If we don't use it, by default XST would infer 2 BRAMs for
|
102 |
|
|
-- the 1024-bit 3-port reg bank, which you probably don't want.
|
103 |
|
|
-- This can take the values {distributed|block}.
|
104 |
|
|
attribute ram_style : string;
|
105 |
|
|
-- The implementation style is taken from the entity generic XILINX_REGBANK
-- (default "distributed") instead of being hard-coded, so that the generic
-- actually has an effect when the core is instantiated.
attribute ram_style of p1_rbank : signal is XILINX_REGBANK;
|
106 |
|
|
|
107 |
|
|
signal p1_rs, p1_rt : t_word;
|
108 |
|
|
signal p1_rs_rbank : t_word;
|
109 |
|
|
signal p1_rt_rbank : t_word;
|
110 |
|
|
signal p1_rbank_forward : t_word;
|
111 |
|
|
signal p1_rd_num : t_regnum;
|
112 |
|
|
signal p1_rbank_wr_addr : t_regnum;
|
113 |
|
|
signal p1_rbank_we : std_logic;
|
114 |
|
|
signal p1_rbank_wr_data : t_word;
|
115 |
|
|
signal p1_alu_inp1 : t_word;
|
116 |
|
|
signal p1_alu_inp2 : t_word;
|
117 |
|
|
signal p1_alu_outp : t_word;
|
118 |
|
|
-- ALU control inputs (shortened name for brevity in expressions)
|
119 |
|
|
signal p1_ac : t_alu_control;
|
120 |
|
|
-- ALU flag outputs (comparison results)
|
121 |
|
|
signal p1_alu_flags : t_alu_flags;
|
122 |
|
|
-- immediate data, sign- or zero-extended as required by IR
|
123 |
|
|
signal p1_data_imm : t_word;
|
124 |
|
|
signal p1_muldiv_result : t_dword;
|
125 |
|
|
signal p1_branch_offset : t_pc;
|
126 |
|
|
signal p1_branch_offset_sex:std_logic_vector(31 downto 18);
|
127 |
|
|
signal p1_rbank_rs_hazard : std_logic;
|
128 |
|
|
signal p1_rbank_rt_hazard : std_logic;
|
129 |
|
|
signal p1_jump_type_set0 : std_logic_vector(1 downto 0);
|
130 |
|
|
signal p1_jump_type_set1 : std_logic_vector(1 downto 0);
|
131 |
|
|
signal p1_ir_reg : std_logic_vector(31 downto 0);
|
132 |
|
|
signal p1_ir_op : std_logic_vector(31 downto 26);
|
133 |
|
|
signal p1_ir_fn : std_logic_vector(5 downto 0);
|
134 |
|
|
signal p1_op_special : std_logic;
|
135 |
|
|
signal p1_exception : std_logic;
|
136 |
|
|
signal p1_do_reg_jump : std_logic;
|
137 |
|
|
signal p1_do_zero_ext_imm : std_logic;
|
138 |
|
|
signal p1_set_cp0 : std_logic;
|
139 |
|
|
signal p1_get_cp0 : std_logic;
|
140 |
|
|
signal p1_load_hi : std_logic;
|
141 |
|
|
signal p1_load_lo : std_logic;
|
142 |
|
|
signal p1_alu_op2_sel : std_logic_vector(1 downto 0);
|
143 |
|
|
signal p1_alu_op2_sel_set0: std_logic_vector(1 downto 0);
|
144 |
|
|
signal p1_alu_op2_sel_set1: std_logic_vector(1 downto 0);
|
145 |
|
|
signal p1_do_load : std_logic;
|
146 |
|
|
signal p1_do_store : std_logic;
|
147 |
|
|
signal p1_store_size : std_logic_vector(1 downto 0);
|
148 |
|
|
signal p1_we_control : std_logic_vector(5 downto 0);
|
149 |
|
|
signal p1_load_alu : std_logic;
|
150 |
|
|
signal p1_load_alu_set0 : std_logic;
|
151 |
|
|
signal p1_load_alu_set1 : std_logic;
|
152 |
|
|
signal p1_ld_upper_hword : std_logic;
|
153 |
|
|
signal p1_ld_upper_byte : std_logic;
|
154 |
|
|
signal p1_ld_unsigned : std_logic;
|
155 |
|
|
signal p1_jump_type : std_logic_vector(1 downto 0);
|
156 |
|
|
signal p1_link : std_logic;
|
157 |
|
|
signal p1_jump_cond_sel : std_logic_vector(2 downto 0);
|
158 |
|
|
signal p1_data_addr : t_addr;
|
159 |
|
|
signal p1_data_offset : t_addr;
|
160 |
|
|
|
161 |
|
|
--------------------------------------------------------------------------------
|
162 |
|
|
-- Pipeline stage 2
|
163 |
|
|
|
164 |
|
|
signal p2_exception : std_logic;
|
165 |
|
|
signal p2_rd_addr : std_logic_vector(1 downto 0);
|
166 |
|
|
signal p2_rd_mux_control : std_logic_vector(3 downto 0);
|
167 |
|
|
signal p2_load_target : t_regnum;
|
168 |
|
|
signal p2_do_load : std_logic;
|
169 |
|
|
signal p2_ld_upper_hword : std_logic;
|
170 |
|
|
signal p2_ld_upper_byte : std_logic;
|
171 |
|
|
signal p2_ld_unsigned : std_logic;
|
172 |
|
|
signal p2_wback_mux_sel : std_logic_vector(1 downto 0);
|
173 |
|
|
signal p2_data_word_rd : t_word;
|
174 |
|
|
signal p2_data_word_ext : std_logic;
|
175 |
|
|
|
176 |
|
|
--------------------------------------------------------------------------------
|
177 |
|
|
-- Global control signals
|
178 |
|
|
|
179 |
|
|
signal load_interlock : std_logic;
|
180 |
|
|
signal stall_pipeline : std_logic;
|
181 |
|
|
-- pipeline is stalled for any reason
|
182 |
|
|
signal pipeline_stalled : std_logic;
|
183 |
|
|
-- pipeline is stalled because of a load instruction interlock
|
184 |
|
|
signal pipeline_interlocked:std_logic;
|
185 |
|
|
|
186 |
|
|
|
187 |
|
|
--------------------------------------------------------------------------------
|
188 |
|
|
-- Multiplier interface registers
|
189 |
|
|
|
190 |
|
|
signal mdiv_hi_reg : t_word;
|
191 |
|
|
signal mdiv_lo_reg : t_word;
|
192 |
|
|
|
193 |
|
|
--------------------------------------------------------------------------------
|
194 |
|
|
-- CP0 registers and signals
|
195 |
|
|
|
196 |
|
|
-- CP0[12]: status register
|
197 |
|
|
-- FIXME status flags unimplemented
|
198 |
|
|
signal cp0_status : std_logic_vector(1 downto 0);
|
199 |
|
|
-- Output of CP0 register bank (only a few regs are implemented)
|
200 |
|
|
signal cp0_reg_read : t_word;
|
201 |
|
|
-- CP0[14]: EPC register (PC value saved at exceptions)
|
202 |
|
|
signal cp0_epc : t_pc;
|
203 |
|
|
|
204 |
|
|
begin
|
205 |
|
|
|
206 |
|
|
--##############################################################################
|
207 |
|
|
-- Register bank & datapath
|
208 |
|
|
|
209 |
|
|
-- Register indices are 'decoded' out of the instruction word BEFORE loading IR
|
210 |
|
|
p0_rs_num <= std_logic_vector(code_rd(25 downto 21));
|
211 |
|
|
with p1_ir_reg(31 downto 26) select p1_rd_num <=
|
212 |
|
|
p1_ir_reg(15 downto 11) when "000000",
|
213 |
|
|
p1_ir_reg(20 downto 16) when others;
|
214 |
|
|
|
215 |
|
|
p0_rt_num <= std_logic_vector(code_rd(20 downto 16)); -- also called rs2 in the docs
|
216 |
|
|
|
217 |
|
|
--------------------------------------------------------------------------------
|
218 |
|
|
-- Data input shifter & masker (LB,LBU,LH,LHU,LW)
|
219 |
|
|
|
220 |
|
|
p2_rd_mux_control <= p2_ld_upper_hword & p2_ld_upper_byte & p2_rd_addr;
|
221 |
|
|
|
222 |
|
|
-- Extension for unused bits will be zero (unsigned loads) or the sign bit of
-- the byte/halfword that is actually being loaded.
-- The input muxes below are big endian: byte offset 0 selects
-- data_rd(31 downto 24) and halfword offset 0 selects data_rd(31 downto 16),
-- so the sign bit has to be picked from the same lane as the data.
p2_data_word_ext <= '0'         when p2_ld_unsigned='1' else
                    -- LH: MSB of the selected halfword
                    data_rd(31) when p2_ld_upper_byte='1' and p2_rd_addr="00" else
                    data_rd(15) when p2_ld_upper_byte='1' else
                    -- LB: MSB of the selected byte
                    data_rd(7)  when p2_rd_addr="11" else
                    data_rd(15) when p2_rd_addr="10" else
                    data_rd(23) when p2_rd_addr="01" else
                    data_rd(31);
|
228 |
|
|
|
229 |
|
|
-- byte 0 may come from any of the 4 bytes of the input word
|
230 |
|
|
with p2_rd_mux_control select p2_data_word_rd(7 downto 0) <=
|
231 |
|
|
data_rd(31 downto 24) when "0000",
|
232 |
|
|
data_rd(23 downto 16) when "0001",
|
233 |
|
|
data_rd(23 downto 16) when "0100",
|
234 |
|
|
data_rd(15 downto 8) when "0010",
|
235 |
|
|
data_rd( 7 downto 0) when others;
|
236 |
|
|
|
237 |
|
|
-- byte 1 may come from input bytes 1 or 3 or may be extended for LB, LBU
|
238 |
|
|
with p2_rd_mux_control select p2_data_word_rd(15 downto 8) <=
|
239 |
|
|
data_rd(31 downto 24) when "0100",
|
240 |
|
|
data_rd(15 downto 8) when "0110",
|
241 |
|
|
data_rd(15 downto 8) when "1100",
|
242 |
|
|
data_rd(15 downto 8) when "1101",
|
243 |
|
|
data_rd(15 downto 8) when "1110",
|
244 |
|
|
data_rd(15 downto 8) when "1111",
|
245 |
|
|
(others => p2_data_word_ext) when others;
|
246 |
|
|
|
247 |
|
|
-- bytes 2,3 come straight from input or are extended for LH,LHU
|
248 |
|
|
with p2_ld_upper_hword select p2_data_word_rd(31 downto 16) <=
|
249 |
|
|
(others => p2_data_word_ext) when '0',
|
250 |
|
|
data_rd(31 downto 16) when others;
|
251 |
|
|
|
252 |
|
|
-- Select which data is to be written back to the reg bank and where
|
253 |
|
|
p1_rbank_wr_addr <= p1_rd_num when p2_do_load='0' and p1_link='0' else
|
254 |
|
|
"11111" when p2_do_load='0' and p1_link='1' else
|
255 |
|
|
p2_load_target;
|
256 |
|
|
|
257 |
|
|
p2_wback_mux_sel <=
|
258 |
|
|
"00" when p2_do_load='0' and p1_get_cp0='0' and p1_link='0' else
|
259 |
|
|
"01" when p2_do_load='1' and p1_get_cp0='0' and p1_link='0' else
|
260 |
|
|
"10" when p2_do_load='0' and p1_get_cp0='1' and p1_link='0' else
|
261 |
|
|
"11";
|
262 |
|
|
|
263 |
|
|
with (p2_wback_mux_sel) select p1_rbank_wr_data <=
|
264 |
|
|
p1_alu_outp when "00",
|
265 |
|
|
p2_data_word_rd when "01",
|
266 |
|
|
p0_pc_incremented & "00" when "11",
|
267 |
|
|
cp0_reg_read when others;
|
268 |
|
|
|
269 |
|
|
p1_rbank_we <= '1' when (p2_do_load='1' or p1_load_alu='1' or
|
270 |
|
|
p1_link='1' or p1_get_cp0='1') and
|
271 |
|
|
p1_rbank_wr_addr/="00000" and
|
272 |
|
|
-- on exception, abort next instruction (by preventing
|
273 |
|
|
-- regbank writeback).
|
274 |
|
|
p2_exception='0'
|
275 |
|
|
else '0';
|
276 |
|
|
|
277 |
|
|
-- Register bank as triple-port RAM. Should synth to 2 BRAMs unless you use
|
278 |
|
|
-- synth attributes to prevent it (see 'ram_style' attribute above) or your
|
279 |
|
|
-- FPGA has 3-port BRAMS, or has none.
|
280 |
|
|
synchronous_reg_bank:
process(clk)
begin
    if clk'event and clk='1' then
        -- Read ports: both source operands are fetched on every clock edge,
        -- indexed by the register numbers taken from the incoming code word.
        -- Being signal reads, they see the bank contents from before any
        -- write performed on this same edge.
        p1_rs_rbank <= p1_rbank(conv_integer(p0_rs_num));
        p1_rt_rbank <= p1_rbank(conv_integer(p0_rt_num));

        -- Write port: active while the pipeline is running, and also while it
        -- is stalled by a load interlock so the load can complete (@note1).
        if p1_rbank_we='1' then
            if pipeline_stalled='0' or pipeline_interlocked='1' then
                p1_rbank(conv_integer(p1_rbank_wr_addr)) <= p1_rbank_wr_data;
            end if;
        end if;
    end if;
end process synchronous_reg_bank;
|
292 |
|
|
|
293 |
|
|
-- Register writeback data in case it needs to be forwarded.
|
294 |
|
|
data_forward_register:
|
295 |
|
|
process(clk)
|
296 |
|
|
begin
|
297 |
|
|
if clk'event and clk='1' then
|
298 |
|
|
if p1_rbank_we='1' then -- no need to check for stall cycles
|
299 |
|
|
p1_rbank_forward <= p1_rbank_wr_data;
|
300 |
|
|
end if;
|
301 |
|
|
end if;
|
302 |
|
|
end process data_forward_register;
|
303 |
|
|
|
304 |
|
|
-- Bypass sync RAM if we're reading and writing to the same address. This saves
|
305 |
|
|
-- 1 stall cycle and fixes the data hazard.
|
306 |
|
|
p0_rbank_rs_hazard <= '1' when p1_rbank_wr_addr=p0_rs_num and p1_rbank_we='1'
|
307 |
|
|
else '0';
|
308 |
|
|
p0_rbank_rt_hazard <= '1' when p1_rbank_wr_addr=p0_rt_num and p1_rbank_we='1'
|
309 |
|
|
else '0';
|
310 |
|
|
|
311 |
|
|
p1_rs <= p1_rs_rbank when p1_rbank_rs_hazard='0' else p1_rbank_forward;
|
312 |
|
|
p1_rt <= p1_rt_rbank when p1_rbank_rt_hazard='0' else p1_rbank_forward;
|
313 |
|
|
|
314 |
|
|
-- Zero extension/Sign extension for instruction immediate data
|
315 |
|
|
p1_data_imm(15 downto 0) <= p1_ir_reg(15 downto 0);
|
316 |
|
|
|
317 |
|
|
with p1_do_zero_ext_imm select p1_data_imm(31 downto 16) <=
|
318 |
|
|
(others => '0') when '1',
|
319 |
|
|
(others => p1_ir_reg(15)) when others;
|
320 |
|
|
|
321 |
|
|
|
322 |
|
|
--------------------------------------------------------------------------------
|
323 |
|
|
-- ALU & ALU input multiplexors
|
324 |
|
|
|
325 |
|
|
p1_alu_inp1 <= p1_rs;
|
326 |
|
|
|
327 |
|
|
with p1_alu_op2_sel select p1_alu_inp2 <=
|
328 |
|
|
p1_data_imm when "11",
|
329 |
|
|
p1_muldiv_result(63 downto 32) when "01",
|
330 |
|
|
p1_muldiv_result(31 downto 0) when "10",
|
331 |
|
|
p1_rt when others;
|
332 |
|
|
|
333 |
|
|
alu_inst : entity work.mips_alu
|
334 |
|
|
port map (
|
335 |
|
|
clk => clk,
|
336 |
|
|
reset => reset,
|
337 |
|
|
ac => p1_ac,
|
338 |
|
|
flags => p1_alu_flags,
|
339 |
|
|
|
340 |
|
|
inp1 => p1_alu_inp1,
|
341 |
|
|
inp2 => p1_alu_inp2,
|
342 |
|
|
outp => p1_alu_outp
|
343 |
|
|
);
|
344 |
|
|
|
345 |
|
|
|
346 |
|
|
--------------------------------------------------------------------------------
|
347 |
|
|
-- Mul/Div block interface
|
348 |
|
|
|
349 |
|
|
-- FIXME when MUL*/DIV* are implemented, these registers and the load enable
|
350 |
|
|
-- logic will change a little. It may be better to move them into the alu.
|
351 |
|
|
mult_registers:
process(clk)
begin
    if clk'event and clk='1' then
        -- Each register has its own load enable and both are loaded from the
        -- rs operand. MTHI, MTLO are never involved in stall cycles, so the
        -- stall signals are not checked here.
        if p1_load_lo='1' then
            mdiv_lo_reg <= p1_rs;
        end if;

        if p1_load_hi='1' then
            mdiv_hi_reg <= p1_rs;
        end if;
    end if;
end process mult_registers;
|
364 |
|
|
|
365 |
|
|
p1_muldiv_result <= mdiv_hi_reg & mdiv_lo_reg; -- FIXME stub, mdiv missing
|
366 |
|
|
|
367 |
|
|
|
368 |
|
|
--##############################################################################
|
369 |
|
|
-- PC register and branch logic
|
370 |
|
|
|
371 |
|
|
-- p0_pc_reg will not be incremented on stall cycles
|
372 |
|
|
p0_pc_incremented <= p0_pc_reg + (not stall_pipeline);
|
373 |
|
|
|
374 |
|
|
-- main pc mux: jump or continue
|
375 |
|
|
p0_pc_next <=
|
376 |
|
|
p0_pc_target when
|
377 |
|
|
-- We jump on jump instructions whose condition is met...
|
378 |
|
|
((p1_jump_type(1)='1' and p0_jump_cond_value='1') or
|
379 |
|
|
-- ... or on exceptions...
|
380 |
|
|
p1_exception='1')
|
381 |
|
|
-- ... but only if the pipeline is not stalled
|
382 |
|
|
and stall_pipeline='0'
|
383 |
|
|
else p0_pc_incremented;
|
384 |
|
|
|
385 |
|
|
pc_register:
process(clk)
begin
    if clk'event and clk='1' then
        -- By default the PC follows the main PC mux; p0_pc_reg holds the same
        -- value as the external sync ram addr register.
        p0_pc_reg <= p0_pc_next;

        -- Synchronous reset overrides the default assignment above:
        -- reset to 0xffffffff so that 1st fetch addr is 0x00000000
        if reset='1' then
            p0_pc_reg <= (others => '1');
        end if;
    end if;
end process pc_register;
|
398 |
|
|
|
399 |
|
|
-- p0_pc_reg holds the same addr as the addr register of the external synchronous
|
400 |
|
|
-- memory; what we put on the addr bus is p0_pc_next.
|
401 |
|
|
data_rd_addr <= p1_data_addr(31 downto 0);
|
402 |
|
|
|
403 |
|
|
-- FIXME these two need to be pushed behind a register, they are glitch-prone
|
404 |
|
|
data_rd_vma <= p1_do_load and not pipeline_stalled; -- FIXME register
|
405 |
|
|
code_rd_vma <= not stall_pipeline; -- FIXME register
|
406 |
|
|
|
407 |
|
|
code_rd_addr <= p0_pc_next;
|
408 |
|
|
|
409 |
|
|
data_wr_addr <= p1_data_addr(31 downto 2);
|
410 |
|
|
|
411 |
|
|
-- compute target of J/JR instructions
|
412 |
|
|
p0_pc_jump <= p1_rs(31 downto 2) when p1_do_reg_jump='1' else
|
413 |
|
|
p0_pc_reg(31 downto 28) & p1_ir_reg(25 downto 0);
|
414 |
|
|
|
415 |
|
|
-- compute target of relative branch instructions
|
416 |
|
|
p1_branch_offset_sex <= (others => p1_ir_reg(15));
|
417 |
|
|
p1_branch_offset <= p1_branch_offset_sex & p1_ir_reg(15 downto 0);
|
418 |
|
|
-- p0_pc_reg is the addr of the instruction in delay slot
|
419 |
|
|
p0_pc_branch <= p0_pc_reg + p1_branch_offset;
|
420 |
|
|
|
421 |
|
|
-- decide which jump target is to be used
|
422 |
|
|
p0_pc_target <= X"0000003"&"11" when p1_exception='1' else
|
423 |
|
|
p0_pc_jump when p1_jump_type(0)='1' else
|
424 |
|
|
p0_pc_branch;
|
425 |
|
|
|
426 |
|
|
|
427 |
|
|
--##############################################################################
|
428 |
|
|
-- Instruction decoding and IR
|
429 |
|
|
|
430 |
|
|
instruction_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- synchronous reset: IR is cleared to an all-zeros word
            p1_ir_reg <= X"00000000";
        else
            -- IR keeps its value during stall cycles and otherwise loads the
            -- word present on the code read bus
            if stall_pipeline='0' then
                p1_ir_reg <= code_rd;
            end if;
        end if;
    end if;
end process instruction_register;
|
441 |
|
|
|
442 |
|
|
-- 'Extract' main fields from IR, for convenience
|
443 |
|
|
p1_ir_op <= p1_ir_reg(31 downto 26);
|
444 |
|
|
p1_ir_fn <= p1_ir_reg(5 downto 0);
|
445 |
|
|
|
446 |
|
|
-- Decode jump type, if any, for instructions with op/=0
|
447 |
|
|
with p1_ir_op select p1_jump_type_set0 <=
|
448 |
|
|
-- FIXME weed out invalid instructions
|
449 |
|
|
"10" when "000001", -- BLTZ, BGEZ, BLTZAL, BGEZAL
|
450 |
|
|
"11" when "000010", -- J
|
451 |
|
|
"11" when "000011", -- JAL
|
452 |
|
|
"10" when "000100", -- BEQ
|
453 |
|
|
"10" when "000101", -- BNE
|
454 |
|
|
"10" when "000110", -- BLEZ
|
455 |
|
|
"10" when "000111", -- BGTZ
|
456 |
|
|
"00" when others; -- no jump
|
457 |
|
|
|
458 |
|
|
-- Decode jump type, if any, for instructions with op=0
|
459 |
|
|
p1_jump_type_set1 <= "11" when p1_op_special='1' and
|
460 |
|
|
p1_ir_reg(5 downto 1)="00100"
|
461 |
|
|
else "00";
|
462 |
|
|
|
463 |
|
|
-- Decode jump type for the instruction in IR (composite of two formats)
|
464 |
|
|
p1_jump_type <= p1_jump_type_set0 or p1_jump_type_set1;
|
465 |
|
|
|
466 |
|
|
p1_link <= '1' when (p1_ir_op="000000" and p1_ir_reg(5 downto 0)="001001") or
|
467 |
|
|
(p1_ir_op="000001" and p1_ir_reg(20)='1') or
|
468 |
|
|
(p1_ir_op="000011")
|
469 |
|
|
else '0';
|
470 |
|
|
|
471 |
|
|
-- Decode jump condition: encode a mux control signal from IR...
|
472 |
|
|
p1_jump_cond_sel <=
|
473 |
|
|
"001" when p1_ir_op="000001" and p1_ir_reg(16)='0' else -- op1 < 0 BLTZ*
|
474 |
|
|
"101" when p1_ir_op="000001" and p1_ir_reg(16)='1' else -- !(op1 < 0) BNLTZ*
|
475 |
|
|
"010" when p1_ir_op="000100" else -- op1 == op2 BEQ
|
476 |
|
|
"110" when p1_ir_op="000101" else -- !(op1 == op2) BNE
|
477 |
|
|
"011" when p1_ir_op="000110" else -- op1 <= 0 BLEZ
|
478 |
|
|
"111" when p1_ir_op="000111" else -- !(op1 <= 0) BGTZ
|
479 |
|
|
"000"; -- always
|
480 |
|
|
|
481 |
|
|
-- ... and use mux control signal to select the condition value
|
482 |
|
|
with p1_jump_cond_sel select p0_jump_cond_value <=
|
483 |
|
|
p1_alu_flags.inp1_lt_zero when "001",
|
484 |
|
|
not p1_alu_flags.inp1_lt_zero when "101",
|
485 |
|
|
p1_alu_flags.inp1_eq_inp2 when "010",
|
486 |
|
|
not p1_alu_flags.inp1_eq_inp2 when "110",
|
487 |
|
|
(p1_alu_flags.inp1_lt_inp2 or
|
488 |
|
|
p1_alu_flags.inp1_eq_inp2) when "011",
|
489 |
|
|
not (p1_alu_flags.inp1_lt_inp2 or
|
490 |
|
|
p1_alu_flags.inp1_eq_inp2) when "111",
|
491 |
|
|
'1' when others;
|
492 |
|
|
|
493 |
|
|
-- Decode instructions that launch exceptions
|
494 |
|
|
p1_exception <= '1' when p1_op_special='1' and p1_ir_reg(5 downto 1)="00110"
|
495 |
|
|
else '0';
|
496 |
|
|
|
497 |
|
|
-- Decode MTC0/MFC0 instructions
|
498 |
|
|
p1_set_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000100" else '0';
|
499 |
|
|
p1_get_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000000" else '0';
|
500 |
|
|
|
501 |
|
|
-- FIXME elaborate and explain this
|
502 |
|
|
|
503 |
|
|
p1_op_special <= '1' when p1_ir_op="000000" else '0';
|
504 |
|
|
|
505 |
|
|
p1_do_reg_jump <= '1' when p1_op_special='1' and p1_ir_fn(5 downto 1)="00100" else '0';
|
506 |
|
|
p1_do_zero_ext_imm <= '1' when (p1_ir_op(31 downto 28)="0011") else '0';
|
507 |
|
|
|
508 |
|
|
-- Decode input data mux control (LW, LH, LB, LBU, LHU) and load enable
|
509 |
|
|
p1_do_load <= '1' when p1_ir_op(31 downto 29)="100" else '0';
|
510 |
|
|
|
511 |
|
|
-- Register bank load enable for instructions with op=0 (SPECIAL).
--
-- NOTE: this used to be p1_op_special='1' ANDed with an OR-list of p1_ir_op
-- bit patterns. p1_op_special='1' already means p1_ir_op="000000", so the
-- first pattern of that list (op(31:29)="000" and op(27:26)="00") was always
-- true and the remaining patterns were redundant: the whole expression was
-- exactly equivalent to p1_op_special. It is written that way here so that
-- the real behaviour is visible.
--
-- FIXME as a consequence every SPECIAL instruction is flagged as writing back
-- to the register bank; the function field is not decoded here.
p1_load_alu_set0 <= '1' when p1_op_special='1' else '0';
|
525 |
|
|
|
526 |
|
|
with p1_ir_op select p1_load_alu_set1 <=
|
527 |
|
|
'1' when "001000",
|
528 |
|
|
'1' when "001001",
|
529 |
|
|
'1' when "001010",
|
530 |
|
|
'1' when "001011",
|
531 |
|
|
'1' when "001100",
|
532 |
|
|
'1' when "001101",
|
533 |
|
|
'1' when "001110",
|
534 |
|
|
'1' when "001111",
|
535 |
|
|
-- FIXME a few others missing: MFC0, etc
|
536 |
|
|
'0' when others;
|
537 |
|
|
p1_load_alu <= p1_load_alu_set0 or p1_load_alu_set1;
|
538 |
|
|
|
539 |
|
|
p1_ld_upper_hword <= p1_ir_op(27); -- use input upper hword vs. sign extend/zero
|
540 |
|
|
p1_ld_upper_byte <= p1_ir_op(26); -- use input upper byte vs. sign extend/zero
|
541 |
|
|
p1_ld_unsigned <= p1_ir_op(28); -- sign extend vs. zero extend
|
542 |
|
|
|
543 |
|
|
-- ALU input-2 selection: use external data for 2x opcodes (loads)
|
544 |
|
|
p1_alu_op2_sel_set0 <=
|
545 |
|
|
"11" when p1_ir_op(31 downto 30)="10" or p1_ir_op(29)='1' else
|
546 |
|
|
"00";
|
547 |
|
|
|
548 |
|
|
-- ALU input-2 selection: use registers Hi and Lo for MFHI, MFLO
|
549 |
|
|
with p1_ir_fn select p1_alu_op2_sel_set1 <=
|
550 |
|
|
"01" when "010000",
|
551 |
|
|
"10" when "010010",
|
552 |
|
|
"00" when others;
|
553 |
|
|
|
554 |
|
|
-- ALU input-2 final selection
|
555 |
|
|
p1_alu_op2_sel <= p1_alu_op2_sel_set0 or p1_alu_op2_sel_set1;
|
556 |
|
|
|
557 |
|
|
-- Decode store operations
|
558 |
|
|
p1_do_store <= '1' when p1_ir_op(31 downto 29)="101" else '0';
|
559 |
|
|
p1_store_size <= p1_ir_op(27 downto 26);
|
560 |
|
|
|
561 |
|
|
|
562 |
|
|
-- Decode load enables for Hi and Lo registers (MTHI and MTLO)
|
563 |
|
|
p1_load_hi <= '1' when p1_op_special='1' and p1_ir_fn="010001" else '0';
|
564 |
|
|
p1_load_lo <= '1' when p1_op_special='1' and p1_ir_fn="010011" else '0';
|
565 |
|
|
|
566 |
|
|
-- Decode ALU control signals
|
567 |
|
|
|
568 |
|
|
-- The ALU 'set on less than' path is selected for:
--   * op="000001" with IR(20 downto 17)="1000"
--   * op="000000" with function field bits 5..1 = "10101"
--   * op="001010" and op="001011"
-- The IR(20 downto 17) slice is 4 bits wide, so it is compared against a
-- 4-bit literal. (It used to be compared to the 5-bit "01000", which only
-- matched thanks to the zero-extending "=" of std_logic_unsigned.)
p1_ac.use_slt <= '1' when (p1_ir_op="000001" and p1_ir_reg(20 downto 17)="1000") or
                          (p1_ir_op="000000" and p1_ir_reg(5 downto 1)="10101") or
                          p1_ir_op="001010" or p1_ir_op="001011"
                 else '0';
|
572 |
|
|
p1_ac.arith_unsigned <= p1_ac.use_slt and p1_ir_reg(0);
|
573 |
|
|
|
574 |
|
|
p1_ac.use_logic(0) <= '1' when (p1_op_special='1' and p1_ir_fn(5 downto 3)/="000") or
|
575 |
|
|
-- all immediate arith and logic
|
576 |
|
|
p1_ir_op(31 downto 29)="001"
|
577 |
|
|
else '0';
|
578 |
|
|
p1_ac.use_logic(1) <= '1' when (p1_op_special='1' and p1_ir_fn="100111") else '0';
|
579 |
|
|
|
580 |
|
|
p1_ac.use_arith <= '1' when p1_ir_op(31 downto 28)="0010" or
|
581 |
|
|
(p1_op_special='1' and
|
582 |
|
|
(p1_ir_fn(5 downto 2)="1000" or
|
583 |
|
|
p1_ir_fn(5 downto 2)="1010"))
|
584 |
|
|
else '0';
|
585 |
|
|
|
586 |
|
|
-- selection of 2nd internal alu operand: {i2, /i2, i2<<16, 0x0}
|
587 |
|
|
p1_ac.neg_sel(1)<= '1' when p1_ir_op(29 downto 26) = "1111" else '0';
|
588 |
|
|
p1_ac.neg_sel(0)<= '1' when p1_ir_op="001010" or
|
589 |
|
|
p1_ir_op="001011" or
|
590 |
|
|
p1_ir_op(31 downto 28)="0001" or
|
591 |
|
|
(p1_op_special='1' and
|
592 |
|
|
(p1_ir_fn="100010" or
|
593 |
|
|
p1_ir_fn="100011" or
|
594 |
|
|
p1_ir_fn(5 downto 2)="1010"))
|
595 |
|
|
else '0';
|
596 |
|
|
p1_ac.cy_in <= p1_ac.neg_sel(0);
|
597 |
|
|
|
598 |
|
|
p1_ac.shift_sel <= p1_ir_fn(1 downto 0);
|
599 |
|
|
|
600 |
|
|
p1_ac.logic_sel <= "00" when (p1_op_special='1' and p1_ir_fn="100100") else
|
601 |
|
|
"01" when (p1_op_special='1' and p1_ir_fn="100101") else
|
602 |
|
|
"10" when (p1_op_special='1' and p1_ir_fn="100110") else
|
603 |
|
|
"01" when (p1_op_special='1' and p1_ir_fn="100111") else
|
604 |
|
|
"00" when (p1_ir_op="001100") else
|
605 |
|
|
"01" when (p1_ir_op="001101") else
|
606 |
|
|
"10" when (p1_ir_op="001110") else
|
607 |
|
|
"11";
|
608 |
|
|
|
609 |
|
|
p1_ac.shift_amount <= p1_ir_reg(10 downto 6) when p1_ir_fn(2)='0' else p1_rs(4 downto 0);
|
610 |
|
|
|
611 |
|
|
--------------------------------------------------------------------------------
|
612 |
|
|
|
613 |
|
|
-- Stage 1 pipeline register. Involved in ALU control.
|
614 |
|
|
pipeline_stage1_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- synchronous reset: no forwarding pending
            p1_rbank_rt_hazard <= '0';
            p1_rbank_rs_hazard <= '0';
        else
            -- The hazard flags computed in stage 0 are carried along with the
            -- instruction; they freeze during stall cycles.
            if stall_pipeline='0' then
                p1_rbank_rt_hazard <= p0_rbank_rt_hazard;
                p1_rbank_rs_hazard <= p0_rbank_rs_hazard;
            end if;
        end if;
    end if;
end process pipeline_stage1_register;
|
627 |
|
|
|
628 |
|
|
-- Stage 2 pipeline register. Involved in memory loads.
|
629 |
|
|
-- This register deals with two kinds of stalls:
|
630 |
|
|
-- * When the pipeline stalls because of a load interlock, this register is
|
631 |
|
|
-- allowed to update so that the load operation can complete while the rest of
|
632 |
|
|
-- the pipeline is frozen.
|
633 |
|
|
-- * When the stall is caused by any other reason, this register freezes with
|
634 |
|
|
-- the rest of the machine.
|
635 |
|
|
pipeline_stage2_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- synchronous reset (note that p2_rd_addr is not reset)
            p2_load_target <= "00000";
            p2_ld_unsigned <= '0';
            p2_ld_upper_byte <= '0';
            p2_ld_upper_hword <= '0';
            p2_do_load <= '0';
            p2_exception <= '0';
        elsif stall_pipeline='0' or load_interlock='1' then
            -- The register updates when the pipeline runs and also on the
            -- load interlock cycle; it freezes for any other stall.
            p2_exception <= p1_exception;
            p2_rd_addr <= p1_data_addr(1 downto 0);
            p2_ld_unsigned <= p1_ld_unsigned;
            p2_ld_upper_byte <= p1_ld_upper_byte;
            p2_ld_upper_hword <= p1_ld_upper_hword;
            p2_load_target <= p1_rd_num;

            -- The load flag is only passed on if the previous cycle was not a
            -- stall cycle; otherwise it is cleared.
            p2_do_load <= '0';
            if pipeline_stalled='0' then
                p2_do_load <= p1_do_load;
            end if;
        end if;
    end if;
end process pipeline_stage2_register;
|
661 |
|
|
|
662 |
|
|
--------------------------------------------------------------------------------
|
663 |
|
|
|
664 |
|
|
-- FIXME stall when needed: mem pause, mdiv pause and load interlock
|
665 |
|
|
|
666 |
|
|
pipeline_stall_registers:
process(clk)
begin
    if clk'event and clk='1' then
        -- Default (and synchronous reset) value for both flags is '0'...
        pipeline_stalled <= '0';
        pipeline_interlocked <= '0';

        -- ...and out of reset each flag is set when its source signal is '1'
        -- in the current cycle, i.e. they are one-cycle-delayed copies of
        -- stall_pipeline and load_interlock (see @note1).
        if reset/='1' then
            if stall_pipeline='1' then
                pipeline_stalled <= '1';
            end if;
            if load_interlock='1' then
                pipeline_interlocked <= '1';
            end if;
        end if;
    end if;
end process pipeline_stall_registers;
|
687 |
|
|
|
688 |
|
|
-- FIXME make sure this combinational will not have bad glitches
|
689 |
|
|
stall_pipeline <= mem_wait or load_interlock;
|
690 |
|
|
|
691 |
|
|
|
692 |
|
|
-- FIXME load interlock should happen only if the instruction following
|
693 |
|
|
-- the load actually uses the load target register. Something like this:
|
694 |
|
|
-- (p1_do_load='1' and (p1_rd_num=p0_rs_num or p1_rd_num=p0_rt_num))
|
695 |
|
|
load_interlock <= '1' when (p1_do_load='1' and pipeline_stalled='0') else '0';
|
696 |
|
|
|
697 |
|
|
--------------------------------------------------------------------------------
|
698 |
|
|
|
699 |
|
|
p1_data_offset(31 downto 16) <= (others => p1_data_imm(15));
|
700 |
|
|
p1_data_offset(15 downto 0) <= p1_data_imm(15 downto 0);
|
701 |
|
|
|
702 |
|
|
p1_data_addr <= p1_rs + p1_data_offset;
|
703 |
|
|
|
704 |
|
|
--------------------------------------------------------------------------------
|
705 |
|
|
|
706 |
|
|
-- byte_we is a function of the write size and alignment
|
707 |
|
|
-- size = {00=1,01=2,11=4}; we 3 is MSB, 0 is LSB; big endian => 00 is msb
|
708 |
|
|
p1_we_control <= pipeline_stalled & p1_do_store & p1_store_size & p1_data_addr(1 downto 0);
|
709 |
|
|
|
710 |
|
|
with p1_we_control select byte_we <=
|
711 |
|
|
"1000" when "010000", -- SB %0
|
712 |
|
|
"0100" when "010001", -- SB %1
|
713 |
|
|
"0010" when "010010", -- SB %2
|
714 |
|
|
"0001" when "010011", -- SB %3
|
715 |
|
|
"1100" when "010100", -- SH %0
|
716 |
|
|
"0011" when "010110", -- SH %2
|
717 |
|
|
"1111" when "011100", -- SW %4
|
718 |
|
|
"0000" when others; -- all other combinations are spurious so don't write
|
719 |
|
|
|
720 |
|
|
-- Data to be stored always comes straight from the reg bank, but it needs to
|
721 |
|
|
-- be shifted so that the LSB is aligned to the write address:
|
722 |
|
|
|
723 |
|
|
data_wr(7 downto 0) <= p1_rt(7 downto 0);
|
724 |
|
|
|
725 |
|
|
with p1_we_control select data_wr(15 downto 8) <=
|
726 |
|
|
p1_rt( 7 downto 0) when "010010", -- SB %2
|
727 |
|
|
p1_rt(15 downto 8) when others;
|
728 |
|
|
|
729 |
|
|
with p1_we_control select data_wr(23 downto 16) <=
|
730 |
|
|
p1_rt( 7 downto 0) when "010001", -- SB %1
|
731 |
|
|
p1_rt( 7 downto 0) when "010100", -- SH %0
|
732 |
|
|
p1_rt(23 downto 16) when others;
|
733 |
|
|
|
734 |
|
|
with p1_we_control select data_wr(31 downto 24) <=
|
735 |
|
|
p1_rt( 7 downto 0) when "010000", -- SB %0
|
736 |
|
|
p1_rt(15 downto 8) when "010100", -- SH %0
|
737 |
|
|
p1_rt(31 downto 24) when others;
|
738 |
|
|
|
739 |
|
|
|
740 |
|
|
--##############################################################################
|
741 |
|
|
-- CP0 (what little is implemented of it)
|
742 |
|
|
|
743 |
|
|
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- "10" => mode=kernel; ints=disabled
            -- (cp0_epc is not affected by reset)
            cp0_status <= "10";
        else
            -- no need to check for stall cycles when loading these

            -- EPC captures the current PC register when an exception is
            -- launched
            if p1_exception='1' then
                cp0_epc <= p0_pc_reg;
            end if;

            -- The status register is loaded with the low bits of rs
            -- FIXME check for CP0 reg index
            if p1_set_cp0='1' then
                cp0_status <= p1_rs(cp0_status'high downto 0);
            end if;
        end if;
    end if;
end process;
|
761 |
|
|
|
762 |
|
|
-- FIXME the mux should mask to zero for any unused reg index
|
763 |
|
|
cp0_reg_read <= X"0000000" & "00" & cp0_status when p1_rd_num="01100" else
|
764 |
|
|
cp0_epc & "00";
|
765 |
|
|
|
766 |
|
|
end architecture rtl;
|
767 |
|
|
|
768 |
|
|
--------------------------------------------------------------------------------
|
769 |
|
|
-- Implementation notes
|
770 |
|
|
--------------------------------------------------------------------------------
|
771 |
|
|
-- @note1 :
|
772 |
|
|
--
|
773 |
|
|
-- This is the meaning of these two signals:
|
774 |
|
|
-- pipeline_stalled & pipeline_interlocked =>
|
775 |
|
|
-- "00" => normal state
|
776 |
|
|
-- "01" => normal state (makes for easier decoding)
|
777 |
|
|
-- "10" => all stages of pipeline stalled, including rbank
|
778 |
|
|
-- "11" => all stages of pipeline stalled, except reg bank write port
|
779 |
|
|
--
|
780 |
|
|
-- Just to clarify, 'stage X stalled' here means that the registers named
|
781 |
|
|
-- pX_* don't load.
|
782 |
|
|
--
|
783 |
|
|
-- The register bank WE is enabled when the pipeline is not stalled and when
|
784 |
|
|
-- it is stalled because of a load interlock; so that in case of interlock the
|
785 |
|
|
-- load operation can complete while the rest of the pipeline is frozen.
|
786 |
|
|
--------------------------------------------------------------------------------
|