1 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
2 |
|
|
-- ion_cpu.vhdl -- MIPS-I(tm) compatible CPU core
|
3 |
|
|
--------------------------------------------------------------------------------
|
4 |
|
|
-- project: ION (http://www.opencores.org/project,ion_cpu)
|
5 |
|
|
-- author: Jose A. Ruiz (ja_rd@hotmail.com)
|
6 |
|
|
-- created: Jan/11/2011
|
7 |
101 |
ja_rd |
-- last modified: Mar/26/2011 (ja_rd@hotmail.com)
|
8 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
9 |
|
|
-- Software placed into the public domain by the author. Use under the terms of
|
10 |
|
|
-- the GPL.
|
11 |
|
|
-- Software 'as is' without warranty. Author liable for nothing.
|
12 |
|
|
--
|
13 |
|
|
--------------------------------------------------------------------------------
|
14 |
|
|
--### MIPS-I things not implemented
|
15 |
28 |
ja_rd |
-- # Invalid instruction trapping:
|
16 |
30 |
ja_rd |
-- * invalid opcodes do trap but the logic that prevents bad opcodes from
|
17 |
|
|
-- having side effects has not been tested yet.
|
18 |
2 |
ja_rd |
-- # Kernel/user status
|
19 |
30 |
ja_rd |
-- # RTE instruction (or ERET)
|
20 |
2 |
ja_rd |
-- # Most of the CP0 registers and of course all of the CP1
|
21 |
|
|
-- # External interrupts
|
22 |
|
|
--
|
23 |
|
|
--### Things implemented but not tested
|
24 |
62 |
ja_rd |
-- # Memory pause input -- only tested with stub cache
|
25 |
2 |
ja_rd |
--
|
26 |
|
|
--### Things with provisional implementation
|
27 |
|
|
--
|
28 |
|
|
-- 1.- Load interlocks: the pipeline is stalled for every load instruction, even
|
29 |
|
|
-- if the target register is not used in the following instruction. So that
|
30 |
|
|
-- every load takes two cycles.
|
31 |
|
|
-- The interlock logic should check register indices.
|
32 |
|
|
--
|
33 |
|
|
--------------------------------------------------------------------------------
|
34 |
|
|
|
35 |
|
|
library ieee;
|
36 |
|
|
use ieee.std_logic_1164.all;
|
37 |
|
|
use ieee.std_logic_arith.all;
|
38 |
|
|
use ieee.std_logic_unsigned.all;
|
39 |
|
|
use work.mips_pkg.all;
|
40 |
|
|
|
41 |
|
|
-- mips_cpu: top-level entity of the ION MIPS-I compatible CPU core.
--
-- The generic defaults are the package constants of the same name. They are
-- written as expanded names (work.mips_pkg.<name>) on purpose: inside a
-- generic's own declaration the use-visible package constant is hidden by the
-- generic being declared, and the generic itself is not visible yet, so the
-- simple name is rejected by strict VHDL analyzers.
entity mips_cpu is
    generic(
        -- Reset vector address minus 4. Bits 31..2 are loaded into the PC
        -- register while reset is asserted.
        RESET_VECTOR_M4 : t_word := work.mips_pkg.RESET_VECTOR_M4;
        -- Trap vector address. Bits 31..2 become the PC target when an
        -- exception is raised.
        TRAP_VECTOR     : t_word := work.mips_pkg.TRAP_VECTOR;
        -- Type of memory to be used for register bank in xilinx HW
        XILINX_REGBANK  : string := "distributed" -- {distributed|block}
    );
    port(
        -- Clock; the clocked processes of the architecture act on its rising
        -- edge.
        clk             : in std_logic;
        -- Active-high reset, sampled inside the clocked processes.
        reset           : in std_logic;
        interrupt       : in std_logic;

        -- Common data read/write address.
        data_addr       : out std_logic_vector(31 downto 0);

        -- Data read from memory; feeds the load shifter/masker.
        data_rd         : in std_logic_vector(31 downto 0);
        -- '1' when a load is decoded and the pipeline is not stalled.
        data_rd_vma     : out std_logic;

        byte_we         : out std_logic_vector(3 downto 0);
        data_wr         : out std_logic_vector(31 downto 0);

        -- Word address (bits 31..2) of the next instruction fetch.
        code_rd_addr    : out std_logic_vector(31 downto 2);
        -- Instruction word; loaded into the instruction register when the
        -- pipeline is not stalled.
        code_rd         : in std_logic_vector(31 downto 0);
        -- '1' when the pipeline is not stalled and reset has completed.
        code_rd_vma     : out std_logic;

        cache_enable    : out std_logic;
        ic_invalidate   : out std_logic;

        -- While '1', register bank writeback is blocked.
        mem_wait        : in std_logic
    );
end; --entity mips_cpu
|
73 |
|
|
|
74 |
|
|
architecture rtl of mips_cpu is
|
75 |
|
|
|
76 |
|
|
--------------------------------------------------------------------------------
|
77 |
|
|
-- Pipeline stage 0
|
78 |
|
|
|
79 |
|
|
signal p0_pc_reg : t_pc;
|
80 |
8 |
ja_rd |
signal p0_pc_restart : t_pc;
|
81 |
2 |
ja_rd |
signal p0_pc_incremented : t_pc;
|
82 |
|
|
signal p0_pc_jump : t_pc;
|
83 |
|
|
signal p0_pc_branch : t_pc;
|
84 |
|
|
signal p0_pc_target : t_pc;
|
85 |
|
|
signal p0_pc_next : t_pc;
|
86 |
|
|
signal p0_rs_num : t_regnum;
|
87 |
|
|
signal p0_rt_num : t_regnum;
|
88 |
|
|
signal p0_jump_cond_value : std_logic;
|
89 |
|
|
signal p0_rbank_rs_hazard : std_logic;
|
90 |
|
|
signal p0_rbank_rt_hazard : std_logic;
|
91 |
28 |
ja_rd |
signal p0_uses_rs1 : std_logic;
|
92 |
|
|
signal p0_uses_rs2 : std_logic;
|
93 |
2 |
ja_rd |
|
94 |
28 |
ja_rd |
signal p1_rs1_hazard : std_logic;
|
95 |
|
|
signal p1_rs2_hazard : std_logic;
|
96 |
|
|
|
97 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
98 |
|
|
-- Pipeline stage 1
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
signal p1_rbank : t_rbank := (others => X"00000000");
|
102 |
|
|
|
103 |
|
|
-- IMPORTANT: This attribute is used by Xilinx tools to select how to implement
|
104 |
|
|
-- the register bank. If we don't use it, by default XST would infer 2 BRAMs for
|
105 |
|
|
-- the 1024-bit 3-port reg bank, which you probably don't want.
|
106 |
|
|
-- This can take the values {distributed|block}.
|
107 |
|
|
attribute ram_style : string;
|
108 |
30 |
ja_rd |
attribute ram_style of p1_rbank : signal is XILINX_REGBANK;
|
109 |
2 |
ja_rd |
|
110 |
|
|
signal p1_rs, p1_rt : t_word;
|
111 |
|
|
signal p1_rs_rbank : t_word;
|
112 |
|
|
signal p1_rt_rbank : t_word;
|
113 |
|
|
signal p1_rbank_forward : t_word;
|
114 |
|
|
signal p1_rd_num : t_regnum;
|
115 |
28 |
ja_rd |
signal p1_c0_rs_num : t_regnum;
|
116 |
2 |
ja_rd |
signal p1_rbank_wr_addr : t_regnum;
|
117 |
|
|
signal p1_rbank_we : std_logic;
|
118 |
|
|
signal p1_rbank_wr_data : t_word;
|
119 |
|
|
signal p1_alu_inp1 : t_word;
|
120 |
|
|
signal p1_alu_inp2 : t_word;
|
121 |
|
|
signal p1_alu_outp : t_word;
|
122 |
|
|
-- ALU control inputs (shortened name for brevity in expressions)
|
123 |
|
|
signal p1_ac : t_alu_control;
|
124 |
|
|
-- ALU flag outputs (comparison results)
|
125 |
|
|
signal p1_alu_flags : t_alu_flags;
|
126 |
|
|
-- immediate data, sign- or zero-extended as required by IR
|
127 |
|
|
signal p1_data_imm : t_word;
|
128 |
|
|
signal p1_branch_offset : t_pc;
|
129 |
|
|
signal p1_branch_offset_sex:std_logic_vector(31 downto 18);
|
130 |
|
|
signal p1_rbank_rs_hazard : std_logic;
|
131 |
|
|
signal p1_rbank_rt_hazard : std_logic;
|
132 |
|
|
signal p1_jump_type_set0 : std_logic_vector(1 downto 0);
|
133 |
|
|
signal p1_jump_type_set1 : std_logic_vector(1 downto 0);
|
134 |
|
|
signal p1_ir_reg : std_logic_vector(31 downto 0);
|
135 |
|
|
signal p1_ir_op : std_logic_vector(31 downto 26);
|
136 |
|
|
signal p1_ir_fn : std_logic_vector(5 downto 0);
|
137 |
|
|
signal p1_op_special : std_logic;
|
138 |
|
|
signal p1_exception : std_logic;
|
139 |
|
|
signal p1_do_reg_jump : std_logic;
|
140 |
|
|
signal p1_do_zero_ext_imm : std_logic;
|
141 |
|
|
signal p1_set_cp0 : std_logic;
|
142 |
|
|
signal p1_get_cp0 : std_logic;
|
143 |
|
|
signal p1_alu_op2_sel : std_logic_vector(1 downto 0);
|
144 |
|
|
signal p1_alu_op2_sel_set0: std_logic_vector(1 downto 0);
|
145 |
|
|
signal p1_alu_op2_sel_set1: std_logic_vector(1 downto 0);
|
146 |
|
|
signal p1_do_load : std_logic;
|
147 |
|
|
signal p1_do_store : std_logic;
|
148 |
|
|
signal p1_store_size : std_logic_vector(1 downto 0);
|
149 |
|
|
signal p1_we_control : std_logic_vector(5 downto 0);
|
150 |
|
|
signal p1_load_alu : std_logic;
|
151 |
|
|
signal p1_load_alu_set0 : std_logic;
|
152 |
|
|
signal p1_load_alu_set1 : std_logic;
|
153 |
|
|
signal p1_ld_upper_hword : std_logic;
|
154 |
|
|
signal p1_ld_upper_byte : std_logic;
|
155 |
|
|
signal p1_ld_unsigned : std_logic;
|
156 |
|
|
signal p1_jump_type : std_logic_vector(1 downto 0);
|
157 |
|
|
signal p1_link : std_logic;
|
158 |
|
|
signal p1_jump_cond_sel : std_logic_vector(2 downto 0);
|
159 |
|
|
signal p1_data_addr : t_addr;
|
160 |
|
|
signal p1_data_offset : t_addr;
|
161 |
|
|
|
162 |
12 |
ja_rd |
signal p1_muldiv_result : t_word;
|
163 |
|
|
signal p1_muldiv_func : t_mult_function;
|
164 |
|
|
signal p1_muldiv_running : std_logic;
|
165 |
|
|
signal p1_muldiv_started : std_logic;
|
166 |
|
|
signal p1_muldiv_stall : std_logic;
|
167 |
|
|
|
168 |
23 |
ja_rd |
signal p1_unknown_opcode : std_logic;
|
169 |
12 |
ja_rd |
|
170 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
171 |
|
|
-- Pipeline stage 2
|
172 |
|
|
|
173 |
12 |
ja_rd |
signal p2_muldiv_started : std_logic;
|
174 |
2 |
ja_rd |
signal p2_exception : std_logic;
|
175 |
|
|
signal p2_rd_addr : std_logic_vector(1 downto 0);
|
176 |
|
|
signal p2_rd_mux_control : std_logic_vector(3 downto 0);
|
177 |
|
|
signal p2_load_target : t_regnum;
|
178 |
|
|
signal p2_do_load : std_logic;
|
179 |
|
|
signal p2_ld_upper_hword : std_logic;
|
180 |
|
|
signal p2_ld_upper_byte : std_logic;
|
181 |
|
|
signal p2_ld_unsigned : std_logic;
|
182 |
|
|
signal p2_wback_mux_sel : std_logic_vector(1 downto 0);
|
183 |
|
|
signal p2_data_word_rd : t_word;
|
184 |
|
|
signal p2_data_word_ext : std_logic;
|
185 |
|
|
|
186 |
|
|
--------------------------------------------------------------------------------
|
187 |
|
|
-- Global control signals
|
188 |
|
|
|
189 |
|
|
signal load_interlock : std_logic;
|
190 |
|
|
signal stall_pipeline : std_logic;
|
191 |
|
|
-- pipeline is stalled for any reason
|
192 |
|
|
signal pipeline_stalled : std_logic;
|
193 |
46 |
ja_rd |
|
194 |
|
|
signal stalled_memwait : std_logic;
|
195 |
|
|
signal stalled_muldiv : std_logic;
|
196 |
2 |
ja_rd |
-- pipeline is stalled because of a load instruction interlock
|
197 |
46 |
ja_rd |
signal stalled_interlock : std_logic;
|
198 |
2 |
ja_rd |
|
199 |
101 |
ja_rd |
signal reset_done : std_logic;
|
200 |
46 |
ja_rd |
|
201 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
202 |
|
|
-- CP0 registers and signals
|
203 |
|
|
|
204 |
|
|
-- CP0[12]: status register
|
205 |
|
|
signal cp0_status : std_logic_vector(1 downto 0);
|
206 |
101 |
ja_rd |
-- CP0[12]: status register, cache control
|
207 |
|
|
signal cp0_cache_control : std_logic_vector(17 downto 16);
|
208 |
2 |
ja_rd |
-- Output of CP0 register bank (only a few regs are implemented)
|
209 |
|
|
signal cp0_reg_read : t_word;
|
210 |
|
|
-- CP0[14]: EPC register (PC value saved at exceptions)
|
211 |
|
|
signal cp0_epc : t_pc;
|
212 |
28 |
ja_rd |
-- CP0[13]: 'Cause' register (cause and attributes of exception)
|
213 |
|
|
signal cp0_cause : t_word;
|
214 |
|
|
signal cp0_in_delay_slot : std_logic;
|
215 |
|
|
signal cp0_cause_bd : std_logic;
|
216 |
|
|
signal cp0_cause_ce : std_logic_vector(1 downto 0);
|
217 |
|
|
signal cp0_cause_exc_code : std_logic_vector(4 downto 0);
|
218 |
2 |
ja_rd |
|
219 |
|
|
begin
|
220 |
|
|
|
221 |
|
|
--##############################################################################
|
222 |
|
|
-- Register bank & datapath
|
223 |
|
|
|
224 |
|
|
-- Register indices are 'decoded' out of the instruction word BEFORE loading IR
|
225 |
|
|
p0_rs_num <= std_logic_vector(code_rd(25 downto 21));
|
226 |
|
|
with p1_ir_reg(31 downto 26) select p1_rd_num <=
|
227 |
|
|
p1_ir_reg(15 downto 11) when "000000",
|
228 |
|
|
p1_ir_reg(20 downto 16) when others;
|
229 |
|
|
|
230 |
|
|
p0_rt_num <= std_logic_vector(code_rd(20 downto 16)); -- also called rs2 in the docs
|
231 |
|
|
|
232 |
|
|
--------------------------------------------------------------------------------
|
233 |
|
|
-- Data input shifter & masker (LB,LBU,LH,LHU,LW)
|
234 |
|
|
|
235 |
|
|
p2_rd_mux_control <= p2_ld_upper_hword & p2_ld_upper_byte & p2_rd_addr;
|
236 |
|
|
|
237 |
|
|
-- Extension for unused bits will be zero or the sign (bit 7 or bit 15)
|
238 |
35 |
ja_rd |
p2_data_word_ext <= '0' when p2_ld_unsigned='1' else
|
239 |
83 |
ja_rd |
-- LH
|
240 |
|
|
data_rd(31) when p2_ld_upper_byte='1' and p2_rd_addr="00" else
|
241 |
|
|
data_rd(15) when p2_ld_upper_byte='1' and p2_rd_addr="10" else
|
242 |
|
|
-- LB
|
243 |
|
|
data_rd(7) when p2_rd_addr="11" else
|
244 |
2 |
ja_rd |
data_rd(15) when p2_rd_addr="10" else
|
245 |
83 |
ja_rd |
data_rd(23) when p2_rd_addr="01" else
|
246 |
|
|
data_rd(31);
|
247 |
2 |
ja_rd |
|
248 |
83 |
ja_rd |
-- data_rd(15) when p2_ld_upper_byte='1' else
|
249 |
|
|
-- data_rd(7) when p2_rd_addr="11" else
|
250 |
|
|
-- data_rd(15) when p2_rd_addr="10" else
|
251 |
|
|
-- data_rd(23);
|
252 |
|
|
|
253 |
2 |
ja_rd |
-- byte 0 may come from any of the 4 bytes of the input word
|
254 |
|
|
with p2_rd_mux_control select p2_data_word_rd(7 downto 0) <=
|
255 |
|
|
data_rd(31 downto 24) when "0000",
|
256 |
|
|
data_rd(23 downto 16) when "0001",
|
257 |
|
|
data_rd(23 downto 16) when "0100",
|
258 |
|
|
data_rd(15 downto 8) when "0010",
|
259 |
|
|
data_rd( 7 downto 0) when others;
|
260 |
|
|
|
261 |
|
|
-- byte 1 may come from input bytes 1 or 3 or may be extended for LB, LBU
|
262 |
|
|
with p2_rd_mux_control select p2_data_word_rd(15 downto 8) <=
|
263 |
|
|
data_rd(31 downto 24) when "0100",
|
264 |
|
|
data_rd(15 downto 8) when "0110",
|
265 |
|
|
data_rd(15 downto 8) when "1100",
|
266 |
|
|
data_rd(15 downto 8) when "1101",
|
267 |
|
|
data_rd(15 downto 8) when "1110",
|
268 |
|
|
data_rd(15 downto 8) when "1111",
|
269 |
|
|
(others => p2_data_word_ext) when others;
|
270 |
|
|
|
271 |
|
|
-- bytes 2,3 come straight from input or are extended for LH,LHU
|
272 |
|
|
with p2_ld_upper_hword select p2_data_word_rd(31 downto 16) <=
|
273 |
46 |
ja_rd |
(others => p2_data_word_ext) when '0',
|
274 |
|
|
data_rd(31 downto 16) when others;
|
275 |
2 |
ja_rd |
|
276 |
|
|
-- Select which data is to be written back to the reg bank and where
|
277 |
|
|
p1_rbank_wr_addr <= p1_rd_num when p2_do_load='0' and p1_link='0' else
|
278 |
|
|
"11111" when p2_do_load='0' and p1_link='1' else
|
279 |
|
|
p2_load_target;
|
280 |
|
|
|
281 |
|
|
p2_wback_mux_sel <=
|
282 |
|
|
"00" when p2_do_load='0' and p1_get_cp0='0' and p1_link='0' else
|
283 |
|
|
"01" when p2_do_load='1' and p1_get_cp0='0' and p1_link='0' else
|
284 |
|
|
"10" when p2_do_load='0' and p1_get_cp0='1' and p1_link='0' else
|
285 |
|
|
"11";
|
286 |
|
|
|
287 |
|
|
with (p2_wback_mux_sel) select p1_rbank_wr_data <=
|
288 |
|
|
p1_alu_outp when "00",
|
289 |
|
|
p2_data_word_rd when "01",
|
290 |
|
|
p0_pc_incremented & "00" when "11",
|
291 |
|
|
cp0_reg_read when others;
|
292 |
|
|
|
293 |
|
|
p1_rbank_we <= '1' when (p2_do_load='1' or p1_load_alu='1' or
|
294 |
|
|
p1_link='1' or p1_get_cp0='1') and
|
295 |
35 |
ja_rd |
-- If target register is $zero, ignore write
|
296 |
2 |
ja_rd |
p1_rbank_wr_addr/="00000" and
|
297 |
35 |
ja_rd |
-- if the cache controller keeps the cpu stopped, do
|
298 |
|
|
-- not writeback
|
299 |
|
|
mem_wait='0' and
|
300 |
46 |
ja_rd |
-- if stalled because of muldiv, block writeback
|
301 |
|
|
stalled_muldiv='0' and --@note1
|
302 |
2 |
ja_rd |
-- on exception, abort next instruction (by preventing
|
303 |
|
|
-- regbank writeback).
|
304 |
|
|
p2_exception='0'
|
305 |
|
|
else '0';
|
306 |
|
|
|
307 |
|
|
-- Register bank as triple-port RAM. Should synth to 2 BRAMs unless you use
|
308 |
|
|
-- synth attributes to prevent it (see 'ram_style' attribute above) or your
|
309 |
|
|
-- FPGA has 3-port BRAMS, or has none.
|
310 |
|
|
synchronous_reg_bank:
process(clk)
begin
    if clk='1' and clk'event then
        -- Write port: store the writeback data at the writeback address
        -- whenever the write enable is asserted.
        if p1_rbank_we='1' then
            p1_rbank(conv_integer(p1_rbank_wr_addr)) <= p1_rbank_wr_data;
        end if;

        -- Read ports: Rs and Rt are registered under the same condition as
        -- the IR, so they keep their values while the pipeline is frozen.
        -- p1_rbank is a signal, so a read of the register being written on
        -- this same edge still returns the previous contents.
        if stall_pipeline='0' then
            p1_rs_rbank <= p1_rbank(conv_integer(p0_rs_num));
            p1_rt_rbank <= p1_rbank(conv_integer(p0_rt_num));
        end if;
    end if;
end process synchronous_reg_bank;
|
325 |
|
|
|
326 |
|
|
-- Register writeback data in case it needs to be forwarded.
|
327 |
|
|
data_forward_register:
process(clk)
begin
    if clk='1' and clk'event then
        -- Keep a copy of the last value written to the register bank so it
        -- can be substituted for a stale read port value. Stall cycles need
        -- no special handling here: the copy only changes on a real write.
        if p1_rbank_we='1' then
            p1_rbank_forward <= p1_rbank_wr_data;
        end if;
    end if;
end process data_forward_register;
|
336 |
|
|
|
337 |
|
|
-- Bypass sync RAM if we're reading and writing to the same address. This saves
|
338 |
|
|
-- 1 stall cycle and fixes the data hazard.
|
339 |
|
|
p0_rbank_rs_hazard <= '1' when p1_rbank_wr_addr=p0_rs_num and p1_rbank_we='1'
|
340 |
|
|
else '0';
|
341 |
|
|
p0_rbank_rt_hazard <= '1' when p1_rbank_wr_addr=p0_rt_num and p1_rbank_we='1'
|
342 |
|
|
else '0';
|
343 |
|
|
|
344 |
|
|
p1_rs <= p1_rs_rbank when p1_rbank_rs_hazard='0' else p1_rbank_forward;
|
345 |
|
|
p1_rt <= p1_rt_rbank when p1_rbank_rt_hazard='0' else p1_rbank_forward;
|
346 |
|
|
|
347 |
|
|
-- Zero extension/Sign extension for instruction immediate data
|
348 |
|
|
p1_data_imm(15 downto 0) <= p1_ir_reg(15 downto 0);
|
349 |
|
|
|
350 |
|
|
with p1_do_zero_ext_imm select p1_data_imm(31 downto 16) <=
|
351 |
|
|
(others => '0') when '1',
|
352 |
|
|
(others => p1_ir_reg(15)) when others;
|
353 |
|
|
|
354 |
|
|
|
355 |
|
|
--------------------------------------------------------------------------------
|
356 |
|
|
-- ALU & ALU input multiplexors
|
357 |
|
|
|
358 |
|
|
p1_alu_inp1 <= p1_rs;
|
359 |
|
|
|
360 |
|
|
with p1_alu_op2_sel select p1_alu_inp2 <=
|
361 |
12 |
ja_rd |
p1_data_imm when "11",
|
362 |
23 |
ja_rd |
p1_muldiv_result when "01",
|
363 |
|
|
--p1_muldiv_result when "10", -- FIXME mux input wasted!
|
364 |
12 |
ja_rd |
p1_rt when others;
|
365 |
2 |
ja_rd |
|
366 |
|
|
alu_inst : entity work.mips_alu
|
367 |
|
|
port map (
|
368 |
|
|
clk => clk,
|
369 |
|
|
reset => reset,
|
370 |
|
|
ac => p1_ac,
|
371 |
|
|
flags => p1_alu_flags,
|
372 |
|
|
|
373 |
|
|
inp1 => p1_alu_inp1,
|
374 |
|
|
inp2 => p1_alu_inp2,
|
375 |
|
|
outp => p1_alu_outp
|
376 |
|
|
);
|
377 |
|
|
|
378 |
|
|
|
379 |
|
|
--------------------------------------------------------------------------------
|
380 |
|
|
-- Mul/Div block interface
|
381 |
|
|
|
382 |
12 |
ja_rd |
-- Compute the mdiv block function word. If p1_muldiv_func has any value other
|
383 |
|
|
-- than MULT_NOTHING a new mdiv operation will start, truncating whatever other
|
384 |
|
|
-- operation that may have been in course.
|
385 |
|
|
-- So we encode here the function to be performed and make sure the value stays
|
386 |
|
|
-- there for only one cycle (the first ALU cycle of the mul/div instruction).
|
387 |
2 |
ja_rd |
|
388 |
12 |
ja_rd |
-- This will be '1' for all mul/div operations other than NOP...
|
389 |
|
|
p1_muldiv_func(3) <= '1' when p1_op_special='1' and
|
390 |
|
|
p1_ir_fn(5 downto 4)="01" and
|
391 |
|
|
-- ...but only if the mdiv is not already running
|
392 |
|
|
p2_muldiv_started = '0' and
|
393 |
|
|
p1_muldiv_running ='0'
|
394 |
|
|
else '0';
|
395 |
2 |
ja_rd |
|
396 |
12 |
ja_rd |
-- When bit(3) is zero, the rest are zeroed too. Otherwise, they come from IR
|
397 |
|
|
p1_muldiv_func(2 downto 0) <=
|
398 |
|
|
p1_ir_fn(3) & p1_ir_fn(1 downto 0) when p1_muldiv_func(3)='1'
|
399 |
|
|
else "000";
|
400 |
2 |
ja_rd |
|
401 |
12 |
ja_rd |
mult_div: entity work.mips_mult
|
402 |
|
|
port map (
|
403 |
|
|
a => p1_rs,
|
404 |
|
|
b => p1_rt,
|
405 |
|
|
c_mult => p1_muldiv_result,
|
406 |
|
|
pause_out => p1_muldiv_running,
|
407 |
|
|
mult_func => p1_muldiv_func,
|
408 |
|
|
clk => clk,
|
409 |
|
|
reset_in => reset
|
410 |
|
|
);
|
411 |
|
|
|
412 |
|
|
-- Active only for the 1st ALU cycle of any mul/div instruction
|
413 |
|
|
p1_muldiv_started <= '1' when p1_op_special='1' and
|
414 |
|
|
p1_ir_fn(5 downto 3)="011" and
|
415 |
|
|
--
|
416 |
|
|
p1_muldiv_running='0'
|
417 |
|
|
else '0';
|
418 |
|
|
|
419 |
|
|
-- Stall the pipeline to enable mdiv operation completion.
|
420 |
|
|
-- We need p2_muldiv_started to distinguish the cycle before p1_muldiv_running
|
421 |
|
|
-- is asserted and the cycle after it deasserts.
|
422 |
|
|
-- Otherwise we would endlessly reexecute the same muldiv instruction after
|
423 |
|
|
-- deassertion of p1_muldiv_running, since the IR was stalled and still contains
|
424 |
|
|
-- the mul opcode...
|
425 |
|
|
p1_muldiv_stall <= '1' when
|
426 |
|
|
-- Active for the cycle immediately before p1_muldiv_running asserts
|
427 |
|
|
-- and NOT for the cycle after it deasserts
|
428 |
|
|
(p1_muldiv_started='1' and p2_muldiv_started='0') or
|
429 |
|
|
-- Active until operation is complete
|
430 |
|
|
p1_muldiv_running = '1'
|
431 |
|
|
else '0';
|
432 |
|
|
|
433 |
|
|
|
434 |
2 |
ja_rd |
--##############################################################################
|
435 |
|
|
-- PC register and branch logic
|
436 |
|
|
|
437 |
|
|
-- p0_pc_reg will not be incremented on stall cycles
|
438 |
|
|
p0_pc_incremented <= p0_pc_reg + (not stall_pipeline);
|
439 |
|
|
|
440 |
|
|
-- main pc mux: jump or continue
|
441 |
|
|
p0_pc_next <=
|
442 |
|
|
p0_pc_target when
|
443 |
|
|
-- We jump on jump instructions whose condition is met...
|
444 |
6 |
ja_rd |
((p1_jump_type(1)='1' and p0_jump_cond_value='1' and
|
445 |
|
|
-- ...except we abort any jump that follows the victim of an exception
|
446 |
|
|
p2_exception='0') or
|
447 |
|
|
-- We jump on exceptions too...
|
448 |
2 |
ja_rd |
p1_exception='1')
|
449 |
6 |
ja_rd |
-- ... but we only jump at all if the pipeline is not stalled
|
450 |
2 |
ja_rd |
and stall_pipeline='0'
|
451 |
|
|
else p0_pc_incremented;
|
452 |
|
|
|
453 |
|
|
pc_register:
process(clk)
begin
    if clk='1' and clk'event then
        if reset='1' then
            -- Load <vector>-4 at reset so that the 1st fetch address is
            -- <vector>.
            p0_pc_reg <= RESET_VECTOR_M4(31 downto 2);
        elsif reset_done='1' then
            -- p0_pc_reg tracks the address register of the external sync ram.
            p0_pc_reg <= p0_pc_next;

            -- Delay slot bookkeeping, in case there is a trap: assume we are
            -- in a delay slot unless the check below says otherwise.
            cp0_in_delay_slot <= '1';

            -- p0_pc_restart = addr saved to EPC on interrupts (@note2).
            -- It is the address of the instruction triggering the exception,
            -- unless that instruction sits in a delay slot; then it stays at
            -- the address of the preceding jump instruction, as per the
            -- mips-1 specs. So it is only refreshed when the instruction in
            -- the IR is not a jump, or is a jump whose condition is not met.
            if (p1_jump_type="00" or p0_jump_cond_value='0') then
                p0_pc_restart <= p0_pc_reg;
                cp0_in_delay_slot <= '0'; -- NOT in a delay slot
            end if;
        end if;
    end if;
end process pc_register;
|
480 |
|
|
|
481 |
96 |
ja_rd |
-- Common rd/wr address; lowest 2 bits are output as debugging aid only
|
482 |
|
|
data_addr <= p1_data_addr(31 downto 0);
|
483 |
2 |
ja_rd |
|
484 |
|
|
-- FIXME these two need to be pushed behind a register; they are glitch-prone
|
485 |
|
|
data_rd_vma <= p1_do_load and not pipeline_stalled; -- FIXME register
|
486 |
101 |
ja_rd |
code_rd_vma <= (not stall_pipeline) and reset_done; -- FIXME register
|
487 |
2 |
ja_rd |
|
488 |
101 |
ja_rd |
-- reset_done will be asserted after the reset process is finished, when the
|
489 |
|
|
-- CPU can start operating normally.
|
490 |
|
|
-- We only use it to make sure code_rd_vma is not asserted prematurely.
|
491 |
|
|
wait_for_end_of_reset:
process(clk)
begin
    if clk='1' and clk'event then
        -- reset_done goes high on the first clock edge at which reset is not
        -- asserted, and is forced low again on any edge where reset is '1'.
        reset_done <= '1';
        if reset='1' then
            reset_done <= '0';
        end if;
    end if;
end process wait_for_end_of_reset;
|
502 |
|
|
|
503 |
|
|
|
504 |
2 |
ja_rd |
code_rd_addr <= p0_pc_next;
|
505 |
|
|
|
506 |
|
|
-- compute target of J/JR instructions
|
507 |
|
|
p0_pc_jump <= p1_rs(31 downto 2) when p1_do_reg_jump='1' else
|
508 |
|
|
p0_pc_reg(31 downto 28) & p1_ir_reg(25 downto 0);
|
509 |
|
|
|
510 |
|
|
-- compute target of relative branch instructions
|
511 |
|
|
p1_branch_offset_sex <= (others => p1_ir_reg(15));
|
512 |
|
|
p1_branch_offset <= p1_branch_offset_sex & p1_ir_reg(15 downto 0);
|
513 |
|
|
-- p0_pc_reg is the addr of the instruction in delay slot
|
514 |
|
|
p0_pc_branch <= p0_pc_reg + p1_branch_offset;
|
515 |
|
|
|
516 |
|
|
-- decide which jump target is to be used
|
517 |
62 |
ja_rd |
p0_pc_target <=
|
518 |
|
|
TRAP_VECTOR(31 downto 2) when p1_exception='1' else
|
519 |
|
|
p0_pc_jump when p1_jump_type(0)='1' else
|
520 |
|
|
p0_pc_branch;
|
521 |
2 |
ja_rd |
|
522 |
|
|
|
523 |
|
|
--##############################################################################
|
524 |
|
|
-- Instruction decoding and IR
|
525 |
|
|
|
526 |
|
|
instruction_register:
process(clk)
begin
    if clk='1' and clk'event then
        if reset='1' then
            -- Synchronous reset: clear the IR to all zeros.
            p1_ir_reg <= (others => '0');
        else
            -- Load the fetched instruction word, unless the pipeline is
            -- stalled; in that case the IR keeps the current instruction.
            if stall_pipeline='0' then
                p1_ir_reg <= code_rd;
            end if;
        end if;
    end if;
end process instruction_register;
|
537 |
|
|
|
538 |
|
|
-- 'Extract' main fields from IR, for convenience
|
539 |
|
|
p1_ir_op <= p1_ir_reg(31 downto 26);
|
540 |
|
|
p1_ir_fn <= p1_ir_reg(5 downto 0);
|
541 |
|
|
|
542 |
|
|
-- Decode jump type, if any, for instructions with op/=0
|
543 |
|
|
-- Jump type encoding, as consumed by the PC logic of this architecture:
--   bit 1 = '1' : the instruction is a jump or branch
--   bit 0 = '1' : the target is p0_pc_jump (J/JAL); '0' selects the relative
--                 branch target p0_pc_branch
with p1_ir_op select p1_jump_type_set0 <=
|
544 |
|
|
-- FIXME weed out invalid instructions
|
545 |
|
|
"10" when "000001", -- BLTZ, BGEZ, BLTZAL, BGEZAL
|
546 |
|
|
"11" when "000010", -- J
|
547 |
|
|
"11" when "000011", -- JAL
|
548 |
|
|
"10" when "000100", -- BEQ
|
549 |
|
|
"10" when "000101", -- BNE
|
550 |
|
|
"10" when "000110", -- BLEZ
|
551 |
|
|
"10" when "000111", -- BGTZ
|
552 |
|
|
"00" when others; -- no jump
|
553 |
|
|
|
554 |
|
|
-- Decode jump type, if any, for instructions with op=0
|
555 |
|
|
p1_jump_type_set1 <= "11" when p1_op_special='1' and
|
556 |
|
|
p1_ir_reg(5 downto 1)="00100"
|
557 |
|
|
else "00";
|
558 |
|
|
|
559 |
|
|
-- Decode jump type for the instruction in IR (composite of two formats)
|
560 |
|
|
p1_jump_type <= p1_jump_type_set0 or p1_jump_type_set1;
|
561 |
|
|
|
562 |
|
|
p1_link <= '1' when (p1_ir_op="000000" and p1_ir_reg(5 downto 0)="001001") or
|
563 |
|
|
(p1_ir_op="000001" and p1_ir_reg(20)='1') or
|
564 |
|
|
(p1_ir_op="000011")
|
565 |
|
|
else '0';
|
566 |
|
|
|
567 |
|
|
-- Decode jump condition: encode a mux control signal from IR...
|
568 |
|
|
-- 3-bit selector for the jump condition mux that produces p0_jump_cond_value.
-- In every non-zero code the top bit selects the negated form of the condition
-- chosen by the two low bits; "000" means the jump is unconditional.
p1_jump_cond_sel <=
|
569 |
|
|
"001" when p1_ir_op="000001" and p1_ir_reg(16)='0' else -- op1 < 0 BLTZ*
|
570 |
|
|
"101" when p1_ir_op="000001" and p1_ir_reg(16)='1' else -- !(op1 < 0) BGEZ*
|
571 |
|
|
"010" when p1_ir_op="000100" else -- op1 == op2 BEQ
|
572 |
|
|
"110" when p1_ir_op="000101" else -- !(op1 == op2) BNE
|
573 |
|
|
"011" when p1_ir_op="000110" else -- op1 <= 0 BLEZ
|
574 |
|
|
"111" when p1_ir_op="000111" else -- !(op1 <= 0) BGTZ
|
575 |
|
|
"000"; -- always
|
576 |
|
|
|
577 |
|
|
-- ... and use mux control signal to select the condition value
|
578 |
|
|
with p1_jump_cond_sel select p0_jump_cond_value <=
|
579 |
|
|
p1_alu_flags.inp1_lt_zero when "001",
|
580 |
|
|
not p1_alu_flags.inp1_lt_zero when "101",
|
581 |
|
|
p1_alu_flags.inp1_eq_inp2 when "010",
|
582 |
|
|
not p1_alu_flags.inp1_eq_inp2 when "110",
|
583 |
|
|
(p1_alu_flags.inp1_lt_inp2 or
|
584 |
|
|
p1_alu_flags.inp1_eq_inp2) when "011",
|
585 |
|
|
not (p1_alu_flags.inp1_lt_inp2 or
|
586 |
|
|
p1_alu_flags.inp1_eq_inp2) when "111",
|
587 |
|
|
'1' when others;
|
588 |
|
|
|
589 |
|
|
-- Decode instructions that launch exceptions
|
590 |
23 |
ja_rd |
p1_exception <= '1' when
|
591 |
|
|
(p1_op_special='1' and p1_ir_reg(5 downto 1)="00110") or
|
592 |
|
|
p1_unknown_opcode='1'
|
593 |
|
|
else '0';
|
594 |
2 |
ja_rd |
|
595 |
|
|
-- Decode MTC0/MFC0 instructions
|
596 |
|
|
p1_set_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000100" else '0';
|
597 |
|
|
p1_get_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000000" else '0';
|
598 |
|
|
|
599 |
|
|
-- FIXME elaborate and explain this
|
600 |
|
|
|
601 |
|
|
p1_op_special <= '1' when p1_ir_op="000000" else '0';
|
602 |
|
|
|
603 |
|
|
p1_do_reg_jump <= '1' when p1_op_special='1' and p1_ir_fn(5 downto 1)="00100" else '0';
|
604 |
|
|
p1_do_zero_ext_imm <= '1' when (p1_ir_op(31 downto 28)="0011") else '0';
|
605 |
|
|
|
606 |
|
|
-- Decode input data mux control (LW, LH, LB, LBU, LHU) and load enable
|
607 |
30 |
ja_rd |
-- Load enable: asserted for opcodes 100xxx, excluding 100010, 100110 and
-- 100111, and only when the previous instruction did not raise an exception.
p1_do_load <= '1' when
|
608 |
|
|
p1_ir_op(31 downto 29)="100" and
|
609 |
|
|
p1_ir_op(28 downto 26)/="010" and -- LWL
|
610 |
|
|
p1_ir_op(28 downto 26)/="110" and -- LWR
|
611 |
|
|
p1_ir_op(28 downto 26)/="111" and -- opcode 100111: not a MIPS-I load (reserved)
|
612 |
|
|
p2_exception='0' -- abort load if previous instruction triggered trap
|
613 |
|
|
else '0';
|
614 |
2 |
ja_rd |
|
615 |
|
|
p1_load_alu_set0 <= '1'
|
616 |
|
|
when p1_op_special='1' and
|
617 |
|
|
((p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="00") or
|
618 |
|
|
(p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="10") or
|
619 |
|
|
(p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="11") or
|
620 |
|
|
(p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="00") or
|
621 |
|
|
(p1_ir_op(31 downto 28)="0100" and p1_ir_op(27 downto 26)="00") or
|
622 |
|
|
(p1_ir_op(31 downto 28)="0100" and p1_ir_op(27 downto 26)="10") or
|
623 |
|
|
(p1_ir_op(31 downto 28)="1000") or
|
624 |
|
|
(p1_ir_op(31 downto 28)="1001") or
|
625 |
|
|
(p1_ir_op(31 downto 28)="1010" and p1_ir_op(27 downto 26)="10") or
|
626 |
|
|
(p1_ir_op(31 downto 28)="1010" and p1_ir_op(27 downto 26)="11") or
|
627 |
|
|
(p1_ir_op(31 downto 28)="0010" and p1_ir_op(27 downto 26)="01"))
|
628 |
|
|
else '0';
|
629 |
|
|
|
630 |
|
|
with p1_ir_op select p1_load_alu_set1 <=
|
631 |
28 |
ja_rd |
'1' when "001000", -- addi
|
632 |
|
|
'1' when "001001", -- addiu
|
633 |
|
|
'1' when "001010", -- slti
|
634 |
|
|
'1' when "001011", -- sltiu
|
635 |
|
|
'1' when "001100", -- andi
|
636 |
|
|
'1' when "001101", -- ori
|
637 |
|
|
'1' when "001110", -- xori
|
638 |
|
|
'1' when "001111", -- lui
|
639 |
2 |
ja_rd |
'0' when others;
|
640 |
28 |
ja_rd |
p1_load_alu <= (p1_load_alu_set0 or p1_load_alu_set1) and
|
641 |
|
|
not p1_unknown_opcode;
|
642 |
2 |
ja_rd |
|
643 |
|
|
p1_ld_upper_hword <= p1_ir_op(27); -- use input upper hword vs. sign extend/zero
|
644 |
|
|
p1_ld_upper_byte <= p1_ir_op(26); -- use input upper byte vs. sign extend/zero
|
645 |
|
|
p1_ld_unsigned <= p1_ir_op(28); -- sign extend vs. zero extend
|
646 |
|
|
|
647 |
|
|
-- ALU input-2 selection: use external data for 2x opcodes (loads)
|
648 |
|
|
p1_alu_op2_sel_set0 <=
|
649 |
|
|
"11" when p1_ir_op(31 downto 30)="10" or p1_ir_op(29)='1' else
|
650 |
|
|
"00";
|
651 |
|
|
|
652 |
|
|
-- ALU input-2 selection: use registers Hi and Lo for MFHI, MFLO
|
653 |
22 |
ja_rd |
p1_alu_op2_sel_set1 <=
|
654 |
|
|
"01" when p1_op_special='1' and (p1_ir_fn="010000" or p1_ir_fn="010010")
|
655 |
|
|
else "00";
|
656 |
2 |
ja_rd |
|
657 |
|
|
-- ALU input-2 final selection
|
658 |
|
|
p1_alu_op2_sel <= p1_alu_op2_sel_set0 or p1_alu_op2_sel_set1;
|
659 |
|
|
|
660 |
|
|
-- Decode store operations
|
661 |
30 |
ja_rd |
-- Store enable: asserted for opcodes 101000, 101001 and 101011 only, and only
-- when the previous instruction did not raise an exception.
p1_do_store <= '1' when
|
662 |
|
|
p1_ir_op(31 downto 29)="101" and
|
663 |
|
|
(p1_ir_op(28 downto 26)="000" or -- SB
|
664 |
|
|
p1_ir_op(28 downto 26)="001" or -- SH
|
665 |
|
|
p1_ir_op(28 downto 26)="011") and -- SW
|
666 |
28 |
ja_rd |
p2_exception='0' -- abort when previous instruction triggered exception
|
667 |
|
|
else '0';
|
668 |
2 |
ja_rd |
p1_store_size <= p1_ir_op(27 downto 26);
|
669 |
|
|
|
670 |
|
|
|
671 |
28 |
ja_rd |
-- Extract source and destination C0 register indices
|
672 |
|
|
p1_c0_rs_num <= p1_ir_reg(15 downto 11);
|
673 |
2 |
ja_rd |
|
674 |
|
|
-- Decode ALU control signals
|
675 |
|
|
|
676 |
|
|
-- Selects the ALU's set-on-less-than path. Asserted when:
--   * op=000001 and IR(20 downto 17)="1000"
--   * op=000000 and fn(5 downto 1)="10101"  (fn 101010 / 101011)
--   * op=001010 or op=001011                (slti / sltiu)
--
-- The first term used to compare the 4-bit slice IR(20 downto 17) against the
-- 5-bit literal "01000". That only matches when the tool resolves "=" to the
-- numeric std_logic_unsigned overload; the implicit array "=" is always false
-- for operands of different lengths. The literal is now the width-matched
-- "1000", which has the same numeric value and is tool-independent.
-- NOTE(review): confirm that matching on rt(4 downto 1)="1000" is the intended
-- decode for this term.
p1_ac.use_slt <= '1' when
        (p1_ir_op="000001" and p1_ir_reg(20 downto 17)="1000") or
        (p1_ir_op="000000" and p1_ir_reg(5 downto 1)="10101") or
        p1_ir_op="001010" or p1_ir_op="001011"
    else '0';
|
680 |
83 |
ja_rd |
p1_ac.arith_unsigned <= p1_ac.use_slt and (p1_ir_reg(0) or p1_ir_op(26));
|
681 |
2 |
ja_rd |
|
682 |
|
|
-- use_logic(0): set for every opcode in row "001" (all immediate arith and
-- logic) and for special-group functions whose fn(5 downto 3) is not "000".
p1_ac.use_logic(0) <= '1' when
        p1_ir_op(31 downto 29)="001" or
        (p1_op_special='1' and p1_ir_fn(5 downto 3)/="000")
    else '0';

-- use_logic(1): set only for the special-group function 100111.
p1_ac.use_logic(1) <= '1' when (p1_ir_fn="100111" and p1_op_special='1') else '0';
|
687 |
|
|
|
688 |
|
|
-- use_arith: set for special-group functions 1000xx and 1010xx, and for
-- opcodes whose four top bits are "0010".
p1_ac.use_arith <= '1' when
        (p1_op_special='1' and
            (p1_ir_fn(5 downto 2)="1010" or
             p1_ir_fn(5 downto 2)="1000")) or
        p1_ir_op(31 downto 28)="0010"
    else '0';
|
693 |
|
|
|
694 |
|
|
-- Selection of the 2nd internal ALU operand: {i2, /i2, i2<<16, 0x0}.

-- neg_sel(1): set when the four low opcode bits are all ones.
p1_ac.neg_sel(1) <= '1' when p1_ir_op(29 downto 26)="1111" else '0';

-- neg_sel(0): set for special-group functions 100010, 100011 and 1010xx, for
-- opcodes 0001xx, and for opcodes 001010 and 001011.
p1_ac.neg_sel(0) <= '1' when
        (p1_op_special='1' and
            (p1_ir_fn(5 downto 2)="1010" or
             p1_ir_fn="100011" or
             p1_ir_fn="100010")) or
        p1_ir_op(31 downto 28)="0001" or
        p1_ir_op="001011" or
        p1_ir_op="001010"
    else '0';

-- The carry input always follows neg_sel(0).
p1_ac.cy_in <= p1_ac.neg_sel(0);
|
705 |
|
|
|
706 |
|
|
-- Shift operation select: the two low bits of the function field.
p1_ac.shift_sel <= p1_ir_fn(1 downto 0);

-- Logic operation select. Special-group functions 100100..100111 and the
-- immediate opcodes 001100..001110 map onto codes "00", "01" and "10"
-- (function 100111 shares "01" with 100101); everything else gets "11".
p1_ac.logic_sel <=
    "00" when (p1_op_special='1' and p1_ir_fn="100100") else
    "01" when (p1_op_special='1' and
               (p1_ir_fn="100101" or p1_ir_fn="100111")) else
    "10" when (p1_op_special='1' and p1_ir_fn="100110") else
    "00" when (p1_ir_op="001100") else
    "01" when (p1_ir_op="001101") else
    "10" when (p1_ir_op="001110") else
    "11";

-- Shift amount: register rs (low 5 bits) when fn(2) is not '0', otherwise the
-- 5-bit field at bits 10..6 of the instruction.
p1_ac.shift_amount <= p1_rs(4 downto 0) when p1_ir_fn(2)/='0'
                      else p1_ir_reg(10 downto 6);
|
718 |
|
|
|
719 |
23 |
ja_rd |
|
720 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
721 |
23 |
ja_rd |
-- Decoding of unimplemented and privileged instructions
|
722 |
2 |
ja_rd |
|
723 |
23 |
ja_rd |
-- Unimplemented instructions include:
|
724 |
|
|
-- 1.- All instructions above architecture MIPS-I except:
|
725 |
|
|
-- 1.1.- eret
|
726 |
|
|
-- 2.- Unaligned stores and loads (LWL,LWR,SWL,SWR)
|
727 |
|
|
-- 3.- All CP0 instructions other than mfc0 and mtc0
|
728 |
|
|
-- 4.- All CPi instructions
|
729 |
|
|
-- 5.- All cache instructions
|
730 |
|
|
-- For the time being, we'll decode them all together.
|
731 |
|
|
|
732 |
|
|
-- FIXME: some of these should trap but others should just NOP (e.g. EHB)
|
733 |
|
|
|
734 |
|
|
-- p1_unknown_opcode is raised for every instruction encoding that this decoder
-- rejects. The terms are grouped by the instruction field they examine.
p1_unknown_opcode <= '1' when
        -- whole opcode rows that are rejected
        p1_ir_op(31 downto 29)="111" or
        p1_ir_op(31 downto 29)="110" or
        p1_ir_op(31 downto 29)="011" or
        -- opcode row "010": everything except 010000 is rejected
        (p1_ir_op(31 downto 29)="010" and p1_ir_op(28 downto 26)/="000") or
        -- individual opcodes
        p1_ir_op="100010" or    -- LWL
        p1_ir_op="100110" or    -- LWR
        p1_ir_op="101010" or    -- SWL
        p1_ir_op="101110" or    -- SWR
        p1_ir_op="101111" or    -- CACHE
        p1_ir_op="100111" or
        p1_ir_op="101100" or
        p1_ir_op="101101" or
        -- function codes rejected within the 'special' opcode group
        (p1_ir_op="000000" and
            (p1_ir_fn(5 downto 4)="11" or
             p1_ir_fn(5 downto 2)="0101" or
             p1_ir_fn(5 downto 2)="0111" or
             p1_ir_fn(5 downto 2)="1011" or
             p1_ir_fn="000001" or
             p1_ir_fn="000101" or
             p1_ir_fn="001010" or
             p1_ir_fn="001011" or
             p1_ir_fn="001110")) or
        -- 'regimm' opcode group: anything other than the four valid branches
        (p1_ir_op="000001" and
            not (p1_ir_reg(20 downto 16)="00000" or     -- BLTZ is valid
                 p1_ir_reg(20 downto 16)="00001" or     -- BGEZ is valid
                 p1_ir_reg(20 downto 16)="10000" or     -- BLTZAL is valid
                 p1_ir_reg(20 downto 16)="10001"))      -- BGEZAL is valid
    else '0';
|
767 |
|
|
|
768 |
|
|
--------------------------------------------------------------------------------
|
769 |
|
|
|
770 |
2 |
ja_rd |
-- Stage 1 pipeline register. Involved in ALU control.
-- Both register-bank hazard flags are cleared by the synchronous reset and are
-- copied from stage 0 on every clock edge where stall_pipeline is '0';
-- otherwise they hold their value.
pipeline_stage1_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p1_rbank_rt_hazard <= '0';
            p1_rbank_rs_hazard <= '0';
        else
            if stall_pipeline='0' then
                p1_rbank_rt_hazard <= p0_rbank_rt_hazard;
                p1_rbank_rs_hazard <= p0_rbank_rs_hazard;
            end if;
        end if;
    end if;
end process pipeline_stage1_register;
|
784 |
|
|
|
785 |
12 |
ja_rd |
-- One-cycle delayed copy of p1_muldiv_running, cleared by the synchronous
-- reset. This register is not gated by any stall signal.
pipeline_stage1_register2:
process(clk)
begin
    if clk'event and clk='1' then
        -- default: follow the stage 1 flag...
        p2_muldiv_started <= p1_muldiv_running;
        -- ...unless reset is active, which takes precedence.
        if reset='1' then
            p2_muldiv_started <= '0';
        end if;
    end if;
end process pipeline_stage1_register2;
|
796 |
6 |
ja_rd |
|
797 |
12 |
ja_rd |
|
798 |
6 |
ja_rd |
-- Stage 2 pipeline register. Split in two for convenience.
|
799 |
2 |
ja_rd |
-- This register deals with two kinds of stalls:
|
800 |
|
|
-- * When the pipeline stalls because of a load interlock, this register is
|
801 |
|
|
-- allowed to update so that the load operation can complete while the rest of
|
802 |
|
|
-- the pipeline is frozen.
|
803 |
|
|
-- * When the stall is caused by any other reason, this register freezes with
|
804 |
|
|
-- the rest of the machine.
|
805 |
6 |
ja_rd |
|
806 |
|
|
-- Part of stage 2 register that controls load operation.
pipeline_stage2_register_load_control:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' or p2_exception='1' then
            -- Clear load control, effectively preventing load, at reset or
            -- if the previous instruction raised an exception.
            p2_load_target    <= "00000";
            p2_ld_unsigned    <= '0';
            p2_ld_upper_byte  <= '0';
            p2_ld_upper_hword <= '0';
            p2_do_load        <= '0';

        elsif (stall_pipeline='0' or load_interlock='1') then
            -- Load signals from previous stage only if there is no pipeline
            -- stall unless the stall is caused by interlock (@note1).

            -- Disable reg bank writeback if pipeline is stalled; this
            -- prevents duplicate writes in case the stall is a mem_wait.
            p2_do_load <= '0';
            if pipeline_stalled='0' then
                p2_do_load <= p1_do_load;
            end if;

            p2_ld_unsigned    <= p1_ld_unsigned;
            p2_ld_upper_byte  <= p1_ld_upper_byte;
            p2_ld_upper_hword <= p1_ld_upper_hword;
            p2_load_target    <= p1_rd_num;
        end if;
    end if;
end process pipeline_stage2_register_load_control;
|
837 |
|
|
|
838 |
|
|
-- All the rest of the stage 2 register.
-- Only p2_exception is cleared by reset; p2_rd_addr has no reset value.
pipeline_stage2_register_others:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p2_exception <= '0';
        else
            -- Load signals from previous stage only if there is no pipeline
            -- stall unless the stall is caused by interlock (@note1).
            if (stall_pipeline='0' or load_interlock='1') then
                p2_exception <= p1_exception;
                p2_rd_addr   <= p1_data_addr(1 downto 0);
            end if;
        end if;
    end if;
end process pipeline_stage2_register_others;
|
854 |
2 |
ja_rd |
|
855 |
|
|
--------------------------------------------------------------------------------
|
856 |
|
|
|
857 |
|
|
-- FIXME stall when needed: mem pause, mdiv pause and load interlock

-- Combinational stall request: any of the three sources stalls the pipeline.
-- FIXME make sure this combinational will not have bad glitches
stall_pipeline <= p1_muldiv_stall or load_interlock or mem_wait;

-- Load interlock request.
-- FIXME load interlock should happen only if the instruction following
-- the load actually uses the load target register. Something like this:
-- (p1_do_load='1' and (p1_rd_num=p0_rs_num or p1_rd_num=p0_rt_num))
load_interlock <= '1' when
        (p1_rs2_hazard='1' or p1_rs1_hazard='1') and
        pipeline_stalled='0' and    -- not already stalled (i.e. assert for 1 cycle)
        p1_do_load='1'              -- this is a load instruction
    else '0';

-- Registered "was stalled" flag: OR of the three per-cause stall registers.
pipeline_stalled <= stalled_muldiv or stalled_memwait or stalled_interlock;
|
877 |
|
|
|
878 |
2 |
ja_rd |
-- Per-cause stall flags, registered on the rising clock edge and cleared by the
-- synchronous reset. Each flag is '1' exactly when its source was '1' on the
-- previous edge, except that stalled_interlock holds while mem_wait is '1'.
pipeline_stall_registers:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            stalled_muldiv    <= '0';
            stalled_memwait   <= '0';
            stalled_interlock <= '0';
        else
            -- memory wait flag
            stalled_memwait <= '0';
            if mem_wait='1' then
                stalled_memwait <= '1';
            end if;

            -- mul/div stall flag
            stalled_muldiv <= '0';
            if p1_muldiv_stall='1' then
                stalled_muldiv <= '1';
            end if;

            -- stalls caused by mem_wait and load_interlock are independent
            -- and must not overlap; so when mem_wait='1' the cache stall
            -- takes precedence and the load interlock must wait.
            if mem_wait='0' then
                stalled_interlock <= '0';
                if load_interlock='1' then
                    stalled_interlock <= '1';
                end if;
            end if;
        end if;
    end if;
end process pipeline_stall_registers;
|
912 |
|
|
|
913 |
28 |
ja_rd |
-- Provisional: both hazard flags are tied to '1', so the register-index
-- comparison is bypassed. The commented-out expressions are kept for reference:
--   p1_rs1_hazard <= '1' when p0_uses_rs1='1' and p1_rd_num=p0_rs_num else '0';
--   p1_rs2_hazard <= '1' when p0_uses_rs2='1' and p1_rd_num=p0_rt_num else '0';
p1_rs2_hazard <= '1';
p1_rs1_hazard <= '1';
|
915 |
|
|
|
916 |
|
|
-- p0_uses_rs1 is '0' for the four listed opcodes and '1' for all others.
-- Note that the selector is p1_ir_op even though the signal is named p0_*.
-- NOTE(review): 001000 is the ADDI encoding in MIPS-I, which does read rs;
-- confirm whether a different opcode was meant.
with p1_ir_op select p0_uses_rs1 <=
    '0' when "000010" | "000011" | "001111" | "001000",
    '1' when others;
|
922 |
|
|
|
923 |
|
|
-- p0_uses_rs2 is '1' for the listed opcodes and '0' for all others.
-- Note that the selector is p1_ir_op even though the signal is named p0_*.
with p1_ir_op select p0_uses_rs2 <=
    '1' when "000000" |                                         -- 'special' group
             "000100" | "000101" | "000110" | "000111" |        -- opcodes 0001xx
             "010000" |
             "101000" | "101001" | "101010" | "101011" |        -- SB, SH, SWL, SW
             "101110",                                          -- SWR
    '0' when others;
|
936 |
|
|
|
937 |
|
|
|
938 |
2 |
ja_rd |
--------------------------------------------------------------------------------
|
939 |
|
|
|
940 |
|
|
-- 32-bit data offset: the 16-bit immediate in the low half, with its bit 15
-- replicated across the high half (sign extension).
p1_data_offset(15 downto 0)  <= p1_data_imm(15 downto 0);
p1_data_offset(31 downto 16) <= (others => p1_data_imm(15));

-- Data address: register rs plus the sign-extended offset.
p1_data_addr <= p1_data_offset + p1_rs;
|
944 |
|
|
|
945 |
|
|
--------------------------------------------------------------------------------
|
946 |
|
|
|
947 |
|
|
-- byte_we is a function of the write size and alignment.
-- size = {00=1,01=2,11=4}; we 3 is MSB, 0 is LSB; big endian => 00 is msb
--
-- p1_we_control packs, MSB first: pipeline_stalled, p1_do_store, the 2-bit
-- store size and the two low address bits. Every enabled pattern below has
-- pipeline_stalled='0' and p1_do_store='1'.
p1_we_control <= pipeline_stalled & p1_do_store & p1_store_size & p1_data_addr(1 downto 0);

with p1_we_control select byte_we <=
    "1111" when "011100",   -- SW, address offset 0
    "1100" when "010100",   -- SH, address offset 0
    "0011" when "010110",   -- SH, address offset 2
    "1000" when "010000",   -- SB, address offset 0
    "0100" when "010001",   -- SB, address offset 1
    "0010" when "010010",   -- SB, address offset 2
    "0001" when "010011",   -- SB, address offset 3
    "0000" when others;     -- all other combinations are spurious so don't write
|
960 |
|
|
|
961 |
|
|
-- Data to be stored always comes straight from the reg bank, but it needs to
-- be shifted so that the LSB is aligned to the write address. Each byte lane
-- of data_wr carries either its own byte of p1_rt or a low byte moved up.

-- Lane 3 (bits 31..24)
with p1_we_control select data_wr(31 downto 24) <=
    p1_rt(15 downto  8) when "010100",              -- SH, offset 0
    p1_rt( 7 downto  0) when "010000",              -- SB, offset 0
    p1_rt(31 downto 24) when others;

-- Lane 2 (bits 23..16)
with p1_we_control select data_wr(23 downto 16) <=
    p1_rt( 7 downto  0) when "010001" | "010100",   -- SB offset 1, SH offset 0
    p1_rt(23 downto 16) when others;

-- Lane 1 (bits 15..8)
with p1_we_control select data_wr(15 downto 8) <=
    p1_rt( 7 downto  0) when "010010",              -- SB, offset 2
    p1_rt(15 downto  8) when others;

-- Lane 0 (bits 7..0) is never shifted.
data_wr(7 downto 0) <= p1_rt(7 downto 0);
|
979 |
|
|
|
980 |
|
|
|
981 |
|
|
--##############################################################################
|
982 |
|
|
-- CP0 (what little is implemented of it)
|
983 |
|
|
|
984 |
|
|
-- CP0 state registers: status, cache control, EPC and the cause fields.
-- All updates happen on the rising clock edge; reset is synchronous.
-- cp0_epc has no reset value.
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            cp0_cause_bd       <= '0';
            cp0_cause_exc_code <= "00000";
            cp0_cache_control  <= "00";
            -- "10" => mode=kernel; ints=disabled
            cp0_status         <= "10";
        else
            -- no need to check for stall cycles when loading these
            if p1_set_cp0='1' then
                -- FIXME check for CP0 reg index
                cp0_cache_control <= p1_rs(17 downto 16);
                cp0_status        <= p1_rs(cp0_status'high downto 0);
            end if;

            -- An exception is recorded only when the pipeline was not stalled.
            if p1_exception='1' and pipeline_stalled='0' then
                cp0_epc      <= p0_pc_restart;
                cp0_cause_bd <= cp0_in_delay_slot;

                -- Cause code: unknown opcode wins; otherwise bit 0 of the
                -- function field tells syscall from break.
                if p1_unknown_opcode='1' then
                    cp0_cause_exc_code <= "01010"; -- bad opcode
                elsif p1_ir_fn(0)='0' then
                    cp0_cause_exc_code <= "01000"; -- syscall
                else
                    cp0_cause_exc_code <= "01001"; -- break
                end if;
            end if;
        end if;
    end if;
end process;
|
1018 |
|
|
|
1019 |
101 |
ja_rd |
-- Cache control outputs, one bit each from cp0_cache_control (which is loaded
-- from p1_rs(17 downto 16) when p1_set_cp0 is asserted).
ic_invalidate <= cp0_cache_control(16);
cache_enable  <= cp0_cache_control(17);
|
1021 |
|
|
|
1022 |
28 |
ja_rd |
-- FIXME CP* traps merged with unimplemented opcode traps
cp0_cause_ce <= "00";

-- Cause register image, MSB first: BD flag, one zero bit, the 2-bit CE field,
-- 23 zero bits and the 5-bit exception code in the lowest bits.
-- NOTE(review): the exception code lands in bits 4..0 here, whereas the MIPS-I
-- Cause register defines ExcCode at bits 6..2 -- confirm what software expects.
cp0_cause <= cp0_cause_bd & '0' & cp0_cause_ce &
             X"00000" & "000" &
             cp0_cause_exc_code;
|
1026 |
|
|
|
1027 |
2 |
ja_rd |
-- CP0 read multiplexer, selected by the C0 register index: 01100 returns the
-- zero-extended status bits, 01101 returns the cause image, and every other
-- index returns EPC with two zero bits appended.
-- FIXME the mux should mask to zero for any unused reg index
with p1_c0_rs_num select cp0_reg_read <=
    cp0_cause                       when "01101",
    X"0000000" & "00" & cp0_status  when "01100",
    cp0_epc & "00"                  when others;
|
1032 |
2 |
ja_rd |
|
1033 |
28 |
ja_rd |
|
1034 |
2 |
ja_rd |
end architecture rtl;
|
1035 |
|
|
|
1036 |
|
|
--------------------------------------------------------------------------------
|
1037 |
|
|
-- Implementation notes
|
1038 |
|
|
--------------------------------------------------------------------------------
|
1039 |
|
|
-- @note1 :
|
1040 |
|
|
--
|
1041 |
|
|
-- This is the meaning of these two signals:
|
1042 |
46 |
ja_rd |
-- pipeline_stalled & stalled_interlock =>
|
1043 |
2 |
ja_rd |
-- "00" => normal state
|
1044 |
|
|
-- "01" => normal state (makes for easier decoding)
|
1045 |
|
|
-- "10" => all stages of pipeline stalled, including rbank
|
1046 |
|
|
-- "11" => all stages of pipeline stalled, except reg bank write port
|
1047 |
|
|
--
|
1048 |
|
|
-- Just to clarify, 'stage X stalled' here means that the registers named
|
1049 |
|
|
-- pX_* don't load.
|
1050 |
|
|
--
|
1051 |
|
|
-- The register bank WE is enabled when the pipeline is not stalled and when
|
1052 |
|
|
-- it is stalled because of a load interlock; so that in case of interlock the
|
1053 |
|
|
-- load operation can complete while the rest of the pipeline is frozen.
|
1054 |
|
|
--------------------------------------------------------------------------------
|