1 |
2 |
idiolatrie |
--------------------------------------------------------------------------------
|
2 |
|
|
-- MIPS™ I CPU --
|
3 |
|
|
--------------------------------------------------------------------------------
|
4 |
|
|
-- --
|
5 |
|
|
-- POSSIBLE FAULTS --
|
6 |
|
|
-- --
|
7 |
|
|
-- o The upper 4bits of a branch/jump instruction depend on the PC of the --
|
8 |
|
|
-- branch delay slot. This special case has not been tested: --
|
9 |
|
|
-- --
|
10 |
|
|
-- PC INSTRUCTION --
|
11 |
|
|
-- +------------+-----------------+ --
|
12 |
|
|
-- | 0x0fffffff | some branch OP | --
|
13 |
|
|
-- | 0x10000000 | ADD $s1, $s1, 1 | --
|
14 |
|
|
-- +------------+-----------------+ --
|
15 |
|
|
-- --
|
16 |
|
|
-- Whenever the upper 4 PC bits of the jump instruction differ from the --
|
17 |
|
|
-- delay slot instruction address, there might be a chance of incorrect --
|
18 |
|
|
-- behavior. --
|
19 |
|
|
-- --
|
20 |
|
|
-- o Interrupts are still experimental. --
|
21 |
|
|
-- --
|
22 |
|
|
--------------------------------------------------------------------------------
|
23 |
|
|
-- Copyright (C)2011 Mathias Hörtnagl <mathias.hoertnagl@gmail.comt> --
|
24 |
|
|
-- --
|
25 |
|
|
-- This program is free software: you can redistribute it and/or modify --
|
26 |
|
|
-- it under the terms of the GNU General Public License as published by --
|
27 |
|
|
-- the Free Software Foundation, either version 3 of the License, or --
|
28 |
|
|
-- (at your option) any later version. --
|
29 |
|
|
-- --
|
30 |
|
|
-- This program is distributed in the hope that it will be useful, --
|
31 |
|
|
-- but WITHOUT ANY WARRANTY; without even the implied warranty of --
|
32 |
|
|
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the --
|
33 |
|
|
-- GNU General Public License for more details. --
|
34 |
|
|
-- --
|
35 |
|
|
-- You should have received a copy of the GNU General Public License --
|
36 |
|
|
-- along with this program. If not, see <http://www.gnu.org/licenses/>. --
|
37 |
|
|
--------------------------------------------------------------------------------
|
38 |
|
|
library ieee;
|
39 |
|
|
use ieee.std_logic_1164.all;
|
40 |
|
|
use ieee.numeric_std.all;
|
41 |
|
|
|
42 |
|
|
library work;
|
43 |
|
|
use work.mips1.all;
|
44 |
|
|
use work.tcpu.all;
|
45 |
|
|
use work.icpu.all;
|
46 |
|
|
use work.fcpu.all;
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
-- Top-level CPU entity.
-- All inputs are bundled into one record port and all outputs into another;
-- both record types are declared outside this file.
entity cpu is
   port (
      ci : in  cpu_in_t;    -- CPU inputs (clk, rst, hld, ins, dat, irq, ...).
      co : out cpu_out_t    -- CPU outputs (iadr, dadr, dat, we, sel, ...).
   );
end entity cpu;
|
55 |
|
|
|
56 |
|
|
architecture rtl of cpu is

   -- Pipeline registers. For every stage `x` holds the registered value and
   -- `xin` the combinational next value; the `reg` process copies xin to x
   -- on the rising clock edge.
   signal f    : fe_t;      -- FE: [FETCH STAGE]
   signal fin  : fe_t;
   signal d    : de_t;      -- DE: [DECODE STAGE]
   signal din  : de_t;
   signal e    : ex_t;      -- EX: [EXECUTION STAGE]
   signal ein  : ex_t;
   signal m    : me_t;      -- ME: [MEMORY STAGE]
   signal min  : me_t;
   signal cp   : cp0_t;     -- Coprocessor 0 registers
   signal cpin : cp0_t;

   -- Interrupt related signals.
   signal intr_bd  : boolean;                       -- Branch delay flag (set by `de`).
   signal intr_bdd : boolean;                       -- intr_bd delayed by one clock.
   signal intr_im  : std_logic_vector(7 downto 0);  -- Interrupt mask.
   signal intr_iec : std_logic;                     -- IEc: Interrupt enable current.
   signal intr_iep : std_logic;                     -- IEp: Interrupt enable previous.
   signal intr_ieo : std_logic;                     -- IEo: Interrupt enable old.

   -- GPR read addresses, taken straight from the incoming instruction word.
   alias rs_a : std_logic_vector(4 downto 0) is ci.ins(25 downto 21);  -- RS
   alias rt_a : std_logic_vector(4 downto 0) is ci.ins(20 downto 16);  -- RT

   -- GPR read data.
   signal rs_o : std_logic_vector(31 downto 0);
   signal rt_o : std_logic_vector(31 downto 0);
begin
|
78 |
|
|
|
79 |
|
|
-----------------------------------------------------------------------------
|
80 |
|
|
-- FETCH STAGE --
|
81 |
|
|
-----------------------------------------------------------------------------
|
82 |
|
|
-- Fetch stage (combinational).
-- Drives the instruction address, computes the next program counter and
-- maintains the next-state value of the coprocessor 0 registers, including
-- interrupt entry.
--
-- The sensitivity list names every signal the process reads: the whole `f`
-- and `cp` records (both are copied into variables below) and `intr_bdd`,
-- which gates interrupt entry.
fe : process(ci.irq, f, e.f, d.cc, cp, cp.sr, intr_bd, intr_bdd, intr_im,
             intr_iec, intr_iep, intr_ieo)

   -- SETTING: Interrupt handler routine address (byte address).
   constant INTR_ADR : unsigned(31 downto 0) := x"200000c0";

   variable v    : fe_t;      -- Next fetch stage state.
   variable c    : cp0_t;     -- Next coprocessor 0 state.
   variable intr : boolean;   -- An enabled interrupt request is pending.
begin
   v := f;
   c := cp;

   -- Address of the next instruction to be fetched. The PC is kept as a
   -- word address, so two zero bits are appended.
   co.iadr <= std_logic_vector(f.pc) & "00";

   -- Program counter: take the jump target resolved by the EX stage,
   -- otherwise advance to the next word.
   if e.f.jmp = '1' then
      v.pc := e.f.j;
   else
      v.pc := f.pc + 1;
   end if;

   --------------------------------------------------------------------------
   -- INTR Interrupt                                                       --
   --------------------------------------------------------------------------
   -- The interrupt mechanism consists of several tasks:                   --
   --  o Push a '0' onto the IE stack to avoid further interrupt triggers. --
   --  o Set the EPC to the address execution has to resume at.            --
   --  o Jump to INTR_ADR, which is the hardcoded address of the interrupt --
   --    dispatch routine.                                                 --
   --------------------------------------------------------------------------

   -- Restore from exception.
   -- Do before interrupt handling, or else we might push the IE stack twice.
   if d.cc.rfe then
      c := pop_ie(c);
   end if;

   -- Set SR of CP0. [DE]
   -- Done before interrupt handling so that a pending interrupt entry below
   -- still pushes the IE stack on top of the freshly written value.
   if d.cc.mtsr then
      c.sr.im  := intr_im;
      c.sr.iec := intr_iec;
      c.sr.iep := intr_iep;
      c.sr.ieo := intr_ieo;
   end if;

   -- An interrupt is pending when at least one unmasked request line is set
   -- and interrupts are currently enabled (registered SR value).
   intr := ( (cp.sr.im and ci.irq) /= x"00" ) and (cp.sr.iec = '1');

   -- Delay the interrupt if we are in one of the two delay slots.
   -- Otherwise push the IE stack, save the return address and jump to the
   -- interrupt handler.
   if (not intr_bd) and (not intr_bdd) and intr then
      c     := push_ie(c);
      -- Save the PC whether it comes from a jump or was just incremented.
      c.epc := v.pc;                     -- e.f.j or f.pc + 1.
      v.pc  := INTR_ADR(31 downto 2);
   end if;

   fin  <= v;
   cpin <= c;
end process;
|
152 |
|
|
|
153 |
|
|
|
154 |
|
|
|
155 |
|
|
-----------------------------------------------------------------------------
|
156 |
|
|
-- DECODE STAGE --
|
157 |
|
|
-----------------------------------------------------------------------------
|
158 |
|
|
-- Decode stage (combinational).
-- Translates the incoming instruction word `ci.ins` into the control record
-- for the following stages (`din`) and flags branch/jump instructions via
-- `intr_bd` so that the fetch stage can postpone interrupts.
-- The helpers op/aluop/rimmop/cp0op/cp0reg and link/simm/zimm/load/store are
-- defined outside this file; what link/simm/zimm/load/store set in `v` is not
-- visible here -- NOTE(review): confirm in their package before relying on
-- fields they may pre-set (e.g. the GPR write enable).
de : process(ci.ins, d, m, m.wc, d.cc, d.dc, d.ec, d.ec.alu, d.ec.alu.src,
             d.ec.jmp, d.mc, d.mc.mem, d.wc)

   variable v : de_t;   -- Next decode stage state.

   -- CP0 register number field of the instruction word.
   alias rgcp0 : std_logic_vector(4 downto 0) is ci.ins(15 downto 11);
begin
   v := d;

   -- Simulation-only debug outputs showing the decoded instruction.
   -- synthesis translate_off
   co.op     <= op(ci.ins);
   co.alu    <= aluop(ci.ins);
   co.rimm   <= rimmop(ci.ins);
   co.cp0op  <= cp0op(ci.ins);
   -- Use the same register field (bits 15..11) as the real decode below, so
   -- the trace shows the CP0 register that is actually addressed.
   co.cp0reg <= cp0reg(rgcp0);
   -- synthesis translate_on

   --------------------------------------------------------------------------
   -- Decode                                                               --
   --------------------------------------------------------------------------
   -- Default values emulate a NOP [SLL $0, $0, 0] operation.

   v.cc.mtsr      := false;   -- Move To Status Register. [INTR]
   v.cc.rfe       := false;   -- Restore from Exception. [INTR]
   v.ec.wbr       := RD;      -- Write back register.
   v.ec.alu.op    := SLL0;    -- ALU operation. [ALU]
   v.ec.alu.src.a := REG;     -- Source for ALU input A. [ALU Source Choice]
   v.ec.alu.src.b := REG;     -- Source for ALU input B. [ALU Source Choice]
   v.ec.jmp.src   := REG;     -- Jump source. [Branch/Jump]
   v.ec.jmp.op    := NOP;     -- Jump type. [Branch/Jump]
   v.mc.mem.we    := '0';     -- Memory write enable.
   v.mc.mem.ext   := ZERO;    -- Memory data extension. [Data Extension]
   v.mc.mem.byt   := NONE;    -- Number of data bytes. [MEMORY STAGE]
   v.mc.src       := ALU;     -- ALU or MEM to GPR. [MEMORY STAGE]
   v.wc.we        := '0';     -- GPR write enable.

   intr_bd <= false;          -- Marks a branch delay slot. [INTR]

   case op(ci.ins) is

      -----------------------------------------------------------------------
      -- Register operations (ALU function selects the operation)          --
      -----------------------------------------------------------------------
      when AD =>
         case aluop(ci.ins) is
            when JALR =>
               v := link(v);
               v.ec.jmp.op := JMP;
               intr_bd <= true;
            when JR =>
               v.ec.jmp.op := JMP;
               intr_bd <= true;
            when SLL0 | SRA0 | SRL0 =>
               -- Shifts by a constant take the amount from the instruction.
               v.ec.alu.op    := aluop(ci.ins);
               v.ec.alu.src.a := SH_CONST;
               v.wc.we        := '1';
            when others =>
               v.ec.alu.op := aluop(ci.ins);
               v.wc.we     := '1';
         end case;

      -----------------------------------------------------------------------
      -- Immediate Branches                                                --
      -----------------------------------------------------------------------
      when RI =>
         case rimmop(ci.ins) is
            when BGEZ =>
               v.ec.jmp.src := BRA;
               v.ec.jmp.op  := GEZ;
               intr_bd <= true;
            when BGEZAL =>
               v := link(v);
               v.ec.wbr     := RA;
               v.ec.jmp.src := BRA;
               v.ec.jmp.op  := GEZ;
               intr_bd <= true;
            when BLTZ =>
               v.ec.jmp.src := BRA;
               v.ec.jmp.op  := LTZ;
               intr_bd <= true;
            when BLTZAL =>
               v := link(v);
               v.ec.wbr     := RA;
               v.ec.jmp.src := BRA;
               v.ec.jmp.op  := LTZ;
               intr_bd <= true;
            when ERR =>
               null;   -- Unknown encoding: keep the NOP defaults.
         end case;

      -----------------------------------------------------------------------
      -- Normal Jumps/Branches                                             --
      -----------------------------------------------------------------------
      when J =>
         v.ec.jmp.src := JMP;
         v.ec.jmp.op  := JMP;
         intr_bd <= true;
      when JAL =>
         v := link(v);
         v.ec.wbr     := RA;
         v.ec.jmp.src := JMP;
         v.ec.jmp.op  := JMP;
         intr_bd <= true;
      when BEQ =>
         v.ec.jmp.src := BRA;
         v.ec.jmp.op  := EQ;
         intr_bd <= true;
      when BNE =>
         v.ec.jmp.src := BRA;
         v.ec.jmp.op  := NEQ;
         intr_bd <= true;
      when BLEZ =>
         v.ec.jmp.src := BRA;
         v.ec.jmp.op  := LEZ;
         intr_bd <= true;
      when BGTZ =>
         v.ec.jmp.src := BRA;
         v.ec.jmp.op  := GTZ;
         intr_bd <= true;

      -----------------------------------------------------------------------
      -- Immediate Operations                                              --
      -----------------------------------------------------------------------
      when ADDI =>
         v := simm(v);
         v.ec.alu.op := ADD;
      when ADDIU =>
         v := simm(v);
         v.ec.alu.op := ADDU;
      when SLTI =>
         v := simm(v);
         v.ec.alu.op := SLT;
      when SLTIU =>
         v := simm(v);
         v.ec.alu.op := SLTU;
      when ANDI =>
         v := zimm(v);
         v.ec.alu.op := AND0;
      when ORI =>
         v := zimm(v);
         v.ec.alu.op := OR0;
      when XORI =>
         v := zimm(v);
         v.ec.alu.op := XOR0;
      when LUI =>
         -- ALU operation stays at the SLL0 default; input A selects the
         -- 16-bit shift amount.
         v := zimm(v);
         v.ec.alu.src.a := SH_16;

      -----------------------------------------------------------------------
      -- Load And Store Data                                               --
      -----------------------------------------------------------------------
      when LB =>
         v := load(v);
         v.mc.mem.ext := SIGN;
         v.mc.mem.byt := BYTE;
      when LH =>
         v := load(v);
         v.mc.mem.ext := SIGN;
         v.mc.mem.byt := HALF;
      when LW =>
         v := load(v);
         v.mc.mem.byt := WORD;
      when LBU =>
         v := load(v);
         v.mc.mem.byt := BYTE;
      when LHU =>
         v := load(v);
         v.mc.mem.byt := HALF;
      when SB =>
         v := store(v);
         v.mc.mem.byt := BYTE;
      when SH =>
         v := store(v);
         v.mc.mem.byt := HALF;
      when SW =>
         v := store(v);
         v.mc.mem.byt := WORD;

      -----------------------------------------------------------------------
      -- Co-Processor 0                                                    --
      -----------------------------------------------------------------------
      when CP0 =>
         case cp0op(ci.ins) is
            when MFCP0 =>
               v.ec.wbr    := RT;
               v.ec.alu.op := MFCP0;
               v.wc.we     := '1';
            when MTCP0 =>
               v.ec.alu.op := MTCP0;
               -- Only writes to the status register are acted upon.
               if cp0reg(rgcp0) = SR then
                  v.cc.mtsr := true;
               end if;
            when RFE =>
               v.ec.alu.op := RFE;
               v.cc.rfe    := true;
            when ERR =>
               null;   -- Unknown encoding: keep the NOP defaults.
         end case;

      when ERR =>
         null;         -- Unknown opcode: keep the NOP defaults.
   end case;

   v.i     := ci.ins(25 downto 0);   -- Instruction word without the opcode.
   v.dc.we := m.wc.we;               -- Forward write enable.
   v.rd    := m.rd;                  -- Forward destination register.
   v.res   := m.res;                 -- Forward data.
   din <= v;
end process;
|
363 |
|
|
|
364 |
|
|
-----------------------------------------------------------------------------
|
365 |
|
|
-- GPR General Purpose Registers --
|
366 |
|
|
-----------------------------------------------------------------------------
|
367 |
|
|
-- General purpose register file instance (`gpr` is declared outside this
-- file). Read addresses come directly from the incoming instruction word;
-- the write port is fed by the memory stage register `m`.
gp : gpr
   port map (
      -- Clocking / stall.
      clk_i => ci.clk,     -- Clock.
      hld_i => ci.hld,     -- Hold register data.
      -- Read port RS.
      rs_a  => rs_a,       -- RS register address.
      rs_o  => rs_o,       -- RS register data.
      -- Read port RT.
      rt_a  => rt_a,       -- RT register address.
      rt_o  => rt_o,       -- RT register data.
      -- Write back port.
      rd_a  => m.rd,       -- Write back register address.
      rd_we => m.wc.we,    -- Write back enable.
      rd_i  => m.res       -- Write back register data.
   );
|
378 |
|
|
|
379 |
|
|
|
380 |
|
|
|
381 |
|
|
-----------------------------------------------------------------------------
|
382 |
|
|
-- EXECUTION STAGE --
|
383 |
|
|
-----------------------------------------------------------------------------
|
384 |
|
|
-- Execution stage (combinational).
-- Selects the write back register, forwards not yet written results to the
-- ALU inputs, performs the ALU operation, reads CP0 registers and resolves
-- branches/jumps (decision and target) for the fetch stage.
ex : process(ci.irq, rs_o, rt_o, cp, cp.sr, f, d, d.dc, d.ec, d.ec.alu,
             d.ec.alu.src, d.ec.jmp, d.mc, d.mc.mem, d.wc, e, e.f, e.mc,
             e.mc.mem, e.wc, m, m.wc)

   variable v        : ex_t;                            -- Next EX stage state.
   variable a, b     : std_logic_vector(31 downto 0);   -- ALU input.
   variable fa, fb   : std_logic_vector(31 downto 0);   -- Forwarded data.
   variable equ, eqz : std_logic;                       -- fa=fb, fa=0
   variable atmp     : std_logic_vector(31 downto 0);   -- Temporary result.

   --------------------------------------------------------------------------
   -- Fields of the stored instruction word d.i (opcode already removed).   --
   --   R-Type: rgs [25:21], rgt [20:16], rgd [15:11], smt [10:6]           --
   --   I-Type: rgs [25:21], rgt [20:16], imm [15:0]                        --
   --------------------------------------------------------------------------
   alias rgs : std_logic_vector(4 downto 0)  is d.i(25 downto 21);
   alias rgt : std_logic_vector(4 downto 0)  is d.i(20 downto 16);
   alias rgd : std_logic_vector(4 downto 0)  is d.i(15 downto 11);
   alias imm : std_logic_vector(15 downto 0) is d.i(15 downto 0);
   alias smt : std_logic_vector(4 downto 0)  is d.i(10 downto 6);
begin
   v := e;

   -- Choose the write back register.
   case d.ec.wbr is
      when RD => v.rd := rgd;
      when RT => v.rd := rgt;
      when RA => v.rd := b"11111";   -- Register 31.
   end case;

   --------------------------------------------------------------------------
   -- Forwarding                                                           --
   --------------------------------------------------------------------------
   -- In a pipeline there can be data that has not been written into the   --
   -- GPR registers yet. For example:                                      --
   --                                                                      --
   --    addu $t0, $t1, $t2   -+                                           --
   --    sll  $t3, $t0, 4     -+- $t0                                      --
   --                                                                      --
   -- Here GPR $t0 is not up to date, since addu modifies $t0, but the new --
   -- value is available after EX, so we choose the newer data from after  --
   -- the EX stage instead of the GPR data.                                --
   -- However, if we load data and use it in the next instruction, we get  --
   -- the wrong result. Loaded data is available two cycles after the load --
   -- instruction. The compiler solves this problem by inserting an        --
   -- independent instruction or a NOP.                                    --
   --                                                                      --
   --    ori  $t0, $s1, 3     -+                                           --
   --    sw   $s2, 4($sp)      |                                           --
   --    subu $s2, $t0, $s3   -+- $t0                                      --
   --                                                                      --
   -- In the second example the updated data is available after the ME     --
   -- stage.                                                               --
   --                                                                      --
   --    andi $t0, $t0, 1     -+                                           --
   --    lui  $s1, 0xf4        |                                           --
   --    lw   $s2, 0($sp)      |                                           --
   --    sra  $t1, $t0, 3     -+- $t0                                      --
   --                                                                      --
   -- The third example shows a problem that arises one cycle earlier,     --
   -- when the register contents are read: the data to be written is       --
   -- present but not yet stored. The write back data, address and write   --
   -- enable flag are therefore kept one more cycle (in the DE register)   --
   -- and checked here as well.                                            --
   --                                                                      --
   -- The checks run from oldest to newest result, so that the most recent --
   -- producer of a register wins.                                         --
   --------------------------------------------------------------------------
   fa := rs_o;
   fb := rt_o;

   -- Forward from Write Back Stage (Data stored in DE Stage).
   if (d.rd /= "00000") and (d.dc.we = '1') then
      if rgs = d.rd then fa := d.res; end if;
      if rgt = d.rd then fb := d.res; end if;
   end if;

   -- Forward from Memory Stage.
   if (m.rd /= "00000") and (m.wc.we = '1') then
      if rgs = m.rd then fa := m.res; end if;
      if rgt = m.rd then fb := m.res; end if;
   end if;

   -- Forward from Execution Stage.
   if (e.rd /= "00000") and (e.wc.we = '1') then
      if rgs = e.rd then fa := e.res; end if;
      if rgt = e.rd then fb := e.res; end if;
   end if;

   --------------------------------------------------------------------------
   -- ALU Source Choice                                                    --
   --------------------------------------------------------------------------
   -- Input A:                                                             --
   --   SH_CONST: Constant shift amount (SLL, SRL, SRA).                   --
   --   SH_16:    Shift 16-bit (LUI).                                      --
   --   ADD_4:    Plus 4.                                                  --
   --   REG:      Forwarded RS register value. [Forwarding]                --
   --------------------------------------------------------------------------
   case d.ec.alu.src.a is
      when SH_CONST => a := zext(smt, 32);
      when SH_16    => a := zext(b"10000", 32);
      when ADD_4    => a := zext(b"00100", 32);
      when REG      => a := fa;
   end case;

   --------------------------------------------------------------------------
   -- Input B:                                                             --
   --   SIGN: Sign extend 16-bit immediate value according to MSB.         --
   --   ZERO: Zero extend 16-bit immediate value.                          --
   --   PC:   Current PC value.                                            --
   --   REG:  Forwarded RT register value. [Forwarding]                    --
   --------------------------------------------------------------------------
   case d.ec.alu.src.b is
      when SIGN => b := sext(imm, 32);
      when ZERO => b := zext(imm, 32);
      when PC   => b := std_logic_vector(f.pc) & "00";
      when REG  => b := fb;
   end case;

   --------------------------------------------------------------------------
   -- ALU                                                                  --
   --------------------------------------------------------------------------
   -- IMPROVE: optimize SLT, SLTU.

   -- Status register bits taken from ALU operand B. The fetch process copies
   -- them into the CP0 status register when d.cc.mtsr is set. [INTR]
   intr_iec <= b(0);             -- IEc: Interrupt enable current.
   intr_iep <= b(2);             -- IEp: Interrupt enable previous.
   intr_ieo <= b(4);             -- IEo: Interrupt enable old.
   intr_im  <= b(15 downto 8);   -- Interrupt mask.

   atmp  := addsub(a, b, d.ec.alu.op);
   v.res := (others => '0');     -- Default result for operations without one.

   case d.ec.alu.op is
      when ADD | ADDU |
           SUB | SUBU  => v.res := atmp;
      when SLT         => v.res := fslt(a, b);
      when SLTU        => v.res := fsltu(a, b);
      when SLL0 | SLLV => v.res := fsll(b, a(4 downto 0));
      when SRA0 | SRAV => v.res := fsra(b, a(4 downto 0));
      when SRL0 | SRLV => v.res := fsrl(b, a(4 downto 0));
      when AND0        => v.res := a and b;
      when OR0         => v.res := a or b;
      when NOR0        => v.res := a nor b;
      when XOR0        => v.res := a xor b;
      when JALR | JR   => null;
      when MFCP0 =>
         -- Read a coprocessor 0 register selected by the rd field.
         case cp0reg(rgd) is
            when SR    => v.res := get_sr(cp);
            when CAUSE => v.res(15 downto 8) := ci.irq;
            when EPC   => v.res := std_logic_vector(cp.epc) & "00";
            when ERR   => null;
         end case;
      when MTCP0 => null;        -- MTCP0 and RFE operations are handled at
      when RFE   => null;        -- the IF stage.
      when ERR   => null;
   end case;

   --------------------------------------------------------------------------
   -- Branch/Jump                                                          --
   --------------------------------------------------------------------------
   -- IMPROVE: move jump to DE stage.
   if fa = fb then equ := '1'; else equ := '0'; end if;
   if fa = x"00000000" then eqz := '1'; else eqz := '0'; end if;

   -- Branch decision.
   case d.ec.jmp.op is
      when NOP => v.f.jmp := '0';
      when JMP => v.f.jmp := '1';
      when EQ  => v.f.jmp := equ;
      when NEQ => v.f.jmp := not equ;
      when LTZ => v.f.jmp := fa(31);
      when GTZ => v.f.jmp := not (fa(31) or eqz);
      when LEZ => v.f.jmp := fa(31) or eqz;
      when GEZ => v.f.jmp := not fa(31);
   end case;

   -- Branch target (word address).
   case d.ec.jmp.src is
      when REG => v.f.j := unsigned(fa(31 downto 2));
      when JMP => v.f.j := f.pc(31 downto 28) & unsigned(d.i);
      -- PC-relative target: add the sign-extended word offset to the PC as
      -- 30-bit vectors. Going through an integer instead would trigger
      -- numeric_std truncation warnings for PC word addresses >= 2**29.
      when BRA => v.f.j := unsigned(signed(f.pc) + signed(sext(imm, 30)));
   end case;

   v.mc  := d.mc;   -- Pass memory stage control on.
   v.wc  := d.wc;   -- Pass write back control on.
   v.str := fb;     -- Store data (forwarded RT value).
   ein <= v;
end process;
|
572 |
|
|
|
573 |
|
|
|
574 |
|
|
|
575 |
|
|
-----------------------------------------------------------------------------
|
576 |
|
|
-- MEMORY STAGE --
|
577 |
|
|
-----------------------------------------------------------------------------
|
578 |
|
|
-- Memory stage (combinational).
-- Drives the data bus outputs from the EX stage register, derives the byte
-- select lines from the access size and the low address bits, extends the
-- fetched data and picks the value that is written back to the GPRs.
me : process(ci.dat, e, e.mc, e.mc.mem, e.wc, m, m.wc)
   variable nxt  : me_t;                            -- Next ME stage state.
   variable ldat : std_logic_vector(31 downto 0);   -- Fetched memory data.
begin
   nxt := m;

   -- Data bus request: address is the ALU result, write data the stored
   -- RT value.
   co.dadr <= e.res;
   co.dat  <= e.str;
   co.we   <= e.mc.mem.we;

   --------------------------------------------------------------------------
   -- Address Decode and Data Extension                                    --
   --------------------------------------------------------------------------
   -- Per access size: translate the lower address bits to the Wishbone    --
   -- Bus selection scheme, and extend the fetched data either with zeros  --
   -- or according to its MSB.                                             --
   --------------------------------------------------------------------------
   case e.mc.mem.byt is
      when NONE =>
         co.sel <= "0000";
         ldat   := (others => '0');     -- AREA: (others => '-');

      when BYTE =>
         case e.res(1 downto 0) is
            when "00"   => co.sel <= "1000";
            when "01"   => co.sel <= "0100";
            when "10"   => co.sel <= "0010";
            when "11"   => co.sel <= "0001";
            when others => co.sel <= "0000";
         end case;

         case e.mc.mem.ext is
            when ZERO => ldat := zext(ci.dat(7 downto 0), 32);
            when SIGN => ldat := sext(ci.dat(7 downto 0), 32);
         end case;

      when HALF =>
         if e.res(1) = '0' then
            co.sel <= "1100";
         elsif e.res(1) = '1' then
            co.sel <= "0011";
         else
            co.sel <= "0000";           -- Address bit is not a clean '0'/'1'.
         end if;

         case e.mc.mem.ext is
            when ZERO => ldat := zext(ci.dat(15 downto 0), 32);
            when SIGN => ldat := sext(ci.dat(15 downto 0), 32);
         end case;

      when WORD =>
         co.sel <= "1111";
         ldat   := ci.dat;
   end case;

   -- Take either the result of the ALU or the loaded data from memory.
   case e.mc.src is
      when ALU => nxt.res := e.res;
      when MEM => nxt.res := ldat;
   end case;

   nxt.rd := e.rd;
   nxt.wc := e.wc;
   min <= nxt;
end process me;
|
641 |
|
|
|
642 |
|
|
|
643 |
|
|
|
644 |
|
|
-----------------------------------------------------------------------------
|
645 |
|
|
-- REGISTERS --
|
646 |
|
|
-----------------------------------------------------------------------------
|
647 |
|
|
-- Pipeline registers.
-- On every rising clock edge the next-state values are taken over unless the
-- pipeline is held; a synchronous reset clears all stage registers and is
-- applied last, so it wins over the regular update.
reg : process(ci.clk)
begin
   if rising_edge(ci.clk) then

      if ci.hld = '0' then
         f  <= fin;    -- IF
         e  <= ein;    -- EX
         m  <= min;    -- ME
         cp <= cpin;   -- CP0

         --------------------------------------------------------------------
         -- Branch Correction                                              --
         --------------------------------------------------------------------
         -- Every branch or jump is assumed to be NOT taken, and            --
         -- instructions are loaded in sequence. If the jump is taken, an   --
         -- incorrect instruction has already been loaded in the IF stage.  --
         -- To annihilate its effects the DE stage is cleared instead of    --
         -- being loaded with the decoded instruction.                      --
         -- [fcpu.clear(v_i : de_t)]                                        --
         --------------------------------------------------------------------
         if e.f.jmp = '1' then
            d <= clear(d);
         else
            d <= din;  -- DE
         end if;

         -- Set Branch Delay Delay slot flag.
         intr_bdd <= intr_bd;
      end if;

      -- On reset clear all relevant control signals.
      if ci.rst = '1' then
         f  <= clear(f);    -- IF
         d  <= clear(d);    -- DE
         e  <= clear(e);    -- EX
         m  <= clear(m);    -- ME
         cp <= clear(cp);   -- CP0
      end if;

   end if;
end process reg;
|
683 |
|
|
end rtl;
|