OpenCores
URL https://opencores.org/ocsvn/ion/ion/trunk

Subversion Repositories ion

[/] [ion/] [trunk/] [vhdl/] [mips_cpu.vhdl] - Rev 14

Go to most recent revision | Compare with Previous | Blame | View Log

--------------------------------------------------------------------------------
-- ion_cpu.vhdl -- MIPS-I(tm) compatible CPU core
--------------------------------------------------------------------------------
-- project:       ION (http://www.opencores.org/project,ion_cpu)
-- author:        Jose A. Ruiz (ja_rd@hotmail.com)
-- created:       Jan/11/2011
-- last modified: Jan/25/2011 (ja_rd@hotmail.com)
--------------------------------------------------------------------------------
-- Software placed into the public domain by the author. Use under the terms of
-- the GPL.
-- Software 'as is' without warranty.  Author liable for nothing.
--------------------------------------------------------------------------------
-- NOTE: exceptions only partially implemented; jumps, loads and stores are
-- not aborted.
-- 
--
--------------------------------------------------------------------------------
--### PLASMA features not implemented yet
--  # MUL/DIV
--
--### MIPS-I things not implemented
--  # Invalid instruction detection
--  # Kernel/user status
--  # RTE instruction
--  # Most of the CP0 registers and of course all of the CP1
--  # External interrupts
--
--### Things implemented but not tested
--  # Syscall instruction (does a jal to 0x3c and that's it)
--  # Memory pause input
--
--### Things with provisional implementation
-- 
-- 1.- Load interlocks: the pipeline is stalled for every load instruction, even
--     if the target register is not used in the following instruction. So that
--     every load takes two cycles.
--     The interlock logic should check register indices.
--
-- 2.- Invalid instructions are not detected as such. Their behaviour is
--     undefined and inpredictable.
--     Invalid instructions should trigger an exception or at least just NOP.
--     This is closely related to privilege level so it will have to wait.
--------------------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
use work.mips_pkg.all;
 
entity mips_cpu is
    generic(
        XILINX_REGBANK  : string  := "distributed" -- {distributed|block}
    );
    port(
        clk             : in std_logic;
        reset           : in std_logic;
        interrupt       : in std_logic;
 
        data_rd_addr    : out std_logic_vector(31 downto 0);
        data_rd         : in std_logic_vector(31 downto 0);
        data_rd_vma     : out std_logic;
 
        code_rd_addr    : out std_logic_vector(31 downto 2);
        code_rd         : in std_logic_vector(31 downto 0);
        code_rd_vma     : out std_logic;
 
        data_wr_addr    : out std_logic_vector(31 downto 2);
        byte_we         : out std_logic_vector(3 downto 0);
        data_wr         : out std_logic_vector(31 downto 0);
 
        -- NOTE: needs to be synchronous to clk
        mem_wait        : in std_logic
    );
end; --entity mips_cpu
 
architecture rtl of mips_cpu is
 
--------------------------------------------------------------------------------
-- Pipeline stage 0
 
signal p0_pc_reg :          t_pc;
signal p0_pc_restart :      t_pc;
signal p0_pc_incremented :  t_pc;
signal p0_pc_jump :         t_pc;
signal p0_pc_branch :       t_pc;
signal p0_pc_target :       t_pc;
signal p0_pc_next :         t_pc;
signal p0_rs_num :          t_regnum;
signal p0_rt_num :          t_regnum;
signal p0_jump_cond_value : std_logic;
signal p0_rbank_rs_hazard : std_logic;
signal p0_rbank_rt_hazard : std_logic;
 
--------------------------------------------------------------------------------
-- Pipeline stage 1
 
 
signal p1_rbank :           t_rbank := (others => X"00000000");
 
-- IMPORTANT: This attribute is used by Xilinx tools to select how to implement
-- the register bank. If we don't use it, by default XST would infer 2 BRAMs for
-- the 1024-bit 3-port reg bank, which you probably don't want.
-- This can take the values {distributed|block}.
attribute ram_style :       string;
attribute ram_style of p1_rbank : signal is "distributed";
 
signal p1_rs, p1_rt :       t_word;
signal p1_rs_rbank :        t_word;
signal p1_rt_rbank :        t_word;
signal p1_rbank_forward :   t_word;
signal p1_rd_num :          t_regnum;
signal p1_rbank_wr_addr :   t_regnum;
signal p1_rbank_we :        std_logic;
signal p1_rbank_wr_data :   t_word;
signal p1_alu_inp1 :        t_word;
signal p1_alu_inp2 :        t_word;
signal p1_alu_outp :        t_word;
-- ALU control inputs (shortened name for brevity in expressions)
signal p1_ac :              t_alu_control;
-- ALU flag outputs (comparison results)
signal p1_alu_flags :       t_alu_flags;
-- immediate data, sign- or zero-extended as required by IR
signal p1_data_imm :        t_word;
signal p1_branch_offset :   t_pc;
signal p1_branch_offset_sex:std_logic_vector(31 downto 18);
signal p1_rbank_rs_hazard : std_logic;
signal p1_rbank_rt_hazard : std_logic;
signal p1_jump_type_set0 :  std_logic_vector(1 downto 0);
signal p1_jump_type_set1 :  std_logic_vector(1 downto 0);
signal p1_ir_reg :          std_logic_vector(31 downto 0);
signal p1_ir_op :           std_logic_vector(31 downto 26);
signal p1_ir_fn :           std_logic_vector(5 downto 0);
signal p1_op_special :      std_logic;
signal p1_exception :       std_logic;
signal p1_do_reg_jump :     std_logic;
signal p1_do_zero_ext_imm : std_logic;
signal p1_set_cp0 :         std_logic;
signal p1_get_cp0 :         std_logic;
signal p1_load_hi :         std_logic;
signal p1_load_lo :         std_logic;
signal p1_alu_op2_sel :     std_logic_vector(1 downto 0);
signal p1_alu_op2_sel_set0: std_logic_vector(1 downto 0);
signal p1_alu_op2_sel_set1: std_logic_vector(1 downto 0);
signal p1_do_load :         std_logic;
signal p1_do_store :        std_logic;
signal p1_store_size :      std_logic_vector(1 downto 0);
signal p1_we_control :      std_logic_vector(5 downto 0);
signal p1_load_alu :        std_logic;
signal p1_load_alu_set0 :   std_logic;
signal p1_load_alu_set1 :   std_logic;
signal p1_ld_upper_hword :  std_logic;
signal p1_ld_upper_byte :   std_logic;
signal p1_ld_unsigned :     std_logic;
signal p1_jump_type :       std_logic_vector(1 downto 0);
signal p1_link :            std_logic;
signal p1_jump_cond_sel :   std_logic_vector(2 downto 0);
signal p1_data_addr :       t_addr;
signal p1_data_offset :     t_addr;
 
signal p1_muldiv_result :   t_word;
signal p1_muldiv_func :     t_mult_function; 
signal p1_muldiv_running :  std_logic;
signal p1_muldiv_started :  std_logic;
signal p1_muldiv_stall :    std_logic;
 
 
--------------------------------------------------------------------------------
-- Pipeline stage 2
 
signal p2_muldiv_started :  std_logic;
signal p2_exception :       std_logic;
signal p2_rd_addr :         std_logic_vector(1 downto 0);
signal p2_rd_mux_control :  std_logic_vector(3 downto 0);
signal p2_load_target :     t_regnum;
signal p2_do_load :         std_logic;
signal p2_ld_upper_hword :  std_logic;
signal p2_ld_upper_byte :   std_logic;
signal p2_ld_unsigned :     std_logic;
signal p2_wback_mux_sel :   std_logic_vector(1 downto 0);
signal p2_data_word_rd :    t_word;
signal p2_data_word_ext :   std_logic;
 
--------------------------------------------------------------------------------
-- Global control signals 
 
signal load_interlock :     std_logic;
signal stall_pipeline :     std_logic;
-- pipeline is stalled for any reason
signal pipeline_stalled :   std_logic;
-- pipeline is stalled because of a load instruction interlock
signal pipeline_interlocked:std_logic;
 
--------------------------------------------------------------------------------
-- CP0 registers and signals
 
-- CP0[12]: status register 
-- FIXME status flags unimplemented
signal cp0_status :         std_logic_vector(1 downto 0);
-- Output of CP0 register bank (only a few regs are implemented)
signal cp0_reg_read :       t_word;
-- CP0[14]: EPC register (PC value saved at exceptions)
signal cp0_epc :            t_pc;
 
begin
 
--##############################################################################
-- Register bank & datapath
 
-- Register indices are 'decoded' out of the instruction word BEFORE loading IR
p0_rs_num <= std_logic_vector(code_rd(25 downto 21));
with p1_ir_reg(31 downto 26) select p1_rd_num <= 
    p1_ir_reg(15 downto 11)    when "000000",
    p1_ir_reg(20 downto 16)    when others;
 
p0_rt_num <= std_logic_vector(code_rd(20 downto 16)); -- also called rs2 in the docs
 
--------------------------------------------------------------------------------
-- Data input shifter & masker (LB,LBU,LH,LHU,LW)
 
p2_rd_mux_control <= p2_ld_upper_hword & p2_ld_upper_byte & p2_rd_addr;
 
-- Extension for unused bits will be zero or the sign (bit 7 or bit 15)
p2_data_word_ext <= '0'         when p2_ld_unsigned='1' else
                    data_rd(15)  when p2_ld_upper_byte='1' else
                    data_rd(7)   when p2_rd_addr="11" else
                    data_rd(15)  when p2_rd_addr="10" else
                    data_rd(23);
 
-- byte 0 may come from any of the 4 bytes of the input word
with p2_rd_mux_control select p2_data_word_rd(7 downto 0) <=
    data_rd(31 downto 24)        when "0000",
    data_rd(23 downto 16)        when "0001",
    data_rd(23 downto 16)        when "0100",
    data_rd(15 downto  8)        when "0010",
    data_rd( 7 downto  0)        when others;
 
-- byte 1 may come from input bytes 1 or 3 or may be extended for LB, LBU
with p2_rd_mux_control select p2_data_word_rd(15 downto 8) <=
    data_rd(31 downto 24)        when "0100",
    data_rd(15 downto  8)        when "0110",
    data_rd(15 downto  8)        when "1100",
    data_rd(15 downto  8)        when "1101",
    data_rd(15 downto  8)        when "1110",
    data_rd(15 downto  8)        when "1111",
    (others => p2_data_word_ext) when others;
 
-- bytes 2,3 come straight from input or are extended for LH,LHU
with p2_ld_upper_hword select p2_data_word_rd(31 downto 16) <=
    (others => p2_data_word_ext)    when '0',
    data_rd(31 downto 16)            when others;
 
-- Select which data is to be written back to the reg bank and where
p1_rbank_wr_addr <= p1_rd_num   when p2_do_load='0' and p1_link='0' else
                    "11111"     when p2_do_load='0' and p1_link='1' else 
                    p2_load_target;
 
p2_wback_mux_sel <= 
    "00" when p2_do_load='0' and p1_get_cp0='0' and p1_link='0' else
    "01" when p2_do_load='1' and p1_get_cp0='0' and p1_link='0' else
    "10" when p2_do_load='0' and p1_get_cp0='1' and p1_link='0' else
    "11";
 
with (p2_wback_mux_sel) select p1_rbank_wr_data <=
    p1_alu_outp                when "00",
    p2_data_word_rd            when "01",
    p0_pc_incremented & "00"   when "11",
    cp0_reg_read               when others;
 
p1_rbank_we <= '1' when (p2_do_load='1' or p1_load_alu='1' or 
                        p1_link='1' or p1_get_cp0='1') and 
                        p1_rbank_wr_addr/="00000" and
                        -- on exception, abort next instruction (by preventing 
                        -- regbank writeback).
                        p2_exception='0'
                else '0';
 
-- Register bank as triple-port RAM. Should synth to 2 BRAMs unless you use
-- synth attributes to prevent it (see 'ram_style' attribute above) or your
-- FPGA has 3-port BRAMS, or has none.
synchronous_reg_bank:
process(clk)
begin
    if clk'event and clk='1' then
        if p1_rbank_we='1' and 
           (pipeline_stalled='0' or pipeline_interlocked='1') then -- @note1
            p1_rbank(conv_integer(p1_rbank_wr_addr)) <= p1_rbank_wr_data;
        end if;
        p1_rt_rbank <= p1_rbank(conv_integer(p0_rt_num));
        p1_rs_rbank <= p1_rbank(conv_integer(p0_rs_num));
    end if;
end process synchronous_reg_bank;
 
-- Register writeback data in case it needs to be forwarded.
data_forward_register:
process(clk)
begin
    if clk'event and clk='1' then
        if p1_rbank_we='1' then -- no need to check for stall cycles
            p1_rbank_forward <= p1_rbank_wr_data;
        end if;
    end if;
end process data_forward_register;
 
-- Bypass sync RAM if we're reading and writing to the same address. This saves
-- 1 stall cycle and fixes the data hazard.
p0_rbank_rs_hazard <= '1' when p1_rbank_wr_addr=p0_rs_num and p1_rbank_we='1' 
                      else '0';
p0_rbank_rt_hazard <= '1' when p1_rbank_wr_addr=p0_rt_num and p1_rbank_we='1' 
                      else '0';
 
p1_rs <= p1_rs_rbank when p1_rbank_rs_hazard='0' else p1_rbank_forward;
p1_rt <= p1_rt_rbank when p1_rbank_rt_hazard='0' else p1_rbank_forward;
 
-- Zero extension/Sign extension for instruction immediate data
p1_data_imm(15 downto 0)  <= p1_ir_reg(15 downto 0);
 
with p1_do_zero_ext_imm select p1_data_imm(31 downto 16) <= 
    (others => '0')             when '1',
    (others => p1_ir_reg(15))   when others;
 
 
--------------------------------------------------------------------------------
-- ALU & ALU input multiplexors
 
p1_alu_inp1 <= p1_rs;
 
with p1_alu_op2_sel select p1_alu_inp2 <= 
    p1_data_imm         when "11",
    p1_muldiv_result    when "01", -- FIXME mux input wasted!
    p1_muldiv_result    when "10",
    p1_rt               when others;
 
alu_inst : entity work.mips_alu
    port map (
        clk             => clk,
        reset           => reset,
        ac              => p1_ac,
        flags           => p1_alu_flags,
 
        inp1            => p1_alu_inp1,
        inp2            => p1_alu_inp2,
        outp            => p1_alu_outp
    );
 
 
--------------------------------------------------------------------------------
-- Mul/Div block interface
 
-- Compute the mdiv block function word. If p1_muldiv_func has any value other
-- than MULT_NOTHING a new mdiv operation will start, truncating whatever other
-- operation that may have been in course.
-- So we encode here the function to be performed and make sure the value stays
-- there for only one cycle (the first ALU cycle of the mul/div instruction).
 
-- This will be '1' for all mul/div operations other than NOP...
p1_muldiv_func(3) <= '1' when p1_op_special='1' and 
                              p1_ir_fn(5 downto 4)="01" and
                              -- ...but only if the mdiv is not already running
                              p2_muldiv_started = '0' and
                              p1_muldiv_running ='0'
                      else '0';
 
-- When bit(3) is zero, the rest are zeroed too. Otherwise, they come from IR
p1_muldiv_func(2 downto 0) <= 
    p1_ir_fn(3) & p1_ir_fn(1 downto 0) when p1_muldiv_func(3)='1'
    else "000";
 
mult_div: entity work.mips_mult
    port map (
        a           => p1_rs,
        b           => p1_rt,
        c_mult      => p1_muldiv_result,
        pause_out   => p1_muldiv_running,
        mult_func   => p1_muldiv_func,
        clk         => clk,
        reset_in    => reset
    );
 
-- Active only for the 1st ALU cycle of any mul/div instruction
p1_muldiv_started <= '1' when p1_op_special='1' and 
                              p1_ir_fn(5 downto 3)="011" and
                              -- 
                              p1_muldiv_running='0'
                      else '0';
 
-- Stall the pipeline to enable mdiv operation completion.
-- We need p2_muldiv_started to distinguish the cycle before p1_muldiv_running
-- is asserted and the cycle after it deasserts.
-- Otherwise we would reexecute the same muldiv endlessly instruction after 
-- deassertion of p1_muldiv_running, since the IR was stalled and still contains 
-- the mul opcode...
p1_muldiv_stall <= '1' when
        -- Active for the cycle immediately before p1_muldiv_running asserts
        -- and NOT for the cycle after it deasserts
        (p1_muldiv_started='1' and p2_muldiv_started='0') or
        -- Active until operation is complete
        p1_muldiv_running = '1'
        else '0';
 
 
--##############################################################################
-- PC register and branch logic
 
-- p0_pc_reg will not be incremented on stall cycles
p0_pc_incremented <= p0_pc_reg + (not stall_pipeline);
 
-- main pc mux: jump or continue
p0_pc_next <= 
    p0_pc_target when
        -- We jump on jump instructions whose condition is met...
        ((p1_jump_type(1)='1' and p0_jump_cond_value='1' and 
        -- ...except we abort any jump that follows the victim of an exception
          p2_exception='0') or
        -- We jump on exceptions too...
        p1_exception='1') 
        -- ... but we only jump at all if the pipeline is not stalled
        and stall_pipeline='0'
    else p0_pc_incremented;
 
pc_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- reset to 0xffffffff so that 1st fetch addr is 0x00000000
            p0_pc_reg <= (others => '1');
        else
            -- p0_pc_reg holds the same value as external sync ram addr register
            p0_pc_reg <= p0_pc_next;
            -- p0_pc_restart = addr saved to EPC on interrupts (@note2)
            -- It's the addr of the instruction triggering the exception
            -- FIXME handle delay slot case
            if (p1_jump_type="00" or p0_jump_cond_value='0') then 
                p0_pc_restart <= p0_pc_reg;
            end if;
        end if;
    end if;
end process pc_register;
 
-- p0_pc_reg holds the same addr as the addr register of the external synchronous 
-- memory; what we put on the addr bus is p0_pc_next.
data_rd_addr <= p1_data_addr(31 downto 0);
 
-- FIXME these two need to pushed behind a register, they are glitch-prone
data_rd_vma <= p1_do_load and not pipeline_stalled; -- FIXME register
code_rd_vma <= not stall_pipeline; -- FIXME registe
 
code_rd_addr <= p0_pc_next;
 
data_wr_addr <= p1_data_addr(31 downto 2);
 
-- compute target of J/JR instructions
p0_pc_jump <=   p1_rs(31 downto 2) when p1_do_reg_jump='1' else
                p0_pc_reg(31 downto 28) & p1_ir_reg(25 downto 0); 
 
-- compute target of relative branch instructions
p1_branch_offset_sex <= (others => p1_ir_reg(15));
p1_branch_offset <= p1_branch_offset_sex & p1_ir_reg(15 downto 0);
-- p0_pc_reg is the addr of the instruction in delay slot
p0_pc_branch <= p0_pc_reg + p1_branch_offset;
 
-- decide which jump target is to be used
p0_pc_target <= X"0000003"&"11"     when p1_exception='1' else 
             p0_pc_jump             when p1_jump_type(0)='1' else 
             p0_pc_branch;
 
 
--##############################################################################
-- Instruction decoding and IR
 
instruction_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p1_ir_reg <= (others => '0');
        elsif stall_pipeline='0' then
            p1_ir_reg <= code_rd;
        end if;
    end if;
end process instruction_register;
 
-- 'Extract' main fields from IR, for convenience
p1_ir_op <= p1_ir_reg(31 downto 26);
p1_ir_fn <= p1_ir_reg(5 downto 0);
 
-- Decode jump type, if any, for instructions with op/=0
with p1_ir_op select p1_jump_type_set0 <=
    -- FIXME weed out invalid instructions
    "10" when "000001", -- BLTZ, BGEZ, BLTZAL, BGTZAL
    "11" when "000010", -- J
    "11" when "000011", -- JAL
    "10" when "000100", -- BEQ
    "10" when "000101", -- BNE
    "10" when "000110", -- BLEZ
    "10" when "000111", -- BGTZ
    "00" when others;   -- no jump
 
-- Decode jump type, if any, for instructions with op=0
p1_jump_type_set1 <= "11" when p1_op_special='1' and 
                               p1_ir_reg(5 downto 1)="00100" 
                     else "00";
 
-- Decode jump type for the instruction in IR (composite of two formats)
p1_jump_type <= p1_jump_type_set0 or p1_jump_type_set1;
 
p1_link <= '1' when (p1_ir_op="000000" and p1_ir_reg(5 downto 0)="001001") or
                    (p1_ir_op="000001" and p1_ir_reg(20)='1') or
                    (p1_ir_op="000011")
           else '0';
 
-- Decode jump condition: encode a mux control signal from IR...
p1_jump_cond_sel <= 
    "001" when p1_ir_op="000001" and p1_ir_reg(16)='0' else --   op1 < 0   BLTZ*
    "101" when p1_ir_op="000001" and p1_ir_reg(16)='1' else -- !(op1 < 0) BNLTZ*
    "010" when p1_ir_op="000100" else                       --   op1 == op2  BEQ
    "110" when p1_ir_op="000101" else                       -- !(op1 == op2) BNE
    "011" when p1_ir_op="000110" else                       --   op1 <= 0   BLEZ
    "111" when p1_ir_op="000111" else                       -- !(op1 <= 0)  BGTZ
    "000";                                                  -- always
 
-- ... and use mux control signal to select the condition value
with p1_jump_cond_sel select p0_jump_cond_value <=
        p1_alu_flags.inp1_lt_zero       when "001",
    not p1_alu_flags.inp1_lt_zero       when "101",
        p1_alu_flags.inp1_eq_inp2       when "010",
    not p1_alu_flags.inp1_eq_inp2       when "110",
        (p1_alu_flags.inp1_lt_inp2 or 
         p1_alu_flags.inp1_eq_inp2)     when "011",
    not (p1_alu_flags.inp1_lt_inp2 or 
         p1_alu_flags.inp1_eq_inp2)     when "111",
    '1'                                 when others;
 
-- Decode instructions that launch exceptions
p1_exception <= '1' when p1_op_special='1' and p1_ir_reg(5 downto 1)="00110" 
                else '0';
 
-- Decode MTC0/MFC0 instructions
p1_set_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000100" else '0';
p1_get_cp0 <= '1' when p1_ir_reg(31 downto 21)="01000000000" else '0';
 
-- FIXME elaborate and explain this
 
p1_op_special <= '1' when p1_ir_op="000000" else '0';
 
p1_do_reg_jump <= '1' when p1_op_special='1' and p1_ir_fn(5 downto 1)="00100" else '0';
p1_do_zero_ext_imm <= '1' when (p1_ir_op(31 downto 28)="0011") else '0';
 
-- Decode input data mux control (LW, LH, LB, LBU, LHU) and load enable
p1_do_load <= '1' when p1_ir_op(31 downto 29)="100" and
                       p2_exception='0'
              else '0';
 
p1_load_alu_set0 <= '1' 
    when p1_op_special='1' and 
        ((p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="00") or
         (p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="10") or
         (p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="11") or
         (p1_ir_op(31 downto 29)="000" and p1_ir_op(27 downto 26)="00") or
         (p1_ir_op(31 downto 28)="0100" and p1_ir_op(27 downto 26)="00") or
         (p1_ir_op(31 downto 28)="0100" and p1_ir_op(27 downto 26)="10") or
         (p1_ir_op(31 downto 28)="1000") or
         (p1_ir_op(31 downto 28)="1001") or
         (p1_ir_op(31 downto 28)="1010" and p1_ir_op(27 downto 26)="10") or
         (p1_ir_op(31 downto 28)="1010" and p1_ir_op(27 downto 26)="11") or
         (p1_ir_op(31 downto 28)="0010" and p1_ir_op(27 downto 26)="01"))
    else '0';
 
with p1_ir_op select p1_load_alu_set1 <= 
    '1' when "001000",
    '1' when "001001",
    '1' when "001010",
    '1' when "001011",
    '1' when "001100",
    '1' when "001101",
    '1' when "001110",
    '1' when "001111",
    -- FIXME a few others missing: MFC0, etc
    '0' when others;
p1_load_alu <= p1_load_alu_set0 or p1_load_alu_set1;
 
p1_ld_upper_hword <= p1_ir_op(27); -- use input upper hword vs. sign extend/zero
p1_ld_upper_byte <= p1_ir_op(26);  -- use input upper byte vs. sign extend/zero
p1_ld_unsigned <= p1_ir_op(28);    -- sign extend vs. zero extend
 
-- ALU input-2 selection: use external data for 2x opcodes (loads)
p1_alu_op2_sel_set0 <= 
    "11" when    p1_ir_op(31 downto 30)="10" or p1_ir_op(29)='1' else 
    "00";
 
-- ALU input-2 selection: use registers Hi and Lo for MFHI, MFLO
with p1_ir_fn select p1_alu_op2_sel_set1 <=
    "01" when "010000",
    "10" when "010010",
    "00" when others;
 
-- ALU input-2 final selection
p1_alu_op2_sel <= p1_alu_op2_sel_set0 or p1_alu_op2_sel_set1;
 
-- Decode store operations
p1_do_store <= '1' when p1_ir_op(31 downto 29)="101" else '0';
p1_store_size <= p1_ir_op(27 downto 26);
 
 
-- Decode load enables for Hi and Lo registers (MTHI and MTLO)
p1_load_hi <= '1' when p1_op_special='1' and p1_ir_fn="010001" else '0';
p1_load_lo <= '1' when p1_op_special='1' and p1_ir_fn="010011" else '0';
 
-- Decode ALU control dignals
 
p1_ac.use_slt <= '1' when (p1_ir_op="000001" and p1_ir_reg(20 downto 17)="01000") or
                        (p1_ir_op="000000" and p1_ir_reg(5 downto 1)="10101") or
                        p1_ir_op="001010" or p1_ir_op="001011"
               else '0';
p1_ac.arith_unsigned <= p1_ac.use_slt and p1_ir_reg(0);
 
p1_ac.use_logic(0) <= '1' when (p1_op_special='1' and p1_ir_fn(5 downto 3)/="000") or
                    -- all immediate arith and logic
                    p1_ir_op(31 downto 29)="001"
                 else '0';
p1_ac.use_logic(1) <= '1' when (p1_op_special='1' and p1_ir_fn="100111") else '0';
 
p1_ac.use_arith <= '1' when p1_ir_op(31 downto 28)="0010" or 
                            (p1_op_special='1' and 
                                (p1_ir_fn(5 downto 2)="1000" or
                                p1_ir_fn(5 downto 2)="1010"))
                 else '0';
 
-- selection of 2nd internal alu operand: {i2, /i2, i2<<16, 0x0}
p1_ac.neg_sel(1)<= '1' when p1_ir_op(29 downto 26) = "1111" else '0';
p1_ac.neg_sel(0)<= '1' when   p1_ir_op="001010" or 
                            p1_ir_op="001011" or
                            p1_ir_op(31 downto 28)="0001" or
                            (p1_op_special='1' and
                                (p1_ir_fn="100010" or
                                 p1_ir_fn="100011" or
                                 p1_ir_fn(5 downto 2)="1010"))
                 else '0';
p1_ac.cy_in <= p1_ac.neg_sel(0);
 
p1_ac.shift_sel <= p1_ir_fn(1 downto 0);
 
p1_ac.logic_sel <= "00" when (p1_op_special='1' and p1_ir_fn="100100") else
                 "01" when (p1_op_special='1' and p1_ir_fn="100101") else
                 "10" when (p1_op_special='1' and p1_ir_fn="100110") else
                 "01" when (p1_op_special='1' and p1_ir_fn="100111") else
                 "00" when (p1_ir_op="001100") else
                 "01" when (p1_ir_op="001101") else
                 "10" when (p1_ir_op="001110") else
                 "11";
 
p1_ac.shift_amount <= p1_ir_reg(10 downto 6) when p1_ir_fn(2)='0' else p1_rs(4 downto 0);
 
--------------------------------------------------------------------------------
 
-- Stage 1 pipeline register. Involved in ALU control.
pipeline_stage1_register:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p1_rbank_rs_hazard <= '0';
            p1_rbank_rt_hazard <= '0';
        elsif stall_pipeline='0' then
            p1_rbank_rs_hazard <= p0_rbank_rs_hazard;
            p1_rbank_rt_hazard <= p0_rbank_rt_hazard;
        end if;
    end if;
end process pipeline_stage1_register;
 
pipeline_stage1_register2:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p2_muldiv_started <= '0';
        else
            p2_muldiv_started <= p1_muldiv_running;
        end if;
    end if;
end process pipeline_stage1_register2;
 
 
-- Stage 2 pipeline register. Split in two for convenience.
-- This register deals with two kinds of stalls:
-- * When the pipeline stalls because of a load interlock, this register is 
--   allowed to update so that the load operation can complete while the rest of
--   the pipeline is frozen.
-- * When the stall is caused by any other reason, this register freezes with 
--   the rest of the machine.
 
-- Part of stage 2 register that controls load operation
pipeline_stage2_register_load_control:
process(clk)
begin
    if clk'event and clk='1' then
        -- Clear load control, effectively preventing load, at reset or if
        -- the previous instruction raised an exception.
        if reset='1' or p2_exception='1' then
            p2_do_load <= '0';
            p2_ld_upper_hword <= '0';
            p2_ld_upper_byte <= '0';
            p2_ld_unsigned <= '0';
            p2_load_target <= "00000";
 
        -- Load signals from previous stage only if there is no pipeline stall
        -- unless the stall is caused by interlock (@note1).
        elsif (stall_pipeline='0' or load_interlock='1') then
            -- Disable reg bank writeback if pipeline is stalled; this prevents
            -- duplicate writes in case the stall is a mem_wait.
            if pipeline_stalled='0' then
                p2_do_load <= p1_do_load;
            else
                p2_do_load <= '0';
            end if;
            p2_load_target <= p1_rd_num;
            p2_ld_upper_hword <= p1_ld_upper_hword;
            p2_ld_upper_byte <= p1_ld_upper_byte;
            p2_ld_unsigned <= p1_ld_unsigned;
        end if;
    end if;
end process pipeline_stage2_register_load_control;
 
-- All the rest of the stage 2 register
pipeline_stage2_register_others:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            p2_exception <= '0';
 
        -- Load signals from previous stage only if there is no pipeline stall
        -- unless the stall is caused by interlock (@note1).
        elsif (stall_pipeline='0' or load_interlock='1') then
            p2_rd_addr <= p1_data_addr(1 downto 0);
            p2_exception <= p1_exception;
        end if;
    end if;
end process pipeline_stage2_register_others;
 
--------------------------------------------------------------------------------
 
-- FIXME stall when needed: mem pause, mdiv pause and load interlock
 
pipeline_stall_registers:
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            pipeline_stalled <= '0';
            pipeline_interlocked <= '0';
        else
            if stall_pipeline='1' then
                pipeline_stalled <= '1';
            else
                pipeline_stalled <= '0';
            end if;
            if load_interlock='1' then
                pipeline_interlocked <= '1';
            else
                pipeline_interlocked <= '0';
            end if;
        end if;
    end if;
end process pipeline_stall_registers;
 
-- FIXME make sure this combinational will not have bad glitches
stall_pipeline <= mem_wait or load_interlock or p1_muldiv_stall;
 
 
-- FIXME load interlock should happen only if the instruction following 
-- the load actually uses the load target register. Something like this:
-- (p1_do_load='1' and (p1_rd_num=p0_rs_num or p1_rd_num=p0_rt_num))
load_interlock <= '1' when (p1_do_load='1' and pipeline_stalled='0') else '0';
 
--------------------------------------------------------------------------------
 
p1_data_offset(31 downto 16) <= (others => p1_data_imm(15));
p1_data_offset(15 downto 0) <= p1_data_imm(15 downto 0);
 
p1_data_addr <= p1_rs + p1_data_offset;
 
--------------------------------------------------------------------------------
 
-- byte_we is a function of the write size and alignment
-- size = {00=1,01=2,11=4}; we 3 is MSB, 0 is LSB; big endian => 00 is msb
p1_we_control <= pipeline_stalled & p1_do_store & p1_store_size & p1_data_addr(1 downto 0);
 
with p1_we_control select byte_we <=
    "1000"  when "010000",    -- SB %0
    "0100"  when "010001",    -- SB %1
    "0010"  when "010010",    -- SB %2
    "0001"  when "010011",    -- SB %3
    "1100"  when "010100",    -- SH %0
    "0011"  when "010110",    -- SH %2
    "1111"  when "011100",    -- SW %4
    "0000"  when others; -- all other combinations are spurious so don't write
 
-- Data to be stored always comes straight from the reg bank, but it needs to 
-- be shifted so that the LSB is aligned to the write address:
 
data_wr(7 downto 0) <= p1_rt(7 downto 0);
 
with p1_we_control select data_wr(15 downto 8) <= 
    p1_rt( 7 downto  0) when "010010",  -- SB %2
    p1_rt(15 downto  8) when others;
 
with p1_we_control select data_wr(23 downto 16) <= 
    p1_rt( 7 downto  0) when "010001",  -- SB %1
    p1_rt( 7 downto  0) when "010100",  -- SH %0
    p1_rt(23 downto 16) when others;
 
with p1_we_control select data_wr(31 downto 24) <= 
    p1_rt( 7 downto  0) when "010000",  -- SB %0
    p1_rt(15 downto  8) when "010100",  -- SH %0
    p1_rt(31 downto 24) when others;
 
 
--##############################################################################
-- CP0 (what little is implemented of it)
 
process(clk)
begin
    if clk'event and clk='1' then
        if reset='1' then
            -- "10" => mode=kernel; ints=disabled
            cp0_status <= "10";
        else
            -- no need to check for stall cycles when loading these
            if p1_set_cp0='1' then
                -- FIXME check for CP0 reg index
                cp0_status <= p1_rs(cp0_status'high downto 0);
            end if;
            if p1_exception='1' then
                cp0_epc <= p0_pc_restart;
            end if;
        end if;
    end if;
end process;
 
-- FIXME the mux should mask to zero for any unused reg index
cp0_reg_read <= X"0000000" & "00" & cp0_status when p1_rd_num="01100" else
                cp0_epc & "00";
 
end architecture rtl;
 
--------------------------------------------------------------------------------
-- Implementation notes
--------------------------------------------------------------------------------
-- @note1 : 
--
-- This is the meaning of these two signals:
-- pipeline_stalled & pipeline_interlocked =>
--  "00" => normal state
--  "01" => normal state (makes for easier decoding)
--  "10" => all stages of pipeline stalled, including rbank
--  "11" => all stages of pipeline stalled, except reg bank write port
-- 
-- Just to clarify, 'stage X stalled' here means that the registers named 
-- pX_* don't load.
--
-- The register bank WE is enabled when the pipeline is not stalled and when 
-- it is stalled because of a load interlock; so that in case of interlock the
-- load operation can complete while the rest of the pipeline is frozen.
--------------------------------------------------------------------------------
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.