URL
https://opencores.org/ocsvn/rv01_riscv_core/rv01_riscv_core/trunk
Subversion Repositories rv01_riscv_core
[/] [rv01_riscv_core/] [trunk/] [VHDL/] [RV01_jrpu.vhd] - Rev 2
Compare with Previous | Blame | View Log
----------------------------------------------------------------- -- -- ----------------------------------------------------------------- -- -- -- Copyright (C) 2017 Stefano Tonello -- -- -- -- This source file may be used and distributed without -- -- restriction provided that this copyright statement is not -- -- removed from the file and that any derivative work contains -- -- the original copyright notice and the associated disclaimer.-- -- -- -- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY -- -- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -- -- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -- -- FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR -- -- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -- -- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -- -- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -- -- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -- -- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -- -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -- -- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -- -- POSSIBILITY OF SUCH DAMAGE. 
--                                                             --
-----------------------------------------------------------------

---------------------------------------------------------------
-- RV01 JALR Prediction Unit
---------------------------------------------------------------
--
-- Predicts the target of "return-like" JALR instructions
-- (rd = r0, immediate = 0) using a return-address stack (RAS),
-- and verifies each prediction against the real target once the
-- instruction reaches IX1.
--
-- Port summary (as used by the logic in this file):
--   CLK_i, RST_i : clock and synchronous reset.
--   CLR_i        : clears the verification queue and the
--                  speculative-jump count (the RAS itself is
--                  NOT cleared, see CLRS below).
--   KLL1_i       : masks slot #1 in the pop/queue-write
--                  qualifier IF2_NK_PJRX.
--   FSTLL_i      : when '1', inhibits RAS pop / VQ write.
--   BJX_i        : together with MPJRX, starts a forced-pop
--                  sequence on the RAS.
--   INSTR_i      : raw instruction pair; slot #0 in the low
--                  ILEN bits, slot #1 in the high ILEN bits.
--   IF2_V_i      : per-slot valid flags in IF2.
--   IX1_* / IX3_*: per-slot valid flags and decoded
--                  instructions in IX1 / IX3; IX1_OPA0_i and
--                  IX1_OPA1_i are compared with the queued
--                  prediction; IX1_PCP4_i is the value pushed
--                  on the RAS.
--   KLL1_o       : slot #0 is a predictable JALR.
--   PJRX_o       : a predicted jump is requested.
--   PJRTA_o      : predicted target address (RAS top).
--   MPJRX_o      : per-slot JALR mis-prediction flags.
--
-- IF2_INSTR_i, IF2_PC_i and IX3_PCP4_i are part of the
-- interface but are not referenced by the active logic.
--
-- NOTE(review): although NW is a generic, the logic below is
-- written for exactly two slots (INSTR_i width, explicit
-- indices 0/1, comparison of MPJRX against "00").
---------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;

library work;
use work.RV01_CONSTS_PKG.all;
use work.RV01_TYPES_PKG.all;
use work.RV01_FUNCS_PKG.all;
use work.RV01_IDEC_PKG.all;
use work.RV01_OP_PKG.all;

entity RV01_JRPU is
  generic(
    RAS_DEPTH : natural := 4;
    JRVQ_DEPTH : natural := 2;
    PXE : std_logic := '1';
    NW : natural := 2
  );
  port(
    CLK_i : in std_logic;
    RST_i : in std_logic;
    CLR_i : in std_logic;
    KLL1_i : in std_logic;
    FSTLL_i : in std_logic;
    BJX_i : in std_logic;
    -- prediction inputs
    INSTR_i : in std_logic_vector(ILEN*2-1 downto 0);
    IF2_V_i : in std_logic_vector(NW-1 downto 0);
    IF2_INSTR_i : in DEC_INSTR_VEC_T(NW-1 downto 0);
    IF2_PC_i : in ADR_VEC_T(NW-1 downto 0);
    -- verification inputs
    IX1_V_i : in std_logic_vector(NW-1 downto 0);
    IX1_INSTR_i : in DEC_INSTR_VEC_T(NW-1 downto 0);
    IX1_OPA0_i : in SDWORD_T;
    IX1_OPA1_i : in SDWORD_T;
    IX1_PCP4_i : in ADR_VEC_T(NW-1 downto 0);
    -- RAS management
    IX3_V_i : in std_logic_vector(NW-1 downto 0);
    IX3_INSTR_i : in DEC_INSTR_VEC_T(NW-1 downto 0);
    IX3_PCP4_i : in ADR_VEC_T(NW-1 downto 0);

    KLL1_o : out std_logic;
    PJRX_o : out std_logic;
    PJRTA_o : out ADR_T;
    MPJRX_o : out std_logic_vector(NW-1 downto 0)
  );
end RV01_JRPU;

architecture ARC of RV01_JRPU is

  component RV01_STACK is
    generic(
      DEPTH : natural := 4;
      WIDTH : natural := 32
    );
    port(
      CLK_i : in std_logic;
      RST_i : in std_logic;
      CLR_i : in std_logic;
      PUSH_i : in std_logic;
      POP_i : in std_logic;
      D_i : in std_logic_vector(WIDTH-1 downto 0);

      SE_o : out std_logic;
      SF_o : out std_logic;
      Q_o : out std_logic_vector(WIDTH-1 downto 0)
    );
  end component;

  component RV01_QUEUE is
    generic(
      DEPTH : natural := 2;
      WIDTH : natural := 32
    );
    port(
      CLK_i : in std_logic;
      RST_i : in std_logic;
      CLR_i : in std_logic;
      RE_i : in std_logic;
      WE_i : in std_logic;
      D_i : in std_logic_vector(WIDTH-1 downto 0);

      QE_o : out std_logic;
      QF_o : out std_logic;
      Q_o : out std_logic_vector(WIDTH-1 downto 0)
    );
  end component;

  -- Never driven: acts as a constant '0', so the RAS clear
  -- input is permanently inactive.
  signal CLRS : std_logic := '0';

  -- RAS interface
  signal RAS_PUSH,RAS_POP,RAS_FPOP_q : std_logic;
  signal RAS_SE,RAS_SF : std_logic;
  signal RAS_D,RAS_Q : std_logic_vector(SDLEN-1 downto 0);
  signal IX1_JRTA,PJRTA : ADR_T;

  -- per-slot instruction class flags
  signal IF2_JAL : std_logic_vector(NW-1 downto 0);
  signal IF2_PJALR : std_logic_vector(NW-1 downto 0);
  signal IX1_JAL : std_logic_vector(NW-1 downto 0);
  signal IX1_NORA_JAL : std_logic_vector(NW-1 downto 0);
  signal IX1_JALR : std_logic_vector(NW-1 downto 0);
  signal IX3_JAL : std_logic_vector(NW-1 downto 0);
  signal IX3_NORA_JAL : std_logic_vector(NW-1 downto 0);
  signal IX3_JALR : std_logic_vector(NW-1 downto 0);
  signal VJALR : std_logic_vector(NW-1 downto 0);

  -- verification queue (VQ) interface
  signal VQ_RE,VQ_WE : std_logic;
  signal VQ_QE,VQ_QF : std_logic;
  signal VQ_D,VQ_Q : std_logic_vector(SDLEN-1 downto 0);
  signal VJRTA : ADR_T;

  signal IF2_PJRX,IF2_NK_PJRX,IX1_JRX,IX3_JRX,PJRVX : std_logic;
  signal MPJRX : std_logic_vector(NW-1 downto 0);
  signal MTCH : std_logic_vector(NW-1 downto 0);

  -- speculative jump count
  signal SJC_q : natural range 0 to 2;

  -- raw instruction fields extracted from INSTR_i
  signal INSTR0,INSTR1 : std_logic_vector(ILEN-1 downto 0);
  signal OPCODE0,OPCODE1 : std_logic_vector(7-1 downto 0);
  signal RD0,RD1 : RID_T;
  signal IMM0,IMM1 : unsigned(12-1 downto 0);

begin

  ------------------------------------
  -- Note
  ------------------------------------

  -- Predicted JALR instructions are only
  -- those used to return from a function
  -- call: they have r0 as rd (the return
  -- address is discarded) and a zero
  -- immediate, i.e. only instructions of
  -- the type jalr r0,rn,0 are predicted
  -- using the RAS.

  -- JAL and JALR instructions supplying
  -- prediction info are those used to
  -- start a function call, which have
  -- rd different from r0.

  -- *** Prediction ***
  -- When a jalr instruction of the type
  -- above reaches IF2, a jump is executed
  -- to the address output by the RAS,
  -- unless the RAS is empty (in that case
  -- no jump is performed).

  -- *** RAS push ***
  -- When a jalr or jal instruction that
  -- writes rd reaches IX1, its return
  -- address (PC+4) is pushed on the RAS.

  -- *** RAS pop ***
  -- Every prediction triggers a RAS pop.

  -- Idea: push RAS in IX1 and remove
  -- speculative entries when a B-J
  -- mis-prediction occurs or when clearing
  -- the pipe. VQ must be emptied on the
  -- same events.

  ------------------------------------
  -- RAS
  ------------------------------------

  U_RAS : RV01_STACK
    generic map(
      DEPTH => RAS_DEPTH,
      WIDTH => SDLEN
    )
    port map(
      CLK_i => CLK_i,
      RST_i => RST_i,
      CLR_i => CLRS,
      PUSH_i => RAS_PUSH,
      POP_i => RAS_POP,
      D_i => RAS_D,
      SE_o => RAS_SE,
      SF_o => RAS_SF,
      Q_o => RAS_Q
    );

  -- RAS is popped when a prediction occurs
  -- (fetch not stalled and slot #0 not a
  -- JAL), or while a forced pop is active.
  RAS_POP <=
    (IF2_NK_PJRX and not(FSTLL_i) and not(IF2_JAL(0))) or
    RAS_FPOP_q;

  -- RAS data output is the predicted jalr TA
  PJRTA <= to_unsigned(RAS_Q);

  -- RAS is pushed when a suitable jal or
  -- jalr instruction reaches IX1 stage.
  RAS_PUSH <= IX1_JRX;

  -- RAS data input is the return address
  -- supplied by a jal or jalr instruction.
  RAS_D <= to_std_logic_vector(IX1_JRTA);

  ------------------------------------
  -- JR Verification queue
  ------------------------------------

  U_JRVQ : RV01_QUEUE
    generic map(
      DEPTH => JRVQ_DEPTH,
      WIDTH => SDLEN
    )
    port map(
      CLK_i => CLK_i,
      RST_i => RST_i,
      CLR_i => CLR_i,
      RE_i => VQ_RE,
      WE_i => VQ_WE,
      D_i => VQ_D,
      QE_o => VQ_QE,
      QF_o => VQ_QF,
      Q_o => VQ_Q
    );

  -- VQ is written when a jalr prediction occurs
  -- (same qualifier as the prediction RAS pop).
  VQ_WE <= IF2_NK_PJRX and not(FSTLL_i) and not(IF2_JAL(0));

  -- VQ data input is RAS data output
  VQ_D <= RAS_Q;

  -- VQ is read when a prediction is verified
  -- and the queue is not flagged empty.
  VQ_RE <= PJRVX and not(VQ_QE);

  -- VQ data output is the TA to be compared
  -- with the actual TA.
  -- (VJRTA is not read by any logic in this
  -- file; the comparisons use VQ_Q directly.)
  VJRTA <= to_unsigned(VQ_Q);

  ------------------------------------
  -- Speculative Jump Count
  ------------------------------------

  -- RAS is pushed speculatively, as
  -- instructions in IX1 may be nullified
  -- later (because of a mis-prediction
  -- detected in IX2 or an interrupt-like
  -- event raised in IX3).
  -- A count of speculative entries is kept
  -- in order to force-pop them from the RAS
  -- in case the related instructions get
  -- nullified.

  -- Every entry is treated as speculative
  -- when it's pushed on the RAS, thus
  -- incrementing the count.
  -- The count is decremented (if non-zero and
  -- no push occurs in the same cycle) when:
  -- 1) a pushing jal/jalr reaches IX3 stage.
  -- 2) a jalr prediction occurs.
  -- 3) a forced pop occurs.

  -- NOTE(review): the increment is unguarded
  -- and has priority over the decrement, while
  -- SJC_q is declared with range 0 to 2. If
  -- three pushes can occur without an
  -- intervening decrement, simulation will
  -- raise a range error; confirm that the
  -- pipeline makes this unreachable.

  process(CLK_i)
  begin
    if(CLK_i = '1' and CLK_i'event) then
      if(RST_i = '1' or CLR_i = '1') then
        SJC_q <= 0;
      elsif(RAS_PUSH = '1') then
        SJC_q <= SJC_q + 1;
      elsif(
        (SJC_q > 0) and (
          IX3_JRX = '1' or
          (IF2_NK_PJRX = '1' and FSTLL_i = '0' and IF2_JAL(0) = '0') or
          RAS_FPOP_q = '1'
        )
      ) then
        SJC_q <= SJC_q - 1;
      end if;
    end if;
  end process;

  ------------------------------------
  -- RAS recovery
  ------------------------------------

  -- Forced pop is started by BJX_i or by a
  -- JALR mis-prediction while speculative
  -- entries are outstanding, and is released
  -- once at most one such entry is left (the
  -- pop of that cycle removes it).

  process(CLK_i)
  begin
    if(CLK_i = '1' and CLK_i'event) then
      if(RST_i = '1') then
        RAS_FPOP_q <= '0';
      elsif((BJX_i = '1' or not(MPJRX = "00")) and SJC_q > 0) then
        RAS_FPOP_q <= '1';
      elsif(RAS_FPOP_q = '1' and SJC_q <= 1) then
        RAS_FPOP_q <= '0';
      end if;
    end if;
  end process;

  ------------------------------------
  -- IF2 stage logic (prediction)
  ------------------------------------

  -- Note: only predictable JALR instructions
  -- are considered here!
  -- The flags are decoded from the raw
  -- instruction bits (INSTR_i); the decoded
  -- IF2_INSTR_i input is not used.

  INSTR0 <= INSTR_i(ILEN-1 downto 0);
  INSTR1 <= INSTR_i(ILEN*2-1 downto ILEN);

  OPCODE0 <= INSTR0(6 downto 0);
  OPCODE1 <= INSTR1(6 downto 0);

  RD0 <= to_integer(to_unsigned(INSTR0(11 downto 7)));
  RD1 <= to_integer(to_unsigned(INSTR1(11 downto 7)));

  IMM0 <= to_unsigned(INSTR0(31 downto 20));
  IMM1 <= to_unsigned(INSTR1(31 downto 20));

  -- slot #0 jalr instruction flag
  IF2_PJALR(0) <= IF2_V_i(0) when (
    OPCODE0 = OP_JALR and
    RD0 = 0 and
    IMM0 = 0
  ) else '0';

  -- slot #1 jalr instruction flag
  IF2_PJALR(1) <= (IF2_V_i(1) and PXE) when (
    OPCODE1 = OP_JALR and
    RD1 = 0 and
    IMM1 = 0
  ) else '0';

  -- slot #0 jal instruction flag
  IF2_JAL(0) <= IF2_V_i(0) when (
    OPCODE0 = OP_JAL
  ) else '0';

  -- slot #1 jal instruction flag
  -- (IF2_JAL(1) is not read by any logic
  -- in this file.)
  IF2_JAL(1) <= (IF2_V_i(1) and PXE) when (
    OPCODE1 = OP_JAL
  ) else '0';

  -- A jump is predicted (IF2_PJRX) if:
  -- 1) RAS is not empty, AND
  -- 2) instruction #0 or instruction #1
  --    is a valid P-JALR.
  -- IF2_NK_PJRX is the same condition with
  -- slot #1 additionally masked by KLL1_i; it
  -- qualifies the RAS pop and the VQ write,
  -- where the "fetch not stalled" (FSTLL_i)
  -- and "instruction #0 is not a JAL"
  -- (IF2_JAL(0)) conditions are applied.

  IF2_NK_PJRX <= not(RAS_SE) and
    (IF2_PJALR(0) or (IF2_PJALR(1) and not(KLL1_i)));

  IF2_PJRX <= not(RAS_SE) and
    (IF2_PJALR(0) or IF2_PJALR(1));

  -- Predicted Jump-register execute flag
  PJRX_o <= IF2_PJRX;

  -- predicted JALR target address
  PJRTA_o <= PJRTA;

  -- If instruction #0 is a predicted jump,
  -- instruction #1 must be nullified.
  KLL1_o <= IF2_PJALR(0); -- not actually used?

  ------------------------------------
  -- IX1 stage logic (Verification)
  ------------------------------------

  -- Note: the JALR instructions subject to
  -- verification are the same ones that are
  -- candidates for prediction.

  -- slot #0 jalr instruction flag
  VJALR(0) <= IX1_V_i(0) when (
    IX1_INSTR_i(0).BJ_OP = BJ_JALR and
    IX1_INSTR_i(0).WRD = '0' and
    IX1_INSTR_i(0).IMM = 0
  ) else '0';

  -- Check if instruction #0 return address
  -- matches VQ output (never a match while
  -- the VQ is flagged empty).
  MTCH(0) <= not(VQ_QE) when (IX1_OPA0_i = to_signed(VQ_Q)) else '0';

  GPXE1_1 : if(PXE = '1') generate

    -- slot #1 jalr instruction flag
    VJALR(1) <= IX1_V_i(1) when (
      IX1_INSTR_i(1).BJ_OP = BJ_JALR and
      IX1_INSTR_i(1).WRD = '0' and
      IX1_INSTR_i(1).IMM = 0
    ) else '0';

    -- Check if instruction #1 return address
    -- matches VQ output.
    MTCH(1) <= not(VQ_QE) when (IX1_OPA1_i = to_signed(VQ_Q)) else '0';

  end generate;

  GPXE1_0 : if(PXE = '0') generate

    VJALR(1) <= '0';
    MTCH(1) <= '0';

  end generate;

  -- A mis-prediction is flagged for slot #0 if:
  -- 1) instr. #0 is a P-JALR but RA doesn't
  --    match, OR
  -- 2) instr. #0 is a valid JALR that is not
  --    of the predictable form (it writes rd
  --    or has a non-zero immediate).
  MPJRX(0) <= '1' when (
    (VJALR(0) = '1' and MTCH(0) = '0') or
    (IX1_V_i(0) = '1' and
     IX1_INSTR_i(0).BJ_OP = BJ_JALR and
     (IX1_INSTR_i(0).WRD = '1' or IX1_INSTR_i(0).IMM /= 0)
    )
  ) else '0';

  -- A mis-prediction is flagged for slot #1 if:
  -- 1) instr. #0 is NOT a P-JALR, and instr.
  --    #1 is a P-JALR but RA doesn't match, OR
  -- 2) instr. #1 is a valid JALR that is not
  --    of the predictable form.
  MPJRX(1) <= '1' when (
    -- earlier variant, kept for reference:
    --(IX1_NORA_JAL(0) = '0' and VJALR(0) = '0' and VJALR(1) = '1' and MTCH(1) = '0') or
    (VJALR(0) = '0' and VJALR(1) = '1' and MTCH(1) = '0') or
    (IX1_V_i(1) = '1' and
     IX1_INSTR_i(1).BJ_OP = BJ_JALR and
     (IX1_INSTR_i(1).WRD = '1' or IX1_INSTR_i(1).IMM /= 0)
    )
  ) else '0';

  -- This signal is used to remove VQ oldest entry.
  PJRVX <= '1' when (
    VJALR(0) = '1' or VJALR(1) = '1'
  ) else '0';

  -- Mis-predicted JALR flag
  MPJRX_o <= MPJRX;

  ------------------------------------
  -- IX3 stage logic (retirement of
  -- speculative RAS entries)
  ------------------------------------

  -- Note: JAL/JALR instructions used to
  -- supply prediction info are of a type
  -- different from those used for
  -- prediction, as they have rd != r0.
  -- JAL instructions with rd = r0
  -- are detected by the *_NORA_JAL()
  -- flags.

  -- slot #0 jalr instruction flag
  IX3_JALR(0) <= IX3_V_i(0) when (
    IX3_INSTR_i(0).BJ_OP = BJ_JALR and
    IX3_INSTR_i(0).WRD = '1'
  ) else '0';

  -- slot #0 jal instruction flag
  IX3_JAL(0) <= IX3_V_i(0) when (
    IX3_INSTR_i(0).BJ_OP = BJ_JAL and
    IX3_INSTR_i(0).WRD = '1'
  ) else '0';

  -- slot #0 no-RA jal instruction flag
  IX3_NORA_JAL(0) <= IX3_V_i(0) when (
    IX3_INSTR_i(0).BJ_OP = BJ_JAL and
    IX3_INSTR_i(0).WRD = '0'
  ) else '0';

  -- slot #1 jalr instruction flag
  IX3_JALR(1) <= (IX3_V_i(1) and PXE) when (
    IX3_INSTR_i(1).BJ_OP = BJ_JALR and
    IX3_INSTR_i(1).WRD = '1'
  ) else '0';

  -- slot #1 jal instruction flag
  IX3_JAL(1) <= (IX3_V_i(1) and PXE) when (
    IX3_INSTR_i(1).BJ_OP = BJ_JAL and
    IX3_INSTR_i(1).WRD = '1'
  ) else '0';

  -- slot #1 no-RA jal instruction flag
  IX3_NORA_JAL(1) <= (IX3_V_i(1) and PXE) when (
    IX3_INSTR_i(1).BJ_OP = BJ_JAL and
    IX3_INSTR_i(1).WRD = '0'
  ) else '0';

  ------------------------------------
  -- IX1 stage logic (RAS pushing)
  ------------------------------------

  -- slot #0 jalr instruction flag
  IX1_JALR(0) <= IX1_V_i(0) when (
    IX1_INSTR_i(0).BJ_OP = BJ_JALR and
    IX1_INSTR_i(0).WRD = '1'
  ) else '0';

  -- slot #0 jal instruction flag
  IX1_JAL(0) <= IX1_V_i(0) when (
    IX1_INSTR_i(0).BJ_OP = BJ_JAL and
    IX1_INSTR_i(0).WRD = '1'
  ) else '0';

  -- slot #0 no-RA jal instruction flag
  IX1_NORA_JAL(0) <= IX1_V_i(0) when (
    IX1_INSTR_i(0).BJ_OP = BJ_JAL and
    IX1_INSTR_i(0).WRD = '0'
  ) else '0';

  -- slot #1 jalr instruction flag
  IX1_JALR(1) <= (IX1_V_i(1) and PXE) when (
    IX1_INSTR_i(1).BJ_OP = BJ_JALR and
    IX1_INSTR_i(1).WRD = '1'
  ) else '0';

  -- slot #1 jal instruction flag
  IX1_JAL(1) <= (IX1_V_i(1) and PXE) when (
    IX1_INSTR_i(1).BJ_OP = BJ_JAL and
    IX1_INSTR_i(1).WRD = '1'
  ) else '0';

  -- slot #1 no-RA jal instruction flag
  IX1_NORA_JAL(1) <= (IX1_V_i(1) and PXE) when (
    IX1_INSTR_i(1).BJ_OP = BJ_JAL and
    IX1_INSTR_i(1).WRD = '0'
  ) else '0';

  -- IX1 push flag: slot #0 is a pushing
  -- jal/jalr, or slot #1 is one and slot #0
  -- is not a no-RA jal.
  IX1_JRX <= (IX1_JALR(0) or IX1_JAL(0)) or
    (not(IX1_NORA_JAL(0)) and (IX1_JALR(1) or IX1_JAL(1)));

  -- Return address to push: slot #0 has
  -- priority over slot #1.
  IX1_JRTA <= IX1_PCP4_i(0) when (
    IX1_JALR(0) = '1' or IX1_JAL(0) = '1'
  ) else IX1_PCP4_i(1);

  -- IX3 flag: a pushing jal/jalr has reached
  -- IX3 (same slot rule as IX1_JRX); used to
  -- decrement the speculative jump count.
  IX3_JRX <= (IX3_JALR(0) or IX3_JAL(0)) or
    (not(IX3_NORA_JAL(0)) and (IX3_JALR(1) or IX3_JAL(1)));

end ARC;