OpenCores
URL https://opencores.org/ocsvn/rv01_riscv_core/rv01_riscv_core/trunk

Subversion Repositories rv01_riscv_core

[/] [rv01_riscv_core/] [trunk/] [VHDL/] [RV01_sbuf_2w.vhd] - Rev 2

Compare with Previous | Blame | View Log

-----------------------------------------------------------------
--                                                             --
-----------------------------------------------------------------
--                                                             --
-- Copyright (C) 2017 Stefano Tonello                          --
--                                                             --
-- This source file may be used and distributed without        --
-- restriction provided that this copyright statement is not   --
-- removed from the file and that any derivative work contains --
-- the original copyright notice and the associated disclaimer.--
--                                                             --
-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY         --
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   --
-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   --
-- FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      --
-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         --
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    --
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   --
-- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        --
-- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  --
-- LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  --
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  --
-- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         --
-- POSSIBILITY OF SUCH DAMAGE.                                 --
--                                                             --
-----------------------------------------------------------------
 
---------------------------------------------------------------
-- RV01 Store buffer
---------------------------------------------------------------
 
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;
 
library work;
use work.RV01_CONSTS_PKG.all;
use work.RV01_TYPES_PKG.all;
use work.RV01_FUNCS_PKG.all;
use work.RV01_OP_PKG.all;
 
-- RV01 two-way store buffer: entity declaration.
--
-- Generics:
--   NW              width of the RE_i/WE_i request vectors (one bit
--                   per instruction slot). The architecture in this
--                   file concatenates POP & RE_i into a 3-bit vector,
--                   so it is written for NW = 2.
--   DEPTH           number of store buffer entries.
--   SIMULATION_ONLY when '1', the architecture generates its
--                   simulation-only checker assertions.
entity RV01_SBUF_2W is
  generic(
    NW : natural := 2;
    DEPTH : natural := 4;
    SIMULATION_ONLY : std_logic := '0'
  );
  port(
    CLK_i : in std_logic; -- clock (registers update on the rising edge)
    RST_i : in std_logic; -- synchronous reset, active high
    CLRB_i : in std_logic; -- clear buffer flag
    KTS_i : in std_logic; -- kill top store
    RE_i : in std_logic_vector(NW-1 downto 0); -- SB read enable
    WE_i : in std_logic_vector(NW-1 downto 0); -- SB write enable
    BE0_i : in std_logic_vector(4-1 downto 0); -- inst #0 byte enable
    BE1_i : in std_logic_vector(4-1 downto 0); -- inst #1 byte enable
    D0_i : in std_logic_vector(SDLEN-1 downto 0); -- inst #0 store data
    D1_i : in std_logic_vector(SDLEN-1 downto 0); -- inst #1 store data
    -- IX1 valid flags: bit n qualifies the load/store decode of inst #n.
    -- (Mode "in" is now spelled out; it was previously left to the
    -- language default, unlike every other input port.)
    IX1_V_i : in std_logic_vector(2-1 downto 0);
    LS_OP0_i : in LS_OP_T; -- inst #0 load/store operation
    LS_OP1_i : in LS_OP_T; -- inst #1 load/store operation
    DADR0_i : in ADR_T; -- inst #0 data address (stored and compared)
    DADR1_i : in ADR_T; -- inst #1 data address (stored and compared)
    -- just for debugging purpose
    SADR0_i : in ADR_T;
    SADR1_i : in ADR_T;

    BF_o : out std_logic; -- buffer full flag
    NOPR_o : out std_logic; -- no pending reads flag
    S2LAC_o : out std_logic_vector(2-1 downto 0); -- store-2-load conflict
    WE_o : out std_logic; -- asserted when entry zero is popped
    LS_OP_o : out LS_OP_T; -- entry zero load/store operation
    BE_o : out std_logic_vector(4-1 downto 0); -- entry zero byte enable
    Q_o : out std_logic_vector(SDLEN-1 downto 0); -- entry zero store data
    SADR_o : out ADR_T -- entry zero store address
  );
end RV01_SBUF_2W;
 
architecture ARC of RV01_SBUF_2W is
 
  -- all-zero vector with one bit per buffer entry
  constant ZERO : std_logic_vector(DEPTH-1 downto 0) := (others => '0');
  -- number of address bits (starting at bit 2) compared by the
  -- store-to-load conflict check
  constant MTCH_WIDTH : natural := 8;

  -- One buffered store: operation, byte enables, data and address.
  type SB_ENTRY_T is record
    LS_OP : LS_OP_T;
    BE : std_logic_vector(4-1 downto 0);
    DATA : std_logic_vector(SDLEN-1 downto 0);
    ADR : ADR_T;
  end record;

  -- Array of buffered stores.
  type SB_T is array (natural range<>) of SB_ENTRY_T;

  -- entry contents: next value / registered value
  signal SB : SB_T(DEPTH-1 downto 0);
  signal SB_q : SB_T(DEPTH-1 downto 0);
  -- entry valid flags: registered value / next value
  signal SBV_q : std_logic_vector(DEPTH-1 downto 0);
  signal SBV : std_logic_vector(DEPTH-1 downto 0);
  -- candidate new entries built from slot #0 and slot #1 inputs
  signal SB_NEW0 : SB_ENTRY_T;
  signal SB_NEW1 : SB_ENTRY_T;
  -- tail pointer: next value / registered value
  signal TP : integer range -1 to DEPTH+2;
  signal TP_q : integer range -1 to DEPTH+2;
  -- per-slot push requests
  signal PUSH : std_logic_vector(NW-1 downto 0);
  -- pop request and buffer-full flag
  signal POP : std_logic;
  signal BF : std_logic;
  -- address match flags for slot #0 and slot #1 loads; MTCH1 carries one
  -- extra bit (index DEPTH) for the slot #0 store vs. slot #1 load check
  signal MTCH0 : std_logic_vector(DEPTH-1 downto 0);
  signal MTCH1 : std_logic_vector(DEPTH downto 0);
  -- decoded instruction kind flags (load #0, load #1, store #0, L/S #0)
  signal LD0 : std_logic;
  signal LD1 : std_logic;
  signal ST0 : std_logic;
  signal LS0 : std_logic;
  -- pending read counter: registered value / next value
  signal PR_CNT_q : natural range 0 to DEPTH-1;
  signal PR_CNT : integer range -1 to DEPTH+1;
  -- store-to-load conflict flags, one per slot
  signal S2LAC : std_logic_vector(2-1 downto 0);
  -- pending read vector: registered value / next value
  signal PRV_q : std_logic_vector(DEPTH-1 downto 0);
  signal PRV : std_logic_vector(DEPTH-1 downto 0);
 
  -- OR-reduce all elements of V, scanning from V'LOW up to V'HIGH.
  -- Returns '0' for a null (empty) vector.
  function wired_or(V : std_logic_vector) return std_logic is
    variable ACC : std_logic := '0';
  begin
    for IDX in V'LOW to V'HIGH loop
      ACC := V(IDX) or ACC;
    end loop;
    return ACC;
  end function wired_or;
 
begin
 
  ----------------------------------------------------
  -- Notes
  ----------------------------------------------------
 
  -- Store buffer is organised like a queue which is
  -- written when a store reaches IX1 stage and read
  -- when a store reaches IX3 stage (but only if memory
  -- write port is not occupied by an active load, in 
  -- order to minimize pipe stalls).
 
  -- Buffered stores are always read from entry zero
  -- and written on entry pointed by TP_q (the tail
  -- pointer).
 
  -- In order to detect store-to-load conflicts, both
  -- load addresses must be compared against buffered
  -- store addresses. In addition, IX1 instruction #1
  -- load address must be compared to IX1 instruction
  -- #0 store address.
  -- When a conflict is detected, the involved store
  -- must be allowed to proceed in order to remove
  -- the conflict. If the store is buffered, a buffer
  -- read is forced, while, if it's still in IX1 , 
  -- it's allowed to move to next pipe stages.
 
  -- CLRB_i and NOPR_o signals have been added to
  -- support exception processing: CLRB_i allows to
  -- empty the buffer when an exception is raised and
  -- NOPR_o tells exception logic that is safe to raise
  -- exceptions because stores eventually remaining in
  -- the buffer are newer than IX3 instructions ready
  -- to raise exceptions.
 
  -- This "2w" version support dual stores.
 
  -- 10/11/2015
  -- CLRB_i is now coincident with CLRP signal, as 
  -- NOPR_o has been permanently set to '1' (note: the
  -- active NOPR_o assignment in this file is computed
  -- from PR_CNT_q and RE_i; the constant-'1' variant
  -- is only present as a commented-out line). In this
  -- way re-fetch, exception servicing and return from
  -- exception can start even if there are still pending
  -- reads in store buffer. Such result is obtained by
  -- invalidating, on CLRB_i assertion, store buffer
  -- entries for which no read request is pending (the
  -- remaining ones are related to instructions older
  -- than the one(s) in IX3 and therefore can be
  -- completed safely). 
 
  -- 11/02/2017
  -- KTS_i input is added to handle B/J mis-predictions
  -- (and some special cases of jalr instruction)
  -- triggering a B/J in IX2.
  -- KTS_i is set if, in the previous cycle, IX1
  -- instruction #0 triggered a B/J when instruction
  -- #1 was a store (under such condition an entry
  -- corresponding to a nullified store has been written
  -- to the buffer).
  -- If KTS_i is set, current top entry must be
  -- invalidated. 
  -- Note: when KTS_i is set, WE_i is always equal to
  -- "00". 
 
  ----------------------------------------------------
  -- Pending read counter
  ----------------------------------------------------
 
  -- If a buffer read is requested (RE_i  = '1') but the
  -- buffer can't be read because memory write port is
  -- in use by a valid load/store instruction, the read
  -- request is recorded by incrementing the pending
  -- read counter PR_CNT_q.
  -- A pending read is actually performed when the memory
  -- write port is available (no valid load/store is using
  -- it), when this event occurs, the pending read counter
  -- is decremented.
  -- If a read is requested in the same cycle where a
  -- pending read is performed, the pending read counter
  -- remains un-changed.
 
  -- Registers holding the pending read counter (PR_CNT_q) and its
  -- vector view (PRV_q). Both are cleared by the synchronous reset
  -- RST_i; otherwise they load their next-value signals on every
  -- rising clock edge. CLRB_i does not affect these registers.
  PR_STATE_REG : process(CLK_i)
  begin
    if(CLK_i'event and CLK_i = '1') then
      if(RST_i /= '1') then
        -- normal operation: capture next-state values
        PR_CNT_q <= PR_CNT;
        PRV_q <= PRV;
      else
        -- reset: no pending reads
        PR_CNT_q <= 0;
        PRV_q <= (others => '0');
      end if;
    end if;
  end process PR_STATE_REG;
 
  -- Next value of the pending read counter.
  -- The selector is POP & RE_i:
  --   no pop, one read request   -> +1
  --   no pop, two read requests  -> +2
  --   pop,    two read requests  -> +1
  --   pop,    no read request    -> -1
  --   anything else              -> unchanged
  PR_CNT_NEXT : process(PR_CNT_q,RE_i,POP)
    variable SEL : std_logic_vector(3-1 downto 0);
    variable STEP : integer range -1 to 2;
  begin
    SEL := POP & RE_i;
    case SEL is
      when "011" => STEP := 2;
      when "001"|"010"|"111" => STEP := 1;
      when "100" => STEP := -1;
      when others => STEP := 0;
    end case;
    PR_CNT <= PR_CNT_q + STEP;
  end process PR_CNT_NEXT;
 
  -- PRV_q is an "alternative" view of PR_CNT_q: if
  -- PR_CNT_q = n, PRV_q(n-1:0) = all-1. PRV_q is
  -- used to set SBV_q when CLRB_i gets asserted.
 
  -- Next value of the pending read vector, updated with the same
  -- POP & RE_i selector as the pending read counter: ones are shifted
  -- in from the bottom when the count grows (one or two at a time)
  -- and a zero is shifted in from the top when it shrinks.
  PRV_NEXT : process(PRV_q,RE_i,POP)
    variable SEL : std_logic_vector(3-1 downto 0);
  begin
    SEL := POP & RE_i;
    if(SEL = "011") then
      -- two new requests, no pop
      PRV <= PRV_q(DEPTH-3 downto 0) & "11";
    elsif(SEL = "001" or SEL = "010" or SEL = "111") then
      -- net increase of one
      PRV <= PRV_q(DEPTH-2 downto 0) & '1';
    elsif(SEL = "100") then
      -- pop without new requests
      PRV <= '0' & PRV_q(DEPTH-1 downto 1);
    else
      -- hold
      PRV <= PRV_q;
    end if;
  end process PRV_NEXT;
 
  -- "No pending reads" flag: low whenever a read is being requested
  -- this cycle or the pending read counter is non-zero, high otherwise.
  NOPR_o <= '0' when (RE_i /= "00" or PR_CNT_q /= 0) else '1';
  -- alternative kept for reference (flag tied high):
  --NOPR_o <= '1';
 
  ----------------------------------------------------
  -- Buffer data registers
  ----------------------------------------------------
 
  -- When CLRB_i gets asserted, SBV_q is loaded from PRV
  -- (the next value of PRV_q) and TP_q from PR_CNT,
  -- thereby invalidating all entries for which there's
  -- no pending read; remaining entries are older than
  -- instruction(s) in IX3 and can be completed safely.
  -- Such "trick" allows instruction flow change to run
  -- in parallel with buffer entries completion.
 
  -- Store buffer state registers (entry contents, valid flags and
  -- tail pointer), updated on the rising clock edge.
  --   RST_i  = '1' : valid flags and tail pointer are cleared.
  --   CLRB_i = '1' : valid flags are loaded from PRV and the tail
  --                  pointer from PR_CNT.
  --   otherwise    : valid flags and tail pointer take their
  --                  next-state values SBV and TP.
  -- Entry contents (SB_q) are loaded from SB in every case.
  SB_REGS : process(CLK_i)
  begin
    if(CLK_i'event and CLK_i = '1') then
      -- payload registers have no reset/clear
      SB_q <= SB;
      if(RST_i = '1') then
        TP_q <= 0;
        SBV_q <= (others => '0');
      elsif(CLRB_i = '1') then
        SBV_q <= PRV;
        TP_q <= PR_CNT;
      else
        TP_q <= TP;
        SBV_q <= SBV;
      end if;
    end if;
  end process SB_REGS;
 
  ----------------------------------------------------
  -- Buffer data updating logic
  ----------------------------------------------------
 
  -- store buffer new entry
 
  -- candidate entry built from instruction slot #0 inputs
  SB_NEW0 <= (
    LS_OP => LS_OP0_i,
    BE => BE0_i,
    DATA => D0_i,
    ADR => DADR0_i
  );

  -- candidate entry built from instruction slot #1 inputs
  SB_NEW1 <= (
    LS_OP => LS_OP1_i,
    BE => BE1_i,
    DATA => D1_i,
    ADR => DADR1_i
  );
 
  -- Buffer is written when a valid store instruction
  -- reaches stage IX1.
 
  -- Push requests are the write enables, one bit per slot.
  PUSH <= WE_i;

  -- Pop request. POP is held low when instruction #0 is a valid
  -- load/store (LS0 set), or when there is neither an active read
  -- request (RE_i = "00") nor a pending one (PR_CNT_q not above
  -- zero). In all other cases POP is the complement of CLRB_i, i.e.
  -- a pop is suppressed while the buffer is being cleared.

  POP <= '0' when (
    LS0 /= '0' or (RE_i = "00" and not(PR_CNT_q > 0))
  ) else not(CLRB_i);
 
  -- store buffer data updating logic
 
  -- Combinational next-state logic for the entry contents (SB), the
  -- entry valid flags (SBV) and the tail pointer (TP).
  --
  -- For every entry index k, one of six cases is selected, in this
  -- priority order, from the PUSH and POP requests:
  --   1) double push + pop   2) single push + pop
  --   3) double push         4) single push
  --   5) pop only            6) neither push nor pop
  -- KTS_i (kill top store) is only examined in cases 5 and 6.
  --
  -- TP is assigned inside the loop; every iteration assigns the same
  -- value for a given case, so the loop position does not matter.
  process(SB_q,SBV_q,TP_q,PUSH,POP,KTS_i,SB_NEW0,SB_NEW1)
  begin
    for k in 0 to DEPTH-1 loop
      if(PUSH = "11" and POP = '1') then
        -- used entries are shifted down one position
        -- (deleting bottom one end emptying top),
        -- emptied top entry and entry above it are
        -- loaded with new data.
        if(k = TP_q) then
          -- slot #1 store goes above slot #0 store
          SBV(k) <= '1';
          SB(k) <= SB_NEW1;
        elsif(k = TP_q-1) then
          -- slot #0 store takes the position freed by the shift
          SBV(k) <= '1';
          SB(k) <= SB_NEW0;
        elsif(k < DEPTH-1) then
          -- shift down: take contents of the entry above
          SBV(k) <= SBV_q(k+1);
          SB(k) <= SB_q(k+1);          
        else
          -- last entry has nothing above it: becomes invalid
          SBV(k) <= '0';
          SB(k) <= SB_q(k);
        end if;
        -- net effect: two entries added, one removed
        TP <= TP_q + 1;
      elsif((PUSH = "01" or PUSH = "10") and POP = '1') then
        -- used entries are shifted down one position
        -- (deleting bottom one end emptying top),
        -- emptied top entry is loaded with new data.
        if(k = TP_q-1) then
          SBV(k) <= '1';
          -- the pushing slot selects which new entry is stored
          if(PUSH = "01") then
            SB(k) <= SB_NEW0;
          else
            SB(k) <= SB_NEW1;
          end if;
        elsif(k < DEPTH-1) then
          -- shift down: take contents of the entry above
          SBV(k) <= SBV_q(k+1);
          SB(k) <= SB_q(k+1);          
        else
          -- last entry has nothing above it: becomes invalid
          SBV(k) <= '0';
          SB(k) <= SB_q(k);
        end if;
        -- one entry added, one removed: tail pointer unchanged
        TP <= TP_q;
      elsif(PUSH = "11") then
        -- top empty entry and entry above it are
        -- loaded with new data, other entries remain
        -- unchanged.
        if(k = TP_q+1) then
          SBV(k) <= '1';
          SB(k) <= SB_NEW1;
        elsif(k = TP_q) then
          SBV(k) <= '1';
          SB(k) <= SB_NEW0;
        else
          SBV(k) <= SBV_q(k);
          SB(k) <= SB_q(k);
        end if;
        TP <= TP_q + 2;
      elsif(PUSH = "01" or PUSH = "10") then
        -- top empty entry is loaded with new data,
        -- other entries remain unchanged.
        if(k = TP_q) then
          SBV(k) <= '1';
          -- the pushing slot selects which new entry is stored
          if(PUSH = "01") then
            SB(k) <= SB_NEW0;
          else
            SB(k) <= SB_NEW1;
          end if;
        else
          SBV(k) <= SBV_q(k);
          SB(k) <= SB_q(k);
        end if;
        TP <= TP_q + 1;
      elsif(POP = '1') then
        -- used entries are shifted down one position
        -- (deleting bottom one end emptying top).
        if(k = TP_q-1) then
          -- former top position is emptied by the shift
          SBV(k) <= '0';
          SB(k) <= SB_q(k); -- don't care!
        elsif(k = TP_q-2 and KTS_i = '1') then
          -- kill top store: after the shift the killed entry would
          -- land here, so this position is invalidated as well
          SBV(k) <= '0';
          SB(k) <= SB_q(k); -- don't care       
        elsif(k < DEPTH-1) then
          -- shift down: take contents of the entry above
          SBV(k) <= SBV_q(k+1);
          SB(k) <= SB_q(k+1);
        else
          -- last entry has nothing above it: becomes invalid
          SBV(k) <= '0';
          SB(k) <= SB_q(k);
        end if;
        -- a killed top store removes one more entry than the pop alone
        if(KTS_i = '1') then
          TP <= TP_q - 2;
        else
          TP <= TP_q - 1;
        end if;
      else
        -- no push and no pop: entries hold their values, except that
        -- the top used entry is invalidated when KTS_i is set
        if(k = TP_q-1 and KTS_i = '1') then
          SBV(k) <= '0';
        else
          SBV(k) <= SBV_q(k);
        end if;
        SB(k) <= SB_q(k);
        if(KTS_i = '1') then
          TP <= TP_q - 1;
        else
          TP <= TP_q;
        end if;
      end if;
    end loop;
  end process;
 
  ----------------------------------------------------
  -- Store-to-load conflict check
  ----------------------------------------------------
 
  -- MTCHm(n) flag is set if store buffer n-th entry is
  -- valid and slot #m load address matches entry
  -- address. MTCH1(DEPTH) is set if inst. #0 is a store
  -- and inst. #1 load addr. matches inst #0 store one.
 
  -- Comparison is restricted to MTCH_WIDTH bits, at the 
  -- cost of possible "fake" matches, in order to reduce
  -- delay.
 
  -- Address match flags for the store-to-load conflict check.
  -- Only address bits (MTCH_WIDTH+2)-1 downto 2 are compared.
  -- Every flag defaults to '0' and is overridden when the compared
  -- address slices are equal:
  --   MTCH0(n)/MTCH1(n) take the valid flag of entry n when the
  --   slot #0 / slot #1 address matches the entry address;
  --   MTCH1(DEPTH) takes ST0 when the two slot addresses match.
  S2L_MATCH : process(SB_q,SBV_q,DADR0_i,DADR1_i,ST0)
  begin

    -- defaults: no match anywhere
    MTCH0 <= (others => '0');
    MTCH1 <= (others => '0');

    -- slot addresses vs. buffered store addresses
    for n in DEPTH-1 downto 0 loop
      if(SB_q(n).ADR((MTCH_WIDTH+2)-1 downto 2) =
        DADR0_i((MTCH_WIDTH+2)-1 downto 2)
      ) then
        MTCH0(n) <= SBV_q(n);
      end if;
      if(SB_q(n).ADR((MTCH_WIDTH+2)-1 downto 2) =
        DADR1_i((MTCH_WIDTH+2)-1 downto 2)
      ) then
        MTCH1(n) <= SBV_q(n);
      end if;
    end loop;

    -- slot #1 address vs. slot #0 address
    if(DADR1_i((MTCH_WIDTH+2)-1 downto 2) =
      DADR0_i((MTCH_WIDTH+2)-1 downto 2)
    ) then
      MTCH1(DEPTH) <= ST0;
    end if;

  end process S2L_MATCH;
 
  -- inst. #0 store flag
  -- Slot #0 store flag: follows the slot #0 valid bit when the
  -- operation is one of the store opcodes, '0' otherwise.
  ST0 <= '0' when (
    LS_OP0_i /= LS_SB and
    LS_OP0_i /= LS_SH and
    LS_OP0_i /= LS_SW
  ) else IX1_V_i(0);

  -- Slot #0 load flag: follows the slot #0 valid bit when the
  -- operation is one of the load opcodes listed here, '0' otherwise.
  LD0 <= '0' when (
    LS_OP0_i /= LS_LB and
    LS_OP0_i /= LS_LH and
    LS_OP0_i /= LS_LW
  ) else IX1_V_i(0);

  -- Slot #1 load flag: same decode, applied to slot #1.
  LD1 <= '0' when (
    LS_OP1_i /= LS_LB and
    LS_OP1_i /= LS_LH and
    LS_OP1_i /= LS_LW
  ) else IX1_V_i(1);

  -- Slot #0 is a valid load or a valid store.
  LS0 <= ST0 or LD0;
 
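  -- Buffer full flag. The active logic reports the
  -- buffer as full as soon as entry DEPTH-4 is valid;
  -- the commented-out variant below instead flagged
  -- the buffer as full when the number of empty entries
  -- equalled the number of pushed ones.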
 
  --BF <= '1' when (
    --(SBV_q(DEPTH-3) = '1' and (PUSH = "11")) or
    --(SBV_q(DEPTH-2) = '1' and (PUSH = "10" or PUSH = "01"))
  --) else '0';
 
  -- Buffer-full flag: the valid bit of entry DEPTH-4.
  BF <= SBV_q(DEPTH-4);

  -- Store-to-load conflict flags, one per slot: set when the slot
  -- holds a valid load and at least one of its match flags is set.
  -- For slot #1 the match vector also includes the comparison with
  -- a slot #0 store (bit DEPTH of MTCH1).

  S2LAC(1) <= wired_or(MTCH1) and LD1;
  S2LAC(0) <= wired_or(MTCH0) and LD0;
 
  ----------------------------------------------------
  -- outputs
  ----------------------------------------------------
 
  -- Entry zero drives the outgoing store; WE_o is asserted while
  -- that entry is being popped.
  SADR_o <= SB_q(0).ADR;
  Q_o <= SB_q(0).DATA;
  BE_o <= SB_q(0).BE;
  LS_OP_o <= SB_q(0).LS_OP;
  WE_o <= POP;

  -- store-to-load conflict flags (both slots at once)
  S2LAC_o <= S2LAC;

  -- buffer-full flag
  BF_o <= BF;
 
  ----------------------------------------------------
  -- Checkers
  ----------------------------------------------------
 
  -- synthesis translate_off
 
  -- Simulation-only consistency checkers, generated when the
  -- SIMULATION_ONLY generic is '1'. Each concurrent assertion is
  -- qualified with a rising CLK_i edge and RST_i = '0', and stops the
  -- simulation (severity FAILURE) when its condition is violated.
  GCHK0: if SIMULATION_ONLY = '1' generate

  -- (disabled) write attempted while the buffer-full flag is set
  --assert not(
  --  (WE_i /= "00" and BF = '1') and 
  --  (CLK_i = '1' and CLK_i'event)and
  --  (RST_i = '0')
  --) 
  --report "attempted write when store buffer is full!"
  --severity FAILURE;

  -- a read request requires entry zero to be valid
  assert not(
    (RE_i /= "00" and SBV_q(0) = '0') and 
    (CLK_i = '1' and CLK_i'event)and
    (RST_i = '0')
  ) 
  report "attempted read when store buffer is empty!"
  severity FAILURE;

  -- pending reads plus the reads requested this cycle must not
  -- exceed the tail pointer
  assert not(
    (
      (PR_CNT_q > TP_q) or
      (PR_CNT_q > TP_q-1 and (RE_i = "01" or RE_i ="10")) or
      (PR_CNT_q > TP_q-2 and RE_i = "11")
    ) and 
    (CLK_i = '1' and CLK_i'event) and
    (RST_i = '0')
  ) 
  report "pending read count + read requests > tail pointer in store buffer!"
  severity FAILURE;

  -- the debug addresses SADR0_i/SADR1_i of the requesting slots must
  -- equal the addresses stored in the entries indexed by PR_CNT_q
  -- (and PR_CNT_q+1 for a double request). The RE_i comparison comes
  -- first in each term so that, with short-circuit evaluation, the
  -- PR_CNT_q+1 index is only evaluated for a double request.
  assert not(
    (
      (RE_i = "01" and SADR0_i /= SB_q(PR_CNT_q).ADR) or
      (RE_i = "10" and SADR1_i /= SB_q(PR_CNT_q).ADR) or
      (RE_i = "11" and (SADR0_i /= SB_q(PR_CNT_q).ADR or SADR1_i /= SB_q(PR_CNT_q+1).ADR)) 
    ) and 
    (CLK_i = '1' and CLK_i'event) and
    (RST_i = '0')
  ) 
  report "invalid read requests!"
  severity FAILURE;

  end generate;
 
  -- synthesis translate_on
 
end ARC;
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.