---------------------------------------------------------------------
-- Instruction cache
--
-- Part of the LXP32 CPU
--
-- Copyright (c) 2016 by Alex I. Kuznetsov
--
-- A simple single-page buffer providing both caching and
-- prefetching capabilities. Useful for high-latency memory,
-- such as external SDRAM.
---------------------------------------------------------------------

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Interface of the instruction cache.
--
-- Generics (constraints are enforced by assertions in the rtl
-- architecture):
--   BURST_SIZE    - must be at least 4; used as the upper bound of the
--                   burst counter
--   PREFETCH_SIZE - must be at least 4; a new burst is requested when
--                   the distance between the fetch position and the
--                   read position drops below this value
--   PREFETCH_SIZE+BURST_SIZE must not exceed 128.
--
-- Ports:
--   clk_i, rst_i  - clock (rising edge) and synchronous reset
--   lli_*         - instruction read port served from the cache:
--                   read enable, 30-bit address, 32-bit read data and
--                   a busy flag that is asserted while a miss is pending
--   wbm_*         - bus master port used to fill the cache (signal
--                   names follow the WISHBONE convention)

entity lxp32_icache is
	generic(
		BURST_SIZE: integer;
		PREFETCH_SIZE: integer
	);
	port(
		clk_i: in std_logic;
		rst_i: in std_logic;
		
		lli_re_i: in std_logic;
		lli_adr_i: in std_logic_vector(29 downto 0);
		lli_dat_o: out std_logic_vector(31 downto 0);
		lli_busy_o: out std_logic;
		
		wbm_cyc_o: out std_logic;
		wbm_stb_o: out std_logic;
		wbm_cti_o: out std_logic_vector(2 downto 0);
		wbm_bte_o: out std_logic_vector(1 downto 0);
		wbm_ack_i: in std_logic;
		wbm_adr_o: out std_logic_vector(29 downto 0);
		wbm_dat_i: in std_logic_vector(31 downto 0)
	);
end entity;

architecture rtl of lxp32_icache is

-- Requested address: registered copy and the multiplexed value that is
-- actually used for lookup (see the address latch process below).

signal lli_adr_reg: std_logic_vector(lli_adr_i'range);
signal lli_adr_mux: std_logic_vector(lli_adr_i'range);

-- Cache RAM control

signal ram_waddr: std_logic_vector(7 downto 0);
signal ram_raddr: std_logic_vector(7 downto 0);
signal ram_re: std_logic;
signal ram_we: std_logic;

-- Requested address split into the upper 22 bits (base) and the lower
-- 8 bits (offset inside the 256-word buffer)

signal read_base: unsigned(21 downto 0);
signal read_offset: unsigned(7 downto 0);

-- Fill state machine

signal init: std_logic:='0';
signal burst1: std_logic;
signal terminate_burst: std_logic;
signal near_miss: std_logic:='0';
signal prefetch_distance: unsigned(7 downto 0);
signal wrap_cnt: integer range 0 to 3:=0;
signal burst_cnt: integer range 0 to BURST_SIZE:=0;
signal wb_stb: std_logic:='0';
signal wb_cti: std_logic_vector(2 downto 0);

-- Note: the following five signals are zero-initialized for
-- simulation only, to suppress warnings from numeric_std.
-- This initialization is not required for synthesis.

signal current_base: unsigned(21 downto 0):=(others=>'0');
signal current_offset: unsigned(7 downto 0):=(others=>'0');
signal prev_base: unsigned(21 downto 0):=(others=>'0');
signal next_base: unsigned(21 downto 0):=(others=>'0');
signal start_offset: unsigned(7 downto 0):=(others=>'0');

-- Lookup results: hit in the current block, hit in the previous block,
-- and the registered miss flag

signal hitc: std_logic;
signal hitp: std_logic;
signal miss: std_logic:='0';

begin

-- Parameter sanity checks

assert PREFETCH_SIZE>=4
	report "PREFETCH_SIZE cannot be less than 4"
	severity failure;
assert BURST_SIZE>=4
	report "BURST_SIZE cannot be less than 4"
	severity failure;
assert PREFETCH_SIZE+BURST_SIZE<=128
	report "PREFETCH_SIZE and BURST_SIZE combined cannot be greater than 128"
	severity failure;

-- Address latch: the requested address is captured while no miss is
-- pending, so that the lookup keeps using the address that caused the
-- miss for as long as "miss" is asserted.

process (clk_i) is
begin
	if rising_edge(clk_i) then
		if miss='0' then
			lli_adr_reg<=lli_adr_i;
		end if;
	end if;
end process;

lli_adr_mux<=lli_adr_i when miss='0' else lli_adr_reg;

read_base<=unsigned(lli_adr_mux(29 downto 8));
read_offset<=unsigned(lli_adr_mux(7 downto 0));

-- Cache RAM

-- Data is written at the current fetch position whenever the bus
-- acknowledges a strobe; the RAM is read on a new request and re-read
-- on every cycle while a miss is pending.

ram_waddr<=std_logic_vector(current_offset);
ram_raddr<=std_logic_vector(read_offset);
ram_we<=wb_stb and wbm_ack_i;
ram_re<=lli_re_i or miss;

ram_inst: entity work.lxp32_ram256x32(rtl)
	port map(
		clk_i=>clk_i,
		
		we_i=>ram_we,
		waddr_i=>ram_waddr,
		wdata_i=>wbm_dat_i,
		
		re_i=>ram_re,
		raddr_i=>ram_raddr,
		rdata_o=>lli_dat_o
	);

-- Determine hit/miss

-- This cache uses a single ring buffer. Address in buffer corresponds
-- to the lower 8 bits of the full address. The part of the buffer that
-- is higher than current_offset represents a previous block ("p"), the
-- other part represents a current block ("c").

hitc<='1' when read_base=current_base and read_offset<current_offset and
	((wrap_cnt=1 and read_offset>=start_offset) or
	wrap_cnt=2 or wrap_cnt=3) else '0';

hitp<='1' when read_base=prev_base and read_offset>current_offset and
	((wrap_cnt=2 and read_offset>=start_offset) or
	wrap_cnt=3) else '0';

-- Miss flag: set when a read is active and hits neither block. Because
-- ram_re includes "miss", the flag stays asserted until the lookup of
-- the latched address succeeds.

process (clk_i) is
begin
	if rising_edge(clk_i) then
		if rst_i='1' then
			miss<='0';
		else
			if hitc='0' and hitp='0' and ram_re='1' then
				miss<='1';
			else
				miss<='0';
			end if;
		end if;
	end if;
end process;

lli_busy_o<=miss;

-- Set INIT flag when the first lli_re_i signal is detected

process (clk_i) is
begin
	if rising_edge(clk_i) then
		if rst_i='1' then
			init<='0';
		elsif lli_re_i='1' then
			init<='1';
		end if;
	end if;
end process;

-- Fill cache

-- Distance (modulo 256) from the read position to the fetch position

prefetch_distance<=current_offset-read_offset;

-- Note: "near_miss" signal prevents cache invalidation when difference
-- between the requested address and the currently fetched address
-- is too small (and, therefore, the requested data will be fetched soon
-- without invalidation).

process (clk_i) is
begin
	if rising_edge(clk_i) then
		if rst_i='1' then
			near_miss<='0';
		elsif wrap_cnt>0 and read_offset-current_offset<=to_unsigned(BURST_SIZE/2,8) and
			((read_base=current_base and read_offset>=current_offset) or
			(read_base=next_base and read_offset<current_offset))
		then
			near_miss<='1';
		else
			near_miss<='0';
		end if;
	end if;
end process;

-- Request early termination of the running burst on a (non-near) miss.
-- A burst started because of a miss (burst1='1') is not terminated
-- until burst_cnt exceeds 2.

terminate_burst<='1' when burst_cnt<BURST_SIZE-1 and miss='1' and
	(burst_cnt>2 or burst1='0') and near_miss='0' else '0';

-- Burst state machine.
--
-- burst_cnt=0            : idle
-- burst_cnt=1..BURST_SIZE-1 : burst in progress
-- burst_cnt=BURST_SIZE   : last cycle of the burst (wb_cti="111")
--
-- wrap_cnt is set to 1 when the buffer is (re)started on a miss and is
-- incremented (saturating at 3) each time current_offset wraps around.

process (clk_i) is
begin
	if rising_edge(clk_i) then
		if rst_i='1' then
			burst_cnt<=0;
			wb_stb<='0';
			wrap_cnt<=0;
			wb_cti<=(others=>'-');
			burst1<='-';
			current_offset<=(others=>'-');
			start_offset<=(others=>'-');
			current_base<=(others=>'-');
			next_base<=(others=>'-');
			prev_base<=(others=>'-');
			
			-- To suppress numeric_std warnings
			-- synthesis translate_off
			current_offset<=(others=>'0');
			start_offset<=(others=>'0');
			current_base<=(others=>'0');
			next_base<=(others=>'0');
			prev_base<=(others=>'0');
			-- synthesis translate_on
		else
			if burst_cnt=0 and init='1' then
				-- Idle: decide whether to start a burst
				if miss='1' and near_miss='0' then
					-- Miss: restart the buffer at the requested address
					wb_stb<='1';
					wb_cti<="010";
					current_offset<=read_offset;
					start_offset<=read_offset;
					current_base<=read_base;
					next_base<=read_base+1;
					burst_cnt<=1;
					burst1<='1';
					wrap_cnt<=1;
				elsif prefetch_distance<to_unsigned(PREFETCH_SIZE,8) or near_miss='1' then
					-- Prefetch: continue fetching from the current position
					wb_stb<='1';
					wb_cti<="010";
					burst_cnt<=1;
					burst1<='0';
				end if;
			else
				if wbm_ack_i='1' then
					-- A word has been received: advance the fetch position
					current_offset<=current_offset+1;
					if current_offset=X"FF" then
						-- Offset wraps around: move on to the next block
						current_base<=next_base;
						next_base<=next_base+1;
						prev_base<=current_base;
						if wrap_cnt<3 then
							wrap_cnt<=wrap_cnt+1;
						end if;
					end if;
					if burst_cnt=BURST_SIZE-1 or terminate_burst='1' then
						-- Next cycle is the last one of this burst
						burst_cnt<=BURST_SIZE;
						wb_cti<="111";
					elsif burst_cnt<BURST_SIZE-1 then
						burst_cnt<=burst_cnt+1;
						wb_cti<="010";
					else
						-- Last cycle acknowledged: start the next burst
						-- immediately if needed, otherwise go idle.
						-- Assignments below override the offset
						-- increment made above.
						if miss='1' and near_miss='0' then
							wb_stb<='1';
							wb_cti<="010";
							current_offset<=read_offset;
							start_offset<=read_offset;
							current_base<=read_base;
							next_base<=read_base+1;
							burst_cnt<=1;
							burst1<='1';
							wrap_cnt<=1;
						elsif prefetch_distance<to_unsigned(PREFETCH_SIZE,8) or near_miss='1' then
							wb_stb<='1';
							wb_cti<="010";
							burst_cnt<=1;
							burst1<='0';
						else
							burst_cnt<=0;
							wb_stb<='0';
						end if;
					end if;
				end if;
			end if;
		end if;
	end if;
end process;

-- Bus master outputs. CYC and STB are driven from the same register;
-- the fetch address is the 22-bit base concatenated with the 8-bit
-- offset.

wbm_cyc_o<=wb_stb;
wbm_stb_o<=wb_stb;
wbm_cti_o<=wb_cti;
wbm_bte_o<="00";
wbm_adr_o<=std_logic_vector(current_base&current_offset);

end architecture;