-----------------------------------------------------------------
--                                                             --
-----------------------------------------------------------------
--                                                             --
-- Copyright (C) 2016 Stefano Tonello                          --
--                                                             --
-- This source file may be used and distributed without        --
-- restriction provided that this copyright statement is not   --
-- removed from the file and that any derivative work contains --
-- the original copyright notice and the associated disclaimer.--
--                                                             --
-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY         --
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   --
-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   --
-- FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      --
-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         --
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    --
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   --
-- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        --
-- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  --
-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT   --
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  --
-- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         --
-- POSSIBILITY OF SUCH DAMAGE.                                 --
--                                                             --
-----------------------------------------------------------------

---------------------------------------------------------------
-- RV01 Branch History Table (Branch Prediction Sub-Unit)
---------------------------------------------------------------
|
31 |
|
|
|
32 |
|
|
library IEEE;
|
33 |
|
|
use IEEE.std_logic_1164.all;
|
34 |
|
|
use IEEE.numeric_std.all;
|
35 |
|
|
|
36 |
|
|
library work;
|
37 |
|
|
use work.RV01_CONSTS_PKG.all;
|
38 |
|
|
use work.RV01_TYPES_PKG.all;
|
39 |
|
|
use work.RV01_FUNCS_PKG.all;
|
40 |
|
|
use work.RV01_ARITH_PKG.all;
|
41 |
|
|
use work.RV01_OP_PKG.all;
|
42 |
|
|
|
43 |
|
|
-- RV01 Branch History Table: entity declaration.
--
-- Generics:
--   BHT_SIZE : word count of the BHT data RAM and of the valid-bit RAM.
--   PXE      : parallel execution enable; selects how many fetch-address
--              LS bits are dropped when forming the BHT index
--              (3 when PXE = '1', 2 otherwise).
entity RV01_BHT is
  generic(
    BHT_SIZE : natural := 64;
    PXE : std_logic := '1'
  );
  port(
    CLK_i : in std_logic;
    -- not referenced by architecture ARC
    RST_i : in std_logic;
    -- valid-bit RAM initialization write enable (writes '0')
    BHTV_WE_i : in std_logic;
    -- valid-bit RAM initialization write address
    BHTV_WADR_i : in natural range 0 to BHT_SIZE-1;
    -- prediction port
    -- (IF_V_i is not referenced by architecture ARC)
    IF_V_i : in std_logic;
    -- fetch address used to index the BHT RAMs
    IF_PC_i : in unsigned(ALEN-1 downto 0);
    -- IF2 stage valid flag, qualifies the prediction hit
    IF2_V_i : in std_logic;
    -- IF2 stage address, compared against the stored tag
    IF2_PC_i : in unsigned(ALEN-1 downto 0);
    -- verification port
    -- branch target address to store
    BHT_BTA_i : in ADR_T;
    -- branch fetch address (provides write index and tag)
    BHT_PC_i : in ADR_T;
    -- updated counter value to store
    BHT_CNT_i : in std_logic_vector(2-1 downto 0);
    -- BHT entry write enable
    BHT_WE_i : in std_logic;

    -- prediction port
    -- predicted branch execute flag
    PBX_o : out std_logic;
    -- predicted branch target address
    PBTA_o : out unsigned(ALEN-1 downto 0);
    -- verification port
    -- bits 2:1 = counter value, bit 0 = prediction hit flag
    BPVD_o : out std_logic_vector(3-1 downto 0)
  );
end RV01_BHT;
|
71 |
|
|
|
72 |
|
|
architecture ARC of RV01_BHT is

  -- Map a std_logic flag to a natural: '1' gives 1, any other value gives 0.
  function to_natural(B : std_logic) return natural is
  begin
    if B = '1' then
      return 1;
    end if;
    return 0;
  end function;

  -- FIX is 0 when PXE = '1' and 1 for any other PXE value.
  constant FIX : natural := 1-to_natural(PXE);
  constant L2BHT_SIZE : natural := log2(BHT_SIZE);
  -- Position of the lowest fetch-address bit used as BHT index
  -- (3 when FIX = 0, 2 when FIX = 1).
  constant IDX_LSB : natural := 3-FIX;
  -- Position of the lowest fetch-address bit stored as tag.
  constant TAG_LSB : natural := L2BHT_SIZE+IDX_LSB;
  constant TAG_SIZE : natural := ALEN - TAG_LSB;
  constant CNT_SIZE : natural := 2;

  -- Each BHT entry holds (MS to LS):
  -- 1) branch target address (ALEN bits)
  -- 2) address tag (TAG_SIZE = ALEN-(L2BHT_SIZE+3-FIX) bits)
  -- 3) saturating counter (CNT_SIZE = 2 bits)

  constant BHT_WIDTH : natural := ALEN + TAG_SIZE + CNT_SIZE;

  component RV01_RAM_1RW1R is
    generic(
      -- I/O data bus width
      DWIDTH : integer := 16;
      -- word count
      WCOUNT : integer := 256;
      STYLE : string := "auto"
    );
    port(
      CLK_i : in std_logic;
      A_i : in unsigned(log2(WCOUNT)-1 downto 0);
      DPRA_i : in unsigned(log2(WCOUNT)-1 downto 0);
      D_i : in std_logic_vector(DWIDTH-1 downto 0);
      WE_i : in std_logic;

      Q_o : out std_logic_vector(DWIDTH-1 downto 0);
      DPQ_o : out std_logic_vector(DWIDTH-1 downto 0)
    );
  end component;

  signal BHT_V : std_logic;
  signal BHT_BTA : unsigned(ALEN-1 downto 0);
  signal BHT_PADR,BHT_VADR : unsigned(L2BHT_SIZE-1 downto 0);
  signal BHT_TAG : unsigned(TAG_SIZE-1 downto 0);
  signal BHT_CNT : std_logic_vector(CNT_SIZE-1 downto 0);
  signal BHT_HIT : std_logic;
  signal BHT_D,BHT_Q : std_logic_vector(BHT_WIDTH-1 downto 0);
  signal BHTV_D,BHTV_Q : std_logic_vector(0 downto 0);
  signal BHTV_WE : std_logic;
  signal BHTV_VADR : unsigned(L2BHT_SIZE-1 downto 0);
  signal TAG_MTCH : std_logic;

begin

  ------------------------------------
  -- Notes
  ------------------------------------

  -- This module implements a simple branch history
  -- table (BHT) based on 2-bit saturating counters.
  -- Each BHT entry stores branch prediction info
  -- for an instruction.
  -- Prediction logic doesn't explicitly check if a
  -- fetched instruction is a branch, it just
  -- searches BHT for an entry matching fetch
  -- address.
  -- BHT entry valid bits are stored in a separate
  -- RAM allowing them to be cleared when BHT is
  -- initialized. A true RAM is used instead of a
  -- BHT_SIZE-bit register to reduce logic resource
  -- utilization for large BHT's.

  -- When parallel execution is enabled, a BHT RAM
  -- address makes reference to an instruction pair
  -- and therefore the LS 3b are removed from the
  -- fetch address.
  -- When parallel execution is NOT enabled, a BHT RAM
  -- address makes reference to a single instruction
  -- and therefore only the LS 2b are removed from the
  -- fetch address (this is accomplished using the FIX
  -- constant, which is derived from the PXE generic).
  -- NOTE: doubling the RAM size in this case
  -- (WCOUNT => BHT_SIZE*(1+FIX)) is currently disabled,
  -- so both RAMs always hold BHT_SIZE words.

  ------------------------------------
  -- BHT data RAM
  ------------------------------------

  -- Port A is the write (verification) port, the
  -- DP port is the read (prediction) port.

  U_BHT : RV01_RAM_1RW1R
    generic map(
      DWIDTH => BHT_WIDTH,
      WCOUNT => BHT_SIZE,
      STYLE => "BLOCK"
    )
    port map(
      CLK_i => CLK_i,
      A_i => BHT_VADR,
      DPRA_i => BHT_PADR,
      D_i => BHT_D,
      WE_i => BHT_WE_i,

      Q_o => open,
      DPQ_o => BHT_Q
    );

  ------------------------------------
  -- BHT valid bits RAM
  ------------------------------------

  -- STYLE is left at the component default here.

  U_BHTV : RV01_RAM_1RW1R
    generic map(
      DWIDTH => 1,
      WCOUNT => BHT_SIZE
    )
    port map(
      CLK_i => CLK_i,
      A_i => BHTV_VADR,
      DPRA_i => BHT_PADR,
      D_i => BHTV_D,
      WE_i => BHTV_WE,

      Q_o => open,
      DPQ_o => BHTV_Q
    );

  -- Valid bits are cleared during BHT initialization
  -- by explicitly writing '0' to each BHTV entry.

  -- The value written to a valid bit is '0' during
  -- initialization (BHTV_WE_i = '1') and '1' during
  -- normal operations (i.e. entries are never
  -- invalidated during normal operations, they can
  -- only be overwritten with data related to an
  -- aliased branch/jal).

  BHTV_D(0) <= not(BHTV_WE_i);

  -- Valid bit is written on an entry update as well
  -- as on an initialization write.
  BHTV_WE <= BHT_WE_i or BHTV_WE_i;

  -- Initialization address takes priority over the
  -- verification address.
  BHTV_VADR <= to_unsigned(BHTV_WADR_i,L2BHT_SIZE) when BHTV_WE_i = '1' else
    BHT_VADR;

  ------------------------------------
  -- Branch prediction logic
  ------------------------------------

  BHT_V <= BHTV_Q(0);

  -- BHT prediction address (PC LS bits)
  BHT_PADR <= IF_PC_i(TAG_LSB-1 downto IDX_LSB);

  -- BHT verification address (PC LS bits)
  BHT_VADR <= BHT_PC_i(TAG_LSB-1 downto IDX_LSB);

  -- Extract BTA, address tag and counter value
  -- from BHT output

  BHT_BTA <= to_unsigned(BHT_Q(BHT_WIDTH-1 downto TAG_SIZE+CNT_SIZE));
  BHT_TAG <= to_unsigned(BHT_Q(TAG_SIZE+CNT_SIZE-1 downto CNT_SIZE));
  BHT_CNT <= BHT_Q(CNT_SIZE-1 downto 0);

  -- BHT tag match flag (the tag is compared against
  -- the IF2 stage address, while the RAMs are
  -- addressed with the IF stage address)
  TAG_MTCH <=
    '1' when(BHT_TAG = IF2_PC_i(ALEN-1 downto TAG_LSB)) else
    '0';

  -- Prediction "hits" if:
  -- 1) target BHT entry is valid, AND
  -- 2) IF2 stage valid bit is set, AND
  -- 3) address tag matches.

  -- BHT prediction hit flag
  BHT_HIT <= BHT_V and IF2_V_i and TAG_MTCH;

  -- Predicted branch execute flag: asserted on a
  -- prediction hit whose counter MS bit is '0'.
  PBX_o <= BHT_HIT and not(BHT_CNT(1));

  -- Predicted branch target address (BHT BTA).
  PBTA_o <= BHT_BTA;

  ------------------------------------
  -- BHT updating logic
  ------------------------------------

  -- BHT updating data consists of:
  -- 1) prediction hit flag,
  -- 2) prediction count (this avoids having to
  -- re-read BHT),
  -- 3) branch fetch address.
  -- Only items 1) and 2) need to be
  -- actually provided, as fetch address is
  -- available for each instruction.

  -- BPVD_o(2 downto 1) is the count and BPVD_o(0)
  -- the hit flag; on a miss, count "11" is reported
  -- with the hit flag cleared.
  BPVD_o <= (BHT_CNT & '1') when (BHT_HIT = '1')
    else "110";

  -- BHT updated data: BTA & address tag & counter

  BHT_D <=
    to_std_logic_vector(BHT_BTA_i) &
    to_std_logic_vector(BHT_PC_i(ALEN-1 downto TAG_LSB)) &
    BHT_CNT_i;

end ARC;
|
281 |
|
|
|