1 |
48 |
alirezamon |
/******************************************************************************
|
2 |
|
|
This Source Code Form is subject to the terms of the
|
3 |
|
|
Open Hardware Description License, v. 1.0. If a copy
|
4 |
|
|
of the OHDL was not distributed with this file, You
|
5 |
|
|
can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
|
6 |
|
|
|
7 |
|
|
Description: Data cache LRU implementation
|
8 |
|
|
|
9 |
|
|
Copyright (C) 2012 Stefan Wallentowitz <stefan.wallentowitz@tum.de>
|
10 |
|
|
|
11 |
|
|
******************************************************************************/
|
12 |
|
|
|
13 |
|
|
// This is the least-recently-used (LRU) calculation module. It
|
14 |
|
|
// essentially has two types of input and output. First, the history
|
15 |
|
|
// information needs to be evaluated to calculate the LRU value.
|
16 |
|
|
// Second, the current access and the LRU are one hot values of the
|
17 |
|
|
// ways.
|
18 |
|
|
//
|
19 |
|
|
// This module is pure combinational. All registering is done outside
|
20 |
|
|
// this module. The following parameter exists:
|
21 |
|
|
//
|
22 |
|
|
// * NUMWAYS: Number of ways (must be greater than 1)
|
23 |
|
|
//
|
24 |
|
|
// The following ports exist:
|
25 |
|
|
//
|
26 |
|
|
// * current: The current LRU history
|
27 |
|
|
// * update: The new LRU history after access
|
28 |
|
|
//
|
29 |
|
|
// * access: 0 if no access or one-hot of the way that accesses
|
30 |
|
|
// * lru_pre: LRU before the access (one hot of ways)
|
31 |
|
|
// * lru_post: LRU after the access (one hot of ways)
|
32 |
|
|
//
|
33 |
|
|
// The latter three have the width of NUMWAYS apparently. The first
|
34 |
|
|
// three are more complicated as this is an optimized way of storing
|
35 |
|
|
// the history information, which will be shortly described in the
|
36 |
|
|
// following.
|
37 |
|
|
//
|
38 |
|
|
// A naive approach to store the history of the access is to store the
|
39 |
|
|
// relative "age" of each element in a vector, for example for four
|
40 |
|
|
// ways:
|
41 |
|
|
//
|
42 |
|
|
// 0: 1 1: 3 2: 1 3:0
|
43 |
|
|
//
|
44 |
|
|
// This needs 4x2 bits, but more important it also needs a set of
|
45 |
|
|
// comparators and adders. This can become increasingly complex when
|
46 |
|
|
// using a higher number of cache ways with an impact on area and
|
47 |
|
|
// timing.
|
48 |
|
|
//
|
49 |
|
|
// Similarly, it is possible to store a "stack" of the access and
|
50 |
|
|
// reorder this stack on an access. But the problems are similar, it
|
51 |
|
|
// needs comparators etc.
|
52 |
|
|
//
|
53 |
|
|
// A neat approach is to store the history efficiently coded, while
|
54 |
|
|
// also easing the calculation. This approach stores the information
|
55 |
|
|
// whether each entry is older than the others. For example for the
|
56 |
|
|
// four-way example (x<y means x is older than y):
|
57 |
|
|
//
|
58 |
|
|
// |0<1|0<2|0<3|1<0|1<2|1<3|2<0|2<1|2<3|3<0|3<1|3<2|
|
59 |
|
|
//
|
60 |
|
|
// This is redundant as two entries can never be equally old meaning
|
61 |
|
|
// x<y == !y<x, leading to a simpler version
|
62 |
|
|
//
|
63 |
|
|
// |0<1|0<2|0<3|1<2|1<3|2<3|
|
64 |
|
|
//
|
65 |
|
|
// The calculations on this vector are much simpler and it is
|
66 |
|
|
// therefore used by this module.
|
67 |
|
|
//
|
68 |
|
|
// The width of this vector is the triangular number of (NUMWAYS-1),
|
69 |
|
|
// specifically:
|
70 |
|
|
// WIDTH=NUMWAYS*(NUMWAYS-1)/2.
|
71 |
|
|
//
|
72 |
|
|
// The details of the algorithms are described below. The designer
|
73 |
|
|
// just needs to apply current history vector and the access and gets
|
74 |
|
|
// the updated history and the LRU before and after the access.
|
75 |
|
|
//
|
76 |
|
|
// Instantiation example:
|
77 |
|
|
// mor1kx_dcache_lru
|
78 |
|
|
// (.NUMWAYS(4))
|
79 |
|
|
// u_lru(.current (current_history[((NUMWAYS*(NUMWAYS-1))>>1)-1:0])),
|
80 |
|
|
// .update (updated_history[((NUMWAYS*(NUMWAYS-1))>>1)-1:0])),
|
81 |
|
|
// .access (access[NUMWAYS-1:0]),
|
82 |
|
|
// .lru_pre (lru_pre[NUMWAYS-1:0]),
|
83 |
|
|
// .lru_post (lru_post[NUMWAYS-1:0]));
|
84 |
|
|
|
85 |
|
|
|
86 |
|
|
module mor1kx_cache_lru(/*AUTOARG*/
|
87 |
|
|
// Outputs
|
88 |
|
|
update, lru_pre, lru_post,
|
89 |
|
|
// Inputs
|
90 |
|
|
current, access
|
91 |
|
|
);
|
92 |
|
|
parameter NUMWAYS = 2;
|
93 |
|
|
|
94 |
|
|
// Triangular number
|
95 |
|
|
localparam WIDTH = NUMWAYS*(NUMWAYS-1) >> 1;
|
96 |
|
|
|
97 |
|
|
input [WIDTH-1:0] current;
|
98 |
|
|
output reg [WIDTH-1:0] update;
|
99 |
|
|
|
100 |
|
|
input [NUMWAYS-1:0] access;
|
101 |
|
|
output reg [NUMWAYS-1:0] lru_pre;
|
102 |
|
|
output reg [NUMWAYS-1:0] lru_post;
|
103 |
|
|
|
104 |
|
|
reg [NUMWAYS-1:0] expand [0:NUMWAYS-1];
|
105 |
|
|
|
106 |
|
|
integer i, j;
|
107 |
|
|
integer offset;
|
108 |
|
|
|
109 |
|
|
//
|
110 |
|
|
// < 0 1 2 3
|
111 |
|
|
// 0 1 (0<1) (0<2) (0<3)
|
112 |
|
|
// 1 (1<0) 1 (1<2) (1<3)
|
113 |
|
|
// 2 (2<0) (2<1) 1 (2<3)
|
114 |
|
|
// 3 (3<0) (3<1) (3<2) 1
|
115 |
|
|
//
|
116 |
|
|
// As two entries can never be equally old (needs to be avoided on
|
117 |
|
|
// the outside) this is equivalent to:
|
118 |
|
|
//
|
119 |
|
|
// < 0 1 2 3
|
120 |
|
|
// 0 1 (0<1) (0<2) (0<3)
|
121 |
|
|
// 1 !(0<1) 1 (1<2) (1<3)
|
122 |
|
|
// 2 !(0<2) !(1<2) 1 (2<3)
|
123 |
|
|
// 3 !(0<3) !(1<3) !(2<3) 1
|
124 |
|
|
//
|
125 |
|
|
// The lower half below the diagonal is the inverted mirror of the
|
126 |
|
|
// upper half. The number of entries in each half is of course
|
127 |
|
|
// equal to the width of our LRU vector and the upper half is
|
128 |
|
|
// filled with the values from the vector.
|
129 |
|
|
//
|
130 |
|
|
// The algorithm works as follows:
|
131 |
|
|
//
|
132 |
|
|
// 1. Fill the matrix (expand) with the values. The entry (i,i) is
|
133 |
|
|
// statically one.
|
134 |
|
|
//
|
135 |
|
|
// 2. The LRU_pre vector is the vector of the ANDs of the each row.
|
136 |
|
|
//
|
137 |
|
|
// 3. Update the values with the access vector (if any) in the
|
138 |
|
|
// following way: If access[i] is set, the values in row i are
|
139 |
|
|
// set to 0. Similarly, the values in column i are set to 1.
|
140 |
|
|
//
|
141 |
|
|
// 4. The update vector of the lru history is then generated by
|
142 |
|
|
// copying the upper half of the matrix back.
|
143 |
|
|
//
|
144 |
|
|
// 5. The LRU_post vector is the vector of the ANDs of each row.
|
145 |
|
|
//
|
146 |
|
|
// In the following an example will be used to demonstrate the algorithm:
|
147 |
|
|
//
|
148 |
|
|
// NUMWAYS = 4
|
149 |
|
|
// current = 6'b110100;
|
150 |
|
|
// access = 4'b0010;
|
151 |
|
|
//
|
152 |
|
|
// This current history is:
|
153 |
|
|
//
|
154 |
|
|
// 0<1 0<2 0<3 1<2 1<3 2<3
|
155 |
|
|
// 0 0 1 0 1 1
|
156 |
|
|
//
|
157 |
|
|
// and way 2 is accessed.
|
158 |
|
|
//
|
159 |
|
|
// The history of accesses is 3>0>1>2 and the expected result is an
|
160 |
|
|
// update to 2>3>0>1 with LRU_pre=2 and LRU_post=1
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
always @(*) begin : comb
|
164 |
|
|
// The offset is used to transfer the flat history vector into
|
165 |
|
|
// the upper half of the
|
166 |
|
|
offset = 0;
|
167 |
|
|
|
168 |
|
|
// 1. Fill the matrix (expand) with the values. The entry (i,i) is
|
169 |
|
|
// statically one.
|
170 |
|
|
for (i = 0; i < NUMWAYS; i = i + 1) begin
|
171 |
|
|
expand[i][i] = 1'b1;
|
172 |
|
|
|
173 |
|
|
for (j = i + 1; j < NUMWAYS; j = j + 1) begin
|
174 |
|
|
expand[i][j] = current[offset+j-i-1];
|
175 |
|
|
end
|
176 |
|
|
for (j = 0; j < i; j = j + 1) begin
|
177 |
|
|
expand[i][j] = !expand[j][i];
|
178 |
|
|
end
|
179 |
|
|
|
180 |
|
|
offset = offset + NUMWAYS - i - 1;
|
181 |
|
|
end // for (i = 0; i < NUMWAYS; i = i + 1)
|
182 |
|
|
|
183 |
|
|
// For the example expand is now:
|
184 |
|
|
// < 0 1 2 3 0 1 2 3
|
185 |
|
|
// 0 1 (0<1) (0<2) (0<3) 0 1 0 0 1
|
186 |
|
|
// 1 (1<0) 1 (1<2) (1<3) => 1 1 1 0 1
|
187 |
|
|
// 2 (2<0) (2<1) 1 (2<3) 2 1 1 1 1
|
188 |
|
|
// 3 (3<0) (3<1) (3<2) 1 3 0 0 0 1
|
189 |
|
|
|
190 |
|
|
|
191 |
|
|
// 2. The LRU_pre vector is the vector of the ANDs of the each
|
192 |
|
|
// row.
|
193 |
|
|
for (i = 0; i < NUMWAYS; i = i + 1) begin
|
194 |
|
|
lru_pre[i] = &expand[i];
|
195 |
|
|
end
|
196 |
|
|
|
197 |
|
|
// We derive why this is the case for the example here:
|
198 |
|
|
// lru_pre[2] is high when the following condition holds:
|
199 |
|
|
//
|
200 |
|
|
// (2<0) & (2<1) & (2<3).
|
201 |
|
|
//
|
202 |
|
|
// Applying the negation transform we get:
|
203 |
|
|
//
|
204 |
|
|
// !(0<2) & !(1<2) & (2<3)
|
205 |
|
|
//
|
206 |
|
|
// and this is exactly row [2], so that here
|
207 |
|
|
//
|
208 |
|
|
// lru_pre[2] = &expand[2] = 1'b1;
|
209 |
|
|
//
|
210 |
|
|
// At this point you can also see why we initialize the diagonal
|
211 |
|
|
// with 1.
|
212 |
|
|
|
213 |
|
|
// 3. Update the values with the access vector (if any) in the
|
214 |
|
|
// following way: If access[i] is set, the values in row i
|
215 |
|
|
// are set to 0. Similarly, the values in column i are set
|
216 |
|
|
// to 1.
|
217 |
|
|
for (i = 0; i < NUMWAYS; i = i + 1) begin
|
218 |
|
|
if (access[i]) begin
|
219 |
|
|
for (j = 0; j < NUMWAYS; j = j + 1) begin
|
220 |
|
|
if (i != j) begin
|
221 |
|
|
expand[i][j] = 1'b0;
|
222 |
|
|
end
|
223 |
|
|
end
|
224 |
|
|
for (j = 0; j < NUMWAYS; j = j + 1) begin
|
225 |
|
|
if (i != j) begin
|
226 |
|
|
expand[j][i] = 1'b1;
|
227 |
|
|
end
|
228 |
|
|
end
|
229 |
|
|
end
|
230 |
|
|
end // for (i = 0; i < NUMWAYS; i = i + 1)
|
231 |
|
|
|
232 |
|
|
// Again this becomes obvious when you see what we do here.
|
233 |
|
|
// Accessing way 2 leads means now
|
234 |
|
|
//
|
235 |
|
|
// (0<2) = (1<2) = (3<2) = 1, and
|
236 |
|
|
// (2<0) = (2<1) = (2<3) = 0
|
237 |
|
|
//
|
238 |
|
|
// The matrix changes accordingly
|
239 |
|
|
//
|
240 |
|
|
// 0 1 2 3 0 1 2 3
|
241 |
|
|
// 0 1 0 0 1 0 1 0 1 1
|
242 |
|
|
// 1 1 1 0 1 => 1 1 1 1 1
|
243 |
|
|
// 2 1 1 1 1 2 0 0 1 0
|
244 |
|
|
// 3 0 0 0 1 3 0 0 1 1
|
245 |
|
|
|
246 |
|
|
// 4. The update vector of the lru history is then generated by
|
247 |
|
|
// copying the upper half of the matrix back.
|
248 |
|
|
offset = 0;
|
249 |
|
|
for (i = 0; i < NUMWAYS; i = i + 1) begin
|
250 |
|
|
for (j = i + 1; j < NUMWAYS; j = j + 1) begin
|
251 |
|
|
update[offset+j-i-1] = expand[i][j];
|
252 |
|
|
end
|
253 |
|
|
offset = offset + NUMWAYS - i - 1;
|
254 |
|
|
end
|
255 |
|
|
|
256 |
|
|
// This is the opposite operation of step 1 and is clear now.
|
257 |
|
|
// Update becomes:
|
258 |
|
|
//
|
259 |
|
|
// update = 6'b011110
|
260 |
|
|
//
|
261 |
|
|
// This is translated to
|
262 |
|
|
//
|
263 |
|
|
// 0<1 0<2 0<3 1<2 1<3 2<3
|
264 |
|
|
// 0 1 1 1 1 0
|
265 |
|
|
//
|
266 |
|
|
// which is: 2>3>0>1, which is what we expected.
|
267 |
|
|
|
268 |
|
|
// 5. The LRU_post vector is the vector of the ANDs of each row.
|
269 |
|
|
for (i = 0; i < NUMWAYS; i = i + 1) begin
|
270 |
|
|
lru_post[i] = &expand[i];
|
271 |
|
|
end
|
272 |
|
|
|
273 |
|
|
// This final step is equal to step 2 and also clear now.
|
274 |
|
|
//
|
275 |
|
|
// lru_post[1] = &expand[1] = 1'b1;
|
276 |
|
|
//
|
277 |
|
|
// lru_post = 4'b0010 is what we expected.
|
278 |
|
|
end
|
279 |
|
|
|
280 |
|
|
|
281 |
|
|
endmodule // mor1kx_dcache_lru
|