1 |
9 |
Agner |
//////////////////////////////////////////////////////////////////////////////////
|
2 |
|
|
// Engineer: Agner Fog
|
3 |
|
|
//
|
4 |
|
|
// Create Date: 2020-06-13
|
5 |
|
|
// Last modified: 2021-08-03
|
6 |
|
|
// Module Name: subfunctions
|
7 |
|
|
// Project Name: ForwardCom soft core
|
8 |
|
|
// Target Devices: Artix 7
|
9 |
|
|
// Tool Versions: Vivado v. 2020.1
|
10 |
|
|
// License: CERN-OHL-W v. 2 or later
|
11 |
|
|
// Description: Subfunctions for calculations:
|
12 |
|
|
// bitscan: find highest set bit
|
13 |
|
|
// popcount: count number of 1-bits
|
14 |
|
|
// reversebits: reverse order of bits
|
15 |
|
|
// truth_table_lookup: 3-input truth table
|
16 |
|
|
//////////////////////////////////////////////////////////////////////////////////
|
17 |
|
|
`include "defines.vh"
|
18 |
|
|
|
19 |
|
|
// popcount6: count the 1-bits among six input bits.
// The result (0..6) fits in three bits. Six inputs are used so that the
// function is sized for a 6-input LUT.
function [2:0] popcount6;
    input [5:0] inp;                     // the six bits to count
    logic [2:0] ones;                    // running number of 1-bits
    ones = 3'd0;
    for (integer b = 5; b >= 0; b--) begin
        ones = ones + inp[b];            // each bit adds 0 or 1
    end
    popcount6 = ones;
endfunction
|
29 |
|
|
|
30 |
|
|
// popcount32: count the 1-bits in a 32-bit word (result 0..32).
// The input is zero-extended to 36 bits so that it splits into six
// groups of six bits, each counted by popcount6.
function [5:0] popcount32;
    input [31:0] inp;                    // word to count
    logic [35:0] padded;                 // inp zero-extended to a multiple of 6 bits
    logic [5:0]  total;                  // running number of 1-bits
    padded = {4'b0000, inp};
    total  = 6'd0;
    for (integer grp = 0; grp < 6; grp++) begin
        total = total + popcount6(padded[6*grp +: 6]);
    end
    popcount32 = total;
endfunction
|
41 |
|
|
|
42 |
|
|
// popcount64: count the 1-bits in a 64-bit word (result 0..64).
// The input is zero-extended to 66 bits so that it splits into eleven
// groups of six bits, each counted by popcount6.
function [6:0] popcount64;
    input [63:0] inp;                    // word to count
    logic [65:0] padded;                 // inp zero-extended to a multiple of 6 bits
    logic [6:0]  total;                  // running number of 1-bits
    padded = {2'b00, inp};
    total  = 7'd0;
    for (integer grp = 0; grp < 11; grp++) begin
        total = total + popcount6(padded[6*grp +: 6]);
    end
    popcount64 = total;
endfunction
|
53 |
|
|
|
54 |
|
|
// bitscan64A: find the highest 1-bit in a 64-bit word
// (also known as leading zero counter or priority encoder).
// The search narrows in three steps: 16-bit block, 4-bit block, single bit.
// Return value:
//   bitscan64A[6:1] is the index of the highest 1-bit in the input
//                   (0 when the input is all zeroes)
//   bitscan64A[0]   is 1 if all input bits are zero
function [6:0] bitscan64A;
    input [63:0] m0;                     // 64 bits input
    logic [5:0]  pos;                    // index of the highest 1-bit
    logic [15:0] word16;                 // 16-bit block holding the highest 1-bit
    logic [3:0]  nibble;                 // 4-bit block holding the highest 1-bit
    logic        none_set;               // 1 when the input is all zeroes

    pos = 6'd0;

    // step 1: pick the highest non-zero 16-bit block of m0 (block 0 as fallback)
    if (|m0[63:48]) begin
        word16   = m0[63:48];
        pos[5:4] = 2'd3;
    end else if (|m0[47:32]) begin
        word16   = m0[47:32];
        pos[5:4] = 2'd2;
    end else if (|m0[31:16]) begin
        word16   = m0[31:16];
        pos[5:4] = 2'd1;
    end else begin
        word16   = m0[15:0];
        pos[5:4] = 2'd0;
    end

    // step 2: pick the highest non-zero 4-bit block of word16 (block 0 as fallback)
    if (|word16[15:12]) begin
        nibble   = word16[15:12];
        pos[3:2] = 2'd3;
    end else if (|word16[11:8]) begin
        nibble   = word16[11:8];
        pos[3:2] = 2'd2;
    end else if (|word16[7:4]) begin
        nibble   = word16[7:4];
        pos[3:2] = 2'd1;
    end else begin
        nibble   = word16[3:0];
        pos[3:2] = 2'd0;
    end

    // step 3: pick the highest 1-bit inside the selected 4-bit block
    if      (nibble[3]) pos[1:0] = 2'd3;
    else if (nibble[2]) pos[1:0] = 2'd2;
    else if (nibble[1]) pos[1:0] = 2'd1;
    else                pos[1:0] = 2'd0;

    // the selected 4-bit block is zero only if the whole input is zero
    none_set = ~|nibble;

    // return both values packed together
    bitscan64A = {pos, none_set};
endfunction
|
110 |
|
|
|
111 |
|
|
|
112 |
|
|
// bitscan64B: find the highest 1-bit in a 64-bit word, alternative implementation
// (noted by the original author as slightly slower).
// Instead of copying the selected 16-bit block, this variant keeps one
// "non-zero" flag per 4-bit block and later reads the chosen 4-bit block
// directly from m0.
// Return value:
//   bitscan64B[6:1] is the index of the highest 1-bit in the input
//                   (0 when the input is all zeroes)
//   bitscan64B[0]   is 1 if all input bits are zero
function [6:0] bitscan64B;
    input [63:0] m0;                     // 64 bits input
    logic [5:0]  pos;                    // index of the highest 1-bit
    logic [3:0]  nib_any;                // per 4-bit block flags of the selected 16-bit block
    logic [3:0]  nibble;                 // 4-bit block holding the highest 1-bit
    logic        none_set;               // 1 when the input is all zeroes

    pos = 6'd0;

    // step 1: pick the highest non-zero 16-bit block and flag its four 4-bit blocks
    if (|m0[63:48]) begin
        pos[5:4] = 2'd3;
        nib_any  = {(|m0[63:60]), (|m0[59:56]), (|m0[55:52]), (|m0[51:48])};
    end else if (|m0[47:32]) begin
        pos[5:4] = 2'd2;
        nib_any  = {(|m0[47:44]), (|m0[43:40]), (|m0[39:36]), (|m0[35:32])};
    end else if (|m0[31:16]) begin
        pos[5:4] = 2'd1;
        nib_any  = {(|m0[31:28]), (|m0[27:24]), (|m0[23:20]), (|m0[19:16])};
    end else begin
        pos[5:4] = 2'd0;
        nib_any  = {(|m0[15:12]), (|m0[11:8]), (|m0[7:4]), (|m0[3:0])};
    end

    // step 2: pick the highest flagged 4-bit block (block 0 as fallback)
    if      (nib_any[3]) pos[3:2] = 2'd3;
    else if (nib_any[2]) pos[3:2] = 2'd2;
    else if (nib_any[1]) pos[3:2] = 2'd1;
    else                 pos[3:2] = 2'd0;

    // extract the 4-bit block that contains the highest 1-bit
    nibble = m0[{pos[5:2], 2'b00} +: 4];

    // step 3: pick the highest 1-bit inside that 4-bit block
    if      (nibble[3]) pos[1:0] = 2'd3;
    else if (nibble[2]) pos[1:0] = 2'd2;
    else if (nibble[1]) pos[1:0] = 2'd1;
    else                pos[1:0] = 2'd0;

    // the selected 4-bit block is zero only if the whole input is zero
    none_set = ~|nibble;

    // return both values packed together
    bitscan64B = {pos, none_set};
endfunction
|
178 |
|
|
|
179 |
|
|
|
180 |
|
|
// bitscan64C: find the highest 1-bit in a 64-bit word, alternative implementation
// (noted by the original author as apparently the fastest).
// All "non-zero" flags are computed up front, independent of any selection:
//   m1[i] flags 4-bit block i of m0, m2[j] flags 16-bit block j of m0.
// The flags then drive a two-level priority selection, and the chosen
// 4-bit block is read directly from m0.
// Return value:
//   bitscan64C[6:1] is the index of the highest 1-bit in the input
//                   (0 when the input is all zeroes)
//   bitscan64C[0]   is 1 if all input bits are zero
function [6:0] bitscan64C;
    input [63:0] m0;                     // 64 bits input
    logic [5:0]  r;                      // index to highest 1-bit
    logic        iszero;                 // indicates that input is zero
    logic [15:0] m1;                     // m1[i] = 1 if m0[4*i+3 : 4*i] is non-zero
    logic [3:0]  m2;                     // m2[j] = 1 if m0[16*j+15 : 16*j] is non-zero
    logic [3:0]  m3;                     // 4-bit block that contains the highest 1-bit
    r = 0;

    // one flag per 4-bit block of the input
    for (integer i = 0; i < 16; i++) begin
        m1[i] = |m0[4*i +: 4];
    end

    // one flag per 16-bit block of the input.
    // All four bits must be assigned: m2[1] selects block 1 below, and
    // m2[0] is needed by the all-zero test at the end.
    for (integer j = 0; j < 4; j++) begin
        m2[j] = |m1[4*j +: 4];
    end

    // r[5:4] = highest non-zero 16-bit block,
    // r[3:2] = highest non-zero 4-bit block inside it (block 0 as fallback)
    if (m2[3]) begin
        r[5:4] = 3;
        if (m1[15]) r[3:2] = 3;
        else if (m1[14]) r[3:2] = 2;
        else if (m1[13]) r[3:2] = 1;
        else r[3:2] = 0;

    end else if (m2[2]) begin
        r[5:4] = 2;
        if (m1[11]) r[3:2] = 3;
        else if (m1[10]) r[3:2] = 2;
        else if (m1[9]) r[3:2] = 1;
        else r[3:2] = 0;

    end else if (m2[1]) begin
        r[5:4] = 1;
        if (m1[7]) r[3:2] = 3;
        else if (m1[6]) r[3:2] = 2;
        else if (m1[5]) r[3:2] = 1;
        else r[3:2] = 0;

    end else begin
        r[5:4] = 0;
        if (m1[3]) r[3:2] = 3;
        else if (m1[2]) r[3:2] = 2;
        else if (m1[1]) r[3:2] = 1;
        else r[3:2] = 0;

    end

    // extract the 4-bit block that contains the highest 1-bit
    m3 = m0[{r[5:2],2'b0}+: 4];

    // r[1:0] = highest 1-bit inside that 4-bit block
    if (m3[3]) r[1:0] = 3;
    else if (m3[2]) r[1:0] = 2;
    else if (m3[1]) r[1:0] = 1;
    else r[1:0] = 0;

    // test if everything is zero: no 16-bit block has a bit set
    iszero = ~|m2;

    // return two values
    return {r, iszero};
endfunction
|
260 |
|
|
|
261 |
|
|
|
262 |
|
|
// bitindex: find the index of the single 1-bit in a 64-bit input where only
// one bit is set. Used when bitscan relies on the output of roundp2.
// Use the formula b = a & ~(a-1) to isolate the lowest set bit before
// calling bitindex. Reverse the order of the bits to find the highest set bit.
// Return value is {pos, none_set}:
//   bitindex[6:1] is the position of the single 1-bit
//   bitindex[0]   is 1 if all input bits are zero
// Note that this function does not work if more than one input bit is 1:
// the result is then the bitwise OR of the indices of all set bits.
function [6:0] bitindex;
    input [63:0] m0;                     // 64 bits input, expected to have at most one bit set
    logic [5:0]  pos;                    // position of the 1-bit
    logic        none_set;               // indicates that input is zero

    // OR together the index of every set bit. Bit 0 has index 0 and
    // contributes nothing, so the loop starts at bit 1.
    pos = 6'd0;
    for (integer i = 1; i < 64; i++) begin
        pos = pos | ({6{m0[i]}} & i[5:0]);
    end

    // pos == 0 means no bit above bit 0 is set; bit 0 is checked separately
    none_set = ~|{pos, m0[0]};

    // return both values packed together
    bitindex = {pos, none_set};
endfunction
|
314 |
|
|
|
315 |
|
|
|
316 |
|
|
// reversebits8: mirror the bit order of a byte
// (output bit i is input bit 7-i).
function [7:0] reversebits8;
    input [7:0] in;                      // 8 bits input
    logic [7:0] mirrored;                // in with its bits in opposite order
    for (integer i = 0; i < 8; i++) begin
        mirrored[i] = in[7 - i];
    end
    reversebits8 = mirrored;
endfunction
|
321 |
|
|
|
322 |
|
|
// reversebits16: mirror the bit order of a 16-bit word.
// Each byte is bit-reversed with reversebits8 and the two bytes swap places.
function [15:0] reversebits16;
    input [15:0] in;                     // 16 bits input
    logic [7:0]  low_mirrored;           // reversed low byte, becomes the high byte
    logic [7:0]  high_mirrored;          // reversed high byte, becomes the low byte
    low_mirrored  = reversebits8(in[7:0]);
    high_mirrored = reversebits8(in[15:8]);
    reversebits16 = {low_mirrored, high_mirrored};
endfunction
|
327 |
|
|
|
328 |
|
|
// reversebits32: mirror the bit order of a 32-bit word.
// Each byte is bit-reversed with reversebits8 and written to the
// opposite byte position (byte 0 <-> byte 3, byte 1 <-> byte 2).
function [31:0] reversebits32;
    input [31:0] in;                     // 32 bits input
    logic [31:0] mirrored;               // in with its bits in opposite order
    for (integer b = 0; b < 4; b++) begin
        mirrored[8*(3 - b) +: 8] = reversebits8(in[8*b +: 8]);
    end
    reversebits32 = mirrored;
endfunction
|
333 |
|
|
|
334 |
|
|
// reversebits64: mirror the bit order of a 64-bit word.
// Each byte is bit-reversed with reversebits8 and written to the
// opposite byte position (byte 0 <-> byte 7, byte 1 <-> byte 6, ...).
function [63:0] reversebits64;
    input [63:0] in;                     // 64 bits input
    logic [63:0] mirrored;               // in with its bits in opposite order
    for (integer b = 0; b < 8; b++) begin
        mirrored[8*(7 - b) +: 8] = reversebits8(in[8*b +: 8]);
    end
    reversebits64 = mirrored;
endfunction
|
340 |
|
|
|
341 |
|
|
// truth_table_lookup: bitwise 3-input truth table for the truth_tab3 instruction.
// For every bit position, the corresponding bits of in3, in2 and in1 form a
// 3-bit index {in3, in2, in1} into ttable; the addressed table bit becomes
// the result bit at that position.
function [`RB1:0] truth_table_lookup;
    input [`RB1:0] in1;                  // input 1, index bit 0
    input [`RB1:0] in2;                  // input 2, index bit 1
    input [`RB1:0] in3;                  // input 3, index bit 2
    input [7:0]    ttable;               // 8 bit truth table
    logic [`RB1:0] looked_up;            // result, one table lookup per bit
    logic [2:0]    sel;                  // table index for the current bit
    for (integer pos = 0; pos < `RB; pos++) begin
        sel            = {in3[pos], in2[pos], in1[pos]};
        looked_up[pos] = ttable[sel];
    end
    return looked_up;
endfunction
|