//////////////////////////////////////////////////////////////////
//                                                              //
//  Barrel Shifter for Amber 2 Core                             //
//                                                              //
//  The design is optimized for Altera family of FPGAs,         //
//  and it can be used directly or adapted to other N-to-1 LUT  //
//  FPGA platforms.                                             //
//                                                              //
//  This file is part of the Amber project                      //
//  http://www.opencores.org/project,amber                      //
//                                                              //
//  Description                                                 //
//  Provides 32-bit shifts LSL, LSR, ASR and ROR                //
//                                                              //
//  Author(s):                                                  //
//      - Dmitry Tarnyagin, dmitry.tarnyagin@lockless.no        //
//                                                              //
//////////////////////////////////////////////////////////////////
//                                                              //
// Copyright (C) 2010-2013 Authors and OPENCORES.ORG            //
//                                                              //
// This source file may be used and distributed without         //
// restriction provided that this copyright statement is not    //
// removed from the file and that any derivative work contains  //
// the original copyright notice and the associated disclaimer. //
//                                                              //
// This source file is free software; you can redistribute it   //
// and/or modify it under the terms of the GNU Lesser General   //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any   //
// later version.                                               //
//                                                              //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU Lesser General Public License for more  //
// details.                                                     //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////


// a23_barrel_shift_fpga
//
// Purely combinational 32-bit barrel shifter producing the LSL, LSR, ASR and
// ROR results together with the shifter carry-out.
//
// A single 5-stage rotator serves every function.  The rotated word is then
// ANDed with a left-hand mask (LSL) and/or a right-hand mask (LSR/ASR/ROR),
// and for ASR the bits removed by the right-hand mask are filled with the
// sign.  Bit 31 of the result and the carry are computed separately for each
// function and selected at the end.
//
// Ports:
//   i_in              value to be shifted
//   i_carry_in        carry passed to o_carry_out when the shift amount is
//                     zero; also becomes o_out[31] for ROR when
//                     i_shift_imm_zero is high (RRX)
//   i_shift_amount    shift distance
//   i_shift_imm_zero  high when an immediate shift value of zero is selected;
//                     LSR/ASR are then treated as shifts by 32 and ROR as RRX
//   i_function        shift type, compared against LSL/LSR/ASR/ROR
//                     (expected to be provided by a23_localparams.vh)
//   o_out             shifted result
//   o_carry_out       shifter carry-out
module a23_barrel_shift_fpga (

input       [31:0]          i_in,
input                       i_carry_in,
input       [7:0]           i_shift_amount,     // uses 8 LSBs of Rs, or a 5 bit immediate constant
input                       i_shift_imm_zero,   // high when immediate shift value of zero selected
input       [1:0]           i_function,

output      [31:0]          o_out,
output                      o_carry_out

);

`include "a23_localparams.vh"

wire [31:0] rot_prod;       // Input rotated by the shift amount

wire [1:0]  lsl_out;        // LSL: {carry, bit_31}
wire [1:0]  lsr_out;        // LSR: {carry, bit_31}
wire [1:0]  asr_out;        // ASR: {carry, bit_31}
wire [1:0]  ror_out;        // ROR: {carry, bit_31}

reg [32:0]  lsl_mask;       // Left-hand mask
reg [32:0]  lsr_mask;       // Right-hand mask
reg [15:0]  low_mask;       // Mask calculation helper

reg [4:0]   shift_amount;   // Shift amount for the low-level shifter

reg [2:0]   lsl_selector;   // Left shift {shift_32, shift_over, shift_amount[4]}
reg [2:0]   lsr_selector;   // Right shift {shift_32, shift_over, shift_amount[4]}
reg [3:0]   low_selector;   // {shift_amount[3:0]}

reg         shift_nzero;    // Amount is not zero
reg         shift_over;     // Amount is 32 or higher
reg         shift_32;       // Amount is exactly 32
reg         asr_sign;       // Sign for ASR shift
reg         direction;      // Shift direction

wire [31:0] p_r;            // rot_prod rotated left by one more bit (right-shift product)
wire [31:0] p_l;            // Alias for rot_prod (left-shift product)


// Implementation details:
// Design is based on masking of the rotated input by a left-hand and a
// right-hand mask.  Rotated product calculation requires 5 levels of
// combinational logic, and masks must be ready before the product is ready.
// In fact masks require just 3 to 4 levels of logic cells using
// 4-to-1/2x3-to-1 Altera LUTs.

always @*
begin
    shift_32     = i_shift_amount == 8'd32;
    shift_over   = |i_shift_amount[7:5];
    shift_nzero  = |i_shift_amount[7:0];
    shift_amount = i_shift_amount[4:0];

    if (i_shift_imm_zero) begin
        if (i_function == LSR || i_function == ASR) begin
            // The form of the shift field which might be
            // expected to correspond to LSR #0 is used
            // to encode LSR #32, which has a zero result
            // with bit 31 of Rm as the carry output.
            shift_nzero = 1'b1;
            shift_over  = 1'b1;
            // Redundant and can be optimized out
            // shift_32 = 1'b1;
        end else if (i_function == ROR) begin
            // RRX (ROR with an immediate of 0): rotate by one bit; the
            // carry is inserted at bit 31 by the ror_out logic below.
            shift_amount[0] = 1'b1;
            shift_nzero     = 1'b1;
        end
    end

    // LSB sub-selector calculation. Usually it is taken
    // directly from the shift_amount, but ROR requires
    // no masking at all.
    case (i_function)
        LSL: low_selector = shift_amount[3:0];
        LSR: low_selector = shift_amount[3:0];
        ASR: low_selector = shift_amount[3:0];
        ROR: low_selector = 4'b0000;
    endcase

    // Left-hand MSB sub-selector calculation. Opaque for every function but LSL.
    case (i_function)
        LSL: lsl_selector = {shift_32, shift_over, shift_amount[4]};
        LSR: lsl_selector = 3'b0_1_0; // Opaque mask selector
        ASR: lsl_selector = 3'b0_1_0; // Opaque mask selector
        ROR: lsl_selector = 3'b0_1_0; // Opaque mask selector
    endcase

    // Right-hand MSB sub-selector calculation. Opaque for LSL, transparent for ROR.
    case (i_function)
        LSL: lsr_selector = 3'b0_1_0; // Opaque mask selector
        LSR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
        ASR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
        ROR: lsr_selector = 3'b0_0_0; // Transparent mask selector
    endcase

    // Direction
    case (i_function)
        LSL: direction = 1'b0; // Left shift
        LSR: direction = 1'b1; // Right shift
        ASR: direction = 1'b1; // Right shift
        ROR: direction = 1'b1; // Right shift
    endcase

    // Sign for ASR shift
    asr_sign = 1'b0;
    if (i_function == ASR && i_in[31])
        asr_sign = 1'b1;
end

// Generic rotate. Theoretical cost: 32x5 4-to-1 LUTs.
// Practically a bit higher due to high fanout of "direction".
//
// Stage i either passes its input through or rotates it left by 2**i.
// With direction = 0 a stage rotates when shift_amount[i] is set, so the
// chain rotates left by shift_amount.  With direction = 1 a stage rotates
// when shift_amount[i] is clear, so the chain rotates left by
// (31 - shift_amount); the extra one-bit left rotate in p_r turns that into
// a right rotate by shift_amount.
generate
    genvar i, j;
    for (i = 0; i < 5; i = i + 1)
    begin : netgen
        wire [31:0] in;
        reg  [31:0] out;
        for (j = 0; j < 32; j = j + 1)
        begin : net
            always @*
                out[j] = (in[j]          & (~shift_amount[i] ^ direction)) |
                         (in[wrap(j, i)] & ( shift_amount[i] ^ direction));
        end
    end

    // Stage order is reversed: stage 4 takes the module input and stage 0
    // comes last (the original note ties this to the "volatile"
    // shift_amount[0], which is the bit patched for RRX above).
    assign netgen[4].in = i_in;
    for (i = 1; i < 5; i = i + 1)
    begin : router
        assign netgen[i-1].in = netgen[i].out;
    end
endgenerate

// Aliasing
assign rot_prod = netgen[0].out;

// Submask calculated from LSB sub-selector: the low n bits are cleared,
// where n = low_selector.  All 16 selector values are listed.
// Cost: 16 4-to-1 LUTs.
always @*
    case (low_selector) // synthesis full_case parallel_case
        4'b0000: low_mask = 16'hffff;
        4'b0001: low_mask = 16'hfffe;
        4'b0010: low_mask = 16'hfffc;
        4'b0011: low_mask = 16'hfff8;
        4'b0100: low_mask = 16'hfff0;
        4'b0101: low_mask = 16'hffe0;
        4'b0110: low_mask = 16'hffc0;
        4'b0111: low_mask = 16'hff80;
        4'b1000: low_mask = 16'hff00;
        4'b1001: low_mask = 16'hfe00;
        4'b1010: low_mask = 16'hfc00;
        4'b1011: low_mask = 16'hf800;
        4'b1100: low_mask = 16'hf000;
        4'b1101: low_mask = 16'he000;
        4'b1110: low_mask = 16'hc000;
        4'b1111: low_mask = 16'h8000;
    endcase

// Left-hand mask calculation.  Bit 32 gates the carry, bits 31:0 gate the data.
// Selector is {shift_32, shift_over, shift_amount[4]}; the four patterns
// below cover all eight selector values.
// Cost: 33 4-to-1 LUTs.
always @*
    casez (lsl_selector) // synthesis full_case parallel_case
        3'b1??: lsl_mask = 33'h1_0000_0000;               // exactly 32: carry only
        3'b01?: lsl_mask = 33'h0_0000_0000;               // more than 32 (or opaque): nothing
        3'b001: lsl_mask = {1'b1, low_mask, 16'h0000};    // 16..31
        3'b000: lsl_mask = {17'h1_ffff, low_mask};        // 0..15
    endcase

// Right-hand mask calculation.  Mirror image of the left-hand mask.
// Cost: 33 4-to-1 LUTs.
always @*
    casez (lsr_selector) // synthesis full_case parallel_case
        3'b1??: lsr_mask = 33'h1_0000_0000;                         // exactly 32: carry only
        3'b01?: lsr_mask = 33'h0_0000_0000;                         // more than 32 (or opaque): nothing
        3'b000: lsr_mask = {1'b1, bit_swap(low_mask), 16'hffff};    // 0..15
        3'b001: lsr_mask = {17'h1_0000, bit_swap(low_mask)};        // 16..31
    endcase

// Right-shift product: rot_prod rotated left by one more bit
assign p_r = {rot_prod[30:0], rot_prod[31]};

// Left-shift product: rot_prod as is
assign p_l = rot_prod[31:0];

// ROR MSB, handling special cases (RRX shifts the carry into bit 31)
assign ror_out[0] = i_shift_imm_zero ? i_carry_in :
                                       p_r[31];

// ROR carry, handling special cases
assign ror_out[1] = i_shift_imm_zero ? i_in[0] :
                    shift_nzero      ? p_r[31] :
                                       i_carry_in;

// LSL MSB
assign lsl_out[0] = p_l[31] & lsl_mask[31];

// LSL carry, handling special cases
assign lsl_out[1] = shift_nzero ? p_l[0] & lsl_mask[32] :
                                  i_carry_in;

// LSR MSB
assign lsr_out[0] = p_r[31] & lsr_mask[31];

// LSR carry, handling special cases
assign lsr_out[1] = i_shift_imm_zero ? i_in[31] :
                    shift_nzero      ? p_r[31] & lsr_mask[32] :
                                       i_carry_in;

// ASR MSB
assign asr_out[0] = i_in[31] ? i_in[31] :
                               p_r[31] & lsr_mask[31];

// ASR carry, handling special cases
assign asr_out[1] = shift_over  ? i_in[31] :
                    shift_nzero ? p_r[31] :
                                  i_carry_in;

// Carry and MSB are calculated as above
assign {o_carry_out, o_out[31]} = i_function == LSL ? lsl_out :
                                  i_function == LSR ? lsr_out :
                                  i_function == ASR ? asr_out :
                                                      ror_out ;

// And the rest of result is the masked rotated input.
assign o_out[30:0] = (p_l[30:0] & lsl_mask[30:0]) |
                     (p_r[30:0] & lsr_mask[30:0]) |
                     (~lsr_mask[30:0] & {31{asr_sign}});

// Rotate helper: index of the bit that lands on position "pos" when the word
// is rotated left by 2**level, i.e. (pos - 2**level) modulo 32.
function [4:0] wrap;
input integer pos;
input integer level;
integer out;
begin
    out = pos - (1 << level);
    wrap = out[4:0];    // keeping the low 5 bits performs the modulo-32 wrap
end
endfunction

// Reverse the bit order of a 16-bit value (bit 0 <-> bit 15, and so on)
function [15:0] bit_swap;
input [15:0] value;
integer i;
begin
    for (i = 0; i < 16; i = i + 1)
        bit_swap[i] = value[15 - i];
end
endfunction

endmodule