1 |
2 |
hmanske |
------------------------------------------------------------------
-- PROJECT:     HiCoVec (highly configurable vector processor)
--
-- ENTITY:      shuffle
--
-- PURPOSE:     shuffle vector registers
--              also required for vmov commands
--
-- AUTHOR:      harald manske, haraldmanske@gmx.de
--
-- VERSION:     1.0
------------------------------------------------------------------
|
13 |
|
|
|
14 |
|
|
library ieee;
|
15 |
|
|
use ieee.std_logic_1164.all;
|
16 |
|
|
use ieee.std_logic_arith.all;
|
17 |
|
|
use ieee.numeric_std.all;
|
18 |
|
|
use ieee.std_logic_unsigned.all;
|
19 |
|
|
|
20 |
|
|
use work.cfg.all;
|
21 |
|
|
use work.datatypes.all;
|
22 |
|
|
|
23 |
|
|
-- Shuffle / move unit for vector registers.
--
-- Takes two source vectors and produces one result vector. Depending on
-- the build-time switches used by the architecture (use_shuffle,
-- use_vectorshift -- presumably declared in work.cfg), the result is a
-- shuffled vector, an unmodified copy of data_in_v, or data_in_v rotated
-- by vectorshift_width bits.
entity shuffle is
    port (
        -- clock; every register in the unit updates on its rising edge
        clk:             in  std_logic;

        -- request: moves the state machine from waiting to shuffling
        shuffle_go:      in  std_logic;

        -- driven by the shuffle state machine: '1' while it is waiting,
        -- '0' while a shuffle is in progress
        shuffle_valid:   out std_logic;

        -- first source vector (also the source for move / rotate)
        data_in_v:       in  vectordata_type;

        -- second source vector (only read by the shuffle logic)
        data_in_w:       in  vectordata_type;

        -- four 2-bit unit selectors, one per shuffle step; step 0 uses
        -- bits 1..0, step 3 uses bits 7..6
        vn:              in  std_logic_vector(7 downto 0);

        -- four 1-bit source selectors, one per shuffle step:
        -- '0' takes the unit from data_in_v, '1' from data_in_w
        ssss:            in  std_logic_vector(3 downto 0);

        -- selects which of the four permutation granularities is used
        vwidth:          in  std_logic_vector(1 downto 0);

        -- selects which result is forwarded to data_out
        shuffle_out_sel: in  std_logic_vector(1 downto 0);

        -- result vector
        data_out:        out vectordata_type
    );
end shuffle;
|
37 |
|
|
|
38 |
|
|
-- RTL implementation of the shuffle / move unit.
--
-- Overview of the shuffle data path (only generated when use_shuffle is set):
--   * The lowest max_shuffle_width bits of v or w form the shuffle window,
--     which is treated as four units of unit_width bits.
--   * A shuffle takes four clock cycles. In each cycle one unit of the
--     (optionally permuted) window is selected by two bits of vn, taken
--     from v or w as chosen by one bit of ssss, and shifted into the top
--     of a max_shuffle_width bit shift register.
--   * After the fourth shift the register holds the result, which is passed
--     through the inverse permutation before it is presented on data_out.
--
-- NOTE: the slice arithmetic in the permutation generators divides
-- unit_width by 2, 4 and 8; it assumes max_shuffle_width is a multiple
-- of 32 and not larger than 32 * k.
architecture rtl of shuffle is
    -- width of one of the four units that make up the shuffle window
    constant unit_width: integer := max_shuffle_width / 4;

    -- flat std_logic_vector views of the array-typed ports and results
    signal v, w, shuffle_output, output : std_logic_vector(32 * k -1 downto 0);

    -- selected source window and the shift register collecting the result
    signal input, reg: std_logic_vector (max_shuffle_width -1 downto 0);

    -- forward permutations of the source window (one per vwidth setting)
    signal perm00, perm01, perm10, permutation : std_logic_vector (max_shuffle_width -1 downto 0);

    -- inverse permutations applied to the shift register contents
    signal perm00_rev, perm01_rev, perm10_rev, permutation_rev : std_logic_vector (max_shuffle_width -1 downto 0);

    -- unit that is shifted into the register in the current step
    signal reg_input: std_logic_vector(unit_width -1 downto 0);

    signal shift: std_logic;                    -- enable for the shift register
    signal source: std_logic;                   -- '0': take unit from v, '1': from w
    signal sel: std_logic_vector(1 downto 0);   -- unit index for the current step

    -- The active state is called "shuffling" so that the enumeration
    -- literal does not hide the name of the entity.
    type statetype is (waiting, shuffling);
    signal state : statetype := waiting;
    signal nextstate : statetype := waiting;

    -- step counter (0..3) and its control signals
    signal counter: std_logic_vector(1 downto 0) := "00";
    signal inc, reset: std_logic;

begin
    -- convert input from array to std_logic_vector format
    v_gen : for i in 0 to k-1 generate
        v((i+1) * 32 -1 downto i * 32) <= data_in_v(i);
    end generate v_gen;

    -- Without the shuffle logic there is no state machine, so nothing else
    -- would drive shuffle_valid. The remaining move / rotate paths are
    -- purely combinational, hence the output is always valid.
    no_shuffle_valid_gen: if (not use_shuffle) generate
        shuffle_valid <= '1';
    end generate no_shuffle_valid_gen;

    -- perform shuffle command
    shuffle_gen: if use_shuffle generate
        w_gen : for i in 0 to k-1 generate
            w((i+1) * 32 -1 downto i * 32) <= data_in_w(i);
        end generate w_gen;

        -- state register
        state_reg: process (clk)
        begin
            if clk'event and clk = '1' then
                state <= nextstate;
            end if;
        end process state_reg;

        -- state transitions and control outputs
        state_logic: process (state, counter, shuffle_go)
        begin
            -- defaults, avoid latches
            inc <= '0';
            reset <= '0';
            shift <= '0';
            shuffle_valid <= '0';

            case state is
                -- WAITING STATE: result is valid, step counter is held at zero
                when waiting =>
                    shuffle_valid <= '1';
                    reset <= '1';

                    if shuffle_go = '1' then
                        nextstate <= shuffling;
                    else
                        nextstate <= waiting;
                    end if;

                -- SHUFFLE STATE: shift in one unit per cycle, leave after step 3
                when shuffling =>
                    shift <= '1';
                    inc <= '1';

                    if counter = "11" then
                        nextstate <= waiting;
                    else
                        nextstate <= shuffling;
                    end if;
            end case;
        end process state_logic;

        -- step counter with synchronous reset (reset has priority over inc)
        counter_reg: process (clk)
        begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    counter <= (others => '0');
                elsif inc = '1' then
                    counter <= counter + '1';
                end if;
            end if;
        end process counter_reg;

        -- shift register: moves down by one unit and takes the newly
        -- selected unit at the top
        shift_reg: process (clk)
        begin
            if clk'event and clk = '1' then
                if shift = '1' then
                    reg <= reg_input & reg(max_shuffle_width -1 downto unit_width);
                end if;
            end if;
        end process shift_reg;

        -- multiplexer: pick one of the four units of the permuted window
        reg_input <= permutation(1* unit_width -1 downto 0 * unit_width) when (sel = "00") else
                     permutation(2* unit_width -1 downto 1 * unit_width) when (sel = "01") else
                     permutation(3* unit_width -1 downto 2 * unit_width) when (sel = "10") else
                     permutation(4* unit_width- 1 downto 3 * unit_width);

        -- sel: unit index for the current step, taken from vn
        sel <= vn(7 downto 6) when (counter = "11") else
               vn(5 downto 4) when (counter = "10") else
               vn(3 downto 2) when (counter = "01") else
               vn(1 downto 0);

        -- source: source vector for the current step, taken from ssss
        source <= ssss(3) when (counter = "11") else
                  ssss(2) when (counter = "10") else
                  ssss(1) when (counter = "01") else
                  ssss(0);

        -- input multiplexer: shuffle window of v or w
        input <= v(max_shuffle_width -1 downto 0) when source = '0' else
                 w(max_shuffle_width -1 downto 0);

        -- permutations
        -- For each granularity the window is split into slices of
        -- unit_width/2, unit_width/4 or unit_width/8 bits. Unit i of the
        -- permuted window collects input slices i, i+4, i+8, ...; the
        -- *_rev signals apply the inverse mapping to the shift register.
        permutation_gen : for i in 0 to 3 generate

            perm_gen_10: for j in 0 to 1 generate
                perm10((i*2+j+1) * unit_width/2 -1 downto (i*2+j)*unit_width/2)
                    <= input((j*4+i+1)* unit_width/2 -1 downto (j*4+i)* unit_width/2);

                perm10_rev((j*4+i+1)* unit_width/2 -1 downto (j*4+i)* unit_width/2)
                    <= reg((i*2+j+1) * unit_width/2 -1 downto (i*2+j)*unit_width/2);
            end generate;

            perm_gen_01: for j in 0 to 3 generate
                perm01((i*4+j+1) * unit_width/4 -1 downto (i*4+j)*unit_width/4)
                    <= input((j*4+i+1)* unit_width/4 -1 downto (j*4+i)* unit_width/4);

                perm01_rev((j*4+i+1)* unit_width/4 -1 downto (j*4+i)* unit_width/4)
                    <= reg((i*4+j+1) * unit_width/4 -1 downto (i*4+j)*unit_width/4);
            end generate;

            perm_gen_00: for j in 0 to 7 generate
                perm00((i*8+j+1) * unit_width/8 -1 downto (i*8+j)*unit_width/8)
                    <= input((j*4+i+1)* unit_width/8 -1 downto (j*4+i)* unit_width/8);

                perm00_rev((j*4+i+1)* unit_width/8 -1 downto (j*4+i)* unit_width/8)
                    <= reg((i*8+j+1) * unit_width/8 -1 downto (i*8+j)*unit_width/8);
            end generate;

        end generate;

        -- vwidth multiplexer: "11" bypasses the permutation
        permutation <= input  when (vwidth = "11") else
                       perm10 when (vwidth = "10") else
                       perm01 when (vwidth = "01") else
                       perm00;

        permutation_rev <= reg        when (vwidth = "11") else
                           perm10_rev when (vwidth = "10") else
                           perm01_rev when (vwidth = "01") else
                           perm00_rev;

        -- output multiplexer: shuffled window in the low bits
        shuffle_output(max_shuffle_width -1 downto 0) <= permutation_rev(max_shuffle_width -1 downto 0);

        -- bits above the shuffle window are passed through from v unchanged
        greater_gen: if (k*32 > max_shuffle_width) generate
            shuffle_output(k*32-1 downto max_shuffle_width) <= v(k*32-1 downto max_shuffle_width);
        end generate greater_gen;
    end generate;

    -- move
    not_shuffle_not_shift_gen: if ((not use_shuffle) and (not use_vectorshift)) generate
        output <= v;
    end generate;

    -- move and shuffle (only bit 0 of shuffle_out_sel is evaluated)
    shuffle_not_shift_gen: if ((use_shuffle) and (not use_vectorshift)) generate
        output <= shuffle_output when shuffle_out_sel(0) = '0' else v;
    end generate;

    -- move and vectorshift
    -- "10": rotate v right by vectorshift_width bits
    -- "11": rotate v left by vectorshift_width bits
    not_shuffle_shift_gen: if ((not use_shuffle) and (use_vectorshift)) generate
        output <= v(vectorshift_width -1 downto 0) & v(32*k-1 downto vectorshift_width) when shuffle_out_sel = "10" else
                  v(32*k-vectorshift_width-1 downto 0) & v(32*k-1 downto 32*k-vectorshift_width) when shuffle_out_sel = "11" else
                  v;
    end generate;

    -- move, shuffle and vectorshift
    -- "00": shuffle result, "01": v, "10": v rotated right, otherwise: v rotated left
    shuffle_shift_gen: if ((use_shuffle) and (use_vectorshift)) generate
        output <= shuffle_output when shuffle_out_sel = "00" else
                  v when shuffle_out_sel = "01" else
                  v(vectorshift_width -1 downto 0) & v(32*k-1 downto vectorshift_width) when shuffle_out_sel = "10" else
                  v(32*k-vectorshift_width-1 downto 0) & v(32*k-1 downto 32*k-vectorshift_width);
    end generate;

    -- convert output from std_logic_vector in array format
    out_gen : for i in 0 to k-1 generate
        data_out(i) <= output((i+1)* 32 -1 downto i * 32);
    end generate out_gen;

end rtl;
|