OpenCores
URL https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk

Subversion Repositories mod_sim_exp

[/] [mod_sim_exp/] [tags/] [start_version/] [rtl/] [vhdl/] [core/] [systolic_pipeline.vhd] - Blame information for rev 48

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 JonasDC
------------------------------------------------------------------------------------ 
2
--                      
3
-- Geoffrey Ottoy - DraMCo research group
4
--
5
-- Module Name: systolic_pipeline.vhd / entity systolic_pipeline
6
-- 
7
-- Last Modified:       05/01/2012 
8
-- 
9
-- Description:         pipelined systolic array implementation of a Montgomery multiplier
10
--
11
--
12
-- Dependencies:        first_stage,
13
--                standard_stage,
14
--                last_stage,
15
--                stepping_logic (instantiated as stepping_control)
16
--
17
-- Revision:
18
-- Revision 3.00 - Made x_selection external
19
-- Revision 2.02 - Changed design to cope with new stepping_control (next_x)
20
-- Revision 2.01 - Created an extra constant s (step size = n/t) to fix a problem
21
--                 that occurred when t not = sqrt(n).
22
-- Revision 2.00 - Moved stepping logic and x_selection to separate submodules
23
--      Revision 1.00 - Architecture
24
--      Revision 0.01 - File Created
25
--
26
--
27
------------------------------------------------------------------------------------
28
--
29
-- NOTICE:
30
--
31
-- Copyright DraMCo research group. 2011. This code may contain portions patented
32
-- by other third parties!
33
--
34
------------------------------------------------------------------------------------
35
library IEEE;
36
use IEEE.STD_LOGIC_1164.ALL;
37
use IEEE.STD_LOGIC_ARITH.ALL;
38
use IEEE.STD_LOGIC_UNSIGNED.ALL;
39
 
40
---- Uncomment the following library declaration if instantiating
41
---- any Xilinx primitives in this code.
42
--library UNISIM;
43
--use UNISIM.VComponents.all;
44
 
45
-- p_sel: 
46
-- 01 = lower part
47
-- 10 = upper part
48
-- 11 = full range
49
 
50
-- Top-level entity of the pipelined systolic multiplier described in the file header.
-- NOTE(review): the architecture indexes q_i/c_i at tl-2 and stage_done_i (range t-2 downto 0) at tl+1, so tl >= 2 and t >= tl+3 appear to be required -- confirm.
entity systolic_pipeline is
51
        generic( n : integer := 1536; -- width of the operands (# bits)
52
                                t : integer := 192;     -- number of stages (divider of n) >= 2
53
                                tl: integer := 64 -- number of stages in the lower part (t_sel = tl when p_sel = "01")
54
                                -- best take t = sqrt(n)
55
        );
56
   port(core_clk : in  STD_LOGIC; -- clock, passed to every stage and to the stepping logic
57
                             my : in  STD_LOGIC_VECTOR((n) downto 0); -- one bit wider than m and y; presumably m + y -- TODO confirm
58
               y : in  STD_LOGIC_VECTOR((n-1) downto 0); -- operand y, sliced per stage
59
               m : in  STD_LOGIC_VECTOR((n-1) downto 0); -- operand m, sliced per stage
60
              xi : in  STD_LOGIC; -- x input bit; drives x_i(0), the xin of stage_0 and mid_start
61
                          start : in  STD_LOGIC; -- passed to the stepping logic
62
                          reset : in  STD_LOGIC; -- passed to every stage and to the stepping logic
63
                          p_sel : in  STD_LOGIC_VECTOR(1 downto 0); -- select which piece of the multiplier will be used
64
                          ready : out STD_LOGIC; -- driven by stepping_done of the stepping logic
65
                         next_x : out STD_LOGIC; -- start of stage 1, or start of stage tl+1 when p_sel = "10"
66
               r : out STD_LOGIC_VECTOR((n+1) downto 0) -- result; driven directly from the internal feedback vector a_i
67
        );
68
end systolic_pipeline;
69
 
70
architecture Structural of systolic_pipeline is
71
 
72
        constant s : integer := n/t; -- defines the size of the stages (# bits)
73
        constant size_l : integer := s*tl; -- # bits covered by the tl stages of the lower part
74
        constant size_h : integer :=  n - size_l; -- # bits remaining for the upper part
75
 
76
        -- Component declaration of the first pipeline stage; instantiated in this architecture as stage_0 and mid_start.
        component first_stage
77
        generic(width : integer := 4 -- must be the same as width of the standard stage
78
        );
79
        port(core_clk : in  STD_LOGIC;
80
                                  my : in  STD_LOGIC_VECTOR((width) downto 0); -- width+1 bits (one more than a standard stage slice)
81
                                        y : in  STD_LOGIC_VECTOR((width) downto 0); -- width+1 bits
82
                                        m : in  STD_LOGIC_VECTOR((width) downto 0); -- width+1 bits
83
                                 xin : in  STD_LOGIC;
84
                                xout : out STD_LOGIC; -- connected to xin of the following stage
85
                                qout : out STD_LOGIC; -- connected to qin of the following stage
86
                          a_msb : in  STD_LOGIC; -- fed from the a_i bit directly above this stage's result slice
87
                                cout : out STD_LOGIC; -- connected to cin of the following stage
88
                          start : in  STD_LOGIC;
89
                          reset : in  STD_LOGIC;
90
                          --ready : out STD_LOGIC;
91
                           done : out STD_LOGIC; -- used as the start of the following stage
92
                                        r : out STD_LOGIC_VECTOR((width-1) downto 0) -- width-bit result slice
93
        );
94
        end component;
95
 
96
        -- Component declaration of a middle pipeline stage; instantiated by the stages_l / stages_h generates and as stage_tl_1.
        component standard_stage
97
        generic(width : integer := 4
98
        );
99
   port(core_clk : in  STD_LOGIC;
100
                             my : in  STD_LOGIC_VECTOR((width-1) downto 0); -- width-bit operand slices
101
               y : in  STD_LOGIC_VECTOR((width-1) downto 0);
102
               m : in  STD_LOGIC_VECTOR((width-1) downto 0);
103
             xin : in  STD_LOGIC; -- from xout of the previous stage
104
             qin : in  STD_LOGIC; -- from qout of the previous stage
105
                           xout : out STD_LOGIC; -- to xin of the following stage
106
            qout : out STD_LOGIC; -- to qin of the following stage
107
                          a_msb : in  STD_LOGIC; -- fed from the a_i bit directly above this stage's result slice
108
                            cin : in  STD_LOGIC; -- from cout of the previous stage
109
                           cout : out STD_LOGIC; -- to cin of the following stage
110
                          start : in  STD_LOGIC;
111
                          reset : in  STD_LOGIC;
112
                         -- ready : out STD_LOGIC;
113
                           done : out STD_LOGIC; -- used as the start of the following stage
114
               r : out STD_LOGIC_VECTOR((width-1) downto 0) -- width-bit result slice
115
        );
116
        end component;
117
 
118
        -- Component declaration of the terminating pipeline stage; instantiated in this architecture as stage_t and mid_end.
        -- It has no x/q/c outputs and no done output; its result is two bits wider than a standard stage slice.
        component last_stage
119
        generic(width : integer := 4 -- must be the same as width of the standard stage
120
        );
121
   port(core_clk : in  STD_LOGIC;
122
                             my : in  STD_LOGIC_VECTOR((width-1) downto 0); -- width bits
123
               y : in  STD_LOGIC_VECTOR((width-2) downto 0); -- width-1 bits
124
               m : in  STD_LOGIC_VECTOR((width-2) downto 0); -- width-1 bits
125
             xin : in  STD_LOGIC; -- from xout of the previous stage
126
             qin : in  STD_LOGIC; -- from qout of the previous stage
127
                            cin : in  STD_LOGIC; -- from cout of the previous stage
128
                          start : in  STD_LOGIC;
129
                          reset : in  STD_LOGIC;
130
                        --  ready : out STD_LOGIC;
131
                        --   done : out STD_LOGIC;
132
               r : out STD_LOGIC_VECTOR((width+1) downto 0) -- width+2 bit result slice
133
        );
134
        end component;
135
 
136
        -- Component declaration of the pipeline controller; instantiated below under the label stepping_control.
        component stepping_logic
137
        generic( n : integer := 16; -- max nr of steps required to complete a multiplication
138
                                t : integer := 4 -- total nr of steps in the pipeline
139
        );
140
   port(core_clk : in  STD_LOGIC;
141
                          start : in  STD_LOGIC;
142
                          reset : in  STD_LOGIC;
143
                          t_sel : in integer range 0 to t; -- nr of stages in the pipeline piece
144
                          n_sel : in integer range 0 to n; -- nr of steps required for a complete multiplication
145
                start_first_stage : out STD_LOGIC; -- gated with lower_e_i / higher_e_i to start stage_0 or mid_start
146
                stepping_done : out STD_LOGIC -- drives the entity's ready output
147
        );
148
        end component;
149
 
150
        -- Internal signals; stage-indexed vectors use the stage number as index.
        signal start_stage_i : std_logic_vector((t-1) downto 0); -- start input of each stage
151
        --signal stage_ready_i : std_logic_vector((t-1) downto 0);
152
        signal stage_done_i : std_logic_vector((t-2) downto 0); -- done output of stages 0..t-2 (the last stage has none)
153
 
154
        signal x_i : std_logic_vector((t-1) downto 0) := (others=>'0'); -- xout -> xin chain between stages; x_i(0) is the xi port
155
        signal q_i : std_logic_vector((t-2) downto 0) := (others=>'0'); -- qout -> qin chain between stages
156
        signal c_i : std_logic_vector((t-2) downto 0) := (others=>'0'); -- cout -> cin chain between stages
157
        signal a_i : std_logic_vector((n+1) downto 0) := (others=>'0'); -- result feedback vector; source of a_msb and of output r
158
        signal r_tot : std_logic_vector((n+1) downto 0) := (others=>'0'); -- concatenated results of the main stage chain
159
        signal r_h : std_logic_vector(s-1 downto 0) := (others=>'0'); -- result of mid_start (first stage of the upper part)
160
        signal r_l : std_logic_vector((s+1) downto 0) := (others=>'0'); -- result of mid_end (last stage of the lower part)
161
        signal a_h : std_logic_vector((s*2)-1 downto 0) := (others=>'0'); -- 2*s-bit window holding r_h in its upper s bits
162
        signal a_l : std_logic_vector((s*2)-1 downto 0) := (others=>'0'); -- 2*s-bit window holding r_l in its lower s+2 bits
163
 
164
        --signal ready_i : std_logic;
165
        signal stepping_done_i : std_logic; -- stepping_done of the stepping logic; drives ready
166
        signal t_sel : integer range 0 to t := t; -- stage count selected by p_sel
167
        signal n_sel : integer range 0 to n := n; -- step count selected by p_sel
168
        signal split : std_logic := '0'; -- '1' only when p_sel = "11"
169
        signal lower_e_i : std_logic := '0'; -- '0' only when p_sel = "10"
170
        signal higher_e_i : std_logic := '0'; -- '1' only when p_sel = "10"
171
        signal start_pulses_i : std_logic := '0'; -- start_first_stage output of the stepping logic
172
        signal start_higher_i : std_logic := '0'; -- start of mid_start
173
        signal higher_0_done_i : std_logic := '0'; -- done of mid_start
174
        signal h_x_0, h_x_1 : std_logic := '0'; -- _0: output of mid_start; _1: OR-merged input of stage tl+1
175
        signal h_q_0, h_q_1 : std_logic := '0';
176
        signal h_c_0, h_c_1 : std_logic := '0';
177
        signal x_offset_i : integer range 0 to tl*s := 0; -- NOTE(review): not referenced by any statement in this architecture
178
        signal next_x_i : std_logic := '0'; -- internal copy of next_x
179
begin
180
 
181
        -- output mapping
182
        r <= a_i; -- a shift operation may still be needed here
183
        ready <= stepping_done_i;
184
 
185
        -- result feedback: outside the two stage slices around the split point, a_i simply follows r_tot
186
        a_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s));
187
        a_i(((tl-1)*s-1) downto 0) <= r_tot(((tl-1)*s-1) downto 0);
188
 
189
        a_l((s+1) downto 0) <= r_l; -- remaining upper bits of a_l keep their '0' initial value
190
        a_h((s*2)-1 downto s) <= r_h; -- lower s bits of a_h keep their '0' initial value
191
        -- slices of stages tl-1 and tl: take mid_end's result (lower part), mid_start's result (upper part) or the full-chain result
        with p_sel select
192
                a_i(((tl+1)*s-1) downto ((tl-1)*s)) <= a_l when "01",
193
                                                                                                                        a_h  when "10",
194
                                                                                                                        r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;
195
 
196
 
197
        -- signals from x_selection
198
        next_x_i <= start_stage_i(1) or (start_stage_i(tl+1) and higher_e_i); -- stage tl+1 only counts when the upper part runs alone
199
        --
200
        next_x <= next_x_i;
201
        x_i(0) <= xi;
202
 
203
        -- this module controls the pipeline operation
204
        -- number of stages in the selected pipeline piece
        with p_sel select
205
                t_sel <= tl when "01",
206
                         t-tl when "10",
207
                                        t when others;
208
 
209
        -- number of steps for the selected pipeline piece
        with p_sel select
210
                n_sel <= size_l-1 when "01",
211
                                        size_h-1 when "10",
212
                                        n-1 when others;
213
 
214
        -- lower part is enabled for every p_sel value except "10"
        with p_sel select
215
                lower_e_i <= '0' when "10",
216
                                                 '1' when others;
217
 
218
        -- upper-part entry stage (mid_start) is enabled only for p_sel = "10"
        with p_sel select
219
                higher_e_i <= '1' when "10",
220
                                                 '0' when others;
221
 
222
        split <= p_sel(0) and p_sel(1); -- '1' only for p_sel = "11" (full range)
223
 
224
 
225
        -- Pipeline controller instance: produces the start pulses for the first stage and the done flag that drives ready.
        stepping_control: stepping_logic
226
        generic map( n => n, -- max nr of steps required to complete a multiplication
227
                                t => t -- total nr of steps in the pipeline
228
        )
229
   port map(core_clk => core_clk,
230
                          start => start,
231
                          reset => reset,
232
                          t_sel => t_sel, -- stage count chosen from p_sel
233
                          n_sel => n_sel, -- step count chosen from p_sel
234
                start_first_stage => start_pulses_i,
235
                stepping_done => stepping_done_i
236
        );
237
 
238
        -- start signals for first stage of lower and higher part
239
        start_stage_i(0) <= start_pulses_i and lower_e_i; -- stage_0 starts unless p_sel = "10"
240
        start_higher_i <= start_pulses_i and (higher_e_i and not split); -- mid_start starts only for p_sel = "10"
241
 
242
        -- start signals for stage tl and tl+1 (full pipeline operation)
243
        start_stage_i(tl) <= stage_done_i(tl-1) and split; -- stage tl only runs when p_sel = "11"
244
        start_stage_i(tl+1) <= stage_done_i(tl) or higher_0_done_i; -- started by stage tl or by mid_start
245
 
246
        -- nothing special here, each previous stage starts the next
247
        start_signals_l: for i in 1 to tl-1 generate
248
                        start_stage_i(i) <= stage_done_i(i-1);
249
        end generate;
250
        start_signals_h: for i in tl+2 to t-1 generate
251
                        start_stage_i(i) <= stage_done_i(i-1);
252
        end generate;
253
 
254
        -- Stage 0 of the main chain: takes the lowest s+1 operand bits and produces result bits s-1 downto 0.
        stage_0: first_stage
255
        generic map(width => s
256
        )
257
        port map(core_clk => core_clk,
258
                                  my => my(s downto 0),
259
                                        y => y(s downto 0),
260
                                        m => m(s downto 0),
261
                                 xin => x_i(0),
262
                                xout => x_i(1),
263
                                qout => q_i(0),
264
                          a_msb => a_i(s), -- lowest fed-back bit of the next stage's slice
265
                                cout => c_i(0),
266
                          start => start_stage_i(0),
267
                          reset => reset,
268
                          --ready => stage_ready_i(0),
269
                           done => stage_done_i(0),
270
                                        r => r_tot((s-1) downto 0)
271
        );
272
 
273
        -- Standard stages 1..tl. Stage i takes operand bits (i+1)*s downto s*i+1 and produces result bits (i+1)*s-1 downto s*i.
        stages_l: for i in 1 to (tl) generate
274
                standard_stages: standard_stage
275
                generic map(width => s
276
                )
277
                port map(core_clk => core_clk,
278
                                          my => my(((i+1)*s) downto ((s*i)+1)),
279
                                                y => y(((i+1)*s) downto ((s*i)+1)),
280
                                                m => m(((i+1)*s) downto ((s*i)+1)),
281
                                         xin => x_i(i),
282
                                         qin => q_i(i-1),
283
                                        xout => x_i(i+1),
284
                                        qout => q_i(i),
285
                                  a_msb => a_i((i+1)*s), -- lowest fed-back bit of the next stage's slice
286
                                         cin => c_i(i-1),
287
                                        cout => c_i(i),
288
                                  start => start_stage_i(i),
289
                                  reset => reset,
290
                                  --ready => stage_ready_i(i),
291
                                        done => stage_done_i(i),
292
                                                r => r_tot((((i+1)*s)-1) downto (s*i))
293
                );
294
        end generate;
295
 
296
        -- Inputs of stage tl+1 are the OR of stage tl's outputs and mid_start's outputs, so either predecessor can feed it.
        h_c_1 <= h_c_0 or c_i(tl);
297
        h_q_1 <= h_q_0 or q_i(tl);
298
        h_x_1 <= h_x_0 or x_i(tl+1);
299
 
300
        -- Standard stage tl+1: same slicing as the generated stages, but fed from the merged h_*_1 signals.
        stage_tl_1: standard_stage
301
                generic map(width => s
302
                )
303
                port map(core_clk => core_clk,
304
                                          my => my(((tl+2)*s) downto ((s*(tl+1))+1)),
305
                                                y => y(((tl+2)*s) downto ((s*(tl+1))+1)),
306
                                                m => m(((tl+2)*s) downto ((s*(tl+1))+1)),
307
                                         --xin => x_i(tl+1),
308
                                         xin => h_x_1,
309
                                         --qin => q_i(tl),
310
                                         qin => h_q_1,
311
                                        xout => x_i(tl+2),
312
                                        qout => q_i(tl+1),
313
                                  a_msb => a_i((tl+2)*s),
314
                                         --cin => c_i(tl),
315
                                         cin => h_c_1,
316
                                        cout => c_i(tl+1),
317
                                  start => start_stage_i(tl+1),
318
                                  reset => reset,
319
                                  --ready => stage_ready_i(i),
320
                                        done => stage_done_i(tl+1),
321
                                                r => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
322
                );
323
 
324
        -- Standard stages tl+2..t-2 of the upper part; wiring is identical to the stages_l generate.
        stages_h: for i in (tl+2) to (t-2) generate
325
                standard_stages: standard_stage
326
                generic map(width => s
327
                )
328
                port map(core_clk => core_clk,
329
                                          my => my(((i+1)*s) downto ((s*i)+1)),
330
                                                y => y(((i+1)*s) downto ((s*i)+1)),
331
                                                m => m(((i+1)*s) downto ((s*i)+1)),
332
                                         xin => x_i(i),
333
                                         qin => q_i(i-1),
334
                                        xout => x_i(i+1),
335
                                        qout => q_i(i),
336
                                  a_msb => a_i((i+1)*s), -- lowest fed-back bit of the next stage's slice
337
                                         cin => c_i(i-1),
338
                                        cout => c_i(i),
339
                                  start => start_stage_i(i),
340
                                  reset => reset,
341
                                  --ready => stage_ready_i(i),
342
                                        done => stage_done_i(i),
343
                                                r => r_tot((((i+1)*s)-1) downto (s*i))
344
                );
345
        end generate;
346
 
347
        -- Final stage (index t-1) of the main chain: consumes the top operand bits and produces the top s+2 result bits.
        stage_t: last_stage
348
        generic map(width => s -- must be the same as width of the standard stage
349
        )
350
   port map(core_clk => core_clk,
351
                                 my => my(n downto ((n-s)+1)),          --width-1
352
                                  y => y((n-1) downto ((n-s)+1)),       --width-2
353
                                  m => m((n-1) downto ((n-s)+1)),       --width-2
354
                                          xin => x_i(t-1),
355
                                          qin => q_i(t-2),
356
                                          cin => c_i(t-2),
357
                              start => start_stage_i(t-1),
358
                                   reset => reset,
359
                                   --ready => stage_ready_i(t-1),
360
                   r => r_tot((n+1) downto (n-s))               --width+1
361
        );
362
 
363
        -- Alternative first stage at stage position tl, used as entry point when only the upper part runs (p_sel = "10").
        -- It reads the same xi bit as stage_0; its outputs are OR-merged into the inputs of stage tl+1.
        mid_start: first_stage
364
        generic map(width => s
365
        )
366
        port map(core_clk => core_clk,
367
                                  my => my((tl*s+s) downto tl*s),
368
                                        y => y((tl*s+s) downto tl*s),
369
                                        m => m((tl*s+s) downto tl*s),
370
                                 xin => x_i(0),
371
                                xout => h_x_0,
372
                                qout => h_q_0,
373
                          a_msb => a_i((tl+1)*s),
374
                                cout => h_c_0,
375
                          start => start_higher_i, -- pulses only when p_sel = "10"
376
                          reset => reset,
377
                          --ready => stage_ready_i(0),
378
                           done => higher_0_done_i, -- starts stage tl+1
379
                                        r => r_h -- placed into a_i via a_h when p_sel = "10"
380
        );
381
 
382
        -- Alternative last stage at stage position tl-1, terminating the lower part.
        -- It shares x/q/c inputs and the start signal with standard stage tl-1; its result is used only when p_sel = "01".
        mid_end: last_stage
383
        generic map(width => s -- must be the same as width of the standard stage
384
        )
385
   port map(core_clk => core_clk,
386
                                 my => my((tl*s) downto ((tl-1)*s)+1),          --width-1
387
                                  y => y(((tl*s)-1) downto ((tl-1)*s)+1),       --width-2
388
                                  m => m(((tl*s)-1) downto ((tl-1)*s)+1),       --width-2
389
                                          xin => x_i(tl-1),
390
                                          qin => q_i(tl-2),
391
                                          cin => c_i(tl-2),
392
                              start => start_stage_i(tl-1),
393
                                   reset => reset,
394
                                   --ready => stage_ready_i(t-1),
395
                   r => r_l             --width+1
396
        );
397
 
398
end Structural;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.