URL
https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk
Subversion Repositories mod_sim_exp
Compare Revisions
- This comparison shows the changes necessary to convert path
/mod_sim_exp/trunk
- from Rev 21 to Rev 22
- ↔ Reverse comparison
Rev 21 → Rev 22
/rtl/vhdl/core/mod_sim_exp_pkg.vhd
409,6 → 409,42
); |
end component x_shift_reg; |
|
-------------------------------------------------------------------- |
-- systolic_pipeline |
-------------------------------------------------------------------- |
-- systolic pipeline implementation of the Montgomery multiplier |
-- divides the pipeline into 2 parts, so 3 operand widths are supported |
-- |
-- p_sel: |
-- 01 = lower part |
-- 10 = upper part |
-- 11 = full range |
component systolic_pipeline is |
generic( |
n : integer := 1536; -- width of the operands (# bits) |
t : integer := 192; -- total number of stages (divisor of n) >= 2 |
tl : integer := 64 -- number of stages in the lower pipeline part (best take t = sqrt(n)) |
); |
port( |
-- clock input |
core_clk : in std_logic; |
-- modulus (m) and y operand inputs, n bits each; my carries one extra bit |
my : in std_logic_vector((n) downto 0); -- m+y, (n+1) bits wide |
y : in std_logic_vector((n-1) downto 0); |
m : in std_logic_vector((n-1) downto 0); |
-- x operand input (serial, one bit at a time) |
xi : in std_logic; |
-- control signals |
start : in std_logic; -- start multiplier |
reset : in std_logic; |
p_sel : in std_logic_vector(1 downto 0); -- pipeline part to use: 01 = lower part, 10 = upper part, 11 = full range |
ready : out std_logic; -- multiplication ready |
next_x : out std_logic; -- next x operand bit |
-- result out, (n+2) bits wide |
r : out std_logic_vector((n+1) downto 0) |
); |
end component systolic_pipeline; |
|
component autorun_cntrl is |
port ( |
clk : in std_logic; |
595,25 → 631,4
); |
end component operands_sp; |
|
-- systolic pipeline implementation of the Montgomery multiplier; the |
-- pipeline is divided into a lower and an upper part, selected through p_sel |
component systolic_pipeline is |
generic( |
n : integer := 1536; -- width of the operands (# bits) |
t : integer := 192; -- total number of stages (divisor of n) >= 2 |
tl : integer := 64 -- number of stages in the lower pipeline part (best take t = sqrt(n)) |
); |
port( |
-- clock input |
core_clk : in std_logic; |
-- m+y, (n+1) bits wide |
my : in std_logic_vector((n) downto 0); |
-- y operand and modulus m, n bits each |
y : in std_logic_vector((n-1) downto 0); |
m : in std_logic_vector((n-1) downto 0); |
-- x operand input (serial) |
xi : in std_logic; |
-- control signals: start the multiplier / reset |
start : in std_logic; |
reset : in std_logic; |
p_sel : in std_logic_vector(1 downto 0); -- pipeline part to use: 01 = lower part, 10 = upper part, 11 = full range |
-- multiplication ready |
ready : out std_logic; |
-- request for the next x operand bit |
next_x : out std_logic; |
-- result out, (n+2) bits wide |
r : out std_logic_vector((n+1) downto 0) |
); |
end component systolic_pipeline; |
|
end package mod_sim_exp_pkg; |
/rtl/vhdl/core/systolic_pipeline.vhd
6,8 → 6,8
---- http://www.opencores.org/cores/mod_sim_exp/ ---- |
---- ---- |
---- Description ---- |
---- pipelined systolic array implementation of a montgomery ---- |
---- multiplier ---- |
---- structural description of a pipelined systolic array ---- |
---- implementation of a montgomery multiplier. ---- |
---- ---- |
---- Dependencies: ---- |
---- - stepping_logic ---- |
54,29 → 54,35
library mod_sim_exp; |
use mod_sim_exp.mod_sim_exp_pkg.all; |
|
|
-- systolic pipeline implementation of the Montgomery multiplier |
-- divides the pipeline into 2 parts, so 3 operand widths are supported |
-- |
-- p_sel: |
-- 01 = lower part |
-- 10 = upper part |
-- 11 = full range |
|
-- 01 = lower part |
-- 10 = upper part |
-- 11 = full range |
-- NOTE(review): this listing interleaves the Rev 21 and Rev 22 lines of the |
-- entity; t, tl, my, start, ready and next_x each appear twice (old line |
-- first, commented new line second). |
entity systolic_pipeline is |
generic( |
n : integer := 1536; -- width of the operands (# bits) |
t : integer := 192; -- number of stages (divisor of n) >= 2 |
tl : integer := 64 -- best take t = sqrt(n) |
t : integer := 192; -- total number of stages (divisor of n) >= 2 |
tl : integer := 64 -- lower number of stages (best take t = sqrt(n)) |
); |
port( |
-- clock input |
core_clk : in std_logic; |
my : in std_logic_vector((n) downto 0); |
-- modulus and y operand input (n)-bit |
my : in std_logic_vector((n) downto 0); -- m+y |
y : in std_logic_vector((n-1) downto 0); |
m : in std_logic_vector((n-1) downto 0); |
-- x operand input (serial) |
xi : in std_logic; |
start : in std_logic; |
-- control signals |
start : in std_logic; -- start multiplier |
reset : in std_logic; |
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used |
ready : out std_logic; |
next_x : out std_logic; |
ready : out std_logic; -- multiplication ready |
next_x : out std_logic; -- next x operand bit |
-- result out |
r : out std_logic_vector((n+1) downto 0) |
); |
end systolic_pipeline; |
83,85 → 89,95
|
|
architecture Structural of systolic_pipeline is |
constant s : integer := n/t; -- defines the size of the stages (# bits) |
constant size_l : integer := s*tl; |
constant size_h : integer := n - size_l; |
constant s : integer := n/t; -- stage width (# bits) |
constant nl : integer := s*tl; -- lower pipeline width (# bits) |
constant nh : integer := n - nl; -- higher pipeline width (# bits) |
|
signal start_stage_i : std_logic_vector((t-1) downto 0); |
--signal stage_ready_i : std_logic_vector((t-1) downto 0); |
signal stage_done_i : std_logic_vector((t-2) downto 0); |
-- pipeline selection flags |
signal p_full_selected : std_logic; -- full |
signal p_low_full_selected : std_logic; -- low or full |
signal p_high_selected : std_logic; -- high |
|
signal t_sel : integer range 0 to t; -- width in stages of selected pipeline part |
signal n_sel : integer range 0 to n; -- width in bits of selected pipeline part |
|
-- general stage interconnect signals |
signal start_stage : std_logic_vector((t-1) downto 0); -- vector for the start bits for the stages |
signal done_stage : std_logic_vector((t-2) downto 0); -- vector for the done bits of the stages |
signal xin_stage : std_logic_vector((t-1) downto 0); -- vector for the xin bits of the stages |
signal qout_stage : std_logic_vector((t-2) downto 0); -- vector for the qout bits of the stages |
signal cout_stage : std_logic_vector((t-2) downto 0); -- vector for the cout bits of the stages |
|
-- stage result signals |
signal r_tot : std_logic_vector((n+1) downto 0); -- result of the total multiplier |
signal r_stage_midstart : std_logic_vector(s-1 downto 0); -- result of the mid-start stage of the multiplier |
signal r_stage_midend : std_logic_vector((s+1) downto 0); -- result of the mid-end stage of the multiplier |
|
-- mapped result registers |
signal r_i : std_logic_vector((n+1) downto 0); |
signal r_i_stage_midstart : std_logic_vector((s*2)-1 downto 0); |
signal r_i_stage_midend : std_logic_vector((s*2)-1 downto 0); |
|
signal x_i : std_logic_vector((t-1) downto 0) := (others => '0'); |
signal q_i : std_logic_vector((t-2) downto 0) := (others => '0'); |
signal c_i : std_logic_vector((t-2) downto 0) := (others => '0'); |
signal a_i : std_logic_vector((n+1) downto 0) := (others => '0'); |
signal r_tot : std_logic_vector((n+1) downto 0) := (others => '0'); |
signal r_h : std_logic_vector(s-1 downto 0) := (others => '0'); |
signal r_l : std_logic_vector((s+1) downto 0) := (others => '0'); |
signal a_h : std_logic_vector((s*2)-1 downto 0) := (others => '0'); |
signal a_l : std_logic_vector((s*2)-1 downto 0) := (others => '0'); |
|
--signal ready_i : std_logic; |
signal stepping_done_i : std_logic; |
signal t_sel : integer range 0 to t := t; |
signal n_sel : integer range 0 to n := n; |
signal split : std_logic := '0'; |
signal lower_e_i : std_logic := '0'; |
signal higher_e_i : std_logic := '0'; |
signal start_pulses_i : std_logic := '0'; |
signal start_higher_i : std_logic := '0'; |
signal higher_0_done_i : std_logic := '0'; |
signal h_x_0, h_x_1 : std_logic := '0'; |
signal h_q_0, h_q_1 : std_logic := '0'; |
signal h_c_0, h_c_1 : std_logic := '0'; |
signal x_offset_i : integer range 0 to tl*s := 0; |
signal next_x_i : std_logic := '0'; |
|
-- pipeline start signals |
signal start_first_stage : std_logic; -- start for full and low pipeline |
signal start_higher : std_logic; -- start for higher pipeline |
|
-- midstart stage signals |
signal done_stage_midstart : std_logic; |
signal xout_stage_midstart : std_logic; |
signal qout_stage_midstart : std_logic; |
signal cout_stage_midstart : std_logic; |
|
-- tl+1 stage signals |
signal xin_stage_tl_1 : std_logic; |
signal qin_stage_tl_1 : std_logic; |
signal cin_stage_tl_1 : std_logic; |
begin |
|
-- output mapping |
r <= a_i; -- a shift operation may still be needed here |
ready <= stepping_done_i; |
r <= r_i; |
|
-- result feedback |
a_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s)); |
a_i(((tl-1)*s-1) downto 0) <= r_tot(((tl-1)*s-1) downto 0); |
r_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s)); |
r_i(((tl-1)*s-1) downto 0) <= r_tot(((tl-1)*s-1) downto 0); |
|
a_l((s+1) downto 0) <= r_l; |
a_h((s*2)-1 downto s) <= r_h; |
r_i_stage_midend((s*2)-1 downto s+2) <= (others=>'0'); |
r_i_stage_midend((s+1) downto 0) <= r_stage_midend; |
r_i_stage_midstart((s*2)-1 downto s) <= r_stage_midstart; |
r_i_stage_midstart((s-1) downto 0) <= (others=>'0'); |
with p_sel select |
a_i(((tl+1)*s-1) downto ((tl-1)*s)) <= a_l when "01", |
a_h when "10", |
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others; |
r_i(((tl+1)*s-1) downto ((tl-1)*s)) <= r_i_stage_midend when "01", |
r_i_stage_midstart when "10", |
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others; |
|
-- signals from x_selection |
next_x_i <= start_stage_i(1) or (start_stage_i(tl+1) and higher_e_i); |
-- |
next_x <= next_x_i; |
x_i(0) <= xi; |
next_x <= start_stage(1) or (start_stage(tl+1) and p_high_selected); |
xin_stage(0) <= xi; |
|
-- this module controls the pipeline operation |
-- width in stages for selected pipeline |
with p_sel select |
t_sel <= tl when "01", |
t-tl when "10", |
t when others; |
|
t_sel <= tl when "01", -- lower pipeline part |
t-tl when "10", -- higher pipeline part |
t when others; -- full pipeline |
|
-- width in bits for selected pipeline |
with p_sel select |
n_sel <= size_l-1 when "01", |
size_h-1 when "10", |
n-1 when others; |
n_sel <= nl-1 when "01", -- lower pipeline part |
nh-1 when "10", -- higher pipeline part |
n-1 when others; -- full pipeline |
|
with p_sel select |
lower_e_i <= '0' when "10", |
'1' when others; |
p_low_full_selected <= '0' when "10", -- higher pipeline part |
'1' when others; -- full or lower pipeline |
|
with p_sel select |
higher_e_i <= '1' when "10", |
'0' when others; |
p_high_selected <= '1' when "10", -- higher pipeline part |
'0' when others; -- full or lower pipeline |
|
split <= p_sel(0) and p_sel(1); |
p_full_selected <= p_sel(0) and p_sel(1); |
|
|
-- stepping control logic to keep track of the multiplication and when it is done |
stepping_control : stepping_logic |
generic map( |
n => n, -- max nr of steps required to complete a multiplication |
173,27 → 189,29
reset => reset, |
t_sel => t_sel, |
n_sel => n_sel, |
start_first_stage => start_pulses_i, |
stepping_done => stepping_done_i |
start_first_stage => start_first_stage, |
stepping_done => ready |
); |
|
-- start signals for first stage of lower and higher part |
start_stage_i(0) <= start_pulses_i and lower_e_i; |
start_higher_i <= start_pulses_i and (higher_e_i and not split); |
start_stage(0) <= start_first_stage and p_low_full_selected; |
start_higher <= start_first_stage and p_high_selected; |
|
-- start signals for stage tl and tl+1 (full pipeline operation) |
start_stage_i(tl) <= stage_done_i(tl-1) and split; |
start_stage_i(tl+1) <= stage_done_i(tl) or higher_0_done_i; |
start_stage(tl) <= done_stage(tl-1) and p_full_selected; -- only pass the start signal if full pipeline |
start_stage(tl+1) <= done_stage(tl) or done_stage_midstart; |
|
-- nothing special here, each stage is started by the previous one |
start_signals_l: for i in 1 to tl-1 generate |
start_stage_i(i) <= stage_done_i(i-1); |
start_stage(i) <= done_stage(i-1); |
end generate; |
|
start_signals_h: for i in tl+2 to t-1 generate |
start_stage_i(i) <= stage_done_i(i-1); |
start_stage(i) <= done_stage(i-1); |
end generate; |
|
|
-- first stage |
-- bits (s downto 0) |
stage_0 : first_stage |
generic map( |
width => s |
203,18 → 221,20
my => my(s downto 0), |
y => y(s downto 0), |
m => m(s downto 0), |
xin => x_i(0), |
xout => x_i(1), |
qout => q_i(0), |
a_msb => a_i(s), |
cout => c_i(0), |
start => start_stage_i(0), |
xin => xin_stage(0), |
xout => xin_stage(1), |
qout => qout_stage(0), |
a_msb => r_i(s), |
cout => cout_stage(0), |
start => start_stage(0), |
reset => reset, |
--ready => stage_ready_i(0), |
done => stage_done_i(0), |
r => r_tot((s-1) downto 0) |
done => done_stage(0), |
r => r_tot((s-1) downto 0) |
); |
|
-- lower pipeline standard stages: stages tl downto 1 |
-- bits ((tl+1)*s downto s+1) |
-- (nl downto s+1) |
stages_l : for i in 1 to (tl) generate |
standard_stages : standard_stage |
generic map( |
225,24 → 245,23
my => my(((i+1)*s) downto ((s*i)+1)), |
y => y(((i+1)*s) downto ((s*i)+1)), |
m => m(((i+1)*s) downto ((s*i)+1)), |
xin => x_i(i), |
qin => q_i(i-1), |
xout => x_i(i+1), |
qout => q_i(i), |
a_msb => a_i((i+1)*s), |
cin => c_i(i-1), |
cout => c_i(i), |
start => start_stage_i(i), |
xin => xin_stage(i), |
qin => qout_stage(i-1), |
xout => xin_stage(i+1), |
qout => qout_stage(i), |
a_msb => r_i((i+1)*s), |
cin => cout_stage(i-1), |
cout => cout_stage(i), |
start => start_stage(i), |
reset => reset, |
--ready => stage_ready_i(i), |
done => stage_done_i(i), |
r => r_tot((((i+1)*s)-1) downto (s*i)) |
done => done_stage(i), |
r => r_tot((((i+1)*s)-1) downto (s*i)) |
); |
end generate; |
|
h_c_1 <= h_c_0 or c_i(tl); |
h_q_1 <= h_q_0 or q_i(tl); |
h_x_1 <= h_x_0 or x_i(tl+1); |
cin_stage_tl_1 <= cout_stage_midstart or cout_stage(tl); |
qin_stage_tl_1 <= qout_stage_midstart or qout_stage(tl); |
xin_stage_tl_1 <= xout_stage_midstart or xin_stage(tl+1); |
|
stage_tl_1 : standard_stage |
generic map( |
253,22 → 272,19
my => my(((tl+2)*s) downto ((s*(tl+1))+1)), |
y => y(((tl+2)*s) downto ((s*(tl+1))+1)), |
m => m(((tl+2)*s) downto ((s*(tl+1))+1)), |
--xin => x_i(tl+1), |
xin => h_x_1, |
--qin => q_i(tl), |
qin => h_q_1, |
xout => x_i(tl+2), |
qout => q_i(tl+1), |
a_msb => a_i((tl+2)*s), |
--cin => c_i(tl), |
cin => h_c_1, |
cout => c_i(tl+1), |
start => start_stage_i(tl+1), |
reset => reset, |
--ready => stage_ready_i(i), |
done => stage_done_i(tl+1), |
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1))) |
xin => xin_stage_tl_1, |
qin => qin_stage_tl_1, |
xout => xin_stage(tl+2), |
qout => qout_stage(tl+1), |
a_msb => r_i((tl+2)*s), |
cin => cin_stage_tl_1, |
cout => cout_stage(tl+1), |
start => start_stage(tl+1), |
reset => reset, |
done => done_stage(tl+1), |
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1))) |
); |
|
|
stages_h : for i in (tl+2) to (t-2) generate |
standard_stages : standard_stage |
280,18 → 296,17
my => my(((i+1)*s) downto ((s*i)+1)), |
y => y(((i+1)*s) downto ((s*i)+1)), |
m => m(((i+1)*s) downto ((s*i)+1)), |
xin => x_i(i), |
qin => q_i(i-1), |
xout => x_i(i+1), |
qout => q_i(i), |
a_msb => a_i((i+1)*s), |
cin => c_i(i-1), |
cout => c_i(i), |
start => start_stage_i(i), |
xin => xin_stage(i), |
qin => qout_stage(i-1), |
xout => xin_stage(i+1), |
qout => qout_stage(i), |
a_msb => r_i((i+1)*s), |
cin => cout_stage(i-1), |
cout => cout_stage(i), |
start => start_stage(i), |
reset => reset, |
--ready => stage_ready_i(i), |
done => stage_done_i(i), |
r => r_tot((((i+1)*s)-1) downto (s*i)) |
done => done_stage(i), |
r => r_tot((((i+1)*s)-1) downto (s*i)) |
); |
end generate; |
|
304,12 → 319,11
my => my(n downto ((n-s)+1)), --width-1 |
y => y((n-1) downto ((n-s)+1)), --width-2 |
m => m((n-1) downto ((n-s)+1)), --width-2 |
xin => x_i(t-1), |
qin => q_i(t-2), |
cin => c_i(t-2), |
start => start_stage_i(t-1), |
xin => xin_stage(t-1), |
qin => qout_stage(t-2), |
cin => cout_stage(t-2), |
start => start_stage(t-1), |
reset => reset, |
--ready => stage_ready_i(t-1), |
r => r_tot((n+1) downto (n-s)) --width+1 |
); |
|
322,16 → 336,15
my => my((tl*s+s) downto tl*s), |
y => y((tl*s+s) downto tl*s), |
m => m((tl*s+s) downto tl*s), |
xin => x_i(0), |
xout => h_x_0, |
qout => h_q_0, |
a_msb => a_i((tl+1)*s), |
cout => h_c_0, |
start => start_higher_i, |
xin => xin_stage(0), |
xout => xout_stage_midstart, |
qout => qout_stage_midstart, |
a_msb => r_i((tl+1)*s), |
cout => cout_stage_midstart, |
start => start_higher, |
reset => reset, |
--ready => stage_ready_i(0), |
done => higher_0_done_i, |
r => r_h |
done => done_stage_midstart, |
r => r_stage_midstart |
); |
|
mid_end : last_stage |
343,13 → 356,12
my => my((tl*s) downto ((tl-1)*s)+1), --width-1 |
y => y(((tl*s)-1) downto ((tl-1)*s)+1), --width-2 |
m => m(((tl*s)-1) downto ((tl-1)*s)+1), --width-2 |
xin => x_i(tl-1), |
qin => q_i(tl-2), |
cin => c_i(tl-2), |
start => start_stage_i(tl-1), |
xin => xin_stage(tl-1), |
qin => qout_stage(tl-2), |
cin => cout_stage(tl-2), |
start => start_stage(tl-1), |
reset => reset, |
--ready => stage_ready_i(t-1), |
r => r_l --width+1 |
r => r_stage_midend --width+1 |
); |
|
end Structural; |