OpenCores
URL https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk

Subversion Repositories mod_sim_exp

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /mod_sim_exp/trunk/rtl/vhdl
    from Rev 21 to Rev 22
    Reverse comparison

Rev 21 → Rev 22

/core/mod_sim_exp_pkg.vhd
409,6 → 409,42
);
end component x_shift_reg;
--------------------------------------------------------------------
-- systolic_pipeline
--------------------------------------------------------------------
-- systolic pipeline implementation of the Montgomery multiplier
-- divides the pipeline into 2 parts, so 3 operand widths are supported
--
-- p_sel:
-- 01 = lower part
-- 10 = upper part
-- 11 = full range
-- Component declaration of the systolic pipeline Montgomery multiplier.
-- The pipeline is split into a lower and an upper part; p_sel chooses
-- which part (or the full range) is used for a multiplication.
component systolic_pipeline is
generic(
n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- total number of stages (divisor of n), >= 2
tl : integer := 64 -- number of stages in the lower pipeline part (original hint: best take t = sqrt(n))
);
port(
-- clock input
core_clk : in std_logic;
-- modulus and y operand inputs
my : in std_logic_vector((n) downto 0); -- m+y, (n+1) bits wide
y : in std_logic_vector((n-1) downto 0); -- y operand, n bits wide
m : in std_logic_vector((n-1) downto 0); -- modulus, n bits wide
-- x operand input (serial, one bit at a time)
xi : in std_logic;
-- control signals
start : in std_logic; -- start multiplier
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used: "01" = lower part, "10" = upper part, "11" = full range
ready : out std_logic; -- multiplication ready
next_x : out std_logic; -- next x operand bit
-- result out, (n+2) bits wide
r : out std_logic_vector((n+1) downto 0)
);
end component systolic_pipeline;
component autorun_cntrl is
port (
clk : in std_logic;
595,25 → 631,4
);
end component operands_sp;
component systolic_pipeline is
generic(
n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- number of stages (divider of n) >= 2
tl : integer := 64 -- best take t = sqrt(n)
);
port(
core_clk : in std_logic;
my : in std_logic_vector((n) downto 0);
y : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
xi : in std_logic;
start : in std_logic;
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used
ready : out std_logic;
next_x : out std_logic;
r : out std_logic_vector((n+1) downto 0)
);
end component systolic_pipeline;
end package mod_sim_exp_pkg;
/core/systolic_pipeline.vhd
6,8 → 6,8
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- pipelined systolic array implementation of a montgomery ----
---- multiplier ----
---- structural description of a pipelined systolic array ----
---- implementation of a montgomery multiplier. ----
---- ----
---- Dependencies: ----
---- - stepping_logic ----
54,29 → 54,35
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- systolic pipeline implementation of the Montgomery multiplier
-- divides the pipeline into 2 parts, so 3 operand widths are supported
--
-- p_sel:
-- 01 = lower part
-- 10 = upper part
-- 11 = full range
 
-- 01 = lower part
-- 10 = upper part
-- 11 = full range
entity systolic_pipeline is
generic(
n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- number of stages (divider of n) >= 2
tl : integer := 64 -- best take t = sqrt(n)
t : integer := 192; -- total number of stages (divider of n) >= 2
tl : integer := 64 -- lower number of stages (best take t = sqrt(n))
);
port(
-- clock input
core_clk : in std_logic;
my : in std_logic_vector((n) downto 0);
-- modulus and y operand input (n)-bit
my : in std_logic_vector((n) downto 0); -- m+y
y : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
-- x operand input (serial)
xi : in std_logic;
start : in std_logic;
-- control signals
start : in std_logic; -- start multiplier
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used
ready : out std_logic;
next_x : out std_logic;
ready : out std_logic; -- multiplication ready
next_x : out std_logic; -- next x operand bit
-- result out
r : out std_logic_vector((n+1) downto 0)
);
end systolic_pipeline;
83,85 → 89,95
 
 
architecture Structural of systolic_pipeline is
constant s : integer := n/t; -- defines the size of the stages (# bits)
constant size_l : integer := s*tl;
constant size_h : integer := n - size_l;
constant s : integer := n/t; -- stage width (# bits)
constant nl : integer := s*tl; -- lower pipeline width (# bits)
constant nh : integer := n - nl; -- higher pipeline width (# bits)
 
signal start_stage_i : std_logic_vector((t-1) downto 0);
--signal stage_ready_i : std_logic_vector((t-1) downto 0);
signal stage_done_i : std_logic_vector((t-2) downto 0);
-- pipeline selection flags
signal p_full_selected : std_logic; -- full
signal p_low_full_selected : std_logic; -- low or full
signal p_high_selected : std_logic; -- high
signal t_sel : integer range 0 to t; -- width in stages of selected pipeline part
signal n_sel : integer range 0 to n; -- width in bits of selected pipeline part
-- general stage interconnect signals
signal start_stage : std_logic_vector((t-1) downto 0); -- vector for the start bits for the stages
signal done_stage : std_logic_vector((t-2) downto 0); -- vector for the done bits of the stages
signal xin_stage : std_logic_vector((t-1) downto 0); -- vector for the xin bits of the stages
signal qout_stage : std_logic_vector((t-2) downto 0); -- vector for the qout bits of the stages
signal cout_stage : std_logic_vector((t-2) downto 0); -- vector for the cout bits of the stages
-- stage result signals
signal r_tot : std_logic_vector((n+1) downto 0); -- result of the total multiplier
signal r_stage_midstart : std_logic_vector(s-1 downto 0); -- result of the mid-start stage of the multiplier
signal r_stage_midend : std_logic_vector((s+1) downto 0); -- result of the mid-end stage of the multiplier
-- mapped result registers
signal r_i : std_logic_vector((n+1) downto 0);
signal r_i_stage_midstart : std_logic_vector((s*2)-1 downto 0);
signal r_i_stage_midend : std_logic_vector((s*2)-1 downto 0);
 
signal x_i : std_logic_vector((t-1) downto 0) := (others => '0');
signal q_i : std_logic_vector((t-2) downto 0) := (others => '0');
signal c_i : std_logic_vector((t-2) downto 0) := (others => '0');
signal a_i : std_logic_vector((n+1) downto 0) := (others => '0');
signal r_tot : std_logic_vector((n+1) downto 0) := (others => '0');
signal r_h : std_logic_vector(s-1 downto 0) := (others => '0');
signal r_l : std_logic_vector((s+1) downto 0) := (others => '0');
signal a_h : std_logic_vector((s*2)-1 downto 0) := (others => '0');
signal a_l : std_logic_vector((s*2)-1 downto 0) := (others => '0');
 
--signal ready_i : std_logic;
signal stepping_done_i : std_logic;
signal t_sel : integer range 0 to t := t;
signal n_sel : integer range 0 to n := n;
signal split : std_logic := '0';
signal lower_e_i : std_logic := '0';
signal higher_e_i : std_logic := '0';
signal start_pulses_i : std_logic := '0';
signal start_higher_i : std_logic := '0';
signal higher_0_done_i : std_logic := '0';
signal h_x_0, h_x_1 : std_logic := '0';
signal h_q_0, h_q_1 : std_logic := '0';
signal h_c_0, h_c_1 : std_logic := '0';
signal x_offset_i : integer range 0 to tl*s := 0;
signal next_x_i : std_logic := '0';
 
-- pipeline start signals
signal start_first_stage : std_logic; -- start for full and low pipeline
signal start_higher : std_logic; -- start for higher pipeline
-- midstart stage signals
signal done_stage_midstart : std_logic;
signal xout_stage_midstart : std_logic;
signal qout_stage_midstart : std_logic;
signal cout_stage_midstart : std_logic;
-- tl+1 stage signals
signal xin_stage_tl_1 : std_logic;
signal qin_stage_tl_1 : std_logic;
signal cin_stage_tl_1 : std_logic;
begin
 
-- output mapping
r <= a_i; -- a shift operation may still be needed here
ready <= stepping_done_i;
r <= r_i;
 
-- result feedback
a_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s));
a_i(((tl-1)*s-1) downto 0) <= r_tot(((tl-1)*s-1) downto 0);
r_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s));
r_i(((tl-1)*s-1) downto 0) <= r_tot(((tl-1)*s-1) downto 0);
a_l((s+1) downto 0) <= r_l;
a_h((s*2)-1 downto s) <= r_h;
r_i_stage_midend((s*2)-1 downto s+2) <= (others=>'0');
r_i_stage_midend((s+1) downto 0) <= r_stage_midend;
r_i_stage_midstart((s*2)-1 downto s) <= r_stage_midstart;
r_i_stage_midstart((s-1) downto 0) <= (others=>'0');
with p_sel select
a_i(((tl+1)*s-1) downto ((tl-1)*s)) <= a_l when "01",
a_h when "10",
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;
r_i(((tl+1)*s-1) downto ((tl-1)*s)) <= r_i_stage_midend when "01",
r_i_stage_midstart when "10",
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;
 
-- signals from x_selection
next_x_i <= start_stage_i(1) or (start_stage_i(tl+1) and higher_e_i);
--
next_x <= next_x_i;
x_i(0) <= xi;
next_x <= start_stage(1) or (start_stage(tl+1) and p_high_selected);
xin_stage(0) <= xi;
-- this module controls the pipeline operation
-- width in stages for selected pipeline
with p_sel select
t_sel <= tl when "01",
t-tl when "10",
t when others;
t_sel <= tl when "01", -- lower pipeline part
t-tl when "10", -- higher pipeline part
t when others; -- full pipeline
 
-- width in bits for selected pipeline
with p_sel select
n_sel <= size_l-1 when "01",
size_h-1 when "10",
n-1 when others;
n_sel <= nl-1 when "01", -- lower pipeline part
nh-1 when "10", -- higher pipeline part
n-1 when others; -- full pipeline
with p_sel select
lower_e_i <= '0' when "10",
'1' when others;
p_low_full_selected <= '0' when "10", -- higher pipeline part
'1' when others; -- full or lower pipeline
with p_sel select
higher_e_i <= '1' when "10",
'0' when others;
p_high_selected <= '1' when "10", -- higher pipeline part
'0' when others; -- full or lower pipeline
split <= p_sel(0) and p_sel(1);
p_full_selected <= p_sel(0) and p_sel(1);
-- stepping control logic to keep track of the multiplication and when it is done
stepping_control : stepping_logic
generic map(
n => n, -- max nr of steps required to complete a multiplication
173,27 → 189,29
reset => reset,
t_sel => t_sel,
n_sel => n_sel,
start_first_stage => start_pulses_i,
stepping_done => stepping_done_i
start_first_stage => start_first_stage,
stepping_done => ready
);
-- start signals for first stage of lower and higher part
start_stage_i(0) <= start_pulses_i and lower_e_i;
start_higher_i <= start_pulses_i and (higher_e_i and not split);
start_stage(0) <= start_first_stage and p_low_full_selected;
start_higher <= start_first_stage and p_high_selected;
-- start signals for stage tl and tl+1 (full pipeline operation)
start_stage_i(tl) <= stage_done_i(tl-1) and split;
start_stage_i(tl+1) <= stage_done_i(tl) or higher_0_done_i;
start_stage(tl) <= done_stage(tl-1) and p_full_selected; -- only pass the start signal if full pipeline
start_stage(tl+1) <= done_stage(tl) or done_stage_midstart;
-- nothing special here, previous stages starts the next
start_signals_l: for i in 1 to tl-1 generate
start_stage_i(i) <= stage_done_i(i-1);
start_stage(i) <= done_stage(i-1);
end generate;
start_signals_h: for i in tl+2 to t-1 generate
start_stage_i(i) <= stage_done_i(i-1);
start_stage(i) <= done_stage(i-1);
end generate;
 
-- first stage
-- bits (s downto 0)
stage_0 : first_stage
generic map(
width => s
203,18 → 221,20
my => my(s downto 0),
y => y(s downto 0),
m => m(s downto 0),
xin => x_i(0),
xout => x_i(1),
qout => q_i(0),
a_msb => a_i(s),
cout => c_i(0),
start => start_stage_i(0),
xin => xin_stage(0),
xout => xin_stage(1),
qout => qout_stage(0),
a_msb => r_i(s),
cout => cout_stage(0),
start => start_stage(0),
reset => reset,
--ready => stage_ready_i(0),
done => stage_done_i(0),
r => r_tot((s-1) downto 0)
done => done_stage(0),
r => r_tot((s-1) downto 0)
);
-- lower pipeline standard stages: stages tl downto 1
-- bits ((tl+1)*s downto s+1)
-- (nl downto s+1)
stages_l : for i in 1 to (tl) generate
standard_stages : standard_stage
generic map(
225,24 → 245,23
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
xin => xin_stage(i),
qin => qout_stage(i-1),
xout => xin_stage(i+1),
qout => qout_stage(i),
a_msb => r_i((i+1)*s),
cin => cout_stage(i-1),
cout => cout_stage(i),
start => start_stage(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
done => done_stage(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
h_c_1 <= h_c_0 or c_i(tl);
h_q_1 <= h_q_0 or q_i(tl);
h_x_1 <= h_x_0 or x_i(tl+1);
cin_stage_tl_1 <= cout_stage_midstart or cout_stage(tl);
qin_stage_tl_1 <= qout_stage_midstart or qout_stage(tl);
xin_stage_tl_1 <= xout_stage_midstart or xin_stage(tl+1);
stage_tl_1 : standard_stage
generic map(
253,22 → 272,19
my => my(((tl+2)*s) downto ((s*(tl+1))+1)),
y => y(((tl+2)*s) downto ((s*(tl+1))+1)),
m => m(((tl+2)*s) downto ((s*(tl+1))+1)),
--xin => x_i(tl+1),
xin => h_x_1,
--qin => q_i(tl),
qin => h_q_1,
xout => x_i(tl+2),
qout => q_i(tl+1),
a_msb => a_i((tl+2)*s),
--cin => c_i(tl),
cin => h_c_1,
cout => c_i(tl+1),
start => start_stage_i(tl+1),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(tl+1),
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
xin => xin_stage_tl_1,
qin => qin_stage_tl_1,
xout => xin_stage(tl+2),
qout => qout_stage(tl+1),
a_msb => r_i((tl+2)*s),
cin => cin_stage_tl_1,
cout => cout_stage(tl+1),
start => start_stage(tl+1),
reset => reset,
done => done_stage(tl+1),
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
);
 
stages_h : for i in (tl+2) to (t-2) generate
standard_stages : standard_stage
280,18 → 296,17
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
xin => xin_stage(i),
qin => qout_stage(i-1),
xout => xin_stage(i+1),
qout => qout_stage(i),
a_msb => r_i((i+1)*s),
cin => cout_stage(i-1),
cout => cout_stage(i),
start => start_stage(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
done => done_stage(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
 
304,12 → 319,11
my => my(n downto ((n-s)+1)), --width-1
y => y((n-1) downto ((n-s)+1)), --width-2
m => m((n-1) downto ((n-s)+1)), --width-2
xin => x_i(t-1),
qin => q_i(t-2),
cin => c_i(t-2),
start => start_stage_i(t-1),
xin => xin_stage(t-1),
qin => qout_stage(t-2),
cin => cout_stage(t-2),
start => start_stage(t-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_tot((n+1) downto (n-s)) --width+1
);
 
322,16 → 336,15
my => my((tl*s+s) downto tl*s),
y => y((tl*s+s) downto tl*s),
m => m((tl*s+s) downto tl*s),
xin => x_i(0),
xout => h_x_0,
qout => h_q_0,
a_msb => a_i((tl+1)*s),
cout => h_c_0,
start => start_higher_i,
xin => xin_stage(0),
xout => xout_stage_midstart,
qout => qout_stage_midstart,
a_msb => r_i((tl+1)*s),
cout => cout_stage_midstart,
start => start_higher,
reset => reset,
--ready => stage_ready_i(0),
done => higher_0_done_i,
r => r_h
done => done_stage_midstart,
r => r_stage_midstart
);
 
mid_end : last_stage
343,13 → 356,12
my => my((tl*s) downto ((tl-1)*s)+1), --width-1
y => y(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
m => m(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
xin => x_i(tl-1),
qin => q_i(tl-2),
cin => c_i(tl-2),
start => start_stage_i(tl-1),
xin => xin_stage(tl-1),
qin => qout_stage(tl-2),
cin => cout_stage(tl-2),
start => start_stage(tl-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_l --width+1
r => r_stage_midend --width+1
);
 
end Structural;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.