OpenCores
URL https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk

Subversion Repositories mod_sim_exp

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /mod_sim_exp
    from Rev 41 to Rev 40
    Reverse comparison

Rev 41 → Rev 40

/trunk/rtl/vhdl/core/mod_sim_exp_pkg.vhd
202,6 → 202,32
end component adder_block;
--------------------------------------------------------------------
-- adder_n
--------------------------------------------------------------------
-- n-bit adder built from adder blocks. Works in stages, to prevent
-- long carry propagation.
-- Result available after (width/block_width) clock cycles
--
component adder_n is
  generic (
    width       : integer := 1536; -- bit width of the operands and of the result
    block_width : integer := 8     -- bit width of a single adder block
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- the two (width)-bit operands
    a, b     : in  std_logic_vector(width-1 downto 0);
    -- carry in and carry out
    cin      : in  std_logic;
    cout     : out std_logic;
    -- (width)-bit adder result
    r        : out std_logic_vector(width-1 downto 0)
  );
end component adder_n;
--------------------------------------------------------------------
-- standard_cell_block
--------------------------------------------------------------------
-- a standard cell block of (width)-bit for the Montgomery multiplier
230,6 → 256,109
end component standard_cell_block;
--------------------------------------------------------------------
-- standard_stage
--------------------------------------------------------------------
-- standard stage for use in the Montgomery multiplier pipeline
-- the result is available after 1 clock cycle
--
component standard_stage is
  generic (
    width : integer := 32  -- bit width of the stage operands and result
  );
  port (
    -- clock
    core_clk     : in  std_logic;
    -- (width)-bit slices of my, y and the modulus m
    my, y, m     : in  std_logic_vector(width-1 downto 0);
    -- serial x and q bits entering the stage
    xin, qin     : in  std_logic;
    -- serial x and q bits leaving the stage
    xout, qout   : out std_logic;
    -- msb used for the shift right operation (lsb of the next stage)
    a_msb        : in  std_logic;
    -- carry in, and carry out (clocked)
    cin          : in  std_logic;
    cout         : out std_logic;
    -- control signals
    start, reset : in  std_logic;
    done         : out std_logic;
    -- (width)-bit stage result
    r            : out std_logic_vector(width-1 downto 0)
  );
end component standard_stage;
--------------------------------------------------------------------
-- first_stage
--------------------------------------------------------------------
-- first stage for use in the Montgomery multiplier pipeline
-- generates the q signal for all following stages
-- the result is available after 1 clock cycle
--
component first_stage is
  generic (
    width : integer := 16  -- has to equal the width of the standard stage
  );
  port (
    -- clock
    core_clk     : in  std_logic;
    -- (width+1)-bit slices of my, y and the modulus m
    my, y, m     : in  std_logic_vector(width downto 0);
    -- serial x bit entering the stage
    xin          : in  std_logic;
    -- serial x and q bits leaving the stage
    xout, qout   : out std_logic;
    -- msb used for the shift right operation (lsb of the next stage)
    a_msb        : in  std_logic;
    -- carry out
    cout         : out std_logic;
    -- control signals
    start, reset : in  std_logic;
    done         : out std_logic;
    -- (width)-bit stage result
    r            : out std_logic_vector(width-1 downto 0)
  );
end component first_stage;
--------------------------------------------------------------------
-- last_stage
--------------------------------------------------------------------
-- last stage for use in the Montgomery multiplier pipeline
-- the result is available after 1 clock cycle
--
component last_stage is
  generic (
    width : integer := 16  -- has to equal the width of the standard stage
  );
  port (
    -- clock
    core_clk     : in  std_logic;
    -- (width)-bit slice of my
    my           : in  std_logic_vector(width-1 downto 0);
    -- (width-1)-bit slices of y and the modulus m
    y, m         : in  std_logic_vector(width-2 downto 0);
    -- serial x and q bits entering the stage
    xin, qin     : in  std_logic;
    -- carry in
    cin          : in  std_logic;
    -- control signals
    start, reset : in  std_logic;
    -- (width+2)-bit stage result
    r            : out std_logic_vector(width+1 downto 0)
  );
end component last_stage;
--------------------------------------------------------------------
-- counter_sync
--------------------------------------------------------------------
-- counter with synchronous count enable. It generates an
297,6 → 426,78
xi : out std_logic
);
end component x_shift_reg;
--------------------------------------------------------------------
-- systolic_pipeline
--------------------------------------------------------------------
-- systolic pipeline implementation of the Montgomery multiplier
-- divides the pipeline into 2 parts, so 3 operand widths are supported
--
-- p_sel:
-- 01 = lower part
-- 10 = upper part
-- 11 = full range
component systolic_pipeline is
  generic (
    n  : integer := 1536; -- operand width in bits
    t  : integer := 192;  -- total number of stages (divider of n), >= 2
    tl : integer := 64    -- number of stages in the lower pipeline part
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- my (= m+y, n+1 bits), y operand and modulus m (n bits each)
    my       : in  std_logic_vector(n downto 0);
    y, m     : in  std_logic_vector(n-1 downto 0);
    -- serial x operand bit
    xi       : in  std_logic;
    -- control signals
    start    : in  std_logic;                     -- starts the multiplier
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);  -- selects the pipeline part to use
    ready    : out std_logic;                     -- multiplication finished
    next_x   : out std_logic;                     -- request for the next x operand bit
    -- (n+2)-bit result
    r        : out std_logic_vector(n+1 downto 0)
  );
end component systolic_pipeline;
--------------------------------------------------------------------
-- mont_mult_sys_pipeline
--------------------------------------------------------------------
-- Structural description of the Montgomery multiplier pipeline.
-- Contains the x operand shift register, the my adder, the pipeline and the
-- reduction adder. To do a multiplication, the following actions must take place:
--
-- * load the x operand into the shift register using the xy bus and load_x
-- * place the y operand on the xy bus for the rest of the operation
-- * generate a start pulse of 1 clk cycle long on start
-- * wait for the ready signal
-- * the result is available on the r bus
--
component mont_mult_sys_pipeline is
  generic (
    n  : integer := 1536; -- operand width in bits
    t  : integer := 96;   -- total number of stages
    tl : integer := 32    -- number of stages in the lower pipeline part
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- operands: xy is the shared bus for the x or y operand, m is the modulus
    xy, m    : in  std_logic_vector(n-1 downto 0);
    -- result
    r        : out std_logic_vector(n-1 downto 0);
    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);
    load_x   : in  std_logic;
    ready    : out std_logic
  );
end component mont_mult_sys_pipeline;
 
--------------------------------------------------------------------
-- mod_sim_exp_core
/trunk/rtl/vhdl/core/first_stage.vhd
0,0 → 1,199
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- first_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- first stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - d_flip_flop ----
---- - register_n ----
---- - register_1b ----
---- - cell_1b_mux ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- first stage for use in the Montgomery multiplier pipeline
-- generates the q signal for all following stages
-- the result is available after 1 clock cycle
entity first_stage is
  generic (
    width : integer := 16  -- has to equal the width of the standard stage
  );
  port (
    -- clock
    core_clk     : in  std_logic;
    -- (width+1)-bit slices of my, y and the modulus m
    my, y, m     : in  std_logic_vector(width downto 0);
    -- serial x bit entering the stage
    xin          : in  std_logic;
    -- serial x and q bits leaving the stage (registered)
    xout, qout   : out std_logic;
    -- msb used for the shift right operation (lsb of the next stage)
    a_msb        : in  std_logic;
    -- carry out (registered)
    cout         : out std_logic;
    -- control signals
    start, reset : in  std_logic;
    done         : out std_logic;
    -- (width)-bit stage result (registered)
    r            : out std_logic_vector(width-1 downto 0)
  );
end entity first_stage;
 
 
architecture Structural of first_stage is
  -- values computed by the cell logic, before they are registered
  signal carry_next : std_logic;
  signal sum_next   : std_logic_vector(width-1 downto 0);
  signal q_next     : std_logic;
  -- registered stage result
  signal sum_q      : std_logic_vector(width-1 downto 0);

  -- result of the bit-0 cell and the carry derived from it
  signal lsb_res    : std_logic;
  signal lsb_carry  : std_logic;
  -- a_msb placed in front of the registered result ((width+1) bits)
  signal acc        : std_logic_vector(width downto 0);

begin

  -- the registered result is what leaves the stage
  r <= sum_q;

  -- acc(width downto 1) is the registered result shifted right by one
  -- position with a_msb as new msb; acc(0) is the bit that drops out
  acc <= a_msb & sum_q;

  -- first q bit and the carry going into the cell block
  q_next    <= (xin and y(0)) xor acc(0);
  lsb_carry <= lsb_res and acc(0);

  -- 1-bit mux cell for bit position 0
  first_cell : cell_1b_mux
    port map (
      x      => xin,
      q      => q_next,
      my     => my(0),
      y      => y(0),
      m      => m(0),
      result => lsb_res
    );

  -- (width)-bit standard cell block for bit positions width downto 1
  cell_block : standard_cell_block
    generic map (
      width => width
    )
    port map (
      x    => xin,
      q    => q_next,
      my   => my(width downto 1),
      y    => y(width downto 1),
      m    => m(width downto 1),
      a    => acc(width downto 1),
      cin  => lsb_carry,
      cout => carry_next,
      r    => sum_next
    );

  -- done flag: start passed through a flip-flop
  done_signal : d_flip_flop
    port map (
      core_clk => core_clk,
      reset    => reset,
      din      => start,
      dout     => done
    );

  ------------------------------------------------------------------
  -- output registers, all enabled by start
  ------------------------------------------------------------------

  -- (width)-bit result
  result_reg : register_n
    generic map (
      width => width
    )
    port map (
      core_clk => core_clk,
      reset    => reset,
      ce       => start,
      din      => sum_next,
      dout     => sum_q
    );

  -- x bit handed on to the next stage
  xout_reg : register_1b
    port map (
      core_clk => core_clk,
      reset    => reset,
      ce       => start,
      din      => xin,
      dout     => xout
    );

  -- q bit handed on to the next stage
  qout_reg : register_1b
    port map (
      core_clk => core_clk,
      reset    => reset,
      ce       => start,
      din      => q_next,
      dout     => qout
    );

  -- carry handed on to the next stage
  cout_reg : register_1b
    port map (
      core_clk => core_clk,
      reset    => reset,
      ce       => start,
      din      => carry_next,
      dout     => cout
    );

end architecture Structural;
/trunk/rtl/vhdl/core/systolic_pipeline.vhd
0,0 → 1,368
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- systolic_pipeline ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- structural description of a pipelined systolic array ----
---- implementation of a montgomery multiplier. ----
---- ----
---- Dependencies: ----
---- - stepping_logic ----
---- - first_stage ----
---- - standard_stage ----
---- - last_stage ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- systolic pipeline implementation of the Montgomery multiplier
-- divides the pipeline into 2 parts, so 3 operand widths are supported
--
-- p_sel:
-- 01 = lower part
-- 10 = upper part
-- 11 = full range
entity systolic_pipeline is
  generic (
    n  : integer := 1536; -- operand width in bits
    t  : integer := 192;  -- total number of stages (divider of n), >= 2
    tl : integer := 64    -- number of stages in the lower pipeline part
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- my (= m+y, n+1 bits), y operand and modulus m (n bits each)
    my       : in  std_logic_vector(n downto 0);
    y, m     : in  std_logic_vector(n-1 downto 0);
    -- serial x operand bit
    xi       : in  std_logic;
    -- control signals
    start    : in  std_logic;                     -- starts the multiplier
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);  -- selects the pipeline part to use
    ready    : out std_logic;                     -- multiplication finished
    next_x   : out std_logic;                     -- request for the next x operand bit
    -- (n+2)-bit result
    r        : out std_logic_vector(n+1 downto 0)
  );
end entity systolic_pipeline;
 
 
architecture Structural of systolic_pipeline is
  -- Structure: t stages of s = n/t bits each.
  --   stage 0           : first_stage  (stage_0)
  --   stages 1 .. tl    : standard_stage (stages_l)
  --   stage tl+1        : standard_stage (stage_tl_1), fed by stage tl or by mid_start
  --   stages tl+2..t-2  : standard_stage (stages_h)
  --   stage t-1         : last_stage   (stage_t)
  -- Two extra stages allow the pipeline to be used in parts:
  --   mid_end   : last_stage in parallel with stage tl-1 (result used when p_sel = "01")
  --   mid_start : first_stage in parallel with stage tl  (result used when p_sel = "10")

  constant s  : integer := n/t;    -- stage width (# bits)
  constant nl : integer := s*tl;   -- lower pipeline width (# bits)
  constant nh : integer := n - nl; -- higher pipeline width (# bits)

  -- pipeline selection flags
  signal p_full_selected     : std_logic; -- full
  signal p_low_full_selected : std_logic; -- low or full
  signal p_high_selected     : std_logic; -- high
  signal t_sel : integer range 0 to t; -- width in stages of selected pipeline part
  signal n_sel : integer range 0 to n; -- width in bits of selected pipeline part, minus 1

  -- general stage interconnect signals
  signal start_stage : std_logic_vector((t-1) downto 0); -- vector for the start bits for the stages
  signal done_stage  : std_logic_vector((t-2) downto 0); -- vector for the done bits of the stages
  signal xin_stage   : std_logic_vector((t-1) downto 0); -- vector for the xin bits of the stages
  signal qout_stage  : std_logic_vector((t-2) downto 0); -- vector for the qout bits of the stages
  signal cout_stage  : std_logic_vector((t-2) downto 0); -- vector for the cout bits of the stages

  -- stage result signals
  signal r_tot            : std_logic_vector((n+1) downto 0); -- result of the total multiplier
  signal r_stage_midstart : std_logic_vector(s-1 downto 0);   -- result of the mid-start stage of the multiplier
  signal r_stage_midend   : std_logic_vector((s+1) downto 0); -- result of the mid-end stage of the multiplier

  -- mapped result registers
  signal r_i                : std_logic_vector((n+1) downto 0);
  signal r_i_stage_midstart : std_logic_vector((s*2)-1 downto 0); -- r_stage_midstart in the upper s bits, zeros below
  signal r_i_stage_midend   : std_logic_vector((s*2)-1 downto 0); -- r_stage_midend in the lower s+2 bits, zeros above

  -- pipeline start signals
  signal start_first_stage : std_logic; -- start for full and low pipeline
  signal start_higher      : std_logic; -- start for higher pipeline

  -- midstart stage signals
  signal done_stage_midstart : std_logic;
  signal xout_stage_midstart : std_logic;
  signal qout_stage_midstart : std_logic;
  signal cout_stage_midstart : std_logic;

  -- tl+1 stage signals
  signal xin_stage_tl_1 : std_logic;
  signal qin_stage_tl_1 : std_logic;
  signal cin_stage_tl_1 : std_logic;
begin

  -- generic sanity checks
  -- n/t is an integer division: if t does not divide n the stage slices no
  -- longer cover all bits of r_tot, which would go unnoticed without this check
  assert (n mod t) = 0
    report "systolic_pipeline: t must be a divider of n"
    severity failure;
  -- r_stage_midend is (s+2) bits wide and has to fit in the 2*s-bit window
  assert s >= 2
    report "systolic_pipeline: stage width n/t must be at least 2 bits"
    severity failure;
  -- mid_end reads index tl-2 of the stage vectors, stage_tl_1 uses index tl+1
  -- of done_stage (range t-2 downto 0) and index tl+2 of xin_stage
  assert (tl >= 2) and (tl <= t-3)
    report "systolic_pipeline: tl must be in the range 2 to t-3"
    severity failure;

  -- output mapping
  r <= r_i;

  -- result feedback
  -- everything outside the 2*s-bit window around the split point comes
  -- straight from the stage results
  r_i((n+1) downto ((tl+1)*s)) <= r_tot((n+1) downto ((tl+1)*s));
  r_i(((tl-1)*s-1) downto 0)   <= r_tot(((tl-1)*s-1) downto 0);

  -- window contents when only the lower part is used
  r_i_stage_midend((s*2)-1 downto s+2) <= (others=>'0');
  r_i_stage_midend((s+1) downto 0)     <= r_stage_midend;
  -- window contents when only the higher part is used
  r_i_stage_midstart((s*2)-1 downto s) <= r_stage_midstart;
  r_i_stage_midstart((s-1) downto 0)   <= (others=>'0');

  -- the window itself depends on the selected pipeline part
  with p_sel select
    r_i(((tl+1)*s-1) downto ((tl-1)*s)) <= r_i_stage_midend   when "01",
                                           r_i_stage_midstart when "10",
                                           r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;

  -- signals from x_selection
  next_x       <= start_stage(1) or (start_stage(tl+1) and p_high_selected);
  xin_stage(0) <= xi;

  -- this module controls the pipeline operation
  -- width in stages for selected pipeline
  with p_sel select
    t_sel <= tl   when "01",   -- lower pipeline part
             t-tl when "10",   -- higher pipeline part
             t    when others; -- full pipeline

  -- width in bits (minus 1) for selected pipeline
  with p_sel select
    n_sel <= nl-1 when "01",   -- lower pipeline part
             nh-1 when "10",   -- higher pipeline part
             n-1  when others; -- full pipeline

  with p_sel select
    p_low_full_selected <= '0' when "10",   -- higher pipeline part
                           '1' when others; -- full or lower pipeline

  with p_sel select
    p_high_selected <= '1' when "10",   -- higher pipeline part
                       '0' when others; -- full or lower pipeline

  p_full_selected <= p_sel(0) and p_sel(1);

  -- stepping control logic to keep track of the multiplication and when it is done
  stepping_control : stepping_logic
    generic map(
      n => n, -- max nr of steps required to complete a multiplication
      t => t  -- total nr of steps in the pipeline
    )
    port map(
      core_clk          => core_clk,
      start             => start,
      reset             => reset,
      t_sel             => t_sel,
      n_sel             => n_sel,
      start_first_stage => start_first_stage,
      stepping_done     => ready
    );

  -- start signals for first stage of lower and higher part
  start_stage(0) <= start_first_stage and p_low_full_selected;
  start_higher   <= start_first_stage and p_high_selected;

  -- start signals for stage tl and tl+1 (full pipeline operation)
  start_stage(tl)   <= done_stage(tl-1) and p_full_selected; -- only pass the start signal if full pipeline
  start_stage(tl+1) <= done_stage(tl) or done_stage_midstart;

  -- nothing special here, previous stage starts the next
  start_signals_l: for i in 1 to tl-1 generate
    start_stage(i) <= done_stage(i-1);
  end generate;

  start_signals_h: for i in tl+2 to t-1 generate
    start_stage(i) <= done_stage(i-1);
  end generate;

  -- first stage
  -- bits (s downto 0)
  stage_0 : first_stage
    generic map(
      width => s
    )
    port map(
      core_clk => core_clk,
      my       => my(s downto 0),
      y        => y(s downto 0),
      m        => m(s downto 0),
      xin      => xin_stage(0),
      xout     => xin_stage(1),
      qout     => qout_stage(0),
      a_msb    => r_i(s),
      cout     => cout_stage(0),
      start    => start_stage(0),
      reset    => reset,
      done     => done_stage(0),
      r        => r_tot((s-1) downto 0)
    );

  -- lower pipeline standard stages: stages tl downto 1
  -- bits ((tl+1)*s downto s+1)
  -- (nl+s downto s+1)
  stages_l : for i in 1 to (tl) generate
    standard_stages : standard_stage
      generic map(
        width => s
      )
      port map(
        core_clk => core_clk,
        my       => my(((i+1)*s) downto ((s*i)+1)),
        y        => y(((i+1)*s) downto ((s*i)+1)),
        m        => m(((i+1)*s) downto ((s*i)+1)),
        xin      => xin_stage(i),
        qin      => qout_stage(i-1),
        xout     => xin_stage(i+1),
        qout     => qout_stage(i),
        a_msb    => r_i((i+1)*s),
        cin      => cout_stage(i-1),
        cout     => cout_stage(i),
        start    => start_stage(i),
        reset    => reset,
        done     => done_stage(i),
        r        => r_tot((((i+1)*s)-1) downto (s*i))
      );
  end generate;

  -- stage tl+1 takes its carry, q and x inputs from stage tl or from the
  -- mid_start stage; both sources are simply or-ed together
  -- NOTE(review): this presumably relies on the unused source staying '0' - confirm
  cin_stage_tl_1 <= cout_stage_midstart or cout_stage(tl);
  qin_stage_tl_1 <= qout_stage_midstart or qout_stage(tl);
  xin_stage_tl_1 <= xout_stage_midstart or xin_stage(tl+1);

  -- stage tl+1
  -- bits ((tl+2)*s downto (tl+1)*s+1)
  stage_tl_1 : standard_stage
    generic map(
      width => s
    )
    port map(
      core_clk => core_clk,
      my       => my(((tl+2)*s) downto ((s*(tl+1))+1)),
      y        => y(((tl+2)*s) downto ((s*(tl+1))+1)),
      m        => m(((tl+2)*s) downto ((s*(tl+1))+1)),
      xin      => xin_stage_tl_1,
      qin      => qin_stage_tl_1,
      xout     => xin_stage(tl+2),
      qout     => qout_stage(tl+1),
      a_msb    => r_i((tl+2)*s),
      cin      => cin_stage_tl_1,
      cout     => cout_stage(tl+1),
      start    => start_stage(tl+1),
      reset    => reset,
      done     => done_stage(tl+1),
      r        => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
    );

  -- higher pipeline standard stages: stages t-2 downto tl+2
  stages_h : for i in (tl+2) to (t-2) generate
    standard_stages : standard_stage
      generic map(
        width => s
      )
      port map(
        core_clk => core_clk,
        my       => my(((i+1)*s) downto ((s*i)+1)),
        y        => y(((i+1)*s) downto ((s*i)+1)),
        m        => m(((i+1)*s) downto ((s*i)+1)),
        xin      => xin_stage(i),
        qin      => qout_stage(i-1),
        xout     => xin_stage(i+1),
        qout     => qout_stage(i),
        a_msb    => r_i((i+1)*s),
        cin      => cout_stage(i-1),
        cout     => cout_stage(i),
        start    => start_stage(i),
        reset    => reset,
        done     => done_stage(i),
        r        => r_tot((((i+1)*s)-1) downto (s*i))
      );
  end generate;

  -- last stage of the full / higher pipeline
  stage_t : last_stage
    generic map(
      width => s -- must be the same as width of the standard stage
    )
    port map(
      core_clk => core_clk,
      my       => my(n downto ((n-s)+1)),     --width-1
      y        => y((n-1) downto ((n-s)+1)),  --width-2
      m        => m((n-1) downto ((n-s)+1)),  --width-2
      xin      => xin_stage(t-1),
      qin      => qout_stage(t-2),
      cin      => cout_stage(t-2),
      start    => start_stage(t-1),
      reset    => reset,
      r        => r_tot((n+1) downto (n-s))   --width+1
    );

  -- first stage of the higher pipeline part, started by start_higher
  -- bits ((tl+1)*s downto tl*s)
  mid_start : first_stage
    generic map(
      width => s
    )
    port map(
      core_clk => core_clk,
      my       => my((tl*s+s) downto tl*s),
      y        => y((tl*s+s) downto tl*s),
      m        => m((tl*s+s) downto tl*s),
      xin      => xin_stage(0),
      xout     => xout_stage_midstart,
      qout     => qout_stage_midstart,
      a_msb    => r_i((tl+1)*s),
      cout     => cout_stage_midstart,
      start    => start_higher,
      reset    => reset,
      done     => done_stage_midstart,
      r        => r_stage_midstart
    );

  -- last stage of the lower pipeline part, runs alongside stage tl-1
  mid_end : last_stage
    generic map(
      width => s -- must be the same as width of the standard stage
    )
    port map(
      core_clk => core_clk,
      my       => my((tl*s) downto ((tl-1)*s)+1),     --width-1
      y        => y(((tl*s)-1) downto ((tl-1)*s)+1),  --width-2
      m        => m(((tl*s)-1) downto ((tl-1)*s)+1),  --width-2
      xin      => xin_stage(tl-1),
      qin      => qout_stage(tl-2),
      cin      => cout_stage(tl-2),
      start    => start_stage(tl-1),
      reset    => reset,
      r        => r_stage_midend                      --width+1
    );

end Structural;
/trunk/rtl/vhdl/core/mont_mult_sys_pipeline.vhd
0,0 → 1,279
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- mont_mult_sys_pipeline ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- n-bit montgomery multiplier with a pipelined systolic ----
---- array ----
---- ----
---- Dependencies: ----
---- - x_shift_reg ----
---- - adder_n ----
---- - d_flip_flop ----
---- - systolic_pipeline ----
---- - cell_1b_adder ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- Structural description of the Montgomery multiplier pipeline.
-- Contains the x operand shift register, the my adder, the pipeline and the
-- reduction adder. To do a multiplication, the following actions must take place:
--
-- * load the x operand into the shift register using the xy bus and load_x
-- * place the y operand on the xy bus for the rest of the operation
-- * generate a start pulse of 1 clk cycle long on start
-- * wait for the ready signal
-- * the result is available on the r bus
--
entity mont_mult_sys_pipeline is
  generic (
    n  : integer := 1536; -- operand width in bits
    t  : integer := 96;   -- total number of stages
    tl : integer := 32    -- number of stages in the lower pipeline part
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- operands: xy is the shared bus for the x or y operand, m is the modulus
    xy, m    : in  std_logic_vector(n-1 downto 0);
    -- result
    r        : out std_logic_vector(n-1 downto 0);
    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);
    load_x   : in  std_logic;
    ready    : out std_logic
  );
end entity mont_mult_sys_pipeline;
 
architecture Structural of mont_mult_sys_pipeline is
  -- bit widths derived from the generics
  constant s_bits  : integer := n/t;          -- width of one stage
  constant lo_bits : integer := s_bits * tl;  -- width of the lower pipeline part
  constant hi_bits : integer := n - lo_bits;  -- width of the higher pipeline part

  -- m + y precomputation
  signal m_plus_y     : std_logic_vector(n downto 0);
  signal my_lo_cout   : std_logic;
  signal my_hi_cin    : std_logic;

  -- multiplier control and x operand feed
  signal mult_reset   : std_logic;
  signal mult_start   : std_logic;
  signal x_next       : std_logic;
  signal x_bit        : std_logic;

  -- multiplier result and its reduction
  signal mult_res     : std_logic_vector(n+1 downto 0);
  signal m_not        : std_logic_vector(n-1 downto 0);
  signal red_res      : std_logic_vector(n-1 downto 0);
  signal red_carry_lo : std_logic_vector(2 downto 0);
  signal red_carry_hi : std_logic_vector(2 downto 0);
  signal red_hi_cin   : std_logic;
  signal use_reduced  : std_logic;
  signal r_mux        : std_logic_vector(n-1 downto 0);

begin

  ------------------------------------------------------------------
  -- x operand
  ------------------------------------------------------------------
  -- shift register that holds x (loaded from the xy bus) and hands it
  -- to the pipeline one bit at a time
  x_selection : x_shift_reg
    generic map (
      n  => n,
      t  => t,
      tl => tl
    )
    port map (
      clk    => core_clk,
      reset  => reset,
      p_sel  => p_sel,
      load_x => load_x,
      x_in   => xy,
      next_x => x_next,
      xi     => x_bit
    );

  ------------------------------------------------------------------
  -- precomputation of my = m + y, split over two adders
  ------------------------------------------------------------------
  -- bits of the lower pipeline part
  my_adder_l : adder_n
    generic map (
      width       => lo_bits,
      block_width => s_bits
    )
    port map (
      core_clk => core_clk,
      a        => m(lo_bits-1 downto 0),
      b        => xy(lo_bits-1 downto 0),
      cin      => '0',
      cout     => my_lo_cout,
      r        => m_plus_y(lo_bits-1 downto 0)
    );

  -- bits of the higher pipeline part, the carry out becomes bit n of my
  my_adder_h : adder_n
    generic map (
      width       => hi_bits,
      block_width => s_bits
    )
    port map (
      core_clk => core_clk,
      a        => m(n-1 downto lo_bits),
      b        => xy(n-1 downto lo_bits),
      cin      => my_hi_cin,
      cout     => m_plus_y(n),
      r        => m_plus_y(n-1 downto lo_bits)
    );

  -- the lower carry is handed on, except when only the higher pipeline
  -- part is selected: then the higher adder starts from a carry of 0
  my_hi_cin <= my_lo_cout when (p_sel(1) and (not p_sel(0))) /= '1' else '0';

  ------------------------------------------------------------------
  -- multiplication
  ------------------------------------------------------------------
  -- the multiplier is cleared on reset and at the start of every calculation
  mult_reset <= start or reset;

  -- the multiplier itself gets the start pulse one cycle later
  delay_1_cycle : d_flip_flop
    port map (
      core_clk => core_clk,
      reset    => reset,
      din      => start,
      dout     => mult_start
    );

  the_multiplier : systolic_pipeline
    generic map (
      n  => n,   -- width of the operands (# bits)
      t  => t,   -- number of stages (divider of n) >= 2
      tl => tl
    )
    port map (
      core_clk => core_clk,
      reset    => mult_reset,
      start    => mult_start,
      p_sel    => p_sel,
      my       => m_plus_y,
      y        => xy,
      m        => m,
      xi       => x_bit,
      next_x   => x_next,
      ready    => ready,
      r        => mult_res
    );

  ------------------------------------------------------------------
  -- post-computation (reduction)
  ------------------------------------------------------------------
  -- a pipeline result larger than the modulus needs one final subtraction
  -- of m; this is an addition of the 2s complement of m (not m, plus 1)
  m_not <= not(m);

  -- lower part: r_l - m_l
  reduction_adder_l : adder_n
    generic map (
      width       => lo_bits,
      block_width => s_bits
    )
    port map (
      core_clk => core_clk,
      a        => m_not(lo_bits-1 downto 0),
      b        => mult_res(lo_bits-1 downto 0),
      cin      => '1', -- +1 for 2s complement
      cout     => red_carry_lo(0),
      r        => red_res(lo_bits-1 downto 0)
    );

  -- the two result bits above the lower part also take part in the
  -- comparison; only the carries are used, the sum outputs stay unconnected
  reduction_adder_l_a : cell_1b_adder
    port map (
      a    => '1', -- for 2s complement of m
      b    => mult_res(lo_bits),
      cin  => red_carry_lo(0),
      cout => red_carry_lo(1)
      --r =>
    );

  reduction_adder_l_b : cell_1b_adder
    port map (
      a    => '1', -- for 2s complement of m
      b    => mult_res(lo_bits+1),
      cin  => red_carry_lo(1),
      cout => red_carry_lo(2)
      -- r =>
    );

  -- higher part: takes the carry of the lower adder when p_sel(0) is set,
  -- otherwise it starts from '1' (+1 for 2s complement)
  red_hi_cin <= '1' when p_sel(0) /= '1' else red_carry_lo(0);

  reduction_adder_h : adder_n
    generic map (
      width       => hi_bits,
      block_width => s_bits
    )
    port map (
      core_clk => core_clk,
      a        => m_not(n-1 downto lo_bits),
      b        => mult_res(n-1 downto lo_bits),
      cin      => red_hi_cin,
      cout     => red_carry_hi(0),
      r        => red_res(n-1 downto lo_bits)
    );

  -- same check on the two top bits of the pipeline result
  reduction_adder_h_a : cell_1b_adder
    port map (
      a    => '1', -- for 2s complement of m
      b    => mult_res(n),
      cin  => red_carry_hi(0),
      cout => red_carry_hi(1)
    );

  reduction_adder_h_b : cell_1b_adder
    port map (
      a    => '1', -- for 2s complement of m
      b    => mult_res(n+1),
      cin  => red_carry_hi(1),
      cout => red_carry_hi(2)
    );

  ------------------------------------------------------------------
  -- result selection
  ------------------------------------------------------------------
  -- the final carry of the higher chain decides when p_sel(1) is set,
  -- the final carry of the lower chain when only the lower part is selected
  use_reduced <= (p_sel(1) and red_carry_hi(2))
              or (p_sel(0) and (not p_sel(1)) and red_carry_lo(2));

  r_mux <= red_res when use_reduced = '1' else mult_res(n-1 downto 0);

  -- output
  r <= r_mux;

end architecture Structural;
/trunk/rtl/vhdl/core/standard_stage.vhd
0,0 → 1,182
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- standard_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- standard stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - d_flip_flop ----
---- - register_n ----
---- - register_1b ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- standard stage for use in the Montgomery multiplier pipeline
-- the result is available after 1 clock cycle
entity standard_stage is
  generic (
    -- number of bits handled by this stage
    width : integer := 32
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- (width)-bit operand inputs: my, y and the modulus m
    my, y, m : in  std_logic_vector((width-1) downto 0);
    -- serial x and q operand bits entering the stage
    xin, qin : in  std_logic;
    -- serial x and q operand bits leaving the stage
    xout, qout : out std_logic;
    -- bit shifted in as MSB of the right-shifted result
    -- (lsb from the next stage)
    a_msb    : in  std_logic;
    -- carry in, and carry out (clocked)
    cin      : in  std_logic;
    cout     : out std_logic;
    -- control signals
    start, reset : in  std_logic;
    done     : out std_logic;
    -- (width)-bit result
    r        : out std_logic_vector((width-1) downto 0)
  );
end entity standard_stage;
 
 
architecture Structural of standard_stage is
  -- outputs of the cell block, before the output registers
  signal sum_comb   : std_logic_vector((width-1) downto 0);
  signal carry_comb : std_logic;
  -- registered result: drives port r and is fed back as operand a
  signal sum_reg    : std_logic_vector((width-1) downto 0);
  -- operand a of the cell block
  signal a_shifted  : std_logic_vector((width-1) downto 0);
begin

  -- operand a: registered result shifted right by one position (/2),
  -- with a_msb inserted as the new MSB
  a_shifted <= a_msb & sum_reg((width-1) downto 1);

  -- the registered result is the stage output
  r <= sum_reg;

  -- (width)-bit standard cell block
  cell_block : standard_cell_block
    generic map (
      width => width
    )
    port map (
      my   => my,
      y    => y,
      m    => m,
      x    => xin,
      q    => qin,
      a    => a_shifted,
      cin  => cin,
      cout => carry_comb,
      r    => sum_comb
    );

  -- output registers: start drives the ce input of every one of them
  -----------------------------------------------------------------------
  -- (width)-bit result register
  result_reg : register_n
    generic map (
      width => width
    )
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => sum_comb,
      dout     => sum_reg
    );

  -- carry out register
  cout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => carry_comb,
      dout     => cout
    );

  -- x operand bit handed on through a register
  xout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => xin,
      dout     => xout
    );

  -- q operand bit handed on through a register
  qout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => qin,
      dout     => qout
    );

  -- done flag: start passed through a flip-flop
  -- (1 cycle after start of stage)
  done_signal : d_flip_flop
    port map (
      core_clk => core_clk,
      reset    => reset,
      din      => start,
      dout     => done
    );

end architecture Structural;
/trunk/rtl/vhdl/core/last_stage.vhd
0,0 → 1,154
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- last_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- last stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - register_n ----
---- - cell_1b ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- last stage for use in the Montgomery multiplier pipeline
-- the result is available after 1 clock cycle
entity last_stage is
  generic (
    -- must be the same as width of the standard stage
    width : integer := 16
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- my operand, (width)-bit
    my       : in  std_logic_vector((width-1) downto 0);
    -- y operand and modulus m, (width-1)-bit each
    y, m     : in  std_logic_vector((width-2) downto 0);
    -- serial x and q operand bits
    xin, qin : in  std_logic;
    -- carry in
    cin      : in  std_logic;
    -- control signals
    start, reset : in  std_logic;
    -- (width+2)-bit result
    r        : out std_logic_vector((width+1) downto 0)
  );
end entity last_stage;
 
 
architecture Structural of last_stage is
  -- operands zero-extended to (width+1) bits
  signal my_ext    : std_logic_vector(width downto 0);
  signal y_ext     : std_logic_vector(width downto 0);
  signal m_ext     : std_logic_vector(width downto 0);
  -- (width+2)-bit result before and after the output register
  signal res_comb  : std_logic_vector((width+1) downto 0);
  signal res_reg   : std_logic_vector((width+1) downto 0);
  -- operand a for the cells
  signal a_shifted : std_logic_vector(width downto 0);
  -- carry from the cell block into the last cell
  signal carry_mid : std_logic;
begin

  -- zero-extend the operands: my gets one leading zero, y and m (which
  -- are one bit narrower) get two
  my_ext <= '0' & my;
  y_ext  <= "00" & y;
  m_ext  <= "00" & m;

  -- operand a: registered result shifted right by one position (/2)
  a_shifted <= res_reg((width+1) downto 1);

  -- the registered result is the stage output
  r <= res_reg;

  -- (width)-bit standard cell block, handles bits (width-1) downto 0
  cell_block : standard_cell_block
    generic map (
      width => width
    )
    port map (
      my   => my_ext(width-1 downto 0),
      y    => y_ext(width-1 downto 0),
      m    => m_ext(width-1 downto 0),
      x    => xin,
      q    => qin,
      a    => a_shifted((width-1) downto 0),
      cin  => cin,
      cout => carry_mid,
      r    => res_comb((width-1) downto 0)
    );

  -- last cell of the pipeline: handles bit (width); its carry out
  -- becomes result bit (width+1)
  last_cell : cell_1b
    port map (
      my   => my_ext(width),
      y    => y_ext(width),
      m    => m_ext(width),
      x    => xin,
      q    => qin,
      a    => a_shifted(width),
      cin  => carry_mid,
      cout => res_comb(width+1),
      r    => res_comb(width)
    );

  -- (width+2)-bit output register; start drives its ce input
  result_reg : register_n
    generic map (
      width => (width+2)
    )
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => res_comb,
      dout     => res_reg
    );

end architecture Structural;
/trunk/rtl/vhdl/core/adder_n.vhd
0,0 → 1,111
-- deprecated design file because of new pipeline structure --
----------------------------------------------------------------------
---- adder_n ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- This file contains the implementation of a n-bit adder ----
---- using adder_blocks, divides the adder in stages ----
---- used for the Montgomery multiplier pre- and post- ----
---- computation adder ----
---- ----
---- Dependencies: ----
---- - adder_block ----
---- ----
---- Author(s): ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- n-bit adder using adder blocks. works in stages, to prevent large
-- carry propagation
-- Result available after (width/block_width) clock cycles
entity adder_n is
  generic (
    -- width of the adder operands
    width       : integer := 1536;
    -- size of each adder block
    block_width : integer := 8
  );
  port (
    -- clock
    core_clk : in  std_logic;
    -- (width)-bit input operands
    a, b     : in  std_logic_vector((width-1) downto 0);
    -- carry in and carry out
    cin      : in  std_logic;
    cout     : out std_logic;
    -- (width)-bit result
    r        : out std_logic_vector((width-1) downto 0)
  );
end entity adder_n;
 
 
architecture Structural of adder_n is
  -- number of blocks/stages that make up the adder
  constant block_count : integer := width/block_width;
  -- carry bits: element 0 is the carry in, element k+1 is the carry out
  -- of block k
  signal carry_chain : std_logic_vector(block_count downto 0);
begin

  -- the operands can only be cut into whole blocks when width is a
  -- multiple of block_width; report a failure otherwise
  assert (width mod block_width) = 0
    report "adder_n: width is not divisible by block_width!!"
    severity failure;

  -- carry in enters the chain, carry out leaves from its far end
  carry_chain(0) <= cin;
  cout           <= carry_chain(block_count);

  -- chain of (block_count) adder_blocks; block k handles operand bits
  -- (k+1)*block_width-1 downto k*block_width
  adder_block_chain : for blk in 0 to (block_count-1) generate
    constant lsb : integer := blk*block_width;
    constant msb : integer := lsb + block_width - 1;
  begin
    adder_blocks : adder_block
      generic map (
        width => block_width
      )
      port map (
        core_clk => core_clk,
        a        => a(msb downto lsb),
        b        => b(msb downto lsb),
        cin      => carry_chain(blk),
        cout     => carry_chain(blk+1),
        r        => r(msb downto lsb)
      );
  end generate adder_block_chain;

end architecture Structural;
/trunk/sim/Makefile
9,6 → 9,7
##
CORE_SRC =$(HDL_DIR)/core/mod_sim_exp_pkg.vhd \
$(HDL_DIR)/core/adder_block.vhd \
$(HDL_DIR)/core/adder_n.vhd \
$(HDL_DIR)/core/autorun_cntrl.vhd \
$(HDL_DIR)/core/cell_1b_adder.vhd \
$(HDL_DIR)/core/cell_1b_mux.vhd \
16,8 → 17,11
$(HDL_DIR)/core/counter_sync.vhd \
$(HDL_DIR)/core/d_flip_flop.vhd \
$(HDL_DIR)/core/fifo_primitive.vhd \
$(HDL_DIR)/core/first_stage.vhd \
$(HDL_DIR)/core/last_stage.vhd \
$(HDL_DIR)/core/modulus_ram.vhd \
$(HDL_DIR)/core/mont_ctrl.vhd \
$(HDL_DIR)/core/mont_mult_sys_pipeline.vhd \
$(HDL_DIR)/core/mod_sim_exp_core.vhd \
$(HDL_DIR)/core/operand_dp.vhd \
$(HDL_DIR)/core/operand_mem.vhd \
26,7 → 30,9
$(HDL_DIR)/core/register_1b.vhd \
$(HDL_DIR)/core/register_n.vhd \
$(HDL_DIR)/core/standard_cell_block.vhd \
$(HDL_DIR)/core/standard_stage.vhd \
$(HDL_DIR)/core/stepping_logic.vhd \
$(HDL_DIR)/core/systolic_pipeline.vhd \
$(HDL_DIR)/core/x_shift_reg.vhd \
$(HDL_DIR)/core/sys_stage.vhd \
$(HDL_DIR)/core/sys_last_cell_logic.vhd \

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.