URL
https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk
Subversion Repositories mod_sim_exp
Compare Revisions
- This comparison shows the changes necessary to convert path
/mod_sim_exp/trunk/rtl/vhdl
- from Rev 24 to Rev 25
- ↔ Reverse comparison
Rev 24 → Rev 25
/core/sys_stage.vhd
0,0 → 1,226
---------------------------------------------------------------------- |
---- sys_stage ---- |
---- ---- |
---- This file is part of the ---- |
---- Modular Simultaneous Exponentiation Core project ---- |
---- http://www.opencores.org/cores/mod_sim_exp/ ---- |
---- ---- |
---- Description ---- |
---- stage for use in the Montgomery multiplier pipelined ---- |
---- systolic array ---- |
---- ---- |
---- Dependencies: ---- |
---- - adder_block ---- |
---- - standard_cell_block ---- |
---- - d_flip_flop ---- |
---- - register_n ---- |
---- - register_1b ---- |
---- ---- |
---- Authors: ---- |
---- - Geoffrey Ottoy, DraMCo research group ---- |
---- - Jonas De Craene, JonasDC@opencores.org ---- |
---- ---- |
---------------------------------------------------------------------- |
---- ---- |
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ---- |
---- ---- |
---- This source file may be used and distributed without ---- |
---- restriction provided that this copyright statement is not ---- |
---- removed from the file and that any derivative work contains ---- |
---- the original copyright notice and the associated disclaimer. ---- |
---- ---- |
---- This source file is free software; you can redistribute it ---- |
---- and/or modify it under the terms of the GNU Lesser General ---- |
---- Public License as published by the Free Software Foundation; ---- |
---- either version 2.1 of the License, or (at your option) any ---- |
---- later version. ---- |
---- ---- |
---- This source is distributed in the hope that it will be ---- |
---- useful, but WITHOUT ANY WARRANTY; without even the implied ---- |
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ---- |
---- PURPOSE. See the GNU Lesser General Public License for more ---- |
---- details. ---- |
---- ---- |
---- You should have received a copy of the GNU Lesser General ---- |
---- Public License along with this source; if not, download it ---- |
---- from http://www.opencores.org/lgpl.shtml ---- |
---- ---- |
---------------------------------------------------------------------- |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.std_logic_unsigned.all; |
|
library mod_sim_exp; |
use mod_sim_exp.mod_sim_exp_pkg.all; |
|
-- sys_stage
-- One stage of the pipelined systolic array. Every vector port is sized
-- from the generic `width`; the modulus slice carries one extra bit.
entity sys_stage is
  generic (
    width : integer := 32  -- width of the stage (# bits)
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- y operand slice (width bits) and modulus slice (width+1 bits)
    y        : in  std_logic_vector(width-1 downto 0);
    m        : in  std_logic_vector(width downto 0);
    -- carry in / carry out of the m+y adder
    my_cin   : in  std_logic;
    my_cout  : out std_logic;

    -- serial x and q operand inputs
    xin      : in  std_logic;
    qin      : in  std_logic;
    -- serial x and q operand outputs (registered copies of xin / qin)
    xout     : out std_logic;
    qout     : out std_logic;

    -- msb input (lsb of the next stage, used for the shift-right operation)
    a_msb    : in  std_logic;
    -- lsb of this stage's registered result
    a_0      : out std_logic;

    -- cell carry in, and clocked cell carry out
    cin      : in  std_logic;
    cout     : out std_logic;

    -- carries of the reduction adder
    red_cin  : in  std_logic;
    red_cout : out std_logic;

    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    done     : out std_logic;  -- start, delayed through a flip-flop

    -- result out
    r_sel    : in  std_logic;  -- result selection: 0 -> pipeline result, 1 -> reduced result
    r        : out std_logic_vector(width-1 downto 0)
  );
end entity sys_stage;
|
-- Structural architecture of sys_stage.
-- Instantiates, in order:
--   * my_adder        : adder_block on m(width downto 1) and y
--   * sys_cells       : standard_cell_block computing the next partial result
--   * result_reg      : register_n holding that partial result
--   * reduction_adder : adder_block on not(m(width-1 downto 0)) and the
--                       registered result
--   * done_signal, xout_reg, qout_reg, cout_reg : clocked stage outputs
architecture Structural of sys_stage is

  -- output of my_adder, fed to the cells' `my` input
  signal my_sum     : std_logic_vector(width-1 downto 0);
  -- bitwise inverse of the lower width bits of m
  signal m_not      : std_logic_vector(width-1 downto 0);
  -- registered result shifted right by one, a_msb inserted on top
  signal a_shifted  : std_logic_vector(width-1 downto 0);
  -- combinational and registered cell results
  signal cell_r     : std_logic_vector(width-1 downto 0);
  signal cell_r_reg : std_logic_vector(width-1 downto 0);
  -- output of the reduction adder
  signal reduced_r  : std_logic_vector(width-1 downto 0);
  -- unregistered carry out of the cell block
  signal cell_cout  : std_logic;

begin

  ---------------------------------------------------------------------
  -- my adder
  ---------------------------------------------------------------------
  my_adder : adder_block
    generic map (
      width => width
    )
    port map (
      core_clk => core_clk,
      a        => m(width downto 1),
      b        => y,
      cin      => my_cin,
      cout     => my_cout,
      r        => my_sum
    );

  ---------------------------------------------------------------------
  -- systolic pipeline cells
  ---------------------------------------------------------------------
  -- shift right: bit 0 leaves through a_0, a_msb enters at the top
  a_0       <= cell_r_reg(0);
  a_shifted <= a_msb & cell_r_reg(width-1 downto 1);

  sys_cells : standard_cell_block
    generic map (
      width => width
    )
    port map (
      -- modulus and y operand inputs (width bits)
      my   => my_sum,
      y    => y,
      m    => m(width downto 1),
      -- serial q and x operand inputs
      x    => xin,
      q    => qin,
      -- previous result in (width bits)
      a    => a_shifted,
      -- carry in and out
      cin  => cin,
      cout => cell_cout,
      -- result out (width bits)
      r    => cell_r
    );

  -- cell result register (width bits), enabled by start
  result_reg : register_n
    generic map (
      width => width
    )
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => cell_r,
      dout     => cell_r_reg
    );

  ---------------------------------------------------------------------
  -- result reduction
  ---------------------------------------------------------------------
  m_not <= not m(width-1 downto 0);

  reduction_adder : adder_block
    generic map (
      width => width
    )
    port map (
      core_clk => core_clk,
      a        => m_not,
      b        => cell_r_reg,
      cin      => red_cin,
      cout     => red_cout,
      r        => reduced_r
    );

  -- r_sel = '0' selects the pipeline result, any other value the
  -- reduction adder output
  r <= cell_r_reg when r_sel = '0' else reduced_r;

  ---------------------------------------------------------------------
  -- stage clocked outputs
  ---------------------------------------------------------------------
  -- stage done signal: start passed through a flip-flop
  done_signal : d_flip_flop
    port map (
      core_clk => core_clk,
      reset    => reset,
      din      => start,
      dout     => done
    );

  -- xout register
  xout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => xin,
      dout     => xout
    );

  -- qout register
  qout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => qin,
      dout     => qout
    );

  -- carry out register
  cout_reg : register_1b
    port map (
      core_clk => core_clk,
      ce       => start,
      reset    => reset,
      din      => cell_cout,
      dout     => cout
    );

end architecture Structural;
|
/core/sys_pipeline.vhd
0,0 → 1,224
---------------------------------------------------------------------- |
---- sys_pipeline ---- |
---- ---- |
---- This file is part of the ---- |
---- Modular Simultaneous Exponentiation Core project ---- |
---- http://www.opencores.org/cores/mod_sim_exp/ ---- |
---- ---- |
---- Description ---- |
---- the pipelined systolic array for a Montgomery multiplier ---- |
---- ---- |
---- Dependencies: ---- |
---- - sys_stage ---- |
---- - register_n ---- |
---- - d_flip_flop ---- |
---- - cell_1b_adder ---- |
---- - cell_1b_mux ---- |
---- ---- |
---- Authors: ---- |
---- - Geoffrey Ottoy, DraMCo research group ---- |
---- - Jonas De Craene, JonasDC@opencores.org ---- |
---- ---- |
---------------------------------------------------------------------- |
---- ---- |
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ---- |
---- ---- |
---- This source file may be used and distributed without ---- |
---- restriction provided that this copyright statement is not ---- |
---- removed from the file and that any derivative work contains ---- |
---- the original copyright notice and the associated disclaimer. ---- |
---- ---- |
---- This source file is free software; you can redistribute it ---- |
---- and/or modify it under the terms of the GNU Lesser General ---- |
---- Public License as published by the Free Software Foundation; ---- |
---- either version 2.1 of the License, or (at your option) any ---- |
---- later version. ---- |
---- ---- |
---- This source is distributed in the hope that it will be ---- |
---- useful, but WITHOUT ANY WARRANTY; without even the implied ---- |
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ---- |
---- PURPOSE. See the GNU Lesser General Public License for more ---- |
---- details. ---- |
---- ---- |
---- You should have received a copy of the GNU Lesser General ---- |
---- Public License along with this source; if not, download it ---- |
---- from http://www.opencores.org/lgpl.shtml ---- |
---- ---- |
---------------------------------------------------------------------- |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.std_logic_unsigned.all; |
|
library mod_sim_exp; |
use mod_sim_exp.mod_sim_exp_pkg.all; |
|
-- sys_pipeline
-- The pipelined systolic array for a Montgomery multiplier: a structural
-- description built from t systolic stages, each n/t bits wide.
--
-- NOTE(review): the generic `tl` and the port `p_sel` are declared here but
-- are not referenced by the Structural architecture in this revision.
entity sys_pipeline is
  generic (
    n  : integer := 1536;  -- width of the operands (# bits)
    t  : integer := 192;   -- total number of stages (divider of n) >= 2
    tl : integer := 64     -- lower number of stages (best take t = sqrt(n))
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- modulus and y operand inputs (n bits)
    y        : in  std_logic_vector(n-1 downto 0);
    m        : in  std_logic_vector(n-1 downto 0);

    -- serial x operand input
    xi       : in  std_logic;
    next_x   : out std_logic;  -- request for the next x operand bit

    -- control signals
    start    : in  std_logic;  -- start multiplier
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);  -- select which piece of the pipeline will be used

    -- result out (n bits)
    r        : out std_logic_vector(n-1 downto 0)
  );
end entity sys_pipeline;
|
-- Structural architecture of sys_pipeline.
-- Generates t instances of sys_stage (each s = n/t bits wide), chains their
-- carry / x / q / start signals, and adds the logic for the first cell
-- (bit 0) and the last cell (two extra high bits and the final reduction
-- carries that produce r_sel).
architecture Structural of sys_pipeline is

  -- width of one stage (# bits)
  constant s : integer := n/t;

  -- m and y extended with a leading '0' to n+1 bits
  signal m_i : std_logic_vector(n downto 0);
  signal y_i : std_logic_vector(n downto 0);

  -- systolic stages signals, one bit per stage
  signal my_cin_stage   : std_logic_vector((t-1) downto 0);
  signal my_cout_stage  : std_logic_vector((t-1) downto 0);
  signal xin_stage      : std_logic_vector((t-1) downto 0);
  signal qin_stage      : std_logic_vector((t-1) downto 0);
  signal xout_stage     : std_logic_vector((t-1) downto 0);
  signal qout_stage     : std_logic_vector((t-1) downto 0);
  signal a_msb_stage    : std_logic_vector((t-1) downto 0);
  signal a_0_stage      : std_logic_vector((t-1) downto 0);
  signal cin_stage      : std_logic_vector((t-1) downto 0);
  signal cout_stage     : std_logic_vector((t-1) downto 0);
  signal red_cin_stage  : std_logic_vector((t-1) downto 0);
  signal red_cout_stage : std_logic_vector((t-1) downto 0);
  signal start_stage    : std_logic_vector((t-1) downto 0);
  signal done_stage     : std_logic_vector((t-1) downto 0);
  -- result selection shared by all stages
  signal r_sel          : std_logic;

  -- first cell signals
  signal my0_mux_result : std_logic;
  signal my0            : std_logic;

  -- last cell signals
  signal a_high         : std_logic_vector(1 downto 0);
  signal a_high_reg     : std_logic_vector(1 downto 0);
  signal red_cout_end   : std_logic_vector(1 downto 0);

begin

  -- s = n/t is an integer division. If t does not divide n, the t stages
  -- only cover t*s < n bits and the upper bits of r would stay undriven,
  -- so reject such a configuration at elaboration time.
  assert (n mod t) = 0
    report "sys_pipeline: number of stages t must be a divider of operand width n"
    severity failure;

  m_i <= '0' & m;
  y_i <= '0' & y;

  -- generate the stages for the full pipeline
  -- stage i receives y_i bits (i+1)*s .. i*s+1 and m_i bits (i+1)*s .. i*s,
  -- and drives r bits (i+1)*s-1 .. i*s
  pipeline_stages : for i in 0 to (t-1) generate
    stage : sys_stage
      generic map (
        width => s
      )
      port map (
        core_clk => core_clk,
        y        => y_i((i+1)*s downto (i*s)+1),
        m        => m_i((i+1)*s downto (i*s)),
        my_cin   => my_cin_stage(i),
        my_cout  => my_cout_stage(i),
        xin      => xin_stage(i),
        qin      => qin_stage(i),
        xout     => xout_stage(i),
        qout     => qout_stage(i),
        a_0      => a_0_stage(i),
        a_msb    => a_msb_stage(i),
        cin      => cin_stage(i),
        cout     => cout_stage(i),
        red_cin  => red_cin_stage(i),
        red_cout => red_cout_stage(i),
        start    => start_stage(i),
        reset    => reset,
        done     => done_stage(i),
        r_sel    => r_sel,
        r        => r(((i+1)*s)-1 downto (i*s))
      );
  end generate;

  -- link stages to each other: outputs of stage i-1 feed stage i, and the
  -- lsb of stage i is fed back as the msb input of stage i-1
  stage_connect : for i in 1 to (t-1) generate
    my_cin_stage(i)  <= my_cout_stage(i-1);
    cin_stage(i)     <= cout_stage(i-1);
    xin_stage(i)     <= xout_stage(i-1);
    qin_stage(i)     <= qout_stage(i-1);
    red_cin_stage(i) <= red_cout_stage(i-1);
    start_stage(i)   <= done_stage(i-1);
    a_msb_stage(i-1) <= a_0_stage(i);
  end generate;

  -- first cell logic
  --------------------
  my0 <= m_i(0) xor y_i(0);               -- m0 + y0 (sum bit)
  -- stage 0 connections
  my_cin_stage(0)  <= m_i(0) and y_i(0);  -- m0 + y0 carry
  xin_stage(0)     <= xi;
  qin_stage(0)     <= (xi and y_i(0)) xor a_0_stage(0);
  cin_stage(0)     <= my0_mux_result and a_0_stage(0);
  red_cin_stage(0) <= '1';                -- add 1 for 2s complement
  start_stage(0)   <= start;

  my0_mux : cell_1b_mux
    port map (
      my     => my0,
      m      => m_i(0),
      y      => y_i(0),
      x      => xin_stage(0),
      q      => qin_stage(0),
      result => my0_mux_result
    );

  -- the done signal of stage 0 requests the next x operand bit
  next_x <= done_stage(0);

  -- last cell logic
  -------------------
  -- half adder: cout_stage(t-1) + a_high_reg(1)
  a_high(0) <= cout_stage(t-1) xor a_high_reg(1);  -- result
  a_high(1) <= cout_stage(t-1) and a_high_reg(1);  -- cout

  a_msb_stage(t-1) <= a_high_reg(0);

  -- 2-bit register for the high bits, enabled by the last stage's done
  last_reg : register_n
    generic map (
      width => 2
    )
    port map (
      core_clk => core_clk,
      ce       => done_stage(t-1),
      reset    => reset,
      din      => a_high,
      dout     => a_high_reg
    );

  -- reduction finishing last 2 bits; only the carry outputs are used
  reduction_adder_a : cell_1b_adder
    port map (
      a    => '1',  -- for 2s complement of m
      b    => a_high_reg(0),
      cin  => red_cout_stage(t-1),
      cout => red_cout_end(0)
    );

  reduction_adder_b : cell_1b_adder
    port map (
      a    => '1',  -- for 2s complement of m
      b    => a_high_reg(1),
      cin  => red_cout_end(0),
      cout => red_cout_end(1)
    );

  -- the final reduction carry selects which result every stage outputs
  r_sel <= red_cout_end(1);

end architecture Structural;
/core/mod_sim_exp_pkg.vhd
655,4 → 655,96
); |
end component operands_sp; |
|
|
--------------------------------------------------------------------
-- sys_stage
--------------------------------------------------------------------
-- component declaration for one stage of the pipelined systolic array
--
component sys_stage is
  generic (
    width : integer := 32  -- width of the stage (# bits)
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- y operand slice (width bits) and modulus slice (width+1 bits)
    y        : in  std_logic_vector(width-1 downto 0);
    m        : in  std_logic_vector(width downto 0);
    my_cin   : in  std_logic;
    my_cout  : out std_logic;

    -- serial x and q operand inputs
    xin      : in  std_logic;
    qin      : in  std_logic;
    -- serial x and q operand outputs
    xout     : out std_logic;
    qout     : out std_logic;

    -- msb input (lsb of the next stage, used for the shift-right operation)
    a_msb    : in  std_logic;
    a_0      : out std_logic;

    -- carry in, and clocked carry out
    cin      : in  std_logic;
    cout     : out std_logic;

    -- carries of the reduction adder
    red_cin  : in  std_logic;
    red_cout : out std_logic;

    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    done     : out std_logic;

    -- result out
    r_sel    : in  std_logic;  -- result selection: 0 -> pipeline result, 1 -> reduced result
    r        : out std_logic_vector(width-1 downto 0)
  );
end component sys_stage;
|
|
--------------------------------------------------------------------
-- sys_pipeline
--------------------------------------------------------------------
-- The pipelined systolic array for a Montgomery multiplier.
-- Structural description of the pipeline, built from systolic stages.
--
component sys_pipeline is
  generic (
    n  : integer := 1536;  -- width of the operands (# bits)
    t  : integer := 192;   -- total number of stages (divider of n) >= 2
    tl : integer := 64     -- lower number of stages (best take t = sqrt(n))
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- modulus and y operand inputs (n bits)
    y        : in  std_logic_vector(n-1 downto 0);
    m        : in  std_logic_vector(n-1 downto 0);

    -- serial x operand input
    xi       : in  std_logic;
    next_x   : out std_logic;  -- next x operand bit

    -- control signals
    start    : in  std_logic;  -- start multiplier
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);  -- select which piece of the pipeline will be used

    -- result out (n bits)
    r        : out std_logic_vector(n-1 downto 0)
  );
end component sys_pipeline;
|
--------------------------------------------------------------------
-- mont_multiplier
--------------------------------------------------------------------
-- n-bit Montgomery multiplier with a pipelined systolic array
--
component mont_multiplier is
  generic (
    n          : integer := 1536;  -- width of the operands
    nr_stages  : integer := 96;    -- total number of stages
    stages_low : integer := 32     -- lower number of stages
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- operand inputs
    xy       : in  std_logic_vector(n-1 downto 0);  -- bus for x or y operand
    m        : in  std_logic_vector(n-1 downto 0);  -- modulus

    -- result output
    r        : out std_logic_vector(n-1 downto 0);  -- result

    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);
    load_x   : in  std_logic;
    ready    : out std_logic
  );
end component mont_multiplier;
|
end package mod_sim_exp_pkg; |
/core/mont_multiplier.vhd
0,0 → 1,167
---------------------------------------------------------------------- |
---- mont_multiplier ---- |
---- ---- |
---- This file is part of the ---- |
---- Modular Simultaneous Exponentiation Core project ---- |
---- http://www.opencores.org/cores/mod_sim_exp/ ---- |
---- ---- |
---- Description ---- |
---- n-bit montgomery multiplier with a pipelined systolic ---- |
---- array ---- |
---- ---- |
---- Dependencies: ---- |
---- - x_shift_reg ---- |
---- - adder_n ---- |
---- - d_flip_flop ---- |
---- - stepping_logic ---- |
---- - sys_pipeline ---- |
---- - cell_1b_adder ---- |
---- ---- |
---- Authors: ---- |
---- - Geoffrey Ottoy, DraMCo research group ---- |
---- - Jonas De Craene, JonasDC@opencores.org ---- |
---- ---- |
---------------------------------------------------------------------- |
---- ---- |
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ---- |
---- ---- |
---- This source file may be used and distributed without ---- |
---- restriction provided that this copyright statement is not ---- |
---- removed from the file and that any derivative work contains ---- |
---- the original copyright notice and the associated disclaimer. ---- |
---- ---- |
---- This source file is free software; you can redistribute it ---- |
---- and/or modify it under the terms of the GNU Lesser General ---- |
---- Public License as published by the Free Software Foundation; ---- |
---- either version 2.1 of the License, or (at your option) any ---- |
---- later version. ---- |
---- ---- |
---- This source is distributed in the hope that it will be ---- |
---- useful, but WITHOUT ANY WARRANTY; without even the implied ---- |
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ---- |
---- PURPOSE. See the GNU Lesser General Public License for more ---- |
---- details. ---- |
---- ---- |
---- You should have received a copy of the GNU Lesser General ---- |
---- Public License along with this source; if not, download it ---- |
---- from http://www.opencores.org/lgpl.shtml ---- |
---- ---- |
---------------------------------------------------------------------- |
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.std_logic_unsigned.all; |
|
library mod_sim_exp; |
use mod_sim_exp.mod_sim_exp_pkg.all; |
|
-- mont_multiplier
-- Structural description of the Montgomery multiply pipeline. It contains
-- the x operand shift register, the stepping control logic and the systolic
-- pipeline. To do a multiplication, the following actions must take place:
--
--   * load the x operand into the shift register using the xy bus and load_x
--   * place the y operand on the xy bus for the rest of the operation
--   * generate a start pulse of 1 clk cycle long on start
--   * wait for the ready signal
--   * the result is available on the r bus
--
entity mont_multiplier is
  generic (
    n          : integer := 1536;  -- width of the operands
    nr_stages  : integer := 96;    -- total number of stages
    stages_low : integer := 32     -- lower number of stages
  );
  port (
    -- clock input
    core_clk : in  std_logic;

    -- operand inputs
    xy       : in  std_logic_vector(n-1 downto 0);  -- bus for x or y operand
    m        : in  std_logic_vector(n-1 downto 0);  -- modulus

    -- result output
    r        : out std_logic_vector(n-1 downto 0);  -- result

    -- control signals
    start    : in  std_logic;
    reset    : in  std_logic;
    p_sel    : in  std_logic_vector(1 downto 0);
    load_x   : in  std_logic;
    ready    : out std_logic
  );
end entity mont_multiplier;
|
-- Structural architecture of mont_multiplier.
-- Instantiates:
--   * delay_1_cycle    : d_flip_flop delaying start by one clock
--   * x_selection      : x_shift_reg holding x and emitting it serially
--   * stepping_control : stepping_logic starting the first stage and
--                        raising ready when stepping is done
--   * systolic_array   : sys_pipeline performing the multiplication
architecture Structural of mont_multiplier is

  -- reset / start as seen by the stepping logic and the pipeline
  signal reset_multiplier  : std_logic;
  signal start_multiplier  : std_logic;

  -- serial x operand bit and the request for the next one
  signal next_xi           : std_logic;
  signal xi                : std_logic;

  -- start pulse for the first pipeline stage
  signal start_first_stage : std_logic;

begin

  -- sys_pipeline uses a stage width of n/nr_stages (integer division), so
  -- nr_stages has to divide n; reject other configurations at elaboration.
  assert (n mod nr_stages) = 0
    report "mont_multiplier: nr_stages must be a divider of operand width n"
    severity failure;

  -- multiplier is reset on every new calculation (start) or on reset
  reset_multiplier <= reset or start;

  -- start is delayed 1 cycle
  delay_1_cycle : d_flip_flop
    port map (
      core_clk => core_clk,
      reset    => reset,
      din      => start,
      dout     => start_multiplier
    );

  -- register to store the x value in
  -- outputs the operand in serial using a shift register
  x_selection : x_shift_reg
    generic map (
      n  => n,
      t  => nr_stages,
      tl => stages_low
    )
    port map (
      clk    => core_clk,
      reset  => reset,
      x_in   => xy,
      load_x => load_x,
      next_x => next_xi,
      p_sel  => p_sel,
      xi     => xi
    );

  -- stepping control logic to keep track of the multiplication and when
  -- it is done
  stepping_control : stepping_logic
    generic map (
      n => n,         -- max nr of steps required to complete a multiplication
      t => nr_stages  -- total nr of steps in the pipeline
    )
    port map (
      core_clk          => core_clk,
      start             => start_multiplier,
      reset             => reset_multiplier,
      t_sel             => nr_stages,
      n_sel             => n-1,
      start_first_stage => start_first_stage,
      stepping_done     => ready
    );

  -- the systolic pipeline; y is taken from the xy bus
  systolic_array : sys_pipeline
    generic map (
      n  => n,
      t  => nr_stages,
      tl => stages_low
    )
    port map (
      core_clk => core_clk,
      y        => xy,
      m        => m,
      xi       => xi,
      next_x   => next_xi,
      start    => start_first_stage,
      reset    => reset_multiplier,
      p_sel    => p_sel,
      r        => r
    );

end architecture Structural;
|