OpenCores
URL https://opencores.org/ocsvn/mod_sim_exp/mod_sim_exp/trunk

Subversion Repositories mod_sim_exp

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /mod_sim_exp/trunk/rtl/vhdl/core
    from Rev 2 to Rev 3
    Reverse comparison

Rev 2 → Rev 3

/std_logic_textio.vhd File deleted \ No newline at end of file
/autorun_cntrl.vhd
1,83 → 1,99
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: autorun_cntrl.vhd / entity autorun_cntrl
--
-- Last Modified: 25/04/2012
--
-- Description: autorun control unit for a pipelined montgomery multiplier
--
--
-- Dependencies: none
--
-- Revision 2.00 - Major bug fix: bit_counter should count from 15 downto 0.
-- Revision 1.00 - Architecture created
-- Revision 0.01 - File Created
-- Additional Comments:
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- autorun_ctrl ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- autorun control unit for a pipelined montgomery ----
---- multiplier ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
 
-- Entity autorun_cntrl: port interface of the autorun control unit for a
-- pipelined Montgomery multiplier (description taken from the file header).
-- NOTE(review): this text comes from a Rev 2 -> Rev 3 comparison listing, so two
-- port clauses appear back to back. The upper-case `Port ( ... )` clause appears
-- to be the Rev 2 text and the lower-case `port ( ... );` clause its Rev 3
-- replacement; both declare the same ten ports with the same modes and widths.
-- A compilable file contains only one of them.
entity autorun_cntrl is
Port ( clk : in STD_LOGIC;
reset : in STD_LOGIC;
start : in STD_LOGIC;
done : out STD_LOGIC;
op_sel : out STD_LOGIC_VECTOR (1 downto 0);
start_multiplier : out STD_LOGIC;
multiplier_done : in STD_LOGIC;
read_buffer : out STD_LOGIC;
buffer_din : in STD_LOGIC_VECTOR (31 downto 0);
buffer_empty : in STD_LOGIC);
port (
clk : in std_logic;
reset : in std_logic;
start : in std_logic; -- presumably starts an autorun sequence; confirm against the architecture
done : out std_logic; -- driven from the internal done_i signal in the architecture
op_sel : out std_logic_vector (1 downto 0); -- 2-bit operand selection output
start_multiplier : out std_logic;
multiplier_done : in std_logic;
read_buffer : out std_logic;
buffer_din : in std_logic_vector (31 downto 0); -- 32-bit word read from the buffer
buffer_empty : in std_logic
);
end autorun_cntrl;
 
 
architecture Behavioral of autorun_cntrl is
 
signal bit_counter_i : integer range 0 to 15 := 0;
signal bit_counter_0_i : std_logic;
signal bit_counter_15_i : std_logic;
signal next_bit_i : std_logic := '0';
signal next_bit_del_i : std_logic;
signal start_cycle_i : std_logic := '0';
signal start_cycle_del_i : std_logic;
signal bit_counter_i : integer range 0 to 15 := 0;
signal bit_counter_0_i : std_logic;
signal bit_counter_15_i : std_logic;
signal next_bit_i : std_logic := '0';
signal next_bit_del_i : std_logic;
signal start_cycle_i : std_logic := '0';
signal start_cycle_del_i : std_logic;
signal done_i : std_logic;
signal start_i : std_logic;
signal running_i : std_logic;
signal start_multiplier_i : std_logic;
signal start_multiplier_del_i : std_logic;
signal mult_done_del_i : std_logic;
signal e0_i : std_logic_vector(15 downto 0);
signal e1_i : std_logic_vector(15 downto 0);
signal e0_bit_i : std_logic;
signal e1_bit_i : std_logic;
signal e_bits_i : std_logic_vector(1 downto 0);
signal e_bits_0_i : std_logic;
signal cycle_counter_i : std_logic;
signal op_sel_sel_i : std_logic;
signal op_sel_i : std_logic_vector(1 downto 0);
begin
 
signal done_i : std_logic;
signal start_i : std_logic;
signal running_i : std_logic;
signal start_multiplier_i : std_logic;
signal start_multiplier_del_i : std_logic;
signal mult_done_del_i : std_logic;
signal e0_i : std_logic_vector(15 downto 0);
signal e1_i : std_logic_vector(15 downto 0);
signal e0_bit_i : std_logic;
signal e1_bit_i : std_logic;
signal e_bits_i : std_logic_vector(1 downto 0);
signal e_bits_0_i : std_logic;
signal cycle_counter_i : std_logic;
signal op_sel_sel_i : std_logic;
signal op_sel_i : std_logic_vector(1 downto 0);
begin
--done <= (multiplier_done and (not running_i)) or (start and buffer_empty);
done <= done_i;
-- the two exponents
/cell_1b_adder.vhd
1,54 → 1,69
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: cell_1b_adder.vhd / entity cell_1b_adder
--
-- Last Modified: 18/11/2011
--
-- Description: full adder for use in the Montgomery multiplier systolic array
-- currently a behavioral description
--
--
-- Dependencies: none
--
-- Revision:
-- Revision 2.00 - Major error resolved (carry & sum output were switched)
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- cell_1b_adder ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- This file contains the implementation of a 1-bit full ----
---- adder cell using logic gates ----
---- used in adder_block ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
 
-- Entity cell_1b_adder: port interface of a 1-bit full adder cell.
-- NOTE(review): this is a revision-comparison listing; the upper-case
-- `Port ( ... )` clause appears to be the Rev 2 text and the lower-case
-- `port ( ... );` clause its Rev 3 replacement. Both declare the same ports.
entity cell_1b_adder is
Port ( a : in STD_LOGIC;
mux_result : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
port (
a : in std_logic; -- first addend bit
mux_result : in std_logic; -- second addend bit
cin : in std_logic; -- carry in
cout : out std_logic; -- carry out
r : out std_logic -- sum bit
);
end cell_1b_adder;
 
 
-- Gate-level full adder: r is the sum bit and cout the carry of
-- a + mux_result + cin.
-- NOTE(review): the signal declaration and the three concurrent assignments
-- each appear twice because this listing interleaves the Rev 2 and Rev 3 text
-- of the same lines (apparently whitespace-only changes); a compilable file
-- has each of them once.
architecture Behavioral of cell_1b_adder is
signal a_xor_mux_result: std_logic;
signal a_xor_mux_result : std_logic;
begin
-- partial sum, shared by the sum and carry expressions
a_xor_mux_result <= a xor mux_result;
-- sum bit: a xor mux_result xor cin
r <= a_xor_mux_result xor cin;
-- carry: both addends set, or carry-in together with exactly one addend set
cout <= (a and mux_result) or (cin and a_xor_mux_result);
a_xor_mux_result <= a xor mux_result;
r <= a_xor_mux_result xor cin;
cout <= (a and mux_result) or (cin and a_xor_mux_result);
end Behavioral;
 
/x_shift_reg.vhd
1,60 → 1,76
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: x_shift_reg.vhd / entity x_shift_reg
--
-- Last Modified: 18/06/2012
--
-- Description: n-bit shift register with lsb output
--
--
-- Dependencies: none
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- x_shift_reg ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- n-bit shift register (n defaults to 1536) with lsb output ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
 
-- Entity x_shift_reg: generics and ports of the n-bit shift register that
-- supplies one bit of x at a time on x_i.
-- NOTE(review): this is a revision-comparison listing; the first
-- generic/port pair (upper-case STD_LOGIC) appears to be the Rev 2 text and
-- the second pair (lower-case std_logic) its Rev 3 replacement. Both declare
-- the same generics and ports.
entity x_shift_reg is
generic( n : integer := 1536;
t : integer := 48;
tl : integer := 16
);
port( clk : in STD_LOGIC;
reset : in STD_LOGIC;
x_in : in STD_LOGIC_VECTOR((n-1) downto 0);
load_x : in STD_LOGIC;
next_x : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0);
x_i : out STD_LOGIC
);
generic(
n : integer := 1536; -- width of x_in and of the internal register x_reg_i
t : integer := 48; -- divisor in the architecture's stage count s = n/t
tl : integer := 16 -- factor in the architecture's bit offset = s*tl (start of the higher pipeline part)
);
port(
clk : in std_logic;
reset : in std_logic; -- the architecture clears the register when reset = '1'
x_in : in std_logic_vector((n-1) downto 0);
load_x : in std_logic; -- presumably loads x_in into the register; process body not visible here
next_x : in std_logic; -- presumably advances the shift by one position; process body not visible here
p_sel : in std_logic_vector(1 downto 0); -- "10" routes register bit `offset` to x_i, any other value routes bit 0
x_i : out std_logic
);
end x_shift_reg;
 
 
architecture Behavioral of x_shift_reg is
signal x_reg_i : std_logic_vector((n-1) downto 0); -- register
constant s : integer := n/t; -- nr of stages
constant offset : integer := s*tl; -- calculate startbit pos of higher part of pipeline
signal x_reg_i : std_logic_vector((n-1) downto 0); -- register
constant s : integer := n/t; -- nr of stages
constant offset : integer := s*tl; -- calculate startbit pos of higher part of pipeline
begin
 
REG_PROC: process(reset, clk)
begin
if reset = '1' then -- Reset, clear the register
71,8 → 87,7
end process;
 
with p_sel select -- pipeline select
x_i <= x_reg_i(offset) when "10", -- use bit at offset for high part of pipeline
x_reg_i(0) when others; -- use LS bit for lower part of pipeline
x_i <= x_reg_i(offset) when "10", -- use bit at offset for high part of pipeline
x_reg_i(0) when others; -- use LS bit for lower part of pipeline
 
end Behavioral;
 
/register_n.vhd
1,53 → 1,71
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: register_n.vhd / entity register_n
--
-- Last Modified: 24/11/2011
--
-- Description: n bit register
--
--
-- Dependencies: FDCE
--
-- Revision:
-- Revision 3.00 - Replaced LDCE primitive with FDCE primitive
-- Revision 2.00 - Replaced behavioral architecture with structural using FPGA
-- primitives.
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- register_n ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- n bit register ----
---- used in Montgomery multiplier systolic array stages ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
-- Xilinx primitives used
library UNISIM;
use UNISIM.VComponents.all;
 
 
-- Entity register_n: generic and ports of an n-bit register.
-- NOTE(review): this is a revision-comparison listing; the first
-- generic/port pair (upper-case STD_LOGIC) appears to be the Rev 2 text and
-- the second pair (lower-case std_logic) its Rev 3 replacement. Both declare
-- the same generic and ports.
entity register_n is
generic( n : integer := 4
);
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC_VECTOR((n-1) downto 0);
dout : out STD_LOGIC_VECTOR((n-1) downto 0)
);
generic(
n : integer := 4 -- register width in bits
);
port(
core_clk : in std_logic; -- wired to the C pin of every FDCE in the architecture
ce : in std_logic; -- wired to the CE pin of every FDCE
reset : in std_logic; -- wired to the CLR (asynchronous clear) pin of every FDCE
din : in std_logic_vector((n-1) downto 0); -- data in, one bit per FDCE
dout : out std_logic_vector((n-1) downto 0) -- data out, driven from the FDCE Q outputs
);
end register_n;
 
 
-- Structural n-bit register: one Xilinx FDCE primitive per bit, all sharing
-- core_clk, ce and reset. The Q outputs are collected in dout_i and copied
-- to the dout port.
-- NOTE(review): this is a revision-comparison listing. The `54,18 -> 72,18`
-- style line below is a hunk marker of the comparison, not VHDL, and the
-- generate statement and the closing `end Structural;` each appear twice
-- (Rev 2 text followed by its Rev 3 replacement). A compilable file contains
-- each of them once.
architecture Structural of register_n is
signal dout_i : std_logic_vector((n-1) downto 0) := (others => '0');
begin
54,18 → 72,18
dout <= dout_i;
N_REGS: for i in 0 to n-1 generate
FDCE_inst : FDCE
generic map (
INIT => '0') -- Initial value of register ('0' or '1')
port map (
Q => dout_i(i), -- Data output
CLR => reset, -- Asynchronous clear/reset input
D => din(i), -- Data input
C => core_clk, -- Clock input
CE => ce -- Clock enable input
);
end generate;
N_REGS : for i in 0 to n-1 generate
FDCE_inst : FDCE
generic map (
INIT => '0' -- Initial value of register ('0' or '1')
)
port map (
Q => dout_i(i), -- Data output
CLR => reset, -- Asynchronous clear/reset input
D => din(i), -- Data input
C => core_clk, -- Clock input
CE => ce -- Clock enable input
);
end generate;
end Structural;
end Structural;
/cell_1b.vhd
1,88 → 1,94
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: cell_1b.vhd / entity cell_1b
--
-- Last Modified: 14/11/2011
--
-- Description: cell for use in the Montgomery multiplier systolic array
--
--
-- Dependencies: cell_1b_adder
-- cell_1b_mux
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- cell_1b ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 1 bit cell for use in the Montgomery multiplier systolic ----
---- array ----
---- ----
---- Dependencies: ----
---- - cell_1b_adder ----
---- - cell_1b_mux ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- Entity cell_1b: port interface of the 1-bit cell used in the Montgomery
-- multiplier systolic array (description taken from the file header).
-- NOTE(review): this is a revision-comparison listing; the upper-case
-- `Port ( ... )` clause appears to be the Rev 2 text and the lower-case
-- `port ( ... );` clause its Rev 3 replacement. Both declare the same ports.
entity cell_1b is
Port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
port (
-- my, y, m, x, q are passed unchanged to the cell_1b_mux instance
my : in std_logic;
y : in std_logic;
m : in std_logic;
x : in std_logic;
q : in std_logic;
-- a, cin, cout, r are passed unchanged to the cell_1b_adder instance
a : in std_logic;
cin : in std_logic;
cout : out std_logic;
r : out std_logic
);
end cell_1b;
 
 
-- Structural description of cell_1b: a cell_1b_mux instance produces
-- mux2adder from (my, y, m, x, q), and a cell_1b_adder instance adds
-- a + mux2adder + cin to produce r and cout.
-- NOTE(review): this is a revision-comparison listing, so Rev 2 and Rev 3
-- text are interleaved: the mux2adder declaration, both instantiations and
-- the closing `end Structural;` each appear twice. The component
-- declarations appear only once; presumably Rev 3 drops them in favour of
-- declarations in mod_sim_exp_pkg -- confirm against the package.
architecture Structural of cell_1b is
component cell_1b_mux
Port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
result : out STD_LOGIC);
end component;
component cell_1b_adder
Port ( a : in STD_LOGIC;
mux_result : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
end component;
 
-- connects the mux output to the adder's mux_result input
signal mux2adder : std_logic;
signal mux2adder : std_logic;
begin
cell_mux: cell_1b_mux
port map( my => my,
y => y,
m => m,
x => x,
q => q,
result => mux2adder
);
 
cell_adder: cell_1b_adder
port map(a => a,
mux_result => mux2adder,
cin => cin,
cout => cout,
r => r
);
cell_mux : cell_1b_mux
port map(
my => my,
y => y,
m => m,
x => x,
q => q,
result => mux2adder
);
 
end Structural;
cell_adder : cell_1b_adder
port map(
a => a,
mux_result => mux2adder,
cin => cin,
cout => cout,
r => r
);
 
end Structural;
/mont_ctrl.vhd
1,110 → 1,112
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: mont_ctrl.vhd / entity mont_ctrl
--
-- Last Modified: 25/04/2012
--
-- Description: control unit for a pipelined montgomery multiplier, with split
-- pipeline operation and "auto-run" support
--
--
-- Dependencies: autorun_cntrl
--
-- Revision:
-- Revision 2.00 - Added autorun_control_logic
-- Revision 1.00 - Architecture with support for single multiplication
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- mont_ctrl ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- control unit for a pipelined montgomery multiplier, with ----
---- split pipeline operation and "auto-run" support ----
---- ----
---- Dependencies: ----
---- - autorun_cntrl ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- Entity mont_ctrl: port interface of the control unit for a pipelined
-- Montgomery multiplier with split pipeline operation and "auto-run" support
-- (description taken from the file header).
-- NOTE(review): this is a revision-comparison listing; the first
-- `port ( clk ...` clause (with the `--v` markers and the commented-out p_sel
-- port) appears to be the Rev 2 text and the second `port (` clause its
-- Rev 3 replacement. The closing `);` is shared by both. The active ports
-- are the same in both clauses.
entity mont_ctrl is
port ( clk : in std_logic; --v
reset : in std_logic; --v
-- bus side
start : in std_logic; --v
--p_sel : in std_logic_vector(1 downto 0);
x_sel_single : in std_logic_vector(1 downto 0); --v
y_sel_single : in std_logic_vector(1 downto 0); --v
run_auto : in std_logic;
op_buffer_empty : in std_logic;
op_sel_buffer : in std_logic_vector(31 downto 0);
read_buffer : out std_logic;
buffer_noread : in std_logic;
done : out std_logic;
calc_time : out std_logic; -- v
-- multiplier side
op_sel : out std_logic_vector(1 downto 0); --v
load_x : out std_logic; -- v
load_result : out std_logic; --v
start_multiplier : out std_logic; -- v
multiplier_ready : in std_logic
port (
clk : in std_logic;
reset : in std_logic;
-- bus side
start : in std_logic; -- a rising edge produces start_pulse_i in the architecture
x_sel_single : in std_logic_vector(1 downto 0);
y_sel_single : in std_logic_vector(1 downto 0);
run_auto : in std_logic; -- routes the start pulse to the autorun controller when '1', to single_start_pulse_i when '0'
op_buffer_empty : in std_logic; -- forwarded to autorun_cntrl.buffer_empty
op_sel_buffer : in std_logic_vector(31 downto 0); -- forwarded to autorun_cntrl.buffer_din
read_buffer : out std_logic; -- driven directly by autorun_cntrl.read_buffer
buffer_noread : in std_logic;
done : out std_logic;
calc_time : out std_logic;
-- multiplier side
op_sel : out std_logic_vector(1 downto 0);
load_x : out std_logic;
load_result : out std_logic;
start_multiplier : out std_logic;
multiplier_ready : in std_logic -- passed to the autorun controller only while run_auto is '1'
);
end mont_ctrl;
 
 
architecture Behavioral of mont_ctrl is
signal start_delayed_i : std_logic; -- delayed version of start input
signal start_pulse_i : std_logic;
signal auto_start_pulse_i : std_logic;
signal start_multiplier_i : std_logic;
signal start_up_counter_i : std_logic_vector(2 downto 0):= "100"; -- used in op_sel at multiplier start
signal auto_start_i : std_logic := '0';
signal store_autorun_i : std_logic;
signal run_auto_i : std_logic;
signal run_auto_stored_i : std_logic := '0';
signal single_start_pulse_i : std_logic;
signal calc_time_i : std_logic; -- high ('1') during multiplication
signal x_sel_i : std_logic_vector(1 downto 0); -- the operand used as x input
signal y_sel_i : std_logic_vector(1 downto 0); -- the operand used as y input
signal x_sel_buffer_i : std_logic_vector(1 downto 0); -- x operand as specified by fifo buffer (autorun)
signal start_delayed_i : std_logic; -- delayed version of start input
signal start_pulse_i : std_logic;
signal auto_start_pulse_i : std_logic;
signal start_multiplier_i : std_logic;
signal start_up_counter_i : std_logic_vector(2 downto 0) := "100"; -- used in op_sel at multiplier start
signal auto_start_i : std_logic := '0';
signal store_autorun_i : std_logic;
signal run_auto_i : std_logic;
signal run_auto_stored_i : std_logic := '0';
signal single_start_pulse_i : std_logic;
 
signal auto_done_i : std_logic;
signal start_auto_i : std_logic;
signal new_buf_part_i : std_logic;
signal new_buf_word_i : std_logic;
signal buf_part_i : std_logic_vector(3 downto 0);
signal pop_i : std_logic;
signal start_autorun_cycle_i : std_logic;
signal start_autorun_cycle_1_i : std_logic;
signal autorun_counter_i : std_logic_vector(1 downto 0);
signal part_counter_i : std_logic_vector(2 downto 0);
signal auto_multiplier_done_i : std_logic;
signal calc_time_i : std_logic; -- high ('1') during multiplication
 
signal x_sel_i : std_logic_vector(1 downto 0); -- the operand used as x input
signal y_sel_i : std_logic_vector(1 downto 0); -- the operand used as y input
signal x_sel_buffer_i : std_logic_vector(1 downto 0); -- x operand as specified by fifo buffer (autorun)
 
signal auto_done_i : std_logic;
signal start_auto_i : std_logic;
signal new_buf_part_i : std_logic;
signal new_buf_word_i : std_logic;
signal buf_part_i : std_logic_vector(3 downto 0);
signal pop_i : std_logic;
signal start_autorun_cycle_i : std_logic;
signal start_autorun_cycle_1_i : std_logic;
signal autorun_counter_i : std_logic_vector(1 downto 0);
signal part_counter_i : std_logic_vector(2 downto 0);
signal auto_multiplier_done_i : std_logic;
COMPONENT autorun_cntrl
PORT(
clk : IN std_logic;
reset : IN std_logic;
start : IN std_logic;
multiplier_done : IN std_logic;
buffer_din : IN std_logic_vector(31 downto 0);
buffer_empty : IN std_logic;
done : OUT std_logic;
op_sel : OUT std_logic_vector(1 downto 0);
start_multiplier : OUT std_logic;
read_buffer : OUT std_logic
);
END COMPONENT;
begin
 
-----------------------------------------------------------------------------------
117,11 → 119,8
start_delayed_i <= start;
end if;
end process START_PULSE_PROC;
--start_pulse_i <= store_autorun_i and (not run_auto_i);
start_pulse_i <= start and (not start_delayed_i);
single_start_pulse_i <= start_pulse_i and (not run_auto_i);
--store_autorun_i <= (start and (not start_delayed_i));
--start_auto_i <= store_autorun_i and run_auto_i;
start_auto_i <= start_pulse_i and run_auto_i;
 
-- to start the multiplier we first need to select the y_operand and
203,22 → 202,21
-- end if;
-- end process STORE_AUTORUN_PROC;
run_auto_i <= run_auto;
--run_auto_i <= run_auto or run_auto_stored_i;
-- multiplier_ready is only passed to autorun control when in autorun mode
auto_multiplier_done_i <= (multiplier_ready and run_auto_i);
autorun_control_logic: autorun_cntrl PORT MAP(
clk => clk,
reset => reset,
start => start_auto_i,
done => auto_done_i,
op_sel => x_sel_buffer_i,
start_multiplier => auto_start_pulse_i,
multiplier_done => auto_multiplier_done_i,
read_buffer => read_buffer,
buffer_din => op_sel_buffer,
buffer_empty => op_buffer_empty
);
autorun_control_logic : autorun_cntrl port map(
clk => clk,
reset => reset,
start => start_auto_i,
done => auto_done_i,
op_sel => x_sel_buffer_i,
start_multiplier => auto_start_pulse_i,
multiplier_done => auto_multiplier_done_i,
read_buffer => read_buffer,
buffer_din => op_sel_buffer,
buffer_empty => op_buffer_empty
);
 
end Behavioral;
 
/mont_mult_sys_pipeline.vhd
1,278 → 1,251
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: mont_mult_sys_pipeline.vhd / entity mont_mult_sys_pipeline
--
-- Last Modified: 18/06/2012
--
-- Description: n-bit montgomery multiplier with a pipelined systolic array
--
--
-- Dependencies: systolic_pipeline
-- adder_n
-- cell_1b_adder
-- x_shift_register
--
-- Revision:
-- Revision 3.00 - shift register for x selection instead of decoding logic
-- Revision 2.01 - Bug fix of the bug fix
-- Revision 2.00 - Major bug fix in reduction logic (carry in upper part)
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- mont_mult_sys_pipeline ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- n-bit montgomery multiplier with a pipelined systolic ----
---- array ----
---- ----
---- Dependencies: ----
---- - x_shift_reg ----
---- - adder_n ----
---- - d_flip_flop ----
---- - systolic_pipeline ----
---- - cell_1b_adder ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity mont_mult_sys_pipeline is
generic ( n : integer := 1536;
nr_stages : integer := 96; --(divides n, bits_low & (n-bits_low))
stages_low : integer := 32
);
Port ( core_clk : in STD_LOGIC;
xy : in STD_LOGIC_VECTOR((n-1) downto 0);
m : in STD_LOGIC_VECTOR((n-1) downto 0);
r : out STD_LOGIC_VECTOR((n-1) downto 0);
start : in STD_LOGIC;
reset : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0);
load_x : in std_logic;
ready : out STD_LOGIC
);
generic (
n : integer := 1536;
nr_stages : integer := 96; --(divides n, bits_low & (n-bits_low))
stages_low : integer := 32
);
port (
core_clk : in std_logic;
xy : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
r : out std_logic_vector((n-1) downto 0);
start : in std_logic;
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0);
load_x : in std_logic;
ready : out std_logic
);
end mont_mult_sys_pipeline;
 
 
architecture Structural of mont_mult_sys_pipeline is
component adder_n
generic ( width : integer := 16;
block_width : integer := 4
);
Port ( core_clk : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
b : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
s : out STD_LOGIC_VECTOR((width-1) downto 0)
);
end component;
component systolic_pipeline
generic( n : integer := 1536; -- width of the operands (# bits)
t : integer := 96; -- number of stages (divider of n) >= 2
tl: integer := 32
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((n) downto 0);
y : in STD_LOGIC_VECTOR((n-1) downto 0);
m : in STD_LOGIC_VECTOR((n-1) downto 0);
xi : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0);
ready : out STD_LOGIC;
next_x : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((n+1) downto 0)
);
end component;
component x_shift_reg
generic( n : integer := 32;
t : integer := 8;
tl : integer := 3
);
port( clk : in STD_LOGIC;
reset : in STD_LOGIC;
x_in : in STD_LOGIC_VECTOR((n-1) downto 0);
load_x : in STD_LOGIC;
next_x : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0);
x_i : out STD_LOGIC
);
end component;
component cell_1b_adder
Port ( a : in STD_LOGIC;
mux_result : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
end component;
constant stage_width : integer := n/nr_stages;
constant bits_l : integer := stage_width * stages_low;
constant bits_h : integer := n - bits_l;
 
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
signal my : std_logic_vector(n downto 0);
signal my_h_cin : std_logic;
signal my_l_cout : std_logic;
signal r_pipeline : std_logic_vector(n+1 downto 0);
signal r_red : std_logic_vector(n-1 downto 0);
signal r_i : std_logic_vector(n-1 downto 0);
signal c_red_l : std_logic_vector(2 downto 0);
signal c_red_h : std_logic_vector(2 downto 0);
signal cin_red_h : std_logic;
signal r_sel : std_logic;
signal reset_multiplier : std_logic;
signal start_multiplier : std_logic;
signal m_inv : std_logic_vector(n-1 downto 0);
 
constant stage_width : integer := n/nr_stages;
constant bits_l : integer := stage_width * stages_low;
constant bits_h : integer := n - bits_l;
signal my : std_logic_vector(n downto 0);
signal my_h_cin : std_logic;
signal my_l_cout : std_logic;
signal r_pipeline : std_logic_vector(n+1 downto 0);
signal r_red : std_logic_vector(n-1 downto 0);
signal r_i : std_logic_vector(n-1 downto 0);
signal c_red_l : std_logic_vector(2 downto 0);
signal c_red_h : std_logic_vector(2 downto 0);
signal cin_red_h : std_logic;
signal r_sel : std_logic;
signal reset_multiplier : std_logic;
signal start_multiplier : std_logic;
signal m_inv : std_logic_vector(n-1 downto 0);
signal next_x_i : std_logic;
signal x_i : std_logic;
signal next_x_i : std_logic;
signal x_i : std_logic;
begin
-- x selection
x_selection: x_shift_reg
generic map( n => n,
t => nr_stages,
tl => stages_low
)
port map(clk => core_clk,
reset => reset,
x_in => xy,
load_x => load_x,
next_x => next_x_i,
p_sel => p_sel,
x_i => x_i
);
 
-- precomputation of my (m+y)
my_adder_l: adder_n
generic map( width => bits_l,
block_width => stage_width
)
port map( core_clk => core_clk,
a => m((bits_l-1) downto 0),
b => xy((bits_l-1) downto 0),
cin => '0',
cout => my_l_cout,
s => my((bits_l-1) downto 0)
);
-- x selection
-- Shift register that supplies the x operand to the pipeline one bit at a time.
-- It is loaded from the shared xy bus under control of load_x, and its serial
-- output x_i feeds the xi input of the systolic pipeline. next_x_i is driven
-- by the pipeline's next_x output (presumably requesting the next bit of x).
-- The generics mirror those passed to the pipeline: n operand bits, nr_stages
-- stages in total, stages_low of them in the lower part.
x_selection : x_shift_reg
generic map(
n => n,
t => nr_stages,
tl => stages_low
)
port map(
clk => core_clk,
reset => reset,
x_in => xy,
load_x => load_x,
next_x => next_x_i,
p_sel => p_sel,
x_i => x_i
);
 
-- precomputation of my (m+y)
-- Low part of the m+y adder: sums bits (bits_l-1 downto 0) of m and xy with a
-- carry-in of '0'. The sum becomes the low bits of my; the carry-out
-- (my_l_cout) is optionally chained into the high-part adder via my_h_cin.
my_adder_l : adder_n
generic map(
width => bits_l,
block_width => stage_width
)
port map(
core_clk => core_clk,
a => m((bits_l-1) downto 0),
b => xy((bits_l-1) downto 0),
cin => '0',
cout => my_l_cout,
s => my((bits_l-1) downto 0)
);
my_adder_h: adder_n
generic map( width => bits_h,
block_width => stage_width
)
port map( core_clk => core_clk,
a => m((n-1) downto bits_l),
b => xy((n-1) downto bits_l),
cin => my_h_cin,
cout => my(n),
s => my((n-1) downto bits_l)
);
-- High part of the m+y adder: sums bits (n-1 downto bits_l) of m and xy.
-- The carry-in my_h_cin is either the low adder's carry-out or '0', depending
-- on p_sel. The carry-out is stored as the extra top bit my(n), so my is
-- n+1 bits wide.
my_adder_h : adder_n
generic map(
width => bits_h,
block_width => stage_width
)
port map(
core_clk => core_clk,
a => m((n-1) downto bits_l),
b => xy((n-1) downto bits_l),
cin => my_h_cin,
cout => my(n),
s => my((n-1) downto bits_l)
);
 
-- Carry into the high m+y adder: forced to '0' when p_sel = "10", otherwise
-- chained from the carry-out of the low m+y adder.
-- NOTE(review): p_sel = "10" presumably selects the upper pipeline part on
-- its own, so the lower part's carry must not leak in -- confirm.
my_h_cin <= '0' when (p_sel(1) and (not p_sel(0)))='1' else my_l_cout;
-- multiplication
-- The pipeline is held in reset by the external reset and also while start
-- is asserted.
reset_multiplier <= reset or start;
 
delay_1_cycle: d_flip_flop
port map(core_clk => core_clk,
reset => reset,
din => start,
dout => start_multiplier
);
-- Registers the external start signal to produce start_multiplier.
-- Because reset_multiplier includes start, the pipeline is reset while start
-- is high; the registered copy (one clock later, per the instance label) is
-- then used as the pipeline's actual start input.
delay_1_cycle : d_flip_flop
port map(
core_clk => core_clk,
reset => reset,
din => start,
dout => start_multiplier
);
 
the_multiplier: systolic_pipeline
generic map( n => n, -- width of the operands (# bits)
t => nr_stages, -- number of stages (divider of n) >= 2
tl => stages_low
)
port map(core_clk => core_clk,
my => my,
y => xy,
m => m,
xi => x_i,
start => start_multiplier,
reset => reset_multiplier,
p_sel => p_sel,
ready => ready, -- misschien net iets te vroeg?
next_x => next_x_i,
r => r_pipeline
);
-- The pipelined systolic array performing the multiplication.
-- Inputs: the precomputed sum my (n+1 bits), the y operand and modulus m
-- (n bits each) and the serial x bit x_i from the x shift register.
-- It is started by the registered start (start_multiplier) and reset by
-- reset_multiplier (reset or start). The raw result r_pipeline is n+2 bits
-- wide and still has to pass through the reduction logic below.
the_multiplier : systolic_pipeline
generic map(
n => n, -- width of the operands (# bits)
t => nr_stages, -- number of stages (divider of n) >= 2
tl => stages_low
)
port map(
core_clk => core_clk,
my => my,
y => xy,
m => m,
xi => x_i,
start => start_multiplier,
reset => reset_multiplier,
p_sel => p_sel,
ready => ready, -- NOTE(review): maybe asserted just a bit too early?
next_x => next_x_i,
r => r_pipeline
);
-- post-computation (reduction)
m_inv <= not(m);
reduction_adder_l: adder_n
generic map( width => bits_l,
block_width => stage_width
)
port map( core_clk => core_clk,
a => m_inv((bits_l-1) downto 0),
b => r_pipeline((bits_l-1) downto 0),
cin => '1',
cout => c_red_l(0),
s => r_red((bits_l-1) downto 0)
);
-- Low part of the reduction: adds the inverted modulus (m_inv = not m) and a
-- carry-in of '1' to the low bits_l bits of the pipeline result, i.e. a
-- two's-complement subtraction r_pipeline - m on those bits. The difference
-- goes to the low bits of r_red and the carry-out to c_red_l(0).
reduction_adder_l : adder_n
generic map(
width => bits_l,
block_width => stage_width
)
port map(
core_clk => core_clk,
a => m_inv((bits_l-1) downto 0),
b => r_pipeline((bits_l-1) downto 0),
cin => '1',
cout => c_red_l(0),
s => r_red((bits_l-1) downto 0)
);
 
-- First 1-bit extension of the low-part subtraction: adds the constant '1'
-- (presumably the sign extension of the inverted modulus), bit bits_l of the
-- pipeline result and the carry c_red_l(0). Only the carry-out c_red_l(1) is
-- used; the sum output r is deliberately left unconnected.
reduction_adder_l_a : cell_1b_adder
port map(
a => '1',
mux_result => r_pipeline(bits_l),
cin => c_red_l(0),
cout => c_red_l(1)
--r =>
);
 
-- Second 1-bit extension of the low-part subtraction: adds the constant '1',
-- bit bits_l+1 of the pipeline result and the carry c_red_l(1). Its carry-out
-- c_red_l(2) is the final carry of the low-part subtraction and feeds r_sel.
-- The sum output r is deliberately left unconnected.
reduction_adder_l_b : cell_1b_adder
port map(
a => '1',
mux_result => r_pipeline(bits_l+1),
cin => c_red_l(1),
cout => c_red_l(2)
-- r =>
);
reduction_adder_l_a: cell_1b_adder
port map(a => '1',
mux_result => r_pipeline(bits_l),
cin => c_red_l(0),
cout => c_red_l(1)
--r =>
);
reduction_adder_l_b: cell_1b_adder
port map(a => '1',
mux_result => r_pipeline(bits_l+1),
cin => c_red_l(1),
cout => c_red_l(2)
-- r =>
);
-- Carry-in of the high reduction adder. When p_sel(0) = '1' it is chained
-- from the carry-out of the low reduction adder (c_red_l(0)); otherwise it is
-- forced to '1', so the high part supplies its own "+1" for the
-- two's-complement subtraction.
-- Earlier variant, kept for reference:
--cin_red_h <= p_sel(1) and (not p_sel(0));
cin_red_h <= c_red_l(0) when p_sel(0) = '1' else '1';
reduction_adder_h: adder_n
generic map( width => bits_h,
block_width => stage_width
)
port map( core_clk => core_clk,
a => m_inv((n-1) downto bits_l),
b => r_pipeline((n-1) downto bits_l),
cin => cin_red_h,
cout => c_red_h(0),
s => r_red((n-1) downto bits_l)
);
reduction_adder_h_a: cell_1b_adder
port map(a => '1',
mux_result => r_pipeline(n),
cin => c_red_h(0),
cout => c_red_h(1)
);
reduction_adder_h_b: cell_1b_adder
port map(a => '1',
mux_result => r_pipeline(n+1),
cin => c_red_h(1),
cout => c_red_h(2)
);
-- High part of the reduction: adds the inverted modulus bits
-- (n-1 downto bits_l) to the same bits of the pipeline result, with carry-in
-- cin_red_h. The difference goes to the high bits of r_red and the carry-out
-- to c_red_h(0).
reduction_adder_h : adder_n
generic map(
width => bits_h,
block_width => stage_width
)
port map(
core_clk => core_clk,
a => m_inv((n-1) downto bits_l),
b => r_pipeline((n-1) downto bits_l),
cin => cin_red_h,
cout => c_red_h(0),
s => r_red((n-1) downto bits_l)
);
 
-- First 1-bit extension of the high-part subtraction: adds the constant '1',
-- bit n of the pipeline result and the carry c_red_h(0). Only the carry-out
-- c_red_h(1) is used; the sum output r is not connected.
reduction_adder_h_a : cell_1b_adder
port map(
a => '1',
mux_result => r_pipeline(n),
cin => c_red_h(0),
cout => c_red_h(1)
);
 
-- Second 1-bit extension of the high-part subtraction: adds the constant '1',
-- bit n+1 (the top bit) of the pipeline result and the carry c_red_h(1).
-- Its carry-out c_red_h(2) is the final carry of the subtraction and feeds
-- r_sel. The sum output r is not connected.
reduction_adder_h_b : cell_1b_adder
port map(
a => '1',
mux_result => r_pipeline(n+1),
cin => c_red_h(1),
cout => c_red_h(2)
);
 
-- Choose between the raw and the reduced result.
-- r_sel is '1' when the final carry of the relevant subtraction is set:
-- c_red_h(2) is used when p_sel(1) = '1', c_red_l(2) when p_sel = "01".
-- For p_sel = "00" r_sel is always '0'.
-- NOTE(review): a set carry presumably means r_pipeline >= m, i.e. the
-- subtraction did not borrow -- confirm.
r_sel <= (c_red_h(2) and p_sel(1)) or (c_red_l(2) and (p_sel(0) and (not p_sel(1))));
-- r_i is the subtracted value r_red when r_sel = '1', otherwise the low n
-- bits of the unreduced pipeline result.
r_i <= r_red when r_sel = '1' else r_pipeline((n-1) downto 0);
/mod_sim_exp_pkg.vhd
0,0 → 1,436
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
 
package mod_sim_exp_pkg is
component adder_n is
generic (
width : integer := 1536;
block_width : integer := 8
);
port (
core_clk : in std_logic;
a : in std_logic_vector((width-1) downto 0);
b : in std_logic_vector((width-1) downto 0);
cin : in std_logic;
cout : out std_logic;
s : out std_logic_vector((width-1) downto 0)
);
end component adder_n;
component adder_block is
generic (
width : integer := 32
);
port (
core_clk : in std_logic;
a : in std_logic_vector((width-1) downto 0);
b : in std_logic_vector((width-1) downto 0);
cin : in std_logic;
cout : out std_logic;
s : out std_logic_vector((width-1) downto 0)
);
end component adder_block;
component autorun_cntrl is
port (
clk : in std_logic;
reset : in std_logic;
start : in std_logic;
done : out std_logic;
op_sel : out std_logic_vector (1 downto 0);
start_multiplier : out std_logic;
multiplier_done : in std_logic;
read_buffer : out std_logic;
buffer_din : in std_logic_vector (31 downto 0);
buffer_empty : in std_logic
);
end component autorun_cntrl;
component cell_1b_adder is
port (
a : in std_logic;
mux_result : in std_logic;
cin : in std_logic;
cout : out std_logic;
r : out std_logic
);
end component cell_1b_adder;
component cell_1b_mux is
port (
my : in std_logic;
y : in std_logic;
m : in std_logic;
x : in std_logic;
q : in std_logic;
result : out std_logic
);
end component cell_1b_mux;
component cell_1b is
port (
my : in std_logic;
y : in std_logic;
m : in std_logic;
x : in std_logic;
q : in std_logic;
a : in std_logic;
cin : in std_logic;
cout : out std_logic;
r : out std_logic
);
end component cell_1b;
component counter_sync is
generic(
max_value : integer := 1024
);
port(
reset_value : in integer;
core_clk : in std_logic;
ce : in std_logic;
reset : in std_logic;
overflow : out std_logic
);
end component counter_sync;
component d_flip_flop is
port(
core_clk : in std_logic;
reset : in std_logic;
din : in std_logic;
dout : out std_logic
);
end component d_flip_flop;
component fifo_primitive is
port (
clk : in std_logic;
din : in std_logic_vector (31 downto 0);
dout : out std_logic_vector (31 downto 0);
empty : out std_logic;
full : out std_logic;
push : in std_logic;
pop : in std_logic;
reset : in std_logic;
nopop : out std_logic;
nopush : out std_logic
);
end component fifo_primitive;
component first_stage is
generic(
width : integer := 16 -- must be the same as width of the standard stage
);
port(
core_clk : in std_logic;
my : in std_logic_vector((width) downto 0);
y : in std_logic_vector((width) downto 0);
m : in std_logic_vector((width) downto 0);
xin : in std_logic;
xout : out std_logic;
qout : out std_logic;
a_msb : in std_logic;
cout : out std_logic;
start : in std_logic;
reset : in std_logic;
done : out std_logic;
r : out std_logic_vector((width-1) downto 0)
);
end component first_stage;
component last_stage is
generic(
width : integer := 16 -- must be the same as width of the standard stage
);
port(
core_clk : in std_logic;
my : in std_logic_vector((width-1) downto 0);
y : in std_logic_vector((width-2) downto 0);
m : in std_logic_vector((width-2) downto 0);
xin : in std_logic;
qin : in std_logic;
cin : in std_logic;
start : in std_logic;
reset : in std_logic;
r : out std_logic_vector((width+1) downto 0)
);
end component last_stage;
component modulus_ram is
port(
clk : in std_logic;
modulus_addr : in std_logic_vector(5 downto 0);
write_modulus : in std_logic;
modulus_in : in std_logic_vector(31 downto 0);
modulus_out : out std_logic_vector(1535 downto 0)
);
end component modulus_ram;
component mont_ctrl is
port (
clk : in std_logic;
reset : in std_logic;
-- bus side
start : in std_logic;
x_sel_single : in std_logic_vector(1 downto 0);
y_sel_single : in std_logic_vector(1 downto 0);
run_auto : in std_logic;
op_buffer_empty : in std_logic;
op_sel_buffer : in std_logic_vector(31 downto 0);
read_buffer : out std_logic;
buffer_noread : in std_logic;
done : out std_logic;
calc_time : out std_logic;
-- multiplier side
op_sel : out std_logic_vector(1 downto 0);
load_x : out std_logic;
load_result : out std_logic;
start_multiplier : out std_logic;
multiplier_ready : in std_logic
);
end component mont_ctrl;
component mont_mult_sys_pipeline is
generic (
n : integer := 1536;
nr_stages : integer := 96; --(divides n, bits_low & (n-bits_low))
stages_low : integer := 32
);
port (
core_clk : in std_logic;
xy : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
r : out std_logic_vector((n-1) downto 0);
start : in std_logic;
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0);
load_x : in std_logic;
ready : out std_logic
);
end component mont_mult_sys_pipeline;
component multiplier_core is
port(
clk : in std_logic;
reset : in std_logic;
-- operand memory interface (plb shared memory)
write_enable : in std_logic;
data_in : in std_logic_vector (31 downto 0);
rw_address : in std_logic_vector (8 downto 0);
data_out : out std_logic_vector (31 downto 0);
collision : out std_logic;
-- op_sel fifo interface
fifo_din : in std_logic_vector (31 downto 0);
fifo_push : in std_logic;
fifo_full : out std_logic;
fifo_nopush : out std_logic;
-- ctrl signals
start : in std_logic;
run_auto : in std_logic;
ready : out std_logic;
x_sel_single : in std_logic_vector (1 downto 0);
y_sel_single : in std_logic_vector (1 downto 0);
dest_op_single : in std_logic_vector (1 downto 0);
p_sel : in std_logic_vector (1 downto 0);
calc_time : out std_logic
);
end component multiplier_core;
component operand_dp is
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(5 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0);
clkb : in std_logic;
web : in std_logic_vector(0 downto 0);
addrb : in std_logic_vector(5 downto 0);
dinb : in std_logic_vector(511 downto 0);
doutb : out std_logic_vector(31 downto 0)
);
end component operand_dp;
-- Operand/modulus memory: 32-bit data interface on the bus side, full-width
-- operand, modulus and result ports on the multiplier side.
-- NOTE(review): the generic n is declared, but the multiplier-side ports
-- below are hard-coded to 1536 bits (1535 downto 0) and do not use n.
component operand_mem is
generic(n : integer := 1536
);
port(
-- data interface (plb side)
data_in : in std_logic_vector(31 downto 0);
data_out : out std_logic_vector(31 downto 0);
rw_address : in std_logic_vector(8 downto 0);
-- address structure:
-- bit: 8 -> '1': modulus
-- '0': operands
-- bits: 7-6 -> operand_in_sel in case of bit 8 = '0'
-- don't care in case of modulus
-- bits: 5-0 -> modulus_addr / operand_addr resp.
-- operand interface (multiplier side)
op_sel : in std_logic_vector(1 downto 0);
xy_out : out std_logic_vector(1535 downto 0);
m : out std_logic_vector(1535 downto 0);
result_in : in std_logic_vector(1535 downto 0);
-- control signals
load_op : in std_logic;
load_m : in std_logic;
load_result : in std_logic;
result_dest_op : in std_logic_vector(1 downto 0);
collision : out std_logic;
-- system clock
clk : in std_logic
);
end component operand_mem;
-- Operand RAM: written and read 32 bits at a time from the bus side, read
-- and written as a full 1536-bit word from the multiplier side.
component operand_ram is
port( -- TODO: provide a write_operand_ack?
-- global ports
clk : in std_logic;
collision : out std_logic;
-- bus side connections (32-bit serial)
operand_addr : in std_logic_vector(5 downto 0);
operand_in : in std_logic_vector(31 downto 0);
operand_in_sel : in std_logic_vector(1 downto 0);
result_out : out std_logic_vector(31 downto 0);
write_operand : in std_logic;
-- multiplier side connections (1536 bit parallel)
result_dest_op : in std_logic_vector(1 downto 0);
operand_out : out std_logic_vector(1535 downto 0);
operand_out_sel : in std_logic_vector(1 downto 0); -- controlled by bus side
write_result : in std_logic;
result_in : in std_logic_vector(1535 downto 0)
);
end component operand_ram;
component operands_sp is
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(4 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0)
);
end component operands_sp;
component register_1b is
port(
core_clk : in std_logic;
ce : in std_logic;
reset : in std_logic;
din : in std_logic;
dout : out std_logic
);
end component register_1b;
component register_n is
generic(
n : integer := 4
);
port(
core_clk : in std_logic;
ce : in std_logic;
reset : in std_logic;
din : in std_logic_vector((n-1) downto 0);
dout : out std_logic_vector((n-1) downto 0)
);
end component register_n;
component standard_cell_block is
generic (
width : integer := 16
);
port (
my : in std_logic_vector((width-1) downto 0);
y : in std_logic_vector((width-1) downto 0);
m : in std_logic_vector((width-1) downto 0);
x : in std_logic;
q : in std_logic;
a : in std_logic_vector((width-1) downto 0);
cin : in std_logic;
cout : out std_logic;
r : out std_logic_vector((width-1) downto 0)
);
end component standard_cell_block;
component standard_stage is
generic(
width : integer := 32
);
port(
core_clk : in std_logic;
my : in std_logic_vector((width-1) downto 0);
y : in std_logic_vector((width-1) downto 0);
m : in std_logic_vector((width-1) downto 0);
xin : in std_logic;
qin : in std_logic;
xout : out std_logic;
qout : out std_logic;
a_msb : in std_logic;
cin : in std_logic;
cout : out std_logic;
start : in std_logic;
reset : in std_logic;
done : out std_logic;
r : out std_logic_vector((width-1) downto 0)
);
end component standard_stage;
component stepping_logic is
generic(
n : integer := 1536; -- max nr of steps required to complete a multiplication
t : integer := 192 -- total nr of steps in the pipeline
);
port(
core_clk : in std_logic;
start : in std_logic;
reset : in std_logic;
t_sel : in integer range 0 to t; -- nr of stages in the pipeline piece
n_sel : in integer range 0 to n; -- nr of steps required for a complete multiplication
start_first_stage : out std_logic;
stepping_done : out std_logic
);
end component stepping_logic;
-- Pipelined systolic array of the multiplier.
-- n is the operand width, t the total number of stages and tl the number of
-- stages in the lower part (mont_mult_sys_pipeline maps its stages_low
-- generic to tl).
-- my is one bit wider than the operands (n+1 bits) and the result r two bits
-- wider (n+2 bits).
-- NOTE(review): the "best take t = sqrt(n)" remark sits on the tl line but
-- talks about t -- confirm which generic it refers to.
component systolic_pipeline is
generic(
n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- number of stages (divider of n) >= 2
tl : integer := 64 -- best take t = sqrt(n)
);
port(
core_clk : in std_logic;
my : in std_logic_vector((n) downto 0);
y : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
xi : in std_logic;
start : in std_logic;
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used
ready : out std_logic;
next_x : out std_logic;
r : out std_logic_vector((n+1) downto 0)
);
end component systolic_pipeline;
component x_shift_reg is
generic(
n : integer := 1536;
t : integer := 48;
tl : integer := 16
);
port(
clk : in std_logic;
reset : in std_logic;
x_in : in std_logic_vector((n-1) downto 0);
load_x : in std_logic;
next_x : in std_logic;
p_sel : in std_logic_vector(1 downto 0);
x_i : out std_logic
);
end component x_shift_reg;
end package mod_sim_exp_pkg;
/stepping_logic.vhd
1,95 → 1,88
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: stepping_logic.vhd / entity stepping_logic
--
-- Last Modified: 23/01/2012
--
-- Description: stepping logic for the pipelined montgomery multiplier
--
--
-- Dependencies: counter_sync
--
-- Revision:
-- Revision 5.01 - defined integer range for t_sel and n_sel resulting in less LUTs
-- Revision 5.00 - made the reset value changeable in runtime
-- Revision 4.01 - Delayed ready pulse with 1 clk cycle. This delay is necessary
-- for the reduction to complete.
-- Revision 4.00 - Changed design to fit new pipeline-architecture
-- (i.e. 1 clock cycle / stage)
-- Revision 3.00 - Removed second delay on next_x
-- Revision 2.00 - Changed operation to give a pulse on stepping_done when pipeline
-- operation has finished
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- stepping_logic ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- stepping logic for the pipelined montgomery multiplier ----
---- ----
---- Dependencies: ----
---- - d_flip_flop ----
---- - counter_sync ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity stepping_logic is
generic( n : integer := 1536; -- max nr of steps required to complete a multiplication
t : integer := 192 -- total nr of steps in the pipeline
);
port( core_clk : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
t_sel : in integer range 0 to t; -- nr of stages in the pipeline piece
n_sel : in integer range 0 to n; -- nr of steps required for a complete multiplication
start_first_stage : out STD_LOGIC;
stepping_done : out STD_LOGIC
);
generic(
n : integer := 1536; -- max nr of steps required to complete a multiplication
t : integer := 192 -- total nr of steps in the pipeline
);
port(
core_clk : in std_logic;
start : in std_logic;
reset : in std_logic;
t_sel : in integer range 0 to t; -- nr of stages in the pipeline piece
n_sel : in integer range 0 to n; -- nr of steps required for a complete multiplication
start_first_stage : out std_logic;
stepping_done : out std_logic
);
end stepping_logic;
 
 
architecture Behavioral of stepping_logic is
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
component counter_sync
generic(max_value : integer := 16
);
port(reset_value : in integer;
core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
overflow : out STD_LOGIC
);
end component;
signal laststeps_in_i : std_logic := '0';
signal laststeps_out_i : std_logic := '0';
signal start_stop_in_i : std_logic := '0';
signal start_stop_out_i : std_logic := '0';
signal steps_in_i : std_logic := '0';
signal steps_out_i : std_logic := '0';
signal substeps_in_i : std_logic := '0';
signal substeps_out_i : std_logic := '0';
signal done_reg_in_i : std_logic := '0';
signal done_reg_out_i : std_logic := '0';
signal start_first_stage_i : std_logic := '0';
signal start_i : std_logic := '0';
 
signal laststeps_in_i : std_logic := '0';
signal laststeps_out_i : std_logic := '0';
signal start_stop_in_i : std_logic := '0';
signal start_stop_out_i : std_logic := '0';
signal steps_in_i : std_logic := '0';
signal steps_out_i : std_logic := '0';
signal substeps_in_i : std_logic := '0';
signal substeps_out_i : std_logic := '0';
signal done_reg_in_i : std_logic := '0';
signal done_reg_out_i : std_logic := '0';
signal start_first_stage_i : std_logic := '0';
signal start_i : std_logic := '0';
 
begin
start_i <= start;
 
106,51 → 99,59
start_first_stage_i <= start_i or steps_in_i;
--start_first_stage_i <= steps_in_i;
done_reg: d_flip_flop
port map(core_clk => core_clk,
reset => reset,
din => done_reg_in_i,
dout => done_reg_out_i
);
start_stop_reg: d_flip_flop
port map(core_clk => core_clk,
reset => reset,
din => start_stop_in_i,
dout => start_stop_out_i
);
-- for counting the last steps
laststeps_counter: counter_sync
generic map(max_value => t
)
port map(reset_value => t_sel,
core_clk => core_clk,
ce => laststeps_in_i,
reset => reset,
overflow => laststeps_out_i
);
-- counter for keeping track of the steps
steps_counter: counter_sync
generic map(max_value => n
)
port map(reset_value => (n_sel),
core_clk => core_clk,
ce => steps_in_i,
reset => reset,
overflow => steps_out_i
);
-- makes sure we don't start too early with a new step
substeps_counter: counter_sync
generic map(max_value => 2
)
port map(reset_value => 2,
core_clk => core_clk,
ce => substeps_in_i,
reset => reset,
overflow => substeps_out_i
);
done_reg : d_flip_flop
port map(
core_clk => core_clk,
reset => reset,
din => done_reg_in_i,
dout => done_reg_out_i
);
 
start_stop_reg : d_flip_flop
port map(
core_clk => core_clk,
reset => reset,
din => start_stop_in_i,
dout => start_stop_out_i
);
 
-- for counting the last steps
-- Counter with an upper bound of t (total number of stages). The run-time
-- input t_sel (number of stages in the selected pipeline piece) is passed as
-- reset_value. Counting is enabled by laststeps_in_i and the overflow pulse
-- appears on laststeps_out_i.
laststeps_counter : counter_sync
generic map(
max_value => t
)
port map(
reset_value => t_sel,
core_clk => core_clk,
ce => laststeps_in_i,
reset => reset,
overflow => laststeps_out_i
);
 
-- counter for keeping track of the steps
-- Counter with an upper bound of n. The run-time input n_sel (number of steps
-- required for a complete multiplication) is passed as reset_value. Counting
-- is enabled by steps_in_i and the overflow pulse appears on steps_out_i.
steps_counter : counter_sync
generic map(
max_value => n
)
port map(
reset_value => (n_sel),
core_clk => core_clk,
ce => steps_in_i,
reset => reset,
overflow => steps_out_i
);
 
-- makes sure we don't start too early with a new step
-- Small counter with both max_value and reset_value fixed at 2. Counting is
-- enabled by substeps_in_i and the overflow pulse appears on substeps_out_i.
substeps_counter : counter_sync
generic map(
max_value => 2
)
port map(
reset_value => 2,
core_clk => core_clk,
ce => substeps_in_i,
reset => reset,
overflow => substeps_out_i
);
 
end Behavioral;
/register_1b.vhd
1,48 → 1,68
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: register_1b.vhd / entity register_1b
--
-- Last Modified: 24/11/2011
--
-- Description: 1 bit register
--
--
-- Dependencies: LDCE
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- register_1b ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 1 bit register ----
---- used in Montgomery multiplier systolic array stages ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
-- Xilinx primitives used
library UNISIM;
use UNISIM.VComponents.all;
 
 
-- register_1b: 1-bit register with clock enable and reset.
-- NOTE(review): this listing is a Rev 2 -> Rev 3 comparison, so the port
-- clause appears twice (presumably the old text first, then the reformatted
-- text); both copies declare the same five ports.
entity register_1b is
port(core_clk : in STD_LOGIC; -- clock
ce : in STD_LOGIC; -- clock enable
reset : in STD_LOGIC; -- asynchronous clear
din : in STD_LOGIC; -- data input
dout : out STD_LOGIC -- registered data output
);
port(
core_clk : in std_logic; -- clock
ce : in std_logic; -- clock enable
reset : in std_logic; -- asynchronous clear
din : in std_logic; -- data input
dout : out std_logic -- registered data output
);
end register_1b;
 
 
-- Structural architecture: the register is one Xilinx FDCE primitive
-- (D flip-flop with clock enable and asynchronous clear) from UNISIM.
-- NOTE(review): the FDCE instantiation appears twice because this listing
-- shows both sides of a revision comparison; the "49,15 → 69,16" line below
-- is a hunk marker from that comparison, not VHDL.
architecture Structural of register_1b is
signal dout_i : std_logic; -- internal copy of the flip-flop output
begin
49,15 → 69,16
dout <= dout_i; -- drive the output port from the internal signal
FDCE_inst : FDCE
generic map (
INIT => '0') -- Initial value of the flip-flop ('0' or '1')
port map (
Q => dout_i, -- Data output
CLR => reset, -- Asynchronous clear input
D => din, -- Data input
C => core_clk, -- Clock input
CE => ce -- Clock enable input
);
FDCE_inst : FDCE
generic map (
INIT => '0' -- Initial value of the flip-flop ('0' or '1')
)
port map (
Q => dout_i, -- Data output
CLR => reset, -- Asynchronous clear input
D => din, -- Data input
C => core_clk, -- Clock input
CE => ce -- Clock enable input
);
end Structural;
/standard_cell_block.vhd
1,84 → 1,95
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: standard_cell_block.vhd / entity standard_cell_block
--
-- Last Modified: 14/11/2011
--
-- Description: cell_block for use in the Montgomery multiplier systolic array
--
--
-- Dependencies: none
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- standard_cell_block ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- a block of [width] cell_1b cells for use in the ----
---- Montgomery multiplier systolic array ----
---- ----
---- Dependencies: ----
---- - cell_1b ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- standard_cell_block: a block of [width] single-bit cells (cell_1b) chained
-- through their carries.
-- NOTE(review): the generic and port clauses appear twice because this
-- listing shows both sides of a revision comparison; both copies declare the
-- same interface.
entity standard_cell_block is
generic ( width : integer := 16 -- number of 1-bit cells in the block
);
Port ( my : in STD_LOGIC_VECTOR((width-1) downto 0); -- one bit per cell
y : in STD_LOGIC_VECTOR((width-1) downto 0); -- one bit per cell
m : in STD_LOGIC_VECTOR((width-1) downto 0); -- one bit per cell
x : in STD_LOGIC; -- shared by every cell
q : in STD_LOGIC; -- shared by every cell
a : in STD_LOGIC_VECTOR((width-1) downto 0); -- one bit per cell
cin : in STD_LOGIC; -- carry into cell 0
cout : out STD_LOGIC; -- carry out of cell width-1
r : out STD_LOGIC_VECTOR((width-1) downto 0)); -- one result bit per cell
generic (
width : integer := 16 -- number of 1-bit cells in the block
);
port (
my : in std_logic_vector((width-1) downto 0); -- one bit per cell
y : in std_logic_vector((width-1) downto 0); -- one bit per cell
m : in std_logic_vector((width-1) downto 0); -- one bit per cell
x : in std_logic; -- shared by every cell
q : in std_logic; -- shared by every cell
a : in std_logic_vector((width-1) downto 0); -- one bit per cell
cin : in std_logic; -- carry into cell 0
cout : out std_logic; -- carry out of cell width-1
r : out std_logic_vector((width-1) downto 0) -- one result bit per cell
);
end standard_cell_block;
 
 
-- Structural architecture: instantiates [width] cell_1b cells and links them
-- into a carry chain. Cell i receives carry(i) and drives carry(i+1); x and q
-- are broadcast to every cell.
-- NOTE(review): this listing shows both sides of a revision comparison, so
-- the generate block and the closing "end Structural;" appear twice. The
-- local cell_1b component declaration presumably belongs to the old revision
-- only (the new one likely takes it from mod_sim_exp_pkg) - confirm.
architecture Structural of standard_cell_block is
component cell_1b
Port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
end component;
 
-- carry(0) is the block carry-in, carry(width) the block carry-out
signal carry : std_logic_vector(width downto 0);
begin
carry(0) <= cin; -- seed the chain with the external carry-in
cell_block: for i in 0 to (width-1) generate
cells: cell_1b
port map( my => my(i),
y => y(i),
m => m(i),
x => x,
q => q,
a => a(i),
cin => carry(i),
cout => carry(i+1),
r => r(i)
);
end generate;
cell_block : for i in 0 to (width-1) generate
cells : cell_1b
port map(
my => my(i),
y => y(i),
m => m(i),
x => x,
q => q,
a => a(i),
cin => carry(i), -- carry from the previous cell (or cin for i = 0)
cout => carry(i+1), -- carry to the next cell
r => r(i)
);
end generate;
 
cout <= carry(width); -- carry out of the last cell
end Structural;
end Structural;
/first_stage.vhd
1,152 → 1,104
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: first_stage.vhd / entity first_stage
--
-- Last Modified: 24/11/2011
--
-- Description: first stage for use in the Montgomery multiplier systolic
-- array pipeline
--
--
-- Dependencies: standard_cell_block
-- cell_1b_mux,
-- register_n,
-- register_1b,
-- d_flip_flop
--
-- Revision:
-- Revision 4.00 - Removed input registers and used start signal as load_out_regs
-- Revision 3.00 - Removed "a" input and replaced with "a_msb" (which is the only one
-- that matters).
-- Revision 2.02 - removed "ready" output signal
-- Revision 2.01 - replaced the behavioral description of the registers with a
-- component instantiation
-- Revision 2.00 - added register to store input value xin (because this
-- can change during operation)
-- Revision 1.03 - added done pulse
-- Revision 1.02 - appended "_i" to name of all internal signals
-- Revision 1.01 - ready is '1' after reset
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- first_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- first stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - d_flip_flop ----
---- - register_n ----
---- - register_1b ----
---- - cell_1b_mux ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- first_stage: first stage of the multiplier's systolic array pipeline.
-- my, y and m are width+1 bits wide: in the architecture, bit 0 feeds a
-- cell_1b_mux and bits width downto 1 feed a standard_cell_block.
-- cout, qout and xout are driven from registers whose clock enable is start.
-- NOTE(review): the generic and port clauses appear twice because this
-- listing shows both sides of a revision comparison; the second copy drops
-- the commented-out "ready" port.
entity first_stage is
generic(width : integer := 16 -- must be the same as width of the standard stage
);
port(core_clk : in STD_LOGIC; -- clock
my : in STD_LOGIC_VECTOR((width) downto 0);
y : in STD_LOGIC_VECTOR((width) downto 0);
m : in STD_LOGIC_VECTOR((width) downto 0);
xin : in STD_LOGIC;
xout : out STD_LOGIC; -- registered output
qout : out STD_LOGIC; -- registered output
a_msb : in STD_LOGIC; -- presumably the bit placed above the stored result to form the a operand - confirm
cout : out STD_LOGIC; -- registered carry out
start : in STD_LOGIC; -- loads the output registers
reset : in STD_LOGIC;
-- ready : out STD_LOGIC;
done : out STD_LOGIC; -- presumably start delayed through a flip-flop - confirm
r : out STD_LOGIC_VECTOR((width-1) downto 0)
);
generic(
width : integer := 16 -- must be the same as width of the standard stage
);
port(
core_clk : in std_logic; -- clock
my : in std_logic_vector((width) downto 0);
y : in std_logic_vector((width) downto 0);
m : in std_logic_vector((width) downto 0);
xin : in std_logic;
xout : out std_logic; -- registered output
qout : out std_logic; -- registered output
a_msb : in std_logic; -- presumably the bit placed above the stored result to form the a operand - confirm
cout : out std_logic; -- registered carry out
start : in std_logic; -- loads the output registers
reset : in std_logic;
done : out std_logic; -- presumably start delayed through a flip-flop - confirm
r : out std_logic_vector((width-1) downto 0)
);
end first_stage;
 
 
architecture Structural of first_stage is
-- input
signal xin_i : std_logic;
signal a_msb_i : std_logic;
 
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
-- output
signal cout_i : std_logic;
signal r_i : std_logic_vector((width-1) downto 0);
signal cout_reg_i : std_logic;
signal xout_reg_i : std_logic;
signal qout_reg_i : std_logic;
signal r_reg_i : std_logic_vector((width-1) downto 0);
 
component register_1b
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
component register_n
generic( n : integer := 4
);
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC_VECTOR((n-1) downto 0);
dout : out STD_LOGIC_VECTOR((n-1) downto 0)
);
end component;
-- interconnection
signal q_i : std_logic;
signal c_i : std_logic;
signal first_res_i : std_logic;
signal a_i : std_logic_vector((width) downto 0);
 
component standard_cell_block
generic ( width : integer := 32
);
Port ( my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-1) downto 0);
m : in STD_LOGIC_VECTOR((width-1) downto 0);
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0));
end component;
 
component cell_1b_mux
port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
result : out STD_LOGIC);
end component;
 
-- input
signal xin_i : std_logic;
signal a_msb_i : std_logic;
-- signal xin_reg_i : std_logic;
-- signal a_msb_reg_i : std_logic;
 
-- output
signal cout_i : std_logic;
signal r_i : std_logic_vector((width-1) downto 0);
signal cout_reg_i : std_logic;
signal xout_reg_i : std_logic;
signal qout_reg_i : std_logic;
signal r_reg_i : std_logic_vector((width-1) downto 0);
 
-- interconnection
signal q_i : std_logic;
signal c_i : std_logic;
signal first_res_i : std_logic;
signal a_i : std_logic_vector((width) downto 0);
-- control signals
signal done_i : std_logic := '1';
--signal ready_del_i : std_logic := '1';
-- signal load_out_regs_i : std_logic;
-- control signals
signal done_i : std_logic := '1';
begin
-- map inputs to internal signals
159,113 → 111,86
cout <= cout_reg_i;
qout <= qout_reg_i;
xout <= xout_reg_i;
-- two possibilities:
--done <= ready_i and (not ready_del_i); -- slow
--done <= not ready_i; -- faster but not sure if it will work (DONE_PROC can be omitted)
-- a_i <= a_msb_reg_i & r_reg_i;
a_i <= a_msb_i & r_reg_i;
 
-- -- input registers
-- A_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => a_msb_i,
-- dout => a_msb_reg_i
-- );
--
-- XIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => xin_i,
-- dout => xin_reg_i
-- );
-- compute first q_i and carry
-- q_i <= a_i(0) xor (y(0) and xin_reg_i);
q_i <= a_i(0) xor (y(0) and xin_i);
c_i <= a_i(0) and first_res_i;
first_cell: cell_1b_mux
port map( my => my(0),
y => y(0),
m => m(0),
-- x => xin_reg_i,
x => xin_i,
q => q_i,
result => first_res_i
);
cell_block: standard_cell_block
generic map( width => width
)
port map( my => my(width downto 1),
y => y(width downto 1),
m => m(width downto 1),
-- x => xin_reg_i,
x => xin_i,
q => q_i,
a => a_i(width downto 1),
cin => c_i,
cout => cout_i,
r => r_i((width-1) downto 0)
);
-- delay_1_cycle: d_flip_flop
-- port map(core_clk => core_clk,
-- reset => reset,
-- din => start,
-- dout => load_out_regs_i
-- );
done_signal: d_flip_flop
port map(core_clk => core_clk,
reset => reset,
-- din => load_out_regs_i,
din => start,
dout => done_i
);
-- output registers
RESULT_REG: register_n
generic map( n => width
)
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
XOUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
-- din => xin_reg_i,
din => xin_i,
dout => xout_reg_i
);
QOUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => q_i,
dout => qout_reg_i
);
COUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => cout_i,
dout => cout_reg_i
);
first_cell : cell_1b_mux
port map(
my => my(0),
y => y(0),
m => m(0),
x => xin_i,
q => q_i,
result => first_res_i
);
 
cell_block : standard_cell_block
generic map(
width => width
)
port map(
my => my(width downto 1),
y => y(width downto 1),
m => m(width downto 1),
x => xin_i,
q => q_i,
a => a_i(width downto 1),
cin => c_i,
cout => cout_i,
r => r_i((width-1) downto 0)
);
 
done_signal : d_flip_flop
port map(
core_clk => core_clk,
reset => reset,
din => start,
dout => done_i
);
 
-- output registers
RESULT_REG : register_n
generic map(
n => width
)
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
 
XOUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => xin_i,
dout => xout_reg_i
);
 
QOUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => q_i,
dout => qout_reg_i
);
 
COUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => cout_i,
dout => cout_reg_i
);
 
 
end Structural;
/counter_sync.vhd
1,52 → 1,67
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: counter_sync.vhd / entity counter_sync
--
-- Last Modified: 23/01/2012
--
-- Description: counter with synchronous count enable. It generates an
-- overflow when max_value is reached
--
--
-- Dependencies: none
--
-- Revision:
-- Revision 2.00 - moved max_value from generic to port so it is changeable in runtime
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- counter_sync ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- counter with synchronous count enable. It generates an ----
---- overflow when max_value is reached ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
 
-- counter_sync: per the file header, a counter with synchronous count enable
-- that generates an overflow when max_value is reached.
-- NOTE(review): the generic and port clauses appear twice because this
-- listing shows both sides of a revision comparison. The old revision log
-- says max_value was moved to a port, yet it is still a generic here - confirm.
entity counter_sync is
generic(max_value : integer := 1024 -- count limit that triggers overflow
);
port(reset_value : in integer; -- presumably the value loaded on reset; architecture not shown - confirm
core_clk : in STD_LOGIC; -- clock
ce : in STD_LOGIC; -- count enable
reset : in STD_LOGIC;
overflow : out STD_LOGIC
);
generic(
max_value : integer := 1024 -- count limit that triggers overflow
);
port(
reset_value : in integer; -- presumably the value loaded on reset; architecture not shown - confirm
core_clk : in std_logic; -- clock
ce : in std_logic; -- count enable
reset : in std_logic;
overflow : out std_logic
);
end counter_sync;
 
 
architecture Behavioral of counter_sync is
signal overflow_i : std_logic := '0';
/fifo_primitive.vhd
1,53 → 1,73
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: fifo_primitive.vhd / entity fifo_primitive
--
-- Last Modified: 04/04/2012
--
-- Description: 512x32-bit fifo
--
--
-- Dependencies: FIFO18E1 primitive
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- fifo_primitive ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 512 x 32 bit fifo ----
---- ----
---- Dependencies: ----
---- - FIFO18E1 (xilinx primitive) ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
-- Xilinx primitives used in this code.
library UNISIM;
use UNISIM.VComponents.all;
 
 
-- fifo_primitive: per the file header, a 512 x 32 bit FIFO built on the
-- Xilinx FIFO18E1 primitive, with a single clock for both sides.
-- NOTE(review): the port clause appears twice because this listing shows
-- both sides of a revision comparison; both copies declare the same ports.
entity fifo_primitive is
Port ( clk : in STD_LOGIC; -- clock for both read and write
din : in STD_LOGIC_VECTOR (31 downto 0); -- 32-bit write data
dout : out STD_LOGIC_VECTOR (31 downto 0); -- 32-bit read data
empty : out STD_LOGIC;
full : out STD_LOGIC;
push : in STD_LOGIC; -- write request
pop : in STD_LOGIC; -- read request
reset : in STD_LOGIC;
nopop : out STD_LOGIC; -- presumably flags a rejected pop (read error) - confirm
nopush : out STD_LOGIC -- presumably flags a rejected push (write error) - confirm
);
port (
clk : in std_logic; -- clock for both read and write
din : in std_logic_vector (31 downto 0); -- 32-bit write data
dout : out std_logic_vector (31 downto 0); -- 32-bit read data
empty : out std_logic;
full : out std_logic;
push : in std_logic; -- write request
pop : in std_logic; -- read request
reset : in std_logic;
nopop : out std_logic; -- presumably flags a rejected pop (read error) - confirm
nopush : out std_logic -- presumably flags a rejected push (write error) - confirm
);
end fifo_primitive;
 
 
architecture Behavioral of fifo_primitive is
signal rdcount : std_logic_vector(11 downto 0); -- debugging
signal wrcount : std_logic_vector(11 downto 0); -- debugging
90,7 → 110,7
DATA_WIDTH => 36, -- Sets data width to 4, 9, 18, or 36
DO_REG => 1, -- Enable output register (0 or 1) Must be 1 if EN_SYN = "FALSE"
EN_SYN => TRUE, -- Specifies FIFO as dual-clock ("FALSE") or Synchronous ("TRUE")
FIFO_MODE => "FIFO18_36", -- Sets mode to FIFO18 or FIFO18_36
FIFO_MODE => "FIFO18_36", -- Sets mode to FIFO18 or FIFO18_36
FIRST_WORD_FALL_THROUGH => FALSE, -- Sets the FIFO FWFT to "TRUE" or "FALSE"
INIT => X"000000000", -- Initial values on output port
SRVAL => X"000000000" -- Set/Reset value for output port
103,22 → 123,21
EMPTY => empty_i, -- 1-bit empty output flag
FULL => full_i, -- 1-bit full output flag
-- WRCOUNT, RDCOUNT: 12-bit (each) FIFO pointers
RDCOUNT => RDCOUNT, -- 12-bit read count output
WRCOUNT => WRCOUNT, -- 12-bit write count output
RDCOUNT => RDCOUNT, -- 12-bit read count output
WRCOUNT => WRCOUNT, -- 12-bit write count output
-- WRERR, RDERR: 1-bit (each) FIFO full or empty error
RDERR => rderr_i, -- 1-bit read error output
WRERR => wrerr_i, -- 1-bit write error
DI => din, -- 32-bit data input
DIP => "0000", -- 4-bit parity input
RDEN => pop_i, -- 1-bit read enable input
REGCE => '1', -- 1-bit clock enable input
RST => reset_i, -- 1-bit reset input
RSTREG => reset_i, -- 1-bit output register set/reset
DI => din, -- 32-bit data input
DIP => "0000", -- 4-bit parity input
RDEN => pop_i, -- 1-bit read enable input
REGCE => '1', -- 1-bit clock enable input
RST => reset_i, -- 1-bit reset input
RSTREG => reset_i, -- 1-bit output register set/reset
-- WRCLK, RDCLK: 1-bit (each) Clocks
RDCLK => clk, -- 1-bit read clock input
WRCLK => clk, -- 1-bit write clock input
RDCLK => clk, -- 1-bit read clock input
WRCLK => clk, -- 1-bit write clock input
WREN => push_i -- 1-bit write enable input
);
 
end Behavioral;
 
/operand_dp.vhd
1,31 → 1,75
--------------------------------------------------------------------------------
-- This file is owned and controlled by Xilinx and must be used --
-- solely for design, simulation, implementation and creation of --
-- design files limited to Xilinx devices or technologies. Use --
-- with non-Xilinx devices or technologies is expressly prohibited --
-- and immediately terminates your license. --
-- --
-- XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" --
-- SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR --
-- XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, OR INFORMATION --
-- AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION --
-- OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS --
-- IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, --
-- AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE --
-- FOR YOUR IMPLEMENTATION. XILINX EXPRESSLY DISCLAIMS ANY --
-- WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE --
-- IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR --
-- REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF --
-- INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS --
-- FOR A PARTICULAR PURPOSE. --
-- --
-- Xilinx products are not intended for use in life support --
-- appliances, devices, or systems. Use in such applications are --
-- expressly prohibited. --
-- --
-- (c) Copyright 1995-2009 Xilinx, Inc. --
-- All rights reserved. --
--------------------------------------------------------------------------------
----------------------------------------------------------------------
---- operand_dp ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 4 x 512 bit dual port ram for the operands ----
---- 32 bit read and write for bus side and 512 bit read and ----
---- write for multiplier side ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
----------------------------------------------------------------------
-- This file is owned and controlled by Xilinx and must be used --
-- solely for design, simulation, implementation and creation of --
-- design files limited to Xilinx devices or technologies. Use --
-- with non-Xilinx devices or technologies is expressly prohibited --
-- and immediately terminates your license. --
-- --
-- XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" --
-- SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR --
-- XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, OR INFORMATION --
-- AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION --
-- OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS --
-- IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, --
-- AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE --
-- FOR YOUR IMPLEMENTATION. XILINX EXPRESSLY DISCLAIMS ANY --
-- WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE --
-- IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR --
-- REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF --
-- INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS --
-- FOR A PARTICULAR PURPOSE. --
-- --
-- Xilinx products are not intended for use in life support --
-- appliances, devices, or systems. Use in such applications are --
-- expressly prohibited. --
-- --
-- (c) Copyright 1995-2009 Xilinx, Inc. --
-- All rights reserved. --
----------------------------------------------------------------------
-- You must compile the wrapper file operand_dp.vhd when simulating
-- the core, operand_dp. When compiling the wrapper file, be sure to
-- reference the XilinxCoreLib VHDL simulation library. For detailed
35,40 → 79,46
-- below are supported by Xilinx, Mentor Graphics and Synplicity
-- synthesis tools. Ensure they are correct for your synthesis tool(s).
 
LIBRARY ieee;
USE ieee.std_logic_1164.ALL;
 
library ieee;
use ieee.std_logic_1164.ALL;
-- synthesis translate_off
Library XilinxCoreLib;
library XilinxCoreLib;
-- synthesis translate_on
ENTITY operand_dp IS
port (
clka: IN std_logic;
wea: IN std_logic_VECTOR(0 downto 0);
addra: IN std_logic_VECTOR(5 downto 0);
dina: IN std_logic_VECTOR(31 downto 0);
douta: OUT std_logic_VECTOR(511 downto 0);
clkb: IN std_logic;
web: IN std_logic_VECTOR(0 downto 0);
addrb: IN std_logic_VECTOR(5 downto 0);
dinb: IN std_logic_VECTOR(511 downto 0);
doutb: OUT std_logic_VECTOR(31 downto 0));
END operand_dp;
 
ARCHITECTURE operand_dp_a OF operand_dp IS
 
entity operand_dp is
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(5 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0);
clkb : in std_logic;
web : in std_logic_vector(0 downto 0);
addrb : in std_logic_vector(5 downto 0);
dinb : in std_logic_vector(511 downto 0);
doutb : out std_logic_vector(31 downto 0)
);
end operand_dp;
 
 
architecture operand_dp_a of operand_dp is
-- synthesis translate_off
component wrapped_operand_dp
port (
clka: IN std_logic;
wea: IN std_logic_VECTOR(0 downto 0);
addra: IN std_logic_VECTOR(5 downto 0);
dina: IN std_logic_VECTOR(31 downto 0);
douta: OUT std_logic_VECTOR(511 downto 0);
clkb: IN std_logic;
web: IN std_logic_VECTOR(0 downto 0);
addrb: IN std_logic_VECTOR(5 downto 0);
dinb: IN std_logic_VECTOR(511 downto 0);
doutb: OUT std_logic_VECTOR(31 downto 0));
end component;
component wrapped_operand_dp
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(5 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0);
clkb : in std_logic;
web : in std_logic_vector(0 downto 0);
addrb : in std_logic_vector(5 downto 0);
dinb : in std_logic_vector(511 downto 0);
doutb : out std_logic_vector(31 downto 0)
);
end component;
 
-- Configuration specification
for all : wrapped_operand_dp use entity XilinxCoreLib.blk_mem_gen_v3_3(behavioral)
122,23 → 172,24
c_use_byte_wea => 0,
c_rst_priority_b => "CE",
c_rst_priority_a => "CE",
c_use_default_data => 0);
c_use_default_data => 0
);
-- synthesis translate_on
BEGIN
begin
-- synthesis translate_off
U0 : wrapped_operand_dp
port map (
clka => clka,
wea => wea,
addra => addra,
dina => dina,
douta => douta,
clkb => clkb,
web => web,
addrb => addrb,
dinb => dinb,
doutb => doutb);
U0 : wrapped_operand_dp
port map (
clka => clka,
wea => wea,
addra => addra,
dina => dina,
douta => douta,
clkb => clkb,
web => web,
addrb => addrb,
dinb => dinb,
doutb => doutb
);
-- synthesis translate_on
 
END operand_dp_a;
 
end operand_dp_a;
/d_flip_flop.vhd
1,62 → 1,80
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: d_flip_flop.vhd / entity d_flip_flop
--
-- Last Modified: 24/11/2011
--
-- Description: 1 bit D flip-flop
--
--
-- Dependencies: LDCE
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- d_flip_flop ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 1 bit D flip-flop currently still uses primitives ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
library UNISIM;
use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
-- Xilinx primitives used
library unisim;
use unisim.vcomponents.all;
 
 
entity d_flip_flop is
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
port(
core_clk : in std_logic;
reset : in std_logic;
din : in std_logic;
dout : out std_logic
);
end d_flip_flop;
 
 
architecture Structural of d_flip_flop is
signal dout_i : std_logic;
signal dout_i : std_logic;
begin
dout <= dout_i;
FDCE_inst : FDCE
generic map (
INIT => '0') -- Initial value of latch ('0' or '1')
port map (
Q => dout_i, -- Data output
CLR => reset, -- Asynchronous clear/reset input
D => din, -- Data input
C => core_clk, -- Gate input
CE => '1' -- Gate enable input
);
end Structural;
 
dout <= dout_i;
 
FDCE_inst : FDCE
generic map (
INIT => '0') -- Initial value of latch ('0' or '1')
port map (
Q => dout_i, -- Data output
CLR => reset, -- Asynchronous clear/reset input
D => din, -- Data input
C => core_clk, -- Gate input
CE => '1' -- Gate enable input
);
 
end Structural;
/operands_sp.vhd
1,33 → 1,76
--------------------------------------------------------------------------------
-- This file is owned and controlled by Xilinx and must be used --
-- solely for design, simulation, implementation and creation of --
-- design files limited to Xilinx devices or technologies. Use --
-- with non-Xilinx devices or technologies is expressly prohibited --
-- and immediately terminates your license. --
-- --
-- XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" --
-- SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR --
-- XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, OR INFORMATION --
-- AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION --
-- OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS --
-- IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, --
-- AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE --
-- FOR YOUR IMPLEMENTATION. XILINX EXPRESSLY DISCLAIMS ANY --
-- WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE --
-- IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR --
-- REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF --
-- INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS --
-- FOR A PARTICULAR PURPOSE. --
-- --
-- Xilinx products are not intended for use in life support --
-- appliances, devices, or systems. Use in such applications are --
-- expressly prohibited. --
-- --
-- (c) Copyright 1995-2009 Xilinx, Inc. --
-- All rights reserved. --
--------------------------------------------------------------------------------
-- You must compile the wrapper file operands_sp.vhd when simulating
-- the core, operands_sp. When compiling the wrapper file, be sure to
----------------------------------------------------------------------
---- operands_sp ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- 512 bit single port ram for the modulus ----
---- 32 bit write for bus side and 512 bit read for multiplier side ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
----------------------------------------------------------------------
-- This file is owned and controlled by Xilinx and must be used --
-- solely for design, simulation, implementation and creation of --
-- design files limited to Xilinx devices or technologies. Use --
-- with non-Xilinx devices or technologies is expressly prohibited --
-- and immediately terminates your license. --
-- --
-- XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" --
-- SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR --
-- XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, OR INFORMATION --
-- AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION --
-- OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS --
-- IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, --
-- AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE --
-- FOR YOUR IMPLEMENTATION. XILINX EXPRESSLY DISCLAIMS ANY --
-- WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE --
-- IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR --
-- REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF --
-- INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS --
-- FOR A PARTICULAR PURPOSE. --
-- --
-- Xilinx products are not intended for use in life support --
-- appliances, devices, or systems. Use in such applications are --
-- expressly prohibited. --
-- --
-- (c) Copyright 1995-2009 Xilinx, Inc. --
-- All rights reserved. --
----------------------------------------------------------------------
-- You must compile the wrapper file operands_sp.vhd when simulating
-- the core, operands_sp. When compiling the wrapper file, be sure to
-- reference the XilinxCoreLib VHDL simulation library. For detailed
-- instructions, please refer to the "CORE Generator Help".
 
35,30 → 78,36
-- below are supported by Xilinx, Mentor Graphics and Synplicity
-- synthesis tools. Ensure they are correct for your synthesis tool(s).
 
LIBRARY ieee;
USE ieee.std_logic_1164.ALL;
 
library ieee;
use ieee.std_logic_1164.all;
-- synthesis translate_off
Library XilinxCoreLib;
library XilinxCoreLib;
-- synthesis translate_on
ENTITY operands_sp IS
port (
clka: IN std_logic;
wea: IN std_logic_VECTOR(0 downto 0);
addra: IN std_logic_VECTOR(4 downto 0);
dina: IN std_logic_VECTOR(31 downto 0);
douta: OUT std_logic_VECTOR(511 downto 0));
END operands_sp;
 
ARCHITECTURE operands_sp_a OF operands_sp IS
 
entity operands_sp is
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(4 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0)
);
end operands_sp;
 
 
architecture operands_sp_a of operands_sp is
-- synthesis translate_off
component wrapped_operands_sp
port (
clka: IN std_logic;
wea: IN std_logic_VECTOR(0 downto 0);
addra: IN std_logic_VECTOR(4 downto 0);
dina: IN std_logic_VECTOR(31 downto 0);
douta: OUT std_logic_VECTOR(511 downto 0));
end component;
component wrapped_operands_sp
port (
clka : in std_logic;
wea : in std_logic_vector(0 downto 0);
addra : in std_logic_vector(4 downto 0);
dina : in std_logic_vector(31 downto 0);
douta : out std_logic_vector(511 downto 0)
);
end component;
 
-- Configuration specification
for all : wrapped_operands_sp use entity XilinxCoreLib.blk_mem_gen_v3_3(behavioral)
112,18 → 161,20
c_use_byte_wea => 0,
c_rst_priority_b => "CE",
c_rst_priority_a => "CE",
c_use_default_data => 0);
c_use_default_data => 0
);
-- synthesis translate_on
BEGIN
 
begin
-- synthesis translate_off
U0 : wrapped_operands_sp
port map (
clka => clka,
wea => wea,
addra => addra,
dina => dina,
douta => douta);
u0 : wrapped_operands_sp
port map (
clka => clka,
wea => wea,
addra => addra,
dina => dina,
douta => douta
);
-- synthesis translate_on
 
END operands_sp_a;
 
end operands_sp_a;
/standard_stage.vhd
1,145 → 1,106
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: standard_stage.vhd / entity standard_stage
--
-- Last Modified: 24/11/2011
--
-- Description: standard stage for use in the montgommery multiplier systolic
-- array pipeline
--
--
-- Dependencies: standard_cell_block,
-- register_n,
-- register_1b,
-- d_flip_flop
--
-- Revision:
-- Revision 4.00 - Removed input registers and used start signal as load_out_regs
-- Revision 3.00 - Removed "a" input and replaced with "a_msb" (which is the only one
-- that matters.
-- Revision 2.02 - removed "ready" output signal
-- Revision 2.01 - replaced the behavioral description of the registers with a
-- component instantiation
-- Revision 2.00 - added registers to store input values xin, cin, qin (because they
-- can change during operation)
-- Revision 1.03 - added done pulse
-- Revision 1.02 - appended "_i" to name of all internal signals
-- Revision 1.01 - ready is '1' after reset
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- standard_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- standard stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - d_flip_flop ----
---- - register_n ----
---- - register_1b ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity standard_stage is
generic(width : integer := 32
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-1) downto 0);
m : in STD_LOGIC_VECTOR((width-1) downto 0);
xin : in STD_LOGIC;
qin : in STD_LOGIC;
xout : out STD_LOGIC;
qout : out STD_LOGIC;
a_msb : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
-- ready : out STD_LOGIC;
done : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0)
);
generic(
width : integer := 32
);
port(
core_clk : in std_logic;
my : in std_logic_vector((width-1) downto 0);
y : in std_logic_vector((width-1) downto 0);
m : in std_logic_vector((width-1) downto 0);
xin : in std_logic;
qin : in std_logic;
xout : out std_logic;
qout : out std_logic;
a_msb : in std_logic;
cin : in std_logic;
cout : out std_logic;
start : in std_logic;
reset : in std_logic;
done : out std_logic;
r : out std_logic_vector((width-1) downto 0)
);
end standard_stage;
 
 
architecture Structural of standard_stage is
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
-- input
signal cin_i : std_logic;
signal xin_i : std_logic;
signal qin_i : std_logic;
signal a_msb_i : std_logic;
 
component register_1b
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
component register_n
generic( n : integer := 4
);
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC_VECTOR((n-1) downto 0);
dout : out STD_LOGIC_VECTOR((n-1) downto 0)
);
end component;
component standard_cell_block
generic ( width : integer := 32
);
Port ( my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-1) downto 0);
m : in STD_LOGIC_VECTOR((width-1) downto 0);
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0));
end component;
-- output
signal cout_i : std_logic;
signal r_i : std_logic_vector((width-1) downto 0);
signal cout_reg_i : std_logic;
signal xout_reg_i : std_logic;
signal qout_reg_i : std_logic;
signal r_reg_i : std_logic_vector((width-1) downto 0);
 
-- input
signal cin_i : std_logic;
signal xin_i : std_logic;
signal qin_i : std_logic;
signal a_msb_i : std_logic;
-- signal cin_reg_i : std_logic;
-- signal xin_reg_i : std_logic;
-- signal qin_reg_i : std_logic;
-- signal a_msb_reg_i : std_logic;
-- interconnect
signal a_i : std_logic_vector((width-1) downto 0);
 
-- output
signal cout_i : std_logic;
signal r_i : std_logic_vector((width-1) downto 0);
signal cout_reg_i : std_logic;
signal xout_reg_i : std_logic;
signal qout_reg_i : std_logic;
signal r_reg_i : std_logic_vector((width-1) downto 0);
-- control
signal done_i : std_logic := '1';
begin
 
-- interconnect
signal a_i : std_logic_vector((width-1) downto 0);
 
-- control
-- signal load_out_regs_i : std_logic;
signal done_i : std_logic := '1';
--signal ready_del_i : std_logic := '1';
begin
-- map internal signals to outputs
done <= done_i;
r <= r_reg_i;
146,9 → 107,6
cout <= cout_reg_i;
qout <= qout_reg_i;
xout <= xout_reg_i;
-- two posibilities:
--done <= ready_i and (not ready_del_i); -- slow
--done <= not ready_i; -- faster but not sure if it will work (DONE_PROC can be omitted)
-- map inputs to internal signals
xin_i <= xin;
156,113 → 114,71
cin_i <= cin;
a_msb_i <= a_msb;
-- a_i <= a_msb_reg_i & r_reg_i((width-1) downto 1);
a_i <= a_msb_i & r_reg_i((width-1) downto 1);
-- input registers
-- A_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => a_msb_i,
-- dout => a_msb_reg_i
-- );
cell_block : standard_cell_block
generic map(
width => width
)
port map(
my => my,
y => y,
m => m,
x => xin_i,
q => qin_i,
a => a_i,
cin => cin_i,
cout => cout_i,
r => r_i
);
-- XIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => xin_i,
-- dout => xin_reg_i
-- );
done_signal : d_flip_flop
port map(
core_clk => core_clk,
reset => reset,
din => start,
dout => done_i
);
 
-- output registers
RESULT_REG : register_n
generic map(
n => width
)
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
 
XOUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => xin_i,
dout => xout_reg_i
);
 
QOUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => qin_i,
dout => qout_reg_i
);
 
COUT_REG : register_1b
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => cout_i,
dout => cout_reg_i
);
 
-- QIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => qin_i,
-- dout => qin_reg_i
-- );
-- CIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => cin_i,
-- dout => cin_reg_i
-- );
cell_block: standard_cell_block
generic map( width => width
)
Port map( my => my,
y => y,
m => m,
-- x => xin_reg_i,
-- q => qin_reg_i,
x => xin_i,
q => qin_i,
a => a_i,
-- cin => cin_reg_i,
cin => cin_i,
cout => cout_i,
r => r_i
);
-- delay_1_cycle: d_flip_flop
-- port map(core_clk => core_clk,
-- reset => reset,
-- din => start,
-- dout => load_out_regs_i
-- );
done_signal: d_flip_flop
port map(core_clk => core_clk,
reset => reset,
-- din => load_out_regs_i,
din => start,
dout => done_i
);
-- output registers
RESULT_REG: register_n
generic map( n => width
)
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
XOUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
-- din => xin_reg_i,
din => xin_i,
dout => xout_reg_i
);
QOUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
-- din => qin_reg_i,
din => qin_i,
dout => qout_reg_i
);
COUT_REG: register_1b
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => cout_i,
dout => cout_reg_i
);
end Structural;
/last_stage.vhd
1,160 → 1,98
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: last_stage.vhd / entity last_stage
--
-- Last Modified: 24/11/2011
--
-- Description: last stage for use in the montgommery multiplier systolic
-- array pipeline
--
--
-- Dependencies: standard_cell_block
-- cell_1b
--
-- Revision:
-- Revision 5.00 - Removed input registers and used start signal as load_out_regs
-- Revision 4.01 - Remove "done" input
-- Revision 4.00 - Removed "a" input with internal feedback
-- Revision 3.03 - fixed switched last two bits
-- Revision 3.02 - removed "ready" output signal
-- Revision 3.01 - replaced the behavioral description of the registers with a
-- component instantiation
-- Revision 3.00 - added registers to store input values xin, cin, qin (because they
-- can change during operation)
-- Revision 2.00 - changed indices in signals my, y and m
-- Revision 1.03 - added done pulse
-- Revision 1.02 - appended "_i" to name of all internal signals
-- Revision 1.01 - ready is '1' after reset
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- last_stage ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- last stage for use in the Montgomery multiplier ----
---- systolic array pipeline ----
---- ----
---- Dependencies: ----
---- - standard_cell_block ----
---- - register_n ----
---- - cell_1b ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity last_stage is
generic(width : integer := 16 -- must be the same as width of the standard stage
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-2) downto 0);
m : in STD_LOGIC_VECTOR((width-2) downto 0);
xin : in STD_LOGIC;
qin : in STD_LOGIC;
cin : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
-- ready : out STD_LOGIC;
-- done : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width+1) downto 0)
);
generic(
width : integer := 16 -- must be the same as width of the standard stage
);
port(
core_clk : in std_logic;
my : in std_logic_vector((width-1) downto 0);
y : in std_logic_vector((width-2) downto 0);
m : in std_logic_vector((width-2) downto 0);
xin : in std_logic;
qin : in std_logic;
cin : in std_logic;
start : in std_logic;
reset : in std_logic;
r : out std_logic_vector((width+1) downto 0)
);
end last_stage;
 
 
architecture Structural of last_stage is
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
-- input
signal my_i : std_logic_vector(width downto 0);
signal m_i : std_logic_vector(width downto 0);
signal y_i : std_logic_vector(width downto 0);
signal cin_i : std_logic;
signal xin_i : std_logic;
signal qin_i : std_logic;
signal a_i : std_logic_vector((width) downto 0);
 
component register_1b
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
component register_n
generic( n : integer := 4
);
port(core_clk : in STD_LOGIC;
ce : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC_VECTOR((n-1) downto 0);
dout : out STD_LOGIC_VECTOR((n-1) downto 0)
);
end component;
component standard_cell_block
generic ( width : integer := 32
);
Port ( my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-1) downto 0);
m : in STD_LOGIC_VECTOR((width-1) downto 0);
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0));
end component;
-- output
signal r_i : std_logic_vector((width+1) downto 0);
signal r_reg_i : std_logic_vector((width+1) downto 0);
 
component cell_1b
port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
a : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
end component;
 
-- input
signal my_i : std_logic_vector(width downto 0);
signal m_i : std_logic_vector(width downto 0);
signal y_i : std_logic_vector(width downto 0);
signal cin_i : std_logic;
signal xin_i : std_logic;
signal qin_i : std_logic;
signal a_i : std_logic_vector((width) downto 0);
-- signal cin_reg_i : std_logic;
-- signal xin_reg_i : std_logic;
-- signal qin_reg_i : std_logic;
-- signal a_reg_i : std_logic_vector((width) downto 0);
-- interconnection
signal cout_i : std_logic;
-- output
signal r_i : std_logic_vector((width+1) downto 0);
signal r_reg_i : std_logic_vector((width+1) downto 0);
 
-- interconnection
signal cout_i : std_logic;
-- control signals
-- signal load_out_regs_i : std_logic;
-- signal done_i : std_logic := '1';
--signal ready_del_i : std_logic := '1';
begin
-- map internal signals to outputs
-- done <= done_i;
r <= r_reg_i;
-- two posibilities:
--done <= ready_i and (not ready_del_i); -- slow
--done <= not ready_i; -- faster but not sure if it will work (DONE_PROC can be omitted)
-- map inputs to internal signals
my_i <= '0' & my;
166,86 → 104,46
 
a_i <= r_reg_i((width+1) downto 1);
cell_block: standard_cell_block
generic map( width => width
)
Port map( my => my_i(width-1 downto 0),
y => y_i(width-1 downto 0),
m => m_i(width-1 downto 0),
-- x => xin_reg_i,
-- q => qin_reg_i,
x => xin_i,
q => qin_i,
a => a_i((width-1) downto 0),
-- cin => cin_reg_i,
cin => cin_i,
cout => cout_i,
r => r_i((width-1) downto 0)
);
cell_block : standard_cell_block
generic map(
width => width
)
port map(
my => my_i(width-1 downto 0),
y => y_i(width-1 downto 0),
m => m_i(width-1 downto 0),
x => xin_i,
q => qin_i,
a => a_i((width-1) downto 0),
cin => cin_i,
cout => cout_i,
r => r_i((width-1) downto 0)
);
 
last_cell : cell_1b
port map(
my => my_i(width),
y => y_i(width),
m => m_i(width),
x => xin_i,
q => qin_i,
a => a_i(width),
cin => cout_i,
cout => r_i(width+1),
r => r_i(width)
);
 
-- output registers
RESULT_REG : register_n
generic map(
n => (width+2)
)
port map(
core_clk => core_clk,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
last_cell: cell_1b
port map( my => my_i(width),
y => y_i(width),
m => m_i(width),
-- x => xin_reg_i,
-- q => qin_reg_i,
x => xin_i,
q => qin_i,
a => a_i(width),
cin => cout_i,
cout => r_i(width+1),
r => r_i(width)
);
-- XIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => xin_i,
-- dout => xin_reg_i
-- );
-- QIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => qin_i,
-- dout => qin_reg_i
-- );
-- CIN_REG: register_1b
-- port map(core_clk => core_clk,
-- ce => start,
-- reset => reset,
-- din => cin_i,
-- dout => cin_reg_i
-- );
-- control
-- delay_1_cycle: d_flip_flop
-- port map(core_clk => core_clk,
-- reset => reset,
-- din => start,
-- dout => load_out_regs_i
-- );
-- done_signal: d_flip_flop
-- port map(core_clk => core_clk,
-- reset => reset,
-- din => load_out_regs_i,
-- dout => done_i
-- );
-- output registers
RESULT_REG: register_n
generic map( n => (width+2)
)
port map(core_clk => core_clk,
-- ce => load_out_regs_i,
ce => start,
reset => reset,
din => r_i,
dout => r_reg_i
);
end Structural;
/operand_mem.vhd
1,112 → 1,102
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: operand_mem.vhd / entity operand_mem
--
-- Last Modified: 18/06/2012
--
-- Description: BRAM memory and logic to the store 4 (1536-bit) operands and the
-- modulus for the montgomery multiplier
--
--
-- Dependencies: modulus_ram, operand_ram
--
-- Revision:
-- Revision 2.00 - Removed y_register -> seperate module
-- Revision 1.01 - Added "result_dest_op" input
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may be contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- operand_mem ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- BRAM memory and logic to store 4 (1536-bit) operands ----
---- and the modulus for the montgomery multiplier ----
---- ----
---- Dependencies: ----
---- - operand_ram ----
---- - modulus_ram ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- operand_mem: storage wrapper with a 32-bit bus-side data interface
-- (data_in / data_out / rw_address) and 1536-bit parallel interfaces on the
-- multiplier side (xy_out, m, result_in).
-- The generic and port clauses below are listed twice with identical
-- declarations; only the layout differs.
-- NOTE(review): generic n is not referenced by any port in this declaration;
-- all wide ports use the literal range 1535 downto 0.
entity operand_mem is
generic(n : integer := 1536
);
port(-- data interface (plb side)
data_in : in std_logic_vector(31 downto 0);
data_out : out std_logic_vector(31 downto 0);
rw_address : in std_logic_vector(8 downto 0);
-- address structure:
-- bit: 8 -> '1': modulus
-- '0': operands
-- bits: 7-6 -> operand_in_sel in case of bit 8 = '0'
-- don't care in case of modulus
-- bits: 5-0 -> modulus_addr / operand_addr resp.
-- operand interface (multiplier side)
op_sel : in std_logic_vector(1 downto 0);
xy_out : out std_logic_vector(1535 downto 0);
m : out std_logic_vector(1535 downto 0);
result_in : in std_logic_vector(1535 downto 0);
-- control signals
load_op : in std_logic;
load_m : in std_logic;
load_result : in std_logic;
result_dest_op : in std_logic_vector(1 downto 0);
collision : out std_logic;
-- system clock
clk : in std_logic
);
generic(n : integer := 1536
);
port(
-- data interface (plb side)
data_in : in std_logic_vector(31 downto 0);
data_out : out std_logic_vector(31 downto 0);
rw_address : in std_logic_vector(8 downto 0);
-- address structure:
-- bit: 8 -> '1': modulus
-- '0': operands
-- bits: 7-6 -> operand_in_sel in case of bit 8 = '0'
-- don't care in case of modulus
-- bits: 5-0 -> modulus_addr / operand_addr resp.
 
-- operand interface (multiplier side)
op_sel : in std_logic_vector(1 downto 0);
xy_out : out std_logic_vector(1535 downto 0);
m : out std_logic_vector(1535 downto 0);
result_in : in std_logic_vector(1535 downto 0);
-- control signals
load_op : in std_logic;
load_m : in std_logic;
load_result : in std_logic;
result_dest_op : in std_logic_vector(1 downto 0);
collision : out std_logic;
-- system clock
clk : in std_logic
);
end operand_mem;
 
 
architecture Behavioral of operand_mem is
-- single port (32-bit -> 1536-bit) block ram
-- Component declaration for the modulus storage:
--   clk           : clock
--   modulus_addr  : 6-bit word address for 32-bit writes
--   write_modulus : write strobe
--   modulus_in    : 32-bit write data
--   modulus_out   : full 1536-bit modulus, read out in parallel
component modulus_ram
port(
clk : in std_logic;
modulus_addr : in std_logic_vector(5 downto 0);
write_modulus : in std_logic;
modulus_in : in std_logic_vector(31 downto 0);
modulus_out : out std_logic_vector(1535 downto 0)
);
end component;
signal xy_data_i : std_logic_vector(31 downto 0);
signal xy_addr_i : std_logic_vector(5 downto 0);
signal operand_in_sel_i : std_logic_vector(1 downto 0);
signal collision_i : std_logic;
 
-- dual port block ram
-- Component declaration for the operand storage.
-- 32-bit side : operand_addr / operand_in / operand_in_sel / write_operand
--               for writes, result_out for read-back.
-- 1536-bit side: operand_out selected by operand_out_sel; result_in is
--               written when write_result is asserted, into the operand
--               chosen by result_dest_op.
-- collision is an output flag; its exact condition is defined inside
-- operand_ram and is not visible in this declaration.
component operand_ram
port(
clk : in std_logic;
operand_addr : in std_logic_vector(5 downto 0);
operand_in : in std_logic_vector(31 downto 0);
operand_in_sel : in std_logic_vector(1 downto 0);
write_operand : in std_logic;
operand_out_sel : in std_logic_vector(1 downto 0);
result_dest_op : in std_logic_vector(1 downto 0);
write_result : in std_logic;
result_in : in std_logic_vector(1535 downto 0);
collision : out std_logic;
result_out : out std_logic_vector(31 downto 0);
operand_out : out std_logic_vector(1535 downto 0)
);
end component;
signal xy_op_i : std_logic_vector(1535 downto 0);
 
signal xy_data_i : std_logic_vector(31 downto 0);
signal xy_addr_i : std_logic_vector(5 downto 0);
signal operand_in_sel_i : std_logic_vector(1 downto 0);
signal collision_i : std_logic;
signal m_addr_i : std_logic_vector(5 downto 0);
signal write_m_i : std_logic;
signal m_data_i : std_logic_vector(31 downto 0);
 
signal xy_op_i : std_logic_vector(1535 downto 0);
signal m_addr_i : std_logic_vector(5 downto 0);
signal write_m_i : std_logic;
signal m_data_i : std_logic_vector(31 downto 0);
begin
 
-- map outputs
121,31 → 111,31
m_data_i <= data_in;
write_m_i <= load_m;
 
-- xy operand storage
-- Operand RAM instance. Entity ports are wired as follows:
--   load_op / load_result / op_sel / result_dest_op / result_in pass straight
--   through to the RAM; the 32-bit read-back (result_out) drives data_out
--   directly; the wide operand output and the collision flag land on the
--   internal signals xy_op_i and collision_i.
xy_ram: operand_ram port map(
clk => clk,
collision => collision_i,
operand_addr => xy_addr_i,
operand_in => xy_data_i,
operand_in_sel => operand_in_sel_i,
result_out => data_out,
write_operand => load_op,
operand_out => xy_op_i,
operand_out_sel => op_sel,
result_dest_op => result_dest_op,
write_result => load_result,
result_in => result_in
);
-- xy operand storage
-- Operand RAM instance. Entity ports are wired as follows:
--   load_op / load_result / op_sel / result_dest_op / result_in pass straight
--   through to the RAM; the 32-bit read-back (result_out) drives data_out
--   directly; the wide operand output and the collision flag land on the
--   internal signals xy_op_i and collision_i.
xy_ram : operand_ram
port map(
clk => clk,
collision => collision_i,
operand_addr => xy_addr_i,
operand_in => xy_data_i,
operand_in_sel => operand_in_sel_i,
result_out => data_out,
write_operand => load_op,
operand_out => xy_op_i,
operand_out_sel => op_sel,
result_dest_op => result_dest_op,
write_result => load_result,
result_in => result_in
);
 
-- modulus storage
-- Modulus RAM instance: written 32 bits at a time through m_addr_i /
-- m_data_i / write_m_i; the 1536-bit read-out drives the entity port m
-- directly.
m_ram : modulus_ram
port map(
clk => clk,
modulus_addr => m_addr_i,
write_modulus => write_m_i,
modulus_in => m_data_i,
modulus_out => m
);
-- modulus storage
-- Modulus RAM instance: written 32 bits at a time through m_addr_i /
-- m_data_i / write_m_i; the 1536-bit read-out drives the entity port m
-- directly.
m_ram : modulus_ram
port map(
clk => clk,
modulus_addr => m_addr_i,
write_modulus => write_m_i,
modulus_in => m_data_i,
modulus_out => m
);
end Behavioral;
 
/modulus_ram.vhd
1,57 → 1,71
----------------------------------------------------------------------------------
-- Company:
-- Engineer:
--
-- Create Date: 13:57:21 03/08/2012
-- Design Name:
-- Module Name: modulus_ram - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description:
--
-- Dependencies:
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- modulus_ram ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- BRAM memory and logic to store the 1536-bit modulus ----
---- ----
---- Dependencies: ----
---- - operands_sp (coregen) ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- modulus_ram: stores the modulus, written in 32-bit words and read out as
-- one 1536-bit vector.
--   clk           : clock
--   modulus_addr  : 6-bit word address of the 32-bit word being written
--   write_modulus : write strobe
--   modulus_in    : 32-bit write data
--   modulus_out   : complete 1536-bit modulus
-- The port clause is listed twice below with identical declarations; only
-- the layout differs.
entity modulus_ram is
port(
clk : in std_logic;
modulus_addr : in std_logic_vector(5 downto 0);
write_modulus : in std_logic;
modulus_in : in std_logic_vector(31 downto 0);
modulus_out : out std_logic_vector(1535 downto 0)
);
port(
clk : in std_logic;
modulus_addr : in std_logic_vector(5 downto 0);
write_modulus : in std_logic;
modulus_in : in std_logic_vector(31 downto 0);
modulus_out : out std_logic_vector(1535 downto 0)
);
end modulus_ram;
 
 
architecture Behavioral of modulus_ram is
-- single port blockram to store modulus
-- Single-port memory primitive with asymmetric widths: 32-bit write data
-- (dina), 512-bit read data (douta), 5-bit address, and a write enable that
-- is a one-element vector (hence the wea(k downto k) slices at the
-- instantiations).
-- NOTE(review): the implementation is generated outside this file
-- (the header lists it as a coregen dependency) -- confirm its read latency
-- and write mode there.
component operands_sp
port(
clka: in std_logic;
wea: in std_logic_vector(0 downto 0);
addra: in std_logic_vector(4 downto 0);
dina: in std_logic_vector(31 downto 0);
douta: out std_logic_vector(511 downto 0)
);
end component;
signal part_enable : std_logic_vector(3 downto 0);
signal wea : std_logic_vector(3 downto 0);
signal addra : std_logic_vector(4 downto 0);
signal part_enable : std_logic_vector(3 downto 0);
signal wea : std_logic_vector(3 downto 0);
signal addra : std_logic_vector(4 downto 0);
begin
 
-- the blockram has a write depth of 2 but we only use the lower half
59,10 → 73,10
-- the two highest bits of the address are used to select the block
-- One-hot decode of the two most significant address bits into a 4-bit
-- block-enable vector: "00" -> bit 0, "01" -> bit 1, "10" -> bit 2, anything
-- else -> bit 3.
-- NOTE(review): bit 3 is only referenced by the commented-out modulus_3
-- instance, so addresses with the two top bits set appear to write nowhere.
-- The assignment is listed twice below with identical choices.
with modulus_addr(5 downto 4) select
part_enable <= "0001" when "00",
"0010" when "01",
"0100" when "10",
"1000" when others;
part_enable <= "0001" when "00",
"0010" when "01",
"0100" when "10",
"1000" when others;
 
with write_modulus select
wea <= part_enable when '1',
69,41 → 83,31
"0000" when others;
-- 3 instances of 512 bits blockram (3 x 512 = 1536 bits)
-- Three 512-bit memory blocks that together form the 1536-bit modulus.
-- All blocks share clk, addra and the 32-bit write data; block k is written
-- only when wea(k) is set and drives bits (512*k+511 downto 512*k) of
-- modulus_out.

-- block 0: modulus bits 511..0
modulus_0 : operands_sp
port map (
clka => clk,
wea => wea(0 downto 0),
addra => addra,
dina => modulus_in,
douta => modulus_out(511 downto 0)
);
-- block 1: modulus bits 1023..512
modulus_1 : operands_sp
port map (
clka => clk,
wea => wea(1 downto 1),
addra => addra,
dina => modulus_in,
douta => modulus_out(1023 downto 512)
);
-- block 2: modulus bits 1535..1024
modulus_2 : operands_sp
port map (
clka => clk,
wea => wea(2 downto 2),
addra => addra,
dina => modulus_in,
douta => modulus_out(1535 downto 1024)
);
-- modulus_3 : operands_sp
-- port map (
-- clka => clk,
-- wea => wea(3 downto 3),
-- addra => addra,
-- dina => modulus_in,
-- douta => modulus_out(2047 downto 1536)
-- );
-- Memory block 0: holds modulus bits 511..0. Written with the shared 32-bit
-- modulus_in when wea(0) is set; wea is sliced as (0 downto 0) because the
-- component's write enable is a one-element vector.
modulus_0 : operands_sp
port map (
clka => clk,
wea => wea(0 downto 0),
addra => addra,
dina => modulus_in,
douta => modulus_out(511 downto 0)
);
 
-- Memory block 1: holds modulus bits 1023..512. Written with the shared
-- 32-bit modulus_in when wea(1) is set.
modulus_1 : operands_sp
port map (
clka => clk,
wea => wea(1 downto 1),
addra => addra,
dina => modulus_in,
douta => modulus_out(1023 downto 512)
);
 
-- Memory block 2: holds modulus bits 1535..1024. Written with the shared
-- 32-bit modulus_in when wea(2) is set.
modulus_2 : operands_sp
port map (
clka => clk,
wea => wea(2 downto 2),
addra => addra,
dina => modulus_in,
douta => modulus_out(1535 downto 1024)
);
 
end Behavioral;
 
/operand_ram.vhd
1,93 → 1,95
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: operand_ram.vhd / entity operand_ram
--
-- Last Modified: 25/04/2012
--
-- Description: BRAM memory and logic to store the 4 (1536-bit) operands and the
-- modulus for the montgomery multiplier
--
--
-- Dependencies: operand_dp (coregen)
--
-- Revision:
-- Revision 1.01 - added "result_dest_op" input
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
-- Additional Comments:
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- operand_ram ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- BRAM memory and logic to store 4 (1536-bit) operands ----
---- for the montgomery multiplier ----
---- ----
---- Dependencies: ----
---- - operand_dp (coregen) ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- operand_ram: operand storage with a 32-bit bus-side interface and a
-- 1536-bit multiplier-side interface.
--   bus side        : operand_addr / operand_in / operand_in_sel /
--                     write_operand for writes, result_out for read-back
--   multiplier side : operand_out (selected by operand_out_sel) and
--                     result_in, written on write_result into the operand
--                     chosen by result_dest_op
-- The port clause is listed twice below with the same declarations; only
-- layout and comment wording differ.
entity operand_ram is
port( -- should a write_operand_ack be provided?
-- global ports
clk : in std_logic;
collision : out std_logic;
-- bus side connections (32-bit serial)
operand_addr : in std_logic_vector(5 downto 0);
operand_in : in std_logic_vector(31 downto 0);
operand_in_sel : in std_logic_vector(1 downto 0);
result_out : out std_logic_vector(31 downto 0);
write_operand : in std_logic;
-- multiplier side connections (+1024 bit parallel)
result_dest_op : in std_logic_vector(1 downto 0);
operand_out : out std_logic_vector(1535 downto 0);
operand_out_sel : in std_logic_vector(1 downto 0); -- controlled by bus side :)
write_result : in std_logic;
result_in : in std_logic_vector(1535 downto 0)
);
port( -- should a write_operand_ack be provided?
-- global ports
clk : in std_logic;
collision : out std_logic;
-- bus side connections (32-bit serial)
operand_addr : in std_logic_vector(5 downto 0);
operand_in : in std_logic_vector(31 downto 0);
operand_in_sel : in std_logic_vector(1 downto 0);
result_out : out std_logic_vector(31 downto 0);
write_operand : in std_logic;
-- multiplier side connections (1536 bit parallel)
result_dest_op : in std_logic_vector(1 downto 0);
operand_out : out std_logic_vector(1535 downto 0);
operand_out_sel : in std_logic_vector(1 downto 0); -- controlled by bus side
write_result : in std_logic;
result_in : in std_logic_vector(1535 downto 0)
);
end operand_ram;
 
 
architecture Behavioral of operand_ram is
-- dual port blockram to store and update operands
-- Dual-port memory primitive with mirrored asymmetric widths:
--   port A: 32-bit write data (dina), 512-bit read data (douta)
--   port B: 512-bit write data (dinb), 32-bit read data (doutb)
-- Each port has its own clock, 6-bit address and one-element write-enable
-- vector.
-- NOTE(review): the implementation is generated outside this file
-- (the header lists it as a coregen dependency) -- confirm latency and
-- write-collision behaviour there.
component operand_dp
port (
clka: in std_logic;
wea: in std_logic_vector(0 downto 0);
addra: in std_logic_vector(5 downto 0);
dina: in std_logic_vector(31 downto 0);
douta: out std_logic_vector(511 downto 0);
clkb: in std_logic;
web: IN std_logic_VECTOR(0 downto 0);
addrb: IN std_logic_VECTOR(5 downto 0);
dinb: IN std_logic_VECTOR(511 downto 0);
doutb: OUT std_logic_VECTOR(31 downto 0));
end component;
-- port a signals
signal addra : std_logic_vector(5 downto 0);
signal part_enable : std_logic_vector(3 downto 0);
signal wea : std_logic_vector(3 downto 0);
signal write_operand_i : std_logic;
-- port b signals
signal addrb : std_logic_vector(5 downto 0);
signal web : std_logic_vector(0 downto 0);
signal doutb0 : std_logic_vector(31 downto 0);
signal doutb1 : std_logic_vector(31 downto 0);
signal doutb2 : std_logic_vector(31 downto 0);
signal doutb3 : std_logic_vector(31 downto 0);
-- port a signals
signal addra : std_logic_vector(5 downto 0);
signal part_enable : std_logic_vector(3 downto 0);
signal wea : std_logic_vector(3 downto 0);
signal write_operand_i : std_logic;
 
-- port b signals
signal addrb : std_logic_vector(5 downto 0);
signal web : std_logic_vector(0 downto 0);
signal doutb0 : std_logic_vector(31 downto 0);
signal doutb1 : std_logic_vector(31 downto 0);
signal doutb2 : std_logic_vector(31 downto 0);
signal doutb3 : std_logic_vector(31 downto 0);
 
begin
 
-- WARNING: Very Important!
-- wea & web signals must never be high at the same time !!
-- web has priority
103,10 → 105,10
operand_out_sel & "0000" when others;
-- One-hot decode of the two most significant bits of operand_addr into a
-- 4-bit block-enable vector: "00" -> bit 0, "01" -> bit 1, "10" -> bit 2,
-- anything else -> bit 3.
-- NOTE(review): bit 3 is only referenced by the commented-out op_3 instance,
-- so writes with both top address bits set appear to reach no memory block.
-- The assignment is listed twice below with identical choices.
with operand_addr(5 downto 4) select
part_enable <= "0001" when "00",
"0010" when "01",
"0100" when "10",
"1000" when others;
part_enable <= "0001" when "00",
"0010" when "01",
"0100" when "10",
"1000" when others;
 
with write_operand_i select
wea <= part_enable when '1',
115,83 → 117,54
-- we can only read back from the result (stored in result_dest_op)
addrb <= result_dest_op & operand_addr(3 downto 0);
-- register_output_proc: process(clk)
-- begin
-- if rising_edge(clk) then
-- case operand_addr(5 downto 4) is
-- when "00" =>
-- result_out <= doutb0;
-- when "01" =>
-- result_out <= doutb1;
-- when "10" =>
-- result_out <= doutb2;
-- when others =>
-- result_out <= doutb3;
-- end case;
-- end if;
-- end process;
-- Read-back multiplexer: the two most significant bits of operand_addr pick
-- which block's 32-bit port-B output is forwarded to result_out. This is a
-- concurrent selection with no register of its own.
-- NOTE(review): doutb3 is selected for every choice other than "00", "01"
-- and "10", yet its only visible driver is the commented-out op_3 instance
-- -- verify what result_out should return for those addresses.
-- The last two choices are listed twice below.
with operand_addr(5 downto 4) select
result_out <= doutb0 when "00",
doutb1 when "01",
doutb2 when "10",
doutb3 when others;
doutb2 when "10",
doutb3 when others;
-- 4 instances of a dual port ram to store the parts of the operand
-- Three dual-port memory blocks, each holding one 512-bit slice of the
-- 1536-bit operands. All blocks share clk on both ports, addra, addrb, the
-- 32-bit write data operand_in and the port-B write enable web.
--   port A: 32-bit write gated by wea(k); 512-bit read onto operand_out slice
--   port B: 512-bit write of the matching result_in slice; 32-bit read onto
--           doutbK

-- block 0: operand bits 511..0
op_0 : operand_dp
port map (
clka => clk,
wea => wea(0 downto 0),
addra => addra,
dina => operand_in,
douta => operand_out(511 downto 0),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(511 downto 0),
doutb => doutb0
);
-- block 1: operand bits 1023..512
op_1 : operand_dp
port map (
clka => clk,
wea => wea(1 downto 1),
addra => addra,
dina => operand_in,
douta => operand_out(1023 downto 512),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(1023 downto 512),
doutb => doutb1
);
-- block 2: operand bits 1535..1024
op_2 : operand_dp
port map (
clka => clk,
wea => wea(2 downto 2),
addra => addra,
dina => operand_in,
douta => operand_out(1535 downto 1024),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(1535 downto 1024),
doutb => doutb2
);
-- op_3 : operand_dp
-- port map (
-- clka => clk,
-- wea => wea(3 downto 3),
-- addra => addra,
-- dina => operand_in,
-- douta => operand_out(2047 downto 1536),
-- clkb => clk,
-- web => web,
-- addrb => addrb,
-- dinb => result_in(2047 downto 1536),
-- doutb => doutb3
-- );
-- 3 instances of a dual port ram to store the parts of the operand
-- Memory block 0: operand bits 511..0.
--   port A: 32-bit write of operand_in gated by wea(0); 512-bit read drives
--           operand_out(511 downto 0)
--   port B: 512-bit write of result_in(511 downto 0) gated by the shared web;
--           32-bit read drives doutb0
-- Both ports are clocked by clk.
op_0 : operand_dp
port map (
clka => clk,
wea => wea(0 downto 0),
addra => addra,
dina => operand_in,
douta => operand_out(511 downto 0),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(511 downto 0),
doutb => doutb0
);
 
-- Memory block 1: operand bits 1023..512.
--   port A: 32-bit write of operand_in gated by wea(1); 512-bit read drives
--           operand_out(1023 downto 512)
--   port B: 512-bit write of result_in(1023 downto 512) gated by the shared
--           web; 32-bit read drives doutb1
-- Both ports are clocked by clk.
op_1 : operand_dp
port map (
clka => clk,
wea => wea(1 downto 1),
addra => addra,
dina => operand_in,
douta => operand_out(1023 downto 512),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(1023 downto 512),
doutb => doutb1
);
 
-- Memory block 2: operand bits 1535..1024.
--   port A: 32-bit write of operand_in gated by wea(2); 512-bit read drives
--           operand_out(1535 downto 1024)
--   port B: 512-bit write of result_in(1535 downto 1024) gated by the shared
--           web; 32-bit read drives doutb2
-- Both ports are clocked by clk.
op_2 : operand_dp
port map (
clka => clk,
wea => wea(2 downto 2),
addra => addra,
dina => operand_in,
douta => operand_out(1535 downto 1024),
clkb => clk,
web => web,
addrb => addrb,
dinb => result_in(1535 downto 1024),
doutb => doutb2
);
 
end Behavioral;
 
/adder_block.vhd
1,90 → 1,96
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: adder_block.vhd / entity adder_block
--
-- Last Modified: 25/11/2011
--
-- Description: adder block for use in the Montgomery multiplier pre- and post-
-- computation adders
--
--
-- Dependencies: cell_1b_adder,
-- d_flip_flop
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- adder_block ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- Adder block with a flipflop for the carry out ----
---- for use in the Montgomery multiplier pre and post ----
---- computation adders ----
---- ----
---- Dependencies: ----
---- - cell_1b_adder ----
---- - d_flip_flop ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
-- adder_block: width-bit adder slice.
--   width    : number of bits handled by this block (default 32)
--   core_clk : clock for the carry-out flip-flop
--   a, b     : operands
--   cin      : carry in
--   cout     : carry out, taken from a d_flip_flop fed by the last carry
--   s        : sum bits
-- The generic and port clauses are listed twice below with the same
-- declarations; only keyword casing and layout differ.
entity adder_block is
generic ( width : integer := 32
);
Port ( core_clk : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
b : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
s : out STD_LOGIC_VECTOR((width-1) downto 0)
);
generic (
width : integer := 32
);
port (
core_clk : in std_logic;
a : in std_logic_vector((width-1) downto 0);
b : in std_logic_vector((width-1) downto 0);
cin : in std_logic;
cout : out std_logic;
s : out std_logic_vector((width-1) downto 0)
);
end adder_block;
 
 
architecture Structural of adder_block is
-- One-bit adder cell: two data inputs (a, mux_result), carry in, carry out
-- and result bit r.
-- NOTE(review): the cell's logic is defined elsewhere; presumably a full
-- adder -- confirm against cell_1b_adder.
component cell_1b_adder
Port ( a : in STD_LOGIC;
mux_result : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
r : out STD_LOGIC);
end component;
-- Single-bit storage element with clock, reset, data in and data out.
-- NOTE(review): reset polarity and whether it is synchronous are defined in
-- d_flip_flop itself and cannot be determined from this declaration.
component d_flip_flop
port(core_clk : in STD_LOGIC;
reset : in STD_LOGIC;
din : in STD_LOGIC;
dout : out STD_LOGIC
);
end component;
signal carry : std_logic_vector(width downto 0);
signal carry : std_logic_vector(width downto 0);
begin
carry(0) <= cin;
-- Instantiates one cell_1b_adder per bit. Cell i adds a(i) and b(i) (b goes
-- in through the cell's mux_result port), takes carry(i) and produces
-- carry(i+1) and sum bit s(i), so the carry is passed from each cell to the
-- next.
adder_chain: for i in 0 to (width-1) generate
adders: cell_1b_adder
port map(a => a(i),
mux_result => b(i),
cin => carry(i),
cout => carry(i+1),
r => s(i)
);
end generate;
-- Passes the final carry, carry(width), through a flip-flop before it leaves
-- the block as cout; the sum bits are not passed through this element.
-- The flip-flop's reset input is tied to constant '0'.
delay_1_cycle: d_flip_flop
port map(core_clk => core_clk,
reset => '0',
din => carry(width),
dout => cout
);
end Structural;
 
carry(0) <= cin;
 
-- Instantiates one cell_1b_adder per bit. Cell i adds a(i) and b(i) (b goes
-- in through the cell's mux_result port), takes carry(i) and produces
-- carry(i+1) and sum bit s(i), so the carry is passed from each cell to the
-- next.
adder_chain : for i in 0 to (width-1) generate
adders : cell_1b_adder
port map(
a => a(i),
mux_result => b(i),
cin => carry(i),
cout => carry(i+1),
r => s(i)
);
end generate;
 
-- Passes the final carry, carry(width), through a flip-flop before it leaves
-- the block as cout; the sum bits are not passed through this element.
-- The flip-flop's reset input is tied to constant '0'.
delay_1_cycle : d_flip_flop
port map(
core_clk => core_clk,
reset => '0',
din => carry(width),
dout => cout
);
 
end Structural;
/cell_1b_mux.vhd
1,60 → 1,74
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: cell_1b_mux.vhd / entity cell_1b_mux
--
-- Last Modified: 14/11/2011
--
-- Description: mux for use in the Montgomery multiplier systolic array
-- currently a behavioral description
--
--
-- Dependencies: none
--
-- Revision:
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- cell_1b_mux ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- mux for use in the Montgomery multiplier systolic array ----
---- ----
---- Dependencies: none ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
 
-- cell_1b_mux: one-bit selector.
--   my, y, m : candidate data bits
--   x, q     : select bits
--   result   : the chosen data bit, or '0'
-- The port clause is listed twice below with the same declarations; only
-- keyword casing and layout differ.
entity cell_1b_mux is
Port ( my : in STD_LOGIC;
y : in STD_LOGIC;
m : in STD_LOGIC;
x : in STD_LOGIC;
q : in STD_LOGIC;
result : out STD_LOGIC);
port (
my : in std_logic;
y : in std_logic;
m : in std_logic;
x : in std_logic;
q : in std_logic;
result : out std_logic
);
end cell_1b_mux;
 
 
-- Selects one of three data bits from the pair (x, q):
--   x q | result
--   1 1 | my
--   1 0 | y
--   0 1 | m
--   any other value of sel (including "00") | '0'
-- The declaration and both statements are listed twice below with identical
-- content; only the layout differs.
architecture Behavioral of cell_1b_mux is
signal sel : std_logic_vector(1 downto 0);
signal sel : std_logic_vector(1 downto 0);
begin
-- x is the high select bit, q the low one
sel <= x & q;
with sel select
result <= my when "11",
y when "10",
m when "01",
'0' when others;
 
-- x is the high select bit, q the low one
sel <= x & q;
 
with sel select
result <= my when "11",
y when "10",
m when "01",
'0' when others;
 
end Behavioral;
 
/adder_n.vhd
1,7 → 1,6
----------------------------------------------------------------------
----------------------------------------------------------------------
---- adder_n ----
---- ----
---- adder_n.vhd ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
8,8 → 7,8
---- ----
---- Description ----
---- This file contains the implementation of a n-bit adder ----
---- using the adder blocks. ----
---- used as the montgommery multiplier pre- and post- ----
---- using adder_blocks ----
---- used for the Montgomery multiplier pre- and post- ----
---- computation adder ----
---- ----
---- Dependencies: ----
46,61 → 45,53
---- ----
----------------------------------------------------------------------
 
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity adder_n is
generic ( width : integer := 1536;
block_width : integer := 8
);
Port ( core_clk : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
b : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
s : out STD_LOGIC_VECTOR((width-1) downto 0)
);
generic (
width : integer := 1536;
block_width : integer := 8
);
port (
core_clk : in std_logic;
a : in std_logic_vector((width-1) downto 0);
b : in std_logic_vector((width-1) downto 0);
cin : in std_logic;
cout : out std_logic;
s : out std_logic_vector((width-1) downto 0)
);
end adder_n;
 
 
architecture Structural of adder_n is
component adder_block
generic ( width : integer := 32
);
Port ( core_clk : in STD_LOGIC;
a : in STD_LOGIC_VECTOR((width-1) downto 0);
b : in STD_LOGIC_VECTOR((width-1) downto 0);
cin : in STD_LOGIC;
cout : out STD_LOGIC;
s : out STD_LOGIC_VECTOR((width-1) downto 0)
);
end component;
constant nr_of_blocks : integer := width/block_width;
signal carry : std_logic_vector(nr_of_blocks downto 0);
constant nr_of_blocks : integer := width/block_width;
signal carry : std_logic_vector(nr_of_blocks downto 0);
begin
carry(0) <= cin;
adder_block_chain: for i in 0 to (nr_of_blocks-1) generate
adder_blocks: adder_block
generic map( width => block_width
)
port map( core_clk => core_clk,
a => a((((i+1)*block_width)-1) downto (i*block_width)),
b => b((((i+1)*block_width)-1) downto (i*block_width)),
cin => carry(i),
cout => carry(i+1),
s => s((((i+1)*block_width)-1) downto (i*block_width))
);
end generate;
cout <= carry(nr_of_blocks);
 
carry(0) <= cin;
 
adder_block_chain : for i in 0 to (nr_of_blocks-1) generate
adder_blocks : adder_block
generic map(
width => block_width
)
port map(
core_clk => core_clk,
a => a((((i+1)*block_width)-1) downto (i*block_width)),
b => b((((i+1)*block_width)-1) downto (i*block_width)),
cin => carry(i),
cout => carry(i+1),
s => s((((i+1)*block_width)-1) downto (i*block_width))
);
end generate;
 
cout <= carry(nr_of_blocks);
 
end Structural;
/multiplier_core.vhd
1,244 → 1,188
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: multiplier_core.vhd / entity multiplier_core
--
-- Last Modified: 18/06/2012
--
-- Description: a pipelined montgomery multiplier, with split
-- pipeline operation and "auto-run" support
--
--
-- Dependencies: mont_mult_sys_pipeline, operand_mem, fifo_primitive, mont_cntrl
--
-- Revision:
-- Revision 6.00 - created separate module for x-operand (x_shift_reg)
-- Revision 5.00 - moved fifo interface to shared memory
-- Revision 4.00 - added dest_op_single input
-- Revision 3.00 - added auto-run control
-- Revision 2.01 - Split ctrl_reg input to separate inputs with more descriptive
-- names
-- Revision 2.00 - Control logic moved to separate design module and added fifo
-- Revision 1.00 - Architecture based on multiplier IP core "mont_mult1536_v1_00_a"
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- multiplier_core ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- toplevel of a modular simultaneous exponentiation core ----
---- using a pipelined Montgomery multiplier with split ----
---- pipeline support and auto-run support ----
---- ----
---- Dependencies: ----
---- - mont_mult_sys_pipeline ----
---- - operand_mem ----
---- - fifo_primitive ----
---- - mont_ctrl ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
entity multiplier_core is
port( clk : in std_logic;
reset : in std_logic;
-- operand memory interface (plb shared memory)
write_enable : in std_logic;
data_in : in std_logic_vector (31 downto 0);
rw_address : in std_logic_vector (8 downto 0);
data_out : out std_logic_vector (31 downto 0);
collision : out std_logic;
-- op_sel fifo interface
fifo_din : in std_logic_vector (31 downto 0);
fifo_push : in std_logic;
fifo_full : out std_logic;
fifo_nopush : out std_logic;
-- ctrl signals
start : in std_logic;
run_auto : in std_logic;
ready : out std_logic;
x_sel_single : in std_logic_vector (1 downto 0);
y_sel_single : in std_logic_vector (1 downto 0);
dest_op_single : in std_logic_vector (1 downto 0);
p_sel : in std_logic_vector (1 downto 0);
calc_time : out std_logic
);
port(
clk : in std_logic;
reset : in std_logic;
-- operand memory interface (plb shared memory)
write_enable : in std_logic;
data_in : in std_logic_vector (31 downto 0);
rw_address : in std_logic_vector (8 downto 0);
data_out : out std_logic_vector (31 downto 0);
collision : out std_logic;
-- op_sel fifo interface
fifo_din : in std_logic_vector (31 downto 0);
fifo_push : in std_logic;
fifo_full : out std_logic;
fifo_nopush : out std_logic;
-- ctrl signals
start : in std_logic;
run_auto : in std_logic;
ready : out std_logic;
x_sel_single : in std_logic_vector (1 downto 0);
y_sel_single : in std_logic_vector (1 downto 0);
dest_op_single : in std_logic_vector (1 downto 0);
p_sel : in std_logic_vector (1 downto 0);
calc_time : out std_logic
);
end multiplier_core;
 
 
architecture Behavioral of multiplier_core is
component mont_mult_sys_pipeline
generic ( n : integer := 32;
nr_stages : integer := 8; --(divides n, bits_low & (n-bits_low))
stages_low : integer := 3
);
Port ( core_clk : in STD_LOGIC;
xy : in STD_LOGIC_VECTOR((n-1) downto 0);
m : in STD_LOGIC_VECTOR((n-1) downto 0);
r : out STD_LOGIC_VECTOR((n-1) downto 0);
start : in STD_LOGIC;
reset : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0);
load_x : in std_logic;
ready : out STD_LOGIC
);
end component;
component operand_mem
port(
data_in : in std_logic_vector(31 downto 0);
data_out : out std_logic_vector(31 downto 0);
rw_address : in std_logic_vector(8 downto 0);
op_sel : in std_logic_vector(1 downto 0);
xy_out : out std_logic_vector(1535 downto 0);
m : out std_logic_vector(1535 downto 0);
result_in : in std_logic_vector(1535 downto 0);
load_op : in std_logic;
load_m : in std_logic;
load_result : in std_logic;
result_dest_op : in std_logic_vector(1 downto 0);
collision : out std_logic;
clk : in std_logic
);
end component;
component fifo_primitive
port(
clk : in std_logic;
din : in std_logic_vector(31 downto 0);
push : in std_logic;
pop : in std_logic;
reset : in std_logic;
dout : out std_logic_vector(31 downto 0);
empty : out std_logic;
full : out std_logic;
nopop : out std_logic;
nopush : out std_logic
);
end component;
component mont_ctrl
port(
clk : in std_logic;
reset : in std_logic;
start : in std_logic;
x_sel_single : in std_logic_vector(1 downto 0);
y_sel_single : in std_logic_vector(1 downto 0);
run_auto : in std_logic;
op_sel_buffer : in std_logic_vector(31 downto 0);
read_buffer : out std_logic;
multiplier_ready : in std_logic;
op_buffer_empty : in std_logic;
buffer_noread : in std_logic;
done : out std_logic;
calc_time : out std_logic;
op_sel : out std_logic_vector(1 downto 0);
load_x : out std_logic;
load_result : out std_logic;
start_multiplier : out std_logic
);
end component;
signal xy_i : std_logic_vector(1535 downto 0);
signal x_i : std_logic;
signal m : std_logic_vector(1535 downto 0);
signal r : std_logic_vector(1535 downto 0);
signal op_sel : std_logic_vector(1 downto 0);
signal result_dest_op_i : std_logic_vector(1 downto 0);
signal mult_ready : std_logic;
signal start_mult : std_logic;
signal load_op : std_logic;
signal load_x_i : std_logic;
signal load_m : std_logic;
signal load_result : std_logic;
signal fifo_empty : std_logic;
signal fifo_pop : std_logic;
signal fifo_nopop : std_logic;
signal fifo_dout : std_logic_vector(31 downto 0);
--signal fifo_push : std_logic;
constant n : integer := 1536;
constant t : integer := 96;
constant tl : integer := 32;
signal xy_i : std_logic_vector(1535 downto 0);
signal x_i : std_logic;
signal m : std_logic_vector(1535 downto 0);
signal r : std_logic_vector(1535 downto 0);
 
signal op_sel : std_logic_vector(1 downto 0);
signal result_dest_op_i : std_logic_vector(1 downto 0);
signal mult_ready : std_logic;
signal start_mult : std_logic;
signal load_op : std_logic;
signal load_x_i : std_logic;
signal load_m : std_logic;
signal load_result : std_logic;
 
signal fifo_empty : std_logic;
signal fifo_pop : std_logic;
signal fifo_nopop : std_logic;
signal fifo_dout : std_logic_vector(31 downto 0);
--signal fifo_push : std_logic;
 
constant n : integer := 1536;
constant t : integer := 96;
constant tl : integer := 32;
 
begin
 
-- The actual multiplier
the_multiplier: mont_mult_sys_pipeline generic map(
n => n,
nr_stages => t, --(divides n, bits_low & (n-bits_low))
stages_low => tl
)
port map(
core_clk => clk,
xy => xy_i,
m => m,
r => r,
start => start_mult,
reset => reset,
p_sel => p_sel,
load_x => load_x_i,
ready => mult_ready
);
-- Block ram memory for storing the operands and the modulus
the_memory: operand_mem port map(
data_in => data_in,
data_out => data_out,
rw_address => rw_address,
op_sel => op_sel,
xy_out => xy_i,
m => m,
result_in => r,
load_op => load_op,
load_m => load_m,
load_result => load_result,
result_dest_op => result_dest_op_i,
collision => collision,
clk => clk
);
-- The actual multiplier
the_multiplier : mont_mult_sys_pipeline generic map(
n => n,
nr_stages => t, --(divides n, bits_low & (n-bits_low))
stages_low => tl
)
port map(
core_clk => clk,
xy => xy_i,
m => m,
r => r,
start => start_mult,
reset => reset,
p_sel => p_sel,
load_x => load_x_i,
ready => mult_ready
);
 
-- Block ram memory for storing the operands and the modulus
the_memory : operand_mem port map(
data_in => data_in,
data_out => data_out,
rw_address => rw_address,
op_sel => op_sel,
xy_out => xy_i,
m => m,
result_in => r,
load_op => load_op,
load_m => load_m,
load_result => load_result,
result_dest_op => result_dest_op_i,
collision => collision,
clk => clk
);
 
load_op <= write_enable when (rw_address(8) = '0') else '0';
load_m <= write_enable when (rw_address(8) = '1') else '0';
result_dest_op_i <= dest_op_single when run_auto = '0' else "11"; -- in autorun mode we always store the result in operand3
-- A fifo for auto-run operand selection
the_exponent_fifo: fifo_primitive port map(
clk => clk,
din => fifo_din,
dout => fifo_dout,
empty => fifo_empty,
full => fifo_full,
push => fifo_push,
pop => fifo_pop,
reset => reset,
nopop => fifo_nopop,
nopush => fifo_nopush
);
-- A fifo for auto-run operand selection
the_exponent_fifo : fifo_primitive port map(
clk => clk,
din => fifo_din,
dout => fifo_dout,
empty => fifo_empty,
full => fifo_full,
push => fifo_push,
pop => fifo_pop,
reset => reset,
nopop => fifo_nopop,
nopush => fifo_nopush
);
-- The control logic for the core
the_control_unit: mont_ctrl port map(
clk => clk,
reset => reset,
start => start,
x_sel_single => x_sel_single,
y_sel_single => y_sel_single,
run_auto => run_auto,
op_buffer_empty => fifo_empty,
op_sel_buffer => fifo_dout,
read_buffer => fifo_pop,
buffer_noread => fifo_nopop,
done => ready,
calc_time => calc_time,
op_sel => op_sel,
load_x => load_x_i,
load_result => load_result,
start_multiplier => start_mult,
multiplier_ready => mult_ready
);
-- The control logic for the core
the_control_unit : mont_ctrl port map(
clk => clk,
reset => reset,
start => start,
x_sel_single => x_sel_single,
y_sel_single => y_sel_single,
run_auto => run_auto,
op_buffer_empty => fifo_empty,
op_sel_buffer => fifo_dout,
read_buffer => fifo_pop,
buffer_noread => fifo_nopop,
done => ready,
calc_time => calc_time,
op_sel => op_sel,
load_x => load_x_i,
load_result => load_result,
start_multiplier => start_mult,
multiplier_ready => mult_ready
);
 
end Behavioral;
 
/systolic_pipeline.vhd
1,47 → 1,60
------------------------------------------------------------------------------------
--
-- Geoffrey Ottoy - DraMCo research group
--
-- Module Name: systolic_pipeline.vhd / entity systolic_pipeline
--
-- Last Modified: 05/01/2012
--
-- Description: pipelined systolic array implementation of a montgomery multiplier
--
--
-- Dependencies: first_stage,
-- standard_stage,
-- last_stage,
-- stepping_control
--
-- Revision:
-- Revision 3.00 - Made x_selection external
-- Revision 2.02 - Changed design to cope with new stepping_control (next_x)
-- Revision 2.01 - Created an extra constant s (step size = n/t) to fix a problem
-- that occurred when t is not equal to sqrt(n).
-- Revision 2.00 - Moved stepping logic and x_selection to separate submodules
-- Revision 1.00 - Architecture
-- Revision 0.01 - File Created
--
--
------------------------------------------------------------------------------------
--
-- NOTICE:
--
-- Copyright DraMCo research group. 2011. This code may contain portions patented
-- by other third parties!
--
------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
----------------------------------------------------------------------
---- systolic_pipeline ----
---- ----
---- This file is part of the ----
---- Modular Simultaneous Exponentiation Core project ----
---- http://www.opencores.org/cores/mod_sim_exp/ ----
---- ----
---- Description ----
---- pipelined systolic array implementation of a montgomery ----
---- multiplier ----
---- ----
---- Dependencies: ----
---- - stepping_logic ----
---- - first_stage ----
---- - standard_stage ----
---- - last_stage ----
---- ----
---- Authors: ----
---- - Geoffrey Ottoy, DraMCo research group ----
---- - Jonas De Craene, JonasDC@opencores.org ----
---- ----
----------------------------------------------------------------------
---- ----
---- Copyright (C) 2011 DraMCo research group and OPENCORES.ORG ----
---- ----
---- This source file may be used and distributed without ----
---- restriction provided that this copyright statement is not ----
---- removed from the file and that any derivative work contains ----
---- the original copyright notice and the associated disclaimer. ----
---- ----
---- This source file is free software; you can redistribute it ----
---- and/or modify it under the terms of the GNU Lesser General ----
---- Public License as published by the Free Software Foundation; ----
---- either version 2.1 of the License, or (at your option) any ----
---- later version. ----
---- ----
---- This source is distributed in the hope that it will be ----
---- useful, but WITHOUT ANY WARRANTY; without even the implied ----
---- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ----
---- PURPOSE. See the GNU Lesser General Public License for more ----
---- details. ----
---- ----
---- You should have received a copy of the GNU Lesser General ----
---- Public License along with this source; if not, download it ----
---- from http://www.opencores.org/lgpl.shtml ----
---- ----
----------------------------------------------------------------------
 
---- Uncomment the following library declaration if instantiating
---- any Xilinx primitives in this code.
--library UNISIM;
--use UNISIM.VComponents.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;
 
library mod_sim_exp;
use mod_sim_exp.mod_sim_exp_pkg.all;
 
 
-- p_sel:
-- 01 = lower part
-- 10 = upper part
48,134 → 61,62
-- 11 = full range
 
entity systolic_pipeline is
generic( n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- number of stages (divider of n) >= 2
tl: integer := 64
-- best take t = sqrt(n)
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((n) downto 0);
y : in STD_LOGIC_VECTOR((n-1) downto 0);
m : in STD_LOGIC_VECTOR((n-1) downto 0);
xi : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
p_sel : in STD_LOGIC_VECTOR(1 downto 0); -- select which piece of the multiplier will be used
ready : out STD_LOGIC;
next_x : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((n+1) downto 0)
);
generic(
n : integer := 1536; -- width of the operands (# bits)
t : integer := 192; -- number of stages (divider of n) >= 2
tl : integer := 64 -- best take t = sqrt(n)
);
port(
core_clk : in std_logic;
my : in std_logic_vector((n) downto 0);
y : in std_logic_vector((n-1) downto 0);
m : in std_logic_vector((n-1) downto 0);
xi : in std_logic;
start : in std_logic;
reset : in std_logic;
p_sel : in std_logic_vector(1 downto 0); -- select which piece of the multiplier will be used
ready : out std_logic;
next_x : out std_logic;
r : out std_logic_vector((n+1) downto 0)
);
end systolic_pipeline;
 
 
architecture Structural of systolic_pipeline is
constant s : integer := n/t; -- defines the size of the stages (# bits)
constant size_l : integer := s*tl;
constant size_h : integer := n - size_l;
 
constant s : integer := n/t; -- defines the size of the stages (# bits)
constant size_l : integer := s*tl;
constant size_h : integer := n - size_l;
component first_stage
generic(width : integer := 4 -- must be the same as width of the standard stage
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((width) downto 0);
y : in STD_LOGIC_VECTOR((width) downto 0);
m : in STD_LOGIC_VECTOR((width) downto 0);
xin : in STD_LOGIC;
xout : out STD_LOGIC;
qout : out STD_LOGIC;
a_msb : in STD_LOGIC;
cout : out STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
--ready : out STD_LOGIC;
done : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0)
);
end component;
component standard_stage
generic(width : integer := 4
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-1) downto 0);
m : in STD_LOGIC_VECTOR((width-1) downto 0);
xin : in STD_LOGIC;
qin : in STD_LOGIC;
xout : out STD_LOGIC;
qout : out STD_LOGIC;
a_msb : in STD_LOGIC;
cin : in STD_LOGIC;
cout : out STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
-- ready : out STD_LOGIC;
done : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width-1) downto 0)
);
end component;
component last_stage
generic(width : integer := 4 -- must be the same as width of the standard stage
);
port(core_clk : in STD_LOGIC;
my : in STD_LOGIC_VECTOR((width-1) downto 0);
y : in STD_LOGIC_VECTOR((width-2) downto 0);
m : in STD_LOGIC_VECTOR((width-2) downto 0);
xin : in STD_LOGIC;
qin : in STD_LOGIC;
cin : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
-- ready : out STD_LOGIC;
-- done : out STD_LOGIC;
r : out STD_LOGIC_VECTOR((width+1) downto 0)
);
end component;
component stepping_logic
generic( n : integer := 16; -- max nr of steps required to complete a multiplication
t : integer := 4 -- total nr of steps in the pipeline
);
port(core_clk : in STD_LOGIC;
start : in STD_LOGIC;
reset : in STD_LOGIC;
t_sel : in integer range 0 to t; -- nr of stages in the pipeline piece
n_sel : in integer range 0 to n; -- nr of steps required for a complete multiplication
start_first_stage : out STD_LOGIC;
stepping_done : out STD_LOGIC
);
end component;
signal start_stage_i : std_logic_vector((t-1) downto 0);
--signal stage_ready_i : std_logic_vector((t-1) downto 0);
signal stage_done_i : std_logic_vector((t-2) downto 0);
 
signal start_stage_i : std_logic_vector((t-1) downto 0);
--signal stage_ready_i : std_logic_vector((t-1) downto 0);
signal stage_done_i : std_logic_vector((t-2) downto 0);
signal x_i : std_logic_vector((t-1) downto 0) := (others=>'0');
signal q_i : std_logic_vector((t-2) downto 0) := (others=>'0');
signal c_i : std_logic_vector((t-2) downto 0) := (others=>'0');
signal a_i : std_logic_vector((n+1) downto 0) := (others=>'0');
signal r_tot : std_logic_vector((n+1) downto 0) := (others=>'0');
signal r_h : std_logic_vector(s-1 downto 0) := (others=>'0');
signal r_l : std_logic_vector((s+1) downto 0) := (others=>'0');
signal a_h : std_logic_vector((s*2)-1 downto 0) := (others=>'0');
signal a_l : std_logic_vector((s*2)-1 downto 0) := (others=>'0');
--signal ready_i : std_logic;
signal stepping_done_i : std_logic;
signal t_sel : integer range 0 to t := t;
signal n_sel : integer range 0 to n := n;
signal split : std_logic := '0';
signal lower_e_i : std_logic := '0';
signal higher_e_i : std_logic := '0';
signal start_pulses_i : std_logic := '0';
signal start_higher_i : std_logic := '0';
signal higher_0_done_i : std_logic := '0';
signal h_x_0, h_x_1 : std_logic := '0';
signal h_q_0, h_q_1 : std_logic := '0';
signal h_c_0, h_c_1 : std_logic := '0';
signal x_offset_i : integer range 0 to tl*s := 0;
signal next_x_i : std_logic := '0';
signal x_i : std_logic_vector((t-1) downto 0) := (others => '0');
signal q_i : std_logic_vector((t-2) downto 0) := (others => '0');
signal c_i : std_logic_vector((t-2) downto 0) := (others => '0');
signal a_i : std_logic_vector((n+1) downto 0) := (others => '0');
signal r_tot : std_logic_vector((n+1) downto 0) := (others => '0');
signal r_h : std_logic_vector(s-1 downto 0) := (others => '0');
signal r_l : std_logic_vector((s+1) downto 0) := (others => '0');
signal a_h : std_logic_vector((s*2)-1 downto 0) := (others => '0');
signal a_l : std_logic_vector((s*2)-1 downto 0) := (others => '0');
 
--signal ready_i : std_logic;
signal stepping_done_i : std_logic;
signal t_sel : integer range 0 to t := t;
signal n_sel : integer range 0 to n := n;
signal split : std_logic := '0';
signal lower_e_i : std_logic := '0';
signal higher_e_i : std_logic := '0';
signal start_pulses_i : std_logic := '0';
signal start_higher_i : std_logic := '0';
signal higher_0_done_i : std_logic := '0';
signal h_x_0, h_x_1 : std_logic := '0';
signal h_q_0, h_q_1 : std_logic := '0';
signal h_c_0, h_c_1 : std_logic := '0';
signal x_offset_i : integer range 0 to tl*s := 0;
signal next_x_i : std_logic := '0';
 
begin
 
-- output mapping
190,10 → 131,9
a_h((s*2)-1 downto s) <= r_h;
with p_sel select
a_i(((tl+1)*s-1) downto ((tl-1)*s)) <= a_l when "01",
a_h when "10",
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;
a_h when "10",
r_tot(((tl+1)*s-1) downto ((tl-1)*s)) when others;
 
 
-- signals from x_selection
next_x_i <= start_stage_i(1) or (start_stage_i(tl+1) and higher_e_i);
--
202,38 → 142,40
-- this module controls the pipeline operation
with p_sel select
t_sel <= tl when "01",
t-tl when "10",
t when others;
t_sel <= tl when "01",
t-tl when "10",
t when others;
with p_sel select
n_sel <= size_l-1 when "01",
size_h-1 when "10",
n-1 when others;
size_h-1 when "10",
n-1 when others;
with p_sel select
lower_e_i <= '0' when "10",
'1' when others;
lower_e_i <= '0' when "10",
'1' when others;
with p_sel select
higher_e_i <= '1' when "10",
'0' when others;
'0' when others;
split <= p_sel(0) and p_sel(1);
stepping_control: stepping_logic
generic map( n => n, -- max nr of steps required to complete a multiplication
t => t -- total nr of steps in the pipeline
)
port map(core_clk => core_clk,
start => start,
reset => reset,
t_sel => t_sel,
n_sel => n_sel,
start_first_stage => start_pulses_i,
stepping_done => stepping_done_i
);
stepping_control : stepping_logic
generic map(
n => n, -- max nr of steps required to complete a multiplication
t => t -- total nr of steps in the pipeline
)
port map(
core_clk => core_clk,
start => start,
reset => reset,
t_sel => t_sel,
n_sel => n_sel,
start_first_stage => start_pulses_i,
stepping_done => stepping_done_i
);
-- start signals for first stage of lower and higher part
start_stage_i(0) <= start_pulses_i and lower_e_i;
245,154 → 187,169
-- nothing special here: each stage's done signal starts the next stage
start_signals_l: for i in 1 to tl-1 generate
start_stage_i(i) <= stage_done_i(i-1);
start_stage_i(i) <= stage_done_i(i-1);
end generate;
start_signals_h: for i in tl+2 to t-1 generate
start_stage_i(i) <= stage_done_i(i-1);
start_stage_i(i) <= stage_done_i(i-1);
end generate;
 
stage_0 : first_stage
generic map(
width => s
)
port map(
core_clk => core_clk,
my => my(s downto 0),
y => y(s downto 0),
m => m(s downto 0),
xin => x_i(0),
xout => x_i(1),
qout => q_i(0),
a_msb => a_i(s),
cout => c_i(0),
start => start_stage_i(0),
reset => reset,
--ready => stage_ready_i(0),
done => stage_done_i(0),
r => r_tot((s-1) downto 0)
);
stage_0: first_stage
generic map(width => s
)
port map(core_clk => core_clk,
my => my(s downto 0),
y => y(s downto 0),
m => m(s downto 0),
xin => x_i(0),
xout => x_i(1),
qout => q_i(0),
a_msb => a_i(s),
cout => c_i(0),
start => start_stage_i(0),
reset => reset,
--ready => stage_ready_i(0),
done => stage_done_i(0),
r => r_tot((s-1) downto 0)
);
stages_l : for i in 1 to (tl) generate
standard_stages : standard_stage
generic map(
width => s
)
port map(
core_clk => core_clk,
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
stages_l: for i in 1 to (tl) generate
standard_stages: standard_stage
generic map(width => s
)
port map(core_clk => core_clk,
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
h_c_1 <= h_c_0 or c_i(tl);
h_q_1 <= h_q_0 or q_i(tl);
h_x_1 <= h_x_0 or x_i(tl+1);
stage_tl_1: standard_stage
generic map(width => s
)
port map(core_clk => core_clk,
my => my(((tl+2)*s) downto ((s*(tl+1))+1)),
y => y(((tl+2)*s) downto ((s*(tl+1))+1)),
m => m(((tl+2)*s) downto ((s*(tl+1))+1)),
--xin => x_i(tl+1),
xin => h_x_1,
--qin => q_i(tl),
qin => h_q_1,
xout => x_i(tl+2),
qout => q_i(tl+1),
a_msb => a_i((tl+2)*s),
--cin => c_i(tl),
cin => h_c_1,
cout => c_i(tl+1),
start => start_stage_i(tl+1),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(tl+1),
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
);
stage_tl_1 : standard_stage
generic map(
width => s
)
port map(
core_clk => core_clk,
my => my(((tl+2)*s) downto ((s*(tl+1))+1)),
y => y(((tl+2)*s) downto ((s*(tl+1))+1)),
m => m(((tl+2)*s) downto ((s*(tl+1))+1)),
--xin => x_i(tl+1),
xin => h_x_1,
--qin => q_i(tl),
qin => h_q_1,
xout => x_i(tl+2),
qout => q_i(tl+1),
a_msb => a_i((tl+2)*s),
--cin => c_i(tl),
cin => h_c_1,
cout => c_i(tl+1),
start => start_stage_i(tl+1),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(tl+1),
r => r_tot((((tl+2)*s)-1) downto (s*(tl+1)))
);
stages_h: for i in (tl+2) to (t-2) generate
standard_stages: standard_stage
generic map(width => s
)
port map(core_clk => core_clk,
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
stage_t: last_stage
generic map(width => s -- must be the same as width of the standard stage
)
port map(core_clk => core_clk,
my => my(n downto ((n-s)+1)), --width-1
y => y((n-1) downto ((n-s)+1)), --width-2
m => m((n-1) downto ((n-s)+1)), --width-2
xin => x_i(t-1),
qin => q_i(t-2),
cin => c_i(t-2),
start => start_stage_i(t-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_tot((n+1) downto (n-s)) --width+1
);
stages_h : for i in (tl+2) to (t-2) generate
standard_stages : standard_stage
generic map(
width => s
)
port map(
core_clk => core_clk,
my => my(((i+1)*s) downto ((s*i)+1)),
y => y(((i+1)*s) downto ((s*i)+1)),
m => m(((i+1)*s) downto ((s*i)+1)),
xin => x_i(i),
qin => q_i(i-1),
xout => x_i(i+1),
qout => q_i(i),
a_msb => a_i((i+1)*s),
cin => c_i(i-1),
cout => c_i(i),
start => start_stage_i(i),
reset => reset,
--ready => stage_ready_i(i),
done => stage_done_i(i),
r => r_tot((((i+1)*s)-1) downto (s*i))
);
end generate;
 
mid_start: first_stage
generic map(width => s
)
port map(core_clk => core_clk,
my => my((tl*s+s) downto tl*s),
y => y((tl*s+s) downto tl*s),
m => m((tl*s+s) downto tl*s),
xin => x_i(0),
xout => h_x_0,
qout => h_q_0,
a_msb => a_i((tl+1)*s),
cout => h_c_0,
start => start_higher_i,
reset => reset,
--ready => stage_ready_i(0),
done => higher_0_done_i,
r => r_h
);
mid_end: last_stage
generic map(width => s -- must be the same as width of the standard stage
)
port map(core_clk => core_clk,
my => my((tl*s) downto ((tl-1)*s)+1), --width-1
y => y(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
m => m(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
xin => x_i(tl-1),
qin => q_i(tl-2),
cin => c_i(tl-2),
start => start_stage_i(tl-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_l --width+1
);
stage_t : last_stage
generic map(
width => s -- must be the same as width of the standard stage
)
port map(
core_clk => core_clk,
my => my(n downto ((n-s)+1)), --width-1
y => y((n-1) downto ((n-s)+1)), --width-2
m => m((n-1) downto ((n-s)+1)), --width-2
xin => x_i(t-1),
qin => q_i(t-2),
cin => c_i(t-2),
start => start_stage_i(t-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_tot((n+1) downto (n-s)) --width+1
);
 
end Structural;
mid_start : first_stage
generic map(
width => s
)
port map(
core_clk => core_clk,
my => my((tl*s+s) downto tl*s),
y => y((tl*s+s) downto tl*s),
m => m((tl*s+s) downto tl*s),
xin => x_i(0),
xout => h_x_0,
qout => h_q_0,
a_msb => a_i((tl+1)*s),
cout => h_c_0,
start => start_higher_i,
reset => reset,
--ready => stage_ready_i(0),
done => higher_0_done_i,
r => r_h
);
 
mid_end : last_stage
generic map(
width => s -- must be the same as width of the standard stage
)
port map(
core_clk => core_clk,
my => my((tl*s) downto ((tl-1)*s)+1), --width-1
y => y(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
m => m(((tl*s)-1) downto ((tl-1)*s)+1), --width-2
xin => x_i(tl-1),
qin => q_i(tl-2),
cin => c_i(tl-2),
start => start_stage_i(tl-1),
reset => reset,
--ready => stage_ready_i(t-1),
r => r_l --width+1
);
 
end Structural;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.