OpenCores
URL https://opencores.org/ocsvn/heap_sorter/heap_sorter/trunk

Subversion Repositories heap_sorter

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /heap_sorter/trunk
    from Rev 4 to Rev 5
    Reverse comparison

Rev 4 → Rev 5

/sort_test_check.py File deleted
sort_test_check.py Property changes : Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Index: src/sys_config.vhd =================================================================== --- src/sys_config.vhd (revision 4) +++ src/sys_config.vhd (nonexistent) @@ -1,9 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -library work; -package sys_config is - constant SORT_DEBUG : boolean :=false; - constant SYS_NLEVELS : integer :=5; - constant DATA_REC_SORT_KEY_WIDTH : integer :=8; - constant DATA_REC_PAYLOAD_WIDTH : integer :=4; -end sys_config; Index: src/dpram4.vhd =================================================================== --- src/dpram4.vhd (revision 4) +++ src/dpram4.vhd (nonexistent) @@ -1,87 +0,0 @@ --- Simulation model of the dual port RAM (DP RAM) with single clock --- and with "read-after-write" operation. --- This file was combined from multiple descriptions and models of dual port RAMs --- which I was able to find in the Internet and in the documentation provided --- by vendors like Xilinx or Altera. --- Therefore the only thing I can do is to publish it as PUBLIC DOMAIN --- --- Please note, that for synthesis you should replace this file with --- another DP RAM wrapper inferring the real DP RAM -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; - -entity dp_ram_scl is - - generic - ( - DATA_WIDTH : natural; - ADDR_WIDTH : natural - ); - - port - ( - clk : in std_logic; - addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); - addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); - data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); - data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); - we_a : in std_logic := '1'; - we_b : in std_logic := '1'; - q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); - q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) - ); - -end dp_ram_scl; - -architecture rtl of dp_ram_scl is - - signal v_addr_a : natural range 0 to 2**ADDR_WIDTH - 1; - signal v_addr_b : natural range 0 to 2**ADDR_WIDTH - 1; - subtype word_t is std_logic_vector((DATA_WIDTH-1) downto 0); - type memory_t is array((2**ADDR_WIDTH-1) downto 0) of word_t; - - signal ram : memory_t := (others => x"33"); -- For debugging - initialize - -- simulated RAM with x"33" - -begin - - v_addr_a <= to_integer(unsigned(addr_a(ADDR_WIDTH-1 downto 0))); - v_addr_b <= to_integer(unsigned(addr_b(ADDR_WIDTH-1 downto 0))); - - process(clk) - begin - - if(rising_edge(clk)) then - -- Port A - if(we_a = '1') then - ram(v_addr_a) <= data_a; - -- read-after-write behavior - q_a <= data_a; - else - -- simulate "unknown" value when the same address is written via one port - -- and immediately read via another port - if we_b='1' and v_addr_a=v_addr_b then - q_a <= (others => 'X'); - else - q_a <= ram(v_addr_a); - end if; - end if; - -- Port B - if(we_b = '1') then - ram(v_addr_b) <= data_b; - -- read-after-write behavior - q_b <= data_b; - else - -- simulate "unknown" value when the same address is written via one port - -- and immediately read via another port - if we_a='1' and v_addr_a=v_addr_b then - q_b <= (others => 'X'); - else - q_b <= ram(v_addr_b); - end if; - end if; - end if; - end process; - -end rtl; Index: src/sort_dpram.vhd =================================================================== --- src/sort_dpram.vhd (revision 4) +++ src/sort_dpram.vhd (nonexistent) @@ -1,175 +0,0 @@ -------------------------------------------------------------------------------- --- Title : Parametrized DP RAM for heap-sorter --- Project : heap-sorter -------------------------------------------------------------------------------- --- File : sort_dpram.vhd --- Author : Wojciech M. Zabolotny --- Company : --- Created : 2010-05-14 --- Last update: 2011-07-06 --- Platform : --- Standard : VHDL'93 -------------------------------------------------------------------------------- --- Description: -------------------------------------------------------------------------------- --- Copyright (c) 2010 Wojciech M. Zabolotny --- This file is published under the BSD license, so you can freely adapt --- it for your own purposes. --- Additionally this design has been described in my article: --- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation --- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 --- I'd be glad if you cite this article when you publish something based --- on my design. -------------------------------------------------------------------------------- --- Revisions : --- Date Version Author Description --- 2010-05-14 1.0 wzab Created -------------------------------------------------------------------------------- - -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use ieee.std_logic_textio.all; -use std.textio.all; -library work; -use work.sorter_pkg.all; -use work.sys_config.all; - -entity sort_dp_ram is - - generic - ( - ADDR_WIDTH : natural; - NLEVELS : natural; - NAME : string := "X" - ); - - port - ( - clk : in std_logic; - addr_a : in std_logic_vector(NLEVELS-1 downto 0); - addr_b : in std_logic_vector(NLEVELS-1 downto 0); - data_a : in T_DATA_REC; - data_b : in T_DATA_REC; - we_a : in std_logic; - we_b : in std_logic; - q_a : out T_DATA_REC; - q_b : out T_DATA_REC - ); - -end sort_dp_ram; - -architecture rtl of sort_dp_ram is - - signal vq_a, vq_b, tdata_a, tdata_b : std_logic_vector(DATA_REC_WIDTH-1 downto 0); - signal reg : T_DATA_REC := DATA_REC_INIT_DATA; - - component dp_ram_scl - generic ( - DATA_WIDTH : natural; - ADDR_WIDTH : natural); - port ( - clk : in std_logic; - addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); - addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); - data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); - data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); - we_a : in std_logic := '1'; - we_b : in std_logic := '1'; - q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); - q_b : out std_logic_vector((DATA_WIDTH -1) downto 0)); - end component; - -begin - - -- Convert our data records int std_logic_vector, so that - -- standard DP RAM may handle it - tdata_a <= tdrec2stlv(data_a); - tdata_b <= tdrec2stlv(data_b); - - - i1 : if ADDR_WIDTH > 0 generate - -- When ADDR_WIDTH is above 0 embed the real DP RAM - -- (even though synthesis tool may still replace it with - -- registers during optimization for low ADDR_WIDTH) - - q_a <= stlv2tdrec(vq_a); - q_b <= stlv2tdrec(vq_b); - - dp_ram_1 : dp_ram_scl - generic map ( - DATA_WIDTH => DATA_REC_WIDTH, - ADDR_WIDTH => ADDR_WIDTH) - port map ( - clk => clk, - addr_a => addr_a(ADDR_WIDTH-1 downto 0), - addr_b => addr_b(ADDR_WIDTH-1 downto 0), - data_a => tdata_a, - data_b => tdata_b, - we_a => we_a, - we_b => we_b, - q_a => vq_a, - q_b => vq_b); - - end generate i1; - - i2 : if ADDR_WIDTH = 0 generate - -- When ADDR_WIDTH is 0, DP RAM should be simply replaced - -- with a register implemented below - - p1 : process (clk) - begin -- process p1 - if clk'event and clk = '1' then -- rising clock edge - if we_a = '1' then - reg <= data_a; - q_a <= data_a; - q_b <= data_a; - elsif we_b = '1' then - reg <= data_b; - q_a <= data_b; - q_b <= data_b; - else - q_a <= reg; - q_b <= reg; - end if; - end if; - end process p1; - - end generate i2; - - dbg1 : if SORT_DEBUG generate - - -- Process monitoring read/write accesses to the memory (only for debugging) - p3 : process (clk) - variable rline : line; - begin -- process p1 - if clk'event and clk = '1' then -- rising clock edge - if(we_a = '1' and we_b = '1') then - write(rline, NAME); - write(rline, ADDR_WIDTH); - write(rline, string'(" Possible write collision!")); - writeline(reports, rline); - end if; - - if we_a = '1' then - write(rline, NAME); - write(rline, ADDR_WIDTH); - write(rline, string'(" WR_A:")); - wrstlv(rline, addr_a(ADDR_WIDTH-1 downto 0)); - write(rline, string'(" VAL:")); - wrstlv(rline, tdata_a); - writeline(reports, rline); - end if; - if we_b = '1' then - write(rline, NAME); - write(rline, ADDR_WIDTH); - write(rline, string'(" WR_B:")); - wrstlv(rline, addr_b(ADDR_WIDTH-1 downto 0)); - write(rline, string'(" VAL:")); - wrstlv(rline, tdata_b); - writeline(reports, rline); - end if; - end if; - end process p3; - end generate dbg1; -end rtl; Index: src/sorter_sys.vhd =================================================================== --- src/sorter_sys.vhd (revision 4) +++ src/sorter_sys.vhd (nonexistent) @@ -1,290 +0,0 @@ -------------------------------------------------------------------------------- --- Title : Top entity of heap-sorter --- Project : heap-sorter -------------------------------------------------------------------------------- --- File : sorter_sys.vhd --- Author : Wojciech M. Zabolotny --- Company : --- Created : 2010-05-14 --- Last update: 2011-07-11 --- Platform : --- Standard : VHDL'93 -------------------------------------------------------------------------------- --- Description: -------------------------------------------------------------------------------- --- Copyright (c) 2010 Wojciech M. Zabolotny --- This file is published under the BSD license, so you can freely adapt --- it for your own purposes. --- Additionally this design has been described in my article --- Additionally this design has been described in my article: --- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation --- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 --- I'd be glad if you cite this article when you publish something based --- on my design. -------------------------------------------------------------------------------- --- Revisions : --- Date Version Author Description --- 2010-05-14 1.0 wzab Created -------------------------------------------------------------------------------- - -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use ieee.std_logic_textio.all; -use std.textio.all; -library work; -use work.sorter_pkg.all; -use work.sys_config.all; - -entity sorter_sys is - generic ( - NLEVELS : integer := SYS_NLEVELS -- number of levels in the sorter heap - ); - - port ( - din : in T_DATA_REC; - we : in std_logic; - dout : out T_DATA_REC; - dav : out std_logic; - clk : in std_logic; - rst_n : in std_logic; - ready : out std_logic); -end sorter_sys; - -architecture sorter_sys_arch1 of sorter_sys is - - component sort_dp_ram - generic ( - ADDR_WIDTH : natural; - NLEVELS : natural; - NAME : string); - port ( - clk : in std_logic; - addr_a : in std_logic_vector(NLEVELS-1 downto 0); - addr_b : in std_logic_vector(NLEVELS-1 downto 0); - data_a : in T_DATA_REC; - data_b : in T_DATA_REC; - we_a : in std_logic; - we_b : in std_logic; - q_a : out T_DATA_REC; - q_b : out T_DATA_REC); - end component; - - component sorter_ctrl - generic ( - NLEVELS : integer; - NADDRBITS : integer); - port ( - tm_din : in T_DATA_REC; - tm_dout : out T_DATA_REC; - tm_addr : out std_logic_vector(NLEVELS-1 downto 0); - tm_we : out std_logic; - lm_din : in T_DATA_REC; - lm_dout : out T_DATA_REC; - lm_addr : out std_logic_vector(NLEVELS-1 downto 0); - lm_we : out std_logic; - rm_din : in T_DATA_REC; - rm_dout : out T_DATA_REC; - rm_addr : out std_logic_vector(NLEVELS-1 downto 0); - rm_we : out std_logic; - up_in : in std_logic; - up_in_val : in T_DATA_REC; - up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); - up_out : out std_logic; - up_out_val : out T_DATA_REC; - up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); - low_out : out std_logic; - low_out_val : out T_DATA_REC; - low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); - low_in : in std_logic; - low_in_val : in T_DATA_REC; - low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); - clk : in std_logic; - clk_en : in std_logic; - ready_in : in std_logic; - ready_out : out std_logic; - rst_n : in std_logic); - end component; - - -- Create signals for address buses - -- Some of them will remain unused. - subtype T_SORT_BUS_ADDR is std_logic_vector(NLEVELS-1 downto 0); - type T_SORT_ADDR_BUSES is array (NLEVELS downto 0) of T_SORT_BUS_ADDR; - signal low_addr, up_addr, addr_dr, addr_dl, addr_u : T_SORT_ADDR_BUSES := (others => (others => '0')); - type T_SORT_DATA_BUSES is array (NLEVELS downto 0) of T_DATA_REC; - signal up_update_path, low_update_path, data_d, data_dl, data_dr, data_u : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); - signal q_dr, q_dl, q_u, q_ul, q_ur : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); - signal we_ul, we_ur, we_u, we_dl, we_dr, low_update, up_update, s_ready : std_logic_vector(NLEVELS downto 0) := (others => '0'); - signal addr_switch, addr_switch_del : std_logic_vector(NLEVELS downto 0); - signal l0_reg : T_DATA_REC; - signal clk_en : std_logic := '1'; - -begin -- sorter_sys_arch1 - --- Build the sorting tree - - g1 : for i in 0 to NLEVELS-1 generate - - -- Two RAMs from the upper level are seen as a single RAM - -- We use the most significant bit (i-th bit) to distinguish RAM - -- In all RAMs the A-ports are used for upstream connections - -- and the B-ports are used for downstream connections - - -- Below are processes used to combine two upstream RAMs in a single one - i0a : if i >= 1 generate - addr_switch(i) <= addr_u(i)(i-1); - end generate i0a; - i0b : if i = 0 generate - addr_switch(i) <= '0'; - end generate i0b; - - -- There is a problem with reading of data provided by two upstream RAMs - -- we need to multiplex the data... - -- Delay for read data multiplexer - s1 : process (clk, rst_n) - begin -- process s1 - if rst_n = '0' then -- asynchronous reset (active low) - addr_switch_del(i) <= '0'; - elsif clk'event and clk = '1' then -- rising clock edge - addr_switch_del(i) <= addr_switch(i); - end if; - end process s1; - - -- Upper RAM signals' multiplexer - c1 : process (addr_switch, addr_switch_del, q_ul, q_ur, we_u) - begin -- process c1 - we_ul(i) <= '0'; - we_ur(i) <= '0'; - if addr_switch(i) = '1' then - we_ul(i) <= we_u(i); - else - we_ur(i) <= we_u(i); - end if; - if addr_switch_del(i) = '1' then - q_u(i) <= q_ul(i); - else - q_u(i) <= q_ur(i); - end if; - end process c1; - - dp_ram_l : sort_dp_ram - generic map ( - NLEVELS => NLEVELS, - ADDR_WIDTH => i, - NAME => "L") - port map ( - clk => clk, - addr_a => addr_dl(i), - addr_b => addr_u(i+1), - data_a => data_dl(i), - data_b => data_u(i+1), - we_a => we_dl(i), - we_b => we_ul(i+1), - q_a => q_dl(i), - q_b => q_ul(i+1)); - - dp_ram_r : sort_dp_ram - generic map ( - NLEVELS => NLEVELS, - ADDR_WIDTH => i, - NAME => "R") - port map ( - clk => clk, - addr_a => addr_dr(i), - addr_b => addr_u(i+1), - data_a => data_dr(i), - data_b => data_u(i+1), - we_a => we_dr(i), - we_b => we_ur(i+1), - q_a => q_dr(i), - q_b => q_ur(i+1)); - - sorter_ctrl_1 : sorter_ctrl - generic map ( - NLEVELS => NLEVELS, - NADDRBITS => i) - port map ( - tm_din => q_u(i), - tm_dout => data_u(i), - tm_addr => addr_u(i), - tm_we => we_u(i), - lm_din => q_dl(i), - lm_dout => data_dl(i), - lm_addr => addr_dl(i), - lm_we => we_dl(i), - rm_din => q_dr(i), - rm_dout => data_dr(i), - rm_addr => addr_dr(i), - rm_we => we_dr(i), - up_in => up_update(i), - up_in_val => up_update_path(i), - up_in_addr => up_addr(i), - up_out => low_update(i), - up_out_val => low_update_path(i), - up_out_addr => low_addr(i), - low_in => low_update(i+1), - low_in_val => low_update_path(i+1), - low_in_addr => low_addr(i+1), - low_out => up_update(i+1), -- connections to the next level - low_out_val => up_update_path(i+1), - low_out_addr => up_addr(i+1), - clk => clk, - clk_en => clk_en, - ready_in => s_ready(i+1), - ready_out => s_ready(i), - rst_n => rst_n); - - end generate g1; - -- top level - - -- On the top level we have only a single register - process (clk, rst_n) - variable rline : line; - begin -- process - if rst_n = '0' then -- asynchronous reset (active low) - l0_reg <= DATA_REC_INIT_DATA; - elsif clk'event and clk = '1' then -- rising clock edge - dav <= '0'; - if we_u(0) = '1' then - l0_reg <= data_u(0); - dout <= data_u(0); - dav <= '1'; - if SORT_DEBUG then - write(rline, string'("OUT: ")); - write(rline, tdrec2stlv(data_u(0))); - writeline(reports, rline); - end if; - elsif we = '1' then - if SORT_DEBUG then - write(rline, string'("IN: ")); - write(rline, tdrec2stlv(din)); - writeline(reports, rline); - end if; - l0_reg <= din; - dout <= din; - else - dout <= l0_reg; - end if; - end if; - end process; - ready <= s_ready(0); - q_ur(0) <= l0_reg; - q_ul(0) <= l0_reg; - up_update(0) <= we; - up_update_path(0) <= din; - up_addr(0) <= (others => '0'); - - -- signals for the last level - - s_ready(NLEVELS) <= '1'; - --addr(NLEVELS) <= (others => '0'); - data_dr(NLEVELS) <= DATA_REC_INIT_DATA; - data_dl(NLEVELS) <= DATA_REC_INIT_DATA; - we_dl(NLEVELS) <= '0'; - we_dr(NLEVELS) <= '0'; - - low_update(NLEVELS) <= '0'; - low_update_path(NLEVELS) <= DATA_REC_INIT_DATA; - low_addr(0) <= (others => '0'); - -end sorter_sys_arch1; Index: src/dpram4_synth.vhd =================================================================== --- src/dpram4_synth.vhd (revision 4) +++ src/dpram4_synth.vhd (nonexistent) @@ -1,63 +0,0 @@ --- Dual port, single clock memory, inferrable in Xilinx and Altera FPGA - -library ieee; -use ieee.std_logic_1164.all; - -entity dp_ram_scl is - - generic - ( - DATA_WIDTH : natural := 8; - ADDR_WIDTH : natural := 6 - ); - - port - ( - clk : in std_logic; - addr_a : in natural range 0 to 2**ADDR_WIDTH - 1; - addr_b : in natural range 0 to 2**ADDR_WIDTH - 1; - data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); - data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); - we_a : in std_logic := '1'; - we_b : in std_logic := '1'; - q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); - q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) - ); - -end dp_ram_scl; - - -architecture rtl of dp_ram_scl is - - -- Create a type for data word - subtype data_word is std_logic_vector((DATA_WIDTH-1) downto 0); - type ram_memory is array((2**ADDR_WIDTH-1) downto 0) of data_word; - - -- Declare the RAM variable. - shared variable ram : ram_memory; - -begin - - process(clk) - begin - if(rising_edge(clk)) then - -- Port B - if(we_b = '1') then - ram(addr_b) := data_b; - end if; - q_b <= ram(addr_b); - end if; - end process; - - process(clk) - begin - if(rising_edge(clk)) then - -- Port A - if(we_a = '1') then - ram(addr_a) := data_a; - end if; - q_a <= ram(addr_a); - end if; - end process; - -end rtl; Index: src/dpram_inf.vhd =================================================================== --- src/dpram_inf.vhd (revision 4) +++ src/dpram_inf.vhd (nonexistent) @@ -1,60 +0,0 @@ --- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL. --- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/ --- No license information were provided by the original author. --- Minimal modifications were introduced by me to make it suitable for my sorter. - -library ieee; -use ieee.std_logic_1164.all; -use ieee.std_logic_unsigned.all; - -entity dp_ram_scl is - generic ( - DATA_WIDTH : integer := 72; - ADDR_WIDTH : integer := 10 - ); - port ( --- common clock - clk : in std_logic; - -- Port A - we_a : in std_logic; - addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); - data_a : in std_logic_vector(DATA_WIDTH-1 downto 0); - q_a : out std_logic_vector(DATA_WIDTH-1 downto 0); - - -- Port B - we_b : in std_logic; - addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); - data_b : in std_logic_vector(DATA_WIDTH-1 downto 0); - q_b : out std_logic_vector(DATA_WIDTH-1 downto 0) - ); -end dp_ram_scl; - -architecture rtl of dp_ram_scl is - -- Shared memory - type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0); - shared variable mem : mem_type; -begin - --- Port A - process(clk) - begin - if(clk'event and clk = '1') then - if(we_a = '1') then - mem(conv_integer(addr_a)) := data_a; - end if; - q_a <= mem(conv_integer(addr_a)); - end if; - end process; - --- Port B - process(clk) - begin - if(clk'event and clk = '1') then - if(we_b = '1') then - mem(conv_integer(addr_b)) := data_b; - end if; - q_b <= mem(conv_integer(addr_b)); - end if; - end process; - -end rtl; Index: src/sorter_pkg.vhd =================================================================== --- src/sorter_pkg.vhd (revision 4) +++ src/sorter_pkg.vhd (nonexistent) @@ -1,214 +0,0 @@ -------------------------------------------------------------------------------- --- Title : Definitions for heap-sorter --- Project : heap-sorter -------------------------------------------------------------------------------- --- File : sorter_pkg.vhd --- Author : Wojciech M. Zabolotny --- Company : --- Created : 2010-05-14 --- Last update: 2011-07-11 --- Platform : --- Standard : VHDL'93 -------------------------------------------------------------------------------- --- Description: -------------------------------------------------------------------------------- --- Copyright (c) 2010 Wojciech M. Zabolotny --- This file is published under the BSD license, so you can freely adapt --- it for your own purposes. --- Additionally this design has been described in my article: --- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation --- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 --- I'd be glad if you cite this article when you publish something based --- on my design. -------------------------------------------------------------------------------- --- Revisions : --- Date Version Author Description --- 2010-05-14 1.0 wzab Created -------------------------------------------------------------------------------- -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use ieee.std_logic_textio.all; -use std.textio.all; -library work; -use work.sys_config.all; - -package sorter_pkg is - constant DATA_REC_WIDTH : integer := DATA_REC_SORT_KEY_WIDTH + - DATA_REC_PAYLOAD_WIDTH + 2; - - - subtype T_SORT_KEY is unsigned (DATA_REC_SORT_KEY_WIDTH - 1 downto 0); - subtype T_PAYLOAD is std_logic_vector(DATA_REC_PAYLOAD_WIDTH - 1 downto 0); - - --alias T_SORT_KEY is unsigned (12 downto 0); - type T_DATA_REC is record - d_key : T_SORT_KEY; - init : std_logic; - valid : std_logic; - d_payload : T_PAYLOAD; - end record; - - -- Special constant used to initially fill the sorter - -- Must be sorted so, that is smaller, than any other data - constant DATA_REC_INIT_DATA : T_DATA_REC := ( - d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), - init => '1', - valid => '0', - d_payload => (others => '0') - ); - - -- Special constant used to ``flush'' the sorter at the end - constant DATA_REC_END_DATA : T_DATA_REC := ( - d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), - init => '1', - valid => '1', - d_payload => (others => '0') - ); - - - function sort_cmp_lt ( - constant v1 : T_DATA_REC; - constant v2 : T_DATA_REC) - return boolean; - - function tdrec2stlv ( - constant drec : T_DATA_REC) - return std_logic_vector; - - function stlv2tdrec ( - constant dstlv : std_logic_vector) - return T_DATA_REC; - - procedure wrstlv ( - rline : inout line; - constant vect : std_logic_vector); - - file reports : text open write_mode is "STD_OUTPUT"; - -end sorter_pkg; - -package body sorter_pkg is - - function stlv2tdrec ( - constant dstlv : std_logic_vector) - return T_DATA_REC is - variable result : T_DATA_REC; - variable j : integer := 0; - begin -- stlv2drec - j := 0; - result.d_key := unsigned(dstlv(j-1+DATA_REC_SORT_KEY_WIDTH downto j)); - j := j+DATA_REC_SORT_KEY_WIDTH; - result.valid := dstlv(j); - j := j+1; - result.init := dstlv(j); - j := j+1; - result.d_payload := dstlv(j-1+DATA_REC_PAYLOAD_WIDTH downto j); - j := j+DATA_REC_PAYLOAD_WIDTH; - return result; - end stlv2tdrec; - - function tdrec2stlv ( - constant drec : T_DATA_REC) - return std_logic_vector is - variable result : std_logic_vector(DATA_REC_WIDTH-1 downto 0); - variable j : integer := 0; - begin -- tdrec2stlv - j := 0; - result(j-1+DATA_REC_SORT_KEY_WIDTH downto j) := std_logic_vector(drec.d_key); - j := j+DATA_REC_SORT_KEY_WIDTH; - result(j) := drec.valid; - j := j+1; - result(j) := drec.init; - j := j+1; - result(j-1+DATA_REC_PAYLOAD_WIDTH downto j) := std_logic_vector(drec.d_payload); - j := j+DATA_REC_PAYLOAD_WIDTH; - return result; - end tdrec2stlv; - - - -- Function sort_cmp_lt returns TRUE when the first opperand is ``less'' than - -- the second one - function sort_cmp_lt ( - constant v1 : T_DATA_REC; - constant v2 : T_DATA_REC) - return boolean is - variable rline : line; - variable dcomp : unsigned(DATA_REC_SORT_KEY_WIDTH-1 downto 0) := (others => '0'); - begin -- sort_cmp_lt - -- Check the special cases - if (v1.init = '1') and (v2.init = '0') then - -- v1 is the special record, v2 is the standard one - if v1.valid = '0' then - -- initialization record - ``smaller'' than all standard records - return true; - else - -- end record - ``bigger'' than all standard records - return false; - end if; - elsif (v1.init = '0') and (v2.init = '1') then - -- v2 is the special record, v1 is the standard one - if (v2.valid = '0') then - -- v2 is the initialization record - it is ``smaller'' than standard record v1 - return false; - else - -- v2 is the end record - it is ``bigger'' than standard record v1 - return true; - end if; - elsif (v1.init = '1') and (v2.init = '1') then - -- both v1 and v2 are special records - if (v1.valid = '0') and (v2.valid = '1') then - -- v1 - initial record, v2 - end record - return true; - else - -- v1 is end record, so it is ``bigger'' or ``equal'' to other records - return false; - end if; - elsif (v1.init = '0') and (v2.init = '0') then - -- We compare standard words - -- We must consider the fact, that in longer sequences of data records - -- the sort keys may wrap around - -- therefore we perform subtraction modulo - -- 2**DATA_REC_SORT_KEY_WIDTH and check the MSB - dcomp := v1.d_key-v2.d_key; - if dcomp(DATA_REC_SORT_KEY_WIDTH-1) = '1' then - --if signed(v1.d_key - v2.d_key)<0 then -- old implementation - return true; - elsif v2.d_key = v1.d_key then - if v2.valid = '1' then - return true; - else - -- Empty data records should wait - return false; - end if; - else - return false; - end if; - else - assert false report "Wrong records in sort_cmp_lt" severity error; - return false; - end if; - return false; -- should never happen - end sort_cmp_lt; - - - procedure wrstlv ( - rline : inout line; - constant vect : std_logic_vector) is - begin -- stlv2str - for i in vect'left downto vect'right loop - case vect(i) is - when 'U' => write(rline, string'("u")); - when 'Z' => write(rline, string'("z")); - when 'X' => write(rline, string'("x")); - when 'L' => write(rline, string'("L")); - when 'H' => write(rline, string'("H")); - when '1' => write(rline, string'("1")); - when '0' => write(rline, string'("0")); - when others => write(rline, string'("?")); - end case; - end loop; -- i - end wrstlv; - -end sorter_pkg; - Index: src/sorter_sys_tb.vhd =================================================================== --- src/sorter_sys_tb.vhd (revision 4) +++ src/sorter_sys_tb.vhd (nonexistent) @@ -1,158 +0,0 @@ -------------------------------------------------------------------------------- --- Title : Testbench for design "heap-sorter" --- Project : heap-sorter -------------------------------------------------------------------------------- --- File : sorter_sys_tb.vhd --- Author : Wojciech M. Zabolotny --- Company : --- Created : 2010-05-14 --- Last update: 2011-07-06 --- Platform : --- Standard : VHDL'93 -------------------------------------------------------------------------------- --- Description: -------------------------------------------------------------------------------- --- Copyright (c) 2010 Wojciech M. Zabolotny --- This file is published under the BSD license, so you can freely adapt --- it for your own purposes. --- Additionally this design has been described in my article: --- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation --- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 --- I'd be glad if you cite this article when you publish something based --- on my design. -------------------------------------------------------------------------------- --- Revisions : --- Date Version Author Description --- 2010-05-14 1.0 wzab Created -------------------------------------------------------------------------------- - -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use ieee.std_logic_textio.all; -use std.textio.all; -library work; -use work.sys_config.all; -use work.sorter_pkg.all; - - -------------------------------------------------------------------------------- - -entity sorter_sys_tb is - -end entity sorter_sys_tb; - -------------------------------------------------------------------------------- - -architecture sort_tb_beh of sorter_sys_tb is - - constant NLEVELS : integer := SYS_NLEVELS; - -- component ports - signal din : T_DATA_REC := DATA_REC_INIT_DATA; - signal dout : T_DATA_REC; - signal we : std_logic := '0'; - signal dav : std_logic := '0'; - signal rst_n : std_logic := '0'; - signal ready : std_logic := '0'; - - component sorter_sys - generic ( - NADDRBITS : integer); - port ( - din : in T_DATA_REC; - we : in std_logic; - dout : out T_DATA_REC; - dav : out std_logic; - clk : in std_logic; - rst_n : in std_logic; - ready : out std_logic); - end component; - -- clock - signal Clk : std_logic := '1'; - - signal end_sim : boolean := false; - signal div : integer range 0 to 8 := 0; - -begin -- architecture sort_tb_beh - - -- component instantiation - DUT : entity work.sorter_sys - generic map ( - NLEVELS => NLEVELS) - port map ( - din => din, - we => we, - dout => dout, - dav => dav, - clk => clk, - rst_n => rst_n, - ready => ready); - - -- clock generation - Clk <= not Clk after 10 ns when end_sim = false else '0'; - - -- waveform generation - WaveGen_Proc : process - file events_in : text open read_mode is "events.in"; - variable input_line : line; - file events_out : text open write_mode is "events.out"; - variable output_line : line; - variable rec : T_DATA_REC; - variable skey : std_logic_vector(DATA_REC_SORT_KEY_WIDTH-1 downto 0); - variable spayload : std_logic_vector(DATA_REC_PAYLOAD_WIDTH-1 downto 0); - begin - -- insert signal assignments here - - wait until Clk = '1'; - wait for 31 ns; - rst_n <= '1'; - wait until ready = '1'; - loop - wait until Clk = '0'; - wait until Clk = '1'; - we <= '0'; - if div = 3 then - div <= 0; - exit when endfile(events_in); - readline(events_in, input_line); - read(input_line, rec.init); - read(input_line, rec.valid); - read(input_line, skey); - read(input_line, spayload); - rec.d_key := unsigned(skey); - rec.d_payload := spayload; - din <= rec; - we <= '1'; - else - div <= div+1; - end if; - if dav = '1' then - -- Process read event - rec := dout; - write(output_line, rec.init); - write(output_line, rec.valid); - write(output_line,string'(" ")); - write(output_line, std_logic_vector(rec.d_key)); - write(output_line,string'(" ")); - write(output_line, std_logic_vector(rec.d_payload)); - writeline(events_out, output_line); - end if; - end loop; - end_sim <= true; - rec.valid := '0'; - din <= rec; - wait; - end process WaveGen_Proc; - - - -end architecture sort_tb_beh; - -------------------------------------------------------------------------------- - -configuration sorter_sys_tb_sort_tb_beh_cfg of sorter_sys_tb is - for sort_tb_beh - end for; -end sorter_sys_tb_sort_tb_beh_cfg; - -------------------------------------------------------------------------------- Index: src/sorter_ctrl.vhd =================================================================== --- src/sorter_ctrl.vhd (revision 4) +++ src/sorter_ctrl.vhd (nonexistent) @@ -1,324 +0,0 @@ -------------------------------------------------------------------------------- --- Title : Sorting node controller for heap-sorter --- Project : heap-sorter -------------------------------------------------------------------------------- --- File : sorter_ctrl.vhd --- Author : Wojciech M. Zabolotny --- Company : --- Created : 2010-05-14 --- Last update: 2013-07-04 --- Platform : --- Standard : VHDL'93 -------------------------------------------------------------------------------- --- Description: -------------------------------------------------------------------------------- --- Copyright (c) 2010 Wojciech M. Zabolotny --- This file is published under the BSD license, so you can freely adapt --- it for your own purposes. --- Additionally this design has been described in my article: --- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation --- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 --- I'd be glad if you cite this article when you publish something based --- on my design. -------------------------------------------------------------------------------- --- Revisions : --- Date Version Author Description --- 2010-05-14 1.0 wzab Created -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- --- The sorter controller is connected with three dual port memories. --- The first dual port memory tm_... provides the "upstream data" --- The second dual port memory lm_... provides the "left branch of downstream data" --- The third dual port memory rm_... provides the "right branch of downstream data" --- The controller is notified about availability of the new data by the --- "update" signal. --- However in this architecture we need to service two upstream memories! --- That's because we want to save one cycle, and to be able to issue --- --- Important feature of each controller is the ability to clear the memory --- after reset. -------------------------------------------------------------------------------- -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use ieee.std_logic_textio.all; -use std.textio.all; -library work; -use work.sorter_pkg.all; -use work.sys_config.all; - -entity sorter_ctrl is - - generic ( - NLEVELS : integer; -- number of levels (max number of - -- address bits - NADDRBITS : integer -- number of used address bits - ); - - port ( - -- Top memory connections - tm_din : in T_DATA_REC; - tm_dout : out T_DATA_REC; - tm_addr : out std_logic_vector(NLEVELS-1 downto 0); - tm_we : out std_logic; - -- Left memory connections - lm_din : in T_DATA_REC; - lm_dout : out T_DATA_REC; - lm_addr : out std_logic_vector(NLEVELS-1 downto 0); - lm_we : out std_logic; - -- Right memory connections - rm_din : in T_DATA_REC; - rm_dout : out T_DATA_REC; - rm_addr : out std_logic_vector(NLEVELS-1 downto 0); - rm_we : out std_logic; - -- Upper level controller connections - up_in : in std_logic; - up_in_val : in T_DATA_REC; - up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); - -- Upper level update notifier - up_out : out std_logic; - up_out_val : out T_DATA_REC; - up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); - -- Lower level controller connections - low_out : out std_logic; - low_out_val : out T_DATA_REC; - low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); - low_in : in std_logic; - low_in_val : in T_DATA_REC; - low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); - -- Lower level update notifier - -- System connections - clk : in std_logic; - clk_en : in std_logic; - ready_in : in std_logic; - ready_out : out std_logic; -- signals, when memory is cleared - -- after reset - rst_n : in std_logic); -end sorter_ctrl; - -architecture sorter_ctrl_arch1 of sorter_ctrl is - - type T_CTRL_STATE is (CTRL_RESET, CTRL_CLEAR, CTRL_IDLE, CTRL_S1, CTRL_S0); - signal ctrl_state, ctrl_state_next : T_CTRL_STATE := CTRL_IDLE; - signal addr, addr_i : std_logic_vector(NLEVELS-1 downto 0); - signal s_low_in_addr, s_low_in_addr_i : std_logic_vector(NLEVELS-1 downto 0); - signal s_up_in_addr, s_up_in_addr_i : std_logic_vector(NLEVELS-1 downto 0); - signal s_ready_out, s_ready_out_i : std_logic; - signal s_low_in, s_low_in_i : std_logic; - signal s_addr_out : std_logic_vector(NLEVELS-1 downto 0); - signal s_tm_dout : T_DATA_REC; - signal s_up_in_val_i, s_up_in_val : T_DATA_REC := DATA_REC_INIT_DATA; - signal s_low_in_val_i, s_low_in_val : T_DATA_REC := DATA_REC_INIT_DATA; - - - constant ADDR_MAX : std_logic_vector(NLEVELS-1 downto 0) := std_logic_vector(to_unsigned(2**NADDRBITS-1, NLEVELS)); - -begin - - tm_dout <= s_tm_dout; --- We have the two-process state machine. - p1 : process (addr, ctrl_state, lm_din, low_in, low_in_addr, low_in_val, - ready_in, rm_din, s_addr_out, s_low_in, s_low_in_addr, - s_low_in_val, s_ready_out, s_up_in_val, up_in, up_in_addr, - up_in_val) - variable rline : line; - variable l_val : T_DATA_REC; - variable r_val : T_DATA_REC; - - begin -- process p1 - -- defaults - ctrl_state_next <= ctrl_state; - tm_we <= '0'; - rm_we <= '0'; - lm_we <= '0'; - lm_addr <= (others => '0'); - rm_addr <= (others => '0'); - tm_addr <= (others => '0'); - s_ready_out_i <= s_ready_out; - addr_i <= addr; - up_out_val <= DATA_REC_INIT_DATA; -- to avoid latches - low_out_val <= DATA_REC_INIT_DATA; -- to avoid latches - s_low_in_addr_i <= s_low_in_addr; - s_low_in_i <= low_in; - low_out <= '0'; - up_out <= '0'; - up_out_addr <= (others => '0'); - s_up_in_val_i <= s_up_in_val; - s_low_in_val_i <= s_low_in_val; - lm_dout <= DATA_REC_INIT_DATA; - rm_dout <= DATA_REC_INIT_DATA; - s_tm_dout <= DATA_REC_INIT_DATA; - s_addr_out <= (others => '0'); - case ctrl_state is - when CTRL_RESET => - addr_i <= (others => '0'); - s_ready_out_i <= '0'; - ctrl_state_next <= CTRL_CLEAR; - when CTRL_CLEAR => - lm_addr <= addr; - rm_addr <= addr; - lm_dout <= DATA_REC_INIT_DATA; - rm_dout <= DATA_REC_INIT_DATA; - lm_we <= '1'; - rm_we <= '1'; - if addr = ADDR_MAX then - if ready_in = '1' then - s_ready_out_i <= '1'; - ctrl_state_next <= CTRL_IDLE; - end if; - else - addr_i <= std_logic_vector(unsigned(addr)+1); - end if; - when CTRL_IDLE => - -- We read "down" memories ("upper" value is provided by the ``bypass channel'') - if up_in = '1' then - ctrl_state_next <= CTRL_S1; - tm_addr <= up_in_addr; - lm_addr <= up_in_addr; - rm_addr <= up_in_addr; - addr_i <= up_in_addr; - s_up_in_val_i <= up_in_val; - if low_in = '1' then - s_low_in_val_i <= low_in_val; - s_low_in_addr_i <= low_in_addr; - end if; - end if; - when CTRL_S1 => - -- In this cycle we can compare data - -- Debug output! - if SORT_DEBUG then - write(rline, string'("CMP ")); - write(rline, NADDRBITS); - write(rline, string'(" U:")); - wrstlv(rline, tdrec2stlv(s_up_in_val)); - end if; - l_val := lm_din; - r_val := rm_din; - -- Check, if we need to take value from lower ``bypass channel'' - if s_low_in = '1' then - if SORT_DEBUG then - write(rline, string'(" x! ")); - end if; - if (addr(NADDRBITS-1 downto 0) = s_low_in_addr(NADDRBITS-1 downto 0)) then - -- We are reading a value which was just updated, so we need to get it - -- from ``bypass channel'' instead of memory - if SORT_DEBUG then - write(rline, string'(" y! ")); - end if; - if s_low_in_addr(NADDRBITS) = '1' then - l_val := s_low_in_val; - else - r_val := s_low_in_val; - end if; - end if; - end if; - if SORT_DEBUG then - write(rline, string'(" L:")); - wrstlv(rline, tdrec2stlv(l_val)); - write(rline, string'(" R:")); - wrstlv(rline, tdrec2stlv(r_val)); - write(rline, string'(" A:")); - end if; - if sort_cmp_lt(l_val, s_up_in_val) and sort_cmp_lt(l_val, r_val) then - -- The L-ram value is the smallest - -- Output the value from the L-ram and put the new value into the L-ram - s_tm_dout <= l_val; - tm_addr <= addr; - tm_we <= '1'; - - up_out_val <= l_val; - up_out <= '1'; - up_out_addr <= addr; - - lm_addr <= addr; - lm_dout <= s_up_in_val; - lm_we <= '1'; - - low_out <= '1'; - low_out_val <= s_up_in_val; - s_addr_out(NADDRBITS) <= '1'; - - if NADDRBITS > 0 then - s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); - end if; - wrstlv(rline, s_addr_out); - ctrl_state_next <= CTRL_IDLE; - if SORT_DEBUG then - write(rline, string'(" T<->L")); - end if; - elsif sort_cmp_lt(r_val, s_up_in_val) then - -- The R-ram value is the smallest - -- Output the value from the R-ram and put the new value into the R-ram - s_tm_dout <= r_val; - tm_addr <= addr; - tm_we <= '1'; - - up_out_val <= r_val; - up_out <= '1'; - up_out_addr <= addr; - - rm_addr <= addr; - rm_dout <= s_up_in_val; - rm_we <= '1'; - - low_out <= '1'; - low_out_val <= s_up_in_val; - - s_addr_out(NADDRBITS) <= '0'; - if NADDRBITS > 0 then - s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); - end if; - ctrl_state_next <= CTRL_IDLE; - if SORT_DEBUG then - wrstlv(rline, s_addr_out); - write(rline, string'(" T<->R")); - end if; - else - -- The new value is the smallest - -- Nothing to do, no update downstream - s_tm_dout <= s_up_in_val; - tm_we <= '1'; - tm_addr <= addr; - - up_out_val <= s_up_in_val; - up_out <= '1'; - up_out_addr <= addr; - - ctrl_state_next <= CTRL_IDLE; - wrstlv(rline, up_in_addr); - if SORT_DEBUG then - write(rline, string'(" T===T")); - end if; - end if; - if SORT_DEBUG then - writeline(reports, rline); - end if; - when others => null; - end case; - end process p1; - - p2 : process (clk, rst_n) is - begin -- process p2 - if rst_n = '0' then -- asynchronous reset (active low) - ctrl_state <= CTRL_RESET; - s_ready_out <= '0'; - addr <= (others => '0'); - s_low_in_addr <= (others => '0'); - s_low_in <= '0'; - s_low_in_val <= DATA_REC_INIT_DATA; - s_up_in_val <= DATA_REC_INIT_DATA; - --update_out <= '0'; - --addr_out <= (others => '0'); - elsif clk'event and clk = '1' then -- rising clock edge - s_ready_out <= s_ready_out_i; - ctrl_state <= ctrl_state_next; - addr <= addr_i; - s_low_in_addr <= s_low_in_addr_i; - s_low_in_val <= s_low_in_val_i; - s_up_in_val <= s_up_in_val_i; - s_low_in <= s_low_in_i; - end if; - end process p2; - ready_out <= s_ready_out; - low_out_addr <= s_addr_out; -end sorter_ctrl_arch1; Index: comp/dummy.txt =================================================================== --- comp/dummy.txt (revision 4) +++ comp/dummy.txt (nonexistent) @@ -1 +0,0 @@ -This file is here to ensure creation of comp directory \ No newline at end of file Index: sort_test_gen.py =================================================================== --- sort_test_gen.py (revision 4) +++ sort_test_gen.py (nonexistent) @@ -1,70 +0,0 @@ -#!/usr/bin/python -# -# This Python script generates the input patterns for the sorter -# It also generates the sys_config.vhd file with constants -# describing structure of data records -# -# You can customize constants below -key_width = 8 # Width of the key part of the record -pay_width = 4 # Width of the payload part of the record -max_dist = 63 # Maximum distance between unsorted records -seq_len = 2000 # Length of the generated sequence -sort_debug = "true" #uncomment this, or the next line -sort_debug = "false" #alwayse set sort_debug to false for synthesis! -# -# The algorithm is very simple - we just generate the sequence -# of records with continuously increasing key numbers -# Then we increase the key number in each record by the random -# number, taken from the range: [0,max_dist] -import math -import sys -# calculate necessary amount of levels in the sorter -sys_nlevels=1 -nrec=4 -while nrec ((1<<(key_width-1))-1): - print "Too high maximum distance between unsorted records" - print "for defined width of the sort key. Please increase" - print "the key_width value in the sort_test_gen.py file!" - sys.exit(1) -# Then we prepare the VHDL file with system configuration -sc=open('src/sys_config.vhd','w') -l="library ieee;\n" -l+="use ieee.std_logic_1164.all;\n" -l+="library work;\n" -l+="package sys_config is\n" -l+=" constant SORT_DEBUG : boolean :="+sort_debug+";\n" -l+=" constant SYS_NLEVELS : integer :="+str(sys_nlevels)+";\n" -l+=" constant DATA_REC_SORT_KEY_WIDTH : integer :="+str(key_width)+";\n" -l+=" constant DATA_REC_PAYLOAD_WIDTH : integer :="+str(pay_width)+";\n" -l+="end sys_config;\n" -sc.write(l) -sc.close() -# Generate the input patterns -fo=open('events.in','w') -t=range(1,seq_len+1) -import random -r=random.Random() -r.seed() -t2=[i+r.randint(0,max_dist) for i in t] -# Now let's prepare the input events: -key_format="{0:0"+str(key_width)+"b}" -pay_format="{0:0"+str(pay_width)+"b}" -for i in t2: - j = i & ((1<
sort_test_gen.py Property changes : Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Index: makefile =================================================================== --- makefile (revision 4) +++ makefile (nonexistent) @@ -1,31 +0,0 @@ -VHDLS = \ - src/sys_config.vhd \ - src/sorter_pkg.vhd \ - src/dpram4.vhd \ - src/sort_dpram.vhd \ - src/sorter_ctrl.vhd \ - src/sorter_sys.vhd \ - src/sorter_sys_tb.vhd \ - -#STD=standard -STD=synopsys -VSTD=93c -ENTITY=sorter_sys_tb -RUN_OPTIONS= -all: events.in events.out test -events.in: sort_test_gen.py - ./sort_test_gen.py -reader: ${ENTITY} ${ENTITY}.ghw - gtkwave ${ENTITY}.ghw ${ENTITY}.sav -${ENTITY}: ${VHDLS} -# vhdlp -work fmf fmf/*.vhd - ghdl -a --workdir=comp --std=${VSTD} --ieee=${STD} ${VHDLS} - ghdl -e --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY} -events.out: ${ENTITY} events.in -# ./${ENTITY} --wave=${ENTITY}.ghw ${RUN_OPTIONS} --stop-time=50000ns 2>&1 > res.txt - ./${ENTITY} ${RUN_OPTIONS} 2>&1 > res.txt -test: - ./sort_test_check.py -clean: - rm -f comp/* *.o *.vcd *.ghw events* ${ENTITY} - Index: high_speed_pipelined_4clk_per_word/README =================================================================== --- high_speed_pipelined_4clk_per_word/README (nonexistent) +++ high_speed_pipelined_4clk_per_word/README (revision 5) @@ -0,0 +1,5 @@ +This version is prepared for very high-speed setups. +The memory uses additional output register. +The comparator also uses additional pipeline register. +The sorter user 4 clock cycles per word, but clock frequency may be +much higher. Index: high_speed_pipelined_4clk_per_word/comp/dummy =================================================================== --- high_speed_pipelined_4clk_per_word/comp/dummy (nonexistent) +++ high_speed_pipelined_4clk_per_word/comp/dummy (revision 5) @@ -0,0 +1 @@ +OK Index: high_speed_pipelined_4clk_per_word/makefile =================================================================== --- high_speed_pipelined_4clk_per_word/makefile (nonexistent) +++ high_speed_pipelined_4clk_per_word/makefile (revision 5) @@ -0,0 +1,39 @@ +VHDLS = \ + src/sys_config.vhd \ + src/sorter_pkg.vhd \ + src/dpram4.vhd \ + src/dp_ram_scl_sorter.vhd \ + src/dp_ram_scl_sorter_distributed.vhd \ + src/sort_dpram.vhd \ + src/sorter_ctrl.vhd \ + src/sorter_sys.vhd \ + src/sorter_sys_tb.vhd \ + +#STD=standard +STD=synopsys +VSTD=93c +ENTITY=sorter_sys_tb +RUN_OPTIONS= +all: events.in events.out test +events.in: sort_test_gen.py + ./sort_test_gen.py +reader: ${ENTITY} ${ENTITY}.ghw + gtkwave ${ENTITY}.ghw #${ENTITY}.sav +${ENTITY}.ghw: all + ghdl -i --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY} + ghdl -m --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY} + ./${ENTITY} --wave=${ENTITY}.ghw + # ghdl -r ${ENTITY} --vcd=${ENTITY}.vcd + +${ENTITY}: ${VHDLS} +# vhdlp -work fmf fmf/*.vhd + ghdl -a --workdir=comp --std=${VSTD} --ieee=${STD} ${VHDLS} + ghdl -e --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY} +events.out: ${ENTITY} events.in +# ./${ENTITY} --wave=${ENTITY}.ghw ${RUN_OPTIONS} --stop-time=200000ns 2>&1 > res.txt + ./${ENTITY} ${RUN_OPTIONS} 2>&1 > res.txt +test: + ./sort_test_check.py +clean: + rm -f comp/* *.o *.vcd *.ghw events* ${ENTITY} + Index: high_speed_pipelined_4clk_per_word/sort_test_check.py =================================================================== --- high_speed_pipelined_4clk_per_word/sort_test_check.py (nonexistent) +++ high_speed_pipelined_4clk_per_word/sort_test_check.py (revision 5) @@ -0,0 +1,46 @@ +#!/usr/bin/python +# This Python script checks if the records were sorted +# correctly... +import sys +# We read the input records and store them in one vector +fi=open("events.in","r") +ri=fi.read().split("\n") +ri=[i.split(" ") for i in ri] +# Leave only valid records +ri=[i for i in ri if len(i)==3 and i[0]=="01"] +# We read the output vectors and store them in a second vector +fo=open("events.out","r") +ro=fo.read().split("\n") +ro=[i.split(" ") for i in ro ] +# Leave only valid records +ro=[i for i in ro if len(i)==3 and i[0]=="01"] +# We check if the output vectors are correctly sorted +for i in range(1,len(ro)): + # Theoretically we could simply check the condition: + # int(ro[i-1][1],2) <= int(ro[i][1],2) + # However for longer sequences we may need to + # consider the fact that sort keys (time stamps) + # will wrap around. + # Therefore we need to perform slightly more + # complicated test - if we use N bits to store + # the sort key, then we need to subtract keys modulo + # 2**N and if the difference is in range (0,2**(N-1)] + # we consider the difference positive, while in range + # (2**(N-1),(2**N)-1) we consider it negative. + k1 = ro[i-1][1] + k2 = ro[i][1] + dlim = 1< dlim/2: + print "Records unsorted!\n" + print str(i-1)+": "+str(ro[i-1]) + print str(i)+": "+str(ro[i]) + sys.exit(1) +# We check if all input vectors were transferred to the output +# Now we only check size of vectors +if len(ro) != len(ri): + print "Not all records transferred!\n" + sys.exit(1) +print "Test passed!\n" +sys.exit(0) +
high_speed_pipelined_4clk_per_word/sort_test_check.py Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: high_speed_pipelined_4clk_per_word/sort_test_gen.py =================================================================== --- high_speed_pipelined_4clk_per_word/sort_test_gen.py (nonexistent) +++ high_speed_pipelined_4clk_per_word/sort_test_gen.py (revision 5) @@ -0,0 +1,70 @@ +#!/usr/bin/python +# +# This Python script generates the input patterns for the sorter +# It also generates the sys_config.vhd file with constants +# describing structure of data records +# +# You can customize constants below +key_width = 18 # Width of the key part of the record +pay_width = 18 # Width of the payload part of the record +max_dist = 1024 # Maximum distance between unsorted records +seq_len = 10000 # Length of the generated sequence +# sort_debug = "true" #uncomment this, or the next line +sort_debug = "false" #alwayse set sort_debug to false for synthesis! +# +# The algorithm is very simple - we just generate the sequence +# of records with continuously increasing key numbers +# Then we increase the key number in each record by the random +# number, taken from the range: [0,max_dist] +import math +import sys +# calculate necessary amount of levels in the sorter +sys_nlevels=1 +nrec=4 +while nrec ((1<<(key_width-1))-1): + print "Too high maximum distance between unsorted records" + print "for defined width of the sort key. Please increase" + print "the key_width value in the sort_test_gen.py file!" + sys.exit(1) +# Then we prepare the VHDL file with system configuration +sc=open('src/sys_config.vhd','w') +l="library ieee;\n" +l+="use ieee.std_logic_1164.all;\n" +l+="library work;\n" +l+="package sys_config is\n" +l+=" constant SORT_DEBUG : boolean :="+sort_debug+";\n" +l+=" constant SYS_NLEVELS : integer :="+str(sys_nlevels)+";\n" +l+=" constant DATA_REC_SORT_KEY_WIDTH : integer :="+str(key_width)+";\n" +l+=" constant DATA_REC_PAYLOAD_WIDTH : integer :="+str(pay_width)+";\n" +l+="end sys_config;\n" +sc.write(l) +sc.close() +# Generate the input patterns +fo=open('events.in','w') +t=range(1,seq_len+1) +import random +r=random.Random() +r.seed() +t2=[i+r.randint(0,max_dist) for i in t] +# Now let's prepare the input events: +key_format="{0:0"+str(key_width)+"b}" +pay_format="{0:0"+str(pay_width)+"b}" +for i in t2: + j = i & ((1<
high_speed_pipelined_4clk_per_word/sort_test_gen.py Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter.vhd (revision 5) @@ -0,0 +1,64 @@ +-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL. +-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/ +-- No license information were provided by the original author. +-- Minimal modifications were introduced by me to make it suitable for my sorter. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.std_logic_unsigned.all; + +entity dp_ram_scl_sorter is + generic ( + DATA_WIDTH : integer := 72; + ADDR_WIDTH : integer := 10 + ); + port ( +-- common clock + clk : in std_logic; + -- Port A + we_a : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_a : out std_logic_vector(DATA_WIDTH-1 downto 0); + + -- Port B + we_b : in std_logic; + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_b : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_b : out std_logic_vector(DATA_WIDTH-1 downto 0) + ); +end dp_ram_scl_sorter; + +architecture rtl of dp_ram_scl_sorter is + -- Shared memory + type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0); + shared variable mem : mem_type; + signal tmp_a : std_logic_vector(DATA_WIDTH-1 downto 0); + signal tmp_b : std_logic_vector(DATA_WIDTH-1 downto 0); +begin + +-- Port A + process(clk) + begin + if(clk'event and clk = '1') then + if(we_a = '1') then + mem(conv_integer(addr_a)) := data_a; + end if; + tmp_a <= mem(conv_integer(addr_a)); + q_a <= tmp_a; + end if; + end process; + +-- Port B + process(clk) + begin + if(clk'event and clk = '1') then + if(we_b = '1') then + mem(conv_integer(addr_b)) := data_b; + end if; + tmp_b <= mem(conv_integer(addr_b)); + q_b <= tmp_b; + end if; + end process; + +end rtl; Index: high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter_distributed.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter_distributed.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/dp_ram_scl_sorter_distributed.vhd (revision 5) @@ -0,0 +1,61 @@ +-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL. +-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/ +-- No license information were provided by the original author. +-- Minimal modifications were introduced by me to make it suitable for my sorter. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.std_logic_unsigned.all; + +entity dp_ram_scl_sorter_distributed is + generic ( + DATA_WIDTH : integer := 72; + ADDR_WIDTH : integer := 10 + ); + port ( +-- common clock + clk : in std_logic; + -- Port A + we_a : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_a : out std_logic_vector(DATA_WIDTH-1 downto 0); + + -- Port B + we_b : in std_logic; + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_b : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_b : out std_logic_vector(DATA_WIDTH-1 downto 0) + ); +end dp_ram_scl_sorter_distributed; + +architecture rtl of dp_ram_scl_sorter_distributed is + -- Shared memory + type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0); + + signal mem : mem_type; + signal tmp_a : std_logic_vector(DATA_WIDTH-1 downto 0); + signal tmp_b : std_logic_vector(DATA_WIDTH-1 downto 0); + +begin + +-- Port A + process(clk) + begin + if(clk'event and clk = '1') then + if(we_a = '1') then + mem(conv_integer(addr_a)) <= data_a; + + elsif(we_b = '1') then + mem(conv_integer(addr_b)) <= data_b; + + end if; + + tmp_a <= mem(conv_integer(addr_a)); + q_a <= tmp_a; + tmp_b <= mem(conv_integer(addr_b)); + q_b <= tmp_b; + end if; + end process; + +end rtl; Index: high_speed_pipelined_4clk_per_word/src/dpram4.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/dpram4.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/dpram4.vhd (revision 5) @@ -0,0 +1,87 @@ +-- Simulation model of the dual port RAM (DP RAM) with single clock +-- and with "read-after-write" operation. +-- This file was combined from multiple descriptions and models of dual port RAMs +-- which I was able to find in the Internet and in the documentation provided +-- by vendors like Xilinx or Altera. +-- Therefore the only thing I can do is to publish it as PUBLIC DOMAIN +-- +-- Please note, that for synthesis you should replace this file with +-- another DP RAM wrapper inferring the real DP RAM +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity dp_ram_scl is + + generic + ( + DATA_WIDTH : natural; + ADDR_WIDTH : natural + ); + + port + ( + clk : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); + data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); + we_a : in std_logic := '1'; + we_b : in std_logic := '1'; + q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); + q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) + ); + +end dp_ram_scl; + +architecture rtl of dp_ram_scl is + + signal v_addr_a : natural range 0 to 2**ADDR_WIDTH - 1; + signal v_addr_b : natural range 0 to 2**ADDR_WIDTH - 1; + subtype word_t is std_logic_vector((DATA_WIDTH-1) downto 0); + type memory_t is array((2**ADDR_WIDTH-1) downto 0) of word_t; + + signal ram : memory_t := (others => std_logic_vector( to_unsigned( 16#33#, DATA_WIDTH ) ) ); -- For debugging - initialize + -- simulated RAM with x"33" + +begin + + v_addr_a <= to_integer(unsigned(addr_a(ADDR_WIDTH-1 downto 0))); + v_addr_b <= to_integer(unsigned(addr_b(ADDR_WIDTH-1 downto 0))); + + process(clk) + begin + + if(rising_edge(clk)) then + -- Port A + if(we_a = '1') then + ram(v_addr_a) <= data_a; + -- read-after-write behavior + q_a <= data_a; + else + -- simulate "unknown" value when the same address is written via one port + -- and immediately read via another port + if we_b='1' and v_addr_a=v_addr_b then + q_a <= (others => 'X'); + else + q_a <= ram(v_addr_a); + end if; + end if; + -- Port B + if(we_b = '1') then + ram(v_addr_b) <= data_b; + -- read-after-write behavior + q_b <= data_b; + else + -- simulate "unknown" value when the same address is written via one port + -- and immediately read via another port + if we_a='1' and v_addr_a=v_addr_b then + q_b <= (others => 'X'); + else + q_b <= ram(v_addr_b); + end if; + end if; + end if; + end process; + +end rtl; Index: high_speed_pipelined_4clk_per_word/src/dpram4_synth.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/dpram4_synth.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/dpram4_synth.vhd (revision 5) @@ -0,0 +1,63 @@ +-- Dual port, single clock memory, inferrable in Xilinx and Altera FPGA + +library ieee; +use ieee.std_logic_1164.all; + +entity dp_ram_scl_sorter is + + generic + ( + DATA_WIDTH : natural := 8; + ADDR_WIDTH : natural := 6 + ); + + port + ( + clk : in std_logic; + addr_a : in natural range 0 to 2**ADDR_WIDTH - 1; + addr_b : in natural range 0 to 2**ADDR_WIDTH - 1; + data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); + data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); + we_a : in std_logic := '1'; + we_b : in std_logic := '1'; + q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); + q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) + ); + +end dp_ram_scl_sorter; + + +architecture rtl of dp_ram_scl_sorter is + + -- Create a type for data word + subtype data_word is std_logic_vector((DATA_WIDTH-1) downto 0); + type ram_memory is array((2**ADDR_WIDTH-1) downto 0) of data_word; + + -- Declare the RAM variable. + shared variable ram : ram_memory; + +begin + + process(clk) + begin + if(rising_edge(clk)) then + -- Port B + if(we_b = '1') then + ram(addr_b) := data_b; + end if; + q_b <= ram(addr_b); + end if; + end process; + + process(clk) + begin + if(rising_edge(clk)) then + -- Port A + if(we_a = '1') then + ram(addr_a) := data_a; + end if; + q_a <= ram(addr_a); + end if; + end process; + +end rtl; Index: high_speed_pipelined_4clk_per_word/src/dpram_inf.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/dpram_inf.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/dpram_inf.vhd (revision 5) @@ -0,0 +1,60 @@ +-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL. +-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/ +-- No license information were provided by the original author. +-- Minimal modifications were introduced by me to make it suitable for my sorter. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.std_logic_unsigned.all; + +entity dp_ram_scl is + generic ( + DATA_WIDTH : integer := 72; + ADDR_WIDTH : integer := 10 + ); + port ( +-- common clock + clk : in std_logic; + -- Port A + we_a : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_a : out std_logic_vector(DATA_WIDTH-1 downto 0); + + -- Port B + we_b : in std_logic; + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_b : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_b : out std_logic_vector(DATA_WIDTH-1 downto 0) + ); +end dp_ram_scl; + +architecture rtl of dp_ram_scl is + -- Shared memory + type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0); + shared variable mem : mem_type; +begin + +-- Port A + process(clk) + begin + if(clk'event and clk = '1') then + if(we_a = '1') then + mem(conv_integer(addr_a)) := data_a; + end if; + q_a <= mem(conv_integer(addr_a)); + end if; + end process; + +-- Port B + process(clk) + begin + if(clk'event and clk = '1') then + if(we_b = '1') then + mem(conv_integer(addr_b)) := data_b; + end if; + q_b <= mem(conv_integer(addr_b)); + end if; + end process; + +end rtl; Index: high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd (revision 5) @@ -0,0 +1,186 @@ +------------------------------------------------------------------------------- +-- Title : Parametrized DP RAM for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sort_dpram.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sort_dp_ram is + + generic + ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string := "X"; + RAM_STYLE_G : string := "block" + ); + + port + ( + clk : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC + ); + +end sort_dp_ram; + +architecture rtl of sort_dp_ram is + + constant SYNC_CLK2 : boolean := false; + + signal vq_a, vq_b, tdata_a, tdata_b : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + signal reg0, reg1 : T_DATA_REC := DATA_REC_INIT_DATA; + + +begin + + -- Convert our data records int std_logic_vector, so that + -- standard DP RAM may handle it + tdata_a <= tdrec2stlv(data_a); + tdata_b <= tdrec2stlv(data_b); + + i1 : if ADDR_WIDTH > 0 generate + -- When ADDR_WIDTH is above 0 embed the real DP RAM + -- (even though synthesis tool may still replace it with + -- registers during optimization for low ADDR_WIDTH) + + q_a <= stlv2tdrec(vq_a); + q_b <= stlv2tdrec(vq_b); + blockgen : if RAM_STYLE_G = "block" generate + dp_ram_1 : entity work.dp_ram_scl_sorter + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk, + addr_a => addr_a(ADDR_WIDTH-1 downto 0), + addr_b => addr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => we_a, + we_b => we_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + distribgen : if RAM_STYLE_G = "distributed" generate + dp_ram_1 : entity work.dp_ram_scl_sorter_distributed + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk, + addr_a => addr_a(ADDR_WIDTH-1 downto 0), + addr_b => addr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => we_a, + we_b => we_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + + end generate i1; + + i2 : if ADDR_WIDTH = 0 generate + -- When ADDR_WIDTH is 0, DP RAM should be simply replaced + -- with a register implemented below + + p1 : process (clk) + begin -- process p1 + if clk'event and clk = '1' then -- rising clock edge + if we_a = '1' then + reg0 <= data_a; + reg1 <= data_a; + q_a <= data_a; + q_b <= data_a; + elsif we_b = '1' then + reg0 <= data_b; + reg1 <= data_b; + q_a <= data_b; + q_b <= data_b; + else + reg1 <= reg0; + q_a <= reg1; + q_b <= reg1; + end if; + end if; + end process p1; + + end generate i2; + + --dbg1 : if SORT_DEBUG generate + + -- -- Process monitoring read/write accesses to the memory (only for debugging) + -- p3 : process (clk) + -- variable rline : line; + -- begin -- process p1 + -- if clk'event and clk = '1' then -- rising clock edge + -- if(we_a = '1' and we_b = '1') then + -- write(rline, NAME); + -- write(rline, ADDR_WIDTH); + -- write(rline, string'(" Possible write collision!")); + -- writeline(reports, rline); + -- end if; + +-- if we_a = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_A:")); +-- wrstlv(rline, addr_a(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_a); +-- writeline(reports, rline); +-- end if; +-- if we_b = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_B:")); +-- wrstlv(rline, addr_b(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_b); +-- writeline(reports, rline); +-- end if; +-- end if; +-- end process p3; +--end generate dbg1; +end rtl; Index: high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_sim =================================================================== --- high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_sim (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_sim (revision 5) @@ -0,0 +1,235 @@ +------------------------------------------------------------------------------- +-- Title : Parametrized DP RAM for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sort_dpram.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-09 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sort_dp_ram is + + generic + ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string := "X"; + RAM_STYLE_G : string := "block" + ); + + port + ( + clk : in std_logic; + clk2 : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC + ); + +end sort_dp_ram; + +architecture rtl of sort_dp_ram is + + constant SYNC_CLK2 : boolean := false; + + signal vq_a, vq_b, tdata_a, tdata_b : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + signal reg : T_DATA_REC := DATA_REC_INIT_DATA; + signal det1, det2, det3 : std_logic := '1'; + signal gwe_a, gwe_b : std_logic := '1'; + signal xaddr_a : std_logic_vector(NLEVELS-1 downto 0); + signal xaddr_b : std_logic_vector(NLEVELS-1 downto 0); + +begin + + -- Convert our data records int std_logic_vector, so that + -- standard DP RAM may handle it + tdata_a <= tdrec2stlv(data_a); + tdata_b <= tdrec2stlv(data_b); + + + -- Simulate the combinational net settling after 17.5 ns + process is + begin -- process + wait until clk'event and clk = '1'; -- rising clock edge + xaddr_a <= (others => 'X'); + xaddr_a <= (others => 'X'); + wait for 17.5 ns; + xaddr_a <= addr_a; + xaddr_b <= addr_b; + end process; + + -- We need to ensure, that the memory is written only every second cycle (so + -- that the external combinatorial net has two Clk2 cycles to settle) + -- + rs1 : process (clk) is + begin -- process rs1 + if clk'event and clk = '1' then -- rising clock edge + det1 <= not det1; + end if; + end process rs1; + + rs2 : process (clk2) is + begin -- process rs1 + if clk2'event and clk2 = '1' then -- rising clock edge + det2 <= det1; + end if; + end process rs2; + -- We should activate WE only when det1 and det2 are equal + -- (i.e., after the clk2 pulse not associated with the clk pulse) + + ig1 : if SYNC_CLK2 generate + gwe : process (det1, det2, we_a, we_b) is + begin -- process gwe + gwe_a <= '0'; + gwe_b <= '0'; + if det2 = det1 then + gwe_a <= we_a; + gwe_b <= we_b; + end if; + end process gwe; + end generate ig1; + + ig2 : if not SYNC_CLK2 generate + gwe_a <= we_a; + gwe_b <= we_b; + end generate ig2; + + i1 : if ADDR_WIDTH > 0 generate + -- When ADDR_WIDTH is above 0 embed the real DP RAM + -- (even though synthesis tool may still replace it with + -- registers during optimization for low ADDR_WIDTH) + + q_a <= stlv2tdrec(vq_a); + q_b <= stlv2tdrec(vq_b); + blockgen : if RAM_STYLE_G = "block" generate + dp_ram_1 : entity work.dp_ram_scl_sorter + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk2, + addr_a => xaddr_a(ADDR_WIDTH-1 downto 0), + addr_b => xaddr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => gwe_a, + we_b => gwe_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + distribgen : if RAM_STYLE_G = "distributed" generate + dp_ram_1 : entity work.dp_ram_scl_sorter_distributed + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk2, + addr_a => xaddr_a(ADDR_WIDTH-1 downto 0), + addr_b => xaddr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => gwe_a, + we_b => gwe_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + + end generate i1; + + i2 : if ADDR_WIDTH = 0 generate + -- When ADDR_WIDTH is 0, DP RAM should be simply replaced + -- with a register implemented below + + p1 : process (clk) + begin -- process p1 + if clk'event and clk = '1' then -- rising clock edge + if we_a = '1' then + reg <= data_a; + q_a <= data_a; + q_b <= data_a; + elsif we_b = '1' then + reg <= data_b; + q_a <= data_b; + q_b <= data_b; + else + q_a <= reg; + q_b <= reg; + end if; + end if; + end process p1; + + end generate i2; + + --dbg1 : if SORT_DEBUG generate + + -- -- Process monitoring read/write accesses to the memory (only for debugging) + -- p3 : process (clk) + -- variable rline : line; + -- begin -- process p1 + -- if clk'event and clk = '1' then -- rising clock edge + -- if(we_a = '1' and we_b = '1') then + -- write(rline, NAME); + -- write(rline, ADDR_WIDTH); + -- write(rline, string'(" Possible write collision!")); + -- writeline(reports, rline); + -- end if; + +-- if we_a = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_A:")); +-- wrstlv(rline, addr_a(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_a); +-- writeline(reports, rline); +-- end if; +-- if we_b = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_B:")); +-- wrstlv(rline, addr_b(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_b); +-- writeline(reports, rline); +-- end if; +-- end if; +-- end process p3; +--end generate dbg1; +end rtl; Index: high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_synth =================================================================== --- high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_synth (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sort_dpram.vhd_synth (revision 5) @@ -0,0 +1,212 @@ +------------------------------------------------------------------------------- +-- Title : Parametrized DP RAM for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sort_dpram.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-09 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sort_dp_ram is + + generic + ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string := "X"; + RAM_STYLE_G : string := "block" + ); + + port + ( + clk : in std_logic; + clk2 : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC + ); + +end sort_dp_ram; + +architecture rtl of sort_dp_ram is + + signal vq_a, vq_b, tdata_a, tdata_b : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + signal reg : T_DATA_REC := DATA_REC_INIT_DATA; + signal det1, det2, det3 : std_logic := '1'; + signal gwe_a, gwe_b : std_logic := '1'; + +begin + + -- Convert our data records int std_logic_vector, so that + -- standard DP RAM may handle it + tdata_a <= tdrec2stlv(data_a); + tdata_b <= tdrec2stlv(data_b); + + -- We need to ensure, that the memory is written only every second cycle (so + -- that the external combinatorial net has two Clk2 cycles to settle) + -- + rs1 : process (clk) is + begin -- process rs1 + if clk'event and clk = '1' then -- rising clock edge + det1 <= not det1; + end if; + end process rs1; + + rs2 : process (clk2) is + begin -- process rs1 + if clk2'event and clk2 = '1' then -- rising clock edge + det2 <= det1; + end if; + end process rs2; + -- We should activate WE only when det1 and det2 are equal + -- (i.e., after the clk2 pulse not associated with the clk pulse) + + gwe : process (det1, det2, we_a, we_b) is + begin -- process gwe + gwe_a <= '0'; + gwe_b <= '0'; + if det2 = det1 then + gwe_a <= we_a; + gwe_b <= we_b; + end if; + end process gwe; + + i1 : if ADDR_WIDTH > 0 generate + -- When ADDR_WIDTH is above 0 embed the real DP RAM + -- (even though synthesis tool may still replace it with + -- registers during optimization for low ADDR_WIDTH) + + q_a <= stlv2tdrec(vq_a); + q_b <= stlv2tdrec(vq_b); + blockgen : if RAM_STYLE_G = "block" generate + dp_ram_1 : entity work.dp_ram_scl_sorter + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk2, + addr_a => addr_a(ADDR_WIDTH-1 downto 0), + addr_b => addr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => gwe_a, + we_b => gwe_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + distribgen : if RAM_STYLE_G = "distributed" generate + dp_ram_1 : entity work.dp_ram_scl_sorter_distributed + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk2, + addr_a => addr_a(ADDR_WIDTH-1 downto 0), + addr_b => addr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => gwe_a, + we_b => gwe_b, + q_a => vq_a, + q_b => vq_b); + + end generate; + + + end generate i1; + + i2 : if ADDR_WIDTH = 0 generate + -- When ADDR_WIDTH is 0, DP RAM should be simply replaced + -- with a register implemented below + + p1 : process (clk) + begin -- process p1 + if clk'event and clk = '1' then -- rising clock edge + if we_a = '1' then + reg <= data_a; + q_a <= data_a; + q_b <= data_a; + elsif we_b = '1' then + reg <= data_b; + q_a <= data_b; + q_b <= data_b; + else + q_a <= reg; + q_b <= reg; + end if; + end if; + end process p1; + + end generate i2; + + --dbg1 : if SORT_DEBUG generate + + -- -- Process monitoring read/write accesses to the memory (only for debugging) + -- p3 : process (clk) + -- variable rline : line; + -- begin -- process p1 + -- if clk'event and clk = '1' then -- rising clock edge + -- if(we_a = '1' and we_b = '1') then + -- write(rline, NAME); + -- write(rline, ADDR_WIDTH); + -- write(rline, string'(" Possible write collision!")); + -- writeline(reports, rline); + -- end if; + +-- if we_a = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_A:")); +-- wrstlv(rline, addr_a(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_a); +-- writeline(reports, rline); +-- end if; +-- if we_b = '1' then +-- write(rline, NAME); +-- write(rline, ADDR_WIDTH); +-- write(rline, string'(" WR_B:")); +-- wrstlv(rline, addr_b(ADDR_WIDTH-1 downto 0)); +-- write(rline, string'(" VAL:")); +-- wrstlv(rline, tdata_b); +-- writeline(reports, rline); +-- end if; +-- end if; +-- end process p3; +--end generate dbg1; +end rtl; Index: high_speed_pipelined_4clk_per_word/src/sorter_ctrl.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sorter_ctrl.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sorter_ctrl.vhd (revision 5) @@ -0,0 +1,339 @@ +------------------------------------------------------------------------------- +-- Title : Sorting node controller for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_ctrl.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- +------------------------------------------------------------------------------- +-- The sorter controller is connected with three dual port memories. +-- The first dual port memory tm_... provides the "upstream data" +-- The second dual port memory lm_... provides the "left branch of downstream data" +-- The third dual port memory rm_... provides the "right branch of downstream data" +-- The controller is notified about availability of the new data by the +-- "update" signal. +-- However in this architecture we need to service two upstream memories! +-- That's because we want to save one cycle, and to be able to issue +-- +-- Important feature of each controller is the ability to clear the memory +-- after reset. +------------------------------------------------------------------------------- +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sorter_ctrl is + + generic ( + NLEVELS : integer; -- number of levels (max number of + -- address bits + NADDRBITS : integer -- number of used address bits + ); + + port ( + -- Top memory connections + tm_din : in T_DATA_REC; + tm_dout : out T_DATA_REC; + tm_addr : out std_logic_vector(NLEVELS-1 downto 0); + tm_we : out std_logic; + -- Left memory connections + lm_din : in T_DATA_REC; + lm_dout : out T_DATA_REC; + lm_addr : out std_logic_vector(NLEVELS-1 downto 0); + lm_we : out std_logic; + -- Right memory connections + rm_din : in T_DATA_REC; + rm_dout : out T_DATA_REC; + rm_addr : out std_logic_vector(NLEVELS-1 downto 0); + rm_we : out std_logic; + -- Upper level controller connections + up_in : in std_logic; + up_in_val : in T_DATA_REC; + up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + -- Upper level update notifier + up_out : out std_logic; + up_out_val : out T_DATA_REC; + up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + -- Lower level controller connections + low_out : out std_logic; + low_out_val : out T_DATA_REC; + low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_in : in std_logic; + low_in_val : in T_DATA_REC; + low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + -- Lower level update notifier + -- System connections + clk : in std_logic; + clk_en : in std_logic; + ready_in : in std_logic; + ready_out : out std_logic; -- signals, when memory is cleared + -- after reset + rst_n : in std_logic); +end sorter_ctrl; + +architecture sorter_ctrl_arch1 of sorter_ctrl is + + type T_CTRL_STATE is (CTRL_RESET, CTRL_CLEAR, CTRL_IDLE, CTRL_S2, CTRL_S1, CTRL_S0); + signal ctrl_state, ctrl_state_next : T_CTRL_STATE := CTRL_IDLE; + signal addr, addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_up_in_addr, s_up_in_addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_up_out_addr, s_up_out_addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_ready_out, s_ready_out_i : std_logic; + signal s_up_out, s_up_out_i : std_logic; + signal s_addr_out : std_logic_vector(NLEVELS-1 downto 0); + signal s_tm_dout : T_DATA_REC; + signal s_up_out_val_i, s_up_out_val : T_DATA_REC := DATA_REC_INIT_DATA; + signal s_up_in_val_i, s_up_in_val : T_DATA_REC := DATA_REC_INIT_DATA; + signal res_sort_cmp_lt_l_up, res_sort_cmp_lt_l_r, res_sort_cmp_lt_r_up : boolean := false; + signal s_l_val, s_l_val_i : T_DATA_REC; + signal s_r_val, s_r_val_i : T_DATA_REC; + + + constant ADDR_MAX : std_logic_vector(NLEVELS-1 downto 0) := std_logic_vector(to_unsigned(2**NADDRBITS-1, NLEVELS)); + +begin + + tm_dout <= s_tm_dout; +-- We have the two-process state machine. + p1 : process (addr, ctrl_state, lm_din, low_in, low_in_addr(NADDRBITS), + low_in_addr(NADDRBITS-1 downto 0), low_in_val, ready_in, + res_sort_cmp_lt_l_r, res_sort_cmp_lt_l_up, + res_sort_cmp_lt_r_up, rm_din, s_l_val, s_r_val, s_ready_out, + s_up_in_val, up_in, up_in_addr, up_in_val) + variable rline : line; + + begin -- process p1 + -- defaults + ctrl_state_next <= ctrl_state; + tm_we <= '0'; + rm_we <= '0'; + lm_we <= '0'; + lm_addr <= (others => '0'); + rm_addr <= (others => '0'); + tm_addr <= (others => '0'); + s_ready_out_i <= s_ready_out; + addr_i <= addr; + low_out_val <= DATA_REC_INIT_DATA; -- to avoid latches + low_out <= '0'; + s_up_in_val_i <= s_up_in_val; + up_out_addr <= (others => '0'); + up_out_val <= DATA_REC_INIT_DATA; + up_out <= '0'; + lm_dout <= DATA_REC_INIT_DATA; + rm_dout <= DATA_REC_INIT_DATA; + s_tm_dout <= DATA_REC_INIT_DATA; + s_addr_out <= (others => '0'); + s_l_val_i <= s_l_val; + s_r_val_i <= s_r_val; + case ctrl_state is + when CTRL_RESET => + addr_i <= (others => '0'); + s_ready_out_i <= '0'; + ctrl_state_next <= CTRL_CLEAR; + when CTRL_CLEAR => + lm_addr <= addr; + rm_addr <= addr; + lm_dout <= DATA_REC_INIT_DATA; + rm_dout <= DATA_REC_INIT_DATA; + lm_we <= '1'; + rm_we <= '1'; + if addr = ADDR_MAX then + if ready_in = '1' then + s_ready_out_i <= '1'; + ctrl_state_next <= CTRL_IDLE; + end if; + else + addr_i <= std_logic_vector(unsigned(addr)+1); + end if; + when CTRL_IDLE => + -- We read "down" memories ("upper" value is provided by the ``bypass channel'') + if up_in = '1' then + ctrl_state_next <= CTRL_S0; + tm_addr <= up_in_addr; + lm_addr <= up_in_addr; + rm_addr <= up_in_addr; + addr_i <= up_in_addr; + s_up_in_val_i <= up_in_val; + end if; + when CTRL_S0 => + tm_addr <= addr; + lm_addr <= addr; + rm_addr <= addr; + -- We wait, until the memories produce the data, (there is an output + -- register on the memory's output!) + ctrl_state_next <= CTRL_S1; + when CTRL_S1 => + -- The memory values are available, so we start the comparison + s_l_val_i <= lm_din; + s_r_val_i <= rm_din; + -- Check, if we need to take value from lower ``bypass channel'' + if low_in = '1' then + --if SORT_DEBUG then + -- write(rline, string'(" x! ")); + --end if; + if (addr(NADDRBITS-1 downto 0) = low_in_addr(NADDRBITS-1 downto 0)) then + -- We are reading a value which was just updated, so we need to get it + -- from ``bypass channel'' instead of memory + --if SORT_DEBUG then + -- write(rline, string'(" y! ")); + --end if; + if low_in_addr(NADDRBITS) = '1' then + s_l_val_i <= low_in_val; + else + s_r_val_i <= low_in_val; + end if; + end if; + end if; + ctrl_state_next <= CTRL_S2; + when CTRL_S2 => + -- In this cycle we can compare data + -- Debug output! + --if SORT_DEBUG then + -- write(rline, string'("CMP ")); + -- write(rline, NADDRBITS); + -- write(rline, string'(" U:")); + -- wrstlv(rline, tdrec2stlv(s_up_in_val)); + --end if; + --if SORT_DEBUG then + -- write(rline, string'(" L:")); + -- wrstlv(rline, tdrec2stlv(l_val)); + -- write(rline, string'(" R:")); + -- wrstlv(rline, tdrec2stlv(r_val)); + -- write(rline, string'(" A:")); + --end if; + s_up_out_i <= '0'; -- Cancel previous UP notification + if res_sort_cmp_lt_l_up and res_sort_cmp_lt_l_r then + -- The L-ram value is the smallest + -- Output the value from the L-ram and put the new value into the L-ram + s_tm_dout <= s_l_val; + tm_addr <= addr; + tm_we <= '1'; + + up_out_val <= s_l_val; + up_out <= '1'; + up_out_addr <= addr; + + lm_addr <= addr; + lm_dout <= s_up_in_val; + lm_we <= '1'; + + -- Low out must be sent one cycle before idle! + low_out <= '1'; + low_out_val <= s_up_in_val; + s_addr_out(NADDRBITS) <= '1'; + + if NADDRBITS > 0 then + s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); + end if; + --wrstlv(rline, s_addr_out); + ctrl_state_next <= CTRL_IDLE; + --if SORT_DEBUG then + -- write(rline, string'(" T<->L")); + --end if; + elsif res_sort_cmp_lt_r_up then + -- The R-ram value is the smallest + -- Output the value from the R-ram and put the new value into the R-ram + s_tm_dout <= s_r_val; + tm_addr <= addr; + tm_we <= '1'; + + up_out_val <= s_r_val; + up_out <= '1'; + up_out_addr <= addr; + + rm_addr <= addr; + rm_dout <= s_up_in_val; + rm_we <= '1'; + + low_out <= '1'; + low_out_val <= s_up_in_val; + + s_addr_out(NADDRBITS) <= '0'; + if NADDRBITS > 0 then + s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); + end if; + ctrl_state_next <= CTRL_IDLE; + --if SORT_DEBUG then + -- wrstlv(rline, s_addr_out); + -- write(rline, string'(" T<->R")); + --end if; + else + -- The new value is the smallest + -- Nothing to do, no update downstream + s_tm_dout <= s_up_in_val; + tm_we <= '1'; + tm_addr <= addr; + + up_out_val <= s_up_in_val; + up_out <= '1'; + up_out_addr <= addr; + + ctrl_state_next <= CTRL_IDLE; + --wrstlv(rline, up_in_addr); + --if SORT_DEBUG then + -- write(rline, string'(" T===T")); + --end if; + end if; + --if SORT_DEBUG then + -- writeline(reports, rline); + --end if; + when others => null; + end case; + end process p1; + + p2 : process (clk) is + begin -- process p2 + if clk'event and clk = '1' then -- rising clock edge + if rst_n = '0' then -- asynchronous reset (active low) + ctrl_state <= CTRL_RESET; + s_ready_out <= '0'; + addr <= (others => '0'); + s_up_in_val <= DATA_REC_INIT_DATA; + s_l_val <= DATA_REC_INIT_DATA; + s_r_val <= DATA_REC_INIT_DATA; + res_sort_cmp_lt_l_up <= false; + res_sort_cmp_lt_l_r <= false; + res_sort_cmp_lt_r_up <= false; + --update_out <= '0'; + --addr_out <= (others => '0'); + else + res_sort_cmp_lt_l_up <= sort_cmp_lt(s_l_val_i, s_up_in_val); + res_sort_cmp_lt_l_r <= sort_cmp_lt(s_l_val_i, s_r_val_i); + res_sort_cmp_lt_r_up <= sort_cmp_lt(s_r_val_i, s_up_in_val); + s_ready_out <= s_ready_out_i; + ctrl_state <= ctrl_state_next; + addr <= addr_i; + s_up_in_val <= s_up_in_val_i; + s_l_val <= s_l_val_i; + s_r_val <= s_r_val_i; + end if; + end if; + end process p2; + ready_out <= s_ready_out; + low_out_addr <= s_addr_out; +end sorter_ctrl_arch1; Index: high_speed_pipelined_4clk_per_word/src/sorter_pkg.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sorter_pkg.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sorter_pkg.vhd (revision 5) @@ -0,0 +1,216 @@ +------------------------------------------------------------------------------- +-- Title : Definitions for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_pkg.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2011-07-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sys_config.all; + +package sorter_pkg is + constant DATA_REC_WIDTH : integer := DATA_REC_SORT_KEY_WIDTH + + DATA_REC_PAYLOAD_WIDTH + 2; + + + subtype T_SORT_KEY is unsigned (DATA_REC_SORT_KEY_WIDTH - 1 downto 0); + subtype T_PAYLOAD is std_logic_vector(DATA_REC_PAYLOAD_WIDTH - 1 downto 0); + + --alias T_SORT_KEY is unsigned (12 downto 0); + type T_DATA_REC is record + d_key : T_SORT_KEY; + init : std_logic; + valid : std_logic; + d_payload : T_PAYLOAD; + end record; + + type T_DATA_REC_ARR is array (natural range <>) of T_DATA_REC; + + -- Special constant used to initially fill the sorter + -- Must be sorted so, that is smaller, than any other data + constant DATA_REC_INIT_DATA : T_DATA_REC := ( + d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), + init => '1', + valid => '0', + d_payload => (others => '0') + ); + + -- Special constant used to ``flush'' the sorter at the end + constant DATA_REC_END_DATA : T_DATA_REC := ( + d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), + init => '1', + valid => '1', + d_payload => (others => '0') + ); + + + function sort_cmp_lt ( + constant v1 : T_DATA_REC; + constant v2 : T_DATA_REC) + return boolean; + + function tdrec2stlv ( + constant drec : T_DATA_REC) + return std_logic_vector; + + function stlv2tdrec ( + constant dstlv : std_logic_vector) + return T_DATA_REC; + + --procedure wrstlv ( + -- rline : inout line; + -- constant vect : std_logic_vector); + + file reports : text open write_mode is "STD_OUTPUT"; + +end sorter_pkg; + +package body sorter_pkg is + + function stlv2tdrec ( + constant dstlv : std_logic_vector) + return T_DATA_REC is + variable result : T_DATA_REC; + variable j : integer := 0; + begin -- stlv2drec + j := 0; + result.d_key := unsigned(dstlv(j-1+DATA_REC_SORT_KEY_WIDTH downto j)); + j := j+DATA_REC_SORT_KEY_WIDTH; + result.valid := dstlv(j); + j := j+1; + result.init := dstlv(j); + j := j+1; + result.d_payload := dstlv(j-1+DATA_REC_PAYLOAD_WIDTH downto j); + j := j+DATA_REC_PAYLOAD_WIDTH; + return result; + end stlv2tdrec; + + function tdrec2stlv ( + constant drec : T_DATA_REC) + return std_logic_vector is + variable result : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + variable j : integer := 0; + begin -- tdrec2stlv + j := 0; + result(j-1+DATA_REC_SORT_KEY_WIDTH downto j) := std_logic_vector(drec.d_key); + j := j+DATA_REC_SORT_KEY_WIDTH; + result(j) := drec.valid; + j := j+1; + result(j) := drec.init; + j := j+1; + result(j-1+DATA_REC_PAYLOAD_WIDTH downto j) := std_logic_vector(drec.d_payload); + j := j+DATA_REC_PAYLOAD_WIDTH; + return result; + end tdrec2stlv; + + + -- Function sort_cmp_lt returns TRUE when the first opperand is ``less'' than + -- the second one + function sort_cmp_lt ( + constant v1 : T_DATA_REC; + constant v2 : T_DATA_REC) + return boolean is + variable rline : line; + variable dcomp : unsigned(DATA_REC_SORT_KEY_WIDTH-1 downto 0) := (others => '0'); + begin -- sort_cmp_lt + -- Check the special cases + if (v1.init = '1') and (v2.init = '0') then + -- v1 is the special record, v2 is the standard one + if v1.valid = '0' then + -- initialization record - ``smaller'' than all standard records + return true; + else + -- end record - ``bigger'' than all standard records + return false; + end if; + elsif (v1.init = '0') and (v2.init = '1') then + -- v2 is the special record, v1 is the standard one + if (v2.valid = '0') then + -- v2 is the initialization record - it is ``smaller'' than standard record v1 + return false; + else + -- v2 is the end record - it is ``bigger'' than standard record v1 + return true; + end if; + elsif (v1.init = '1') and (v2.init = '1') then + -- both v1 and v2 are special records + if (v1.valid = '0') and (v2.valid = '1') then + -- v1 - initial record, v2 - end record + return true; + else + -- v1 is end record, so it is ``bigger'' or ``equal'' to other records + return false; + end if; + elsif (v1.init = '0') and (v2.init = '0') then + -- We compare standard words + -- We must consider the fact, that in longer sequences of data records + -- the sort keys may wrap around + -- therefore we perform subtraction modulo + -- 2**DATA_REC_SORT_KEY_WIDTH and check the MSB + dcomp := v1.d_key-v2.d_key; + if dcomp(DATA_REC_SORT_KEY_WIDTH-1) = '1' then + --if signed(v1.d_key - v2.d_key)<0 then -- old implementation + return true; + elsif v2.d_key = v1.d_key then + if v2.valid = '1' then + return true; + else + -- Empty data records should wait + return false; + end if; + else + return false; + end if; + else + --assert false report "Wrong records in sort_cmp_lt" severity error; + return false; + end if; + return false; -- should never happen + end sort_cmp_lt; + + + --procedure wrstlv ( + -- rline : inout string; + -- constant vect : std_logic_vector) is + --begin -- stlv2str + -- for i in vect'left downto vect'right loop + -- case vect(i) is + -- when 'U' => write(line(rline), string'("u")); + -- when 'Z' => write(line(rline), string'("z")); + -- when 'X' => write(line(rline), string'("x")); + -- when 'L' => write(line(rline), string'("L")); + -- when 'H' => write(line(rline), string'("H")); + -- when '1' => write(line(rline), string'("1")); + -- when '0' => write(line(rline), string'("0")); + -- when others => write(line(rline), string'("?")); + -- end case; + -- end loop; -- i + --end wrstlv; + +end sorter_pkg; + Index: high_speed_pipelined_4clk_per_word/src/sorter_sys.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sorter_sys.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sorter_sys.vhd (revision 5) @@ -0,0 +1,305 @@ +------------------------------------------------------------------------------- +-- Title : Top entity of heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_sys.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sorter_sys is + generic ( + NLEVELS : integer := SYS_NLEVELS -- number of levels in the sorter heap + ); + + port ( + din : in T_DATA_REC; + we : in std_logic; + dout : out T_DATA_REC; + dav : out std_logic; + clk : in std_logic; + rst_n : in std_logic; + ready : out std_logic); +end sorter_sys; + +architecture sorter_sys_arch1 of sorter_sys is + + component sort_dp_ram + generic ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string; + RAM_STYLE_G : string := "block"); + port ( + clk : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC); + end component; + + component sorter_ctrl + generic ( + NLEVELS : integer; + NADDRBITS : integer); + port ( + tm_din : in T_DATA_REC; + tm_dout : out T_DATA_REC; + tm_addr : out std_logic_vector(NLEVELS-1 downto 0); + tm_we : out std_logic; + lm_din : in T_DATA_REC; + lm_dout : out T_DATA_REC; + lm_addr : out std_logic_vector(NLEVELS-1 downto 0); + lm_we : out std_logic; + rm_din : in T_DATA_REC; + rm_dout : out T_DATA_REC; + rm_addr : out std_logic_vector(NLEVELS-1 downto 0); + rm_we : out std_logic; + up_in : in std_logic; + up_in_val : in T_DATA_REC; + up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + up_out : out std_logic; + up_out_val : out T_DATA_REC; + up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_out : out std_logic; + low_out_val : out T_DATA_REC; + low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_in : in std_logic; + low_in_val : in T_DATA_REC; + low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + clk : in std_logic; + clk_en : in std_logic; + ready_in : in std_logic; + ready_out : out std_logic; + rst_n : in std_logic); + end component; + + -- Create signals for address buses + -- Some of them will remain unused. + subtype T_SORT_BUS_ADDR is std_logic_vector(NLEVELS-1 downto 0); + type T_SORT_ADDR_BUSES is array (NLEVELS downto 0) of T_SORT_BUS_ADDR; + signal low_addr, up_addr, addr_dr, addr_dl, addr_u : T_SORT_ADDR_BUSES := (others => (others => '0')); + type T_SORT_DATA_BUSES is array (NLEVELS downto 0) of T_DATA_REC; + signal up_update_path, low_update_path, data_d, data_dl, data_dr, data_u : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); + signal q_dr, q_dl, q_u, q_ul, q_ur : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); + signal we_ul, we_ur, we_u, we_dl, we_dr, low_update, up_update, s_ready : std_logic_vector(NLEVELS downto 0) := (others => '0'); + signal addr_switch, addr_switch_del : std_logic_vector(NLEVELS downto 0); + signal l0_reg : T_DATA_REC; + signal clk_en : std_logic := '1'; + + function f_sel_bram_style ( + addrwidth : natural ) + return string is + begin + if addrwidth >= 4 then + return "block"; + else + return "distributed"; + end if; + end function f_sel_bram_style; + +begin -- sorter_sys_arch1 + +-- Build the sorting tree + + g1 : for i in 0 to NLEVELS-1 generate + + -- Two RAMs from the upper level are seen as a single RAM + -- We use the most significant bit (i-th bit) to distinguish RAM + -- In all RAMs the A-ports are used for upstream connections + -- and the B-ports are used for downstream connections + + -- Below are processes used to combine two upstream RAMs in a single one + i0a : if i >= 1 generate + addr_switch(i) <= addr_u(i)(i-1); + end generate i0a; + i0b : if i = 0 generate + addr_switch(i) <= '0'; + end generate i0b; + + -- There is a problem with reading of data provided by two upstream RAMs + -- we need to multiplex the data... + -- Delay for read data multiplexer + s1 : process (clk, rst_n) + begin -- process s1 + if rst_n = '0' then -- asynchronous reset (active low) + addr_switch_del(i) <= '0'; + elsif clk'event and clk = '1' then -- rising clock edge + addr_switch_del(i) <= addr_switch(i); + end if; + end process s1; + + -- Upper RAM signals' multiplexer + c1 : process (addr_switch, addr_switch_del, q_ul, q_ur, we_u) + begin -- process c1 + we_ul(i) <= '0'; + we_ur(i) <= '0'; + if addr_switch(i) = '1' then + we_ul(i) <= we_u(i); + else + we_ur(i) <= we_u(i); + end if; + if addr_switch_del(i) = '1' then + q_u(i) <= q_ul(i); + else + q_u(i) <= q_ur(i); + end if; + end process c1; + + dp_ram_l : sort_dp_ram + generic map ( + NLEVELS => NLEVELS, + ADDR_WIDTH => i, + NAME => "L", + RAM_STYLE_G => f_sel_bram_style(i)) + port map ( + clk => clk, + addr_a => addr_dl(i), + addr_b => addr_u(i+1), + data_a => data_dl(i), + data_b => data_u(i+1), + we_a => we_dl(i), + we_b => we_ul(i+1), + q_a => q_dl(i), + q_b => q_ul(i+1)); + + dp_ram_r : sort_dp_ram + generic map ( + NLEVELS => NLEVELS, + ADDR_WIDTH => i, + NAME => "R", + RAM_STYLE_G => f_sel_bram_style(i)) + port map ( + clk => clk, + addr_a => addr_dr(i), + addr_b => addr_u(i+1), + data_a => data_dr(i), + data_b => data_u(i+1), + we_a => we_dr(i), + we_b => we_ur(i+1), + q_a => q_dr(i), + q_b => q_ur(i+1)); + + sorter_ctrl_1 : sorter_ctrl + generic map ( + NLEVELS => NLEVELS, + NADDRBITS => i) + port map ( + tm_din => q_u(i), + tm_dout => data_u(i), + tm_addr => addr_u(i), + tm_we => we_u(i), + lm_din => q_dl(i), + lm_dout => data_dl(i), + lm_addr => addr_dl(i), + lm_we => we_dl(i), + rm_din => q_dr(i), + rm_dout => data_dr(i), + rm_addr => addr_dr(i), + rm_we => we_dr(i), + up_in => up_update(i), + up_in_val => up_update_path(i), + up_in_addr => up_addr(i), + up_out => low_update(i), + up_out_val => low_update_path(i), + up_out_addr => low_addr(i), + low_in => low_update(i+1), + low_in_val => low_update_path(i+1), + low_in_addr => low_addr(i+1), + low_out => up_update(i+1), -- connections to the next level + low_out_val => up_update_path(i+1), + low_out_addr => up_addr(i+1), + clk => clk, + clk_en => clk_en, + ready_in => s_ready(i+1), + ready_out => s_ready(i), + rst_n => rst_n); + + end generate g1; + -- top level + + -- On the top level we have only a single register + process (clk, rst_n) + variable rline : line; + begin -- process + if rst_n = '0' then -- asynchronous reset (active low) + l0_reg <= DATA_REC_INIT_DATA; + dav <= '0'; + elsif clk'event and clk = '1' then -- rising clock edge + dav <= '0'; + if we_u(0) = '1' then + l0_reg <= data_u(0); + dout <= data_u(0); + dav <= '1'; + --if SORT_DEBUG then + -- write(rline, string'("OUT: ")); + -- write(rline, tdrec2stlv(data_u(0))); + -- writeline(reports, rline); + --end if; + elsif we = '1' then + --if SORT_DEBUG then + -- write(rline, string'("IN: ")); + -- write(rline, tdrec2stlv(din)); + -- writeline(reports, rline); + --end if; + l0_reg <= din; + -- dout <= din; + -- else + -- dout <= l0_reg; + end if; + end if; + end process; + ready <= s_ready(0); + q_ur(0) <= l0_reg; + q_ul(0) <= l0_reg; + up_update(0) <= we; + up_update_path(0) <= din; + up_addr(0) <= (others => '0'); + + -- signals for the last level + + s_ready(NLEVELS) <= '1'; + --addr(NLEVELS) <= (others => '0'); + data_dr(NLEVELS) <= DATA_REC_INIT_DATA; + data_dl(NLEVELS) <= DATA_REC_INIT_DATA; + we_dl(NLEVELS) <= '0'; + we_dr(NLEVELS) <= '0'; + + low_update(NLEVELS) <= '0'; + low_update_path(NLEVELS) <= DATA_REC_INIT_DATA; + low_addr(0) <= (others => '0'); + +end sorter_sys_arch1; Index: high_speed_pipelined_4clk_per_word/src/sorter_sys_tb.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sorter_sys_tb.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sorter_sys_tb.vhd (revision 5) @@ -0,0 +1,160 @@ +------------------------------------------------------------------------------- +-- Title : Testbench for design "heap-sorter" +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_sys_tb.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2018-03-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sys_config.all; +use work.sorter_pkg.all; + + +------------------------------------------------------------------------------- + +entity sorter_sys_tb is + +end entity sorter_sys_tb; + +------------------------------------------------------------------------------- + +architecture sort_tb_beh of sorter_sys_tb is + + constant NLEVELS : integer := SYS_NLEVELS; + -- component ports + signal din : T_DATA_REC := DATA_REC_INIT_DATA; + signal dout : T_DATA_REC; + signal we : std_logic := '0'; + signal dav : std_logic := '0'; + signal rst_n : std_logic := '0'; + signal ready : std_logic := '0'; + + component sorter_sys + generic ( + NADDRBITS : integer); + port ( + din : in T_DATA_REC; + we : in std_logic; + dout : out T_DATA_REC; + dav : out std_logic; + clk : in std_logic; + rst_n : in std_logic; + ready : out std_logic); + end component; + -- clock + signal Clk : std_logic := '1'; + --signal Clk2 : std_logic := '1'; + + signal end_sim : boolean := false; + signal div : integer range 0 to 8 := 0; + +begin -- architecture sort_tb_beh + + -- component instantiation + DUT : entity work.sorter_sys + generic map ( + NLEVELS => NLEVELS) + port map ( + din => din, + we => we, + dout => dout, + dav => dav, + clk => clk, + rst_n => rst_n, + ready => ready); + + -- clock generation + --Clk2 <= not Clk2 after 5 ns when end_sim = false else '0'; + Clk <= not Clk after 10 ns when end_sim = false else '0'; + + -- waveform generation + WaveGen_Proc : process + file events_in : text open read_mode is "./events.in"; + variable input_line : line; + file events_out : text open write_mode is "./events.out"; + variable output_line : line; + variable rec : T_DATA_REC; + variable skey : std_logic_vector(DATA_REC_SORT_KEY_WIDTH-1 downto 0); + variable spayload : std_logic_vector(DATA_REC_PAYLOAD_WIDTH-1 downto 0); + begin + -- insert signal assignments here + + wait until Clk = '1'; + wait for 31 ns; + rst_n <= '1'; + wait until ready = '1'; + loop + wait until Clk = '0'; + wait until Clk = '1'; + we <= '0'; + if div = 3 then + div <= 0; + exit when endfile(events_in); + readline(events_in, input_line); + read(input_line, rec.init); + read(input_line, rec.valid); + read(input_line, skey); + read(input_line, spayload); + rec.d_key := unsigned(skey); + rec.d_payload := spayload; + din <= rec; + we <= '1'; + else + div <= div+1; + end if; + if dav = '1' then + -- Process read event + rec := dout; + write(output_line, rec.init); + write(output_line, rec.valid); + write(output_line,string'(" ")); + write(output_line, std_logic_vector(rec.d_key)); + write(output_line,string'(" ")); + write(output_line, std_logic_vector(rec.d_payload)); + writeline(events_out, output_line); + end if; + end loop; + end_sim <= true; + rec.valid := '0'; + din <= rec; + wait; + end process WaveGen_Proc; + + + +end architecture sort_tb_beh; + +------------------------------------------------------------------------------- + +configuration sorter_sys_tb_sort_tb_beh_cfg of sorter_sys_tb is + for sort_tb_beh + end for; +end sorter_sys_tb_sort_tb_beh_cfg; + +------------------------------------------------------------------------------- Index: high_speed_pipelined_4clk_per_word/src/sys_config.vhd =================================================================== --- high_speed_pipelined_4clk_per_word/src/sys_config.vhd (nonexistent) +++ high_speed_pipelined_4clk_per_word/src/sys_config.vhd (revision 5) @@ -0,0 +1,9 @@ +library ieee; +use ieee.std_logic_1164.all; +library work; +package sys_config is + constant SORT_DEBUG : boolean :=false; + constant SYS_NLEVELS : integer :=10; + constant DATA_REC_SORT_KEY_WIDTH : integer :=18; + constant DATA_REC_PAYLOAD_WIDTH : integer :=18; +end sys_config; Index: standard_version/comp/dummy.txt =================================================================== --- standard_version/comp/dummy.txt (nonexistent) +++ standard_version/comp/dummy.txt (revision 5) @@ -0,0 +1 @@ +This file is here to ensure creation of comp directory \ No newline at end of file Index: standard_version/makefile =================================================================== --- standard_version/makefile (nonexistent) +++ standard_version/makefile (revision 5) @@ -0,0 +1,31 @@ +VHDLS = \ + src/sys_config.vhd \ + src/sorter_pkg.vhd \ + src/dpram4.vhd \ + src/sort_dpram.vhd \ + src/sorter_ctrl.vhd \ + src/sorter_sys.vhd \ + src/sorter_sys_tb.vhd \ + +#STD=standard +STD=synopsys +VSTD=93c +ENTITY=sorter_sys_tb +RUN_OPTIONS= +all: events.in events.out test +events.in: sort_test_gen.py + ./sort_test_gen.py +reader: ${ENTITY} ${ENTITY}.ghw + gtkwave ${ENTITY}.ghw ${ENTITY}.sav +${ENTITY}: ${VHDLS} +# vhdlp -work fmf fmf/*.vhd + ghdl -a --workdir=comp --std=${VSTD} --ieee=${STD} ${VHDLS} + ghdl -e --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY} +events.out: ${ENTITY} events.in +# ./${ENTITY} --wave=${ENTITY}.ghw ${RUN_OPTIONS} --stop-time=50000ns 2>&1 > res.txt + ./${ENTITY} ${RUN_OPTIONS} 2>&1 > res.txt +test: + ./sort_test_check.py +clean: + rm -f comp/* *.o *.vcd *.ghw events* ${ENTITY} + Index: standard_version/sort_test_check.py =================================================================== --- standard_version/sort_test_check.py (nonexistent) +++ standard_version/sort_test_check.py (revision 5) @@ -0,0 +1,46 @@ +#!/usr/bin/python +# This Python script checks if the records were sorted +# correctly... +import sys +# We read the input records and store them in one vector +fi=open("events.in","r") +ri=fi.read().split("\n") +ri=[i.split(" ") for i in ri] +# Leave only valid records +ri=[i for i in ri if len(i)==3 and i[0]=="01"] +# We read the output vectors and store them in a second vector +fo=open("events.out","r") +ro=fo.read().split("\n") +ro=[i.split(" ") for i in ro ] +# Leave only valid records +ro=[i for i in ro if len(i)==3 and i[0]=="01"] +# We check if the output vectors are correctly sorted +for i in range(1,len(ro)): + # Theoretically we could simply check the condition: + # int(ro[i-1][1],2) <= int(ro[i][1],2) + # However for longer sequences we may need to + # consider the fact that sort keys (time stamps) + # will wrap around. + # Therefore we need to perform slightly more + # complicated test - if we use N bits to store + # the sort key, then we need to subtract keys modulo + # 2**N and if the difference is in range (0,2**(N-1)] + # we consider the difference positive, while in range + # (2**(N-1),(2**N)-1) we consider it negative. + k1 = ro[i-1][1] + k2 = ro[i][1] + dlim = 1< dlim/2: + print "Records unsorted!\n" + print str(i-1)+": "+str(ro[i-1]) + print str(i)+": "+str(ro[i]) + sys.exit(1) +# We check if all input vectors were transferred to the output +# Now we only check size of vectors +if len(ro) != len(ri): + print "Not all records transferred!\n" + sys.exit(1) +print "Test passed!\n" +sys.exit(0) +
standard_version/sort_test_check.py Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: standard_version/sort_test_gen.py =================================================================== --- standard_version/sort_test_gen.py (nonexistent) +++ standard_version/sort_test_gen.py (revision 5) @@ -0,0 +1,70 @@ +#!/usr/bin/python +# +# This Python script generates the input patterns for the sorter +# It also generates the sys_config.vhd file with constants +# describing structure of data records +# +# You can customize constants below +key_width = 8 # Width of the key part of the record +pay_width = 4 # Width of the payload part of the record +max_dist = 63 # Maximum distance between unsorted records +seq_len = 2000 # Length of the generated sequence +sort_debug = "true" #uncomment this, or the next line +sort_debug = "false" #alwayse set sort_debug to false for synthesis! +# +# The algorithm is very simple - we just generate the sequence +# of records with continuously increasing key numbers +# Then we increase the key number in each record by the random +# number, taken from the range: [0,max_dist] +import math +import sys +# calculate necessary amount of levels in the sorter +sys_nlevels=1 +nrec=4 +while nrec ((1<<(key_width-1))-1): + print "Too high maximum distance between unsorted records" + print "for defined width of the sort key. Please increase" + print "the key_width value in the sort_test_gen.py file!" + sys.exit(1) +# Then we prepare the VHDL file with system configuration +sc=open('src/sys_config.vhd','w') +l="library ieee;\n" +l+="use ieee.std_logic_1164.all;\n" +l+="library work;\n" +l+="package sys_config is\n" +l+=" constant SORT_DEBUG : boolean :="+sort_debug+";\n" +l+=" constant SYS_NLEVELS : integer :="+str(sys_nlevels)+";\n" +l+=" constant DATA_REC_SORT_KEY_WIDTH : integer :="+str(key_width)+";\n" +l+=" constant DATA_REC_PAYLOAD_WIDTH : integer :="+str(pay_width)+";\n" +l+="end sys_config;\n" +sc.write(l) +sc.close() +# Generate the input patterns +fo=open('events.in','w') +t=range(1,seq_len+1) +import random +r=random.Random() +r.seed() +t2=[i+r.randint(0,max_dist) for i in t] +# Now let's prepare the input events: +key_format="{0:0"+str(key_width)+"b}" +pay_format="{0:0"+str(pay_width)+"b}" +for i in t2: + j = i & ((1<
standard_version/sort_test_gen.py Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: standard_version/src/dpram4_synth.vhd =================================================================== --- standard_version/src/dpram4_synth.vhd (nonexistent) +++ standard_version/src/dpram4_synth.vhd (revision 5) @@ -0,0 +1,63 @@ +-- Dual port, single clock memory, inferrable in Xilinx and Altera FPGA + +library ieee; +use ieee.std_logic_1164.all; + +entity dp_ram_scl is + + generic + ( + DATA_WIDTH : natural := 8; + ADDR_WIDTH : natural := 6 + ); + + port + ( + clk : in std_logic; + addr_a : in natural range 0 to 2**ADDR_WIDTH - 1; + addr_b : in natural range 0 to 2**ADDR_WIDTH - 1; + data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); + data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); + we_a : in std_logic := '1'; + we_b : in std_logic := '1'; + q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); + q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) + ); + +end dp_ram_scl; + + +architecture rtl of dp_ram_scl is + + -- Create a type for data word + subtype data_word is std_logic_vector((DATA_WIDTH-1) downto 0); + type ram_memory is array((2**ADDR_WIDTH-1) downto 0) of data_word; + + -- Declare the RAM variable. + shared variable ram : ram_memory; + +begin + + process(clk) + begin + if(rising_edge(clk)) then + -- Port B + if(we_b = '1') then + ram(addr_b) := data_b; + end if; + q_b <= ram(addr_b); + end if; + end process; + + process(clk) + begin + if(rising_edge(clk)) then + -- Port A + if(we_a = '1') then + ram(addr_a) := data_a; + end if; + q_a <= ram(addr_a); + end if; + end process; + +end rtl; Index: standard_version/src/sorter_ctrl.vhd =================================================================== --- standard_version/src/sorter_ctrl.vhd (nonexistent) +++ standard_version/src/sorter_ctrl.vhd (revision 5) @@ -0,0 +1,324 @@ +------------------------------------------------------------------------------- +-- Title : Sorting node controller for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_ctrl.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2013-07-04 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- +------------------------------------------------------------------------------- +-- The sorter controller is connected with three dual port memories. +-- The first dual port memory tm_... provides the "upstream data" +-- The second dual port memory lm_... provides the "left branch of downstream data" +-- The third dual port memory rm_... provides the "right branch of downstream data" +-- The controller is notified about availability of the new data by the +-- "update" signal. +-- However in this architecture we need to service two upstream memories! +-- That's because we want to save one cycle, and to be able to issue +-- +-- Important feature of each controller is the ability to clear the memory +-- after reset. +------------------------------------------------------------------------------- +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sorter_ctrl is + + generic ( + NLEVELS : integer; -- number of levels (max number of + -- address bits + NADDRBITS : integer -- number of used address bits + ); + + port ( + -- Top memory connections + tm_din : in T_DATA_REC; + tm_dout : out T_DATA_REC; + tm_addr : out std_logic_vector(NLEVELS-1 downto 0); + tm_we : out std_logic; + -- Left memory connections + lm_din : in T_DATA_REC; + lm_dout : out T_DATA_REC; + lm_addr : out std_logic_vector(NLEVELS-1 downto 0); + lm_we : out std_logic; + -- Right memory connections + rm_din : in T_DATA_REC; + rm_dout : out T_DATA_REC; + rm_addr : out std_logic_vector(NLEVELS-1 downto 0); + rm_we : out std_logic; + -- Upper level controller connections + up_in : in std_logic; + up_in_val : in T_DATA_REC; + up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + -- Upper level update notifier + up_out : out std_logic; + up_out_val : out T_DATA_REC; + up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + -- Lower level controller connections + low_out : out std_logic; + low_out_val : out T_DATA_REC; + low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_in : in std_logic; + low_in_val : in T_DATA_REC; + low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + -- Lower level update notifier + -- System connections + clk : in std_logic; + clk_en : in std_logic; + ready_in : in std_logic; + ready_out : out std_logic; -- signals, when memory is cleared + -- after reset + rst_n : in std_logic); +end sorter_ctrl; + +architecture sorter_ctrl_arch1 of sorter_ctrl is + + type T_CTRL_STATE is (CTRL_RESET, CTRL_CLEAR, CTRL_IDLE, CTRL_S1, CTRL_S0); + signal ctrl_state, ctrl_state_next : T_CTRL_STATE := CTRL_IDLE; + signal addr, addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_low_in_addr, s_low_in_addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_up_in_addr, s_up_in_addr_i : std_logic_vector(NLEVELS-1 downto 0); + signal s_ready_out, s_ready_out_i : std_logic; + signal s_low_in, s_low_in_i : std_logic; + signal s_addr_out : std_logic_vector(NLEVELS-1 downto 0); + signal s_tm_dout : T_DATA_REC; + signal s_up_in_val_i, s_up_in_val : T_DATA_REC := DATA_REC_INIT_DATA; + signal s_low_in_val_i, s_low_in_val : T_DATA_REC := DATA_REC_INIT_DATA; + + + constant ADDR_MAX : std_logic_vector(NLEVELS-1 downto 0) := std_logic_vector(to_unsigned(2**NADDRBITS-1, NLEVELS)); + +begin + + tm_dout <= s_tm_dout; +-- We have the two-process state machine. + p1 : process (addr, ctrl_state, lm_din, low_in, low_in_addr, low_in_val, + ready_in, rm_din, s_addr_out, s_low_in, s_low_in_addr, + s_low_in_val, s_ready_out, s_up_in_val, up_in, up_in_addr, + up_in_val) + variable rline : line; + variable l_val : T_DATA_REC; + variable r_val : T_DATA_REC; + + begin -- process p1 + -- defaults + ctrl_state_next <= ctrl_state; + tm_we <= '0'; + rm_we <= '0'; + lm_we <= '0'; + lm_addr <= (others => '0'); + rm_addr <= (others => '0'); + tm_addr <= (others => '0'); + s_ready_out_i <= s_ready_out; + addr_i <= addr; + up_out_val <= DATA_REC_INIT_DATA; -- to avoid latches + low_out_val <= DATA_REC_INIT_DATA; -- to avoid latches + s_low_in_addr_i <= s_low_in_addr; + s_low_in_i <= low_in; + low_out <= '0'; + up_out <= '0'; + up_out_addr <= (others => '0'); + s_up_in_val_i <= s_up_in_val; + s_low_in_val_i <= s_low_in_val; + lm_dout <= DATA_REC_INIT_DATA; + rm_dout <= DATA_REC_INIT_DATA; + s_tm_dout <= DATA_REC_INIT_DATA; + s_addr_out <= (others => '0'); + case ctrl_state is + when CTRL_RESET => + addr_i <= (others => '0'); + s_ready_out_i <= '0'; + ctrl_state_next <= CTRL_CLEAR; + when CTRL_CLEAR => + lm_addr <= addr; + rm_addr <= addr; + lm_dout <= DATA_REC_INIT_DATA; + rm_dout <= DATA_REC_INIT_DATA; + lm_we <= '1'; + rm_we <= '1'; + if addr = ADDR_MAX then + if ready_in = '1' then + s_ready_out_i <= '1'; + ctrl_state_next <= CTRL_IDLE; + end if; + else + addr_i <= std_logic_vector(unsigned(addr)+1); + end if; + when CTRL_IDLE => + -- We read "down" memories ("upper" value is provided by the ``bypass channel'') + if up_in = '1' then + ctrl_state_next <= CTRL_S1; + tm_addr <= up_in_addr; + lm_addr <= up_in_addr; + rm_addr <= up_in_addr; + addr_i <= up_in_addr; + s_up_in_val_i <= up_in_val; + if low_in = '1' then + s_low_in_val_i <= low_in_val; + s_low_in_addr_i <= low_in_addr; + end if; + end if; + when CTRL_S1 => + -- In this cycle we can compare data + -- Debug output! + if SORT_DEBUG then + write(rline, string'("CMP ")); + write(rline, NADDRBITS); + write(rline, string'(" U:")); + wrstlv(rline, tdrec2stlv(s_up_in_val)); + end if; + l_val := lm_din; + r_val := rm_din; + -- Check, if we need to take value from lower ``bypass channel'' + if s_low_in = '1' then + if SORT_DEBUG then + write(rline, string'(" x! ")); + end if; + if (addr(NADDRBITS-1 downto 0) = s_low_in_addr(NADDRBITS-1 downto 0)) then + -- We are reading a value which was just updated, so we need to get it + -- from ``bypass channel'' instead of memory + if SORT_DEBUG then + write(rline, string'(" y! ")); + end if; + if s_low_in_addr(NADDRBITS) = '1' then + l_val := s_low_in_val; + else + r_val := s_low_in_val; + end if; + end if; + end if; + if SORT_DEBUG then + write(rline, string'(" L:")); + wrstlv(rline, tdrec2stlv(l_val)); + write(rline, string'(" R:")); + wrstlv(rline, tdrec2stlv(r_val)); + write(rline, string'(" A:")); + end if; + if sort_cmp_lt(l_val, s_up_in_val) and sort_cmp_lt(l_val, r_val) then + -- The L-ram value is the smallest + -- Output the value from the L-ram and put the new value into the L-ram + s_tm_dout <= l_val; + tm_addr <= addr; + tm_we <= '1'; + + up_out_val <= l_val; + up_out <= '1'; + up_out_addr <= addr; + + lm_addr <= addr; + lm_dout <= s_up_in_val; + lm_we <= '1'; + + low_out <= '1'; + low_out_val <= s_up_in_val; + s_addr_out(NADDRBITS) <= '1'; + + if NADDRBITS > 0 then + s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); + end if; + wrstlv(rline, s_addr_out); + ctrl_state_next <= CTRL_IDLE; + if SORT_DEBUG then + write(rline, string'(" T<->L")); + end if; + elsif sort_cmp_lt(r_val, s_up_in_val) then + -- The R-ram value is the smallest + -- Output the value from the R-ram and put the new value into the R-ram + s_tm_dout <= r_val; + tm_addr <= addr; + tm_we <= '1'; + + up_out_val <= r_val; + up_out <= '1'; + up_out_addr <= addr; + + rm_addr <= addr; + rm_dout <= s_up_in_val; + rm_we <= '1'; + + low_out <= '1'; + low_out_val <= s_up_in_val; + + s_addr_out(NADDRBITS) <= '0'; + if NADDRBITS > 0 then + s_addr_out(NADDRBITS-1 downto 0) <= addr(NADDRBITS-1 downto 0); + end if; + ctrl_state_next <= CTRL_IDLE; + if SORT_DEBUG then + wrstlv(rline, s_addr_out); + write(rline, string'(" T<->R")); + end if; + else + -- The new value is the smallest + -- Nothing to do, no update downstream + s_tm_dout <= s_up_in_val; + tm_we <= '1'; + tm_addr <= addr; + + up_out_val <= s_up_in_val; + up_out <= '1'; + up_out_addr <= addr; + + ctrl_state_next <= CTRL_IDLE; + wrstlv(rline, up_in_addr); + if SORT_DEBUG then + write(rline, string'(" T===T")); + end if; + end if; + if SORT_DEBUG then + writeline(reports, rline); + end if; + when others => null; + end case; + end process p1; + + p2 : process (clk, rst_n) is + begin -- process p2 + if rst_n = '0' then -- asynchronous reset (active low) + ctrl_state <= CTRL_RESET; + s_ready_out <= '0'; + addr <= (others => '0'); + s_low_in_addr <= (others => '0'); + s_low_in <= '0'; + s_low_in_val <= DATA_REC_INIT_DATA; + s_up_in_val <= DATA_REC_INIT_DATA; + --update_out <= '0'; + --addr_out <= (others => '0'); + elsif clk'event and clk = '1' then -- rising clock edge + s_ready_out <= s_ready_out_i; + ctrl_state <= ctrl_state_next; + addr <= addr_i; + s_low_in_addr <= s_low_in_addr_i; + s_low_in_val <= s_low_in_val_i; + s_up_in_val <= s_up_in_val_i; + s_low_in <= s_low_in_i; + end if; + end process p2; + ready_out <= s_ready_out; + low_out_addr <= s_addr_out; +end sorter_ctrl_arch1; Index: standard_version/src/dpram_inf.vhd =================================================================== --- standard_version/src/dpram_inf.vhd (nonexistent) +++ standard_version/src/dpram_inf.vhd (revision 5) @@ -0,0 +1,60 @@ +-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL. +-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/ +-- No license information were provided by the original author. +-- Minimal modifications were introduced by me to make it suitable for my sorter. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.std_logic_unsigned.all; + +entity dp_ram_scl is + generic ( + DATA_WIDTH : integer := 72; + ADDR_WIDTH : integer := 10 + ); + port ( +-- common clock + clk : in std_logic; + -- Port A + we_a : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_a : out std_logic_vector(DATA_WIDTH-1 downto 0); + + -- Port B + we_b : in std_logic; + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_b : in std_logic_vector(DATA_WIDTH-1 downto 0); + q_b : out std_logic_vector(DATA_WIDTH-1 downto 0) + ); +end dp_ram_scl; + +architecture rtl of dp_ram_scl is + -- Shared memory + type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0); + shared variable mem : mem_type; +begin + +-- Port A + process(clk) + begin + if(clk'event and clk = '1') then + if(we_a = '1') then + mem(conv_integer(addr_a)) := data_a; + end if; + q_a <= mem(conv_integer(addr_a)); + end if; + end process; + +-- Port B + process(clk) + begin + if(clk'event and clk = '1') then + if(we_b = '1') then + mem(conv_integer(addr_b)) := data_b; + end if; + q_b <= mem(conv_integer(addr_b)); + end if; + end process; + +end rtl; Index: standard_version/src/sorter_pkg.vhd =================================================================== --- standard_version/src/sorter_pkg.vhd (nonexistent) +++ standard_version/src/sorter_pkg.vhd (revision 5) @@ -0,0 +1,214 @@ +------------------------------------------------------------------------------- +-- Title : Definitions for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_pkg.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2011-07-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sys_config.all; + +package sorter_pkg is + constant DATA_REC_WIDTH : integer := DATA_REC_SORT_KEY_WIDTH + + DATA_REC_PAYLOAD_WIDTH + 2; + + + subtype T_SORT_KEY is unsigned (DATA_REC_SORT_KEY_WIDTH - 1 downto 0); + subtype T_PAYLOAD is std_logic_vector(DATA_REC_PAYLOAD_WIDTH - 1 downto 0); + + --alias T_SORT_KEY is unsigned (12 downto 0); + type T_DATA_REC is record + d_key : T_SORT_KEY; + init : std_logic; + valid : std_logic; + d_payload : T_PAYLOAD; + end record; + + -- Special constant used to initially fill the sorter + -- Must be sorted so, that is smaller, than any other data + constant DATA_REC_INIT_DATA : T_DATA_REC := ( + d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), + init => '1', + valid => '0', + d_payload => (others => '0') + ); + + -- Special constant used to ``flush'' the sorter at the end + constant DATA_REC_END_DATA : T_DATA_REC := ( + d_key => to_unsigned(0, DATA_REC_SORT_KEY_WIDTH), + init => '1', + valid => '1', + d_payload => (others => '0') + ); + + + function sort_cmp_lt ( + constant v1 : T_DATA_REC; + constant v2 : T_DATA_REC) + return boolean; + + function tdrec2stlv ( + constant drec : T_DATA_REC) + return std_logic_vector; + + function stlv2tdrec ( + constant dstlv : std_logic_vector) + return T_DATA_REC; + + procedure wrstlv ( + rline : inout line; + constant vect : std_logic_vector); + + file reports : text open write_mode is "STD_OUTPUT"; + +end sorter_pkg; + +package body sorter_pkg is + + function stlv2tdrec ( + constant dstlv : std_logic_vector) + return T_DATA_REC is + variable result : T_DATA_REC; + variable j : integer := 0; + begin -- stlv2drec + j := 0; + result.d_key := unsigned(dstlv(j-1+DATA_REC_SORT_KEY_WIDTH downto j)); + j := j+DATA_REC_SORT_KEY_WIDTH; + result.valid := dstlv(j); + j := j+1; + result.init := dstlv(j); + j := j+1; + result.d_payload := dstlv(j-1+DATA_REC_PAYLOAD_WIDTH downto j); + j := j+DATA_REC_PAYLOAD_WIDTH; + return result; + end stlv2tdrec; + + function tdrec2stlv ( + constant drec : T_DATA_REC) + return std_logic_vector is + variable result : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + variable j : integer := 0; + begin -- tdrec2stlv + j := 0; + result(j-1+DATA_REC_SORT_KEY_WIDTH downto j) := std_logic_vector(drec.d_key); + j := j+DATA_REC_SORT_KEY_WIDTH; + result(j) := drec.valid; + j := j+1; + result(j) := drec.init; + j := j+1; + result(j-1+DATA_REC_PAYLOAD_WIDTH downto j) := std_logic_vector(drec.d_payload); + j := j+DATA_REC_PAYLOAD_WIDTH; + return result; + end tdrec2stlv; + + + -- Function sort_cmp_lt returns TRUE when the first opperand is ``less'' than + -- the second one + function sort_cmp_lt ( + constant v1 : T_DATA_REC; + constant v2 : T_DATA_REC) + return boolean is + variable rline : line; + variable dcomp : unsigned(DATA_REC_SORT_KEY_WIDTH-1 downto 0) := (others => '0'); + begin -- sort_cmp_lt + -- Check the special cases + if (v1.init = '1') and (v2.init = '0') then + -- v1 is the special record, v2 is the standard one + if v1.valid = '0' then + -- initialization record - ``smaller'' than all standard records + return true; + else + -- end record - ``bigger'' than all standard records + return false; + end if; + elsif (v1.init = '0') and (v2.init = '1') then + -- v2 is the special record, v1 is the standard one + if (v2.valid = '0') then + -- v2 is the initialization record - it is ``smaller'' than standard record v1 + return false; + else + -- v2 is the end record - it is ``bigger'' than standard record v1 + return true; + end if; + elsif (v1.init = '1') and (v2.init = '1') then + -- both v1 and v2 are special records + if (v1.valid = '0') and (v2.valid = '1') then + -- v1 - initial record, v2 - end record + return true; + else + -- v1 is end record, so it is ``bigger'' or ``equal'' to other records + return false; + end if; + elsif (v1.init = '0') and (v2.init = '0') then + -- We compare standard words + -- We must consider the fact, that in longer sequences of data records + -- the sort keys may wrap around + -- therefore we perform subtraction modulo + -- 2**DATA_REC_SORT_KEY_WIDTH and check the MSB + dcomp := v1.d_key-v2.d_key; + if dcomp(DATA_REC_SORT_KEY_WIDTH-1) = '1' then + --if signed(v1.d_key - v2.d_key)<0 then -- old implementation + return true; + elsif v2.d_key = v1.d_key then + if v2.valid = '1' then + return true; + else + -- Empty data records should wait + return false; + end if; + else + return false; + end if; + else + assert false report "Wrong records in sort_cmp_lt" severity error; + return false; + end if; + return false; -- should never happen + end sort_cmp_lt; + + + procedure wrstlv ( + rline : inout line; + constant vect : std_logic_vector) is + begin -- stlv2str + for i in vect'left downto vect'right loop + case vect(i) is + when 'U' => write(rline, string'("u")); + when 'Z' => write(rline, string'("z")); + when 'X' => write(rline, string'("x")); + when 'L' => write(rline, string'("L")); + when 'H' => write(rline, string'("H")); + when '1' => write(rline, string'("1")); + when '0' => write(rline, string'("0")); + when others => write(rline, string'("?")); + end case; + end loop; -- i + end wrstlv; + +end sorter_pkg; + Index: standard_version/src/sorter_sys_tb.vhd =================================================================== --- standard_version/src/sorter_sys_tb.vhd (nonexistent) +++ standard_version/src/sorter_sys_tb.vhd (revision 5) @@ -0,0 +1,158 @@ +------------------------------------------------------------------------------- +-- Title : Testbench for design "heap-sorter" +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_sys_tb.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2011-07-06 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sys_config.all; +use work.sorter_pkg.all; + + +------------------------------------------------------------------------------- + +entity sorter_sys_tb is + +end entity sorter_sys_tb; + +------------------------------------------------------------------------------- + +architecture sort_tb_beh of sorter_sys_tb is + + constant NLEVELS : integer := SYS_NLEVELS; + -- component ports + signal din : T_DATA_REC := DATA_REC_INIT_DATA; + signal dout : T_DATA_REC; + signal we : std_logic := '0'; + signal dav : std_logic := '0'; + signal rst_n : std_logic := '0'; + signal ready : std_logic := '0'; + + component sorter_sys + generic ( + NADDRBITS : integer); + port ( + din : in T_DATA_REC; + we : in std_logic; + dout : out T_DATA_REC; + dav : out std_logic; + clk : in std_logic; + rst_n : in std_logic; + ready : out std_logic); + end component; + -- clock + signal Clk : std_logic := '1'; + + signal end_sim : boolean := false; + signal div : integer range 0 to 8 := 0; + +begin -- architecture sort_tb_beh + + -- component instantiation + DUT : entity work.sorter_sys + generic map ( + NLEVELS => NLEVELS) + port map ( + din => din, + we => we, + dout => dout, + dav => dav, + clk => clk, + rst_n => rst_n, + ready => ready); + + -- clock generation + Clk <= not Clk after 10 ns when end_sim = false else '0'; + + -- waveform generation + WaveGen_Proc : process + file events_in : text open read_mode is "events.in"; + variable input_line : line; + file events_out : text open write_mode is "events.out"; + variable output_line : line; + variable rec : T_DATA_REC; + variable skey : std_logic_vector(DATA_REC_SORT_KEY_WIDTH-1 downto 0); + variable spayload : std_logic_vector(DATA_REC_PAYLOAD_WIDTH-1 downto 0); + begin + -- insert signal assignments here + + wait until Clk = '1'; + wait for 31 ns; + rst_n <= '1'; + wait until ready = '1'; + loop + wait until Clk = '0'; + wait until Clk = '1'; + we <= '0'; + if div = 3 then + div <= 0; + exit when endfile(events_in); + readline(events_in, input_line); + read(input_line, rec.init); + read(input_line, rec.valid); + read(input_line, skey); + read(input_line, spayload); + rec.d_key := unsigned(skey); + rec.d_payload := spayload; + din <= rec; + we <= '1'; + else + div <= div+1; + end if; + if dav = '1' then + -- Process read event + rec := dout; + write(output_line, rec.init); + write(output_line, rec.valid); + write(output_line,string'(" ")); + write(output_line, std_logic_vector(rec.d_key)); + write(output_line,string'(" ")); + write(output_line, std_logic_vector(rec.d_payload)); + writeline(events_out, output_line); + end if; + end loop; + end_sim <= true; + rec.valid := '0'; + din <= rec; + wait; + end process WaveGen_Proc; + + + +end architecture sort_tb_beh; + +------------------------------------------------------------------------------- + +configuration sorter_sys_tb_sort_tb_beh_cfg of sorter_sys_tb is + for sort_tb_beh + end for; +end sorter_sys_tb_sort_tb_beh_cfg; + +------------------------------------------------------------------------------- Index: standard_version/src/sys_config.vhd =================================================================== --- standard_version/src/sys_config.vhd (nonexistent) +++ standard_version/src/sys_config.vhd (revision 5) @@ -0,0 +1,9 @@ +library ieee; +use ieee.std_logic_1164.all; +library work; +package sys_config is + constant SORT_DEBUG : boolean :=false; + constant SYS_NLEVELS : integer :=5; + constant DATA_REC_SORT_KEY_WIDTH : integer :=8; + constant DATA_REC_PAYLOAD_WIDTH : integer :=4; +end sys_config; Index: standard_version/src/dpram4.vhd =================================================================== --- standard_version/src/dpram4.vhd (nonexistent) +++ standard_version/src/dpram4.vhd (revision 5) @@ -0,0 +1,87 @@ +-- Simulation model of the dual port RAM (DP RAM) with single clock +-- and with "read-after-write" operation. +-- This file was combined from multiple descriptions and models of dual port RAMs +-- which I was able to find in the Internet and in the documentation provided +-- by vendors like Xilinx or Altera. +-- Therefore the only thing I can do is to publish it as PUBLIC DOMAIN +-- +-- Please note, that for synthesis you should replace this file with +-- another DP RAM wrapper inferring the real DP RAM +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity dp_ram_scl is + + generic + ( + DATA_WIDTH : natural; + ADDR_WIDTH : natural + ); + + port + ( + clk : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); + data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); + we_a : in std_logic := '1'; + we_b : in std_logic := '1'; + q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); + q_b : out std_logic_vector((DATA_WIDTH -1) downto 0) + ); + +end dp_ram_scl; + +architecture rtl of dp_ram_scl is + + signal v_addr_a : natural range 0 to 2**ADDR_WIDTH - 1; + signal v_addr_b : natural range 0 to 2**ADDR_WIDTH - 1; + subtype word_t is std_logic_vector((DATA_WIDTH-1) downto 0); + type memory_t is array((2**ADDR_WIDTH-1) downto 0) of word_t; + + signal ram : memory_t := (others => x"33"); -- For debugging - initialize + -- simulated RAM with x"33" + +begin + + v_addr_a <= to_integer(unsigned(addr_a(ADDR_WIDTH-1 downto 0))); + v_addr_b <= to_integer(unsigned(addr_b(ADDR_WIDTH-1 downto 0))); + + process(clk) + begin + + if(rising_edge(clk)) then + -- Port A + if(we_a = '1') then + ram(v_addr_a) <= data_a; + -- read-after-write behavior + q_a <= data_a; + else + -- simulate "unknown" value when the same address is written via one port + -- and immediately read via another port + if we_b='1' and v_addr_a=v_addr_b then + q_a <= (others => 'X'); + else + q_a <= ram(v_addr_a); + end if; + end if; + -- Port B + if(we_b = '1') then + ram(v_addr_b) <= data_b; + -- read-after-write behavior + q_b <= data_b; + else + -- simulate "unknown" value when the same address is written via one port + -- and immediately read via another port + if we_a='1' and v_addr_a=v_addr_b then + q_b <= (others => 'X'); + else + q_b <= ram(v_addr_b); + end if; + end if; + end if; + end process; + +end rtl; Index: standard_version/src/sort_dpram.vhd =================================================================== --- standard_version/src/sort_dpram.vhd (nonexistent) +++ standard_version/src/sort_dpram.vhd (revision 5) @@ -0,0 +1,175 @@ +------------------------------------------------------------------------------- +-- Title : Parametrized DP RAM for heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sort_dpram.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2011-07-06 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sort_dp_ram is + + generic + ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string := "X" + ); + + port + ( + clk : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC + ); + +end sort_dp_ram; + +architecture rtl of sort_dp_ram is + + signal vq_a, vq_b, tdata_a, tdata_b : std_logic_vector(DATA_REC_WIDTH-1 downto 0); + signal reg : T_DATA_REC := DATA_REC_INIT_DATA; + + component dp_ram_scl + generic ( + DATA_WIDTH : natural; + ADDR_WIDTH : natural); + port ( + clk : in std_logic; + addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0); + addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0); + data_a : in std_logic_vector((DATA_WIDTH-1) downto 0); + data_b : in std_logic_vector((DATA_WIDTH-1) downto 0); + we_a : in std_logic := '1'; + we_b : in std_logic := '1'; + q_a : out std_logic_vector((DATA_WIDTH -1) downto 0); + q_b : out std_logic_vector((DATA_WIDTH -1) downto 0)); + end component; + +begin + + -- Convert our data records int std_logic_vector, so that + -- standard DP RAM may handle it + tdata_a <= tdrec2stlv(data_a); + tdata_b <= tdrec2stlv(data_b); + + + i1 : if ADDR_WIDTH > 0 generate + -- When ADDR_WIDTH is above 0 embed the real DP RAM + -- (even though synthesis tool may still replace it with + -- registers during optimization for low ADDR_WIDTH) + + q_a <= stlv2tdrec(vq_a); + q_b <= stlv2tdrec(vq_b); + + dp_ram_1 : dp_ram_scl + generic map ( + DATA_WIDTH => DATA_REC_WIDTH, + ADDR_WIDTH => ADDR_WIDTH) + port map ( + clk => clk, + addr_a => addr_a(ADDR_WIDTH-1 downto 0), + addr_b => addr_b(ADDR_WIDTH-1 downto 0), + data_a => tdata_a, + data_b => tdata_b, + we_a => we_a, + we_b => we_b, + q_a => vq_a, + q_b => vq_b); + + end generate i1; + + i2 : if ADDR_WIDTH = 0 generate + -- When ADDR_WIDTH is 0, DP RAM should be simply replaced + -- with a register implemented below + + p1 : process (clk) + begin -- process p1 + if clk'event and clk = '1' then -- rising clock edge + if we_a = '1' then + reg <= data_a; + q_a <= data_a; + q_b <= data_a; + elsif we_b = '1' then + reg <= data_b; + q_a <= data_b; + q_b <= data_b; + else + q_a <= reg; + q_b <= reg; + end if; + end if; + end process p1; + + end generate i2; + + dbg1 : if SORT_DEBUG generate + + -- Process monitoring read/write accesses to the memory (only for debugging) + p3 : process (clk) + variable rline : line; + begin -- process p1 + if clk'event and clk = '1' then -- rising clock edge + if(we_a = '1' and we_b = '1') then + write(rline, NAME); + write(rline, ADDR_WIDTH); + write(rline, string'(" Possible write collision!")); + writeline(reports, rline); + end if; + + if we_a = '1' then + write(rline, NAME); + write(rline, ADDR_WIDTH); + write(rline, string'(" WR_A:")); + wrstlv(rline, addr_a(ADDR_WIDTH-1 downto 0)); + write(rline, string'(" VAL:")); + wrstlv(rline, tdata_a); + writeline(reports, rline); + end if; + if we_b = '1' then + write(rline, NAME); + write(rline, ADDR_WIDTH); + write(rline, string'(" WR_B:")); + wrstlv(rline, addr_b(ADDR_WIDTH-1 downto 0)); + write(rline, string'(" VAL:")); + wrstlv(rline, tdata_b); + writeline(reports, rline); + end if; + end if; + end process p3; + end generate dbg1; +end rtl; Index: standard_version/src/sorter_sys.vhd =================================================================== --- standard_version/src/sorter_sys.vhd (nonexistent) +++ standard_version/src/sorter_sys.vhd (revision 5) @@ -0,0 +1,290 @@ +------------------------------------------------------------------------------- +-- Title : Top entity of heap-sorter +-- Project : heap-sorter +------------------------------------------------------------------------------- +-- File : sorter_sys.vhd +-- Author : Wojciech M. Zabolotny +-- Company : +-- Created : 2010-05-14 +-- Last update: 2011-07-11 +-- Platform : +-- Standard : VHDL'93 +------------------------------------------------------------------------------- +-- Description: +------------------------------------------------------------------------------- +-- Copyright (c) 2010 Wojciech M. Zabolotny +-- This file is published under the BSD license, so you can freely adapt +-- it for your own purposes. +-- Additionally this design has been described in my article +-- Additionally this design has been described in my article: +-- Wojciech M. Zabolotny, "Dual port memory based Heapsort implementation +-- for FPGA", Proc. SPIE 8008, 80080E (2011); doi:10.1117/12.905281 +-- I'd be glad if you cite this article when you publish something based +-- on my design. +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2010-05-14 1.0 wzab Created +------------------------------------------------------------------------------- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use ieee.std_logic_textio.all; +use std.textio.all; +library work; +use work.sorter_pkg.all; +use work.sys_config.all; + +entity sorter_sys is + generic ( + NLEVELS : integer := SYS_NLEVELS -- number of levels in the sorter heap + ); + + port ( + din : in T_DATA_REC; + we : in std_logic; + dout : out T_DATA_REC; + dav : out std_logic; + clk : in std_logic; + rst_n : in std_logic; + ready : out std_logic); +end sorter_sys; + +architecture sorter_sys_arch1 of sorter_sys is + + component sort_dp_ram + generic ( + ADDR_WIDTH : natural; + NLEVELS : natural; + NAME : string); + port ( + clk : in std_logic; + addr_a : in std_logic_vector(NLEVELS-1 downto 0); + addr_b : in std_logic_vector(NLEVELS-1 downto 0); + data_a : in T_DATA_REC; + data_b : in T_DATA_REC; + we_a : in std_logic; + we_b : in std_logic; + q_a : out T_DATA_REC; + q_b : out T_DATA_REC); + end component; + + component sorter_ctrl + generic ( + NLEVELS : integer; + NADDRBITS : integer); + port ( + tm_din : in T_DATA_REC; + tm_dout : out T_DATA_REC; + tm_addr : out std_logic_vector(NLEVELS-1 downto 0); + tm_we : out std_logic; + lm_din : in T_DATA_REC; + lm_dout : out T_DATA_REC; + lm_addr : out std_logic_vector(NLEVELS-1 downto 0); + lm_we : out std_logic; + rm_din : in T_DATA_REC; + rm_dout : out T_DATA_REC; + rm_addr : out std_logic_vector(NLEVELS-1 downto 0); + rm_we : out std_logic; + up_in : in std_logic; + up_in_val : in T_DATA_REC; + up_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + up_out : out std_logic; + up_out_val : out T_DATA_REC; + up_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_out : out std_logic; + low_out_val : out T_DATA_REC; + low_out_addr : out std_logic_vector(NLEVELS-1 downto 0); + low_in : in std_logic; + low_in_val : in T_DATA_REC; + low_in_addr : in std_logic_vector(NLEVELS-1 downto 0); + clk : in std_logic; + clk_en : in std_logic; + ready_in : in std_logic; + ready_out : out std_logic; + rst_n : in std_logic); + end component; + + -- Create signals for address buses + -- Some of them will remain unused. + subtype T_SORT_BUS_ADDR is std_logic_vector(NLEVELS-1 downto 0); + type T_SORT_ADDR_BUSES is array (NLEVELS downto 0) of T_SORT_BUS_ADDR; + signal low_addr, up_addr, addr_dr, addr_dl, addr_u : T_SORT_ADDR_BUSES := (others => (others => '0')); + type T_SORT_DATA_BUSES is array (NLEVELS downto 0) of T_DATA_REC; + signal up_update_path, low_update_path, data_d, data_dl, data_dr, data_u : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); + signal q_dr, q_dl, q_u, q_ul, q_ur : T_SORT_DATA_BUSES := (others => DATA_REC_INIT_DATA); + signal we_ul, we_ur, we_u, we_dl, we_dr, low_update, up_update, s_ready : std_logic_vector(NLEVELS downto 0) := (others => '0'); + signal addr_switch, addr_switch_del : std_logic_vector(NLEVELS downto 0); + signal l0_reg : T_DATA_REC; + signal clk_en : std_logic := '1'; + +begin -- sorter_sys_arch1 + +-- Build the sorting tree + + g1 : for i in 0 to NLEVELS-1 generate + + -- Two RAMs from the upper level are seen as a single RAM + -- We use the most significant bit (i-th bit) to distinguish RAM + -- In all RAMs the A-ports are used for upstream connections + -- and the B-ports are used for downstream connections + + -- Below are processes used to combine two upstream RAMs in a single one + i0a : if i >= 1 generate + addr_switch(i) <= addr_u(i)(i-1); + end generate i0a; + i0b : if i = 0 generate + addr_switch(i) <= '0'; + end generate i0b; + + -- There is a problem with reading of data provided by two upstream RAMs + -- we need to multiplex the data... + -- Delay for read data multiplexer + s1 : process (clk, rst_n) + begin -- process s1 + if rst_n = '0' then -- asynchronous reset (active low) + addr_switch_del(i) <= '0'; + elsif clk'event and clk = '1' then -- rising clock edge + addr_switch_del(i) <= addr_switch(i); + end if; + end process s1; + + -- Upper RAM signals' multiplexer + c1 : process (addr_switch, addr_switch_del, q_ul, q_ur, we_u) + begin -- process c1 + we_ul(i) <= '0'; + we_ur(i) <= '0'; + if addr_switch(i) = '1' then + we_ul(i) <= we_u(i); + else + we_ur(i) <= we_u(i); + end if; + if addr_switch_del(i) = '1' then + q_u(i) <= q_ul(i); + else + q_u(i) <= q_ur(i); + end if; + end process c1; + + dp_ram_l : sort_dp_ram + generic map ( + NLEVELS => NLEVELS, + ADDR_WIDTH => i, + NAME => "L") + port map ( + clk => clk, + addr_a => addr_dl(i), + addr_b => addr_u(i+1), + data_a => data_dl(i), + data_b => data_u(i+1), + we_a => we_dl(i), + we_b => we_ul(i+1), + q_a => q_dl(i), + q_b => q_ul(i+1)); + + dp_ram_r : sort_dp_ram + generic map ( + NLEVELS => NLEVELS, + ADDR_WIDTH => i, + NAME => "R") + port map ( + clk => clk, + addr_a => addr_dr(i), + addr_b => addr_u(i+1), + data_a => data_dr(i), + data_b => data_u(i+1), + we_a => we_dr(i), + we_b => we_ur(i+1), + q_a => q_dr(i), + q_b => q_ur(i+1)); + + sorter_ctrl_1 : sorter_ctrl + generic map ( + NLEVELS => NLEVELS, + NADDRBITS => i) + port map ( + tm_din => q_u(i), + tm_dout => data_u(i), + tm_addr => addr_u(i), + tm_we => we_u(i), + lm_din => q_dl(i), + lm_dout => data_dl(i), + lm_addr => addr_dl(i), + lm_we => we_dl(i), + rm_din => q_dr(i), + rm_dout => data_dr(i), + rm_addr => addr_dr(i), + rm_we => we_dr(i), + up_in => up_update(i), + up_in_val => up_update_path(i), + up_in_addr => up_addr(i), + up_out => low_update(i), + up_out_val => low_update_path(i), + up_out_addr => low_addr(i), + low_in => low_update(i+1), + low_in_val => low_update_path(i+1), + low_in_addr => low_addr(i+1), + low_out => up_update(i+1), -- connections to the next level + low_out_val => up_update_path(i+1), + low_out_addr => up_addr(i+1), + clk => clk, + clk_en => clk_en, + ready_in => s_ready(i+1), + ready_out => s_ready(i), + rst_n => rst_n); + + end generate g1; + -- top level + + -- On the top level we have only a single register + process (clk, rst_n) + variable rline : line; + begin -- process + if rst_n = '0' then -- asynchronous reset (active low) + l0_reg <= DATA_REC_INIT_DATA; + elsif clk'event and clk = '1' then -- rising clock edge + dav <= '0'; + if we_u(0) = '1' then + l0_reg <= data_u(0); + dout <= data_u(0); + dav <= '1'; + if SORT_DEBUG then + write(rline, string'("OUT: ")); + write(rline, tdrec2stlv(data_u(0))); + writeline(reports, rline); + end if; + elsif we = '1' then + if SORT_DEBUG then + write(rline, string'("IN: ")); + write(rline, tdrec2stlv(din)); + writeline(reports, rline); + end if; + l0_reg <= din; + dout <= din; + else + dout <= l0_reg; + end if; + end if; + end process; + ready <= s_ready(0); + q_ur(0) <= l0_reg; + q_ul(0) <= l0_reg; + up_update(0) <= we; + up_update_path(0) <= din; + up_addr(0) <= (others => '0'); + + -- signals for the last level + + s_ready(NLEVELS) <= '1'; + --addr(NLEVELS) <= (others => '0'); + data_dr(NLEVELS) <= DATA_REC_INIT_DATA; + data_dl(NLEVELS) <= DATA_REC_INIT_DATA; + we_dl(NLEVELS) <= '0'; + we_dr(NLEVELS) <= '0'; + + low_update(NLEVELS) <= '0'; + low_update_path(NLEVELS) <= DATA_REC_INIT_DATA; + low_addr(0) <= (others => '0'); + +end sorter_sys_arch1;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.