OpenCores
URL https://opencores.org/ocsvn/versatile_fft/versatile_fft/trunk

Subversion Repositories versatile_fft

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /
    from Rev 2 to Rev 3
    Reverse comparison

Rev 2 → Rev 3

/versatile_fft/trunk/comp/keep_me File deleted \ No newline at end of file
/versatile_fft/trunk/single_unit/src/dpram_inf.vhd
0,0 → 1,60
-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL.
-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/
-- No license information was provided by the original author.
-- Minimal modifications were introduced by me to make it suitable for my sorter.
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
 
-- Dual-port RAM in which both ports share one clock.
-- Each port has its own write enable, address, write data and read data.
entity dp_ram_scl is
  generic (
    DATA_WIDTH : integer := 72;  -- number of bits in one memory word
    ADDR_WIDTH : integer := 10   -- number of address bits
    );
  port (
    -- common clock
    clk    : in  std_logic;

    -- Port A
    we_a   : in  std_logic;                                -- write enable
    addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- word address
    data_a : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_a    : out std_logic_vector(DATA_WIDTH-1 downto 0);  -- read data

    -- Port B
    we_b   : in  std_logic;                                -- write enable
    addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- word address
    data_b : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_b    : out std_logic_vector(DATA_WIDTH-1 downto 0)   -- read data
    );
end dp_ram_scl;
 
-- Behavioral description of the memory: one array of 2**ADDR_WIDTH words,
-- declared as a shared variable so that the two clocked processes below
-- (one per port) can both read and write it.
-- NOTE(review): when both ports write the same address on the same clock
-- edge, the stored value depends on the order in which the two processes
-- are executed - confirm that the users of this block never do that.
architecture rtl of dp_ram_scl is
-- Shared memory
type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0);
shared variable mem : mem_type;
begin

-- Port A
-- On the rising clock edge the addressed word is written when we_a = '1',
-- and afterwards the addressed word is copied to q_a. Because mem is
-- a variable, the read that follows a write in the same process already
-- returns the newly written data.
process(clk)
begin
if(clk'event and clk = '1') then
if(we_a = '1') then
mem(conv_integer(addr_a)) := data_a;
end if;
q_a <= mem(conv_integer(addr_a));
end if;
end process;

-- Port B
-- Same behaviour as port A, using the port B signals.
process(clk)
begin
if(clk'event and clk = '1') then
if(we_b = '1') then
mem(conv_integer(addr_b)) := data_b;
end if;
q_b <= mem(conv_integer(addr_b));
end if;
end process;

end rtl;
/versatile_fft/trunk/single_unit/src/butterfly.vhd
0,0 → 1,75
-------------------------------------------------------------------------------
-- Title : butterfly and twiddle factor multiplier
-- Project :
-------------------------------------------------------------------------------
-- File : butterfly.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-02-05
-- Platform :
-- Standard : VHDL'87
-------------------------------------------------------------------------------
-- Description: This block performs the butterfly calculation
-- And multiplies the result by the twiddle factor
-- Input data and output data are in our icpx_number format
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.icpx.all;
-------------------------------------------------------------------------------
 
 
-- Purely combinatorial butterfly with twiddle factor multiplier.
-- All ports use the icpx_number format defined in the icpx package.
entity butterfly is
  port (
    -- Input data
    din0  : in  icpx_number;
    din1  : in  icpx_number;
    -- Twiddle factor
    tf    : in  icpx_number;
    -- Output data
    dout0 : out icpx_number;   -- halved sum of the inputs
    dout1 : out icpx_number    -- difference of the inputs multiplied by tf
    );

end butterfly;
 
architecture beh1 of butterfly is

  -- Sum and difference of the inputs; one bit wider than the operands,
  -- so that they cannot overflow
  signal sum_re, sum_im   : signed(ICPX_WIDTH downto 0);
  signal diff_re, diff_im : signed(ICPX_WIDTH downto 0);

  -- Result of the complex multiplication of the difference by tf
  signal prod_re, prod_im : signed(2*ICPX_WIDTH downto 0);

begin  -- beh1

  -- Upper output: din0 + din1.
  -- The sum has one bit more than the operands; its least significant bit
  -- is dropped (plain truncation, no rounding constant is added), which
  -- halves the result.
  sum_re   <= resize(din0.re, ICPX_WIDTH+1) + resize(din1.re, ICPX_WIDTH+1);
  sum_im   <= resize(din0.im, ICPX_WIDTH+1) + resize(din1.im, ICPX_WIDTH+1);
  dout0.re <= resize(sum_re(ICPX_WIDTH downto 1), ICPX_WIDTH);
  dout0.im <= resize(sum_im(ICPX_WIDTH downto 1), ICPX_WIDTH);

  -- Lower output: (din0 - din1) * tf.
  diff_re <= resize(din0.re, ICPX_WIDTH+1) - resize(din1.re, ICPX_WIDTH+1);
  diff_im <= resize(din0.im, ICPX_WIDTH+1) - resize(din1.im, ICPX_WIDTH+1);

  -- Complex multiplication by the twiddle factor
  prod_re <= (diff_re * tf.re) - (diff_im * tf.im);
  prod_im <= (diff_re * tf.im) + (diff_im * tf.re);

  -- Drop the lower bits of the product. The selected slice is ICPX_WIDTH+1
  -- bits wide; resize keeps its sign bit and its ICPX_WIDTH-1 low bits.
  dout1.re <= resize(prod_re(2*ICPX_WIDTH-1 downto ICPX_WIDTH-1), ICPX_WIDTH);
  dout1.im <= resize(prod_im(2*ICPX_WIDTH-1 downto ICPX_WIDTH-1), ICPX_WIDTH);

end beh1;
/versatile_fft/trunk/single_unit/src/icpxram.vhd
0,0 → 1,99
-------------------------------------------------------------------------------
-- Title : icpxram
-- Project :
-------------------------------------------------------------------------------
-- File : icpxram.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-02-05
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This block is a wrapper, which allows storing and reading
-- of complex numbers in our internal format in a normal
-- DPRAM storing the bit vectors
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
 
 
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
library work;
use work.icpx.all;
 
-- Dual-port RAM storing complex numbers in the icpx_number format.
-- The port list mirrors dp_ram_scl, with icpx_number data ports
-- instead of bit vectors.
entity dp_ram_icpx is
  generic (
    ADDR_WIDTH : integer := 10   -- number of address bits
    );
  port (
    -- common clock
    clk    : in  std_logic;

    -- Port A
    we_a   : in  std_logic;                                -- write enable
    addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- word address
    data_a : in  icpx_number;                              -- write data
    q_a    : out icpx_number;                              -- read data

    -- Port B
    we_b   : in  std_logic;                                -- write enable
    addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- word address
    data_b : in  icpx_number;                              -- write data
    q_b    : out icpx_number                               -- read data
    );
end dp_ram_icpx;
 
architecture rtl of dp_ram_icpx is

  -- The underlying RAM, which stores plain bit vectors
  component dp_ram_scl
    generic (
      DATA_WIDTH : integer;
      ADDR_WIDTH : integer);
    port (
      clk    : in  std_logic;
      we_a   : in  std_logic;
      addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_a : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      q_a    : out std_logic_vector(DATA_WIDTH-1 downto 0);
      we_b   : in  std_logic;
      addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_b : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      q_b    : out std_logic_vector(DATA_WIDTH-1 downto 0));
  end component;

  -- Bit-vector images of the write data and of the read data
  signal vec_din_a, vec_din_b : std_logic_vector(ICPX_BV_LEN-1 downto 0);
  signal vec_q_a, vec_q_b     : std_logic_vector(ICPX_BV_LEN-1 downto 0);

begin

  -- Pack the complex write data into bit vectors
  vec_din_a <= icpx2stlv(data_a);
  vec_din_b <= icpx2stlv(data_b);

  -- The memory itself; one word holds one complex number
  dp_ram_scl_1 : dp_ram_scl
    generic map (
      DATA_WIDTH => ICPX_BV_LEN,
      ADDR_WIDTH => ADDR_WIDTH)
    port map (
      clk    => clk,
      we_a   => we_a,
      addr_a => addr_a,
      data_a => vec_din_a,
      q_a    => vec_q_a,
      we_b   => we_b,
      addr_b => addr_b,
      data_b => vec_din_b,
      q_b    => vec_q_b);

  -- Unpack the words read from the memory
  q_a <= stlv2icpx(vec_q_a);
  q_b <= stlv2icpx(vec_q_b);

end rtl;
/versatile_fft/trunk/single_unit/src/fft_engine.vhd
0,0 → 1,433
-------------------------------------------------------------------------------
-- Title : fft_engine
-- Project : DP RAM based FFT processor
-------------------------------------------------------------------------------
-- File : fft_engine.vhd
-- Author : Wojciech Zabolotny wzab01@gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-18
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This file implements a FFT processor based on a dual port RAM
-- This implementation uses a single "butterfly calculation unit"
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-18 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.icpx.all;
 
-- FFT processor working on 2**LOG2_FFT_LEN complex samples.
entity fft_engine is
  generic (
    LOG2_FFT_LEN : integer := 8 );   -- log2 of the transform length
  port (
    -- Writing of the input samples (accepted while the engine is not
    -- processing)
    din       : in  icpx_number;     -- sample to be written
    addr_in   : in  integer;         -- address of the written sample
    wr_in     : in  std_logic;       -- write enable for din
    -- Reading of the results (available while the engine is not processing)
    dout      : out icpx_number;     -- value read from address addr_out
    addr_out  : in  integer;         -- address of the value to be read
    -- Control and status
    ready     : out std_logic;       -- '1' after reset and after a finished
                                     -- transform, '0' during processing
    busy      : out std_logic;       -- '1' while the transform is calculated
    start     : in  std_logic;       -- '1' starts the processing
    rst_n     : in  std_logic;       -- asynchronous reset, active low
    syn_rst_n : in  std_logic;       -- synchronous reset, active low
    clk       : in  std_logic);

end fft_engine;
 
architecture fft_engine_beh of fft_engine is
 
constant ADDR_WIDTH : integer := LOG2_FFT_LEN;
constant BTFLY_LATENCY : integer := 0;
constant FFT_LEN : integer := 2 ** LOG2_FFT_LEN;
 
type T_FFT_STATE is (FFT_STATE_RESET, FFT_STATE_READ, FFT_STATE_PROCESS);
 
-- The type below defines the registers used by the state machine
type T_FFT_REGS is record
state : T_FFT_STATE; -- current state of the state machine
stage : integer; -- number of the FFT stage being calculated
step_in : integer; -- number of the butterfly whose operands are read
step_out : integer; -- number of the butterfly whose results are written
stage_out_started : std_logic; -- '1' when results of the stage are being written
mem_switch : std_logic; -- selects the memory used as the source of data
ready : std_logic; -- drives the "ready" output
busy : std_logic; -- drives the "busy" output
tf : icpx_number; -- twiddle factor delivered to the butterfly
latency_cnt : integer; -- down-counter delaying the start of writing
end record;

-- The initial value, set during the reset
constant fft_regs_init : T_FFT_REGS := (
state => FFT_STATE_RESET,
stage => 0,
step_in => 0,
step_out => 0,
stage_out_started => '0',
mem_switch => '0',
ready => '0',
busy => '0',
tf => icpx_zero,
latency_cnt => 0
);

-- r_o is the current content of the registers,
-- r_i is the value loaded into them on the next rising clock edge
signal r_o, r_i : T_FFT_REGS := fft_regs_init;
 
-- The type below defines the combinatorial outputs of the state machine
-- Fields dpr0_* control the memory 0, fields dpr1_* control the memory 1
type T_FFT_COMB is record
dpr0_aa : integer; -- address for port A (as integer)
dpr0_ab : integer; -- address for port B (as integer)
dpr0_ia : icpx_number; -- write data for port A
dpr0_ib : icpx_number; -- write data for port B
dpr0_wa : std_logic; -- write enable for port A
dpr0_wb : std_logic; -- write enable for port B
dpr1_aa : integer;
dpr1_ab : integer;
dpr1_ia : icpx_number;
dpr1_ib : icpx_number;
dpr1_wa : std_logic;
dpr1_wb : std_logic;
dout : icpx_number; -- value driving the "dout" output
end record;

-- The default value. Set at the beginning of the process to avoid
-- creation of latches
constant fft_comb_default : T_FFT_COMB := (
dpr0_aa => 0,
dpr0_ab => 0,
dpr0_ia => icpx_zero,
dpr0_ib => icpx_zero,
dpr0_wa => '0',
dpr0_wb => '0',
dpr1_aa => 0,
dpr1_ab => 0,
dpr1_ia => icpx_zero,
dpr1_ib => icpx_zero,
dpr1_wa => '0',
dpr1_wb => '0',
dout => icpx_zero
);

signal c : T_FFT_COMB := fft_comb_default;
 
-- Converts an integer index to the ADDR_WIDTH bits wide
-- std_logic_vector used to address the memory
function i2a (
  constant ia : integer)
  return std_logic_vector is
begin  -- i2a
  return std_logic_vector(to_unsigned(ia, ADDR_WIDTH));
end i2a;
 
-- Calculates the memory address of one of the two arguments of the given
-- butterfly in the given stage of the algorithm.
-- The address is built from the bits of the butterfly number: the bits
-- above position LOG2_FFT_LEN-stage-1 are moved one position up, and the
-- input number (0 or 1) is inserted at that position.
function n2k (
  constant stage : integer;             -- stage number
  constant step  : integer;             -- butterfly block number
  constant nin   : integer              -- input number (0 or 1)
  )
  return integer is
  -- position of the bit which distinguishes both inputs of the butterfly
  constant sel_pos   : integer := LOG2_FFT_LEN-stage-1;
  variable sel_bit   : std_logic;
  variable addr_bits : unsigned(LOG2_FFT_LEN-1 downto 0);
begin
  if nin = 0 then
    sel_bit := '0';
  else
    sel_bit := '1';
  end if;
  addr_bits := to_unsigned(step, LOG2_FFT_LEN);
  -- make room for the inserted bit (nothing to move in the stage 0)
  if stage > 0 then
    addr_bits(LOG2_FFT_LEN-1 downto LOG2_FFT_LEN-stage) :=
      addr_bits(LOG2_FFT_LEN-2 downto LOG2_FFT_LEN-stage-1);
  end if;
  addr_bits(sel_pos) := sel_bit;
  return to_integer(addr_bits);
end n2k;
 
-- Type used to store twiddle factors
type T_TF_TABLE is array (0 to FFT_LEN/2-1) of icpx_number;
 
-- Fills the table of twiddle factors: the entry number k holds
-- exp(-j*2*pi*k/2**LOG2_FFT_LEN) converted to the icpx_number format
-- (during synthesis it is evaluated only during compilation!!!)
function tf_table_init
  return t_tf_table is
  variable phase : real;
  variable table : t_tf_table;
begin  -- tf_table_init
  for k in table'range loop
    phase    := -real(k)*MATH_PI*2.0/(2.0 ** LOG2_FFT_LEN);
    table(k) := cplx2icpx(complex'(cos(phase), sin(phase)));
  end loop;  -- k
  return table;
end tf_table_init;
 
-- Twiddle factors ROM memory
constant tf_table : T_TF_TABLE := tf_table_init;
 
-- Returns the index (in the twiddle factor table) of the twiddle factor
-- needed by the given butterfly in the given stage.
-- The butterfly number is shifted left by "stage" bits inside
-- a LOG2_FFT_LEN-1 bits wide vector; bits shifted out at the top are lost.
function tf_select (
  constant step_in : integer;           -- number of the butterfly block
  constant stage   : integer            -- stage of the algorithm
  )
  return integer is
begin  -- tf_select
  return to_integer(shift_left(to_unsigned(step_in, LOG2_FFT_LEN-1), stage));
end tf_select;
 
 
component dp_ram_icpx
generic (
ADDR_WIDTH : integer);
port (
clk : in std_logic;
we_a : in std_logic;
addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0);
data_a : in icpx_number;
q_a : out icpx_number;
we_b : in std_logic;
addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0);
data_b : in icpx_number;
q_b : out icpx_number);
end component;
 
component butterfly
port (
din0 : in icpx_number;
din1 : in icpx_number;
tf : in icpx_number;
dout0 : out icpx_number;
dout1 : out icpx_number);
end component;
 
-- signals for inputs and outputs of the dpram
type icpx_vector is array (0 to LOG2_FFT_LEN) of icpx_number;
signal dpr0_oa : icpx_number;
signal dpr0_ob : icpx_number;
signal dpr1_oa : icpx_number;
signal dpr1_ob : icpx_number;
signal din0 : icpx_number;
signal din1 : icpx_number;
signal dout0 : icpx_number;
signal dout1 : icpx_number;
 
signal dpr0_aa : std_logic_vector(ADDR_WIDTH-1 downto 0) := (others => '0');
signal dpr0_ab : std_logic_vector(ADDR_WIDTH-1 downto 0) := (others => '0');
signal dpr1_aa : std_logic_vector(ADDR_WIDTH-1 downto 0) := (others => '0');
signal dpr1_ab : std_logic_vector(ADDR_WIDTH-1 downto 0) := (others => '0');
begin -- fft_engine_beh
 
dpr0_aa <= i2a(c.dpr0_aa);
dpr0_ab <= i2a(c.dpr0_ab);
dpr1_aa <= i2a(c.dpr1_aa);
dpr1_ab <= i2a(c.dpr1_ab);
 
-- To allow fluent operation even in case of butterfly blocks
-- with non-zero latency, we use two DPRAMs
 
dp_ram_0 : dp_ram_icpx
generic map (
ADDR_WIDTH => ADDR_WIDTH)
port map (
clk => clk,
we_a => c.dpr0_wa,
addr_a => dpr0_aa,
data_a => c.dpr0_ia,
q_a => dpr0_oa,
we_b => c.dpr0_wb,
addr_b => dpr0_ab,
data_b => c.dpr0_ib,
q_b => dpr0_ob);
 
dp_ram_1 : dp_ram_icpx
generic map (
ADDR_WIDTH => ADDR_WIDTH)
port map (
clk => clk,
we_a => c.dpr1_wa,
addr_a => dpr1_aa,
data_a => c.dpr1_ia,
q_a => dpr1_oa,
we_b => c.dpr1_wb,
addr_b => dpr1_ab,
data_b => c.dpr1_ib,
q_b => dpr1_ob);
 
-- The "butterfly block"
butterfly_1 : butterfly
port map (
din0 => din0,
din1 => din1,
tf => r_o.tf,
dout0 => dout0,
dout1 => dout1);
 
dout <= c.dout;
ready <= r_o.ready;
busy <= r_o.busy;
 
 
-- Routing of the butterfly inputs from the appropriate memory:
-- memory 0 is the source when r_o.mem_switch = '0', memory 1 otherwise
din0 <= dpr0_oa when r_o.mem_switch = '0' else dpr1_oa;
din1 <= dpr0_ob when r_o.mem_switch = '0' else dpr1_ob;
 
-- Combinatorial process of the main state machine
-- It calculates the next value of the registers (r_i) and the
-- combinatorial outputs (c) from the current value of the registers (r_o)
-- and from the inputs.
p1 : process (addr_in, addr_out, din, dout0, dout1, dpr0_ob, dpr1_ob, r_o,
start, wr_in)
begin -- process
-- Defaults: inactive memory controls, registers keep their values
c <= fft_comb_default;
r_i <= r_o;
-- We work, depending on the mode
case r_o.state is
when FFT_STATE_RESET =>
r_i.state <= FFT_STATE_READ;
r_i.ready <= '1'; -- Signal that we have left reset
when FFT_STATE_READ =>
-- Route input signals to allow writing and reading of data
-- Writing of new data
-- Routing of data depends on the state of the flip-flop
if r_o.mem_switch = '0' then
-- Write the new data to the memory 0
c.dpr0_aa <= addr_in;
c.dpr0_ia <= din;
c.dpr0_wa <= wr_in;
-- Read the data from the memory 1
c.dpr1_ab <= addr_out;
c.dout <= dpr1_ob;
else
-- Write the new data to the memory 1
c.dpr1_aa <= addr_in;
c.dpr1_ia <= din;
c.dpr1_wa <= wr_in;
-- Read the data from the memory 0
c.dpr0_ab <= addr_out;
c.dout <= dpr0_ob;
end if;
-- Read the data, until the processing is started
-- (the counters are kept cleared in the meantime)
r_i.stage <= 0;
r_i.step_in <= 0;
r_i.step_out <= 0;
r_i.stage_out_started <= '0';
if start = '1' then
r_i.state <= FFT_STATE_PROCESS;
r_i.ready <= '0';
r_i.busy <= '1';
r_i.latency_cnt <= BTFLY_LATENCY; -- start the latency counter
end if;
when FFT_STATE_PROCESS =>
-- First we prepare to read the data
-- The memory used to read the data depends
-- on number of stage
-- Both arguments of the butterfly are read at once, via ports A and B
if r_o.mem_switch = '0' then
c.dpr0_aa <= n2k(r_o.stage, r_o.step_in, 0);
c.dpr0_ab <= n2k(r_o.stage, r_o.step_in, 1);
else
c.dpr1_aa <= n2k(r_o.stage, r_o.step_in, 0);
c.dpr1_ab <= n2k(r_o.stage, r_o.step_in, 1);
end if;
-- data will be available in the next clock
-- so we need to output the twiddle factor also the next clock
-- (therefore the twiddle factor is stored in the register)
-- Twiddle factor
-- Selection of the twiddle factor
r_i.tf <= tf_table(tf_select(r_o.step_in, r_o.stage)); -- to be corrected!
-- Increase number of step in the current stage
if r_o.step_in < FFT_LEN/2-1 then
r_i.step_in <= r_o.step_in+1;
else
-- Increasing number of the stage is done
-- in the part, which handles writing of results
null;
end if;
-- Check, if we should start writing of data
if r_o.latency_cnt > 0 then
r_i.latency_cnt <= r_o.latency_cnt - 1;
else
r_i.stage_out_started <= '1';
end if;
-- Now we handle writing of data
if r_o.stage_out_started = '1' then
-- First we prepare to write the data
-- The results are written to the memory which is not used
-- as the source of data in this stage, to the same addresses
-- from which the arguments of the butterfly were read
if r_o.mem_switch = '0' then
c.dpr1_aa <= n2k(r_o.stage, r_o.step_out, 0);
c.dpr1_ia <= dout0;
c.dpr1_ab <= n2k(r_o.stage, r_o.step_out, 1);
c.dpr1_ib <= dout1;
c.dpr1_wa <= '1';
c.dpr1_wb <= '1';
else
c.dpr0_aa <= n2k(r_o.stage, r_o.step_out, 0);
c.dpr0_ia <= dout0;
c.dpr0_ab <= n2k(r_o.stage, r_o.step_out, 1);
c.dpr0_ib <= dout1;
c.dpr0_wa <= '1';
c.dpr0_wb <= '1';
end if;
-- Now update the step counter
if r_o.step_out < FFT_LEN/2-1 then
r_i.step_out <= r_o.step_out + 1;
else
-- The last result of the stage is being written
-- (the assignment below overrides setting of stage_out_started above)
r_i.step_out <= 0;
r_i.stage_out_started <= '0';
if r_o.stage < LOG2_FFT_LEN-1 then
-- go to the next stage
-- (the memory just written becomes the source of data)
r_i.stage <= r_o.stage + 1;
r_i.mem_switch <= not r_o.mem_switch;
r_i.stage_out_started <= '0';
r_i.step_in <= 0;
r_i.latency_cnt <= BTFLY_LATENCY; -- start the latency counter
else
-- We have completed all stages, so we can go to the
-- data read state
-- (mem_switch is not changed, so the memory written in the last
-- stage is the one which is read in the FFT_STATE_READ state)
r_i.state <= FFT_STATE_READ;
r_i.busy <= '0';
r_i.ready <= '1'; -- signal, that data may be read
end if;
end if;
end if;
when others => null;
end case;
end process p1;
 
-- Registers of the state machine
p2 : process (clk, rst_n)
begin  -- process p2
  if rst_n = '0' then
    -- asynchronous reset (active low)
    r_o <= fft_regs_init;
  elsif clk'event and clk = '1' then
    -- rising clock edge: normally load the next state...
    r_o <= r_i;
    -- ...but the synchronous reset (used also, to avoid races)
    -- has the priority
    if syn_rst_n = '0' then
      r_o <= fft_regs_init;
    end if;
  end if;
end process p2;
 
end fft_engine_beh;
/versatile_fft/trunk/single_unit/src/icpx_pkg.vhd
0,0 → 1,112
-------------------------------------------------------------------------------
-- Title : icpx
-- Project : DP RAM based FFT processor
-------------------------------------------------------------------------------
-- File : icpx_pkg.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-18
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This package defines the format used to store complex numbers
-- In this implementation we store numbers from range <-2.0, 2.0)
-- scaled to signed integers with width of ICPX_WIDTH (including
-- the sign bit)
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-18 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
 
package icpx is

  -- Number of bits (including the sign bit) used for the real part
  -- and for the imaginary part
  constant ICPX_WIDTH : integer := 16;

  -- Length of the std_logic_vector needed to store our complex number
  constant ICPX_BV_LEN : integer := ICPX_WIDTH * 2;

  -- Complex number in the internal fixed point format
  type icpx_number is record
    Re : signed(ICPX_WIDTH-1 downto 0);
    Im : signed(ICPX_WIDTH-1 downto 0);
  end record;

  -- Conversion functions

  -- Packs the complex number into a bit vector
  -- (real part in the upper half, imaginary part in the lower half)
  function icpx2stlv (
    constant din : icpx_number)
    return std_logic_vector;

  -- Unpacks the complex number from a bit vector
  function stlv2icpx (
    constant din : std_logic_vector)
    return icpx_number;

  -- Converts the floating point complex number to the internal format
  -- (both parts are multiplied by 2**(ICPX_WIDTH-2))
  function cplx2icpx (
    constant din : complex)
    return icpx_number;

  -- Returns the complex number with both parts equal to zero
  function icpx_zero
    return icpx_number;

end icpx;
 
package body icpx is
 
function icpx2stlv (
constant din : icpx_number)
return std_logic_vector is
 
variable vres : std_logic_vector(2*ICPX_WIDTH-1 downto 0) :=
(others => '0');
begin -- icpx2stlv
vres := std_logic_vector(din.re) & std_logic_vector(din.im);
return vres;
end icpx2stlv;
 
function stlv2icpx (
constant din : std_logic_vector)
return icpx_number is
 
variable vres : ICPX_NUMBER := icpx_zero;
 
begin -- stlv2icpx
vres.Re := signed(din(2*ICPX_WIDTH-1 downto ICPX_WIDTH));
vres.Im := signed(din(ICPX_WIDTH-1 downto 0));
return vres;
end stlv2icpx;
 
function cplx2icpx (
constant din : complex)
return icpx_number is
 
variable vres : ICPX_NUMBER := icpx_zero;
 
begin -- cplx2icpx
vres.Re := to_signed(integer(din.Re*(2.0**(ICPX_WIDTH-2))), ICPX_WIDTH);
vres.Im := to_signed(integer(din.Im*(2.0**(ICPX_WIDTH-2))), ICPX_WIDTH);
return vres;
end cplx2icpx;
 
function icpx_zero
return icpx_number is
 
variable vres : ICPX_NUMBER;
begin -- icpx_zero
 
vres.Re := (others => '0');
vres.Im := (others => '0');
return vres;
end icpx_zero;
end icpx;
/versatile_fft/trunk/single_unit/src/butterfly_tb.vhd
0,0 → 1,147
-------------------------------------------------------------------------------
-- Title : Testbench for design "butterfly"
-- Project :
-------------------------------------------------------------------------------
-- File : butterfly_tb.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-02-05
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description:
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.icpx.all;
 
-------------------------------------------------------------------------------
 
entity butterfly_tb is
 
end butterfly_tb;
 
-------------------------------------------------------------------------------
 
-- Testbench of the butterfly block.
-- Constant test data are applied to the butterfly, the expected results are
-- calculated with floating point complex arithmetic, and both the expected
-- and the obtained values are reported once. After that the clock is
-- stopped, so that the simulation terminates by itself.
architecture beh1 of butterfly_tb is

  -- Converts the real number to the l bits wide signed value,
  -- scaled by 2**(l-1)
  function r2s (
    constant dt : real;
    constant l  : integer)
    return signed is
  begin  -- r2s
    return to_signed(integer(dt*(2.0**(l-1))), l);
  end r2s;

  -- Reports the complex value given as a pair of signed numbers
  procedure repicpx (
    constant name : in string;
    constant ire  : in signed;
    constant iim  : in signed) is
  begin  -- repicpx
    report name & "=" & integer'image(to_integer(ire)) &
      "+j*" & integer'image(to_integer(iim))
      severity note;
  end repicpx;

  component butterfly
    port (
      din0  : in  icpx_number;
      din1  : in  icpx_number;
      tf    : in  icpx_number;
      dout0 : out icpx_number;
      dout1 : out icpx_number);
  end component;

  -- Twiddle factor used in the test: magnitude 1.0, angle -pi/3
  constant W_n_p : complex_polar := (1.0, -MATH_PI/3.0);
  constant W_n   : complex       := polar_to_complex(W_n_p);

  -- Test data: real and imaginary parts of both inputs
  constant fd0r : real := 0.93;
  constant fd0i : real := -0.32;
  constant fd1r : real := -0.27;
  constant fd1i : real := 0.51;

  -- complex signals for verification
  signal cd0, cd1 : complex := (0.0, 0.0);
  signal cres0    : complex := (0.0, 0.0);
  signal cres1    : complex := (0.0, 0.0);

  -- expected results converted to the internal format
  signal res0, res1 : icpx_number;

  -- component ports
  signal din0  : icpx_number := icpx_zero;
  signal din1  : icpx_number := icpx_zero;
  signal tf    : icpx_number := icpx_zero;
  signal dout0 : icpx_number := icpx_zero;
  signal dout1 : icpx_number := icpx_zero;

  -- clock
  signal Clk : std_logic := '1';

  -- set to true when the results are reported; stops the clock
  signal end_sim : boolean := false;

begin  -- beh1

  cd0 <= cmplx(fd0r, fd0i);
  cd1 <= cmplx(fd1r, fd1i);

  din0 <= cplx2icpx(cd0);
  din1 <= cplx2icpx(cd1);

  -- expected results, calculated in floating point
  cres0 <= (cd0 + cd1) / 2.0;
  cres1 <= ((cd0 - cd1) * W_n) / 2.0;

  res0 <= cplx2icpx(cres0);
  res1 <= cplx2icpx(cres1);

  tf <= cplx2icpx(W_n);

  -- component instantiation
  butterfly_1 : butterfly
    port map (
      din0  => din0,
      din1  => din1,
      tf    => tf,
      dout0 => dout0,
      dout1 => dout1);

  -- clock generation (stopped at the end of the simulation)
  Clk <= not Clk after 10 ns when end_sim = false else '0';

  -- waveform generation
  WaveGen_Proc : process
  begin
    -- all signals are settled long before the first rising clock edge
    wait until Clk = '1';
    report "Wn=" & real'image(W_n.re) & "+j*" & real'image(W_n.im) severity note;
    -- report result of complex calculations
    repicpx ("tf", tf.re, tf.im);
    repicpx ("Cres0", res0.re, res0.im);
    repicpx ("Cres1", res1.re, res1.im);
    repicpx ("Dout0", dout0.re, dout0.im);
    repicpx ("Dout1", dout1.re, dout1.im);
    -- the inputs are constant, so one report is sufficient;
    -- signal end of the simulation
    end_sim <= true;
    wait;
  end process WaveGen_Proc;

end beh1;
 
-------------------------------------------------------------------------------
 
configuration butterfly_tb_beh1_cfg of butterfly_tb is
for beh1
end for;
end butterfly_tb_beh1_cfg;
 
-------------------------------------------------------------------------------
/versatile_fft/trunk/single_unit/src/fft_engine_tb.vhd
0,0 → 1,225
-------------------------------------------------------------------------------
-- Title : Testbench for design "fft_engine"
-- Project :
-------------------------------------------------------------------------------
-- File : fft_engine_tb.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-25
-- Last update: 2014-02-05
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description:
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-25 1.0 wzab Created
-------------------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library std;
use std.textio.all;
 
library work;
use work.fft_len.all;
use work.icpx.all;
-------------------------------------------------------------------------------
 
entity fft_engine_tb is
 
end fft_engine_tb;
 
-------------------------------------------------------------------------------
 
architecture beh1 of fft_engine_tb is
-- constant LOG2_FFT_LEN : integer := 8;
-- LOG2_FFT_LEN is read from the fft_len package,
-- generated by the Octave or MATLAB script
-- together with the test data
constant ADDR_WIDTH : integer := LOG2_FFT_LEN;
constant FFT_LEN : integer := 2 ** LOG2_FFT_LEN;
 
component fft_engine
generic (
LOG2_FFT_LEN : integer);
port (
din : in icpx_number;
addr_in : in integer;
wr_in : in std_logic;
dout : out icpx_number;
addr_out : in integer;
ready : out std_logic;
busy : out std_logic;
start : in std_logic;
rst_n : in std_logic;
syn_rst_n : in std_logic;
clk : in std_logic);
end component;
 
signal end_sim : boolean := false;
 
-- component ports
signal din : icpx_number := icpx_zero;
signal addr_in : integer := 0;
signal wr_in : std_logic := '0';
signal dout : icpx_number := icpx_zero;
signal addr_out : integer := 0;
signal ready : std_logic := '0';
signal start : std_logic := '0';
signal busy : std_logic := '0';
signal rst_n : std_logic := '0';
signal syn_rst_n : std_logic := '0';
 
type cplx_vector is array (0 to FFT_LEN-1) of complex;
 
-- Converts an integer index to the ADDR_WIDTH bits wide
-- std_logic_vector used to address the memory
function i2a (
  constant ia : integer)
  return std_logic_vector is
begin  -- i2a
  return std_logic_vector(to_unsigned(ia, ADDR_WIDTH));
end i2a;
 
-- Returns the argument with the order of its bits reversed.
-- The result has the same index range as the argument.
-- Based on the function written by Jonathan Bromley
-- and published on comp.lang.vhdl
-- Thread: Slicing of an array: wrong direction
-- https://groups.google.com/forum/#!msg/comp.lang.vhdl/eBZQXrw2Ngk/4H7oL8hdHMcJ
function rev_slv (a : in std_logic_vector)
  return std_logic_vector is
  variable mirrored : std_logic_vector(a'range);
begin
  for i in a'range loop
    -- the element at the distance d from one end of the vector
    -- is taken from the distance d from the opposite end
    mirrored(i) := a(a'low + a'high - i);
  end loop;
  return mirrored;
end;  -- function rev_slv
 
-- clock
signal Clk : std_logic := '1';
 
 
begin -- beh1
 
-- component instantiation
DUT : fft_engine
generic map (
LOG2_FFT_LEN => LOG2_FFT_LEN)
port map (
din => din,
addr_in => addr_in,
wr_in => wr_in,
dout => dout,
addr_out => addr_out,
ready => ready,
busy => busy,
start => start,
rst_n => rst_n,
syn_rst_n => syn_rst_n,
clk => clk);
 
-- clock generation
Clk <= not Clk after 10 ns when end_sim = false else '0';
 
-- Generation of the synchronous reset: syn_rst_n is activated immediately
-- together with rst_n, and follows rst_n on the rising clock edge otherwise
process (clk, rst_n)
begin  -- process
  if rst_n = '0' then
    -- asynchronous reset (active low)
    syn_rst_n <= '0';
  else
    if clk'event and clk = '1' then
      -- rising clock edge
      syn_rst_n <= rst_n;
    end if;
  end if;
end process;
 
-- waveform generation
-- The process releases the reset, writes the samples read from the input
-- file to the FFT engine, starts the processing, waits until the engine
-- is ready again, writes the results to the output file and stops
-- the simulation.
WaveGen_Proc : process

-- each line of the input file holds the real and the imaginary part
-- of one sample
file data_in : text open read_mode is "data_in.txt";
variable input_line : line;
-- each line of the output file holds the real and the imaginary part
-- of one result, as integers
file data_out : text open write_mode is "data_out.txt";
variable output_line : line;
variable tre, tim : real;
constant sep : string := " ";
begin
-- insert signal assignments here
wait until Clk = '1';
wait for 15 ns;
-- End of reset
rst_n <= '1';
-- wait until the FFT engine is ready after reset
wait until ready = '1';
wait until Clk = '0';
-- Now we read data from the input file
-- and put the data to the input memory
-- (signals are changed after the falling clock edge,
-- one sample is written in each clock period)
for i in 0 to FFT_LEN-1 loop
readline(data_in, input_line);
read(input_line, tre);
read(input_line, tim);
addr_in <= i;
din <= cplx2icpx(cmplx(tre, tim));
wr_in <= '1';
wait until Clk = '1';
wait until Clk = '0';
end loop; -- i
-- stop writing of data
wr_in <= '0';
-- wait one clock period to finish writing
wait until Clk = '1';
wait until Clk = '0';
-- start processing
start <= '1';
wait until Clk = '1';
wait until Clk = '0';
-- switch off the start signal, so that the FFT
-- engine will not start processing again
start <= '0';
-- wait until the FFT engine is ready
wait until ready = '1';
-- wait one clock (is it necessary?)
wait until Clk = '1';
wait until Clk = '0';
-- read the data, and write them to the output file
for i in 0 to FFT_LEN-1 loop
-- Data after FFT are scrambled, we need to reverse
-- order of address bits!
addr_out <= to_integer(unsigned(rev_slv(i2a(i))));
-- we use a synchronous memory, so data can be read after
-- the next clock pulse
wait until Clk = '1';
wait until Clk = '0';
-- write data
write(output_line, integer'image(to_integer(dout.re)));
write(output_line, sep);
write(output_line, integer'image(to_integer(dout.im)));
writeline(data_out, output_line);
end loop; -- i
-- signal end of the simulation
-- (it stops the clock, so no further events are generated)
end_sim <= true;
wait;
end process WaveGen_Proc;
 
 
end beh1;
 
-------------------------------------------------------------------------------
 
configuration fft_engine_tb_beh1_cfg of fft_engine_tb is
for beh1
end for;
end fft_engine_tb_beh1_cfg;
 
-------------------------------------------------------------------------------
/versatile_fft/trunk/single_unit/src/fft_len.vhd
0,0 → 1,3
-- Package defining the length of the FFT (as its base 2 logarithm).
-- The test_fft.m script writes src/fft_len.vhd with this content,
-- so manual changes are lost when the script is run.
package fft_len is
constant LOG2_FFT_LEN : integer := 10;
end fft_len;
/versatile_fft/trunk/single_unit/test_fft.m
0,0 → 1,39
% Test of the FFT core: generates the fft_len package and random input data,
% calculates the reference FFT, runs the simulation and shows the differences
% between the reference results and the results of the simulation.
% Only constructs accepted both by GNU Octave and by MATLAB are used
% (single-quoted strings, the "^" operator, the "1j" imaginary unit).
%
% Modify the length of the FFT in the line below
log2fftlen = 10;
% If you modify the number of bits used to represent
% real and imaginary part of the complex number,
% you should also modify the ICPX_WIDTH constant
% in the icpx_pkg.vhd file
icpx_width = 16;
% Do not modify below
% Write the package defining length of the FFT
fo = fopen('src/fft_len.vhd', 'w');
if fo < 0
  error('Cannot open src/fft_len.vhd for writing');
end
fprintf(fo, 'package fft_len is\n');
fprintf(fo, 'constant LOG2_FFT_LEN : integer := %d;\n', log2fftlen);
fprintf(fo, 'end fft_len;\n');
fclose(fo);
fftlen = 2 ^ log2fftlen;
%Generate the data. Now it is only a noise, but you
%can generate something with periodic components
%It is important, that values fit in range of representation
%(-2,2) for standard implementation.
%May be changed if you redefine our icpx_number format
re = 3*rand(1,fftlen)-1.5;
im = 3*rand(1,fftlen)-1.5;
% Write the input data for the simulation: one sample per line
fo = fopen('data_in.txt', 'w');
if fo < 0
  error('Cannot open data_in.txt for writing');
end
for k = 1:fftlen
  fprintf(fo, '%g %g\n', re(k), im(k));
end
fclose(fo);
% Calculate the reference results. The data are scaled in the same way
% as in the cplx2icpx function, and divided by the length of the FFT
scale = 2 ^ (icpx_width-2);
di = (re+1j*im)*scale/fftlen;
fr = fft(di);
fo = fopen('data_oct.txt', 'w');
if fo < 0
  error('Cannot open data_oct.txt for writing');
end
for k = 1:fftlen
  fprintf(fo, '%d %d\n', floor(real(fr(k))), floor(imag(fr(k))));
end
fclose(fo);
%Run the simulation
system('make clean; make');
%Compare results calculated in octave and in our IP core
system('vim -d data_oct.txt data_out.txt');
/versatile_fft/trunk/single_unit/makefile
0,0 → 1,31
# Builds and runs the simulation of the FFT engine with GHDL.
#   make         - analyze, elaborate and run the testbench
#   make reader  - run the testbench and open the waveform in gtkwave
#                  (RUN_OPTIONS must contain --wave=${ENTITY}.ghw)
#   make clean   - remove the generated files

# Sources, in the order of analysis
VHDLS = \
	src/fft_len.vhd \
	src/icpx_pkg.vhd \
	src/butterfly.vhd \
	src/dpram_inf.vhd \
	src/icpxram.vhd \
	src/fft_engine.vhd \
	src/fft_engine_tb.vhd

#STD=standard
STD=synopsys
VSTD=93c
ENTITY=fft_engine_tb
#RUN_OPTIONS= --stop-time=10000ns --wave=${ENTITY}.ghw
RUN_OPTIONS=
#--trace-processes

# These targets are not files
.PHONY: all reader clean

all: ${ENTITY}.ghw

reader: ${ENTITY} ${ENTITY}.ghw
	gtkwave ${ENTITY}.ghw ${ENTITY}.sav

${ENTITY}: ${VHDLS}
# vhdlp -work fmf fmf/*.vhd
	mkdir -p comp
	ghdl -a --workdir=comp --std=${VSTD} --ieee=${STD} ${VHDLS}
	ghdl -e --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY}

# With the default (empty) RUN_OPTIONS the .ghw file is not written,
# so the simulation is run again by each "make"
${ENTITY}.ghw: ${ENTITY}
# ./${ENTITY} --wave=${ENTITY}.ghw ${RUN_OPTIONS} --stop-time=50000ns 2>&1 > res.txt
	./${ENTITY} ${RUN_OPTIONS}
#> res.txt 2>&1

# The comp/keep_me file only ensures creation of the comp directory,
# so it is not removed
clean:
	rm -f *.o *.vcd *.ghw events* ${ENTITY}
	for f in comp/*; do [ "$$f" = comp/keep_me ] || rm -f "$$f"; done
/versatile_fft/trunk/single_unit/comp/keep_me
0,0 → 1,31
I'm here just to ensure creation of directory...
/versatile_fft/trunk/multiple_units/src/dpram_inf.vhd
0,0 → 1,60
-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL.
-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/
-- No license information was provided by the original author.
-- Minimal modifications were introduced by me to make it suitable for my sorter.
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
 
-- Dual-port RAM with one clock shared by both ports.
-- Each port has its own write enable, address, write data and read data.
entity dp_ram_scl is
  generic (
    DATA_WIDTH : integer := 72;  -- width of one memory word in bits
    ADDR_WIDTH : integer := 10   -- address width in bits
    );
  port (
    -- common clock
    clk    : in  std_logic;
    -- Port A
    we_a   : in  std_logic;                                -- write enable
    addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- read/write address
    data_a : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_a    : out std_logic_vector(DATA_WIDTH-1 downto 0);  -- read data

    -- Port B
    we_b   : in  std_logic;                                -- write enable
    addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- read/write address
    data_b : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_b    : out std_logic_vector(DATA_WIDTH-1 downto 0)   -- read data
    );
end dp_ram_scl;
 
-- Behavioural RAM model: 2**ADDR_WIDTH words of DATA_WIDTH bits, accessed by
-- two independent clocked processes (one per port).
architecture rtl of dp_ram_scl is
  -- Shared memory
  type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0);
  -- A plain (non-protected) shared variable, so that both port processes can
  -- access the same storage.
  -- NOTE(review): this form is VHDL'93 style; later language revisions require
  -- shared variables to be of a protected type - confirm the tool settings.
  shared variable mem : mem_type;
begin

  -- Port A
  -- The write is a variable assignment, so it takes effect immediately and
  -- the read on the following line already sees the new word: when
  -- we_a = '1', q_a is loaded with data_a ("write first").
  process(clk)
  begin
    if(clk'event and clk = '1') then
      if(we_a = '1') then
        mem(conv_integer(addr_a)) := data_a;
      end if;
      q_a <= mem(conv_integer(addr_a));
    end if;
  end process;

  -- Port B
  -- Same behaviour as port A.
  -- NOTE(review): when one port writes the address the other port reads in
  -- the same clock cycle, the value read depends on the order in which the
  -- simulator runs the two processes.
  process(clk)
  begin
    if(clk'event and clk = '1') then
      if(we_b = '1') then
        mem(conv_integer(addr_b)) := data_b;
      end if;
      q_b <= mem(conv_integer(addr_b));
    end if;
  end process;

end rtl;
/versatile_fft/trunk/multiple_units/src/fft_engine.vhd
0,0 → 1,321
-------------------------------------------------------------------------------
-- Title : fft_top
-- Project : Pipelined, DP RAM based FFT processor
-------------------------------------------------------------------------------
-- File : fft_top.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-18
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This file implements a FFT processor based on a dual port RAM
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-18 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.fft_len.all;
use work.icpx.all;
use work.fft_support_pkg.all;
 
-- Pipelined FFT engine: one complex input sample per clock, two complex
-- output samples per clock.
-- NOTE(review): the generic LOG2_FFT_LEN has the same name as the constant in
-- package work.fft_len and hides it inside this unit, while FFT_LEN is still
-- taken from the package; the two presumably have to be kept equal.
entity fft_engine is
  generic (
    LOG2_FFT_LEN : integer := 4);       -- Defines order of FFT
  port (
    -- System interface
    rst_n     : in  std_logic;          -- asynchronous reset, active low
    clk       : in  std_logic;
    -- Input memory interface
    din       : in  icpx_number;        -- data input
    -- Output interface
    valid     : out std_logic;          -- driven by the start flag of the last stage
    saddr     : out unsigned(LOG2_FFT_LEN-2 downto 0);  -- output sample counter
    saddr_rev : out unsigned(LOG2_FFT_LEN-2 downto 0);  -- saddr with bit order reversed
    sout0     : out icpx_number;        -- spectrum output
    sout1     : out icpx_number         -- spectrum output
    );

end fft_engine;
 
architecture fft_engine_beh of fft_engine is
 
constant MULT_LATENCY : integer := 3;
 
-- Type used to store twiddle factors
type T_TF_TABLE is array (0 to FFT_LEN/2-1) of icpx_number;
 
-- Function initializing the twiddle factor memory
-- (during synthesis it is evaluated only during compilation,
-- so no floating point arithmetics must be synthesized!)
function tf_table_init
return t_tf_table is
variable x : real;
variable res : t_tf_table;
begin -- i1st
for i in 0 to FFT_LEN/2-1 loop
x := -real(i)*MATH_PI*2.0/(2.0 ** LOG2_FFT_LEN);
res(i) := cplx2icpx(complex'(cos(x), sin(x)));
end loop; -- i
return res;
end tf_table_init;
 
-- Twiddle factors ROM memory
constant tf_table : T_TF_TABLE := tf_table_init;
 
-- Type used to store the window function
type T_WINDOW_TABLE is array (0 to FFT_LEN-1) of icpx_number;
function tw_table_init
return T_WINDOW_TABLE is
variable x : real;
variable res : T_WINDOW_TABLE;
begin -- function tw_table_init
for i in 0 to FFT_LEN-1 loop
x := real(i)*2.0*MATH_PI/real(FFT_LEN-1);
res(i) := cplx2icpx(complex'(0.5*(1.0-cos(x)), 0.0));
--s(i) := cplx2icpx(complex'(1.0, 0.0));
end loop; -- i
return res;
end function tw_table_init;
-- Window function ROM memory
constant window_function : T_WINDOW_TABLE := tw_table_init;
 
type T_STEP_MULT is array (0 to LOG2_FFT_LEN) of integer;
function step_mult_init
return T_STEP_MULT is
variable res : T_STEP_MULT;
begin -- function step_mult_init
for i in 0 to LOG2_FFT_LEN loop
res(i) := 2**i;
end loop; -- i
return res;
end function step_mult_init;
 
component icpx_mul is
generic (
MULT_LATENCY : integer);
port (
din0 : in icpx_number;
din1 : in icpx_number;
dout : out icpx_number;
clk : in std_logic);
end component icpx_mul;
 
  -- Latency passed to the butterfly units (generic LATENCY) and used when
  -- delaying the start of the data switch / next stage
  constant BF_DELAY  : integer     := 3;
  -- Table of index multipliers used when getting the TF from the table
  constant STEP_MULT : T_STEP_MULT := step_mult_init;

  -- NOTE(review): not referenced anywhere else in this architecture
  type T_FFT_STATE is (TFS_IDLE, TFS_RUN);

  -- The input data are stored in a cyclical input buffer addressed by dptr0
  -- (address width LOG2_FFT_LEN-1).
  -- Then we feed the data to the first processing unit.

  -- Per-stage data buses: butterfly inputs, butterfly outputs, twiddle factor
  type T_FFT_DATA_ARRAY is array (LOG2_FFT_LEN downto 0) of icpx_number;
  signal in0, in1, out0, out1, tft : T_FFT_DATA_ARRAY;
  -- Delayed / direct input sample and the two window function values
  signal r_din0, r_din1, wf0, wf1  : icpx_number := icpx_zero;

  -- s_saddr: output sample counter, dptr0: input buffer pointer
  signal s_saddr, dptr0     : unsigned(LOG2_FFT_LEN-2 downto 0);
  -- Counter and flags used to start stage 0 once the input buffer has been
  -- filled and the multiplier latency has elapsed
  signal start0_del         : integer range 0 to MULT_LATENCY := 0;
  signal start0, start0_pre : std_logic                       := '0';


  -- started(i): stage i is running; start_dr(i): data switch of stage i runs
  signal started  : std_logic_vector(LOG2_FFT_LEN downto 0) := (others => '0');
  signal start_dr : std_logic_vector(LOG2_FFT_LEN downto 0) := (others => '0');

  -- Per-stage integer values: extra start delay, twiddle step counter and
  -- start delay counter
  type T_FFT_INTS is array (0 to LOG2_FFT_LEN) of integer;
  signal next_delay  : T_FFT_INTS := (others => 0);
  signal step_bf     : T_FFT_INTS := (others => 0);
  signal start_delay : T_FFT_INTS := (others => 0);
 
begin -- fft_top_beh
 
  -- We need something, to synchronize all stages after reset...
  -- This mechanism should consider the processing latency...
  -- next_delay(i) is the number of clocks (in addition to BF_DELAY) after
  -- which stage i starts stage i+1. Only entries 0 .. LOG2_FFT_LEN-2 are
  -- driven here; the remaining entries keep their initial value 0.
  g0 : for i in 0 to LOG2_FFT_LEN-2 generate
    next_delay(i) <= 2**(LOG2_FFT_LEN-2-i);
  end generate g0;
 
 
-- Processing of input data -- using the window function!
  -- Input buffer: port A writes din at address dptr0 on every clock
  -- (we_a tied to '1') and q_a delivers the word read at that same address.
  -- NOTE(review): the header of icpxram_rbw.vhd states "read before write"
  -- behaviour, so r_din0 is presumably the sample stored one full pass of
  -- dptr0 earlier - confirm against the dp_ram_rbw_icpx architecture.
  -- Port B never writes (we_b tied to '0') and its read output is left open.
  dp_ram_rbw_icpx_1 : entity work.dp_ram_rbw_icpx
    generic map (
      ADDR_WIDTH => LOG2_FFT_LEN-1)
    port map (
      clk    => clk,
      we_a   => '1',
      addr_a => std_logic_vector(dptr0),
      data_a => din,
      q_a    => r_din0,
      we_b   => '0',
      addr_b => std_logic_vector(dptr0),
      data_b => din,
      q_b    => open);
 
 
-- Process reading the input data (directly, and from delay line)
-- Additionally we consider the delay associated with multiplication
-- by the window function
ip1 : process (clk, rst_n) is
begin -- process st2
if rst_n = '0' then -- asynchronous reset (active low)
dptr0 <= (others => '0');
r_din1 <= icpx_zero;
start0 <= '0';
elsif clk'event and clk = '1' then -- rising clock edge
r_din1 <= din;
if dptr0 < (2**(LOG2_FFT_LEN-1))-1 then
dptr0 <= dptr0+1;
else
dptr0 <= (others => '0');
start0_pre <= '1';
end if;
if start0_pre = '1' then
if start0_del = MULT_LATENCY-1 then
start0 <= '1';
else
start0_del <= start0_del + 1;
end if;
end if;
end if;
end process ip1;
 
-- Process providing the values of the window function
mw1 : process (clk) is
begin -- process mw1
if clk'event and clk = '1' then -- rising clock edge
wf0 <= window_function(to_integer(dptr0));
wf1 <= window_function(to_integer(dptr0)+FFT_LEN/2);
end if;
end process mw1;
-- Now connect the output signals to the multipliers
  -- Window multiplier for the first butterfly input of stage 0:
  -- sample read from the input buffer times wf0.
  icpx_mul_1 : entity work.icpx_mul
    generic map (
      MULT_LATENCY => MULT_LATENCY)
    port map (
      din0  => r_din0,
      din1  => wf0,
      dout  => in0(0),
      rst_n => rst_n,
      clk   => clk);
  -- Window multiplier for the second butterfly input of stage 0:
  -- registered input sample times wf1.
  icpx_mul_2 : entity work.icpx_mul
    generic map (
      MULT_LATENCY => MULT_LATENCY)
    port map (
      din0  => r_din1,
      din1  => wf1,
      dout  => in1(0),
      rst_n => rst_n,
      clk   => clk);

  -- Stage 0 is started by the input sequencing process (start0)
  started(0) <= start0;
-- Now we generate blocks for different stages
-- For each stage we must maintain three counters
-- phase - 0 or 1
-- step - 0 to 2**(STAGE_N)
-- cycle - (what should this one be called?)
 
g1 : for st in 0 to LOG2_FFT_LEN-1 generate
-- Here we generate structures for a single stage of FFT
-- First the butterfly unit
butterfly_1 : entity work.butterfly
generic map (
LATENCY => BF_DELAY)
port map (
din0 => in0(st),
din1 => in1(st),
tf => tft(st),
dout0 => out0(st),
dout1 => out1(st),
clk => clk,
rst_n => rst_n
);
 
-- Process controlling selection of twiddle factor for the butterfly unit
-- after our stage is started, we increase the twiddle factor cyclically
-- Process also delays starting of data switch
 
process (clk, rst_n) is
constant STEP_BF_LIMIT : integer := 2**(LOG2_FFT_LEN-st-1)-1;
begin -- process
if rst_n = '0' then -- asynchronous reset (active low)
step_bf(st) <= 0;
start_delay(st) <= 0;
start_dr(st) <= '0';
elsif clk'event and clk = '1' then -- rising clock edge
if started(st) = '1' then
if start_delay(st) = BF_DELAY then
start_dr(st) <= '1'; -- start the "data switch"
end if;
if start_delay(st) = BF_DELAY+next_delay(st) then
started(st+1) <= '1'; -- start the next stage
end if;
if start_delay(st) /= BF_DELAY+next_delay(st) then
start_delay(st) <= start_delay(st)+1;
end if;
if step_bf(st) < STEP_BF_LIMIT then
step_bf(st) <= step_bf(st) + 1;
else
step_bf(st) <= 0;
end if;
end if;
end if;
end process;
 
-- Twiddle factor ROM
process (clk) is
begin -- process
if clk'event and clk = '1' then -- rising clock edge
tft(st) <= tf_table(step_bf(st)*STEP_MULT(st));
end if;
end process;
 
-- Next the data switch, but not for the last stage!
i3 : if st /= LOG2_FFT_LEN-1 generate
fft_switch_1 : entity work.fft_data_switch
generic map (
LOG2_FFT_LEN => LOG2_FFT_LEN,
STAGE => st)
port map (
in0 => out0(st),
in1 => out1(st),
out0 => in0(st+1),
out1 => in1(st+1),
enable => start_dr(st),
rst_n => rst_n,
clk => clk);
end generate i3;
-- In the last stage, we simply count the output samples
i4 : if st = LOG2_FFT_LEN-1 generate
process (clk, rst_n) is
begin -- process
if rst_n = '0' then -- asynchronous reset (active low)
s_saddr <= (others => '0');
elsif clk'event and clk = '1' then -- rising clock edge
if start_dr(st) = '1' then
if s_saddr = FFT_LEN/2-1 then
s_saddr <= (others => '0');
else
s_saddr <= s_saddr+1;
end if;
end if;
end if;
end process;
end generate i4;
 
end generate g1;
  -- Output interface
  -- valid follows the start flag set by the control process of the last stage
  valid     <= started(LOG2_FFT_LEN);
  -- Output sample counter and the same value with its bit order reversed
  saddr     <= s_saddr;
  saddr_rev <= rev(s_saddr);
  -- Results are taken directly from the butterfly of the last stage
  sout0     <= out0(LOG2_FFT_LEN-1);
  sout1     <= out1(LOG2_FFT_LEN-1);
end fft_engine_beh;
/versatile_fft/trunk/multiple_units/src/icpx_pkg.vhd
0,0 → 1,114
-------------------------------------------------------------------------------
-- Title : icpx
-- Project : DP RAM based FFT processor
-------------------------------------------------------------------------------
-- File : icpx_pkg.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-18
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This package defines the format used to store complex numbers
-- In this implementation we store numbers from range <-2.0, 2.0)
-- scaled to signed integers with width of ICPX_WIDTH (including
-- the sign bit)
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-18 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.fft_len.all;
-- Fixed-point complex number type and its conversion functions.
package icpx is
  -- Definition below is generated in the fft_len package
  --constant ICPX_WIDTH : integer := 16;

  -- constant defining the size of std_logic_vector
  -- needed to store the number
  constant ICPX_BV_LEN : integer := ICPX_WIDTH * 2;

  -- Complex number: real and imaginary part as signed integers of
  -- ICPX_WIDTH bits each
  type icpx_number is record
    Re : signed(ICPX_WIDTH-1 downto 0);
    Im : signed(ICPX_WIDTH-1 downto 0);
  end record;


  -- conversion functions

  -- Packs a number into a vector of 2*ICPX_WIDTH bits (Re in the upper half)
  function icpx2stlv (
    constant din : icpx_number)
    return std_logic_vector;

  -- Unpacks a vector produced by icpx2stlv
  function stlv2icpx (
    constant din : std_logic_vector)
    return icpx_number;

  -- Converts a floating point complex value, scaling by 2**(ICPX_WIDTH-2)
  function cplx2icpx (
    constant din : complex)
    return icpx_number;

  -- Returns the number with both parts equal to zero
  function icpx_zero
    return icpx_number;

end icpx;
 
package body icpx is
 
function icpx2stlv (
constant din : icpx_number)
return std_logic_vector is
 
variable vres : std_logic_vector(2*ICPX_WIDTH-1 downto 0) :=
(others => '0');
begin -- icpx2stlv
vres := std_logic_vector(din.re) & std_logic_vector(din.im);
return vres;
end icpx2stlv;
 
function stlv2icpx (
constant din : std_logic_vector)
return icpx_number is
 
variable vres : ICPX_NUMBER := icpx_zero;
 
begin -- stlv2icpx
vres.Re := signed(din(2*ICPX_WIDTH-1 downto ICPX_WIDTH));
vres.Im := signed(din(ICPX_WIDTH-1 downto 0));
return vres;
end stlv2icpx;
 
function cplx2icpx (
constant din : complex)
return icpx_number is
 
variable vres : ICPX_NUMBER := icpx_zero;
 
begin -- cplx2icpx
vres.Re := to_signed(integer(din.Re*(2.0**(ICPX_WIDTH-2))), ICPX_WIDTH);
vres.Im := to_signed(integer(din.Im*(2.0**(ICPX_WIDTH-2))), ICPX_WIDTH);
return vres;
end cplx2icpx;
 
function icpx_zero
return icpx_number is
 
variable vres : ICPX_NUMBER;
begin -- icpx_zero
 
vres.Re := (others => '0');
vres.Im := (others => '0');
return vres;
end icpx_zero;
end icpx;
/versatile_fft/trunk/multiple_units/src/butterfly_d3.vhd
0,0 → 1,107
-------------------------------------------------------------------------------
-- Title : butterfly and twiddle factor multiplier
-- Project :
-------------------------------------------------------------------------------
-- File : butterfly.vhd
-- Author : Wojciech Zabolotny wzab01<at>gmail.com
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-05-02
-- Platform :
-- Standard : VHDL'87
-------------------------------------------------------------------------------
-- Description: This block performs the butterfly calculation
-- And multiplies the result by the twiddle factor
-- Input data and output data are in our icpx_number format
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.fft_len.all;
use work.icpx.all;
-------------------------------------------------------------------------------
 
 
-- Butterfly unit with twiddle factor multiplication.
entity butterfly is
  generic (
    LATENCY : integer := 0);            -- size of the delay-line arrays in beh1
  port (
    -- Input data
    din0  : in  icpx_number;
    din1  : in  icpx_number;
    -- Twiddle factor
    tf    : in  icpx_number;
    -- Output data: real and imaginary parts
    dout0 : out icpx_number;            -- derived from din0 + din1
    dout1 : out icpx_number;            -- derived from (din0 - din1) * tf
    -- System interface
    clk   : in  std_logic;
    rst_n : in  std_logic               -- asynchronous reset, active low
    );

end butterfly;
 
-- Three-clock pipelined butterfly:
--   clock 1: sum and difference of the inputs, twiddle factor registered
--   clock 2: four partial products of the complex multiplication
--   clock 3: partial products combined, sum path delayed to match
architecture beh1 of butterfly is

  -- Sum path (one bit wider than the inputs) and its two delay registers
  signal vdr0, vdr0_d, vdr0_d2, vdi0, vdi0_d, vdi0_d2 : signed(ICPX_WIDTH downto 0);
  -- Difference path (one bit wider than the inputs)
  signal vdr1, vdi1                                   : signed(ICPX_WIDTH downto 0);

  -- Complex product and its partial products (2*ICPX_WIDTH+1 bits)
  signal sout1r, sout1i     : signed(2*ICPX_WIDTH downto 0);
  signal sout1r_a, sout1i_a : signed(2*ICPX_WIDTH downto 0);
  signal sout1r_b, sout1i_b : signed(2*ICPX_WIDTH downto 0);
  -- stf_d0: registered twiddle factor (stf is not used in this architecture)
  signal stf, stf_d0        : icpx_number;
  -- NOTE(review): vin0, vin1 and vtf are only written in the reset branch and
  -- never read, so the LATENCY generic does not change the pipeline depth.
  type T_DELIN is array (1 to LATENCY) of ICPX_NUMBER;
  signal vin0, vin1, vtf    : T_DELIN := (others => icpx_zero);
begin  -- beh1
  -- If requested, we introduce latency on the input
  -- The register balancing function will distribute it
  -- NOTE(review): the reset branch clears only the unused delay arrays; the
  -- pipeline registers below are not reset.
  p1 : process (clk, rst_n)
  begin  -- process p1
    if rst_n = '0' then                 -- asynchronous reset (active low)
      vin0 <= (others => icpx_zero);
      vin1 <= (others => icpx_zero);
      vtf  <= (others => icpx_zero);
    elsif clk'event and clk = '1' then  -- rising clock edge
      -- delayed by 1 clock
      -- inputs are sign-extended by one bit so the sum/difference cannot overflow
      vdr1     <= resize(din0.re, ICPX_WIDTH+1) - resize(din1.re, ICPX_WIDTH+1);
      vdi1     <= resize(din0.im, ICPX_WIDTH+1) - resize(din1.im, ICPX_WIDTH+1);
      vdr0     <= resize(din0.re, ICPX_WIDTH+1) + resize(din1.re, ICPX_WIDTH+1);
      vdi0     <= resize(din0.im, ICPX_WIDTH+1) + resize(din1.im, ICPX_WIDTH+1);
      stf_d0   <= tf;
      -- delayed by 2 clocks
      vdr0_d   <= vdr0;
      vdi0_d   <= vdi0;
      -- partial products of (vdr1 + j*vdi1) * (stf_d0.re + j*stf_d0.im)
      sout1r_a <= vdr1 * stf_d0.re;
      sout1r_b <= vdi1 * stf_d0.im;
      sout1i_a <= vdr1 * stf_d0.im;
      sout1i_b <= vdi1 * stf_d0.re;
      -- delayed by 3 clocks
      vdr0_d2  <= vdr0_d;
      vdi0_d2  <= vdi0_d;
      sout1r   <= sout1r_a - sout1r_b;
      sout1i   <= sout1i_a + sout1i_b;
    end if;
  end process p1;
  -- Product path: the ICPX_WIDTH-1 lowest bits are dropped (truncation, no
  -- rounding); resize then reduces the remaining ICPX_WIDTH+1 bits to
  -- ICPX_WIDTH bits.
  dout1.re <= resize(sout1r(2*ICPX_WIDTH-1 downto ICPX_WIDTH-1), ICPX_WIDTH);
  dout1.im <= resize(sout1i(2*ICPX_WIDTH-1 downto ICPX_WIDTH-1), ICPX_WIDTH);
  -- Sum path: the lowest bit is dropped, i.e. the sum is halved
  dout0.re <= resize(vdr0_d2(ICPX_WIDTH downto 1), ICPX_WIDTH);
  dout0.im <= resize(vdi0_d2(ICPX_WIDTH downto 1), ICPX_WIDTH);

  -- NOTE(review): the notes that used to follow here mentioned adding 1 for
  -- better rounding; no such rounding is present in the code above.
end beh1;
/versatile_fft/trunk/multiple_units/src/icpx_mul_d3.vhd
0,0 → 1,89
-------------------------------------------------------------------------------
-- Title : Multiplier used to multiply the input sample by the value of
-- a window function
-- Project :
-------------------------------------------------------------------------------
-- File : icpx_mul.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-05-02
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: Multiplier with latency of 3 clk
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_complex.all;
library work;
use work.fft_len.all;
use work.icpx.all;
-------------------------------------------------------------------------------
 
 
-- Complex multiplier used to multiply an input sample by a window value.
entity icpx_mul is
  generic (
    -- NOTE(review): not referenced by architecture beh1, whose pipeline has
    -- a fixed depth
    MULT_LATENCY : integer := 1);
  port (
    -- Input data
    din0  : in  icpx_number;
    din1  : in  icpx_number;
    -- Output data: real and imaginary parts
    dout  : out icpx_number;
    -- reset (asynchronous, active low) and clock
    rst_n : in  std_logic;
    clk   : in  std_logic
    );

end icpx_mul;
 
-- Three-clock pipelined complex multiplication dout = din0 * din1:
--   clock 1: inputs registered
--   clock 2: four partial products
--   clock 3: partial products combined
architecture beh1 of icpx_mul is
  -- Partial products and combined results (2*ICPX_WIDTH bits)
  signal sout1r, sout1r_a, sout1r_b, sout1i, sout1i_a, sout1i_b : signed(2*ICPX_WIDTH-1 downto 0);
  -- Registered inputs and the output before it is passed to the port
  signal s_din0, s_din1, s_out                                  : icpx_number;
begin  -- beh1

  -- Multiply the values

  -- Delay the result to allow more efficient, pipelined implementation
  -- (Register balancing in the synthesis tools should do the rest...)
  process (clk, rst_n) is
  begin  -- process
    if rst_n = '0' then                 -- asynchronous reset (active low)
      sout1r   <= (others => '0');
      sout1r_a <= (others => '0');
      sout1r_b <= (others => '0');
      sout1i   <= (others => '0');
      sout1i_a <= (others => '0');
      sout1i_b <= (others => '0');
      s_din0   <= icpx_zero;
      s_din1   <= icpx_zero;
    elsif clk'event and clk = '1' then  -- rising clock edge
      -- delayed by 1 clk
      s_din0   <= din0;
      s_din1   <= din1;
      -- delayed by 2 clk
      sout1r_a <= s_din0.re * s_din1.re;
      sout1r_b <= s_din0.im * s_din1.im;
      sout1i_a <= s_din0.re * s_din1.im;
      sout1i_b <= s_din0.im * s_din1.re;
      -- delayed by 3 clk
      sout1r   <= (sout1r_a - sout1r_b);
      sout1i   <= (sout1i_a + sout1i_b);
    end if;
  end process;
  -- Now we drop the lower bits: the ICPX_WIDTH-2 lowest bits are discarded
  -- (truncation) and resize reduces the remaining ICPX_WIDTH+1 bits to
  -- ICPX_WIDTH bits.
  s_out.re <= resize(sout1r(2*ICPX_WIDTH-2 downto ICPX_WIDTH-2),ICPX_WIDTH);
  s_out.im <= resize(sout1i(2*ICPX_WIDTH-2 downto ICPX_WIDTH-2),ICPX_WIDTH);
  dout     <= s_out;
end beh1;
 
/versatile_fft/trunk/multiple_units/src/dpram_rbw_inf.vhd
0,0 → 1,60
-- A parameterized, inferable, true dual-port, common-clock block RAM in VHDL.
-- Original file was taken from: http://danstrother.com/2010/09/11/inferring-rams-in-fpgas/
-- No license information was provided by the original author.
-- Minimal modifications were introduced by me to make it suitable for my FFT core.
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
 
-- Dual-port RAM with one clock shared by both ports; the "rbw" variant of
-- dp_ram_scl (the read is performed before the write in architecture rtl).
entity dp_ram_rbw_scl is
  generic (
    DATA_WIDTH : integer := 72;  -- width of one memory word in bits
    ADDR_WIDTH : integer := 10   -- address width in bits
    );
  port (
    -- common clock
    clk    : in  std_logic;
    -- Port A
    we_a   : in  std_logic;                                -- write enable
    addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- read/write address
    data_a : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_a    : out std_logic_vector(DATA_WIDTH-1 downto 0);  -- read data

    -- Port B
    we_b   : in  std_logic;                                -- write enable
    addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);  -- read/write address
    data_b : in  std_logic_vector(DATA_WIDTH-1 downto 0);  -- write data
    q_b    : out std_logic_vector(DATA_WIDTH-1 downto 0)   -- read data
    );
end dp_ram_rbw_scl;
 
-- Behavioural RAM model: 2**ADDR_WIDTH words of DATA_WIDTH bits, accessed by
-- two independent clocked processes (one per port).
architecture rtl of dp_ram_rbw_scl is
  -- Shared memory
  type mem_type is array ((2**ADDR_WIDTH)-1 downto 0) of std_logic_vector(DATA_WIDTH-1 downto 0);
  -- A plain (non-protected) shared variable, so that both port processes can
  -- access the same storage.
  -- NOTE(review): this form is VHDL'93 style; later language revisions require
  -- shared variables to be of a protected type - confirm the tool settings.
  shared variable mem : mem_type;
begin

  -- Port A
  -- The output is loaded from the memory BEFORE the (optional) write, so
  -- q_a receives the word that was stored at addr_a before this clock edge
  -- ("read before write").
  process(clk)
  begin
    if(clk'event and clk = '1') then
      q_a <= mem(conv_integer(addr_a));
      if(we_a = '1') then
        mem(conv_integer(addr_a)) := data_a;
      end if;
    end if;
  end process;

  -- Port B
  -- Same behaviour as port A.
  -- NOTE(review): when one port writes the address the other port reads in
  -- the same clock cycle, the value read depends on the order in which the
  -- simulator runs the two processes.
  process(clk)
  begin
    if(clk'event and clk = '1') then
      q_b <= mem(conv_integer(addr_b));
      if(we_b = '1') then
        mem(conv_integer(addr_b)) := data_b;
      end if;
    end if;
  end process;

end rtl;
/versatile_fft/trunk/multiple_units/src/fft_data_switch.vhd
0,0 → 1,189
-------------------------------------------------------------------------------
-- Title : fft_top
-- Project : Pipelined, DP RAM based FFT processor
-------------------------------------------------------------------------------
-- File : fft_switch.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-18
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This file implements a data switching block connecting
-- consecutive stages of the FFT processor based on a dual
-- port RAM
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-18 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.icpx.all;
use work.fft_support_pkg.all;
 
-- Data switch placed between two consecutive FFT stages. It reorders the
-- two sample streams with the help of a dual-port RAM.
entity fft_data_switch is

  generic (
    LOG2_FFT_LEN : integer := 4;        -- log2 of the FFT length
    STAGE        : integer := 2         -- index of the stage feeding this switch
    );
  port (
    in0    : in  icpx_number;           -- first input stream
    in1    : in  icpx_number;           -- second input stream
    out0   : out icpx_number;           -- first output stream
    out1   : out icpx_number;           -- second output stream
    enable : in  std_logic;             -- step/phase/cycle counters advance while '1'
    rst_n  : in  std_logic;             -- asynchronous reset, active low
    clk    : in  std_logic);

end fft_data_switch;
 
architecture fft_s_beh of fft_data_switch is
 
  -- Size parameters derived from the generics
  constant LOG2_STAGE_N : integer := LOG2_FFT_LEN-STAGE-1;
  constant STAGE_N      : integer := 2 ** LOG2_STAGE_N;      -- not used below
  constant STAGE_N2     : integer := 2 ** (LOG2_STAGE_N-1);  -- offset of port B address
  constant ADDR_WIDTH   : integer := LOG2_STAGE_N;           -- RAM address width
  constant STEP_LIMIT   : integer := 2**(LOG2_FFT_LEN-2-STAGE)-1;  -- last value of step
  constant CYCLE_LIMIT  : integer := 2**STAGE-1;             -- last value of cycle

  -- Inputs and phase delayed by one clock (phase_del2: by two clocks).
  -- Only in0_del and phase_del are read in this architecture.
  signal in0_del, in1_del      : icpx_number := icpx_zero;
  signal phase_del, phase_del2 : integer range 0 to 1;

  -- Control counters: step is the RAM address, phase selects the routing,
  -- cycle counts completed phase pairs (cycle and step_del are not read)
  signal step, step_del : integer range 0 to STEP_LIMIT;
  signal phase          : integer range 0 to 1;
  signal cycle          : integer range 0 to CYCLE_LIMIT;


  -- Dual-port RAM storing icpx_number words
  component dp_ram_rbw_icpx
    generic (
      ADDR_WIDTH : integer);
    port (
      clk    : in  std_logic;
      we_a   : in  std_logic;
      addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_a : in  icpx_number;
      q_a    : out icpx_number;
      we_b   : in  std_logic;
      addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_b : in  icpx_number;
      q_b    : out icpx_number);
  end component;

  -- RAM port signals: write enables, addresses, write data and read data
  signal dpr_wa, dpr_wb                 : std_logic                               := '0';
  signal dpr_aa, dpr_ab                 : std_logic_vector(ADDR_WIDTH-1 downto 0) := (others => '0');
  signal dpr_ia, dpr_ib, dpr_qa, dpr_qb : icpx_number;
begin -- fft_top_beh
 
  -- Reordering memory
  dp_ram_1 : dp_ram_rbw_icpx
    generic map (
      ADDR_WIDTH => ADDR_WIDTH)
    port map (
      clk    => clk,
      we_a   => dpr_wa,
      addr_a => dpr_aa,
      data_a => dpr_ia,
      q_a    => dpr_qa,
      we_b   => dpr_wb,
      addr_b => dpr_ab,
      data_b => dpr_ib,
      q_b    => dpr_qb);

  -- Port A always addresses word "step"
  dpr_aa <= std_logic_vector(to_unsigned(step, ADDR_WIDTH));
  -- Port B addresses word "step + STAGE_N2".
  -- It is important, that synthesis tool recognizes the addition below
  -- as a simple bit operation!
  dpr_ab <= std_logic_vector(to_unsigned(step+STAGE_N2, ADDR_WIDTH));
 
-- Output values router.
dr1 : process (dpr_qa, dpr_qb, in0_del, phase_del) is
begin -- process dr1
if phase_del = 0 then
out0 <= dpr_qb;
out1 <= dpr_qa;
else
out1 <= in0_del;
out0 <= dpr_qa;
end if;
end process dr1;
 
-- purpose: main state machine
-- type : combinational
st1 : process (in0, in1, phase) is
begin -- process st1
dpr_wa <= '0';
dpr_wb <= '0';
dpr_ia <= icpx_zero;
dpr_ib <= icpx_zero;
if phase = 0 then
dpr_ia <= in0;
dpr_ib <= in1;
dpr_wa <= '1';
dpr_wb <= '1';
else
-- phase = 1
dpr_ia <= in1;
dpr_wa <= '1';
end if;
end process st1;
 
-- We always access data on addresses:
-- "step" and "step+N/2"
 
-- Main process of our router
-- This block always introduces latency of one cycle!
process (clk, rst_n) is
begin -- process
if rst_n = '0' then -- asynchronous reset (active low)
in0_del <= icpx_zero;
in1_del <= icpx_zero;
phase_del <= 0;
phase_del2 <= 0;
step_del <= 0;
elsif clk'event and clk = '1' then -- rising clock edge
-- prepare the delayed version of control signals
in0_del <= in0;
in1_del <= in1;
phase_del <= phase;
phase_del2 <= phase_del;
step_del <= step;
end if;
end process;
 
st2 : process (clk, rst_n) is
begin -- process st2
if rst_n = '0' then -- asynchronous reset (active low)
step <= 0;
phase <= 0;
cycle <= 0;
elsif clk'event and clk = '1' then -- rising clock edge
if enable = '1' then
if step = STEP_LIMIT then
step <= 0;
if phase = 1 then
phase <= 0;
if cycle = CYCLE_LIMIT then
cycle <= 0;
else
cycle <= cycle+1;
end if;
else
phase <= 1;
end if;
else
step <= step+1;
end if;
end if;
end if;
end process st2;
 
end fft_s_beh;
/versatile_fft/trunk/multiple_units/src/fft_engine_tb.vhd
0,0 → 1,150
-------------------------------------------------------------------------------
-- Title : Testbench for design "fft_top"
-- Project :
-------------------------------------------------------------------------------
-- File : fft_top_tb.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-21
-- Last update: 2015-03-24
-- Platform :
-- Standard : VHDL'87
-------------------------------------------------------------------------------
-- Description:
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-21 1.0 wzab Created
-------------------------------------------------------------------------------
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library std;
use std.textio.all;
library work;
use work.fft_len.all;
use work.icpx.all;
use work.fft_support_pkg.all;
 
-------------------------------------------------------------------------------
 
-- Testbench for fft_engine; self-contained, hence no ports or generics.
entity fft_engine_tb is

end fft_engine_tb;
 
-------------------------------------------------------------------------------
 
-- Testbench architecture: feeds the samples from "data_in.txt" to the FFT
-- engine (one per clock) and writes every complete set of results to
-- "data_out.txt".
architecture beh1 of fft_engine_tb is

  -- Buffer for one complete set of output samples
  type T_OUT_DATA is array (0 to FFT_LEN-1) of icpx_number;

  -- Input sample counter; its range follows the FFT length instead of being
  -- fixed to 16 samples
  signal dptr                   : integer range 0 to FFT_LEN-1;
  signal din, sout0, sout1      : icpx_number;
  signal saddr, saddr_rev       : unsigned(LOG2_FFT_LEN-2 downto 0);
  signal end_of_data, end_sim   : boolean := false;

  component fft_engine is
    generic (
      LOG2_FFT_LEN : integer);
    port (
      rst_n     : in  std_logic;
      clk       : in  std_logic;
      din       : in  icpx_number;
      valid     : out std_logic;
      saddr     : out unsigned(LOG2_FFT_LEN-2 downto 0);
      saddr_rev : out unsigned(LOG2_FFT_LEN-2 downto 0);
      sout0     : out icpx_number;
      sout1     : out icpx_number
      );
  end component fft_engine;

  -- component ports
  signal rst_n : std_logic := '0';

  -- clock
  signal Clk : std_logic := '1';

begin  -- beh1

  -- component instantiation
  fft_engine_1 : entity work.fft_engine
    generic map (
      LOG2_FFT_LEN => LOG2_FFT_LEN)
    port map (
      rst_n     => rst_n,
      clk       => clk,
      din       => din,
      valid     => open,                -- not used by this testbench
      saddr     => saddr,
      saddr_rev => saddr_rev,
      sout0     => sout0,
      sout1     => sout1);

  -- clock generation (20 ns period), stopped when the simulation ends
  Clk <= not Clk after 10 ns when end_sim = false else '0';

  -- waveform generation
  WaveGen_Proc : process
    file data_in         : text open read_mode is "data_in.txt";
    variable input_line  : line;
    file data_out        : text open write_mode is "data_out.txt";
    variable output_line : line;
    -- Initialised so that a defined value is fed to the core even when the
    -- input file contains no samples
    variable tre, tim    : real   := 0.0;
    constant sep         : string := " ";
    variable vout        : T_OUT_DATA;
  begin
    -- Keep the reset asserted for the first clock cycles
    wait until Clk = '1';
    wait for 15 ns;
    wait until clk = '0';
    wait until clk = '1';
    rst_n <= '1';
    dptr  <= 0;
    l1 : while not end_sim loop
      -- Read the next sample; after the end of the file the last sample is
      -- repeated until the pending results have been written
      if not endfile(data_in) then
        readline(data_in, input_line);
        read(input_line, tre);
        read(input_line, tim);
      else
        end_of_data <= true;
      end if;
      din <= cplx2icpx(complex'(tre, tim));
      if dptr < FFT_LEN-1 then
        dptr <= dptr + 1;
      else
        dptr <= 0;
      end if;
      -- Copy the data produced by the core to the output buffer
      -- (sout0 goes to the lower half, sout1 to the upper half)
      vout(to_integer(saddr_rev))       := sout0;
      vout(to_integer('1' & saddr_rev)) := sout1;
      -- If the full set of data is calculated, write the output buffer
      if saddr = FFT_LEN/2-1 then
        write(output_line, string'("FFT RESULT BEGIN"));
        writeline(data_out, output_line);
        for i in 0 to FFT_LEN-1 loop
          write(output_line, integer'image(to_integer(vout(i).re)));
          write(output_line, sep);
          write(output_line, integer'image(to_integer(vout(i).im)));
          writeline(data_out, output_line);
        end loop;  -- i
        write(output_line, string'("FFT RESULT END"));
        writeline(data_out, output_line);
        exit l1 when end_of_data;
      end if;
      wait until clk = '0';
      wait until clk = '1';
    end loop l1;
    -- signal end of the simulation
    end_sim <= true;
    -- Suspend forever, so that the process does not start over again
    wait;
  end process WaveGen_Proc;


end beh1;
 
 
/versatile_fft/trunk/multiple_units/src/fft_support.vhd
0,0 → 1,68
-------------------------------------------------------------------------------
-- Title : vhdl_support
-- Project :
-------------------------------------------------------------------------------
-- File : fft_support.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-20
-- Last update: 2014-05-02
-- Platform :
-- Standard : VHDL'87
-------------------------------------------------------------------------------
-- Description:
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-20 1.0 wzab Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use ieee.math_complex.all;
library work;
use work.icpx.all;
 
-- Helper functions shared by the FFT design and its testbench.
package fft_support_pkg is

  -- In the synthesizable version, we should replace functions
  -- with precalculated tables (probably)

  -- Returns the argument with the order of its bits reversed; the result
  -- has the same index range as the argument.
  function rev(a : in unsigned)
    return unsigned;
  function rev(a : in std_logic_vector)
    return std_logic_vector;

end fft_support_pkg;
 
package body fft_support_pkg is

  -- Element-order reversal of a std_logic_vector.
  -- The result has the same index range as "a"; the element stored at a
  -- given index is read from the mirrored index of "a".
  function rev(a : in std_logic_vector)
    return std_logic_vector is
    variable mirrored : std_logic_vector(a'range);
  begin
    for idx in a'range loop
      -- (idx - a'low) is the distance from the low end of the range;
      -- fetch the element lying that far from the high end.
      mirrored(idx) := a(a'high - (idx - a'low));
    end loop;
    return mirrored;
  end function rev;

  -- Element-order reversal of an unsigned vector.
  -- Same contract as the std_logic_vector overload above.
  function rev(a : in unsigned)
    return unsigned is
    variable mirrored : unsigned(a'range);
  begin
    for idx in a'range loop
      mirrored(idx) := a(a'high - (idx - a'low));
    end loop;
    return mirrored;
  end function rev;

end package body fft_support_pkg;
/versatile_fft/trunk/multiple_units/src/fft_len.vhd
0,0 → 1,5
-- FFT size parameters.
-- This text is written by test_fft.m (it opens src/fft_len.vhd for writing and
-- prints exactly these lines), so manual edits are lost when that script runs.
package fft_len is
-- log2 of the FFT length
constant LOG2_FFT_LEN : integer := 4;
-- FFT length, derived from LOG2_FFT_LEN
constant FFT_LEN : integer := 2 ** LOG2_FFT_LEN;
-- test_fft.m describes this as the number of bits used to represent the
-- real and the imaginary part of a complex number
constant ICPX_WIDTH : integer := 16;
end fft_len;
/versatile_fft/trunk/multiple_units/src/icpxram_rbw.vhd
0,0 → 1,99
-------------------------------------------------------------------------------
-- Title : icpxram
-- Project :
-------------------------------------------------------------------------------
-- File : icpxram_rbw.vhd
-- Author : Wojciech Zabolotny
-- Company :
-- License : BSD
-- Created : 2014-01-19
-- Last update: 2014-04-25
-- Platform :
-- Standard : VHDL'93
-------------------------------------------------------------------------------
-- Description: This block stores complex numbers whose real and imaginary
-- parts are signed integers with a defined number of bits.
-- This memory implements "read before write" behaviour!
-------------------------------------------------------------------------------
-- Copyright (c) 2014
-------------------------------------------------------------------------------
-- Revisions :
-- Date Version Author Description
-- 2014-01-19 1.0 wzab Created
-------------------------------------------------------------------------------
 
 
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
library work;
use work.icpx.all;
 
-- Dual-port RAM with a common clock whose data ports carry icpx_number
-- values instead of raw bit vectors. The architecture in this file converts
-- the data to/from std_logic_vector and forwards every port to a
-- dp_ram_rbw_scl component.
entity dp_ram_rbw_icpx is
generic (
-- Width of both address buses in bits
-- (presumably the memory holds 2**ADDR_WIDTH words - confirm in dp_ram_rbw_scl)
ADDR_WIDTH : integer := 10
);
port (
-- common clock (shared by both ports)
clk : in std_logic;
-- Port A: write enable, address, write data, read data
we_a : in std_logic;
addr_a : in std_logic_vector(ADDR_WIDTH-1 downto 0);
data_a : in icpx_number;
q_a : out icpx_number;
 
-- Port B: write enable, address, write data, read data
we_b : in std_logic;
addr_b : in std_logic_vector(ADDR_WIDTH-1 downto 0);
data_b : in icpx_number;
q_b : out icpx_number
);
end dp_ram_rbw_icpx;
 
architecture rtl of dp_ram_rbw_icpx is

  -- Bit-vector RAM that actually stores the data; this architecture only
  -- adapts its data ports to the icpx_number type.
  component dp_ram_rbw_scl
    generic (
      DATA_WIDTH : integer;
      ADDR_WIDTH : integer);
    port (
      clk    : in  std_logic;
      we_a   : in  std_logic;
      addr_a : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_a : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      q_a    : out std_logic_vector(DATA_WIDTH-1 downto 0);
      we_b   : in  std_logic;
      addr_b : in  std_logic_vector(ADDR_WIDTH-1 downto 0);
      data_b : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      q_b    : out std_logic_vector(DATA_WIDTH-1 downto 0));
  end component;

  -- std_logic_vector images of the complex-valued data ports
  -- (ICPX_BV_LEN and the conversion functions are not declared in this
  -- file; presumably they come from work.icpx)
  signal din_a_slv  : std_logic_vector(ICPX_BV_LEN-1 downto 0);
  signal din_b_slv  : std_logic_vector(ICPX_BV_LEN-1 downto 0);
  signal dout_a_slv : std_logic_vector(ICPX_BV_LEN-1 downto 0);
  signal dout_b_slv : std_logic_vector(ICPX_BV_LEN-1 downto 0);

begin

  -- Write side: flatten the complex inputs into bit vectors
  din_a_slv <= icpx2stlv(data_a);
  din_b_slv <= icpx2stlv(data_b);

  -- Storage: clock, write enables and addresses pass straight through
  dp_ram_scl_1 : dp_ram_rbw_scl
    generic map (
      DATA_WIDTH => ICPX_BV_LEN,
      ADDR_WIDTH => ADDR_WIDTH)
    port map (
      clk    => clk,
      we_a   => we_a,
      addr_a => addr_a,
      data_a => din_a_slv,
      q_a    => dout_a_slv,
      we_b   => we_b,
      addr_b => addr_b,
      data_b => din_b_slv,
      q_b    => dout_b_slv);

  -- Read side: rebuild complex numbers from the RAM outputs
  q_a <= stlv2icpx(dout_a_slv);
  q_b <= stlv2icpx(dout_b_slv);

end architecture rtl;
/versatile_fft/trunk/multiple_units/test_fft.m
0,0 → 1,57
% GNU Octave test driver for the FFT core.
% It writes src/fft_len.vhd, generates random complex input data
% (data_in.txt), computes the reference windowed FFTs (data_oct.txt),
% runs the simulation through make and finally opens a diff of the
% reference results against data_out.txt.
%
% Modify the length of the FFT in the line below
log2fftlen = 4;
% Number of bits used to represent the real and the imaginary part of
% the complex number. This script writes it to src/fft_len.vhd as
% ICPX_WIDTH (see below).
% NOTE(review): the original note asked to also modify the ICPX_WIDTH
% constant in the icpx_pkg.vhd file by hand - confirm whether that is
% still required.
icpx_width = 16;
% Do not modify below
% Write the package defining length of the FFT
fo=fopen("src/fft_len.vhd","w");
if fo < 0
  error("test_fft: cannot open src/fft_len.vhd for writing");
end
fprintf(fo,"package fft_len is\n");
fprintf(fo,"constant LOG2_FFT_LEN : integer := %d;\n",log2fftlen);
fprintf(fo,"constant FFT_LEN : integer := 2 ** LOG2_FFT_LEN;\n");
fprintf(fo,"constant ICPX_WIDTH : integer := %d;\n",icpx_width);
fprintf(fo,"end fft_len;\n");
fclose(fo);
fftlen=2 ** log2fftlen;
%Generate the data. Now it is only a noise, but you
%can generate something with periodic components
%It is important, that values fit in range of representation
%(-2,2) for standard implementation.
%May be changed if you redefine our icpx_number format
%To check that calculation of spectrum for overlapping windows
%works correctly, we generate a longer data stream...
len_of_data=fftlen*5;
re=3*rand(1,len_of_data)-1.5;
im=3*rand(1,len_of_data)-1.5;
fo=fopen("data_in.txt","w");
if fo < 0
  error("test_fft: cannot open data_in.txt for writing");
end
for i=1:len_of_data
  fprintf(fo,"%g %g\n",re(i),im(i));
end
fclose(fo);
%Create the Hann window.
%Remember, that you must use the same window function
%in your VHDL code!
x=0:(fftlen-1);
hann=0.5*(1-cos(2*pi*x/(fftlen-1)));
%Now we calculate the FFT in octave
%One result block is written per window; consecutive windows are
%shifted by half of the FFT length (overlapping windows).
scale = 2 ** (icpx_width-2);
fo=fopen("data_oct.txt","w");
if fo < 0
  error("test_fft: cannot open data_oct.txt for writing");
end
for i=1:(fftlen/2):(len_of_data-fftlen)
  x=i:(i+fftlen-1);
  di = (re(x)+j*im(x))*scale/fftlen;
  fr = fft(di.*hann);
  % fr = fft(di);
  fprintf(fo,"FFT RESULT BEGIN\n");
  for k=1:fftlen
    fprintf(fo,"%d %d\n",floor(real(fr(k))),floor(imag(fr(k))));
  end
  fprintf(fo,"FFT RESULT END\n");
end
fclose(fo);
%Run the simulation
%The second argument (false) keeps the output of make on the console
%instead of capturing it. Stop here if the build or the simulation
%failed: "make clean" does not remove data_out.txt, so the comparison
%below would otherwise be done against results of an earlier run.
status = system("make clean; make", false);
if status ~= 0
  error("test_fft: 'make clean; make' failed with exit status %d", status);
end
%Compare results calculated in octave and in our IP core
system("vim -d data_oct.txt data_out.txt");
/versatile_fft/trunk/multiple_units/makefile
0,0 → 1,37
# Builds the FFT engine testbench with GHDL and runs it, producing a
# GHW waveform dump.
#   make         - analyze, elaborate and run the simulation
#   make reader  - as above, then open the waveform in gtkwave
#   make clean   - remove generated files

# VHDL sources; ghdl -a analyzes them in the order given here
VHDLS = \
	src/fft_len.vhd \
	src/icpx_pkg.vhd \
	src/fft_support.vhd \
	src/dpram_rbw_inf.vhd \
	src/icpxram_rbw.vhd \
	src/butterfly_d3.vhd \
	src/icpx_mul_d3.vhd \
	src/dpram_inf.vhd \
	src/fft_data_switch.vhd \
	src/fft_engine.vhd \
	src/fft_engine_tb.vhd

#STD=standard
STD=synopsys
VSTD=93c
ENTITY=fft_engine_tb
#RUN_OPTIONS= --stop-time=1000ns --wave=${ENTITY}.ghw
RUN_OPTIONS= --wave=${ENTITY}.ghw
#--trace-signals --trace-processes
#RUN_OPTIONS=
#--trace-processes

# These targets name actions, not files
.PHONY: all reader clean

all: ${ENTITY}.ghw
reader: ${ENTITY} ${ENTITY}.ghw
	gtkwave ${ENTITY}.ghw ${ENTITY}.sav
${ENTITY}: ${VHDLS}
# vhdlp -work fmf fmf/*.vhd
# The GHDL work directory must exist before analysis
	mkdir -p comp
	ghdl -a -g --mb-comments --workdir=comp --std=${VSTD} --ieee=${STD} ${VHDLS}
	ghdl -e -g --mb-comments --workdir=comp --std=${VSTD} -fexplicit --ieee=${STD} ${ENTITY}
${ENTITY}.ghw: ${ENTITY}
# ./${ENTITY} --wave=${ENTITY}.ghw ${RUN_OPTIONS} --stop-time=50000ns 2>&1 > res.txt
	./${ENTITY} ${RUN_OPTIONS}
#> res.txt 2>&1
clean:
# Empty the work directory but keep the comp/keep_me placeholder file
	for f in comp/*; do [ "$$f" = comp/keep_me ] || rm -f "$$f"; done
	rm -f *.o *.vcd *.ghw events* ${ENTITY}
/versatile_fft/trunk/multiple_units/comp/keep_me
0,0 → 1,37
I'm here just to ensure creation of directory...

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.