OpenCores
URL https://opencores.org/ocsvn/spi_master_slave/spi_master_slave/trunk

Subversion Repositories spi_master_slave

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /spi_master_slave/trunk/rtl
    from Rev 4 to Rev 5
    Reverse comparison

Rev 4 → Rev 5

/spi_loopback.ucf
1,7 → 1,5
 
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
NET "m_spi_2x_clk_i" TNM_NET = m_spi_2x_clk_i;
TIMESPEC TS_m_spi_2x_clk_i = PERIOD "m_spi_2x_clk_i" 15 ns HIGH 50%;
NET "s_clk_i" TNM_NET = s_clk_i;
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 8 ns HIGH 50%;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
/spi_loopback.vhd
9,6 → 9,8
-- Target Devices:
-- Tool versions:
-- Description:
-- This is a simple wrapper for the 'spi_master' and 'spi_slave' cores, to synthesize the 2 cores and
-- test them in the simulator.
--
-- Dependencies:
--
20,6 → 22,9
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
 
library work;
use work.all;
 
-- Uncomment the following library declaration if using
-- arithmetic functions with Signed or Unsigned values
--use IEEE.NUMERIC_STD.ALL;
29,8 → 34,6
--library UNISIM;
--use UNISIM.VComponents.all;
 
--library WORK;
--use WORK.ALL;
 
entity spi_loopback is
Generic (
37,11 → 40,11
N : positive := 32; -- 32bit serial word length is default
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default)
CPHA : std_logic := '1'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 2 -- prefetch lookahead cycles
PREFETCH : positive := 2; -- prefetch lookahead cycles
SPI_2X_CLK_DIV : positive := 5 -- for a 100MHz sclk_i, yields a 10MHz SCK
);
Port(
----------------MASTER-----------------------
m_spi_2x_clk_i : IN std_logic;
m_clk_i : IN std_logic;
m_rst_i : IN std_logic;
m_spi_ssel_o : OUT std_logic;
84,125 → 87,62
end spi_loopback;
 
architecture Structural of spi_loopback is
begin
 
COMPONENT spi_master
GENERIC (
N : positive := 32;
CPOL : std_logic := '0';
CPHA : std_logic := '1';
PREFETCH : positive := 2
);
PORT(
spi_2x_clk_i : IN std_logic;
clk_i : IN std_logic;
rst_i : IN std_logic;
spi_ssel_o : OUT std_logic;
spi_sck_o : OUT std_logic;
spi_mosi_o : OUT std_logic;
spi_miso_i : IN std_logic;
di_req_o : OUT std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
do_valid_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
----- debug -----
do_transfer_o : OUT std_logic;
wren_o : OUT std_logic;
wren_ack_o : OUT std_logic;
rx_bit_reg_o : OUT std_logic;
state_dbg_o : OUT std_logic_vector(5 downto 0);
core_clk_o : OUT std_logic;
core_n_clk_o : OUT std_logic;
sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0)
);
END COMPONENT;
--=============================================================================================
-- Component instantiation for the SPI master port
--=============================================================================================
-- Direct entity instantiation of the 'rtl' architecture of work.spi_master (no component declaration needed).
-- The wrapper generics N, CPOL, CPHA, PREFETCH and SPI_2X_CLK_DIV are forwarded unchanged, and every
-- associated master port is wired to the wrapper signal of the same name with an 'm_' prefix.
-- Both master clock inputs ('sclk_i' and 'pclk_i') are driven from the single wrapper clock 'm_clk_i'.
-- NOTE(review): the 'core_ce_o' and 'core_n_ce_o' debug outputs of spi_master are not associated in this port map.
Inst_spi_master: entity work.spi_master(rtl)
generic map (N => N, CPOL => CPOL, CPHA => CPHA, PREFETCH => PREFETCH, SPI_2X_CLK_DIV => SPI_2X_CLK_DIV)
port map(
sclk_i => m_clk_i, -- system clock is used for serial and parallel ports
pclk_i => m_clk_i,
rst_i => m_rst_i,
-- spi bus lines
spi_ssel_o => m_spi_ssel_o,
spi_sck_o => m_spi_sck_o,
spi_mosi_o => m_spi_mosi_o,
spi_miso_i => m_spi_miso_i,
-- parallel write interface (data request, data in, write enable)
di_req_o => m_di_req_o,
di_i => m_di_i,
wren_i => m_wren_i,
-- parallel read interface (data valid strobe, data out)
do_valid_o => m_do_valid_o,
do_o => m_do_o,
----- debug -----
do_transfer_o => m_do_transfer_o,
wren_o => m_wren_o,
wren_ack_o => m_wren_ack_o,
rx_bit_reg_o => m_rx_bit_reg_o,
state_dbg_o => m_state_dbg_o,
core_clk_o => m_core_clk_o,
core_n_clk_o => m_core_n_clk_o,
sh_reg_dbg_o => m_sh_reg_dbg_o
);
 
COMPONENT spi_slave
GENERIC (
N : positive := 32;
CPOL : std_logic := '0';
CPHA : std_logic := '1';
PREFETCH : positive := 2
--=============================================================================================
-- Component instantiation for the SPI slave port
--=============================================================================================
-- Direct entity instantiation of the 'rtl' architecture of work.spi_slave (no component declaration needed).
-- The wrapper generics N, CPOL, CPHA and PREFETCH are forwarded unchanged, and every associated slave port
-- is wired to the wrapper signal of the same name with an 's_' prefix.
Inst_spi_slave: entity work.spi_slave(rtl)
generic map (N => N, CPOL => CPOL, CPHA => CPHA, PREFETCH => PREFETCH)
port map(
clk_i => s_clk_i,
-- spi bus lines
spi_ssel_i => s_spi_ssel_i,
spi_sck_i => s_spi_sck_i,
spi_mosi_i => s_spi_mosi_i,
spi_miso_o => s_spi_miso_o,
-- parallel write interface (data request, data in, write enable)
di_req_o => s_di_req_o,
di_i => s_di_i,
wren_i => s_wren_i,
-- parallel read interface (data valid strobe, data out)
do_valid_o => s_do_valid_o,
do_o => s_do_o,
----- debug -----
do_transfer_o => s_do_transfer_o,
wren_o => s_wren_o,
wren_ack_o => s_wren_ack_o,
rx_bit_reg_o => s_rx_bit_reg_o,
state_dbg_o => s_state_dbg_o
-- the shift register debug output association is left commented out:
-- sh_reg_dbg_o => s_sh_reg_dbg_o
);
PORT(
clk_i : IN std_logic;
spi_ssel_i : IN std_logic;
spi_sck_i : IN std_logic;
spi_mosi_i : IN std_logic;
spi_miso_o : OUT std_logic;
di_req_o : OUT std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
do_valid_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
----- debug -----
do_transfer_o : OUT std_logic;
wren_o : OUT std_logic;
wren_ack_o : OUT std_logic;
rx_bit_reg_o : OUT std_logic;
state_dbg_o : OUT std_logic_vector(5 downto 0)
-- sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0)
);
END COMPONENT;
 
begin
 
Inst_spi_master: spi_master
GENERIC MAP (
N => N,
CPOL => CPOL,
CPHA => CPHA,
PREFETCH => PREFETCH)
PORT MAP(
spi_2x_clk_i => m_spi_2x_clk_i,
clk_i => m_clk_i,
rst_i => m_rst_i,
spi_ssel_o => m_spi_ssel_o,
spi_sck_o => m_spi_sck_o,
spi_mosi_o => m_spi_mosi_o,
spi_miso_i => m_spi_miso_i,
di_req_o => m_di_req_o,
di_i => m_di_i,
wren_i => m_wren_i,
do_valid_o => m_do_valid_o,
do_o => m_do_o,
----- debug -----
do_transfer_o => m_do_transfer_o,
wren_o => m_wren_o,
wren_ack_o => m_wren_ack_o,
rx_bit_reg_o => m_rx_bit_reg_o,
state_dbg_o => m_state_dbg_o,
core_clk_o => m_core_clk_o,
core_n_clk_o => m_core_n_clk_o,
sh_reg_dbg_o => m_sh_reg_dbg_o
);
 
Inst_spi_slave: spi_slave
GENERIC MAP (
N => N,
CPOL => CPOL,
CPHA => CPHA,
PREFETCH => PREFETCH)
PORT MAP(
clk_i => s_clk_i,
spi_ssel_i => s_spi_ssel_i,
spi_sck_i => s_spi_sck_i,
spi_mosi_i => s_spi_mosi_i,
spi_miso_o => s_spi_miso_o,
di_req_o => s_di_req_o,
di_i => s_di_i,
wren_i => s_wren_i,
do_valid_o => s_do_valid_o,
do_o => s_do_o,
----- debug -----
do_transfer_o => s_do_transfer_o,
wren_o => s_wren_o,
wren_ack_o => s_wren_ack_o,
rx_bit_reg_o => s_rx_bit_reg_o,
state_dbg_o => s_state_dbg_o
-- sh_reg_dbg_o => s_sh_reg_dbg_o
);
 
end Structural;
 
 
/spi_master.vhd
9,14 → 9,29
-- Description:
--
-- This block is the SPI master interface, implemented in one single entity.
-- All internal core operations are synchronous to a spi base clock, that generates the spi sck clock directly.
-- All parallel i/o interface operations are synchronous to a system clock, that can be asynchronous to the spi base clock.
-- Fully pipelined circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two clock domains.
-- All internal core operations are synchronous to the 'sclk_i', and a spi base clock is generated by dividing sclk_i down to
-- a frequency that is 2x the spi SCK line frequency. The divider value is passed as a generic parameter during instantiation.
-- All parallel i/o interface operations are synchronous to the 'pclk_i' high speed clock, that can be asynchronous to the serial
-- 'sclk_i' clock.
-- Fully pipelined cross-clock circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two
-- clock domains.
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o.
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch
-- signaling ('PREFETCH').
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), lookahead prefetch signaling
-- ('PREFETCH'), and spi base clock division from sclk_i ('SPI_2X_CLK_DIV').
--
-- SPI CLOCK GENERATION
-- ====================
--
-- The clock generation for the SPI SCK is derived from the high-speed 'sclk_i' clock. The core divides this reference
-- clock to form the SPI base clock, by the 'SPI_2X_CLK_DIV' generic parameter. The user must set the divider value for the
-- SPI_2X clock, which is 2x the desired SCK frequency.
-- All registers in the core are clocked by the high-speed clocks, and clock enables are used to run the FSM and other logic
-- at lower rates. This architecture preserves FPGA clock resources like global clock buffers, and avoids path delays caused
-- by combinatorial clock divider outputs.
-- The core has async clock domain circuitry to handle asynchronous clocks for the SPI and parallel interfaces.
--
-- PARALLEL WRITE INTERFACE
-- ========================
-- The parallel interface has an input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'. 'di_req_o' is a look ahead data request line,
-- that is set 'PREFETCH' clock cycles in advance to synchronize a pipelined memory or fifo to present the
33,20 → 48,21
-- PARALLEL WRITE SEQUENCE
-- =======================
-- __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- pclk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- ___________
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i'
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'pclk_i'
-- ______________ ___________________________...
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'pclk_i' rising edge
-- _______
-- wren_i __________________________/ \_______... -- user strobes 'wren_i' for one cycle of 'clk_i'
-- wren_i __________________________/ \_______... -- user strobes 'wren_i' for one cycle of 'pclk_i'
--
--
-- PARALLEL READ INTERFACE
-- =======================
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received,
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_2x_clk_i'.
-- The signal 'do_valid_o' is set one 'spi_2x_clk_i' clock after, to directly drive a synchronous memory or fifo write enable.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'pclk_i'.
-- When the interface is idle, data at the 'do_o' port holds the last word received.
--
-- PARALLEL READ SEQUENCE
54,12 → 70,12
-- ______ ______ ______ ______
-- spi_2x_clk_i bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi 2x base clock
-- _ __ __ __ __ __ __ __ __
-- clk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock
-- pclk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock (may be async to sclk_i)
-- _____________ _____________________________________... -- 1) rx data is transferred to 'do_buffer_reg'
-- do_o ___old_data__X__________new_data___________________... -- after last rx bit, at rising 'spi_2x_clk_i'.
-- ____________
-- do_valid_o ____________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles
-- -- on the 3rd 'clk_i' rising edge.
-- do_valid_o ____________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'pclk_i' cycles
-- -- on the 3rd 'pclk_i' rising edge.
--
--
-- The propagation delay of spi_sck_o and spi_mosi_o, referred to the internal clock, is balanced by similar path delays,
120,20 → 136,29
--
--
-----------------------------------------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.std_logic_unsigned.all;
 
--================================================================================================================
-- There are several output ports that are used to simulate and verify the core operation.
-- Do not map any signals to the unused ports, and the synthesis tool will remove the related interfacing
-- circuitry.
-- The same is valid for the transmit and receive ports. If the receive ports are not mapped, the
-- synthesis tool will remove the receive logic from the generated circuitry.
--================================================================================================================
 
entity spi_master is
Generic (
N : positive := 32; -- 32bit serial word length is default
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default)
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 2); -- prefetch lookahead cycles
PREFETCH : positive := 2; -- prefetch lookahead cycles
SPI_2X_CLK_DIV : positive := 5); -- for a 100MHz sclk_i, yields a 10MHz SCK
Port (
spi_2x_clk_i : in std_logic := 'X'; -- spi base reference clock: 2x 'spi_sck_o'
clk_i : in std_logic := 'X'; -- parallel interface clock
sclk_i : in std_logic := 'X'; -- high-speed serial interface system clock
pclk_i : in std_logic := 'X'; -- high-speed parallel interface system clock
rst_i : in std_logic := 'X'; -- reset core
spi_ssel_o : out std_logic; -- spi bus slave select line
spi_sck_o : out std_logic; -- spi bus sck
152,6 → 177,8
state_dbg_o : out std_logic_vector (5 downto 0); -- debug: internal state register
core_clk_o : out std_logic;
core_n_clk_o : out std_logic;
core_ce_o : out std_logic;
core_n_ce_o : out std_logic;
sh_reg_dbg_o : out std_logic_vector (N-1 downto 0) -- debug: internal shift register
);
end spi_master;
160,15 → 187,19
-- this architecture is a pipelined register-transfer description.
-- all signals are clocked at the rising edge of the system clock 'spi_2x_clk_i'.
--================================================================================================================
architecture RTL of spi_master is
architecture rtl of spi_master is
-- core clocks, generated from 'spi_2x_clk_i': initialized to differential values
signal core_clk : std_logic := '0'; -- continuous fsm core clock, positive logic
signal core_n_clk : std_logic := '1'; -- continuous fsm core clock, negative logic
signal core_clk : std_logic := '0'; -- continuous core clock, positive logic
signal core_n_clk : std_logic := '1'; -- continuous core clock, negative logic
signal core_ce : std_logic := '0'; -- core clock enable, positive logic
signal core_n_ce : std_logic := '1'; -- core clock enable, negative logic
-- spi bus clock, generated from the CPOL selected core clock polarity
signal spi_clk : std_logic; -- spi bus output clock
-- core fsm clock
signal fsm_clk : std_logic; -- data change clock: fsm registers clocked at rising edge
signal samp_clk : std_logic; -- data sampling clock: input serial data clocked at rising edge
signal spi_2x_ce : std_logic := '1'; -- spi_2x clock enable
signal spi_clk : std_logic := '0'; -- spi bus output clock
signal spi_clk_reg : std_logic := '0'; -- output pipeline delay for spi sck
-- core fsm clock enables
signal fsm_ce : std_logic := '1'; -- fsm clock enable
signal samp_ce : std_logic := '1'; -- data sampling clock enable
--
-- GLOBAL RESET:
-- all signals are initialized to zero at GSR (global set/reset) by giving explicit
179,17 → 210,17
-- By using GSR for the initialization, and reducing RESET local init to the bare
-- essential, the model achieves better LUT/FF packing and CLB usability.
--
-- internal state signals for register and combinational stages
-- internal state signals for register and combinatorial stages
signal state_next : natural range N+1 downto 0 := 0;
signal state_reg : natural range N+1 downto 0 := 0;
-- shifter signals for register and combinational stages
-- shifter signals for register and combinatorial stages
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal sh_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- input bit sampled buffer
signal rx_bit_reg : std_logic := '0';
-- buffered di_i data signals for register and combinational stages
-- buffered di_i data signals for register and combinatorial stages
signal di_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal wren_i stretcher for fsm combinational stage
-- internal wren_i stretcher for fsm combinatorial stage
signal wren : std_logic := '0';
signal wren_ack_next : std_logic := '0';
signal wren_ack_reg : std_logic := '0';
199,7 → 230,7
-- internal SCK enable control signals
signal ena_sck_next : std_logic := '0';
signal ena_sck_reg : std_logic := '0';
-- buffered do_o data signals for register and combinational stages
-- buffered do_o data signals for register and combinatorial stages
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_buffer_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal signal to flag transfer to do_buffer_reg
208,19 → 239,19
-- internal input data request signal
signal di_req_next : std_logic := '0';
signal di_req_reg : std_logic := '0';
-- cross-clock do_valid_o pipeline
signal do_valid_next : std_logic := '0';
-- cross-clock do_transfer_reg -> do_valid_o_reg pipeline
signal do_valid_A : std_logic := '0';
signal do_valid_B : std_logic := '0';
signal do_valid_C : std_logic := '0';
signal do_valid_D : std_logic := '0';
signal do_valid_next : std_logic := '0';
signal do_valid_o_reg : std_logic := '0';
-- cross-clock di_req_o pipeline
signal di_req_o_next : std_logic := '1';
-- cross-clock di_req_reg -> di_req_o_reg pipeline
signal di_req_o_A : std_logic := '0';
signal di_req_o_B : std_logic := '0';
signal di_req_o_C : std_logic := '0';
signal di_req_o_D : std_logic := '0';
signal di_req_o_next : std_logic := '1';
signal di_req_o_reg : std_logic := '1';
begin
--=============================================================================================
228,37 → 259,69
--=============================================================================================
-- minimum word width is 8 bits
assert N >= 8
report "Generic parameter 'N' error: SPI shift register size needs to be 8 bits minimum"
report "Generic parameter 'N' (shift register size) needs to be 8 bits minimum"
severity FAILURE;
-- minimum prefetch lookahead check
assert PREFETCH >= 2
report "Generic parameter 'PREFETCH' error: needs to be 1 minimum"
report "Generic parameter 'PREFETCH' (lookahead count) needs to be 1 minimum"
severity FAILURE;
-- maximum prefetch lookahead check
assert PREFETCH <= N-5
report "Generic parameter 'PREFETCH' error: lookahead count out of range, needs to be N-5 maximum"
report "Generic parameter 'PREFETCH' (lookahead count) out of range, needs to be N-5 maximum"
severity FAILURE;
-- SPI_2X_CLK_DIV clock divider value must not be zero
assert SPI_2X_CLK_DIV > 0
report "Generic parameter 'SPI_2X_CLK_DIV' must not be zero"
severity FAILURE;
 
--=============================================================================================
-- CLOCK GENERATION
--=============================================================================================
-- In order to preserve global clocking resources, the core clocking scheme is completely based
-- on using clock enables to process the serial high-speed clock at lower rates for the core fsm,
-- the spi clock generator and the input sampling clock.
-- The clock generation block derives 2 continuous antiphase signals from the 2x spi base clock
-- for the core clocking.
-- The 2 clock phases are generated by separate and synchronous FFDs, and should have only
-- The 2 clock phases are generated by separate and synchronous FFs, and should have only
-- interconnect delays.
-- The clock phase is selected for serial input sampling, fsm clocking, and spi SCK output, based
-- on the configuration of CPOL and CPHA.
-- Clock enable signals are generated with the same phase as the 2 core clocks, and these clock
-- enables are used to control clocking of all internal synchronous circuitry.
-- The clock enable phase is selected for serial input sampling, fsm clocking, and spi SCK output,
-- based on the configuration of CPOL and CPHA.
-- Each phase is selected so that all the registers can be clocked with a rising edge on all SPI
-- modes.
-- modes, by a single high-speed global clock, preserving clock resources.
-----------------------------------------------------------------------------------------------
-- divide down 'spi_2x_clk_i' by 2
-- this should be synthesized as two synchronous FFDs
core_clock_gen_proc : process (spi_2x_clk_i) is
-- generate the core clock enables from the serial high-speed input clock
spi_2x_ce_gen_proc: process (sclk_i) is
variable clk_cnt : integer range SPI_2X_CLK_DIV-1 downto 0 := 0;
begin
if spi_2x_clk_i'event and spi_2x_clk_i = '1' then
core_clk <= core_n_clk; -- divided by 2 clock, differential
core_n_clk <= not core_n_clk;
if sclk_i'event and sclk_i = '1' then
if clk_cnt = SPI_2X_CLK_DIV-1 then
spi_2x_ce <= '1';
clk_cnt := 0;
else
spi_2x_ce <= '0';
clk_cnt := clk_cnt + 1;
end if;
end if;
end process spi_2x_ce_gen_proc;
-----------------------------------------------------------------------------------------------
-- generate the core antiphase clocks and clock enables.
-- Clocked on the rising edge of 'sclk_i'; the core clocks only change in cycles where 'spi_2x_ce' is '1'.
-- All right-hand sides read the value 'core_n_clk' held before the clock edge (signal assignment semantics):
--   * enabled cycle : 'core_clk' and 'core_ce' take the old 'core_n_clk', while 'core_n_clk' and 'core_n_ce'
--                     take its inverse, so the clock pair toggles and exactly one of the two enables is '1'.
--   * other cycles  : both clock enables are driven to '0' and the two core clocks hold their values.
core_clock_gen_proc : process (sclk_i) is
begin
if sclk_i'event and sclk_i = '1' then
if spi_2x_ce = '1' then
-- generate the 2 antiphase core clocks
core_clk <= core_n_clk;
core_n_clk <= not core_n_clk;
-- generate the 2 phase core clock enables
core_ce <= core_n_clk;
core_n_ce <= not core_n_clk;
else
-- no spi_2x tick in this cycle: keep the clocks, deassert both enables
core_ce <= '0';
core_n_ce <= '0';
end if;
end if;
end process core_clock_gen_proc;
-----------------------------------------------------------------------------------------------
-- spi clk generator: generate spi_clk from core_clk depending on CPOL
273,29 → 336,29
spi_clk <= core_n_clk; -- for CPOL=1, spi clk has idle HIGH
end generate;
-----------------------------------------------------------------------------------------------
-- Sampling clock generation: generate 'samp_clk' from core_clk or core_n_clk depending on CPHA
-- always sample data at the half-cycle of the fsm update cell
smp_cpha_0_proc :
-- Sampling clock enable generation: generate 'samp_ce' from 'core_ce' or 'core_n_ce' depending on CPHA
-- always sample data at the half-cycle of the fsm update cell
samp_ce_cpha_0_proc :
if CPHA = '0' generate
begin
samp_clk <= core_clk;
samp_ce <= core_ce;
end generate;
smp_cpha_1_proc :
samp_ce_cpha_1_proc :
if CPHA = '1' generate
begin
samp_clk <= core_n_clk;
samp_ce <= core_n_ce;
end generate;
-----------------------------------------------------------------------------------------------
-- FSM clock generation: generate 'fsm_clock' from core_clk or core_n_clk depending on CPHA
fsm_cpha_0_proc :
-- FSM clock generation: generate 'fsm_ce' from core_ce or core_n_ce depending on CPHA
fsm_ce_cpha_0_proc :
if CPHA = '0' generate
begin
fsm_clk <= core_n_clk; -- for CPHA=0, latch registers at rising edge of negative core clock
fsm_ce <= core_n_ce; -- for CPHA=0, latch registers at rising edge of negative core clock enable
end generate;
fsm_cpha_1_proc :
fsm_ce_cpha_1_proc :
if CPHA = '1' generate
begin
fsm_clk <= core_clk; -- for CPHA=1, latch registers at rising edge of positive core clock
fsm_ce <= core_ce; -- for CPHA=1, latch registers at rising edge of positive core clock enable
end generate;
 
--=============================================================================================
306,11 → 369,13
-- ATTENTION: REMOVING THE FLIPFLOP (DIRECT CONNECTION) WE GET HIGHER PERFORMANCE DUE TO
-- REDUCED DEMAND ON MISO SETUP TIME.
--
rx_bit_proc : process (samp_clk, spi_miso_i) is
rx_bit_proc : process (sclk_i) is
begin
-- if samp_clk'event and samp_clk = '1' then -- uncomment to have the input register
rx_bit_reg <= spi_miso_i;
-- end if; -- uncomment to have the input register
if sclk_i'event and sclk_i = '1' then
if samp_ce = '1' then
rx_bit_reg <= spi_miso_i;
end if;
end if;
end process rx_bit_proc;
 
--=============================================================================================
317,25 → 382,27
-- RTL REGISTER PROCESSES
--=============================================================================================
-- fsm state and data registers: synchronous to the spi base reference clock
core_reg_proc : process (fsm_clk) is
core_reg_proc : process (sclk_i) is
begin
-- FFD registers clocked on rising edge and cleared on sync rst_i
if fsm_clk'event and fsm_clk = '1' then
-- FF registers clocked on rising edge and cleared on sync rst_i
if sclk_i'event and sclk_i = '1' then
if rst_i = '1' then -- sync reset
state_reg <= 0; -- only provide local reset for the state machine
else
elsif fsm_ce = '1' then -- fsm_ce is clock enable for the fsm
state_reg <= state_next; -- state register
end if;
end if;
-- FFD registers clocked on rising edge
if fsm_clk'event and fsm_clk = '1' then
sh_reg <= sh_next; -- shift register
ena_ssel_reg <= ena_ssel_next; -- spi select enable
ena_sck_reg <= ena_sck_next; -- spi clock enable
do_buffer_reg <= do_buffer_next; -- registered output data buffer
do_transfer_reg <= do_transfer_next; -- output data transferred to buffer
di_req_reg <= di_req_next; -- input data request
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization
-- FF registers clocked on rising edge
if sclk_i'event and sclk_i = '1' then
if fsm_ce = '1' then
sh_reg <= sh_next; -- shift register
ena_ssel_reg <= ena_ssel_next; -- spi select enable
ena_sck_reg <= ena_sck_next; -- spi clock enable
do_buffer_reg <= do_buffer_next; -- registered output data buffer
do_transfer_reg <= do_transfer_next; -- output data transferred to buffer
di_req_reg <= di_req_next; -- input data request
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization
end if;
end if;
end process core_reg_proc;
 
345,24 → 412,24
-- do_valid_o and di_req_o strobe output logic
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a
-- fixed-length delayed pulse for the output flags, at the parallel clock domain
out_transfer_proc : process ( clk_i, do_transfer_reg, di_req_reg,
out_transfer_proc : process ( pclk_i, do_transfer_reg, di_req_reg,
do_valid_A, do_valid_B, do_valid_D,
di_req_o_A, di_req_o_B, di_req_o_D) is
di_req_o_A, di_req_o_B, di_req_o_D ) is
begin
if clk_i'event and clk_i = '1' then -- clock at parallel port clock
if pclk_i'event and pclk_i = '1' then -- clock at parallel port clock
-- do_transfer_reg -> do_valid_o_reg
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs
do_valid_C <= do_valid_B;
do_valid_D <= do_valid_C;
do_valid_o_reg <= do_valid_next; -- registered output pulse
do_valid_o_reg <= do_valid_next; -- registered output pulse
--------------------------------
-- di_req_reg -> di_req_o_reg
di_req_o_A <= di_req_reg; -- the input signal must be at least 2 clocks long
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs
di_req_o_C <= di_req_o_B;
di_req_o_D <= di_req_o_C;
di_req_o_reg <= di_req_o_next; -- registered output pulse
di_req_o_A <= di_req_reg; -- the input signal must be at least 2 clocks long
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs
di_req_o_C <= di_req_o_B;
di_req_o_D <= di_req_o_C;
di_req_o_reg <= di_req_o_next; -- registered output pulse
end if;
-- generate a 2-clocks pulse at the 3rd clock cycle
do_valid_next <= do_valid_A and do_valid_B and not do_valid_D;
369,19 → 436,19
di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D;
end process out_transfer_proc;
-- parallel load input registers: data register and write enable
in_transfer_proc: process (clk_i, wren_i, wren_ack_reg) is
in_transfer_proc: process ( pclk_i, wren_i, wren_ack_reg ) is
begin
-- registered data input, input register with clock enable
if clk_i'event and clk_i = '1' then
if pclk_i'event and pclk_i = '1' then
if wren_i = '1' then
di_reg <= di_i; -- parallel data input buffer register
di_reg <= di_i; -- parallel data input buffer register
end if;
end if;
-- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset)
if clk_i'event and clk_i = '1' then
if wren_i = '1' then -- wren_i is the sync preset for wren
if pclk_i'event and pclk_i = '1' then
if wren_i = '1' then -- wren_i is the sync preset for wren
wren <= '1';
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren
wren <= '0';
end if;
end if;
388,11 → 455,11
end process in_transfer_proc;
 
--=============================================================================================
-- RTL COMBINATIONAL LOGIC PROCESSES
-- RTL COMBINATORIAL LOGIC PROCESSES
--=============================================================================================
-- state and datapath combinational logic
-- state and datapath combinatorial logic
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, ena_ssel_reg, ena_sck_reg, do_buffer_reg,
do_transfer_reg, di_reg, wren) is
do_transfer_reg, di_reg, wren ) is
begin
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches)
ena_ssel_next <= ena_ssel_reg; -- controls the slave select line
458,19 → 525,21
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- copy registered do_valid_o to output
di_req_o_proc: di_req_o <= di_req_o_reg; -- copy registered di_req_o to output
-----------------------------------------------------------------------------------------------
-- SCK out logic: output mux for the SPI sck
--------------------------------------------
-- This is modelled as a mux instead of a register because it requires a FDCPE (ffd with preset and clear),
-- which generates very inefficient logic in Spartan-6. Instead, we have a mux that translates to an AND gate,
-- and can be optimized to a fast CLB gate.
spi_sck_gen_proc : process (ena_sck_reg, spi_clk) is
-- SCK out logic: pipeline phase compensation for the SCK line
-----------------------------------------------------------------------------------------------
-- This is a MUX with an output register. The register gives us a pipeline delay for the SCK line,
-- enabling higher SCK frequency. The MOSI and SCK phase are compensated by the pipeline delay.
spi_sck_o_gen_proc : process (sclk_i, ena_sck_reg, spi_clk, spi_clk_reg) is
begin
if ena_sck_reg = '1' then
spi_sck_o <= spi_clk; -- copy the selected clock polarity
else
spi_sck_o <= CPOL; -- when clock disabled, set to idle polarity
if sclk_i'event and sclk_i = '1' then
if ena_sck_reg = '1' then
spi_clk_reg <= spi_clk; -- copy the selected clock polarity
else
spi_clk_reg <= CPOL; -- when clock disabled, set to idle polarity
end if;
end if;
end process spi_sck_gen_proc;
spi_sck_o <= spi_clk_reg; -- connect register to output
end process spi_sck_o_gen_proc;
 
--=============================================================================================
-- DEBUG LOGIC PROCESSES
481,9 → 550,11
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg;
wren_o_proc: wren_o <= wren;
wren_ack_o_proc: wren_ack_o <= wren_ack_reg;
sh_reg_dbg_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug
sh_reg_dbg_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug
core_clk_o_proc: core_clk_o <= core_clk;
core_n_clk_o_proc: core_n_clk_o <= core_n_clk;
core_ce_o_proc: core_ce_o <= core_ce;
core_n_ce_o_proc: core_n_ce_o <= core_n_ce;
 
end architecture RTL;
end architecture rtl;
 
/readme.txt
1,22 → 1,42
SPI_MASTER_SLAVE
----------------
 
 
This project was started from the need to have a robust yet simple SPI interface core
written in VHDL to use in generic FPGA-to-device interfacing.
The resulting cores generate very small and efficient circuits, that operate from very
slow SPI clocks up to over 50MHz SPI clocks.
 
 
VHDL files for spi master/slave project:
---------------------------------------
 
spi_master.vhd spi master module, can be used independently
spi_slave.vhd spi slave module, can be used independently
spi_loopback.vhd wrapper module for the master and slave modules
spi_loopback_test.vhd testbench for the loopback module, test master against slave
spi_loopback.ucf constraints file for Spartan-6, optimized for area, LUT compression.
spi_master.vhd spi master module, can be used independently
spi_slave.vhd spi slave module, can be used independently
spi_loopback.vhd wrapper module for simulating the master and slave modules
spi_loopback_test.vhd testbench for the loopback module, test master against slave
spi_loopback.ucf constraints for simulation: Spartan-6, area, LUT compression.
 
 
The original development was done in Xilinx ISE 13.1, targeted to a Spartan-6 device.
 
Verification was done in ISIM, after Place & Route, with default constraints, for the slowest
Spartan-6 device, synthesis generated 59 slices, and the design was tested at 40MHz for the
spi_2x_clk (20MHz spi SCK), and 125MHz for the parallel interfaces clocks.
With the attached .ucf file, optimized for area and using LUT compression, synthesis generated
44 slices, and design tested OK at 20MHz of SPI clock.
Spartan-6 device, synthesis generated 41 slices, and the design was simulated at 25MHz spi SCK, and 100MHz for the parallel interfaces clocks.
 
 
Design verification in silicon was done on a Digilent Atlys board, and the verification project can be found at the \trunk\syn directory, with all the required files to replicate the verification tests, including pinlock constraints for the Atlys board.
 
 
If you have any questions or usage issues with this core, please open a thread in the OpenCores forum, and I will be pleased to answer.
 
If you find a bug or a design fault in the models, please open a bug in the OpenCores bugtracker for this project.
 
 
In any case, thank you for testing and using this core.
 
 
Jonny Doin
jdoin@opencores.org
 
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.