
Subversion Repositories spi_master_slave

Compare Revisions

  • This comparison shows the changes necessary to convert path
    from Rev 23 to Rev 24
    Reverse comparison

Rev 23 → Rev 24

/spi_loopback_test.vhd File deleted
/readme.txt File deleted
/spi_master.vhd File deleted
/grp_debouncer.vhd File deleted
/spi_loopback.ucf File deleted
/spi_loopback.vhd File deleted
0,0 → 1,115
# ---- Clock period constraints ----
# Parallel-interface clocks (slave and master side): 8 ns period (125 MHz), 50% duty cycle.
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
NET "s_clk_i" TNM_NET = s_clk_i;
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 8 ns HIGH 50%;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
NET "m_clk_i" TNM_NET = m_clk_i;
TIMESPEC TS_m_clk_i = PERIOD "m_clk_i" 8 ns HIGH 50%;
# SPI serial clock nets (slave SCK input, master SCK output buffer net): 30 ns period (about 33.3 MHz), 50% duty cycle.
NET "s_spi_sck_i" TNM_NET = s_spi_sck_i;
TIMESPEC TS_s_spi_sck_i = PERIOD "s_spi_sck_i" 30 ns HIGH 50%;
NET "m_spi_sck_o_OBUF" TNM_NET = m_spi_sck_o_OBUF;
TIMESPEC TS_m_spi_sck_o_OBUF = PERIOD "m_spi_sck_o_OBUF" 30 ns HIGH 50%;
# Net "core_n_clk" inside instance "Inst_spi_master", constrained to the same 30 ns period.
NET "Inst_spi_master/core_n_clk" TNM_NET = Inst_spi_master/core_n_clk;
TIMESPEC TS_Inst_spi_master_core_n_clk = PERIOD "Inst_spi_master/core_n_clk" 30 ns HIGH 50%;
# ---- Master parallel data input bus ----
# Collect all 32 bits of m_di_i (indices 0..31) into the time group "m_di".
INST "m_di_i<0>" TNM = m_di;
INST "m_di_i<1>" TNM = m_di;
INST "m_di_i<2>" TNM = m_di;
INST "m_di_i<3>" TNM = m_di;
INST "m_di_i<4>" TNM = m_di;
INST "m_di_i<5>" TNM = m_di;
INST "m_di_i<6>" TNM = m_di;
INST "m_di_i<7>" TNM = m_di;
INST "m_di_i<8>" TNM = m_di;
INST "m_di_i<9>" TNM = m_di;
INST "m_di_i<10>" TNM = m_di;
INST "m_di_i<11>" TNM = m_di;
INST "m_di_i<12>" TNM = m_di;
INST "m_di_i<13>" TNM = m_di;
INST "m_di_i<14>" TNM = m_di;
INST "m_di_i<15>" TNM = m_di;
INST "m_di_i<16>" TNM = m_di;
INST "m_di_i<17>" TNM = m_di;
INST "m_di_i<18>" TNM = m_di;
INST "m_di_i<19>" TNM = m_di;
INST "m_di_i<20>" TNM = m_di;
INST "m_di_i<21>" TNM = m_di;
INST "m_di_i<22>" TNM = m_di;
INST "m_di_i<23>" TNM = m_di;
INST "m_di_i<24>" TNM = m_di;
INST "m_di_i<25>" TNM = m_di;
INST "m_di_i<26>" TNM = m_di;
INST "m_di_i<27>" TNM = m_di;
INST "m_di_i<28>" TNM = m_di;
INST "m_di_i<29>" TNM = m_di;
INST "m_di_i<30>" TNM = m_di;
INST "m_di_i<31>" TNM = m_di;
# Input timing for group "m_di": data is presented 8 ns before the rising edge of m_clk_i
# and stays valid for 8 ns.
TIMEGRP "m_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "m_clk_i" RISING;
# Time group for the master MISO input.
# NOTE(review): no constraint in this listing references group "m_miso" - confirm whether an OFFSET IN was intended.
INST "m_spi_miso_i" TNM = m_miso;
# ---- Slave parallel data input bus ----
# Collect all 32 bits of s_di_i (indices 0..31) into the time group "s_di".
INST "s_di_i<0>" TNM = s_di;
INST "s_di_i<1>" TNM = s_di;
INST "s_di_i<2>" TNM = s_di;
INST "s_di_i<3>" TNM = s_di;
INST "s_di_i<4>" TNM = s_di;
INST "s_di_i<5>" TNM = s_di;
INST "s_di_i<6>" TNM = s_di;
INST "s_di_i<7>" TNM = s_di;
INST "s_di_i<8>" TNM = s_di;
INST "s_di_i<9>" TNM = s_di;
INST "s_di_i<10>" TNM = s_di;
INST "s_di_i<11>" TNM = s_di;
INST "s_di_i<12>" TNM = s_di;
INST "s_di_i<13>" TNM = s_di;
INST "s_di_i<14>" TNM = s_di;
INST "s_di_i<15>" TNM = s_di;
INST "s_di_i<16>" TNM = s_di;
INST "s_di_i<17>" TNM = s_di;
INST "s_di_i<18>" TNM = s_di;
INST "s_di_i<19>" TNM = s_di;
INST "s_di_i<20>" TNM = s_di;
INST "s_di_i<21>" TNM = s_di;
INST "s_di_i<22>" TNM = s_di;
INST "s_di_i<23>" TNM = s_di;
INST "s_di_i<24>" TNM = s_di;
INST "s_di_i<25>" TNM = s_di;
INST "s_di_i<26>" TNM = s_di;
INST "s_di_i<27>" TNM = s_di;
INST "s_di_i<28>" TNM = s_di;
INST "s_di_i<29>" TNM = s_di;
INST "s_di_i<30>" TNM = s_di;
INST "s_di_i<31>" TNM = s_di;
# Input timing for group "s_di": data is presented 8 ns before the rising edge of s_clk_i
# and stays valid for 8 ns.
TIMEGRP "s_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "s_clk_i" RISING;
# Time group for the slave MOSI input.
# NOTE(review): no constraint in this listing references group "s_mosi" - confirm whether an OFFSET IN was intended.
INST "s_spi_mosi_i" TNM = s_mosi;
# ---- Master parallel data output bus ----
# Collect all 32 bits of m_do_o (indices 0..31) into the time group "m_do".
# NOTE(review): no OFFSET OUT (or other) constraint in this listing uses group "m_do" - confirm whether one is missing.
INST "m_do_o<0>" TNM = m_do;
INST "m_do_o<1>" TNM = m_do;
INST "m_do_o<2>" TNM = m_do;
INST "m_do_o<3>" TNM = m_do;
INST "m_do_o<4>" TNM = m_do;
INST "m_do_o<5>" TNM = m_do;
INST "m_do_o<6>" TNM = m_do;
INST "m_do_o<7>" TNM = m_do;
INST "m_do_o<8>" TNM = m_do;
INST "m_do_o<9>" TNM = m_do;
INST "m_do_o<10>" TNM = m_do;
INST "m_do_o<11>" TNM = m_do;
INST "m_do_o<12>" TNM = m_do;
INST "m_do_o<13>" TNM = m_do;
INST "m_do_o<14>" TNM = m_do;
INST "m_do_o<15>" TNM = m_do;
INST "m_do_o<16>" TNM = m_do;
INST "m_do_o<17>" TNM = m_do;
INST "m_do_o<18>" TNM = m_do;
INST "m_do_o<19>" TNM = m_do;
INST "m_do_o<20>" TNM = m_do;
INST "m_do_o<21>" TNM = m_do;
INST "m_do_o<22>" TNM = m_do;
INST "m_do_o<23>" TNM = m_do;
INST "m_do_o<24>" TNM = m_do;
INST "m_do_o<25>" TNM = m_do;
INST "m_do_o<26>" TNM = m_do;
INST "m_do_o<27>" TNM = m_do;
INST "m_do_o<28>" TNM = m_do;
INST "m_do_o<29>" TNM = m_do;
INST "m_do_o<30>" TNM = m_do;
INST "m_do_o<31>" TNM = m_do;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/09
# Time group for the master debug output m_rx_bit_reg_o.
# NOTE(review): group "m_rx_bit" is not used by any constraint visible in this listing.
INST "m_rx_bit_reg_o" TNM = m_rx_bit;
0,0 → 1,305
-- Company:
-- Engineer: Jonny Doin
-- Create Date: 22:59:18 04/25/2011
-- Design Name: spi_master_slave
-- Module Name: spi_master_slave/spi_loopback_test.vhd
-- Project Name: SPI_interface
-- Target Device: Spartan-6
-- Tool versions: ISE 13.1
-- Description: Testbench to simulate the master and slave SPI interfaces. Each module can be tested
-- in a "real" environment, where the 'spi_master' exchanges data with the 'spi_slave'
-- module, simulating the internal working of each design.
-- In behavioral simulation, select a matching data width (N) and spi mode (CPOL, CPHA) for
-- both modules, and also a different clock domain for each parallel interface.
-- Different values for PREFETCH for each interface can be tested, to model the best value
-- for the pipelined memory / bus that is attached to the di/do ports.
-- To test the parallel interfaces, a simple ROM memory is simulated for each interface, with
-- 'fifo_memory_size' (16) words of data to be sent, synchronous to each clock and flow control signals.
-- VHDL Test Bench Created by ISE for modules: 'spi_master' and 'spi_slave'
-- Dependencies:
-- Revision:
-- Revision 0.01 - File Created
-- Revision 0.10 - Implemented FIFO simulation for each interface.
-- Additional Comments:
-- Notes:
-- This testbench has been automatically generated using types std_logic and
-- std_logic_vector for the ports of the unit under test. Xilinx recommends
-- that these types always be used for the top-level I/O of a design in order
-- to guarantee that the testbench will bind correctly to the post-implementation
-- simulation model.
USE ieee.std_logic_1164.ALL;
USE ieee.numeric_std.ALL;
library work;
use work.all;
-- Testbench top-level entity: it has no ports, only generics that parameterize the simulation.
-- The generic list is wrapped in GENERIC ( ... ); so that the entity declaration is valid VHDL.
ENTITY spi_loopback_test IS
    GENERIC (
        N        : positive  := 32;     -- 32bit serial word length is default
        CPOL     : std_logic := '0';    -- SPI mode selection: CPOL = clock polarity, CPHA = clock phase.
        CPHA     : std_logic := '1';    -- the defaults CPOL='0', CPHA='1' correspond to SPI mode 1
        PREFETCH : positive  := 2       -- prefetch lookahead cycles
    );
END spi_loopback_test;
ARCHITECTURE behavior OF spi_loopback_test IS
-- Component declaration for the Unit Under Test (UUT)
-- The port list is enclosed in PORT ( ... ); and terminated with END COMPONENT; so that the
-- declaration is syntactically complete.
-- NOTE: the vector ports are declared with a fixed 32-bit width, while the testbench signals
-- connected to them are N bits wide, so this declaration only matches for N = 32.
COMPONENT spi_loopback
    PORT (
        -- inputs
        m_clk_i         : IN  std_logic;
        m_rst_i         : IN  std_logic;
        m_spi_miso_i    : IN  std_logic;
        m_di_i          : IN  std_logic_vector(31 downto 0);
        m_wren_i        : IN  std_logic;
        s_clk_i         : IN  std_logic;
        s_spi_ssel_i    : IN  std_logic;
        s_spi_sck_i     : IN  std_logic;
        s_spi_mosi_i    : IN  std_logic;
        s_di_i          : IN  std_logic_vector(31 downto 0);
        s_wren_i        : IN  std_logic;
        -- master-side outputs
        m_spi_ssel_o    : OUT std_logic;
        m_spi_sck_o     : OUT std_logic;
        m_spi_mosi_o    : OUT std_logic;
        m_di_req_o      : OUT std_logic;
        m_do_valid_o    : OUT std_logic;
        m_do_o          : OUT std_logic_vector(31 downto 0);
        m_do_transfer_o : OUT std_logic;
        m_wren_o        : OUT std_logic;
        m_wren_ack_o    : OUT std_logic;
        m_rx_bit_reg_o  : OUT std_logic;
        m_state_dbg_o   : OUT std_logic_vector(5 downto 0);
        m_core_clk_o    : OUT std_logic;
        m_core_n_clk_o  : OUT std_logic;
        m_sh_reg_dbg_o  : OUT std_logic_vector(31 downto 0);
        -- slave-side outputs
        s_spi_miso_o    : OUT std_logic;
        s_di_req_o      : OUT std_logic;
        s_do_valid_o    : OUT std_logic;
        s_do_o          : OUT std_logic_vector(31 downto 0);
        s_do_transfer_o : OUT std_logic;
        s_wren_o        : OUT std_logic;
        s_wren_ack_o    : OUT std_logic;
        s_rx_bit_reg_o  : OUT std_logic;
        s_state_dbg_o   : OUT std_logic_vector(5 downto 0)
    );
END COMPONENT;
-- Declarative part of the testbench architecture: FIFO model size/type, the signals that wire
-- the UUT ports to the stimulus processes, and the clock periods.
-- constants
-- number of words held by each simulated transmit memory
constant fifo_memory_size : integer := 16;
-- types
-- storage for the simulated transmit memory: fifo_memory_size words of N bits each
type fifo_memory_type is array (0 to fifo_memory_size-1) of std_logic_vector (N-1 downto 0);
-- signals to connect the instances
-- internal clk and rst
signal m_clk : std_logic := '0'; -- clock domain for the master parallel interface. Must be faster than spi bus sck.
signal s_clk : std_logic := '0'; -- clock domain for the slave parallel interface. Must be faster than spi bus sck.
signal rst : std_logic := 'U';
-- spi bus wires
-- (the same four signals are connected to both the master-side and slave-side spi ports of the UUT)
signal spi_sck : std_logic;
signal spi_ssel : std_logic;
signal spi_miso : std_logic;
signal spi_mosi : std_logic;
-- master parallel interface
signal di_m : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_m : std_logic_vector (N-1 downto 0) := (others => 'U');
signal do_valid_m : std_logic;
signal do_transfer_m : std_logic;
signal di_req_m : std_logic;
signal wren_m : std_logic := '0';
signal wren_o_m : std_logic := 'U';
signal wren_ack_o_m : std_logic := 'U';
signal rx_bit_reg_m : std_logic;
signal state_m : std_logic_vector (5 downto 0);
signal core_clk_o_m : std_logic;
signal core_n_clk_o_m : std_logic;
signal sh_reg_m : std_logic_vector (N-1 downto 0) := (others => '0');
-- slave parallel interface
signal di_s : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_s : std_logic_vector (N-1 downto 0);
signal do_valid_s : std_logic;
signal do_transfer_s : std_logic;
signal di_req_s : std_logic;
signal wren_s : std_logic := '0';
signal wren_o_s : std_logic := 'U';
signal wren_ack_o_s : std_logic := 'U';
signal rx_bit_reg_s : std_logic;
signal state_s : std_logic_vector (5 downto 0);
-- signal sh_reg_s : std_logic_vector (N-1 downto 0);
-- Clock period definitions
-- (both parallel clocks use the same 10 ns period in this testbench)
constant m_clk_period : time := 10 ns; -- 100MHz master parallel clock
constant s_clk_period : time := 10 ns; -- 100MHz slave parallel clock
-- Start of the architecture statement part: the declarations above end here and the
-- concurrent statements (instantiation, clock generators, stimulus processes) follow.
BEGIN

    -- Component instantiation for the Unit Under Test (UUT)
    -- The master-side and slave-side spi ports are tied to the same spi_* signals, so the master
    -- drives the slave (ssel, sck, mosi) and the slave answers on miso.
    Inst_spi_loopback: spi_loopback
        port map(
            ----------------MASTER-----------------------
            m_clk_i         => m_clk,
            m_rst_i         => rst,
            m_spi_ssel_o    => spi_ssel,
            m_spi_sck_o     => spi_sck,
            m_spi_mosi_o    => spi_mosi,
            m_spi_miso_i    => spi_miso,
            m_di_req_o      => di_req_m,
            m_di_i          => di_m,
            m_wren_i        => wren_m,
            m_do_valid_o    => do_valid_m,
            m_do_o          => do_m,
            ----- debug -----
            m_do_transfer_o => do_transfer_m,
            m_wren_o        => wren_o_m,
            m_wren_ack_o    => wren_ack_o_m,
            m_rx_bit_reg_o  => rx_bit_reg_m,
            m_state_dbg_o   => state_m,
            m_core_clk_o    => core_clk_o_m,
            m_core_n_clk_o  => core_n_clk_o_m,
            m_sh_reg_dbg_o  => sh_reg_m,
            ----------------SLAVE-----------------------
            s_clk_i         => s_clk,
            s_spi_ssel_i    => spi_ssel,
            s_spi_sck_i     => spi_sck,
            s_spi_mosi_i    => spi_mosi,
            s_spi_miso_o    => spi_miso,
            s_di_req_o      => di_req_s,
            s_di_i          => di_s,
            s_wren_i        => wren_s,
            s_do_valid_o    => do_valid_s,
            s_do_o          => do_s,
            ----- debug -----
            s_do_transfer_o => do_transfer_s,
            s_wren_o        => wren_o_s,
            s_wren_ack_o    => wren_ack_o_s,
            s_rx_bit_reg_o  => rx_bit_reg_s,
            s_state_dbg_o   => state_s
            -- s_sh_reg_dbg_o => sh_reg_s
        );
-- Clock generator processes
-- Master parallel clock: free-running square wave, low for the first half of m_clk_period and
-- high for the second half. The process has no sensitivity list and restarts after the last wait.
m_clk_process : process
begin
    m_clk <= '0';
    wait for m_clk_period/2;
    m_clk <= '1';
    wait for m_clk_period/2;
end process m_clk_process;
-- Slave parallel clock: free-running square wave, low for the first half of s_clk_period and
-- high for the second half. The process has no sensitivity list and restarts after the last wait.
s_clk_process : process
begin
    s_clk <= '0';
    wait for s_clk_period/2;
    s_clk <= '1';
    wait for s_clk_period/2;
end process s_clk_process;
-- rst_i process
-- Reset stimulus: rst is '0' at time 0, driven to '1' at 20 ns and back to '0' at 100 ns
-- (a single active-high pulse 80 ns long).
rst <= '0', '1' after 20 ns, '0' after 100 ns;
-- Master interface process
-- Stimulus for the master parallel port. After the reset pulse it feeds the words of
-- 'fifo_memory' to the UUT in two bursts: entries 0 .. fifo_memory_size/2-1 first, then, once
-- spi_ssel has gone to '1' and a further 2000 ns have elapsed, entries fifo_memory_size/2 ..
-- fifo_memory_size-1. Each word is placed on di_m, wren_m is raised one m_clk edge later, held
-- over two m_clk rising edges, and released; the next word waits for di_req_m = '1'.
-- After the second burst the process returns to the top and blocks on 'wait until rst = '1''.
-- NOTE(review): the aggregate that initialises 'fifo_memory' and the process 'begin' keyword
-- are absent from this listing; restore them from the repository copy before compiling.
master_tx_fifo_proc: process is
variable fifo_memory : fifo_memory_type :=
variable fifo_head : integer range 0 to fifo_memory_size-1;
-- synchronous rst_i
-- wait for reset to be asserted, then clear the port drivers on the next m_clk rising edge
wait until rst = '1';
wait until m_clk'event and m_clk = '1';
di_m <= (others => '0');
wren_m <= '0';
fifo_head := 0;
wait until rst = '0';
wait until di_req_m = '1'; -- wait shift register request for data
-- load next fifo contents into shift register
-- first burst: lower half of the memory
for cnt in 0 to (fifo_memory_size/2)-1 loop
fifo_head := cnt; -- pre-compute next pointer
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
di_m <= fifo_memory(fifo_head); -- place data into tx_data input bus
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wren_m <= '1'; -- write data into spi master
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wren_m <= '0'; -- remove write enable signal
wait until di_req_m = '1'; -- wait shift register request for data
end loop;
-- pause between bursts: wait for spi_ssel to become '1', then idle for 2000 ns
wait until spi_ssel = '1';
wait for 2000 ns;
-- second burst: upper half of the memory, same handshake as above
for cnt in (fifo_memory_size/2) to fifo_memory_size-1 loop
fifo_head := cnt; -- pre-compute next pointer
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
di_m <= fifo_memory(fifo_head); -- place data into tx_data input bus
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wren_m <= '1'; -- write data into spi master
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wait until m_clk'event and m_clk = '1'; -- sync fifo data load at next rising edge
wren_m <= '0'; -- remove write enable signal
wait until di_req_m = '1'; -- wait shift register request for data
end loop;
end process master_tx_fifo_proc;
-- Slave interface process
-- Stimulus for the slave parallel port. After the reset pulse it feeds all fifo_memory_size
-- words of 'fifo_memory' to the UUT in a single pass: each word is placed on di_s, wren_s is
-- raised one s_clk edge later, held over two s_clk rising edges, and released; the next word
-- waits for di_req_s = '1'. Afterwards the process returns to the top and blocks on
-- 'wait until rst = '1''.
-- NOTE(review): the aggregate that initialises 'fifo_memory' and the process 'begin' keyword
-- are absent from this listing, and no architecture 'END' follows this process in the listing;
-- restore them from the repository copy before compiling.
slave_tx_fifo_proc: process is
variable fifo_memory : fifo_memory_type :=
variable fifo_head : integer range 0 to fifo_memory_size-1;
-- synchronous rst_i
-- wait for reset to be asserted, then clear the port drivers on the next s_clk rising edge
wait until rst = '1';
wait until s_clk'event and s_clk = '1';
di_s <= (others => '0');
wren_s <= '0';
fifo_head := 0;
wait until rst = '0';
wait until di_req_s = '1'; -- wait shift register request for data
-- load next fifo contents into shift register
for cnt in 0 to fifo_memory_size-1 loop
fifo_head := cnt; -- pre-compute next pointer
wait until s_clk'event and s_clk = '1'; -- sync fifo data load at next rising edge
di_s <= fifo_memory(fifo_head); -- place data into tx_data input bus
wait until s_clk'event and s_clk = '1'; -- sync fifo data load at next rising edge
wren_s <= '1'; -- write data into shift register
wait until s_clk'event and s_clk = '1'; -- sync fifo data load at next rising edge
wait until s_clk'event and s_clk = '1'; -- sync fifo data load at next rising edge
wren_s <= '0'; -- remove write enable signal
wait until di_req_s = '1'; -- wait shift register request for data
end loop;
end process slave_tx_fifo_proc;
0,0 → 1,139
-- Company:
-- Engineer:
-- Create Date: 23:44:37 05/17/2011
-- Design Name:
-- Module Name: spi_loopback - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description:
-- This is a simple wrapper for the 'spi_master' and 'spi_slave' cores, to synthesize the 2 cores and
-- test them in the simulator.
-- Dependencies:
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
library ieee;
use ieee.std_logic_1164.all;
library work;
use work.all;
-- Wrapper entity exposing the ports of one SPI master ('m_' prefix) and one SPI slave
-- ('s_' prefix) side by side. The generic list is closed with ');' and the port list is wrapped
-- in 'Port ( ... );' so that the declaration is valid VHDL.
entity spi_loopback is
    Generic (
        N : positive := 32;                 -- 32bit serial word length is default
        CPOL : std_logic := '0';            -- SPI mode selection: CPOL = clock polarity, CPHA = clock phase.
        CPHA : std_logic := '1';            -- the defaults CPOL='0', CPHA='1' correspond to SPI mode 1
        PREFETCH : positive := 2;           -- prefetch lookahead cycles
        SPI_2X_CLK_DIV : positive := 5      -- for a 100MHz sclk_i, yields a 10MHz SCK
    );
    Port (
        ----------------MASTER-----------------------
        m_clk_i : IN std_logic;
        m_rst_i : IN std_logic;
        m_spi_ssel_o : OUT std_logic;
        m_spi_sck_o : OUT std_logic;
        m_spi_mosi_o : OUT std_logic;
        m_spi_miso_i : IN std_logic;
        m_di_req_o : OUT std_logic;
        m_di_i : IN std_logic_vector(N-1 downto 0);
        m_wren_i : IN std_logic;
        m_do_valid_o : OUT std_logic;
        m_do_o : OUT std_logic_vector(N-1 downto 0);
        ----- debug -----
        m_do_transfer_o : OUT std_logic;
        m_wren_o : OUT std_logic;
        m_wren_ack_o : OUT std_logic;
        m_rx_bit_reg_o : OUT std_logic;
        m_state_dbg_o : OUT std_logic_vector(5 downto 0);
        m_core_clk_o : OUT std_logic;
        m_core_n_clk_o : OUT std_logic;
        m_sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0);
        ----------------SLAVE-----------------------
        s_clk_i : IN std_logic;
        s_spi_ssel_i : IN std_logic;
        s_spi_sck_i : IN std_logic;
        s_spi_mosi_i : IN std_logic;
        s_spi_miso_o : OUT std_logic;
        s_di_req_o : OUT std_logic;                                         -- preload lookahead data request line
        s_di_i : IN std_logic_vector (N-1 downto 0) := (others => 'X');     -- parallel load data in (clocked in on rising edge of clk_i)
        s_wren_i : IN std_logic := 'X';                                     -- user data write enable
        s_do_valid_o : OUT std_logic;                                       -- do_o data valid strobe, valid during one clk_i rising edge.
        s_do_o : OUT std_logic_vector (N-1 downto 0);                       -- parallel output (clocked out on falling clk_i)
        ----- debug -----
        s_do_transfer_o : OUT std_logic;                                    -- debug: internal transfer driver
        s_wren_o : OUT std_logic;
        s_wren_ack_o : OUT std_logic;
        s_rx_bit_reg_o : OUT std_logic;
        s_state_dbg_o : OUT std_logic_vector (5 downto 0)                   -- debug: internal state register
        -- s_sh_reg_dbg_o : OUT std_logic_vector (N-1 downto 0)             -- debug: internal shift register
    );
end spi_loopback;
-- Structural architecture: instantiates one spi_master and one spi_slave and routes each
-- instance's ports straight to the wrapper ports ('m_' ports to the master, 's_' ports to the
-- slave). No internal signals are declared; master and slave are only connected to each other
-- by whatever the enclosing design wires between the 'm_spi_*' and 's_spi_*' ports.
architecture Structural of spi_loopback is
begin

    -- Component instantiation for the SPI master port
    -- The wrapper generics are forwarded to the master so that both cores run with the same word
    -- length and SPI mode; without this map the master would use its own defaults and
    -- SPI_2X_CLK_DIV would never be used.
    -- NOTE(review): the spi_master entity is not part of this listing; the generic names below
    -- are assumed to match the wrapper's generics - confirm against spi_master.vhd.
    Inst_spi_master: entity work.spi_master(rtl)
        generic map (N => N, CPOL => CPOL, CPHA => CPHA, PREFETCH => PREFETCH, SPI_2X_CLK_DIV => SPI_2X_CLK_DIV)
        port map(
            sclk_i => m_clk_i,                  -- system clock is used for serial and parallel ports
            pclk_i => m_clk_i,
            rst_i => m_rst_i,
            spi_ssel_o => m_spi_ssel_o,
            spi_sck_o => m_spi_sck_o,
            spi_mosi_o => m_spi_mosi_o,
            spi_miso_i => m_spi_miso_i,
            di_req_o => m_di_req_o,
            di_i => m_di_i,
            wren_i => m_wren_i,
            do_valid_o => m_do_valid_o,
            do_o => m_do_o,
            ----- debug -----
            do_transfer_o => m_do_transfer_o,
            wren_o => m_wren_o,
            wren_ack_o => m_wren_ack_o,
            rx_bit_reg_o => m_rx_bit_reg_o,
            state_dbg_o => m_state_dbg_o,
            core_clk_o => m_core_clk_o,
            core_n_clk_o => m_core_n_clk_o,
            sh_reg_dbg_o => m_sh_reg_dbg_o
        );

    -- Component instantiation for the SPI slave port
    -- NOTE(review): the spi_slave entity shown elsewhere in this listing names its debug ports
    -- 'wr_ack_o' and 'rx_bit_next_o' and declares 'state_dbg_o' as 4 bits wide, which differs from
    -- the formals and the 6-bit actual used here; confirm this map against the spi_slave
    -- revision that is actually compiled with this wrapper.
    Inst_spi_slave: entity work.spi_slave(rtl)
        generic map (N => N, CPOL => CPOL, CPHA => CPHA, PREFETCH => PREFETCH)
        port map(
            clk_i => s_clk_i,
            spi_ssel_i => s_spi_ssel_i,
            spi_sck_i => s_spi_sck_i,
            spi_mosi_i => s_spi_mosi_i,
            spi_miso_o => s_spi_miso_o,
            di_req_o => s_di_req_o,
            di_i => s_di_i,
            wren_i => s_wren_i,
            do_valid_o => s_do_valid_o,
            do_o => s_do_o,
            ----- debug -----
            do_transfer_o => s_do_transfer_o,
            wren_o => s_wren_o,
            wren_ack_o => s_wren_ack_o,
            rx_bit_reg_o => s_rx_bit_reg_o,
            state_dbg_o => s_state_dbg_o
            -- sh_reg_dbg_o => s_sh_reg_dbg_o
        );

end Structural;
0,0 → 1,458
-- Author: Jonny Doin,
-- Create Date: 15:36:20 05/15/2011
-- Module Name: SPI_SLAVE - RTL
-- Project Name: SPI INTERFACE
-- Target Devices: Spartan-6
-- Tool versions: ISE 13.1
-- Description:
-- This block is the SPI slave interface, implemented in one single entity.
-- All internal core operations are synchronous to the external SPI clock, and follow the general SPI de-facto standard.
-- The parallel read/write interface is synchronous to a supplied system master clock, 'clk_i'.
-- Synchronization for the parallel ports is provided by input data request and write enable lines, and output data valid line.
-- Fully pipelined cross-clock circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two
-- clock domains.
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o.
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch
-- signaling ('PREFETCH').
-- The parallel interface has an input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'.
-- When the core needs input data, a lookahead data request strobe, 'di_req_o', is pulsed 'PREFETCH' 'spi_sck_i'
-- cycles in advance to synchronize a user pipelined memory or fifo to present the next input data at 'di_i'
-- in time to have continuous clock at the spi bus, to allow back-to-back continuous load.
-- The data request strobe on 'di_req_o' is 2 'clk_i' clock cycles long.
-- The write to 'di_i' must occur at most one 'spi_sck_i' cycle before actual load to the core shift register, to avoid
-- race conditions at the register transfer.
-- The user circuit places data at the 'di_i' port and strobes the 'wren_i' line for one rising edge of 'clk_i'.
-- For a pipelined sync RAM, a PREFETCH of 3 cycles allows an address generator to present the new address to the RAM in one
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the interface one clock before transfer.
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time.
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last clock cycle,
-- if continuous transmission is intended.
-- When the interface is idle ('spi_ssel_i' is HIGH), the top bit of the latched 'di_i' port is presented at port 'spi_miso_o'.
-- =================================
-- __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- ___________
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i'
-- ______________ ___________________________...
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge
-- ________
-- wren_i __________________________/ \______... -- 'wren_i' enables latch on rising edge of 'clk_i'
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete
-- word is received, the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_sck_i'.
-- The signal 'do_valid_o' is strobed 3 'clk_i' clocks after, to directly drive a synchronous memory or fifo write enable.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'.
-- When the interface is idle, data at the 'do_o' port holds the last word received.
-- ================================
-- ______ ______ ______ ______
-- clk_spi_i ___/ bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi base clock
-- __ __ __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock
-- _________________ _____________________________________... -- 1) received data is transferred to 'do_buffer_reg'
-- do_o __old_data_______X__________new_data___________________... -- after last bit received, at next shift clock.
-- ____________
-- do_valid_o ________________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles
-- -- on the 3rd 'clk_i' rising edge.
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints.
------------------------------ COPYRIGHT NOTICE -----------------------------------------------------------------------
-- This file is part of the SPI MASTER/SLAVE INTERFACE project,spi_master_slave
-- Author(s): Jonny Doin,,
-- Copyright (C) 2011 Jonny Doin
-- -----------------------------
-- This source file may be used and distributed without restriction provided that this copyright statement is not
-- removed from the file and that any derivative work contains the original copyright notice and the associated
-- disclaimer.
-- This source file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
-- General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
-- (at your option) any later version.
-- This source is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
-- details.
-- You should have received a copy of the GNU Lesser General Public License along with this source; if not, download
-- it from
------------------------------ REVISION HISTORY -----------------------------------------------------------------------
-- 2011/05/15 v0.10.0050 [JD] created the slave logic, with 2 clock domains, from SPI_MASTER module.
-- 2011/05/15 v0.15.0055 [JD] fixed logic for starting state when CPHA='1'.
-- 2011/05/17 v0.80.0049 [JD] added explicit clock synchronization circuitry across clock boundaries.
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core.
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets.
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches.
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce
-- synthesis LUT overhead in Spartan-6 architecture.
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic.
-- 2011/06/12 v0.97.0079 [JD] implemented wr_ack and di_req logic for state 0, and eliminated unnecessary registers reset.
-- 2011/06/17 v0.97.0079 [JD] implemented wr_ack and di_req logic for state 0, and eliminated unnecessary registers reset.
-- 2011/07/16 v1.11.0080 [JD] verified both spi_master and spi_slave in loopback at 50MHz SPI clock.
-- 2011/07/29 v2.00.0110 [JD] FIX: CPHA bugs:
-- - redesigned core clocking to address all CPOL and CPHA configurations.
-- - added CHANGE_EDGE to the FSM register transfer logic, to have MISO change at opposite
-- clock phases from SHIFT_EDGE.
-- Removed global signal setting at the FSM, implementing exhaustive explicit signal attributions
-- for each state, to avoid reported inference problems in some synthesis engines.
-- Streamlined port names and indentation blocks.
-- 2011/08/01 v2.01.0115 [JD] Adjusted 'do_valid_o' pulse width to be 2 'clk_i', as in the master core.
-- Simulated in iSim with the master core for continuous transmission mode.
-- 2011/08/02 v2.02.0120 [JD] Added mux for MISO at reset state, to output di(N-1) at start. This fixed a bug in first bit.
-- The master and slave cores were verified in FPGA with continuous transmission, for all SPI modes.
-- 2011/08/04 v2.02.0121 [JD] Changed minor comment bugs in the combinatorial fsm logic.
-- 2011/08/08 v2.02.0122 [JD] FIX: continuous transfer mode bug. When wren_i is not strobed prior to state 1 (last bit), the
-- sequencer goes to state 0, and then to state 'N' again. This produces a wrong bit-shift for received
-- data. The fix consists in engaging continuous transfer regardless of the user strobing write enable, and
-- sequencing from state 1 to N as long as the master clock is present. If the user does not write new
-- data, the last data word is repeated.
-- 2011/08/08 v2.02.0123 [JD] ISSUE: continuous transfer mode bug, for ignored 'di_req' cycles. Instead of repeating the last data word,
-- the slave will send (others => '0') instead.
-- 2011/08/28 v2.02.0126 [JD] ISSUE: the miso_o MUX that preloads tx_bit when slave is deselected will glitch for CPHA='1'.
-- FIX: added a registered drive for the MUX select that will transfer the tx_reg only after the first tx_reg update.
-- ====
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.std_logic_unsigned.all;
entity spi_slave is
    -- SPI slave core interface.
    -- Serial side: spi_ssel_i / spi_sck_i / spi_mosi_i / spi_miso_o.
    -- Parallel side: N-bit 'di_i' write port (wren_i / wr_ack_o / di_req_o) and N-bit 'do_o'
    -- read port (do_valid_o), handled in the 'clk_i' clock domain.
    Generic (
        N        : positive  := 32;     -- serial word length in bits (32 is default; the architecture asserts N >= 8)
        CPOL     : std_logic := '0';    -- SPI mode selection (mode 0 default)
        CPHA     : std_logic := '0';    -- CPOL = clock polarity, CPHA = clock phase.
        PREFETCH : positive  := 3);     -- prefetch lookahead cycles (the architecture asserts PREFETCH <= N-5)
    Port (
        clk_i          : in  std_logic := 'X';                                    -- internal interface clock (clocks the di register and the strobe pipelines)
        spi_ssel_i     : in  std_logic := 'X';                                    -- spi bus slave select line ('1' = not selected, forces the fsm to idle)
        spi_sck_i      : in  std_logic := 'X';                                    -- spi bus sck clock (clocks the shift register core)
        spi_mosi_i     : in  std_logic := 'X';                                    -- spi bus mosi input
        spi_miso_o     : out std_logic := 'X';                                    -- spi bus spi_miso_o output
        di_req_o       : out std_logic;                                           -- preload lookahead data request line
        di_i           : in  std_logic_vector (N-1 downto 0) := (others => 'X');  -- parallel load data in (clocked in on rising edge of clk_i when wren_i = '1')
        wren_i         : in  std_logic := 'X';                                    -- user data write enable
        wr_ack_o       : out std_logic;                                           -- write acknowledge (registered in the spi_sck_i domain)
        do_valid_o     : out std_logic;                                           -- do_o data valid strobe, generated as a 2-cycle 'clk_i' pulse
        do_o           : out std_logic_vector (N-1 downto 0);                     -- parallel output (buffer register is updated on the spi_sck_i shift edge)
        --- debug ports: can be removed for the application circuit ---
        do_transfer_o  : out std_logic;                                           -- debug: internal transfer driver
        wren_o         : out std_logic;                                           -- debug: internal state of the wren_i pulse stretcher
        rx_bit_next_o  : out std_logic;                                           -- debug: internal rx bit
        state_dbg_o    : out std_logic_vector (3 downto 0);                       -- debug: internal state register (low 4 bits only)
        sh_reg_dbg_o   : out std_logic_vector (N-1 downto 0)                      -- debug: internal shift register
    );
end spi_slave;
-- ========================
-- There are several output ports that are used to simulate and verify the core operation.
-- Do not map any signals to the unused ports, and the synthesis tool will remove the related interfacing
-- circuitry.
-- The same is valid for the transmit and receive ports. If the receive ports are not mapped, the
-- synthesis tool will remove the receive logic from the generated circuitry.
-- Alternatively, you can remove these ports and related circuitry once the core is verified and
-- integrated to your circuit.
architecture rtl of spi_slave is
    -- SCK edge selection derived from the SPI mode generics. The constants are compared with
    -- the level of spi_sck_i right after an event, so '1' selects the rising edge and '0'
    -- the falling edge.
    constant SHIFT_EDGE  : std_logic := (CPOL xnor CPHA);   -- MOSI data is captured and shifted at this SCK edge
    constant CHANGE_EDGE : std_logic := (CPOL xor CPHA);    -- MISO data is updated at this SCK edge

    -- Control and strobe signals are initialized at GSR (global set/reset) by giving explicit
    -- initialization values at declaration. This is needed for all Xilinx FPGAs, and
    -- especially for the Spartan-6 and newer CLB architectures, where a local reset can
    -- reduce the usability of the slice registers, due to the need to share the control
    -- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice.
    -- By using GSR for the initialization, and reducing RESET local init to the really
    -- essential, the model achieves better LUT/FF packing and CLB usability.
    -- Note that the datapath signals (shifter, buffers, tx bit, wren) carry no initializer.

    -- internal state signals for register and combinatorial stages
    signal state_next : natural range N downto 0 := 0;      -- state 0 is idle state
    signal state_reg  : natural range N downto 0 := 0;      -- state 0 is idle state
    -- shifter signals for register and combinatorial stages
    signal sh_next : std_logic_vector (N-1 downto 0);
    signal sh_reg  : std_logic_vector (N-1 downto 0);
    -- mosi and miso connections
    signal rx_bit_next  : std_logic;                        -- sample of MOSI input
    signal tx_bit_next  : std_logic;
    signal tx_bit_reg   : std_logic;                        -- drives MISO during sequential logic
    signal preload_miso : std_logic;                        -- controls the MISO MUX
    -- buffered di_i data signals for register and combinatorial stages
    signal di_reg : std_logic_vector (N-1 downto 0);
    -- internal wren_i stretcher for fsm combinatorial stage
    signal wren        : std_logic;
    signal wr_ack_next : std_logic := '0';
    signal wr_ack_reg  : std_logic := '0';
    -- buffered do_o data signals for register and combinatorial stages
    signal do_buffer_next : std_logic_vector (N-1 downto 0);
    signal do_buffer_reg  : std_logic_vector (N-1 downto 0);
    -- internal signal to flag transfer to do_buffer_reg
    signal do_transfer_next : std_logic := '0';
    signal do_transfer_reg  : std_logic := '0';
    -- internal input data request signal
    signal di_req_next : std_logic := '0';
    signal di_req_reg  : std_logic := '0';
    -- cross-clock do_valid_o logic
    signal do_valid_next  : std_logic := '0';
    signal do_valid_A     : std_logic := '0';
    signal do_valid_B     : std_logic := '0';
    signal do_valid_C     : std_logic := '0';
    signal do_valid_D     : std_logic := '0';
    signal do_valid_o_reg : std_logic := '0';
    -- cross-clock di_req_o logic
    signal di_req_o_next : std_logic := '0';
    signal di_req_o_A    : std_logic := '0';
    signal di_req_o_B    : std_logic := '0';
    signal di_req_o_C    : std_logic := '0';
    signal di_req_o_D    : std_logic := '0';
    signal di_req_o_reg  : std_logic := '0';
begin
    -- minimum word width is 8 bits
    assert N >= 8
    report "Generic parameter 'N' error: SPI shift register size needs to be 8 bits minimum"
    severity FAILURE;
    -- maximum prefetch lookahead check
    assert PREFETCH <= N-5
    report "Generic parameter 'PREFETCH' error: lookahead count out of range, needs to be N-5 maximum"
    severity FAILURE;

    -- connect rx bit input
    rx_bit_proc : rx_bit_next <= spi_mosi_i;

    -- do_valid_o and di_req_o strobe output logic
    -- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a
    -- fixed-length delayed pulse for the output flags, at the parallel clock domain.
    -- The sensitivity list carries clk_i for the registers plus every signal read by the
    -- combinatorial pulse equations at the bottom of the process.
    out_transfer_proc : process ( clk_i, do_transfer_reg, di_req_reg,
                                  do_valid_A, do_valid_B, do_valid_D,
                                  di_req_o_A, di_req_o_B, di_req_o_D) is
    begin
        if clk_i'event and clk_i = '1' then                 -- clock at parallel port clock
            -- do_transfer_reg -> do_valid_o_reg
            do_valid_A <= do_transfer_reg;                  -- the input signal must be at least 2 clocks long
            do_valid_B <= do_valid_A;                       -- feed it to a ripple chain of FFDs
            do_valid_C <= do_valid_B;
            do_valid_D <= do_valid_C;
            do_valid_o_reg <= do_valid_next;                -- registered output pulse
            -- di_req_reg -> di_req_o_reg
            di_req_o_A <= di_req_reg;                       -- the input signal must be at least 2 clocks long
            di_req_o_B <= di_req_o_A;                       -- feed it to a ripple chain of FFDs
            di_req_o_C <= di_req_o_B;
            di_req_o_D <= di_req_o_C;
            di_req_o_reg <= di_req_o_next;                  -- registered output pulse
        end if;
        -- generate a 2-clocks pulse at the 3rd clock cycle:
        -- high while stages A and B are set and stage D has not yet seen the input
        do_valid_next <= do_valid_A and do_valid_B and not do_valid_D;
        di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D;
    end process out_transfer_proc;

    -- parallel load input registers: data register and write enable
    in_transfer_proc: process (clk_i, wren_i, wr_ack_reg) is
    begin
        -- registered data input, input register with clock enable
        if clk_i'event and clk_i = '1' then
            if wren_i = '1' then
                di_reg <= di_i;                             -- parallel data input buffer register
            end if;
        end if;
        -- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset)
        if clk_i'event and clk_i = '1' then
            if wren_i = '1' then                            -- wren_i is the sync preset for wren
                wren <= '1';
            elsif wr_ack_reg = '1' then                     -- wr_ack is the sync reset for wren
                wren <= '0';
            end if;
        end if;
    end process in_transfer_proc;

    -- fsm state and data registers change on spi SHIFT_EDGE
    core_reg_proc : process (spi_sck_i, spi_ssel_i) is
    begin
        -- FFD registers clocked on SHIFT edge and cleared on idle (spi_ssel_i = 1)
        -- state fsm register (fdr)
        if spi_ssel_i = '1' then                            -- async clr
            state_reg <= 0;                                 -- state falls back to idle when slave not selected
        elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then   -- on SHIFT edge, update state register
            state_reg <= state_next;                        -- core fsm changes state with spi SHIFT clock
        end if;
        -- FFD registers clocked on SHIFT edge
        -- rtl core registers (fd)
        if spi_sck_i'event and spi_sck_i = SHIFT_EDGE then  -- on fsm state change, update all core registers
            sh_reg <= sh_next;                              -- core shift register
            do_buffer_reg <= do_buffer_next;                -- registered data output
            do_transfer_reg <= do_transfer_next;            -- cross-clock transfer flag
            di_req_reg <= di_req_next;                      -- input data request
            wr_ack_reg <= wr_ack_next;                      -- wren ack for data load synchronization
        end if;
        -- FFD registers clocked on CHANGE edge and cleared on idle (spi_ssel_i = 1)
        -- miso MUX preload control register (fdp)
        if spi_ssel_i = '1' then                            -- async preset
            preload_miso <= '1';                            -- miso MUX sees top bit of parallel input when slave not selected
        elsif spi_sck_i'event and spi_sck_i = CHANGE_EDGE then  -- on CHANGE edge, change to tx_reg output
            preload_miso <= spi_ssel_i;                     -- spi_ssel_i is not '1' in this branch: MUX switches to tx_bit_reg
        end if;
        -- FFD registers clocked on CHANGE edge
        -- tx_bit register (fd)
        if spi_sck_i'event and spi_sck_i = CHANGE_EDGE then
            tx_bit_reg <= tx_bit_next;                      -- update MISO driver from the MSb
        end if;
    end process core_reg_proc;

    -- state and datapath combinatorial logic
    -- The fsm counts down from N to 1 and wraps from 1 back to N (continuous transfer);
    -- state 0 is only entered through the async clear on spi_ssel_i or the 'others' branch.
    core_combi_proc : process ( sh_reg, sh_next, state_reg, tx_bit_reg, rx_bit_next, do_buffer_reg,
                                do_transfer_reg, di_reg, di_req_reg, wren, wr_ack_reg) is
    begin
        -- all output signals are assigned to (avoid latches)
        sh_next <= sh_reg;                                  -- shift register
        tx_bit_next <= tx_bit_reg;                          -- MISO driver
        do_buffer_next <= do_buffer_reg;                    -- output data buffer
        do_transfer_next <= do_transfer_reg;                -- output data flag
        wr_ack_next <= wr_ack_reg;                          -- write enable acknowledge
        di_req_next <= di_req_reg;                          -- data input request
        state_next <= state_reg;                            -- fsm control state
        case state_reg is
            when (N) =>                                     -- first shift state: drop write ack and data request, do_transfer holds
                wr_ack_next <= '0';                         -- remove data load ack
                di_req_next <= '0';                         -- prefetch data request: deassert when shifting data
                tx_bit_next <= sh_reg(N-1);                 -- output next MSbit
                sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);  -- shift inner bits
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                state_next <= state_reg - 1;                -- update next state at each sck pulse
            when (N-1) downto (PREFETCH+3) =>               -- remove 'do_transfer' and shift bits
                do_transfer_next <= '0';                    -- reset 'do_valid' transfer signal
                di_req_next <= '0';                         -- prefetch data request: deassert when shifting data
                wr_ack_next <= '0';                         -- remove data load ack for all but the load stages
                tx_bit_next <= sh_reg(N-1);                 -- output next MSbit
                sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);  -- shift inner bits
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                state_next <= state_reg - 1;                -- update next state at each sck pulse
            when (PREFETCH+2) downto 3 =>                   -- raise prefetch 'di_req_o' signal
                di_req_next <= '1';                         -- request data in advance to allow for pipeline delays
                wr_ack_next <= '0';                         -- remove data load ack for all but the load stages
                tx_bit_next <= sh_reg(N-1);                 -- output next MSbit
                sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);  -- shift inner bits
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                state_next <= state_reg - 1;                -- update next state at each sck pulse
            when 2 =>                                       -- transfer received data to do_buffer_reg on next cycle
                di_req_next <= '1';                         -- request data in advance to allow for pipeline delays
                wr_ack_next <= '0';                         -- remove data load ack for all but the load stages
                tx_bit_next <= sh_reg(N-1);                 -- output next MSbit
                sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);  -- shift inner bits
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                do_transfer_next <= '1';                    -- signal transfer to do_buffer on next cycle
                do_buffer_next <= sh_next;                  -- get next data directly into rx buffer (hence sh_next in the sensitivity list)
                state_next <= state_reg - 1;                -- update next state at each sck pulse
            when 1 =>                                       -- restart at state N, reloading the shifter with new data or zeros
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                di_req_next <= '0';                         -- prefetch data request: deassert when shifting data
                state_next <= N;                            -- next state is top bit of new data
                if wren = '1' then                          -- load tx register if valid data present at di_reg
                    wr_ack_next <= '1';                     -- acknowledge data in transfer
                    sh_next(N-1 downto 1) <= di_reg(N-2 downto 0);  -- shift inner bits
                    tx_bit_next <= di_reg(N-1);             -- first output bit comes from the MSb of parallel data
                else
                    wr_ack_next <= '0';                     -- no data reload for continuous transfer mode
                    sh_next(N-1 downto 1) <= (others => '0');   -- clear transmit shift register
                    tx_bit_next <= '0';                     -- send ZERO
                end if;
            when 0 =>                                       -- idle state: start and end of transmission
                sh_next(0) <= rx_bit_next;                  -- shift in rx bit into LSb
                sh_next(N-1 downto 1) <= di_reg(N-2 downto 0);  -- shift inner bits
                tx_bit_next <= di_reg(N-1);                 -- first output bit comes from the MSb of parallel data
                wr_ack_next <= '1';                         -- acknowledge data in transfer
                di_req_next <= '0';                         -- prefetch data request: deassert when shifting data
                do_transfer_next <= '0';                    -- clear signal transfer to do_buffer
                state_next <= N;                            -- next state is top bit of new data
            when others =>
                state_next <= 0;                            -- safe state
        end case;
    end process core_combi_proc;

    -- data output processes
    do_o_proc :       do_o <= do_buffer_reg;                -- do_o always available
    do_valid_o_proc:  do_valid_o <= do_valid_o_reg;         -- copy registered do_valid_o to output
    di_req_o_proc:    di_req_o <= di_req_o_reg;             -- copy registered di_req_o to output
    wr_ack_o_proc:    wr_ack_o <= wr_ack_reg;               -- copy registered wr_ack_o to output

    -- MISO driver process: preload top bit of parallel data to MISO while the slave is deselected.
    -- This is a MUX that selects di_reg(N-1) while 'preload_miso' is set (asynchronously preset by
    -- spi_ssel_i = '1'), and the registered tx bit once the first CHANGE edge has cleared it.
    -- The mux gives us a preload of the first bit, simplifying the shifter logic.
    spi_miso_o_proc: process (preload_miso, tx_bit_reg, di_reg) is
    begin
        if preload_miso = '1' then
            spi_miso_o <= di_reg(N-1);                      -- copy top bit of parallel data at reset
        else
            spi_miso_o <= tx_bit_reg;                       -- copy top bit of shifter at sequential operation
        end if;
    end process spi_miso_o_proc;

    -- these signals are useful for verification, and can be deleted after debug.
    do_transfer_proc:  do_transfer_o <= do_transfer_reg;
    -- NOTE(review): state_reg ranges up to N but the debug port is 4 bits wide, so for N > 15
    -- the conversion cannot represent the upper states; confirm the truncation is acceptable.
    state_debug_proc:  state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 4)); -- export internal state to debug
    rx_bit_next_proc:  rx_bit_next_o <= rx_bit_next;
    wren_o_proc:       wren_o <= wren;
    sh_reg_debug_proc: sh_reg_dbg_o <= sh_reg;              -- export sh_reg to debug
end architecture rtl;
0,0 → 1,620
-- Author: Jonny Doin,,
-- Create Date: 12:18:12 04/25/2011
-- Module Name: SPI_MASTER - RTL
-- Target Devices: Spartan-6
-- Tool versions: ISE 13.1
-- Description:
-- This block is the SPI master interface, implemented in one single entity.
-- All internal core operations are synchronous to the 'sclk_i', and a spi base clock is generated by dividing 'sclk_i' down to
-- a frequency that is 2x the spi SCK line frequency. The divider value is passed as a generic parameter during instantiation.
-- All parallel i/o interface operations are synchronous to the 'pclk_i' high speed clock, that can be asynchronous to the serial
-- 'sclk_i' clock.
-- For optimized use of longlines, connect 'sclk_i' and 'pclk_i' to the same global clock line.
-- Fully pipelined cross-clock circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two
-- clock domains.
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o.
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), lookahead prefetch signaling
-- ('PREFETCH'), and spi base clock division from sclk_i ('SPI_2X_CLK_DIV').
-- ====================
-- The clock generation for the SPI SCK is derived from the high-speed 'sclk_i' clock. The core divides this reference
-- clock to form the SPI base clock, by the 'SPI_2X_CLK_DIV' generic parameter. The user must set the divider value for the
-- SPI_2X clock, which is 2x the desired SCK frequency.
-- All registers in the core are clocked by the high-speed clocks, and clock enables are used to run the FSM and other logic
-- at lower rates. This architecture preserves FPGA clock resources like global clock buffers, and avoids path delays caused
-- by combinatorial clock dividers outputs.
-- The core has async clock domain circuitry to handle asynchronous clocks for the SPI and parallel interfaces.
-- ========================
-- The parallel interface has an input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'. 'di_req_o' is a look ahead data request line,
-- that is set 'PREFETCH' clock cycles in advance to synchronize a pipelined memory or fifo to present the
-- next input data at 'di_i' in time to have continuous clock at the spi bus, to allow back-to-back continuous load.
-- For a pipelined sync RAM, a PREFETCH of 2 cycles allows an address generator to present the new address to the RAM in one
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the shifter.
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time.
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last SPI clock cycle,
-- if continuous transmission is intended. If 'wren_i' is not valid 2 SPI clock cycles after the last transmitted bit, the interface
-- enters idle state and deasserts SSEL.
-- When the interface is idle, 'wren_i' write strobe loads the data and starts transmission. 'di_req_o' will strobe when entering
-- idle state, if a previously loaded data has already been transferred.
-- =======================
-- __ __ __ __ __ __ __
-- pclk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- ___________
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'pclk_i'
-- ______________ ___________________________...
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'pclk_i' rising edge
-- _______
-- wren_i __________________________/ \_______... -- user strobes 'wren_i' for one cycle of 'pclk_i'
-- =======================
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received,
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_clk'.
-- The signal 'do_valid_o' is set one 'spi_clk' clock after, to directly drive a synchronous memory or fifo write enable.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'pclk_i'.
-- When the interface is idle, data at the 'do_o' port holds the last word received.
-- ======================
-- ______ ______ ______ ______
-- spi_clk bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- internal spi 2x base clock
-- _ __ __ __ __ __ __ __ __
-- pclk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock (may be async to sclk_i)
-- _____________ _____________________________________... -- 1) rx data is transferred to 'do_buffer_reg'
-- do_o ___old_data__X__________new_data___________________... -- after last rx bit, at rising 'spi_clk'.
-- ____________
-- do_valid_o ____________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'pclk_i' cycles
-- -- on the 3rd 'pclk_i' rising edge.
-- The propagation delay of spi_sck_o and spi_mosi_o, referred to the internal clock, is balanced by similar path delays,
-- but the sampling delay of spi_miso_i imposes a setup time referred to the sck signal that limits the high frequency
-- of the interface, for full duplex operation.
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints.
-- The VHDL dialect used is VHDL'93, accepted largely by all synthesis tools.
------------------------------ COPYRIGHT NOTICE -----------------------------------------------------------------------
-- This file is part of the SPI MASTER/SLAVE INTERFACE project,spi_master_slave
-- Author(s): Jonny Doin,,
-- Copyright (C) 2011 Jonny Doin
-- -----------------------------
-- This source file may be used and distributed without restriction provided that this copyright statement is not
-- removed from the file and that any derivative work contains the original copyright notice and the associated
-- disclaimer.
-- This source file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
-- General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
-- (at your option) any later version.
-- This source is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
-- details.
-- You should have received a copy of the GNU Lesser General Public License along with this source; if not, download
-- it from http://www.opencores.org/lgpl.shtml
------------------------------ REVISION HISTORY -----------------------------------------------------------------------
-- 2011/04/28 v0.01.0010 [JD] shifter implemented as a sequential process. timing problems and async issues in synthesis.
-- 2011/05/01 v0.01.0030 [JD] changed original shifter design to a fully pipelined RTL fsmd. solved all synthesis issues.
-- 2011/05/05 v0.01.0034 [JD] added an internal buffer register for rx_data, to allow greater liberty in data load/store.
-- 2011/05/08 v0.10.0038 [JD] increased one state to have SSEL start one cycle before SCK. Implemented full CPOL/CPHA
-- logic, based on generics, and do_valid_o signal.
-- 2011/05/13 v0.20.0045 [JD] streamlined signal names, added PREFETCH parameter, added assertions.
-- 2011/05/17 v0.80.0049 [JD] added explicit clock synchronization circuitry across clock boundaries.
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core.
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets.
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches.
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce
-- synthesis LUT overhead in Spartan-6 architecture.
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic.
-- 2011/06/12 v0.97.0079 [JD] streamlined wr_ack for all cases and eliminated unnecessary register resets.
-- 2011/06/14 v0.97.0083 [JD] (bug CPHA effect) : redesigned SCK output circuit.
-- (minor bug) : removed fsm registers from (not rst_i) chip enable.
-- 2011/06/15 v0.97.0086 [JD] removed master MISO input register, to relax MISO data setup time (to get higher speed).
-- 2011/07/09 v1.00.0095 [JD] changed all clocking scheme to use a single high-speed clock with clock enables to control lower
-- frequency sequential circuits, to preserve clocking resources and avoid path delay glitches.
-- 2011/07/10 v1.00.0098 [JD] implemented SCK clock divider circuit to generate spi clock directly from system clock.
-- 2011/07/10 v1.10.0075 [JD] verified spi_master_slave in silicon at 50MHz, 25MHz, 16.666MHz, 12.5MHz, 10MHz, 8.333MHz,
-- 7.1428MHz, 6.25MHz, 1MHz and 500kHz. The core proved very robust at all tested frequencies.
-- 2011/07/16 v1.11.0080 [JD] verified both spi_master and spi_slave in loopback at 50MHz SPI clock.
-- 2011/07/17 v1.11.0080 [JD] BUG: CPOL='1', CPHA='1' @50MHz causes MOSI to be shifted one bit earlier.
-- BUG: CPOL='0', CPHA='1' causes SCK to have one extra pulse with one sclk_i width at the end.
-- 2011/07/18 v1.12.0105 [JD] CHG: spi sck output register changed to remove glitch at last clock when CPHA='1'.
-- for CPHA='1', max spi clock is 25MHz. for CPHA= '0', max spi clock is >50MHz.
-- 2011/07/24 v1.13.0125 [JD] FIX: 'sck_ena_ce' is one half-cycle advanced relative to 'fsm_ce', eliminating CPHA='1' glitches.
-- Core verified for all CPOL, CPHA at up to 50MHz, simulates to over 100MHz.
-- 2011/07/29 v1.14.0130 [JD] Removed global signal setting at the FSM, implementing exhaustive explicit signal attributions
-- for each state, to avoid reported inference problems in some synthesis engines.
-- Streamlined port names and indentation blocks.
-- 2011/08/01 v1.15.0135 [JD] Fixed latch inference for spi_mosi_o driver at the fsm.
-- The master and slave cores were verified in FPGA with continuous transmission, for all SPI modes.
-- 2011/08/04 v1.15.0136 [JD] Fixed assertions (PREFETCH >= 1) and minor comment bugs.
-- ====
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.std_logic_unsigned.all;
-- ========================
-- There are several output ports that are used to simulate and verify the core operation.
-- Do not map any signals to the unused ports, and the synthesis tool will remove the related interfacing
-- circuitry.
-- The same is valid for the transmit and receive ports. If the receive ports are not mapped, the
-- synthesis tool will remove the receive logic from the generated circuitry.
-- Alternatively, you can remove these ports and related circuitry once the core is verified and
-- integrated to your circuit.
entity spi_master is
    -- SPI master core interface.
    -- Serial side: spi_ssel_o / spi_sck_o / spi_mosi_o / spi_miso_i, timed from 'sclk_i'.
    -- Parallel side: N-bit 'di_i' write port (wren_i / wr_ack_o / di_req_o) and N-bit 'do_o'
    -- read port (do_valid_o), referenced to 'pclk_i'.
    Generic (
        N              : positive  := 32;   -- serial word length in bits (32 is default; the architecture asserts N >= 8)
        CPOL           : std_logic := '0';  -- SPI mode selection (mode 0 default)
        CPHA           : std_logic := '0';  -- CPOL = clock polarity, CPHA = clock phase.
        PREFETCH       : positive  := 2;    -- prefetch lookahead cycles (the architecture asserts 1 <= PREFETCH <= N-5)
        SPI_2X_CLK_DIV : positive  := 5);   -- divider from sclk_i to the 2x spi base clock: for a 100MHz sclk_i, yields a 10MHz SCK
    Port (
        sclk_i         : in  std_logic := 'X';                                    -- high-speed serial interface system clock
        pclk_i         : in  std_logic := 'X';                                    -- high-speed parallel interface system clock
        rst_i          : in  std_logic := 'X';                                    -- reset core
        ---- serial interface ----
        spi_ssel_o     : out std_logic;                                           -- spi bus slave select line
        spi_sck_o      : out std_logic;                                           -- spi bus sck
        spi_mosi_o     : out std_logic;                                           -- spi bus mosi output
        spi_miso_i     : in  std_logic := 'X';                                    -- spi bus spi_miso_i input
        ---- parallel interface ----
        di_req_o       : out std_logic;                                           -- preload lookahead data request line
        di_i           : in  std_logic_vector (N-1 downto 0) := (others => 'X');  -- parallel data in (clocked on rising spi_clk after last bit)
        wren_i         : in  std_logic := 'X';                                    -- user data write enable, starts transmission when interface is idle
        wr_ack_o       : out std_logic;                                           -- write acknowledge
        do_valid_o     : out std_logic;                                           -- do_o data valid signal, valid during one spi_clk rising edge.
        do_o           : out std_logic_vector (N-1 downto 0);                     -- parallel output (clocked on rising spi_clk after last bit)
        --- debug ports: can be removed or left unconnected for the application circuit ---
        sck_ena_o      : out std_logic;                                           -- debug: internal sck enable signal
        sck_ena_ce_o   : out std_logic;                                           -- debug: internal sck clock enable signal
        do_transfer_o  : out std_logic;                                           -- debug: internal transfer driver
        wren_o         : out std_logic;                                           -- debug: internal state of the wren_i pulse stretcher
        rx_bit_reg_o   : out std_logic;                                           -- debug: internal rx bit
        state_dbg_o    : out std_logic_vector (3 downto 0);                       -- debug: internal state register
        core_clk_o     : out std_logic;                                           -- debug: presumably the internal 'core_clk' (driver not shown here)
        core_n_clk_o   : out std_logic;                                           -- debug: presumably the internal 'core_n_clk' (driver not shown here)
        core_ce_o      : out std_logic;                                           -- debug: presumably the internal 'core_ce' (driver not shown here)
        core_n_ce_o    : out std_logic;                                           -- debug: presumably the internal 'core_n_ce' (driver not shown here)
        sh_reg_dbg_o   : out std_logic_vector (N-1 downto 0)                      -- debug: internal shift register
    );
end spi_master;
-- this architecture is a pipelined register-transfer description.
-- all signals are clocked at the rising edge of the system clock 'sclk_i'.
architecture rtl of spi_master is
-- Declarative part of the spi_master 'rtl' architecture: clocking signals, fsm/datapath
-- register pairs (xxx_next = combinatorial input, xxx_reg = registered value), and the
-- two cross-clock strobe pipelines.
-- core clocks, generated from 'sclk_i': initialized at GSR to differential values
signal core_clk : std_logic := '0'; -- continuous core clock, positive logic
signal core_n_clk : std_logic := '1'; -- continuous core clock, negative logic
signal core_ce : std_logic := '0'; -- core clock enable, positive logic
signal core_n_ce : std_logic := '1'; -- core clock enable, negative logic
-- spi bus clock, generated from the CPOL selected core clock polarity
signal spi_2x_ce : std_logic := '1'; -- spi_2x clock enable (one sclk_i cycle wide, see spi_2x_ce_gen_proc)
signal spi_clk : std_logic := '0'; -- spi bus output clock
signal spi_clk_reg : std_logic; -- output pipeline delay for spi sck (do NOT global initialize)
-- core fsm clock enables
signal fsm_ce : std_logic := '1'; -- fsm clock enable
signal sck_ena_ce : std_logic := '1'; -- SCK clock enable
signal samp_ce : std_logic := '1'; -- data sampling clock enable
-- Signals that need a defined power-up state are initialized at GSR (global set/reset) by
-- giving explicit initialization values at declaration. This is needed for all Xilinx FPGAs, and
-- especially for the Spartan-6 and newer CLB architectures, where a async reset can
-- reduce the usability of the slice registers, due to the need to share the control
-- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice.
-- By using GSR for the initialization, and reducing async RESET local init to the bare
-- essential, the model achieves better LUT/FF packing and CLB usability.
-- Not every value is zero: the clock enables above and the di_req_o output stage below
-- start at '1', and the datapath registers (shifter, buffers, wren, sck_ena) have no initializer.
-- internal state signals for register and combinatorial stages
signal state_next : natural range N+1 downto 0 := 0; -- fsm next state (N+2 states, 0 .. N+1)
signal state_reg : natural range N+1 downto 0 := 0; -- fsm current state
-- shifter signals for register and combinatorial stages
signal sh_next : std_logic_vector (N-1 downto 0);
signal sh_reg : std_logic_vector (N-1 downto 0);
-- input bit sampled buffer
signal rx_bit_reg : std_logic := '0';
-- buffered di_i data signals for register and combinatorial stages
signal di_reg : std_logic_vector (N-1 downto 0);
-- internal wren_i stretcher for fsm combinatorial stage
signal wren : std_logic;
signal wr_ack_next : std_logic := '0';
signal wr_ack_reg : std_logic := '0';
-- internal SSEL enable control signals
signal ssel_ena_next : std_logic := '0';
signal ssel_ena_reg : std_logic := '0';
-- internal SCK enable control signals
signal sck_ena_next : std_logic;
signal sck_ena_reg : std_logic;
-- buffered do_o data signals for register and combinatorial stages
signal do_buffer_next : std_logic_vector (N-1 downto 0);
signal do_buffer_reg : std_logic_vector (N-1 downto 0);
-- internal signal to flag transfer to do_buffer_reg
signal do_transfer_next : std_logic := '0';
signal do_transfer_reg : std_logic := '0';
-- internal input data request signal
signal di_req_next : std_logic := '0';
signal di_req_reg : std_logic := '0';
-- cross-clock do_transfer_reg -> do_valid_o_reg pipeline (stages A..D, then pulse logic and output register)
signal do_valid_A : std_logic := '0';
signal do_valid_B : std_logic := '0';
signal do_valid_C : std_logic := '0';
signal do_valid_D : std_logic := '0';
signal do_valid_next : std_logic := '0';
signal do_valid_o_reg : std_logic := '0';
-- cross-clock di_req_reg -> di_req_o_reg pipeline (stages A..D, then pulse logic and output register)
signal di_req_o_A : std_logic := '0';
signal di_req_o_B : std_logic := '0';
signal di_req_o_C : std_logic := '0';
signal di_req_o_D : std_logic := '0';
signal di_req_o_next : std_logic := '1'; -- starts at '1', unlike the do_valid pipeline
signal di_req_o_reg : std_logic := '1'; -- starts at '1', unlike the do_valid pipeline
begin
    -- Generic parameter checks: concurrent assertions that fail when the core is instantiated
    -- with unsupported generic values.
    -- minimum word width is 8 bits
    assert N >= 8
    report "Generic parameter 'N' (shift register size) needs to be 8 bits minimum"
    severity FAILURE;
    -- minimum prefetch lookahead check
    assert PREFETCH >= 1
    report "Generic parameter 'PREFETCH' (lookahead count) needs to be 1 minimum"
    severity FAILURE;
    -- maximum prefetch lookahead check
    assert PREFETCH <= N-5
    report "Generic parameter 'PREFETCH' (lookahead count) out of range, needs to be N-5 maximum"
    severity FAILURE;
    -- SPI_2X_CLK_DIV clock divider value must not be zero
    assert SPI_2X_CLK_DIV > 0
    report "Generic parameter 'SPI_2X_CLK_DIV' must not be zero"
    severity FAILURE;
-- In order to preserve global clocking resources, the core clocking scheme is completely based
-- on using clock enables to process the serial high-speed clock at lower rates for the core fsm,
-- the spi clock generator and the input sampling clock.
-- The clock generation block derives 2 continuous antiphase signals from the 2x spi base clock
-- for the core clocking.
-- The 2 clock phases are generated by separate and synchronous FFs, and should have only
-- differential interconnect delay skew.
-- Clock enable signals are generated with the same phase as the 2 core clocks, and these clock
-- enables are used to control clocking of all internal synchronous circuitry.
-- The clock enable phase is selected for serial input sampling, fsm clocking, and spi SCK output,
-- based on the configuration of CPOL and CPHA.
-- Each phase is selected so that all the registers can be clocked with a rising edge on all SPI
-- modes, by a single high-speed global clock, preserving clock resources and clock to data skew.
-- generate the 2x spi base clock enable from the serial high-speed input clock.
-- A free-running counter counts SPI_2X_CLK_DIV rising edges of 'sclk_i'; 'spi_2x_ce' is
-- set on the edge where the counter wraps and cleared on every other edge, so the
-- enable is one 'sclk_i' period wide and repeats every SPI_2X_CLK_DIV periods.
spi_2x_ce_gen_proc: process (sclk_i) is
    variable clk_cnt : integer range SPI_2X_CLK_DIV-1 downto 0 := 0;
begin
    if sclk_i'event and sclk_i = '1' then
        if clk_cnt = SPI_2X_CLK_DIV-1 then
            spi_2x_ce <= '1';                   -- terminal count: strobe the enable
            clk_cnt := 0;                       -- and restart the divider
        else
            spi_2x_ce <= '0';
            clk_cnt := clk_cnt + 1;             -- only reached below terminal count, so the range is never exceeded
        end if;
    end if;
end process spi_2x_ce_gen_proc;
-- generate the core antiphase clocks and clock enables from the 2x base CE.
-- On every 'spi_2x_ce' pulse the two core clocks toggle and exactly one of the
-- two clock enables is asserted; on all other sclk_i cycles both enables are low,
-- so each enable is a single-cycle pulse in the sclk_i domain.
core_clock_gen_proc : process (sclk_i) is
begin
    if sclk_i'event and sclk_i = '1' then
        if spi_2x_ce = '1' then
            -- generate the 2 antiphase core clocks
            core_clk   <= core_n_clk;
            core_n_clk <= not core_n_clk;
            -- generate the 2 phase core clock enables
            core_ce    <= core_n_clk;
            core_n_ce  <= not core_n_clk;
        else
            -- no base enable: hold the clocks, drop both clock enables
            core_ce    <= '0';
            core_n_ce  <= '0';
        end if;
    end if;
end process core_clock_gen_proc;
-- spi clk generator: the CPOL generic selects which core clock phase feeds 'spi_clk'
spi_sck_cpol_0_proc: if CPOL = '0' generate
    -- CPOL=0: spi clk idles LOW, take the positive core clock
    spi_clk <= core_clk;
end generate spi_sck_cpol_0_proc;
spi_sck_cpol_1_proc: if CPOL = '1' generate
    -- CPOL=1: spi clk idles HIGH, take the negative core clock
    spi_clk <= core_n_clk;
end generate spi_sck_cpol_1_proc;
-- Sampling clock enable generation: the CPHA generic selects 'core_ce' or 'core_n_ce'
-- as 'samp_ce', the enable used to capture the serial input bit.
samp_ce_cpha_0_proc: if CPHA = '0' generate
    -- CPHA=0: sample on the positive core clock enable
    samp_ce <= core_ce;
end generate samp_ce_cpha_0_proc;
samp_ce_cpha_1_proc: if CPHA = '1' generate
    -- CPHA=1: sample on the negative core clock enable
    samp_ce <= core_n_ce;
end generate samp_ce_cpha_1_proc;
-- FSM clock enable generation: the CPHA generic selects 'core_n_ce' or 'core_ce'
-- as 'fsm_ce', the enable that clocks the state machine registers.
fsm_ce_cpha_0_proc: if CPHA = '0' generate
    -- CPHA=0: registers update on the negative core clock enable
    fsm_ce <= core_n_ce;
end generate fsm_ce_cpha_0_proc;
fsm_ce_cpha_1_proc: if CPHA = '1' generate
    -- CPHA=1: registers update on the positive core clock enable
    fsm_ce <= core_ce;
end generate fsm_ce_cpha_1_proc;
-- sck enable control: clock enable for the 'sck_ena_reg' look-ahead register.
-- The assignment is unconditional: 'sck_ena_ce' follows 'core_n_ce' for both CPHA settings,
-- which for CPHA=1 is the phase opposite to 'fsm_ce'.
sck_ena_ce <= core_n_ce; -- for CPHA=1, SCK is advanced one-half cycle
-- rx bit flop: capture rx bit after SAMPLE edge of sck
-- 'spi_miso_i' is registered into 'rx_bit_reg' on the rising edge of sclk_i, gated by 'samp_ce'.
-- Only the clock is in the sensitivity list: the body reacts to nothing but the clock edge.
rx_bit_proc : process (sclk_i) is
begin
    if sclk_i'event and sclk_i = '1' then
        if samp_ce = '1' then
            rx_bit_reg <= spi_miso_i;   -- hold the sampled bit until the next sample enable
        end if;
    end if;
end process rx_bit_proc;
-- do_valid_o and di_req_o strobe output logic
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a
-- fixed-length delayed pulse for the output flags, at the parallel clock domain
-- The process has a clocked part (the A..D flop chains and the output registers, on pclk_i)
-- and a combinatorial part (the '*_next' pulse decode), hence the extra signals in the
-- sensitivity list.
out_transfer_proc : process ( pclk_i, do_transfer_reg, di_req_reg,
                              do_valid_A, do_valid_B, do_valid_D,
                              di_req_o_A, di_req_o_B, di_req_o_D ) is
begin
    if pclk_i'event and pclk_i = '1' then               -- clock at parallel port clock
        -- do_transfer_reg -> do_valid_o_reg
        do_valid_A     <= do_transfer_reg;              -- the input signal must be at least 2 clocks long
        do_valid_B     <= do_valid_A;                   -- feed it to a ripple chain of FFDs
        do_valid_C     <= do_valid_B;
        do_valid_D     <= do_valid_C;
        do_valid_o_reg <= do_valid_next;                -- registered output pulse
        -- di_req_reg -> di_req_o_reg
        di_req_o_A     <= di_req_reg;                   -- the input signal must be at least 2 clocks long
        di_req_o_B     <= di_req_o_A;                   -- feed it to a ripple chain of FFDs
        di_req_o_C     <= di_req_o_B;
        di_req_o_D     <= di_req_o_C;
        di_req_o_reg   <= di_req_o_next;                -- registered output pulse
    end if;
    -- generate a 2-clocks pulse at the 3rd clock cycle:
    -- high once stages A and B are both set, low again as soon as stage D is set
    do_valid_next <= do_valid_A and do_valid_B and not do_valid_D;
    di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D;
end process out_transfer_proc;
-- parallel load input registers: data register and write enable
-- Both registers are clocked by pclk_i. 'di_reg' captures 'di_i' while 'wren_i' is high;
-- 'wren' is set by 'wren_i' and cleared by 'wr_ack_reg', with the set taking priority.
in_transfer_proc: process ( pclk_i, wren_i, wr_ack_reg ) is
begin
    -- registered data input, input register with clock enable
    if pclk_i'event and pclk_i = '1' then
        if wren_i = '1' then
            di_reg <= di_i;                 -- parallel data input buffer register
        end if;
    end if;
    -- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset)
    if pclk_i'event and pclk_i = '1' then
        if wren_i = '1' then                -- wren_i is the sync preset for wren
            wren <= '1';
        elsif wr_ack_reg = '1' then         -- wr_ack is the sync reset for wren
            wren <= '0';
        end if;
    end if;
end process in_transfer_proc;
-- fsm state and data registers: synchronous to the spi base reference clock
-- All registers are clocked on the rising edge of sclk_i and gated by clock enables:
--   * state_reg        : 'fsm_ce', with synchronous reset from 'rst_i'
--   * datapath/flags   : 'fsm_ce', no reset
--   * sck_ena_reg      : 'sck_ena_ce'
core_reg_proc : process (sclk_i) is
begin
    -- FF registers clocked on rising edge and cleared on sync rst_i
    if sclk_i'event and sclk_i = '1' then
        if rst_i = '1' then                             -- sync reset
            state_reg <= 0;                             -- only provide local reset for the state machine
        elsif fsm_ce = '1' then                         -- fsm_ce is clock enable for the fsm
            state_reg <= state_next;                    -- state register
        end if;
    end if;
    -- FF registers clocked synchronous to the fsm state
    if sclk_i'event and sclk_i = '1' then
        if fsm_ce = '1' then
            sh_reg          <= sh_next;                 -- shift register
            ssel_ena_reg    <= ssel_ena_next;           -- spi select enable
            do_buffer_reg   <= do_buffer_next;          -- registered output data buffer
            do_transfer_reg <= do_transfer_next;        -- output data transferred to buffer
            di_req_reg      <= di_req_next;             -- input data request
            wr_ack_reg      <= wr_ack_next;             -- write acknowledge for data load synchronization
        end if;
    end if;
    -- FF registers clocked one-half cycle earlier than the fsm state
    if sclk_i'event and sclk_i = '1' then
        if sck_ena_ce = '1' then
            sck_ena_reg <= sck_ena_next;                -- spi clock enable: look ahead logic
        end if;
    end if;
end process core_reg_proc;
-- state and datapath combinatorial logic
-- Computes every '*_next' value and 'spi_mosi_o' from the current registers.
-- 'state_reg' counts down: N+1 (SSEL lead-in) -> N .. 1 (bit shifting) -> 0 (idle).
-- In states 1 and 0 a pending 'wren' loads 'di_reg' into the shifter and starts a new word;
-- otherwise the fsm proceeds towards / stays in idle.
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, ssel_ena_reg, sck_ena_reg, do_buffer_reg,
                            do_transfer_reg, wr_ack_reg, di_req_reg, di_reg, wren ) is
begin
    -- defaults: hold every register value
    sh_next          <= sh_reg;                                 -- all output signals are assigned to (avoid latches)
    ssel_ena_next    <= ssel_ena_reg;                           -- controls the slave select line
    sck_ena_next     <= sck_ena_reg;                            -- controls the clock enable of spi sck line
    do_buffer_next   <= do_buffer_reg;                          -- output data buffer
    do_transfer_next <= do_transfer_reg;                        -- output data flag
    wr_ack_next      <= wr_ack_reg;                             -- write acknowledge
    di_req_next      <= di_req_reg;                             -- prefetch data request
    spi_mosi_o       <= sh_reg(N-1);                            -- default to avoid latch inference
    state_next       <= state_reg;                              -- next state
    case state_reg is

        when (N+1) =>                                           -- this state is to enable SSEL before SCK
            spi_mosi_o    <= sh_reg(N-1);                       -- shift out tx bit from the MSb
            ssel_ena_next <= '1';                               -- tx in progress: will assert SSEL
            sck_ena_next  <= '1';                               -- enable SCK on next cycle (stays off on first SSEL clock cycle)
            di_req_next   <= '0';                               -- prefetch data request: deassert when shifting data
            wr_ack_next   <= '0';                               -- remove write acknowledge for all but the load stages
            state_next    <= state_reg - 1;                     -- update next state at each sck pulse

        when (N) =>                                             -- deassert 'di_rdy' and stretch do_valid
            spi_mosi_o    <= sh_reg(N-1);                       -- shift out tx bit from the MSb
            di_req_next   <= '0';                               -- prefetch data request: deassert when shifting data
            sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);      -- shift inner bits
            sh_next(0)    <= rx_bit_reg;                        -- shift in rx bit into LSb
            wr_ack_next   <= '0';                               -- remove write acknowledge for all but the load stages
            state_next    <= state_reg - 1;                     -- update next state at each sck pulse

        when (N-1) downto (PREFETCH+3) =>                       -- remove 'do_transfer' and shift bits
            spi_mosi_o    <= sh_reg(N-1);                       -- shift out tx bit from the MSb
            di_req_next   <= '0';                               -- prefetch data request: deassert when shifting data
            do_transfer_next <= '0';                            -- reset 'do_valid' transfer signal
            sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);      -- shift inner bits
            sh_next(0)    <= rx_bit_reg;                        -- shift in rx bit into LSb
            wr_ack_next   <= '0';                               -- remove write acknowledge for all but the load stages
            state_next    <= state_reg - 1;                     -- update next state at each sck pulse

        when (PREFETCH+2) downto 2 =>                           -- raise prefetch 'di_req_o' signal
            spi_mosi_o    <= sh_reg(N-1);                       -- shift out tx bit from the MSb
            di_req_next   <= '1';                               -- request data in advance to allow for pipeline delays
            sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0);      -- shift inner bits
            sh_next(0)    <= rx_bit_reg;                        -- shift in rx bit into LSb
            wr_ack_next   <= '0';                               -- remove write acknowledge for all but the load stages
            state_next    <= state_reg - 1;                     -- update next state at each sck pulse

        when 1 =>                                               -- transfer rx data to do_buffer and restart if new data is written
            spi_mosi_o    <= sh_reg(N-1);                       -- shift out tx bit from the MSb
            di_req_next   <= '1';                               -- request data in advance to allow for pipeline delays
            do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0);   -- shift rx data directly into rx buffer
            do_buffer_next(0) <= rx_bit_reg;                    -- shift last rx bit into rx buffer
            do_transfer_next <= '1';                            -- signal transfer to do_buffer
            if wren = '1' then                                  -- load tx register if valid data present at di_i
                state_next   <= N;                              -- next state is top bit of new data
                sh_next      <= di_reg;                         -- load parallel data from di_reg into shifter
                sck_ena_next <= '1';                            -- SCK enabled
                wr_ack_next  <= '1';                            -- acknowledge data in transfer
            else
                sck_ena_next <= '0';                            -- SCK disabled: tx empty, no data to send
                wr_ack_next  <= '0';                            -- remove write acknowledge for all but the load stages
                state_next   <= state_reg - 1;                  -- update next state at each sck pulse
            end if;

        when 0 =>                                               -- idle state: start and end of transmission
            di_req_next   <= '1';                               -- will request data if shifter empty
            sck_ena_next  <= '0';                               -- SCK disabled: tx empty, no data to send
            if wren = '1' then                                  -- load tx register if valid data present at di_i
                spi_mosi_o    <= di_reg(N-1);                   -- special case: shift out first tx bit from the MSb (look ahead)
                ssel_ena_next <= '1';                           -- enable interface SSEL
                state_next    <= N+1;                           -- start from idle: let one cycle for SSEL settling
                sh_next       <= di_reg;                        -- load bits from di_reg into shifter
                wr_ack_next   <= '1';                           -- acknowledge data in transfer
            else
                spi_mosi_o    <= sh_reg(N-1);                   -- shift out tx bit from the MSb
                ssel_ena_next <= '0';                           -- deassert SSEL: interface is idle
                wr_ack_next   <= '0';                           -- remove write acknowledge for all but the load stages
                state_next    <= 0;                             -- when idle, keep this state
            end if;

        when others =>
            state_next    <= 0;                                 -- state 0 is safe state
    end case;
end process core_combi_proc;
-- data output processes
-- Labelled concurrent assignments that connect internal registers to the output ports.
-- Only the slave select is inverted on the way out; the others are direct copies.
spi_ssel_o_proc: spi_ssel_o <= not ssel_ena_reg; -- active-low slave select line: low while ssel_ena_reg is '1'
do_o_proc: do_o <= do_buffer_reg; -- parallel data out
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- data out valid
di_req_o_proc: di_req_o <= di_req_o_reg; -- input data request for next cycle
wr_ack_o_proc: wr_ack_o <= wr_ack_reg; -- write acknowledge
-- SCK out logic: pipeline phase compensation for the SCK line
-- This is a MUX with an output register.
-- The register gives us a pipeline delay for the SCK line, pairing with the state machine moore
-- output pipeline delay for the MOSI line, and thus enabling higher SCK frequency.
-- The mux selects the running 'spi_clk' while 'sck_ena_reg' is high, and the idle level CPOL otherwise.
spi_sck_o_gen_proc : process (sclk_i, sck_ena_reg, spi_clk, spi_clk_reg) is
begin
    if sclk_i'event and sclk_i = '1' then
        if sck_ena_reg = '1' then
            spi_clk_reg <= spi_clk;         -- copy the selected clock polarity
        else
            spi_clk_reg <= CPOL;            -- when clock disabled, set to idle polarity
        end if;
    end if;
    spi_sck_o <= spi_clk_reg;               -- connect register to output
end process spi_sck_o_gen_proc;
-- these signals are useful for verification, and can be deleted after debug.
-- Each line copies one internal signal to a debug output port, unmodified except for
-- the integer-to-vector conversion of the fsm state.
do_transfer_proc: do_transfer_o <= do_transfer_reg;
-- NOTE(review): the fsm state reaches N+1, but the conversion below is fixed at 4 bits, so
-- state values above 15 cannot be represented -- confirm the port width against the N in use.
state_dbg_proc: state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 4));
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg;
wren_o_proc: wren_o <= wren;
sh_reg_dbg_proc: sh_reg_dbg_o <= sh_reg;
core_clk_o_proc: core_clk_o <= core_clk;
core_n_clk_o_proc: core_n_clk_o <= core_n_clk;
core_ce_o_proc: core_ce_o <= core_ce;
core_n_ce_o_proc: core_n_ce_o <= core_n_ce;
sck_ena_o_proc: sck_ena_o <= sck_ena_reg;
sck_ena_ce_o_proc: sck_ena_ce_o <= sck_ena_ce;
end architecture rtl;
0,0 → 1,52
This project was started from the need to have a robust yet simple SPI interface core
written in VHDL to use in generic FPGA-to-device interfacing.
The resulting cores generate very small and efficient circuits, that operate from very
slow SPI clocks up to over 50MHz SPI clocks.
VHDL files for spi master/slave project:
spi_master.vhd spi master module, can be used independently
spi_slave.vhd spi slave module, can be used independently
spi_loopback.vhd wrapper module for simulating the master and slave modules
spi_loopback_test.vhd testbench for simulating the loopback module, test master against slave
spi_loopback.ucf constraints for simulation: Spartan-6, area, LUT compression.
The original development is done in Xilinx ISE 13.1, targeted to a Spartan-6 device.
VHDL simulation was done in ISIM, after Place & Route, with default constraints, for the slowest
Spartan-6 device. Synthesis generated 41 slices, and the design was simulated at 25MHz spi SCK and 100MHz for the parallel interface clocks.
Design verification in silicon was done on a Digilent Atlys board, and the verification project can be found in the \trunk\syn directory, with all the files required to replicate the verification tests, including pinlock constraints for the Atlys board.
This work is licensed as a LGPL work. If you find this licensing too restrictive for hardware, or it is not adequate for you, please get in touch with me and we can arrange a more suitable open source hardware licensing.
If you have any questions or usage issues with this core, please open a thread in the OpenCores forum, and I will be pleased to answer.
If you find a bug or a design fault in the models, or if you have an issue that you would like to be addressed, please open a bug/issue in the OpenCores bugtracker for this project, at http://opencores.org/project,spi_master_slave,bugtracker.
In any case, thank you for testing and using this core.
Jonny Doin
0,0 → 1,204
-- Author: Jonny Doin
-- Create Date: 09:56:30 07/06/2011
-- Module Name: grp_debouncer - RTL
-- Project Name: basic functions
-- Target Devices: Spartan-6
-- Tool versions: ISE 13.1
-- Description:
-- This block is a generic multiple input debouncing circuit.
-- It handles multiple inputs, like mechanical switch inputs, and outputs a debounced, stable registered version of the inputs.
-- A 'new_data' one-cycle strobe is also available, to sync downstream logic.
-- ==================
-- W
-- /----------------/----------------\
-- | |
-- | |
-- | ______ ______ | _____
-- | W | | W |fdr | W | W |cmp \
-- \----/---| +1 |---/----| |--/--+----/----| \
-- | | | | | \
-- ------ | | \ |
-- | | | = |-----\
-- |> R | / | |
-- ---+-- | / |
-- | CNT_VAL---| / |
-- | |____/ |
-- | |
-- \------------\ |
-- | |
-- N ____ | |
-- /-------/---)) \ ____ | |
-- | ))XOR |-----) \ | |
-- | /------))___/ )OR |-----/ |
-- | | /---)___/ |
-- | | | |
-- | | \----------\ |
-- | | N | |
-- | \--------/-----------\ +----------------------+---------\
-- | | | |
-- \---\ | | |
-- ______ | ______ | | ______ |
-- | fd | | | fd | | | |fde | |
-- [data_i]----/-----| |---/---+---/----| |---/---+----)---| |---/---+---/-----------)------------------------[data_o]
-- N | | N N | | N | | | | N | N |
-- | | | | | \---|CE | | |
-- | | | | | | | | |
-- [clk_i]----> |> | |> | | |> | | | ____ ______
-- ------ ------ | ------ | N ____ \---| \ | fd |
-- | \---/---)) \ |AND |-----| |----[strb_o]
-- | ))XOR |-----|___/ | |
-- \-------------------------/---))___/ | |
-- N | |
-- |> |
-- ------
-- ==============
-- This debouncer circuit detects edges in an input signal, and waits for the signal to stabilize for the designated time
-- before transferring the stable signal to the registered output.
-- A one-clock-cycle strobe is pulsed at the output to signal that new data is available.
-- The core clock should be the system clock, to optimize use of global clock resources.
-- ================
-- A change in state in any bit in the input word causes reload of the delay counter, and the output word is updated only
-- when all bits are stable for the specified period. Therefore, the grouping of signals and delay selection should match
-- behaviour of the selected signals.
-- ==============
-- The number of registers inferred is: 3*N + (LOG(CNT_VAL)/LOG(2)) + 1 registers.
-- The number of LUTs inferred is roughly: ((4*N+2)/6)+2.
-- The slice distribution will vary, and depends on the control set restrictions and LUT-FF pairs resulting from map+p&r.
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints.
-- Verification in silicon was done on a Digilent Atlys board with a Spartan-6 FPGA @100MHz clock.
-- The VHDL dialect used is VHDL'93, accepted largely by all synthesis tools.
------------------------------ COPYRIGHT NOTICE -----------------------------------------------------------------------
-- Author(s): Jonny Doin
-- Copyright (C) 2011 Jonny Doin
-- -----------------------------
-- This source file may be used and distributed without restriction provided that this copyright statement is not
-- removed from the file and that any derivative work contains the original copyright notice and the associated
-- disclaimer.
-- This source file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
-- General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
-- (at your option) any later version.
-- This source is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
-- details.
-- You should have received a copy of the GNU Lesser General Public License along with this source; if not, download
-- it from
------------------------------ REVISION HISTORY -----------------------------------------------------------------------
-- 2011/07/06 v0.01.0010 [JD] started development. verification of synthesis circuit inference.
-- 2011/07/07 v1.00.0020 [JD] verification in silicon. operation at 100MHz, tested on the Atlys board (Spartan-6 LX45).
-- 2011/08/10 v1.01.0025 [JD] added one pipeline delay to new data strobe output.
-- 2011/09/19 v1.01.0030 [JD] changed range for internal counter (cnt_reg, cnt_next) to avoid adder flipover (Altera/ModelSim).
-- ====
-- The circuit can easily be extended to have a signature of which inputs changed at the data out port.
library ieee;
use ieee.std_logic_1164.all;
-- Generic multiple-input debouncer: interface declaration.
--   N        : width of the input/output data buses
--   CNT_VAL  : number of clk_i counts the input must stay stable before it is passed on
entity grp_debouncer is
    Generic (
        N       : positive := 8;                                            -- input bus width
        CNT_VAL : positive := 10000);                                       -- clock counts for debounce period
    Port (
        clk_i  : in  std_logic := 'X';                                      -- system clock
        data_i : in  std_logic_vector (N-1 downto 0) := (others => 'X');    -- noisy input data
        data_o : out std_logic_vector (N-1 downto 0);                       -- registered stable output data
        strb_o : out std_logic                                              -- strobe for new data available
    );
end grp_debouncer;
-- RTL implementation of the group debouncer.
-- 'data_i' passes through two registers (reg_A, reg_B); any difference between them restarts
-- the period counter. When the counter reaches CNT_VAL, 'dat_strb' transfers reg_B to the
-- output register and restarts the counter. 'strb_o' is a registered flag raised when that
-- transfer actually changes the output value.
architecture rtl of grp_debouncer is
    -- datapath pipeline
    signal reg_A, reg_B : std_logic_vector (N-1 downto 0) := (others => '0');  -- debounce edge detectors
    signal reg_out      : std_logic_vector (N-1 downto 0) := (others => '0');  -- registered output
    signal dat_strb     : std_logic := '0';                                    -- data transfer strobe
    signal strb_reg     : std_logic := '0';                                    -- registered strobe
    signal strb_next    : std_logic := '0';                                    -- lookahead strobe
    signal dat_diff     : std_logic := '0';                                    -- edge detector
    -- debounce counter
    -- the range extends to CNT_VAL + 1 because 'cnt_next' is computed as cnt_reg + 1
    -- while cnt_reg is at CNT_VAL, before the strobe forces it back to 0
    signal cnt_reg      : integer range CNT_VAL + 1 downto 0 := 0;             -- debounce period counter
    signal cnt_next     : integer range CNT_VAL + 1 downto 0 := 0;             -- combinatorial signal
begin
    -- This counter is implemented as a up-counter with reset and final count detection via compare,
    -- instead of a down-counter with preset and final count detection via nonzero detection.
    -- This is better for Spartan-6 and Virtex-6 CLB architecture, because it uses less control sets.

    -- cnt_reg register transfer logic
    cnt_reg_proc: process (clk_i) is
    begin
        if clk_i'event and clk_i = '1' then
            cnt_reg <= cnt_next;
        end if;
    end process cnt_reg_proc;

    -- cnt_next combinatorial logic: restart on any input change or on final count, else count up
    cnt_next_proc: cnt_next <= 0 when dat_diff = '1' or dat_strb = '1' else cnt_reg + 1;

    -- final count combinatorial logic
    final_cnt_proc: dat_strb <= '1' when cnt_reg = CNT_VAL else '0';

    -- input pipeline logic
    pipeline_proc: process (clk_i) is
    begin
        if clk_i'event and clk_i = '1' then
            -- edge detection pipeline
            reg_A <= data_i;
            reg_B <= reg_A;
            -- new data strobe pipeline delay
            strb_reg <= strb_next;
        end if;
        -- output data pipeline: update only when the debounce period has elapsed
        if clk_i'event and clk_i = '1' then
            if dat_strb = '1' then
                reg_out <= reg_B;
            end if;
        end if;
    end process pipeline_proc;

    -- edge detector: high while the two pipeline stages differ
    edge_detector_proc: dat_diff <= '1' when reg_A /= reg_B else '0';

    -- lookahead new data strobe: only flag transfers that change the output value
    next_strobe_proc: strb_next <= '1' when ((reg_out /= reg_B) and dat_strb = '1') else '0';

    -- connect output ports
    data_o_proc: data_o <= reg_out;
    strb_o_proc: strb_o <= strb_reg;
end rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.