OpenCores
URL https://opencores.org/ocsvn/spi_master_slave/spi_master_slave/trunk

Subversion Repositories spi_master_slave

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /spi_master_slave/trunk/rtl
    from Rev 3 to Rev 4
    Reverse comparison

Rev 3 → Rev 4

/spi_loopback.ucf
1,13 → 1,18
 
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/05
NET "m_spi_clk_i" TNM_NET = m_spi_clk_i;
TIMESPEC TS_m_spi_clk_i = PERIOD "m_spi_clk_i" 20 ns HIGH 50%;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
NET "m_spi_2x_clk_i" TNM_NET = m_spi_2x_clk_i;
TIMESPEC TS_m_spi_2x_clk_i = PERIOD "m_spi_2x_clk_i" 15 ns HIGH 50%;
NET "s_clk_i" TNM_NET = s_clk_i;
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 8 ns HIGH 50%;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08
NET "m_clk_i" TNM_NET = m_clk_i;
TIMESPEC TS_m_clk_i = PERIOD "m_clk_i" 8 ns HIGH 50%;
NET "s_spi_sck_i" TNM_NET = s_spi_sck_i;
TIMESPEC TS_s_spi_sck_i = PERIOD "s_spi_sck_i" 40 ns HIGH 50%;
NET "m_par_clk_i" TNM_NET = m_par_clk_i;
TIMESPEC TS_m_par_clk_i = PERIOD "m_par_clk_i" 10 ns HIGH 50%;
NET "s_clk_i" TNM_NET = s_clk_i;
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 10 ns HIGH 50%;
TIMESPEC TS_s_spi_sck_i = PERIOD "s_spi_sck_i" 30 ns HIGH 50%;
NET "m_spi_sck_o_OBUF" TNM_NET = m_spi_sck_o_OBUF;
TIMESPEC TS_m_spi_sck_o_OBUF = PERIOD "m_spi_sck_o_OBUF" 30 ns HIGH 50%;
NET "Inst_spi_master/core_n_clk" TNM_NET = Inst_spi_master/core_n_clk;
TIMESPEC TS_Inst_spi_master_core_n_clk = PERIOD "Inst_spi_master/core_n_clk" 30 ns HIGH 50%;
INST "m_di_i<0>" TNM = m_di;
INST "m_di_i<1>" TNM = m_di;
INST "m_di_i<2>" TNM = m_di;
40,7 → 45,8
INST "m_di_i<29>" TNM = m_di;
INST "m_di_i<30>" TNM = m_di;
INST "m_di_i<31>" TNM = m_di;
TIMEGRP "m_di" OFFSET = IN 10 ns VALID 10 ns BEFORE "m_par_clk_i" RISING;
TIMEGRP "m_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "m_clk_i" RISING;
INST "m_spi_miso_i" TNM = m_miso;
INST "s_di_i<0>" TNM = s_di;
INST "s_di_i<1>" TNM = s_di;
INST "s_di_i<2>" TNM = s_di;
73,7 → 79,39
INST "s_di_i<29>" TNM = s_di;
INST "s_di_i<30>" TNM = s_di;
INST "s_di_i<31>" TNM = s_di;
TIMEGRP "s_di" OFFSET = IN 10 ns VALID 10 ns BEFORE "s_clk_i" RISING;
NET "s_spi_mosi_i" OFFSET = IN 20 ns VALID 20 ns BEFORE "s_spi_sck_i" RISING;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/05
NET "s_spi_miso_o" OFFSET = OUT 40 ns AFTER "s_spi_sck_i";
TIMEGRP "s_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "s_clk_i" RISING;
INST "s_spi_mosi_i" TNM = s_mosi;
INST "m_do_o<0>" TNM = m_do;
INST "m_do_o<1>" TNM = m_do;
INST "m_do_o<2>" TNM = m_do;
INST "m_do_o<3>" TNM = m_do;
INST "m_do_o<4>" TNM = m_do;
INST "m_do_o<5>" TNM = m_do;
INST "m_do_o<6>" TNM = m_do;
INST "m_do_o<7>" TNM = m_do;
INST "m_do_o<8>" TNM = m_do;
INST "m_do_o<9>" TNM = m_do;
INST "m_do_o<10>" TNM = m_do;
INST "m_do_o<11>" TNM = m_do;
INST "m_do_o<12>" TNM = m_do;
INST "m_do_o<13>" TNM = m_do;
INST "m_do_o<14>" TNM = m_do;
INST "m_do_o<15>" TNM = m_do;
INST "m_do_o<16>" TNM = m_do;
INST "m_do_o<17>" TNM = m_do;
INST "m_do_o<18>" TNM = m_do;
INST "m_do_o<19>" TNM = m_do;
INST "m_do_o<20>" TNM = m_do;
INST "m_do_o<21>" TNM = m_do;
INST "m_do_o<22>" TNM = m_do;
INST "m_do_o<23>" TNM = m_do;
INST "m_do_o<24>" TNM = m_do;
INST "m_do_o<25>" TNM = m_do;
INST "m_do_o<26>" TNM = m_do;
INST "m_do_o<27>" TNM = m_do;
INST "m_do_o<28>" TNM = m_do;
INST "m_do_o<29>" TNM = m_do;
INST "m_do_o<30>" TNM = m_do;
INST "m_do_o<31>" TNM = m_do;
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/09
INST "m_rx_bit_reg_o" TNM = m_rx_bit;
/spi_loopback.vhd
36,102 → 36,118
Generic (
N : positive := 32; -- 32bit serial word length is default
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default)
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 1 -- prefetch lookahead cycles
CPHA : std_logic := '1'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 2 -- prefetch lookahead cycles
);
Port(
----------------MASTER-----------------------
m_spi_clk_i : IN std_logic;
m_par_clk_i : IN std_logic;
m_spi_2x_clk_i : IN std_logic;
m_clk_i : IN std_logic;
m_rst_i : IN std_logic;
m_spi_ssel_o : OUT std_logic;
m_spi_sck_o : OUT std_logic;
m_spi_mosi_o : OUT std_logic;
m_spi_miso_i : IN std_logic;
m_di_req_o : OUT std_logic;
m_di_i : IN std_logic_vector(N-1 downto 0);
m_do_o : OUT std_logic_vector(N-1 downto 0);
m_di_rdy_o : OUT std_logic;
m_wren_i : IN std_logic;
m_do_valid_o : OUT std_logic;
m_do_o : OUT std_logic_vector(N-1 downto 0);
----- debug -----
m_do_transfer_o : OUT std_logic;
m_wren_o : OUT std_logic;
m_wren_ack_o : OUT std_logic;
m_rx_bit_reg_o : OUT std_logic;
m_state_dbg_o : OUT std_logic_vector(5 downto 0);
m_rx_bit_reg_o : OUT std_logic;
m_core_clk_o : OUT std_logic;
m_core_n_clk_o : OUT std_logic;
m_sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0);
----------------SLAVE-----------------------
s_clk_i : IN std_logic;
s_rst_i : IN std_logic;
s_spi_ssel_i : IN std_logic;
s_spi_sck_i : IN std_logic;
s_spi_mosi_i : IN std_logic;
s_spi_miso_o : OUT std_logic;
s_di_i : IN std_logic_vector(N-1 downto 0);
s_do_o : OUT std_logic_vector(N-1 downto 0);
s_di_rdy_o : OUT std_logic;
s_wren_i : IN std_logic;
s_do_valid_o : OUT std_logic;
s_do_transfer_o : OUT std_logic;
s_state_dbg_o : OUT std_logic_vector(5 downto 0)
-- s_sh_reg_dbg_o : OUT std_logic_vector(31 downto 0)
s_clk_i : IN std_logic;
s_spi_ssel_i : IN std_logic;
s_spi_sck_i : IN std_logic;
s_spi_mosi_i : IN std_logic;
s_spi_miso_o : OUT std_logic;
s_di_req_o : OUT std_logic; -- preload lookahead data request line
s_di_i : IN std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel load data in (clocked in on rising edge of clk_i)
s_wren_i : IN std_logic := 'X'; -- user data write enable
s_do_valid_o : OUT std_logic; -- do_o data valid strobe, valid during one clk_i rising edge.
s_do_o : OUT std_logic_vector (N-1 downto 0); -- parallel output (clocked out on falling clk_i)
----- debug -----
s_do_transfer_o : OUT std_logic; -- debug: internal transfer driver
s_wren_o : OUT std_logic;
s_wren_ack_o : OUT std_logic;
s_rx_bit_reg_o : OUT std_logic;
s_state_dbg_o : OUT std_logic_vector (5 downto 0) -- debug: internal state register
-- s_sh_reg_dbg_o : OUT std_logic_vector (N-1 downto 0) -- debug: internal shift register
);
end spi_loopback;
 
architecture Structural of spi_loopback is
 
COMPONENT spi_master
COMPONENT spi_master
GENERIC (
N : positive := 32;
CPOL : std_logic := '0';
CPHA : std_logic := '0';
PREFETCH : positive := 1
CPHA : std_logic := '1';
PREFETCH : positive := 2
);
PORT(
spi_2x_clk_i : IN std_logic;
par_clk_i : IN std_logic;
rst_i : IN std_logic;
spi_miso_i : IN std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
spi_ssel_o : OUT std_logic;
spi_sck_o : OUT std_logic;
spi_mosi_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
di_rdy_o : OUT std_logic;
do_valid_o : OUT std_logic;
PORT(
spi_2x_clk_i : IN std_logic;
clk_i : IN std_logic;
rst_i : IN std_logic;
spi_ssel_o : OUT std_logic;
spi_sck_o : OUT std_logic;
spi_mosi_o : OUT std_logic;
spi_miso_i : IN std_logic;
di_req_o : OUT std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
do_valid_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
----- debug -----
do_transfer_o : OUT std_logic;
state_dbg_o : OUT std_logic_vector(5 downto 0);
wren_o : OUT std_logic;
wren_ack_o : OUT std_logic;
rx_bit_reg_o : OUT std_logic;
sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0)
);
END COMPONENT;
state_dbg_o : OUT std_logic_vector(5 downto 0);
core_clk_o : OUT std_logic;
core_n_clk_o : OUT std_logic;
sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0)
);
END COMPONENT;
 
COMPONENT spi_slave
COMPONENT spi_slave
GENERIC (
N : positive := 32;
CPOL : std_logic := '0';
CPHA : std_logic := '0';
PREFETCH : positive := 1
CPHA : std_logic := '1';
PREFETCH : positive := 2
);
PORT(
clk_i : IN std_logic;
rst_i : IN std_logic;
spi_ssel_i : IN std_logic;
spi_sck_i : IN std_logic;
spi_mosi_i : IN std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
spi_miso_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
di_rdy_o : OUT std_logic;
do_valid_o : OUT std_logic;
PORT(
clk_i : IN std_logic;
spi_ssel_i : IN std_logic;
spi_sck_i : IN std_logic;
spi_mosi_i : IN std_logic;
spi_miso_o : OUT std_logic;
di_req_o : OUT std_logic;
di_i : IN std_logic_vector(N-1 downto 0);
wren_i : IN std_logic;
do_valid_o : OUT std_logic;
do_o : OUT std_logic_vector(N-1 downto 0);
----- debug -----
do_transfer_o : OUT std_logic;
state_dbg_o : OUT std_logic_vector(5 downto 0)
wren_o : OUT std_logic;
wren_ack_o : OUT std_logic;
rx_bit_reg_o : OUT std_logic;
state_dbg_o : OUT std_logic_vector(5 downto 0)
-- sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0)
);
END COMPONENT;
);
END COMPONENT;
 
begin
 
Inst_spi_master: spi_master
Inst_spi_master: spi_master
GENERIC MAP (
N => N,
CPOL => CPOL,
138,25 → 154,30
CPHA => CPHA,
PREFETCH => PREFETCH)
PORT MAP(
spi_2x_clk_i => m_spi_clk_i,
par_clk_i => m_par_clk_i,
rst_i => m_rst_i,
spi_ssel_o => m_spi_ssel_o,
spi_sck_o => m_spi_sck_o,
spi_mosi_o => m_spi_mosi_o,
spi_miso_i => m_spi_miso_i,
di_i => m_di_i,
do_o => m_do_o,
di_rdy_o => m_di_rdy_o,
wren_i => m_wren_i,
do_valid_o => m_do_valid_o,
spi_2x_clk_i => m_spi_2x_clk_i,
clk_i => m_clk_i,
rst_i => m_rst_i,
spi_ssel_o => m_spi_ssel_o,
spi_sck_o => m_spi_sck_o,
spi_mosi_o => m_spi_mosi_o,
spi_miso_i => m_spi_miso_i,
di_req_o => m_di_req_o,
di_i => m_di_i,
wren_i => m_wren_i,
do_valid_o => m_do_valid_o,
do_o => m_do_o,
----- debug -----
do_transfer_o => m_do_transfer_o,
state_dbg_o => m_state_dbg_o,
wren_o => m_wren_o,
wren_ack_o => m_wren_ack_o,
rx_bit_reg_o => m_rx_bit_reg_o,
sh_reg_dbg_o => m_sh_reg_dbg_o
);
state_dbg_o => m_state_dbg_o,
core_clk_o => m_core_clk_o,
core_n_clk_o => m_core_n_clk_o,
sh_reg_dbg_o => m_sh_reg_dbg_o
);
 
Inst_spi_slave: spi_slave
Inst_spi_slave: spi_slave
GENERIC MAP (
N => N,
CPOL => CPOL,
163,21 → 184,24
CPHA => CPHA,
PREFETCH => PREFETCH)
PORT MAP(
clk_i => s_clk_i,
rst_i => s_rst_i,
spi_ssel_i => s_spi_ssel_i,
spi_sck_i => s_spi_sck_i,
spi_mosi_i => s_spi_mosi_i,
spi_miso_o => s_spi_miso_o,
di_i => s_di_i,
do_o => s_do_o,
di_rdy_o => s_di_rdy_o,
wren_i => s_wren_i,
do_valid_o => s_do_valid_o,
clk_i => s_clk_i,
spi_ssel_i => s_spi_ssel_i,
spi_sck_i => s_spi_sck_i,
spi_mosi_i => s_spi_mosi_i,
spi_miso_o => s_spi_miso_o,
di_req_o => s_di_req_o,
di_i => s_di_i,
wren_i => s_wren_i,
do_valid_o => s_do_valid_o,
do_o => s_do_o,
----- debug -----
do_transfer_o => s_do_transfer_o,
state_dbg_o => s_state_dbg_o
wren_o => s_wren_o,
wren_ack_o => s_wren_ack_o,
rx_bit_reg_o => s_rx_bit_reg_o,
state_dbg_o => s_state_dbg_o
-- sh_reg_dbg_o => s_sh_reg_dbg_o
);
);
 
end Structural;
 
/spi_slave.vhd
11,20 → 11,22
-- This block is the SPI slave interface, implemented in one single entity.
-- All internal core operations are synchronous to the external SPI clock, and follow the general SPI de-facto standard.
-- The parallel read/write interface is synchronous to a supplied system master clock, 'clk_i'.
-- To avoid async glitches caused by setup violations between the core registers and the parallel i/o registers,
-- access to the parallel ports 'di_i' and 'do_o' must be synchronized with the 'di_rdy_o' and 'do_valid_o' signals.
-- Synchronization for the parallel ports is provided by input data request and write enable lines, and output data valid line.
--
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o.
-- It is parameterizable for the data width ('N'), SPI mode via generics (CPHA and CPOL), and lookahead prefetch
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch
-- signaling ('PREFETCH').
--
-- PARALLEL WRITE INTERFACE
-- The parallel interface has a input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_rdy_o' and 'wren_i'. 'di_rdy_o' is a look ahead data request line,
-- that is set 'PREFETCH' 'spi_sck_i' cycles in advance to synchronize a pipelined memory or fifo to present the
-- next input data at 'di_i' in time to have continuous clock at the spi bus, to allow back-to-back continuous load.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'.
-- When the core needs input data, a look ahead data request strobe, 'di_req_o', is pulsed 'PREFETCH' 'spi_sck_i'
-- cycles in advance to synchronize a user pipelined memory or fifo to present the next input data at 'di_i'
-- in time to have continuous clock at the spi bus, to allow back-to-back continuous load.
-- The data request strobe on 'di_req_o' is 2 'clk_i' clock cycles long.
-- The write to 'di_i' must occur at most one 'spi_sck_i' cycle before actual load to the core shift register, to avoid
-- race conditions at the register transfer.
-- The user circuit places data at the 'di_i' port and strobes the 'wren_i' line for one rising edge of 'clk_i'.
-- For a pipelined sync RAM, a PREFETCH of 3 cycles allows an address generator to present the new address to the RAM in one
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the interface one clock before transfer.
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time.
34,35 → 36,36
--
-- PARALLEL WRITE PIPELINED SEQUENCE
-- =================================
-- __ __ __ __ __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock
-- ___________________________________
-- di_rdy_o ________/ \________________... -- 'di_rdy_o' asserted on rising edge of 'clk_par_i'
-- ______________ ______________________________________________...
-- di_i __old_data____X__________new_data____________________________... -- user circuit loads data on 'di_i' at next rising edge
-- ________________________________ -- user circuit asserts 'wren_i' at next edge, and removes
-- wren_i __________________/ \_______... -- 'wren_i' after 'di_rdy_o' is removed
-- __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- ___________
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i'
-- ______________ ___________________________...
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge
-- ________
-- wren_i __________________________/ \______... -- 'wren_i' enables latch on rising edge of 'clk_i'
--
--
-- PARALLEL READ INTERFACE
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received,
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_sck_i'.
-- The signal 'do_valid_o' is set one 'spi_sck_i' clock after, to directly drive a synchronous memory or fifo write enable.
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete
-- word is received, the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_sck_i'.
-- The signal 'do_valid_o' is strobed 3 'clk_i' clocks after, to directly drive a synchronous memory or fifo write enable.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'.
-- When the interface is idle, data at the 'do_o' port holds the last word received.
--
-- PARALLEL READ PIPELINED SEQUENCE
-- ================================
-- ______ ______ ______ ______ _
-- clk_spi_i ___/ bit1 \______/ bitN \______/bitN-1\______/bitN-2\______/b... -- spi base clock
-- __ __ __ __ __ __ __ __ __ __
-- clk_par_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock
-- _________________ ___________________________________________... -- 1) received data is transferred to 'do_buffer_reg'
-- do_o __old_data_______X__________new_data_________________________... -- after last bit received, at 'clk_spi_i' rising edge.
-- _________________ -- 2) 'do_valid_o' asserted on rising edge of 'clk_par_i',
-- do_valid_o ______________________________________/ \____... -- at next bit (bit N-1) of the SPI transfer.
--
-- ______ ______ ______ ______
-- clk_spi_i ___/ bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi base clock
-- __ __ __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock
-- _________________ _____________________________________... -- 1) received data is transferred to 'do_buffer_reg'
-- do_o __old_data_______X__________new_data___________________... -- after last bit received, at next shift clock.
-- ____________
-- do_valid_o ________________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles
-- -- on the 3rd 'clk_i' rising edge.
--
--
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints.
--
------------------------------ COPYRIGHT NOTICE -----------------------------------------------------------------------
97,6 → 100,10
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core.
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets.
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches.
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce
-- synthesis LUT overhead in Spartan-6 architecture.
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic.
-- 2011/06/12 v0.97.0079 [JD] implemented wren_ack and di_req logic for state 0, and eliminated unnecessary registers reset.
--
--
-----------------------------------------------------------------------------------------------------------------------
103,10 → 110,7
-- TODO
-- ====
--
-- - DEBUG_PACKAGE:
-- - package to export signals to the verification testbench
--
--
-----------------------------------------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
118,22 → 122,23
N : positive := 32; -- 32bit serial word length is default
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default)
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 1); -- prefetch lookahead cycles
PREFETCH : positive := 2); -- prefetch lookahead cycles
Port (
clk_i : in std_logic := 'X'; -- internal interface clock (clocks di/do registers)
rst_i : in std_logic := 'X'; -- synchronous rst_i: clear registers at clk_i rising edge
spi_ssel_i : in std_logic; -- spi bus slave select line
spi_sck_i : in std_logic; -- spi bus sck clock (clocks the shift register core)
spi_mosi_i : in std_logic; -- spi bus mosi input
spi_ssel_i : in std_logic := 'X'; -- spi bus slave select line
spi_sck_i : in std_logic := 'X'; -- spi bus sck clock (clocks the shift register core)
spi_mosi_i : in std_logic := 'X'; -- spi bus mosi input
spi_miso_o : out std_logic := 'X'; -- spi bus spi_miso_o output
di_req_o : out std_logic; -- preload lookahead data request line
di_i : in std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel load data in (clocked in on rising edge of clk_i)
wren_i : in std_logic := 'X'; -- user data write enable
do_valid_o : out std_logic; -- do_o data valid strobe, valid during one clk_i rising edge.
do_o : out std_logic_vector (N-1 downto 0); -- parallel output (clocked out on falling clk_i)
di_rdy_o : out std_logic; -- preload lookahead: HIGH when ready for new input data
wren_i : in std_logic := 'X'; -- write enable (write di_i data at next rising clk_i edge)
-- wren_i starts transmission. must be valid 1 clk_i cycle
-- before current transmission ends.
do_valid_o : out std_logic; -- do_o data valid signal, valid during one clk_i rising edge.
--- debug ports: can be removed for the application circuit ---
do_transfer_o : out std_logic; -- debug: internal transfer driver
wren_o : out std_logic; -- debug: internal state of the wren_i pulse stretcher
wren_ack_o : out std_logic; -- debug: wren ack from state machine
rx_bit_reg_o : out std_logic; -- debug: internal rx bit
state_dbg_o : out std_logic_vector (5 downto 0) -- debug: internal state register
-- sh_reg_dbg_o : out std_logic_vector (N-1 downto 0) -- debug: internal shift register
);
149,29 → 154,52
constant SAMPLE_EDGE : std_logic := (CPOL xnor CPHA);
constant SAMPLE_LEVEL : std_logic := SAMPLE_EDGE;
constant SHIFT_EDGE : std_logic := (CPOL xor CPHA);
--
-- GLOBAL RESET:
-- all signals are initialized to zero at GSR (global set/reset) by giving explicit
-- initialization values at declaration. This is needed for all Xilinx FPGAs, and
-- especially for the Spartan-6 and newer CLB architectures, where a local reset can
-- reduce the usability of the slice registers, due to the need to share the control
-- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice.
-- By using GSR for the initialization, and reducing RESET local init to the bare
-- essential, the model achieves better LUT/FF packing and CLB usability.
--
-- internal state signals for register and combinational stages
signal state_next : natural range N+1 downto 0 := 0;
signal state_reg : natural range N+1 downto 0 := 0;
signal state_next : natural range N+1 downto 0 := 0;
-- shifter signals for register and combinational stages
signal sh_reg : std_logic_vector (N-1 downto 0);
signal sh_next : std_logic_vector (N-1 downto 0);
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal sh_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- input bit sampled buffer
signal rx_bit_reg : std_logic;
signal di_reg : std_logic_vector (N-1 downto 0);
signal rx_bit_reg : std_logic := '0';
-- buffered di_i data signals for register and combinational stages
signal di_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal wren_i stretcher for fsm combinational stage
signal wren : std_logic := '0';
signal wren_ack_next : std_logic := '0';
signal wren_ack_reg : std_logic := '0';
-- buffered do_o data signals for register and combinational stages
signal do_buffer_reg : std_logic_vector (N-1 downto 0);
signal do_buffer_next : std_logic_vector (N-1 downto 0);
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_buffer_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal signal to flag transfer to do_buffer_reg
signal do_transfer_reg : std_logic;
signal do_transfer_next : std_logic;
-- internal registered do_valid_o
signal do_valid_oreg : std_logic;
signal do_valid_reg : std_logic;
signal do_valid_next : std_logic;
-- internal registered di_rdy_o
signal di_rdy_oreg : std_logic;
signal di_rdy_reg : std_logic;
signal di_rdy_next : std_logic;
signal do_transfer_next : std_logic := '0';
signal do_transfer_reg : std_logic := '0';
-- internal input data request signal
signal di_req : std_logic := '0';
-- cross-clock do_valid_o logic
signal do_valid_next : std_logic := '0';
signal do_valid_A : std_logic := '0';
signal do_valid_B : std_logic := '0';
signal do_valid_C : std_logic := '0';
signal do_valid_D : std_logic := '0';
signal do_valid_o_reg : std_logic := '0';
-- cross-clock di_req_o logic
signal di_req_o_next : std_logic := '0';
signal di_req_o_A : std_logic := '0';
signal di_req_o_B : std_logic := '0';
signal di_req_o_C : std_logic := '0';
signal di_req_o_D : std_logic := '0';
signal di_req_o_reg : std_logic := '0';
begin
--=============================================================================================
-- GENERICS CONSTRAINTS CHECKING
186,9 → 214,9
severity FAILURE;
 
--=============================================================================================
-- RTL REGISTER PROCESSES
-- REGISTERED INPUTS
--=============================================================================================
-- capture rx bit at SAMPLE edge of sck
-- rx bit flop: capture rx bit after SAMPLE edge of sck
rx_bit_proc : process (spi_sck_i, spi_mosi_i) is
begin
if spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then
195,125 → 223,134
rx_bit_reg <= spi_mosi_i;
end if;
end process rx_bit_proc;
-- state and data registers change on SHIFT edge of sck (ffd with async clear)
core_reg_proc : process (spi_sck_i, rst_i, spi_ssel_i) is
 
--=============================================================================================
-- RTL CORE REGISTER PROCESSES
--=============================================================================================
-- fsm state and data registers change on spi SHIFT clock
core_reg_proc : process (spi_sck_i, spi_ssel_i) is
begin
-- registers cleared on reset
if rst_i = '1' then -- async clr
do_buffer_reg <= (others => '0');
do_transfer_reg <= '0';
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers
do_buffer_reg <= do_buffer_next;
do_transfer_reg <= do_transfer_next;
-- FFD registers clocked on SHIFT edge and cleared on idle (spi_ssel_i = 1)
if spi_ssel_i = '1' then -- async clr
state_reg <= 0; -- state falls back to idle when slave not selected
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers
state_reg <= state_next; -- core fsm changes state with spi SHIFT clock
end if;
-- registers cleared on idle (spi_ssel_i = 1)
if spi_ssel_i = '1' then -- async clr
state_reg <= 0;
sh_reg <= (others => '0');
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers
state_reg <= state_next;
sh_reg <= sh_next;
-- FFD registers clocked on SHIFT edge
if spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on fsm state change, update all core registers
sh_reg <= sh_next; -- core shift register
do_buffer_reg <= do_buffer_next; -- registered data output
do_transfer_reg <= do_transfer_next; -- cross-clock transfer flag
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization
end if;
end process core_reg_proc;
-- cross-clock registers change on half-cycle of sck (ffd with async clear)
-- this is to prevent fsm state change glitches causing setup time artifacts at async clk_i edges
cross_reg_proc : process (rst_i, spi_sck_i, spi_ssel_i) is
 
--=============================================================================================
-- CROSS-CLOCK PIPELINE TRANSFER LOGIC
--=============================================================================================
-- do_valid_o and di_req_o strobe output logic
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a
-- fixed-length delayed pulse for the output flags, at the parallel clock domain
out_transfer_proc : process ( clk_i, do_transfer_reg, di_req,
do_valid_A, do_valid_B, do_valid_D,
di_req_o_A, di_req_o_B, di_req_o_D) is
begin
if spi_ssel_i = '1' then
di_rdy_reg <= '1'; -- di_rdy true during idle
elsif spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then -- on half-cycle edge, update cross registers
di_rdy_reg <= di_rdy_next;
if clk_i'event and clk_i = '1' then -- clock at parallel port clock
-- do_transfer_reg -> do_valid_o_reg
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs
do_valid_C <= do_valid_B;
do_valid_D <= do_valid_C;
do_valid_o_reg <= do_valid_next; -- registered output pulse
--------------------------------
-- di_req -> di_req_o_reg
di_req_o_A <= di_req; -- the input signal must be at least 2 clocks long
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs
di_req_o_C <= di_req_o_B;
di_req_o_D <= di_req_o_C;
di_req_o_reg <= di_req_o_next; -- registered output pulse
end if;
if rst_i = '1' then
do_valid_reg <= '0'; -- async clear on do_valid
elsif spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then -- on half-cycle edge, update cross registers
do_valid_reg <= do_valid_next;
end if;
end process cross_reg_proc;
-- parallel load input registers (to eliminate async clock glitches)
par_reg_proc: process (clk_i, rst_i, wren_i, spi_sck_i, spi_ssel_i) is
-- generate a 2-clocks pulse at the 3rd clock cycle
do_valid_next <= do_valid_A and do_valid_B and not do_valid_D;
di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D;
end process out_transfer_proc;
-- parallel load input registers: data register and write enable
in_transfer_proc: process (clk_i, wren_i, wren_ack_reg) is
begin
-- registered data input, input register with clock enable
if clk_i'event and clk_i = '1' then
-- output flags registers
if rst_i = '1' then -- sync rst for output flags
di_rdy_oreg <= '0';
do_valid_oreg <= '0';
else
di_rdy_oreg <= di_rdy_reg; -- transfer buffer regs to out regs
do_valid_oreg <= (do_valid_reg and not spi_ssel_i) or (do_transfer_reg and spi_ssel_i);
if wren_i = '1' then
di_reg <= di_i; -- parallel data input buffer register
end if;
-- input register, with 'rst_i' sync reset and 'wren_i' clock enable
if rst_i = '1' then -- sync rst for di_reg
di_reg <= (others => '0');
elsif wren_i = '1' then -- wren_i is the clock enable for di_reg
di_reg <= di_i; -- parallel data input buffer register
end if;
-- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset)
if clk_i'event and clk_i = '1' then
if wren_i = '1' then -- wren_i is the sync preset for wren
wren <= '1';
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren
wren <= '0';
end if;
end if;
end process par_reg_proc;
end process in_transfer_proc;
 
--=============================================================================================
-- RTL COMBINATIONAL LOGIC PROCESSES
--=============================================================================================
-- state and datapath combinational logic
core_combi_proc : process ( rst_i, sh_reg, state_reg, rx_bit_reg, do_buffer_reg,
do_valid_reg, do_transfer_reg, di_reg, di_rdy_reg, wren_i ) is
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, do_buffer_reg,
do_transfer_reg, di_reg, wren, wren_ack_reg) is
begin
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches)
do_buffer_next <= do_buffer_reg;
do_valid_next <= do_valid_reg;
do_transfer_next <= do_transfer_reg;
di_rdy_next <= di_rdy_reg;
spi_miso_o <= '0'; -- will output '0' when shifter is empty
state_next <= state_reg - 1; -- update next state at each sck pulse
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches)
do_buffer_next <= do_buffer_reg; -- output data buffer
do_transfer_next <= do_transfer_reg; -- output data flag
wren_ack_next <= '0'; -- remove data load ack for all but the load stages
di_req <= '0'; -- prefetch data request: deassert when shifting data
spi_miso_o <= sh_reg(N-1); -- output serial data from the MSb
state_next <= state_reg - 1; -- update next state at each sck pulse
case state_reg is
when (N) =>
di_rdy_next <= '0'; -- deassert next-data request when start shifting
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
do_transfer_next <= '0'; -- reset transfer signal
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when (N-1) downto (PREFETCH+3) =>
di_rdy_next <= '0'; -- deassert next-data request when start shifting
do_valid_next <= do_transfer_reg; -- assert valid rx data, with plenty of pipeline delay for 'do_buffer'
do_transfer_next <= '0'; -- reset transfer signal
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
do_transfer_next <= '0'; -- reset transfer signal
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when (PREFETCH+2) downto 2 =>
-- raise prefetch 'di_rdy_next' signal and remove 'do_valid'
di_rdy_next <= '1'; -- request data in advance to allow for pipeline delays
do_valid_next <= '0'; -- make do_valid_o HIGH for one cycle only
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
-- raise data prefetch request
di_req <= '1'; -- request data in advance to allow for pipeline delays
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when 1 =>
-- restart from state 'N' if more sck pulses come
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer
do_transfer_next <= '1'; -- signal transfer to do_buffer
state_next <= N; -- next state is top bit of new data
spi_miso_o <= sh_reg(N-1); -- shift out last tx bit from the MSb
if wren_i = '1' then -- load tx register if valid data present at di_reg
sh_next <= di_reg; -- load parallel data from di_reg into shifter
di_req <= '1'; -- request data in advance to allow for pipeline delays
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer
do_transfer_next <= '1'; -- signal transfer to do_buffer
state_next <= N; -- next state is top bit of new data
if wren = '1' then -- load tx register if valid data present at di_reg
sh_next <= di_reg; -- load parallel data from di_reg into shifter
wren_ack_next <= '1'; -- acknowledge data in transfer
else
sh_next <= (others => '0'); -- load null data (output '0' if no load)
sh_next <= (others => '0'); -- load null data (output '0' if no load)
end if;
when 0 =>
do_transfer_next <= '0'; -- clear signal transfer to do_buffer
do_valid_next <= do_transfer_reg; -- assert valid rx data after data received, when interface idle
di_rdy_next <= '1'; -- will request data if shifter empty
spi_miso_o <= di_reg(N-1); -- shift out first tx bit from the MSb
di_req <= not wren_ack_reg; -- will request data if shifter empty
do_transfer_next <= '0'; -- clear signal transfer to do_buffer
spi_miso_o <= di_reg(N-1); -- shift out first tx bit from the MSb
if CPHA = '0' then
-- initial state for CPHA=0, when slave interface is first selected or idle
state_next <= N-1; -- next state is top bit of new data
sh_next(N-1 downto 1) <= di_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
state_next <= N-1; -- next state is top bit of new data
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
sh_next(N-1 downto 1) <= di_reg(N-2 downto 0); -- shift inner bits
wren_ack_next <= '1'; -- acknowledge data in transfer
else
-- initial state for CPHA=1, when slave interface is first selected or idle
state_next <= N; -- next state is top bit of new data
sh_next <= di_reg; -- load parallel data from di_reg into shifter
state_next <= N; -- next state is top bit of new data
sh_next <= di_reg; -- load parallel data from di_reg into shifter
end if;
when others =>
null;
state_next <= 0; -- state 0 is safe state
end case;
end process core_combi_proc;
 
321,15 → 358,19
-- RTL OUTPUT LOGIC PROCESSES
--=============================================================================================
-- data output processes
do_proc : do_o <= do_buffer_reg; -- do_o always available
do_valid_proc: do_valid_o <= do_valid_oreg; -- copy registered do_valid_o to output
di_rdy_proc: di_rdy_o <= di_rdy_oreg; -- copy registered di_rdy_o to output
do_o_proc : do_o <= do_buffer_reg; -- do_o always available
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- copy registered do_valid_o to output
di_req_o_proc: di_req_o <= di_req_o_reg; -- copy registered di_req_o to output
 
--=============================================================================================
-- DEBUG LOGIC PROCESSES
--=============================================================================================
-- these signals are useful for verification, and can be deleted or commented-out after debug.
-- Debug taps: each concurrent assignment below copies one internal signal, unmodified,
-- to a dedicated debug output port. None of them feed back into the core logic here.
do_transfer_proc: do_transfer_o <= do_transfer_reg; -- export the 'transfer to do_buffer' flag
state_debug_proc: state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 6)); -- export internal state to debug (converted to a 6-bit unsigned vector)
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg; -- export the rx bit that the fsm shifts into the LSb
wren_o_proc: wren_o <= wren; -- export the internal write-enable flag tested by the fsm when loading the shifter
wren_ack_o_proc: wren_ack_o <= wren_ack_reg; -- export the data-load acknowledge (presumably the registered 'wren_ack_next' -- confirm)
-- sh_reg_debug_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug
end architecture RTL;
 
/spi_master.vhd
13,33 → 13,33
-- All parallel i/o interface operations are synchronous to a system clock, that can be asynchronous to the spi base clock.
-- Fully pipelined circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two clock domains.
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o.
-- It is parameterizable for the data width ('N'), SPI mode via generics (CPHA and CPOL), and lookahead prefetch
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch
-- signaling ('PREFETCH').
--
-- PARALLEL WRITE INTERFACE
-- The parallel interface has a input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_rdy_o' and 'wren_i'. 'di_rdy_o' is a look ahead data request line,
-- The parallel interface has an input port 'di_i' and an output port 'do_o'.
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'. 'di_req_o' is a look ahead data request line,
-- that is set 'PREFETCH' clock cycles in advance to synchronize a pipelined memory or fifo to present the
-- next input data at 'di_i' in time to have continuous clock at the spi bus, to allow back-to-back continuous load.
-- For a pipelined sync RAM, a PREFETCH of 2 cycles allows an address generator to present the new address to the RAM in one
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the shifter.
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time.
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last clock cycle,
-- if continuous transmission is intended. If 'wren_i' is not valid 2 clock cycles after the last tranmitted bit, the interface
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last SPI clock cycle,
-- if continuous transmission is intended. If 'wren_i' is not valid 2 SPI clock cycles after the last transmitted bit, the interface
-- enters idle state and deasserts SSEL.
-- When the interface is idle, 'wren_i' write strobe loads the data and starts transmission. 'di_rdy_o' is always asserted when idle.
-- The interaction for data load is:
-- When the interface is idle, the 'wren_i' write strobe loads the data and starts transmission. 'di_req_o' will strobe when entering
-- idle state, if the previously loaded data has already been transferred.
--
-- PARALLEL WRITE PIPELINED SEQUENCE
-- =================================
-- __ __ __ __ __ __ __ __ __
-- par_clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock
-- ___________________________________
-- di_rdy_o ________/ \___________... -- 'di_rdy_o' asserted on rising edge of 'par_clk_i'
-- ______________ _________________________________________
-- di_i __old_data____X__________new_data_______________________... -- user circuit loads data on 'di_i' at next rising edge
-- ________________________________ -- user circuit asserts 'wren_i' at next edge,
-- wren_i __________________/ \____... -- and removes 'wren_i' after 'di_rdy_o' is removed
-- PARALLEL WRITE SEQUENCE
-- =======================
-- __ __ __ __ __ __ __
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock
-- ___________
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i'
-- ______________ ___________________________...
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge
-- _______
-- wren_i __________________________/ \_______... -- user strobes 'wren_i' for one cycle of 'clk_i'
--
--
-- PARALLEL READ INTERFACE
46,26 → 46,28
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received,
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_2x_clk_i'.
-- The signal 'do_valid_o' is set one 'spi_2x_clk_i' clock after, to directly drive a synchronous memory or fifo write enable.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'par_clk_i'.
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'.
-- When the interface is idle, data at the 'do_o' port holds the last word received.
--
-- PARALLEL READ PIPELINED SEQUENCE
-- ================================
-- ______ ______ ______ ______ ______
-- spi_2x_clk_i bit1 \______/ bitN \______/bitN-1\______/bitN-2\______/bitN-3\_... -- spi 2x base clock
-- _ __ __ __ __ __ __ __ __ __ __
-- par_clk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock
-- _____________ __________________________________________________... -- 1) rx data is transferred to 'do_buffer_reg'
-- do_o ___old_data__X__________new_data________________________________... -- after last rx bit, at rising 'spi_2x_clk_i'.
-- ___________ -- 2) 'do_valid_o' asserted on rising 'par_clk_i',
-- do_valid_o __________________________________/ \_________________... -- at next bit (bit N-1) of the SPI transfer.
--
-- PARALLEL READ SEQUENCE
-- ======================
-- ______ ______ ______ ______
-- spi_2x_clk_i bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi 2x base clock
-- _ __ __ __ __ __ __ __ __
-- clk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock
-- _____________ _____________________________________... -- 1) rx data is transferred to 'do_buffer_reg'
-- do_o ___old_data__X__________new_data___________________... -- after last rx bit, at rising 'spi_2x_clk_i'.
-- ____________
-- do_valid_o ____________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles
-- -- on the 3rd 'clk_i' rising edge.
--
--
-- The propagation delay of spi_sck_o and spi_mosi_o, referred to the internal clock, is balanced by similar path delays,
-- but the sampling delay of spi_miso_i imposes a setup time referred to the sck signal that limits the high frequency
-- of the interface, for full duplex operation.
--
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints.
-- The VHDL dialect used is VHDL'93, which is widely accepted by synthesis tools.
--
------------------------------ COPYRIGHT NOTICE -----------------------------------------------------------------------
--
96,7 → 98,7
-- 2011/04/28 v0.01.0010 [JD] shifter implemented as a sequential process. timing problems and async issues in synthesis.
-- 2011/05/01 v0.01.0030 [JD] changed original shifter design to a fully pipelined RTL fsmd. solved all synthesis issues.
-- 2011/05/05 v0.01.0034 [JD] added an internal buffer register for rx_data, to allow greater liberty in data load/store.
-- 2011/05/08 v0.10.0038 [JD] increased one state to have SSEL start one cycle before SCK. Implemented full CPOL/CPHA
-- 2011/05/08 v0.10.0038 [JD] increased one state to have SSEL start one cycle before SCK. Implemented full CPOL/CPHA
-- logic, based on generics, and do_valid_o signal.
-- 2011/05/13 v0.20.0045 [JD] streamlined signal names, added PREFETCH parameter, added assertions.
-- 2011/05/17 v0.80.0049 [JD] added explicit clock synchronization circuitry across clock boundaries.
103,16 → 105,20
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core.
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets.
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches.
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce
-- synthesis LUT overhead in Spartan-6 architecture.
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic.
-- 2011/06/12 v0.97.0079 [JD] streamlined wren_ack for all cases and eliminated unnecessary register resets.
-- 2011/06/14 v0.97.0083 [JD] (bug CPHA effect) : redesigned SCK output circuit.
-- (minor bug) : removed fsm registers from (not rst_i) chip enable.
-- 2011/06/15 v0.97.0086 [JD] removed master MISO input register, to relax MISO data setup time (to get higher speed).
--
--
--
-----------------------------------------------------------------------------------------------------------------------
-- TODO
-- ====
--
-- - DEBUG_PACKAGE:
-- - package to export signals to the verification testbench
--
--
-----------------------------------------------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
124,25 → 130,28
N : positive := 32; -- 32bit serial word length is default
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default)
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase.
PREFETCH : positive := 1); -- prefetch lookahead cycles
PREFETCH : positive := 2); -- prefetch lookahead cycles
Port (
spi_2x_clk_i : in std_logic := 'X'; -- spi base reference clock: 2x 'spi_sck_o'
par_clk_i : in std_logic := 'X'; -- parallel interface clock
rst_i : in std_logic := 'X'; -- async reset: clear all registers
clk_i : in std_logic := 'X'; -- parallel interface clock
rst_i : in std_logic := 'X'; -- reset core
spi_ssel_o : out std_logic; -- spi bus slave select line
spi_sck_o : out std_logic; -- spi bus sck
spi_mosi_o : out std_logic; -- spi bus mosi output
spi_miso_i : in std_logic := 'X'; -- spi bus spi_miso_i input
di_req_o : out std_logic; -- preload lookahead data request line
di_i : in std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel data in (clocked on rising spi_2x_clk_i after last bit)
wren_i : in std_logic := 'X'; -- user data write enable, starts transmission when interface is idle
do_valid_o : out std_logic; -- do_o data valid signal, valid during one spi_2x_clk_i rising edge.
do_o : out std_logic_vector (N-1 downto 0); -- parallel output (clocked on rising spi_2x_clk_i after last bit)
di_rdy_o : out std_logic; -- preload lookahead: HIGH for PREFETCH cycles before last bit
wren_i : in std_logic := 'X'; -- write enable (write di_i data at next rising spi_2x_clk_i edge)
-- wren_i starts transmission. must be valid 1 spi_2x_clk_i cycle
-- before current transmission ends.
do_valid_o : out std_logic; -- do_o data valid signal, valid during one spi_2x_clk_i rising edge.
--- debug ports: can be removed for the application circuit ---
do_transfer_o : out std_logic; -- debug: internal transfer driver
wren_o : out std_logic; -- debug: internal state of the wren_i pulse stretcher
wren_ack_o : out std_logic; -- debug: wren ack from state machine
rx_bit_reg_o : out std_logic; -- debug: internal rx bit
state_dbg_o : out std_logic_vector (5 downto 0); -- debug: internal state register
rx_bit_reg_o : out std_logic; -- debug: internal rx bit
core_clk_o : out std_logic;
core_n_clk_o : out std_logic;
sh_reg_dbg_o : out std_logic_vector (N-1 downto 0) -- debug: internal shift register
);
end spi_master;
152,46 → 161,67
-- all signals are clocked at the rising edge of the system clock 'spi_2x_clk_i'.
--================================================================================================================
architecture RTL of spi_master is
 
-- core clocks, generated from 'spi_2x_clk_i'
signal core_clk : std_logic; -- continuous fsm core clock, positive logic
signal core_n_clk : std_logic; -- continuous fsm core clock, negative logic
-- spi base clock, generated from 'spi_2x_clk_i'
signal spi_clk : std_logic; -- spi bus output clock, positive polarity
signal spi_n_clk : std_logic; -- spi bus output clock, negative polarity
-- core clocks, generated from 'spi_2x_clk_i': initialized to differential values
signal core_clk : std_logic := '0'; -- continuous fsm core clock, positive logic
signal core_n_clk : std_logic := '1'; -- continuous fsm core clock, negative logic
-- spi bus clock, generated from the CPOL selected core clock polarity
signal spi_clk : std_logic; -- spi bus output clock
-- core fsm clock
signal fsm_clk : std_logic; -- data change clock: fsm registers clocked at rising edge
signal samp_clk : std_logic; -- data sampling clock: input serial data clocked at rising edge
signal fsm_clk : std_logic; -- data change clock: fsm registers clocked at rising edge
signal samp_clk : std_logic; -- data sampling clock: input serial data clocked at rising edge
--
-- GLOBAL RESET:
-- signals are initialized at GSR (global set/reset), mostly to zero, by giving explicit
-- initialization values at declaration. This is needed for all Xilinx FPGAs, and
-- especially for the Spartan-6 and newer CLB architectures, where a local reset can
-- reduce the usability of the slice registers, due to the need to share the control
-- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice.
-- By using GSR for the initialization, and reducing RESET local init to the bare
-- essential, the model achieves better LUT/FF packing and CLB usability.
--
-- internal state signals for register and combinational stages
signal state_next : natural range N+1 downto 0 := 0;
signal state_reg : natural range N+1 downto 0 := 0;
signal state_next : natural range N+1 downto 0 := 0;
-- shifter signals for register and combinational stages
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal sh_reg : std_logic_vector (N-1 downto 0) := (others => '0');
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0');
-- input bit sampled buffer
signal rx_bit_reg : std_logic := '0';
-- buffered di_i data signals for register and combinational stages
signal di_reg : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal wren_i stretcher for fsm combinational stage
signal wren : std_logic := '0';
signal wren_ack_next : std_logic := '0';
signal wren_ack_reg : std_logic := '0';
-- internal SSEL enable control signals
signal ena_ssel_next : std_logic := '0';
signal ena_ssel_reg : std_logic := '0';
signal ena_ssel_next : std_logic := '0';
-- internal SCK enable control signals
signal ena_sck_next : std_logic := '0';
signal ena_sck_reg : std_logic := '0';
signal ena_sck_next : std_logic := '0';
-- buffered do_o data signals for register and combinational stages
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_buffer_reg : std_logic_vector (N-1 downto 0) := (others => '0');
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0');
-- internal signal to flag transfer to do_buffer_reg
signal do_transfer_next : std_logic := '0';
signal do_transfer_reg : std_logic := '0';
signal do_transfer_next : std_logic := '0';
-- internal registered do_valid_o
signal do_valid_oreg : std_logic := '0';
signal do_valid_reg : std_logic := '0';
-- internal input data request signal
signal di_req_next : std_logic := '0';
signal di_req_reg : std_logic := '0';
-- cross-clock do_valid_o pipeline
signal do_valid_next : std_logic := '0';
-- internal registered di_rdy_o
signal di_rdy_oreg : std_logic := '0';
signal di_rdy_reg : std_logic := '1';
signal di_rdy_next : std_logic := '1';
signal do_valid_A : std_logic := '0';
signal do_valid_B : std_logic := '0';
signal do_valid_C : std_logic := '0';
signal do_valid_D : std_logic := '0';
signal do_valid_o_reg : std_logic := '0';
-- cross-clock di_req_o pipeline
signal di_req_o_next : std_logic := '1';
signal di_req_o_A : std_logic := '0';
signal di_req_o_B : std_logic := '0';
signal di_req_o_C : std_logic := '0';
signal di_req_o_D : std_logic := '0';
signal di_req_o_reg : std_logic := '1';
begin
--=============================================================================================
-- GENERICS CONSTRAINTS CHECKING
200,6 → 230,10
-- word width sanity check: elaboration fails if the shifter would be narrower than 8 bits
assert not (N < 8)
report "Generic parameter 'N' error: SPI shift register size needs to be 8 bits minimum" severity FAILURE;
-- minimum prefetch lookahead check
-- Elaboration fails if PREFETCH is below 2. The report text quotes the same limit
-- that the condition enforces, so the message matches the actual constraint.
assert PREFETCH >= 2
report "Generic parameter 'PREFETCH' error: needs to be 2 minimum"
severity FAILURE;
-- maximum prefetch lookahead check
assert PREFETCH <= N-5
report "Generic parameter 'PREFETCH' error: lookahead count out of range, needs to be N-5 maximum"
208,66 → 242,48
--=============================================================================================
-- CLOCK GENERATION
--=============================================================================================
-- The clock generation block derive 2 sets of signals from the 2x spi base clock, with positive
-- and negative phase. The core clock runs continuously and drives the core fsm, and the spi clock
-- drives the spi bus 'spi_sck_o' output directly, and is controlled by 'ena_sck_reg', driven by the
-- fsm logic.
-- The 2 clocks are generated each with one FFD, with a selected phase to drive the core with rising
-- edge clocks only. The 2 sets of clocks have similar logic delays, which is important for the data
-- setup time of the serial input related to the data setup time of the serial output.
-- The clock generation block derives 2 continuous antiphase signals from the 2x spi base clock
-- for the core clocking.
-- The 2 clock phases are generated by separate and synchronous FFDs, and should have only
-- interconnect delays.
-- The clock phase is selected for serial input sampling, fsm clocking, and spi SCK output, based
-- on the configuration of CPOL and CPHA.
-- Each phase is selected so that all the registers can be clocked with a rising edge on all SPI
-- modes.
-----------------------------------------------------------------------------------------------
-- divide down 'spi_2x_clk_i' by 2
-- this should be synthesized as a single ffd with sync reset
core_clock_gen_proc : process (rst_i, spi_2x_clk_i) is
-- this should be synthesized as two synchronous FFDs
core_clock_gen_proc : process (spi_2x_clk_i) is
begin
if spi_2x_clk_i'event and spi_2x_clk_i = '1' then
if rst_i = '1' then
core_clk <= '0'; -- positive logic clk: idle LOW
core_n_clk <= '1'; -- negative logic clk: idle HIGH
else
core_clk <= core_n_clk; -- divided by 2 clock, differential
core_n_clk <= not core_n_clk;
end if;
core_clk <= core_n_clk; -- divided by 2 clock, differential
core_n_clk <= not core_n_clk;
end if;
end process core_clock_gen_proc;
-----------------------------------------------------------------------------------------------
-- spi sck generator: divide input 2x clock by 2, with a CE controlled by the fsm
-- this should be sinthesized as a single FFD with sync reset and clock enable
spi_clock_gen_proc : process (rst_i, spi_2x_clk_i, ena_sck_reg) is
begin
if spi_2x_clk_i'event and spi_2x_clk_i = '1' then
if rst_i = '1' then
spi_clk <= '0'; -- positive logic clk: idle LOW
spi_n_clk <= '1'; -- negative logic clk: idle HIGH
elsif ena_sck_reg = '1' then
spi_clk <= spi_n_clk; -- divided by 2 clock, differential
spi_n_clk <= not spi_n_clk;
end if;
end if;
end process spi_clock_gen_proc;
-----------------------------------------------------------------------------------------------
-- SCK out logic: generate sck from spi_clk or spi_n_clk depending on CPOL
-- spi clk generator: generate spi_clk from core_clk depending on CPOL
spi_sck_cpol_0_proc :
if CPOL = '0' generate
begin
spi_sck_o <= spi_clk; -- for CPOL=0, spi clk has idle LOW
spi_clk <= core_clk; -- for CPOL=0, spi clk has idle LOW
end generate;
spi_sck_cpol_1_proc :
if CPOL = '1' generate
begin
spi_sck_o <= spi_n_clk; -- for CPOL=1, spi clk has idle HIGH
spi_clk <= core_n_clk; -- for CPOL=1, spi clk has idle HIGH
end generate;
-----------------------------------------------------------------------------------------------
-- Sampling clock generation: generate 'samp_clk' from core_clk or core_n_clk depending on CPHA
-- always sample data at the half-cycle of the fsm update cell
smp_cpha_0_proc :
if CPHA = '0' generate
begin
samp_clk <= spi_clk; -- for CPHA=0, sample at end of sample cell
samp_clk <= core_clk;
end generate;
smp_cpha_1_proc :
if CPHA = '1' generate
begin
samp_clk <= spi_n_clk; -- for CPHA=1, sample at end of sample cell
samp_clk <= core_n_clk;
end generate;
-----------------------------------------------------------------------------------------------
-- FSM clock generation: generate 'fsm_clk' from core_clk or core_n_clk depending on CPHA
283,66 → 299,93
end generate;
 
--=============================================================================================
-- RTL REGISTER PROCESSES
-- REGISTERED INPUTS
--=============================================================================================
-- rx bit flop: capture rx bit after SAMPLE edge of sck
--
-- ATTENTION: BY REMOVING THE FLIPFLOP (DIRECT CONNECTION) WE GET HIGHER PERFORMANCE, DUE TO
-- THE REDUCED DEMAND ON MISO SETUP TIME.
--
rx_bit_proc : process (samp_clk, spi_miso_i) is
begin
if samp_clk'event and samp_clk = '1' then
-- if samp_clk'event and samp_clk = '1' then -- uncomment to have the input register
rx_bit_reg <= spi_miso_i;
end if;
-- end if; -- uncomment to have the input register
end process rx_bit_proc;
-- state and data registers: synchronous to the spi base reference clock
core_reg_proc : process (fsm_clk, rst_i) is
 
--=============================================================================================
-- RTL REGISTER PROCESSES
--=============================================================================================
-- fsm state and data registers: synchronous to the spi base reference clock
core_reg_proc : process (fsm_clk) is
begin
-- FFD registers clocked on rising edge and cleared on sync rst_i
if fsm_clk'event and fsm_clk = '1' then
if rst_i = '1' then -- sync reset
sh_reg <= (others => '0');
state_reg <= 0;
ena_ssel_reg <= '0';
ena_sck_reg <= '0';
do_buffer_reg <= (others => '0');
do_transfer_reg <= '0';
state_reg <= 0; -- only provide local reset for the state machine
else
sh_reg <= sh_next;
state_reg <= state_next;
ena_ssel_reg <= ena_ssel_next;
ena_sck_reg <= ena_sck_next;
do_buffer_reg <= do_buffer_next;
do_transfer_reg <= do_transfer_next;
state_reg <= state_next; -- state register
end if;
end if;
-- FFD registers clocked on rising edge
if fsm_clk'event and fsm_clk = '1' then
sh_reg <= sh_next; -- shift register
ena_ssel_reg <= ena_ssel_next; -- spi select enable
ena_sck_reg <= ena_sck_next; -- spi clock enable
do_buffer_reg <= do_buffer_next; -- registered output data buffer
do_transfer_reg <= do_transfer_next; -- output data transferred to buffer
di_req_reg <= di_req_next; -- input data request
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization
end if;
end process core_reg_proc;
-- cross-clock registers change on half-cycle of sck (ffd with async clear)
-- this is to prevent fsm state change glitches causing setup time artifacts at async clk_i edges
cross_reg_proc : process (rst_i, fsm_clk, ena_ssel_reg) is
 
--=============================================================================================
-- CROSS-CLOCK PIPELINE TRANSFER LOGIC
--=============================================================================================
-- do_valid_o and di_req_o strobe output logic
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a
-- fixed-length delayed pulse for the output flags, in the parallel clock domain
out_transfer_proc : process ( clk_i, do_transfer_reg, di_req_reg,
do_valid_A, do_valid_B, do_valid_D,
di_req_o_A, di_req_o_B, di_req_o_D) is
begin
if ena_ssel_reg = '0' then
di_rdy_reg <= '1'; -- di_rdy true during idle
elsif fsm_clk'event and fsm_clk = '0' then -- on half-cycle edge, update cross registers
di_rdy_reg <= di_rdy_next;
if clk_i'event and clk_i = '1' then -- clock at parallel port clock
-- do_transfer_reg -> do_valid_o_reg
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs
do_valid_C <= do_valid_B;
do_valid_D <= do_valid_C;
do_valid_o_reg <= do_valid_next; -- registered output pulse
--------------------------------
-- di_req_reg -> di_req_o_reg
di_req_o_A <= di_req_reg; -- the input signal must be at least 2 clocks long
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs
di_req_o_C <= di_req_o_B;
di_req_o_D <= di_req_o_C;
di_req_o_reg <= di_req_o_next; -- registered output pulse
end if;
if rst_i = '1' then
do_valid_reg <= '0'; -- async clear on do_valid
elsif fsm_clk'event and fsm_clk = '0' then -- on half-cycle edge, update cross registers
do_valid_reg <= do_valid_next;
end if;
end process cross_reg_proc;
-- parallel i/o interface registers: synchronous to the parallel interface clock
par_reg_proc : process (rst_i, par_clk_i, ena_ssel_reg) is
-- generate a 2-clocks pulse at the 3rd clock cycle
do_valid_next <= do_valid_A and do_valid_B and not do_valid_D;
di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D;
end process out_transfer_proc;
-- parallel load input registers: data register and write enable
in_transfer_proc: process (clk_i, wren_i, wren_ack_reg) is
begin
if par_clk_i'event and par_clk_i = '1' then
if rst_i = '1' then -- sync reset
di_rdy_oreg <= '0';
do_valid_oreg <= '0';
di_reg <= (others => '0');
else
di_rdy_oreg <= di_rdy_reg; -- di_rdy is synchronous to parallel interface clock
do_valid_oreg <= (do_valid_reg and ena_ssel_reg) or (do_transfer_reg and not ena_ssel_reg);
di_reg <= di_i; -- sample di_i at interface clock
-- registered data input, input register with clock enable
if clk_i'event and clk_i = '1' then
if wren_i = '1' then
di_reg <= di_i; -- parallel data input buffer register
end if;
end if;
end process par_reg_proc;
end if;
-- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset)
if clk_i'event and clk_i = '1' then
if wren_i = '1' then -- wren_i is the sync preset for wren
wren <= '1';
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren
wren <= '0';
end if;
end if;
end process in_transfer_proc;
 
--=============================================================================================
-- RTL COMBINATIONAL LOGIC PROCESSES
349,69 → 392,60
--=============================================================================================
-- state and datapath combinational logic
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, ena_ssel_reg, ena_sck_reg, do_buffer_reg,
do_valid_reg, do_transfer_reg, di_reg, di_rdy_reg, wren_i ) is
do_transfer_reg, di_reg, wren) is
begin
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches)
ena_ssel_next <= ena_ssel_reg;
ena_sck_next <= ena_sck_reg;
do_buffer_next <= do_buffer_reg;
do_valid_next <= do_valid_reg;
do_transfer_next <= do_transfer_reg;
di_rdy_next <= di_rdy_reg;
spi_mosi_o <= '0'; -- will output '0' when shifter is empty
state_next <= state_reg - 1; -- next state is next bit
ena_ssel_next <= ena_ssel_reg; -- controls the slave select line
ena_sck_next <= ena_sck_reg; -- controls the clock enable of spi sck line
do_buffer_next <= do_buffer_reg; -- output data buffer
do_transfer_next <= do_transfer_reg; -- output data flag
wren_ack_next <= '0'; -- remove data load ack for all but the load stages
di_req_next <= '0'; -- prefetch data request: deassert when shifting data
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb
state_next <= state_reg - 1; -- update next state at each sck pulse
case state_reg is
when (N+1) => -- this state is to enable SSEL before SCK
when (N+1) => -- this state is to enable SSEL before SCK
ena_ssel_next <= '1'; -- tx in progress: will assert SSEL
ena_sck_next <= '1'; -- enable SCK on next cycle (stays off on first SSEL clock cycle)
di_rdy_next <= '0'; -- deassert next-data request when shifting data
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb
when (N) => -- deassert 'di_rdy'
di_rdy_next <= '0'; -- deassert next-data request when shifting data
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb
when (N) => -- deassert 'di_rdy'
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when (N-1) downto (PREFETCH+3) => -- if rx data is valid, raise 'do_valid'. remove 'do_transfer'
di_rdy_next <= '0'; -- deassert next-data request when start shifting
do_valid_next <= do_transfer_reg; -- assert valid rx data, with plenty of pipeline delay for 'do_buffer'
when (N-1) downto (PREFETCH+3) => -- if rx data is valid, raise 'do_valid'. remove 'do_transfer'
do_transfer_next <= '0'; -- reset transfer signal
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when (PREFETCH+2) downto 2 => -- raise prefetch 'di_rdy_next' signal and remove 'do_valid'
di_rdy_next <= '1'; -- request data in advance to allow for pipeline delays
do_valid_next <= '0'; -- make do_valid_o HIGH for one cycle only
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb
when (PREFETCH+2) downto 2 => -- raise prefetch 'di_req_o_next' signal and remove 'do_valid'
di_req_next <= '1'; -- request data in advance to allow for pipeline delays
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb
when 1 => -- transfer rx data to do_buffer and restart if wren
when 1 => -- transfer rx data to do_buffer and restart if wren
di_req_next <= '1'; -- request data in advance to allow for pipeline delays
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer
do_transfer_next <= '1'; -- signal transfer to do_buffer
spi_mosi_o <= sh_reg(N-1); -- shift out last tx bit from the MSb
if wren_i = '1' then -- load tx register if valid data present at di_i
if wren = '1' then -- load tx register if valid data present at di_i
state_next <= N; -- next state is top bit of new data
sh_next <= di_reg; -- load parallel data from di_reg into shifter
ena_sck_next <= '1'; -- SCK enabled
wren_ack_next <= '1'; -- acknowledge data in transfer
else
ena_sck_next <= '0'; -- SCK disabled: tx empty, no data to send
end if;
when 0 =>
di_req_next <= '1'; -- will request data if shifter empty
ena_sck_next <= '0'; -- SCK disabled: tx empty, no data to send
di_rdy_next <= '1'; -- will request data if shifter empty
do_valid_next <= do_transfer_reg; -- assert valid rx data after data received, when interface idle
if wren_i = '1' then -- load tx register if valid data present at di_i
if wren = '1' then -- load tx register if valid data present at di_i
ena_ssel_next <= '1'; -- enable interface SSEL
state_next <= N+1; -- start from idle: let one cycle for SSEL settling
do_valid_next <= '0'; -- start: clear rx data valid signal
spi_mosi_o <= di_reg(N-1); -- shift out first tx bit from the MSb
spi_mosi_o <= di_reg(N-1); -- special case: shift out first tx bit from the MSb (look ahead)
sh_next <= di_reg; -- load bits from di_reg into shifter
wren_ack_next <= '1'; -- acknowledge data in transfer
else
ena_ssel_next <= '0'; -- deassert SSEL: interface is idle
state_next <= 0; -- when idle, keep this state
end if;
when others =>
null;
state_next <= 0; -- state 0 is safe state
end case;
end process core_combi_proc;
 
418,19 → 452,38
--=============================================================================================
-- OUTPUT LOGIC PROCESSES
--=============================================================================================
-- output signal connections
spi_ssel_proc: spi_ssel_o <= not ena_ssel_reg; -- drive active-low slave select line
do_proc : do_o <= do_buffer_reg; -- do_o always available
do_valid_proc: do_valid_o <= do_valid_oreg; -- copy registered do_valid_o to output
di_rdy_proc: di_rdy_o <= di_rdy_oreg; -- copy registered di_rdy_o to output
-- data output processes
spi_ssel_o_proc: spi_ssel_o <= not ena_ssel_reg; -- drive active-low slave select line
do_o_proc : do_o <= do_buffer_reg; -- do_o always available
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- copy registered do_valid_o to output
di_req_o_proc: di_req_o <= di_req_o_reg; -- copy registered di_req_o to output
-----------------------------------------------------------------------------------------------
-- SCK out logic: output mux for the SPI sck
--------------------------------------------
-- This is modelled as a mux instead of a register because a register would require an FDCPE (ffd with preset and clear),
-- which generates very inefficient logic in Spartan-6. Instead, we have a mux that translates to an AND gate,
-- and can be optimized to a fast CLB gate.
-- SCK output mux: drives spi_sck_o with spi_clk while ena_sck_reg is '1',
-- and with the CPOL level for every other value of ena_sck_reg.
spi_sck_gen_proc : process (ena_sck_reg, spi_clk) is
begin
    -- default: clock disabled, hold the line at the idle polarity
    spi_sck_o <= CPOL;
    -- override the default when the clock is enabled (the last assignment in a process wins)
    if ena_sck_reg = '1' then
        spi_sck_o <= spi_clk;
    end if;
end process spi_sck_gen_proc;
 
--=============================================================================================
-- DEBUG LOGIC PROCESSES
--=============================================================================================
-- these signals are useful for verification, and can be deleted or commented-out after debug.
do_transfer_proc: do_transfer_o <= do_transfer_reg;
state_dbg_proc: state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 6)); -- export internal state to debug
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg;
wren_o_proc: wren_o <= wren;
wren_ack_o_proc: wren_ack_o <= wren_ack_reg;
sh_reg_dbg_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg;
core_clk_o_proc: core_clk_o <= core_clk;
core_n_clk_o_proc: core_n_clk_o <= core_n_clk;
 
end architecture RTL;
 
/readme.txt
7,12 → 7,16
spi_slave.vhd spi slave module, can be used independently
spi_loopback.vhd wrapper module for the master and slave modules
spi_loopback_test.vhd testbench for the loopback module, test master against slave
spi_loopback.ucf constraints file for Spartan-6, optimized for area, LUT compression.
 
 
The original development is done in Xilinx ISE 13.1, targeted to a Spartan-6 device.
 
Verification was done in ISIM, after Place & Route, with default constraints, for the slowest
Spartan-6 device, tested at 50MHz for the spi_2x_clk (25MHz spi SCK), and 125MHz for the parallel
interfaces clocks.
Spartan-6 device, synthesis generated 59 slices, and the design was tested at 40MHz for the
spi_2x_clk (20MHz spi SCK), and 125MHz for the parallel interfaces clocks.
With the attached .ucf file, optimized for area and using LUT compression, synthesis generated
44 slices, and the design tested OK at an SPI clock of 20MHz.
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.