URL
https://opencores.org/ocsvn/spi_master_slave/spi_master_slave/trunk
Subversion Repositories spi_master_slave
Compare Revisions
- This comparison shows the changes necessary to convert path
/spi_master_slave/trunk/rtl
- from Rev 3 to Rev 4
- ↔ Reverse comparison
Rev 3 → Rev 4
/spi_loopback.ucf
1,13 → 1,18
|
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/05 |
NET "m_spi_clk_i" TNM_NET = m_spi_clk_i; |
TIMESPEC TS_m_spi_clk_i = PERIOD "m_spi_clk_i" 20 ns HIGH 50%; |
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08 |
NET "m_spi_2x_clk_i" TNM_NET = m_spi_2x_clk_i; |
TIMESPEC TS_m_spi_2x_clk_i = PERIOD "m_spi_2x_clk_i" 15 ns HIGH 50%; |
NET "s_clk_i" TNM_NET = s_clk_i; |
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 8 ns HIGH 50%; |
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/08 |
NET "m_clk_i" TNM_NET = m_clk_i; |
TIMESPEC TS_m_clk_i = PERIOD "m_clk_i" 8 ns HIGH 50%; |
NET "s_spi_sck_i" TNM_NET = s_spi_sck_i; |
TIMESPEC TS_s_spi_sck_i = PERIOD "s_spi_sck_i" 40 ns HIGH 50%; |
NET "m_par_clk_i" TNM_NET = m_par_clk_i; |
TIMESPEC TS_m_par_clk_i = PERIOD "m_par_clk_i" 10 ns HIGH 50%; |
NET "s_clk_i" TNM_NET = s_clk_i; |
TIMESPEC TS_s_clk_i = PERIOD "s_clk_i" 10 ns HIGH 50%; |
TIMESPEC TS_s_spi_sck_i = PERIOD "s_spi_sck_i" 30 ns HIGH 50%; |
NET "m_spi_sck_o_OBUF" TNM_NET = m_spi_sck_o_OBUF; |
TIMESPEC TS_m_spi_sck_o_OBUF = PERIOD "m_spi_sck_o_OBUF" 30 ns HIGH 50%; |
NET "Inst_spi_master/core_n_clk" TNM_NET = Inst_spi_master/core_n_clk; |
TIMESPEC TS_Inst_spi_master_core_n_clk = PERIOD "Inst_spi_master/core_n_clk" 30 ns HIGH 50%; |
INST "m_di_i<0>" TNM = m_di; |
INST "m_di_i<1>" TNM = m_di; |
INST "m_di_i<2>" TNM = m_di; |
40,7 → 45,8
INST "m_di_i<29>" TNM = m_di; |
INST "m_di_i<30>" TNM = m_di; |
INST "m_di_i<31>" TNM = m_di; |
TIMEGRP "m_di" OFFSET = IN 10 ns VALID 10 ns BEFORE "m_par_clk_i" RISING; |
TIMEGRP "m_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "m_clk_i" RISING; |
INST "m_spi_miso_i" TNM = m_miso; |
INST "s_di_i<0>" TNM = s_di; |
INST "s_di_i<1>" TNM = s_di; |
INST "s_di_i<2>" TNM = s_di; |
73,7 → 79,39
INST "s_di_i<29>" TNM = s_di; |
INST "s_di_i<30>" TNM = s_di; |
INST "s_di_i<31>" TNM = s_di; |
TIMEGRP "s_di" OFFSET = IN 10 ns VALID 10 ns BEFORE "s_clk_i" RISING; |
NET "s_spi_mosi_i" OFFSET = IN 20 ns VALID 20 ns BEFORE "s_spi_sck_i" RISING; |
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/05 |
NET "s_spi_miso_o" OFFSET = OUT 40 ns AFTER "s_spi_sck_i"; |
TIMEGRP "s_di" OFFSET = IN 8 ns VALID 8 ns BEFORE "s_clk_i" RISING; |
INST "s_spi_mosi_i" TNM = s_mosi; |
INST "m_do_o<0>" TNM = m_do; |
INST "m_do_o<1>" TNM = m_do; |
INST "m_do_o<2>" TNM = m_do; |
INST "m_do_o<3>" TNM = m_do; |
INST "m_do_o<4>" TNM = m_do; |
INST "m_do_o<5>" TNM = m_do; |
INST "m_do_o<6>" TNM = m_do; |
INST "m_do_o<7>" TNM = m_do; |
INST "m_do_o<8>" TNM = m_do; |
INST "m_do_o<9>" TNM = m_do; |
INST "m_do_o<10>" TNM = m_do; |
INST "m_do_o<11>" TNM = m_do; |
INST "m_do_o<12>" TNM = m_do; |
INST "m_do_o<13>" TNM = m_do; |
INST "m_do_o<14>" TNM = m_do; |
INST "m_do_o<15>" TNM = m_do; |
INST "m_do_o<16>" TNM = m_do; |
INST "m_do_o<17>" TNM = m_do; |
INST "m_do_o<18>" TNM = m_do; |
INST "m_do_o<19>" TNM = m_do; |
INST "m_do_o<20>" TNM = m_do; |
INST "m_do_o<21>" TNM = m_do; |
INST "m_do_o<22>" TNM = m_do; |
INST "m_do_o<23>" TNM = m_do; |
INST "m_do_o<24>" TNM = m_do; |
INST "m_do_o<25>" TNM = m_do; |
INST "m_do_o<26>" TNM = m_do; |
INST "m_do_o<27>" TNM = m_do; |
INST "m_do_o<28>" TNM = m_do; |
INST "m_do_o<29>" TNM = m_do; |
INST "m_do_o<30>" TNM = m_do; |
INST "m_do_o<31>" TNM = m_do; |
#Created by Constraints Editor (xc6slx45t-csg484-3) - 2011/06/09 |
INST "m_rx_bit_reg_o" TNM = m_rx_bit; |
/spi_loopback.vhd
36,102 → 36,118
Generic ( |
N : positive := 32; -- 32bit serial word length is default |
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default) |
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase. |
PREFETCH : positive := 1 -- prefetch lookahead cycles |
CPHA : std_logic := '1'; -- CPOL = clock polarity, CPHA = clock phase. |
PREFETCH : positive := 2 -- prefetch lookahead cycles |
); |
Port( |
----------------MASTER----------------------- |
m_spi_clk_i : IN std_logic; |
m_par_clk_i : IN std_logic; |
m_spi_2x_clk_i : IN std_logic; |
m_clk_i : IN std_logic; |
m_rst_i : IN std_logic; |
m_spi_ssel_o : OUT std_logic; |
m_spi_sck_o : OUT std_logic; |
m_spi_mosi_o : OUT std_logic; |
m_spi_miso_i : IN std_logic; |
m_di_req_o : OUT std_logic; |
m_di_i : IN std_logic_vector(N-1 downto 0); |
m_do_o : OUT std_logic_vector(N-1 downto 0); |
m_di_rdy_o : OUT std_logic; |
m_wren_i : IN std_logic; |
m_do_valid_o : OUT std_logic; |
m_do_o : OUT std_logic_vector(N-1 downto 0); |
----- debug ----- |
m_do_transfer_o : OUT std_logic; |
m_wren_o : OUT std_logic; |
m_wren_ack_o : OUT std_logic; |
m_rx_bit_reg_o : OUT std_logic; |
m_state_dbg_o : OUT std_logic_vector(5 downto 0); |
m_rx_bit_reg_o : OUT std_logic; |
m_core_clk_o : OUT std_logic; |
m_core_n_clk_o : OUT std_logic; |
m_sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0); |
----------------SLAVE----------------------- |
s_clk_i : IN std_logic; |
s_rst_i : IN std_logic; |
s_spi_ssel_i : IN std_logic; |
s_spi_sck_i : IN std_logic; |
s_spi_mosi_i : IN std_logic; |
s_spi_miso_o : OUT std_logic; |
s_di_i : IN std_logic_vector(N-1 downto 0); |
s_do_o : OUT std_logic_vector(N-1 downto 0); |
s_di_rdy_o : OUT std_logic; |
s_wren_i : IN std_logic; |
s_do_valid_o : OUT std_logic; |
s_do_transfer_o : OUT std_logic; |
s_state_dbg_o : OUT std_logic_vector(5 downto 0) |
-- s_sh_reg_dbg_o : OUT std_logic_vector(31 downto 0) |
s_clk_i : IN std_logic; |
s_spi_ssel_i : IN std_logic; |
s_spi_sck_i : IN std_logic; |
s_spi_mosi_i : IN std_logic; |
s_spi_miso_o : OUT std_logic; |
s_di_req_o : OUT std_logic; -- preload lookahead data request line |
s_di_i : IN std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel load data in (clocked in on rising edge of clk_i) |
s_wren_i : IN std_logic := 'X'; -- user data write enable |
s_do_valid_o : OUT std_logic; -- do_o data valid strobe, valid during one clk_i rising edge. |
s_do_o : OUT std_logic_vector (N-1 downto 0); -- parallel output (clocked out on falling clk_i) |
----- debug ----- |
s_do_transfer_o : OUT std_logic; -- debug: internal transfer driver |
s_wren_o : OUT std_logic; |
s_wren_ack_o : OUT std_logic; |
s_rx_bit_reg_o : OUT std_logic; |
s_state_dbg_o : OUT std_logic_vector (5 downto 0) -- debug: internal state register |
-- s_sh_reg_dbg_o : OUT std_logic_vector (N-1 downto 0) -- debug: internal shift register |
); |
end spi_loopback; |
|
architecture Structural of spi_loopback is |
|
COMPONENT spi_master |
COMPONENT spi_master |
GENERIC ( |
N : positive := 32; |
CPOL : std_logic := '0'; |
CPHA : std_logic := '0'; |
PREFETCH : positive := 1 |
CPHA : std_logic := '1'; |
PREFETCH : positive := 2 |
); |
PORT( |
spi_2x_clk_i : IN std_logic; |
par_clk_i : IN std_logic; |
rst_i : IN std_logic; |
spi_miso_i : IN std_logic; |
di_i : IN std_logic_vector(N-1 downto 0); |
wren_i : IN std_logic; |
spi_ssel_o : OUT std_logic; |
spi_sck_o : OUT std_logic; |
spi_mosi_o : OUT std_logic; |
do_o : OUT std_logic_vector(N-1 downto 0); |
di_rdy_o : OUT std_logic; |
do_valid_o : OUT std_logic; |
PORT( |
spi_2x_clk_i : IN std_logic; |
clk_i : IN std_logic; |
rst_i : IN std_logic; |
spi_ssel_o : OUT std_logic; |
spi_sck_o : OUT std_logic; |
spi_mosi_o : OUT std_logic; |
spi_miso_i : IN std_logic; |
di_req_o : OUT std_logic; |
di_i : IN std_logic_vector(N-1 downto 0); |
wren_i : IN std_logic; |
do_valid_o : OUT std_logic; |
do_o : OUT std_logic_vector(N-1 downto 0); |
----- debug ----- |
do_transfer_o : OUT std_logic; |
state_dbg_o : OUT std_logic_vector(5 downto 0); |
wren_o : OUT std_logic; |
wren_ack_o : OUT std_logic; |
rx_bit_reg_o : OUT std_logic; |
sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0) |
); |
END COMPONENT; |
state_dbg_o : OUT std_logic_vector(5 downto 0); |
core_clk_o : OUT std_logic; |
core_n_clk_o : OUT std_logic; |
sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0) |
); |
END COMPONENT; |
|
COMPONENT spi_slave |
COMPONENT spi_slave |
GENERIC ( |
N : positive := 32; |
CPOL : std_logic := '0'; |
CPHA : std_logic := '0'; |
PREFETCH : positive := 1 |
CPHA : std_logic := '1'; |
PREFETCH : positive := 2 |
); |
PORT( |
clk_i : IN std_logic; |
rst_i : IN std_logic; |
spi_ssel_i : IN std_logic; |
spi_sck_i : IN std_logic; |
spi_mosi_i : IN std_logic; |
di_i : IN std_logic_vector(N-1 downto 0); |
wren_i : IN std_logic; |
spi_miso_o : OUT std_logic; |
do_o : OUT std_logic_vector(N-1 downto 0); |
di_rdy_o : OUT std_logic; |
do_valid_o : OUT std_logic; |
PORT( |
clk_i : IN std_logic; |
spi_ssel_i : IN std_logic; |
spi_sck_i : IN std_logic; |
spi_mosi_i : IN std_logic; |
spi_miso_o : OUT std_logic; |
di_req_o : OUT std_logic; |
di_i : IN std_logic_vector(N-1 downto 0); |
wren_i : IN std_logic; |
do_valid_o : OUT std_logic; |
do_o : OUT std_logic_vector(N-1 downto 0); |
----- debug ----- |
do_transfer_o : OUT std_logic; |
state_dbg_o : OUT std_logic_vector(5 downto 0) |
wren_o : OUT std_logic; |
wren_ack_o : OUT std_logic; |
rx_bit_reg_o : OUT std_logic; |
state_dbg_o : OUT std_logic_vector(5 downto 0) |
-- sh_reg_dbg_o : OUT std_logic_vector(N-1 downto 0) |
); |
END COMPONENT; |
); |
END COMPONENT; |
|
begin |
|
Inst_spi_master: spi_master |
Inst_spi_master: spi_master |
GENERIC MAP ( |
N => N, |
CPOL => CPOL, |
138,25 → 154,30
CPHA => CPHA, |
PREFETCH => PREFETCH) |
PORT MAP( |
spi_2x_clk_i => m_spi_clk_i, |
par_clk_i => m_par_clk_i, |
rst_i => m_rst_i, |
spi_ssel_o => m_spi_ssel_o, |
spi_sck_o => m_spi_sck_o, |
spi_mosi_o => m_spi_mosi_o, |
spi_miso_i => m_spi_miso_i, |
di_i => m_di_i, |
do_o => m_do_o, |
di_rdy_o => m_di_rdy_o, |
wren_i => m_wren_i, |
do_valid_o => m_do_valid_o, |
spi_2x_clk_i => m_spi_2x_clk_i, |
clk_i => m_clk_i, |
rst_i => m_rst_i, |
spi_ssel_o => m_spi_ssel_o, |
spi_sck_o => m_spi_sck_o, |
spi_mosi_o => m_spi_mosi_o, |
spi_miso_i => m_spi_miso_i, |
di_req_o => m_di_req_o, |
di_i => m_di_i, |
wren_i => m_wren_i, |
do_valid_o => m_do_valid_o, |
do_o => m_do_o, |
----- debug ----- |
do_transfer_o => m_do_transfer_o, |
state_dbg_o => m_state_dbg_o, |
wren_o => m_wren_o, |
wren_ack_o => m_wren_ack_o, |
rx_bit_reg_o => m_rx_bit_reg_o, |
sh_reg_dbg_o => m_sh_reg_dbg_o |
); |
state_dbg_o => m_state_dbg_o, |
core_clk_o => m_core_clk_o, |
core_n_clk_o => m_core_n_clk_o, |
sh_reg_dbg_o => m_sh_reg_dbg_o |
); |
|
Inst_spi_slave: spi_slave |
Inst_spi_slave: spi_slave |
GENERIC MAP ( |
N => N, |
CPOL => CPOL, |
163,21 → 184,24
CPHA => CPHA, |
PREFETCH => PREFETCH) |
PORT MAP( |
clk_i => s_clk_i, |
rst_i => s_rst_i, |
spi_ssel_i => s_spi_ssel_i, |
spi_sck_i => s_spi_sck_i, |
spi_mosi_i => s_spi_mosi_i, |
spi_miso_o => s_spi_miso_o, |
di_i => s_di_i, |
do_o => s_do_o, |
di_rdy_o => s_di_rdy_o, |
wren_i => s_wren_i, |
do_valid_o => s_do_valid_o, |
clk_i => s_clk_i, |
spi_ssel_i => s_spi_ssel_i, |
spi_sck_i => s_spi_sck_i, |
spi_mosi_i => s_spi_mosi_i, |
spi_miso_o => s_spi_miso_o, |
di_req_o => s_di_req_o, |
di_i => s_di_i, |
wren_i => s_wren_i, |
do_valid_o => s_do_valid_o, |
do_o => s_do_o, |
----- debug ----- |
do_transfer_o => s_do_transfer_o, |
state_dbg_o => s_state_dbg_o |
wren_o => s_wren_o, |
wren_ack_o => s_wren_ack_o, |
rx_bit_reg_o => s_rx_bit_reg_o, |
state_dbg_o => s_state_dbg_o |
-- sh_reg_dbg_o => s_sh_reg_dbg_o |
); |
); |
|
end Structural; |
|
/spi_slave.vhd
11,20 → 11,22
-- This block is the SPI slave interface, implemented in one single entity. |
-- All internal core operations are synchronous to the external SPI clock, and follow the general SPI de-facto standard. |
-- The parallel read/write interface is synchronous to a supplied system master clock, 'clk_i'. |
-- To avoid async glitches caused by setup violations between the core registers and the parallel i/o registers, |
-- access to the parallel ports 'di_i' and 'do_o' must be synchronized with the 'di_rdy_o' and 'do_valid_o' signals. |
-- Synchronization for the parallel ports is provided by input data request and write enable lines, and output data valid line. |
-- |
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o. |
-- It is parameterizable for the data width ('N'), SPI mode via generics (CPHA and CPOL), and lookahead prefetch |
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch |
-- signaling ('PREFETCH'). |
-- |
-- PARALLEL WRITE INTERFACE |
-- The parallel interface has an input port 'di_i' and an output port 'do_o'. |
-- Parallel load is controlled using 3 signals: 'di_i', 'di_rdy_o' and 'wren_i'. 'di_rdy_o' is a look ahead data request line, |
-- that is set 'PREFETCH' 'spi_sck_i' cycles in advance to synchronize a pipelined memory or fifo to present the |
-- next input data at 'di_i' in time to have continuous clock at the spi bus, to allow back-to-back continuous load. |
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'. |
-- When the core needs input data, a look-ahead data request strobe, 'di_req_o', is pulsed 'PREFETCH' 'spi_sck_i' |
-- cycles in advance to synchronize a user pipelined memory or fifo to present the next input data at 'di_i' |
-- in time to have continuous clock at the spi bus, to allow back-to-back continuous load. |
-- The data request strobe on 'di_req_o' is 2 'clk_i' clock cycles long. |
-- The write to 'di_i' must occur at most one 'spi_sck_i' cycle before actual load to the core shift register, to avoid |
-- race conditions at the register transfer. |
-- The user circuit places data at the 'di_i' port and strobes the 'wren_i' line for one rising edge of 'clk_i'. |
-- For a pipelined sync RAM, a PREFETCH of 3 cycles allows an address generator to present the new address to the RAM in one |
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the interface one clock before transfer. |
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time. |
34,35 → 36,36
-- |
-- PARALLEL WRITE PIPELINED SEQUENCE |
-- ================================= |
-- __ __ __ __ __ __ __ __ __ __ __ |
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock |
-- ___________________________________ |
-- di_rdy_o ________/ \________________... -- 'di_rdy_o' asserted on rising edge of 'clk_par_i' |
-- ______________ ______________________________________________... |
-- di_i __old_data____X__________new_data____________________________... -- user circuit loads data on 'di_i' at next rising edge |
-- ________________________________ -- user circuit asserts 'wren_i' at next edge, and removes |
-- wren_i __________________/ \_______... -- 'wren_i' after 'di_rdy_o' is removed |
-- __ __ __ __ __ __ __ |
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock |
-- ___________ |
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i' |
-- ______________ ___________________________... |
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge |
-- ________ |
-- wren_i __________________________/ \______... -- 'wren_i' enables latch on rising edge of 'clk_i' |
-- |
-- |
-- PARALLEL READ INTERFACE |
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received, |
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_sck_i'. |
-- The signal 'do_valid_o' is set one 'spi_sck_i' clock after, to directly drive a synchronous memory or fifo write enable. |
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete |
-- word is received, the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_sck_i'. |
-- The signal 'do_valid_o' is strobed 3 'clk_i' clocks after, to directly drive a synchronous memory or fifo write enable. |
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'. |
-- When the interface is idle, data at the 'do_o' port holds the last word received. |
-- |
-- PARALLEL READ PIPELINED SEQUENCE |
-- ================================ |
-- ______ ______ ______ ______ _ |
-- clk_spi_i ___/ bit1 \______/ bitN \______/bitN-1\______/bitN-2\______/b... -- spi base clock |
-- __ __ __ __ __ __ __ __ __ __ |
-- clk_par_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock |
-- _________________ ___________________________________________... -- 1) received data is transferred to 'do_buffer_reg' |
-- do_o __old_data_______X__________new_data_________________________... -- after last bit received, at 'clk_spi_i' rising edge. |
-- _________________ -- 2) 'do_valid_o' asserted on rising edge of 'clk_par_i', |
-- do_valid_o ______________________________________/ \____... -- at next bit (bit N-1) of the SPI transfer. |
-- |
-- ______ ______ ______ ______ |
-- clk_spi_i ___/ bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi base clock |
-- __ __ __ __ __ __ __ __ __ |
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock |
-- _________________ _____________________________________... -- 1) received data is transferred to 'do_buffer_reg' |
-- do_o __old_data_______X__________new_data___________________... -- after last bit received, at next shift clock. |
-- ____________ |
-- do_valid_o ________________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles |
-- -- on the 3rd 'clk_i' rising edge. |
-- |
-- |
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints. |
-- |
------------------------------ COPYRIGHT NOTICE ----------------------------------------------------------------------- |
97,6 → 100,10
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core. |
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets. |
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches. |
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce |
-- synthesis LUT overhead in Spartan-6 architecture. |
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic. |
-- 2011/06/12 v0.97.0079 [JD] implemented wren_ack and di_req logic for state 0, and eliminated unnecessary registers reset. |
-- |
-- |
----------------------------------------------------------------------------------------------------------------------- |
103,10 → 110,7
-- TODO |
-- ==== |
-- |
-- - DEBUG_PACKAGE: |
-- - package to export signals to the verification testbench |
-- |
-- |
----------------------------------------------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
118,22 → 122,23
N : positive := 32; -- 32bit serial word length is default |
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default) |
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase. |
PREFETCH : positive := 1); -- prefetch lookahead cycles |
PREFETCH : positive := 2); -- prefetch lookahead cycles |
Port ( |
clk_i : in std_logic := 'X'; -- internal interface clock (clocks di/do registers) |
rst_i : in std_logic := 'X'; -- synchronous rst_i: clear registers at clk_i rising edge |
spi_ssel_i : in std_logic; -- spi bus slave select line |
spi_sck_i : in std_logic; -- spi bus sck clock (clocks the shift register core) |
spi_mosi_i : in std_logic; -- spi bus mosi input |
spi_ssel_i : in std_logic := 'X'; -- spi bus slave select line |
spi_sck_i : in std_logic := 'X'; -- spi bus sck clock (clocks the shift register core) |
spi_mosi_i : in std_logic := 'X'; -- spi bus mosi input |
spi_miso_o : out std_logic := 'X'; -- spi bus spi_miso_o output |
di_req_o : out std_logic; -- preload lookahead data request line |
di_i : in std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel load data in (clocked in on rising edge of clk_i) |
wren_i : in std_logic := 'X'; -- user data write enable |
do_valid_o : out std_logic; -- do_o data valid strobe, valid during one clk_i rising edge. |
do_o : out std_logic_vector (N-1 downto 0); -- parallel output (clocked out on falling clk_i) |
di_rdy_o : out std_logic; -- preload lookahead: HIGH when ready for new input data |
wren_i : in std_logic := 'X'; -- write enable (write di_i data at next rising clk_i edge) |
-- wren_i starts transmission. must be valid 1 clk_i cycle |
-- before current transmission ends. |
do_valid_o : out std_logic; -- do_o data valid signal, valid during one clk_i rising edge. |
--- debug ports: can be removed for the application circuit --- |
do_transfer_o : out std_logic; -- debug: internal transfer driver |
wren_o : out std_logic; -- debug: internal state of the wren_i pulse stretcher |
wren_ack_o : out std_logic; -- debug: wren ack from state machine |
rx_bit_reg_o : out std_logic; -- debug: internal rx bit |
state_dbg_o : out std_logic_vector (5 downto 0) -- debug: internal state register |
-- sh_reg_dbg_o : out std_logic_vector (N-1 downto 0) -- debug: internal shift register |
); |
149,29 → 154,52
constant SAMPLE_EDGE : std_logic := (CPOL xnor CPHA); |
constant SAMPLE_LEVEL : std_logic := SAMPLE_EDGE; |
constant SHIFT_EDGE : std_logic := (CPOL xor CPHA); |
-- |
-- GLOBAL RESET: |
-- all signals are initialized to zero at GSR (global set/reset) by giving explicit |
-- initialization values at declaration. This is needed for all Xilinx FPGAs, and |
-- especially for the Spartan-6 and newer CLB architectures, where a local reset can |
-- reduce the usability of the slice registers, due to the need to share the control |
-- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice. |
-- By using GSR for the initialization, and reducing RESET local init to the bare |
-- essential, the model achieves better LUT/FF packing and CLB usability. |
-- |
-- internal state signals for register and combinational stages |
signal state_next : natural range N+1 downto 0 := 0; |
signal state_reg : natural range N+1 downto 0 := 0; |
signal state_next : natural range N+1 downto 0 := 0; |
-- shifter signals for register and combinational stages |
signal sh_reg : std_logic_vector (N-1 downto 0); |
signal sh_next : std_logic_vector (N-1 downto 0); |
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal sh_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- input bit sampled buffer |
signal rx_bit_reg : std_logic; |
signal di_reg : std_logic_vector (N-1 downto 0); |
signal rx_bit_reg : std_logic := '0'; |
-- buffered di_i data signals for register and combinational stages |
signal di_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- internal wren_i stretcher for fsm combinational stage |
signal wren : std_logic := '0'; |
signal wren_ack_next : std_logic := '0'; |
signal wren_ack_reg : std_logic := '0'; |
-- buffered do_o data signals for register and combinational stages |
signal do_buffer_reg : std_logic_vector (N-1 downto 0); |
signal do_buffer_next : std_logic_vector (N-1 downto 0); |
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal do_buffer_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- internal signal to flag transfer to do_buffer_reg |
signal do_transfer_reg : std_logic; |
signal do_transfer_next : std_logic; |
-- internal registered do_valid_o |
signal do_valid_oreg : std_logic; |
signal do_valid_reg : std_logic; |
signal do_valid_next : std_logic; |
-- internal registered di_rdy_o |
signal di_rdy_oreg : std_logic; |
signal di_rdy_reg : std_logic; |
signal di_rdy_next : std_logic; |
signal do_transfer_next : std_logic := '0'; |
signal do_transfer_reg : std_logic := '0'; |
-- internal input data request signal |
signal di_req : std_logic := '0'; |
-- cross-clock do_valid_o logic |
signal do_valid_next : std_logic := '0'; |
signal do_valid_A : std_logic := '0'; |
signal do_valid_B : std_logic := '0'; |
signal do_valid_C : std_logic := '0'; |
signal do_valid_D : std_logic := '0'; |
signal do_valid_o_reg : std_logic := '0'; |
-- cross-clock di_req_o logic |
signal di_req_o_next : std_logic := '0'; |
signal di_req_o_A : std_logic := '0'; |
signal di_req_o_B : std_logic := '0'; |
signal di_req_o_C : std_logic := '0'; |
signal di_req_o_D : std_logic := '0'; |
signal di_req_o_reg : std_logic := '0'; |
begin |
--============================================================================================= |
-- GENERICS CONSTRAINTS CHECKING |
186,9 → 214,9
severity FAILURE; |
|
--============================================================================================= |
-- RTL REGISTER PROCESSES |
-- REGISTERED INPUTS |
--============================================================================================= |
-- capture rx bit at SAMPLE edge of sck |
-- rx bit flop: capture rx bit after SAMPLE edge of sck |
rx_bit_proc : process (spi_sck_i, spi_mosi_i) is |
begin |
if spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then |
195,125 → 223,134
rx_bit_reg <= spi_mosi_i; |
end if; |
end process rx_bit_proc; |
-- state and data registers change on SHIFT edge of sck (ffd with async clear) |
core_reg_proc : process (spi_sck_i, rst_i, spi_ssel_i) is |
|
--============================================================================================= |
-- RTL CORE REGISTER PROCESSES |
--============================================================================================= |
-- fsm state and data registers change on spi SHIFT clock |
core_reg_proc : process (spi_sck_i, spi_ssel_i) is |
begin |
-- registers cleared on reset |
if rst_i = '1' then -- async clr |
do_buffer_reg <= (others => '0'); |
do_transfer_reg <= '0'; |
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers |
do_buffer_reg <= do_buffer_next; |
do_transfer_reg <= do_transfer_next; |
-- FFD registers clocked on SHIFT edge and cleared on idle (spi_ssel_i = 1) |
if spi_ssel_i = '1' then -- async clr |
state_reg <= 0; -- state falls back to idle when slave not selected |
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers |
state_reg <= state_next; -- core fsm changes state with spi SHIFT clock |
end if; |
-- registers cleared on idle (spi_ssel_i = 1) |
if spi_ssel_i = '1' then -- async clr |
state_reg <= 0; |
sh_reg <= (others => '0'); |
elsif spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on SHIFT edge, update all core registers |
state_reg <= state_next; |
sh_reg <= sh_next; |
-- FFD registers clocked on SHIFT edge |
if spi_sck_i'event and spi_sck_i = SHIFT_EDGE then -- on fsm state change, update all core registers |
sh_reg <= sh_next; -- core shift register |
do_buffer_reg <= do_buffer_next; -- registered data output |
do_transfer_reg <= do_transfer_next; -- cross-clock transfer flag |
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization |
end if; |
end process core_reg_proc; |
-- cross-clock registers change on half-cycle of sck (ffd with async clear) |
-- this is to prevent fsm state change glitches causing setup time artifacts at async clk_i edges |
cross_reg_proc : process (rst_i, spi_sck_i, spi_ssel_i) is |
|
--============================================================================================= |
-- CROSS-CLOCK PIPELINE TRANSFER LOGIC |
--============================================================================================= |
-- do_valid_o and di_req_o strobe output logic |
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a |
-- fixed-length delayed pulse for the output flags, at the parallel clock domain |
out_transfer_proc : process ( clk_i, do_transfer_reg, di_req, |
do_valid_A, do_valid_B, do_valid_D, |
di_req_o_A, di_req_o_B, di_req_o_D) is |
begin |
if spi_ssel_i = '1' then |
di_rdy_reg <= '1'; -- di_rdy true during idle |
elsif spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then -- on half-cycle edge, update cross registers |
di_rdy_reg <= di_rdy_next; |
if clk_i'event and clk_i = '1' then -- clock at parallel port clock |
-- do_transfer_reg -> do_valid_o_reg |
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long |
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs |
do_valid_C <= do_valid_B; |
do_valid_D <= do_valid_C; |
do_valid_o_reg <= do_valid_next; -- registered output pulse |
-------------------------------- |
-- di_req -> di_req_o_reg |
di_req_o_A <= di_req; -- the input signal must be at least 2 clocks long |
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs |
di_req_o_C <= di_req_o_B; |
di_req_o_D <= di_req_o_C; |
di_req_o_reg <= di_req_o_next; -- registered output pulse |
end if; |
if rst_i = '1' then |
do_valid_reg <= '0'; -- async clear on do_valid |
elsif spi_sck_i'event and spi_sck_i = SAMPLE_EDGE then -- on half-cycle edge, update cross registers |
do_valid_reg <= do_valid_next; |
end if; |
end process cross_reg_proc; |
-- parallel load input registers (to eliminate async clock glitches) |
par_reg_proc: process (clk_i, rst_i, wren_i, spi_sck_i, spi_ssel_i) is |
-- generate a 2-clocks pulse at the 3rd clock cycle |
do_valid_next <= do_valid_A and do_valid_B and not do_valid_D; |
di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D; |
end process out_transfer_proc; |
-- parallel load input registers: data register and write enable |
in_transfer_proc: process (clk_i, wren_i, wren_ack_reg) is |
begin |
-- registered data input, input register with clock enable |
if clk_i'event and clk_i = '1' then |
-- output flags registers |
if rst_i = '1' then -- sync rst for output flags |
di_rdy_oreg <= '0'; |
do_valid_oreg <= '0'; |
else |
di_rdy_oreg <= di_rdy_reg; -- transfer buffer regs to out regs |
do_valid_oreg <= (do_valid_reg and not spi_ssel_i) or (do_transfer_reg and spi_ssel_i); |
if wren_i = '1' then |
di_reg <= di_i; -- parallel data input buffer register |
end if; |
-- input register, with 'rst_i' sync reset and 'wren_i' clock enable |
if rst_i = '1' then -- sync rst for di_reg |
di_reg <= (others => '0'); |
elsif wren_i = '1' then -- wren_i is the clock enable for di_reg |
di_reg <= di_i; -- parallel data input buffer register |
end if; |
-- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset) |
if clk_i'event and clk_i = '1' then |
if wren_i = '1' then -- wren_i is the sync preset for wren |
wren <= '1'; |
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren |
wren <= '0'; |
end if; |
end if; |
end process par_reg_proc; |
end process in_transfer_proc; |
|
--============================================================================================= |
-- RTL COMBINATIONAL LOGIC PROCESSES |
--============================================================================================= |
-- state and datapath combinational logic |
core_combi_proc : process ( rst_i, sh_reg, state_reg, rx_bit_reg, do_buffer_reg, |
do_valid_reg, do_transfer_reg, di_reg, di_rdy_reg, wren_i ) is |
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, do_buffer_reg, |
do_transfer_reg, di_reg, wren, wren_ack_reg) is |
begin |
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches) |
do_buffer_next <= do_buffer_reg; |
do_valid_next <= do_valid_reg; |
do_transfer_next <= do_transfer_reg; |
di_rdy_next <= di_rdy_reg; |
spi_miso_o <= '0'; -- will output '0' when shifter is empty |
state_next <= state_reg - 1; -- update next state at each sck pulse |
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches) |
do_buffer_next <= do_buffer_reg; -- output data buffer |
do_transfer_next <= do_transfer_reg; -- output data flag |
wren_ack_next <= '0'; -- remove data load ack for all but the load stages |
di_req <= '0'; -- prefetch data request: deassert when shifting data |
spi_miso_o <= sh_reg(N-1); -- output serial data from the MSb |
state_next <= state_reg - 1; -- update next state at each sck pulse |
case state_reg is |
when (N) => |
di_rdy_next <= '0'; -- deassert next-data request when start shifting |
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
do_transfer_next <= '0'; -- reset transfer signal |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when (N-1) downto (PREFETCH+3) => |
di_rdy_next <= '0'; -- deassert next-data request when start shifting |
do_valid_next <= do_transfer_reg; -- assert valid rx data, with plenty of pipeline delay for 'do_buffer' |
do_transfer_next <= '0'; -- reset transfer signal |
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
do_transfer_next <= '0'; -- reset transfer signal |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when (PREFETCH+2) downto 2 => |
-- raise prefetch 'di_rdy_next' signal and remove 'do_valid' |
di_rdy_next <= '1'; -- request data in advance to allow for pipeline delays |
do_valid_next <= '0'; -- make do_valid_o HIGH for one cycle only |
spi_miso_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
-- raise data prefetch request |
di_req <= '1'; -- request data in advance to allow for pipeline delays |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when 1 => |
-- restart from state 'N' if more sck pulses come |
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer |
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer |
do_transfer_next <= '1'; -- signal transfer to do_buffer |
state_next <= N; -- next state is top bit of new data |
spi_miso_o <= sh_reg(N-1); -- shift out last tx bit from the MSb |
if wren_i = '1' then -- load tx register if valid data present at di_reg |
sh_next <= di_reg; -- load parallel data from di_reg into shifter |
di_req <= '1'; -- request data in advance to allow for pipeline delays |
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer |
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer |
do_transfer_next <= '1'; -- signal transfer to do_buffer |
state_next <= N; -- next state is top bit of new data |
if wren = '1' then -- load tx register if valid data present at di_reg |
sh_next <= di_reg; -- load parallel data from di_reg into shifter |
wren_ack_next <= '1'; -- acknowledge data in transfer |
else |
sh_next <= (others => '0'); -- load null data (output '0' if no load) |
sh_next <= (others => '0'); -- load null data (output '0' if no load) |
end if; |
when 0 => |
do_transfer_next <= '0'; -- clear signal transfer to do_buffer |
do_valid_next <= do_transfer_reg; -- assert valid rx data after data received, when interface idle |
di_rdy_next <= '1'; -- will request data if shifter empty |
spi_miso_o <= di_reg(N-1); -- shift out first tx bit from the MSb |
di_req <= not wren_ack_reg; -- will request data if shifter empty |
do_transfer_next <= '0'; -- clear signal transfer to do_buffer |
spi_miso_o <= di_reg(N-1); -- shift out first tx bit from the MSb |
if CPHA = '0' then |
-- initial state for CPHA=0, when slave interface is first selected or idle |
state_next <= N-1; -- next state is top bit of new data |
sh_next(N-1 downto 1) <= di_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
state_next <= N-1; -- next state is top bit of new data |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
sh_next(N-1 downto 1) <= di_reg(N-2 downto 0); -- shift inner bits |
wren_ack_next <= '1'; -- acknowledge data in transfer |
else |
-- initial state for CPHA=1, when slave interface is first selected or idle |
state_next <= N; -- next state is top bit of new data |
sh_next <= di_reg; -- load parallel data from di_reg into shifter |
state_next <= N; -- next state is top bit of new data |
sh_next <= di_reg; -- load parallel data from di_reg into shifter |
end if; |
when others => |
null; |
state_next <= 0; -- state 0 is safe state |
end case; |
end process core_combi_proc; |
|
321,15 → 358,19
-- RTL OUTPUT LOGIC PROCESSES |
--============================================================================================= |
-- data output processes |
do_proc : do_o <= do_buffer_reg; -- do_o always available |
do_valid_proc: do_valid_o <= do_valid_oreg; -- copy registered do_valid_o to output |
di_rdy_proc: di_rdy_o <= di_rdy_oreg; -- copy registered di_rdy_o to output |
do_o_proc : do_o <= do_buffer_reg; -- do_o always available |
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- copy registered do_valid_o to output |
di_req_o_proc: di_req_o <= di_req_o_reg; -- copy registered di_req_o to output |
|
--============================================================================================= |
-- DEBUG LOGIC PROCESSES |
--============================================================================================= |
-- these signals are useful for verification, and can be deleted or commented-out after debug. |
do_transfer_proc: do_transfer_o <= do_transfer_reg; |
state_debug_proc: state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 6)); -- export internal state to debug |
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg; |
wren_o_proc: wren_o <= wren; |
wren_ack_o_proc: wren_ack_o <= wren_ack_reg; |
-- sh_reg_debug_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug |
end architecture RTL; |
|
/spi_master.vhd
13,33 → 13,33
-- All parallel i/o interface operations are synchronous to a system clock, that can be asynchronous to the spi base clock. |
-- Fully pipelined circuitry guarantees that no setup artifacts occur on the buffers that are accessed by the two clock domains. |
-- The block is very simple to use, and has parallel inputs and outputs that behave like a synchronous memory i/o. |
-- It is parameterizable for the data width ('N'), SPI mode via generics (CPHA and CPOL), and lookahead prefetch |
-- It is parameterizable via generics for the data width ('N'), SPI mode (CPHA and CPOL), and lookahead prefetch |
-- signaling ('PREFETCH'). |
-- |
-- PARALLEL WRITE INTERFACE |
-- The parallel interface has a input port 'di_i' and an output port 'do_o'. |
-- Parallel load is controlled using 3 signals: 'di_i', 'di_rdy_o' and 'wren_i'. 'di_rdy_o' is a look ahead data request line, |
-- The parallel interface has an input port 'di_i' and an output port 'do_o'. |
-- Parallel load is controlled using 3 signals: 'di_i', 'di_req_o' and 'wren_i'. 'di_req_o' is a look ahead data request line, |
-- that is set 'PREFETCH' clock cycles in advance to synchronize a pipelined memory or fifo to present the |
-- next input data at 'di_i' in time to have continuous clock at the spi bus, to allow back-to-back continuous load. |
-- For a pipelined sync RAM, a PREFETCH of 2 cycles allows an address generator to present the new address to the RAM in one |
-- cycle, and the RAM to respond in one more cycle, in time for 'di_i' to be latched by the shifter. |
-- If the user sequencer needs a different value for PREFETCH, the generic can be altered at instantiation time. |
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last clock cycle, |
-- if continuous transmission is intended. If 'wren_i' is not valid 2 clock cycles after the last tranmitted bit, the interface |
-- The 'wren_i' write enable strobe must be valid at least one setup time before the rising edge of the last SPI clock cycle, |
-- if continuous transmission is intended. If 'wren_i' is not valid 2 SPI clock cycles after the last transmitted bit, the interface |
-- enters idle state and deasserts SSEL. |
-- When the interface is idle, 'wren_i' write strobe loads the data and starts transmission. 'di_rdy_o' is always asserted when idle. |
-- The interaction for data load is: |
-- When the interface is idle, 'wren_i' write strobe loads the data and starts transmission. 'di_req_o' will strobe when entering |
-- idle state, if previously loaded data has already been transferred. |
-- |
-- PARALLEL WRITE PIPELINED SEQUENCE |
-- ================================= |
-- __ __ __ __ __ __ __ __ __ |
-- par_clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock |
-- ___________________________________ |
-- di_rdy_o ________/ \___________... -- 'di_rdy_o' asserted on rising edge of 'par_clk_i' |
-- ______________ _________________________________________ |
-- di_i __old_data____X__________new_data_______________________... -- user circuit loads data on 'di_i' at next rising edge |
-- ________________________________ -- user circuit asserts 'wren_i' at next edge, |
-- wren_i __________________/ \____... -- and removes 'wren_i' after 'di_rdy_o' is removed |
-- PARALLEL WRITE SEQUENCE |
-- ======================= |
-- __ __ __ __ __ __ __ |
-- clk_i __/ \__/ \__/ \__/ \__/ \__/ \__/ \... -- parallel interface clock |
-- ___________ |
-- di_req_o ________/ \_____________________... -- 'di_req_o' asserted on rising edge of 'clk_i' |
-- ______________ ___________________________... |
-- di_i __old_data____X______new_data_____________... -- user circuit loads data on 'di_i' at next 'clk_i' rising edge |
-- _______ |
-- wren_i __________________________/ \_______... -- user strobes 'wren_i' for one cycle of 'clk_i' |
-- |
-- |
-- PARALLEL READ INTERFACE |
46,26 → 46,28
-- An internal buffer is used to copy the internal shift register data to drive the 'do_o' port. When a complete word is received, |
-- the core shift register is transferred to the buffer, at the rising edge of the spi clock, 'spi_2x_clk_i'. |
-- The signal 'do_valid_o' is set one 'spi_2x_clk_i' clock after, to directly drive a synchronous memory or fifo write enable. |
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'par_clk_i'. |
-- 'do_valid_o' is synchronous to the parallel interface clock, and changes only on rising edges of 'clk_i'. |
-- When the interface is idle, data at the 'do_o' port holds the last word received. |
-- |
-- PARALLEL READ PIPELINED SEQUENCE |
-- ================================ |
-- ______ ______ ______ ______ ______ |
-- spi_2x_clk_i bit1 \______/ bitN \______/bitN-1\______/bitN-2\______/bitN-3\_... -- spi 2x base clock |
-- _ __ __ __ __ __ __ __ __ __ __ |
-- par_clk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__... -- parallel interface clock |
-- _____________ __________________________________________________... -- 1) rx data is transferred to 'do_buffer_reg' |
-- do_o ___old_data__X__________new_data________________________________... -- after last rx bit, at rising 'spi_2x_clk_i'. |
-- ___________ -- 2) 'do_valid_o' asserted on rising 'par_clk_i', |
-- do_valid_o __________________________________/ \_________________... -- at next bit (bit N-1) of the SPI transfer. |
-- |
-- PARALLEL READ SEQUENCE |
-- ====================== |
-- ______ ______ ______ ______ |
-- spi_2x_clk_i bit1 \______/ bitN \______/bitN-1\______/bitN-2\__... -- spi 2x base clock |
-- _ __ __ __ __ __ __ __ __ |
-- clk_i \__/ \__/ \__/ \__/ \__/ \__/ \__/ \__/ \_... -- parallel interface clock |
-- _____________ _____________________________________... -- 1) rx data is transferred to 'do_buffer_reg' |
-- do_o ___old_data__X__________new_data___________________... -- after last rx bit, at rising 'spi_2x_clk_i'. |
-- ____________ |
-- do_valid_o ____________________________/ \_________... -- 2) 'do_valid_o' strobed for 2 'clk_i' cycles |
-- -- on the 3rd 'clk_i' rising edge. |
-- |
-- |
-- The propagation delay of spi_sck_o and spi_mosi_o, referred to the internal clock, is balanced by similar path delays, |
-- but the sampling delay of spi_miso_i imposes a setup time referred to the sck signal that limits the high frequency |
-- of the interface, for full duplex operation. |
-- |
-- This design was originally targeted to a Spartan-6 platform, synthesized with XST and normal constraints. |
-- The VHDL dialect used is VHDL'93, which is widely accepted by synthesis tools. |
-- |
------------------------------ COPYRIGHT NOTICE ----------------------------------------------------------------------- |
-- |
96,7 → 98,7
-- 2011/04/28 v0.01.0010 [JD] shifter implemented as a sequential process. timing problems and async issues in synthesis. |
-- 2011/05/01 v0.01.0030 [JD] changed original shifter design to a fully pipelined RTL fsmd. solved all synthesis issues. |
-- 2011/05/05 v0.01.0034 [JD] added an internal buffer register for rx_data, to allow greater liberty in data load/store. |
-- 2011/05/08 v0.10.0038 [JD] increased one state to have SSEL start one cycle before SCK. Implemented full CPOL/CPHA |
-- 2011/05/08 v0.10.0038 [JD] increased one state to have SSEL start one cycle before SCK. Implemented full CPOL/CPHA |
-- logic, based on generics, and do_valid_o signal. |
-- 2011/05/13 v0.20.0045 [JD] streamlined signal names, added PREFETCH parameter, added assertions. |
-- 2011/05/17 v0.80.0049 [JD] added explicit clock synchronization circuitry across clock boundaries. |
103,16 → 105,20
-- 2011/05/18 v0.95.0050 [JD] clock generation circuitry, with generators for all-rising-edge clock core. |
-- 2011/06/05 v0.96.0053 [JD] changed async clear to sync resets. |
-- 2011/06/07 v0.97.0065 [JD] added cross-clock buffers, fixed fsm async glitches. |
-- 2011/06/09 v0.97.0068 [JD] reduced control sets (resets, CE, presets) to the absolute minimum to operate, to reduce |
-- synthesis LUT overhead in Spartan-6 architecture. |
-- 2011/06/11 v0.97.0075 [JD] redesigned all parallel data interfacing ports, and implemented cross-clock strobe logic. |
-- 2011/06/12 v0.97.0079 [JD] streamlined wren_ack for all cases and eliminated unnecessary register resets. |
-- 2011/06/14 v0.97.0083 [JD] (bug CPHA effect) : redesigned SCK output circuit. |
-- (minor bug) : removed fsm registers from (not rst_i) chip enable. |
-- 2011/06/15 v0.97.0086 [JD] removed master MISO input register, to relax MISO data setup time (to get higher speed). |
-- |
-- |
-- |
----------------------------------------------------------------------------------------------------------------------- |
-- TODO |
-- ==== |
-- |
-- - DEBUG_PACKAGE: |
-- - package to export signals to the verification testbench |
-- |
-- |
----------------------------------------------------------------------------------------------------------------------- |
library IEEE; |
use IEEE.STD_LOGIC_1164.ALL; |
124,25 → 130,28
N : positive := 32; -- 32bit serial word length is default |
CPOL : std_logic := '0'; -- SPI mode selection (mode 0 default) |
CPHA : std_logic := '0'; -- CPOL = clock polarity, CPHA = clock phase. |
PREFETCH : positive := 1); -- prefetch lookahead cycles |
PREFETCH : positive := 2); -- prefetch lookahead cycles |
Port ( |
spi_2x_clk_i : in std_logic := 'X'; -- spi base reference clock: 2x 'spi_sck_o' |
par_clk_i : in std_logic := 'X'; -- parallel interface clock |
rst_i : in std_logic := 'X'; -- async reset: clear all registers |
clk_i : in std_logic := 'X'; -- parallel interface clock |
rst_i : in std_logic := 'X'; -- reset core |
spi_ssel_o : out std_logic; -- spi bus slave select line |
spi_sck_o : out std_logic; -- spi bus sck |
spi_mosi_o : out std_logic; -- spi bus mosi output |
spi_miso_i : in std_logic := 'X'; -- spi bus spi_miso_i input |
di_req_o : out std_logic; -- preload lookahead data request line |
di_i : in std_logic_vector (N-1 downto 0) := (others => 'X'); -- parallel data in (clocked on rising spi_2x_clk_i after last bit) |
wren_i : in std_logic := 'X'; -- user data write enable, starts transmission when interface is idle |
do_valid_o : out std_logic; -- do_o data valid signal, valid during one spi_2x_clk_i rising edge. |
do_o : out std_logic_vector (N-1 downto 0); -- parallel output (clocked on rising spi_2x_clk_i after last bit) |
di_rdy_o : out std_logic; -- preload lookahead: HIGH for PREFETCH cycles before last bit |
wren_i : in std_logic := 'X'; -- write enable (write di_i data at next rising spi_2x_clk_i edge) |
-- wren_i starts transmission. must be valid 1 spi_2x_clk_i cycle |
-- before current transmission ends. |
do_valid_o : out std_logic; -- do_o data valid signal, valid during one spi_2x_clk_i rising edge. |
--- debug ports: can be removed for the application circuit --- |
do_transfer_o : out std_logic; -- debug: internal transfer driver |
wren_o : out std_logic; -- debug: internal state of the wren_i pulse stretcher |
wren_ack_o : out std_logic; -- debug: wren ack from state machine |
rx_bit_reg_o : out std_logic; -- debug: internal rx bit |
state_dbg_o : out std_logic_vector (5 downto 0); -- debug: internal state register |
rx_bit_reg_o : out std_logic; -- debug: internal rx bit |
core_clk_o : out std_logic; |
core_n_clk_o : out std_logic; |
sh_reg_dbg_o : out std_logic_vector (N-1 downto 0) -- debug: internal shift register |
); |
end spi_master; |
152,46 → 161,67
-- all signals are clocked at the rising edge of the system clock 'spi_2x_clk_i'. |
--================================================================================================================ |
architecture RTL of spi_master is |
|
-- core clocks, generated from 'spi_2x_clk_i' |
signal core_clk : std_logic; -- continuous fsm core clock, positive logic |
signal core_n_clk : std_logic; -- continuous fsm core clock, negative logic |
-- spi base clock, generated from 'spi_2x_clk_i' |
signal spi_clk : std_logic; -- spi bus output clock, positive polarity |
signal spi_n_clk : std_logic; -- spi bus output clock, negative polarity |
-- core clocks, generated from 'spi_2x_clk_i': initialized to differential values |
signal core_clk : std_logic := '0'; -- continuous fsm core clock, positive logic |
signal core_n_clk : std_logic := '1'; -- continuous fsm core clock, negative logic |
-- spi bus clock, generated from the CPOL selected core clock polarity |
signal spi_clk : std_logic; -- spi bus output clock |
-- core fsm clock |
signal fsm_clk : std_logic; -- data change clock: fsm registers clocked at rising edge |
signal samp_clk : std_logic; -- data sampling clock: input serial data clocked at rising edge |
signal fsm_clk : std_logic; -- data change clock: fsm registers clocked at rising edge |
signal samp_clk : std_logic; -- data sampling clock: input serial data clocked at rising edge |
-- |
-- GLOBAL RESET: |
-- all signals are initialized to zero at GSR (global set/reset) by giving explicit |
-- initialization values at declaration. This is needed for all Xilinx FPGAs, and |
-- especially for the Spartan-6 and newer CLB architectures, where a local reset can |
-- reduce the usability of the slice registers, due to the need to share the control |
-- set (RESET/PRESET, CLOCK ENABLE and CLOCK) by all 8 registers in a slice. |
-- By using GSR for the initialization, and reducing RESET local init to the bare |
-- essential, the model achieves better LUT/FF packing and CLB usability. |
-- |
-- internal state signals for register and combinational stages |
signal state_next : natural range N+1 downto 0 := 0; |
signal state_reg : natural range N+1 downto 0 := 0; |
signal state_next : natural range N+1 downto 0 := 0; |
-- shifter signals for register and combinational stages |
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal sh_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal sh_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- input bit sampled buffer |
signal rx_bit_reg : std_logic := '0'; |
-- buffered di_i data signals for register and combinational stages |
signal di_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- internal wren_i stretcher for fsm combinational stage |
signal wren : std_logic := '0'; |
signal wren_ack_next : std_logic := '0'; |
signal wren_ack_reg : std_logic := '0'; |
-- internal SSEL enable control signals |
signal ena_ssel_next : std_logic := '0'; |
signal ena_ssel_reg : std_logic := '0'; |
signal ena_ssel_next : std_logic := '0'; |
-- internal SCK enable control signals |
signal ena_sck_next : std_logic := '0'; |
signal ena_sck_reg : std_logic := '0'; |
signal ena_sck_next : std_logic := '0'; |
-- buffered do_o data signals for register and combinational stages |
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal do_buffer_reg : std_logic_vector (N-1 downto 0) := (others => '0'); |
signal do_buffer_next : std_logic_vector (N-1 downto 0) := (others => '0'); |
-- internal signal to flag transfer to do_buffer_reg |
signal do_transfer_next : std_logic := '0'; |
signal do_transfer_reg : std_logic := '0'; |
signal do_transfer_next : std_logic := '0'; |
-- internal registered do_valid_o |
signal do_valid_oreg : std_logic := '0'; |
signal do_valid_reg : std_logic := '0'; |
-- internal input data request signal |
signal di_req_next : std_logic := '0'; |
signal di_req_reg : std_logic := '0'; |
-- cross-clock do_valid_o pipeline |
signal do_valid_next : std_logic := '0'; |
-- internal registered di_rdy_o |
signal di_rdy_oreg : std_logic := '0'; |
signal di_rdy_reg : std_logic := '1'; |
signal di_rdy_next : std_logic := '1'; |
signal do_valid_A : std_logic := '0'; |
signal do_valid_B : std_logic := '0'; |
signal do_valid_C : std_logic := '0'; |
signal do_valid_D : std_logic := '0'; |
signal do_valid_o_reg : std_logic := '0'; |
-- cross-clock di_req_o pipeline |
signal di_req_o_next : std_logic := '1'; |
signal di_req_o_A : std_logic := '0'; |
signal di_req_o_B : std_logic := '0'; |
signal di_req_o_C : std_logic := '0'; |
signal di_req_o_D : std_logic := '0'; |
signal di_req_o_reg : std_logic := '1'; |
begin |
--============================================================================================= |
-- GENERICS CONSTRAINTS CHECKING |
200,6 → 230,10
assert N >= 8 |
report "Generic parameter 'N' error: SPI shift register size needs to be 8 bits minimum" |
severity FAILURE; |
-- minimum prefetch lookahead check: PREFETCH must be at least 2 |
assert PREFETCH >= 2 |
report "Generic parameter 'PREFETCH' error: needs to be 2 minimum" |
severity FAILURE; |
-- maximum prefetch lookahead check |
assert PREFETCH <= N-5 |
report "Generic parameter 'PREFETCH' error: lookahead count out of range, needs to be N-5 maximum" |
208,66 → 242,48
--============================================================================================= |
-- CLOCK GENERATION |
--============================================================================================= |
-- The clock generation block derive 2 sets of signals from the 2x spi base clock, with positive |
-- and negative phase. The core clock runs continuously and drives the core fsm, and the spi clock |
-- drives the spi bus 'spi_sck_o' output directly, and is controlled by 'ena_sck_reg', driven by the |
-- fsm logic. |
-- The 2 clocks are generated each with one FFD, with a selected phase to drive the core with rising |
-- edge clocks only. The 2 sets of clocks have similar logic delays, which is important for the data |
-- setup time of the serial input related to the data setup time of the serial output. |
-- The clock generation block derives 2 continuous antiphase signals from the 2x spi base clock |
-- for the core clocking. |
-- The 2 clock phases are generated by separate and synchronous FFDs, and should have only |
-- interconnect delays. |
-- The clock phase is selected for serial input sampling, fsm clocking, and spi SCK output, based |
-- on the configuration of CPOL and CPHA. |
-- Each phase is selected so that all the registers can be clocked with a rising edge on all SPI |
-- modes. |
----------------------------------------------------------------------------------------------- |
-- divide down 'spi_2x_clk_i' by 2 |
-- this should be synthesized as a single ffd with sync reset |
core_clock_gen_proc : process (rst_i, spi_2x_clk_i) is |
-- this should be synthesized as two synchronous FFDs |
core_clock_gen_proc : process (spi_2x_clk_i) is |
begin |
if spi_2x_clk_i'event and spi_2x_clk_i = '1' then |
if rst_i = '1' then |
core_clk <= '0'; -- positive logic clk: idle LOW |
core_n_clk <= '1'; -- negative logic clk: idle HIGH |
else |
core_clk <= core_n_clk; -- divided by 2 clock, differential |
core_n_clk <= not core_n_clk; |
end if; |
core_clk <= core_n_clk; -- divided by 2 clock, differential |
core_n_clk <= not core_n_clk; |
end if; |
end process core_clock_gen_proc; |
----------------------------------------------------------------------------------------------- |
-- spi sck generator: divide input 2x clock by 2, with a CE controlled by the fsm |
-- this should be sinthesized as a single FFD with sync reset and clock enable |
spi_clock_gen_proc : process (rst_i, spi_2x_clk_i, ena_sck_reg) is |
begin |
if spi_2x_clk_i'event and spi_2x_clk_i = '1' then |
if rst_i = '1' then |
spi_clk <= '0'; -- positive logic clk: idle LOW |
spi_n_clk <= '1'; -- negative logic clk: idle HIGH |
elsif ena_sck_reg = '1' then |
spi_clk <= spi_n_clk; -- divided by 2 clock, differential |
spi_n_clk <= not spi_n_clk; |
end if; |
end if; |
end process spi_clock_gen_proc; |
----------------------------------------------------------------------------------------------- |
-- SCK out logic: generate sck from spi_clk or spi_n_clk depending on CPOL |
-- spi clk generator: generate spi_clk from core_clk depending on CPOL |
spi_sck_cpol_0_proc : |
if CPOL = '0' generate |
begin |
spi_sck_o <= spi_clk; -- for CPOL=0, spi clk has idle LOW |
spi_clk <= core_clk; -- for CPOL=0, spi clk has idle LOW |
end generate; |
spi_sck_cpol_1_proc : |
if CPOL = '1' generate |
begin |
spi_sck_o <= spi_n_clk; -- for CPOL=1, spi clk has idle HIGH |
spi_clk <= core_n_clk; -- for CPOL=1, spi clk has idle HIGH |
end generate; |
----------------------------------------------------------------------------------------------- |
-- Sampling clock generation: generate 'samp_clk' from core_clk or core_n_clk depending on CPHA |
-- always sample data at the half-cycle of the fsm update cell |
smp_cpha_0_proc : |
if CPHA = '0' generate |
begin |
samp_clk <= spi_clk; -- for CPHA=0, sample at end of sample cell |
samp_clk <= core_clk; |
end generate; |
smp_cpha_1_proc : |
if CPHA = '1' generate |
begin |
samp_clk <= spi_n_clk; -- for CPHA=1, sample at end of sample cell |
samp_clk <= core_n_clk; |
end generate; |
----------------------------------------------------------------------------------------------- |
-- FSM clock generation: generate 'fsm_clk' from core_clk or core_n_clk depending on CPHA |
283,66 → 299,93
end generate; |
|
--============================================================================================= |
-- RTL REGISTER PROCESSES |
-- REGISTERED INPUTS |
--============================================================================================= |
-- rx bit flop: capture rx bit after SAMPLE edge of sck |
-- |
-- ATTENTION: BY REMOVING THE FLIP-FLOP (DIRECT CONNECTION) WE GET HIGHER PERFORMANCE, DUE TO |
-- REDUCED DEMAND ON MISO SETUP TIME. |
-- |
rx_bit_proc : process (samp_clk, spi_miso_i) is |
begin |
if samp_clk'event and samp_clk = '1' then |
-- if samp_clk'event and samp_clk = '1' then -- uncomment to have the input register |
rx_bit_reg <= spi_miso_i; |
end if; |
-- end if; -- uncomment to have the input register |
end process rx_bit_proc; |
-- state and data registers: synchronous to the spi base reference clock |
core_reg_proc : process (fsm_clk, rst_i) is |
|
--============================================================================================= |
-- RTL REGISTER PROCESSES |
--============================================================================================= |
-- fsm state and data registers: synchronous to the spi base reference clock |
core_reg_proc : process (fsm_clk) is |
begin |
-- FFD registers clocked on rising edge and cleared on sync rst_i |
if fsm_clk'event and fsm_clk = '1' then |
if rst_i = '1' then -- sync reset |
sh_reg <= (others => '0'); |
state_reg <= 0; |
ena_ssel_reg <= '0'; |
ena_sck_reg <= '0'; |
do_buffer_reg <= (others => '0'); |
do_transfer_reg <= '0'; |
state_reg <= 0; -- only provide local reset for the state machine |
else |
sh_reg <= sh_next; |
state_reg <= state_next; |
ena_ssel_reg <= ena_ssel_next; |
ena_sck_reg <= ena_sck_next; |
do_buffer_reg <= do_buffer_next; |
do_transfer_reg <= do_transfer_next; |
state_reg <= state_next; -- state register |
end if; |
end if; |
-- FFD registers clocked on rising edge |
if fsm_clk'event and fsm_clk = '1' then |
sh_reg <= sh_next; -- shift register |
ena_ssel_reg <= ena_ssel_next; -- spi select enable |
ena_sck_reg <= ena_sck_next; -- spi clock enable |
do_buffer_reg <= do_buffer_next; -- registered output data buffer |
do_transfer_reg <= do_transfer_next; -- output data transferred to buffer |
di_req_reg <= di_req_next; -- input data request |
wren_ack_reg <= wren_ack_next; -- wren ack for data load synchronization |
end if; |
end process core_reg_proc; |
-- cross-clock registers change on half-cycle of sck (ffd with async clear) |
-- this is to prevent fsm state change glitches causing setup time artifacts at async clk_i edges |
cross_reg_proc : process (rst_i, fsm_clk, ena_ssel_reg) is |
|
--============================================================================================= |
-- CROSS-CLOCK PIPELINE TRANSFER LOGIC |
--============================================================================================= |
-- do_valid_o and di_req_o strobe output logic |
-- this is a delayed pulse generator with a ripple-transfer FFD pipeline, that generates a |
-- fixed-length delayed pulse for the output flags, at the parallel clock domain |
out_transfer_proc : process ( clk_i, do_transfer_reg, di_req_reg, |
do_valid_A, do_valid_B, do_valid_D, |
di_req_o_A, di_req_o_B, di_req_o_D) is |
begin |
if ena_ssel_reg = '0' then |
di_rdy_reg <= '1'; -- di_rdy true during idle |
elsif fsm_clk'event and fsm_clk = '0' then -- on half-cycle edge, update cross registers |
di_rdy_reg <= di_rdy_next; |
if clk_i'event and clk_i = '1' then -- clock at parallel port clock |
-- do_transfer_reg -> do_valid_o_reg |
do_valid_A <= do_transfer_reg; -- the input signal must be at least 2 clocks long |
do_valid_B <= do_valid_A; -- feed it to a ripple chain of FFDs |
do_valid_C <= do_valid_B; |
do_valid_D <= do_valid_C; |
do_valid_o_reg <= do_valid_next; -- registered output pulse |
-------------------------------- |
-- di_req_reg -> di_req_o_reg |
di_req_o_A <= di_req_reg; -- the input signal must be at least 2 clocks long |
di_req_o_B <= di_req_o_A; -- feed it to a ripple chain of FFDs |
di_req_o_C <= di_req_o_B; |
di_req_o_D <= di_req_o_C; |
di_req_o_reg <= di_req_o_next; -- registered output pulse |
end if; |
if rst_i = '1' then |
do_valid_reg <= '0'; -- async clear on do_valid |
elsif fsm_clk'event and fsm_clk = '0' then -- on half-cycle edge, update cross registers |
do_valid_reg <= do_valid_next; |
end if; |
end process cross_reg_proc; |
-- parallel i/o interface registers: synchronous to the parallel interface clock |
par_reg_proc : process (rst_i, par_clk_i, ena_ssel_reg) is |
-- generate a 2-clocks pulse at the 3rd clock cycle |
do_valid_next <= do_valid_A and do_valid_B and not do_valid_D; |
di_req_o_next <= di_req_o_A and di_req_o_B and not di_req_o_D; |
end process out_transfer_proc; |
-- parallel load input registers: data register and write enable |
in_transfer_proc: process (clk_i, wren_i, wren_ack_reg) is |
begin |
if par_clk_i'event and par_clk_i = '1' then |
if rst_i = '1' then -- sync reset |
di_rdy_oreg <= '0'; |
do_valid_oreg <= '0'; |
di_reg <= (others => '0'); |
else |
di_rdy_oreg <= di_rdy_reg; -- di_rdy is synchronous to parallel interface clock |
do_valid_oreg <= (do_valid_reg and ena_ssel_reg) or (do_transfer_reg and not ena_ssel_reg); |
di_reg <= di_i; -- sample di_i at interface clock |
-- registered data input, input register with clock enable |
if clk_i'event and clk_i = '1' then |
if wren_i = '1' then |
di_reg <= di_i; -- parallel data input buffer register |
end if; |
end if; |
end process par_reg_proc; |
end if; |
-- stretch wren pulse to be detected by spi fsm (ffd with sync preset and sync reset) |
if clk_i'event and clk_i = '1' then |
if wren_i = '1' then -- wren_i is the sync preset for wren |
wren <= '1'; |
elsif wren_ack_reg = '1' then -- wren_ack is the sync reset for wren |
wren <= '0'; |
end if; |
end if; |
end process in_transfer_proc; |
|
--============================================================================================= |
-- RTL COMBINATIONAL LOGIC PROCESSES |
349,69 → 392,60
--============================================================================================= |
-- state and datapath combinational logic |
core_combi_proc : process ( sh_reg, state_reg, rx_bit_reg, ena_ssel_reg, ena_sck_reg, do_buffer_reg, |
do_valid_reg, do_transfer_reg, di_reg, di_rdy_reg, wren_i ) is |
do_transfer_reg, di_reg, wren) is |
begin |
sh_next <= sh_reg; -- all output signals are assigned to (avoid latches) |
ena_ssel_next <= ena_ssel_reg; |
ena_sck_next <= ena_sck_reg; |
do_buffer_next <= do_buffer_reg; |
do_valid_next <= do_valid_reg; |
do_transfer_next <= do_transfer_reg; |
di_rdy_next <= di_rdy_reg; |
spi_mosi_o <= '0'; -- will output '0' when shifter is empty |
state_next <= state_reg - 1; -- next state is next bit |
ena_ssel_next <= ena_ssel_reg; -- controls the slave select line |
ena_sck_next <= ena_sck_reg; -- controls the clock enable of spi sck line |
do_buffer_next <= do_buffer_reg; -- output data buffer |
do_transfer_next <= do_transfer_reg; -- output data flag |
wren_ack_next <= '0'; -- remove data load ack for all but the load stages |
di_req_next <= '0'; -- prefetch data request: deassert when shifting data |
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
state_next <= state_reg - 1; -- update next state at each sck pulse |
case state_reg is |
when (N+1) => -- this state is to enable SSEL before SCK |
when (N+1) => -- this state is to enable SSEL before SCK |
ena_ssel_next <= '1'; -- tx in progress: will assert SSEL |
ena_sck_next <= '1'; -- enable SCK on next cycle (stays off on first SSEL clock cycle) |
di_rdy_next <= '0'; -- deassert next-data request when shifting data |
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
when (N) => -- deassert 'di_rdy' |
di_rdy_next <= '0'; -- deassert next-data request when shifting data |
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
when (N) => -- deassert 'di_rdy' |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when (N-1) downto (PREFETCH+3) => -- if rx data is valid, raise 'do_valid'. remove 'do_transfer' |
di_rdy_next <= '0'; -- deassert next-data request when start shifting |
do_valid_next <= do_transfer_reg; -- assert valid rx data, with plenty of pipeline delay for 'do_buffer' |
when (N-1) downto (PREFETCH+3) => -- if rx data is valid, raise 'do_valid'. remove 'do_transfer' |
do_transfer_next <= '0'; -- reset transfer signal |
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when (PREFETCH+2) downto 2 => -- raise prefetch 'di_rdy_next' signal and remove 'do_valid' |
di_rdy_next <= '1'; -- request data in advance to allow for pipeline delays |
do_valid_next <= '0'; -- make do_valid_o HIGH for one cycle only |
spi_mosi_o <= sh_reg(N-1); -- shift out tx bit from the MSb |
when (PREFETCH+2) downto 2 => -- raise prefetch 'di_req_o_next' signal and remove 'do_valid' |
di_req_next <= '1'; -- request data in advance to allow for pipeline delays |
sh_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift inner bits |
sh_next(0) <= rx_bit_reg; -- shift in rx bit into LSb |
when 1 => -- transfer rx data to do_buffer and restart if wren |
when 1 => -- transfer rx data to do_buffer and restart if wren |
di_req_next <= '1'; -- request data in advance to allow for pipeline delays |
do_buffer_next(N-1 downto 1) <= sh_reg(N-2 downto 0); -- shift rx data directly into rx buffer |
do_buffer_next(0) <= rx_bit_reg; -- shift last rx bit into rx buffer |
do_transfer_next <= '1'; -- signal transfer to do_buffer |
spi_mosi_o <= sh_reg(N-1); -- shift out last tx bit from the MSb |
if wren_i = '1' then -- load tx register if valid data present at di_i |
if wren = '1' then -- load tx register if valid data present at di_i |
state_next <= N; -- next state is top bit of new data |
sh_next <= di_reg; -- load parallel data from di_reg into shifter |
ena_sck_next <= '1'; -- SCK enabled |
wren_ack_next <= '1'; -- acknowledge data in transfer |
else |
ena_sck_next <= '0'; -- SCK disabled: tx empty, no data to send |
end if; |
when 0 => |
di_req_next <= '1'; -- will request data if shifter empty |
ena_sck_next <= '0'; -- SCK disabled: tx empty, no data to send |
di_rdy_next <= '1'; -- will request data if shifter empty |
do_valid_next <= do_transfer_reg; -- assert valid rx data after data received, when interface idle |
if wren_i = '1' then -- load tx register if valid data present at di_i |
if wren = '1' then -- load tx register if valid data present at di_i |
ena_ssel_next <= '1'; -- enable interface SSEL |
state_next <= N+1; -- start from idle: let one cycle for SSEL settling |
do_valid_next <= '0'; -- start: clear rx data valid signal |
spi_mosi_o <= di_reg(N-1); -- shift out first tx bit from the MSb |
spi_mosi_o <= di_reg(N-1); -- special case: shift out first tx bit from the MSb (look ahead) |
sh_next <= di_reg; -- load bits from di_reg into shifter |
wren_ack_next <= '1'; -- acknowledge data in transfer |
else |
ena_ssel_next <= '0'; -- deassert SSEL: interface is idle |
state_next <= 0; -- when idle, keep this state |
end if; |
when others => |
null; |
state_next <= 0; -- state 0 is safe state |
end case; |
end process core_combi_proc; |
|
418,19 → 452,38
--============================================================================================= |
-- OUTPUT LOGIC PROCESSES |
--============================================================================================= |
-- output signal connections |
spi_ssel_proc: spi_ssel_o <= not ena_ssel_reg; -- drive active-low slave select line |
do_proc : do_o <= do_buffer_reg; -- do_o always available |
do_valid_proc: do_valid_o <= do_valid_oreg; -- copy registered do_valid_o to output |
di_rdy_proc: di_rdy_o <= di_rdy_oreg; -- copy registered di_rdy_o to output |
-- data output processes |
spi_ssel_o_proc: spi_ssel_o <= not ena_ssel_reg; -- drive active-low slave select line |
do_o_proc : do_o <= do_buffer_reg; -- do_o always available |
do_valid_o_proc: do_valid_o <= do_valid_o_reg; -- copy registered do_valid_o to output |
di_req_o_proc: di_req_o <= di_req_o_reg; -- copy registered di_req_o to output |
----------------------------------------------------------------------------------------------- |
-- SCK out logic: output mux for the SPI sck |
-------------------------------------------- |
-- This is modelled as a mux instead of a register because it requires a FDCPE (ffd with preset and clear), |
-- which generates very inefficient logic in Spartan-6. Instead, we have a mux that translates to an AND gate, |
-- and can be optimized to a fast CLB gate. |
-- Combinational output gate for the SPI clock: no clock edge is tested in this process, so |
-- spi_sck_o simply follows whichever source is selected by ena_sck_reg. |
-- NOTE(review): CPOL is read here but is not in the sensitivity list; presumably it is a |
-- generic/constant declared outside this view -- confirm. |
spi_sck_gen_proc : process (ena_sck_reg, spi_clk) is |
begin |
if ena_sck_reg = '1' then |
spi_sck_o <= spi_clk; -- clock enabled: pass spi_clk through to the spi_sck_o output |
else |
spi_sck_o <= CPOL; -- clock disabled: hold spi_sck_o at the CPOL (idle) level |
end if; |
end process spi_sck_gen_proc; |
|
--============================================================================================= |
-- DEBUG LOGIC PROCESSES |
--============================================================================================= |
-- these signals are useful for verification, and can be deleted or commented-out after debug. |
do_transfer_proc: do_transfer_o <= do_transfer_reg; |
state_dbg_proc: state_dbg_o <= std_logic_vector(to_unsigned(state_reg, 6)); -- export internal state to debug |
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg; |
wren_o_proc: wren_o <= wren; |
wren_ack_o_proc: wren_ack_o <= wren_ack_reg; |
sh_reg_dbg_proc: sh_reg_dbg_o <= sh_reg; -- export sh_reg to debug |
rx_bit_reg_proc: rx_bit_reg_o <= rx_bit_reg; |
core_clk_o_proc: core_clk_o <= core_clk; |
core_n_clk_o_proc: core_n_clk_o <= core_n_clk; |
|
end architecture RTL; |
|
/readme.txt
7,12 → 7,16
spi_slave.vhd spi slave module, can be used independently |
spi_loopback.vhd wrapper module for the master and slave modules |
spi_loopback_test.vhd testbench for the loopback module, test master against slave |
spi_loopback.ucf constraints file for Spartan-6, optimized for area, LUT compression. |
|
|
The original development is done in Xilinx ISE 13.1, targeted to a Spartan-6 device. |
|
Verification was done in ISIM, after Place & Route, with default constraints, for the slowest |
Spartan-6 device, tested at 50MHz for the spi_2x_clk (25MHz spi SCK), and 125MHz for the parallel |
interfaces clocks. |
Spartan-6 device; synthesis generated 59 slices, and the design was tested at 40MHz for the |
spi_2x_clk (20MHz spi SCK), and 125MHz for the parallel interface clocks. |
With the attached .ucf file, optimized for area and using LUT compression, synthesis generated |
44 slices, and the design tested OK with a 20MHz SPI clock. |
|
|
|