##// END OF EJS Templates
MiniSpartan6:...
MiniSpartan6: added FTDI chip config to switch between UART and Async FIFO. Added a few WIP designs with either the spwlight core or the FIFO_deom IP... Libs: added SpaceWire Light IP (works really well!); started design of ahb_ftdi_fifo -> same protocol as AHBUART but over FTDI's Async FIFO interface. This might lead to much faster transfers, up to 12 MB/s.

File last commit:

r681:9d85f9f8f05a default
r681:9d85f9f8f05a default
Show More
spwrecvfront_fast.vhd
424 lines | 14.9 KiB | text/x-vhdl | VhdlLexer
--
-- Front-end for SpaceWire Receiver
--
-- This entity samples the input signals DataIn and StrobeIn to detect
-- valid bit transitions. Received bits are handed to the application
-- in groups of "rxchunk" bits at a time, synchronous to the system clock.
--
-- This receiver is based on synchronous oversampling of the input signals.
-- Inputs are sampled on the rising and falling edges of an externally
-- supplied sample clock "rxclk". Therefore the maximum bitrate of the
-- incoming signal must be significantly lower than two times the "rxclk"
-- clock frequency. The maximum incoming bitrate must also be strictly
-- lower than rxchunk times the system clock frequency.
--
-- This code is tuned for implementation on Xilinx Spartan-3.
--
-- Details
-- -------
--
-- Stage A: The inputs "spw_di" and "spw_si" are handled as DDR signals,
-- synchronously sampled on both edges of "rxclk".
--
-- Stage B: The input signals are re-registered on the rising edge of "rxclk"
-- for further processing. This implies that every rising edge of "rxclk"
-- produces two new samples of "spw_di" and two new samples of "spw_si".
--
-- Stage C: Transitions in input signals are detected by comparing the XOR
-- of data and strobe to the XOR of the previous data and strobe samples.
-- If there is a difference, we know that either data or strobe has changed
-- and the new value of data is a valid new bit. Every rising edge of "rxclk"
-- thus produces zero, one or two new data bits.
--
-- Stage D: Received bits are collected in groups of "rxchunk" bits
-- (unless rxchunk=1, in which case groups of 2 bits are used). Complete
-- groups are pushed into an 8-deep cyclic buffer. A 3-bit counter "headptr"
-- indicates the current position in the cyclic buffer.
--
-- The system clock domain reads bit groups from the cyclic buffer. A tail
-- pointer indicates the next location to read from the buffer. A comparison
-- between the "tailptr" and a re-synchronized copy of the "headptr" determines
-- whether valid bits are available in the buffer.
--
-- Activity detection is based on a 3-bit counter "bitcnt". This counter is
-- incremented whenever the rxclk domain receives 1 or 2 new bits. The system
-- clock domain monitors a re-synchronized copy of the activity counter to
-- determine whether it has been updated since the previous system clock cycle.
--
-- Implementation guidelines
-- -------------------------
--
-- IOB flip-flops must be used to sample spw_di and spw_si.
-- Clock skew between the IOBs for spw_di and spw_si must be minimized.
--
-- "rxclk" must be at least as fast as the system clock;
-- "rxclk" does not need to be phase-related to the system clock;
-- it is allowed for "rxclk" to be equal to the system clock.
--
-- The following timing constraints are needed:
-- * PERIOD constraint on the system clock;
-- * PERIOD constraint on "rxclk";
-- * FROM-TO constraint from "rxclk" to system clock, equal to one "rxclk" period;
-- * FROM-TO constraint from system clock to "rxclk", equal to one "rxclk" period.
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.spwpkg.all;
-- Interface of the oversampling SpaceWire receiver front-end.
--
-- Bits decoded from spw_di / spw_si in the "rxclk" domain are handed to the
-- application in the "clk" domain, rxchunk bits at a time.
entity spwrecvfront_fast is
generic (
-- Number of bits to pass to the application per system clock.
-- No default value is given, so every instance must set this generic.
-- It also determines the width of the "inbits" port.
rxchunk: integer range 1 to 4 );
port (
-- System clock.
clk: in std_logic;
-- Sample clock.
-- The inputs spw_di and spw_si are sampled on both edges of this clock.
rxclk: in std_logic;
-- High to enable receiver; low to disable and reset receiver.
-- While low, the system clock domain registers are forced to their
-- reset values and a registered reset is sent to the rxclk domain.
rxen: in std_logic;
-- High if there has been recent activity on the input lines.
-- Registered in the clk domain; set when the re-synchronized bit counter
-- differs from the value seen on the previous clk cycle.
inact: out std_logic;
-- High if inbits contains a valid group of received bits.
-- If inbvalid='1', the application must sample inbits on
-- the rising edge of clk.
inbvalid: out std_logic;
-- Received bits (bit 0 is the earliest received bit).
inbits: out std_logic_vector(rxchunk-1 downto 0);
-- Data In signal from SpaceWire bus.
spw_di: in std_logic;
-- Strobe In signal from SpaceWire bus.
spw_si: in std_logic );
-- Turn off FSM extraction.
-- Without this, XST will happily apply one-hot encoding to rrx.headptr.
attribute FSM_EXTRACT: string;
attribute FSM_EXTRACT of spwrecvfront_fast: entity is "NO";
end entity spwrecvfront_fast;
-- Implementation of the receiver front-end.
--
-- Structure:
--  * two clocked processes sample spw_di/spw_si on the rising and falling
--    edges of rxclk (stage A);
--  * one combinatorial process computes the next state of both register
--    sets: "rrx" (rxclk domain, stages B..D) and "r" (clk domain);
--  * two clocked processes load "rrx" on rxclk and "r" on clk;
--  * an spwram instance is used as cyclic buffer between the domains and
--    syncdff instances carry the head pointer, the activity counter and
--    the reset across the clock domain boundary.
-- The spwram and syncdff components are not declared in this file;
-- presumably they come from work.spwpkg -- verify against that package.
architecture spwrecvfront_arch of spwrecvfront_fast is
-- width of bit groups in cyclic buffer;
-- typically equal to rxchunk, except when rxchunk = 1
-- (rxchunk 1, 2, 3, 4 map to memwidth 2, 2, 3, 4)
type memwidth_array_type is array(1 to 4) of integer;
constant chunk_to_memwidth: memwidth_array_type := ( 2, 2, 3, 4 );
constant memwidth: integer := chunk_to_memwidth(rxchunk);
-- registers in rxclk domain
type rxregs_type is record
-- stage B: re-register input samples
-- (index 0 = falling-edge sample, index 1 = rising-edge sample)
b_di0: std_ulogic;
b_si0: std_ulogic;
b_di1: std_ulogic;
b_si1: std_ulogic;
-- stage C: data/strobe decoding
-- c_bit holds up to two decoded bits, c_val flags how many are valid,
-- c_xor1 remembers (data xor strobe) of the last rising-edge sample.
c_bit: std_logic_vector(1 downto 0);
c_val: std_logic_vector(1 downto 0);
c_xor1: std_ulogic;
-- stage D: collect groups of memwidth bits
-- d_shift is the shift register receiving new bits at the top;
-- d_count is a one-hot counter: bit k set means k+1 more bits are
-- needed to complete the current group.
d_shift: std_logic_vector(memwidth-1 downto 0);
d_count: std_logic_vector(memwidth-1 downto 0);
-- cyclic buffer access
bufdata: std_logic_vector(memwidth-1 downto 0);
bufwrite: std_ulogic;
headptr: std_logic_vector(2 downto 0);
-- activity detection
bitcnt: std_logic_vector(2 downto 0);
end record;
-- registers in system clock domain
type regs_type is record
-- data path from buffer to output
tailptr: std_logic_vector(2 downto 0);
inbvalid: std_ulogic;
-- split 2-bit groups if rxchunk=1
splitbit: std_ulogic;
splitinx: std_ulogic;
splitvalid: std_ulogic;
-- activity detection
-- (bitcntp is the synchronized bit counter seen on the previous clk cycle)
bitcntp: std_logic_vector(2 downto 0);
inact: std_ulogic;
-- reset signal towards rxclk domain
rxdis: std_ulogic;
end record;
-- Reset values of the system clock domain registers.
-- rxdis resets to '1' so that the rxclk domain starts out in reset.
constant regs_reset: regs_type := (
tailptr => "000",
inbvalid => '0',
splitbit => '0',
splitinx => '0',
splitvalid => '0',
bitcntp => "000",
inact => '0',
rxdis => '1' );
-- Signals that are re-synchronized from rxclk to system clock domain.
type syncsys_type is record
headptr: std_logic_vector(2 downto 0); -- pointer in cyclic buffer
bitcnt: std_logic_vector(2 downto 0); -- activity detection
end record;
-- Registers.
-- r/rin: current and next state of the clk domain;
-- rrx/rrxin: current and next state of the rxclk domain.
-- Only r has an initial value; rrx is brought to a known state through
-- the synchronous reset controlled by r.rxdis.
signal r: regs_type := regs_reset;
signal rin: regs_type;
signal rrx, rrxin: rxregs_type;
-- Synchronized signals after crossing clock domains.
-- syncrx_rstn is treated as an active-low synchronous reset by the
-- rxclk domain logic below.
signal syncrx_rstn: std_logic;
signal syncsys: syncsys_type;
-- Output data from cyclic buffer.
signal s_bufdout: std_logic_vector(memwidth-1 downto 0);
-- stage A: input flip-flops for rising/falling rxclk
-- s_a_*1 : sampled on the rising edge of rxclk
-- s_a_*2 : sampled on the falling edge of rxclk
-- s_a_*0 : s_a_*2 re-registered on the next falling edge
signal s_a_di0: std_logic;
signal s_a_si0: std_logic;
signal s_a_di1: std_logic;
signal s_a_si1: std_logic;
signal s_a_di2: std_logic;
signal s_a_si2: std_logic;
-- force use of IOB flip-flops
-- (only for the registers fed directly by spw_di / spw_si)
attribute IOB: string;
attribute IOB of s_a_di1: signal is "TRUE";
attribute IOB of s_a_si1: signal is "TRUE";
attribute IOB of s_a_di2: signal is "TRUE";
attribute IOB of s_a_si2: signal is "TRUE";
begin
-- Cyclic data buffer.
-- 8 entries (abits = 3) of memwidth bits; written from the rxclk domain
-- at rrx.headptr and read from the clk domain at r.tailptr.
-- NOTE(review): the consumers of s_bufdout assume the read data lags
-- the read address by one clk cycle -- confirm against spwram.
bufmem: spwram
generic map (
abits => 3,
dbits => memwidth )
port map (
rclk => clk,
wclk => rxclk,
ren => '1',
raddr => r.tailptr,
rdata => s_bufdout,
wen => rrx.bufwrite,
waddr => rrx.headptr,
wdata => rrx.bufdata );
-- Synchronize reset signal for rxclk domain.
-- r.rxdis drives the rst input and a constant '1' is clocked through, so
-- syncrx_rstn is presumably held low while r.rxdis is high and goes high
-- a few rxclk cycles after it is released -- verify against syncdff.
syncrx_reset: syncdff
port map ( clk => rxclk, rst => r.rxdis, di => '1', do => syncrx_rstn );
-- Synchronize signals from rxclk domain to system clock domain.
-- NOTE(review): headptr and bitcnt are binary counters and each bit has
-- its own synchronizer, so bits that change together may be captured on
-- different clk cycles. The file header lists FROM-TO timing constraints
-- between the two clocks as required; confirm they are applied.
syncsys_headptr0: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.headptr(0), do => syncsys.headptr(0) );
syncsys_headptr1: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.headptr(1), do => syncsys.headptr(1) );
syncsys_headptr2: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.headptr(2), do => syncsys.headptr(2) );
syncsys_bitcnt0: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.bitcnt(0), do => syncsys.bitcnt(0) );
syncsys_bitcnt1: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.bitcnt(1), do => syncsys.bitcnt(1) );
syncsys_bitcnt2: syncdff
port map ( clk => clk, rst => r.rxdis, di => rrx.bitcnt(2), do => syncsys.bitcnt(2) );
-- sample inputs on rising edge of rxclk
process (rxclk) is
begin
if rising_edge(rxclk) then
s_a_di1 <= spw_di;
s_a_si1 <= spw_si;
end if;
end process;
-- sample inputs on falling edge of rxclk
process (rxclk) is
begin
if falling_edge(rxclk) then
s_a_di2 <= spw_di;
s_a_si2 <= spw_si;
-- reregister inputs in fabric flip-flops
-- (these carry the falling-edge sample taken one rxclk period earlier)
s_a_di0 <= s_a_di2;
s_a_si0 <= s_a_si2;
end if;
end process;
-- combinatorial process
-- Computes the next state of both register sets from the current state.
-- "v" and "vrx" start as copies of the current registers and are
-- modified step by step; later assignments override earlier ones, so the
-- order of the statements below matters (the resets come last).
process (r, rrx, rxen, syncrx_rstn, syncsys, s_bufdout, s_a_di0, s_a_si0, s_a_di1, s_a_si1)
variable v: regs_type;
variable vrx: rxregs_type;
begin
v := r;
vrx := rrx;
-- ---- SAMPLE CLOCK DOMAIN ----
-- stage B: re-register input samples
-- After this stage, sample 0 (falling edge) is older than sample 1
-- (rising edge) and both are aligned to the rising edge of rxclk.
vrx.b_di0 := s_a_di0;
vrx.b_si0 := s_a_si0;
vrx.b_di1 := s_a_di1;
vrx.b_si1 := s_a_si1;
-- stage C: decode data/strobe and detect valid bits
-- (b_di0 xor b_si0 xor c_xor1) = '1' : sample 0 differs from the last
--   sample of the previous cycle, so sample 0 carries a new bit.
-- (b_di0 xor b_si0 xor b_di1 xor b_si1) = '1' : sample 1 differs from
--   sample 0, so sample 1 carries a new bit.
-- c_bit(0) takes the first new bit: sample 0 if it is new, else sample 1.
if (rrx.b_di0 xor rrx.b_si0 xor rrx.c_xor1) = '1' then
vrx.c_bit(0) := rrx.b_di0;
else
vrx.c_bit(0) := rrx.b_di1;
end if;
vrx.c_bit(1) := rrx.b_di1;
vrx.c_val(0) := (rrx.b_di0 xor rrx.b_si0 xor rrx.c_xor1) or
(rrx.b_di0 xor rrx.b_si0 xor rrx.b_di1 xor rrx.b_si1);
vrx.c_val(1) := (rrx.b_di0 xor rrx.b_si0 xor rrx.c_xor1) and
(rrx.b_di0 xor rrx.b_si0 xor rrx.b_di1 xor rrx.b_si1);
-- remember data xor strobe of the newest sample for the next cycle
vrx.c_xor1 := rrx.b_di1 xor rrx.b_si1;
-- Note:
-- c_val = "00" if no new bits are received
-- c_val = "01" if one new bit is received; the new bit is in c_bit(0)
-- c_val = "11" if two new bits are received
-- stage D: collect groups of memwidth bits
if rrx.c_val(0) = '1' then
-- shift incoming bits into register
-- (new bits enter at the top, so the earliest bit ends up in bit 0)
if rrx.c_val(1) = '1' then
vrx.d_shift := rrx.c_bit & rrx.d_shift(memwidth-1 downto 2);
else
vrx.d_shift := rrx.c_bit(0) & rrx.d_shift(memwidth-1 downto 1);
end if;
-- prepare to store a group of memwidth bits
-- bufdata is computed on every new bit; it is only written to the
-- buffer when bufwrite is set below.
if rrx.d_count(0) = '1' then
-- only one more bit needed
-- (if two bits arrived, the second one stays in d_shift only
-- and becomes the first bit of the next group)
vrx.bufdata := rrx.c_bit(0) & rrx.d_shift(memwidth-1 downto 1);
else
vrx.bufdata := rrx.c_bit & rrx.d_shift(memwidth-1 downto 2);
end if;
-- countdown nr of needed bits (one-hot counter)
-- rotate right by the number of bits received (2 or 1)
if rrx.c_val(1) = '1' then
vrx.d_count := rrx.d_count(1 downto 0) & rrx.d_count(memwidth-1 downto 2);
else
vrx.d_count := rrx.d_count(0 downto 0) & rrx.d_count(memwidth-1 downto 1);
end if;
end if;
-- stage D: store groups of memwidth bits
-- A group is complete when one bit was needed and at least one arrived,
-- or when two bits were needed and two arrived.
vrx.bufwrite := rrx.c_val(0) and (rrx.d_count(0) or (rrx.c_val(1) and rrx.d_count(1)));
-- Increment head pointer.
-- (in the cycle in which rrx.bufwrite enables the write at rrx.headptr)
if rrx.bufwrite = '1' then
vrx.headptr := std_logic_vector(unsigned(rrx.headptr) + 1);
end if;
-- Activity detection.
-- The counter advances by one per rxclk cycle with new bits,
-- regardless of whether one or two bits were received.
if rrx.c_val(0) = '1' then
vrx.bitcnt := std_logic_vector(unsigned(rrx.bitcnt) + 1);
end if;
-- Synchronous reset of rxclk domain.
-- d_count restarts with only its top bit set (memwidth bits needed).
-- The stage B samples, c_bit, d_shift and bufdata are not reset.
if syncrx_rstn = '0' then
vrx.c_val := "00";
vrx.c_xor1 := '0';
vrx.d_count := (others => '0');
vrx.d_count(memwidth-1) := '1';
vrx.bufwrite := '0';
vrx.headptr := "000";
vrx.bitcnt := "000";
end if;
-- ---- SYSTEM CLOCK DOMAIN ----
-- Compare tailptr to headptr to decide whether there is new data.
-- If the values are equal, we are about to read a location which has
-- not yet been written by the rxclk domain.
if r.tailptr = syncsys.headptr then
-- No more data in cyclic buffer.
v.inbvalid := '0';
else
-- Reading valid data from cyclic buffer.
v.inbvalid := '1';
-- Increment tail pointer.
-- (for rxchunk = 1 the tail pointer is advanced further below,
-- once per two output bits)
if rxchunk /= 1 then
v.tailptr := std_logic_vector(unsigned(r.tailptr) + 1);
end if;
end if;
-- If rxchunk=1, split 2-bit groups into separate bits.
-- rxchunk is a generic, so this condition is constant per instance.
if rxchunk = 1 then
-- Select one of the two bits.
if r.splitinx = '0' then
v.splitbit := s_bufdout(0);
else
v.splitbit := s_bufdout(1);
end if;
-- Indicate valid bit.
v.splitvalid := r.inbvalid;
-- Increment tail pointer.
-- splitinx toggles for every valid bit; the tail pointer advances
-- only on the cycles where r.splitinx is '0'.
if r.inbvalid = '1' then
v.splitinx := not r.splitinx;
if r.splitinx = '0' then
v.tailptr := std_logic_vector(unsigned(r.tailptr) + 1);
end if;
end if;
end if;
-- Activity detection.
-- inact is set when the synchronized bit counter changed since the
-- previous clk cycle.
v.bitcntp := syncsys.bitcnt;
if r.bitcntp = syncsys.bitcnt then
v.inact := '0';
else
v.inact := '1';
end if;
-- Synchronous reset of system clock domain.
if rxen = '0' then
v := regs_reset;
end if;
-- Register rxen to ensure glitch-free signal to rxclk domain
-- (assigned after the reset above, so rxdis always follows rxen)
v.rxdis := not rxen;
-- drive outputs
-- All outputs come from registers, except inbits for rxchunk /= 1,
-- which is taken directly from the buffer read data.
inact <= r.inact;
if rxchunk = 1 then
inbvalid <= r.splitvalid;
inbits(0) <= r.splitbit;
else
inbvalid <= r.inbvalid;
inbits <= s_bufdout;
end if;
-- update registers
rrxin <= vrx;
rin <= v;
end process;
-- update registers on rising edge of rxclk
process (rxclk) is
begin
if rising_edge(rxclk) then
rrx <= rrxin;
end if;
end process;
-- update registers on rising edge of system clock
process (clk) is
begin
if rising_edge(clk) then
r <= rin;
end if;
end process;
end architecture spwrecvfront_arch;