OpenCores
URL https://opencores.org/ocsvn/async_sdm_noc/async_sdm_noc/trunk

Subversion Repositories async_sdm_noc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /async_sdm_noc/branches/clos_opt/sdm/src
    from Rev 47 to Rev 57
    Reverse comparison

Rev 47 → Rev 57

/input_buf.v
0,0 → 1,266
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
Input buffer for Wormhole/SDM routers.
*** SystemVerilog is used ***
References
* Lookahead pipelines
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
* Channel slicing
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443.
* SDM
Wei Song and Doug Edwards, Asynchronous spatial division multiplexing router, Microprocessors and Microsystems, 2011(35), 85-97.
History:
05/05/2009 Initial version. <wsong83@gmail.com>
20/09/2010 Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com>
24/05/2011 Clean up for opensource. <wsong83@gmail.com>
01/06/2011 Use the comp4 common comparator rather than the chain_comparator defined in this module. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// Input buffer for one virtual circuit of one router port.
// PD pipe4 stages are chained from the i0..i4/ia side (stage index PD) down to the
// o0..o4/oa side (stage index 0). Stage index 1 is tapped to fetch the target address,
// which routing_decision compares against addrx/addry; the result is held by c2p gates
// and presented to the switch allocator on arb_r, acknowledged through arb_ra.
// NOTE(review): the address tap indexes pd*[1][0..3], so PD >= 1 and SCN >= 4 (DW >= 8)
// are required for this module to elaborate.
module inp_buf (/*AUTOARG*/
// Outputs
o0, o1, o2, o3, o4, ia, arb_r,
// Inputs
rst_n, i0, i1, i2, i3, i4, oa, addrx, addry, arb_ra
);

//-------------------------- parameters ---------------------------------------//
parameter DIR = 0; // the port direction: south, west, north, east, and local
parameter RN = 4; // the number of request outputs, must match the direction
parameter DW = 16; // the data-width of the data-path
parameter PD = 2; // the depth of the input buffer
parameter SCN = DW/2; // number of sub-channels; each one owns one bit of i0..i3 (two data bits)

//-------------------------- I/O ports ---------------------------------------//
input rst_n; // global reset, active low
input [SCN-1:0] i0, i1, i2, i3; // data input
output [SCN-1:0] o0, o1, o2, o3; // data output
// with channel slicing every sub-channel has its own eof (i4/o4) and ack (ia/oa) wire,
// otherwise a single eof/ack pair serves the whole data-path
`ifdef ENABLE_CHANNEL_SLICING
input [SCN-1:0] i4, oa;
output [SCN-1:0] o4, ia;
`else
input i4, oa;
output o4, ia;
`endif
input [7:0] addrx, addry; // the address this buffer compares incoming targets against
output [RN-1:0] arb_r; // requests to the switch allocator
input arb_ra; // ack for arb_r
//-------------------------- control signals ---------------------------------------//
wire rten; // routing enable
wire frame_end; // identify the end of a frame
wire [7:0] pipe_xd, pipe_yd; // the target address from the incoming frame
wire [PD:0][SCN-1:0] pd0, pd1, pd2, pd3; // data wires for the internal pipeline stages
wire [5:0] raw_dec; // the routing decision from the comparator
wire [4:0] dec_reg; // the routing decision kept by C-gates
wire x_equal; // addr x = target x
wire rt_err; // route decoder error
wire rt_ack; // route build ack
`ifdef ENABLE_CHANNEL_SLICING
wire [SCN-1:0] rtrst; // rt decoder reset for each sub-channel
wire [PD:0][SCN-1:0] pd4, pda, pdan; // data wires for the internal pipeline stages

`else
wire rtrst; // rt decode reset
wire [PD:0] pd4, pda, pdan; // data wires for the internal pipeline stages
`endif // !`ifdef ENABLE_CHANNEL_SLICING

genvar i, j;

//------------------------- pipelines ------------------------------------- //
// Stage DP[i] takes its inputs from index i+1 and drives index i, so data moves from
// index PD (module inputs) towards index 0 (module outputs).
generate for(i=0; i<PD; i++) begin: DP
`ifdef ENABLE_CHANNEL_SLICING
// one independent 2-bit wide pipe4 per sub-channel
for(j=0; j<SCN; j++) begin: SC
pipe4 #(.DW(2))
P (
.o0 ( pd0[i][j] ),
.o1 ( pd1[i][j] ),
.o2 ( pd2[i][j] ),
.o3 ( pd3[i][j] ),
.o4 ( pd4[i][j] ),
.ia ( pda[i+1][j] ),
.i0 ( pd0[i+1][j] ),
.i1 ( pd1[i+1][j] ),
.i2 ( pd2[i+1][j] ),
.i3 ( pd3[i+1][j] ),
.i4 ( pd4[i+1][j] ),
.oa ( pdan[i][j] )
);
end // block: SC

`else // !`ifdef ENABLE_CHANNEL_SLICING
// a single pipe4 covering the whole data-path
pipe4 #(.DW(DW))
P (
.o0 ( pd0[i] ),
.o1 ( pd1[i] ),
.o2 ( pd2[i] ),
.o3 ( pd3[i] ),
.o4 ( pd4[i] ),
.ia ( pda[i+1] ),
.i0 ( pd0[i+1] ),
.i1 ( pd1[i+1] ),
.i2 ( pd2[i+1] ),
.i3 ( pd3[i+1] ),
.i4 ( pd4[i+1] ),
.oa ( pdan[i] )
);
`endif // !`ifdef ENABLE_CHANNEL_SLICING
end // block: DP
endgenerate

// The oa input of stage i (i >= 1) is the NOR of the ia and o4 (eof) outputs of
// stage i-1, forced to 0 while rst_n is low. pdan[0] is driven by subc_ctl below.
generate for(i=1; i<PD; i++) begin: DPA
assign pdan[i] = rst_n ? ~(pda[i]|pd4[i-1]) : 0;
end
endgenerate

// the ack returned upstream is the ia output of stage PD-1 OR-ed with its eof output
assign ia = pda[PD]|pd4[PD-1];
// module inputs enter at index PD, module outputs leave at index 0
assign pd0[PD] = i0;
assign pd1[PD] = i1;
assign pd2[PD] = i2;
assign pd3[PD] = i3;
assign pd4[PD] = i4;
assign o0 = pd0[0];
assign o1 = pd1[0];
assign o2 = pd2[0];
assign o3 = pd3[0];
assign o4 = pd4[0];
//---------------------------- route decoder related -------------------------- //
// fetch the x and y target
// Sub-channels 0 and 1 of stage index 1 form pipe_xd, sub-channels 2 and 3 form pipe_yd;
// every bit is masked by rten so the comparator only sees the address while routing is enabled.
and Px_0 (pipe_xd[0], rten, pd0[1][0]);
and Px_1 (pipe_xd[1], rten, pd1[1][0]);
and Px_2 (pipe_xd[2], rten, pd2[1][0]);
and Px_3 (pipe_xd[3], rten, pd3[1][0]);
and Px_4 (pipe_xd[4], rten, pd0[1][1]);
and Px_5 (pipe_xd[5], rten, pd1[1][1]);
and Px_6 (pipe_xd[6], rten, pd2[1][1]);
and Px_7 (pipe_xd[7], rten, pd3[1][1]);
and Py_0 (pipe_yd[0], rten, pd0[1][2]);
and Py_1 (pipe_yd[1], rten, pd1[1][2]);
and Py_2 (pipe_yd[2], rten, pd2[1][2]);
and Py_3 (pipe_yd[3], rten, pd3[1][2]);
and Py_4 (pipe_yd[4], rten, pd0[1][3]);
and Py_5 (pipe_yd[5], rten, pd1[1][3]);
and Py_6 (pipe_yd[6], rten, pd2[1][3]);
and Py_7 (pipe_yd[7], rten, pd3[1][3]);

routing_decision // the comparator
RTD(
.addrx ( addrx )
,.addry ( addry )
,.pipe_xd ( pipe_xd )
,.pipe_yd ( pipe_yd )
,.decision ( raw_dec )
);

// keep the routing decision until the tail flit is received by all sub-channels
// raw_dec[2:0] are the x results (>, <, =) and raw_dec[5:3] the y results (>, <, =).
// The x results are paired with (~frame_end)&rst_n; the y results are paired with x_equal,
// so dec_reg[4:2] depend on the x comparison having matched.
// NOTE(review): c2p is defined elsewhere; presumably an asymmetric C-element - confirm
// which of its a/b inputs is the asymmetric one before changing these connections.
c2p C_RTD0 ( .b(raw_dec[0]), .a((~frame_end)&rst_n), .q(dec_reg[0]));
c2p C_RTD1 ( .b(raw_dec[1]), .a((~frame_end)&rst_n), .q(dec_reg[1]));
c2p C_RT_XEQ (.b(raw_dec[2]), .a((~frame_end)&rst_n), .q(x_equal) );
c2p C_RTD2 ( .b(raw_dec[3]), .a(x_equal), .q(dec_reg[2]));
c2p C_RTD3 ( .b(raw_dec[4]), .a(x_equal), .q(dec_reg[3]));
c2p C_RTD4 ( .b(raw_dec[5]), .a(x_equal), .q(dec_reg[4]));

// generate the arbiter request signals
// DIR is a parameter, so exactly one arm is selected at elaboration. The west and east
// arms are 2 bits wide, matching RN=2; the others are 4 bits wide, matching RN=4.
assign arb_r =
DIR == 0 ? {dec_reg[4],dec_reg[2],dec_reg[1],dec_reg[3]} : // south port
DIR == 1 ? {dec_reg[4],dec_reg[2]} : // west port
DIR == 2 ? {dec_reg[4],dec_reg[2],dec_reg[3],dec_reg[0]} : // north port
DIR == 3 ? {dec_reg[4],dec_reg[3]} : // east port
{dec_reg[2],dec_reg[1],dec_reg[3],dec_reg[0]} ; // local port

// rt_err is raised by any decision bit that is not part of this direction's arb_r
assign rt_err =
DIR == 0 ? |{dec_reg[0]} : // south port
DIR == 1 ? |{dec_reg[0],dec_reg[1],dec_reg[3]} : // west port
DIR == 2 ? |{dec_reg[1]} : // north port
DIR == 3 ? |{dec_reg[0],dec_reg[1],dec_reg[2]} : // east port
|{dec_reg[4]} ; // local port

// a route error is acknowledged locally, a valid request by the allocator
or IP_RTACK (rt_ack, rt_err, arb_ra);

// ------------------------ pipeline control ------------------------------ //
// NOTE: this loop is a generate construct written without the generate/endgenerate
// keywords, which SystemVerilog allows.
`ifdef ENABLE_CHANNEL_SLICING
for(j=0; j<SCN; j++) begin: SC
// the sub-channel controller
subc_ctl SCH_C (
.nack ( pdan[0][j] ),
.rt_rst ( rtrst[j] ),
.ai2cb ( oa[j] ),
.ack ( pda[1][j] ),
.eof ( pd4[0][j] ),
.rt_ra ( rt_ack ),
.rt_err ( rt_err ),
.rst_n ( rst_n )
);
end // block: SC
`else // !`ifdef ENABLE_CHANNEL_SLICING
subc_ctl SCH_C (
.nack ( pdan[0] ),
.rt_rst ( rtrst ),
.ai2cb ( oa ),
.ack ( pda[1] ),
.eof ( pd4[0] ),
.rt_ra ( rt_ack ),
.rt_err ( rt_err ),
.rst_n ( rst_n )
);
`endif // !`ifdef ENABLE_CHANNEL_SLICING
// the router controller part
assign rten = ~rt_ack; // stop feeding the comparator once the route is acknowledged
assign frame_end = &rtrst; // high only when every sub-channel controller asserts rt_rst

endmodule // inp_buf
 
 
// the routing decision making procedure, comparators
// Compares the target address carried by a frame with the local router address.
// Each 8-bit address is handled as two 4-bit digits; one comp4 cell compares each
// digit pair and the per-digit results are merged, high digit first.
//   decision[0] : frame x > addr x      decision[3] : frame y > addr y
//   decision[1] : frame x < addr x      decision[4] : frame y < addr y
//   decision[2] : frame x = addr x      decision[5] : frame y = addr y
// NOTE(review): comp4 is defined elsewhere; its q is used here as q[0] = a > b,
// q[1] = a < b, q[2] = a = b - confirm against the comp4 source.
module routing_decision (
    input  [7:0] addrx,     // local x address
    input  [7:0] addry,     // local y address
    input  [7:0] pipe_xd,   // target x address taken from the frame
    input  [7:0] pipe_yd,   // target y address taken from the frame
    output [5:0] decision   // comparison results, see the table above
    );

    // per-digit comparison results
    wire [2:0] x_lo, x_hi;  // low / high digit of x
    wire [2:0] y_lo, y_hi;  // low / high digit of y

    comp4 X0 ( .q(x_lo), .a(pipe_xd[3:0]), .b(addrx[3:0]) );
    comp4 X1 ( .q(x_hi), .a(pipe_xd[7:4]), .b(addrx[7:4]) );
    comp4 Y0 ( .q(y_lo), .a(pipe_yd[3:0]), .b(addry[3:0]) );
    comp4 Y1 ( .q(y_hi), .a(pipe_yd[7:4]), .b(addry[7:4]) );

    // The high digit decides unless it is equal, in which case the low digit decides.
    // x dimension
    assign decision[0] = x_hi[0] | (x_hi[2]&x_lo[0]);  // greater
    assign decision[1] = x_hi[1] | (x_hi[2]&x_lo[1]);  // less
    assign decision[2] = x_hi[2] & x_lo[2];            // equal

    // y dimension
    assign decision[3] = y_hi[0] | (y_hi[2]&y_lo[0]);  // greater
    assign decision[4] = y_hi[1] | (y_hi[2]&y_lo[1]);  // less
    assign decision[5] = y_hi[2] & y_lo[2];            // equal

endmodule // routing_decision
/im_alloc.v
0,0 → 1,126
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
IM allocator (the IM dispatcher in the thesis)
*** SystemVerilog is used ***
References
For the detail structure, please refer to Section 6.3.1 of the thesis:
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011.
History:
05/09/2009 Initial version. <wsong83@gmail.com>
10/10/2009 Add the reset port. <wsong83@gmail.com>
05/11/2009 Speed up the arbiter. <wsong83@gmail.com>
10/06/2010 [Major] change to use PIM structure. <wsong83@gmail.com>
23/08/2010 Fix the non-QDI request withdraw process. <wsong83@gmail.com>
27/05/2011 Clean up for opensource. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// IM allocator: matches the requests of the VCN virtual circuits of one input module
// to the CMN central modules and produces the IM crossbar configuration.
//
// Build options (from define.v):
//   ENABLE_MRMA  - use the mrma allocator; otherwise the mnma allocator is used.
//   ENABLE_CRRD  - do not use CM state feedback; the CMs port then does not exist.
//
// The internal net declarations follow the same macro nesting as the logic below
// (ENABLE_MRMA outside, ENABLE_CRRD inside). Previously the declarations were nested
// the other way round, so defining ENABLE_MRMA without ENABLE_CRRD left OPrdy, OPblk
// and OPrst_n undeclared and gave IPr the wrong shape.
module im_alloc (/*AUTOARG*/
`ifndef ENABLE_CRRD
   CMs,
`endif
   // Outputs
   IMa, cfg,
   // Inputs
   IMr, rst_n
   ) ;
   // parameters
   parameter VCN = 2; // the number of virtual circuits on one port
   parameter CMN = 2; // the number of central modules
   parameter SN = 2;  // the possible output port choice of a port

   input [VCN-1:0][SN-1:0]   IMr; // the requests from virtual circuits
   output [VCN-1:0]          IMa; // switch ready, ack for the request

`ifndef ENABLE_CRRD
   // NOTE: CMs is only consumed by the mnma branch; with ENABLE_MRMA it is unused.
   input [CMN-1:0][SN-1:0]   CMs; // the states from CMs
`endif
   // NOTE: rst_n is only consumed by the mrma branch.
   input                     rst_n; // the active-low reset

   output [CMN-1:0][VCN-1:0] cfg; // the matrix configuration signals

   // internal wires
`ifdef ENABLE_MRMA
   wire [VCN-1:0]            IPr;          // request to the MRMA, one bit per VC
   wire [CMN-1:0]            OPrdy, OPblk; // OP ready and blocked status
   wire [CMN:0]              OPrst_n;      // the buffered resets to avoid metastability
`else
   wire [VCN-1:0][CMN-1:0]   IPr;          // request to the MNMA, one bit per VC/CM pair
 `ifndef ENABLE_CRRD
   // using the feedback from CMs
   wire [VCN-1:0][CMN-1:0][SN-1:0] IPrm;   // to generate the practical IPr
 `endif
`endif

   // generate variables
   genvar i, j, k;

   //----------------------------------------
   // the PIM crossbar allocator
`ifndef ENABLE_MRMA
   mnma #(.N(VCN), .M(CMN))
   PIMA (
         .cfg ( cfg ),
         .r   ( IPr ),
         .ra  ( IMa )
         );

   generate
      for(i=0; i<VCN; i++) begin: IPC
         for(j=0; j<CMN; j++) begin: OPC
 `ifdef ENABLE_CRRD
            // a VC requests every CM as soon as it requests any output direction
            assign IPr[i][j] = |IMr[i];
 `else
            // a VC requests CM j only through directions combined with the inverted
            // CM state; each (VC, CM, direction) term is produced by one c2p gate
            assign IPr[i][j] = |IPrm[i][j];
            for(k=0; k<SN; k++) begin: DIRC
               c2p IPRen (.q(IPrm[i][j][k]), .a(IMr[i][k]), .b(~CMs[j][k]));
            end
 `endif
         end
      end // block: IPC
   endgenerate
`else
   mrma #(.N(VCN), .M(CMN))
   PIMA (
         .ca    ( IMa   ),
         .ra    ( OPblk ),
         .cfg   ( cfg   ),
         .c     ( IPr   ),
         .r     ( OPrdy ),
         .rst_n ( rst_n )
         );

   generate
      for(i=0; i<CMN; i++) begin: OPC
         // the reset is passed through a chain of delay cells, one per CM, so each
         // OPrdy[i] is enabled by its own delayed copy of rst_n
         delay DLY ( .q(OPrst_n[i+1]), .a(OPrst_n[i])); // dont touch
         assign OPrdy[i] = (~OPblk[i])&OPrst_n[i+1];
      end

      for(i=0; i<VCN; i++) begin: IPC
         assign IPr[i] = |IMr[i];
      end
   endgenerate

   assign OPrst_n[0] = rst_n;
`endif // !`ifndef ENABLE_MRMA
endmodule // im_alloc
/clos_sch.v
0,0 → 1,232
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
Clos scheduler
*** SystemVerilog is used ***
References
For the detail structure, please refer to Section 6.3.1 of the thesis:
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011.
History:
11/12/2009 Initial version. <wsong83@gmail.com>
10/06/2010 Change to use PIM structure <wsong83@gmail.com>
23/08/2010 Fix the non-QDI request withdraw process <wsong83@gmail.com>
23/09/2010 Modified for Clos SDM router <wsong83@gmail.com>
27/05/2011 Clean up for opensource. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// Clos scheduler: one IM allocator per input direction (south, west, north, east,
// local) plus one CM allocator per central module.
//
// For every direction an im_alloc produces the IM configuration imc[d] and an rcb,
// driven by the same configuration, forwards the buffer requests to the CMs. The ack
// returned to an input buffer is the C-element combination of the im_alloc ack and the
// rcb ack, which forces the request withdrawal sequence.
//
// Direction index used for imc / im_ack / cm_ack: 0 south, 1 west, 2 north, 3 east, 4 local.
module clos_sch (/*AUTOARG*/
   // Outputs
   sack, wack, nack, eack, lack, imc, scfg, ncfg, wcfg, ecfg, lcfg,
   // Inputs
   sreq, nreq, lreq, wreq, ereq, rst_n
   );

   parameter M = 2; // the number of CMs
   parameter N = 2; // the number of ports in IMs/OMs

   //----------------------------- ports -----------------------------//
   input                        rst_n;                        // reset, active low
   input  [N-1:0][3:0]          sreq, nreq, lreq;             // requests from the input buffers
   input  [N-1:0][1:0]          wreq, ereq;
   output [N-1:0]               sack, wack, nack, eack, lack; // acks to the input buffers
   output [4:0][M-1:0][N-1:0]   imc;                          // IM configurations
   output [M-1:0][1:0]          scfg, ncfg;                   // CM configurations
   output [M-1:0][3:0]          wcfg, ecfg, lcfg;

   //------------------------- internal nets -------------------------//
   wire [4:0][N-1:0]            im_ack;                  // acks from the IM allocators
   wire [4:0][N-1:0]            cm_ack;                  // acks returned through the rcb blocks
   wire [M-1:0][3:0]            sr, nr, lr;              // requests from IMs to CMs
   wire [M-1:0][1:0]            wr, er;
   wire [M-1:0]                 sra, wra, nra, era, lra; // acks from CMs to IMs
`ifndef ENABLE_CRRD
   wire [M-1:0][4:0]            cm_state;                // the states from CMs
   wire [M-1:0][3:0]            scms, ncms, lcms;        // per-direction view of the CM states
   wire [M-1:0][1:0]            wcms, ecms;
`endif

   genvar g;

   //--------------------------- south IM ----------------------------//
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   SIM (
        .IMr   ( sreq      ),
`ifndef ENABLE_CRRD
        .CMs   ( scms      ),
`endif
        .rst_n ( rst_n     ),
        .IMa   ( im_ack[0] ),
        .cfg   ( imc[0]    )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   SRIM (
         .cfg  ( imc[0]    ),
         .ireq ( sreq      ),
         .ira  ( cm_ack[0] ),
         .oreq ( sr        ),
         .ora  ( sra       )
         );

   generate
      for(g=0; g<N; g++) begin: SA
         c2 UA (.a0(im_ack[0][g]), .a1(cm_ack[0][g]), .q(sack[g]));
      end
   endgenerate

   //---------------------------- west IM ----------------------------//
   im_alloc #(.VCN(N), .CMN(M), .SN(2))
   WIM (
        .IMr   ( wreq      ),
`ifndef ENABLE_CRRD
        .CMs   ( wcms      ),
`endif
        .rst_n ( rst_n     ),
        .IMa   ( im_ack[1] ),
        .cfg   ( imc[1]    )
        );

   rcb #(.NN(N), .MN(M), .DW(2))
   WRIM (
         .cfg  ( imc[1]    ),
         .ireq ( wreq      ),
         .ira  ( cm_ack[1] ),
         .oreq ( wr        ),
         .ora  ( wra       )
         );

   generate
      for(g=0; g<N; g++) begin: WA
         c2 UA (.a0(im_ack[1][g]), .a1(cm_ack[1][g]), .q(wack[g]));
      end
   endgenerate

   //--------------------------- north IM ----------------------------//
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   NIM (
        .IMr   ( nreq      ),
`ifndef ENABLE_CRRD
        .CMs   ( ncms      ),
`endif
        .rst_n ( rst_n     ),
        .IMa   ( im_ack[2] ),
        .cfg   ( imc[2]    )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   NRIM (
         .cfg  ( imc[2]    ),
         .ireq ( nreq      ),
         .ira  ( cm_ack[2] ),
         .oreq ( nr        ),
         .ora  ( nra       )
         );

   generate
      for(g=0; g<N; g++) begin: NA
         c2 UA (.a0(im_ack[2][g]), .a1(cm_ack[2][g]), .q(nack[g]));
      end
   endgenerate

   //---------------------------- east IM ----------------------------//
   im_alloc #(.VCN(N), .CMN(M), .SN(2))
   EIM (
        .IMr   ( ereq      ),
`ifndef ENABLE_CRRD
        .CMs   ( ecms      ),
`endif
        .rst_n ( rst_n     ),
        .IMa   ( im_ack[3] ),
        .cfg   ( imc[3]    )
        );

   rcb #(.NN(N), .MN(M), .DW(2))
   ERIM (
         .cfg  ( imc[3]    ),
         .ireq ( ereq      ),
         .ira  ( cm_ack[3] ),
         .oreq ( er        ),
         .ora  ( era       )
         );

   generate
      for(g=0; g<N; g++) begin: EA
         c2 UA (.a0(im_ack[3][g]), .a1(cm_ack[3][g]), .q(eack[g]));
      end
   endgenerate

   //--------------------------- local IM ----------------------------//
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   LIM (
        .IMr   ( lreq      ),
`ifndef ENABLE_CRRD
        .CMs   ( lcms      ),
`endif
        .rst_n ( rst_n     ),
        .IMa   ( im_ack[4] ),
        .cfg   ( imc[4]    )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   LRIM (
         .cfg  ( imc[4]    ),
         .ireq ( lreq      ),
         .ira  ( cm_ack[4] ),
         .oreq ( lr        ),
         .ora  ( lra       )
         );

   generate
      for(g=0; g<N; g++) begin: LA
         c2 UA (.a0(im_ack[4][g]), .a1(cm_ack[4][g]), .q(lack[g]));
      end
   endgenerate

   //------------------------- CM schedulers -------------------------//
   generate
      for(g=0; g<M; g++) begin: CMSch
         cm_alloc S (
                     // requests from the IMs
                     .sr   ( sr[g]       ),
                     .wr   ( wr[g]       ),
                     .nr   ( nr[g]       ),
                     .er   ( er[g]       ),
                     .lr   ( lr[g]       ),
                     // acks back to the IMs
                     .sra  ( sra[g]      ),
                     .wra  ( wra[g]      ),
                     .nra  ( nra[g]      ),
                     .era  ( era[g]      ),
                     .lra  ( lra[g]      ),
`ifndef ENABLE_CRRD
                     // CM state fed back to the IM allocators
                     .s    ( cm_state[g] ),
`endif
                     // CM configuration
                     .scfg ( scfg[g]     ),
                     .wcfg ( wcfg[g]     ),
                     .ncfg ( ncfg[g]     ),
                     .ecfg ( ecfg[g]     ),
                     .lcfg ( lcfg[g]     )
                     );

`ifndef ENABLE_CRRD
         // Each IM only sees the state bits of the outputs it is able to request,
         // in the same bit order as its request vector.
         assign scms[g] = cm_state[g][4:1];
         assign wcms[g] = cm_state[g][4:3];
         assign ncms[g] = {cm_state[g][4:3], cm_state[g][1:0]};
         assign ecms[g] = {cm_state[g][4], cm_state[g][1]};
         assign lcms[g] = cm_state[g][3:0];
`endif
      end
   endgenerate

endmodule // clos_sch
 
/subc_ctl.v
0,0 → 1,68
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
Sub-channel controller
References
* Lookahead pipelines
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
* Channel slicing
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443.
For the detail structure, please refer to Section 7.1.1 of the thesis:
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011.
History:
05/05/2009 Initial version. <wsong83@gmail.com>
22/10/2010 Make it more timing robust. <wsong83@gmail.com>
24/05/2011 Clean up for opensource. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// Sub-channel controller of the input buffer.
// It produces nack, the ack fed back to the last stage of the input buffer, and rt_rst,
// the per-sub-channel route reset, from the downstream ack (ai2cb), the buffer ack/eof
// and the route ack/error signals. The gate structure implements a hand-designed STG
// (csc is its state-coding signal); do not re-factor the Boolean expressions.
// NOTE(review): c2, c2p and c2n are defined elsewhere; presumably C-element variants -
// confirm their exact set/reset conditions before modifying the connections.
module subc_ctl (/*AUTOARG*/
// Outputs
nack, rt_rst,
// Inputs
ai2cb, ack, eof, rt_ra, rt_err, rst_n
);

input ai2cb; // the ack from output ports
input ack; // the ack from the last stage of the input buffer
input eof; // the eof bit from the last stage of the input buffer
input rt_ra; // ack from the switch allocator
input rt_err; // invalid router decision
input rst_n; // the global active low reset signal
output nack; // the ack to the last stage of the input buffer
output rt_rst; // the router reset signal
wire csc; // internal wires to handle the CSC of the STG
wire acko; // the ack signal after the C2N gate
wire fend; // the end of frame indicator
wire acken; // active low ack enable
// with lookahead the downstream ack is combined with the local ack in a c2n gate,
// otherwise it is used directly
`ifdef ENABLE_LOOKAHEAD
c2n CD (.q(acko), .a(ai2cb), .b(ack)); // the C2N gate to avoid early withdrawal
`else
assign acko = ai2cb;
`endif
// fend combines the eof bit with the (possibly gated) downstream ack
c2p CEN (.b(eof), .a(acko), .q(fend));
// csc combines the route ack with fend
c2 C (.a0(rt_ra), .a1(fend), .q(csc));
// acken = ~(rt_ra & ~csc): low (enabled) only while the route is acked and csc is low
nand U1 ( acken, rt_ra, ~csc);
// rt_rst = ~(fend | ~csc): high only while csc is high and fend is low
nor U2 ( rt_rst, fend, ~csc);
// nack is high only when all of these hold: no non-eof downstream ack, acks enabled,
// no route error being acked, and reset released
nor AG ( nack, acko&(~eof), acken|(rt_err&ack), ~rst_n);
endmodule // subc_ctl
 
/router.v
0,0 → 1,497
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
Wormhole/SDM router top level module
*** SystemVerilog is used ***
History:
28/05/2009 Initial version. <wsong83@gmail.com>
23/09/2010 Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com>
22/10/2010 Parameterize the number of pipelines in output buffers. <wsong83@gmail.com>
25/05/2011 Clean up for opensource. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// Top level of the wormhole/SDM router.
// Five ports (south, west, north, east, local), each carrying VCN virtual circuits.
// Every virtual circuit has one inp_buf and one outp_buf per port; the buffers are
// connected through a switch and its allocator:
//   ENABLE_CLOS defined   : dclos  + clos_sch
//   otherwise             : dcb_xy + sdm_sch
// Naming of internal nets: <dir>2c<k> runs from an input buffer to the switch and
// c2<dir><k> from the switch to an output buffer, where k = 0..3 are the data wires,
// 4 is the eof bit and a is the ack.
// NOTE(review): SCN is not passed down to inp_buf/outp_buf, which derive their own
// SCN from DW; overriding SCN here without changing DW would give mismatched widths.
module router(/*AUTOARG*/
// Outputs
so0, so1, so2, so3, wo0, wo1, wo2, wo3, no0, no1, no2, no3, eo0,
eo1, eo2, eo3, lo0, lo1, lo2, lo3, so4, wo4, no4, eo4, lo4, sia,
wia, nia, eia, lia,
// Inputs
si0, si1, si2, si3, wi0, wi1, wi2, wi3, ni0, ni1, ni2, ni3, ei0,
ei1, ei2, ei3, li0, li1, li2, li3, si4, wi4, ni4, ei4, li4, soa,
woa, noa, eoa, loa, addrx, addry, rst_n
);

parameter VCN = 1; // number of virtual circuits in each direction. When VCN == 1, it is a wormhole router
parameter DW = 32; // the datawidth of a single virtual circuit, the total data width of the router is DW*VCN
parameter IPD = 1; // the number of half-buffer stages in input buffers
parameter OPD = 2; // the number of half-buffer stages in output buffers
parameter SCN = DW/2; // the number of 1-of-4 sub-channel in each virtual circuit

input [VCN-1:0][SCN-1:0] si0, si1, si2, si3; // south input [0], X+1
input [VCN-1:0][SCN-1:0] wi0, wi1, wi2, wi3; // west input [1], Y-1
input [VCN-1:0][SCN-1:0] ni0, ni1, ni2, ni3; // north input [2], X-1
input [VCN-1:0][SCN-1:0] ei0, ei1, ei2, ei3; // east input [3], Y+1
input [VCN-1:0][SCN-1:0] li0, li1, li2, li3; // local input
output [VCN-1:0][SCN-1:0] so0, so1, so2, so3; // south output
output [VCN-1:0][SCN-1:0] wo0, wo1, wo2, wo3; // west output
output [VCN-1:0][SCN-1:0] no0, no1, no2, no3; // north output
output [VCN-1:0][SCN-1:0] eo0, eo1, eo2, eo3; // east output
output [VCN-1:0][SCN-1:0] lo0, lo1, lo2, lo3; // local output
// eof bits and ack lines
// one eof/ack wire per sub-channel with channel slicing, one per virtual circuit without
`ifdef ENABLE_CHANNEL_SLICING
input [VCN-1:0][SCN-1:0] si4, wi4, ni4, ei4, li4;
output [VCN-1:0][SCN-1:0] so4, wo4, no4, eo4, lo4;
output [VCN-1:0][SCN-1:0] sia, wia, nia, eia, lia;
input [VCN-1:0][SCN-1:0] soa, woa, noa, eoa, loa;
`else
input [VCN-1:0] si4, wi4, ni4, ei4, li4;
output [VCN-1:0] so4, wo4, no4, eo4, lo4;
output [VCN-1:0] sia, wia, nia, eia, lia;
input [VCN-1:0] soa, woa, noa, eoa, loa;
`endif // !`ifdef ENABLE_CHANNEL_SLICING

input [7:0] addrx, addry; // the local address of the router, coded in 1-of-4 coding
input rst_n; // active low reset signal

// internal wires, input buffers to switches (crossbar): [dir]2[cb][1-of-4 index]
wire [VCN-1:0][SCN-1:0] s2c0, s2c1, s2c2, s2c3; // south input to switch data
wire [VCN-1:0][SCN-1:0] w2c0, w2c1, w2c2, w2c3;
wire [VCN-1:0][SCN-1:0] n2c0, n2c1, n2c2, n2c3;
wire [VCN-1:0][SCN-1:0] e2c0, e2c1, e2c2, e2c3;
wire [VCN-1:0][SCN-1:0] l2c0, l2c1, l2c2, l2c3;
// internal wires, switches (crossbar) to output buffers: [cb]2[dir][1-of-4 index]
wire [VCN-1:0][SCN-1:0] c2s0, c2s1, c2s2, c2s3;
wire [VCN-1:0][SCN-1:0] c2w0, c2w1, c2w2, c2w3;
wire [VCN-1:0][SCN-1:0] c2n0, c2n1, c2n2, c2n3; // switch to north output
wire [VCN-1:0][SCN-1:0] c2e0, c2e1, c2e2, c2e3;
wire [VCN-1:0][SCN-1:0] c2l0, c2l1, c2l2, c2l3;

// internal wires for ack and eof bits
`ifdef ENABLE_CHANNEL_SLICING
wire [VCN-1:0][SCN-1:0] s2c4, w2c4, n2c4, e2c4, l2c4;
wire [VCN-1:0][SCN-1:0] c2s4, c2w4, c2n4, c2e4, c2l4;
wire [VCN-1:0][SCN-1:0] s2ca, w2ca, n2ca, e2ca, l2ca;
wire [VCN-1:0][SCN-1:0] c2sa, c2wa, c2na, c2ea, c2la;
`else
wire [VCN-1:0] s2c4, w2c4, n2c4, e2c4, l2c4;
wire [VCN-1:0] c2s4, c2w4, c2n4, c2e4, c2l4;
wire [VCN-1:0] s2ca, w2ca, n2ca, e2ca, l2ca;
wire [VCN-1:0] c2sa, c2wa, c2na, c2ea, c2la;
`endif // !`ifdef ENABLE_CHANNEL_SLICING

// the requests/acks from/to input buffers to switch allocators
// south, north and local buffers raise 4 requests each, west and east buffers 2 each
wire [VCN-1:0][3:0] sreq, nreq, lreq;
wire [VCN-1:0][1:0] wreq, ereq;
wire [VCN-1:0] sack, wack, nack, eack, lack;

// configuration bits for the switches
`ifdef ENABLE_CLOS
wire [4:0][VCN-1:0][VCN-1:0] imcfg;
wire [VCN-1:0][1:0] scfg, ncfg;
wire [VCN-1:0][3:0] wcfg, ecfg, lcfg;
`else // normal crossbar based SDM
wire [VCN-1:0][2*VCN-1:0] scfg, ncfg;
wire [VCN-1:0][4*VCN-1:0] wcfg, ecfg, lcfg;
`endif
genvar i;

// one set of five input buffers and five output buffers per virtual circuit
generate
for (i=0; i<VCN; i++) begin: SC

// --------------- input buffers ------------------- //
// DIR selects the routing table of the buffer, RN its number of request outputs

inp_buf #(.DIR(0), .RN(4), .DW(DW), .PD(IPD))
SIB (
.o0 ( s2c0[i] ),
.o1 ( s2c1[i] ),
.o2 ( s2c2[i] ),
.o3 ( s2c3[i] ),
.o4 ( s2c4[i] ),
.ia ( sia[i] ),
.arb_r ( sreq[i] ),
.rst_n ( rst_n ),
.i0 ( si0[i] ),
.i1 ( si1[i] ),
.i2 ( si2[i] ),
.i3 ( si3[i] ),
.i4 ( si4[i] ),
.oa ( s2ca[i] ),
.addrx ( addrx ),
.addry ( addry ),
.arb_ra ( sack[i] )
);

inp_buf #(.DIR(1), .RN(2), .DW(DW), .PD(IPD))
WIB (
.o0 ( w2c0[i] ),
.o1 ( w2c1[i] ),
.o2 ( w2c2[i] ),
.o3 ( w2c3[i] ),
.o4 ( w2c4[i] ),
.ia ( wia[i] ),
.arb_r ( wreq[i] ),
.rst_n ( rst_n ),
.i0 ( wi0[i] ),
.i1 ( wi1[i] ),
.i2 ( wi2[i] ),
.i3 ( wi3[i] ),
.i4 ( wi4[i] ),
.oa ( w2ca[i] ),
.addrx ( addrx ),
.addry ( addry ),
.arb_ra ( wack[i] )
);

inp_buf #(.DIR(2), .RN(4), .DW(DW), .PD(IPD))
NIB (
.o0 ( n2c0[i] ),
.o1 ( n2c1[i] ),
.o2 ( n2c2[i] ),
.o3 ( n2c3[i] ),
.o4 ( n2c4[i] ),
.ia ( nia[i] ),
.arb_r ( nreq[i] ),
.rst_n ( rst_n ),
.i0 ( ni0[i] ),
.i1 ( ni1[i] ),
.i2 ( ni2[i] ),
.i3 ( ni3[i] ),
.i4 ( ni4[i] ),
.oa ( n2ca[i] ),
.addrx ( addrx ),
.addry ( addry ),
.arb_ra ( nack[i] )
);

inp_buf #(.DIR(3), .RN(2), .DW(DW), .PD(IPD))
EIB (
.o0 ( e2c0[i] ),
.o1 ( e2c1[i] ),
.o2 ( e2c2[i] ),
.o3 ( e2c3[i] ),
.o4 ( e2c4[i] ),
.ia ( eia[i] ),
.arb_r ( ereq[i] ),
.rst_n ( rst_n ),
.i0 ( ei0[i] ),
.i1 ( ei1[i] ),
.i2 ( ei2[i] ),
.i3 ( ei3[i] ),
.i4 ( ei4[i] ),
.oa ( e2ca[i] ),
.addrx ( addrx ),
.addry ( addry ),
.arb_ra ( eack[i] )
);

inp_buf #(.DIR(4), .RN(4), .DW(DW), .PD(IPD))
LIB (
.o0 ( l2c0[i] ),
.o1 ( l2c1[i] ),
.o2 ( l2c2[i] ),
.o3 ( l2c3[i] ),
.o4 ( l2c4[i] ),
.ia ( lia[i] ),
.arb_r ( lreq[i] ),
.rst_n ( rst_n ),
.i0 ( li0[i] ),
.i1 ( li1[i] ),
.i2 ( li2[i] ),
.i3 ( li3[i] ),
.i4 ( li4[i] ),
.oa ( l2ca[i] ),
.addrx ( addrx ),
.addry ( addry ),
.arb_ra ( lack[i] )
);

// --------------------- output buffers ---------------- //
// each output buffer sits between the switch (c2<dir>*) and the router output pins
outp_buf #(.DW(DW), .PD(OPD))
SOB (
.o0 ( so0[i] ),
.o1 ( so1[i] ),
.o2 ( so2[i] ),
.o3 ( so3[i] ),
.o4 ( so4[i] ),
.oa ( soa[i] ),
.i0 ( c2s0[i] ),
.i1 ( c2s1[i] ),
.i2 ( c2s2[i] ),
.i3 ( c2s3[i] ),
.i4 ( c2s4[i] ),
.ia ( c2sa[i] ),
.rst_n ( rst_n )
);
outp_buf #(.DW(DW), .PD(OPD))
WOB (
.o0 ( wo0[i] ),
.o1 ( wo1[i] ),
.o2 ( wo2[i] ),
.o3 ( wo3[i] ),
.o4 ( wo4[i] ),
.oa ( woa[i] ),
.i0 ( c2w0[i] ),
.i1 ( c2w1[i] ),
.i2 ( c2w2[i] ),
.i3 ( c2w3[i] ),
.i4 ( c2w4[i] ),
.ia ( c2wa[i] ),
.rst_n ( rst_n )
);
outp_buf #(.DW(DW), .PD(OPD))
NOB (
.o0 ( no0[i] ),
.o1 ( no1[i] ),
.o2 ( no2[i] ),
.o3 ( no3[i] ),
.o4 ( no4[i] ),
.oa ( noa[i] ),
.i0 ( c2n0[i] ),
.i1 ( c2n1[i] ),
.i2 ( c2n2[i] ),
.i3 ( c2n3[i] ),
.i4 ( c2n4[i] ),
.ia ( c2na[i] ),
.rst_n ( rst_n )
);
outp_buf #(.DW(DW), .PD(OPD))
EOB (
.o0 ( eo0[i] ),
.o1 ( eo1[i] ),
.o2 ( eo2[i] ),
.o3 ( eo3[i] ),
.o4 ( eo4[i] ),
.oa ( eoa[i] ),
.i0 ( c2e0[i] ),
.i1 ( c2e1[i] ),
.i2 ( c2e2[i] ),
.i3 ( c2e3[i] ),
.i4 ( c2e4[i] ),
.ia ( c2ea[i] ),
.rst_n ( rst_n )
);
outp_buf #(.DW(DW), .PD(OPD))
LOB (
.o0 ( lo0[i] ),
.o1 ( lo1[i] ),
.o2 ( lo2[i] ),
.o3 ( lo3[i] ),
.o4 ( lo4[i] ),
.oa ( loa[i] ),
.i0 ( c2l0[i] ),
.i1 ( c2l1[i] ),
.i2 ( c2l2[i] ),
.i3 ( c2l3[i] ),
.i4 ( c2l4[i] ),
.ia ( c2la[i] ),
.rst_n ( rst_n )
);
end // block: SC
endgenerate

// ------------------- switch and switch allocator ------------------- //
// From the switch's point of view the <dir>i* ports are fed by the input buffers
// (<dir>2c*) and the <dir>o* ports feed the output buffers (c2<dir>*).
`ifdef ENABLE_CLOS
// Clos switch; the number of CMs and of IM/OM ports are both set to VCN
dclos #(.MN(VCN), .NN(VCN), .DW(DW))
CB (
.so0 ( c2s0 ),
.so1 ( c2s1 ),
.so2 ( c2s2 ),
.so3 ( c2s3 ),
.so4 ( c2s4 ),
.soa ( c2sa ),
.wo0 ( c2w0 ),
.wo1 ( c2w1 ),
.wo2 ( c2w2 ),
.wo3 ( c2w3 ),
.wo4 ( c2w4 ),
.woa ( c2wa ),
.no0 ( c2n0 ),
.no1 ( c2n1 ),
.no2 ( c2n2 ),
.no3 ( c2n3 ),
.no4 ( c2n4 ),
.noa ( c2na ),
.eo0 ( c2e0 ),
.eo1 ( c2e1 ),
.eo2 ( c2e2 ),
.eo3 ( c2e3 ),
.eo4 ( c2e4 ),
.eoa ( c2ea ),
.lo0 ( c2l0 ),
.lo1 ( c2l1 ),
.lo2 ( c2l2 ),
.lo3 ( c2l3 ),
.lo4 ( c2l4 ),
.loa ( c2la ),
.si0 ( s2c0 ),
.si1 ( s2c1 ),
.si2 ( s2c2 ),
.si3 ( s2c3 ),
.si4 ( s2c4 ),
.sia ( s2ca ),
.wi0 ( w2c0 ),
.wi1 ( w2c1 ),
.wi2 ( w2c2 ),
.wi3 ( w2c3 ),
.wi4 ( w2c4 ),
.wia ( w2ca ),
.ni0 ( n2c0 ),
.ni1 ( n2c1 ),
.ni2 ( n2c2 ),
.ni3 ( n2c3 ),
.ni4 ( n2c4 ),
.nia ( n2ca ),
.ei0 ( e2c0 ),
.ei1 ( e2c1 ),
.ei2 ( e2c2 ),
.ei3 ( e2c3 ),
.ei4 ( e2c4 ),
.eia ( e2ca ),
.li0 ( l2c0 ),
.li1 ( l2c1 ),
.li2 ( l2c2 ),
.li3 ( l2c3 ),
.li4 ( l2c4 ),
.lia ( l2ca ),
.imcfg ( imcfg ),
.wcfg ( wcfg ),
.ecfg ( ecfg ),
.lcfg ( lcfg ),
.scfg ( scfg ),
.ncfg ( ncfg )
) ;

// Clos scheduler: turns the buffer requests into IM (imcfg) and CM (*cfg) configurations
clos_sch #(.M(VCN), .N(VCN))
ALLOC (
.sack ( sack ),
.wack ( wack ),
.nack ( nack ),
.eack ( eack ),
.lack ( lack ),
.imc ( imcfg ),
.scfg ( scfg ),
.ncfg ( ncfg ),
.wcfg ( wcfg ),
.ecfg ( ecfg ),
.lcfg ( lcfg ),
.sreq ( sreq ),
.nreq ( nreq ),
.lreq ( lreq ),
.wreq ( wreq ),
.ereq ( ereq ),
.rst_n ( rst_n )
);
`else // Crossbar based SDM

dcb_xy #(.VCN(VCN), .VCW(DW))
CB (
.so0 ( c2s0 ),
.so1 ( c2s1 ),
.so2 ( c2s2 ),
.so3 ( c2s3 ),
.so4 ( c2s4 ),
.soa ( c2sa ),
.wo0 ( c2w0 ),
.wo1 ( c2w1 ),
.wo2 ( c2w2 ),
.wo3 ( c2w3 ),
.wo4 ( c2w4 ),
.woa ( c2wa ),
.no0 ( c2n0 ),
.no1 ( c2n1 ),
.no2 ( c2n2 ),
.no3 ( c2n3 ),
.no4 ( c2n4 ),
.noa ( c2na ),
.eo0 ( c2e0 ),
.eo1 ( c2e1 ),
.eo2 ( c2e2 ),
.eo3 ( c2e3 ),
.eo4 ( c2e4 ),
.eoa ( c2ea ),
.lo0 ( c2l0 ),
.lo1 ( c2l1 ),
.lo2 ( c2l2 ),
.lo3 ( c2l3 ),
.lo4 ( c2l4 ),
.loa ( c2la ),
.si0 ( s2c0 ),
.si1 ( s2c1 ),
.si2 ( s2c2 ),
.si3 ( s2c3 ),
.si4 ( s2c4 ),
.sia ( s2ca ),
.wi0 ( w2c0 ),
.wi1 ( w2c1 ),
.wi2 ( w2c2 ),
.wi3 ( w2c3 ),
.wi4 ( w2c4 ),
.wia ( w2ca ),
.ni0 ( n2c0 ),
.ni1 ( n2c1 ),
.ni2 ( n2c2 ),
.ni3 ( n2c3 ),
.ni4 ( n2c4 ),
.nia ( n2ca ),
.ei0 ( e2c0 ),
.ei1 ( e2c1 ),
.ei2 ( e2c2 ),
.ei3 ( e2c3 ),
.ei4 ( e2c4 ),
.eia ( e2ca ),
.li0 ( l2c0 ),
.li1 ( l2c1 ),
.li2 ( l2c2 ),
.li3 ( l2c3 ),
.li4 ( l2c4 ),
.lia ( l2ca ),
.wcfg ( wcfg ),
.ecfg ( ecfg ),
.lcfg ( lcfg ),
.scfg ( scfg ),
.ncfg ( ncfg )
) ;
// crossbar switch allocator
sdm_sch #(.VCN(VCN))
ALLOC (
.sack ( sack ),
.wack ( wack ),
.nack ( nack ),
.eack ( eack ),
.lack ( lack ),
.scfg ( scfg ),
.ncfg ( ncfg ),
.wcfg ( wcfg ),
.ecfg ( ecfg ),
.lcfg ( lcfg ),
.sreq ( sreq ),
.nreq ( nreq ),
.lreq ( lreq ),
.wreq ( wreq ),
.ereq ( ereq ),
.rst_n ( rst_n )
);
`endif

endmodule // router
router.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: sdm_sch.v =================================================================== --- sdm_sch.v (nonexistent) +++ sdm_sch.v (revision 57) @@ -0,0 +1,244 @@ +/* + Asynchronous SDM NoC + (C)2011 Wei Song + Advanced Processor Technologies Group + Computer Science, the Univ. of Manchester, UK + + Authors: + Wei Song wsong83@gmail.com + + License: LGPL 3.0 or later + + Crossbar based SDM switch allocator + *** SystemVerilog is used *** + + References + For the detail structure, please refer to Section 6.3.1 of the thesis: + Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011. + + History: + 28/09/2009 Initial version. + 27/05/2011 Clean up for opensource. + +*/ + +// the router structure definitions +`include "define.v" + +module sdm_sch (/*AUTOARG*/ + // Outputs + sack, wack, nack, eack, lack, scfg, ncfg, wcfg, ecfg, lcfg, + // Inputs + sreq, nreq, lreq, wreq, ereq, rst_n + ); + + parameter VCN = 2; // the number of virtual circuits per port + + // income requests + input [VCN-1:0][3:0] sreq, nreq, lreq; + input [VCN-1:0][1:0] wreq, ereq; + + // ack to input buffers + output [VCN-1:0] sack, wack, nack, eack, lack; + + // configuration to the crossbar + output [VCN-1:0][1:0][VCN-1:0] scfg, ncfg; + output [VCN-1:0][3:0][VCN-1:0] wcfg, ecfg, lcfg; + + input rst_n; // active low global reset + + // requests to arbiters +`ifndef ENABLE_MRMA + wire [1:0][VCN-1:0][VCN-1:0] r2s, r2n; // shuffle the incoming request signals + wire [3:0][VCN-1:0][VCN-1:0] r2w, r2e, r2l; +`else + wire [1:0][VCN-1:0] r2s, r2n; // shuffle the incoming request signals + wire [3:0][VCN-1:0] r2w, r2e, r2l; +`endif + + // ack from arbiters + wire [VCN-1:0][3:0] a2s, a2n, a2l; + wire [VCN-1:0][1:0] a2w, a2e; + + // ack of the arbiters + wire [1:0][VCN-1:0] r2sa, r2na; + wire [3:0][VCN-1:0] r2wa, r2ea, r2la; + +`ifdef ENABLE_MRMA + 
wire [VCN:0] OPrst_n; // the buffered resets to avoid metastability + wire [VCN-1:0] SOPrdy, SOPblk; // OP ready and blocked status + wire [VCN-1:0] WOPrdy, WOPblk; // OP ready and blocked status + wire [VCN-1:0] NOPrdy, NOPblk; // OP ready and blocked status + wire [VCN-1:0] EOPrdy, EOPblk; // OP ready and blocked status + wire [VCN-1:0] LOPrdy, LOPblk; // OP ready and blocked status +`endif + + genvar i,j; + + // wire shuffle + generate for(i=0; i + 20/09/2010 Modified for the Clos SDM router + 25/05/2011 Clean up for opensource. + +*/ + +// the router structure definitions +`include "define.v" + +module cm_alloc (/*AUTOARG*/ +`ifndef ENABLE_CRRD + s, +`endif + // Outputs + sra, wra, nra, era, lra, scfg, ncfg, wcfg, ecfg, lcfg, + // Inputs + wr, er, sr, nr, lr + ) ; + + //requests from all IMs + input [1:0] wr, er; + input [3:0] sr, nr, lr; + + // ack to IMs + output sra, wra, nra, era, lra; + + // the configuration to the local CM + output [1:0] scfg, ncfg; + output [3:0] wcfg, ecfg, lcfg; + + // when using the asynchronous dispatching algorithm, status is sent back to IMs +`ifndef ENABLE_CRRD + output [4:0] s; +`endif + + // arbiters + mutex_arb #(2) + SA ( .req ( {lr[0], nr[0]} ), + .gnt ( scfg ) + ); + + mutex_arb #(4) + WA ( .req ( {lr[1], er[0], nr[1], sr[0]} ), + .gnt ( wcfg ) + ); + + mutex_arb #(2) + NA ( .req ( {lr[2], sr[1]} ), + .gnt ( ncfg ) + ); + + mutex_arb #(4) + EA ( .req ( {lr[3], nr[2], wr[0], sr[2]} ), + .gnt ( ecfg ) + ); + + mutex_arb #(4) + LA ( .req ( {er[1], nr[3], wr[1], sr[3]} ), + .gnt ( lcfg ) + ); + + // generating the ack + assign sra = |{wcfg[0], ncfg[0], ecfg[0], lcfg[0]}; + assign wra = |{ecfg[1], lcfg[1]}; + assign nra = |{scfg[0], wcfg[1], ecfg[2], lcfg[2]}; + assign era = |{wcfg[2], lcfg[3]}; + assign lra = |{scfg[1], wcfg[3], ncfg[1], ecfg[3]}; + + // generating the status +`ifndef ENABLE_CRRD + assign s = {|lcfg, |ecfg, |ncfg, |wcfg, |scfg}; +`endif + +endmodule // cm_alloc + +
cm_alloc.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: output_buf.v =================================================================== --- output_buf.v (nonexistent) +++ output_buf.v (revision 57) @@ -0,0 +1,143 @@ +/* + Asynchronous SDM NoC + (C)2011 Wei Song + Advanced Processor Technologies Group + Computer Science, the Univ. of Manchester, UK + + Authors: + Wei Song wsong83@gmail.com + + License: LGPL 3.0 or later + + Output buffer for Wormhole/SDM routers. + *** SystemVerilog is used *** + + References + * Lookahead pipelines + Montek Singh and Steven M. Nowick}, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205 + + History: + 26/05/2009 Initial version. + 20/09/2010 Supporting channel slicing and SDM using macro difinitions. + 22/10/2010 Parameterize the number of pipelines in output buffers. + 23/05/2011 Clean up for opensource. 
+ +*/ + +// the router structure definitions +`include "define.v" + +// the out buffer +module outp_buf (/*AUTOARG*/ + // Outputs + o0, o1, o2, o3, o4, ia, + // Inputs + rst_n, i0, i1, i2, i3, i4, oa + ); + + parameter DW = 16; // the datawidth of a single virtual circuit + parameter PD = 2; // buffer depth + parameter SCN = DW/2; // the number of 1-of-4 sub-channel in each virtual circuit + + input rst_n; // global reset, active low + input [SCN-1:0] i0, i1, i2, i3; // data input + output [SCN-1:0] o0, o1, o2, o3; // data output + wire [PD:0][SCN-1:0] pd0, pd1, pd2, pd3; // data wires for the internal pipeline satges +`ifdef ENABLE_CHANNEL_SLICING + input [SCN-1:0] i4, oa; // eof and ack + output [SCN-1:0] o4, ia; + wire [SCN-1:0] ian_dly; + wire [PD:0][SCN-1:0] pd4, pda, pdan; // internal eof and ack +`else + input i4, oa; // eof and ack + output o4, ia; + wire ian_dly; + wire [PD:0] pd4, pda, pdan; // internal eof and ack +`endif + + +//-------------------------- pipeline ---------------------------------------// + genvar i,j; + generate for(i=0; i
output_buf.v Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.