URL
https://opencores.org/ocsvn/async_sdm_noc/async_sdm_noc/trunk
Subversion Repositories async_sdm_noc
Compare Revisions
- This comparison shows the changes necessary to convert path
/async_sdm_noc/branches/clos_opt/sdm/src
- from Rev 47 to Rev 57
- ↔ Reverse comparison
Rev 47 → Rev 57
/input_buf.v
0,0 → 1,266
/* |
Asynchronous SDM NoC |
(C)2011 Wei Song |
Advanced Processor Technologies Group |
Computer Science, the Univ. of Manchester, UK |
|
Authors: |
Wei Song wsong83@gmail.com |
|
License: LGPL 3.0 or later |
|
Input buffer for Wormhole/SDM routers. |
*** SystemVerilog is used *** |
|
References |
* Lookahead pipelines |
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205 |
* Channel slicing |
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443. |
* SDM |
Wei Song and Doug Edwards, Asynchronous spatial division multiplexing router, Microprocessors and Microsystems, 2011(35), 85-97. |
|
History: |
05/05/2009 Initial version. <wsong83@gmail.com> |
20/09/2010 Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com> |
24/05/2011 Clean up for opensource. <wsong83@gmail.com> |
01/06/2011 Use the comp4 common comparator rather than the chain_comparator defined in this module. <wsong83@gmail.com> |
|
*/ |
|
// the router structure definitions |
`include "define.v" |
|
// inp_buf: input buffer and route decoder for one virtual circuit of a
// Wormhole/SDM router port.
//
// Data path : PD stages of pipe4. Index PD is the input side (pd*[PD] = i*)
//             and index 0 is the output side (o* = pd*[0]).
// Routing   : the target address is taken from the signals at pipeline
//             index 1 of sub-channels 0..3, compared with addrx/addry, and
//             the decision is kept on arb_r until the frame end has been
//             flagged for all sub-channels (frame_end = &rtrst).
// Control   : subc_ctl drives the ack input of pipeline stage 0 (pdan[0]).
module inp_buf (/*AUTOARG*/
   // Outputs
   o0, o1, o2, o3, o4, ia, arb_r,
   // Inputs
   rst_n, i0, i1, i2, i3, i4, oa, addrx, addry, arb_ra
   );

   //-------------------------- parameters ---------------------------------------//
   parameter DIR = 0;    // the port direction: 0 south, 1 west, 2 north, 3 east, otherwise local
   parameter RN = 4;     // the number of request outputs, must match the direction
   parameter DW = 16;    // the data-width of the data-path
   parameter PD = 2;     // the depth of the input buffer; the route decoder reads index 1, so PD >= 1
   parameter SCN = DW/2; // the number of sub-channels, one per two bits of data width

   //-------------------------- I/O ports ---------------------------------------//
   input            rst_n;          // global reset, active low
   input [SCN-1:0]  i0, i1, i2, i3; // data input
   output [SCN-1:0] o0, o1, o2, o3; // data output
`ifdef ENABLE_CHANNEL_SLICING
   // one eof bit (i4/o4) and one ack (ia/oa) per sub-channel
   input [SCN-1:0]  i4, oa;
   output [SCN-1:0] o4, ia;
`else
   // a single eof bit and a single ack shared by all sub-channels
   input            i4, oa;
   output           o4, ia;
`endif
   input [7:0]      addrx, addry;   // the address the frame target is compared with
   output [RN-1:0]  arb_r;          // requests to the switch allocator
   input            arb_ra;         // ack for the request

   //-------------------------- control signals ---------------------------------------//
   wire             rten;             // routing enable
   wire             frame_end;        // identify the end of a frame
   wire [7:0]       pipe_xd, pipe_yd; // the target address from the incoming frame
   wire [PD:0][SCN-1:0] pd0, pd1, pd2, pd3; // data wires for the internal pipeline stages
   wire [5:0]       raw_dec;          // the routing decision from the comparator
   wire [4:0]       dec_reg;          // the routing decision kept by C-gates
   wire             x_equal;          // addr x = target x
   wire             rt_err;           // route decoder error
   wire             rt_ack;           // route build ack

`ifdef ENABLE_CHANNEL_SLICING
   wire [SCN-1:0]   rtrst;            // rt decoder reset for each sub-channel
   wire [PD:0][SCN-1:0] pd4, pda, pdan; // data wires for the internal pipeline stages

`else
   wire             rtrst;            // rt decode reset
   wire [PD:0]      pd4, pda, pdan;   // data wires for the internal pipeline stages
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   genvar           i, j;

   //------------------------- pipelines ------------------------------------- //
   // Stage i takes pd*[i+1] as input and produces pd*[i]; its ack output is
   // pda[i+1] and its ack input is pdan[i].
   generate for(i=0; i<PD; i++) begin: DP
`ifdef ENABLE_CHANNEL_SLICING
      // one 2-bit wide pipe4 per sub-channel, each with its own eof and ack
      for(j=0; j<SCN; j++) begin: SC
         pipe4 #(.DW(2))
         P (
            .o0 ( pd0[i][j]   ),
            .o1 ( pd1[i][j]   ),
            .o2 ( pd2[i][j]   ),
            .o3 ( pd3[i][j]   ),
            .o4 ( pd4[i][j]   ),
            .ia ( pda[i+1][j] ),
            .i0 ( pd0[i+1][j] ),
            .i1 ( pd1[i+1][j] ),
            .i2 ( pd2[i+1][j] ),
            .i3 ( pd3[i+1][j] ),
            .i4 ( pd4[i+1][j] ),
            .oa ( pdan[i][j]  )
            );
      end // block: SC

`else // !`ifdef ENABLE_CHANNEL_SLICING
      // a single DW wide pipe4 with one eof bit and one ack
      pipe4 #(.DW(DW))
      P (
         .o0 ( pd0[i]   ),
         .o1 ( pd1[i]   ),
         .o2 ( pd2[i]   ),
         .o3 ( pd3[i]   ),
         .o4 ( pd4[i]   ),
         .ia ( pda[i+1] ),
         .i0 ( pd0[i+1] ),
         .i1 ( pd1[i+1] ),
         .i2 ( pd2[i+1] ),
         .i3 ( pd3[i+1] ),
         .i4 ( pd4[i+1] ),
         .oa ( pdan[i]  )
         );
`endif // !`ifdef ENABLE_CHANNEL_SLICING
   end // block: DP
   endgenerate

   // Ack inputs of stages 1..PD-1: the inverted OR of the ack output and the
   // eof output of the next stage towards the output, forced to 0 while reset
   // is active. pdan[0] is not driven here; it comes from subc_ctl below.
   generate for(i=1; i<PD; i++) begin: DPA
      assign pdan[i] = rst_n ? ~(pda[i]|pd4[i-1]) : 0;
   end
   endgenerate

   // the ack to the upstream sender, and the two ends of the pipeline
   assign ia = pda[PD]|pd4[PD-1];
   assign pd0[PD] = i0;
   assign pd1[PD] = i1;
   assign pd2[PD] = i2;
   assign pd3[PD] = i3;
   assign pd4[PD] = i4;
   assign o0 = pd0[0];
   assign o1 = pd1[0];
   assign o2 = pd2[0];
   assign o3 = pd3[0];
   assign o4 = pd4[0];

   //---------------------------- route decoder related -------------------------- //
   // fetch the x and y target
   // x comes from sub-channels 0 (low nibble) and 1 (high nibble), y from
   // sub-channels 2 and 3; all bits are gated by rten.
   and Px_0 (pipe_xd[0], rten, pd0[1][0]);
   and Px_1 (pipe_xd[1], rten, pd1[1][0]);
   and Px_2 (pipe_xd[2], rten, pd2[1][0]);
   and Px_3 (pipe_xd[3], rten, pd3[1][0]);
   and Px_4 (pipe_xd[4], rten, pd0[1][1]);
   and Px_5 (pipe_xd[5], rten, pd1[1][1]);
   and Px_6 (pipe_xd[6], rten, pd2[1][1]);
   and Px_7 (pipe_xd[7], rten, pd3[1][1]);
   and Py_0 (pipe_yd[0], rten, pd0[1][2]);
   and Py_1 (pipe_yd[1], rten, pd1[1][2]);
   and Py_2 (pipe_yd[2], rten, pd2[1][2]);
   and Py_3 (pipe_yd[3], rten, pd3[1][2]);
   and Py_4 (pipe_yd[4], rten, pd0[1][3]);
   and Py_5 (pipe_yd[5], rten, pd1[1][3]);
   and Py_6 (pipe_yd[6], rten, pd2[1][3]);
   and Py_7 (pipe_yd[7], rten, pd3[1][3]);


   routing_decision             // the comparator
   RTD(
       .addrx    ( addrx   )
       ,.addry   ( addry   )
       ,.pipe_xd ( pipe_xd )
       ,.pipe_yd ( pipe_yd )
       ,.decision( raw_dec )
       );

   // keep the routing decision until the tail flit is received by all sub-channels
   // The y decisions (raw_dec[5:3]) are gated by x_equal, so they are only
   // kept once the x comparison has reported equality.
   // NOTE(review): c2p is defined elsewhere; presumably an asymmetric
   // C-element -- confirm the roles of its .a and .b pins in the cell library.
   c2p C_RTD0  ( .b(raw_dec[0]), .a((~frame_end)&rst_n), .q(dec_reg[0]));
   c2p C_RTD1  ( .b(raw_dec[1]), .a((~frame_end)&rst_n), .q(dec_reg[1]));
   c2p C_RT_XEQ (.b(raw_dec[2]), .a((~frame_end)&rst_n), .q(x_equal)  );
   c2p C_RTD2  ( .b(raw_dec[3]), .a(x_equal),            .q(dec_reg[2]));
   c2p C_RTD3  ( .b(raw_dec[4]), .a(x_equal),            .q(dec_reg[3]));
   c2p C_RTD4  ( .b(raw_dec[5]), .a(x_equal),            .q(dec_reg[4]));

   // generate the arbiter request signals
   // dec_reg: [0] x greater, [1] x less, [2] y greater, [3] y less, [4] y equal
   // (bit meanings follow the comments in routing_decision). The selected
   // concatenation is assigned to the RN bit wide arb_r.
   assign arb_r =
                 DIR == 0 ? {dec_reg[4],dec_reg[2],dec_reg[1],dec_reg[3]} : // south port
                 DIR == 1 ? {dec_reg[4],dec_reg[2]}                       : // west port
                 DIR == 2 ? {dec_reg[4],dec_reg[2],dec_reg[3],dec_reg[0]} : // north port
                 DIR == 3 ? {dec_reg[4],dec_reg[3]}                       : // east port
                 {dec_reg[2],dec_reg[1],dec_reg[3],dec_reg[0]}            ; // local port

   // decisions that have no matching request output on this port are errors
   assign rt_err =
                  DIR == 0 ? |{dec_reg[0]}                       : // south port
                  DIR == 1 ? |{dec_reg[0],dec_reg[1],dec_reg[3]} : // west port
                  DIR == 2 ? |{dec_reg[1]}                       : // north port
                  DIR == 3 ? |{dec_reg[0],dec_reg[1],dec_reg[2]} : // east port
                  |{dec_reg[4]}                                  ; // local port

   // a route error is acknowledged locally, a valid request by the allocator
   or IP_RTACK (rt_ack, rt_err, arb_ra);

   // ------------------------ pipeline control ------------------------------ //

`ifdef ENABLE_CHANNEL_SLICING
   // wrapped in generate/endgenerate like the other loops of this module;
   // the block label and therefore the hierarchical names are unchanged
   generate for(j=0; j<SCN; j++) begin: SC
      // the sub-channel controller
      subc_ctl SCH_C (
                      .nack   ( pdan[0][j] ),
                      .rt_rst ( rtrst[j]   ),
                      .ai2cb  ( oa[j]      ),
                      .ack    ( pda[1][j]  ),
                      .eof    ( pd4[0][j]  ),
                      .rt_ra  ( rt_ack     ),
                      .rt_err ( rt_err     ),
                      .rst_n  ( rst_n      )
                      );
   end // block: SC
   endgenerate
`else // !`ifdef ENABLE_CHANNEL_SLICING
   subc_ctl SCH_C (
                   .nack   ( pdan[0] ),
                   .rt_rst ( rtrst   ),
                   .ai2cb  ( oa      ),
                   .ack    ( pda[1]  ),
                   .eof    ( pd4[0]  ),
                   .rt_ra  ( rt_ack  ),
                   .rt_err ( rt_err  ),
                   .rst_n  ( rst_n   )
                   );
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   // the router controller part
   assign rten = ~rt_ack;       // stop sampling the address once the route is acknowledged
   assign frame_end = &rtrst;   // all sub-channels have flagged the route reset

endmodule // inp_buf
|
|
// the routing decision making procedure, comparators
//
// Compares the target address of a frame (pipe_xd, pipe_yd) with the
// address given on addrx/addry. Each 8-bit address is split into two
// 4-bit groups and each group is compared by one comp4.
//
// decision[0] frame x > addr x      decision[3] frame y > addr y
// decision[1] frame x < addr x      decision[4] frame y < addr y
// decision[2] frame x = addr x      decision[5] frame y = addr y
module routing_decision (
                         addrx
                         ,addry
                         ,pipe_xd
                         ,pipe_yd
                         ,decision
                         );

   // the address to compare with
   input [7:0]  addrx;
   input [7:0]  addry;

   // the target address of the frame and the comparison result
   input [7:0]  pipe_xd;
   input [7:0]  pipe_yd;
   output [5:0] decision;

   // per group results; index [1] is the upper group [7:4], [0] the lower [3:0]
   // NOTE(review): comp4 is defined elsewhere; from its use below q[0] is
   // presumably "a > b", q[1] "a < b" and q[2] "a = b" -- confirm against comp4.
   wire [2:0]   x_cmp [1:0];
   wire [2:0]   y_cmp [1:0];

   comp4 X0 ( .a(pipe_xd[3:0]), .b(addrx[3:0]), .q(x_cmp[0]));
   comp4 X1 ( .a(pipe_xd[7:4]), .b(addrx[7:4]), .q(x_cmp[1]));
   comp4 Y0 ( .a(pipe_yd[3:0]), .b(addry[3:0]), .q(y_cmp[0]));
   comp4 Y1 ( .a(pipe_yd[7:4]), .b(addry[7:4]), .q(y_cmp[1]));

   // the upper group decides unless it reports equal, then the lower group decides
   assign decision[0] = x_cmp[1][0] | (x_cmp[1][2]&x_cmp[0][0]); // frame x > addr x
   assign decision[1] = x_cmp[1][1] | (x_cmp[1][2]&x_cmp[0][1]); // frame x < addr x
   assign decision[2] = x_cmp[1][2] & x_cmp[0][2];               // frame x = addr x
   assign decision[3] = y_cmp[1][0] | (y_cmp[1][2]&y_cmp[0][0]); // frame y > addr y
   assign decision[4] = y_cmp[1][1] | (y_cmp[1][2]&y_cmp[0][1]); // frame y < addr y
   assign decision[5] = y_cmp[1][2] & y_cmp[0][2];               // frame y = addr y

endmodule // routing_decision
/im_alloc.v
0,0 → 1,126
/* |
Asynchronous SDM NoC |
(C)2011 Wei Song |
Advanced Processor Technologies Group |
Computer Science, the Univ. of Manchester, UK |
|
Authors: |
Wei Song wsong83@gmail.com |
|
License: LGPL 3.0 or later |
|
IM allocator (the IM dispatcher in the thesis) |
*** SystemVerilog is used *** |
|
References |
For the detail structure, please refer to Section 6.3.1 of the thesis: |
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011. |
|
History: |
05/09/2009 Initial version. <wsong83@gmail.com> |
10/10/2009 Add the reset port. <wsong83@gmail.com> |
05/11/2009 Speed up the arbiter. <wsong83@gmail.com> |
10/06/2010 [Major] change to use PIM structure. <wsong83@gmail.com> |
23/08/2010 Fix the non-QDI request withdraw process. <wsong83@gmail.com> |
27/05/2011 Clean up for opensource. <wsong83@gmail.com> |
|
*/ |
|
// the router structure definitions |
`include "define.v" |
|
// im_alloc: allocator of one input module (IM). It turns the requests of the
// VCN virtual circuits (IMr) into the IM configuration matrix cfg and the
// acks IMa.
//
// Build options:
//   ENABLE_MRMA undefined : an mnma allocator is used, one request per
//                           (virtual circuit, central module) pair.
//   ENABLE_MRMA defined   : an mrma allocator is used with one request per
//                           virtual circuit and a ready/blocked pair per CM.
//   ENABLE_CRRD undefined : the CMs port exists and gates every request with
//                           the inverted state reported by the CMs.
//
// NOTE(review): the ENABLE_MRMA branch uses OPrdy, OPblk and OPrst_n, which
// are only declared when ENABLE_CRRD is defined as well. ENABLE_MRMA
// therefore appears to require ENABLE_CRRD -- confirm in define.v.
module im_alloc (/*AUTOARG*/
`ifndef ENABLE_CRRD
   CMs,
`endif
   // Outputs
   IMa, cfg,
   // Inputs
   IMr, rst_n
   ) ;
   // parameters
   parameter VCN = 2;           // the number of virtual circuits on one port
   parameter CMN = 2;           // the number of central modules
   parameter SN = 2;            // the possible output port choice of a port

   input [VCN-1:0][SN-1:0]      IMr; // the requests from virtual circuits
   output [VCN-1:0]             IMa; // switch ready, ack for the request

`ifndef ENABLE_CRRD
   input [CMN-1:0][SN-1:0]      CMs; // the states from CMs
`endif

   input                        rst_n; // the negative active reset

   output [CMN-1:0][VCN-1:0]    cfg; // the matrix configuration signals

   // internal wires
`ifdef ENABLE_CRRD
 `ifdef ENABLE_MRMA
   wire [VCN-1:0]               IPr; // request to the MRMA
   wire [CMN-1:0]               OPrdy, OPblk; // OP ready and blocked status
   wire [CMN:0]                 OPrst_n; // the buffered resets to avoid metastability
 `else
   wire [VCN-1:0][CMN-1:0]      IPr; // request to the MNMA
 `endif
`else
   // using the feedback from CMs
   wire [VCN-1:0][CMN-1:0][SN-1:0] IPrm; // to generate the practical IPr
   wire [VCN-1:0][CMN-1:0]      IPr;
`endif

   // generate variables
   genvar                       i, j, k;

   //----------------------------------------
   // the PIM crossbar allocator
`ifndef ENABLE_MRMA
   mnma #(.N(VCN), .M(CMN))
   PIMA (
         .cfg ( cfg ),
         .r   ( IPr ),
         .ra  ( IMa )
         );

   generate
      for(i=0; i<VCN; i++) begin: IPC
         for(j=0; j<CMN; j++) begin: OPC
 `ifdef ENABLE_CRRD
            // any request of virtual circuit i is sent to every CM
            assign IPr[i][j] = |IMr[i];
 `else
            // request CM j only through the directions enabled by IPRen,
            // which combines the request with the inverted CM state
            assign IPr[i][j] = |IPrm[i][j];
            for(k=0; k<SN; k++) begin: DIRC
               c2p IPRen (.q(IPrm[i][j][k]), .a(IMr[i][k]), .b(~CMs[j][k]));
            end
 `endif
         end
      end // block: IPC
   endgenerate

`else
   mrma #(.N(VCN), .M(CMN))
   PIMA (
         .ca    ( IMa   ),
         .ra    ( OPblk ),
         .cfg   ( cfg   ),
         .c     ( IPr   ),
         .r     ( OPrdy ),
         .rst_n ( rst_n )
         );

   generate
      for(i=0; i<CMN; i++) begin: OPC
         // chain of delay cells: OPrst_n[i+1] is OPrst_n[i] after one delay
         delay DLY ( .q(OPrst_n[i+1]), .a(OPrst_n[i])); // dont touch
         // ready when not blocked and the delayed reset has been released
         assign OPrdy[i] = (~OPblk[i])&OPrst_n[i+1];
      end

      for(i=0; i<VCN; i++) begin: IPC
         assign IPr[i] = |IMr[i];
      end
   endgenerate

   assign OPrst_n[0] = rst_n;

`endif // !`ifndef ENABLE_MRMA

endmodule // im_alloc
/clos_sch.v
0,0 → 1,232
/* |
Asynchronous SDM NoC |
(C)2011 Wei Song |
Advanced Processor Technologies Group |
Computer Science, the Univ. of Manchester, UK |
|
Authors: |
Wei Song wsong83@gmail.com |
|
License: LGPL 3.0 or later |
|
Clos scheduler |
*** SystemVerilog is used *** |
|
References |
For the detail structure, please refer to Section 6.3.1 of the thesis: |
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011. |
|
History: |
11/12/2009 Initial version. <wsong83@gmail.com> |
10/06/2010 Change to use PIM structure <wsong83@gmail.com> |
23/08/2010 Fix the non-QDI request withdraw process <wsong83@gmail.com> |
23/09/2010 Modified for Clos SDM router <wsong83@gmail.com> |
27/05/2011 Clean up for opensource. <wsong83@gmail.com> |
|
*/ |
|
// the router structure definitions |
`include "define.v" |
|
// clos_sch: scheduler of the Clos switch.
//
// For each of the five input directions (index 0 south, 1 west, 2 north,
// 3 east, 4 local in imc/imra/cmra) it instantiates
//   - an im_alloc producing the IM configuration imc[dir] and the ack imra[dir],
//   - an rcb which, under the same configuration, passes the requests on to
//     the CMs (sr/wr/nr/er/lr) and returns the ack cmra[dir],
//   - one c2 per virtual circuit combining imra and cmra into the ack that
//     goes back to the input buffer.
// For each of the M central modules one cm_alloc produces the CM
// configuration (scfg/wcfg/ncfg/ecfg/lcfg).
// NOTE(review): rcb, cm_alloc and c2 are defined elsewhere; their behaviour
// is not visible here.
module clos_sch (/*AUTOARG*/
   // Outputs
   sack, wack, nack, eack, lack, imc, scfg, ncfg, wcfg, ecfg, lcfg,
   // Inputs
   sreq, nreq, lreq, wreq, ereq, rst_n
   );

   parameter M = 2;             // the number of CMs
   parameter N = 2;             // the number of ports in IMs/OMs

   // requests from all input buffers
   input [N-1:0][3:0]           sreq, nreq, lreq;
   input [N-1:0][1:0]           wreq, ereq;

   // ack to input buffers
   output [N-1:0]               sack, wack, nack, eack, lack;

   // IM acks
   wire [4:0][N-1:0]            imra;  // from the im_alloc instances
   wire [4:0][N-1:0]            cmra;  // from the rcb instances

   // IM cfgs and CM cfgs
   output [4:0][M-1:0][N-1:0]   imc;
   output [M-1:0][1:0]          scfg, ncfg;
   output [M-1:0][3:0]          wcfg, ecfg, lcfg;

   input                        rst_n; // reset, active low

   // the requests from IMs to CMs
   wire [M-1:0][1:0]            wr, er;
   wire [M-1:0][3:0]            sr, nr, lr;
   wire [M-1:0]                 sra, wra, nra, era, lra;

`ifndef ENABLE_CRRD
   wire [M-1:0][4:0]            cms; // the states from CMs

   // the CM states reordered to match the request bits of each input direction
   wire [M-1:0][3:0]            scms, ncms, lcms;
   wire [M-1:0][1:0]            wcms, ecms;
`endif

   genvar                       i;

   // IM schedulers

   // ---- south ----
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   SIM (
        .IMr   ( sreq    ),
        .IMa   ( imra[0] ),
`ifndef ENABLE_CRRD
        .CMs   ( scms    ),
`endif
        .cfg   ( imc[0]  ),
        .rst_n ( rst_n   )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   SRIM (
         .ireq ( sreq    ),
         .ira  ( cmra[0] ),
         .oreq ( sr      ),
         .ora  ( sra     ),
         .cfg  ( imc[0]  )
         );

   // the C-element to force the request withdrawal sequence
   generate for(i=0; i<N; i++) begin: SA
      c2 UA (.q(sack[i]), .a0(imra[0][i]), .a1(cmra[0][i]));
   end endgenerate

   // ---- west ----
   im_alloc #(.VCN(N), .CMN(M), .SN(2))
   WIM (
        .IMr   ( wreq    ),
        .IMa   ( imra[1] ),
`ifndef ENABLE_CRRD
        .CMs   ( wcms    ),
`endif
        .cfg   ( imc[1]  ),
        .rst_n ( rst_n   )
        );

   rcb #(.NN(N), .MN(M), .DW(2))
   WRIM (
         .ireq ( wreq    ),
         .ira  ( cmra[1] ),
         .oreq ( wr      ),
         .ora  ( wra     ),
         .cfg  ( imc[1]  )
         );

   generate for(i=0; i<N; i++) begin: WA
      c2 UA (.q(wack[i]), .a0(imra[1][i]), .a1(cmra[1][i]));
   end endgenerate

   // ---- north ----
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   NIM (
        .IMr   ( nreq    ),
        .IMa   ( imra[2] ),
`ifndef ENABLE_CRRD
        .CMs   ( ncms    ),
`endif
        .cfg   ( imc[2]  ),
        .rst_n ( rst_n   )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   NRIM (
         .ireq ( nreq    ),
         .ira  ( cmra[2] ),
         .oreq ( nr      ),
         .ora  ( nra     ),
         .cfg  ( imc[2]  )
         );

   generate for(i=0; i<N; i++) begin: NA
      c2 UA (.q(nack[i]), .a0(imra[2][i]), .a1(cmra[2][i]));
   end endgenerate

   // ---- east ----
   im_alloc #(.VCN(N), .CMN(M), .SN(2))
   EIM (
        .IMr   ( ereq    ),
        .IMa   ( imra[3] ),
`ifndef ENABLE_CRRD
        .CMs   ( ecms    ),
`endif
        .cfg   ( imc[3]  ),
        .rst_n ( rst_n   )
        );

   rcb #(.NN(N), .MN(M), .DW(2))
   ERIM (
         .ireq ( ereq    ),
         .ira  ( cmra[3] ),
         .oreq ( er      ),
         .ora  ( era     ),
         .cfg  ( imc[3]  )
         );

   generate for(i=0; i<N; i++) begin: EA
      c2 UA (.q(eack[i]), .a0(imra[3][i]), .a1(cmra[3][i]));
   end endgenerate

   // ---- local ----
   im_alloc #(.VCN(N), .CMN(M), .SN(4))
   LIM (
        .IMr   ( lreq    ),
        .IMa   ( imra[4] ),
`ifndef ENABLE_CRRD
        .CMs   ( lcms    ),
`endif
        .cfg   ( imc[4]  ),
        .rst_n ( rst_n   )
        );

   rcb #(.NN(N), .MN(M), .DW(4))
   LRIM (
         .ireq ( lreq    ),
         .ira  ( cmra[4] ),
         .oreq ( lr      ),
         .ora  ( lra     ),
         .cfg  ( imc[4]  )
         );

   generate for(i=0; i<N; i++) begin: LA
      c2 UA (.q(lack[i]), .a0(imra[4][i]), .a1(cmra[4][i]));
   end endgenerate

   // CM schedulers
   generate
      for(i=0; i<M; i=i+1) begin: CMSch
         cm_alloc S (
                     .sra  ( sra[i]  ),
                     .wra  ( wra[i]  ),
                     .nra  ( nra[i]  ),
                     .era  ( era[i]  ),
                     .lra  ( lra[i]  ),
                     .scfg ( scfg[i] ),
                     .ncfg ( ncfg[i] ),
                     .wcfg ( wcfg[i] ),
                     .ecfg ( ecfg[i] ),
                     .lcfg ( lcfg[i] ),
`ifndef ENABLE_CRRD
                     .s    ( cms[i]  ),
`endif
                     .wr   ( wr[i]   ),
                     .er   ( er[i]   ),
                     .sr   ( sr[i]   ),
                     .nr   ( nr[i]   ),
                     .lr   ( lr[i]   )
                     );

`ifndef ENABLE_CRRD
         // Select, for every input direction, the CM state bits of the
         // outputs that direction can request.
         // NOTE(review): presumably cms[i] is indexed 0 south, 1 west,
         // 2 north, 3 east, 4 local -- confirm against cm_alloc.
         assign scms[i] = {cms[i][4], cms[i][3], cms[i][2], cms[i][1]};
         assign wcms[i] = {cms[i][4], cms[i][3]};
         assign ncms[i] = {cms[i][4], cms[i][3], cms[i][1], cms[i][0]};
         assign ecms[i] = {cms[i][4], cms[i][1]};
         assign lcms[i] = {cms[i][3], cms[i][2], cms[i][1], cms[i][0]};
`endif

      end
   endgenerate

endmodule // clos_sch
|
|
/subc_ctl.v
0,0 → 1,68
/* |
Asynchronous SDM NoC |
(C)2011 Wei Song |
Advanced Processor Technologies Group |
Computer Science, the Univ. of Manchester, UK |
|
Authors: |
Wei Song wsong83@gmail.com |
|
License: LGPL 3.0 or later |
|
Sub-channel controller |
|
References |
* Lookahead pipelines |
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205 |
* Channel slicing |
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443. |
|
For the detail structure, please refer to Section 7.1.1 of the thesis: |
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011. |
|
History: |
05/05/2009 Initial version. <wsong83@gmail.com> |
22/10/2010 Make it more timing robust. <wsong83@gmail.com> |
24/05/2011 Clean up for opensource. <wsong83@gmail.com> |
|
*/ |
|
// the router structure definitions |
`include "define.v" |
|
// subc_ctl: controller of one sub-channel of an input buffer.
//
// It produces nack, the ack fed to the last stage of the input buffer, and
// rt_rst, the reset of the route decoder, from the ack of the output port,
// the eof bit and the route ack/error signals.
// NOTE(review): c2, c2p and c2n are cells defined elsewhere (presumably
// C-element variants); the role of each pin should be confirmed there.
module subc_ctl (/*AUTOARG*/
   // Outputs
   nack, rt_rst,
   // Inputs
   ai2cb, ack, eof, rt_ra, rt_err, rst_n
   );

   input      ai2cb;            // the ack from output ports
   input      ack;              // the ack from the last stage of the input buffer
   input      eof;              // the eof bit from the last stage of the input buffer
   input      rt_ra;            // ack from the switch allocator
   input      rt_err;           // invalid router decision
   input      rst_n;            // the global active low reset signal
   output     nack;             // the ack to the last stage of the input buffer
   output     rt_rst;           // the router reset signal

   wire       csc;              // internal wires to handle the CSC of the STG
   wire       acko;             // the ack signal after the C2N gate
   wire       fend;             // the end of frame indicator
   wire       acken;            // active low ack enable

`ifdef ENABLE_LOOKAHEAD
   c2n CD (.q(acko), .a(ai2cb), .b(ack)); // the C2N gate to avoid early withdrawal
`else
   // without lookahead the ack of the output port is used directly
   assign acko = ai2cb;
`endif

   c2p CEN (.b(eof), .a(acko), .q(fend));       // frame end, built from eof and the ack
   c2  C   (.a0(rt_ra), .a1(fend), .q(csc));    // state signal from route ack and frame end
   nand U1 ( acken, rt_ra, ~csc);               // acken is low only while rt_ra is high and csc is low
   nor  U2 ( rt_rst, fend, ~csc);               // rt_rst is high only while csc is high and fend is low
   // nack is high only when none of these holds: an ack for a non-eof flit,
   // ack disabled, a route error being acked by the buffer, or active reset
   nor  AG ( nack, acko&(~eof), acken|(rt_err&ack), ~rst_n);

endmodule // subc_ctl
|
|
/router.v
0,0 → 1,497
/* |
Asynchronous SDM NoC |
(C)2011 Wei Song |
Advanced Processor Technologies Group |
Computer Science, the Univ. of Manchester, UK |
|
Authors: |
Wei Song wsong83@gmail.com |
|
License: LGPL 3.0 or later |
|
Wormhole/SDM router top level module |
*** SystemVerilog is used *** |
|
History: |
28/05/2009 Initial version. <wsong83@gmail.com> |
23/09/2010 Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com> |
22/10/2010 Parameterize the number of pipelines in output buffers. <wsong83@gmail.com> |
25/05/2011 Clean up for opensource. <wsong83@gmail.com> |
|
*/ |
|
// the router structure definitions |
`include "define.v" |
|
// router: top level of the Wormhole/SDM router.
//
// For every virtual circuit i (generate block SC) and every direction
// (south, west, north, east, local) it instantiates one input buffer
// (inp_buf) and one output buffer (outp_buf). All input buffers feed a single
// switch CB and a single allocator ALLOC:
//   ENABLE_CLOS defined   : dclos switch configured by clos_sch
//   ENABLE_CLOS undefined : dcb_xy crossbar configured by sdm_sch
//
// Signal naming: [dir]2c* runs from an input buffer to the switch and
// c2[dir]* from the switch to an output buffer; suffix 0..3 are the data
// wires, 4 is the eof bit and a is the ack.
module router(/*AUTOARG*/
   // Outputs
   so0, so1, so2, so3, wo0, wo1, wo2, wo3, no0, no1, no2, no3, eo0,
   eo1, eo2, eo3, lo0, lo1, lo2, lo3, so4, wo4, no4, eo4, lo4, sia,
   wia, nia, eia, lia,
   // Inputs
   si0, si1, si2, si3, wi0, wi1, wi2, wi3, ni0, ni1, ni2, ni3, ei0,
   ei1, ei2, ei3, li0, li1, li2, li3, si4, wi4, ni4, ei4, li4, soa,
   woa, noa, eoa, loa, addrx, addry, rst_n
   );

   parameter VCN = 1;    // number of virtual circuits in each direction. When VCN == 1, it is a wormhole router
   parameter DW = 32;    // the datawidth of a single virtual circuit, the total data width of the router is DW*VCN
   parameter IPD = 1;    // the number of half-buffer stages in input buffers
   parameter OPD = 2;    // the number of half-buffer stages in output buffers
   parameter SCN = DW/2; // the number of 1-of-4 sub-channel in each virtual circuit

   input [VCN-1:0][SCN-1:0]  si0, si1, si2, si3; // south input [0], X+1
   input [VCN-1:0][SCN-1:0]  wi0, wi1, wi2, wi3; // west input [1], Y-1
   input [VCN-1:0][SCN-1:0]  ni0, ni1, ni2, ni3; // north input [2], X-1
   input [VCN-1:0][SCN-1:0]  ei0, ei1, ei2, ei3; // east input [3], Y+1
   input [VCN-1:0][SCN-1:0]  li0, li1, li2, li3; // local input
   output [VCN-1:0][SCN-1:0] so0, so1, so2, so3; // south output
   output [VCN-1:0][SCN-1:0] wo0, wo1, wo2, wo3; // west output
   output [VCN-1:0][SCN-1:0] no0, no1, no2, no3; // north output
   output [VCN-1:0][SCN-1:0] eo0, eo1, eo2, eo3; // east output
   output [VCN-1:0][SCN-1:0] lo0, lo1, lo2, lo3; // local output

   // eof bits and ack lines
`ifdef ENABLE_CHANNEL_SLICING
   // one eof bit and one ack per sub-channel
   input [VCN-1:0][SCN-1:0]  si4, wi4, ni4, ei4, li4;
   output [VCN-1:0][SCN-1:0] so4, wo4, no4, eo4, lo4;
   output [VCN-1:0][SCN-1:0] sia, wia, nia, eia, lia;
   input [VCN-1:0][SCN-1:0]  soa, woa, noa, eoa, loa;
`else
   // one eof bit and one ack per virtual circuit
   input [VCN-1:0]           si4, wi4, ni4, ei4, li4;
   output [VCN-1:0]          so4, wo4, no4, eo4, lo4;
   output [VCN-1:0]          sia, wia, nia, eia, lia;
   input [VCN-1:0]           soa, woa, noa, eoa, loa;
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   input [7:0]               addrx, addry; // the local address of the router, coded in 1-of-4 coding
   input                     rst_n;        // active low reset signal

   // internal wires, input buffers to switches (crossbar): [dir]2[cb][1-of-4 index]
   wire [VCN-1:0][SCN-1:0]   s2c0, s2c1, s2c2, s2c3; // south input to switch data
   wire [VCN-1:0][SCN-1:0]   w2c0, w2c1, w2c2, w2c3;
   wire [VCN-1:0][SCN-1:0]   n2c0, n2c1, n2c2, n2c3;
   wire [VCN-1:0][SCN-1:0]   e2c0, e2c1, e2c2, e2c3;
   wire [VCN-1:0][SCN-1:0]   l2c0, l2c1, l2c2, l2c3;
   // internal wires, switches (crossbar) to output buffers: [cb]2[dir][1-of-4 index]
   wire [VCN-1:0][SCN-1:0]   c2s0, c2s1, c2s2, c2s3;
   wire [VCN-1:0][SCN-1:0]   c2w0, c2w1, c2w2, c2w3;
   wire [VCN-1:0][SCN-1:0]   c2n0, c2n1, c2n2, c2n3; // switch to north output
   wire [VCN-1:0][SCN-1:0]   c2e0, c2e1, c2e2, c2e3;
   wire [VCN-1:0][SCN-1:0]   c2l0, c2l1, c2l2, c2l3;

   // internal wires for ack and eof bits
`ifdef ENABLE_CHANNEL_SLICING
   wire [VCN-1:0][SCN-1:0]   s2c4, w2c4, n2c4, e2c4, l2c4;
   wire [VCN-1:0][SCN-1:0]   c2s4, c2w4, c2n4, c2e4, c2l4;
   wire [VCN-1:0][SCN-1:0]   s2ca, w2ca, n2ca, e2ca, l2ca;
   wire [VCN-1:0][SCN-1:0]   c2sa, c2wa, c2na, c2ea, c2la;
`else
   wire [VCN-1:0]            s2c4, w2c4, n2c4, e2c4, l2c4;
   wire [VCN-1:0]            c2s4, c2w4, c2n4, c2e4, c2l4;
   wire [VCN-1:0]            s2ca, w2ca, n2ca, e2ca, l2ca;
   wire [VCN-1:0]            c2sa, c2wa, c2na, c2ea, c2la;
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   // the requests/acks from/to input buffers to switch allocators
   // (4 request bits for south/north/local, 2 for west/east, matching RN below)
   wire [VCN-1:0][3:0]       sreq, nreq, lreq;
   wire [VCN-1:0][1:0]       wreq, ereq;
   wire [VCN-1:0]            sack, wack, nack, eack, lack;

   // configuration bits for the switches
`ifdef ENABLE_CLOS
   wire [4:0][VCN-1:0][VCN-1:0] imcfg;
   wire [VCN-1:0][1:0]          scfg, ncfg;
   wire [VCN-1:0][3:0]          wcfg, ecfg, lcfg;
`else // normal crossbar based SDM
   wire [VCN-1:0][2*VCN-1:0]    scfg, ncfg;
   wire [VCN-1:0][4*VCN-1:0]    wcfg, ecfg, lcfg;
`endif


   genvar                    i;

   generate
      for (i=0; i<VCN; i++) begin: SC

         // --------------- input buffers ------------------- //

         // south input: DIR 0, 4 requests
         inp_buf #(.DIR(0), .RN(4), .DW(DW), .PD(IPD))
         SIB (
              .o0 ( s2c0[i] ), .o1 ( s2c1[i] ), .o2 ( s2c2[i] ), .o3 ( s2c3[i] ), .o4 ( s2c4[i] ),
              .ia     ( sia[i]  ),
              .arb_r  ( sreq[i] ),
              .rst_n  ( rst_n   ),
              .i0 ( si0[i] ), .i1 ( si1[i] ), .i2 ( si2[i] ), .i3 ( si3[i] ), .i4 ( si4[i] ),
              .oa     ( s2ca[i] ),
              .addrx  ( addrx   ),
              .addry  ( addry   ),
              .arb_ra ( sack[i] )
              );

         // west input: DIR 1, 2 requests
         inp_buf #(.DIR(1), .RN(2), .DW(DW), .PD(IPD))
         WIB (
              .o0 ( w2c0[i] ), .o1 ( w2c1[i] ), .o2 ( w2c2[i] ), .o3 ( w2c3[i] ), .o4 ( w2c4[i] ),
              .ia     ( wia[i]  ),
              .arb_r  ( wreq[i] ),
              .rst_n  ( rst_n   ),
              .i0 ( wi0[i] ), .i1 ( wi1[i] ), .i2 ( wi2[i] ), .i3 ( wi3[i] ), .i4 ( wi4[i] ),
              .oa     ( w2ca[i] ),
              .addrx  ( addrx   ),
              .addry  ( addry   ),
              .arb_ra ( wack[i] )
              );

         // north input: DIR 2, 4 requests
         inp_buf #(.DIR(2), .RN(4), .DW(DW), .PD(IPD))
         NIB (
              .o0 ( n2c0[i] ), .o1 ( n2c1[i] ), .o2 ( n2c2[i] ), .o3 ( n2c3[i] ), .o4 ( n2c4[i] ),
              .ia     ( nia[i]  ),
              .arb_r  ( nreq[i] ),
              .rst_n  ( rst_n   ),
              .i0 ( ni0[i] ), .i1 ( ni1[i] ), .i2 ( ni2[i] ), .i3 ( ni3[i] ), .i4 ( ni4[i] ),
              .oa     ( n2ca[i] ),
              .addrx  ( addrx   ),
              .addry  ( addry   ),
              .arb_ra ( nack[i] )
              );

         // east input: DIR 3, 2 requests
         inp_buf #(.DIR(3), .RN(2), .DW(DW), .PD(IPD))
         EIB (
              .o0 ( e2c0[i] ), .o1 ( e2c1[i] ), .o2 ( e2c2[i] ), .o3 ( e2c3[i] ), .o4 ( e2c4[i] ),
              .ia     ( eia[i]  ),
              .arb_r  ( ereq[i] ),
              .rst_n  ( rst_n   ),
              .i0 ( ei0[i] ), .i1 ( ei1[i] ), .i2 ( ei2[i] ), .i3 ( ei3[i] ), .i4 ( ei4[i] ),
              .oa     ( e2ca[i] ),
              .addrx  ( addrx   ),
              .addry  ( addry   ),
              .arb_ra ( eack[i] )
              );

         // local input: DIR 4, 4 requests
         inp_buf #(.DIR(4), .RN(4), .DW(DW), .PD(IPD))
         LIB (
              .o0 ( l2c0[i] ), .o1 ( l2c1[i] ), .o2 ( l2c2[i] ), .o3 ( l2c3[i] ), .o4 ( l2c4[i] ),
              .ia     ( lia[i]  ),
              .arb_r  ( lreq[i] ),
              .rst_n  ( rst_n   ),
              .i0 ( li0[i] ), .i1 ( li1[i] ), .i2 ( li2[i] ), .i3 ( li3[i] ), .i4 ( li4[i] ),
              .oa     ( l2ca[i] ),
              .addrx  ( addrx   ),
              .addry  ( addry   ),
              .arb_ra ( lack[i] )
              );

         // --------------------- output buffers ---------------- //
         outp_buf #(.DW(DW), .PD(OPD))
         SOB (
              .o0 ( so0[i] ), .o1 ( so1[i] ), .o2 ( so2[i] ), .o3 ( so3[i] ), .o4 ( so4[i] ),
              .oa    ( soa[i]  ),
              .i0 ( c2s0[i] ), .i1 ( c2s1[i] ), .i2 ( c2s2[i] ), .i3 ( c2s3[i] ), .i4 ( c2s4[i] ),
              .ia    ( c2sa[i] ),
              .rst_n ( rst_n   )
              );

         outp_buf #(.DW(DW), .PD(OPD))
         WOB (
              .o0 ( wo0[i] ), .o1 ( wo1[i] ), .o2 ( wo2[i] ), .o3 ( wo3[i] ), .o4 ( wo4[i] ),
              .oa    ( woa[i]  ),
              .i0 ( c2w0[i] ), .i1 ( c2w1[i] ), .i2 ( c2w2[i] ), .i3 ( c2w3[i] ), .i4 ( c2w4[i] ),
              .ia    ( c2wa[i] ),
              .rst_n ( rst_n   )
              );

         outp_buf #(.DW(DW), .PD(OPD))
         NOB (
              .o0 ( no0[i] ), .o1 ( no1[i] ), .o2 ( no2[i] ), .o3 ( no3[i] ), .o4 ( no4[i] ),
              .oa    ( noa[i]  ),
              .i0 ( c2n0[i] ), .i1 ( c2n1[i] ), .i2 ( c2n2[i] ), .i3 ( c2n3[i] ), .i4 ( c2n4[i] ),
              .ia    ( c2na[i] ),
              .rst_n ( rst_n   )
              );

         outp_buf #(.DW(DW), .PD(OPD))
         EOB (
              .o0 ( eo0[i] ), .o1 ( eo1[i] ), .o2 ( eo2[i] ), .o3 ( eo3[i] ), .o4 ( eo4[i] ),
              .oa    ( eoa[i]  ),
              .i0 ( c2e0[i] ), .i1 ( c2e1[i] ), .i2 ( c2e2[i] ), .i3 ( c2e3[i] ), .i4 ( c2e4[i] ),
              .ia    ( c2ea[i] ),
              .rst_n ( rst_n   )
              );

         outp_buf #(.DW(DW), .PD(OPD))
         LOB (
              .o0 ( lo0[i] ), .o1 ( lo1[i] ), .o2 ( lo2[i] ), .o3 ( lo3[i] ), .o4 ( lo4[i] ),
              .oa    ( loa[i]  ),
              .i0 ( c2l0[i] ), .i1 ( c2l1[i] ), .i2 ( c2l2[i] ), .i3 ( c2l3[i] ), .i4 ( c2l4[i] ),
              .ia    ( c2la[i] ),
              .rst_n ( rst_n   )
              );

      end // block: SC
   endgenerate

`ifdef ENABLE_CLOS
   // Clos switch; the same VCN is used for its MN and NN parameters
   dclos #(.MN(VCN), .NN(VCN), .DW(DW))
   CB (
       // switch outputs, to the output buffers
       .so0 ( c2s0 ), .so1 ( c2s1 ), .so2 ( c2s2 ), .so3 ( c2s3 ), .so4 ( c2s4 ), .soa ( c2sa ),
       .wo0 ( c2w0 ), .wo1 ( c2w1 ), .wo2 ( c2w2 ), .wo3 ( c2w3 ), .wo4 ( c2w4 ), .woa ( c2wa ),
       .no0 ( c2n0 ), .no1 ( c2n1 ), .no2 ( c2n2 ), .no3 ( c2n3 ), .no4 ( c2n4 ), .noa ( c2na ),
       .eo0 ( c2e0 ), .eo1 ( c2e1 ), .eo2 ( c2e2 ), .eo3 ( c2e3 ), .eo4 ( c2e4 ), .eoa ( c2ea ),
       .lo0 ( c2l0 ), .lo1 ( c2l1 ), .lo2 ( c2l2 ), .lo3 ( c2l3 ), .lo4 ( c2l4 ), .loa ( c2la ),
       // switch inputs, from the input buffers
       .si0 ( s2c0 ), .si1 ( s2c1 ), .si2 ( s2c2 ), .si3 ( s2c3 ), .si4 ( s2c4 ), .sia ( s2ca ),
       .wi0 ( w2c0 ), .wi1 ( w2c1 ), .wi2 ( w2c2 ), .wi3 ( w2c3 ), .wi4 ( w2c4 ), .wia ( w2ca ),
       .ni0 ( n2c0 ), .ni1 ( n2c1 ), .ni2 ( n2c2 ), .ni3 ( n2c3 ), .ni4 ( n2c4 ), .nia ( n2ca ),
       .ei0 ( e2c0 ), .ei1 ( e2c1 ), .ei2 ( e2c2 ), .ei3 ( e2c3 ), .ei4 ( e2c4 ), .eia ( e2ca ),
       .li0 ( l2c0 ), .li1 ( l2c1 ), .li2 ( l2c2 ), .li3 ( l2c3 ), .li4 ( l2c4 ), .lia ( l2ca ),
       // configuration from the allocator
       .imcfg ( imcfg ),
       .wcfg  ( wcfg  ),
       .ecfg  ( ecfg  ),
       .lcfg  ( lcfg  ),
       .scfg  ( scfg  ),
       .ncfg  ( ncfg  )
       ) ;

   clos_sch #(.M(VCN), .N(VCN))
   ALLOC (
          .sack  ( sack  ),
          .wack  ( wack  ),
          .nack  ( nack  ),
          .eack  ( eack  ),
          .lack  ( lack  ),
          .imc   ( imcfg ),
          .scfg  ( scfg  ),
          .ncfg  ( ncfg  ),
          .wcfg  ( wcfg  ),
          .ecfg  ( ecfg  ),
          .lcfg  ( lcfg  ),
          .sreq  ( sreq  ),
          .nreq  ( nreq  ),
          .lreq  ( lreq  ),
          .wreq  ( wreq  ),
          .ereq  ( ereq  ),
          .rst_n ( rst_n )
          );
`else // Crossbar based SDM

   dcb_xy #(.VCN(VCN), .VCW(DW))
   CB (
       // switch outputs, to the output buffers
       .so0 ( c2s0 ), .so1 ( c2s1 ), .so2 ( c2s2 ), .so3 ( c2s3 ), .so4 ( c2s4 ), .soa ( c2sa ),
       .wo0 ( c2w0 ), .wo1 ( c2w1 ), .wo2 ( c2w2 ), .wo3 ( c2w3 ), .wo4 ( c2w4 ), .woa ( c2wa ),
       .no0 ( c2n0 ), .no1 ( c2n1 ), .no2 ( c2n2 ), .no3 ( c2n3 ), .no4 ( c2n4 ), .noa ( c2na ),
       .eo0 ( c2e0 ), .eo1 ( c2e1 ), .eo2 ( c2e2 ), .eo3 ( c2e3 ), .eo4 ( c2e4 ), .eoa ( c2ea ),
       .lo0 ( c2l0 ), .lo1 ( c2l1 ), .lo2 ( c2l2 ), .lo3 ( c2l3 ), .lo4 ( c2l4 ), .loa ( c2la ),
       // switch inputs, from the input buffers
       .si0 ( s2c0 ), .si1 ( s2c1 ), .si2 ( s2c2 ), .si3 ( s2c3 ), .si4 ( s2c4 ), .sia ( s2ca ),
       .wi0 ( w2c0 ), .wi1 ( w2c1 ), .wi2 ( w2c2 ), .wi3 ( w2c3 ), .wi4 ( w2c4 ), .wia ( w2ca ),
       .ni0 ( n2c0 ), .ni1 ( n2c1 ), .ni2 ( n2c2 ), .ni3 ( n2c3 ), .ni4 ( n2c4 ), .nia ( n2ca ),
       .ei0 ( e2c0 ), .ei1 ( e2c1 ), .ei2 ( e2c2 ), .ei3 ( e2c3 ), .ei4 ( e2c4 ), .eia ( e2ca ),
       .li0 ( l2c0 ), .li1 ( l2c1 ), .li2 ( l2c2 ), .li3 ( l2c3 ), .li4 ( l2c4 ), .lia ( l2ca ),
       // configuration from the allocator
       .wcfg ( wcfg ),
       .ecfg ( ecfg ),
       .lcfg ( lcfg ),
       .scfg ( scfg ),
       .ncfg ( ncfg )
       ) ;


   sdm_sch #(.VCN(VCN))
   ALLOC (
          .sack  ( sack  ),
          .wack  ( wack  ),
          .nack  ( nack  ),
          .eack  ( eack  ),
          .lack  ( lack  ),
          .scfg  ( scfg  ),
          .ncfg  ( ncfg  ),
          .wcfg  ( wcfg  ),
          .ecfg  ( ecfg  ),
          .lcfg  ( lcfg  ),
          .sreq  ( sreq  ),
          .nreq  ( nreq  ),
          .lreq  ( lreq  ),
          .wreq  ( wreq  ),
          .ereq  ( ereq  ),
          .rst_n ( rst_n )
          );
`endif

endmodule // router
router.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: sdm_sch.v
===================================================================
--- sdm_sch.v (nonexistent)
+++ sdm_sch.v (revision 57)
@@ -0,0 +1,244 @@
+/*
+ Asynchronous SDM NoC
+ (C)2011 Wei Song
+ Advanced Processor Technologies Group
+ Computer Science, the Univ. of Manchester, UK
+
+ Authors:
+ Wei Song wsong83@gmail.com
+
+ License: LGPL 3.0 or later
+
+ Crossbar based SDM switch allocator
+ *** SystemVerilog is used ***
+
+ References
+ For the detailed structure, please refer to Section 6.3.1 of the thesis:
+ Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011.
+
+ History:
+ 28/09/2009 Initial version.
+ 27/05/2011 Clean up for opensource.
+
+*/
+
+// the router structure definitions
+`include "define.v"
+
+module sdm_sch (/*AUTOARG*/
+ // Outputs
+ sack, wack, nack, eack, lack, scfg, ncfg, wcfg, ecfg, lcfg,
+ // Inputs
+ sreq, nreq, lreq, wreq, ereq, rst_n
+ );
+
+ parameter VCN = 2; // the number of virtual circuits per port
+
+ // income requests
+ input [VCN-1:0][3:0] sreq, nreq, lreq;
+ input [VCN-1:0][1:0] wreq, ereq;
+
+ // ack to input buffers
+ output [VCN-1:0] sack, wack, nack, eack, lack;
+
+ // configuration to the crossbar
+ output [VCN-1:0][1:0][VCN-1:0] scfg, ncfg;
+ output [VCN-1:0][3:0][VCN-1:0] wcfg, ecfg, lcfg;
+
+ input rst_n; // active low global reset
+
+ // requests to arbiters
+`ifndef ENABLE_MRMA
+ wire [1:0][VCN-1:0][VCN-1:0] r2s, r2n; // shuffle the incoming request signals
+ wire [3:0][VCN-1:0][VCN-1:0] r2w, r2e, r2l;
+`else
+ wire [1:0][VCN-1:0] r2s, r2n; // shuffle the incoming request signals
+ wire [3:0][VCN-1:0] r2w, r2e, r2l;
+`endif
+
+ // ack from arbiters
+ wire [VCN-1:0][3:0] a2s, a2n, a2l;
+ wire [VCN-1:0][1:0] a2w, a2e;
+
+ // ack of the arbiters
+ wire [1:0][VCN-1:0] r2sa, r2na;
+ wire [3:0][VCN-1:0] r2wa, r2ea, r2la;
+
+`ifdef ENABLE_MRMA
+ wire [VCN:0] OPrst_n; // the buffered resets to avoid metastability
+ wire [VCN-1:0] SOPrdy, SOPblk; // OP ready and blocked status
+ wire [VCN-1:0] WOPrdy, WOPblk; // OP ready and blocked status
+ wire [VCN-1:0] NOPrdy, NOPblk; // OP ready and blocked status
+ wire [VCN-1:0] EOPrdy, EOPblk; // OP ready and blocked status
+ wire [VCN-1:0] LOPrdy, LOPblk; // OP ready and blocked status
+`endif
+
+ genvar i,j;
+
+ // wire shuffle
+ generate for(i=0; i
+ 20/09/2010 Modified for the Clos SDM router
+ 25/05/2011 Clean up for opensource.
+
+*/
+
+// the router structure definitions
+`include "define.v"
+
+module cm_alloc (/*AUTOARG*/
+`ifndef ENABLE_CRRD
+   s,
+`endif
+   // Outputs
+   sra, wra, nra, era, lra, scfg, ncfg, wcfg, ecfg, lcfg,
+   // Inputs
+   wr, er, sr, nr, lr
+   ) ;
+
+   // ---------------------------------------------------------------------
+   // Allocator built from five mutex_arb instances, one per *cfg output.
+   // Every request bit is wired to exactly one arbiter; the grant vector
+   // of each arbiter is exported as the matching *cfg bus and the grant
+   // bits are OR-ed back together per requester to form the *ra acks.
+   // ---------------------------------------------------------------------
+
+   // Requests coming from the IMs.
+   //   sr[0]->WA  sr[1]->NA  sr[2]->EA  sr[3]->LA
+   //   nr[0]->SA  nr[1]->WA  nr[2]->EA  nr[3]->LA
+   //   lr[0]->SA  lr[1]->WA  lr[2]->NA  lr[3]->EA
+   //   wr[0]->EA  wr[1]->LA
+   //   er[0]->WA  er[1]->LA
+   input [3:0]  sr;
+   input [3:0]  nr;
+   input [3:0]  lr;
+   input [1:0]  wr;
+   input [1:0]  er;
+
+   // Acknowledges returned to the IMs: high while any request bit of that
+   // requester currently holds a grant.
+   output       sra;
+   output       wra;
+   output       nra;
+   output       era;
+   output       lra;
+
+   // Grant vectors, used as the configuration of the local CM.
+   output [1:0] scfg;
+   output [1:0] ncfg;
+   output [3:0] wcfg;
+   output [3:0] ecfg;
+   output [3:0] lcfg;
+
+   // Status returned to the IMs; only present when ENABLE_CRRD is NOT
+   // defined (the asynchronous dispatching algorithm).
+`ifndef ENABLE_CRRD
+   output [4:0] s;
+`endif
+
+   // Request buses in the bit order seen by each arbiter.  Grant bit k of
+   // an arbiter is read back below as the grant of request bit k.
+   wire [1:0]   sa_req = {lr[0], nr[0]};
+   wire [3:0]   wa_req = {lr[1], er[0], nr[1], sr[0]};
+   wire [1:0]   na_req = {lr[2], sr[1]};
+   wire [3:0]   ea_req = {lr[3], nr[2], wr[0], sr[2]};
+   wire [3:0]   la_req = {er[1], nr[3], wr[1], sr[3]};
+
+   // One arbiter per *cfg bus.
+   // NOTE(review): the positional parameter is presumably the number of
+   // request lines -- confirm against the mutex_arb definition.
+   mutex_arb #(2) SA ( .req ( sa_req ), .gnt ( scfg ) );
+   mutex_arb #(4) WA ( .req ( wa_req ), .gnt ( wcfg ) );
+   mutex_arb #(2) NA ( .req ( na_req ), .gnt ( ncfg ) );
+   mutex_arb #(4) EA ( .req ( ea_req ), .gnt ( ecfg ) );
+   mutex_arb #(4) LA ( .req ( la_req ), .gnt ( lcfg ) );
+
+   // Acknowledge generation: collect, for each requester, the grant bits
+   // that sit at the positions its request bits occupy in the buses above.
+   assign sra = wcfg[0] | ncfg[0] | ecfg[0] | lcfg[0];
+   assign wra = ecfg[1] | lcfg[1];
+   assign nra = scfg[0] | wcfg[1] | ecfg[2] | lcfg[2];
+   assign era = wcfg[2] | lcfg[3];
+   assign lra = scfg[1] | wcfg[3] | ncfg[1] | ecfg[3];
+
+   // Status generation: bit k is high while the k-th arbiter has any grant
+   // asserted, in the order {lcfg, ecfg, ncfg, wcfg, scfg} = s[4:0].
+`ifndef ENABLE_CRRD
+   assign s[0] = |scfg;
+   assign s[1] = |wcfg;
+   assign s[2] = |ncfg;
+   assign s[3] = |ecfg;
+   assign s[4] = |lcfg;
+`endif
+
+endmodule // cm_alloc
+
+
cm_alloc.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: output_buf.v
===================================================================
--- output_buf.v (nonexistent)
+++ output_buf.v (revision 57)
@@ -0,0 +1,143 @@
+/*
+ Asynchronous SDM NoC
+ (C)2011 Wei Song
+ Advanced Processor Technologies Group
+ Computer Science, the Univ. of Manchester, UK
+
+ Authors:
+ Wei Song wsong83@gmail.com
+
+ License: LGPL 3.0 or later
+
+ Output buffer for Wormhole/SDM routers.
+ *** SystemVerilog is used ***
+
+ References
+ * Lookahead pipelines
+ Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
+
+ History:
+ 26/05/2009 Initial version.
+ 20/09/2010 Supporting channel slicing and SDM using macro definitions.
+ 22/10/2010 Parameterize the number of pipelines in output buffers.
+ 23/05/2011 Clean up for opensource.
+
+*/
+
+// the router structure definitions
+`include "define.v"
+
+// the out buffer
+module outp_buf (/*AUTOARG*/
+ // Outputs
+ o0, o1, o2, o3, o4, ia,
+ // Inputs
+ rst_n, i0, i1, i2, i3, i4, oa
+ );
+
+ parameter DW = 16; // the datawidth of a single virtual circuit
+ parameter PD = 2; // buffer depth
+ parameter SCN = DW/2; // the number of 1-of-4 sub-channel in each virtual circuit
+
+ input rst_n; // global reset, active low
+ input [SCN-1:0] i0, i1, i2, i3; // data input
+ output [SCN-1:0] o0, o1, o2, o3; // data output
+ wire [PD:0][SCN-1:0] pd0, pd1, pd2, pd3; // data wires for the internal pipeline satges
+`ifdef ENABLE_CHANNEL_SLICING
+ input [SCN-1:0] i4, oa; // eof and ack
+ output [SCN-1:0] o4, ia;
+ wire [SCN-1:0] ian_dly;
+ wire [PD:0][SCN-1:0] pd4, pda, pdan; // internal eof and ack
+`else
+ input i4, oa; // eof and ack
+ output o4, ia;
+ wire ian_dly;
+ wire [PD:0] pd4, pda, pdan; // internal eof and ack
+`endif
+
+
+//-------------------------- pipeline ---------------------------------------//
+ genvar i,j;
+ generate for(i=0; i
output_buf.v
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property