OpenCores
URL https://opencores.org/ocsvn/async_sdm_noc/async_sdm_noc/trunk

Subversion Repositories async_sdm_noc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /async_sdm_noc/branches
    from Rev 72 to Rev 73
    Reverse comparison

Rev 72 → Rev 73

/clos_opt/clos_opt/src/input_buf.v
62,11 → 62,12
output [RN-1:0] deco; // the decoded routing requests
//-------------------------- control signals ---------------------------------------//
wire rten; // routing enable
wire rta; // the ack of the dec reg pipeline stage
wire frame_end; // identify the end of a frame
wire [7:0] pipe_xd, pipe_yd; // the target address from the incoming frame
wire [PD:0][SCN-1:0] pd0, pd1, pd2, pd3; // data wires for the internal pipeline stages
wire [5:0] raw_dec; // the routing decision from the comparator
wire [5:0] xy_dec; // the routing decision of the XY routing algorithm
wire [4:0] dec_reg; // the routing decision kept by C-gates
wire x_equal; // addr x = target x
wire rt_err; // route decoder error
73,10 → 74,14
`ifdef ENABLE_CHANNEL_SLICING
wire [SCN-1:0] deca; // the ack for routing requests
wire [SCN-1:0] pda1; // the ack for the 1st pipeline stage
wire [SCN-1:0] acko; // the ack from CB
wire [PD:0][SCN-1:0] pd4, pda, pdan, pd4an; // data wires for the internal pipeline stages
 
`else
wire deca; // the ack for routing requests
wire pda1; // the ack for the 1st pipeline stage
wire acko; // the ack from CB
wire [PD:0] pd4, pda, pdan, pd4an; // data wires for the internal pipeline stages
`endif // !`ifdef ENABLE_CHANNEL_SLICING
wire decan;
110,7 → 115,6
);
end // block: SC
 
`else // !`ifdef ENABLE_CHANNEL_SLICING
pipe4 #(.DW(DW))
139,13 → 143,19
end // block: DP
endgenerate
 
generate for(i=1; i<PD; i++) begin: DPA
generate for(i=2; i<PD; i++) begin: DPA
assign pdan[i] = rst_n ? ~(pda[i]|pd4[i-1]) : 0;
assign pd4an[i] = pdan[i];
end
 
if(PD>1)
assign ia = pda[PD]|pd4[PD-1];
else
assign ia = pda1;
endgenerate
 
assign ia = pda[PD]|pd4[PD-1];
//assign ia = pda[PD]|pd4[PD-1];
assign pd0[PD] = i0;
assign pd1[PD] = i1;
assign pd2[PD] = i2;
159,22 → 169,22
//---------------------------- route decoder related -------------------------- //
// fetch the x and y target
and Px_0 (pipe_xd[0], rten, pd0[1][0]);
and Px_1 (pipe_xd[1], rten, pd1[1][0]);
and Px_2 (pipe_xd[2], rten, pd2[1][0]);
and Px_3 (pipe_xd[3], rten, pd3[1][0]);
and Px_4 (pipe_xd[4], rten, pd0[1][1]);
and Px_5 (pipe_xd[5], rten, pd1[1][1]);
and Px_6 (pipe_xd[6], rten, pd2[1][1]);
and Px_7 (pipe_xd[7], rten, pd3[1][1]);
and Py_0 (pipe_yd[0], rten, pd0[1][2]);
and Py_1 (pipe_yd[1], rten, pd1[1][2]);
and Py_2 (pipe_yd[2], rten, pd2[1][2]);
and Py_3 (pipe_yd[3], rten, pd3[1][2]);
and Py_4 (pipe_yd[4], rten, pd0[1][3]);
and Py_5 (pipe_yd[5], rten, pd1[1][3]);
and Py_6 (pipe_yd[6], rten, pd2[1][3]);
and Py_7 (pipe_yd[7], rten, pd3[1][3]);
and Px_0 (pipe_xd[0], ~rta, pd0[1][0]);
and Px_1 (pipe_xd[1], ~rta, pd1[1][0]);
and Px_2 (pipe_xd[2], ~rta, pd2[1][0]);
and Px_3 (pipe_xd[3], ~rta, pd3[1][0]);
and Px_4 (pipe_xd[4], ~rta, pd0[1][1]);
and Px_5 (pipe_xd[5], ~rta, pd1[1][1]);
and Px_6 (pipe_xd[6], ~rta, pd2[1][1]);
and Px_7 (pipe_xd[7], ~rta, pd3[1][1]);
and Py_0 (pipe_yd[0], ~rta, pd0[1][2]);
and Py_1 (pipe_yd[1], ~rta, pd1[1][2]);
and Py_2 (pipe_yd[2], ~rta, pd2[1][2]);
and Py_3 (pipe_yd[3], ~rta, pd3[1][2]);
and Py_4 (pipe_yd[4], ~rta, pd0[1][3]);
and Py_5 (pipe_yd[5], ~rta, pd1[1][3]);
and Py_6 (pipe_yd[6], ~rta, pd2[1][3]);
and Py_7 (pipe_yd[7], ~rta, pd3[1][3]);
 
routing_decision // the comparator
186,16 → 196,21
,.decision ( raw_dec )
);
 
// keep the routing decision until the tail flit is received by all sub-channels
c2p C_RTD0 ( .b(raw_dec[0]), .a((~frame_end)&rst_n), .q(dec_reg[0]));
c2p C_RTD1 ( .b(raw_dec[1]), .a((~frame_end)&rst_n), .q(dec_reg[1]));
c2p C_RT_XEQ (.b(raw_dec[2]), .a((~frame_end)&rst_n), .q(x_equal) );
c2p C_RTD2 ( .b(raw_dec[3]), .a(x_equal), .q(dec_reg[2]));
c2p C_RTD3 ( .b(raw_dec[4]), .a(x_equal), .q(dec_reg[3]));
c2p C_RTD4 ( .b(raw_dec[5]), .a(x_equal), .q(dec_reg[4]));
// translate it into the XY dec; not QDI here as the circuit can be slow
assign xy_dec[1:0] = raw_dec[1:0];
assign xy_dec[4:2] = raw_dec[2] ? raw_dec[5:3] : 0;
// the decoded routing requests
pipen #(.DW(RN))
PDEC (
.d_in_a ( rta ),
.d_out ( dec_reg ),
.d_in ( raw_dec ),
.d_out_a ( xy_dec )
);
 
// generate the arbiter request signals
assign arb_r =
assign deco =
DIR == 0 ? {dec_reg[4],dec_reg[2],dec_reg[1],dec_reg[3]} : // south port
DIR == 1 ? {dec_reg[4],dec_reg[2]} : // west port
DIR == 2 ? {dec_reg[4],dec_reg[2],dec_reg[3],dec_reg[0]} : // north port
210,24 → 225,36
DIR == 3 ? |{dec_reg[0],dec_reg[1],dec_reg[2]} : // east port
|{dec_reg[4]} ; // local port
 
or IP_RTACK (rt_ack, rt_err, arb_ra);
 
// ------------------------ pipeline control ------------------------------ //
`ifdef ENABLE_CHANNEL_SLICING
for(j=0; j<SCN; j++) begin: SC
// the sub-channel controller
subc_ctl SCH_C (
.nack ( pdan[0][j] ),
.rt_rst ( rtrst[j] ),
.ai2cb ( oa[j] ),
.ack ( pda[1][j] ),
.eof ( pd4[0][j] ),
.rt_ra ( rt_ack ),
.rt_err ( rt_err ),
.rst_n ( rst_n )
);
ppc SCH_C (
.nack ( pdan[0][j] ),
.rt_rst ( rtrst[j] ),
.ai2cb ( oa[j] ),
.ack ( pda[1][j] ),
.eof ( pd4[0][j] ),
.rt_ra ( rt_ack ),
.rt_err ( rt_err ),
.rst_n ( rst_n )
);
assign pd4an[0][j] = pdan[0][j];
 
ppc SCH_C (
.deca ( deca[j] ),
.dia ( pda1[j] ),
.eof ( pd4[0][j] ),
.doa ( acko[j]|(pda[0][j]&rt_err) ), // to handle faulty frames
.dec ( rta )
);
`ifdef ENABLE_LOOKAHEAD
c2n CD (.q(acko[j]), .a(oa[j]), .b(pda[0][j])); // the C2N gate to avoid early withdrawal
`else
assign acko = ai2cb;
`endif
end // block: SC
`else // !`ifdef ENABLE_CHANNEL_SLICING
subc_ctl SCH_C (
/clos_opt/clos_opt/src/subc_ctl.v
0,0 → 1,68
/*
Asynchronous SDM NoC
(C)2011 Wei Song
Advanced Processor Technologies Group
Computer Science, the Univ. of Manchester, UK
Authors:
Wei Song wsong83@gmail.com
License: LGPL 3.0 or later
Sub-channel controller
References
* Lookahead pipelines
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
* Channel slicing
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443.
For the detail structure, please refer to Section 7.1.1 of the thesis:
Wei Song, Spatial parallelism in the routers of asynchronous on-chip networks, PhD thesis, the University of Manchester, 2011.
History:
05/05/2009 Initial version. <wsong83@gmail.com>
22/10/2010 Make it more timing robust. <wsong83@gmail.com>
24/05/2011 Clean up for opensource. <wsong83@gmail.com>
*/
 
// the router structure definitions
`include "define.v"
 
// Sub-channel controller.
//
// Purely structural control block: from the output-port ack (ai2cb), the
// ack/eof of the last input-buffer stage, the switch-allocator ack (rt_ra)
// and the route-error flag (rt_err), it generates
//   * nack   - the ack returned to the last stage of the input buffer, and
//   * rt_rst - the router reset signal.
//
// The cells c2n, c2p and c2 are instantiated here but defined outside this
// file. NOTE(review): they are presumably C-element variants from the project
// cell library; confirm their exact set/reset behaviour and pin roles there
// before modifying any connection below.
//
// The gate structure is part of an asynchronous design; do not replace the
// primitive instances with behavioural code without re-checking timing.
module subc_ctl (/*AUTOARG*/
// Outputs
nack, rt_rst,
// Inputs
ai2cb, ack, eof, rt_ra, rt_err, rst_n
);

input ai2cb; // the ack from output ports
input ack; // the ack from the last stage of the input buffer
input eof; // the eof bit from the last stage of the input buffer
input rt_ra; // ack from the switch allocator
input rt_err; // invalid router decision
input rst_n; // the global active low reset signal
output nack; // the ack to the last stage of the input buffer
output rt_rst; // the router reset signal
wire csc; // internal wires to handle the CSC of the STG
wire acko; // the ack signal after the C2N gate
wire fend; // the end of frame indicator
wire acken; // active low ack enable
// Source of acko:
//   ENABLE_LOOKAHEAD defined     -> output of the c2n cell fed by ai2cb and ack
//   ENABLE_LOOKAHEAD not defined -> ai2cb passed through unchanged
`ifdef ENABLE_LOOKAHEAD
c2n CD (.q(acko), .a(ai2cb), .b(ack)); // the C2N gate to avoid early withdrawal
`else
assign acko = ai2cb;
`endif
// fend (end-of-frame indicator) is produced by a c2p cell from eof and acko.
c2p CEN (.b(eof), .a(acko), .q(fend));
// csc is the state-holding signal produced by a c2 cell from rt_ra and fend.
c2 C (.a0(rt_ra), .a1(fend), .q(csc));
// acken = ~(rt_ra & ~csc): low only while rt_ra is high and csc is low.
nand U1 ( acken, rt_ra, ~csc);
// rt_rst = ~(fend | ~csc): high only while csc is high and fend is low.
nor U2 ( rt_rst, fend, ~csc);
// nack = ~( (acko & ~eof) | acken | (rt_err & ack) | ~rst_n ).
// nack is therefore forced low while rst_n is low, while acken is high,
// while acko is high on a non-eof flit, or while rt_err and ack are both high.
nor AG ( nack, acko&(~eof), acken|(rt_err&ack), ~rst_n);
endmodule // subc_ctl
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.