| 1 |
16 |
wsong0210 |
/*
|
| 2 |
|
|
Asynchronous SDM NoC
|
| 3 |
|
|
(C)2011 Wei Song
|
| 4 |
|
|
Advanced Processor Technologies Group
|
| 5 |
|
|
Computer Science, the Univ. of Manchester, UK
|
| 6 |
|
|
|
| 7 |
|
|
Authors:
|
| 8 |
|
|
Wei Song wsong83@gmail.com
|
| 9 |
|
|
|
| 10 |
|
|
License: LGPL 3.0 or later
|
| 11 |
|
|
|
| 12 |
|
|
Input buffer for Wormhole/SDM routers.
|
| 13 |
|
|
*** SystemVerilog is used ***
|
| 14 |
|
|
|
| 15 |
|
|
References
|
| 16 |
|
|
* Lookahead pipelines
|
| 17 |
17 |
wsong0210 |
Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
|
| 18 |
16 |
wsong0210 |
* Channel slicing
|
| 19 |
|
|
Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443.
|
| 20 |
|
|
* SDM
|
| 21 |
|
|
Wei Song and Doug Edwards, Asynchronous spatial division multiplexing router, Microprocessors and Microsystems, 2011(35), 85-97.
|
| 22 |
|
|
|
| 23 |
|
|
History:
|
| 24 |
|
|
05/05/2009 Initial version. <wsong83@gmail.com>
|
| 25 |
|
|
20/09/2010 Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com>
|
| 26 |
|
|
24/05/2011 Clean up for opensource. <wsong83@gmail.com>
|
| 27 |
47 |
wsong0210 |
01/06/2011 Use the comp4 common comparator rather than the chain_comparator defined in this module. <wsong83@gmail.com>
|
| 28 |
|
|
|
| 29 |
16 |
wsong0210 |
*/
|
| 30 |
|
|
|
| 31 |
|
|
// the router structure definitions
|
| 32 |
|
|
`include "define.v"
|
| 33 |
|
|
|
| 34 |
|
|
module inp_buf (/*AUTOARG*/
   // Outputs
   o0, o1, o2, o3, o4, ia, arb_r,
   // Inputs
   rst_n, i0, i1, i2, i3, i4, oa, addrx, addry, arb_ra
   );

   // Input buffer of one router port.
   //
   // Structure visible in this module:
   //   * PD cascaded pipe4 stages; stage index PD is the input side (i0..i4)
   //     and stage index 0 is the output side (o0..o4).
   //   * A route decoder that taps stage 1, compares the captured target
   //     address with (addrx, addry) and raises the arbiter requests arb_r.
   //   * One subc_ctl instance per sub-channel (or a single one when channel
   //     slicing is disabled) connected to the ack wires of stage 0.
   //
   // The modules pipe4, c2p, subc_ctl and comp4 are defined outside this file;
   // their port directions are not visible here.

   //-------------------------- parameters --------------------------------//
   parameter DIR = 0;       // port direction: south(0), west(1), north(2), east(3), local(otherwise)
   parameter RN  = 4;       // number of request outputs, must match the direction
   parameter DW  = 16;      // data width of the data path
   parameter PD  = 2;       // depth (number of pipe4 stages) of the input buffer
   parameter SCN = DW/2;    // width of each of the data buses i0..i3 / o0..o3

   //-------------------------- I/O ports ---------------------------------//
   input                 rst_n;            // global reset, active low
   input  [SCN-1:0]      i0, i1, i2, i3;   // data input
   output [SCN-1:0]      o0, o1, o2, o3;   // data output
`ifdef ENABLE_CHANNEL_SLICING
   // with channel slicing every sub-channel has its own i4/o4 and ack wires
   input  [SCN-1:0]      i4, oa;
   output [SCN-1:0]      o4, ia;
`else
   input                 i4, oa;
   output                o4, ia;
`endif
   input  [7:0]          addrx, addry;     // compared against the address taken from the frame
   output [RN-1:0]       arb_r;            // requests to the arbiter
   input                 arb_ra;           // OR-ed with rt_err to form rt_ack

   //-------------------------- internal signals --------------------------//
   wire                  rten;             // routing enable (= ~rt_ack)
   wire                  frame_end;        // AND-reduction of rtrst
   wire [7:0]            pipe_xd, pipe_yd; // target address gated out of stage 1
   wire [PD:0][SCN-1:0]  pd0, pd1, pd2, pd3; // data wires between pipeline stages
   wire [5:0]            raw_dec;          // decision straight from routing_decision
   wire [4:0]            dec_reg;          // decision held by the c2p cells
   wire                  x_equal;          // held "frame x == addr x" result
   wire                  rt_err;           // route decoder error
   wire                  rt_ack;           // route build ack

`ifdef ENABLE_CHANNEL_SLICING
   wire [SCN-1:0]        rtrst;            // rt decoder reset, one per sub-channel
   wire [PD:0][SCN-1:0]  pd4, pda, pdan;   // i4/o4 and ack wires between stages
`else
   wire                  rtrst;            // rt decoder reset
   wire [PD:0]           pd4, pda, pdan;   // i4/o4 and ack wires between stages
`endif

   genvar                stg, sub;         // stage index, sub-channel index

   //-------------------------- pipelines ---------------------------------//
   generate
      // Stage "stg" takes its inputs from index stg+1 and drives index stg.
      for (stg = 0; stg < PD; stg = stg + 1) begin: DP
`ifdef ENABLE_CHANNEL_SLICING
         // one narrow pipe4 per sub-channel
         for (sub = 0; sub < SCN; sub = sub + 1) begin: SC
            pipe4 #(.DW(2))
            P (
               .i0 ( pd0[stg+1][sub] ),
               .i1 ( pd1[stg+1][sub] ),
               .i2 ( pd2[stg+1][sub] ),
               .i3 ( pd3[stg+1][sub] ),
               .i4 ( pd4[stg+1][sub] ),
               .ia ( pda[stg+1][sub] ),
               .o0 ( pd0[stg][sub]   ),
               .o1 ( pd1[stg][sub]   ),
               .o2 ( pd2[stg][sub]   ),
               .o3 ( pd3[stg][sub]   ),
               .o4 ( pd4[stg][sub]   ),
               .oa ( pdan[stg][sub]  )
               );
         end // block: SC
`else // !`ifdef ENABLE_CHANNEL_SLICING
         // a single full-width pipe4 per stage
         pipe4 #(.DW(DW))
         P (
            .i0 ( pd0[stg+1] ),
            .i1 ( pd1[stg+1] ),
            .i2 ( pd2[stg+1] ),
            .i3 ( pd3[stg+1] ),
            .i4 ( pd4[stg+1] ),
            .ia ( pda[stg+1] ),
            .o0 ( pd0[stg]   ),
            .o1 ( pd1[stg]   ),
            .o2 ( pd2[stg]   ),
            .o3 ( pd3[stg]   ),
            .o4 ( pd4[stg]   ),
            .oa ( pdan[stg]  )
            );
`endif // !`ifdef ENABLE_CHANNEL_SLICING
      end // block: DP

      // Ack fed to stages 1..PD-1: NOR of that stage's pda and the pd4 wire
      // of the stage below, forced to 0 while rst_n is low.  pdan[0] is not
      // generated here; it is connected to the subc_ctl "nack" port below.
      for (stg = 1; stg < PD; stg = stg + 1) begin: DPA
         assign pdan[stg] = rst_n ? ~(pda[stg] | pd4[stg-1]) : 0;
      end
   endgenerate

   // buffer input side: index PD of every internal bus
   assign pd0[PD] = i0;
   assign pd1[PD] = i1;
   assign pd2[PD] = i2;
   assign pd3[PD] = i3;
   assign pd4[PD] = i4;
   assign ia      = pda[PD] | pd4[PD-1];

   // buffer output side: index 0 of every internal bus
   assign o0 = pd0[0];
   assign o1 = pd1[0];
   assign o2 = pd2[0];
   assign o3 = pd3[0];
   assign o4 = pd4[0];

   //-------------------------- route decoder -----------------------------//
   // Capture the target address from stage 1 while routing is enabled.
   // Bit k of pd0..pd3 supplies four address wires:
   //   bits 0 and 1 -> pipe_xd[3:0] and pipe_xd[7:4]
   //   bits 2 and 3 -> pipe_yd[3:0] and pipe_yd[7:4]
   and Px_0 (pipe_xd[0], rten, pd0[1][0]);
   and Px_1 (pipe_xd[1], rten, pd1[1][0]);
   and Px_2 (pipe_xd[2], rten, pd2[1][0]);
   and Px_3 (pipe_xd[3], rten, pd3[1][0]);

   and Px_4 (pipe_xd[4], rten, pd0[1][1]);
   and Px_5 (pipe_xd[5], rten, pd1[1][1]);
   and Px_6 (pipe_xd[6], rten, pd2[1][1]);
   and Px_7 (pipe_xd[7], rten, pd3[1][1]);

   and Py_0 (pipe_yd[0], rten, pd0[1][2]);
   and Py_1 (pipe_yd[1], rten, pd1[1][2]);
   and Py_2 (pipe_yd[2], rten, pd2[1][2]);
   and Py_3 (pipe_yd[3], rten, pd3[1][2]);

   and Py_4 (pipe_yd[4], rten, pd0[1][3]);
   and Py_5 (pipe_yd[5], rten, pd1[1][3]);
   and Py_6 (pipe_yd[6], rten, pd2[1][3]);
   and Py_7 (pipe_yd[7], rten, pd3[1][3]);

   // the comparator
   routing_decision
   RTD (
      .addrx    ( addrx   ),
      .addry    ( addry   ),
      .pipe_xd  ( pipe_xd ),
      .pipe_yd  ( pipe_yd ),
      .decision ( raw_dec )
      );

   // Hold the routing decision until the tail flit has been seen by all
   // sub-channels (frame_end) and release it during reset.
   // The x results are enabled by rtd_en; the y results are enabled by the
   // held x_equal, so a y decision can only be kept once x matched.
   wire rtd_en = (~frame_end) & rst_n;

   c2p C_RTD0   ( .a(rtd_en),  .b(raw_dec[0]), .q(dec_reg[0]) );
   c2p C_RTD1   ( .a(rtd_en),  .b(raw_dec[1]), .q(dec_reg[1]) );
   c2p C_RT_XEQ ( .a(rtd_en),  .b(raw_dec[2]), .q(x_equal)    );
   c2p C_RTD2   ( .a(x_equal), .b(raw_dec[3]), .q(dec_reg[2]) );
   c2p C_RTD3   ( .a(x_equal), .b(raw_dec[4]), .q(dec_reg[3]) );
   c2p C_RTD4   ( .a(x_equal), .b(raw_dec[5]), .q(dec_reg[4]) );

   // Arbiter requests: the subset and ordering of dec_reg bits depends on
   // which port this buffer serves.
   assign arb_r =
      (DIR == 0) ? {dec_reg[4], dec_reg[2], dec_reg[1], dec_reg[3]} :  // south port
      (DIR == 1) ? {dec_reg[4], dec_reg[2]}                         :  // west port
      (DIR == 2) ? {dec_reg[4], dec_reg[2], dec_reg[3], dec_reg[0]} :  // north port
      (DIR == 3) ? {dec_reg[4], dec_reg[3]}                         :  // east port
                   {dec_reg[2], dec_reg[1], dec_reg[3], dec_reg[0]} ;  // local port

   // Decoder error: any held decision bit that is not among the requests
   // selected for this port direction above.
   assign rt_err =
      (DIR == 0) ? |{dec_reg[0]}                         :  // south port
      (DIR == 1) ? |{dec_reg[0], dec_reg[1], dec_reg[3]} :  // west port
      (DIR == 2) ? |{dec_reg[1]}                         :  // north port
      (DIR == 3) ? |{dec_reg[0], dec_reg[1], dec_reg[2]} :  // east port
                   |{dec_reg[4]}                         ;  // local port

   // route build ack: either the arbiter acknowledged or the decode failed
   or IP_RTACK (rt_ack, rt_err, arb_ra);

   //-------------------------- pipeline control --------------------------//
`ifdef ENABLE_CHANNEL_SLICING
   generate
      for (sub = 0; sub < SCN; sub = sub + 1) begin: SC
         // the sub-channel controller
         subc_ctl SCH_C (
            .rst_n  ( rst_n        ),
            .rt_ra  ( rt_ack       ),
            .rt_err ( rt_err       ),
            .eof    ( pd4[0][sub]  ),
            .ack    ( pda[1][sub]  ),
            .ai2cb  ( oa[sub]      ),
            .rt_rst ( rtrst[sub]   ),
            .nack   ( pdan[0][sub] )
            );
      end // block: SC
   endgenerate
`else // !`ifdef ENABLE_CHANNEL_SLICING
   subc_ctl SCH_C (
      .rst_n  ( rst_n   ),
      .rt_ra  ( rt_ack  ),
      .rt_err ( rt_err  ),
      .eof    ( pd4[0]  ),
      .ack    ( pda[1]  ),
      .ai2cb  ( oa      ),
      .rt_rst ( rtrst   ),
      .nack   ( pdan[0] )
      );
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   // the router controller part
   assign frame_end = &rtrst;    // all sub-channels signalled rt_rst
   assign rten      = ~rt_ack;   // routing stays enabled until the route is acked

endmodule // inp_buf
|
| 232 |
|
|
|
| 233 |
|
|
|
| 234 |
|
|
// the routing decision making procedure, comparators
|
| 235 |
|
|
module routing_decision (
    addrx
   ,addry
   ,pipe_xd
   ,pipe_yd
   ,decision
   );

   // Compares the frame's target address (pipe_xd, pipe_yd) with the
   // address given on (addrx, addry).  Each 8-bit address is split into a
   // low nibble [3:0] and a high nibble [7:4]; every nibble pair goes
   // through one comp4 cell and the two partial results are merged with the
   // high nibble taking priority.
   //
   // comp4 is defined outside this file.  Judging by how its output is used
   // below, q[0] flags "a > b", q[1] flags "a < b" and q[2] flags "a == b"
   // -- NOTE(review): confirm against the comp4 definition.

   input  [7:0] addrx;      // x address to compare against
   input  [7:0] addry;      // y address to compare against
   input  [7:0] pipe_xd;    // x target taken from the frame
   input  [7:0] pipe_yd;    // y target taken from the frame
   output [5:0] decision;   // {y ==, y <, y >, x ==, x <, x >}, frame vs. addr

   // per-nibble comparison results
   wire [2:0]   x_lo, x_hi;
   wire [2:0]   y_lo, y_hi;

   comp4 X0 ( .a(pipe_xd[3:0]), .b(addrx[3:0]), .q(x_lo) );
   comp4 X1 ( .a(pipe_xd[7:4]), .b(addrx[7:4]), .q(x_hi) );
   comp4 Y0 ( .a(pipe_yd[3:0]), .b(addry[3:0]), .q(y_lo) );
   comp4 Y1 ( .a(pipe_yd[7:4]), .b(addry[7:4]), .q(y_hi) );

   // The high nibble decides on its own unless it reports equality, in
   // which case the low nibble's verdict is used.
   assign decision = {
      y_hi[2] & y_lo[2],               // [5] frame y = addr y
      y_hi[1] | (y_hi[2] & y_lo[1]),   // [4] frame y < addr y
      y_hi[0] | (y_hi[2] & y_lo[0]),   // [3] frame y > addr y
      x_hi[2] & x_lo[2],               // [2] frame x = addr x
      x_hi[1] | (x_hi[2] & x_lo[1]),   // [1] frame x < addr x
      x_hi[0] | (x_hi[2] & x_lo[0])    // [0] frame x > addr x
   };

endmodule // routing_decision
|