/*
 Asynchronous SDM NoC
 (C)2011 Wei Song
 Advanced Processor Technologies Group
 Computer Science, the Univ. of Manchester, UK

 Authors:
 Wei Song     wsong83@gmail.com

 License: LGPL 3.0 or later

 Input buffer for Wormhole/SDM routers.
 *** SystemVerilog is used ***

 References
 * Lookahead pipelines
     Montek Singh and Steven M. Nowick, The design of high-performance dynamic asynchronous pipelines: lookahead style, IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 2007(15), 1256-1269. doi:10.1109/TVLSI.2007.902205
 * Channel slicing
     Wei Song and Doug Edwards, A low latency wormhole router for asynchronous on-chip networks, Asia and South Pacific Design Automation Conference, 2010, 437-443.
 * SDM
     Wei Song and Doug Edwards, Asynchronous spatial division multiplexing router, Microprocessors and Microsystems, 2011(35), 85-97.

 History:
 05/05/2009  Initial version. <wsong83@gmail.com>
 20/09/2010  Supporting channel slicing and SDM using macro definitions. <wsong83@gmail.com>
 24/05/2011  Clean up for opensource. <wsong83@gmail.com>
 01/06/2011  Use the comp4 common comparator rather than the chain_comparator defined in this module. <wsong83@gmail.com>

*/
|
30 |
|
|
|
31 |
|
|
// the router structure definitions
|
32 |
|
|
`include "define.v"
|
33 |
|
|
|
34 |
|
|
// inp_buf: input buffer of one router port (wormhole / SDM routers).
//
// Structure, as instantiated below:
//   * PD cascaded pipe4 stages (one per sub-channel when
//     ENABLE_CHANNEL_SLICING is defined) between i0..i4 and o0..o4;
//   * a route decoder: the target address is tapped from the pipeline,
//     compared against addrx/addry by routing_decision, and the result is
//     held by c2p cells until frame_end;
//   * one subc_ctl controller (one per sub-channel when slicing is enabled).
//
// Constraints implied by the hard-coded indices in this module:
//   * PD >= 1  : the address is tapped at pipeline index 1 (pd0..pd3[1]);
//   * SCN >= 4 : sub-channels 0..3 are read to form pipe_xd / pipe_yd.
//
// pipe4, c2p and subc_ctl are defined outside this file; their port
// directions are not visible here.
module inp_buf (/*AUTOARG*/
   // Outputs
   o0, o1, o2, o3, o4, ia, arb_r,
   // Inputs
   rst_n, i0, i1, i2, i3, i4, oa, addrx, addry, arb_ra
   );

   //-------------------------- parameters ---------------------------------------//
   parameter DIR = 0;     // the port direction: south, west, north, east, and local
   parameter RN = 4;      // the number of request outputs, must match the direction
   parameter DW = 16;     // the data-width of the data-path
   parameter PD = 2;      // the depth of the input buffer
   parameter SCN = DW/2;  // width of the i0..i3 / o0..o3 vectors (one bit per sub-channel)

   //-------------------------- I/O ports ---------------------------------------//
   input                 rst_n;           // global reset, active low
   input  [SCN-1:0]      i0, i1, i2, i3;  // data input
   output [SCN-1:0]      o0, o1, o2, o3;  // data output
`ifdef ENABLE_CHANNEL_SLICING
   // with channel slicing, i4/o4 and the ack wires exist per sub-channel
   input  [SCN-1:0]      i4, oa;
   output [SCN-1:0]      o4, ia;
`else
   input                 i4, oa;
   output                o4, ia;
`endif
   input  [7:0]          addrx, addry;    // compared against the frame's target address
   output [RN-1:0]       arb_r;           // arbiter requests, selected by DIR
   input                 arb_ra;          // OR-ed with rt_err to form rt_ack

   //-------------------------- control signals ---------------------------------------//
   wire                  rten;             // routing enable
   wire                  frame_end;        // identify the end of a frame
   wire [7:0]            pipe_xd, pipe_yd; // the target address from the incoming frame
   wire [PD:0][SCN-1:0]  pd0, pd1, pd2, pd3; // data wires for the internal pipeline stages
   wire [5:0]            raw_dec;          // the routing decision from the comparator
   wire [4:0]            dec_reg;          // the routing decision kept by C-gates
   wire                  x_equal;          // addr x = target x
   wire                  rt_err;           // route decoder error
   wire                  rt_ack;           // route build ack

`ifdef ENABLE_CHANNEL_SLICING
   wire [SCN-1:0]        rtrst;            // rt decoder reset for each sub-channel
   wire [PD:0][SCN-1:0]  pd4, pda, pdan;   // data wires for the internal pipeline stages
`else
   wire                  rtrst;            // rt decode reset
   wire [PD:0]           pd4, pda, pdan;   // data wires for the internal pipeline stages
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   genvar                i, j;

   //------------------------- pipelines ------------------------------------- //
   // Stage i takes pd*[i+1] and produces pd*[i]; index PD is the module
   // input side and index 0 the module output side (see the assigns below).
   generate for(i=0; i<PD; i++) begin: DP
`ifdef ENABLE_CHANNEL_SLICING
      // one independent pipe4 per sub-channel
      for(j=0; j<SCN; j++) begin: SC
         pipe4 #(.DW(2))
         P (
            .o0 ( pd0[i][j]   ),
            .o1 ( pd1[i][j]   ),
            .o2 ( pd2[i][j]   ),
            .o3 ( pd3[i][j]   ),
            .o4 ( pd4[i][j]   ),
            .ia ( pda[i+1][j] ),
            .i0 ( pd0[i+1][j] ),
            .i1 ( pd1[i+1][j] ),
            .i2 ( pd2[i+1][j] ),
            .i3 ( pd3[i+1][j] ),
            .i4 ( pd4[i+1][j] ),
            .oa ( pdan[i][j]  )
            );
      end // block: SC

`else // !`ifdef ENABLE_CHANNEL_SLICING
      // a single full-width pipe4 per stage
      pipe4 #(.DW(DW))
      P (
         .o0 ( pd0[i]   ),
         .o1 ( pd1[i]   ),
         .o2 ( pd2[i]   ),
         .o3 ( pd3[i]   ),
         .o4 ( pd4[i]   ),
         .ia ( pda[i+1] ),
         .i0 ( pd0[i+1] ),
         .i1 ( pd1[i+1] ),
         .i2 ( pd2[i+1] ),
         .i3 ( pd3[i+1] ),
         .i4 ( pd4[i+1] ),
         .oa ( pdan[i]  )
         );
`endif // !`ifdef ENABLE_CHANNEL_SLICING
   end // block: DP
   endgenerate

   // The oa connection of stage i (i >= 1) is the inverted OR of stage
   // i-1's ia and o4 wires, forced to 0 while rst_n is low.
   // pdan[0] is not assigned here; it is connected to subc_ctl.nack below.
   generate for(i=1; i<PD; i++) begin: DPA
      assign pdan[i] = rst_n ? ~(pda[i]|pd4[i-1]) : 0;
   end
   endgenerate

   // module-level ack: the ia wire of the stage nearest the input, OR-ed
   // with that stage's o4 wire
   assign ia = pda[PD]|pd4[PD-1];

   // index PD is fed by the module inputs, index 0 drives the module outputs
   assign pd0[PD] = i0;
   assign pd1[PD] = i1;
   assign pd2[PD] = i2;
   assign pd3[PD] = i3;
   assign pd4[PD] = i4;
   assign o0 = pd0[0];
   assign o1 = pd1[0];
   assign o2 = pd2[0];
   assign o3 = pd3[0];
   assign o4 = pd4[0];

   //---------------------------- route decoder related -------------------------- //
   // fetch the x and y target
   // Taps pd0..pd3 at pipeline index 1, gated by rten: sub-channels 0 and 1
   // form pipe_xd[3:0] / pipe_xd[7:4], sub-channels 2 and 3 form
   // pipe_yd[3:0] / pipe_yd[7:4].
   // NOTE(review): each 4-bit group is presumably one 1-of-4 coded digit
   // (wires pd0..pd3 of one sub-channel) -- confirm against pipe4/comp4.
   and Px_0 (pipe_xd[0], rten, pd0[1][0]);
   and Px_1 (pipe_xd[1], rten, pd1[1][0]);
   and Px_2 (pipe_xd[2], rten, pd2[1][0]);
   and Px_3 (pipe_xd[3], rten, pd3[1][0]);
   and Px_4 (pipe_xd[4], rten, pd0[1][1]);
   and Px_5 (pipe_xd[5], rten, pd1[1][1]);
   and Px_6 (pipe_xd[6], rten, pd2[1][1]);
   and Px_7 (pipe_xd[7], rten, pd3[1][1]);
   and Py_0 (pipe_yd[0], rten, pd0[1][2]);
   and Py_1 (pipe_yd[1], rten, pd1[1][2]);
   and Py_2 (pipe_yd[2], rten, pd2[1][2]);
   and Py_3 (pipe_yd[3], rten, pd3[1][2]);
   and Py_4 (pipe_yd[4], rten, pd0[1][3]);
   and Py_5 (pipe_yd[5], rten, pd1[1][3]);
   and Py_6 (pipe_yd[6], rten, pd2[1][3]);
   and Py_7 (pipe_yd[7], rten, pd3[1][3]);

   routing_decision             // the comparator
   RTD(
       .addrx    ( addrx   )
       ,.addry   ( addry   )
       ,.pipe_xd ( pipe_xd )
       ,.pipe_yd ( pipe_yd )
       ,.decision( raw_dec )
       );

   // keep the routing decision until the tail flit is received by all sub-channels
   // The x results are paired with (~frame_end)&rst_n; the y results are
   // paired with x_equal, so dec_reg[4:2] depend on the held "x equal" result.
   c2p C_RTD0   ( .b(raw_dec[0]), .a((~frame_end)&rst_n), .q(dec_reg[0]));
   c2p C_RTD1   ( .b(raw_dec[1]), .a((~frame_end)&rst_n), .q(dec_reg[1]));
   c2p C_RT_XEQ ( .b(raw_dec[2]), .a((~frame_end)&rst_n), .q(x_equal)   );
   c2p C_RTD2   ( .b(raw_dec[3]), .a(x_equal),            .q(dec_reg[2]));
   c2p C_RTD3   ( .b(raw_dec[4]), .a(x_equal),            .q(dec_reg[3]));
   c2p C_RTD4   ( .b(raw_dec[5]), .a(x_equal),            .q(dec_reg[4]));

   // generate the arbiter request signals
   // DIR is a parameter, so exactly one branch is live per instance; the
   // branches are 4 or 2 bits wide, which is why RN must match DIR.
   assign arb_r =
                 DIR == 0 ? {dec_reg[4],dec_reg[2],dec_reg[1],dec_reg[3]} : // south port
                 DIR == 1 ? {dec_reg[4],dec_reg[2]}                       : // west port
                 DIR == 2 ? {dec_reg[4],dec_reg[2],dec_reg[3],dec_reg[0]} : // north port
                 DIR == 3 ? {dec_reg[4],dec_reg[3]}                       : // east port
                 {dec_reg[2],dec_reg[1],dec_reg[3],dec_reg[0]}            ; // local port

   // a held decision bit that has no request output for this DIR is
   // reported as a route decoder error
   assign rt_err =
                  DIR == 0 ? |{dec_reg[0]}                       : // south port
                  DIR == 1 ? |{dec_reg[0],dec_reg[1],dec_reg[3]} : // west port
                  DIR == 2 ? |{dec_reg[1]}                       : // north port
                  DIR == 3 ? |{dec_reg[0],dec_reg[1],dec_reg[2]} : // east port
                  |{dec_reg[4]}                                  ; // local port

   // the route is acknowledged either by arb_ra or by a decode error
   or IP_RTACK (rt_ack, rt_err, arb_ra);

   // ------------------------ pipeline control ------------------------------ //

`ifdef ENABLE_CHANNEL_SLICING
   // explicit generate region, consistent with the DP / DPA loops above;
   // instance paths (SC[j].SCH_C) are unchanged
   generate
      for(j=0; j<SCN; j++) begin: SC
         // the sub-channel controller
         subc_ctl SCH_C (
                         .nack   ( pdan[0][j] ),
                         .rt_rst ( rtrst[j]   ),
                         .ai2cb  ( oa[j]      ),
                         .ack    ( pda[1][j]  ),
                         .eof    ( pd4[0][j]  ),
                         .rt_ra  ( rt_ack     ),
                         .rt_err ( rt_err     ),
                         .rst_n  ( rst_n      )
                         );
      end // block: SC
   endgenerate
`else // !`ifdef ENABLE_CHANNEL_SLICING
   subc_ctl SCH_C (
                   .nack   ( pdan[0] ),
                   .rt_rst ( rtrst   ),
                   .ai2cb  ( oa      ),
                   .ack    ( pda[1]  ),
                   .eof    ( pd4[0]  ),
                   .rt_ra  ( rt_ack  ),
                   .rt_err ( rt_err  ),
                   .rst_n  ( rst_n   )
                   );
`endif // !`ifdef ENABLE_CHANNEL_SLICING

   // the router controller part
   assign rten = ~rt_ack;        // address taps are masked once the route is acknowledged
   assign frame_end = &rtrst;    // asserted only when every rtrst bit is set

endmodule // inp_buf
|
232 |
|
|
|
233 |
|
|
|
234 |
|
|
// the routing decision making procedure, comparators
|
235 |
|
|
// routing_decision: compares the frame's target address (pipe_xd, pipe_yd)
// with addrx/addry and produces six comparison flags.
//
// Each 8-bit operand is split into a low group [3:0] and a high group [7:4],
// and each group pair is compared by one comp4 cell (defined outside this
// file).  From the equations below, comp4's q is used as:
//   q[0] : a >  b      q[1] : a <  b      q[2] : a == b
// with a = frame address group, b = addrx/addry group.
// NOTE(review): that bit convention is inferred from the decision comments,
// not from comp4 itself -- confirm against the comp4 definition.
//
// decision[0] frame x > addr x     decision[3] frame y > addr y
// decision[1] frame x < addr x     decision[4] frame y < addr y
// decision[2] frame x = addr x     decision[5] frame y = addr y
module routing_decision (
                         addrx
                         ,addry
                         ,pipe_xd
                         ,pipe_yd
                         ,decision
                         );

   // NOTE(review): the original comment here read "compare with (2,3)";
   // nothing in this module fixes the address to (2,3), so it looks stale.
   input [7:0]  addrx;
   input [7:0]  addry;

   input [7:0]  pipe_xd;     // frame target x
   input [7:0]  pipe_yd;     // frame target y
   output [5:0] decision;    // comparison flags, see the header above

   wire [2:0]   x_cmp [1:0]; // [0]: low group result, [1]: high group result
   wire [2:0]   y_cmp [1:0];

   comp4 X0 ( .a(pipe_xd[3:0]), .b(addrx[3:0]), .q(x_cmp[0]));
   comp4 X1 ( .a(pipe_xd[7:4]), .b(addrx[7:4]), .q(x_cmp[1]));
   comp4 Y0 ( .a(pipe_yd[3:0]), .b(addry[3:0]), .q(y_cmp[0]));
   comp4 Y1 ( .a(pipe_yd[7:4]), .b(addry[7:4]), .q(y_cmp[1]));

   // the high group decides; the low group is consulted only when the
   // high groups are equal
   assign decision[0] = x_cmp[1][0] | (x_cmp[1][2]&x_cmp[0][0]); // frame x > addr x
   assign decision[1] = x_cmp[1][1] | (x_cmp[1][2]&x_cmp[0][1]); // frame x < addr x
   assign decision[2] = x_cmp[1][2] & x_cmp[0][2];               // frame x = addr x
   assign decision[3] = y_cmp[1][0] | (y_cmp[1][2]&y_cmp[0][0]); // frame y > addr y
   assign decision[4] = y_cmp[1][1] | (y_cmp[1][2]&y_cmp[0][1]); // frame y < addr y
   assign decision[5] = y_cmp[1][2] & y_cmp[0][2];               // frame y = addr y

endmodule // routing_decision
|