1 |
280 |
jeremybenn |
/* Interchange heuristics and transform for loop interchange on
|
2 |
|
|
polyhedral representation.
|
3 |
|
|
|
4 |
|
|
Copyright (C) 2009 Free Software Foundation, Inc.
|
5 |
|
|
Contributed by Sebastian Pop <sebastian.pop@amd.com> and
|
6 |
|
|
Harsha Jagasia <harsha.jagasia@amd.com>.
|
7 |
|
|
|
8 |
|
|
This file is part of GCC.
|
9 |
|
|
|
10 |
|
|
GCC is free software; you can redistribute it and/or modify
|
11 |
|
|
it under the terms of the GNU General Public License as published by
|
12 |
|
|
the Free Software Foundation; either version 3, or (at your option)
|
13 |
|
|
any later version.
|
14 |
|
|
|
15 |
|
|
GCC is distributed in the hope that it will be useful,
|
16 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18 |
|
|
GNU General Public License for more details.
|
19 |
|
|
|
20 |
|
|
You should have received a copy of the GNU General Public License
|
21 |
|
|
along with GCC; see the file COPYING3. If not see
|
22 |
|
|
<http://www.gnu.org/licenses/>. */
|
23 |
|
|
#include "config.h"
|
24 |
|
|
#include "system.h"
|
25 |
|
|
#include "coretypes.h"
|
26 |
|
|
#include "tm.h"
|
27 |
|
|
#include "ggc.h"
|
28 |
|
|
#include "tree.h"
|
29 |
|
|
#include "rtl.h"
|
30 |
|
|
#include "output.h"
|
31 |
|
|
#include "basic-block.h"
|
32 |
|
|
#include "diagnostic.h"
|
33 |
|
|
#include "tree-flow.h"
|
34 |
|
|
#include "toplev.h"
|
35 |
|
|
#include "tree-dump.h"
|
36 |
|
|
#include "timevar.h"
|
37 |
|
|
#include "cfgloop.h"
|
38 |
|
|
#include "tree-chrec.h"
|
39 |
|
|
#include "tree-data-ref.h"
|
40 |
|
|
#include "tree-scalar-evolution.h"
|
41 |
|
|
#include "tree-pass.h"
|
42 |
|
|
#include "domwalk.h"
|
43 |
|
|
#include "value-prof.h"
|
44 |
|
|
#include "pointer-set.h"
|
45 |
|
|
#include "gimple.h"
|
46 |
|
|
#include "params.h"
|
47 |
|
|
|
48 |
|
|
#ifdef HAVE_cloog
|
49 |
|
|
#include "cloog/cloog.h"
|
50 |
|
|
#include "ppl_c.h"
|
51 |
|
|
#include "sese.h"
|
52 |
|
|
#include "graphite-ppl.h"
|
53 |
|
|
#include "graphite.h"
|
54 |
|
|
#include "graphite-poly.h"
|
55 |
|
|
|
56 |
|
|
/* Builds a linear expression, of dimension DIM, representing PDR's
|
57 |
|
|
memory access:
|
58 |
|
|
|
59 |
|
|
L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
|
60 |
|
|
|
61 |
|
|
For an array A[10][20] with two subscript locations s0 and s1, the
|
62 |
|
|
linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
|
63 |
|
|
corresponds to a memory stride of 20.
|
64 |
|
|
|
65 |
|
|
OFFSET is a number of dimensions to prepend before the
|
66 |
|
|
subscript dimensions: s_0, s_1, ..., s_n.
|
67 |
|
|
|
68 |
|
|
Thus, the final linear expression has the following format:
|
69 |
|
|
|
70 |
|
|
where the expression itself is:
|
71 |
|
|
c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */
|
72 |
|
|
|
73 |
|
|
static ppl_Linear_Expression_t
|
74 |
|
|
build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr)
|
75 |
|
|
{
|
76 |
|
|
ppl_Linear_Expression_t res;
|
77 |
|
|
ppl_Linear_Expression_t le;
|
78 |
|
|
ppl_dimension_type i;
|
79 |
|
|
ppl_dimension_type first = pdr_subscript_dim (pdr, 0);
|
80 |
|
|
ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr));
|
81 |
|
|
Value size, sub_size;
|
82 |
|
|
graphite_dim_t dim = offset + pdr_dim (pdr);
|
83 |
|
|
|
84 |
|
|
ppl_new_Linear_Expression_with_dimension (&res, dim);
|
85 |
|
|
|
86 |
|
|
value_init (size);
|
87 |
|
|
value_set_si (size, 1);
|
88 |
|
|
value_init (sub_size);
|
89 |
|
|
value_set_si (sub_size, 1);
|
90 |
|
|
|
91 |
|
|
for (i = last - 1; i >= first; i--)
|
92 |
|
|
{
|
93 |
|
|
ppl_set_coef_gmp (res, i + offset, size);
|
94 |
|
|
|
95 |
|
|
ppl_new_Linear_Expression_with_dimension (&le, dim - offset);
|
96 |
|
|
ppl_set_coef (le, i, 1);
|
97 |
|
|
ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size);
|
98 |
|
|
value_multiply (size, size, sub_size);
|
99 |
|
|
ppl_delete_Linear_Expression (le);
|
100 |
|
|
}
|
101 |
|
|
|
102 |
|
|
value_clear (sub_size);
|
103 |
|
|
value_clear (size);
|
104 |
|
|
return res;
|
105 |
|
|
}
|
106 |
|
|
|
107 |
|
|
/* Builds a partial difference equations and inserts them
|
108 |
|
|
into pointset powerset polyhedron P. Polyhedron is assumed
|
109 |
|
|
to have the format: T|I|T'|I'|G|S|S'|l1|l2.
|
110 |
|
|
|
111 |
|
|
TIME_DEPTH is the time dimension w.r.t. which we are
|
112 |
|
|
differentiating.
|
113 |
|
|
OFFSET represents the number of dimensions between
|
114 |
|
|
columns t_{time_depth} and t'_{time_depth}.
|
115 |
|
|
DIM_SCTR is the number of scattering dimensions. It is
|
116 |
|
|
essentially the dimensionality of the T vector.
|
117 |
|
|
|
118 |
|
|
The following equations are inserted into the polyhedron P:
|
119 |
|
|
| t_1 = t_1'
|
120 |
|
|
| ...
|
121 |
|
|
| t_{time_depth-1} = t'_{time_depth-1}
|
122 |
|
|
| t_{time_depth} = t'_{time_depth} + 1
|
123 |
|
|
| t_{time_depth+1} = t'_{time_depth + 1}
|
124 |
|
|
| ...
|
125 |
|
|
| t_{dim_sctr} = t'_{dim_sctr}. */
|
126 |
|
|
|
127 |
|
|
static void
|
128 |
|
|
build_partial_difference (ppl_Pointset_Powerset_C_Polyhedron_t *p,
|
129 |
|
|
ppl_dimension_type time_depth,
|
130 |
|
|
ppl_dimension_type offset,
|
131 |
|
|
ppl_dimension_type dim_sctr)
|
132 |
|
|
{
|
133 |
|
|
ppl_Constraint_t new_cstr;
|
134 |
|
|
ppl_Linear_Expression_t le;
|
135 |
|
|
ppl_dimension_type i;
|
136 |
|
|
ppl_dimension_type dim;
|
137 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_t temp;
|
138 |
|
|
|
139 |
|
|
/* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
|
140 |
|
|
This is the core part of this alogrithm, since this
|
141 |
|
|
constraint asks for the memory access stride (difference)
|
142 |
|
|
between two consecutive points in time dimensions. */
|
143 |
|
|
|
144 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_space_dimension (*p, &dim);
|
145 |
|
|
ppl_new_Linear_Expression_with_dimension (&le, dim);
|
146 |
|
|
ppl_set_coef (le, time_depth, 1);
|
147 |
|
|
ppl_set_coef (le, time_depth + offset, -1);
|
148 |
|
|
ppl_set_inhomogeneous (le, 1);
|
149 |
|
|
ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
|
150 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
|
151 |
|
|
ppl_delete_Linear_Expression (le);
|
152 |
|
|
ppl_delete_Constraint (new_cstr);
|
153 |
|
|
|
154 |
|
|
/* Add equalities:
|
155 |
|
|
| t1 = t1'
|
156 |
|
|
| ...
|
157 |
|
|
| t_{time_depth-1} = t'_{time_depth-1}
|
158 |
|
|
| t_{time_depth+1} = t'_{time_depth+1}
|
159 |
|
|
| ...
|
160 |
|
|
| t_{dim_sctr} = t'_{dim_sctr}
|
161 |
|
|
|
162 |
|
|
This means that all the time dimensions are equal except for
|
163 |
|
|
time_depth, where the constraint is t_{depth} = t'_{depth} + 1
|
164 |
|
|
step. More to this: we should be carefull not to add equalities
|
165 |
|
|
to the 'coupled' dimensions, which happens when the one dimension
|
166 |
|
|
is stripmined dimension, and the other dimension corresponds
|
167 |
|
|
to the point loop inside stripmined dimension. */
|
168 |
|
|
|
169 |
|
|
ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
|
170 |
|
|
|
171 |
|
|
for (i = 0; i < dim_sctr; i++)
|
172 |
|
|
if (i != time_depth)
|
173 |
|
|
{
|
174 |
|
|
ppl_new_Linear_Expression_with_dimension (&le, dim);
|
175 |
|
|
ppl_set_coef (le, i, 1);
|
176 |
|
|
ppl_set_coef (le, i + offset, -1);
|
177 |
|
|
ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
|
178 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_constraint (temp, new_cstr);
|
179 |
|
|
|
180 |
|
|
if (ppl_Pointset_Powerset_C_Polyhedron_is_empty (temp))
|
181 |
|
|
{
|
182 |
|
|
ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
|
183 |
|
|
ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
|
184 |
|
|
}
|
185 |
|
|
else
|
186 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
|
187 |
|
|
ppl_delete_Linear_Expression (le);
|
188 |
|
|
ppl_delete_Constraint (new_cstr);
|
189 |
|
|
}
|
190 |
|
|
|
191 |
|
|
ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
|
192 |
|
|
}
|
193 |
|
|
|
194 |
|
|
|
195 |
|
|
/* Set STRIDE to the stride of PDR in memory by advancing by one in
|
196 |
|
|
the loop at DEPTH. */
|
197 |
|
|
|
198 |
|
|
static void
|
199 |
|
|
pdr_stride_in_loop (Value stride, graphite_dim_t depth, poly_dr_p pdr)
|
200 |
|
|
{
|
201 |
|
|
ppl_dimension_type time_depth;
|
202 |
|
|
ppl_Linear_Expression_t le, lma;
|
203 |
|
|
ppl_Constraint_t new_cstr;
|
204 |
|
|
ppl_dimension_type i, *map;
|
205 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr;
|
206 |
|
|
graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1;
|
207 |
|
|
poly_bb_p pbb = PDR_PBB (pdr);
|
208 |
|
|
ppl_dimension_type offset = pbb_nb_scattering_transform (pbb)
|
209 |
|
|
+ pbb_nb_local_vars (pbb)
|
210 |
|
|
+ pbb_dim_iter_domain (pbb);
|
211 |
|
|
ppl_dimension_type offsetg = offset + pbb_nb_params (pbb);
|
212 |
|
|
ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb)
|
213 |
|
|
+ pbb_nb_local_vars (pbb);
|
214 |
|
|
ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts;
|
215 |
|
|
ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1;
|
216 |
|
|
ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2;
|
217 |
|
|
|
218 |
|
|
/* The resulting polyhedron should have the following format:
|
219 |
|
|
T|I|T'|I'|G|S|S'|l1|l2
|
220 |
|
|
where:
|
221 |
|
|
| T = t_1..t_{dim_sctr}
|
222 |
|
|
| I = i_1..i_{dim_iter_domain}
|
223 |
|
|
| T'= t'_1..t'_{dim_sctr}
|
224 |
|
|
| I'= i'_1..i'_{dim_iter_domain}
|
225 |
|
|
| G = g_1..g_{nb_params}
|
226 |
|
|
| S = s_1..s_{nb_subscripts}
|
227 |
|
|
| S'= s'_1..s'_{nb_subscripts}
|
228 |
|
|
| l1 and l2 are scalars.
|
229 |
|
|
|
230 |
|
|
Some invariants:
|
231 |
|
|
offset = dim_sctr + dim_iter_domain + nb_local_vars
|
232 |
|
|
offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params. */
|
233 |
|
|
|
234 |
|
|
/* Construct the T|I|0|0|G|0|0|0|0 part. */
|
235 |
|
|
{
|
236 |
|
|
ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
|
237 |
|
|
(&sctr, PBB_TRANSFORMED_SCATTERING (pbb));
|
238 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
|
239 |
|
|
(sctr, 2 * nb_subscripts + 2);
|
240 |
|
|
ppl_insert_dimensions_pointset (sctr, offset, offset);
|
241 |
|
|
}
|
242 |
|
|
|
243 |
|
|
/* Construct the 0|I|0|0|G|S|0|0|0 part. */
|
244 |
|
|
{
|
245 |
|
|
ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
|
246 |
|
|
(&p1, PDR_ACCESSES (pdr));
|
247 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
|
248 |
|
|
(p1, nb_subscripts + 2);
|
249 |
|
|
ppl_insert_dimensions_pointset (p1, 0, dim_sctr);
|
250 |
|
|
ppl_insert_dimensions_pointset (p1, offset, offset);
|
251 |
|
|
}
|
252 |
|
|
|
253 |
|
|
/* Construct the 0|0|0|0|0|S|0|l1|0 part. */
|
254 |
|
|
{
|
255 |
|
|
lma = build_linearized_memory_access (offset + dim_sctr, pdr);
|
256 |
|
|
ppl_set_coef (lma, dim_L1, -1);
|
257 |
|
|
ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL);
|
258 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr);
|
259 |
|
|
ppl_delete_Linear_Expression (lma);
|
260 |
|
|
ppl_delete_Constraint (new_cstr);
|
261 |
|
|
}
|
262 |
|
|
|
263 |
|
|
/* Now intersect all the parts to get the polyhedron P1:
|
264 |
|
|
T|I|0|0|G|0|0|0 |0
|
265 |
|
|
0|I|0|0|G|S|0|0 |0
|
266 |
|
|
0|0|0|0|0|S|0|l1|0
|
267 |
|
|
------------------
|
268 |
|
|
T|I|0|0|G|S|0|l1|0. */
|
269 |
|
|
|
270 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr);
|
271 |
|
|
ppl_delete_Pointset_Powerset_C_Polyhedron (sctr);
|
272 |
|
|
|
273 |
|
|
/* Build P2, which would have the following form:
|
274 |
|
|
0|0|T'|I'|G|0|S'|0|l2
|
275 |
|
|
|
276 |
|
|
P2 is built, by remapping the P1 polyhedron:
|
277 |
|
|
T|I|0|0|G|S|0|l1|0
|
278 |
|
|
|
279 |
|
|
using the following mapping:
|
280 |
|
|
T->T'
|
281 |
|
|
I->I'
|
282 |
|
|
S->S'
|
283 |
|
|
l1->l2. */
|
284 |
|
|
{
|
285 |
|
|
ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
|
286 |
|
|
(&p2, p1);
|
287 |
|
|
|
288 |
|
|
map = ppl_new_id_map (new_dim);
|
289 |
|
|
|
290 |
|
|
/* TI -> T'I'. */
|
291 |
|
|
for (i = 0; i < offset; i++)
|
292 |
|
|
ppl_interchange (map, i, i + offset);
|
293 |
|
|
|
294 |
|
|
/* l1 -> l2. */
|
295 |
|
|
ppl_interchange (map, dim_L1, dim_L2);
|
296 |
|
|
|
297 |
|
|
/* S -> S'. */
|
298 |
|
|
for (i = 0; i < nb_subscripts; i++)
|
299 |
|
|
ppl_interchange (map, offset + offsetg + i,
|
300 |
|
|
offset + offsetg + nb_subscripts + i);
|
301 |
|
|
|
302 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim);
|
303 |
|
|
free (map);
|
304 |
|
|
}
|
305 |
|
|
|
306 |
|
|
time_depth = psct_dynamic_dim (pbb, depth);
|
307 |
|
|
|
308 |
|
|
/* P1 = P1 inter P2. */
|
309 |
|
|
ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2);
|
310 |
|
|
build_partial_difference (&p1, time_depth, offset, dim_sctr);
|
311 |
|
|
|
312 |
|
|
/* Maximise the expression L2 - L1. */
|
313 |
|
|
{
|
314 |
|
|
ppl_new_Linear_Expression_with_dimension (&le, new_dim);
|
315 |
|
|
ppl_set_coef (le, dim_L2, 1);
|
316 |
|
|
ppl_set_coef (le, dim_L1, -1);
|
317 |
|
|
ppl_max_for_le_pointset (p1, le, stride);
|
318 |
|
|
}
|
319 |
|
|
|
320 |
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
321 |
|
|
{
|
322 |
|
|
fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
|
323 |
|
|
pbb_index (pbb), PDR_ID (pdr), (int) depth);
|
324 |
|
|
value_print (dump_file, " %s ", stride);
|
325 |
|
|
}
|
326 |
|
|
|
327 |
|
|
ppl_delete_Pointset_Powerset_C_Polyhedron (p1);
|
328 |
|
|
ppl_delete_Pointset_Powerset_C_Polyhedron (p2);
|
329 |
|
|
ppl_delete_Linear_Expression (le);
|
330 |
|
|
}
|
331 |
|
|
|
332 |
|
|
|
333 |
|
|
/* Sets STRIDES to the sum of all the strides of the data references
|
334 |
|
|
accessed in LOOP at DEPTH. */
|
335 |
|
|
|
336 |
|
|
static void
|
337 |
|
|
memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, Value strides)
|
338 |
|
|
{
|
339 |
|
|
int i, j;
|
340 |
|
|
lst_p l;
|
341 |
|
|
poly_dr_p pdr;
|
342 |
|
|
Value s, n;
|
343 |
|
|
|
344 |
|
|
value_init (s);
|
345 |
|
|
value_init (n);
|
346 |
|
|
|
347 |
|
|
for (j = 0; VEC_iterate (lst_p, LST_SEQ (loop), j, l); j++)
|
348 |
|
|
if (LST_LOOP_P (l))
|
349 |
|
|
memory_strides_in_loop_1 (l, depth, strides);
|
350 |
|
|
else
|
351 |
|
|
for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (LST_PBB (l)), i, pdr); i++)
|
352 |
|
|
{
|
353 |
|
|
pdr_stride_in_loop (s, depth, pdr);
|
354 |
|
|
value_set_si (n, PDR_NB_REFS (pdr));
|
355 |
|
|
value_multiply (s, s, n);
|
356 |
|
|
value_addto (strides, strides, s);
|
357 |
|
|
}
|
358 |
|
|
|
359 |
|
|
value_clear (s);
|
360 |
|
|
value_clear (n);
|
361 |
|
|
}
|
362 |
|
|
|
363 |
|
|
/* Sets STRIDES to the sum of all the strides of the data references
|
364 |
|
|
accessed in LOOP at DEPTH. */
|
365 |
|
|
|
366 |
|
|
static void
|
367 |
|
|
memory_strides_in_loop (lst_p loop, graphite_dim_t depth, Value strides)
|
368 |
|
|
{
|
369 |
|
|
if (value_mone_p (loop->memory_strides))
|
370 |
|
|
{
|
371 |
|
|
value_set_si (strides, 0);
|
372 |
|
|
memory_strides_in_loop_1 (loop, depth, strides);
|
373 |
|
|
}
|
374 |
|
|
else
|
375 |
|
|
value_assign (strides, loop->memory_strides);
|
376 |
|
|
}
|
377 |
|
|
|
378 |
|
|
/* Return true when the interchange of loops LOOP1 and LOOP2 is
|
379 |
|
|
profitable.
|
380 |
|
|
|
381 |
|
|
Example:
|
382 |
|
|
|
383 |
|
|
| int a[100][100];
|
384 |
|
|
|
|
385 |
|
|
| int
|
386 |
|
|
| foo (int N)
|
387 |
|
|
| {
|
388 |
|
|
| int j;
|
389 |
|
|
| int i;
|
390 |
|
|
|
|
391 |
|
|
| for (i = 0; i < N; i++)
|
392 |
|
|
| for (j = 0; j < N; j++)
|
393 |
|
|
| a[j][2 * i] += 1;
|
394 |
|
|
|
|
395 |
|
|
| return a[N][12];
|
396 |
|
|
| }
|
397 |
|
|
|
398 |
|
|
The data access A[j][i] is described like this:
|
399 |
|
|
|
400 |
|
|
| i j N a s0 s1 1
|
401 |
|
|
| 0 0 0 1 0 0 -5 = 0
|
402 |
|
|
| 0 -1 0 0 1 0 0 = 0
|
403 |
|
|
|-2 0 0 0 0 1 0 = 0
|
404 |
|
|
| 0 0 0 0 1 0 0 >= 0
|
405 |
|
|
| 0 0 0 0 0 1 0 >= 0
|
406 |
|
|
| 0 0 0 0 -1 0 100 >= 0
|
407 |
|
|
| 0 0 0 0 0 -1 100 >= 0
|
408 |
|
|
|
409 |
|
|
The linearized memory access L to A[100][100] is:
|
410 |
|
|
|
411 |
|
|
| i j N a s0 s1 1
|
412 |
|
|
| 0 0 0 0 100 1 0
|
413 |
|
|
|
414 |
|
|
TODO: the shown format is not valid as it does not show the fact
|
415 |
|
|
that the iteration domain "i j" is transformed using the scattering.
|
416 |
|
|
|
417 |
|
|
Next, to measure the impact of iterating once in loop "i", we build
|
418 |
|
|
a maximization problem: first, we add to DR accesses the dimensions
|
419 |
|
|
k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
|
420 |
|
|
L1 and L2 are the linearized memory access functions.
|
421 |
|
|
|
422 |
|
|
| i j N a s0 s1 k s2 s3 L1 L2 D1 1
|
423 |
|
|
| 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
|
424 |
|
|
| 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j
|
425 |
|
|
|-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i
|
426 |
|
|
| 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0
|
427 |
|
|
| 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0
|
428 |
|
|
| 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0
|
429 |
|
|
| 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0
|
430 |
|
|
| 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1
|
431 |
|
|
|
432 |
|
|
Then, we generate the polyhedron P2 by interchanging the dimensions
|
433 |
|
|
(s0, s2), (s1, s3), (L1, L2), (k, i)
|
434 |
|
|
|
435 |
|
|
| i j N a s0 s1 k s2 s3 L1 L2 D1 1
|
436 |
|
|
| 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
|
437 |
|
|
| 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j
|
438 |
|
|
| 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k
|
439 |
|
|
| 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0
|
440 |
|
|
| 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0
|
441 |
|
|
| 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0
|
442 |
|
|
| 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0
|
443 |
|
|
| 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3
|
444 |
|
|
|
445 |
|
|
then we add to P2 the equality k = i + 1:
|
446 |
|
|
|
447 |
|
|
|-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1
|
448 |
|
|
|
449 |
|
|
and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
|
450 |
|
|
|
451 |
|
|
Similarly, to determine the impact of one iteration on loop "j", we
|
452 |
|
|
interchange (k, j), we add "k = j + 1", and we compute D2 the
|
453 |
|
|
maximal value of the difference.
|
454 |
|
|
|
455 |
|
|
Finally, the profitability test is D1 < D2: if in the outer loop
|
456 |
|
|
the strides are smaller than in the inner loop, then it is
|
457 |
|
|
profitable to interchange the loops at DEPTH1 and DEPTH2. */
|
458 |
|
|
|
459 |
|
|
static bool
|
460 |
|
|
lst_interchange_profitable_p (lst_p loop1, lst_p loop2)
|
461 |
|
|
{
|
462 |
|
|
Value d1, d2;
|
463 |
|
|
bool res;
|
464 |
|
|
|
465 |
|
|
gcc_assert (loop1 && loop2
|
466 |
|
|
&& LST_LOOP_P (loop1) && LST_LOOP_P (loop2)
|
467 |
|
|
&& lst_depth (loop1) < lst_depth (loop2));
|
468 |
|
|
|
469 |
|
|
value_init (d1);
|
470 |
|
|
value_init (d2);
|
471 |
|
|
|
472 |
|
|
memory_strides_in_loop (loop1, lst_depth (loop1), d1);
|
473 |
|
|
memory_strides_in_loop (loop2, lst_depth (loop2), d2);
|
474 |
|
|
|
475 |
|
|
res = value_lt (d1, d2);
|
476 |
|
|
|
477 |
|
|
value_clear (d1);
|
478 |
|
|
value_clear (d2);
|
479 |
|
|
|
480 |
|
|
return res;
|
481 |
|
|
}
|
482 |
|
|
|
483 |
|
|
/* Interchanges the loops at DEPTH1 and DEPTH2 of the original
|
484 |
|
|
scattering and assigns the resulting polyhedron to the transformed
|
485 |
|
|
scattering. */
|
486 |
|
|
|
487 |
|
|
static void
|
488 |
|
|
pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
|
489 |
|
|
poly_bb_p pbb)
|
490 |
|
|
{
|
491 |
|
|
ppl_dimension_type i, dim;
|
492 |
|
|
ppl_dimension_type *map;
|
493 |
|
|
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
|
494 |
|
|
ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1);
|
495 |
|
|
ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2);
|
496 |
|
|
|
497 |
|
|
ppl_Polyhedron_space_dimension (poly, &dim);
|
498 |
|
|
map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim);
|
499 |
|
|
|
500 |
|
|
for (i = 0; i < dim; i++)
|
501 |
|
|
map[i] = i;
|
502 |
|
|
|
503 |
|
|
map[dim1] = dim2;
|
504 |
|
|
map[dim2] = dim1;
|
505 |
|
|
|
506 |
|
|
ppl_Polyhedron_map_space_dimensions (poly, map, dim);
|
507 |
|
|
free (map);
|
508 |
|
|
}
|
509 |
|
|
|
510 |
|
|
/* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
|
511 |
|
|
the statements below LST. */
|
512 |
|
|
|
513 |
|
|
static void
|
514 |
|
|
lst_apply_interchange (lst_p lst, int depth1, int depth2)
|
515 |
|
|
{
|
516 |
|
|
if (!lst)
|
517 |
|
|
return;
|
518 |
|
|
|
519 |
|
|
if (LST_LOOP_P (lst))
|
520 |
|
|
{
|
521 |
|
|
int i;
|
522 |
|
|
lst_p l;
|
523 |
|
|
|
524 |
|
|
for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
|
525 |
|
|
lst_apply_interchange (l, depth1, depth2);
|
526 |
|
|
}
|
527 |
|
|
else
|
528 |
|
|
pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
|
529 |
|
|
}
|
530 |
|
|
|
531 |
|
|
/* Return true when the nest starting at LOOP1 and ending on LOOP2 is
|
532 |
|
|
perfect: i.e. there are no sequence of statements. */
|
533 |
|
|
|
534 |
|
|
static bool
|
535 |
|
|
lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
|
536 |
|
|
{
|
537 |
|
|
if (loop1 == loop2)
|
538 |
|
|
return true;
|
539 |
|
|
|
540 |
|
|
if (!LST_LOOP_P (loop1))
|
541 |
|
|
return false;
|
542 |
|
|
|
543 |
|
|
return VEC_length (lst_p, LST_SEQ (loop1)) == 1
|
544 |
|
|
&& lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2);
|
545 |
|
|
}
|
546 |
|
|
|
547 |
|
|
/* Transform the loop nest between LOOP1 and LOOP2 into a perfect
|
548 |
|
|
nest. To continue the naming tradition, this function is called
|
549 |
|
|
after perfect_nestify. NEST is set to the perfectly nested loop
|
550 |
|
|
that is created. BEFORE/AFTER are set to the loops distributed
|
551 |
|
|
before/after the loop NEST. */
|
552 |
|
|
|
553 |
|
|
static void
|
554 |
|
|
lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
|
555 |
|
|
lst_p *nest, lst_p *after)
|
556 |
|
|
{
|
557 |
|
|
poly_bb_p first, last;
|
558 |
|
|
|
559 |
|
|
gcc_assert (loop1 && loop2
|
560 |
|
|
&& loop1 != loop2
|
561 |
|
|
&& LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
|
562 |
|
|
|
563 |
|
|
first = LST_PBB (lst_find_first_pbb (loop2));
|
564 |
|
|
last = LST_PBB (lst_find_last_pbb (loop2));
|
565 |
|
|
|
566 |
|
|
*before = copy_lst (loop1);
|
567 |
|
|
*nest = copy_lst (loop1);
|
568 |
|
|
*after = copy_lst (loop1);
|
569 |
|
|
|
570 |
|
|
lst_remove_all_before_including_pbb (*before, first, false);
|
571 |
|
|
lst_remove_all_before_including_pbb (*after, last, true);
|
572 |
|
|
|
573 |
|
|
lst_remove_all_before_excluding_pbb (*nest, first, true);
|
574 |
|
|
lst_remove_all_before_excluding_pbb (*nest, last, false);
|
575 |
|
|
|
576 |
|
|
if (lst_empty_p (*before))
|
577 |
|
|
{
|
578 |
|
|
free_lst (*before);
|
579 |
|
|
*before = NULL;
|
580 |
|
|
}
|
581 |
|
|
if (lst_empty_p (*after))
|
582 |
|
|
{
|
583 |
|
|
free_lst (*after);
|
584 |
|
|
*after = NULL;
|
585 |
|
|
}
|
586 |
|
|
if (lst_empty_p (*nest))
|
587 |
|
|
{
|
588 |
|
|
free_lst (*nest);
|
589 |
|
|
*nest = NULL;
|
590 |
|
|
}
|
591 |
|
|
}
|
592 |
|
|
|
593 |
|
|
/* Try to interchange LOOP1 with LOOP2 for all the statements of the
|
594 |
|
|
body of LOOP2. LOOP1 contains LOOP2. Return true if it did the
|
595 |
|
|
interchange. */
|
596 |
|
|
|
597 |
|
|
static bool
|
598 |
|
|
lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
|
599 |
|
|
{
|
600 |
|
|
int depth1 = lst_depth (loop1);
|
601 |
|
|
int depth2 = lst_depth (loop2);
|
602 |
|
|
lst_p transformed;
|
603 |
|
|
|
604 |
|
|
lst_p before = NULL, nest = NULL, after = NULL;
|
605 |
|
|
|
606 |
|
|
if (!lst_interchange_profitable_p (loop1, loop2))
|
607 |
|
|
return false;
|
608 |
|
|
|
609 |
|
|
if (!lst_perfectly_nested_p (loop1, loop2))
|
610 |
|
|
lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
|
611 |
|
|
|
612 |
|
|
lst_apply_interchange (loop2, depth1, depth2);
|
613 |
|
|
|
614 |
|
|
/* Sync the transformed LST information and the PBB scatterings
|
615 |
|
|
before using the scatterings in the data dependence analysis. */
|
616 |
|
|
if (before || nest || after)
|
617 |
|
|
{
|
618 |
|
|
transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
|
619 |
|
|
before, nest, after);
|
620 |
|
|
lst_update_scattering (transformed);
|
621 |
|
|
free_lst (transformed);
|
622 |
|
|
}
|
623 |
|
|
|
624 |
|
|
if (graphite_legal_transform (scop))
|
625 |
|
|
{
|
626 |
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
627 |
|
|
fprintf (dump_file,
|
628 |
|
|
"Loops at depths %d and %d will be interchanged.\n",
|
629 |
|
|
depth1, depth2);
|
630 |
|
|
|
631 |
|
|
/* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */
|
632 |
|
|
lst_insert_in_sequence (before, loop1, true);
|
633 |
|
|
lst_insert_in_sequence (after, loop1, false);
|
634 |
|
|
|
635 |
|
|
if (nest)
|
636 |
|
|
{
|
637 |
|
|
lst_replace (loop1, nest);
|
638 |
|
|
free_lst (loop1);
|
639 |
|
|
}
|
640 |
|
|
|
641 |
|
|
return true;
|
642 |
|
|
}
|
643 |
|
|
|
644 |
|
|
/* Undo the transform. */
|
645 |
|
|
free_lst (before);
|
646 |
|
|
free_lst (nest);
|
647 |
|
|
free_lst (after);
|
648 |
|
|
lst_apply_interchange (loop2, depth2, depth1);
|
649 |
|
|
return false;
|
650 |
|
|
}
|
651 |
|
|
|
652 |
|
|
/* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
|
653 |
|
|
with the loop OUTER in LST_SEQ (OUTER_FATHER). */
|
654 |
|
|
|
655 |
|
|
static bool
|
656 |
|
|
lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
|
657 |
|
|
lst_p inner_father)
|
658 |
|
|
{
|
659 |
|
|
int inner;
|
660 |
|
|
lst_p loop1, loop2;
|
661 |
|
|
|
662 |
|
|
gcc_assert (outer_father
|
663 |
|
|
&& LST_LOOP_P (outer_father)
|
664 |
|
|
&& LST_LOOP_P (VEC_index (lst_p, LST_SEQ (outer_father), outer))
|
665 |
|
|
&& inner_father
|
666 |
|
|
&& LST_LOOP_P (inner_father));
|
667 |
|
|
|
668 |
|
|
loop1 = VEC_index (lst_p, LST_SEQ (outer_father), outer);
|
669 |
|
|
|
670 |
|
|
for (inner = 0; VEC_iterate (lst_p, LST_SEQ (inner_father), inner, loop2); inner++)
|
671 |
|
|
if (LST_LOOP_P (loop2)
|
672 |
|
|
&& (lst_try_interchange_loops (scop, loop1, loop2)
|
673 |
|
|
|| lst_interchange_select_inner (scop, outer_father, outer, loop2)))
|
674 |
|
|
return true;
|
675 |
|
|
|
676 |
|
|
return false;
|
677 |
|
|
}
|
678 |
|
|
|
679 |
|
|
/* Interchanges all the loops of LOOP and the loops of its body that
|
680 |
|
|
are considered profitable to interchange. Return true if it did
|
681 |
|
|
interchanged some loops. OUTER is the index in LST_SEQ (LOOP) that
|
682 |
|
|
points to the next outer loop to be considered for interchange. */
|
683 |
|
|
|
684 |
|
|
static bool
|
685 |
|
|
lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
|
686 |
|
|
{
|
687 |
|
|
lst_p l;
|
688 |
|
|
bool res = false;
|
689 |
|
|
int i = 0;
|
690 |
|
|
lst_p father;
|
691 |
|
|
|
692 |
|
|
if (!loop || !LST_LOOP_P (loop))
|
693 |
|
|
return false;
|
694 |
|
|
|
695 |
|
|
father = LST_LOOP_FATHER (loop);
|
696 |
|
|
if (father)
|
697 |
|
|
{
|
698 |
|
|
while (lst_interchange_select_inner (scop, father, outer, loop))
|
699 |
|
|
{
|
700 |
|
|
res = true;
|
701 |
|
|
loop = VEC_index (lst_p, LST_SEQ (father), outer);
|
702 |
|
|
}
|
703 |
|
|
}
|
704 |
|
|
|
705 |
|
|
if (LST_LOOP_P (loop))
|
706 |
|
|
for (i = 0; VEC_iterate (lst_p, LST_SEQ (loop), i, l); i++)
|
707 |
|
|
if (LST_LOOP_P (l))
|
708 |
|
|
res |= lst_interchange_select_outer (scop, l, i);
|
709 |
|
|
|
710 |
|
|
return res;
|
711 |
|
|
}
|
712 |
|
|
|
713 |
|
|
/* Interchanges all the loop depths that are considered profitable for SCOP. */
|
714 |
|
|
|
715 |
|
|
bool
|
716 |
|
|
scop_do_interchange (scop_p scop)
|
717 |
|
|
{
|
718 |
|
|
bool res = lst_interchange_select_outer
|
719 |
|
|
(scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
|
720 |
|
|
|
721 |
|
|
lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
|
722 |
|
|
|
723 |
|
|
return res;
|
724 |
|
|
}
|
725 |
|
|
|
726 |
|
|
|
727 |
|
|
#endif
|
728 |
|
|
|