OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [tree-loop-linear.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Linear Loop transforms
2
   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
   Contributed by Daniel Berlin <dberlin@dberlin.org>.
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING3.  If not see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "target.h"
30
 
31
#include "rtl.h"
32
#include "basic-block.h"
33
#include "diagnostic.h"
34
#include "obstack.h"
35
#include "tree-flow.h"
36
#include "tree-dump.h"
37
#include "timevar.h"
38
#include "cfgloop.h"
39
#include "expr.h"
40
#include "optabs.h"
41
#include "tree-chrec.h"
42
#include "tree-data-ref.h"
43
#include "tree-scalar-evolution.h"
44
#include "tree-pass.h"
45
#include "lambda.h"
46
 
47
/* Linear loop transforms include any composition of interchange,
48
   scaling, skewing, and reversal.  They are used to change the
49
   iteration order of loop nests in order to optimize data locality of
50
   traversals, or remove dependences that prevent
51
   parallelization/vectorization/etc.
52
 
53
   TODO: Determine reuse vectors/matrix and use it to determine optimal
54
   transform matrix for locality purposes.
55
   TODO: Completion of partial transforms.  */
56
 
57
/* Gather statistics for loop interchange.  LOOP is the loop being
58
   considered. The first loop in the considered loop nest is
59
   FIRST_LOOP, and consequently, the index of the considered loop is
60
   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
61
 
62
   Initializes:
63
   - DEPENDENCE_STEPS the sum of all the data dependence distances
64
   carried by loop LOOP,
65
 
66
   - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
67
   for which the loop LOOP is not carrying any dependence,
68
 
69
   - ACCESS_STRIDES the sum of all the strides in LOOP.
70
 
71
   Example: for the following loop,
72
 
73
   | loop_1 runs 1335 times
74
   |   loop_2 runs 1335 times
75
   |     A[{{0, +, 1}_1, +, 1335}_2]
76
   |     B[{{0, +, 1}_1, +, 1335}_2]
77
   |   endloop_2
78
   |   A[{0, +, 1336}_1]
79
   | endloop_1
80
 
81
   gather_interchange_stats (in loop_1) will return
82
   DEPENDENCE_STEPS = 3002
83
   NB_DEPS_NOT_CARRIED_BY_LOOP = 5
84
   ACCESS_STRIDES = 10694
85
 
86
   gather_interchange_stats (in loop_2) will return
87
   DEPENDENCE_STEPS = 3000
88
   NB_DEPS_NOT_CARRIED_BY_LOOP = 7
89
   ACCESS_STRIDES = 8010
90
*/
91
 
92
static void
93
gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations ATTRIBUTE_UNUSED,
94
                          VEC (data_reference_p, heap) *datarefs ATTRIBUTE_UNUSED,
95
                          struct loop *loop ATTRIBUTE_UNUSED,
96
                          struct loop *first_loop ATTRIBUTE_UNUSED,
97
                          unsigned int *dependence_steps ATTRIBUTE_UNUSED,
98
                          unsigned int *nb_deps_not_carried_by_loop ATTRIBUTE_UNUSED,
99
                          double_int *access_strides ATTRIBUTE_UNUSED)
100
{
101
  unsigned int i, j;
102
  struct data_dependence_relation *ddr;
103
  struct data_reference *dr;
104
 
105
  *dependence_steps = 0;
106
  *nb_deps_not_carried_by_loop = 0;
107
  *access_strides = double_int_zero;
108
 
109
  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
110
    {
111
      /* If we don't know anything about this dependence, or the distance
112
         vector is NULL, or there is no dependence, then there is no reuse of
113
         data.  */
114
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
115
          || DDR_ARE_DEPENDENT (ddr) == chrec_known
116
          || DDR_NUM_DIST_VECTS (ddr) == 0)
117
        continue;
118
 
119
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
120
        {
121
          int dist = DDR_DIST_VECT (ddr, j)[loop_depth (loop) - loop_depth (first_loop)];
122
 
123
          if (dist == 0)
124
            (*nb_deps_not_carried_by_loop) += 1;
125
 
126
          else if (dist < 0)
127
            (*dependence_steps) += -dist;
128
 
129
          else
130
            (*dependence_steps) += dist;
131
        }
132
    }
133
 
134
  /* Compute the access strides.  */
135
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
136
    {
137
      unsigned int it;
138
      tree ref = DR_REF (dr);
139
      gimple stmt = DR_STMT (dr);
140
      struct loop *stmt_loop = loop_containing_stmt (stmt);
141
      struct loop *inner_loop = first_loop->inner;
142
 
143
      if (inner_loop != stmt_loop
144
          && !flow_loop_nested_p (inner_loop, stmt_loop))
145
        continue;
146
 
147
      for (it = 0; it < DR_NUM_DIMENSIONS (dr);
148
           it++, ref = TREE_OPERAND (ref, 0))
149
        {
150
          int num = am_vector_index_for_loop (DR_ACCESS_MATRIX (dr), loop->num);
151
          int istride = AM_GET_ACCESS_MATRIX_ELEMENT (DR_ACCESS_MATRIX (dr), it, num);
152
          tree array_size = TYPE_SIZE (TREE_TYPE (ref));
153
          double_int dstride;
154
 
155
          if (array_size == NULL_TREE
156
              || TREE_CODE (array_size) != INTEGER_CST)
157
            continue;
158
 
159
          dstride = double_int_mul (tree_to_double_int (array_size),
160
                                    shwi_to_double_int (istride));
161
          (*access_strides) = double_int_add (*access_strides, dstride);
162
        }
163
    }
164
}
165
 
166
/* Attempt to apply interchange transformations to TRANS to maximize the
167
   spatial and temporal locality of the loop.
168
   Returns the new transform matrix.  The smaller the reuse vector
169
   distances in the inner loops, the fewer the cache misses.
170
   FIRST_LOOP is the loop->num of the first loop in the analyzed loop
171
   nest.  */
172
 
173
 
174
static lambda_trans_matrix
175
try_interchange_loops (lambda_trans_matrix trans,
176
                       unsigned int depth,
177
                       VEC (ddr_p, heap) *dependence_relations,
178
                       VEC (data_reference_p, heap) *datarefs,
179
                       struct loop *first_loop)
180
{
181
  bool res;
182
  struct loop *loop_i;
183
  struct loop *loop_j;
184
  unsigned int dependence_steps_i, dependence_steps_j;
185
  double_int access_strides_i, access_strides_j;
186
  double_int small, large, nb_iter;
187
  double_int l1_cache_size, l2_cache_size;
188
  int cmp;
189
  unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
190
  struct data_dependence_relation *ddr;
191
 
192
  if (VEC_length (ddr_p, dependence_relations) == 0)
193
    return trans;
194
 
195
  /* When there is an unknown relation in the dependence_relations, we
196
     know that it is no worth looking at this loop nest: give up.  */
197
  ddr = VEC_index (ddr_p, dependence_relations, 0);
198
  if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
199
    return trans;
200
 
201
  l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024);
202
  l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024);
203
 
204
  /* LOOP_I is always the outer loop.  */
205
  for (loop_j = first_loop->inner;
206
       loop_j;
207
       loop_j = loop_j->inner)
208
    for (loop_i = first_loop;
209
         loop_depth (loop_i) < loop_depth (loop_j);
210
         loop_i = loop_i->inner)
211
      {
212
        gather_interchange_stats (dependence_relations, datarefs,
213
                                  loop_i, first_loop,
214
                                  &dependence_steps_i,
215
                                  &nb_deps_not_carried_by_i,
216
                                  &access_strides_i);
217
        gather_interchange_stats (dependence_relations, datarefs,
218
                                  loop_j, first_loop,
219
                                  &dependence_steps_j,
220
                                  &nb_deps_not_carried_by_j,
221
                                  &access_strides_j);
222
 
223
        /* Heuristics for loop interchange profitability:
224
 
225
           0. Don't transform if the smallest stride is larger than
226
              the L2 cache, or if the largest stride multiplied by the
227
              number of iterations is smaller than the L1 cache.
228
 
229
           1. (spatial locality) Inner loops should have smallest
230
              dependence steps.
231
 
232
           2. (spatial locality) Inner loops should contain more
233
           dependence relations not carried by the loop.
234
 
235
           3. (temporal locality) Inner loops should have smallest
236
              array access strides.
237
        */
238
 
239
        cmp = double_int_ucmp (access_strides_i, access_strides_j);
240
        small = cmp < 0 ? access_strides_i : access_strides_j;
241
        large = cmp < 0 ? access_strides_j : access_strides_i;
242
 
243
        if (double_int_ucmp (small, l2_cache_size) > 0)
244
          continue;
245
 
246
        res = cmp < 0 ?
247
          estimated_loop_iterations (loop_j, false, &nb_iter):
248
          estimated_loop_iterations (loop_i, false, &nb_iter);
249
 
250
        if (res
251
            && double_int_ucmp (double_int_mul (large, nb_iter),
252
                                l1_cache_size) < 0)
253
          continue;
254
 
255
        if (dependence_steps_i < dependence_steps_j
256
            || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
257
            || cmp < 0)
258
          {
259
            lambda_matrix_row_exchange (LTM_MATRIX (trans),
260
                                        loop_depth (loop_i) - loop_depth (first_loop),
261
                                        loop_depth (loop_j) - loop_depth (first_loop));
262
            /* Validate the resulting matrix.  When the transformation
263
               is not valid, reverse to the previous transformation.  */
264
            if (!lambda_transform_legal_p (trans, depth, dependence_relations))
265
              lambda_matrix_row_exchange (LTM_MATRIX (trans),
266
                                          loop_depth (loop_i) - loop_depth (first_loop),
267
                                          loop_depth (loop_j) - loop_depth (first_loop));
268
          }
269
      }
270
 
271
  return trans;
272
}
273
 
274
/* Return the number of nested loops in LOOP_NEST, or 0 if the loops
275
   are not perfectly nested.  */
276
 
277
unsigned int
278
perfect_loop_nest_depth (struct loop *loop_nest)
279
{
280
  struct loop *temp;
281
  unsigned int depth = 1;
282
 
283
  /* If it's not a loop nest, we don't want it.  We also don't handle
284
     sibling loops properly, which are loops of the following form:
285
 
286
     | for (i = 0; i < 50; i++)
287
     |   {
288
     |     for (j = 0; j < 50; j++)
289
     |       {
290
     |        ...
291
     |       }
292
     |     for (j = 0; j < 50; j++)
293
     |       {
294
     |        ...
295
     |       }
296
     |   }
297
  */
298
 
299
  if (!loop_nest->inner || !single_exit (loop_nest))
300
    return 0;
301
 
302
  for (temp = loop_nest->inner; temp; temp = temp->inner)
303
    {
304
      /* If we have a sibling loop or multiple exit edges, jump ship.  */
305
      if (temp->next || !single_exit (temp))
306
        return 0;
307
 
308
      depth++;
309
    }
310
 
311
  return depth;
312
}
313
 
314
/* Perform a set of linear transforms on loops.  */
315
 
316
void
317
linear_transform_loops (void)
318
{
319
  bool modified = false;
320
  loop_iterator li;
321
  VEC(tree,heap) *oldivs = NULL;
322
  VEC(tree,heap) *invariants = NULL;
323
  VEC(tree,heap) *lambda_parameters = NULL;
324
  VEC(gimple,heap) *remove_ivs = VEC_alloc (gimple, heap, 3);
325
  struct loop *loop_nest;
326
  gimple oldiv_stmt;
327
  unsigned i;
328
 
329
  FOR_EACH_LOOP (li, loop_nest, 0)
330
    {
331
      unsigned int depth = 0;
332
      VEC (ddr_p, heap) *dependence_relations;
333
      VEC (data_reference_p, heap) *datarefs;
334
 
335
      lambda_loopnest before, after;
336
      lambda_trans_matrix trans;
337
      struct obstack lambda_obstack;
338
      struct loop *loop;
339
      VEC(loop_p,heap) *nest;
340
 
341
      depth = perfect_loop_nest_depth (loop_nest);
342
      if (depth == 0)
343
        continue;
344
 
345
      nest = VEC_alloc (loop_p, heap, 3);
346
      for (loop = loop_nest; loop; loop = loop->inner)
347
        VEC_safe_push (loop_p, heap, nest, loop);
348
 
349
      gcc_obstack_init (&lambda_obstack);
350
      VEC_truncate (tree, oldivs, 0);
351
      VEC_truncate (tree, invariants, 0);
352
      VEC_truncate (tree, lambda_parameters, 0);
353
 
354
      datarefs = VEC_alloc (data_reference_p, heap, 10);
355
      dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
356
      if (!compute_data_dependences_for_loop (loop_nest, true, &datarefs,
357
                                              &dependence_relations))
358
        goto free_and_continue;
359
 
360
      lambda_collect_parameters (datarefs, &lambda_parameters);
361
      if (!lambda_compute_access_matrices (datarefs, lambda_parameters, nest))
362
        goto free_and_continue;
363
 
364
      if (dump_file && (dump_flags & TDF_DETAILS))
365
        dump_ddrs (dump_file, dependence_relations);
366
 
367
      /* Build the transformation matrix.  */
368
      trans = lambda_trans_matrix_new (depth, depth);
369
      lambda_matrix_id (LTM_MATRIX (trans), depth);
370
      trans = try_interchange_loops (trans, depth, dependence_relations,
371
                                     datarefs, loop_nest);
372
 
373
      if (lambda_trans_matrix_id_p (trans))
374
        {
375
          if (dump_file)
376
           fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
377
          goto free_and_continue;
378
        }
379
 
380
      /* Check whether the transformation is legal.  */
381
      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
382
        {
383
          if (dump_file)
384
            fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
385
          goto free_and_continue;
386
        }
387
 
388
      before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
389
                                                &invariants, &lambda_obstack);
390
 
391
      if (!before)
392
        goto free_and_continue;
393
 
394
      if (dump_file)
395
        {
396
          fprintf (dump_file, "Before:\n");
397
          print_lambda_loopnest (dump_file, before, 'i');
398
        }
399
 
400
      after = lambda_loopnest_transform (before, trans, &lambda_obstack);
401
 
402
      if (dump_file)
403
        {
404
          fprintf (dump_file, "After:\n");
405
          print_lambda_loopnest (dump_file, after, 'u');
406
        }
407
 
408
      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
409
                                       &remove_ivs,
410
                                       after, trans, &lambda_obstack);
411
      modified = true;
412
 
413
      if (dump_file)
414
        fprintf (dump_file, "Successfully transformed loop.\n");
415
 
416
    free_and_continue:
417
      obstack_free (&lambda_obstack, NULL);
418
      free_dependence_relations (dependence_relations);
419
      free_data_refs (datarefs);
420
      VEC_free (loop_p, heap, nest);
421
    }
422
 
423
  for (i = 0; VEC_iterate (gimple, remove_ivs, i, oldiv_stmt); i++)
424
    remove_iv (oldiv_stmt);
425
 
426
  VEC_free (tree, heap, oldivs);
427
  VEC_free (tree, heap, invariants);
428
  VEC_free (gimple, heap, remove_ivs);
429
  scev_reset ();
430
 
431
  if (modified)
432
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
433
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.