OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [tree-loop-linear.c] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Linear Loop transforms
2
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
   Contributed by Daniel Berlin <dberlin@dberlin.org>.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 2, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING.  If not, write to the Free
19
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20
02110-1301, USA.  */
21
 
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "target.h"
30
 
31
#include "rtl.h"
32
#include "basic-block.h"
33
#include "diagnostic.h"
34
#include "tree-flow.h"
35
#include "tree-dump.h"
36
#include "timevar.h"
37
#include "cfgloop.h"
38
#include "expr.h"
39
#include "optabs.h"
40
#include "tree-chrec.h"
41
#include "tree-data-ref.h"
42
#include "tree-scalar-evolution.h"
43
#include "tree-pass.h"
44
#include "varray.h"
45
#include "lambda.h"
46
 
47
/* Linear loop transforms include any composition of interchange,
48
   scaling, skewing, and reversal.  They are used to change the
49
   iteration order of loop nests in order to optimize data locality of
50
   traversals, or remove dependences that prevent
51
   parallelization/vectorization/etc.
52
 
53
   TODO: Determine reuse vectors/matrix and use it to determine optimal
54
   transform matrix for locality purposes.
55
   TODO: Completion of partial transforms.  */
56
 
57
/* Gather statistics for loop interchange.  LOOP is the loop being
58
   considered. The first loop in the considered loop nest is
59
   FIRST_LOOP, and consequently, the index of the considered loop is
60
   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
61
 
62
   Initializes:
63
   - DEPENDENCE_STEPS the sum of all the data dependence distances
64
   carried by loop LOOP,
65
 
66
   - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
67
   for which the loop LOOP is not carrying any dependence,
68
 
69
   - ACCESS_STRIDES the sum of all the strides in LOOP.
70
 
71
   Example: for the following loop,
72
 
73
   | loop_1 runs 1335 times
74
   |   loop_2 runs 1335 times
75
   |     A[{{0, +, 1}_1, +, 1335}_2]
76
   |     B[{{0, +, 1}_1, +, 1335}_2]
77
   |   endloop_2
78
   |   A[{0, +, 1336}_1]
79
   | endloop_1
80
 
81
   gather_interchange_stats (in loop_1) will return
82
   DEPENDENCE_STEPS = 3002
83
   NB_DEPS_NOT_CARRIED_BY_LOOP = 5
84
   ACCESS_STRIDES = 10694
85
 
86
   gather_interchange_stats (in loop_2) will return
87
   DEPENDENCE_STEPS = 3000
88
   NB_DEPS_NOT_CARRIED_BY_LOOP = 7
89
   ACCESS_STRIDES = 8010
90
*/
91
 
92
static void
93
gather_interchange_stats (varray_type dependence_relations,
94
                          varray_type datarefs,
95
                          struct loop *loop,
96
                          struct loop *first_loop,
97
                          unsigned int *dependence_steps,
98
                          unsigned int *nb_deps_not_carried_by_loop,
99
                          unsigned int *access_strides)
100
{
101
  unsigned int i, j;
102
 
103
  *dependence_steps = 0;
104
  *nb_deps_not_carried_by_loop = 0;
105
  *access_strides = 0;
106
 
107
  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
108
    {
109
      struct data_dependence_relation *ddr =
110
        (struct data_dependence_relation *)
111
        VARRAY_GENERIC_PTR (dependence_relations, i);
112
 
113
      /* If we don't know anything about this dependence, or the distance
114
         vector is NULL, or there is no dependence, then there is no reuse of
115
         data.  */
116
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
117
          || DDR_ARE_DEPENDENT (ddr) == chrec_known
118
          || DDR_NUM_DIST_VECTS (ddr) == 0)
119
        continue;
120
 
121
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
122
        {
123
          int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
124
 
125
          if (dist == 0)
126
            (*nb_deps_not_carried_by_loop) += 1;
127
 
128
          else if (dist < 0)
129
            (*dependence_steps) += -dist;
130
 
131
          else
132
            (*dependence_steps) += dist;
133
        }
134
    }
135
 
136
  /* Compute the access strides.  */
137
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
138
    {
139
      unsigned int it;
140
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
141
      tree stmt = DR_STMT (dr);
142
      struct loop *stmt_loop = loop_containing_stmt (stmt);
143
      struct loop *inner_loop = first_loop->inner;
144
 
145
      if (inner_loop != stmt_loop
146
          && !flow_loop_nested_p (inner_loop, stmt_loop))
147
        continue;
148
      for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
149
        {
150
          tree chrec = DR_ACCESS_FN (dr, it);
151
          tree tstride = evolution_part_in_loop_num
152
            (chrec, loop->num);
153
 
154
          if (tstride == NULL_TREE
155
              || TREE_CODE (tstride) != INTEGER_CST)
156
            continue;
157
 
158
          (*access_strides) += int_cst_value (tstride);
159
        }
160
    }
161
}
162
 
163
/* Attempt to apply interchange transformations to TRANS to maximize the
164
   spatial and temporal locality of the loop.
165
   Returns the new transform matrix.  The smaller the reuse vector
166
   distances in the inner loops, the fewer the cache misses.
167
   FIRST_LOOP is the loop->num of the first loop in the analyzed loop
168
   nest.  */
169
 
170
 
171
static lambda_trans_matrix
172
try_interchange_loops (lambda_trans_matrix trans,
173
                       unsigned int depth,
174
                       varray_type dependence_relations,
175
                       varray_type datarefs,
176
                       struct loop *first_loop)
177
{
178
  struct loop *loop_i;
179
  struct loop *loop_j;
180
  unsigned int dependence_steps_i, dependence_steps_j;
181
  unsigned int access_strides_i, access_strides_j;
182
  unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
183
  struct data_dependence_relation *ddr;
184
 
185
  /* When there is an unknown relation in the dependence_relations, we
186
     know that it is no worth looking at this loop nest: give up.  */
187
  ddr = (struct data_dependence_relation *)
188
    VARRAY_GENERIC_PTR (dependence_relations, 0);
189
  if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
190
    return trans;
191
 
192
  /* LOOP_I is always the outer loop.  */
193
  for (loop_j = first_loop->inner;
194
       loop_j;
195
       loop_j = loop_j->inner)
196
    for (loop_i = first_loop;
197
         loop_i->depth < loop_j->depth;
198
         loop_i = loop_i->inner)
199
      {
200
        gather_interchange_stats (dependence_relations, datarefs,
201
                                  loop_i, first_loop,
202
                                  &dependence_steps_i,
203
                                  &nb_deps_not_carried_by_i,
204
                                  &access_strides_i);
205
        gather_interchange_stats (dependence_relations, datarefs,
206
                                  loop_j, first_loop,
207
                                  &dependence_steps_j,
208
                                  &nb_deps_not_carried_by_j,
209
                                  &access_strides_j);
210
 
211
        /* Heuristics for loop interchange profitability:
212
 
213
           1. (spatial locality) Inner loops should have smallest
214
              dependence steps.
215
 
216
           2. (spatial locality) Inner loops should contain more
217
           dependence relations not carried by the loop.
218
 
219
           3. (temporal locality) Inner loops should have smallest
220
              array access strides.
221
        */
222
        if (dependence_steps_i < dependence_steps_j
223
            || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
224
            || access_strides_i < access_strides_j)
225
          {
226
            lambda_matrix_row_exchange (LTM_MATRIX (trans),
227
                                        loop_i->depth - first_loop->depth,
228
                                        loop_j->depth - first_loop->depth);
229
            /* Validate the resulting matrix.  When the transformation
230
               is not valid, reverse to the previous transformation.  */
231
            if (!lambda_transform_legal_p (trans, depth, dependence_relations))
232
              lambda_matrix_row_exchange (LTM_MATRIX (trans),
233
                                          loop_i->depth - first_loop->depth,
234
                                          loop_j->depth - first_loop->depth);
235
          }
236
      }
237
 
238
  return trans;
239
}
240
 
241
/* Perform a set of linear transforms on LOOPS.  */
242
 
243
void
244
linear_transform_loops (struct loops *loops)
245
{
246
  unsigned int i;
247
  VEC(tree,heap) *oldivs = NULL;
248
  VEC(tree,heap) *invariants = NULL;
249
 
250
  for (i = 1; i < loops->num; i++)
251
    {
252
      unsigned int depth = 0;
253
      varray_type datarefs;
254
      varray_type dependence_relations;
255
      struct loop *loop_nest = loops->parray[i];
256
      struct loop *temp;
257
      lambda_loopnest before, after;
258
      lambda_trans_matrix trans;
259
      bool problem = false;
260
      bool need_perfect_nest = false;
261
      /* If it's not a loop nest, we don't want it.
262
         We also don't handle sibling loops properly,
263
         which are loops of the following form:
264
         for (i = 0; i < 50; i++)
265
           {
266
             for (j = 0; j < 50; j++)
267
               {
268
                ...
269
               }
270
           for (j = 0; j < 50; j++)
271
               {
272
                ...
273
               }
274
           } */
275
      if (!loop_nest || !loop_nest->inner)
276
        continue;
277
      VEC_truncate (tree, oldivs, 0);
278
      VEC_truncate (tree, invariants, 0);
279
      depth = 1;
280
      for (temp = loop_nest->inner; temp; temp = temp->inner)
281
        {
282
          /* If we have a sibling loop or multiple exit edges, jump ship.  */
283
          if (temp->next || !temp->single_exit)
284
            {
285
              problem = true;
286
              break;
287
            }
288
          depth ++;
289
        }
290
      if (problem)
291
        continue;
292
 
293
      /* Analyze data references and dependence relations using scev.  */
294
 
295
      VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
296
      VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
297
                               "dependence_relations");
298
 
299
 
300
      compute_data_dependences_for_loop (loop_nest, true,
301
                                         &datarefs, &dependence_relations);
302
      if (dump_file && (dump_flags & TDF_DETAILS))
303
        {
304
          unsigned int j;
305
          for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
306
            {
307
              struct data_dependence_relation *ddr =
308
                (struct data_dependence_relation *)
309
                VARRAY_GENERIC_PTR (dependence_relations, j);
310
 
311
              if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
312
                dump_data_dependence_relation (dump_file, ddr);
313
            }
314
          fprintf (dump_file, "\n\n");
315
        }
316
      /* Build the transformation matrix.  */
317
      trans = lambda_trans_matrix_new (depth, depth);
318
      lambda_matrix_id (LTM_MATRIX (trans), depth);
319
 
320
      trans = try_interchange_loops (trans, depth, dependence_relations,
321
                                     datarefs, loop_nest);
322
 
323
      if (lambda_trans_matrix_id_p (trans))
324
        {
325
          if (dump_file)
326
           fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
327
          continue;
328
        }
329
 
330
      /* Check whether the transformation is legal.  */
331
      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
332
        {
333
          if (dump_file)
334
            fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
335
          continue;
336
        }
337
      if (!perfect_nest_p (loop_nest))
338
        need_perfect_nest = true;
339
      before = gcc_loopnest_to_lambda_loopnest (loops,
340
                                                loop_nest, &oldivs,
341
                                                &invariants,
342
                                                need_perfect_nest);
343
      if (!before)
344
        continue;
345
 
346
      if (dump_file)
347
        {
348
          fprintf (dump_file, "Before:\n");
349
          print_lambda_loopnest (dump_file, before, 'i');
350
        }
351
 
352
      after = lambda_loopnest_transform (before, trans);
353
      if (dump_file)
354
        {
355
          fprintf (dump_file, "After:\n");
356
          print_lambda_loopnest (dump_file, after, 'u');
357
        }
358
      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
359
                                       after, trans);
360
      if (dump_file)
361
        fprintf (dump_file, "Successfully transformed loop.\n");
362
      free_dependence_relations (dependence_relations);
363
      free_data_refs (datarefs);
364
    }
365
  VEC_free (tree, heap, oldivs);
366
  VEC_free (tree, heap, invariants);
367
  scev_reset ();
368
  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
369
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.