OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [lambda-code.c] - Blame information for rev 328

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/*  Loop transformation code generation
2
    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3
    Free Software Foundation, Inc.
4
    Contributed by Daniel Berlin <dberlin@dberlin.org>
5
 
6
    This file is part of GCC.
7
 
8
    GCC is free software; you can redistribute it and/or modify it under
9
    the terms of the GNU General Public License as published by the Free
10
    Software Foundation; either version 3, or (at your option) any later
11
    version.
12
 
13
    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
    WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
    for more details.
17
 
18
    You should have received a copy of the GNU General Public License
19
    along with GCC; see the file COPYING3.  If not see
20
    <http://www.gnu.org/licenses/>.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "coretypes.h"
25
#include "tm.h"
26
#include "ggc.h"
27
#include "tree.h"
28
#include "target.h"
29
#include "rtl.h"
30
#include "basic-block.h"
31
#include "diagnostic.h"
32
#include "obstack.h"
33
#include "tree-flow.h"
34
#include "tree-dump.h"
35
#include "timevar.h"
36
#include "cfgloop.h"
37
#include "expr.h"
38
#include "optabs.h"
39
#include "tree-chrec.h"
40
#include "tree-data-ref.h"
41
#include "tree-pass.h"
42
#include "tree-scalar-evolution.h"
43
#include "vec.h"
44
#include "lambda.h"
45
#include "vecprim.h"
46
#include "pointer-set.h"
47
 
48
/* This loop nest code generation is based on non-singular matrix
49
   math.
50
 
51
 A little terminology and a general sketch of the algorithm.  See "A singular
52
 loop transformation framework based on non-singular matrices" by Wei Li and
53
 Keshav Pingali for formal proofs that the various statements below are
54
 correct.
55
 
56
 A loop iteration space represents the points traversed by the loop.  A point in the
57
 iteration space can be represented by a vector of size <loop depth>.  You can
58
 therefore represent the iteration space as an integral combinations of a set
59
 of basis vectors.
60
 
61
 A loop iteration space is dense if every integer point between the loop
62
 bounds is a point in the iteration space.  Every loop with a step of 1
63
 therefore has a dense iteration space.
64
 
65
 for i = 1 to 3, step 1 is a dense iteration space.
66
 
67
 A loop iteration space is sparse if it is not dense.  That is, the iteration
68
 space skips integer points that are within the loop bounds.
69
 
70
 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71
 2 is skipped.
72
 
73
 Dense source spaces are easy to transform, because they don't skip any
74
 points to begin with.  Thus we can compute the exact bounds of the target
75
 space using min/max and floor/ceil.
76
 
77
 For a dense source space, we take the transformation matrix, decompose it
78
 into a lower triangular part (H) and a unimodular part (U).
79
 We then compute the auxiliary space from the unimodular part (source loop
80
 nest . U = auxiliary space) , which has two important properties:
81
  1. It traverses the iterations in the same lexicographic order as the source
82
  space.
83
  2. It is a dense space when the source is a dense space (even if the target
84
  space is going to be sparse).
85
 
86
 Given the auxiliary space, we use the lower triangular part to compute the
87
 bounds in the target space by simple matrix multiplication.
88
 The gaps in the target space (IE the new loop step sizes) will be the
89
 diagonals of the H matrix.
90
 
91
 Sparse source spaces require another step, because you can't directly compute
92
 the exact bounds of the auxiliary and target space from the sparse space.
93
 Rather than try to come up with a separate algorithm to handle sparse source
94
 spaces directly, we just find a legal transformation matrix that gives you
95
 the sparse source space, from a dense space, and then transform the dense
96
 space.
97
 
98
 For a regular sparse space, you can represent the source space as an integer
99
 lattice, and the base space of that lattice will always be dense.  Thus, we
100
 effectively use the lattice to figure out the transformation from the lattice
101
 base space, to the sparse iteration space (IE what transform was applied to
102
 the dense space to make it sparse).  We then compose this transform with the
103
 transformation matrix specified by the user (since our matrix transformations
104
 are closed under composition, this is okay).  We can then use the base space
105
 (which is dense) plus the composed transformation matrix, to compute the rest
106
 of the transform using the dense space algorithm above.
107
 
108
 In other words, our sparse source space (B) is decomposed into a dense base
109
 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
110
 We then compute the composition of L and the user transformation matrix (T),
111
 so that T is now a transform from A to the result, instead of from B to the
112
 result.
113
 IE A.(LT) = result instead of B.T = result
114
 Since A is now a dense source space, we can use the dense source space
115
 algorithm above to compute the result of applying transform (LT) to A.
116
 
117
 Fourier-Motzkin elimination is used to compute the bounds of the base space
118
 of the lattice.  */
119
 
120
static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
121
                             VEC(tree,heap) *, VEC(int,heap) *,
122
                             VEC(tree,heap) *);
123
/* Lattice stuff that is internal to the code generation algorithm.  */
124
 
125
typedef struct lambda_lattice_s
126
{
127
  /* Lattice base matrix.  */
128
  lambda_matrix base;
129
  /* Lattice dimension.  */
130
  int dimension;
131
  /* Origin vector for the coefficients.  */
132
  lambda_vector origin;
133
  /* Origin matrix for the invariants.  */
134
  lambda_matrix origin_invariants;
135
  /* Number of invariants.  */
136
  int invariants;
137
} *lambda_lattice;
138
 
139
#define LATTICE_BASE(T) ((T)->base)
140
#define LATTICE_DIMENSION(T) ((T)->dimension)
141
#define LATTICE_ORIGIN(T) ((T)->origin)
142
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
143
#define LATTICE_INVARIANTS(T) ((T)->invariants)
144
 
145
static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
146
                       int, int);
147
static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
148
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
149
                                                   struct obstack *);
150
 
151
static bool can_convert_to_perfect_nest (struct loop *);
152
 
153
/* Create a new lambda body vector.  */
154
 
155
lambda_body_vector
156
lambda_body_vector_new (int size, struct obstack * lambda_obstack)
157
{
158
  lambda_body_vector ret;
159
 
160
  ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
161
  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
162
  LBV_SIZE (ret) = size;
163
  LBV_DENOMINATOR (ret) = 1;
164
  return ret;
165
}
166
 
167
/* Compute the new coefficients for the vector based on the
168
  *inverse* of the transformation matrix.  */
169
 
170
lambda_body_vector
171
lambda_body_vector_compute_new (lambda_trans_matrix transform,
172
                                lambda_body_vector vect,
173
                                struct obstack * lambda_obstack)
174
{
175
  lambda_body_vector temp;
176
  int depth;
177
 
178
  /* Make sure the matrix is square.  */
179
  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
180
 
181
  depth = LTM_ROWSIZE (transform);
182
 
183
  temp = lambda_body_vector_new (depth, lambda_obstack);
184
  LBV_DENOMINATOR (temp) =
185
    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
186
  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
187
                             LTM_MATRIX (transform), depth,
188
                             LBV_COEFFICIENTS (temp));
189
  LBV_SIZE (temp) = LBV_SIZE (vect);
190
  return temp;
191
}
192
 
193
/* Print out a lambda body vector.  */
194
 
195
void
196
print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
197
{
198
  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
199
}
200
 
201
/* Return TRUE if two linear expressions are equal.  */
202
 
203
static bool
204
lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
205
           int depth, int invariants)
206
{
207
  int i;
208
 
209
  if (lle1 == NULL || lle2 == NULL)
210
    return false;
211
  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
212
    return false;
213
  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
214
    return false;
215
  for (i = 0; i < depth; i++)
216
    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
217
      return false;
218
  for (i = 0; i < invariants; i++)
219
    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
220
        LLE_INVARIANT_COEFFICIENTS (lle2)[i])
221
      return false;
222
  return true;
223
}
224
 
225
/* Create a new linear expression with dimension DIM, and total number
226
   of invariants INVARIANTS.  */
227
 
228
lambda_linear_expression
229
lambda_linear_expression_new (int dim, int invariants,
230
                              struct obstack * lambda_obstack)
231
{
232
  lambda_linear_expression ret;
233
 
234
  ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
235
                                                 sizeof (*ret));
236
  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
237
  LLE_CONSTANT (ret) = 0;
238
  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
239
  LLE_DENOMINATOR (ret) = 1;
240
  LLE_NEXT (ret) = NULL;
241
 
242
  return ret;
243
}
244
 
245
/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
246
   The starting letter used for variable names is START.  */
247
 
248
static void
249
print_linear_expression (FILE * outfile, lambda_vector expr, int size,
250
                         char start)
251
{
252
  int i;
253
  bool first = true;
254
  for (i = 0; i < size; i++)
255
    {
256
      if (expr[i] != 0)
257
        {
258
          if (first)
259
            {
260
              if (expr[i] < 0)
261
                fprintf (outfile, "-");
262
              first = false;
263
            }
264
          else if (expr[i] > 0)
265
            fprintf (outfile, " + ");
266
          else
267
            fprintf (outfile, " - ");
268
          if (abs (expr[i]) == 1)
269
            fprintf (outfile, "%c", start + i);
270
          else
271
            fprintf (outfile, "%d%c", abs (expr[i]), start + i);
272
        }
273
    }
274
}
275
 
276
/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
277
   depth/number of coefficients is given by DEPTH, the number of invariants is
278
   given by INVARIANTS, and the character to start variable names with is given
279
   by START.  */
280
 
281
void
282
print_lambda_linear_expression (FILE * outfile,
283
                                lambda_linear_expression expr,
284
                                int depth, int invariants, char start)
285
{
286
  fprintf (outfile, "\tLinear expression: ");
287
  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
288
  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
289
  fprintf (outfile, "  invariants: ");
290
  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
291
                           invariants, 'A');
292
  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
293
}
294
 
295
/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
296
   coefficients is given by DEPTH, the number of invariants is
297
   given by INVARIANTS, and the character to start variable names with is given
298
   by START.  */
299
 
300
void
301
print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
302
                   int invariants, char start)
303
{
304
  int step;
305
  lambda_linear_expression expr;
306
 
307
  gcc_assert (loop);
308
 
309
  expr = LL_LINEAR_OFFSET (loop);
310
  step = LL_STEP (loop);
311
  fprintf (outfile, "  step size = %d \n", step);
312
 
313
  if (expr)
314
    {
315
      fprintf (outfile, "  linear offset: \n");
316
      print_lambda_linear_expression (outfile, expr, depth, invariants,
317
                                      start);
318
    }
319
 
320
  fprintf (outfile, "  lower bound: \n");
321
  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
322
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
323
  fprintf (outfile, "  upper bound: \n");
324
  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
325
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
326
}
327
 
328
/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
329
   number of invariants.  */
330
 
331
lambda_loopnest
332
lambda_loopnest_new (int depth, int invariants,
333
                     struct obstack * lambda_obstack)
334
{
335
  lambda_loopnest ret;
336
  ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
337
 
338
  LN_LOOPS (ret) = (lambda_loop *)
339
      obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
340
  LN_DEPTH (ret) = depth;
341
  LN_INVARIANTS (ret) = invariants;
342
 
343
  return ret;
344
}
345
 
346
/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
347
   character to use for loop names is given by START.  */
348
 
349
void
350
print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
351
{
352
  int i;
353
  for (i = 0; i < LN_DEPTH (nest); i++)
354
    {
355
      fprintf (outfile, "Loop %c\n", start + i);
356
      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
357
                         LN_INVARIANTS (nest), 'i');
358
      fprintf (outfile, "\n");
359
    }
360
}
361
 
362
/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
363
   of invariants.  */
364
 
365
static lambda_lattice
366
lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
367
{
368
  lambda_lattice ret
369
      = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
370
  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
371
  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
372
  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
373
  LATTICE_DIMENSION (ret) = depth;
374
  LATTICE_INVARIANTS (ret) = invariants;
375
  return ret;
376
}
377
 
378
/* Compute the lattice base for NEST.  The lattice base is essentially a
379
   non-singular transform from a dense base space to a sparse iteration space.
380
   We use it so that we don't have to specially handle the case of a sparse
381
   iteration space in other parts of the algorithm.  As a result, this routine
382
   only does something interesting (IE produce a matrix that isn't the
383
   identity matrix) if NEST is a sparse space.  */
384
 
385
static lambda_lattice
386
lambda_lattice_compute_base (lambda_loopnest nest,
387
                             struct obstack * lambda_obstack)
388
{
389
  lambda_lattice ret;
390
  int depth, invariants;
391
  lambda_matrix base;
392
 
393
  int i, j, step;
394
  lambda_loop loop;
395
  lambda_linear_expression expression;
396
 
397
  depth = LN_DEPTH (nest);
398
  invariants = LN_INVARIANTS (nest);
399
 
400
  ret = lambda_lattice_new (depth, invariants, lambda_obstack);
401
  base = LATTICE_BASE (ret);
402
  for (i = 0; i < depth; i++)
403
    {
404
      loop = LN_LOOPS (nest)[i];
405
      gcc_assert (loop);
406
      step = LL_STEP (loop);
407
      /* If we have a step of 1, then the base is one, and the
408
         origin and invariant coefficients are 0.  */
409
      if (step == 1)
410
        {
411
          for (j = 0; j < depth; j++)
412
            base[i][j] = 0;
413
          base[i][i] = 1;
414
          LATTICE_ORIGIN (ret)[i] = 0;
415
          for (j = 0; j < invariants; j++)
416
            LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
417
        }
418
      else
419
        {
420
          /* Otherwise, we need the lower bound expression (which must
421
             be an affine function)  to determine the base.  */
422
          expression = LL_LOWER_BOUND (loop);
423
          gcc_assert (expression && !LLE_NEXT (expression)
424
                      && LLE_DENOMINATOR (expression) == 1);
425
 
426
          /* The lower triangular portion of the base is going to be the
427
             coefficient times the step */
428
          for (j = 0; j < i; j++)
429
            base[i][j] = LLE_COEFFICIENTS (expression)[j]
430
              * LL_STEP (LN_LOOPS (nest)[j]);
431
          base[i][i] = step;
432
          for (j = i + 1; j < depth; j++)
433
            base[i][j] = 0;
434
 
435
          /* Origin for this loop is the constant of the lower bound
436
             expression.  */
437
          LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
438
 
439
          /* Coefficient for the invariants are equal to the invariant
440
             coefficients in the expression.  */
441
          for (j = 0; j < invariants; j++)
442
            LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
443
              LLE_INVARIANT_COEFFICIENTS (expression)[j];
444
        }
445
    }
446
  return ret;
447
}
448
 
449
/* Compute the least common multiple of two numbers A and B .  */
450
 
451
int
452
least_common_multiple (int a, int b)
453
{
454
  return (abs (a) * abs (b) / gcd (a, b));
455
}
456
 
457
/* Perform Fourier-Motzkin elimination to calculate the bounds of the
458
   auxiliary nest.
459
   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
460
   it is easy to calculate the answer and bounds.
461
   A sketch of how it works:
462
   Given a system of linear inequalities, ai * xj >= bk, you can always
463
   rewrite the constraints so they are all of the form
464
   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
465
   in b1 ... bk, and some a in a1...ai)
466
   You can then eliminate this x from the non-constant inequalities by
467
   rewriting these as a <= b, x >= constant, and delete the x variable.
468
   You can then repeat this for any remaining x variables, and then we have
469
   an easy to use variable <= constant (or no variables at all) form that we
470
   can construct our bounds from.
471
 
472
   In our case, each time we eliminate, we construct part of the bound from
473
   the ith variable, then delete the ith variable.
474
 
475
   Remember the constant are in our vector a, our coefficient matrix is A,
476
   and our invariant coefficient matrix is B.
477
 
478
   SIZE is the size of the matrices being passed.
479
   DEPTH is the loop nest depth.
480
   INVARIANTS is the number of loop invariants.
481
   A, B, and a are the coefficient matrix, invariant coefficient, and a
482
   vector of constants, respectively.  */
483
 
484
static lambda_loopnest
485
compute_nest_using_fourier_motzkin (int size,
486
                                    int depth,
487
                                    int invariants,
488
                                    lambda_matrix A,
489
                                    lambda_matrix B,
490
                                    lambda_vector a,
491
                                    struct obstack * lambda_obstack)
492
{
493
 
494
  int multiple, f1, f2;
495
  int i, j, k;
496
  lambda_linear_expression expression;
497
  lambda_loop loop;
498
  lambda_loopnest auxillary_nest;
499
  lambda_matrix swapmatrix, A1, B1;
500
  lambda_vector swapvector, a1;
501
  int newsize;
502
 
503
  A1 = lambda_matrix_new (128, depth);
504
  B1 = lambda_matrix_new (128, invariants);
505
  a1 = lambda_vector_new (128);
506
 
507
  auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
508
 
509
  for (i = depth - 1; i >= 0; i--)
510
    {
511
      loop = lambda_loop_new ();
512
      LN_LOOPS (auxillary_nest)[i] = loop;
513
      LL_STEP (loop) = 1;
514
 
515
      for (j = 0; j < size; j++)
516
        {
517
          if (A[j][i] < 0)
518
            {
519
              /* Any linear expression in the matrix with a coefficient less
520
                 than 0 becomes part of the new lower bound.  */
521
              expression = lambda_linear_expression_new (depth, invariants,
522
                                                         lambda_obstack);
523
 
524
              for (k = 0; k < i; k++)
525
                LLE_COEFFICIENTS (expression)[k] = A[j][k];
526
 
527
              for (k = 0; k < invariants; k++)
528
                LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
529
 
530
              LLE_DENOMINATOR (expression) = -1 * A[j][i];
531
              LLE_CONSTANT (expression) = -1 * a[j];
532
 
533
              /* Ignore if identical to the existing lower bound.  */
534
              if (!lle_equal (LL_LOWER_BOUND (loop),
535
                              expression, depth, invariants))
536
                {
537
                  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
538
                  LL_LOWER_BOUND (loop) = expression;
539
                }
540
 
541
            }
542
          else if (A[j][i] > 0)
543
            {
544
              /* Any linear expression with a coefficient greater than 0
545
                 becomes part of the new upper bound.  */
546
              expression = lambda_linear_expression_new (depth, invariants,
547
                                                         lambda_obstack);
548
              for (k = 0; k < i; k++)
549
                LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
550
 
551
              for (k = 0; k < invariants; k++)
552
                LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
553
 
554
              LLE_DENOMINATOR (expression) = A[j][i];
555
              LLE_CONSTANT (expression) = a[j];
556
 
557
              /* Ignore if identical to the existing upper bound.  */
558
              if (!lle_equal (LL_UPPER_BOUND (loop),
559
                              expression, depth, invariants))
560
                {
561
                  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
562
                  LL_UPPER_BOUND (loop) = expression;
563
                }
564
 
565
            }
566
        }
567
 
568
      /* This portion creates a new system of linear inequalities by deleting
569
         the i'th variable, reducing the system by one variable.  */
570
      newsize = 0;
571
      for (j = 0; j < size; j++)
572
        {
573
          /* If the coefficient for the i'th variable is 0, then we can just
574
             eliminate the variable straightaway.  Otherwise, we have to
575
             multiply through by the coefficients we are eliminating.  */
576
          if (A[j][i] == 0)
577
            {
578
              lambda_vector_copy (A[j], A1[newsize], depth);
579
              lambda_vector_copy (B[j], B1[newsize], invariants);
580
              a1[newsize] = a[j];
581
              newsize++;
582
            }
583
          else if (A[j][i] > 0)
584
            {
585
              for (k = 0; k < size; k++)
586
                {
587
                  if (A[k][i] < 0)
588
                    {
589
                      multiple = least_common_multiple (A[j][i], A[k][i]);
590
                      f1 = multiple / A[j][i];
591
                      f2 = -1 * multiple / A[k][i];
592
 
593
                      lambda_vector_add_mc (A[j], f1, A[k], f2,
594
                                            A1[newsize], depth);
595
                      lambda_vector_add_mc (B[j], f1, B[k], f2,
596
                                            B1[newsize], invariants);
597
                      a1[newsize] = f1 * a[j] + f2 * a[k];
598
                      newsize++;
599
                    }
600
                }
601
            }
602
        }
603
 
604
      swapmatrix = A;
605
      A = A1;
606
      A1 = swapmatrix;
607
 
608
      swapmatrix = B;
609
      B = B1;
610
      B1 = swapmatrix;
611
 
612
      swapvector = a;
613
      a = a1;
614
      a1 = swapvector;
615
 
616
      size = newsize;
617
    }
618
 
619
  return auxillary_nest;
620
}
621
 
622
/* Compute the loop bounds for the auxiliary space NEST.
623
   Input system used is Ax <= b.  TRANS is the unimodular transformation.
624
   Given the original nest, this function will
625
   1. Convert the nest into matrix form, which consists of a matrix for the
626
   coefficients, a matrix for the
627
   invariant coefficients, and a vector for the constants.
628
   2. Use the matrix form to calculate the lattice base for the nest (which is
629
   a dense space)
630
   3. Compose the dense space transform with the user specified transform, to
631
   get a transform we can easily calculate transformed bounds for.
632
   4. Multiply the composed transformation matrix times the matrix form of the
633
   loop.
634
   5. Transform the newly created matrix (from step 4) back into a loop nest
635
   using Fourier-Motzkin elimination to figure out the bounds.  */
636
 
637
static lambda_loopnest
638
lambda_compute_auxillary_space (lambda_loopnest nest,
639
                                lambda_trans_matrix trans,
640
                                struct obstack * lambda_obstack)
641
{
642
  lambda_matrix A, B, A1, B1;
643
  lambda_vector a, a1;
644
  lambda_matrix invertedtrans;
645
  int depth, invariants, size;
646
  int i, j;
647
  lambda_loop loop;
648
  lambda_linear_expression expression;
649
  lambda_lattice lattice;
650
 
651
  depth = LN_DEPTH (nest);
652
  invariants = LN_INVARIANTS (nest);
653
 
654
  /* Unfortunately, we can't know the number of constraints we'll have
655
     ahead of time, but this should be enough even in ridiculous loop nest
656
     cases. We must not go over this limit.  */
657
  A = lambda_matrix_new (128, depth);
658
  B = lambda_matrix_new (128, invariants);
659
  a = lambda_vector_new (128);
660
 
661
  A1 = lambda_matrix_new (128, depth);
662
  B1 = lambda_matrix_new (128, invariants);
663
  a1 = lambda_vector_new (128);
664
 
665
  /* Store the bounds in the equation matrix A, constant vector a, and
666
     invariant matrix B, so that we have Ax <= a + B.
667
     This requires a little equation rearranging so that everything is on the
668
     correct side of the inequality.  */
669
  size = 0;
670
  for (i = 0; i < depth; i++)
671
    {
672
      loop = LN_LOOPS (nest)[i];
673
 
674
      /* First we do the lower bound.  */
675
      if (LL_STEP (loop) > 0)
676
        expression = LL_LOWER_BOUND (loop);
677
      else
678
        expression = LL_UPPER_BOUND (loop);
679
 
680
      for (; expression != NULL; expression = LLE_NEXT (expression))
681
        {
682
          /* Fill in the coefficient.  */
683
          for (j = 0; j < i; j++)
684
            A[size][j] = LLE_COEFFICIENTS (expression)[j];
685
 
686
          /* And the invariant coefficient.  */
687
          for (j = 0; j < invariants; j++)
688
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
689
 
690
          /* And the constant.  */
691
          a[size] = LLE_CONSTANT (expression);
692
 
693
          /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
694
             constants and single variables on   */
695
          A[size][i] = -1 * LLE_DENOMINATOR (expression);
696
          a[size] *= -1;
697
          for (j = 0; j < invariants; j++)
698
            B[size][j] *= -1;
699
 
700
          size++;
701
          /* Need to increase matrix sizes above.  */
702
          gcc_assert (size <= 127);
703
 
704
        }
705
 
706
      /* Then do the exact same thing for the upper bounds.  */
707
      if (LL_STEP (loop) > 0)
708
        expression = LL_UPPER_BOUND (loop);
709
      else
710
        expression = LL_LOWER_BOUND (loop);
711
 
712
      for (; expression != NULL; expression = LLE_NEXT (expression))
713
        {
714
          /* Fill in the coefficient.  */
715
          for (j = 0; j < i; j++)
716
            A[size][j] = LLE_COEFFICIENTS (expression)[j];
717
 
718
          /* And the invariant coefficient.  */
719
          for (j = 0; j < invariants; j++)
720
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
721
 
722
          /* And the constant.  */
723
          a[size] = LLE_CONSTANT (expression);
724
 
725
          /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
726
          for (j = 0; j < i; j++)
727
            A[size][j] *= -1;
728
          A[size][i] = LLE_DENOMINATOR (expression);
729
          size++;
730
          /* Need to increase matrix sizes above.  */
731
          gcc_assert (size <= 127);
732
 
733
        }
734
    }
735
 
736
  /* Compute the lattice base x = base * y + origin, where y is the
737
     base space.  */
738
  lattice = lambda_lattice_compute_base (nest, lambda_obstack);
739
 
740
  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
741
 
742
  /* A1 = A * L */
743
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
744
 
745
  /* a1 = a - A * origin constant.  */
746
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
747
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);
748
 
749
  /* B1 = B - A * origin invariant.  */
750
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
751
                      invariants);
752
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
753
 
754
  /* Now compute the auxiliary space bounds by first inverting U, multiplying
755
     it by A1, then performing Fourier-Motzkin.  */
756
 
757
  invertedtrans = lambda_matrix_new (depth, depth);
758
 
759
  /* Compute the inverse of U.  */
760
  lambda_matrix_inverse (LTM_MATRIX (trans),
761
                         invertedtrans, depth);
762
 
763
  /* A = A1 inv(U).  */
764
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
765
 
766
  return compute_nest_using_fourier_motzkin (size, depth, invariants,
767
                                             A, B1, a1, lambda_obstack);
768
}
769
 
770
/* Compute the loop bounds for the target space, using the bounds of
771
   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
772
   The target space loop bounds are computed by multiplying the triangular
773
   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
774
   the loop steps (positive or negative) is then used to swap the bounds if
775
   the loop counts downwards.
776
   Return the target loopnest.  */
777
 
778
static lambda_loopnest
779
lambda_compute_target_space (lambda_loopnest auxillary_nest,
780
                             lambda_trans_matrix H, lambda_vector stepsigns,
781
                             struct obstack * lambda_obstack)
782
{
783
  lambda_matrix inverse, H1;
784
  int determinant, i, j;
785
  int gcd1, gcd2;
786
  int factor;
787
 
788
  lambda_loopnest target_nest;
789
  int depth, invariants;
790
  lambda_matrix target;
791
 
792
  lambda_loop auxillary_loop, target_loop;
793
  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
794
 
795
  depth = LN_DEPTH (auxillary_nest);
796
  invariants = LN_INVARIANTS (auxillary_nest);
797
 
798
  inverse = lambda_matrix_new (depth, depth);
799
  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
800
 
801
  /* H1 is H excluding its diagonal.  */
802
  H1 = lambda_matrix_new (depth, depth);
803
  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
804
 
805
  for (i = 0; i < depth; i++)
806
    H1[i][i] = 0;
807
 
808
  /* Computes the linear offsets of the loop bounds.  */
809
  target = lambda_matrix_new (depth, depth);
810
  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
811
 
812
  target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
813
 
814
  for (i = 0; i < depth; i++)
815
    {
816
 
817
      /* Get a new loop structure.  */
818
      target_loop = lambda_loop_new ();
819
      LN_LOOPS (target_nest)[i] = target_loop;
820
 
821
      /* Computes the gcd of the coefficients of the linear part.  */
822
      gcd1 = lambda_vector_gcd (target[i], i);
823
 
824
      /* Include the denominator in the GCD.  */
825
      gcd1 = gcd (gcd1, determinant);
826
 
827
      /* Now divide through by the gcd.  */
828
      for (j = 0; j < i; j++)
829
        target[i][j] = target[i][j] / gcd1;
830
 
831
      expression = lambda_linear_expression_new (depth, invariants,
832
                                                 lambda_obstack);
833
      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
834
      LLE_DENOMINATOR (expression) = determinant / gcd1;
835
      LLE_CONSTANT (expression) = 0;
836
      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
837
                           invariants);
838
      LL_LINEAR_OFFSET (target_loop) = expression;
839
    }
840
 
841
  /* For each loop, compute the new bounds from H.  */
842
  for (i = 0; i < depth; i++)
843
    {
844
      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
845
      target_loop = LN_LOOPS (target_nest)[i];
846
      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
847
      factor = LTM_MATRIX (H)[i][i];
848
 
849
      /* First we do the lower bound.  */
850
      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
851
 
852
      for (; auxillary_expr != NULL;
853
           auxillary_expr = LLE_NEXT (auxillary_expr))
854
        {
855
          target_expr = lambda_linear_expression_new (depth, invariants,
856
                                                      lambda_obstack);
857
          lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
858
                                     depth, inverse, depth,
859
                                     LLE_COEFFICIENTS (target_expr));
860
          lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
861
                                    LLE_COEFFICIENTS (target_expr), depth,
862
                                    factor);
863
 
864
          LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
865
          lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
866
                              LLE_INVARIANT_COEFFICIENTS (target_expr),
867
                              invariants);
868
          lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
869
                                    LLE_INVARIANT_COEFFICIENTS (target_expr),
870
                                    invariants, factor);
871
          LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
872
 
873
          if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
874
            {
875
              LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
876
                * determinant;
877
              lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
878
                                        (target_expr),
879
                                        LLE_INVARIANT_COEFFICIENTS
880
                                        (target_expr), invariants,
881
                                        determinant);
882
              LLE_DENOMINATOR (target_expr) =
883
                LLE_DENOMINATOR (target_expr) * determinant;
884
            }
885
          /* Find the gcd and divide by it here, rather than doing it
886
             at the tree level.  */
887
          gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
888
          gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
889
                                    invariants);
890
          gcd1 = gcd (gcd1, gcd2);
891
          gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
892
          gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
893
          for (j = 0; j < depth; j++)
894
            LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
895
          for (j = 0; j < invariants; j++)
896
            LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
897
          LLE_CONSTANT (target_expr) /= gcd1;
898
          LLE_DENOMINATOR (target_expr) /= gcd1;
899
          /* Ignore if identical to existing bound.  */
900
          if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
901
                          invariants))
902
            {
903
              LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
904
              LL_LOWER_BOUND (target_loop) = target_expr;
905
            }
906
        }
907
      /* Now do the upper bound.  */
908
      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
909
 
910
      for (; auxillary_expr != NULL;
911
           auxillary_expr = LLE_NEXT (auxillary_expr))
912
        {
913
          target_expr = lambda_linear_expression_new (depth, invariants,
914
                                                      lambda_obstack);
915
          lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
916
                                     depth, inverse, depth,
917
                                     LLE_COEFFICIENTS (target_expr));
918
          lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
919
                                    LLE_COEFFICIENTS (target_expr), depth,
920
                                    factor);
921
          LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
922
          lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
923
                              LLE_INVARIANT_COEFFICIENTS (target_expr),
924
                              invariants);
925
          lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
926
                                    LLE_INVARIANT_COEFFICIENTS (target_expr),
927
                                    invariants, factor);
928
          LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
929
 
930
          if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
931
            {
932
              LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
933
                * determinant;
934
              lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
935
                                        (target_expr),
936
                                        LLE_INVARIANT_COEFFICIENTS
937
                                        (target_expr), invariants,
938
                                        determinant);
939
              LLE_DENOMINATOR (target_expr) =
940
                LLE_DENOMINATOR (target_expr) * determinant;
941
            }
942
          /* Find the gcd and divide by it here, instead of at the
943
             tree level.  */
944
          gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
945
          gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
946
                                    invariants);
947
          gcd1 = gcd (gcd1, gcd2);
948
          gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
949
          gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
950
          for (j = 0; j < depth; j++)
951
            LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
952
          for (j = 0; j < invariants; j++)
953
            LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
954
          LLE_CONSTANT (target_expr) /= gcd1;
955
          LLE_DENOMINATOR (target_expr) /= gcd1;
956
          /* Ignore if equal to existing bound.  */
957
          if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
958
                          invariants))
959
            {
960
              LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
961
              LL_UPPER_BOUND (target_loop) = target_expr;
962
            }
963
        }
964
    }
965
  for (i = 0; i < depth; i++)
966
    {
967
      target_loop = LN_LOOPS (target_nest)[i];
968
      /* If necessary, exchange the upper and lower bounds and negate
969
         the step size.  */
970
      if (stepsigns[i] < 0)
971
        {
972
          LL_STEP (target_loop) *= -1;
973
          tmp_expr = LL_LOWER_BOUND (target_loop);
974
          LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
975
          LL_UPPER_BOUND (target_loop) = tmp_expr;
976
        }
977
    }
978
  return target_nest;
979
}
980
 
981
/* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
982
   result.  */
983
 
984
static lambda_vector
985
lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
986
{
987
  lambda_matrix matrix, H;
988
  int size;
989
  lambda_vector newsteps;
990
  int i, j, factor, minimum_column;
991
  int temp;
992
 
993
  matrix = LTM_MATRIX (trans);
994
  size = LTM_ROWSIZE (trans);
995
  H = lambda_matrix_new (size, size);
996
 
997
  newsteps = lambda_vector_new (size);
998
  lambda_vector_copy (stepsigns, newsteps, size);
999
 
1000
  lambda_matrix_copy (matrix, H, size, size);
1001
 
1002
  for (j = 0; j < size; j++)
1003
    {
1004
      lambda_vector row;
1005
      row = H[j];
1006
      for (i = j; i < size; i++)
1007
        if (row[i] < 0)
1008
          lambda_matrix_col_negate (H, size, i);
1009
      while (lambda_vector_first_nz (row, size, j + 1) < size)
1010
        {
1011
          minimum_column = lambda_vector_min_nz (row, size, j);
1012
          lambda_matrix_col_exchange (H, size, j, minimum_column);
1013
 
1014
          temp = newsteps[j];
1015
          newsteps[j] = newsteps[minimum_column];
1016
          newsteps[minimum_column] = temp;
1017
 
1018
          for (i = j + 1; i < size; i++)
1019
            {
1020
              factor = row[i] / row[j];
1021
              lambda_matrix_col_add (H, size, j, i, -1 * factor);
1022
            }
1023
        }
1024
    }
1025
  return newsteps;
1026
}
1027
 
1028
/* Transform NEST according to TRANS, and return the new loopnest.
1029
   This involves
1030
   1. Computing a lattice base for the transformation
1031
   2. Composing the dense base with the specified transformation (TRANS)
1032
   3. Decomposing the combined transformation into a lower triangular portion,
1033
   and a unimodular portion.
1034
   4. Computing the auxiliary nest using the unimodular portion.
1035
   5. Computing the target nest using the auxiliary nest and the lower
1036
   triangular portion.  */
1037
 
1038
lambda_loopnest
1039
lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1040
                           struct obstack * lambda_obstack)
1041
{
1042
  lambda_loopnest auxillary_nest, target_nest;
1043
 
1044
  int depth, invariants;
1045
  int i, j;
1046
  lambda_lattice lattice;
1047
  lambda_trans_matrix trans1, H, U;
1048
  lambda_loop loop;
1049
  lambda_linear_expression expression;
1050
  lambda_vector origin;
1051
  lambda_matrix origin_invariants;
1052
  lambda_vector stepsigns;
1053
  int f;
1054
 
1055
  depth = LN_DEPTH (nest);
1056
  invariants = LN_INVARIANTS (nest);
1057
 
1058
  /* Keep track of the signs of the loop steps.  */
1059
  stepsigns = lambda_vector_new (depth);
1060
  for (i = 0; i < depth; i++)
1061
    {
1062
      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1063
        stepsigns[i] = 1;
1064
      else
1065
        stepsigns[i] = -1;
1066
    }
1067
 
1068
  /* Compute the lattice base.  */
1069
  lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1070
  trans1 = lambda_trans_matrix_new (depth, depth);
1071
 
1072
  /* Multiply the transformation matrix by the lattice base.  */
1073
 
1074
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1075
                      LTM_MATRIX (trans1), depth, depth, depth);
1076
 
1077
  /* Compute the Hermite normal form for the new transformation matrix.  */
1078
  H = lambda_trans_matrix_new (depth, depth);
1079
  U = lambda_trans_matrix_new (depth, depth);
1080
  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1081
                         LTM_MATRIX (U));
1082
 
1083
  /* Compute the auxiliary loop nest's space from the unimodular
1084
     portion.  */
1085
  auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1086
 
1087
  /* Compute the loop step signs from the old step signs and the
1088
     transformation matrix.  */
1089
  stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1090
 
1091
  /* Compute the target loop nest space from the auxiliary nest and
1092
     the lower triangular matrix H.  */
1093
  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1094
                                             lambda_obstack);
1095
  origin = lambda_vector_new (depth);
1096
  origin_invariants = lambda_matrix_new (depth, invariants);
1097
  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1098
                             LATTICE_ORIGIN (lattice), origin);
1099
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1100
                      origin_invariants, depth, depth, invariants);
1101
 
1102
  for (i = 0; i < depth; i++)
1103
    {
1104
      loop = LN_LOOPS (target_nest)[i];
1105
      expression = LL_LINEAR_OFFSET (loop);
1106
      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1107
        f = 1;
1108
      else
1109
        f = LLE_DENOMINATOR (expression);
1110
 
1111
      LLE_CONSTANT (expression) += f * origin[i];
1112
 
1113
      for (j = 0; j < invariants; j++)
1114
        LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1115
          f * origin_invariants[i][j];
1116
    }
1117
 
1118
  return target_nest;
1119
 
1120
}
1121
 
1122
/* Convert a gcc tree expression EXPR to a lambda linear expression, and
1123
   return the new expression.  DEPTH is the depth of the loopnest.
1124
   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1125
   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1126
   is the amount we have to add/subtract from the expression because of the
1127
   type of comparison it is used in.  */
1128
 
1129
static lambda_linear_expression
1130
gcc_tree_to_linear_expression (int depth, tree expr,
1131
                               VEC(tree,heap) *outerinductionvars,
1132
                               VEC(tree,heap) *invariants, int extra,
1133
                               struct obstack * lambda_obstack)
1134
{
1135
  lambda_linear_expression lle = NULL;
1136
  switch (TREE_CODE (expr))
1137
    {
1138
    case INTEGER_CST:
1139
      {
1140
        lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1141
        LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1142
        if (extra != 0)
1143
          LLE_CONSTANT (lle) += extra;
1144
 
1145
        LLE_DENOMINATOR (lle) = 1;
1146
      }
1147
      break;
1148
    case SSA_NAME:
1149
      {
1150
        tree iv, invar;
1151
        size_t i;
1152
        for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1153
          if (iv != NULL)
1154
            {
1155
              if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1156
                {
1157
                  lle = lambda_linear_expression_new (depth, 2 * depth,
1158
                                                      lambda_obstack);
1159
                  LLE_COEFFICIENTS (lle)[i] = 1;
1160
                  if (extra != 0)
1161
                    LLE_CONSTANT (lle) = extra;
1162
 
1163
                  LLE_DENOMINATOR (lle) = 1;
1164
                }
1165
            }
1166
        for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1167
          if (invar != NULL)
1168
            {
1169
              if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1170
                {
1171
                  lle = lambda_linear_expression_new (depth, 2 * depth,
1172
                                                      lambda_obstack);
1173
                  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1174
                  if (extra != 0)
1175
                    LLE_CONSTANT (lle) = extra;
1176
                  LLE_DENOMINATOR (lle) = 1;
1177
                }
1178
            }
1179
      }
1180
      break;
1181
    default:
1182
      return NULL;
1183
    }
1184
 
1185
  return lle;
1186
}
1187
 
1188
/* Return the depth of the loopnest NEST */
1189
 
1190
static int
1191
depth_of_nest (struct loop *nest)
1192
{
1193
  size_t depth = 0;
1194
  while (nest)
1195
    {
1196
      depth++;
1197
      nest = nest->inner;
1198
    }
1199
  return depth;
1200
}
1201
 
1202
 
1203
/* Return true if OP is invariant in LOOP and all outer loops.  */
1204
 
1205
static bool
1206
invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1207
{
1208
  if (is_gimple_min_invariant (op))
1209
    return true;
1210
  if (loop_depth (loop) == 0)
1211
    return true;
1212
  if (!expr_invariant_in_loop_p (loop, op))
1213
    return false;
1214
  if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1215
    return false;
1216
  return true;
1217
}
1218
 
1219
/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1220
   or NULL if it could not be converted.
1221
   DEPTH is the depth of the loop.
1222
   INVARIANTS is a pointer to the array of loop invariants.
1223
   The induction variable for this loop should be stored in the parameter
1224
   OURINDUCTIONVAR.
1225
   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1226
 
1227
static lambda_loop
1228
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1229
                         VEC(tree,heap) ** invariants,
1230
                         tree * ourinductionvar,
1231
                         VEC(tree,heap) * outerinductionvars,
1232
                         VEC(tree,heap) ** lboundvars,
1233
                         VEC(tree,heap) ** uboundvars,
1234
                         VEC(int,heap) ** steps,
1235
                         struct obstack * lambda_obstack)
1236
{
1237
  gimple phi;
1238
  gimple exit_cond;
1239
  tree access_fn, inductionvar;
1240
  tree step;
1241
  lambda_loop lloop = NULL;
1242
  lambda_linear_expression lbound, ubound;
1243
  tree test_lhs, test_rhs;
1244
  int stepint;
1245
  int extra = 0;
1246
  tree lboundvar, uboundvar, uboundresult;
1247
 
1248
  /* Find out induction var and exit condition.  */
1249
  inductionvar = find_induction_var_from_exit_cond (loop);
1250
  exit_cond = get_loop_exit_condition (loop);
1251
 
1252
  if (inductionvar == NULL || exit_cond == NULL)
1253
    {
1254
      if (dump_file && (dump_flags & TDF_DETAILS))
1255
        fprintf (dump_file,
1256
                 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1257
      return NULL;
1258
    }
1259
 
1260
  if (SSA_NAME_DEF_STMT (inductionvar) == NULL)
1261
    {
1262
 
1263
      if (dump_file && (dump_flags & TDF_DETAILS))
1264
        fprintf (dump_file,
1265
                 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1266
 
1267
      return NULL;
1268
    }
1269
 
1270
  phi = SSA_NAME_DEF_STMT (inductionvar);
1271
  if (gimple_code (phi) != GIMPLE_PHI)
1272
    {
1273
      tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1274
      if (!op)
1275
        {
1276
 
1277
          if (dump_file && (dump_flags & TDF_DETAILS))
1278
            fprintf (dump_file,
1279
                     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1280
 
1281
          return NULL;
1282
        }
1283
 
1284
      phi = SSA_NAME_DEF_STMT (op);
1285
      if (gimple_code (phi) != GIMPLE_PHI)
1286
        {
1287
          if (dump_file && (dump_flags & TDF_DETAILS))
1288
            fprintf (dump_file,
1289
                     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
          return NULL;
1291
        }
1292
    }
1293
 
1294
  /* The induction variable name/version we want to put in the array is the
1295
     result of the induction variable phi node.  */
1296
  *ourinductionvar = PHI_RESULT (phi);
1297
  access_fn = instantiate_parameters
1298
    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1299
  if (access_fn == chrec_dont_know)
1300
    {
1301
      if (dump_file && (dump_flags & TDF_DETAILS))
1302
        fprintf (dump_file,
1303
                 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1304
 
1305
      return NULL;
1306
    }
1307
 
1308
  step = evolution_part_in_loop_num (access_fn, loop->num);
1309
  if (!step || step == chrec_dont_know)
1310
    {
1311
      if (dump_file && (dump_flags & TDF_DETAILS))
1312
        fprintf (dump_file,
1313
                 "Unable to convert loop: Cannot determine step of loop.\n");
1314
 
1315
      return NULL;
1316
    }
1317
  if (TREE_CODE (step) != INTEGER_CST)
1318
    {
1319
 
1320
      if (dump_file && (dump_flags & TDF_DETAILS))
1321
        fprintf (dump_file,
1322
                 "Unable to convert loop: Step of loop is not integer.\n");
1323
      return NULL;
1324
    }
1325
 
1326
  stepint = TREE_INT_CST_LOW (step);
1327
 
1328
  /* Only want phis for induction vars, which will have two
1329
     arguments.  */
1330
  if (gimple_phi_num_args (phi) != 2)
1331
    {
1332
      if (dump_file && (dump_flags & TDF_DETAILS))
1333
        fprintf (dump_file,
1334
                 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1335
      return NULL;
1336
    }
1337
 
1338
  /* Another induction variable check. One argument's source should be
1339
     in the loop, one outside the loop.  */
1340
  if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)
1341
      && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src))
1342
    {
1343
 
1344
      if (dump_file && (dump_flags & TDF_DETAILS))
1345
        fprintf (dump_file,
1346
                 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1347
 
1348
      return NULL;
1349
    }
1350
 
1351
  if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
1352
    {
1353
      lboundvar = PHI_ARG_DEF (phi, 1);
1354
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1355
                                              outerinductionvars, *invariants,
1356
                                              0, lambda_obstack);
1357
    }
1358
  else
1359
    {
1360
      lboundvar = PHI_ARG_DEF (phi, 0);
1361
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1362
                                              outerinductionvars, *invariants,
1363
                                              0, lambda_obstack);
1364
    }
1365
 
1366
  if (!lbound)
1367
    {
1368
 
1369
      if (dump_file && (dump_flags & TDF_DETAILS))
1370
        fprintf (dump_file,
1371
                 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1372
 
1373
      return NULL;
1374
    }
1375
  /* One part of the test may be a loop invariant tree.  */
1376
  VEC_reserve (tree, heap, *invariants, 1);
1377
  test_lhs = gimple_cond_lhs (exit_cond);
1378
  test_rhs = gimple_cond_rhs (exit_cond);
1379
 
1380
  if (TREE_CODE (test_rhs) == SSA_NAME
1381
      && invariant_in_loop_and_outer_loops (loop, test_rhs))
1382
    VEC_quick_push (tree, *invariants, test_rhs);
1383
  else if (TREE_CODE (test_lhs) == SSA_NAME
1384
           && invariant_in_loop_and_outer_loops (loop, test_lhs))
1385
    VEC_quick_push (tree, *invariants, test_lhs);
1386
 
1387
  /* The non-induction variable part of the test is the upper bound variable.
1388
   */
1389
  if (test_lhs == inductionvar)
1390
    uboundvar = test_rhs;
1391
  else
1392
    uboundvar = test_lhs;
1393
 
1394
  /* We only size the vectors assuming we have, at max, 2 times as many
1395
     invariants as we do loops (one for each bound).
1396
     This is just an arbitrary number, but it has to be matched against the
1397
     code below.  */
1398
  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1399
 
1400
 
1401
  /* We might have some leftover.  */
1402
  if (gimple_cond_code (exit_cond) == LT_EXPR)
1403
    extra = -1 * stepint;
1404
  else if (gimple_cond_code (exit_cond) == NE_EXPR)
1405
    extra = -1 * stepint;
1406
  else if (gimple_cond_code (exit_cond) == GT_EXPR)
1407
    extra = -1 * stepint;
1408
  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
1409
    extra = 1 * stepint;
1410
 
1411
  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1412
                                          outerinductionvars,
1413
                                          *invariants, extra, lambda_obstack);
1414
  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1415
                         build_int_cst (TREE_TYPE (uboundvar), extra));
1416
  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1417
  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1418
  VEC_safe_push (int, heap, *steps, stepint);
1419
  if (!ubound)
1420
    {
1421
      if (dump_file && (dump_flags & TDF_DETAILS))
1422
        fprintf (dump_file,
1423
                 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1424
      return NULL;
1425
    }
1426
 
1427
  lloop = lambda_loop_new ();
1428
  LL_STEP (lloop) = stepint;
1429
  LL_LOWER_BOUND (lloop) = lbound;
1430
  LL_UPPER_BOUND (lloop) = ubound;
1431
  return lloop;
1432
}
1433
 
1434
/* Given a LOOP, find the induction variable it is testing against in the exit
1435
   condition.  Return the induction variable if found, NULL otherwise.  */
1436
 
1437
tree
1438
find_induction_var_from_exit_cond (struct loop *loop)
1439
{
1440
  gimple expr = get_loop_exit_condition (loop);
1441
  tree ivarop;
1442
  tree test_lhs, test_rhs;
1443
  if (expr == NULL)
1444
    return NULL_TREE;
1445
  if (gimple_code (expr) != GIMPLE_COND)
1446
    return NULL_TREE;
1447
  test_lhs = gimple_cond_lhs (expr);
1448
  test_rhs = gimple_cond_rhs (expr);
1449
 
1450
  /* Find the side that is invariant in this loop. The ivar must be the other
1451
     side.  */
1452
 
1453
  if (expr_invariant_in_loop_p (loop, test_lhs))
1454
      ivarop = test_rhs;
1455
  else if (expr_invariant_in_loop_p (loop, test_rhs))
1456
      ivarop = test_lhs;
1457
  else
1458
    return NULL_TREE;
1459
 
1460
  if (TREE_CODE (ivarop) != SSA_NAME)
1461
    return NULL_TREE;
1462
  return ivarop;
1463
}
1464
 
1465
DEF_VEC_P(lambda_loop);
1466
DEF_VEC_ALLOC_P(lambda_loop,heap);
1467
 
1468
/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1469
   Return the new loop nest.
1470
   INDUCTIONVARS is a pointer to an array of induction variables for the
1471
   loopnest that will be filled in during this process.
1472
   INVARIANTS is a pointer to an array of invariants that will be filled in
1473
   during this process.  */
1474
 
1475
lambda_loopnest
1476
gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1477
                                 VEC(tree,heap) **inductionvars,
1478
                                 VEC(tree,heap) **invariants,
1479
                                 struct obstack * lambda_obstack)
1480
{
1481
  lambda_loopnest ret = NULL;
1482
  struct loop *temp = loop_nest;
1483
  int depth = depth_of_nest (loop_nest);
1484
  size_t i;
1485
  VEC(lambda_loop,heap) *loops = NULL;
1486
  VEC(tree,heap) *uboundvars = NULL;
1487
  VEC(tree,heap) *lboundvars  = NULL;
1488
  VEC(int,heap) *steps = NULL;
1489
  lambda_loop newloop;
1490
  tree inductionvar = NULL;
1491
  bool perfect_nest = perfect_nest_p (loop_nest);
1492
 
1493
  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1494
    goto fail;
1495
 
1496
  while (temp)
1497
    {
1498
      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1499
                                         &inductionvar, *inductionvars,
1500
                                         &lboundvars, &uboundvars,
1501
                                         &steps, lambda_obstack);
1502
      if (!newloop)
1503
        goto fail;
1504
 
1505
      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1506
      VEC_safe_push (lambda_loop, heap, loops, newloop);
1507
      temp = temp->inner;
1508
    }
1509
 
1510
  if (!perfect_nest)
1511
    {
1512
      if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1513
                            *inductionvars))
1514
        {
1515
          if (dump_file)
1516
            fprintf (dump_file,
1517
                     "Not a perfect loop nest and couldn't convert to one.\n");
1518
          goto fail;
1519
        }
1520
      else if (dump_file)
1521
        fprintf (dump_file,
1522
                 "Successfully converted loop nest to perfect loop nest.\n");
1523
    }
1524
 
1525
  ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1526
 
1527
  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1528
    LN_LOOPS (ret)[i] = newloop;
1529
 
1530
 fail:
1531
  VEC_free (lambda_loop, heap, loops);
1532
  VEC_free (tree, heap, uboundvars);
1533
  VEC_free (tree, heap, lboundvars);
1534
  VEC_free (int, heap, steps);
1535
 
1536
  return ret;
1537
}
1538
 
1539
/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1540
   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1541
   inserted for us are stored.  INDUCTION_VARS is the array of induction
1542
   variables for the loop this LBV is from.  TYPE is the tree type to use for
1543
   the variables and trees involved.  */
1544
 
1545
static tree
1546
lbv_to_gcc_expression (lambda_body_vector lbv,
1547
                       tree type, VEC(tree,heap) *induction_vars,
1548
                       gimple_seq *stmts_to_insert)
1549
{
1550
  int k;
1551
  tree resvar;
1552
  tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1553
 
1554
  k = LBV_DENOMINATOR (lbv);
1555
  gcc_assert (k != 0);
1556
  if (k != 1)
1557
    expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1558
 
1559
  resvar = create_tmp_var (type, "lbvtmp");
1560
  add_referenced_var (resvar);
1561
  return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1562
}
1563
 
1564
/* Convert a linear expression from coefficient and constant form to a
1565
   gcc tree.
1566
   Return the tree that represents the final value of the expression.
1567
   LLE is the linear expression to convert.
1568
   OFFSET is the linear offset to apply to the expression.
1569
   TYPE is the tree type to use for the variables and math.
1570
   INDUCTION_VARS is a vector of induction variables for the loops.
1571
   INVARIANTS is a vector of the loop nest invariants.
1572
   WRAP specifies what tree code to wrap the results in, if there is more than
1573
   one (it is either MAX_EXPR, or MIN_EXPR).
1574
   STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1575
   statements that need to be inserted for the linear expression.  */
1576
 
1577
static tree
1578
lle_to_gcc_expression (lambda_linear_expression lle,
1579
                       lambda_linear_expression offset,
1580
                       tree type,
1581
                       VEC(tree,heap) *induction_vars,
1582
                       VEC(tree,heap) *invariants,
1583
                       enum tree_code wrap, gimple_seq *stmts_to_insert)
1584
{
1585
  int k;
1586
  tree resvar;
1587
  tree expr = NULL_TREE;
1588
  VEC(tree,heap) *results = NULL;
1589
 
1590
  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1591
 
1592
  /* Build up the linear expressions.  */
1593
  for (; lle != NULL; lle = LLE_NEXT (lle))
1594
    {
1595
      expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1596
      expr = fold_build2 (PLUS_EXPR, type, expr,
1597
                          build_linear_expr (type,
1598
                                             LLE_INVARIANT_COEFFICIENTS (lle),
1599
                                             invariants));
1600
 
1601
      k = LLE_CONSTANT (lle);
1602
      if (k)
1603
        expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1604
 
1605
      k = LLE_CONSTANT (offset);
1606
      if (k)
1607
        expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1608
 
1609
      k = LLE_DENOMINATOR (lle);
1610
      if (k != 1)
1611
        expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1612
                            type, expr, build_int_cst (type, k));
1613
 
1614
      expr = fold (expr);
1615
      VEC_safe_push (tree, heap, results, expr);
1616
    }
1617
 
1618
  gcc_assert (expr);
1619
 
1620
  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1621
  if (VEC_length (tree, results) > 1)
1622
    {
1623
      size_t i;
1624
      tree op;
1625
 
1626
      expr = VEC_index (tree, results, 0);
1627
      for (i = 1; VEC_iterate (tree, results, i, op); i++)
1628
        expr = fold_build2 (wrap, type, expr, op);
1629
    }
1630
 
1631
  VEC_free (tree, heap, results);
1632
 
1633
  resvar = create_tmp_var (type, "lletmp");
1634
  add_referenced_var (resvar);
1635
  return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1636
}
1637
 
1638
/* Remove the induction variable defined at IV_STMT.  */
1639
 
1640
void
1641
remove_iv (gimple iv_stmt)
1642
{
1643
  gimple_stmt_iterator si = gsi_for_stmt (iv_stmt);
1644
 
1645
  if (gimple_code (iv_stmt) == GIMPLE_PHI)
1646
    {
1647
      unsigned i;
1648
 
1649
      for (i = 0; i < gimple_phi_num_args (iv_stmt); i++)
1650
        {
1651
          gimple stmt;
1652
          imm_use_iterator imm_iter;
1653
          tree arg = gimple_phi_arg_def (iv_stmt, i);
1654
          bool used = false;
1655
 
1656
          if (TREE_CODE (arg) != SSA_NAME)
1657
            continue;
1658
 
1659
          FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1660
            if (stmt != iv_stmt && !is_gimple_debug (stmt))
1661
              used = true;
1662
 
1663
          if (!used)
1664
            remove_iv (SSA_NAME_DEF_STMT (arg));
1665
        }
1666
 
1667
      remove_phi_node (&si, true);
1668
    }
1669
  else
1670
    {
1671
      gsi_remove (&si, true);
1672
      release_defs (iv_stmt);
1673
    }
1674
}
1675
 
1676
/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1677
   it, back into gcc code.  This changes the
1678
   loops, their induction variables, and their bodies, so that they
1679
   match the transformed loopnest.
1680
   OLD_LOOPNEST is the loopnest before we've replaced it with the new
1681
   loopnest.
1682
   OLD_IVS is a vector of induction variables from the old loopnest.
1683
   INVARIANTS is a vector of loop invariants from the old loopnest.
1684
   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1685
   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1686
   NEW_LOOPNEST.  */
1687
 
1688
void
1689
lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1690
                                 VEC(tree,heap) *old_ivs,
1691
                                 VEC(tree,heap) *invariants,
1692
                                 VEC(gimple,heap) **remove_ivs,
1693
                                 lambda_loopnest new_loopnest,
1694
                                 lambda_trans_matrix transform,
1695
                                 struct obstack * lambda_obstack)
1696
{
1697
  struct loop *temp;
1698
  size_t i = 0;
1699
  unsigned j;
1700
  size_t depth = 0;
1701
  VEC(tree,heap) *new_ivs = NULL;
1702
  tree oldiv;
1703
  gimple_stmt_iterator bsi;
1704
 
1705
  transform = lambda_trans_matrix_inverse (transform);
1706
 
1707
  if (dump_file)
1708
    {
1709
      fprintf (dump_file, "Inverse of transformation matrix:\n");
1710
      print_lambda_trans_matrix (dump_file, transform);
1711
    }
1712
  depth = depth_of_nest (old_loopnest);
1713
  temp = old_loopnest;
1714
 
1715
  while (temp)
1716
    {
1717
      lambda_loop newloop;
1718
      basic_block bb;
1719
      edge exit;
1720
      tree ivvar, ivvarinced;
1721
      gimple exitcond;
1722
      gimple_seq stmts;
1723
      enum tree_code testtype;
1724
      tree newupperbound, newlowerbound;
1725
      lambda_linear_expression offset;
1726
      tree type;
1727
      bool insert_after;
1728
      gimple inc_stmt;
1729
 
1730
      oldiv = VEC_index (tree, old_ivs, i);
1731
      type = TREE_TYPE (oldiv);
1732
 
1733
      /* First, build the new induction variable temporary  */
1734
 
1735
      ivvar = create_tmp_var (type, "lnivtmp");
1736
      add_referenced_var (ivvar);
1737
 
1738
      VEC_safe_push (tree, heap, new_ivs, ivvar);
1739
 
1740
      newloop = LN_LOOPS (new_loopnest)[i];
1741
 
1742
      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1743
         cases for now.  */
1744
      offset = LL_LINEAR_OFFSET (newloop);
1745
 
1746
      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1747
                  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1748
 
1749
      /* Now build the  new lower bounds, and insert the statements
1750
         necessary to generate it on the loop preheader.  */
1751
      stmts = NULL;
1752
      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1753
                                             LL_LINEAR_OFFSET (newloop),
1754
                                             type,
1755
                                             new_ivs,
1756
                                             invariants, MAX_EXPR, &stmts);
1757
 
1758
      if (stmts)
1759
        {
1760
          gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts);
1761
          gsi_commit_edge_inserts ();
1762
        }
1763
      /* Build the new upper bound and insert its statements in the
1764
         basic block of the exit condition */
1765
      stmts = NULL;
1766
      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1767
                                             LL_LINEAR_OFFSET (newloop),
1768
                                             type,
1769
                                             new_ivs,
1770
                                             invariants, MIN_EXPR, &stmts);
1771
      exit = single_exit (temp);
1772
      exitcond = get_loop_exit_condition (temp);
1773
      bb = gimple_bb (exitcond);
1774
      bsi = gsi_after_labels (bb);
1775
      if (stmts)
1776
        gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT);
1777
 
1778
      /* Create the new iv.  */
1779
 
1780
      standard_iv_increment_position (temp, &bsi, &insert_after);
1781
      create_iv (newlowerbound,
1782
                 build_int_cst (type, LL_STEP (newloop)),
1783
                 ivvar, temp, &bsi, insert_after, &ivvar,
1784
                 NULL);
1785
 
1786
      /* Unfortunately, the incremented ivvar that create_iv inserted may not
1787
         dominate the block containing the exit condition.
1788
         So we simply create our own incremented iv to use in the new exit
1789
         test,  and let redundancy elimination sort it out.  */
1790
      inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar),
1791
                                               ivvar,
1792
                                               build_int_cst (type, LL_STEP (newloop)));
1793
 
1794
      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1795
      gimple_assign_set_lhs (inc_stmt, ivvarinced);
1796
      bsi = gsi_for_stmt (exitcond);
1797
      gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);
1798
 
1799
      /* Replace the exit condition with the new upper bound
1800
         comparison.  */
1801
 
1802
      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1803
 
1804
      /* We want to build a conditional where true means exit the loop, and
1805
         false means continue the loop.
1806
         So swap the testtype if this isn't the way things are.*/
1807
 
1808
      if (exit->flags & EDGE_FALSE_VALUE)
1809
        testtype = swap_tree_comparison (testtype);
1810
 
1811
      gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced);
1812
      update_stmt (exitcond);
1813
      VEC_replace (tree, new_ivs, i, ivvar);
1814
 
1815
      i++;
1816
      temp = temp->inner;
1817
    }
1818
 
1819
  /* Rewrite uses of the old ivs so that they are now specified in terms of
1820
     the new ivs.  */
1821
 
1822
  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1823
    {
1824
      imm_use_iterator imm_iter;
1825
      use_operand_p use_p;
1826
      tree oldiv_def;
1827
      gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1828
      gimple stmt;
1829
 
1830
      if (gimple_code (oldiv_stmt) == GIMPLE_PHI)
1831
        oldiv_def = PHI_RESULT (oldiv_stmt);
1832
      else
1833
        oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1834
      gcc_assert (oldiv_def != NULL_TREE);
1835
 
1836
      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1837
        {
1838
          tree newiv;
1839
          gimple_seq stmts;
1840
          lambda_body_vector lbv, newlbv;
1841
 
1842
          if (is_gimple_debug (stmt))
1843
            continue;
1844
 
1845
          /* Compute the new expression for the induction
1846
             variable.  */
1847
          depth = VEC_length (tree, new_ivs);
1848
          lbv = lambda_body_vector_new (depth, lambda_obstack);
1849
          LBV_COEFFICIENTS (lbv)[i] = 1;
1850
 
1851
          newlbv = lambda_body_vector_compute_new (transform, lbv,
1852
                                                   lambda_obstack);
1853
 
1854
          stmts = NULL;
1855
          newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1856
                                         new_ivs, &stmts);
1857
 
1858
          if (stmts && gimple_code (stmt) != GIMPLE_PHI)
1859
            {
1860
              bsi = gsi_for_stmt (stmt);
1861
              gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT);
1862
            }
1863
 
1864
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1865
            propagate_value (use_p, newiv);
1866
 
1867
          if (stmts && gimple_code (stmt) == GIMPLE_PHI)
1868
            for (j = 0; j < gimple_phi_num_args (stmt); j++)
1869
              if (gimple_phi_arg_def (stmt, j) == newiv)
1870
                gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts);
1871
 
1872
          update_stmt (stmt);
1873
        }
1874
 
1875
      /* Remove the now unused induction variable.  */
1876
      VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt);
1877
    }
1878
  VEC_free (tree, heap, new_ivs);
1879
}
1880
 
1881
/* Return TRUE if this is not interesting statement from the perspective of
1882
   determining if we have a perfect loop nest.  */
1883
 
1884
static bool
1885
not_interesting_stmt (gimple stmt)
1886
{
1887
  /* Note that COND_EXPR's aren't interesting because if they were exiting the
1888
     loop, we would have already failed the number of exits tests.  */
1889
  if (gimple_code (stmt) == GIMPLE_LABEL
1890
      || gimple_code (stmt) == GIMPLE_GOTO
1891
      || gimple_code (stmt) == GIMPLE_COND
1892
      || is_gimple_debug (stmt))
1893
    return true;
1894
  return false;
1895
}
1896
 
1897
/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1898
 
1899
static bool
1900
phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def)
1901
{
1902
  unsigned i;
1903
  for (i = 0; i < gimple_phi_num_args (phi); i++)
1904
    if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src))
1905
      if (PHI_ARG_DEF (phi, i) == def)
1906
        return true;
1907
  return false;
1908
}
1909
 
1910
/* Return TRUE if STMT is a use of PHI_RESULT.  */
1911
 
1912
static bool
1913
stmt_uses_phi_result (gimple stmt, tree phi_result)
1914
{
1915
  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1916
 
1917
  /* This is conservatively true, because we only want SIMPLE bumpers
1918
     of the form x +- constant for our pass.  */
1919
  return (use == phi_result);
1920
}
1921
 
1922
/* STMT is a bumper stmt for LOOP if the version it defines is used in the
1923
   in-loop-edge in a phi node, and the operand it uses is the result of that
1924
   phi node.
1925
   I.E. i_29 = i_3 + 1
1926
        i_3 = PHI (0, i_29);  */
1927
 
1928
static bool
1929
stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
1930
{
1931
  gimple use;
1932
  tree def;
1933
  imm_use_iterator iter;
1934
  use_operand_p use_p;
1935
 
1936
  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1937
  if (!def)
1938
    return false;
1939
 
1940
  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1941
    {
1942
      use = USE_STMT (use_p);
1943
      if (gimple_code (use) == GIMPLE_PHI)
1944
        {
1945
          if (phi_loop_edge_uses_def (loop, use, def))
1946
            if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1947
              return true;
1948
        }
1949
    }
1950
  return false;
1951
}
1952
 
1953
 
1954
/* Return true if LOOP is a perfect loop nest.
1955
   Perfect loop nests are those loop nests where all code occurs in the
1956
   innermost loop body.
1957
   If S is a program statement, then
1958
 
1959
   i.e.
1960
   DO I = 1, 20
1961
       S1
1962
       DO J = 1, 20
1963
       ...
1964
       END DO
1965
   END DO
1966
   is not a perfect loop nest because of S1.
1967
 
1968
   DO I = 1, 20
1969
      DO J = 1, 20
1970
        S1
1971
        ...
1972
      END DO
1973
   END DO
1974
   is a perfect loop nest.
1975
 
1976
   Since we don't have high level loops anymore, we basically have to walk our
1977
   statements and ignore those that are there because the loop needs them (IE
1978
   the induction variable increment, and jump back to the top of the loop).  */
1979
 
1980
bool
1981
perfect_nest_p (struct loop *loop)
1982
{
1983
  basic_block *bbs;
1984
  size_t i;
1985
  gimple exit_cond;
1986
 
1987
  /* Loops at depth 0 are perfect nests.  */
1988
  if (!loop->inner)
1989
    return true;
1990
 
1991
  bbs = get_loop_body (loop);
1992
  exit_cond = get_loop_exit_condition (loop);
1993
 
1994
  for (i = 0; i < loop->num_nodes; i++)
1995
    {
1996
      if (bbs[i]->loop_father == loop)
1997
        {
1998
          gimple_stmt_iterator bsi;
1999
 
2000
          for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi))
2001
            {
2002
              gimple stmt = gsi_stmt (bsi);
2003
 
2004
              if (gimple_code (stmt) == GIMPLE_COND
2005
                  && exit_cond != stmt)
2006
                goto non_perfectly_nested;
2007
 
2008
              if (stmt == exit_cond
2009
                  || not_interesting_stmt (stmt)
2010
                  || stmt_is_bumper_for_loop (loop, stmt))
2011
                continue;
2012
 
2013
            non_perfectly_nested:
2014
              free (bbs);
2015
              return false;
2016
            }
2017
        }
2018
    }
2019
 
2020
  free (bbs);
2021
 
2022
  return perfect_nest_p (loop->inner);
2023
}
2024
 
2025
/* Replace the USES of X in STMT, or uses with the same step as X with Y.
2026
   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2027
   avoid creating duplicate temporaries and FIRSTBSI is statement
2028
   iterator where new temporaries should be inserted at the beginning
2029
   of body basic block.  */
2030
 
2031
static void
2032
replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
2033
                                int xstep, tree y, tree yinit,
2034
                                htab_t replacements,
2035
                                gimple_stmt_iterator *firstbsi)
2036
{
2037
  ssa_op_iter iter;
2038
  use_operand_p use_p;
2039
 
2040
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2041
    {
2042
      tree use = USE_FROM_PTR (use_p);
2043
      tree step = NULL_TREE;
2044
      tree scev, init, val, var;
2045
      gimple setstmt;
2046
      struct tree_map *h, in;
2047
      void **loc;
2048
 
2049
      /* Replace uses of X with Y right away.  */
2050
      if (use == x)
2051
        {
2052
          SET_USE (use_p, y);
2053
          continue;
2054
        }
2055
 
2056
      scev = instantiate_parameters (loop,
2057
                                     analyze_scalar_evolution (loop, use));
2058
 
2059
      if (scev == NULL || scev == chrec_dont_know)
2060
        continue;
2061
 
2062
      step = evolution_part_in_loop_num (scev, loop->num);
2063
      if (step == NULL
2064
          || step == chrec_dont_know
2065
          || TREE_CODE (step) != INTEGER_CST
2066
          || int_cst_value (step) != xstep)
2067
        continue;
2068
 
2069
      /* Use REPLACEMENTS hash table to cache already created
2070
         temporaries.  */
2071
      in.hash = htab_hash_pointer (use);
2072
      in.base.from = use;
2073
      h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2074
      if (h != NULL)
2075
        {
2076
          SET_USE (use_p, h->to);
2077
          continue;
2078
        }
2079
 
2080
      /* USE which has the same step as X should be replaced
2081
         with a temporary set to Y + YINIT - INIT.  */
2082
      init = initial_condition_in_loop_num (scev, loop->num);
2083
      gcc_assert (init != NULL && init != chrec_dont_know);
2084
      if (TREE_TYPE (use) == TREE_TYPE (y))
2085
        {
2086
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2087
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2088
          if (val == y)
2089
            {
2090
              /* If X has the same type as USE, the same step
2091
                 and same initial value, it can be replaced by Y.  */
2092
              SET_USE (use_p, y);
2093
              continue;
2094
            }
2095
        }
2096
      else
2097
        {
2098
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2099
          val = fold_convert (TREE_TYPE (use), val);
2100
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2101
        }
2102
 
2103
      /* Create a temporary variable and insert it at the beginning
2104
         of the loop body basic block, right after the PHI node
2105
         which sets Y.  */
2106
      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2107
      add_referenced_var (var);
2108
      val = force_gimple_operand_gsi (firstbsi, val, false, NULL,
2109
                                      true, GSI_SAME_STMT);
2110
      setstmt = gimple_build_assign (var, val);
2111
      var = make_ssa_name (var, setstmt);
2112
      gimple_assign_set_lhs (setstmt, var);
2113
      gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT);
2114
      update_stmt (setstmt);
2115
      SET_USE (use_p, var);
2116
      h = GGC_NEW (struct tree_map);
2117
      h->hash = in.hash;
2118
      h->base.from = use;
2119
      h->to = var;
2120
      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2121
      gcc_assert ((*(struct tree_map **)loc) == NULL);
2122
      *(struct tree_map **) loc = h;
2123
    }
2124
}
2125
 
2126
/* Return true if STMT is an exit PHI for LOOP */
2127
 
2128
static bool
2129
exit_phi_for_loop_p (struct loop *loop, gimple stmt)
2130
{
2131
  if (gimple_code (stmt) != GIMPLE_PHI
2132
      || gimple_phi_num_args (stmt) != 1
2133
      || gimple_bb (stmt) != single_exit (loop)->dest)
2134
    return false;
2135
 
2136
  return true;
2137
}
2138
 
2139
/* Return true if STMT can be put back into the loop INNER, by
2140
   copying it to the beginning of that loop and changing the uses.  */
2141
 
2142
static bool
2143
can_put_in_inner_loop (struct loop *inner, gimple stmt)
2144
{
2145
  imm_use_iterator imm_iter;
2146
  use_operand_p use_p;
2147
 
2148
  gcc_assert (is_gimple_assign (stmt));
2149
  if (gimple_vuse (stmt)
2150
      || !stmt_invariant_in_loop_p (inner, stmt))
2151
    return false;
2152
 
2153
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2154
    {
2155
      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2156
        {
2157
          basic_block immbb = gimple_bb (USE_STMT (use_p));
2158
 
2159
          if (!flow_bb_inside_loop_p (inner, immbb))
2160
            return false;
2161
        }
2162
    }
2163
  return true;
2164
}
2165
 
2166
/* Return true if STMT can be put *after* the inner loop of LOOP.  */
2167
 
2168
static bool
2169
can_put_after_inner_loop (struct loop *loop, gimple stmt)
2170
{
2171
  imm_use_iterator imm_iter;
2172
  use_operand_p use_p;
2173
 
2174
  if (gimple_vuse (stmt))
2175
    return false;
2176
 
2177
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2178
    {
2179
      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2180
        {
2181
          basic_block immbb = gimple_bb (USE_STMT (use_p));
2182
 
2183
          if (!dominated_by_p (CDI_DOMINATORS,
2184
                               immbb,
2185
                               loop->inner->header)
2186
              && !can_put_in_inner_loop (loop->inner, stmt))
2187
            return false;
2188
        }
2189
    }
2190
  return true;
2191
}
2192
 
2193
/* Return true when the induction variable IV is simple enough to be
2194
   re-synthesized.  */
2195
 
2196
static bool
2197
can_duplicate_iv (tree iv, struct loop *loop)
2198
{
2199
  tree scev = instantiate_parameters
2200
    (loop, analyze_scalar_evolution (loop, iv));
2201
 
2202
  if (!automatically_generated_chrec_p (scev))
2203
    {
2204
      tree step = evolution_part_in_loop_num (scev, loop->num);
2205
 
2206
      if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2207
        return true;
2208
    }
2209
 
2210
  return false;
2211
}
2212
 
2213
/* If this is a scalar operation that can be put back into the inner
2214
   loop, or after the inner loop, through copying, then do so. This
2215
   works on the theory that any amount of scalar code we have to
2216
   reduplicate into or after the loops is less expensive that the win
2217
   we get from rearranging the memory walk the loop is doing so that
2218
   it has better cache behavior.  */
2219
 
2220
static bool
2221
cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop)
2222
{
2223
  use_operand_p use_a, use_b;
2224
  imm_use_iterator imm_iter;
2225
  ssa_op_iter op_iter, op_iter1;
2226
  tree op0 = gimple_assign_lhs (stmt);
2227
 
2228
  /* The statement should not define a variable used in the inner
2229
     loop.  */
2230
  if (TREE_CODE (op0) == SSA_NAME
2231
      && !can_duplicate_iv (op0, loop))
2232
    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2233
      if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner)
2234
        return true;
2235
 
2236
  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2237
    {
2238
      gimple node;
2239
      tree op = USE_FROM_PTR (use_a);
2240
 
2241
      /* The variables should not be used in both loops.  */
2242
      if (!can_duplicate_iv (op, loop))
2243
        FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2244
          if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner)
2245
            return true;
2246
 
2247
      /* The statement should not use the value of a scalar that was
2248
         modified in the loop.  */
2249
      node = SSA_NAME_DEF_STMT (op);
2250
      if (gimple_code (node) == GIMPLE_PHI)
2251
        FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2252
          {
2253
            tree arg = USE_FROM_PTR (use_b);
2254
 
2255
            if (TREE_CODE (arg) == SSA_NAME)
2256
              {
2257
                gimple arg_stmt = SSA_NAME_DEF_STMT (arg);
2258
 
2259
                if (gimple_bb (arg_stmt)
2260
                    && (gimple_bb (arg_stmt)->loop_father == loop->inner))
2261
                  return true;
2262
              }
2263
          }
2264
    }
2265
 
2266
  return false;
2267
}
2268
/* Return true when BB contains statements that can harm the transform
2269
   to a perfect loop nest.  */
2270
 
2271
static bool
2272
cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2273
{
2274
  gimple_stmt_iterator bsi;
2275
  gimple exit_condition = get_loop_exit_condition (loop);
2276
 
2277
  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2278
    {
2279
      gimple stmt = gsi_stmt (bsi);
2280
 
2281
      if (stmt == exit_condition
2282
          || not_interesting_stmt (stmt)
2283
          || stmt_is_bumper_for_loop (loop, stmt))
2284
        continue;
2285
 
2286
      if (is_gimple_assign (stmt))
2287
        {
2288
          if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2289
            return true;
2290
 
2291
          if (can_duplicate_iv (gimple_assign_lhs (stmt), loop))
2292
            continue;
2293
 
2294
          if (can_put_in_inner_loop (loop->inner, stmt)
2295
              || can_put_after_inner_loop (loop, stmt))
2296
            continue;
2297
        }
2298
 
2299
      /* If the bb of a statement we care about isn't dominated by the
2300
         header of the inner loop, then we can't handle this case
2301
         right now.  This test ensures that the statement comes
2302
         completely *after* the inner loop.  */
2303
      if (!dominated_by_p (CDI_DOMINATORS,
2304
                           gimple_bb (stmt),
2305
                           loop->inner->header))
2306
        return true;
2307
    }
2308
 
2309
  return false;
2310
}
2311
 
2312
 
2313
/* Return TRUE if LOOP is an imperfect nest that we can convert to a
2314
   perfect one.  At the moment, we only handle imperfect nests of
2315
   depth 2, where all of the statements occur after the inner loop.  */
2316
 
2317
static bool
2318
can_convert_to_perfect_nest (struct loop *loop)
2319
{
2320
  basic_block *bbs;
2321
  size_t i;
2322
  gimple_stmt_iterator si;
2323
 
2324
  /* Can't handle triply nested+ loops yet.  */
2325
  if (!loop->inner || loop->inner->inner)
2326
    return false;
2327
 
2328
  bbs = get_loop_body (loop);
2329
  for (i = 0; i < loop->num_nodes; i++)
2330
    if (bbs[i]->loop_father == loop
2331
        && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2332
      goto fail;
2333
 
2334
  /* We also need to make sure the loop exit only has simple copy phis in it,
2335
     otherwise we don't know how to transform it into a perfect nest.  */
2336
  for (si = gsi_start_phis (single_exit (loop)->dest);
2337
       !gsi_end_p (si);
2338
       gsi_next (&si))
2339
    if (gimple_phi_num_args (gsi_stmt (si)) != 1)
2340
      goto fail;
2341
 
2342
  free (bbs);
2343
  return true;
2344
 
2345
 fail:
2346
  free (bbs);
2347
  return false;
2348
}
2349
 
2350
 
2351
DEF_VEC_I(source_location);
2352
DEF_VEC_ALLOC_I(source_location,heap);
2353
 
2354
/* Transform the loop nest into a perfect nest, if possible.
2355
   LOOP is the loop nest to transform into a perfect nest
2356
   LBOUNDS are the lower bounds for the loops to transform
2357
   UBOUNDS are the upper bounds for the loops to transform
2358
   STEPS is the STEPS for the loops to transform.
2359
   LOOPIVS is the induction variables for the loops to transform.
2360
 
2361
   Basically, for the case of
2362
 
2363
   FOR (i = 0; i < 50; i++)
2364
    {
2365
     FOR (j =0; j < 50; j++)
2366
     {
2367
        <whatever>
2368
     }
2369
     <some code>
2370
    }
2371
 
2372
   This function will transform it into a perfect loop nest by splitting the
2373
   outer loop into two loops, like so:
2374
 
2375
   FOR (i = 0; i < 50; i++)
2376
   {
2377
     FOR (j = 0; j < 50; j++)
2378
     {
2379
         <whatever>
2380
     }
2381
   }
2382
 
2383
   FOR (i = 0; i < 50; i ++)
2384
   {
2385
    <some code>
2386
   }
2387
 
2388
   Return FALSE if we can't make this loop into a perfect nest.  */
2389
 
2390
static bool
2391
perfect_nestify (struct loop *loop,
2392
                 VEC(tree,heap) *lbounds,
2393
                 VEC(tree,heap) *ubounds,
2394
                 VEC(int,heap) *steps,
2395
                 VEC(tree,heap) *loopivs)
2396
{
2397
  basic_block *bbs;
2398
  gimple exit_condition;
2399
  gimple cond_stmt;
2400
  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2401
  int i;
2402
  gimple_stmt_iterator bsi, firstbsi;
2403
  bool insert_after;
2404
  edge e;
2405
  struct loop *newloop;
2406
  gimple phi;
2407
  tree uboundvar;
2408
  gimple stmt;
2409
  tree oldivvar, ivvar, ivvarinced;
2410
  VEC(tree,heap) *phis = NULL;
2411
  VEC(source_location,heap) *locations = NULL;
2412
  htab_t replacements = NULL;
2413
 
2414
  /* Create the new loop.  */
2415
  olddest = single_exit (loop)->dest;
2416
  preheaderbb = split_edge (single_exit (loop));
2417
  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2418
 
2419
  /* Push the exit phi nodes that we are moving.  */
2420
  for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
2421
    {
2422
      phi = gsi_stmt (bsi);
2423
      VEC_reserve (tree, heap, phis, 2);
2424
      VEC_reserve (source_location, heap, locations, 1);
2425
      VEC_quick_push (tree, phis, PHI_RESULT (phi));
2426
      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2427
      VEC_quick_push (source_location, locations,
2428
                      gimple_phi_arg_location (phi, 0));
2429
    }
2430
  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2431
 
2432
  /* Remove the exit phis from the old basic block.  */
2433
  for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
2434
    remove_phi_node (&bsi, false);
2435
 
2436
  /* and add them back to the new basic block.  */
2437
  while (VEC_length (tree, phis) != 0)
2438
    {
2439
      tree def;
2440
      tree phiname;
2441
      source_location locus;
2442
      def = VEC_pop (tree, phis);
2443
      phiname = VEC_pop (tree, phis);
2444
      locus = VEC_pop (source_location, locations);
2445
      phi = create_phi_node (phiname, preheaderbb);
2446
      add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus);
2447
    }
2448
  flush_pending_stmts (e);
2449
  VEC_free (tree, heap, phis);
2450
 
2451
  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2452
  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2453
  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2454
  cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
2455
                                 NULL_TREE, NULL_TREE);
2456
  bsi = gsi_start_bb (bodybb);
2457
  gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
2458
  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2459
  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2460
  make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2461
 
2462
  /* Update the loop structures.  */
2463
  newloop = duplicate_loop (loop, olddest->loop_father);
2464
  newloop->header = headerbb;
2465
  newloop->latch = latchbb;
2466
  add_bb_to_loop (latchbb, newloop);
2467
  add_bb_to_loop (bodybb, newloop);
2468
  add_bb_to_loop (headerbb, newloop);
2469
  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2470
  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2471
  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2472
                           single_exit (loop)->src);
2473
  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2474
  set_immediate_dominator (CDI_DOMINATORS, olddest,
2475
                           recompute_dominator (CDI_DOMINATORS, olddest));
2476
  /* Create the new iv.  */
2477
  oldivvar = VEC_index (tree, loopivs, 0);
2478
  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2479
  add_referenced_var (ivvar);
2480
  standard_iv_increment_position (newloop, &bsi, &insert_after);
2481
  create_iv (VEC_index (tree, lbounds, 0),
2482
             build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2483
             ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2484
 
2485
  /* Create the new upper bound.  This may be not just a variable, so we copy
2486
     it to one just in case.  */
2487
 
2488
  exit_condition = get_loop_exit_condition (newloop);
2489
  uboundvar = create_tmp_var (TREE_TYPE (VEC_index (tree, ubounds, 0)),
2490
                              "uboundvar");
2491
  add_referenced_var (uboundvar);
2492
  stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0));
2493
  uboundvar = make_ssa_name (uboundvar, stmt);
2494
  gimple_assign_set_lhs (stmt, uboundvar);
2495
 
2496
  if (insert_after)
2497
    gsi_insert_after (&bsi, stmt, GSI_SAME_STMT);
2498
  else
2499
    gsi_insert_before (&bsi, stmt, GSI_SAME_STMT);
2500
  update_stmt (stmt);
2501
  gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
2502
  update_stmt (exit_condition);
2503
  replacements = htab_create_ggc (20, tree_map_hash,
2504
                                  tree_map_eq, NULL);
2505
  bbs = get_loop_body_in_dom_order (loop);
2506
  /* Now move the statements, and replace the induction variable in the moved
2507
     statements with the correct loop induction variable.  */
2508
  oldivvar = VEC_index (tree, loopivs, 0);
2509
  firstbsi = gsi_start_bb (bodybb);
2510
  for (i = loop->num_nodes - 1; i >= 0 ; i--)
2511
    {
2512
      gimple_stmt_iterator tobsi = gsi_last_bb (bodybb);
2513
      if (bbs[i]->loop_father == loop)
2514
        {
2515
          /* If this is true, we are *before* the inner loop.
2516
             If this isn't true, we are *after* it.
2517
 
2518
             The only time can_convert_to_perfect_nest returns true when we
2519
             have statements before the inner loop is if they can be moved
2520
             into the inner loop.
2521
 
2522
             The only time can_convert_to_perfect_nest returns true when we
2523
             have statements after the inner loop is if they can be moved into
2524
             the new split loop.  */
2525
 
2526
          if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2527
            {
2528
              gimple_stmt_iterator header_bsi
2529
                = gsi_after_labels (loop->inner->header);
2530
 
2531
              for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2532
                {
2533
                  gimple stmt = gsi_stmt (bsi);
2534
 
2535
                  if (stmt == exit_condition
2536
                      || not_interesting_stmt (stmt)
2537
                      || stmt_is_bumper_for_loop (loop, stmt))
2538
                    {
2539
                      gsi_next (&bsi);
2540
                      continue;
2541
                    }
2542
 
2543
                  gsi_move_before (&bsi, &header_bsi);
2544
                }
2545
            }
2546
          else
2547
            {
2548
              /* Note that the bsi only needs to be explicitly incremented
2549
                 when we don't move something, since it is automatically
2550
                 incremented when we do.  */
2551
              for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2552
                {
2553
                  gimple stmt = gsi_stmt (bsi);
2554
 
2555
                  if (stmt == exit_condition
2556
                      || not_interesting_stmt (stmt)
2557
                      || stmt_is_bumper_for_loop (loop, stmt))
2558
                    {
2559
                      gsi_next (&bsi);
2560
                      continue;
2561
                    }
2562
 
2563
                  replace_uses_equiv_to_x_with_y
2564
                    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2565
                     VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2566
 
2567
                  gsi_move_before (&bsi, &tobsi);
2568
 
2569
                  /* If the statement has any virtual operands, they may
2570
                     need to be rewired because the original loop may
2571
                     still reference them.  */
2572
                  if (gimple_vuse (stmt))
2573
                    mark_sym_for_renaming (gimple_vop (cfun));
2574
                }
2575
            }
2576
 
2577
        }
2578
    }
2579
 
2580
  free (bbs);
2581
  htab_delete (replacements);
2582
  return perfect_nest_p (loop);
2583
}
2584
 
2585
/* Return true if TRANS is a legal transformation matrix that respects
2586
   the dependence vectors in DISTS and DIRS.  The conservative answer
2587
   is false.
2588
 
2589
   "Wolfe proves that a unimodular transformation represented by the
2590
   matrix T is legal when applied to a loop nest with a set of
2591
   lexicographically non-negative distance vectors RDG if and only if
2592
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2593
   i.e.: if and only if it transforms the lexicographically positive
2594
   distance vectors to lexicographically positive vectors.  Note that
2595
   a unimodular matrix must transform the zero vector (and only it) to
2596
   the zero vector." S.Muchnick.  */
2597
 
2598
bool
2599
lambda_transform_legal_p (lambda_trans_matrix trans,
2600
                          int nb_loops,
2601
                          VEC (ddr_p, heap) *dependence_relations)
2602
{
2603
  unsigned int i, j;
2604
  lambda_vector distres;
2605
  struct data_dependence_relation *ddr;
2606
 
2607
  gcc_assert (LTM_COLSIZE (trans) == nb_loops
2608
              && LTM_ROWSIZE (trans) == nb_loops);
2609
 
2610
  /* When there are no dependences, the transformation is correct.  */
2611
  if (VEC_length (ddr_p, dependence_relations) == 0)
2612
    return true;
2613
 
2614
  ddr = VEC_index (ddr_p, dependence_relations, 0);
2615
  if (ddr == NULL)
2616
    return true;
2617
 
2618
  /* When there is an unknown relation in the dependence_relations, we
2619
     know that it is no worth looking at this loop nest: give up.  */
2620
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2621
    return false;
2622
 
2623
  distres = lambda_vector_new (nb_loops);
2624
 
2625
  /* For each distance vector in the dependence graph.  */
2626
  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2627
    {
2628
      /* Don't care about relations for which we know that there is no
2629
         dependence, nor about read-read (aka. output-dependences):
2630
         these data accesses can happen in any order.  */
2631
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2632
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2633
        continue;
2634
 
2635
      /* Conservatively answer: "this transformation is not valid".  */
2636
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2637
        return false;
2638
 
2639
      /* If the dependence could not be captured by a distance vector,
2640
         conservatively answer that the transform is not valid.  */
2641
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
2642
        return false;
2643
 
2644
      /* Compute trans.dist_vect */
2645
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2646
        {
2647
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2648
                                     DDR_DIST_VECT (ddr, j), distres);
2649
 
2650
          if (!lambda_vector_lexico_pos (distres, nb_loops))
2651
            return false;
2652
        }
2653
    }
2654
  return true;
2655
}
2656
 
2657
 
2658
/* Collects parameters from affine function ACCESS_FUNCTION, and push
2659
   them in PARAMETERS.  */
2660
 
2661
static void
2662
lambda_collect_parameters_from_af (tree access_function,
2663
                                   struct pointer_set_t *param_set,
2664
                                   VEC (tree, heap) **parameters)
2665
{
2666
  if (access_function == NULL)
2667
    return;
2668
 
2669
  if (TREE_CODE (access_function) == SSA_NAME
2670
      && pointer_set_contains (param_set, access_function) == 0)
2671
    {
2672
      pointer_set_insert (param_set, access_function);
2673
      VEC_safe_push (tree, heap, *parameters, access_function);
2674
    }
2675
  else
2676
    {
2677
      int i, num_operands = tree_operand_length (access_function);
2678
 
2679
      for (i = 0; i < num_operands; i++)
2680
        lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
2681
                                           param_set, parameters);
2682
    }
2683
}
2684
 
2685
/* Collects parameters from DATAREFS, and push them in PARAMETERS.  */
2686
 
2687
void
2688
lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
2689
                           VEC (tree, heap) **parameters)
2690
{
2691
  unsigned i, j;
2692
  struct pointer_set_t *parameter_set = pointer_set_create ();
2693
  data_reference_p data_reference;
2694
 
2695
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++)
2696
    for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
2697
      lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
2698
                                         parameter_set, parameters);
2699
  pointer_set_destroy (parameter_set);
2700
}
2701
 
2702
/* Translates BASE_EXPR to vector CY.  AM is needed for inferring
2703
   indexing positions in the data access vector.  CST is the analyzed
2704
   integer constant.  */
2705
 
2706
static bool
2707
av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
2708
                int cst)
2709
{
2710
  bool result = true;
2711
 
2712
  switch (TREE_CODE (base_expr))
2713
    {
2714
    case INTEGER_CST:
2715
      /* Constant part.  */
2716
      cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
2717
      return true;
2718
 
2719
    case SSA_NAME:
2720
      {
2721
        int param_index =
2722
          access_matrix_get_index_for_parameter (base_expr, am);
2723
 
2724
        if (param_index >= 0)
2725
          {
2726
            cy[param_index] = cst + cy[param_index];
2727
            return true;
2728
          }
2729
 
2730
        return false;
2731
      }
2732
 
2733
    case PLUS_EXPR:
2734
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2735
        && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);
2736
 
2737
    case MINUS_EXPR:
2738
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2739
        && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2740
 
2741
    case MULT_EXPR:
2742
      if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2743
        result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2744
                                 cy, am, cst *
2745
                                 int_cst_value (TREE_OPERAND (base_expr, 0)));
2746
      else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2747
        result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2748
                                 cy, am, cst *
2749
                                 int_cst_value (TREE_OPERAND (base_expr, 1)));
2750
      else
2751
        result = false;
2752
 
2753
      return result;
2754
 
2755
    case NEGATE_EXPR:
2756
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);
2757
 
2758
    default:
2759
      return false;
2760
    }
2761
 
2762
  return result;
2763
}
2764
 
2765
/* Translates ACCESS_FUN to vector CY.  AM is needed for inferring
2766
   indexing positions in the data access vector.  */
2767
 
2768
static bool
2769
av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
2770
{
2771
  switch (TREE_CODE (access_fun))
2772
    {
2773
    case POLYNOMIAL_CHREC:
2774
      {
2775
        tree left = CHREC_LEFT (access_fun);
2776
        tree right = CHREC_RIGHT (access_fun);
2777
        unsigned var;
2778
 
2779
        if (TREE_CODE (right) != INTEGER_CST)
2780
          return false;
2781
 
2782
        var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
2783
        cy[var] = int_cst_value (right);
2784
 
2785
        if (TREE_CODE (left) == POLYNOMIAL_CHREC)
2786
          return av_for_af (left, cy, am);
2787
        else
2788
          return av_for_af_base (left, cy, am, 1);
2789
      }
2790
 
2791
    case INTEGER_CST:
2792
      /* Constant part.  */
2793
      return av_for_af_base (access_fun, cy, am, 1);
2794
 
2795
    default:
2796
      return false;
2797
    }
2798
}
2799
 
2800
/* Initializes the access matrix for DATA_REFERENCE.  */
2801
 
2802
static bool
2803
build_access_matrix (data_reference_p data_reference,
2804
                     VEC (tree, heap) *parameters, VEC (loop_p, heap) *nest)
2805
{
2806
  struct access_matrix *am = GGC_NEW (struct access_matrix);
2807
  unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
2808
  unsigned nivs = VEC_length (loop_p, nest);
2809
  unsigned lambda_nb_columns;
2810
 
2811
  AM_LOOP_NEST (am) = nest;
2812
  AM_NB_INDUCTION_VARS (am) = nivs;
2813
  AM_PARAMETERS (am) = parameters;
2814
 
2815
  lambda_nb_columns = AM_NB_COLUMNS (am);
2816
  AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim);
2817
 
2818
  for (i = 0; i < ndim; i++)
2819
    {
2820
      lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
2821
      tree access_function = DR_ACCESS_FN (data_reference, i);
2822
 
2823
      if (!av_for_af (access_function, access_vector, am))
2824
        return false;
2825
 
2826
      VEC_quick_push (lambda_vector, AM_MATRIX (am), access_vector);
2827
    }
2828
 
2829
  DR_ACCESS_MATRIX (data_reference) = am;
2830
  return true;
2831
}
2832
 
2833
/* Returns false when one of the access matrices cannot be built.  */
2834
 
2835
bool
2836
lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
2837
                                VEC (tree, heap) *parameters,
2838
                                VEC (loop_p, heap) *nest)
2839
{
2840
  data_reference_p dataref;
2841
  unsigned ix;
2842
 
2843
  for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++)
2844
    if (!build_access_matrix (dataref, parameters, nest))
2845
      return false;
2846
 
2847
  return true;
2848
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.