/*  Loop transformation code generation
    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
    Contributed by Daniel Berlin <dberlin@dberlin.org>

    This file is part of GCC.

    GCC is free software; you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3, or (at your option) any later
    version.

    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
#include "vec.h"
#include "lambda.h"
#include "vecprim.h"

/* This loop nest code generation is based on non-singular matrix
   math.

 A little terminology and a general sketch of the algorithm.  See "A singular
 loop transformation framework based on non-singular matrices" by Wei Li and
 Keshav Pingali for formal proofs that the various statements below are
 correct.

 A loop iteration space represents the points traversed by the loop.  A point
 in the iteration space can be represented by a vector of size <loop depth>.
 You can therefore represent the iteration space as an integral combination of
 a set of basis vectors.

 A loop iteration space is dense if every integer point between the loop
 bounds is a point in the iteration space.  Every loop with a step of 1
 therefore has a dense iteration space.

 for i = 1 to 3, step 1 is a dense iteration space.

 A loop iteration space is sparse if it is not dense.  That is, the iteration
 space skips integer points that are within the loop bounds.

 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
 2 is skipped.

 Dense source spaces are easy to transform, because they don't skip any
 points to begin with.  Thus we can compute the exact bounds of the target
 space using min/max and floor/ceil.

 For a dense source space, we take the transformation matrix, decompose it
 into a lower triangular part (H) and a unimodular part (U).
 We then compute the auxiliary space from the unimodular part (source loop
 nest . U = auxiliary space), which has two important properties:
  1. It traverses the iterations in the same lexicographic order as the source
  space.
  2. It is a dense space when the source is a dense space (even if the target
  space is going to be sparse).

 Given the auxiliary space, we use the lower triangular part to compute the
 bounds in the target space by simple matrix multiplication.
 The gaps in the target space (IE the new loop step sizes) will be the
 diagonal entries of the H matrix.

 Sparse source spaces require another step, because you can't directly compute
 the exact bounds of the auxiliary and target space from the sparse space.
 Rather than try to come up with a separate algorithm to handle sparse source
 spaces directly, we just find a legal transformation matrix that gives you
 the sparse source space, from a dense space, and then transform the dense
 space.

 For a regular sparse space, you can represent the source space as an integer
 lattice, and the base space of that lattice will always be dense.  Thus, we
 effectively use the lattice to figure out the transformation from the lattice
 base space, to the sparse iteration space (IE what transform was applied to
 the dense space to make it sparse).  We then compose this transform with the
 transformation matrix specified by the user (since our matrix transformations
 are closed under composition, this is okay).  We can then use the base space
 (which is dense) plus the composed transformation matrix, to compute the rest
 of the transform using the dense space algorithm above.

 In other words, our sparse source space (B) is decomposed into a dense base
 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 We then compute the composition of L and the user transformation matrix (T),
 so that T is now a transform from A to the result, instead of from B to the
 result.
 IE A.(LT) = result instead of B.T = result
 Since A is now a dense source space, we can use the dense source space
 algorithm above to compute the result of applying transform (LT) to A.

 Fourier-Motzkin elimination is used to compute the bounds of the base space
 of the lattice.  */
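
/* A small worked illustration of the terminology above (hypothetical, one
   dimensional): the dense space

     for i = 0 to 3, step 1          points {0, 1, 2, 3} = A

   together with the lattice base L = (4) gives the sparse space

     for i = 0 to 12, step 4         points {0, 4, 8, 12} = B

   so A.L = B.  Composing L with a user transform T = (2) gives
   A.(LT) = {0, 8, 16, 24}; the bounds of the result are computed from the
   dense space A, and the new step, 8, is the (single) diagonal entry of the
   lower triangular part of the composed transform.  */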
 
static bool perfect_nestify (struct loops *,
                             struct loop *, VEC(tree,heap) *,
                             VEC(tree,heap) *, VEC(int,heap) *,
                             VEC(tree,heap) *);
/* Lattice stuff that is internal to the code generation algorithm.  */

typedef struct
{
  /* Lattice base matrix.  */
  lambda_matrix base;
  /* Lattice dimension.  */
  int dimension;
  /* Origin vector for the coefficients.  */
  lambda_vector origin;
  /* Origin matrix for the invariants.  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;

#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)

static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
                       int, int);
static lambda_lattice lambda_lattice_new (int, int);
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);

static tree find_induction_var_from_exit_cond (struct loop *);
static bool can_convert_to_perfect_nest (struct loop *);

151
/* Create a new lambda body vector.  */
152
 
153
lambda_body_vector
154
lambda_body_vector_new (int size)
155
{
156
  lambda_body_vector ret;
157
 
158
  ret = ggc_alloc (sizeof (*ret));
159
  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
160
  LBV_SIZE (ret) = size;
161
  LBV_DENOMINATOR (ret) = 1;
162
  return ret;
163
}
164
 
165
/* Compute the new coefficients for the vector based on the
166
  *inverse* of the transformation matrix.  */
167
 
168
lambda_body_vector
169
lambda_body_vector_compute_new (lambda_trans_matrix transform,
170
                                lambda_body_vector vect)
171
{
172
  lambda_body_vector temp;
173
  int depth;
174
 
175
  /* Make sure the matrix is square.  */
176
  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
177
 
178
  depth = LTM_ROWSIZE (transform);
179
 
180
  temp = lambda_body_vector_new (depth);
181
  LBV_DENOMINATOR (temp) =
182
    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
183
  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
184
                             LTM_MATRIX (transform), depth,
185
                             LBV_COEFFICIENTS (temp));
186
  LBV_SIZE (temp) = LBV_SIZE (vect);
187
  return temp;
188
}
189
 
190
/* Print out a lambda body vector.  */
191
 
192
void
193
print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
194
{
195
  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
196
}
197
 
198
/* Return TRUE if two linear expressions are equal.  */
199
 
200
static bool
201
lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
202
           int depth, int invariants)
203
{
204
  int i;
205
 
206
  if (lle1 == NULL || lle2 == NULL)
207
    return false;
208
  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
209
    return false;
210
  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
211
    return false;
212
  for (i = 0; i < depth; i++)
213
    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
214
      return false;
215
  for (i = 0; i < invariants; i++)
216
    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
217
        LLE_INVARIANT_COEFFICIENTS (lle2)[i])
218
      return false;
219
  return true;
220
}
221
 
222
/* Create a new linear expression with dimension DIM, and total number
223
   of invariants INVARIANTS.  */
224
 
225
lambda_linear_expression
226
lambda_linear_expression_new (int dim, int invariants)
227
{
228
  lambda_linear_expression ret;
229
 
230
  ret = ggc_alloc_cleared (sizeof (*ret));
231
 
232
  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
233
  LLE_CONSTANT (ret) = 0;
234
  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
235
  LLE_DENOMINATOR (ret) = 1;
236
  LLE_NEXT (ret) = NULL;
237
 
238
  return ret;
239
}
240
 
241
/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
242
   The starting letter used for variable names is START.  */
243
 
244
static void
245
print_linear_expression (FILE * outfile, lambda_vector expr, int size,
246
                         char start)
247
{
248
  int i;
249
  bool first = true;
250
  for (i = 0; i < size; i++)
251
    {
252
      if (expr[i] != 0)
253
        {
254
          if (first)
255
            {
256
              if (expr[i] < 0)
257
                fprintf (outfile, "-");
258
              first = false;
259
            }
260
          else if (expr[i] > 0)
261
            fprintf (outfile, " + ");
262
          else
263
            fprintf (outfile, " - ");
264
          if (abs (expr[i]) == 1)
265
            fprintf (outfile, "%c", start + i);
266
          else
267
            fprintf (outfile, "%d%c", abs (expr[i]), start + i);
268
        }
269
    }
270
}
271
 
272
/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
273
   depth/number of coefficients is given by DEPTH, the number of invariants is
274
   given by INVARIANTS, and the character to start variable names with is given
275
   by START.  */
276
 
277
void
278
print_lambda_linear_expression (FILE * outfile,
279
                                lambda_linear_expression expr,
280
                                int depth, int invariants, char start)
281
{
282
  fprintf (outfile, "\tLinear expression: ");
283
  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
284
  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
285
  fprintf (outfile, "  invariants: ");
286
  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
287
                           invariants, 'A');
288
  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
289
}
290
 
291
/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
292
   coefficients is given by DEPTH, the number of invariants is
293
   given by INVARIANTS, and the character to start variable names with is given
294
   by START.  */
295
 
296
void
297
print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
298
                   int invariants, char start)
299
{
300
  int step;
301
  lambda_linear_expression expr;
302
 
303
  gcc_assert (loop);
304
 
305
  expr = LL_LINEAR_OFFSET (loop);
306
  step = LL_STEP (loop);
307
  fprintf (outfile, "  step size = %d \n", step);
308
 
309
  if (expr)
310
    {
311
      fprintf (outfile, "  linear offset: \n");
312
      print_lambda_linear_expression (outfile, expr, depth, invariants,
313
                                      start);
314
    }
315
 
316
  fprintf (outfile, "  lower bound: \n");
317
  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
318
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
319
  fprintf (outfile, "  upper bound: \n");
320
  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
321
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322
}
323
 
324
/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
325
   number of invariants.  */
326
 
327
lambda_loopnest
328
lambda_loopnest_new (int depth, int invariants)
329
{
330
  lambda_loopnest ret;
331
  ret = ggc_alloc (sizeof (*ret));
332
 
333
  LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
334
  LN_DEPTH (ret) = depth;
335
  LN_INVARIANTS (ret) = invariants;
336
 
337
  return ret;
338
}
339
 
340
/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
341
   character to use for loop names is given by START.  */
342
 
343
void
344
print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
345
{
346
  int i;
347
  for (i = 0; i < LN_DEPTH (nest); i++)
348
    {
349
      fprintf (outfile, "Loop %c\n", start + i);
350
      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
351
                         LN_INVARIANTS (nest), 'i');
352
      fprintf (outfile, "\n");
353
    }
354
}
355
 
356
/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
357
   of invariants.  */
358
 
359
static lambda_lattice
360
lambda_lattice_new (int depth, int invariants)
361
{
362
  lambda_lattice ret;
363
  ret = ggc_alloc (sizeof (*ret));
364
  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
365
  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
366
  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
367
  LATTICE_DIMENSION (ret) = depth;
368
  LATTICE_INVARIANTS (ret) = invariants;
369
  return ret;
370
}
371
 
/* Compute the lattice base for NEST.  The lattice base is essentially a
   non-singular transform from a dense base space to a sparse iteration space.
   We use it so that we don't have to specially handle the case of a sparse
   iteration space in other parts of the algorithm.  As a result, this routine
   only does something interesting (IE produce a matrix that isn't the
   identity matrix) if NEST is a sparse space.  */

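/* As an illustration (a hypothetical two-deep nest, not from any particular
   test case): for

     for (i = 0; i <= N; i++)
       for (j = i; j <= M; j += 2)

   loop 0 has step 1, so its row of the base is the identity row (1 0) and its
   origin entry is 0.  Loop 1 has step 2 and lower bound i, so base[1][0] is
   the lower bound coefficient of i times the step of loop 0, 1 * 1 = 1,
   base[1][1] is the step, 2, and its origin entry is the lower bound's
   constant, 0.  The base

     | 1 0 |
     | 1 2 |

   maps a dense point (y0, y1) to the sparse point i = y0, j = y0 + 2*y1.  */
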
static lambda_lattice
380
lambda_lattice_compute_base (lambda_loopnest nest)
381
{
382
  lambda_lattice ret;
383
  int depth, invariants;
384
  lambda_matrix base;
385
 
386
  int i, j, step;
387
  lambda_loop loop;
388
  lambda_linear_expression expression;
389
 
390
  depth = LN_DEPTH (nest);
391
  invariants = LN_INVARIANTS (nest);
392
 
393
  ret = lambda_lattice_new (depth, invariants);
394
  base = LATTICE_BASE (ret);
395
  for (i = 0; i < depth; i++)
396
    {
397
      loop = LN_LOOPS (nest)[i];
398
      gcc_assert (loop);
399
      step = LL_STEP (loop);
400
      /* If we have a step of 1, then the base is one, and the
401
         origin and invariant coefficients are 0.  */
402
      if (step == 1)
403
        {
404
          for (j = 0; j < depth; j++)
405
            base[i][j] = 0;
406
          base[i][i] = 1;
407
          LATTICE_ORIGIN (ret)[i] = 0;
408
          for (j = 0; j < invariants; j++)
409
            LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
410
        }
411
      else
412
        {
413
          /* Otherwise, we need the lower bound expression (which must
414
             be an affine function)  to determine the base.  */
415
          expression = LL_LOWER_BOUND (loop);
416
          gcc_assert (expression && !LLE_NEXT (expression)
417
                      && LLE_DENOMINATOR (expression) == 1);
418
 
419
          /* The lower triangular portion of the base is going to be the
420
             coefficient times the step */
421
          for (j = 0; j < i; j++)
422
            base[i][j] = LLE_COEFFICIENTS (expression)[j]
423
              * LL_STEP (LN_LOOPS (nest)[j]);
424
          base[i][i] = step;
425
          for (j = i + 1; j < depth; j++)
426
            base[i][j] = 0;
427
 
428
          /* Origin for this loop is the constant of the lower bound
429
             expression.  */
430
          LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
431
 
432
          /* Coefficient for the invariants are equal to the invariant
433
             coefficients in the expression.  */
434
          for (j = 0; j < invariants; j++)
435
            LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
436
              LLE_INVARIANT_COEFFICIENTS (expression)[j];
437
        }
438
    }
439
  return ret;
440
}
441
 
/* Compute the least common multiple of two numbers A and B.  */

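/* For example, lcm (4, 6) = (|4| * |6|) / gcd (4, 6) = 24 / 2 = 12.  */
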
static int
445
lcm (int a, int b)
446
{
447
  return (abs (a) * abs (b) / gcd (a, b));
448
}
449
 
/* Perform Fourier-Motzkin elimination to calculate the bounds of the
   auxiliary nest.
   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
   it is easy to calculate the answer and bounds.
   A sketch of how it works:
   Given a system of linear inequalities, ai * xj >= bk, you can always
   rewrite the constraints so they are all of the form
   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
   in b1 ... bk, and some a in a1 ... ai).
   You can then eliminate this x from the non-constant inequalities by
   rewriting each pair a <= x, x <= b as a <= b, keeping the x >= constant
   bounds, and deleting the x variable.
   You can then repeat this for any remaining x variables, and we end up with
   an easy-to-use variable <= constant (or no variables at all) form that we
   can construct our bounds from.

   In our case, each time we eliminate, we construct part of the bound from
   the ith variable, then delete the ith variable.

   Remember the constants are in our vector a, our coefficient matrix is A,
   and our invariant coefficient matrix is B.

   SIZE is the size of the matrices being passed.
   DEPTH is the loop nest depth.
   INVARIANTS is the number of loop invariants.
   A, B, and a are the coefficient matrix, invariant coefficient matrix, and a
   vector of constants, respectively.  */

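/* A tiny worked example (purely illustrative, not tied to any particular
   nest): start from the system

     1 <= x,   x <= y,   y <= 7

   To eliminate x, pair each of its lower bounds with each of its upper
   bounds: 1 <= x and x <= y combine into 1 <= y, and the bounds
   1 <= x <= y are recorded for x.  The remaining system involves only y,
   giving 1 <= y <= 7, so the reconstructed nest is

     for (y = 1; y <= 7; y++)
       for (x = 1; x <= y; x++)  */
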
static lambda_loopnest
478
compute_nest_using_fourier_motzkin (int size,
479
                                    int depth,
480
                                    int invariants,
481
                                    lambda_matrix A,
482
                                    lambda_matrix B,
483
                                    lambda_vector a)
484
{
485
 
486
  int multiple, f1, f2;
487
  int i, j, k;
488
  lambda_linear_expression expression;
489
  lambda_loop loop;
490
  lambda_loopnest auxillary_nest;
491
  lambda_matrix swapmatrix, A1, B1;
492
  lambda_vector swapvector, a1;
493
  int newsize;
494
 
495
  A1 = lambda_matrix_new (128, depth);
496
  B1 = lambda_matrix_new (128, invariants);
497
  a1 = lambda_vector_new (128);
498
 
499
  auxillary_nest = lambda_loopnest_new (depth, invariants);
500
 
501
  for (i = depth - 1; i >= 0; i--)
502
    {
503
      loop = lambda_loop_new ();
504
      LN_LOOPS (auxillary_nest)[i] = loop;
505
      LL_STEP (loop) = 1;
506
 
507
      for (j = 0; j < size; j++)
508
        {
509
          if (A[j][i] < 0)
510
            {
511
              /* Any linear expression in the matrix with a coefficient less
512
                 than 0 becomes part of the new lower bound.  */
513
              expression = lambda_linear_expression_new (depth, invariants);
514
 
515
              for (k = 0; k < i; k++)
516
                LLE_COEFFICIENTS (expression)[k] = A[j][k];
517
 
518
              for (k = 0; k < invariants; k++)
519
                LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
520
 
521
              LLE_DENOMINATOR (expression) = -1 * A[j][i];
522
              LLE_CONSTANT (expression) = -1 * a[j];
523
 
524
              /* Ignore if identical to the existing lower bound.  */
525
              if (!lle_equal (LL_LOWER_BOUND (loop),
526
                              expression, depth, invariants))
527
                {
528
                  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
529
                  LL_LOWER_BOUND (loop) = expression;
530
                }
531
 
532
            }
533
          else if (A[j][i] > 0)
534
            {
535
              /* Any linear expression with a coefficient greater than 0
536
                 becomes part of the new upper bound.  */
537
              expression = lambda_linear_expression_new (depth, invariants);
538
              for (k = 0; k < i; k++)
539
                LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
540
 
541
              for (k = 0; k < invariants; k++)
542
                LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
543
 
544
              LLE_DENOMINATOR (expression) = A[j][i];
545
              LLE_CONSTANT (expression) = a[j];
546
 
547
              /* Ignore if identical to the existing upper bound.  */
548
              if (!lle_equal (LL_UPPER_BOUND (loop),
549
                              expression, depth, invariants))
550
                {
551
                  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
552
                  LL_UPPER_BOUND (loop) = expression;
553
                }
554
 
555
            }
556
        }
557
 
558
      /* This portion creates a new system of linear inequalities by deleting
559
         the i'th variable, reducing the system by one variable.  */
560
      newsize = 0;
561
      for (j = 0; j < size; j++)
562
        {
563
          /* If the coefficient for the i'th variable is 0, then we can just
564
             eliminate the variable straightaway.  Otherwise, we have to
565
             multiply through by the coefficients we are eliminating.  */
566
          if (A[j][i] == 0)
567
            {
568
              lambda_vector_copy (A[j], A1[newsize], depth);
569
              lambda_vector_copy (B[j], B1[newsize], invariants);
570
              a1[newsize] = a[j];
571
              newsize++;
572
            }
573
          else if (A[j][i] > 0)
574
            {
575
              for (k = 0; k < size; k++)
576
                {
577
                  if (A[k][i] < 0)
578
                    {
579
                      multiple = lcm (A[j][i], A[k][i]);
580
                      f1 = multiple / A[j][i];
581
                      f2 = -1 * multiple / A[k][i];
582
 
583
                      lambda_vector_add_mc (A[j], f1, A[k], f2,
584
                                            A1[newsize], depth);
585
                      lambda_vector_add_mc (B[j], f1, B[k], f2,
586
                                            B1[newsize], invariants);
587
                      a1[newsize] = f1 * a[j] + f2 * a[k];
588
                      newsize++;
589
                    }
590
                }
591
            }
592
        }
593
 
594
      swapmatrix = A;
595
      A = A1;
596
      A1 = swapmatrix;
597
 
598
      swapmatrix = B;
599
      B = B1;
600
      B1 = swapmatrix;
601
 
602
      swapvector = a;
603
      a = a1;
604
      a1 = swapvector;
605
 
606
      size = newsize;
607
    }
608
 
609
  return auxillary_nest;
610
}
611
 
/* Compute the loop bounds for the auxiliary space NEST.
   Input system used is Ax <= b.  TRANS is the unimodular transformation.
   Given the original nest, this function will
   1. Convert the nest into matrix form, which consists of a matrix for the
      coefficients, a matrix for the invariant coefficients, and a vector for
      the constants.
   2. Use the matrix form to calculate the lattice base for the nest (which is
      a dense space).
   3. Compose the dense space transform with the user specified transform, to
      get a transform we can easily calculate transformed bounds for.
   4. Multiply the composed transformation matrix times the matrix form of the
      loop.
   5. Transform the newly created matrix (from step 4) back into a loop nest
      using Fourier-Motzkin elimination to figure out the bounds.  */

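/* For instance (a hypothetical nest, only to show the matrix form built in
   step 1): for

     for (i = 0; i <= 5; i++)
       for (j = i; j <= 8; j++)

   with no invariants, each bound is rewritten with the bounded variable on
   the left-hand side:

     -i     <= 0       lower bound of i
      i     <= 5       upper bound of i
      i - j <= 0       lower bound of j  (j >= i)
          j <= 8       upper bound of j

   so A = { {-1, 0}, {1, 0}, {1, -1}, {0, 1} }, a = {0, 5, 0, 8}, and B is
   empty.  */
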
static lambda_loopnest
628
lambda_compute_auxillary_space (lambda_loopnest nest,
629
                                lambda_trans_matrix trans)
630
{
631
  lambda_matrix A, B, A1, B1;
632
  lambda_vector a, a1;
633
  lambda_matrix invertedtrans;
634
  int depth, invariants, size;
635
  int i, j;
636
  lambda_loop loop;
637
  lambda_linear_expression expression;
638
  lambda_lattice lattice;
639
 
640
  depth = LN_DEPTH (nest);
641
  invariants = LN_INVARIANTS (nest);
642
 
643
  /* Unfortunately, we can't know the number of constraints we'll have
644
     ahead of time, but this should be enough even in ridiculous loop nest
645
     cases. We must not go over this limit.  */
646
  A = lambda_matrix_new (128, depth);
647
  B = lambda_matrix_new (128, invariants);
648
  a = lambda_vector_new (128);
649
 
650
  A1 = lambda_matrix_new (128, depth);
651
  B1 = lambda_matrix_new (128, invariants);
652
  a1 = lambda_vector_new (128);
653
 
654
  /* Store the bounds in the equation matrix A, constant vector a, and
655
     invariant matrix B, so that we have Ax <= a + B.
656
     This requires a little equation rearranging so that everything is on the
657
     correct side of the inequality.  */
658
  size = 0;
659
  for (i = 0; i < depth; i++)
660
    {
661
      loop = LN_LOOPS (nest)[i];
662
 
663
      /* First we do the lower bound.  */
664
      if (LL_STEP (loop) > 0)
665
        expression = LL_LOWER_BOUND (loop);
666
      else
667
        expression = LL_UPPER_BOUND (loop);
668
 
669
      for (; expression != NULL; expression = LLE_NEXT (expression))
670
        {
671
          /* Fill in the coefficient.  */
672
          for (j = 0; j < i; j++)
673
            A[size][j] = LLE_COEFFICIENTS (expression)[j];
674
 
675
          /* And the invariant coefficient.  */
676
          for (j = 0; j < invariants; j++)
677
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
678
 
679
          /* And the constant.  */
680
          a[size] = LLE_CONSTANT (expression);
681
 
682
          /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
             constants and invariants on the right-hand side, and the bounded
             variable, times minus the denominator, on the left.  */
684
          A[size][i] = -1 * LLE_DENOMINATOR (expression);
685
          a[size] *= -1;
686
          for (j = 0; j < invariants; j++)
687
            B[size][j] *= -1;
688
 
689
          size++;
690
          /* Need to increase matrix sizes above.  */
691
          gcc_assert (size <= 127);
692
 
693
        }
694
 
695
      /* Then do the exact same thing for the upper bounds.  */
696
      if (LL_STEP (loop) > 0)
697
        expression = LL_UPPER_BOUND (loop);
698
      else
699
        expression = LL_LOWER_BOUND (loop);
700
 
701
      for (; expression != NULL; expression = LLE_NEXT (expression))
702
        {
703
          /* Fill in the coefficient.  */
704
          for (j = 0; j < i; j++)
705
            A[size][j] = LLE_COEFFICIENTS (expression)[j];
706
 
707
          /* And the invariant coefficient.  */
708
          for (j = 0; j < invariants; j++)
709
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
710
 
711
          /* And the constant.  */
712
          a[size] = LLE_CONSTANT (expression);
713
 
714
          /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
715
          for (j = 0; j < i; j++)
716
            A[size][j] *= -1;
717
          A[size][i] = LLE_DENOMINATOR (expression);
718
          size++;
719
          /* Need to increase matrix sizes above.  */
720
          gcc_assert (size <= 127);
721
 
722
        }
723
    }
724
 
725
  /* Compute the lattice base x = base * y + origin, where y is the
726
     base space.  */
727
  lattice = lambda_lattice_compute_base (nest);
728
 
729
  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
730
 
731
  /* A1 = A * L */
732
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
733
 
734
  /* a1 = a - A * origin constant.  */
735
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
736
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);
737
 
738
  /* B1 = B - A * origin invariant.  */
739
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
740
                      invariants);
741
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
742
 
743
  /* Now compute the auxiliary space bounds by first inverting U, multiplying
744
     it by A1, then performing Fourier-Motzkin.  */
745
 
746
  invertedtrans = lambda_matrix_new (depth, depth);
747
 
748
  /* Compute the inverse of U.  */
749
  lambda_matrix_inverse (LTM_MATRIX (trans),
750
                         invertedtrans, depth);
751
 
752
  /* A = A1 inv(U).  */
753
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
754
 
755
  return compute_nest_using_fourier_motzkin (size, depth, invariants,
756
                                             A, B1, a1);
757
}
758
 
/* Compute the loop bounds for the target space, using the bounds of
   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
   The target space loop bounds are computed by multiplying the triangular
   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
   the loop steps (positive or negative) is then used to swap the bounds if
   the loop counts downwards.
   Return the target loopnest.  */

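/* For example (hypothetical): with

     H = | 1 0 |
         | 1 2 |

   the diagonal entries become the target loop steps, so the outer target
   loop has step 1 and the inner one has step 2, while the off-diagonal entry
   feeds the inner loop's linear offset.  */
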
static lambda_loopnest
768
lambda_compute_target_space (lambda_loopnest auxillary_nest,
769
                             lambda_trans_matrix H, lambda_vector stepsigns)
770
{
771
  lambda_matrix inverse, H1;
772
  int determinant, i, j;
773
  int gcd1, gcd2;
774
  int factor;
775
 
776
  lambda_loopnest target_nest;
777
  int depth, invariants;
778
  lambda_matrix target;
779
 
780
  lambda_loop auxillary_loop, target_loop;
781
  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
782
 
783
  depth = LN_DEPTH (auxillary_nest);
784
  invariants = LN_INVARIANTS (auxillary_nest);
785
 
786
  inverse = lambda_matrix_new (depth, depth);
787
  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
788
 
789
  /* H1 is H excluding its diagonal.  */
790
  H1 = lambda_matrix_new (depth, depth);
791
  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
792
 
793
  for (i = 0; i < depth; i++)
794
    H1[i][i] = 0;
795
 
796
  /* Computes the linear offsets of the loop bounds.  */
797
  target = lambda_matrix_new (depth, depth);
798
  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
799
 
800
  target_nest = lambda_loopnest_new (depth, invariants);
801
 
802
  for (i = 0; i < depth; i++)
803
    {
804
 
805
      /* Get a new loop structure.  */
806
      target_loop = lambda_loop_new ();
807
      LN_LOOPS (target_nest)[i] = target_loop;
808
 
809
      /* Computes the gcd of the coefficients of the linear part.  */
810
      gcd1 = lambda_vector_gcd (target[i], i);
811
 
812
      /* Include the denominator in the GCD.  */
813
      gcd1 = gcd (gcd1, determinant);
814
 
815
      /* Now divide through by the gcd.  */
816
      for (j = 0; j < i; j++)
817
        target[i][j] = target[i][j] / gcd1;
818
 
819
      expression = lambda_linear_expression_new (depth, invariants);
820
      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
821
      LLE_DENOMINATOR (expression) = determinant / gcd1;
822
      LLE_CONSTANT (expression) = 0;
823
      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
824
                           invariants);
825
      LL_LINEAR_OFFSET (target_loop) = expression;
826
    }
827
 
828
  /* For each loop, compute the new bounds from H.  */
829
  for (i = 0; i < depth; i++)
830
    {
831
      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
832
      target_loop = LN_LOOPS (target_nest)[i];
833
      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
834
      factor = LTM_MATRIX (H)[i][i];
835
 
836
      /* First we do the lower bound.  */
837
      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
838
 
839
      for (; auxillary_expr != NULL;
840
           auxillary_expr = LLE_NEXT (auxillary_expr))
841
        {
842
          target_expr = lambda_linear_expression_new (depth, invariants);
843
          lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
844
                                     depth, inverse, depth,
845
                                     LLE_COEFFICIENTS (target_expr));
846
          lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
847
                                    LLE_COEFFICIENTS (target_expr), depth,
848
                                    factor);
849
 
850
          LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
851
          lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
852
                              LLE_INVARIANT_COEFFICIENTS (target_expr),
853
                              invariants);
854
          lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
855
                                    LLE_INVARIANT_COEFFICIENTS (target_expr),
856
                                    invariants, factor);
857
          LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
858
 
859
          if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
860
            {
861
              LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
862
                * determinant;
863
              lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
864
                                        (target_expr),
865
                                        LLE_INVARIANT_COEFFICIENTS
866
                                        (target_expr), invariants,
867
                                        determinant);
868
              LLE_DENOMINATOR (target_expr) =
869
                LLE_DENOMINATOR (target_expr) * determinant;
870
            }
871
          /* Find the gcd and divide by it here, rather than doing it
872
             at the tree level.  */
873
          gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
874
          gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
875
                                    invariants);
876
          gcd1 = gcd (gcd1, gcd2);
877
          gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
878
          gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
879
          for (j = 0; j < depth; j++)
880
            LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
881
          for (j = 0; j < invariants; j++)
882
            LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
883
          LLE_CONSTANT (target_expr) /= gcd1;
884
          LLE_DENOMINATOR (target_expr) /= gcd1;
885
          /* Ignore if identical to existing bound.  */
886
          if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
887
                          invariants))
888
            {
889
              LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
890
              LL_LOWER_BOUND (target_loop) = target_expr;
891
            }
892
        }
893
      /* Now do the upper bound.  */
894
      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
895
 
896
      for (; auxillary_expr != NULL;
897
           auxillary_expr = LLE_NEXT (auxillary_expr))
898
        {
899
          target_expr = lambda_linear_expression_new (depth, invariants);
900
          lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
901
                                     depth, inverse, depth,
902
                                     LLE_COEFFICIENTS (target_expr));
903
          lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
904
                                    LLE_COEFFICIENTS (target_expr), depth,
905
                                    factor);
906
          LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
907
          lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
908
                              LLE_INVARIANT_COEFFICIENTS (target_expr),
909
                              invariants);
910
          lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
911
                                    LLE_INVARIANT_COEFFICIENTS (target_expr),
912
                                    invariants, factor);
913
          LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
914
 
915
          if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
916
            {
917
              LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
918
                * determinant;
919
              lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
920
                                        (target_expr),
921
                                        LLE_INVARIANT_COEFFICIENTS
922
                                        (target_expr), invariants,
923
                                        determinant);
924
              LLE_DENOMINATOR (target_expr) =
925
                LLE_DENOMINATOR (target_expr) * determinant;
926
            }
927
          /* Find the gcd and divide by it here, instead of at the
928
             tree level.  */
929
          gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
930
          gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
931
                                    invariants);
932
          gcd1 = gcd (gcd1, gcd2);
933
          gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
934
          gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
935
          for (j = 0; j < depth; j++)
936
            LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
937
          for (j = 0; j < invariants; j++)
938
            LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
939
          LLE_CONSTANT (target_expr) /= gcd1;
940
          LLE_DENOMINATOR (target_expr) /= gcd1;
941
          /* Ignore if equal to existing bound.  */
942
          if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
943
                          invariants))
944
            {
945
              LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
946
              LL_UPPER_BOUND (target_loop) = target_expr;
947
            }
948
        }
949
    }
950
  for (i = 0; i < depth; i++)
951
    {
952
      target_loop = LN_LOOPS (target_nest)[i];
953
      /* If necessary, exchange the upper and lower bounds and negate
954
         the step size.  */
955
      if (stepsigns[i] < 0)
956
        {
957
          LL_STEP (target_loop) *= -1;
958
          tmp_expr = LL_LOWER_BOUND (target_loop);
959
          LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
960
          LL_UPPER_BOUND (target_loop) = tmp_expr;
961
        }
962
    }
963
  return target_nest;
964
}
965
 
966
/* Compute the step signs of the loops that result from applying TRANS to a
   nest whose current step signs are STEPSIGNS.  Return the new vector.  */
968
 
969
static lambda_vector
970
lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
971
{
972
  lambda_matrix matrix, H;
973
  int size;
974
  lambda_vector newsteps;
975
  int i, j, factor, minimum_column;
976
  int temp;
977
 
978
  matrix = LTM_MATRIX (trans);
979
  size = LTM_ROWSIZE (trans);
980
  H = lambda_matrix_new (size, size);
981
 
982
  newsteps = lambda_vector_new (size);
983
  lambda_vector_copy (stepsigns, newsteps, size);
984
 
985
  lambda_matrix_copy (matrix, H, size, size);
986
 
987
  for (j = 0; j < size; j++)
988
    {
989
      lambda_vector row;
990
      row = H[j];
991
      for (i = j; i < size; i++)
992
        if (row[i] < 0)
993
          lambda_matrix_col_negate (H, size, i);
994
      while (lambda_vector_first_nz (row, size, j + 1) < size)
995
        {
996
          minimum_column = lambda_vector_min_nz (row, size, j);
997
          lambda_matrix_col_exchange (H, size, j, minimum_column);
998
 
999
          temp = newsteps[j];
1000
          newsteps[j] = newsteps[minimum_column];
1001
          newsteps[minimum_column] = temp;
1002
 
1003
          for (i = j + 1; i < size; i++)
1004
            {
1005
              factor = row[i] / row[j];
1006
              lambda_matrix_col_add (H, size, j, i, -1 * factor);
1007
            }
1008
        }
1009
    }
1010
  return newsteps;
1011
}
1012
 
/* Transform NEST according to TRANS, and return the new loopnest.
   This involves
   1. Computing a lattice base for the transformation
   2. Composing the dense base with the specified transformation (TRANS)
   3. Decomposing the combined transformation into a lower triangular portion,
      and a unimodular portion.
   4. Computing the auxiliary nest using the unimodular portion.
   5. Computing the target nest using the auxiliary nest and the lower
      triangular portion.  */

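/* Conceptually (a hypothetical example): for a dense two-deep rectangular
   nest

     for (i = 0; i <= N; i++)
       for (j = 0; j <= M; j++)

   the lattice base is the identity, so the composed transform is just TRANS.
   With the interchange matrix TRANS = { {0, 1}, {1, 0} } the resulting
   target nest is the interchanged nest

     for (u = 0; u <= M; u++)
       for (v = 0; v <= N; v++)  */
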
lambda_loopnest
1024
lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1025
{
1026
  lambda_loopnest auxillary_nest, target_nest;
1027
 
1028
  int depth, invariants;
1029
  int i, j;
1030
  lambda_lattice lattice;
1031
  lambda_trans_matrix trans1, H, U;
1032
  lambda_loop loop;
1033
  lambda_linear_expression expression;
1034
  lambda_vector origin;
1035
  lambda_matrix origin_invariants;
1036
  lambda_vector stepsigns;
1037
  int f;
1038
 
1039
  depth = LN_DEPTH (nest);
1040
  invariants = LN_INVARIANTS (nest);
1041
 
1042
  /* Keep track of the signs of the loop steps.  */
1043
  stepsigns = lambda_vector_new (depth);
1044
  for (i = 0; i < depth; i++)
1045
    {
1046
      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1047
        stepsigns[i] = 1;
1048
      else
1049
        stepsigns[i] = -1;
1050
    }
1051
 
1052
  /* Compute the lattice base.  */
1053
  lattice = lambda_lattice_compute_base (nest);
1054
  trans1 = lambda_trans_matrix_new (depth, depth);
1055
 
1056
  /* Multiply the transformation matrix by the lattice base.  */
1057
 
1058
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1059
                      LTM_MATRIX (trans1), depth, depth, depth);
1060
 
1061
  /* Compute the Hermite normal form for the new transformation matrix.  */
1062
  H = lambda_trans_matrix_new (depth, depth);
1063
  U = lambda_trans_matrix_new (depth, depth);
1064
  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1065
                         LTM_MATRIX (U));
1066
 
1067
  /* Compute the auxiliary loop nest's space from the unimodular
1068
     portion.  */
1069
  auxillary_nest = lambda_compute_auxillary_space (nest, U);
1070
 
1071
  /* Compute the loop step signs from the old step signs and the
1072
     transformation matrix.  */
1073
  stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1074
 
1075
  /* Compute the target loop nest space from the auxiliary nest and
1076
     the lower triangular matrix H.  */
1077
  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1078
  origin = lambda_vector_new (depth);
1079
  origin_invariants = lambda_matrix_new (depth, invariants);
1080
  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1081
                             LATTICE_ORIGIN (lattice), origin);
1082
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1083
                      origin_invariants, depth, depth, invariants);
1084
 
1085
  for (i = 0; i < depth; i++)
1086
    {
1087
      loop = LN_LOOPS (target_nest)[i];
1088
      expression = LL_LINEAR_OFFSET (loop);
1089
      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1090
        f = 1;
1091
      else
1092
        f = LLE_DENOMINATOR (expression);
1093
 
1094
      LLE_CONSTANT (expression) += f * origin[i];
1095
 
1096
      for (j = 0; j < invariants; j++)
1097
        LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1098
          f * origin_invariants[i][j];
1099
    }
1100
 
1101
  return target_nest;
1102
 
1103
}
1104
 
/* Convert a gcc tree expression EXPR to a lambda linear expression, and
   return the new expression.  DEPTH is the depth of the loopnest.
   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
   is the amount we have to add/subtract from the expression because of the
   type of comparison it is used in.  */

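/* For example (hypothetical): converting the INTEGER_CST 10 with EXTRA == -1
   (the adjustment the caller below passes for a "<" exit test with step 1)
   yields a linear expression with constant 9, all coefficients zero, and
   denominator 1.  */
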
static lambda_linear_expression
1113
gcc_tree_to_linear_expression (int depth, tree expr,
1114
                               VEC(tree,heap) *outerinductionvars,
1115
                               VEC(tree,heap) *invariants, int extra)
1116
{
1117
  lambda_linear_expression lle = NULL;
1118
  switch (TREE_CODE (expr))
1119
    {
1120
    case INTEGER_CST:
1121
      {
1122
        lle = lambda_linear_expression_new (depth, 2 * depth);
1123
        LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1124
        if (extra != 0)
1125
          LLE_CONSTANT (lle) += extra;
1126
 
1127
        LLE_DENOMINATOR (lle) = 1;
1128
      }
1129
      break;
1130
    case SSA_NAME:
1131
      {
1132
        tree iv, invar;
1133
        size_t i;
1134
        for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1135
          if (iv != NULL)
1136
            {
1137
              if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1138
                {
1139
                  lle = lambda_linear_expression_new (depth, 2 * depth);
1140
                  LLE_COEFFICIENTS (lle)[i] = 1;
1141
                  if (extra != 0)
1142
                    LLE_CONSTANT (lle) = extra;
1143
 
1144
                  LLE_DENOMINATOR (lle) = 1;
1145
                }
1146
            }
1147
        for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1148
          if (invar != NULL)
1149
            {
1150
              if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1151
                {
1152
                  lle = lambda_linear_expression_new (depth, 2 * depth);
1153
                  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1154
                  if (extra != 0)
1155
                    LLE_CONSTANT (lle) = extra;
1156
                  LLE_DENOMINATOR (lle) = 1;
1157
                }
1158
            }
1159
      }
1160
      break;
1161
    default:
1162
      return NULL;
1163
    }
1164
 
1165
  return lle;
1166
}
1167
 
1168
/* Return the depth of the loopnest NEST */
1169
 
1170
static int
1171
depth_of_nest (struct loop *nest)
1172
{
1173
  size_t depth = 0;
1174
  while (nest)
1175
    {
1176
      depth++;
1177
      nest = nest->inner;
1178
    }
1179
  return depth;
1180
}
1181
 
1182
 
1183
/* Return true if OP is invariant in LOOP and all outer loops.  */
1184
 
1185
static bool
1186
invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1187
{
1188
  if (is_gimple_min_invariant (op))
1189
    return true;
1190
  if (loop->depth == 0)
1191
    return true;
1192
  if (!expr_invariant_in_loop_p (loop, op))
1193
    return false;
1194
  if (loop->outer
1195
      && !invariant_in_loop_and_outer_loops (loop->outer, op))
1196
    return false;
1197
  return true;
1198
}
1199
 
1200
/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1201
   or NULL if it could not be converted.
1202
   DEPTH is the depth of the loop.
1203
   INVARIANTS is a pointer to the array of loop invariants.
1204
   The induction variable for this loop should be stored in the parameter
1205
   OURINDUCTIONVAR.
1206
   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1207
 
1208
static lambda_loop
1209
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1210
                         VEC(tree,heap) ** invariants,
1211
                         tree * ourinductionvar,
1212
                         VEC(tree,heap) * outerinductionvars,
1213
                         VEC(tree,heap) ** lboundvars,
1214
                         VEC(tree,heap) ** uboundvars,
1215
                         VEC(int,heap) ** steps)
1216
{
1217
  tree phi;
1218
  tree exit_cond;
1219
  tree access_fn, inductionvar;
1220
  tree step;
1221
  lambda_loop lloop = NULL;
1222
  lambda_linear_expression lbound, ubound;
1223
  tree test;
1224
  int stepint;
1225
  int extra = 0;
1226
  tree lboundvar, uboundvar, uboundresult;
1227
 
1228
  /* Find out induction var and exit condition.  */
1229
  inductionvar = find_induction_var_from_exit_cond (loop);
1230
  exit_cond = get_loop_exit_condition (loop);
1231
 
1232
  if (inductionvar == NULL || exit_cond == NULL)
1233
    {
1234
      if (dump_file && (dump_flags & TDF_DETAILS))
1235
        fprintf (dump_file,
1236
                 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1237
      return NULL;
1238
    }
1239
 
1240
  test = TREE_OPERAND (exit_cond, 0);
1241
 
1242
  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1243
    {
1244
 
1245
      if (dump_file && (dump_flags & TDF_DETAILS))
1246
        fprintf (dump_file,
1247
                 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1248
 
1249
      return NULL;
1250
    }
1251
 
1252
  phi = SSA_NAME_DEF_STMT (inductionvar);
1253
  if (TREE_CODE (phi) != PHI_NODE)
1254
    {
1255
      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1256
      if (!phi)
1257
        {
1258
 
1259
          if (dump_file && (dump_flags & TDF_DETAILS))
1260
            fprintf (dump_file,
1261
                     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1262
 
1263
          return NULL;
1264
        }
1265
 
1266
      phi = SSA_NAME_DEF_STMT (phi);
1267
      if (TREE_CODE (phi) != PHI_NODE)
1268
        {
1269
 
1270
          if (dump_file && (dump_flags & TDF_DETAILS))
1271
            fprintf (dump_file,
1272
                     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1273
          return NULL;
1274
        }
1275
 
1276
    }
1277
 
1278
  /* The induction variable name/version we want to put in the array is the
1279
     result of the induction variable phi node.  */
1280
  *ourinductionvar = PHI_RESULT (phi);
1281
  access_fn = instantiate_parameters
1282
    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1283
  if (access_fn == chrec_dont_know)
1284
    {
1285
      if (dump_file && (dump_flags & TDF_DETAILS))
1286
        fprintf (dump_file,
1287
                 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1288
 
1289
      return NULL;
1290
    }
1291
 
1292
  step = evolution_part_in_loop_num (access_fn, loop->num);
1293
  if (!step || step == chrec_dont_know)
1294
    {
1295
      if (dump_file && (dump_flags & TDF_DETAILS))
1296
        fprintf (dump_file,
1297
                 "Unable to convert loop: Cannot determine step of loop.\n");
1298
 
1299
      return NULL;
1300
    }
1301
  if (TREE_CODE (step) != INTEGER_CST)
1302
    {
1303
 
1304
      if (dump_file && (dump_flags & TDF_DETAILS))
1305
        fprintf (dump_file,
1306
                 "Unable to convert loop: Step of loop is not integer.\n");
1307
      return NULL;
1308
    }
1309
 
1310
  stepint = TREE_INT_CST_LOW (step);
1311
 
1312
  /* Only want phis for induction vars, which will have two
1313
     arguments.  */
1314
  if (PHI_NUM_ARGS (phi) != 2)
1315
    {
1316
      if (dump_file && (dump_flags & TDF_DETAILS))
1317
        fprintf (dump_file,
1318
                 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1319
      return NULL;
1320
    }
1321
 
1322
  /* Another induction variable check. One argument's source should be
1323
     in the loop, one outside the loop.  */
1324
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1325
      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1326
    {
1327
 
1328
      if (dump_file && (dump_flags & TDF_DETAILS))
1329
        fprintf (dump_file,
1330
                 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1331
 
1332
      return NULL;
1333
    }
1334
 
1335
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1336
    {
1337
      lboundvar = PHI_ARG_DEF (phi, 1);
1338
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1339
                                              outerinductionvars, *invariants,
1340
                                              0);
1341
    }
1342
  else
1343
    {
1344
      lboundvar = PHI_ARG_DEF (phi, 0);
1345
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1346
                                              outerinductionvars, *invariants,
1347
                                              0);
1348
    }
1349
 
1350
  if (!lbound)
1351
    {
1352
 
1353
      if (dump_file && (dump_flags & TDF_DETAILS))
1354
        fprintf (dump_file,
1355
                 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1356
 
1357
      return NULL;
1358
    }
1359
  /* One part of the test may be a loop invariant tree.  */
1360
  VEC_reserve (tree, heap, *invariants, 1);
1361
  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1362
      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1363
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1364
  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1365
           && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1366
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1367
 
1368
  /* The non-induction variable part of the test is the upper bound variable.
1369
   */
1370
  if (TREE_OPERAND (test, 0) == inductionvar)
1371
    uboundvar = TREE_OPERAND (test, 1);
1372
  else
1373
    uboundvar = TREE_OPERAND (test, 0);
1374
 
1375
 
1376
  /* We only size the vectors assuming we have, at max, 2 times as many
1377
     invariants as we do loops (one for each bound).
1378
     This is just an arbitrary number, but it has to be matched against the
1379
     code below.  */
1380
  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1381
 
1382
 
1383
  /* The upper bound may need adjusting by one step, depending on the
     exit test.  */
1384
  if (TREE_CODE (test) == LT_EXPR)
1385
    extra = -1 * stepint;
1386
  else if (TREE_CODE (test) == NE_EXPR)
1387
    extra = -1 * stepint;
1388
  else if (TREE_CODE (test) == GT_EXPR)
1389
    extra = -1 * stepint;
1390
  else if (TREE_CODE (test) == EQ_EXPR)
1391
    extra = 1 * stepint;
1392
 
1393
  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1394
                                          outerinductionvars,
1395
                                          *invariants, extra);
1396
  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1397
                         build_int_cst (TREE_TYPE (uboundvar), extra));
1398
  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1399
  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1400
  VEC_safe_push (int, heap, *steps, stepint);
1401
  if (!ubound)
1402
    {
1403
      if (dump_file && (dump_flags & TDF_DETAILS))
1404
        fprintf (dump_file,
1405
                 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1406
      return NULL;
1407
    }
1408
 
1409
  lloop = lambda_loop_new ();
1410
  LL_STEP (lloop) = stepint;
1411
  LL_LOWER_BOUND (lloop) = lbound;
1412
  LL_UPPER_BOUND (lloop) = ubound;
1413
  return lloop;
1414
}
1415
 
1416
/* Given a LOOP, find the induction variable it is testing against in the exit
1417
   condition.  Return the induction variable if found, NULL otherwise.  */
1418
 
1419
static tree
1420
find_induction_var_from_exit_cond (struct loop *loop)
1421
{
1422
  tree expr = get_loop_exit_condition (loop);
1423
  tree ivarop;
1424
  tree test;
1425
  if (expr == NULL_TREE)
1426
    return NULL_TREE;
1427
  if (TREE_CODE (expr) != COND_EXPR)
1428
    return NULL_TREE;
1429
  test = TREE_OPERAND (expr, 0);
1430
  if (!COMPARISON_CLASS_P (test))
1431
    return NULL_TREE;
1432
 
1433
  /* Find the side that is invariant in this loop. The ivar must be the other
1434
     side.  */
1435
 
1436
  if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1437
      ivarop = TREE_OPERAND (test, 1);
1438
  else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1439
      ivarop = TREE_OPERAND (test, 0);
1440
  else
1441
    return NULL_TREE;
1442
 
1443
  if (TREE_CODE (ivarop) != SSA_NAME)
1444
    return NULL_TREE;
1445
  return ivarop;
1446
}
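 
/* Example (hypothetical names): for an exit condition such as

       if (i_3 < n_7) goto <body>; else goto <exit>;

   n_7 is invariant in the loop, so the routine above returns i_3.  If
   neither operand of the comparison is loop-invariant, or the chosen
   operand is not an SSA_NAME, NULL_TREE is returned instead.  */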
1447
 
1448
DEF_VEC_P(lambda_loop);
1449
DEF_VEC_ALLOC_P(lambda_loop,heap);
1450
 
1451
/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1452
   Return the new loop nest.
1453
   INDUCTIONVARS is a pointer to an array of induction variables for the
1454
   loopnest that will be filled in during this process.
1455
   INVARIANTS is a pointer to an array of invariants that will be filled in
1456
   during this process.  */
1457
 
1458
lambda_loopnest
1459
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1460
                                 struct loop *loop_nest,
1461
                                 VEC(tree,heap) **inductionvars,
1462
                                 VEC(tree,heap) **invariants)
1463
{
1464
  lambda_loopnest ret = NULL;
1465
  struct loop *temp = loop_nest;
1466
  int depth = depth_of_nest (loop_nest);
1467
  size_t i;
1468
  VEC(lambda_loop,heap) *loops = NULL;
1469
  VEC(tree,heap) *uboundvars = NULL;
1470
  VEC(tree,heap) *lboundvars  = NULL;
1471
  VEC(int,heap) *steps = NULL;
1472
  lambda_loop newloop;
1473
  tree inductionvar = NULL;
1474
  bool perfect_nest = perfect_nest_p (loop_nest);
1475
 
1476
  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1477
    goto fail;
1478
 
1479
  while (temp)
1480
    {
1481
      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1482
                                         &inductionvar, *inductionvars,
1483
                                         &lboundvars, &uboundvars,
1484
                                         &steps);
1485
      if (!newloop)
1486
        goto fail;
1487
 
1488
      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1489
      VEC_safe_push (lambda_loop, heap, loops, newloop);
1490
      temp = temp->inner;
1491
    }
1492
 
1493
  if (!perfect_nest)
1494
    {
1495
      if (!perfect_nestify (currloops, loop_nest,
1496
                            lboundvars, uboundvars, steps, *inductionvars))
1497
        {
1498
          if (dump_file)
1499
            fprintf (dump_file,
1500
                     "Not a perfect loop nest and couldn't convert to one.\n");
1501
          goto fail;
1502
        }
1503
      else if (dump_file)
1504
        fprintf (dump_file,
1505
                 "Successfully converted loop nest to perfect loop nest.\n");
1506
    }
1507
 
1508
  ret = lambda_loopnest_new (depth, 2 * depth);
1509
 
1510
  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1511
    LN_LOOPS (ret)[i] = newloop;
1512
 
1513
 fail:
1514
  VEC_free (lambda_loop, heap, loops);
1515
  VEC_free (tree, heap, uboundvars);
1516
  VEC_free (tree, heap, lboundvars);
1517
  VEC_free (int, heap, steps);
1518
 
1519
  return ret;
1520
}
1521
 
1522
/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1523
   STMTS_TO_INSERT is a pointer to a tree where the statements that need to be
1524
   inserted are stored.  INDUCTION_VARS is the array of induction
1525
   variables for the loop this LBV is from.  TYPE is the tree type to use for
1526
   the variables and trees involved.  */
1527
 
1528
static tree
1529
lbv_to_gcc_expression (lambda_body_vector lbv,
1530
                       tree type, VEC(tree,heap) *induction_vars,
1531
                       tree *stmts_to_insert)
1532
{
1533
  tree stmts, stmt, resvar, name;
1534
  tree iv;
1535
  size_t i;
1536
  tree_stmt_iterator tsi;
1537
 
1538
  /* Create a statement list and a linear expression temporary.  */
1539
  stmts = alloc_stmt_list ();
1540
  resvar = create_tmp_var (type, "lbvtmp");
1541
  add_referenced_var (resvar);
1542
 
1543
  /* Start at 0.  */
1544
  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1545
  name = make_ssa_name (resvar, stmt);
1546
  TREE_OPERAND (stmt, 0) = name;
1547
  tsi = tsi_last (stmts);
1548
  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1549
 
1550
  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1551
    {
1552
      if (LBV_COEFFICIENTS (lbv)[i] != 0)
1553
        {
1554
          tree newname;
1555
          tree coeffmult;
1556
 
1557
          /* newname = coefficient * induction_variable */
1558
          coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1559
          stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1560
                         fold_build2 (MULT_EXPR, type, iv, coeffmult));
1561
 
1562
          newname = make_ssa_name (resvar, stmt);
1563
          TREE_OPERAND (stmt, 0) = newname;
1564
          fold_stmt (&stmt);
1565
          tsi = tsi_last (stmts);
1566
          tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1567
 
1568
          /* name = name + newname */
1569
          stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1570
                         build2 (PLUS_EXPR, type, name, newname));
1571
          name = make_ssa_name (resvar, stmt);
1572
          TREE_OPERAND (stmt, 0) = name;
1573
          fold_stmt (&stmt);
1574
          tsi = tsi_last (stmts);
1575
          tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1576
 
1577
        }
1578
    }
1579
 
1580
  /* Handle any denominator that occurs.  */
1581
  if (LBV_DENOMINATOR (lbv) != 1)
1582
    {
1583
      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1584
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1585
                     build2 (CEIL_DIV_EXPR, type, name, denominator));
1586
      name = make_ssa_name (resvar, stmt);
1587
      TREE_OPERAND (stmt, 0) = name;
1588
      fold_stmt (&stmt);
1589
      tsi = tsi_last (stmts);
1590
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1591
    }
1592
  *stmts_to_insert = stmts;
1593
  return name;
1594
}
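 
/* Example (hypothetical names): for a body vector with coefficients
   {1, 2} over induction variables (i, j) and denominator 1, the
   statement list built above is roughly

       lbvtmp_1 = 0;
       lbvtmp_2 = i * 1;                  <- folded to plain i
       lbvtmp_3 = lbvtmp_1 + lbvtmp_2;
       lbvtmp_4 = j * 2;
       lbvtmp_5 = lbvtmp_3 + lbvtmp_4;

   and the SSA name lbvtmp_5, representing i + 2*j, is returned.  */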
1595
 
1596
/* Convert a linear expression from coefficient and constant form to a
1597
   gcc tree.
1598
   Return the tree that represents the final value of the expression.
1599
   LLE is the linear expression to convert.
1600
   OFFSET is the linear offset to apply to the expression.
1601
   TYPE is the tree type to use for the variables and math.
1602
   INDUCTION_VARS is a vector of induction variables for the loops.
1603
   INVARIANTS is a vector of the loop nest invariants.
1604
   WRAP specifies what tree code to wrap the results in, if there is more than
1605
   one (it is either MAX_EXPR, or MIN_EXPR).
1606
   STMTS_TO_INSERT is a pointer to the statement list we fill in with
1607
   statements that need to be inserted for the linear expression.  */
1608
 
1609
static tree
1610
lle_to_gcc_expression (lambda_linear_expression lle,
1611
                       lambda_linear_expression offset,
1612
                       tree type,
1613
                       VEC(tree,heap) *induction_vars,
1614
                       VEC(tree,heap) *invariants,
1615
                       enum tree_code wrap, tree *stmts_to_insert)
1616
{
1617
  tree stmts, stmt, resvar, name;
1618
  size_t i;
1619
  tree_stmt_iterator tsi;
1620
  tree iv, invar;
1621
  VEC(tree,heap) *results = NULL;
1622
 
1623
  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1624
  name = NULL_TREE;
1625
  /* Create a statement list and a linear expression temporary.  */
1626
  stmts = alloc_stmt_list ();
1627
  resvar = create_tmp_var (type, "lletmp");
1628
  add_referenced_var (resvar);
1629
 
1630
  /* Build up the linear expressions, and put the variable representing the
1631
     result in the results array.  */
1632
  for (; lle != NULL; lle = LLE_NEXT (lle))
1633
    {
1634
      /* Start at name = 0.  */
1635
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1636
      name = make_ssa_name (resvar, stmt);
1637
      TREE_OPERAND (stmt, 0) = name;
1638
      fold_stmt (&stmt);
1639
      tsi = tsi_last (stmts);
1640
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1641
 
1642
      /* First do the induction variables.
1643
         At the end, name = name + all the induction variables added
1644
         together.  */
1645
      for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1646
        {
1647
          if (LLE_COEFFICIENTS (lle)[i] != 0)
1648
            {
1649
              tree newname;
1650
              tree mult;
1651
              tree coeff;
1652
 
1653
              /* mult = induction variable * coefficient.  */
1654
              if (LLE_COEFFICIENTS (lle)[i] == 1)
1655
                {
1656
                  mult = VEC_index (tree, induction_vars, i);
1657
                }
1658
              else
1659
                {
1660
                  coeff = build_int_cst (type,
1661
                                         LLE_COEFFICIENTS (lle)[i]);
1662
                  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1663
                }
1664
 
1665
              /* newname = mult */
1666
              stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1667
              newname = make_ssa_name (resvar, stmt);
1668
              TREE_OPERAND (stmt, 0) = newname;
1669
              fold_stmt (&stmt);
1670
              tsi = tsi_last (stmts);
1671
              tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1672
 
1673
              /* name = name + newname */
1674
              stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1675
                             build2 (PLUS_EXPR, type, name, newname));
1676
              name = make_ssa_name (resvar, stmt);
1677
              TREE_OPERAND (stmt, 0) = name;
1678
              fold_stmt (&stmt);
1679
              tsi = tsi_last (stmts);
1680
              tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1681
            }
1682
        }
1683
 
1684
      /* Handle our invariants.
1685
         At the end, we have name = name + result of adding all multiplied
1686
         invariants.  */
1687
      for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1688
        {
1689
          if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1690
            {
1691
              tree newname;
1692
              tree mult;
1693
              tree coeff;
1694
              int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1695
              /* mult = invariant * coefficient  */
1696
              if (invcoeff == 1)
1697
                {
1698
                  mult = invar;
1699
                }
1700
              else
1701
                {
1702
                  coeff = build_int_cst (type, invcoeff);
1703
                  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1704
                }
1705
 
1706
              /* newname = mult */
1707
              stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1708
              newname = make_ssa_name (resvar, stmt);
1709
              TREE_OPERAND (stmt, 0) = newname;
1710
              fold_stmt (&stmt);
1711
              tsi = tsi_last (stmts);
1712
              tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1713
 
1714
              /* name = name + newname */
1715
              stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1716
                             build2 (PLUS_EXPR, type, name, newname));
1717
              name = make_ssa_name (resvar, stmt);
1718
              TREE_OPERAND (stmt, 0) = name;
1719
              fold_stmt (&stmt);
1720
              tsi = tsi_last (stmts);
1721
              tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1722
            }
1723
        }
1724
 
1725
      /* Now handle the constant.
1726
         name = name + constant.  */
1727
      if (LLE_CONSTANT (lle) != 0)
1728
        {
1729
          stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1730
                         build2 (PLUS_EXPR, type, name,
1731
                                 build_int_cst (type, LLE_CONSTANT (lle))));
1732
          name = make_ssa_name (resvar, stmt);
1733
          TREE_OPERAND (stmt, 0) = name;
1734
          fold_stmt (&stmt);
1735
          tsi = tsi_last (stmts);
1736
          tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1737
        }
1738
 
1739
      /* Now handle the offset.
1740
         name = name + linear offset.  */
1741
      if (LLE_CONSTANT (offset) != 0)
1742
        {
1743
          stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1744
                         build2 (PLUS_EXPR, type, name,
1745
                                 build_int_cst (type, LLE_CONSTANT (offset))));
1746
          name = make_ssa_name (resvar, stmt);
1747
          TREE_OPERAND (stmt, 0) = name;
1748
          fold_stmt (&stmt);
1749
          tsi = tsi_last (stmts);
1750
          tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1751
        }
1752
 
1753
      /* Handle any denominator that occurs.  */
1754
      if (LLE_DENOMINATOR (lle) != 1)
1755
        {
1756
          stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1757
          stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1758
                         type, name, stmt);
1759
          stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
1760
 
1761
          /* name = {ceil, floor}(name/denominator) */
1762
          name = make_ssa_name (resvar, stmt);
1763
          TREE_OPERAND (stmt, 0) = name;
1764
          tsi = tsi_last (stmts);
1765
          tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1766
        }
1767
      VEC_safe_push (tree, heap, results, name);
1768
    }
1769
 
1770
  /* Again, out of laziness, we don't handle this case yet.  It's not
1771
     hard, it just hasn't occurred.  */
1772
  gcc_assert (VEC_length (tree, results) <= 2);
1773
 
1774
  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1775
  if (VEC_length (tree, results) > 1)
1776
    {
1777
      tree op1 = VEC_index (tree, results, 0);
1778
      tree op2 = VEC_index (tree, results, 1);
1779
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1780
                     build2 (wrap, type, op1, op2));
1781
      name = make_ssa_name (resvar, stmt);
1782
      TREE_OPERAND (stmt, 0) = name;
1783
      tsi = tsi_last (stmts);
1784
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1785
    }
1786
 
1787
  VEC_free (tree, heap, results);
1788
 
1789
  *stmts_to_insert = stmts;
1790
  return name;
1791
}
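 
/* Example (hypothetical names): an upper bound that is the minimum of
   two linear expressions, say N - 1 and M - 1, reaches this routine as
   two lambda_linear_expressions chained through LLE_NEXT.  Each is
   expanded into its own sequence of lletmp assignments as above, and
   since there are two results and WRAP is MIN_EXPR, a final statement

       lletmp_k = MIN_EXPR <result for N - 1, result for M - 1>;

   is emitted and its SSA name returned.  A non-unit denominator is
   applied per result with CEIL_DIV_EXPR when wrapping with MAX_EXPR
   (lower bounds) and FLOOR_DIV_EXPR when wrapping with MIN_EXPR
   (upper bounds), keeping the generated bounds conservative.  */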
1792
 
1793
/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1794
   it, back into gcc code.  This changes the
1795
   loops, their induction variables, and their bodies, so that they
1796
   match the transformed loopnest.
1797
   OLD_LOOPNEST is the loopnest before we've replaced it with the new
1798
   loopnest.
1799
   OLD_IVS is a vector of induction variables from the old loopnest.
1800
   INVARIANTS is a vector of loop invariants from the old loopnest.
1801
   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1802
   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1803
   NEW_LOOPNEST.  */
1804
 
1805
void
1806
lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1807
                                 VEC(tree,heap) *old_ivs,
1808
                                 VEC(tree,heap) *invariants,
1809
                                 lambda_loopnest new_loopnest,
1810
                                 lambda_trans_matrix transform)
1811
{
1812
  struct loop *temp;
1813
  size_t i = 0;
1814
  size_t depth = 0;
1815
  VEC(tree,heap) *new_ivs = NULL;
1816
  tree oldiv;
1817
 
1818
  block_stmt_iterator bsi;
1819
 
1820
  if (dump_file)
1821
    {
1822
      transform = lambda_trans_matrix_inverse (transform);
1823
      fprintf (dump_file, "Inverse of transformation matrix:\n");
1824
      print_lambda_trans_matrix (dump_file, transform);
1825
    }
1826
  depth = depth_of_nest (old_loopnest);
1827
  temp = old_loopnest;
1828
 
1829
  while (temp)
1830
    {
1831
      lambda_loop newloop;
1832
      basic_block bb;
1833
      edge exit;
1834
      tree ivvar, ivvarinced, exitcond, stmts;
1835
      enum tree_code testtype;
1836
      tree newupperbound, newlowerbound;
1837
      lambda_linear_expression offset;
1838
      tree type;
1839
      bool insert_after;
1840
      tree inc_stmt;
1841
 
1842
      oldiv = VEC_index (tree, old_ivs, i);
1843
      type = TREE_TYPE (oldiv);
1844
 
1845
      /* First, build the new induction variable temporary  */
1846
 
1847
      ivvar = create_tmp_var (type, "lnivtmp");
1848
      add_referenced_var (ivvar);
1849
 
1850
      VEC_safe_push (tree, heap, new_ivs, ivvar);
1851
 
1852
      newloop = LN_LOOPS (new_loopnest)[i];
1853
 
1854
      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1855
         cases for now.  */
1856
      offset = LL_LINEAR_OFFSET (newloop);
1857
 
1858
      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1859
                  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1860
 
1861
      /* Now build the  new lower bounds, and insert the statements
1862
         necessary to generate it on the loop preheader.  */
1863
      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1864
                                             LL_LINEAR_OFFSET (newloop),
1865
                                             type,
1866
                                             new_ivs,
1867
                                             invariants, MAX_EXPR, &stmts);
1868
      bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1869
      bsi_commit_edge_inserts ();
1870
      /* Build the new upper bound and insert its statements in the
1871
         basic block of the exit condition */
1872
      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1873
                                             LL_LINEAR_OFFSET (newloop),
1874
                                             type,
1875
                                             new_ivs,
1876
                                             invariants, MIN_EXPR, &stmts);
1877
      exit = temp->single_exit;
1878
      exitcond = get_loop_exit_condition (temp);
1879
      bb = bb_for_stmt (exitcond);
1880
      bsi = bsi_start (bb);
1881
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1882
 
1883
      /* Create the new iv.  */
1884
 
1885
      standard_iv_increment_position (temp, &bsi, &insert_after);
1886
      create_iv (newlowerbound,
1887
                 build_int_cst (type, LL_STEP (newloop)),
1888
                 ivvar, temp, &bsi, insert_after, &ivvar,
1889
                 NULL);
1890
 
1891
      /* Unfortunately, the incremented ivvar that create_iv inserted may not
1892
         dominate the block containing the exit condition.
1893
         So we simply create our own incremented iv to use in the new exit
1894
         test,  and let redundancy elimination sort it out.  */
1895
      inc_stmt = build2 (PLUS_EXPR, type,
1896
                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1897
      inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1898
                         inc_stmt);
1899
      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1900
      TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1901
      bsi = bsi_for_stmt (exitcond);
1902
      bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1903
 
1904
      /* Replace the exit condition with the new upper bound
1905
         comparison.  */
1906
 
1907
      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1908
 
1909
      /* We want to build a conditional where true means exit the loop, and
1910
         false means continue the loop.
1911
         So swap the testtype if this isn't the way things are.  */
1912
 
1913
      if (exit->flags & EDGE_FALSE_VALUE)
1914
        testtype = swap_tree_comparison (testtype);
1915
 
1916
      COND_EXPR_COND (exitcond) = build2 (testtype,
1917
                                          boolean_type_node,
1918
                                          newupperbound, ivvarinced);
1919
      update_stmt (exitcond);
1920
      VEC_replace (tree, new_ivs, i, ivvar);
1921
 
1922
      i++;
1923
      temp = temp->inner;
1924
    }
1925
 
1926
  /* Rewrite uses of the old ivs so that they are now specified in terms of
1927
     the new ivs.  */
1928
 
1929
  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1930
    {
1931
      imm_use_iterator imm_iter;
1932
      use_operand_p use_p;
1933
      tree oldiv_def;
1934
      tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1935
      tree stmt;
1936
 
1937
      if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1938
        oldiv_def = PHI_RESULT (oldiv_stmt);
1939
      else
1940
        oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1941
      gcc_assert (oldiv_def != NULL_TREE);
1942
 
1943
      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1944
        {
1945
          tree newiv, stmts;
1946
          lambda_body_vector lbv, newlbv;
1947
 
1948
          gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1949
 
1950
          /* Compute the new expression for the induction
1951
             variable.  */
1952
          depth = VEC_length (tree, new_ivs);
1953
          lbv = lambda_body_vector_new (depth);
1954
          LBV_COEFFICIENTS (lbv)[i] = 1;
1955
 
1956
          newlbv = lambda_body_vector_compute_new (transform, lbv);
1957
 
1958
          newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1959
                                         new_ivs, &stmts);
1960
          bsi = bsi_for_stmt (stmt);
1961
          /* Insert the statements to build that
1962
             expression.  */
1963
          bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1964
 
1965
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1966
            propagate_value (use_p, newiv);
1967
          update_stmt (stmt);
1968
        }
1969
    }
1970
  VEC_free (tree, heap, new_ivs);
1971
}
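 
/* Illustration (editorial, names illustrative only): for a depth-2 nest
   and the interchange transform

       T = | 0 1 |
           | 1 0 |

   the code above creates a new induction variable (lnivtmp) per loop,
   emits the MAX-wrapped lower bound on each preheader edge and the
   MIN-wrapped upper bound in the exit-condition block, rewrites each
   exit test against a freshly incremented copy of the new iv, and then
   re-expresses every use of an old iv as a linear combination of the
   new ivs via lambda_body_vector_compute_new and lbv_to_gcc_expression,
   so that the outer loop ends up traversing the old inner range and
   vice versa.  */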
1972
 
1973
/* Return TRUE if this is not an interesting statement from the perspective of
1974
   determining if we have a perfect loop nest.  */
1975
 
1976
static bool
1977
not_interesting_stmt (tree stmt)
1978
{
1979
  /* Note that COND_EXPR's aren't interesting because if they were exiting the
1980
     loop, we would have already failed the number of exits tests.  */
1981
  if (TREE_CODE (stmt) == LABEL_EXPR
1982
      || TREE_CODE (stmt) == GOTO_EXPR
1983
      || TREE_CODE (stmt) == COND_EXPR)
1984
    return true;
1985
  return false;
1986
}
1987
 
1988
/* Return TRUE if PHI uses DEF for its in-the-loop edge for LOOP.  */
1989
 
1990
static bool
1991
phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1992
{
1993
  int i;
1994
  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1995
    if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1996
      if (PHI_ARG_DEF (phi, i) == def)
1997
        return true;
1998
  return false;
1999
}
2000
 
2001
/* Return TRUE if STMT is a use of PHI_RESULT.  */
2002
 
2003
static bool
2004
stmt_uses_phi_result (tree stmt, tree phi_result)
2005
{
2006
  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2007
 
2008
  /* This is conservatively true, because we only want SIMPLE bumpers
2009
     of the form x +- constant for our pass.  */
2010
  return (use == phi_result);
2011
}
2012
 
2013
/* STMT is a bumper stmt for LOOP if the version it defines is used in the
2014
   in-loop-edge in a phi node, and the operand it uses is the result of that
2015
   phi node.
2016
   I.E. i_29 = i_3 + 1
2017
        i_3 = PHI (0, i_29);  */
2018
 
2019
static bool
2020
stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2021
{
2022
  tree use;
2023
  tree def;
2024
  imm_use_iterator iter;
2025
  use_operand_p use_p;
2026
 
2027
  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2028
  if (!def)
2029
    return false;
2030
 
2031
  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2032
    {
2033
      use = USE_STMT (use_p);
2034
      if (TREE_CODE (use) == PHI_NODE)
2035
        {
2036
          if (phi_loop_edge_uses_def (loop, use, def))
2037
            if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2038
              return true;
2039
        }
2040
    }
2041
  return false;
2042
}
2043
 
2044
 
2045
/* Return true if LOOP is a perfect loop nest.
2046
   Perfect loop nests are those loop nests where all code occurs in the
2047
   innermost loop body.
2048
   For example, if S1 is an arbitrary program statement, then
2049
 
2050
   the nest
2051
   DO I = 1, 20
2052
       S1
2053
       DO J = 1, 20
2054
       ...
2055
       END DO
2056
   END DO
2057
   is not a perfect loop nest because of S1.
2058
 
2059
   DO I = 1, 20
2060
      DO J = 1, 20
2061
        S1
2062
        ...
2063
      END DO
2064
   END DO
2065
   is a perfect loop nest.
2066
 
2067
   Since we don't have high level loops anymore, we basically have to walk our
2068
   statements and ignore those that are there because the loop needs them (i.e.
2069
   the induction variable increment, and jump back to the top of the loop).  */
2070
 
2071
bool
2072
perfect_nest_p (struct loop *loop)
2073
{
2074
  basic_block *bbs;
2075
  size_t i;
2076
  tree exit_cond;
2077
 
2078
  if (!loop->inner)
2079
    return true;
2080
  bbs = get_loop_body (loop);
2081
  exit_cond = get_loop_exit_condition (loop);
2082
  for (i = 0; i < loop->num_nodes; i++)
2083
    {
2084
      if (bbs[i]->loop_father == loop)
2085
        {
2086
          block_stmt_iterator bsi;
2087
          for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2088
            {
2089
              tree stmt = bsi_stmt (bsi);
2090
              if (stmt == exit_cond
2091
                  || not_interesting_stmt (stmt)
2092
                  || stmt_is_bumper_for_loop (loop, stmt))
2093
                continue;
2094
              free (bbs);
2095
              return false;
2096
            }
2097
        }
2098
    }
2099
  free (bbs);
2100
  /* See if the inner loops are perfectly nested as well.  */
2101
  if (loop->inner)
2102
    return perfect_nest_p (loop->inner);
2103
  return true;
2104
}
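 
/* Editorial illustration: in C terms, the nest

       for (i = 0; i < n; i++)
         {
           t = a[i];                    <- statement outside the inner body
           for (j = 0; j < n; j++)
             b[i][j] = t + j;
         }

   is not perfect because of the assignment to t, while a nest whose only
   non-overhead statements sit in the innermost body is.  perfect_nest_p
   rejects the first form; can_convert_to_perfect_nest and
   perfect_nestify below attempt to repair some such cases by moving or
   duplicating the offending scalar statements.  */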
2105
 
2106
/* Replace the uses of X in STMT, and any uses with the same step as X, with Y.
2107
   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2108
   avoid creating duplicate temporaries, and FIRSTBSI is a statement
2109
   iterator where new temporaries should be inserted at the beginning
2110
   of body basic block.  */
2111
 
2112
static void
2113
replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2114
                                int xstep, tree y, tree yinit,
2115
                                htab_t replacements,
2116
                                block_stmt_iterator *firstbsi)
2117
{
2118
  ssa_op_iter iter;
2119
  use_operand_p use_p;
2120
 
2121
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2122
    {
2123
      tree use = USE_FROM_PTR (use_p);
2124
      tree step = NULL_TREE;
2125
      tree scev, init, val, var, setstmt;
2126
      struct tree_map *h, in;
2127
      void **loc;
2128
 
2129
      /* Replace uses of X with Y right away.  */
2130
      if (use == x)
2131
        {
2132
          SET_USE (use_p, y);
2133
          continue;
2134
        }
2135
 
2136
      scev = instantiate_parameters (loop,
2137
                                     analyze_scalar_evolution (loop, use));
2138
 
2139
      if (scev == NULL || scev == chrec_dont_know)
2140
        continue;
2141
 
2142
      step = evolution_part_in_loop_num (scev, loop->num);
2143
      if (step == NULL
2144
          || step == chrec_dont_know
2145
          || TREE_CODE (step) != INTEGER_CST
2146
          || int_cst_value (step) != xstep)
2147
        continue;
2148
 
2149
      /* Use REPLACEMENTS hash table to cache already created
2150
         temporaries.  */
2151
      in.hash = htab_hash_pointer (use);
2152
      in.from = use;
2153
      h = htab_find_with_hash (replacements, &in, in.hash);
2154
      if (h != NULL)
2155
        {
2156
          SET_USE (use_p, h->to);
2157
          continue;
2158
        }
2159
 
2160
      /* USE which has the same step as X should be replaced
2161
         with a temporary set to Y + INIT - YINIT.  */
2162
      init = initial_condition_in_loop_num (scev, loop->num);
2163
      gcc_assert (init != NULL && init != chrec_dont_know);
2164
      if (TREE_TYPE (use) == TREE_TYPE (y))
2165
        {
2166
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2167
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2168
          if (val == y)
2169
            {
2170
              /* If X has the same type as USE, the same step
2171
                 and same initial value, it can be replaced by Y.  */
2172
              SET_USE (use_p, y);
2173
              continue;
2174
            }
2175
        }
2176
      else
2177
        {
2178
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2179
          val = fold_convert (TREE_TYPE (use), val);
2180
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2181
        }
2182
 
2183
      /* Create a temporary variable and insert it at the beginning
2184
         of the loop body basic block, right after the PHI node
2185
         which sets Y.  */
2186
      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2187
      add_referenced_var (var);
2188
      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2189
      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2190
      var = make_ssa_name (var, setstmt);
2191
      TREE_OPERAND (setstmt, 0) = var;
2192
      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2193
      update_stmt (setstmt);
2194
      SET_USE (use_p, var);
2195
      h = ggc_alloc (sizeof (struct tree_map));
2196
      h->hash = in.hash;
2197
      h->from = use;
2198
      h->to = var;
2199
      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2200
      gcc_assert ((*(struct tree_map **)loc) == NULL);
2201
      *(struct tree_map **) loc = h;
2202
    }
2203
}
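 
/* Editorial example (hypothetical SSA names): suppose X is the old
   outer induction variable with initial value 0 and step 1, and Y is
   the replacement iv created by perfect_nestify with YINIT = 0.  A use
   k_9 whose evolution in the loop is {10, +, 1} has the same step, so
   it is rewritten to a fresh temporary

       perfecttmp_3 = Y + (10 - 0);

   inserted at FIRSTBSI, and later occurrences of k_9 in moved
   statements reuse the cached temporary found in REPLACEMENTS.  */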
2204
 
2205
/* Return true if STMT is an exit PHI for LOOP.  */
2206
 
2207
static bool
2208
exit_phi_for_loop_p (struct loop *loop, tree stmt)
2209
{
2210
 
2211
  if (TREE_CODE (stmt) != PHI_NODE
2212
      || PHI_NUM_ARGS (stmt) != 1
2213
      || bb_for_stmt (stmt) != loop->single_exit->dest)
2214
    return false;
2215
 
2216
  return true;
2217
}
2218
 
2219
/* Return true if STMT can be put back into the loop INNER, by
2220
   copying it to the beginning of that loop and changing the uses.  */
2221
 
2222
static bool
2223
can_put_in_inner_loop (struct loop *inner, tree stmt)
2224
{
2225
  imm_use_iterator imm_iter;
2226
  use_operand_p use_p;
2227
 
2228
  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2229
  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2230
      || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2231
    return false;
2232
 
2233
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2234
    {
2235
      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2236
        {
2237
          basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2238
 
2239
          if (!flow_bb_inside_loop_p (inner, immbb))
2240
            return false;
2241
        }
2242
    }
2243
  return true;
2244
}
2245
 
2246
/* Return true if STMT can be put *after* the inner loop of LOOP.  */
2247
static bool
2248
can_put_after_inner_loop (struct loop *loop, tree stmt)
2249
{
2250
  imm_use_iterator imm_iter;
2251
  use_operand_p use_p;
2252
 
2253
  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2254
    return false;
2255
 
2256
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2257
    {
2258
      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2259
        {
2260
          basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2261
 
2262
          if (!dominated_by_p (CDI_DOMINATORS,
2263
                               immbb,
2264
                               loop->inner->header)
2265
              && !can_put_in_inner_loop (loop->inner, stmt))
2266
            return false;
2267
        }
2268
    }
2269
  return true;
2270
}
2271
 
2272
 
2273
 
2274
/* Return TRUE if LOOP is an imperfect nest that we can convert to a
2275
   perfect one.  At the moment, we only handle imperfect nests of
2276
   depth 2, where all of the statements occur after the inner loop.  */
2277
 
2278
static bool
2279
can_convert_to_perfect_nest (struct loop *loop)
2280
{
2281
  basic_block *bbs;
2282
  tree exit_condition, phi;
2283
  size_t i;
2284
  block_stmt_iterator bsi;
2285
  basic_block exitdest;
2286
 
2287
  /* Can't handle triply nested+ loops yet.  */
2288
  if (!loop->inner || loop->inner->inner)
2289
    return false;
2290
 
2291
  bbs = get_loop_body (loop);
2292
  exit_condition = get_loop_exit_condition (loop);
2293
  for (i = 0; i < loop->num_nodes; i++)
2294
    {
2295
      if (bbs[i]->loop_father == loop)
2296
        {
2297
          for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2298
            {
2299
              tree stmt = bsi_stmt (bsi);
2300
 
2301
              if (stmt == exit_condition
2302
                  || not_interesting_stmt (stmt)
2303
                  || stmt_is_bumper_for_loop (loop, stmt))
2304
                continue;
2305
 
2306
              /* If this is a scalar operation that can be put back
2307
                 into the inner loop, or after the inner loop, through
2308
                 copying, then do so. This works on the theory that
2309
                 any amount of scalar code we have to reduplicate
2310
                 into or after the loops is less expensive that the
2311
                 win we get from rearranging the memory walk
2312
                 the loop is doing so that it has better
2313
                 cache behavior.  */
2314
              if (TREE_CODE (stmt) == MODIFY_EXPR)
2315
                {
2316
                  use_operand_p use_a, use_b;
2317
                  imm_use_iterator imm_iter;
2318
                  ssa_op_iter op_iter, op_iter1;
2319
                  tree op0 = TREE_OPERAND (stmt, 0);
2320
                  tree scev = instantiate_parameters
2321
                    (loop, analyze_scalar_evolution (loop, op0));
2322
 
2323
                  /* If the IV is simple, it can be duplicated.  */
2324
                  if (!automatically_generated_chrec_p (scev))
2325
                    {
2326
                      tree step = evolution_part_in_loop_num (scev, loop->num);
2327
                      if (step && step != chrec_dont_know
2328
                          && TREE_CODE (step) == INTEGER_CST)
2329
                        continue;
2330
                    }
2331
 
2332
                  /* The statement should not define a variable used
2333
                     in the inner loop.  */
2334
                  if (TREE_CODE (op0) == SSA_NAME)
2335
                    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2336
                      if (bb_for_stmt (USE_STMT (use_a))->loop_father
2337
                          == loop->inner)
2338
                        goto fail;
2339
 
2340
                  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2341
                    {
2342
                      tree node, op = USE_FROM_PTR (use_a);
2343
 
2344
                      /* The variables should not be used in both loops.  */
2345
                      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2346
                      if (bb_for_stmt (USE_STMT (use_b))->loop_father
2347
                          == loop->inner)
2348
                        goto fail;
2349
 
2350
                      /* The statement should not use the value of a
2351
                         scalar that was modified in the loop.  */
2352
                      node = SSA_NAME_DEF_STMT (op);
2353
                      if (TREE_CODE (node) == PHI_NODE)
2354
                        FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2355
                          {
2356
                            tree arg = USE_FROM_PTR (use_b);
2357
 
2358
                            if (TREE_CODE (arg) == SSA_NAME)
2359
                              {
2360
                                tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2361
 
2362
                                if (bb_for_stmt (arg_stmt)->loop_father
2363
                                    == loop->inner)
2364
                                  goto fail;
2365
                              }
2366
                          }
2367
                    }
2368
 
2369
                  if (can_put_in_inner_loop (loop->inner, stmt)
2370
                      || can_put_after_inner_loop (loop, stmt))
2371
                    continue;
2372
                }
2373
 
2374
              /* Otherwise, if the bb of a statement we care about isn't
2375
                 dominated by the header of the inner loop, then we can't
2376
                 handle this case right now.  This test ensures that the
2377
                 statement comes completely *after* the inner loop.  */
2378
              if (!dominated_by_p (CDI_DOMINATORS,
2379
                                   bb_for_stmt (stmt),
2380
                                   loop->inner->header))
2381
                goto fail;
2382
            }
2383
        }
2384
    }
2385
 
2386
  /* We also need to make sure the loop exit only has simple copy phis in it,
2387
     otherwise we don't know how to transform it into a perfect nest right
2388
     now.  */
2389
  exitdest = loop->single_exit->dest;
2390
 
2391
  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2392
    if (PHI_NUM_ARGS (phi) != 1)
2393
      goto fail;
2394
 
2395
  free (bbs);
2396
  return true;
2397
 
2398
 fail:
2399
  free (bbs);
2400
  return false;
2401
}
2402
 
2403
/* Transform the loop nest into a perfect nest, if possible.
2404
   LOOPS is the current struct loops *
2405
   LOOP is the loop nest to transform into a perfect nest
2406
   LBOUNDS are the lower bounds for the loops to transform
2407
   UBOUNDS are the upper bounds for the loops to transform
2408
   STEPS is the STEPS for the loops to transform.
2409
   LOOPIVS is the induction variables for the loops to transform.
2410
 
2411
   Basically, for the case of
2412
 
2413
   FOR (i = 0; i < 50; i++)
2414
    {
2415
     FOR (j =0; j < 50; j++)
2416
     {
2417
        <whatever>
2418
     }
2419
     <some code>
2420
    }
2421
 
2422
   This function will transform it into a perfect loop nest by splitting the
2423
   outer loop into two loops, like so:
2424
 
2425
   FOR (i = 0; i < 50; i++)
2426
   {
2427
     FOR (j = 0; j < 50; j++)
2428
     {
2429
         <whatever>
2430
     }
2431
   }
2432
 
2433
   FOR (i = 0; i < 50; i ++)
2434
   {
2435
    <some code>
2436
   }
2437
 
2438
   Return FALSE if we can't make this loop into a perfect nest.  */
2439
 
2440
static bool
2441
perfect_nestify (struct loops *loops,
2442
                 struct loop *loop,
2443
                 VEC(tree,heap) *lbounds,
2444
                 VEC(tree,heap) *ubounds,
2445
                 VEC(int,heap) *steps,
2446
                 VEC(tree,heap) *loopivs)
2447
{
2448
  basic_block *bbs;
2449
  tree exit_condition;
2450
  tree then_label, else_label, cond_stmt;
2451
  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2452
  int i;
2453
  block_stmt_iterator bsi, firstbsi;
2454
  bool insert_after;
2455
  edge e;
2456
  struct loop *newloop;
2457
  tree phi;
2458
  tree uboundvar;
2459
  tree stmt;
2460
  tree oldivvar, ivvar, ivvarinced;
2461
  VEC(tree,heap) *phis = NULL;
2462
  htab_t replacements = NULL;
2463
 
2464
  /* Create the new loop.  */
2465
  olddest = loop->single_exit->dest;
2466
  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2467
  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2468
 
2469
  /* Push the exit phi nodes that we are moving.  */
2470
  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2471
    {
2472
      VEC_reserve (tree, heap, phis, 2);
2473
      VEC_quick_push (tree, phis, PHI_RESULT (phi));
2474
      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2475
    }
2476
  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2477
 
2478
  /* Remove the exit phis from the old basic block.  Make sure to set
2479
     PHI_RESULT to null so it doesn't get released.  */
2480
  while (phi_nodes (olddest) != NULL)
2481
    {
2482
      SET_PHI_RESULT (phi_nodes (olddest), NULL);
2483
      remove_phi_node (phi_nodes (olddest), NULL);
2484
    }
2485
 
2486
  /* and add them back to the new basic block.  */
2487
  while (VEC_length (tree, phis) != 0)
2488
    {
2489
      tree def;
2490
      tree phiname;
2491
      def = VEC_pop (tree, phis);
2492
      phiname = VEC_pop (tree, phis);
2493
      phi = create_phi_node (phiname, preheaderbb);
2494
      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2495
    }
2496
  flush_pending_stmts (e);
2497
  VEC_free (tree, heap, phis);
2498
 
2499
  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2500
  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2501
  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2502
  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2503
  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2504
  cond_stmt = build3 (COND_EXPR, void_type_node,
2505
                      build2 (NE_EXPR, boolean_type_node,
2506
                              integer_one_node,
2507
                              integer_zero_node),
2508
                      then_label, else_label);
2509
  bsi = bsi_start (bodybb);
2510
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2511
  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2512
  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2513
  make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2514
 
2515
  /* Update the loop structures.  */
2516
  newloop = duplicate_loop (loops, loop, olddest->loop_father);
2517
  newloop->header = headerbb;
2518
  newloop->latch = latchbb;
2519
  newloop->single_exit = e;
2520
  add_bb_to_loop (latchbb, newloop);
2521
  add_bb_to_loop (bodybb, newloop);
2522
  add_bb_to_loop (headerbb, newloop);
2523
  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2524
  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2525
  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2526
                           loop->single_exit->src);
2527
  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2528
  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2529
  /* Create the new iv.  */
2530
  oldivvar = VEC_index (tree, loopivs, 0);
2531
  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2532
  add_referenced_var (ivvar);
2533
  standard_iv_increment_position (newloop, &bsi, &insert_after);
2534
  create_iv (VEC_index (tree, lbounds, 0),
2535
             build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2536
             ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2537
 
2538
  /* Create the new upper bound.  This may not be just a variable, so we copy
2539
     it to one just in case.  */
2540
 
2541
  exit_condition = get_loop_exit_condition (newloop);
2542
  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2543
  add_referenced_var (uboundvar);
2544
  stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
2545
                 VEC_index (tree, ubounds, 0));
2546
  uboundvar = make_ssa_name (uboundvar, stmt);
2547
  TREE_OPERAND (stmt, 0) = uboundvar;
2548
 
2549
  if (insert_after)
2550
    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2551
  else
2552
    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2553
  update_stmt (stmt);
2554
  COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2555
                                            boolean_type_node,
2556
                                            uboundvar,
2557
                                            ivvarinced);
2558
  update_stmt (exit_condition);
2559
  replacements = htab_create_ggc (20, tree_map_hash,
2560
                                  tree_map_eq, NULL);
2561
  bbs = get_loop_body_in_dom_order (loop);
2562
  /* Now move the statements, and replace the induction variable in the moved
2563
     statements with the correct loop induction variable.  */
2564
  oldivvar = VEC_index (tree, loopivs, 0);
2565
  firstbsi = bsi_start (bodybb);
2566
  for (i = loop->num_nodes - 1; i >= 0 ; i--)
2567
    {
2568
      block_stmt_iterator tobsi = bsi_last (bodybb);
2569
      if (bbs[i]->loop_father == loop)
2570
        {
2571
          /* If this is true, we are *before* the inner loop.
2572
             If this isn't true, we are *after* it.
2573
 
2574
             The only time can_convert_to_perfect_nest returns true when we
2575
             have statements before the inner loop is if they can be moved
2576
             into the inner loop.
2577
 
2578
             The only time can_convert_to_perfect_nest returns true when we
2579
             have statements after the inner loop is if they can be moved into
2580
             the new split loop.  */
2581
 
2582
          if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2583
            {
2584
              block_stmt_iterator header_bsi
2585
                = bsi_after_labels (loop->inner->header);
2586
 
2587
              for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2588
                {
2589
                  tree stmt = bsi_stmt (bsi);
2590
 
2591
                  if (stmt == exit_condition
2592
                      || not_interesting_stmt (stmt)
2593
                      || stmt_is_bumper_for_loop (loop, stmt))
2594
                    {
2595
                      bsi_next (&bsi);
2596
                      continue;
2597
                    }
2598
 
2599
                  bsi_move_before (&bsi, &header_bsi);
2600
                }
2601
            }
2602
          else
2603
            {
2604
              /* Note that the bsi only needs to be explicitly incremented
2605
                 when we don't move something, since it is automatically
2606
                 incremented when we do.  */
2607
              for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2608
                {
2609
                  ssa_op_iter i;
2610
                  tree n, stmt = bsi_stmt (bsi);
2611
 
2612
                  if (stmt == exit_condition
2613
                      || not_interesting_stmt (stmt)
2614
                      || stmt_is_bumper_for_loop (loop, stmt))
2615
                    {
2616
                      bsi_next (&bsi);
2617
                      continue;
2618
                    }
2619
 
2620
                  replace_uses_equiv_to_x_with_y
2621
                    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2622
                     VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2623
 
2624
                  bsi_move_before (&bsi, &tobsi);
2625
 
2626
                  /* If the statement has any virtual operands, they may
2627
                     need to be rewired because the original loop may
2628
                     still reference them.  */
2629
                  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2630
                    mark_sym_for_renaming (SSA_NAME_VAR (n));
2631
                }
2632
            }
2633
 
2634
        }
2635
    }
2636
 
2637
  free (bbs);
2638
  htab_delete (replacements);
2639
  return perfect_nest_p (loop);
2640
}
2641
 
2642
/* Return true if TRANS is a legal transformation matrix that respects
2643
   the dependence vectors in DISTS and DIRS.  The conservative answer
2644
   is false.
2645
 
2646
   "Wolfe proves that a unimodular transformation represented by the
2647
   matrix T is legal when applied to a loop nest with a set of
2648
   lexicographically non-negative distance vectors RDG if and only if
2649
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2650
   i.e.: if and only if it transforms the lexicographically positive
2651
   distance vectors to lexicographically positive vectors.  Note that
2652
   a unimodular matrix must transform the zero vector (and only it) to
2653
   the zero vector." S.Muchnick.  */
2654
 
2655
bool
2656
lambda_transform_legal_p (lambda_trans_matrix trans,
2657
                          int nb_loops,
2658
                          VEC (ddr_p, heap) *dependence_relations)
2659
{
2660
  unsigned int i, j;
2661
  lambda_vector distres;
2662
  struct data_dependence_relation *ddr;
2663
 
2664
  gcc_assert (LTM_COLSIZE (trans) == nb_loops
2665
              && LTM_ROWSIZE (trans) == nb_loops);
2666
 
2667
  /* When there is an unknown relation in the dependence_relations, we
2668
     know that it is not worth looking at this loop nest: give up.  */
2669
  ddr = VEC_index (ddr_p, dependence_relations, 0);
2670
  if (ddr == NULL)
2671
    return true;
2672
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2673
    return false;
2674
 
2675
  distres = lambda_vector_new (nb_loops);
2676
 
2677
  /* For each distance vector in the dependence graph.  */
2678
  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2679
    {
2680
      /* Don't care about relations for which we know that there is no
2681
         dependence, nor about read-read (a.k.a. input dependences):
2682
         these data accesses can happen in any order.  */
2683
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2684
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2685
        continue;
2686
 
2687
      /* Conservatively answer: "this transformation is not valid".  */
2688
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2689
        return false;
2690
 
2691
      /* If the dependence could not be captured by a distance vector,
2692
         conservatively answer that the transform is not valid.  */
2693
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
2694
        return false;
2695
 
2696
      /* Compute trans.dist_vect */
2697
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2698
        {
2699
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2700
                                     DDR_DIST_VECT (ddr, j), distres);
2701
 
2702
          if (!lambda_vector_lexico_pos (distres, nb_loops))
2703
            return false;
2704
        }
2705
    }
2706
  return true;
2707
}
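 
/* Editorial worked example: consider a depth-2 nest carrying a single
   dependence with distance vector d = (1, -1), i.e. the dependence
   crosses one iteration of the outer loop and goes back one iteration
   of the inner loop.  For the interchange matrix

       T = | 0 1 |
           | 1 0 |

   we get T.d = (-1, 1), which is lexicographically negative, so the
   routine above returns false and the interchange is rejected.  For
   d = (1, 1) the product is (1, 1), still lexicographically positive,
   so the interchange is accepted provided every other distance vector
   also remains lexicographically non-negative.  */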
