OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-data-ref.c] - Blame information for rev 695

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Data references and dependences detectors.
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING3.  If not see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
/* This pass walks a given loop structure searching for array
23
   references.  The information about the array accesses is recorded
24
   in DATA_REFERENCE structures.
25
 
26
   The basic test for determining the dependences is:
27
   given two access functions chrec1 and chrec2 to a same array, and
28
   x and y two vectors from the iteration domain, the same element of
29
   the array is accessed twice at iterations x and y if and only if:
30
   |             chrec1 (x) == chrec2 (y).
31
 
32
   The goals of this analysis are:
33
 
34
   - to determine the independence: the relation between two
35
     independent accesses is qualified with the chrec_known (this
36
     information allows a loop parallelization),
37
 
38
   - when two data references access the same data, to qualify the
39
     dependence relation with classic dependence representations:
40
 
41
       - distance vectors
42
       - direction vectors
43
       - loop carried level dependence
44
       - polyhedron dependence
45
     or with the chains of recurrences based representation,
46
 
47
   - to define a knowledge base for storing the data dependence
48
     information,
49
 
50
   - to define an interface to access this data.
51
 
52
 
53
   Definitions:
54
 
55
   - subscript: given two array accesses a subscript is the tuple
56
   composed of the access functions for a given dimension.  Example:
57
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58
   (f1, g1), (f2, g2), (f3, g3).
59
 
60
   - Diophantine equation: an equation whose coefficients and
61
   solutions are integer constants, for example the equation
62
   |   3*x + 2*y = 1
63
   has an integer solution x = 1 and y = -1.
64
 
65
   References:
66
 
67
   - "Advanced Compilation for High Performance Computing" by Randy
68
   Allen and Ken Kennedy.
69
   http://citeseer.ist.psu.edu/goff91practical.html
70
 
71
   - "Loop Transformations for Restructuring Compilers - The Foundations"
72
   by Utpal Banerjee.
73
 
74
 
75
*/
76
 
77
#include "config.h"
78
#include "system.h"
79
#include "coretypes.h"
80
#include "gimple-pretty-print.h"
81
#include "tree-flow.h"
82
#include "cfgloop.h"
83
#include "tree-data-ref.h"
84
#include "tree-scalar-evolution.h"
85
#include "tree-pass.h"
86
#include "langhooks.h"
87
#include "tree-affine.h"
88
#include "params.h"
89
 
90
static struct datadep_stats
91
{
92
  int num_dependence_tests;
93
  int num_dependence_dependent;
94
  int num_dependence_independent;
95
  int num_dependence_undetermined;
96
 
97
  int num_subscript_tests;
98
  int num_subscript_undetermined;
99
  int num_same_subscript_function;
100
 
101
  int num_ziv;
102
  int num_ziv_independent;
103
  int num_ziv_dependent;
104
  int num_ziv_unimplemented;
105
 
106
  int num_siv;
107
  int num_siv_independent;
108
  int num_siv_dependent;
109
  int num_siv_unimplemented;
110
 
111
  int num_miv;
112
  int num_miv_independent;
113
  int num_miv_dependent;
114
  int num_miv_unimplemented;
115
} dependence_stats;
116
 
117
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118
                                           struct data_reference *,
119
                                           struct data_reference *,
120
                                           struct loop *);
121
/* Returns true iff A divides B.  */
122
 
123
static inline bool
124
tree_fold_divides_p (const_tree a, const_tree b)
125
{
126
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
127
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
128
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129
}
130
 
131
/* Returns true iff A divides B.  */
132
 
133
static inline bool
134
int_divides_p (int a, int b)
135
{
136
  return ((b % a) == 0);
137
}
138
 
139
 
140
 
141
/* Dump into FILE all the data references from DATAREFS.  */
142
 
143
void
144
dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145
{
146
  unsigned int i;
147
  struct data_reference *dr;
148
 
149
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
150
    dump_data_reference (file, dr);
151
}
152
 
153
/* Dump into STDERR all the data references from DATAREFS.  */
154
 
155
DEBUG_FUNCTION void
156
debug_data_references (VEC (data_reference_p, heap) *datarefs)
157
{
158
  dump_data_references (stderr, datarefs);
159
}
160
 
161
/* Dump to STDERR all the dependence relations from DDRS.  */
162
 
163
DEBUG_FUNCTION void
164
debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165
{
166
  dump_data_dependence_relations (stderr, ddrs);
167
}
168
 
169
/* Dump into FILE all the dependence relations from DDRS.  */
170
 
171
void
172
dump_data_dependence_relations (FILE *file,
173
                                VEC (ddr_p, heap) *ddrs)
174
{
175
  unsigned int i;
176
  struct data_dependence_relation *ddr;
177
 
178
  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
179
    dump_data_dependence_relation (file, ddr);
180
}
181
 
182
/* Print to STDERR the data_reference DR.  */
183
 
184
DEBUG_FUNCTION void
185
debug_data_reference (struct data_reference *dr)
186
{
187
  dump_data_reference (stderr, dr);
188
}
189
 
190
/* Dump function for a DATA_REFERENCE structure.  */
191
 
192
void
193
dump_data_reference (FILE *outf,
194
                     struct data_reference *dr)
195
{
196
  unsigned int i;
197
 
198
  fprintf (outf, "#(Data Ref: \n");
199
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
200
  fprintf (outf, "#  stmt: ");
201
  print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
202
  fprintf (outf, "#  ref: ");
203
  print_generic_stmt (outf, DR_REF (dr), 0);
204
  fprintf (outf, "#  base_object: ");
205
  print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206
 
207
  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208
    {
209
      fprintf (outf, "#  Access function %d: ", i);
210
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211
    }
212
  fprintf (outf, "#)\n");
213
}
214
 
215
/* Dumps the affine function described by FN to the file OUTF.  */
216
 
217
static void
218
dump_affine_function (FILE *outf, affine_fn fn)
219
{
220
  unsigned i;
221
  tree coef;
222
 
223
  print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
224
  for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225
    {
226
      fprintf (outf, " + ");
227
      print_generic_expr (outf, coef, TDF_SLIM);
228
      fprintf (outf, " * x_%u", i);
229
    }
230
}
231
 
232
/* Dumps the conflict function CF to the file OUTF.  */
233
 
234
static void
235
dump_conflict_function (FILE *outf, conflict_function *cf)
236
{
237
  unsigned i;
238
 
239
  if (cf->n == NO_DEPENDENCE)
240
    fprintf (outf, "no dependence\n");
241
  else if (cf->n == NOT_KNOWN)
242
    fprintf (outf, "not known\n");
243
  else
244
    {
245
      for (i = 0; i < cf->n; i++)
246
        {
247
          fprintf (outf, "[");
248
          dump_affine_function (outf, cf->fns[i]);
249
          fprintf (outf, "]\n");
250
        }
251
    }
252
}
253
 
254
/* Dump function for a SUBSCRIPT structure.  */
255
 
256
void
257
dump_subscript (FILE *outf, struct subscript *subscript)
258
{
259
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260
 
261
  fprintf (outf, "\n (subscript \n");
262
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
263
  dump_conflict_function (outf, cf);
264
  if (CF_NONTRIVIAL_P (cf))
265
    {
266
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
267
      fprintf (outf, "  last_conflict: ");
268
      print_generic_stmt (outf, last_iteration, 0);
269
    }
270
 
271
  cf = SUB_CONFLICTS_IN_B (subscript);
272
  fprintf (outf, "  iterations_that_access_an_element_twice_in_B: ");
273
  dump_conflict_function (outf, cf);
274
  if (CF_NONTRIVIAL_P (cf))
275
    {
276
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
277
      fprintf (outf, "  last_conflict: ");
278
      print_generic_stmt (outf, last_iteration, 0);
279
    }
280
 
281
  fprintf (outf, "  (Subscript distance: ");
282
  print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
283
  fprintf (outf, "  )\n");
284
  fprintf (outf, " )\n");
285
}
286
 
287
/* Print the classic direction vector DIRV to OUTF.  */
288
 
289
void
290
print_direction_vector (FILE *outf,
291
                        lambda_vector dirv,
292
                        int length)
293
{
294
  int eq;
295
 
296
  for (eq = 0; eq < length; eq++)
297
    {
298
      enum data_dependence_direction dir = ((enum data_dependence_direction)
299
                                            dirv[eq]);
300
 
301
      switch (dir)
302
        {
303
        case dir_positive:
304
          fprintf (outf, "    +");
305
          break;
306
        case dir_negative:
307
          fprintf (outf, "    -");
308
          break;
309
        case dir_equal:
310
          fprintf (outf, "    =");
311
          break;
312
        case dir_positive_or_equal:
313
          fprintf (outf, "   +=");
314
          break;
315
        case dir_positive_or_negative:
316
          fprintf (outf, "   +-");
317
          break;
318
        case dir_negative_or_equal:
319
          fprintf (outf, "   -=");
320
          break;
321
        case dir_star:
322
          fprintf (outf, "    *");
323
          break;
324
        default:
325
          fprintf (outf, "indep");
326
          break;
327
        }
328
    }
329
  fprintf (outf, "\n");
330
}
331
 
332
/* Print a vector of direction vectors.  */
333
 
334
void
335
print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
336
                   int length)
337
{
338
  unsigned j;
339
  lambda_vector v;
340
 
341
  FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
342
    print_direction_vector (outf, v, length);
343
}
344
 
345
/* Print out a vector VEC of length N to OUTFILE.  */
346
 
347
static inline void
348
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
349
{
350
  int i;
351
 
352
  for (i = 0; i < n; i++)
353
    fprintf (outfile, "%3d ", vector[i]);
354
  fprintf (outfile, "\n");
355
}
356
 
357
/* Print a vector of distance vectors.  */
358
 
359
void
360
print_dist_vectors  (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
361
                     int length)
362
{
363
  unsigned j;
364
  lambda_vector v;
365
 
366
  FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
367
    print_lambda_vector (outf, v, length);
368
}
369
 
370
/* Debug version.  */
371
 
372
DEBUG_FUNCTION void
373
debug_data_dependence_relation (struct data_dependence_relation *ddr)
374
{
375
  dump_data_dependence_relation (stderr, ddr);
376
}
377
 
378
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
379
 
380
void
381
dump_data_dependence_relation (FILE *outf,
382
                               struct data_dependence_relation *ddr)
383
{
384
  struct data_reference *dra, *drb;
385
 
386
  fprintf (outf, "(Data Dep: \n");
387
 
388
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
389
    {
390
      if (ddr)
391
        {
392
          dra = DDR_A (ddr);
393
          drb = DDR_B (ddr);
394
          if (dra)
395
            dump_data_reference (outf, dra);
396
          else
397
            fprintf (outf, "    (nil)\n");
398
          if (drb)
399
            dump_data_reference (outf, drb);
400
          else
401
            fprintf (outf, "    (nil)\n");
402
        }
403
      fprintf (outf, "    (don't know)\n)\n");
404
      return;
405
    }
406
 
407
  dra = DDR_A (ddr);
408
  drb = DDR_B (ddr);
409
  dump_data_reference (outf, dra);
410
  dump_data_reference (outf, drb);
411
 
412
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
413
    fprintf (outf, "    (no dependence)\n");
414
 
415
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
416
    {
417
      unsigned int i;
418
      struct loop *loopi;
419
 
420
      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421
        {
422
          fprintf (outf, "  access_fn_A: ");
423
          print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
424
          fprintf (outf, "  access_fn_B: ");
425
          print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
426
          dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427
        }
428
 
429
      fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
430
      fprintf (outf, "  loop nest: (");
431
      FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
432
        fprintf (outf, "%d ", loopi->num);
433
      fprintf (outf, ")\n");
434
 
435
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436
        {
437
          fprintf (outf, "  distance_vector: ");
438
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
439
                               DDR_NB_LOOPS (ddr));
440
        }
441
 
442
      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443
        {
444
          fprintf (outf, "  direction_vector: ");
445
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
446
                                  DDR_NB_LOOPS (ddr));
447
        }
448
    }
449
 
450
  fprintf (outf, ")\n");
451
}
452
 
453
/* Dump function for a DATA_DEPENDENCE_DIRECTION structure.  */
454
 
455
void
456
dump_data_dependence_direction (FILE *file,
457
                                enum data_dependence_direction dir)
458
{
459
  switch (dir)
460
    {
461
    case dir_positive:
462
      fprintf (file, "+");
463
      break;
464
 
465
    case dir_negative:
466
      fprintf (file, "-");
467
      break;
468
 
469
    case dir_equal:
470
      fprintf (file, "=");
471
      break;
472
 
473
    case dir_positive_or_negative:
474
      fprintf (file, "+-");
475
      break;
476
 
477
    case dir_positive_or_equal:
478
      fprintf (file, "+=");
479
      break;
480
 
481
    case dir_negative_or_equal:
482
      fprintf (file, "-=");
483
      break;
484
 
485
    case dir_star:
486
      fprintf (file, "*");
487
      break;
488
 
489
    default:
490
      break;
491
    }
492
}
493
 
494
/* Dumps the distance and direction vectors in FILE.  DDRS contains
495
   the dependence relations, and VECT_SIZE is the size of the
496
   dependence vectors, or in other words the number of loops in the
497
   considered nest.  */
498
 
499
void
500
dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501
{
502
  unsigned int i, j;
503
  struct data_dependence_relation *ddr;
504
  lambda_vector v;
505
 
506
  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
507
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508
      {
509
        FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510
          {
511
            fprintf (file, "DISTANCE_V (");
512
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
513
            fprintf (file, ")\n");
514
          }
515
 
516
        FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517
          {
518
            fprintf (file, "DIRECTION_V (");
519
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
520
            fprintf (file, ")\n");
521
          }
522
      }
523
 
524
  fprintf (file, "\n\n");
525
}
526
 
527
/* Dumps the data dependence relations DDRS in FILE.  */
528
 
529
void
530
dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531
{
532
  unsigned int i;
533
  struct data_dependence_relation *ddr;
534
 
535
  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
536
    dump_data_dependence_relation (file, ddr);
537
 
538
  fprintf (file, "\n\n");
539
}
540
 
541
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
542
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
543
   constant of type ssizetype, and returns true.  If we cannot do this
544
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
545
   is returned.  */
546
 
547
static bool
548
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
549
                         tree *var, tree *off)
550
{
551
  tree var0, var1;
552
  tree off0, off1;
553
  enum tree_code ocode = code;
554
 
555
  *var = NULL_TREE;
556
  *off = NULL_TREE;
557
 
558
  switch (code)
559
    {
560
    case INTEGER_CST:
561
      *var = build_int_cst (type, 0);
562
      *off = fold_convert (ssizetype, op0);
563
      return true;
564
 
565
    case POINTER_PLUS_EXPR:
566
      ocode = PLUS_EXPR;
567
      /* FALLTHROUGH */
568
    case PLUS_EXPR:
569
    case MINUS_EXPR:
570
      split_constant_offset (op0, &var0, &off0);
571
      split_constant_offset (op1, &var1, &off1);
572
      *var = fold_build2 (code, type, var0, var1);
573
      *off = size_binop (ocode, off0, off1);
574
      return true;
575
 
576
    case MULT_EXPR:
577
      if (TREE_CODE (op1) != INTEGER_CST)
578
        return false;
579
 
580
      split_constant_offset (op0, &var0, &off0);
581
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
582
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
583
      return true;
584
 
585
    case ADDR_EXPR:
586
      {
587
        tree base, poffset;
588
        HOST_WIDE_INT pbitsize, pbitpos;
589
        enum machine_mode pmode;
590
        int punsignedp, pvolatilep;
591
 
592
        op0 = TREE_OPERAND (op0, 0);
593
        base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
594
                                    &pmode, &punsignedp, &pvolatilep, false);
595
 
596
        if (pbitpos % BITS_PER_UNIT != 0)
597
          return false;
598
        base = build_fold_addr_expr (base);
599
        off0 = ssize_int (pbitpos / BITS_PER_UNIT);
600
 
601
        if (poffset)
602
          {
603
            split_constant_offset (poffset, &poffset, &off1);
604
            off0 = size_binop (PLUS_EXPR, off0, off1);
605
            if (POINTER_TYPE_P (TREE_TYPE (base)))
606
              base = fold_build_pointer_plus (base, poffset);
607
            else
608
              base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
609
                                  fold_convert (TREE_TYPE (base), poffset));
610
          }
611
 
612
        var0 = fold_convert (type, base);
613
 
614
        /* If variable length types are involved, punt, otherwise casts
615
           might be converted into ARRAY_REFs in gimplify_conversion.
616
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
617
           possibly no longer appears in current GIMPLE, might resurface.
618
           This perhaps could run
619
           if (CONVERT_EXPR_P (var0))
620
             {
621
               gimplify_conversion (&var0);
622
               // Attempt to fill in any within var0 found ARRAY_REF's
623
               // element size from corresponding op embedded ARRAY_REF,
624
               // if unsuccessful, just punt.
625
             }  */
626
        while (POINTER_TYPE_P (type))
627
          type = TREE_TYPE (type);
628
        if (int_size_in_bytes (type) < 0)
629
          return false;
630
 
631
        *var = var0;
632
        *off = off0;
633
        return true;
634
      }
635
 
636
    case SSA_NAME:
637
      {
638
        gimple def_stmt = SSA_NAME_DEF_STMT (op0);
639
        enum tree_code subcode;
640
 
641
        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
642
          return false;
643
 
644
        var0 = gimple_assign_rhs1 (def_stmt);
645
        subcode = gimple_assign_rhs_code (def_stmt);
646
        var1 = gimple_assign_rhs2 (def_stmt);
647
 
648
        return split_constant_offset_1 (type, var0, subcode, var1, var, off);
649
      }
650
    CASE_CONVERT:
651
      {
652
        /* We must not introduce undefined overflow, and we must not change the value.
653
           Hence we're okay if the inner type doesn't overflow to start with
654
           (pointer or signed), the outer type also is an integer or pointer
655
           and the outer precision is at least as large as the inner.  */
656
        tree itype = TREE_TYPE (op0);
657
        if ((POINTER_TYPE_P (itype)
658
             || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
659
            && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
660
            && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
661
          {
662
            split_constant_offset (op0, &var0, off);
663
            *var = fold_convert (type, var0);
664
            return true;
665
          }
666
        return false;
667
      }
668
 
669
    default:
670
      return false;
671
    }
672
}
673
 
674
/* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
675
   will be ssizetype.  */
676
 
677
void
678
split_constant_offset (tree exp, tree *var, tree *off)
679
{
680
  tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
681
  enum tree_code code;
682
 
683
  *var = exp;
684
  *off = ssize_int (0);
685
  STRIP_NOPS (exp);
686
 
687
  if (tree_is_chrec (exp)
688
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
689
    return;
690
 
691
  otype = TREE_TYPE (exp);
692
  code = TREE_CODE (exp);
693
  extract_ops_from_tree (exp, &code, &op0, &op1);
694
  if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
695
    {
696
      *var = fold_convert (type, e);
697
      *off = o;
698
    }
699
}
700
 
701
/* Returns the address ADDR of an object in a canonical shape (without nop
702
   casts, and with type of pointer to the object).  */
703
 
704
static tree
705
canonicalize_base_object_address (tree addr)
706
{
707
  tree orig = addr;
708
 
709
  STRIP_NOPS (addr);
710
 
711
  /* The base address may be obtained by casting from integer, in that case
712
     keep the cast.  */
713
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714
    return orig;
715
 
716
  if (TREE_CODE (addr) != ADDR_EXPR)
717
    return addr;
718
 
719
  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
720
}
721
 
722
/* Analyzes the behavior of the memory reference DR in the innermost loop or
723
   basic block that contains it.  Returns true if analysis succeed or false
724
   otherwise.  */
725
 
726
bool
727
dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
728
{
729
  gimple stmt = DR_STMT (dr);
730
  struct loop *loop = loop_containing_stmt (stmt);
731
  tree ref = DR_REF (dr);
732
  HOST_WIDE_INT pbitsize, pbitpos;
733
  tree base, poffset;
734
  enum machine_mode pmode;
735
  int punsignedp, pvolatilep;
736
  affine_iv base_iv, offset_iv;
737
  tree init, dinit, step;
738
  bool in_loop = (loop && loop->num);
739
 
740
  if (dump_file && (dump_flags & TDF_DETAILS))
741
    fprintf (dump_file, "analyze_innermost: ");
742
 
743
  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744
                              &pmode, &punsignedp, &pvolatilep, false);
745
  gcc_assert (base != NULL_TREE);
746
 
747
  if (pbitpos % BITS_PER_UNIT != 0)
748
    {
749
      if (dump_file && (dump_flags & TDF_DETAILS))
750
        fprintf (dump_file, "failed: bit offset alignment.\n");
751
      return false;
752
    }
753
 
754
  if (TREE_CODE (base) == MEM_REF)
755
    {
756
      if (!integer_zerop (TREE_OPERAND (base, 1)))
757
        {
758
          if (!poffset)
759
            {
760
              double_int moff = mem_ref_offset (base);
761
              poffset = double_int_to_tree (sizetype, moff);
762
            }
763
          else
764
            poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
765
        }
766
      base = TREE_OPERAND (base, 0);
767
    }
768
  else
769
    base = build_fold_addr_expr (base);
770
 
771
  if (in_loop)
772
    {
773
      if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774
                      false))
775
        {
776
          if (nest)
777
            {
778
              if (dump_file && (dump_flags & TDF_DETAILS))
779
                fprintf (dump_file, "failed: evolution of base is not"
780
                                    " affine.\n");
781
              return false;
782
            }
783
          else
784
            {
785
              base_iv.base = base;
786
              base_iv.step = ssize_int (0);
787
              base_iv.no_overflow = true;
788
            }
789
        }
790
    }
791
  else
792
    {
793
      base_iv.base = base;
794
      base_iv.step = ssize_int (0);
795
      base_iv.no_overflow = true;
796
    }
797
 
798
  if (!poffset)
799
    {
800
      offset_iv.base = ssize_int (0);
801
      offset_iv.step = ssize_int (0);
802
    }
803
  else
804
    {
805
      if (!in_loop)
806
        {
807
          offset_iv.base = poffset;
808
          offset_iv.step = ssize_int (0);
809
        }
810
      else if (!simple_iv (loop, loop_containing_stmt (stmt),
811
                           poffset, &offset_iv, false))
812
        {
813
          if (nest)
814
            {
815
              if (dump_file && (dump_flags & TDF_DETAILS))
816
                fprintf (dump_file, "failed: evolution of offset is not"
817
                                    " affine.\n");
818
              return false;
819
            }
820
          else
821
            {
822
              offset_iv.base = poffset;
823
              offset_iv.step = ssize_int (0);
824
            }
825
        }
826
    }
827
 
828
  init = ssize_int (pbitpos / BITS_PER_UNIT);
829
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
830
  init =  size_binop (PLUS_EXPR, init, dinit);
831
  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
832
  init =  size_binop (PLUS_EXPR, init, dinit);
833
 
834
  step = size_binop (PLUS_EXPR,
835
                     fold_convert (ssizetype, base_iv.step),
836
                     fold_convert (ssizetype, offset_iv.step));
837
 
838
  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
839
 
840
  DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
841
  DR_INIT (dr) = init;
842
  DR_STEP (dr) = step;
843
 
844
  DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
845
 
846
  if (dump_file && (dump_flags & TDF_DETAILS))
847
    fprintf (dump_file, "success.\n");
848
 
849
  return true;
850
}
851
 
852
/* Determines the base object and the list of indices of memory reference
853
   DR, analyzed in LOOP and instantiated in loop nest NEST.  */
854
 
855
static void
856
dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
857
{
858
  VEC (tree, heap) *access_fns = NULL;
859
  tree ref, *aref, op;
860
  tree base, off, access_fn;
861
  basic_block before_loop;
862
 
863
  /* If analyzing a basic-block there are no indices to analyze
864
     and thus no access functions.  */
865
  if (!nest)
866
    {
867
      DR_BASE_OBJECT (dr) = DR_REF (dr);
868
      DR_ACCESS_FNS (dr) = NULL;
869
      return;
870
    }
871
 
872
  ref = unshare_expr (DR_REF (dr));
873
  before_loop = block_before_loop (nest);
874
 
875
  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
876
     into a two element array with a constant index.  The base is
877
     then just the immediate underlying object.  */
878
  if (TREE_CODE (ref) == REALPART_EXPR)
879
    {
880
      ref = TREE_OPERAND (ref, 0);
881
      VEC_safe_push (tree, heap, access_fns, integer_zero_node);
882
    }
883
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
884
    {
885
      ref = TREE_OPERAND (ref, 0);
886
      VEC_safe_push (tree, heap, access_fns, integer_one_node);
887
    }
888
 
889
  /* Analyze access functions of dimensions we know to be independent.  */
890
  aref = &ref;
891
  while (handled_component_p (*aref))
892
    {
893
      if (TREE_CODE (*aref) == ARRAY_REF)
894
        {
895
          op = TREE_OPERAND (*aref, 1);
896
          access_fn = analyze_scalar_evolution (loop, op);
897
          access_fn = instantiate_scev (before_loop, loop, access_fn);
898
          VEC_safe_push (tree, heap, access_fns, access_fn);
899
          /* For ARRAY_REFs the base is the reference with the index replaced
900
             by zero if we can not strip it as the outermost component.  */
901
          if (*aref == ref)
902
            {
903
              *aref = TREE_OPERAND (*aref, 0);
904
              continue;
905
            }
906
          else
907
            TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
908
        }
909
 
910
      aref = &TREE_OPERAND (*aref, 0);
911
    }
912
 
913
  /* If the address operand of a MEM_REF base has an evolution in the
914
     analyzed nest, add it as an additional independent access-function.  */
915
  if (TREE_CODE (*aref) == MEM_REF)
916
    {
917
      op = TREE_OPERAND (*aref, 0);
918
      access_fn = analyze_scalar_evolution (loop, op);
919
      access_fn = instantiate_scev (before_loop, loop, access_fn);
920
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
921
        {
922
          tree orig_type;
923
          base = initial_condition (access_fn);
924
          orig_type = TREE_TYPE (base);
925
          STRIP_USELESS_TYPE_CONVERSION (base);
926
          split_constant_offset (base, &base, &off);
927
          /* Fold the MEM_REF offset into the evolutions initial
928
             value to make more bases comparable.  */
929
          if (!integer_zerop (TREE_OPERAND (*aref, 1)))
930
            {
931
              off = size_binop (PLUS_EXPR, off,
932
                                fold_convert (ssizetype,
933
                                              TREE_OPERAND (*aref, 1)));
934
              TREE_OPERAND (*aref, 1)
935
                = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
936
            }
937
          access_fn = chrec_replace_initial_condition
938
              (access_fn, fold_convert (orig_type, off));
939
          *aref = fold_build2_loc (EXPR_LOCATION (*aref),
940
                                   MEM_REF, TREE_TYPE (*aref),
941
                                   base, TREE_OPERAND (*aref, 1));
942
          VEC_safe_push (tree, heap, access_fns, access_fn);
943
        }
944
    }
945
 
946
  DR_BASE_OBJECT (dr) = ref;
947
  DR_ACCESS_FNS (dr) = access_fns;
948
}
949
 
950
/* Extracts the alias analysis information from the memory reference DR.  */
951
 
952
static void
953
dr_analyze_alias (struct data_reference *dr)
954
{
955
  tree ref = DR_REF (dr);
956
  tree base = get_base_address (ref), addr;
957
 
958
  if (INDIRECT_REF_P (base)
959
      || TREE_CODE (base) == MEM_REF)
960
    {
961
      addr = TREE_OPERAND (base, 0);
962
      if (TREE_CODE (addr) == SSA_NAME)
963
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
964
    }
965
}
966
 
967
/* Frees data reference DR.  */
968
 
969
void
970
free_data_ref (data_reference_p dr)
971
{
972
  VEC_free (tree, heap, DR_ACCESS_FNS (dr));
973
  free (dr);
974
}
975
 
976
/* Analyzes memory reference MEMREF accessed in STMT.  The reference
977
   is read if IS_READ is true, write otherwise.  Returns the
978
   data_reference description of MEMREF.  NEST is the outermost loop
979
   in which the reference should be instantiated, LOOP is the loop in
980
   which the data reference should be analyzed.  */
981
 
982
struct data_reference *
983
create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
984
                 bool is_read)
985
{
986
  struct data_reference *dr;
987
 
988
  if (dump_file && (dump_flags & TDF_DETAILS))
989
    {
990
      fprintf (dump_file, "Creating dr for ");
991
      print_generic_expr (dump_file, memref, TDF_SLIM);
992
      fprintf (dump_file, "\n");
993
    }
994
 
995
  dr = XCNEW (struct data_reference);
996
  DR_STMT (dr) = stmt;
997
  DR_REF (dr) = memref;
998
  DR_IS_READ (dr) = is_read;
999
 
1000
  dr_analyze_innermost (dr, nest);
1001
  dr_analyze_indices (dr, nest, loop);
1002
  dr_analyze_alias (dr);
1003
 
1004
  if (dump_file && (dump_flags & TDF_DETAILS))
1005
    {
1006
      unsigned i;
1007
      fprintf (dump_file, "\tbase_address: ");
1008
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1009
      fprintf (dump_file, "\n\toffset from base address: ");
1010
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1011
      fprintf (dump_file, "\n\tconstant offset from base address: ");
1012
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1013
      fprintf (dump_file, "\n\tstep: ");
1014
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1015
      fprintf (dump_file, "\n\taligned to: ");
1016
      print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1017
      fprintf (dump_file, "\n\tbase_object: ");
1018
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1019
      fprintf (dump_file, "\n");
1020
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1021
        {
1022
          fprintf (dump_file, "\tAccess function %d: ", i);
1023
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1024
        }
1025
    }
1026
 
1027
  return dr;
1028
}
1029
 
1030
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1031
   expressions.  */
1032
static bool
1033
dr_equal_offsets_p1 (tree offset1, tree offset2)
1034
{
1035
  bool res;
1036
 
1037
  STRIP_NOPS (offset1);
1038
  STRIP_NOPS (offset2);
1039
 
1040
  if (offset1 == offset2)
1041
    return true;
1042
 
1043
  if (TREE_CODE (offset1) != TREE_CODE (offset2)
1044
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1045
    return false;
1046
 
1047
  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1048
                             TREE_OPERAND (offset2, 0));
1049
 
1050
  if (!res || !BINARY_CLASS_P (offset1))
1051
    return res;
1052
 
1053
  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1054
                             TREE_OPERAND (offset2, 1));
1055
 
1056
  return res;
1057
}
1058
 
1059
/* Check if DRA and DRB have equal offsets.  */
1060
bool
1061
dr_equal_offsets_p (struct data_reference *dra,
1062
                    struct data_reference *drb)
1063
{
1064
  tree offset1, offset2;
1065
 
1066
  offset1 = DR_OFFSET (dra);
1067
  offset2 = DR_OFFSET (drb);
1068
 
1069
  return dr_equal_offsets_p1 (offset1, offset2);
1070
}
1071
 
1072
/* Returns true if FNA == FNB.  */
1073
 
1074
static bool
1075
affine_function_equal_p (affine_fn fna, affine_fn fnb)
1076
{
1077
  unsigned i, n = VEC_length (tree, fna);
1078
 
1079
  if (n != VEC_length (tree, fnb))
1080
    return false;
1081
 
1082
  for (i = 0; i < n; i++)
1083
    if (!operand_equal_p (VEC_index (tree, fna, i),
1084
                          VEC_index (tree, fnb, i), 0))
1085
      return false;
1086
 
1087
  return true;
1088
}
1089
 
1090
/* If all the functions in CF are the same, returns one of them,
1091
   otherwise returns NULL.  */
1092
 
1093
static affine_fn
1094
common_affine_function (conflict_function *cf)
1095
{
1096
  unsigned i;
1097
  affine_fn comm;
1098
 
1099
  if (!CF_NONTRIVIAL_P (cf))
1100
    return NULL;
1101
 
1102
  comm = cf->fns[0];
1103
 
1104
  for (i = 1; i < cf->n; i++)
1105
    if (!affine_function_equal_p (comm, cf->fns[i]))
1106
      return NULL;
1107
 
1108
  return comm;
1109
}
1110
 
1111
/* Returns the base of the affine function FN.  */
1112
 
1113
static tree
1114
affine_function_base (affine_fn fn)
1115
{
1116
  return VEC_index (tree, fn, 0);
1117
}
1118
 
1119
/* Returns true if FN is a constant.  */
1120
 
1121
static bool
1122
affine_function_constant_p (affine_fn fn)
1123
{
1124
  unsigned i;
1125
  tree coef;
1126
 
1127
  for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1128
    if (!integer_zerop (coef))
1129
      return false;
1130
 
1131
  return true;
1132
}
1133
 
1134
/* Returns true if FN is the zero constant function.  */
1135
 
1136
static bool
1137
affine_function_zero_p (affine_fn fn)
1138
{
1139
  return (integer_zerop (affine_function_base (fn))
1140
          && affine_function_constant_p (fn));
1141
}
1142
 
1143
/* Returns a signed integer type with the largest precision from TA
1144
   and TB.  */
1145
 
1146
static tree
1147
signed_type_for_types (tree ta, tree tb)
1148
{
1149
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1150
    return signed_type_for (ta);
1151
  else
1152
    return signed_type_for (tb);
1153
}
1154
 
1155
/* Applies operation OP on affine functions FNA and FNB, and returns the
1156
   result.  */
1157
 
1158
static affine_fn
1159
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1160
{
1161
  unsigned i, n, m;
1162
  affine_fn ret;
1163
  tree coef;
1164
 
1165
  if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1166
    {
1167
      n = VEC_length (tree, fna);
1168
      m = VEC_length (tree, fnb);
1169
    }
1170
  else
1171
    {
1172
      n = VEC_length (tree, fnb);
1173
      m = VEC_length (tree, fna);
1174
    }
1175
 
1176
  ret = VEC_alloc (tree, heap, m);
1177
  for (i = 0; i < n; i++)
1178
    {
1179
      tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1180
                                         TREE_TYPE (VEC_index (tree, fnb, i)));
1181
 
1182
      VEC_quick_push (tree, ret,
1183
                      fold_build2 (op, type,
1184
                                   VEC_index (tree, fna, i),
1185
                                   VEC_index (tree, fnb, i)));
1186
    }
1187
 
1188
  for (; VEC_iterate (tree, fna, i, coef); i++)
1189
    VEC_quick_push (tree, ret,
1190
                    fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1191
                                 coef, integer_zero_node));
1192
  for (; VEC_iterate (tree, fnb, i, coef); i++)
1193
    VEC_quick_push (tree, ret,
1194
                    fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1195
                                 integer_zero_node, coef));
1196
 
1197
  return ret;
1198
}
1199
 
1200
/* Returns the sum of affine functions FNA and FNB.  */
1201
 
1202
static affine_fn
1203
affine_fn_plus (affine_fn fna, affine_fn fnb)
1204
{
1205
  return affine_fn_op (PLUS_EXPR, fna, fnb);
1206
}
1207
 
1208
/* Returns the difference of affine functions FNA and FNB.  */
1209
 
1210
static affine_fn
1211
affine_fn_minus (affine_fn fna, affine_fn fnb)
1212
{
1213
  return affine_fn_op (MINUS_EXPR, fna, fnb);
1214
}
1215
 
1216
/* Frees affine function FN.  */
1217
 
1218
static void
1219
affine_fn_free (affine_fn fn)
1220
{
1221
  VEC_free (tree, heap, fn);
1222
}
1223
 
1224
/* Determine for each subscript in the data dependence relation DDR
1225
   the distance.  */
1226
 
1227
static void
1228
compute_subscript_distance (struct data_dependence_relation *ddr)
1229
{
1230
  conflict_function *cf_a, *cf_b;
1231
  affine_fn fn_a, fn_b, diff;
1232
 
1233
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1234
    {
1235
      unsigned int i;
1236
 
1237
      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1238
        {
1239
          struct subscript *subscript;
1240
 
1241
          subscript = DDR_SUBSCRIPT (ddr, i);
1242
          cf_a = SUB_CONFLICTS_IN_A (subscript);
1243
          cf_b = SUB_CONFLICTS_IN_B (subscript);
1244
 
1245
          fn_a = common_affine_function (cf_a);
1246
          fn_b = common_affine_function (cf_b);
1247
          if (!fn_a || !fn_b)
1248
            {
1249
              SUB_DISTANCE (subscript) = chrec_dont_know;
1250
              return;
1251
            }
1252
          diff = affine_fn_minus (fn_a, fn_b);
1253
 
1254
          if (affine_function_constant_p (diff))
1255
            SUB_DISTANCE (subscript) = affine_function_base (diff);
1256
          else
1257
            SUB_DISTANCE (subscript) = chrec_dont_know;
1258
 
1259
          affine_fn_free (diff);
1260
        }
1261
    }
1262
}
1263
 
1264
/* Returns the conflict function for "unknown".  */
1265
 
1266
static conflict_function *
1267
conflict_fn_not_known (void)
1268
{
1269
  conflict_function *fn = XCNEW (conflict_function);
1270
  fn->n = NOT_KNOWN;
1271
 
1272
  return fn;
1273
}
1274
 
1275
/* Returns the conflict function for "independent".  */
1276
 
1277
static conflict_function *
1278
conflict_fn_no_dependence (void)
1279
{
1280
  conflict_function *fn = XCNEW (conflict_function);
1281
  fn->n = NO_DEPENDENCE;
1282
 
1283
  return fn;
1284
}
1285
 
1286
/* Returns true if the address of OBJ is invariant in LOOP.  */
1287
 
1288
static bool
1289
object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1290
{
1291
  while (handled_component_p (obj))
1292
    {
1293
      if (TREE_CODE (obj) == ARRAY_REF)
1294
        {
1295
          /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1296
             need to check the stride and the lower bound of the reference.  */
1297
          if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1298
                                                      loop->num)
1299
              || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1300
                                                         loop->num))
1301
            return false;
1302
        }
1303
      else if (TREE_CODE (obj) == COMPONENT_REF)
1304
        {
1305
          if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1306
                                                      loop->num))
1307
            return false;
1308
        }
1309
      obj = TREE_OPERAND (obj, 0);
1310
    }
1311
 
1312
  if (!INDIRECT_REF_P (obj)
1313
      && TREE_CODE (obj) != MEM_REF)
1314
    return true;
1315
 
1316
  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1317
                                                  loop->num);
1318
}
1319
 
1320
/* Returns false if we can prove that data references A and B do not alias,
1321
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1322
   considered.  */
1323
 
1324
bool
1325
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1326
                bool loop_nest)
1327
{
1328
  tree addr_a = DR_BASE_OBJECT (a);
1329
  tree addr_b = DR_BASE_OBJECT (b);
1330
 
1331
  /* If we are not processing a loop nest but scalar code we
1332
     do not need to care about possible cross-iteration dependences
1333
     and thus can process the full original reference.  Do so,
1334
     similar to how loop invariant motion applies extra offset-based
1335
     disambiguation.  */
1336
  if (!loop_nest)
1337
    {
1338
      aff_tree off1, off2;
1339
      double_int size1, size2;
1340
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
1341
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
1342
      aff_combination_scale (&off1, double_int_minus_one);
1343
      aff_combination_add (&off2, &off1);
1344
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1345
        return false;
1346
    }
1347
 
1348
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1349
    return refs_output_dependent_p (addr_a, addr_b);
1350
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1351
    return refs_anti_dependent_p (addr_a, addr_b);
1352
  return refs_may_alias_p (addr_a, addr_b);
1353
}
1354
 
1355
/* Initialize a data dependence relation between data accesses A and
1356
   B.  NB_LOOPS is the number of loops surrounding the references: the
1357
   size of the classic distance/direction vectors.  */
1358
 
1359
struct data_dependence_relation *
1360
initialize_data_dependence_relation (struct data_reference *a,
1361
                                     struct data_reference *b,
1362
                                     VEC (loop_p, heap) *loop_nest)
1363
{
1364
  struct data_dependence_relation *res;
1365
  unsigned int i;
1366
 
1367
  res = XNEW (struct data_dependence_relation);
1368
  DDR_A (res) = a;
1369
  DDR_B (res) = b;
1370
  DDR_LOOP_NEST (res) = NULL;
1371
  DDR_REVERSED_P (res) = false;
1372
  DDR_SUBSCRIPTS (res) = NULL;
1373
  DDR_DIR_VECTS (res) = NULL;
1374
  DDR_DIST_VECTS (res) = NULL;
1375
 
1376
  if (a == NULL || b == NULL)
1377
    {
1378
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1379
      return res;
1380
    }
1381
 
1382
  /* If the data references do not alias, then they are independent.  */
1383
  if (!dr_may_alias_p (a, b, loop_nest != NULL))
1384
    {
1385
      DDR_ARE_DEPENDENT (res) = chrec_known;
1386
      return res;
1387
    }
1388
 
1389
  /* The case where the references are exactly the same.  */
1390
  if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1391
    {
1392
     if (loop_nest
1393
        && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1394
                                                DR_BASE_OBJECT (a)))
1395
      {
1396
        DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1397
        return res;
1398
      }
1399
      DDR_AFFINE_P (res) = true;
1400
      DDR_ARE_DEPENDENT (res) = NULL_TREE;
1401
      DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1402
      DDR_LOOP_NEST (res) = loop_nest;
1403
      DDR_INNER_LOOP (res) = 0;
1404
      DDR_SELF_REFERENCE (res) = true;
1405
      for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1406
       {
1407
         struct subscript *subscript;
1408
 
1409
         subscript = XNEW (struct subscript);
1410
         SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1411
         SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1412
         SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1413
         SUB_DISTANCE (subscript) = chrec_dont_know;
1414
         VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1415
       }
1416
      return res;
1417
    }
1418
 
1419
  /* If the references do not access the same object, we do not know
1420
     whether they alias or not.  */
1421
  if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1422
    {
1423
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1424
      return res;
1425
    }
1426
 
1427
  /* If the base of the object is not invariant in the loop nest, we cannot
1428
     analyze it.  TODO -- in fact, it would suffice to record that there may
1429
     be arbitrary dependences in the loops where the base object varies.  */
1430
  if (loop_nest
1431
      && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1432
                                              DR_BASE_OBJECT (a)))
1433
    {
1434
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1435
      return res;
1436
    }
1437
 
1438
  /* If the number of dimensions of the access to not agree we can have
1439
     a pointer access to a component of the array element type and an
1440
     array access while the base-objects are still the same.  Punt.  */
1441
  if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1442
    {
1443
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1444
      return res;
1445
    }
1446
 
1447
  DDR_AFFINE_P (res) = true;
1448
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
1449
  DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1450
  DDR_LOOP_NEST (res) = loop_nest;
1451
  DDR_INNER_LOOP (res) = 0;
1452
  DDR_SELF_REFERENCE (res) = false;
1453
 
1454
  for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1455
    {
1456
      struct subscript *subscript;
1457
 
1458
      subscript = XNEW (struct subscript);
1459
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1460
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1461
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1462
      SUB_DISTANCE (subscript) = chrec_dont_know;
1463
      VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1464
    }
1465
 
1466
  return res;
1467
}
1468
 
1469
/* Frees memory used by the conflict function F.  */
1470
 
1471
static void
1472
free_conflict_function (conflict_function *f)
1473
{
1474
  unsigned i;
1475
 
1476
  if (CF_NONTRIVIAL_P (f))
1477
    {
1478
      for (i = 0; i < f->n; i++)
1479
        affine_fn_free (f->fns[i]);
1480
    }
1481
  free (f);
1482
}
1483
 
1484
/* Frees memory used by SUBSCRIPTS.  */
1485
 
1486
static void
1487
free_subscripts (VEC (subscript_p, heap) *subscripts)
1488
{
1489
  unsigned i;
1490
  subscript_p s;
1491
 
1492
  FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1493
    {
1494
      free_conflict_function (s->conflicting_iterations_in_a);
1495
      free_conflict_function (s->conflicting_iterations_in_b);
1496
      free (s);
1497
    }
1498
  VEC_free (subscript_p, heap, subscripts);
1499
}
1500
 
1501
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1502
   description.  */
1503
 
1504
static inline void
1505
finalize_ddr_dependent (struct data_dependence_relation *ddr,
1506
                        tree chrec)
1507
{
1508
  if (dump_file && (dump_flags & TDF_DETAILS))
1509
    {
1510
      fprintf (dump_file, "(dependence classified: ");
1511
      print_generic_expr (dump_file, chrec, 0);
1512
      fprintf (dump_file, ")\n");
1513
    }
1514
 
1515
  DDR_ARE_DEPENDENT (ddr) = chrec;
1516
  free_subscripts (DDR_SUBSCRIPTS (ddr));
1517
  DDR_SUBSCRIPTS (ddr) = NULL;
1518
}
1519
 
1520
/* The dependence relation DDR cannot be represented by a distance
1521
   vector.  */
1522
 
1523
static inline void
1524
non_affine_dependence_relation (struct data_dependence_relation *ddr)
1525
{
1526
  if (dump_file && (dump_flags & TDF_DETAILS))
1527
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1528
 
1529
  DDR_AFFINE_P (ddr) = false;
1530
}
1531
 
1532
 
1533
 
1534
/* This section contains the classic Banerjee tests.  */
1535
 
1536
/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1537
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1538
 
1539
static inline bool
1540
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1541
{
1542
  return (evolution_function_is_constant_p (chrec_a)
1543
          && evolution_function_is_constant_p (chrec_b));
1544
}
1545
 
1546
/* Returns true iff CHREC_A and CHREC_B are dependent on an index
1547
   variable, i.e., if the SIV (Single Index Variable) test is true.  */
1548
 
1549
static bool
1550
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1551
{
1552
  if ((evolution_function_is_constant_p (chrec_a)
1553
       && evolution_function_is_univariate_p (chrec_b))
1554
      || (evolution_function_is_constant_p (chrec_b)
1555
          && evolution_function_is_univariate_p (chrec_a)))
1556
    return true;
1557
 
1558
  if (evolution_function_is_univariate_p (chrec_a)
1559
      && evolution_function_is_univariate_p (chrec_b))
1560
    {
1561
      switch (TREE_CODE (chrec_a))
1562
        {
1563
        case POLYNOMIAL_CHREC:
1564
          switch (TREE_CODE (chrec_b))
1565
            {
1566
            case POLYNOMIAL_CHREC:
1567
              if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1568
                return false;
1569
 
1570
            default:
1571
              return true;
1572
            }
1573
 
1574
        default:
1575
          return true;
1576
        }
1577
    }
1578
 
1579
  return false;
1580
}
1581
 
1582
/* Creates a conflict function with N dimensions.  The affine functions
1583
   in each dimension follow.  */
1584
 
1585
static conflict_function *
1586
conflict_fn (unsigned n, ...)
1587
{
1588
  unsigned i;
1589
  conflict_function *ret = XCNEW (conflict_function);
1590
  va_list ap;
1591
 
1592
  gcc_assert (0 < n && n <= MAX_DIM);
1593
  va_start(ap, n);
1594
 
1595
  ret->n = n;
1596
  for (i = 0; i < n; i++)
1597
    ret->fns[i] = va_arg (ap, affine_fn);
1598
  va_end(ap);
1599
 
1600
  return ret;
1601
}
1602
 
1603
/* Returns constant affine function with value CST.  */
1604
 
1605
static affine_fn
1606
affine_fn_cst (tree cst)
1607
{
1608
  affine_fn fn = VEC_alloc (tree, heap, 1);
1609
  VEC_quick_push (tree, fn, cst);
1610
  return fn;
1611
}
1612
 
1613
/* Returns affine function with single variable, CST + COEF * x_DIM.  */
1614
 
1615
static affine_fn
1616
affine_fn_univar (tree cst, unsigned dim, tree coef)
1617
{
1618
  affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1619
  unsigned i;
1620
 
1621
  gcc_assert (dim > 0);
1622
  VEC_quick_push (tree, fn, cst);
1623
  for (i = 1; i < dim; i++)
1624
    VEC_quick_push (tree, fn, integer_zero_node);
1625
  VEC_quick_push (tree, fn, coef);
1626
  return fn;
1627
}
1628
 
1629
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1630
   *OVERLAPS_B are initialized to the functions that describe the
1631
   relation between the elements accessed twice by CHREC_A and
1632
   CHREC_B.  For k >= 0, the following property is verified:
1633
 
1634
   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1635
 
1636
static void
1637
analyze_ziv_subscript (tree chrec_a,
1638
                       tree chrec_b,
1639
                       conflict_function **overlaps_a,
1640
                       conflict_function **overlaps_b,
1641
                       tree *last_conflicts)
1642
{
1643
  tree type, difference;
1644
  dependence_stats.num_ziv++;
1645
 
1646
  if (dump_file && (dump_flags & TDF_DETAILS))
1647
    fprintf (dump_file, "(analyze_ziv_subscript \n");
1648
 
1649
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1650
  chrec_a = chrec_convert (type, chrec_a, NULL);
1651
  chrec_b = chrec_convert (type, chrec_b, NULL);
1652
  difference = chrec_fold_minus (type, chrec_a, chrec_b);
1653
 
1654
  switch (TREE_CODE (difference))
1655
    {
1656
    case INTEGER_CST:
1657
      if (integer_zerop (difference))
1658
        {
1659
          /* The difference is equal to zero: the accessed index
1660
             overlaps for each iteration in the loop.  */
1661
          *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1662
          *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1663
          *last_conflicts = chrec_dont_know;
1664
          dependence_stats.num_ziv_dependent++;
1665
        }
1666
      else
1667
        {
1668
          /* The accesses do not overlap.  */
1669
          *overlaps_a = conflict_fn_no_dependence ();
1670
          *overlaps_b = conflict_fn_no_dependence ();
1671
          *last_conflicts = integer_zero_node;
1672
          dependence_stats.num_ziv_independent++;
1673
        }
1674
      break;
1675
 
1676
    default:
1677
      /* We're not sure whether the indexes overlap.  For the moment,
1678
         conservatively answer "don't know".  */
1679
      if (dump_file && (dump_flags & TDF_DETAILS))
1680
        fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1681
 
1682
      *overlaps_a = conflict_fn_not_known ();
1683
      *overlaps_b = conflict_fn_not_known ();
1684
      *last_conflicts = chrec_dont_know;
1685
      dependence_stats.num_ziv_unimplemented++;
1686
      break;
1687
    }
1688
 
1689
  if (dump_file && (dump_flags & TDF_DETAILS))
1690
    fprintf (dump_file, ")\n");
1691
}
1692
 
1693
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
1694
   and only if it fits to the int type.  If this is not the case, or the
1695
   bound  on the number of iterations of LOOP could not be derived, returns
1696
   chrec_dont_know.  */
1697
 
1698
static tree
1699
max_stmt_executions_tree (struct loop *loop)
1700
{
1701
  double_int nit;
1702
 
1703
  if (!max_stmt_executions (loop, true, &nit))
1704
    return chrec_dont_know;
1705
 
1706
  if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1707
    return chrec_dont_know;
1708
 
1709
  return double_int_to_tree (unsigned_type_node, nit);
1710
}
1711
 
1712
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1713
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1714
   *OVERLAPS_B are initialized to the functions that describe the
1715
   relation between the elements accessed twice by CHREC_A and
1716
   CHREC_B.  For k >= 0, the following property is verified:
1717
 
1718
   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1719
 
1720
static void
1721
analyze_siv_subscript_cst_affine (tree chrec_a,
1722
                                  tree chrec_b,
1723
                                  conflict_function **overlaps_a,
1724
                                  conflict_function **overlaps_b,
1725
                                  tree *last_conflicts)
1726
{
1727
  bool value0, value1, value2;
1728
  tree type, difference, tmp;
1729
 
1730
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1731
  chrec_a = chrec_convert (type, chrec_a, NULL);
1732
  chrec_b = chrec_convert (type, chrec_b, NULL);
1733
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1734
 
1735
  if (!chrec_is_positive (initial_condition (difference), &value0))
1736
    {
1737
      if (dump_file && (dump_flags & TDF_DETAILS))
1738
        fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1739
 
1740
      dependence_stats.num_siv_unimplemented++;
1741
      *overlaps_a = conflict_fn_not_known ();
1742
      *overlaps_b = conflict_fn_not_known ();
1743
      *last_conflicts = chrec_dont_know;
1744
      return;
1745
    }
1746
  else
1747
    {
1748
      if (value0 == false)
1749
        {
1750
          if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1751
            {
1752
              if (dump_file && (dump_flags & TDF_DETAILS))
1753
                fprintf (dump_file, "siv test failed: chrec not positive.\n");
1754
 
1755
              *overlaps_a = conflict_fn_not_known ();
1756
              *overlaps_b = conflict_fn_not_known ();
1757
              *last_conflicts = chrec_dont_know;
1758
              dependence_stats.num_siv_unimplemented++;
1759
              return;
1760
            }
1761
          else
1762
            {
1763
              if (value1 == true)
1764
                {
1765
                  /* Example:
1766
                     chrec_a = 12
1767
                     chrec_b = {10, +, 1}
1768
                  */
1769
 
1770
                  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1771
                    {
1772
                      HOST_WIDE_INT numiter;
1773
                      struct loop *loop = get_chrec_loop (chrec_b);
1774
 
1775
                      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1776
                      tmp = fold_build2 (EXACT_DIV_EXPR, type,
1777
                                         fold_build1 (ABS_EXPR, type, difference),
1778
                                         CHREC_RIGHT (chrec_b));
1779
                      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1780
                      *last_conflicts = integer_one_node;
1781
 
1782
 
1783
                      /* Perform weak-zero siv test to see if overlap is
1784
                         outside the loop bounds.  */
1785
                      numiter = max_stmt_executions_int (loop, true);
1786
 
1787
                      if (numiter >= 0
1788
                          && compare_tree_int (tmp, numiter) > 0)
1789
                        {
1790
                          free_conflict_function (*overlaps_a);
1791
                          free_conflict_function (*overlaps_b);
1792
                          *overlaps_a = conflict_fn_no_dependence ();
1793
                          *overlaps_b = conflict_fn_no_dependence ();
1794
                          *last_conflicts = integer_zero_node;
1795
                          dependence_stats.num_siv_independent++;
1796
                          return;
1797
                        }
1798
                      dependence_stats.num_siv_dependent++;
1799
                      return;
1800
                    }
1801
 
1802
                  /* When the step does not divide the difference, there are
1803
                     no overlaps.  */
1804
                  else
1805
                    {
1806
                      *overlaps_a = conflict_fn_no_dependence ();
1807
                      *overlaps_b = conflict_fn_no_dependence ();
1808
                      *last_conflicts = integer_zero_node;
1809
                      dependence_stats.num_siv_independent++;
1810
                      return;
1811
                    }
1812
                }
1813
 
1814
              else
1815
                {
1816
                  /* Example:
1817
                     chrec_a = 12
1818
                     chrec_b = {10, +, -1}
1819
 
1820
                     In this case, chrec_a will not overlap with chrec_b.  */
1821
                  *overlaps_a = conflict_fn_no_dependence ();
1822
                  *overlaps_b = conflict_fn_no_dependence ();
1823
                  *last_conflicts = integer_zero_node;
1824
                  dependence_stats.num_siv_independent++;
1825
                  return;
1826
                }
1827
            }
1828
        }
1829
      else
1830
        {
1831
          if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1832
            {
1833
              if (dump_file && (dump_flags & TDF_DETAILS))
1834
                fprintf (dump_file, "siv test failed: chrec not positive.\n");
1835
 
1836
              *overlaps_a = conflict_fn_not_known ();
1837
              *overlaps_b = conflict_fn_not_known ();
1838
              *last_conflicts = chrec_dont_know;
1839
              dependence_stats.num_siv_unimplemented++;
1840
              return;
1841
            }
1842
          else
1843
            {
1844
              if (value2 == false)
1845
                {
1846
                  /* Example:
1847
                     chrec_a = 3
1848
                     chrec_b = {10, +, -1}
1849
                  */
1850
                  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1851
                    {
1852
                      HOST_WIDE_INT numiter;
1853
                      struct loop *loop = get_chrec_loop (chrec_b);
1854
 
1855
                      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1856
                      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1857
                                         CHREC_RIGHT (chrec_b));
1858
                      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1859
                      *last_conflicts = integer_one_node;
1860
 
1861
                      /* Perform weak-zero siv test to see if overlap is
1862
                         outside the loop bounds.  */
1863
                      numiter = max_stmt_executions_int (loop, true);
1864
 
1865
                      if (numiter >= 0
1866
                          && compare_tree_int (tmp, numiter) > 0)
1867
                        {
1868
                          free_conflict_function (*overlaps_a);
1869
                          free_conflict_function (*overlaps_b);
1870
                          *overlaps_a = conflict_fn_no_dependence ();
1871
                          *overlaps_b = conflict_fn_no_dependence ();
1872
                          *last_conflicts = integer_zero_node;
1873
                          dependence_stats.num_siv_independent++;
1874
                          return;
1875
                        }
1876
                      dependence_stats.num_siv_dependent++;
1877
                      return;
1878
                    }
1879
 
1880
                  /* When the step does not divide the difference, there
1881
                     are no overlaps.  */
1882
                  else
1883
                    {
1884
                      *overlaps_a = conflict_fn_no_dependence ();
1885
                      *overlaps_b = conflict_fn_no_dependence ();
1886
                      *last_conflicts = integer_zero_node;
1887
                      dependence_stats.num_siv_independent++;
1888
                      return;
1889
                    }
1890
                }
1891
              else
1892
                {
1893
                  /* Example:
1894
                     chrec_a = 3
1895
                     chrec_b = {4, +, 1}
1896
 
1897
                     In this case, chrec_a will not overlap with chrec_b.  */
1898
                  *overlaps_a = conflict_fn_no_dependence ();
1899
                  *overlaps_b = conflict_fn_no_dependence ();
1900
                  *last_conflicts = integer_zero_node;
1901
                  dependence_stats.num_siv_independent++;
1902
                  return;
1903
                }
1904
            }
1905
        }
1906
    }
1907
}
1908
 
1909
/* Helper recursive function for initializing the matrix A.  Returns
1910
   the initial value of CHREC.  */
1911
 
1912
static tree
1913
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1914
{
1915
  gcc_assert (chrec);
1916
 
1917
  switch (TREE_CODE (chrec))
1918
    {
1919
    case POLYNOMIAL_CHREC:
1920
      gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1921
 
1922
      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1923
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1924
 
1925
    case PLUS_EXPR:
1926
    case MULT_EXPR:
1927
    case MINUS_EXPR:
1928
      {
1929
        tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1930
        tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1931
 
1932
        return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1933
      }
1934
 
1935
    case NOP_EXPR:
1936
      {
1937
        tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1938
        return chrec_convert (chrec_type (chrec), op, NULL);
1939
      }
1940
 
1941
    case BIT_NOT_EXPR:
1942
      {
1943
        /* Handle ~X as -1 - X.  */
1944
        tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1945
        return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1946
                              build_int_cst (TREE_TYPE (chrec), -1), op);
1947
      }
1948
 
1949
    case INTEGER_CST:
1950
      return chrec;
1951
 
1952
    default:
1953
      gcc_unreachable ();
1954
      return NULL_TREE;
1955
    }
1956
}
1957
 
1958
#define FLOOR_DIV(x,y) ((x) / (y))
1959
 
1960
/* Solves the special case of the Diophantine equation:
1961
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1962
 
1963
   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
1964
   number of iterations that loops X and Y run.  The overlaps will be
1965
   constructed as evolutions in dimension DIM.  */
1966
 
1967
static void
1968
compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1969
                                         affine_fn *overlaps_a,
1970
                                         affine_fn *overlaps_b,
1971
                                         tree *last_conflicts, int dim)
1972
{
1973
  if (((step_a > 0 && step_b > 0)
1974
       || (step_a < 0 && step_b < 0)))
1975
    {
1976
      int step_overlaps_a, step_overlaps_b;
1977
      int gcd_steps_a_b, last_conflict, tau2;
1978
 
1979
      gcd_steps_a_b = gcd (step_a, step_b);
1980
      step_overlaps_a = step_b / gcd_steps_a_b;
1981
      step_overlaps_b = step_a / gcd_steps_a_b;
1982
 
1983
      if (niter > 0)
1984
        {
1985
          tau2 = FLOOR_DIV (niter, step_overlaps_a);
1986
          tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1987
          last_conflict = tau2;
1988
          *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1989
        }
1990
      else
1991
        *last_conflicts = chrec_dont_know;
1992
 
1993
      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1994
                                      build_int_cst (NULL_TREE,
1995
                                                     step_overlaps_a));
1996
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1997
                                      build_int_cst (NULL_TREE,
1998
                                                     step_overlaps_b));
1999
    }
2000
 
2001
  else
2002
    {
2003
      *overlaps_a = affine_fn_cst (integer_zero_node);
2004
      *overlaps_b = affine_fn_cst (integer_zero_node);
2005
      *last_conflicts = integer_zero_node;
2006
    }
2007
}
2008
 
2009
/* Solves the special case of a Diophantine equation where CHREC_A is
2010
   an affine bivariate function, and CHREC_B is an affine univariate
2011
   function.  For example,
2012
 
2013
   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2014
 
2015
   has the following overlapping functions:
2016
 
2017
   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2018
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2019
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2020
 
2021
   FORNOW: This is a specialized implementation for a case occurring in
2022
   a common benchmark.  Implement the general algorithm.  */
2023
 
2024
static void
2025
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2026
                                      conflict_function **overlaps_a,
2027
                                      conflict_function **overlaps_b,
2028
                                      tree *last_conflicts)
2029
{
2030
  bool xz_p, yz_p, xyz_p;
2031
  int step_x, step_y, step_z;
2032
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2033
  affine_fn overlaps_a_xz, overlaps_b_xz;
2034
  affine_fn overlaps_a_yz, overlaps_b_yz;
2035
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
2036
  affine_fn ova1, ova2, ovb;
2037
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2038
 
2039
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2040
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2041
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2042
 
2043
  niter_x =
2044
    max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2045
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2046
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2047
 
2048
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2049
    {
2050
      if (dump_file && (dump_flags & TDF_DETAILS))
2051
        fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2052
 
2053
      *overlaps_a = conflict_fn_not_known ();
2054
      *overlaps_b = conflict_fn_not_known ();
2055
      *last_conflicts = chrec_dont_know;
2056
      return;
2057
    }
2058
 
2059
  niter = MIN (niter_x, niter_z);
2060
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2061
                                           &overlaps_a_xz,
2062
                                           &overlaps_b_xz,
2063
                                           &last_conflicts_xz, 1);
2064
  niter = MIN (niter_y, niter_z);
2065
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2066
                                           &overlaps_a_yz,
2067
                                           &overlaps_b_yz,
2068
                                           &last_conflicts_yz, 2);
2069
  niter = MIN (niter_x, niter_z);
2070
  niter = MIN (niter_y, niter);
2071
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2072
                                           &overlaps_a_xyz,
2073
                                           &overlaps_b_xyz,
2074
                                           &last_conflicts_xyz, 3);
2075
 
2076
  xz_p = !integer_zerop (last_conflicts_xz);
2077
  yz_p = !integer_zerop (last_conflicts_yz);
2078
  xyz_p = !integer_zerop (last_conflicts_xyz);
2079
 
2080
  if (xz_p || yz_p || xyz_p)
2081
    {
2082
      ova1 = affine_fn_cst (integer_zero_node);
2083
      ova2 = affine_fn_cst (integer_zero_node);
2084
      ovb = affine_fn_cst (integer_zero_node);
2085
      if (xz_p)
2086
        {
2087
          affine_fn t0 = ova1;
2088
          affine_fn t2 = ovb;
2089
 
2090
          ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2091
          ovb = affine_fn_plus (ovb, overlaps_b_xz);
2092
          affine_fn_free (t0);
2093
          affine_fn_free (t2);
2094
          *last_conflicts = last_conflicts_xz;
2095
        }
2096
      if (yz_p)
2097
        {
2098
          affine_fn t0 = ova2;
2099
          affine_fn t2 = ovb;
2100
 
2101
          ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2102
          ovb = affine_fn_plus (ovb, overlaps_b_yz);
2103
          affine_fn_free (t0);
2104
          affine_fn_free (t2);
2105
          *last_conflicts = last_conflicts_yz;
2106
        }
2107
      if (xyz_p)
2108
        {
2109
          affine_fn t0 = ova1;
2110
          affine_fn t2 = ova2;
2111
          affine_fn t4 = ovb;
2112
 
2113
          ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2114
          ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2115
          ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2116
          affine_fn_free (t0);
2117
          affine_fn_free (t2);
2118
          affine_fn_free (t4);
2119
          *last_conflicts = last_conflicts_xyz;
2120
        }
2121
      *overlaps_a = conflict_fn (2, ova1, ova2);
2122
      *overlaps_b = conflict_fn (1, ovb);
2123
    }
2124
  else
2125
    {
2126
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2127
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2128
      *last_conflicts = integer_zero_node;
2129
    }
2130
 
2131
  affine_fn_free (overlaps_a_xz);
2132
  affine_fn_free (overlaps_b_xz);
2133
  affine_fn_free (overlaps_a_yz);
2134
  affine_fn_free (overlaps_b_yz);
2135
  affine_fn_free (overlaps_a_xyz);
2136
  affine_fn_free (overlaps_b_xyz);
2137
}
2138
 
2139
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2140
 
2141
static void
2142
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2143
                    int size)
2144
{
2145
  memcpy (vec2, vec1, size * sizeof (*vec1));
2146
}
2147
 
2148
/* Copy the elements of M x N matrix MAT1 to MAT2.  */
2149
 
2150
static void
2151
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2152
                    int m, int n)
2153
{
2154
  int i;
2155
 
2156
  for (i = 0; i < m; i++)
2157
    lambda_vector_copy (mat1[i], mat2[i], n);
2158
}
2159
 
2160
/* Store the N x N identity matrix in MAT.  */
2161
 
2162
static void
2163
lambda_matrix_id (lambda_matrix mat, int size)
2164
{
2165
  int i, j;
2166
 
2167
  for (i = 0; i < size; i++)
2168
    for (j = 0; j < size; j++)
2169
      mat[i][j] = (i == j) ? 1 : 0;
2170
}
2171
 
2172
/* Return the first nonzero element of vector VEC1 between START and N.
2173
   We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2174
 
2175
static int
2176
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2177
{
2178
  int j = start;
2179
  while (j < n && vec1[j] == 0)
2180
    j++;
2181
  return j;
2182
}
2183
 
2184
/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2185
   R2 = R2 + CONST1 * R1.  */
2186
 
2187
static void
2188
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2189
{
2190
  int i;
2191
 
2192
  if (const1 == 0)
2193
    return;
2194
 
2195
  for (i = 0; i < n; i++)
2196
    mat[r2][i] += const1 * mat[r1][i];
2197
}
2198
 
2199
/* Swap rows R1 and R2 in matrix MAT.  */
2200
 
2201
static void
2202
lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2203
{
2204
  lambda_vector row;
2205
 
2206
  row = mat[r1];
2207
  mat[r1] = mat[r2];
2208
  mat[r2] = row;
2209
}
2210
 
2211
/* Multiply vector VEC1 of length SIZE by a constant CONST1,
2212
   and store the result in VEC2.  */
2213
 
2214
static void
2215
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2216
                          int size, int const1)
2217
{
2218
  int i;
2219
 
2220
  if (const1 == 0)
2221
    lambda_vector_clear (vec2, size);
2222
  else
2223
    for (i = 0; i < size; i++)
2224
      vec2[i] = const1 * vec1[i];
2225
}
2226
 
2227
/* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2228
 
2229
static void
2230
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2231
                      int size)
2232
{
2233
  lambda_vector_mult_const (vec1, vec2, size, -1);
2234
}
2235
 
2236
/* Negate row R1 of matrix MAT which has N columns.  */
2237
 
2238
static void
2239
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2240
{
2241
  lambda_vector_negate (mat[r1], mat[r1], n);
2242
}
2243
 
2244
/* Return true if two vectors are equal.  */
2245
 
2246
static bool
2247
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2248
{
2249
  int i;
2250
  for (i = 0; i < size; i++)
2251
    if (vec1[i] != vec2[i])
2252
      return false;
2253
  return true;
2254
}
2255
 
2256
/* Given an M x N integer matrix A, this function determines an M x
2257
   M unimodular matrix U, and an M x N echelon matrix S such that
2258
   "U.A = S".  This decomposition is also known as "right Hermite".
2259
 
2260
   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2261
   Restructuring Compilers" Utpal Banerjee.  */
2262
 
2263
static void
2264
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2265
                             lambda_matrix S, lambda_matrix U)
2266
{
2267
  int i, j, i0 = 0;
2268
 
2269
  lambda_matrix_copy (A, S, m, n);
2270
  lambda_matrix_id (U, m);
2271
 
2272
  for (j = 0; j < n; j++)
2273
    {
2274
      if (lambda_vector_first_nz (S[j], m, i0) < m)
2275
        {
2276
          ++i0;
2277
          for (i = m - 1; i >= i0; i--)
2278
            {
2279
              while (S[i][j] != 0)
2280
                {
2281
                  int sigma, factor, a, b;
2282
 
2283
                  a = S[i-1][j];
2284
                  b = S[i][j];
2285
                  sigma = (a * b < 0) ? -1: 1;
2286
                  a = abs (a);
2287
                  b = abs (b);
2288
                  factor = sigma * (a / b);
2289
 
2290
                  lambda_matrix_row_add (S, n, i, i-1, -factor);
2291
                  lambda_matrix_row_exchange (S, i, i-1);
2292
 
2293
                  lambda_matrix_row_add (U, m, i, i-1, -factor);
2294
                  lambda_matrix_row_exchange (U, i, i-1);
2295
                }
2296
            }
2297
        }
2298
    }
2299
}
2300
 
2301
/* Determines the overlapping elements due to accesses CHREC_A and
2302
   CHREC_B, that are affine functions.  This function cannot handle
2303
   symbolic evolution functions, ie. when initial conditions are
2304
   parameters, because it uses lambda matrices of integers.  */
2305
 
2306
static void
2307
analyze_subscript_affine_affine (tree chrec_a,
2308
                                 tree chrec_b,
2309
                                 conflict_function **overlaps_a,
2310
                                 conflict_function **overlaps_b,
2311
                                 tree *last_conflicts)
2312
{
2313
  unsigned nb_vars_a, nb_vars_b, dim;
2314
  HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2315
  lambda_matrix A, U, S;
2316
  struct obstack scratch_obstack;
2317
 
2318
  if (eq_evolutions_p (chrec_a, chrec_b))
2319
    {
2320
      /* The accessed index overlaps for each iteration in the
2321
         loop.  */
2322
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2323
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2324
      *last_conflicts = chrec_dont_know;
2325
      return;
2326
    }
2327
  if (dump_file && (dump_flags & TDF_DETAILS))
2328
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2329
 
2330
  /* For determining the initial intersection, we have to solve a
2331
     Diophantine equation.  This is the most time consuming part.
2332
 
2333
     For answering to the question: "Is there a dependence?" we have
2334
     to prove that there exists a solution to the Diophantine
2335
     equation, and that the solution is in the iteration domain,
2336
     i.e. the solution is positive or zero, and that the solution
2337
     happens before the upper bound loop.nb_iterations.  Otherwise
2338
     there is no dependence.  This function outputs a description of
2339
     the iterations that hold the intersections.  */
2340
 
2341
  nb_vars_a = nb_vars_in_chrec (chrec_a);
2342
  nb_vars_b = nb_vars_in_chrec (chrec_b);
2343
 
2344
  gcc_obstack_init (&scratch_obstack);
2345
 
2346
  dim = nb_vars_a + nb_vars_b;
2347
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
2348
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
2349
  S = lambda_matrix_new (dim, 1, &scratch_obstack);
2350
 
2351
  init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2352
  init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2353
  gamma = init_b - init_a;
2354
 
2355
  /* Don't do all the hard work of solving the Diophantine equation
2356
     when we already know the solution: for example,
2357
     | {3, +, 1}_1
2358
     | {3, +, 4}_2
2359
     | gamma = 3 - 3 = 0.
2360
     Then the first overlap occurs during the first iterations:
2361
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2362
  */
2363
  if (gamma == 0)
2364
    {
2365
      if (nb_vars_a == 1 && nb_vars_b == 1)
2366
        {
2367
          HOST_WIDE_INT step_a, step_b;
2368
          HOST_WIDE_INT niter, niter_a, niter_b;
2369
          affine_fn ova, ovb;
2370
 
2371
          niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2372
          niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2373
          niter = MIN (niter_a, niter_b);
2374
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2375
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2376
 
2377
          compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2378
                                                   &ova, &ovb,
2379
                                                   last_conflicts, 1);
2380
          *overlaps_a = conflict_fn (1, ova);
2381
          *overlaps_b = conflict_fn (1, ovb);
2382
        }
2383
 
2384
      else if (nb_vars_a == 2 && nb_vars_b == 1)
2385
        compute_overlap_steps_for_affine_1_2
2386
          (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2387
 
2388
      else if (nb_vars_a == 1 && nb_vars_b == 2)
2389
        compute_overlap_steps_for_affine_1_2
2390
          (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2391
 
2392
      else
2393
        {
2394
          if (dump_file && (dump_flags & TDF_DETAILS))
2395
            fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2396
          *overlaps_a = conflict_fn_not_known ();
2397
          *overlaps_b = conflict_fn_not_known ();
2398
          *last_conflicts = chrec_dont_know;
2399
        }
2400
      goto end_analyze_subs_aa;
2401
    }
2402
 
2403
  /* U.A = S */
2404
  lambda_matrix_right_hermite (A, dim, 1, S, U);
2405
 
2406
  if (S[0][0] < 0)
2407
    {
2408
      S[0][0] *= -1;
2409
      lambda_matrix_row_negate (U, dim, 0);
2410
    }
2411
  gcd_alpha_beta = S[0][0];
2412
 
2413
  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2414
     but that is a quite strange case.  Instead of ICEing, answer
2415
     don't know.  */
2416
  if (gcd_alpha_beta == 0)
2417
    {
2418
      *overlaps_a = conflict_fn_not_known ();
2419
      *overlaps_b = conflict_fn_not_known ();
2420
      *last_conflicts = chrec_dont_know;
2421
      goto end_analyze_subs_aa;
2422
    }
2423
 
2424
  /* The classic "gcd-test".  */
2425
  if (!int_divides_p (gcd_alpha_beta, gamma))
2426
    {
2427
      /* The "gcd-test" has determined that there is no integer
2428
         solution, i.e. there is no dependence.  */
2429
      *overlaps_a = conflict_fn_no_dependence ();
2430
      *overlaps_b = conflict_fn_no_dependence ();
2431
      *last_conflicts = integer_zero_node;
2432
    }
2433
 
2434
  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2435
  else if (nb_vars_a == 1 && nb_vars_b == 1)
2436
    {
2437
      /* Both functions should have the same evolution sign.  */
2438
      if (((A[0][0] > 0 && -A[1][0] > 0)
2439
           || (A[0][0] < 0 && -A[1][0] < 0)))
2440
        {
2441
          /* The solutions are given by:
2442
             |
2443
             | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2444
             |                           [u21 u22]    [y0]
2445
 
2446
             For a given integer t.  Using the following variables,
2447
 
2448
             | i0 = u11 * gamma / gcd_alpha_beta
2449
             | j0 = u12 * gamma / gcd_alpha_beta
2450
             | i1 = u21
2451
             | j1 = u22
2452
 
2453
             the solutions are:
2454
 
2455
             | x0 = i0 + i1 * t,
2456
             | y0 = j0 + j1 * t.  */
2457
          HOST_WIDE_INT i0, j0, i1, j1;
2458
 
2459
          i0 = U[0][0] * gamma / gcd_alpha_beta;
2460
          j0 = U[0][1] * gamma / gcd_alpha_beta;
2461
          i1 = U[1][0];
2462
          j1 = U[1][1];
2463
 
2464
          if ((i1 == 0 && i0 < 0)
2465
              || (j1 == 0 && j0 < 0))
2466
            {
2467
              /* There is no solution.
2468
                 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2469
                 falls in here, but for the moment we don't look at the
2470
                 upper bound of the iteration domain.  */
2471
              *overlaps_a = conflict_fn_no_dependence ();
2472
              *overlaps_b = conflict_fn_no_dependence ();
2473
              *last_conflicts = integer_zero_node;
2474
              goto end_analyze_subs_aa;
2475
            }
2476
 
2477
          if (i1 > 0 && j1 > 0)
2478
            {
2479
              HOST_WIDE_INT niter_a = max_stmt_executions_int
2480
                (get_chrec_loop (chrec_a), true);
2481
              HOST_WIDE_INT niter_b = max_stmt_executions_int
2482
                (get_chrec_loop (chrec_b), true);
2483
              HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2484
 
2485
              /* (X0, Y0) is a solution of the Diophantine equation:
2486
                 "chrec_a (X0) = chrec_b (Y0)".  */
2487
              HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2488
                                        CEIL (-j0, j1));
2489
              HOST_WIDE_INT x0 = i1 * tau1 + i0;
2490
              HOST_WIDE_INT y0 = j1 * tau1 + j0;
2491
 
2492
              /* (X1, Y1) is the smallest positive solution of the eq
2493
                 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2494
                 first conflict occurs.  */
2495
              HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2496
              HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2497
              HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2498
 
2499
              if (niter > 0)
2500
                {
2501
                  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2502
                                            FLOOR_DIV (niter - j0, j1));
2503
                  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2504
 
2505
                  /* If the overlap occurs outside of the bounds of the
2506
                     loop, there is no dependence.  */
2507
                  if (x1 >= niter || y1 >= niter)
2508
                    {
2509
                      *overlaps_a = conflict_fn_no_dependence ();
2510
                      *overlaps_b = conflict_fn_no_dependence ();
2511
                      *last_conflicts = integer_zero_node;
2512
                      goto end_analyze_subs_aa;
2513
                    }
2514
                  else
2515
                    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2516
                }
2517
              else
2518
                *last_conflicts = chrec_dont_know;
2519
 
2520
              *overlaps_a
2521
                = conflict_fn (1,
2522
                               affine_fn_univar (build_int_cst (NULL_TREE, x1),
2523
                                                 1,
2524
                                                 build_int_cst (NULL_TREE, i1)));
2525
              *overlaps_b
2526
                = conflict_fn (1,
2527
                               affine_fn_univar (build_int_cst (NULL_TREE, y1),
2528
                                                 1,
2529
                                                 build_int_cst (NULL_TREE, j1)));
2530
            }
2531
          else
2532
            {
2533
              /* FIXME: For the moment, the upper bound of the
2534
                 iteration domain for i and j is not checked.  */
2535
              if (dump_file && (dump_flags & TDF_DETAILS))
2536
                fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2537
              *overlaps_a = conflict_fn_not_known ();
2538
              *overlaps_b = conflict_fn_not_known ();
2539
              *last_conflicts = chrec_dont_know;
2540
            }
2541
        }
2542
      else
2543
        {
2544
          if (dump_file && (dump_flags & TDF_DETAILS))
2545
            fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2546
          *overlaps_a = conflict_fn_not_known ();
2547
          *overlaps_b = conflict_fn_not_known ();
2548
          *last_conflicts = chrec_dont_know;
2549
        }
2550
    }
2551
  else
2552
    {
2553
      if (dump_file && (dump_flags & TDF_DETAILS))
2554
        fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2555
      *overlaps_a = conflict_fn_not_known ();
2556
      *overlaps_b = conflict_fn_not_known ();
2557
      *last_conflicts = chrec_dont_know;
2558
    }
2559
 
2560
end_analyze_subs_aa:
2561
  obstack_free (&scratch_obstack, NULL);
2562
  if (dump_file && (dump_flags & TDF_DETAILS))
2563
    {
2564
      fprintf (dump_file, "  (overlaps_a = ");
2565
      dump_conflict_function (dump_file, *overlaps_a);
2566
      fprintf (dump_file, ")\n  (overlaps_b = ");
2567
      dump_conflict_function (dump_file, *overlaps_b);
2568
      fprintf (dump_file, ")\n");
2569
      fprintf (dump_file, ")\n");
2570
    }
2571
}
2572
 
2573
/* Returns true when analyze_subscript_affine_affine can be used for
2574
   determining the dependence relation between chrec_a and chrec_b,
2575
   that contain symbols.  This function modifies chrec_a and chrec_b
2576
   such that the analysis result is the same, and such that they don't
2577
   contain symbols, and then can safely be passed to the analyzer.
2578
 
2579
   Example: The analysis of the following tuples of evolutions produce
2580
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2581
   vs. {0, +, 1}_1
2582
 
2583
   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2584
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2585
*/
2586
 
2587
static bool
2588
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2589
{
2590
  tree diff, type, left_a, left_b, right_b;
2591
 
2592
  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2593
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2594
    /* FIXME: For the moment not handled.  Might be refined later.  */
2595
    return false;
2596
 
2597
  type = chrec_type (*chrec_a);
2598
  left_a = CHREC_LEFT (*chrec_a);
2599
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2600
  diff = chrec_fold_minus (type, left_a, left_b);
2601
 
2602
  if (!evolution_function_is_constant_p (diff))
2603
    return false;
2604
 
2605
  if (dump_file && (dump_flags & TDF_DETAILS))
2606
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2607
 
2608
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2609
                                     diff, CHREC_RIGHT (*chrec_a));
2610
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2611
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2612
                                     build_int_cst (type, 0),
2613
                                     right_b);
2614
  return true;
2615
}
2616
 
2617
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2618
   *OVERLAPS_B are initialized to the functions that describe the
2619
   relation between the elements accessed twice by CHREC_A and
2620
   CHREC_B.  For k >= 0, the following property is verified:
2621
 
2622
   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2623
 
2624
static void
2625
analyze_siv_subscript (tree chrec_a,
2626
                       tree chrec_b,
2627
                       conflict_function **overlaps_a,
2628
                       conflict_function **overlaps_b,
2629
                       tree *last_conflicts,
2630
                       int loop_nest_num)
2631
{
2632
  dependence_stats.num_siv++;
2633
 
2634
  if (dump_file && (dump_flags & TDF_DETAILS))
2635
    fprintf (dump_file, "(analyze_siv_subscript \n");
2636
 
2637
  if (evolution_function_is_constant_p (chrec_a)
2638
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2639
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2640
                                      overlaps_a, overlaps_b, last_conflicts);
2641
 
2642
  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2643
           && evolution_function_is_constant_p (chrec_b))
2644
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2645
                                      overlaps_b, overlaps_a, last_conflicts);
2646
 
2647
  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2648
           && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2649
    {
2650
      if (!chrec_contains_symbols (chrec_a)
2651
          && !chrec_contains_symbols (chrec_b))
2652
        {
2653
          analyze_subscript_affine_affine (chrec_a, chrec_b,
2654
                                           overlaps_a, overlaps_b,
2655
                                           last_conflicts);
2656
 
2657
          if (CF_NOT_KNOWN_P (*overlaps_a)
2658
              || CF_NOT_KNOWN_P (*overlaps_b))
2659
            dependence_stats.num_siv_unimplemented++;
2660
          else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2661
                   || CF_NO_DEPENDENCE_P (*overlaps_b))
2662
            dependence_stats.num_siv_independent++;
2663
          else
2664
            dependence_stats.num_siv_dependent++;
2665
        }
2666
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2667
                                                        &chrec_b))
2668
        {
2669
          analyze_subscript_affine_affine (chrec_a, chrec_b,
2670
                                           overlaps_a, overlaps_b,
2671
                                           last_conflicts);
2672
 
2673
          if (CF_NOT_KNOWN_P (*overlaps_a)
2674
              || CF_NOT_KNOWN_P (*overlaps_b))
2675
            dependence_stats.num_siv_unimplemented++;
2676
          else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2677
                   || CF_NO_DEPENDENCE_P (*overlaps_b))
2678
            dependence_stats.num_siv_independent++;
2679
          else
2680
            dependence_stats.num_siv_dependent++;
2681
        }
2682
      else
2683
        goto siv_subscript_dontknow;
2684
    }
2685
 
2686
  else
2687
    {
2688
    siv_subscript_dontknow:;
2689
      if (dump_file && (dump_flags & TDF_DETAILS))
2690
        fprintf (dump_file, "siv test failed: unimplemented.\n");
2691
      *overlaps_a = conflict_fn_not_known ();
2692
      *overlaps_b = conflict_fn_not_known ();
2693
      *last_conflicts = chrec_dont_know;
2694
      dependence_stats.num_siv_unimplemented++;
2695
    }
2696
 
2697
  if (dump_file && (dump_flags & TDF_DETAILS))
2698
    fprintf (dump_file, ")\n");
2699
}
2700
 
2701
/* Returns false if we can prove that the greatest common divisor of the steps
2702
   of CHREC does not divide CST, false otherwise.  */
2703
 
2704
static bool
2705
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2706
{
2707
  HOST_WIDE_INT cd = 0, val;
2708
  tree step;
2709
 
2710
  if (!host_integerp (cst, 0))
2711
    return true;
2712
  val = tree_low_cst (cst, 0);
2713
 
2714
  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2715
    {
2716
      step = CHREC_RIGHT (chrec);
2717
      if (!host_integerp (step, 0))
2718
        return true;
2719
      cd = gcd (cd, tree_low_cst (step, 0));
2720
      chrec = CHREC_LEFT (chrec);
2721
    }
2722
 
2723
  return val % cd == 0;
2724
}
2725
 
2726
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
2727
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2728
   functions that describe the relation between the elements accessed
2729
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2730
   is verified:
2731
 
2732
   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2733
 
2734
static void
2735
analyze_miv_subscript (tree chrec_a,
2736
                       tree chrec_b,
2737
                       conflict_function **overlaps_a,
2738
                       conflict_function **overlaps_b,
2739
                       tree *last_conflicts,
2740
                       struct loop *loop_nest)
2741
{
2742
  tree type, difference;
2743
 
2744
  dependence_stats.num_miv++;
2745
  if (dump_file && (dump_flags & TDF_DETAILS))
2746
    fprintf (dump_file, "(analyze_miv_subscript \n");
2747
 
2748
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2749
  chrec_a = chrec_convert (type, chrec_a, NULL);
2750
  chrec_b = chrec_convert (type, chrec_b, NULL);
2751
  difference = chrec_fold_minus (type, chrec_a, chrec_b);
2752
 
2753
  if (eq_evolutions_p (chrec_a, chrec_b))
2754
    {
2755
      /* Access functions are the same: all the elements are accessed
2756
         in the same order.  */
2757
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2758
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2759
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2760
      dependence_stats.num_miv_dependent++;
2761
    }
2762
 
2763
  else if (evolution_function_is_constant_p (difference)
2764
           /* For the moment, the following is verified:
2765
              evolution_function_is_affine_multivariate_p (chrec_a,
2766
              loop_nest->num) */
2767
           && !gcd_of_steps_may_divide_p (chrec_a, difference))
2768
    {
2769
      /* testsuite/.../ssa-chrec-33.c
2770
         {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2771
 
2772
         The difference is 1, and all the evolution steps are multiples
2773
         of 2, consequently there are no overlapping elements.  */
2774
      *overlaps_a = conflict_fn_no_dependence ();
2775
      *overlaps_b = conflict_fn_no_dependence ();
2776
      *last_conflicts = integer_zero_node;
2777
      dependence_stats.num_miv_independent++;
2778
    }
2779
 
2780
  else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2781
           && !chrec_contains_symbols (chrec_a)
2782
           && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2783
           && !chrec_contains_symbols (chrec_b))
2784
    {
2785
      /* testsuite/.../ssa-chrec-35.c
2786
         {0, +, 1}_2  vs.  {0, +, 1}_3
2787
         the overlapping elements are respectively located at iterations:
2788
         {0, +, 1}_x and {0, +, 1}_x,
2789
         in other words, we have the equality:
2790
         {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2791
 
2792
         Other examples:
2793
         {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2794
         {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2795
 
2796
         {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2797
         {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2798
      */
2799
      analyze_subscript_affine_affine (chrec_a, chrec_b,
2800
                                       overlaps_a, overlaps_b, last_conflicts);
2801
 
2802
      if (CF_NOT_KNOWN_P (*overlaps_a)
2803
          || CF_NOT_KNOWN_P (*overlaps_b))
2804
        dependence_stats.num_miv_unimplemented++;
2805
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2806
               || CF_NO_DEPENDENCE_P (*overlaps_b))
2807
        dependence_stats.num_miv_independent++;
2808
      else
2809
        dependence_stats.num_miv_dependent++;
2810
    }
2811
 
2812
  else
2813
    {
2814
      /* When the analysis is too difficult, answer "don't know".  */
2815
      if (dump_file && (dump_flags & TDF_DETAILS))
2816
        fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2817
 
2818
      *overlaps_a = conflict_fn_not_known ();
2819
      *overlaps_b = conflict_fn_not_known ();
2820
      *last_conflicts = chrec_dont_know;
2821
      dependence_stats.num_miv_unimplemented++;
2822
    }
2823
 
2824
  if (dump_file && (dump_flags & TDF_DETAILS))
2825
    fprintf (dump_file, ")\n");
2826
}
2827
 
2828
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
2829
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2830
   OVERLAP_ITERATIONS_B are initialized with two functions that
2831
   describe the iterations that contain conflicting elements.
2832
 
2833
   Remark: For an integer k >= 0, the following equality is true:
2834
 
2835
   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2836
*/
2837
 
2838
static void
2839
analyze_overlapping_iterations (tree chrec_a,
2840
                                tree chrec_b,
2841
                                conflict_function **overlap_iterations_a,
2842
                                conflict_function **overlap_iterations_b,
2843
                                tree *last_conflicts, struct loop *loop_nest)
2844
{
2845
  unsigned int lnn = loop_nest->num;
2846
 
2847
  dependence_stats.num_subscript_tests++;
2848
 
2849
  if (dump_file && (dump_flags & TDF_DETAILS))
2850
    {
2851
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
2852
      fprintf (dump_file, "  (chrec_a = ");
2853
      print_generic_expr (dump_file, chrec_a, 0);
2854
      fprintf (dump_file, ")\n  (chrec_b = ");
2855
      print_generic_expr (dump_file, chrec_b, 0);
2856
      fprintf (dump_file, ")\n");
2857
    }
2858
 
2859
  if (chrec_a == NULL_TREE
2860
      || chrec_b == NULL_TREE
2861
      || chrec_contains_undetermined (chrec_a)
2862
      || chrec_contains_undetermined (chrec_b))
2863
    {
2864
      dependence_stats.num_subscript_undetermined++;
2865
 
2866
      *overlap_iterations_a = conflict_fn_not_known ();
2867
      *overlap_iterations_b = conflict_fn_not_known ();
2868
    }
2869
 
2870
  /* If they are the same chrec, and are affine, they overlap
2871
     on every iteration.  */
2872
  else if (eq_evolutions_p (chrec_a, chrec_b)
2873
           && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2874
               || operand_equal_p (chrec_a, chrec_b, 0)))
2875
    {
2876
      dependence_stats.num_same_subscript_function++;
2877
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2878
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2879
      *last_conflicts = chrec_dont_know;
2880
    }
2881
 
2882
  /* If they aren't the same, and aren't affine, we can't do anything
2883
     yet.  */
2884
  else if ((chrec_contains_symbols (chrec_a)
2885
            || chrec_contains_symbols (chrec_b))
2886
           && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2887
               || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2888
    {
2889
      dependence_stats.num_subscript_undetermined++;
2890
      *overlap_iterations_a = conflict_fn_not_known ();
2891
      *overlap_iterations_b = conflict_fn_not_known ();
2892
    }
2893
 
2894
  else if (ziv_subscript_p (chrec_a, chrec_b))
2895
    analyze_ziv_subscript (chrec_a, chrec_b,
2896
                           overlap_iterations_a, overlap_iterations_b,
2897
                           last_conflicts);
2898
 
2899
  else if (siv_subscript_p (chrec_a, chrec_b))
2900
    analyze_siv_subscript (chrec_a, chrec_b,
2901
                           overlap_iterations_a, overlap_iterations_b,
2902
                           last_conflicts, lnn);
2903
 
2904
  else
2905
    analyze_miv_subscript (chrec_a, chrec_b,
2906
                           overlap_iterations_a, overlap_iterations_b,
2907
                           last_conflicts, loop_nest);
2908
 
2909
  if (dump_file && (dump_flags & TDF_DETAILS))
2910
    {
2911
      fprintf (dump_file, "  (overlap_iterations_a = ");
2912
      dump_conflict_function (dump_file, *overlap_iterations_a);
2913
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
2914
      dump_conflict_function (dump_file, *overlap_iterations_b);
2915
      fprintf (dump_file, ")\n");
2916
      fprintf (dump_file, ")\n");
2917
    }
2918
}
2919
 
2920
/* Helper function for uniquely inserting distance vectors.  */
2921
 
2922
static void
2923
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2924
{
2925
  unsigned i;
2926
  lambda_vector v;
2927
 
2928
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2929
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2930
      return;
2931
 
2932
  VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2933
}
2934
 
2935
/* Helper function for uniquely inserting direction vectors.  */
2936
 
2937
static void
2938
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2939
{
2940
  unsigned i;
2941
  lambda_vector v;
2942
 
2943
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2944
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2945
      return;
2946
 
2947
  VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2948
}
2949
 
2950
/* Add a distance of 1 on all the loops outer than INDEX.  If we
2951
   haven't yet determined a distance for this outer loop, push a new
2952
   distance vector composed of the previous distance, and a distance
2953
   of 1 for this outer loop.  Example:
2954
 
2955
   | loop_1
2956
   |   loop_2
2957
   |     A[10]
2958
   |   endloop_2
2959
   | endloop_1
2960
 
2961
   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
2962
   save (0, 1), then we have to save (1, 0).  */
2963
 
2964
static void
2965
add_outer_distances (struct data_dependence_relation *ddr,
2966
                     lambda_vector dist_v, int index)
2967
{
2968
  /* For each outer loop where init_v is not set, the accesses are
2969
     in dependence of distance 1 in the loop.  */
2970
  while (--index >= 0)
2971
    {
2972
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2973
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2974
      save_v[index] = 1;
2975
      save_dist_v (ddr, save_v);
2976
    }
2977
}
2978
 
2979
/* Return false when fail to represent the data dependence as a
2980
   distance vector.  INIT_B is set to true when a component has been
2981
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
2982
   the index in DIST_V that carries the dependence.  */
2983
 
2984
static bool
2985
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2986
                             struct data_reference *ddr_a,
2987
                             struct data_reference *ddr_b,
2988
                             lambda_vector dist_v, bool *init_b,
2989
                             int *index_carry)
2990
{
2991
  unsigned i;
2992
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2993
 
2994
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2995
    {
2996
      tree access_fn_a, access_fn_b;
2997
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2998
 
2999
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3000
        {
3001
          non_affine_dependence_relation (ddr);
3002
          return false;
3003
        }
3004
 
3005
      access_fn_a = DR_ACCESS_FN (ddr_a, i);
3006
      access_fn_b = DR_ACCESS_FN (ddr_b, i);
3007
 
3008
      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3009
          && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3010
        {
3011
          int dist, index;
3012
          int var_a = CHREC_VARIABLE (access_fn_a);
3013
          int var_b = CHREC_VARIABLE (access_fn_b);
3014
 
3015
          if (var_a != var_b
3016
              || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3017
            {
3018
              non_affine_dependence_relation (ddr);
3019
              return false;
3020
            }
3021
 
3022
          dist = int_cst_value (SUB_DISTANCE (subscript));
3023
          index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3024
          *index_carry = MIN (index, *index_carry);
3025
 
3026
          /* This is the subscript coupling test.  If we have already
3027
             recorded a distance for this loop (a distance coming from
3028
             another subscript), it should be the same.  For example,
3029
             in the following code, there is no dependence:
3030
 
3031
             | loop i = 0, N, 1
3032
             |   T[i+1][i] = ...
3033
             |   ... = T[i][i]
3034
             | endloop
3035
          */
3036
          if (init_v[index] != 0 && dist_v[index] != dist)
3037
            {
3038
              finalize_ddr_dependent (ddr, chrec_known);
3039
              return false;
3040
            }
3041
 
3042
          dist_v[index] = dist;
3043
          init_v[index] = 1;
3044
          *init_b = true;
3045
        }
3046
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3047
        {
3048
          /* This can be for example an affine vs. constant dependence
3049
             (T[i] vs. T[3]) that is not an affine dependence and is
3050
             not representable as a distance vector.  */
3051
          non_affine_dependence_relation (ddr);
3052
          return false;
3053
        }
3054
    }
3055
 
3056
  return true;
3057
}
3058
 
3059
/* Return true when the DDR contains only constant access functions.  */
3060
 
3061
static bool
3062
constant_access_functions (const struct data_dependence_relation *ddr)
3063
{
3064
  unsigned i;
3065
 
3066
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3067
    if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3068
        || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3069
      return false;
3070
 
3071
  return true;
3072
}
3073
 
3074
/* Helper function for the case where DDR_A and DDR_B are the same
3075
   multivariate access function with a constant step.  For an example
3076
   see pr34635-1.c.  */
3077
 
3078
static void
3079
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3080
{
3081
  int x_1, x_2;
3082
  tree c_1 = CHREC_LEFT (c_2);
3083
  tree c_0 = CHREC_LEFT (c_1);
3084
  lambda_vector dist_v;
3085
  int v1, v2, cd;
3086
 
3087
  /* Polynomials with more than 2 variables are not handled yet.  When
3088
     the evolution steps are parameters, it is not possible to
3089
     represent the dependence using classical distance vectors.  */
3090
  if (TREE_CODE (c_0) != INTEGER_CST
3091
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3092
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3093
    {
3094
      DDR_AFFINE_P (ddr) = false;
3095
      return;
3096
    }
3097
 
3098
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3099
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3100
 
3101
  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3102
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3103
  v1 = int_cst_value (CHREC_RIGHT (c_1));
3104
  v2 = int_cst_value (CHREC_RIGHT (c_2));
3105
  cd = gcd (v1, v2);
3106
  v1 /= cd;
3107
  v2 /= cd;
3108
 
3109
  if (v2 < 0)
3110
    {
3111
      v2 = -v2;
3112
      v1 = -v1;
3113
    }
3114
 
3115
  dist_v[x_1] = v2;
3116
  dist_v[x_2] = -v1;
3117
  save_dist_v (ddr, dist_v);
3118
 
3119
  add_outer_distances (ddr, dist_v, x_1);
3120
}
3121
 
3122
/* Helper function for the case where DDR_A and DDR_B are the same
3123
   access functions.  */
3124
 
3125
static void
3126
add_other_self_distances (struct data_dependence_relation *ddr)
3127
{
3128
  lambda_vector dist_v;
3129
  unsigned i;
3130
  int index_carry = DDR_NB_LOOPS (ddr);
3131
 
3132
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3133
    {
3134
      tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3135
 
3136
      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3137
        {
3138
          if (!evolution_function_is_univariate_p (access_fun))
3139
            {
3140
              if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3141
                {
3142
                  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3143
                  return;
3144
                }
3145
 
3146
              access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3147
 
3148
              if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3149
                add_multivariate_self_dist (ddr, access_fun);
3150
              else
3151
                /* The evolution step is not constant: it varies in
3152
                   the outer loop, so this cannot be represented by a
3153
                   distance vector.  For example in pr34635.c the
3154
                   evolution is {0, +, {0, +, 4}_1}_2.  */
3155
                DDR_AFFINE_P (ddr) = false;
3156
 
3157
              return;
3158
            }
3159
 
3160
          index_carry = MIN (index_carry,
3161
                             index_in_loop_nest (CHREC_VARIABLE (access_fun),
3162
                                                 DDR_LOOP_NEST (ddr)));
3163
        }
3164
    }
3165
 
3166
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3167
  add_outer_distances (ddr, dist_v, index_carry);
3168
}
3169
 
3170
static void
3171
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3172
{
3173
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3174
 
3175
  dist_v[DDR_INNER_LOOP (ddr)] = 1;
3176
  save_dist_v (ddr, dist_v);
3177
}
3178
 
3179
/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3180
   is the case for example when access functions are the same and
3181
   equal to a constant, as in:
3182
 
3183
   | loop_1
3184
   |   A[3] = ...
3185
   |   ... = A[3]
3186
   | endloop_1
3187
 
3188
   in which case the distance vectors are (0) and (1).  */
3189
 
3190
static void
3191
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3192
{
3193
  unsigned i, j;
3194
 
3195
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3196
    {
3197
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3198
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3199
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3200
 
3201
      for (j = 0; j < ca->n; j++)
3202
        if (affine_function_zero_p (ca->fns[j]))
3203
          {
3204
            insert_innermost_unit_dist_vector (ddr);
3205
            return;
3206
          }
3207
 
3208
      for (j = 0; j < cb->n; j++)
3209
        if (affine_function_zero_p (cb->fns[j]))
3210
          {
3211
            insert_innermost_unit_dist_vector (ddr);
3212
            return;
3213
          }
3214
    }
3215
}
3216
 
3217
/* Compute the classic per loop distance vector.  DDR is the data
3218
   dependence relation to build a vector from.  Return false when fail
3219
   to represent the data dependence as a distance vector.  */
3220
 
3221
static bool
3222
build_classic_dist_vector (struct data_dependence_relation *ddr,
3223
                           struct loop *loop_nest)
3224
{
3225
  bool init_b = false;
3226
  int index_carry = DDR_NB_LOOPS (ddr);
3227
  lambda_vector dist_v;
3228
 
3229
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3230
    return false;
3231
 
3232
  if (same_access_functions (ddr))
3233
    {
3234
      /* Save the 0 vector.  */
3235
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3236
      save_dist_v (ddr, dist_v);
3237
 
3238
      if (constant_access_functions (ddr))
3239
        add_distance_for_zero_overlaps (ddr);
3240
 
3241
      if (DDR_NB_LOOPS (ddr) > 1)
3242
        add_other_self_distances (ddr);
3243
 
3244
      return true;
3245
    }
3246
 
3247
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3248
  if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3249
                                    dist_v, &init_b, &index_carry))
3250
    return false;
3251
 
3252
  /* Save the distance vector if we initialized one.  */
3253
  if (init_b)
3254
    {
3255
      /* Verify a basic constraint: classic distance vectors should
3256
         always be lexicographically positive.
3257
 
3258
         Data references are collected in the order of execution of
3259
         the program, thus for the following loop
3260
 
3261
         | for (i = 1; i < 100; i++)
3262
         |   for (j = 1; j < 100; j++)
3263
         |     {
3264
         |       t = T[j+1][i-1];  // A
3265
         |       T[j][i] = t + 2;  // B
3266
         |     }
3267
 
3268
         references are collected following the direction of the wind:
3269
         A then B.  The data dependence tests are performed also
3270
         following this order, such that we're looking at the distance
3271
         separating the elements accessed by A from the elements later
3272
         accessed by B.  But in this example, the distance returned by
3273
         test_dep (A, B) is lexicographically negative (-1, 1), that
3274
         means that the access A occurs later than B with respect to
3275
         the outer loop, ie. we're actually looking upwind.  In this
3276
         case we solve test_dep (B, A) looking downwind to the
3277
         lexicographically positive solution, that returns the
3278
         distance vector (1, -1).  */
3279
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3280
        {
3281
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3282
          if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3283
                                              loop_nest))
3284
            return false;
3285
          compute_subscript_distance (ddr);
3286
          if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3287
                                            save_v, &init_b, &index_carry))
3288
            return false;
3289
          save_dist_v (ddr, save_v);
3290
          DDR_REVERSED_P (ddr) = true;
3291
 
3292
          /* In this case there is a dependence forward for all the
3293
             outer loops:
3294
 
3295
             | for (k = 1; k < 100; k++)
3296
             |  for (i = 1; i < 100; i++)
3297
             |   for (j = 1; j < 100; j++)
3298
             |     {
3299
             |       t = T[j+1][i-1];  // A
3300
             |       T[j][i] = t + 2;  // B
3301
             |     }
3302
 
3303
             the vectors are:
3304
             (0,  1, -1)
3305
             (1,  1, -1)
3306
             (1, -1,  1)
3307
          */
3308
          if (DDR_NB_LOOPS (ddr) > 1)
3309
            {
3310
              add_outer_distances (ddr, save_v, index_carry);
3311
              add_outer_distances (ddr, dist_v, index_carry);
3312
            }
3313
        }
3314
      else
3315
        {
3316
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3317
          lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3318
 
3319
          if (DDR_NB_LOOPS (ddr) > 1)
3320
            {
3321
              lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3322
 
3323
              if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3324
                                                  DDR_A (ddr), loop_nest))
3325
                return false;
3326
              compute_subscript_distance (ddr);
3327
              if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3328
                                                opposite_v, &init_b,
3329
                                                &index_carry))
3330
                return false;
3331
 
3332
              save_dist_v (ddr, save_v);
3333
              add_outer_distances (ddr, dist_v, index_carry);
3334
              add_outer_distances (ddr, opposite_v, index_carry);
3335
            }
3336
          else
3337
            save_dist_v (ddr, save_v);
3338
        }
3339
    }
3340
  else
3341
    {
3342
      /* There is a distance of 1 on all the outer loops: Example:
3343
         there is a dependence of distance 1 on loop_1 for the array A.
3344
 
3345
         | loop_1
3346
         |   A[5] = ...
3347
         | endloop
3348
      */
3349
      add_outer_distances (ddr, dist_v,
3350
                           lambda_vector_first_nz (dist_v,
3351
                                                   DDR_NB_LOOPS (ddr), 0));
3352
    }
3353
 
3354
  if (dump_file && (dump_flags & TDF_DETAILS))
3355
    {
3356
      unsigned i;
3357
 
3358
      fprintf (dump_file, "(build_classic_dist_vector\n");
3359
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3360
        {
3361
          fprintf (dump_file, "  dist_vector = (");
3362
          print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3363
                               DDR_NB_LOOPS (ddr));
3364
          fprintf (dump_file, "  )\n");
3365
        }
3366
      fprintf (dump_file, ")\n");
3367
    }
3368
 
3369
  return true;
3370
}
3371
 
3372
/* Return the direction for a given distance.
3373
   FIXME: Computing dir this way is suboptimal, since dir can catch
3374
   cases that dist is unable to represent.  */
3375
 
3376
static inline enum data_dependence_direction
3377
dir_from_dist (int dist)
3378
{
3379
  if (dist > 0)
3380
    return dir_positive;
3381
  else if (dist < 0)
3382
    return dir_negative;
3383
  else
3384
    return dir_equal;
3385
}
3386
 
3387
/* Compute the classic per loop direction vector.  DDR is the data
3388
   dependence relation to build a vector from.  */
3389
 
3390
static void
3391
build_classic_dir_vector (struct data_dependence_relation *ddr)
3392
{
3393
  unsigned i, j;
3394
  lambda_vector dist_v;
3395
 
3396
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3397
    {
3398
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399
 
3400
      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3401
        dir_v[j] = dir_from_dist (dist_v[j]);
3402
 
3403
      save_dir_v (ddr, dir_v);
3404
    }
3405
}
3406
 
3407
/* Helper function.  Returns true when there is a dependence between
3408
   data references DRA and DRB.  */
3409
 
3410
static bool
3411
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3412
                               struct data_reference *dra,
3413
                               struct data_reference *drb,
3414
                               struct loop *loop_nest)
3415
{
3416
  unsigned int i;
3417
  tree last_conflicts;
3418
  struct subscript *subscript;
3419
 
3420
  for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3421
       i++)
3422
    {
3423
      conflict_function *overlaps_a, *overlaps_b;
3424
 
3425
      analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3426
                                      DR_ACCESS_FN (drb, i),
3427
                                      &overlaps_a, &overlaps_b,
3428
                                      &last_conflicts, loop_nest);
3429
 
3430
      if (CF_NOT_KNOWN_P (overlaps_a)
3431
          || CF_NOT_KNOWN_P (overlaps_b))
3432
        {
3433
          finalize_ddr_dependent (ddr, chrec_dont_know);
3434
          dependence_stats.num_dependence_undetermined++;
3435
          free_conflict_function (overlaps_a);
3436
          free_conflict_function (overlaps_b);
3437
          return false;
3438
        }
3439
 
3440
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
3441
               || CF_NO_DEPENDENCE_P (overlaps_b))
3442
        {
3443
          finalize_ddr_dependent (ddr, chrec_known);
3444
          dependence_stats.num_dependence_independent++;
3445
          free_conflict_function (overlaps_a);
3446
          free_conflict_function (overlaps_b);
3447
          return false;
3448
        }
3449
 
3450
      else
3451
        {
3452
          if (SUB_CONFLICTS_IN_A (subscript))
3453
            free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3454
          if (SUB_CONFLICTS_IN_B (subscript))
3455
            free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3456
 
3457
          SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3458
          SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3459
          SUB_LAST_CONFLICT (subscript) = last_conflicts;
3460
        }
3461
    }
3462
 
3463
  return true;
3464
}
3465
 
3466
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3467
 
3468
static void
3469
subscript_dependence_tester (struct data_dependence_relation *ddr,
3470
                             struct loop *loop_nest)
3471
{
3472
 
3473
  if (dump_file && (dump_flags & TDF_DETAILS))
3474
    fprintf (dump_file, "(subscript_dependence_tester \n");
3475
 
3476
  if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3477
    dependence_stats.num_dependence_dependent++;
3478
 
3479
  compute_subscript_distance (ddr);
3480
  if (build_classic_dist_vector (ddr, loop_nest))
3481
    build_classic_dir_vector (ddr);
3482
 
3483
  if (dump_file && (dump_flags & TDF_DETAILS))
3484
    fprintf (dump_file, ")\n");
3485
}
3486
 
3487
/* Returns true when all the access functions of A are affine or
3488
   constant with respect to LOOP_NEST.  */
3489
 
3490
static bool
3491
access_functions_are_affine_or_constant_p (const struct data_reference *a,
3492
                                           const struct loop *loop_nest)
3493
{
3494
  unsigned int i;
3495
  VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3496
  tree t;
3497
 
3498
  FOR_EACH_VEC_ELT (tree, fns, i, t)
3499
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
3500
        && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3501
      return false;
3502
 
3503
  return true;
3504
}
3505
 
3506
/* Initializes an equation for an OMEGA problem using the information
3507
   contained in the ACCESS_FUN.  Returns true when the operation
3508
   succeeded.
3509
 
3510
   PB is the omega constraint system.
3511
   EQ is the number of the equation to be initialized.
3512
   OFFSET is used for shifting the variables names in the constraints:
3513
   a constrain is composed of 2 * the number of variables surrounding
3514
   dependence accesses.  OFFSET is set either to 0 for the first n variables,
3515
   then it is set to n.
3516
   ACCESS_FUN is expected to be an affine chrec.  */
3517
 
3518
static bool
3519
init_omega_eq_with_af (omega_pb pb, unsigned eq,
3520
                       unsigned int offset, tree access_fun,
3521
                       struct data_dependence_relation *ddr)
3522
{
3523
  switch (TREE_CODE (access_fun))
3524
    {
3525
    case POLYNOMIAL_CHREC:
3526
      {
3527
        tree left = CHREC_LEFT (access_fun);
3528
        tree right = CHREC_RIGHT (access_fun);
3529
        int var = CHREC_VARIABLE (access_fun);
3530
        unsigned var_idx;
3531
 
3532
        if (TREE_CODE (right) != INTEGER_CST)
3533
          return false;
3534
 
3535
        var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3536
        pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3537
 
3538
        /* Compute the innermost loop index.  */
3539
        DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3540
 
3541
        if (offset == 0)
3542
          pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3543
            += int_cst_value (right);
3544
 
3545
        switch (TREE_CODE (left))
3546
          {
3547
          case POLYNOMIAL_CHREC:
3548
            return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3549
 
3550
          case INTEGER_CST:
3551
            pb->eqs[eq].coef[0] += int_cst_value (left);
3552
            return true;
3553
 
3554
          default:
3555
            return false;
3556
          }
3557
      }
3558
 
3559
    case INTEGER_CST:
3560
      pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3561
      return true;
3562
 
3563
    default:
3564
      return false;
3565
    }
3566
}
3567
 
3568
/* As explained in the comments preceding init_omega_for_ddr, we have
3569
   to set up a system for each loop level, setting outer loops
3570
   variation to zero, and current loop variation to positive or zero.
3571
   Save each lexico positive distance vector.  */
3572
 
3573
static void
3574
omega_extract_distance_vectors (omega_pb pb,
3575
                                struct data_dependence_relation *ddr)
3576
{
3577
  int eq, geq;
3578
  unsigned i, j;
3579
  struct loop *loopi, *loopj;
3580
  enum omega_result res;
3581
 
3582
  /* Set a new problem for each loop in the nest.  The basis is the
3583
     problem that we have initialized until now.  On top of this we
3584
     add new constraints.  */
3585
  for (i = 0; i <= DDR_INNER_LOOP (ddr)
3586
         && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3587
    {
3588
      int dist = 0;
3589
      omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3590
                                           DDR_NB_LOOPS (ddr));
3591
 
3592
      omega_copy_problem (copy, pb);
3593
 
3594
      /* For all the outer loops "loop_j", add "dj = 0".  */
3595
      for (j = 0;
3596
           j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3597
        {
3598
          eq = omega_add_zero_eq (copy, omega_black);
3599
          copy->eqs[eq].coef[j + 1] = 1;
3600
        }
3601
 
3602
      /* For "loop_i", add "0 <= di".  */
3603
      geq = omega_add_zero_geq (copy, omega_black);
3604
      copy->geqs[geq].coef[i + 1] = 1;
3605
 
3606
      /* Reduce the constraint system, and test that the current
3607
         problem is feasible.  */
3608
      res = omega_simplify_problem (copy);
3609
      if (res == omega_false
3610
          || res == omega_unknown
3611
          || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3612
        goto next_problem;
3613
 
3614
      for (eq = 0; eq < copy->num_subs; eq++)
3615
        if (copy->subs[eq].key == (int) i + 1)
3616
          {
3617
            dist = copy->subs[eq].coef[0];
3618
            goto found_dist;
3619
          }
3620
 
3621
      if (dist == 0)
3622
        {
3623
          /* Reinitialize problem...  */
3624
          omega_copy_problem (copy, pb);
3625
          for (j = 0;
3626
               j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3627
            {
3628
              eq = omega_add_zero_eq (copy, omega_black);
3629
              copy->eqs[eq].coef[j + 1] = 1;
3630
            }
3631
 
3632
          /* ..., but this time "di = 1".  */
3633
          eq = omega_add_zero_eq (copy, omega_black);
3634
          copy->eqs[eq].coef[i + 1] = 1;
3635
          copy->eqs[eq].coef[0] = -1;
3636
 
3637
          res = omega_simplify_problem (copy);
3638
          if (res == omega_false
3639
              || res == omega_unknown
3640
              || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3641
            goto next_problem;
3642
 
3643
          for (eq = 0; eq < copy->num_subs; eq++)
3644
            if (copy->subs[eq].key == (int) i + 1)
3645
              {
3646
                dist = copy->subs[eq].coef[0];
3647
                goto found_dist;
3648
              }
3649
        }
3650
 
3651
    found_dist:;
3652
      /* Save the lexicographically positive distance vector.  */
3653
      if (dist >= 0)
3654
        {
3655
          lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3656
          lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3657
 
3658
          dist_v[i] = dist;
3659
 
3660
          for (eq = 0; eq < copy->num_subs; eq++)
3661
            if (copy->subs[eq].key > 0)
3662
              {
3663
                dist = copy->subs[eq].coef[0];
3664
                dist_v[copy->subs[eq].key - 1] = dist;
3665
              }
3666
 
3667
          for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3668
            dir_v[j] = dir_from_dist (dist_v[j]);
3669
 
3670
          save_dist_v (ddr, dist_v);
3671
          save_dir_v (ddr, dir_v);
3672
        }
3673
 
3674
    next_problem:;
3675
      omega_free_problem (copy);
3676
    }
3677
}
3678
 
3679
/* This is called for each subscript of a tuple of data references:
3680
   insert an equality for representing the conflicts.  */
3681
 
3682
static bool
3683
omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3684
                       struct data_dependence_relation *ddr,
3685
                       omega_pb pb, bool *maybe_dependent)
3686
{
3687
  int eq;
3688
  tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3689
                                     TREE_TYPE (access_fun_b));
3690
  tree fun_a = chrec_convert (type, access_fun_a, NULL);
3691
  tree fun_b = chrec_convert (type, access_fun_b, NULL);
3692
  tree difference = chrec_fold_minus (type, fun_a, fun_b);
3693
  tree minus_one;
3694
 
3695
  /* When the fun_a - fun_b is not constant, the dependence is not
3696
     captured by the classic distance vector representation.  */
3697
  if (TREE_CODE (difference) != INTEGER_CST)
3698
    return false;
3699
 
3700
  /* ZIV test.  */
3701
  if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3702
    {
3703
      /* There is no dependence.  */
3704
      *maybe_dependent = false;
3705
      return true;
3706
    }
3707
 
3708
  minus_one = build_int_cst (type, -1);
3709
  fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3710
 
3711
  eq = omega_add_zero_eq (pb, omega_black);
3712
  if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3713
      || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3714
    /* There is probably a dependence, but the system of
3715
       constraints cannot be built: answer "don't know".  */
3716
    return false;
3717
 
3718
  /* GCD test.  */
3719
  if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3720
      && !int_divides_p (lambda_vector_gcd
3721
                         ((lambda_vector) &(pb->eqs[eq].coef[1]),
3722
                          2 * DDR_NB_LOOPS (ddr)),
3723
                         pb->eqs[eq].coef[0]))
3724
    {
3725
      /* There is no dependence.  */
3726
      *maybe_dependent = false;
3727
      return true;
3728
    }
3729
 
3730
  return true;
3731
}
3732
 
3733
/* Helper function, same as init_omega_for_ddr but specialized for
3734
   data references A and B.  */
3735
 
3736
static bool
3737
init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3738
                      struct data_dependence_relation *ddr,
3739
                      omega_pb pb, bool *maybe_dependent)
3740
{
3741
  unsigned i;
3742
  int ineq;
3743
  struct loop *loopi;
3744
  unsigned nb_loops = DDR_NB_LOOPS (ddr);
3745
 
3746
  /* Insert an equality per subscript.  */
3747
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3748
    {
3749
      if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3750
                                  ddr, pb, maybe_dependent))
3751
        return false;
3752
      else if (*maybe_dependent == false)
3753
        {
3754
          /* There is no dependence.  */
3755
          DDR_ARE_DEPENDENT (ddr) = chrec_known;
3756
          return true;
3757
        }
3758
    }
3759
 
3760
  /* Insert inequalities: constraints corresponding to the iteration
3761
     domain, i.e. the loops surrounding the references "loop_x" and
3762
     the distance variables "dx".  The layout of the OMEGA
3763
     representation is as follows:
3764
     - coef[0] is the constant
3765
     - coef[1..nb_loops] are the protected variables that will not be
3766
     removed by the solver: the "dx"
3767
     - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3768
  */
3769
  for (i = 0; i <= DDR_INNER_LOOP (ddr)
3770
         && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3771
    {
3772
      HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3773
 
3774
      /* 0 <= loop_x */
3775
      ineq = omega_add_zero_geq (pb, omega_black);
3776
      pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3777
 
3778
      /* 0 <= loop_x + dx */
3779
      ineq = omega_add_zero_geq (pb, omega_black);
3780
      pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3781
      pb->geqs[ineq].coef[i + 1] = 1;
3782
 
3783
      if (nbi != -1)
3784
        {
3785
          /* loop_x <= nb_iters */
3786
          ineq = omega_add_zero_geq (pb, omega_black);
3787
          pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3788
          pb->geqs[ineq].coef[0] = nbi;
3789
 
3790
          /* loop_x + dx <= nb_iters */
3791
          ineq = omega_add_zero_geq (pb, omega_black);
3792
          pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3793
          pb->geqs[ineq].coef[i + 1] = -1;
3794
          pb->geqs[ineq].coef[0] = nbi;
3795
 
3796
          /* A step "dx" bigger than nb_iters is not feasible, so
3797
             add "0 <= nb_iters + dx",  */
3798
          ineq = omega_add_zero_geq (pb, omega_black);
3799
          pb->geqs[ineq].coef[i + 1] = 1;
3800
          pb->geqs[ineq].coef[0] = nbi;
3801
          /* and "dx <= nb_iters".  */
3802
          ineq = omega_add_zero_geq (pb, omega_black);
3803
          pb->geqs[ineq].coef[i + 1] = -1;
3804
          pb->geqs[ineq].coef[0] = nbi;
3805
        }
3806
    }
3807
 
3808
  omega_extract_distance_vectors (pb, ddr);
3809
 
3810
  return true;
3811
}
3812
 
3813
/* Sets up the Omega dependence problem for the data dependence
3814
   relation DDR.  Returns false when the constraint system cannot be
3815
   built, ie. when the test answers "don't know".  Returns true
3816
   otherwise, and when independence has been proved (using one of the
3817
   trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3818
   set MAYBE_DEPENDENT to true.
3819
 
3820
   Example: for setting up the dependence system corresponding to the
3821
   conflicting accesses
3822
 
3823
   | loop_i
3824
   |   loop_j
3825
   |     A[i, i+1] = ...
3826
   |     ... A[2*j, 2*(i + j)]
3827
   |   endloop_j
3828
   | endloop_i
3829
 
3830
   the following constraints come from the iteration domain:
3831
 
3832
 
3833
 
3834
 
3835
 
3836
 
3837
   where di, dj are the distance variables.  The constraints
3838
   representing the conflicting elements are:
3839
 
3840
   i = 2 * (j + dj)
3841
   i + 1 = 2 * (i + di + j + dj)
3842
 
3843
   For asking that the resulting distance vector (di, dj) be
3844
   lexicographically positive, we insert the constraint "di >= 0".  If
3845
   "di = 0" in the solution, we fix that component to zero, and we
3846
   look at the inner loops: we set a new problem where all the outer
3847
   loop distances are zero, and fix this inner component to be
3848
   positive.  When one of the components is positive, we save that
3849
   distance, and set a new problem where the distance on this loop is
3850
   zero, searching for other distances in the inner loops.  Here is
3851
   the classic example that illustrates that we have to set for each
3852
   inner loop a new problem:
3853
 
3854
   | loop_1
3855
   |   loop_2
3856
   |     A[10]
3857
   |   endloop_2
3858
   | endloop_1
3859
 
3860
   we have to save two distances (1, 0) and (0, 1).
3861
 
3862
   Given two array references, refA and refB, we have to set the
3863
   dependence problem twice, refA vs. refB and refB vs. refA, and we
3864
   cannot do a single test, as refB might occur before refA in the
3865
   inner loops, and the contrary when considering outer loops: ex.
3866
 
3867
   | loop_0
3868
   |   loop_1
3869
   |     loop_2
3870
   |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3871
   |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3872
   |     endloop_2
3873
   |   endloop_1
3874
   | endloop_0
3875
 
3876
   refB touches the elements in T before refA, and thus for the same
3877
   loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3878
   but for successive loop_0 iterations, we have (1, -1, 1)
3879
 
3880
   The Omega solver expects the distance variables ("di" in the
3881
   previous example) to come first in the constraint system (as
3882
   variables to be protected, or "safe" variables), the constraint
3883
   system is built using the following layout:
3884
 
3885
   "cst | distance vars | index vars".
3886
*/
3887
 
3888
static bool
3889
init_omega_for_ddr (struct data_dependence_relation *ddr,
3890
                    bool *maybe_dependent)
3891
{
3892
  omega_pb pb;
3893
  bool res = false;
3894
 
3895
  *maybe_dependent = true;
3896
 
3897
  if (same_access_functions (ddr))
3898
    {
3899
      unsigned j;
3900
      lambda_vector dir_v;
3901
 
3902
      /* Save the 0 vector.  */
3903
      save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3904
      dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3905
      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3906
        dir_v[j] = dir_equal;
3907
      save_dir_v (ddr, dir_v);
3908
 
3909
      /* Save the dependences carried by outer loops.  */
3910
      pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3911
      res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3912
                                  maybe_dependent);
3913
      omega_free_problem (pb);
3914
      return res;
3915
    }
3916
 
3917
  /* Omega expects the protected variables (those that have to be kept
3918
     after elimination) to appear first in the constraint system.
3919
     These variables are the distance variables.  In the following
3920
     initialization we declare NB_LOOPS safe variables, and the total
3921
     number of variables for the constraint system is 2*NB_LOOPS.  */
3922
  pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3923
  res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3924
                              maybe_dependent);
3925
  omega_free_problem (pb);
3926
 
3927
  /* Stop computation if not decidable, or no dependence.  */
3928
  if (res == false || *maybe_dependent == false)
3929
    return res;
3930
 
3931
  pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3932
  res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3933
                              maybe_dependent);
3934
  omega_free_problem (pb);
3935
 
3936
  return res;
3937
}
3938
 
3939
/* Return true when DDR contains the same information as that stored
3940
   in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
3941
 
3942
static bool
3943
ddr_consistent_p (FILE *file,
3944
                  struct data_dependence_relation *ddr,
3945
                  VEC (lambda_vector, heap) *dist_vects,
3946
                  VEC (lambda_vector, heap) *dir_vects)
3947
{
3948
  unsigned int i, j;
3949
 
3950
  /* If dump_file is set, output there.  */
3951
  if (dump_file && (dump_flags & TDF_DETAILS))
3952
    file = dump_file;
3953
 
3954
  if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3955
    {
3956
      lambda_vector b_dist_v;
3957
      fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3958
               VEC_length (lambda_vector, dist_vects),
3959
               DDR_NUM_DIST_VECTS (ddr));
3960
 
3961
      fprintf (file, "Banerjee dist vectors:\n");
3962
      FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3963
        print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3964
 
3965
      fprintf (file, "Omega dist vectors:\n");
3966
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3967
        print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3968
 
3969
      fprintf (file, "data dependence relation:\n");
3970
      dump_data_dependence_relation (file, ddr);
3971
 
3972
      fprintf (file, ")\n");
3973
      return false;
3974
    }
3975
 
3976
  if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3977
    {
3978
      fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3979
               VEC_length (lambda_vector, dir_vects),
3980
               DDR_NUM_DIR_VECTS (ddr));
3981
      return false;
3982
    }
3983
 
3984
  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3985
    {
3986
      lambda_vector a_dist_v;
3987
      lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3988
 
3989
      /* Distance vectors are not ordered in the same way in the DDR
3990
         and in the DIST_VECTS: search for a matching vector.  */
3991
      FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3992
        if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3993
          break;
3994
 
3995
      if (j == VEC_length (lambda_vector, dist_vects))
3996
        {
3997
          fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3998
          print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3999
          fprintf (file, "not found in Omega dist vectors:\n");
4000
          print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4001
          fprintf (file, "data dependence relation:\n");
4002
          dump_data_dependence_relation (file, ddr);
4003
          fprintf (file, ")\n");
4004
        }
4005
    }
4006
 
4007
  for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4008
    {
4009
      lambda_vector a_dir_v;
4010
      lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4011
 
4012
      /* Direction vectors are not ordered in the same way in the DDR
4013
         and in the DIR_VECTS: search for a matching vector.  */
4014
      FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4015
        if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4016
          break;
4017
 
4018
      if (j == VEC_length (lambda_vector, dist_vects))
4019
        {
4020
          fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4021
          print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4022
          fprintf (file, "not found in Omega dir vectors:\n");
4023
          print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4024
          fprintf (file, "data dependence relation:\n");
4025
          dump_data_dependence_relation (file, ddr);
4026
          fprintf (file, ")\n");
4027
        }
4028
    }
4029
 
4030
  return true;
4031
}
4032
 
4033
/* This computes the affine dependence relation between A and B with
4034
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4035
   independence between two accesses, while CHREC_DONT_KNOW is used
4036
   for representing the unknown relation.
4037
 
4038
   Note that it is possible to stop the computation of the dependence
4039
   relation the first time we detect a CHREC_KNOWN element for a given
4040
   subscript.  */
4041
 
4042
static void
4043
compute_affine_dependence (struct data_dependence_relation *ddr,
4044
                           struct loop *loop_nest)
4045
{
4046
  struct data_reference *dra = DDR_A (ddr);
4047
  struct data_reference *drb = DDR_B (ddr);
4048
 
4049
  if (dump_file && (dump_flags & TDF_DETAILS))
4050
    {
4051
      fprintf (dump_file, "(compute_affine_dependence\n");
4052
      fprintf (dump_file, "  (stmt_a = \n");
4053
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4054
      fprintf (dump_file, ")\n  (stmt_b = \n");
4055
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4056
      fprintf (dump_file, ")\n");
4057
    }
4058
 
4059
  /* Analyze only when the dependence relation is not yet known.  */
4060
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4061
    {
4062
      dependence_stats.num_dependence_tests++;
4063
 
4064
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4065
          && access_functions_are_affine_or_constant_p (drb, loop_nest))
4066
        {
4067
          if (flag_check_data_deps)
4068
            {
4069
              /* Compute the dependences using the first algorithm.  */
4070
              subscript_dependence_tester (ddr, loop_nest);
4071
 
4072
              if (dump_file && (dump_flags & TDF_DETAILS))
4073
                {
4074
                  fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4075
                  dump_data_dependence_relation (dump_file, ddr);
4076
                }
4077
 
4078
              if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4079
                {
4080
                  bool maybe_dependent;
4081
                  VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4082
 
4083
                  /* Save the result of the first DD analyzer.  */
4084
                  dist_vects = DDR_DIST_VECTS (ddr);
4085
                  dir_vects = DDR_DIR_VECTS (ddr);
4086
 
4087
                  /* Reset the information.  */
4088
                  DDR_DIST_VECTS (ddr) = NULL;
4089
                  DDR_DIR_VECTS (ddr) = NULL;
4090
 
4091
                  /* Compute the same information using Omega.  */
4092
                  if (!init_omega_for_ddr (ddr, &maybe_dependent))
4093
                    goto csys_dont_know;
4094
 
4095
                  if (dump_file && (dump_flags & TDF_DETAILS))
4096
                    {
4097
                      fprintf (dump_file, "Omega Analyzer\n");
4098
                      dump_data_dependence_relation (dump_file, ddr);
4099
                    }
4100
 
4101
                  /* Check that we get the same information.  */
4102
                  if (maybe_dependent)
4103
                    gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4104
                                                  dir_vects));
4105
                }
4106
            }
4107
          else
4108
            subscript_dependence_tester (ddr, loop_nest);
4109
        }
4110
 
4111
      /* As a last case, if the dependence cannot be determined, or if
4112
         the dependence is considered too difficult to determine, answer
4113
         "don't know".  */
4114
      else
4115
        {
4116
        csys_dont_know:;
4117
          dependence_stats.num_dependence_undetermined++;
4118
 
4119
          if (dump_file && (dump_flags & TDF_DETAILS))
4120
            {
4121
              fprintf (dump_file, "Data ref a:\n");
4122
              dump_data_reference (dump_file, dra);
4123
              fprintf (dump_file, "Data ref b:\n");
4124
              dump_data_reference (dump_file, drb);
4125
              fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4126
            }
4127
          finalize_ddr_dependent (ddr, chrec_dont_know);
4128
        }
4129
    }
4130
 
4131
  if (dump_file && (dump_flags & TDF_DETAILS))
4132
    fprintf (dump_file, ")\n");
4133
}
4134
 
4135
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4136
   the data references in DATAREFS, in the LOOP_NEST.  When
4137
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4138
   relations.  Return true when successful, i.e. data references number
4139
   is small enough to be handled.  */
4140
 
4141
bool
4142
compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4143
                         VEC (ddr_p, heap) **dependence_relations,
4144
                         VEC (loop_p, heap) *loop_nest,
4145
                         bool compute_self_and_rr)
4146
{
4147
  struct data_dependence_relation *ddr;
4148
  struct data_reference *a, *b;
4149
  unsigned int i, j;
4150
 
4151
  if ((int) VEC_length (data_reference_p, datarefs)
4152
      > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4153
    {
4154
      struct data_dependence_relation *ddr;
4155
 
4156
      /* Insert a single relation into dependence_relations:
4157
         chrec_dont_know.  */
4158
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4159
      VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4160
      return false;
4161
    }
4162
 
4163
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4164
    for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4165
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4166
        {
4167
          ddr = initialize_data_dependence_relation (a, b, loop_nest);
4168
          VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4169
          if (loop_nest)
4170
            compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4171
        }
4172
 
4173
  if (compute_self_and_rr)
4174
    FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4175
      {
4176
        ddr = initialize_data_dependence_relation (a, a, loop_nest);
4177
        VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4178
        if (loop_nest)
4179
          compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4180
      }
4181
 
4182
  return true;
4183
}
4184
 
4185
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
4186
   true if STMT clobbers memory, false otherwise.  */
4187
 
4188
bool
4189
get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4190
{
4191
  bool clobbers_memory = false;
4192
  data_ref_loc *ref;
4193
  tree *op0, *op1;
4194
  enum gimple_code stmt_code = gimple_code (stmt);
4195
 
4196
  *references = NULL;
4197
 
4198
  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4199
     Calls have side-effects, except those to const or pure
4200
     functions.  */
4201
  if ((stmt_code == GIMPLE_CALL
4202
       && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4203
      || (stmt_code == GIMPLE_ASM
4204
          && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4205
    clobbers_memory = true;
4206
 
4207
  if (!gimple_vuse (stmt))
4208
    return clobbers_memory;
4209
 
4210
  if (stmt_code == GIMPLE_ASSIGN)
4211
    {
4212
      tree base;
4213
      op0 = gimple_assign_lhs_ptr (stmt);
4214
      op1 = gimple_assign_rhs1_ptr (stmt);
4215
 
4216
      if (DECL_P (*op1)
4217
          || (REFERENCE_CLASS_P (*op1)
4218
              && (base = get_base_address (*op1))
4219
              && TREE_CODE (base) != SSA_NAME))
4220
        {
4221
          ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4222
          ref->pos = op1;
4223
          ref->is_read = true;
4224
        }
4225
    }
4226
  else if (stmt_code == GIMPLE_CALL)
4227
    {
4228
      unsigned i, n;
4229
 
4230
      op0 = gimple_call_lhs_ptr (stmt);
4231
      n = gimple_call_num_args (stmt);
4232
      for (i = 0; i < n; i++)
4233
        {
4234
          op1 = gimple_call_arg_ptr (stmt, i);
4235
 
4236
          if (DECL_P (*op1)
4237
              || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4238
            {
4239
              ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4240
              ref->pos = op1;
4241
              ref->is_read = true;
4242
            }
4243
        }
4244
    }
4245
  else
4246
    return clobbers_memory;
4247
 
4248
  if (*op0
4249
      && (DECL_P (*op0)
4250
          || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4251
    {
4252
      ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4253
      ref->pos = op0;
4254
      ref->is_read = false;
4255
    }
4256
  return clobbers_memory;
4257
}
4258
 
4259
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4260
   reference, returns false, otherwise returns true.  NEST is the outermost
4261
   loop of the loop nest in which the references should be analyzed.  */
4262
 
4263
bool
4264
find_data_references_in_stmt (struct loop *nest, gimple stmt,
4265
                              VEC (data_reference_p, heap) **datarefs)
4266
{
4267
  unsigned i;
4268
  VEC (data_ref_loc, heap) *references;
4269
  data_ref_loc *ref;
4270
  bool ret = true;
4271
  data_reference_p dr;
4272
 
4273
  if (get_references_in_stmt (stmt, &references))
4274
    {
4275
      VEC_free (data_ref_loc, heap, references);
4276
      return false;
4277
    }
4278
 
4279
  FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4280
    {
4281
      dr = create_data_ref (nest, loop_containing_stmt (stmt),
4282
                            *ref->pos, stmt, ref->is_read);
4283
      gcc_assert (dr != NULL);
4284
      VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4285
    }
4286
  VEC_free (data_ref_loc, heap, references);
4287
  return ret;
4288
}
4289
 
4290
/* Stores the data references in STMT to DATAREFS.  If there is an
4291
   unanalyzable reference, returns false, otherwise returns true.
4292
   NEST is the outermost loop of the loop nest in which the references
4293
   should be instantiated, LOOP is the loop in which the references
4294
   should be analyzed.  */
4295
 
4296
bool
4297
graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4298
                                       VEC (data_reference_p, heap) **datarefs)
4299
{
4300
  unsigned i;
4301
  VEC (data_ref_loc, heap) *references;
4302
  data_ref_loc *ref;
4303
  bool ret = true;
4304
  data_reference_p dr;
4305
 
4306
  if (get_references_in_stmt (stmt, &references))
4307
    {
4308
      VEC_free (data_ref_loc, heap, references);
4309
      return false;
4310
    }
4311
 
4312
  FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4313
    {
4314
      dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4315
      gcc_assert (dr != NULL);
4316
      VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4317
    }
4318
 
4319
  VEC_free (data_ref_loc, heap, references);
4320
  return ret;
4321
}
4322
 
4323
/* Search the data references in LOOP, and record the information into
4324
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
4325
   difficult case, returns NULL_TREE otherwise.  */
4326
 
4327
tree
4328
find_data_references_in_bb (struct loop *loop, basic_block bb,
4329
                            VEC (data_reference_p, heap) **datarefs)
4330
{
4331
  gimple_stmt_iterator bsi;
4332
 
4333
  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4334
    {
4335
      gimple stmt = gsi_stmt (bsi);
4336
 
4337
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
4338
        {
4339
          struct data_reference *res;
4340
          res = XCNEW (struct data_reference);
4341
          VEC_safe_push (data_reference_p, heap, *datarefs, res);
4342
 
4343
          return chrec_dont_know;
4344
        }
4345
    }
4346
 
4347
  return NULL_TREE;
4348
}
4349
 
4350
/* Search the data references in LOOP, and record the information into
4351
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
4352
   difficult case, returns NULL_TREE otherwise.
4353
 
4354
   TODO: This function should be made smarter so that it can handle address
4355
   arithmetic as if they were array accesses, etc.  */
4356
 
4357
static tree
4358
find_data_references_in_loop (struct loop *loop,
4359
                              VEC (data_reference_p, heap) **datarefs)
4360
{
4361
  basic_block bb, *bbs;
4362
  unsigned int i;
4363
 
4364
  bbs = get_loop_body_in_dom_order (loop);
4365
 
4366
  for (i = 0; i < loop->num_nodes; i++)
4367
    {
4368
      bb = bbs[i];
4369
 
4370
      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4371
        {
4372
          free (bbs);
4373
          return chrec_dont_know;
4374
        }
4375
    }
4376
  free (bbs);
4377
 
4378
  return NULL_TREE;
4379
}
4380
 
4381
/* Recursive helper function.  */
4382
 
4383
static bool
4384
find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4385
{
4386
  /* Inner loops of the nest should not contain siblings.  Example:
4387
     when there are two consecutive loops,
4388
 
4389
     | loop_0
4390
     |   loop_1
4391
     |     A[{0, +, 1}_1]
4392
     |   endloop_1
4393
     |   loop_2
4394
     |     A[{0, +, 1}_2]
4395
     |   endloop_2
4396
     | endloop_0
4397
 
4398
     the dependence relation cannot be captured by the distance
4399
     abstraction.  */
4400
  if (loop->next)
4401
    return false;
4402
 
4403
  VEC_safe_push (loop_p, heap, *loop_nest, loop);
4404
  if (loop->inner)
4405
    return find_loop_nest_1 (loop->inner, loop_nest);
4406
  return true;
4407
}
4408
 
4409
/* Return false when the LOOP is not well nested.  Otherwise return
4410
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4411
   contain the loops from the outermost to the innermost, as they will
4412
   appear in the classic distance vector.  */
4413
 
4414
bool
4415
find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4416
{
4417
  VEC_safe_push (loop_p, heap, *loop_nest, loop);
4418
  if (loop->inner)
4419
    return find_loop_nest_1 (loop->inner, loop_nest);
4420
  return true;
4421
}
4422
 
4423
/* Returns true when the data dependences have been computed, false otherwise.
4424
   Given a loop nest LOOP, the following vectors are returned:
4425
   DATAREFS is initialized to all the array elements contained in this loop,
4426
   DEPENDENCE_RELATIONS contains the relations between the data references.
4427
   Compute read-read and self relations if
4428
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4429
 
4430
bool
4431
compute_data_dependences_for_loop (struct loop *loop,
4432
                                   bool compute_self_and_read_read_dependences,
4433
                                   VEC (loop_p, heap) **loop_nest,
4434
                                   VEC (data_reference_p, heap) **datarefs,
4435
                                   VEC (ddr_p, heap) **dependence_relations)
4436
{
4437
  bool res = true;
4438
 
4439
  memset (&dependence_stats, 0, sizeof (dependence_stats));
4440
 
4441
  /* If the loop nest is not well formed, or one of the data references
4442
     is not computable, give up without spending time to compute other
4443
     dependences.  */
4444
  if (!loop
4445
      || !find_loop_nest (loop, loop_nest)
4446
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4447
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4448
                                   compute_self_and_read_read_dependences))
4449
    res = false;
4450
 
4451
  if (dump_file && (dump_flags & TDF_STATS))
4452
    {
4453
      fprintf (dump_file, "Dependence tester statistics:\n");
4454
 
4455
      fprintf (dump_file, "Number of dependence tests: %d\n",
4456
               dependence_stats.num_dependence_tests);
4457
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4458
               dependence_stats.num_dependence_dependent);
4459
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4460
               dependence_stats.num_dependence_independent);
4461
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4462
               dependence_stats.num_dependence_undetermined);
4463
 
4464
      fprintf (dump_file, "Number of subscript tests: %d\n",
4465
               dependence_stats.num_subscript_tests);
4466
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4467
               dependence_stats.num_subscript_undetermined);
4468
      fprintf (dump_file, "Number of same subscript function: %d\n",
4469
               dependence_stats.num_same_subscript_function);
4470
 
4471
      fprintf (dump_file, "Number of ziv tests: %d\n",
4472
               dependence_stats.num_ziv);
4473
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4474
               dependence_stats.num_ziv_dependent);
4475
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4476
               dependence_stats.num_ziv_independent);
4477
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4478
               dependence_stats.num_ziv_unimplemented);
4479
 
4480
      fprintf (dump_file, "Number of siv tests: %d\n",
4481
               dependence_stats.num_siv);
4482
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4483
               dependence_stats.num_siv_dependent);
4484
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4485
               dependence_stats.num_siv_independent);
4486
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4487
               dependence_stats.num_siv_unimplemented);
4488
 
4489
      fprintf (dump_file, "Number of miv tests: %d\n",
4490
               dependence_stats.num_miv);
4491
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4492
               dependence_stats.num_miv_dependent);
4493
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4494
               dependence_stats.num_miv_independent);
4495
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4496
               dependence_stats.num_miv_unimplemented);
4497
    }
4498
 
4499
  return res;
4500
}
4501
 
4502
/* Returns true when the data dependences for the basic block BB have been
4503
   computed, false otherwise.
4504
   DATAREFS is initialized to all the array elements contained in this basic
4505
   block, DEPENDENCE_RELATIONS contains the relations between the data
4506
   references. Compute read-read and self relations if
4507
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4508
bool
4509
compute_data_dependences_for_bb (basic_block bb,
4510
                                 bool compute_self_and_read_read_dependences,
4511
                                 VEC (data_reference_p, heap) **datarefs,
4512
                                 VEC (ddr_p, heap) **dependence_relations)
4513
{
4514
  if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4515
    return false;
4516
 
4517
  return compute_all_dependences (*datarefs, dependence_relations, NULL,
4518
                                  compute_self_and_read_read_dependences);
4519
}
4520
 
4521
/* Entry point (for testing only).  Analyze all the data references
4522
   and the dependence relations in LOOP.
4523
 
4524
   The data references are computed first.
4525
 
4526
   A relation on these nodes is represented by a complete graph.  Some
4527
   of the relations could be of no interest, thus the relations can be
4528
   computed on demand.
4529
 
4530
   In the following function we compute all the relations.  This is
4531
   just a first implementation that is here for:
4532
   - for showing how to ask for the dependence relations,
4533
   - for the debugging the whole dependence graph,
4534
   - for the dejagnu testcases and maintenance.
4535
 
4536
   It is possible to ask only for a part of the graph, avoiding to
4537
   compute the whole dependence graph.  The computed dependences are
4538
   stored in a knowledge base (KB) such that later queries don't
4539
   recompute the same information.  The implementation of this KB is
4540
   transparent to the optimizer, and thus the KB can be changed with a
4541
   more efficient implementation, or the KB could be disabled.  */
4542
static void
4543
analyze_all_data_dependences (struct loop *loop)
4544
{
4545
  unsigned int i;
4546
  int nb_data_refs = 10;
4547
  VEC (data_reference_p, heap) *datarefs =
4548
    VEC_alloc (data_reference_p, heap, nb_data_refs);
4549
  VEC (ddr_p, heap) *dependence_relations =
4550
    VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4551
  VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4552
 
4553
  /* Compute DDs on the whole function.  */
4554
  compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4555
                                     &dependence_relations);
4556
 
4557
  if (dump_file)
4558
    {
4559
      dump_data_dependence_relations (dump_file, dependence_relations);
4560
      fprintf (dump_file, "\n\n");
4561
 
4562
      if (dump_flags & TDF_DETAILS)
4563
        dump_dist_dir_vectors (dump_file, dependence_relations);
4564
 
4565
      if (dump_flags & TDF_STATS)
4566
        {
4567
          unsigned nb_top_relations = 0;
4568
          unsigned nb_bot_relations = 0;
4569
          unsigned nb_chrec_relations = 0;
4570
          struct data_dependence_relation *ddr;
4571
 
4572
          FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4573
            {
4574
              if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4575
                nb_top_relations++;
4576
 
4577
              else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4578
                nb_bot_relations++;
4579
 
4580
              else
4581
                nb_chrec_relations++;
4582
            }
4583
 
4584
          gather_stats_on_scev_database ();
4585
        }
4586
    }
4587
 
4588
  VEC_free (loop_p, heap, loop_nest);
4589
  free_dependence_relations (dependence_relations);
4590
  free_data_refs (datarefs);
4591
}
4592
 
4593
/* Computes all the data dependences and check that the results of
4594
   several analyzers are the same.  */
4595
 
4596
void
4597
tree_check_data_deps (void)
4598
{
4599
  loop_iterator li;
4600
  struct loop *loop_nest;
4601
 
4602
  FOR_EACH_LOOP (li, loop_nest, 0)
4603
    analyze_all_data_dependences (loop_nest);
4604
}
4605
 
4606
/* Free the memory used by a data dependence relation DDR.  */
4607
 
4608
void
4609
free_dependence_relation (struct data_dependence_relation *ddr)
4610
{
4611
  if (ddr == NULL)
4612
    return;
4613
 
4614
  if (DDR_SUBSCRIPTS (ddr))
4615
    free_subscripts (DDR_SUBSCRIPTS (ddr));
4616
  if (DDR_DIST_VECTS (ddr))
4617
    VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4618
  if (DDR_DIR_VECTS (ddr))
4619
    VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4620
 
4621
  free (ddr);
4622
}
4623
 
4624
/* Free the memory used by the data dependence relations from
4625
   DEPENDENCE_RELATIONS.  */
4626
 
4627
void
4628
free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4629
{
4630
  unsigned int i;
4631
  struct data_dependence_relation *ddr;
4632
 
4633
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4634
    if (ddr)
4635
      free_dependence_relation (ddr);
4636
 
4637
  VEC_free (ddr_p, heap, dependence_relations);
4638
}
4639
 
4640
/* Free the memory used by the data references from DATAREFS.  */
4641
 
4642
void
4643
free_data_refs (VEC (data_reference_p, heap) *datarefs)
4644
{
4645
  unsigned int i;
4646
  struct data_reference *dr;
4647
 
4648
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4649
    free_data_ref (dr);
4650
  VEC_free (data_reference_p, heap, datarefs);
4651
}
4652
 
4653
 
4654
 
4655
/* Dump vertex I in RDG to FILE.  */
4656
 
4657
void
4658
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4659
{
4660
  struct vertex *v = &(rdg->vertices[i]);
4661
  struct graph_edge *e;
4662
 
4663
  fprintf (file, "(vertex %d: (%s%s) (in:", i,
4664
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4665
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4666
 
4667
  if (v->pred)
4668
    for (e = v->pred; e; e = e->pred_next)
4669
      fprintf (file, " %d", e->src);
4670
 
4671
  fprintf (file, ") (out:");
4672
 
4673
  if (v->succ)
4674
    for (e = v->succ; e; e = e->succ_next)
4675
      fprintf (file, " %d", e->dest);
4676
 
4677
  fprintf (file, ")\n");
4678
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4679
  fprintf (file, ")\n");
4680
}
4681
 
4682
/* Call dump_rdg_vertex on stderr.  */
4683
 
4684
DEBUG_FUNCTION void
4685
debug_rdg_vertex (struct graph *rdg, int i)
4686
{
4687
  dump_rdg_vertex (stderr, rdg, i);
4688
}
4689
 
4690
/* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4691
   dumped vertices to that bitmap.  */
4692
 
4693
void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4694
{
4695
  int i;
4696
 
4697
  fprintf (file, "(%d\n", c);
4698
 
4699
  for (i = 0; i < rdg->n_vertices; i++)
4700
    if (rdg->vertices[i].component == c)
4701
      {
4702
        if (dumped)
4703
          bitmap_set_bit (dumped, i);
4704
 
4705
        dump_rdg_vertex (file, rdg, i);
4706
      }
4707
 
4708
  fprintf (file, ")\n");
4709
}
4710
 
4711
/* Call dump_rdg_vertex on stderr.  */
4712
 
4713
DEBUG_FUNCTION void
4714
debug_rdg_component (struct graph *rdg, int c)
4715
{
4716
  dump_rdg_component (stderr, rdg, c, NULL);
4717
}
4718
 
4719
/* Dump the reduced dependence graph RDG to FILE.  */
4720
 
4721
void
4722
dump_rdg (FILE *file, struct graph *rdg)
4723
{
4724
  int i;
4725
  bitmap dumped = BITMAP_ALLOC (NULL);
4726
 
4727
  fprintf (file, "(rdg\n");
4728
 
4729
  for (i = 0; i < rdg->n_vertices; i++)
4730
    if (!bitmap_bit_p (dumped, i))
4731
      dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4732
 
4733
  fprintf (file, ")\n");
4734
  BITMAP_FREE (dumped);
4735
}
4736
 
4737
/* Call dump_rdg on stderr.  */
4738
 
4739
DEBUG_FUNCTION void
4740
debug_rdg (struct graph *rdg)
4741
{
4742
  dump_rdg (stderr, rdg);
4743
}
4744
 
4745
static void
4746
dot_rdg_1 (FILE *file, struct graph *rdg)
4747
{
4748
  int i;
4749
 
4750
  fprintf (file, "digraph RDG {\n");
4751
 
4752
  for (i = 0; i < rdg->n_vertices; i++)
4753
    {
4754
      struct vertex *v = &(rdg->vertices[i]);
4755
      struct graph_edge *e;
4756
 
4757
      /* Highlight reads from memory.  */
4758
      if (RDG_MEM_READS_STMT (rdg, i))
4759
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4760
 
4761
      /* Highlight stores to memory.  */
4762
      if (RDG_MEM_WRITE_STMT (rdg, i))
4763
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4764
 
4765
      if (v->succ)
4766
       for (e = v->succ; e; e = e->succ_next)
4767
         switch (RDGE_TYPE (e))
4768
           {
4769
           case input_dd:
4770
             fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4771
             break;
4772
 
4773
           case output_dd:
4774
             fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4775
             break;
4776
 
4777
           case flow_dd:
4778
             /* These are the most common dependences: don't print these. */
4779
             fprintf (file, "%d -> %d \n", i, e->dest);
4780
             break;
4781
 
4782
           case anti_dd:
4783
             fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4784
             break;
4785
 
4786
           default:
4787
             gcc_unreachable ();
4788
           }
4789
    }
4790
 
4791
  fprintf (file, "}\n\n");
4792
}
4793
 
4794
/* Display the Reduced Dependence Graph using dotty.  */
4795
extern void dot_rdg (struct graph *);
4796
 
4797
DEBUG_FUNCTION void
4798
dot_rdg (struct graph *rdg)
4799
{
4800
  /* When debugging, enable the following code.  This cannot be used
4801
     in production compilers because it calls "system".  */
4802
#if 0
4803
  FILE *file = fopen ("/tmp/rdg.dot", "w");
4804
  gcc_assert (file != NULL);
4805
 
4806
  dot_rdg_1 (file, rdg);
4807
  fclose (file);
4808
 
4809
  system ("dotty /tmp/rdg.dot &");
4810
#else
4811
  dot_rdg_1 (stderr, rdg);
4812
#endif
4813
}
4814
 
4815
/* This structure is used for recording the mapping statement index in
4816
   the RDG.  */
4817
 
4818
struct GTY(()) rdg_vertex_info
4819
{
4820
  gimple stmt;
4821
  int index;
4822
};
4823
 
4824
/* Returns the index of STMT in RDG.  */
4825
 
4826
int
4827
rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4828
{
4829
  struct rdg_vertex_info rvi, *slot;
4830
 
4831
  rvi.stmt = stmt;
4832
  slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4833
 
4834
  if (!slot)
4835
    return -1;
4836
 
4837
  return slot->index;
4838
}
4839
 
4840
/* Creates an edge in RDG for each distance vector from DDR.  The
4841
   order that we keep track of in the RDG is the order in which
4842
   statements have to be executed.  */
4843
 
4844
static void
4845
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4846
{
4847
  struct graph_edge *e;
4848
  int va, vb;
4849
  data_reference_p dra = DDR_A (ddr);
4850
  data_reference_p drb = DDR_B (ddr);
4851
  unsigned level = ddr_dependence_level (ddr);
4852
 
4853
  /* For non scalar dependences, when the dependence is REVERSED,
4854
     statement B has to be executed before statement A.  */
4855
  if (level > 0
4856
      && !DDR_REVERSED_P (ddr))
4857
    {
4858
      data_reference_p tmp = dra;
4859
      dra = drb;
4860
      drb = tmp;
4861
    }
4862
 
4863
  va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4864
  vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4865
 
4866
  if (va < 0 || vb < 0)
4867
    return;
4868
 
4869
  e = add_edge (rdg, va, vb);
4870
  e->data = XNEW (struct rdg_edge);
4871
 
4872
  RDGE_LEVEL (e) = level;
4873
  RDGE_RELATION (e) = ddr;
4874
 
4875
  /* Determines the type of the data dependence.  */
4876
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
4877
    RDGE_TYPE (e) = input_dd;
4878
  else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4879
    RDGE_TYPE (e) = output_dd;
4880
  else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4881
    RDGE_TYPE (e) = flow_dd;
4882
  else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4883
    RDGE_TYPE (e) = anti_dd;
4884
}
4885
 
4886
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4887
   the index of DEF in RDG.  */
4888
 
4889
static void
4890
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4891
{
4892
  use_operand_p imm_use_p;
4893
  imm_use_iterator iterator;
4894
 
4895
  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4896
    {
4897
      struct graph_edge *e;
4898
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4899
 
4900
      if (use < 0)
4901
        continue;
4902
 
4903
      e = add_edge (rdg, idef, use);
4904
      e->data = XNEW (struct rdg_edge);
4905
      RDGE_TYPE (e) = flow_dd;
4906
      RDGE_RELATION (e) = NULL;
4907
    }
4908
}
4909
 
4910
/* Creates the edges of the reduced dependence graph RDG.  */
4911
 
4912
static void
4913
create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4914
{
4915
  int i;
4916
  struct data_dependence_relation *ddr;
4917
  def_operand_p def_p;
4918
  ssa_op_iter iter;
4919
 
4920
  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4921
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4922
      create_rdg_edge_for_ddr (rdg, ddr);
4923
 
4924
  for (i = 0; i < rdg->n_vertices; i++)
4925
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4926
                              iter, SSA_OP_DEF)
4927
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4928
}
4929
 
4930
/* Build the vertices of the reduced dependence graph RDG.  */
4931
 
4932
void
4933
create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4934
{
4935
  int i, j;
4936
  gimple stmt;
4937
 
4938
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4939
    {
4940
      VEC (data_ref_loc, heap) *references;
4941
      data_ref_loc *ref;
4942
      struct vertex *v = &(rdg->vertices[i]);
4943
      struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4944
      struct rdg_vertex_info **slot;
4945
 
4946
      rvi->stmt = stmt;
4947
      rvi->index = i;
4948
      slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4949
 
4950
      if (!*slot)
4951
        *slot = rvi;
4952
      else
4953
        free (rvi);
4954
 
4955
      v->data = XNEW (struct rdg_vertex);
4956
      RDG_STMT (rdg, i) = stmt;
4957
 
4958
      RDG_MEM_WRITE_STMT (rdg, i) = false;
4959
      RDG_MEM_READS_STMT (rdg, i) = false;
4960
      if (gimple_code (stmt) == GIMPLE_PHI)
4961
        continue;
4962
 
4963
      get_references_in_stmt (stmt, &references);
4964
      FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4965
        if (!ref->is_read)
4966
          RDG_MEM_WRITE_STMT (rdg, i) = true;
4967
        else
4968
          RDG_MEM_READS_STMT (rdg, i) = true;
4969
 
4970
      VEC_free (data_ref_loc, heap, references);
4971
    }
4972
}
4973
 
4974
/* Initialize STMTS with all the statements of LOOP.  When
4975
   INCLUDE_PHIS is true, include also the PHI nodes.  The order in
4976
   which we discover statements is important as
4977
   generate_loops_for_partition is using the same traversal for
4978
   identifying statements. */
4979
 
4980
static void
4981
stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4982
{
4983
  unsigned int i;
4984
  basic_block *bbs = get_loop_body_in_dom_order (loop);
4985
 
4986
  for (i = 0; i < loop->num_nodes; i++)
4987
    {
4988
      basic_block bb = bbs[i];
4989
      gimple_stmt_iterator bsi;
4990
      gimple stmt;
4991
 
4992
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4993
        VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4994
 
4995
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4996
        {
4997
          stmt = gsi_stmt (bsi);
4998
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4999
            VEC_safe_push (gimple, heap, *stmts, stmt);
5000
        }
5001
    }
5002
 
5003
  free (bbs);
5004
}
5005
 
5006
/* Returns true when all the dependences are computable.  */
5007
 
5008
static bool
5009
known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5010
{
5011
  ddr_p ddr;
5012
  unsigned int i;
5013
 
5014
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5015
    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5016
      return false;
5017
 
5018
  return true;
5019
}
5020
 
5021
/* Computes a hash function for element ELT.  */
5022
 
5023
static hashval_t
5024
hash_stmt_vertex_info (const void *elt)
5025
{
5026
  const struct rdg_vertex_info *const rvi =
5027
    (const struct rdg_vertex_info *) elt;
5028
  gimple stmt = rvi->stmt;
5029
 
5030
  return htab_hash_pointer (stmt);
5031
}
5032
 
5033
/* Compares database elements E1 and E2.  */
5034
 
5035
static int
5036
eq_stmt_vertex_info (const void *e1, const void *e2)
5037
{
5038
  const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5039
  const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5040
 
5041
  return elt1->stmt == elt2->stmt;
5042
}
5043
 
5044
/* Free the element E.  */
5045
 
5046
static void
5047
hash_stmt_vertex_del (void *e)
5048
{
5049
  free (e);
5050
}
5051
 
5052
/* Build the Reduced Dependence Graph (RDG) with one vertex per
5053
   statement of the loop nest, and one edge per data dependence or
5054
   scalar dependence.  */
5055
 
5056
struct graph *
5057
build_empty_rdg (int n_stmts)
5058
{
5059
  int nb_data_refs = 10;
5060
  struct graph *rdg = new_graph (n_stmts);
5061
 
5062
  rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5063
                              eq_stmt_vertex_info, hash_stmt_vertex_del);
5064
  return rdg;
5065
}
5066
 
5067
/* Build the Reduced Dependence Graph (RDG) with one vertex per
5068
   statement of the loop nest, and one edge per data dependence or
5069
   scalar dependence.  */
5070
 
5071
struct graph *
5072
build_rdg (struct loop *loop,
5073
           VEC (loop_p, heap) **loop_nest,
5074
           VEC (ddr_p, heap) **dependence_relations,
5075
           VEC (data_reference_p, heap) **datarefs)
5076
{
5077
  struct graph *rdg = NULL;
5078
  VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5079
 
5080
  compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5081
                                     dependence_relations);
5082
 
5083
  if (known_dependences_p (*dependence_relations))
5084
    {
5085
      stmts_from_loop (loop, &stmts);
5086
      rdg = build_empty_rdg (VEC_length (gimple, stmts));
5087
      create_rdg_vertices (rdg, stmts);
5088
      create_rdg_edges (rdg, *dependence_relations);
5089
    }
5090
 
5091
  VEC_free (gimple, heap, stmts);
5092
  return rdg;
5093
}
5094
 
5095
/* Free the reduced dependence graph RDG.  */
5096
 
5097
void
5098
free_rdg (struct graph *rdg)
5099
{
5100
  int i;
5101
 
5102
  for (i = 0; i < rdg->n_vertices; i++)
5103
    {
5104
      struct vertex *v = &(rdg->vertices[i]);
5105
      struct graph_edge *e;
5106
 
5107
      for (e = v->succ; e; e = e->succ_next)
5108
        free (e->data);
5109
 
5110
      free (v->data);
5111
    }
5112
 
5113
  htab_delete (rdg->indices);
5114
  free_graph (rdg);
5115
}
5116
 
5117
/* Initialize STMTS with all the statements of LOOP that contain a
5118
   store to memory.  */
5119
 
5120
void
5121
stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5122
{
5123
  unsigned int i;
5124
  basic_block *bbs = get_loop_body_in_dom_order (loop);
5125
 
5126
  for (i = 0; i < loop->num_nodes; i++)
5127
    {
5128
      basic_block bb = bbs[i];
5129
      gimple_stmt_iterator bsi;
5130
 
5131
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5132
        if (gimple_vdef (gsi_stmt (bsi)))
5133
          VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5134
    }
5135
 
5136
  free (bbs);
5137
}
5138
 
5139
/* Returns true when the statement at STMT is of the form "A[i] = 0"
5140
   that contains a data reference on its LHS with a stride of the same
5141
   size as its unit type.  */
5142
 
5143
bool
5144
stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5145
{
5146
  tree op0, op1;
5147
  bool res;
5148
  struct data_reference *dr;
5149
 
5150
  if (!stmt
5151
      || !gimple_vdef (stmt)
5152
      || !is_gimple_assign (stmt)
5153
      || !gimple_assign_single_p (stmt)
5154
      || !(op1 = gimple_assign_rhs1 (stmt))
5155
      || !(integer_zerop (op1) || real_zerop (op1)))
5156
    return false;
5157
 
5158
  dr = XCNEW (struct data_reference);
5159
  op0 = gimple_assign_lhs (stmt);
5160
 
5161
  DR_STMT (dr) = stmt;
5162
  DR_REF (dr) = op0;
5163
 
5164
  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5165
    && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5166
 
5167
  free_data_ref (dr);
5168
  return res;
5169
}
5170
 
5171
/* Initialize STMTS with all the statements of LOOP that contain a
5172
   store to memory of the form "A[i] = 0".  */
5173
 
5174
void
5175
stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5176
{
5177
  unsigned int i;
5178
  basic_block bb;
5179
  gimple_stmt_iterator si;
5180
  gimple stmt;
5181
  basic_block *bbs = get_loop_body_in_dom_order (loop);
5182
 
5183
  for (i = 0; i < loop->num_nodes; i++)
5184
    for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5185
      if ((stmt = gsi_stmt (si))
5186
          && stmt_with_adjacent_zero_store_dr_p (stmt))
5187
        VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5188
 
5189
  free (bbs);
5190
}
5191
 
5192
/* For a data reference REF, return the declaration of its base
5193
   address or NULL_TREE if the base is not determined.  */
5194
 
5195
static inline tree
5196
ref_base_address (gimple stmt, data_ref_loc *ref)
5197
{
5198
  tree base = NULL_TREE;
5199
  tree base_address;
5200
  struct data_reference *dr = XCNEW (struct data_reference);
5201
 
5202
  DR_STMT (dr) = stmt;
5203
  DR_REF (dr) = *ref->pos;
5204
  dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5205
  base_address = DR_BASE_ADDRESS (dr);
5206
 
5207
  if (!base_address)
5208
    goto end;
5209
 
5210
  switch (TREE_CODE (base_address))
5211
    {
5212
    case ADDR_EXPR:
5213
      base = TREE_OPERAND (base_address, 0);
5214
      break;
5215
 
5216
    default:
5217
      base = base_address;
5218
      break;
5219
    }
5220
 
5221
 end:
5222
  free_data_ref (dr);
5223
  return base;
5224
}
5225
 
5226
/* Determines whether the statement from vertex V of the RDG has a
5227
   definition used outside the loop that contains this statement.  */
5228
 
5229
bool
5230
rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5231
{
5232
  gimple stmt = RDG_STMT (rdg, v);
5233
  struct loop *loop = loop_containing_stmt (stmt);
5234
  use_operand_p imm_use_p;
5235
  imm_use_iterator iterator;
5236
  ssa_op_iter it;
5237
  def_operand_p def_p;
5238
 
5239
  if (!loop)
5240
    return true;
5241
 
5242
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5243
    {
5244
      FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5245
        {
5246
          if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5247
            return true;
5248
        }
5249
    }
5250
 
5251
  return false;
5252
}
5253
 
5254
/* Determines whether statements S1 and S2 access to similar memory
5255
   locations.  Two memory accesses are considered similar when they
5256
   have the same base address declaration, i.e. when their
5257
   ref_base_address is the same.  */
5258
 
5259
bool
5260
have_similar_memory_accesses (gimple s1, gimple s2)
5261
{
5262
  bool res = false;
5263
  unsigned i, j;
5264
  VEC (data_ref_loc, heap) *refs1, *refs2;
5265
  data_ref_loc *ref1, *ref2;
5266
 
5267
  get_references_in_stmt (s1, &refs1);
5268
  get_references_in_stmt (s2, &refs2);
5269
 
5270
  FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5271
    {
5272
      tree base1 = ref_base_address (s1, ref1);
5273
 
5274
      if (base1)
5275
        FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5276
          if (base1 == ref_base_address (s2, ref2))
5277
            {
5278
              res = true;
5279
              goto end;
5280
            }
5281
    }
5282
 
5283
 end:
5284
  VEC_free (data_ref_loc, heap, refs1);
5285
  VEC_free (data_ref_loc, heap, refs2);
5286
  return res;
5287
}
5288
 
5289
/* Helper function for the hashtab.  */
5290
 
5291
static int
5292
have_similar_memory_accesses_1 (const void *s1, const void *s2)
5293
{
5294
  return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5295
                                       CONST_CAST_GIMPLE ((const_gimple) s2));
5296
}
5297
 
5298
/* Helper function for the hashtab.  */
5299
 
5300
static hashval_t
5301
ref_base_address_1 (const void *s)
5302
{
5303
  gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5304
  unsigned i;
5305
  VEC (data_ref_loc, heap) *refs;
5306
  data_ref_loc *ref;
5307
  hashval_t res = 0;
5308
 
5309
  get_references_in_stmt (stmt, &refs);
5310
 
5311
  FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5312
    if (!ref->is_read)
5313
      {
5314
        res = htab_hash_pointer (ref_base_address (stmt, ref));
5315
        break;
5316
      }
5317
 
5318
  VEC_free (data_ref_loc, heap, refs);
5319
  return res;
5320
}
5321
 
5322
/* Try to remove duplicated write data references from STMTS.  */
5323
 
5324
void
5325
remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5326
{
5327
  unsigned i;
5328
  gimple stmt;
5329
  htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5330
                             have_similar_memory_accesses_1, NULL);
5331
 
5332
  for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5333
    {
5334
      void **slot;
5335
 
5336
      slot = htab_find_slot (seen, stmt, INSERT);
5337
 
5338
      if (*slot)
5339
        VEC_ordered_remove (gimple, *stmts, i);
5340
      else
5341
        {
5342
          *slot = (void *) stmt;
5343
          i++;
5344
        }
5345
    }
5346
 
5347
  htab_delete (seen);
5348
}
5349
 
5350
/* Returns the index of PARAMETER in the parameters vector of the
5351
   ACCESS_MATRIX.  If PARAMETER does not exist return -1.  */
5352
 
5353
int
5354
access_matrix_get_index_for_parameter (tree parameter,
5355
                                       struct access_matrix *access_matrix)
5356
{
5357
  int i;
5358
  VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5359
  tree lambda_parameter;
5360
 
5361
  FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5362
    if (lambda_parameter == parameter)
5363
      return i + AM_NB_INDUCTION_VARS (access_matrix);
5364
 
5365
  return -1;
5366
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.