OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [tree-vect-analyze.c] - Blame information for rev 18

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Analysis Utilities for Loop Vectorization.
2
   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 2, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING.  If not, write to the Free
19
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20
02110-1301, USA.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "coretypes.h"
25
#include "tm.h"
26
#include "ggc.h"
27
#include "tree.h"
28
#include "basic-block.h"
29
#include "diagnostic.h"
30
#include "tree-flow.h"
31
#include "tree-dump.h"
32
#include "timevar.h"
33
#include "cfgloop.h"
34
#include "expr.h"
35
#include "optabs.h"
36
#include "params.h"
37
#include "tree-chrec.h"
38
#include "tree-data-ref.h"
39
#include "tree-scalar-evolution.h"
40
#include "tree-vectorizer.h"
41
 
42
/* Main analysis functions.  */
43
static loop_vec_info vect_analyze_loop_form (struct loop *);
44
static bool vect_analyze_data_refs (loop_vec_info);
45
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46
static void vect_analyze_scalar_cycles (loop_vec_info);
47
static bool vect_analyze_data_ref_accesses (loop_vec_info);
48
static bool vect_analyze_data_ref_dependences (loop_vec_info);
49
static bool vect_analyze_data_refs_alignment (loop_vec_info);
50
static bool vect_compute_data_refs_alignment (loop_vec_info);
51
static bool vect_enhance_data_refs_alignment (loop_vec_info);
52
static bool vect_analyze_operations (loop_vec_info);
53
static bool vect_determine_vectorization_factor (loop_vec_info);
54
 
55
/* Utility functions for the analyses.  */
56
static bool exist_non_indexing_operands_for_use_p (tree, tree);
57
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
58
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
59
static tree vect_get_loop_niters (struct loop *, tree *);
60
static bool vect_analyze_data_ref_dependence
61
  (struct data_dependence_relation *, loop_vec_info);
62
static bool vect_compute_data_ref_alignment (struct data_reference *);
63
static bool vect_analyze_data_ref_access (struct data_reference *);
64
static bool vect_can_advance_ivs_p (loop_vec_info);
65
static void vect_update_misalignment_for_peel
66
  (struct data_reference *, struct data_reference *, int npeel);
67
 
68
 
69
/* Function vect_determine_vectorization_factor
70
 
71
   Determine the vectorization factor (VF). VF is the number of data elements
72
   that are operated upon in parallel in a single iteration of the vectorized
73
   loop. For example, when vectorizing a loop that operates on 4byte elements,
74
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
75
   elements can fit in a single vector register.
76
 
77
   We currently support vectorization of loops in which all types operated upon
78
   are of the same size. Therefore this function currently sets VF according to
79
   the size of the types operated upon, and fails if there are multiple sizes
80
   in the loop.
81
 
82
   VF is also the factor by which the loop iterations are strip-mined, e.g.:
83
   original loop:
84
        for (i=0; i<N; i++){
85
          a[i] = b[i] + c[i];
86
        }
87
 
88
   vectorized loop:
89
        for (i=0; i<N; i+=VF){
90
          a[i:VF] = b[i:VF] + c[i:VF];
91
        }
92
*/
93
 
94
static bool
95
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96
{
97
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99
  int nbbs = loop->num_nodes;
100
  block_stmt_iterator si;
101
  unsigned int vectorization_factor = 0;
102
  int i;
103
  tree scalar_type;
104
 
105
  if (vect_print_dump_info (REPORT_DETAILS))
106
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
107
 
108
  for (i = 0; i < nbbs; i++)
109
    {
110
      basic_block bb = bbs[i];
111
 
112
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
113
        {
114
          tree stmt = bsi_stmt (si);
115
          unsigned int nunits;
116
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117
          tree vectype;
118
 
119
          if (vect_print_dump_info (REPORT_DETAILS))
120
            {
121
              fprintf (vect_dump, "==> examining statement: ");
122
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
123
            }
124
 
125
          gcc_assert (stmt_info);
126
          /* skip stmts which do not need to be vectorized.  */
127
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
128
              && !STMT_VINFO_LIVE_P (stmt_info))
129
            {
130
              if (vect_print_dump_info (REPORT_DETAILS))
131
                fprintf (vect_dump, "skip.");
132
              continue;
133
            }
134
 
135
          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
136
            {
137
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
138
                {
139
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
140
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
141
                }
142
              return false;
143
            }
144
 
145
          if (STMT_VINFO_DATA_REF (stmt_info))
146
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
147
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
148
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
149
          else
150
            scalar_type = TREE_TYPE (stmt);
151
 
152
          if (vect_print_dump_info (REPORT_DETAILS))
153
            {
154
              fprintf (vect_dump, "get vectype for scalar type:  ");
155
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
156
            }
157
 
158
          vectype = get_vectype_for_scalar_type (scalar_type);
159
          if (!vectype)
160
            {
161
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
162
                {
163
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
164
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165
                }
166
              return false;
167
            }
168
          if (vect_print_dump_info (REPORT_DETAILS))
169
            {
170
              fprintf (vect_dump, "vectype: ");
171
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
172
            }
173
          STMT_VINFO_VECTYPE (stmt_info) = vectype;
174
 
175
          nunits = TYPE_VECTOR_SUBPARTS (vectype);
176
          if (vect_print_dump_info (REPORT_DETAILS))
177
            fprintf (vect_dump, "nunits = %d", nunits);
178
 
179
          if (vectorization_factor)
180
            {
181
              /* FORNOW: don't allow mixed units.
182
                 This restriction will be relaxed in the future.  */
183
              if (nunits != vectorization_factor)
184
                {
185
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
186
                    fprintf (vect_dump, "not vectorized: mixed data-types");
187
                  return false;
188
                }
189
            }
190
          else
191
            vectorization_factor = nunits;
192
 
193
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
194
                        * vectorization_factor == UNITS_PER_SIMD_WORD);
195
        }
196
    }
197
 
198
  /* TODO: Analyze cost. Decide if worth while to vectorize.  */
199
 
200
  if (vectorization_factor <= 1)
201
    {
202
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203
        fprintf (vect_dump, "not vectorized: unsupported data-type");
204
      return false;
205
    }
206
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
207
 
208
  return true;
209
}
210
 
211
 
212
/* Function vect_analyze_operations.
213
 
214
   Scan the loop stmts and make sure they are all vectorizable.  */
215
 
216
static bool
217
vect_analyze_operations (loop_vec_info loop_vinfo)
218
{
219
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
220
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
221
  int nbbs = loop->num_nodes;
222
  block_stmt_iterator si;
223
  unsigned int vectorization_factor = 0;
224
  int i;
225
  bool ok;
226
  tree phi;
227
  stmt_vec_info stmt_info;
228
  bool need_to_vectorize = false;
229
 
230
  if (vect_print_dump_info (REPORT_DETAILS))
231
    fprintf (vect_dump, "=== vect_analyze_operations ===");
232
 
233
  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
234
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
235
 
236
  for (i = 0; i < nbbs; i++)
237
    {
238
      basic_block bb = bbs[i];
239
 
240
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
241
        {
242
          stmt_info = vinfo_for_stmt (phi);
243
          if (vect_print_dump_info (REPORT_DETAILS))
244
            {
245
              fprintf (vect_dump, "examining phi: ");
246
              print_generic_expr (vect_dump, phi, TDF_SLIM);
247
            }
248
 
249
          gcc_assert (stmt_info);
250
 
251
          if (STMT_VINFO_LIVE_P (stmt_info))
252
            {
253
              /* FORNOW: not yet supported.  */
254
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
255
                fprintf (vect_dump, "not vectorized: value used after loop.");
256
            return false;
257
          }
258
 
259
          if (STMT_VINFO_RELEVANT_P (stmt_info))
260
            {
261
              /* Most likely a reduction-like computation that is used
262
                 in the loop.  */
263
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
264
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
265
             return false;
266
            }
267
        }
268
 
269
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
270
        {
271
          tree stmt = bsi_stmt (si);
272
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
273
 
274
          if (vect_print_dump_info (REPORT_DETAILS))
275
            {
276
              fprintf (vect_dump, "==> examining statement: ");
277
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
278
            }
279
 
280
          gcc_assert (stmt_info);
281
 
282
          /* skip stmts which do not need to be vectorized.
283
             this is expected to include:
284
             - the COND_EXPR which is the loop exit condition
285
             - any LABEL_EXPRs in the loop
286
             - computations that are used only for array indexing or loop
287
             control  */
288
 
289
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
290
              && !STMT_VINFO_LIVE_P (stmt_info))
291
            {
292
              if (vect_print_dump_info (REPORT_DETAILS))
293
                fprintf (vect_dump, "irrelevant.");
294
              continue;
295
            }
296
 
297
          if (STMT_VINFO_RELEVANT_P (stmt_info))
298
            {
299
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
300
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
301
 
302
              ok = (vectorizable_operation (stmt, NULL, NULL)
303
                    || vectorizable_assignment (stmt, NULL, NULL)
304
                    || vectorizable_load (stmt, NULL, NULL)
305
                    || vectorizable_store (stmt, NULL, NULL)
306
                    || vectorizable_condition (stmt, NULL, NULL));
307
 
308
              if (!ok)
309
                {
310
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
311
                    {
312
                      fprintf (vect_dump,
313
                               "not vectorized: relevant stmt not supported: ");
314
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
315
                    }
316
                  return false;
317
                }
318
              need_to_vectorize = true;
319
            }
320
 
321
          if (STMT_VINFO_LIVE_P (stmt_info))
322
            {
323
              ok = vectorizable_reduction (stmt, NULL, NULL);
324
 
325
              if (ok)
326
                need_to_vectorize = true;
327
              else
328
                ok = vectorizable_live_operation (stmt, NULL, NULL);
329
 
330
              if (!ok)
331
                {
332
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
333
                    {
334
                      fprintf (vect_dump,
335
                               "not vectorized: live stmt not supported: ");
336
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
337
                    }
338
                  return false;
339
                }
340
            }
341
        } /* stmts in bb */
342
    } /* bbs */
343
 
344
  /* TODO: Analyze cost. Decide if worth while to vectorize.  */
345
 
346
  /* All operations in the loop are either irrelevant (deal with loop
347
     control, or dead), or only used outside the loop and can be moved
348
     out of the loop (e.g. invariants, inductions).  The loop can be
349
     optimized away by scalar optimizations.  We're better off not
350
     touching this loop.  */
351
  if (!need_to_vectorize)
352
    {
353
      if (vect_print_dump_info (REPORT_DETAILS))
354
        fprintf (vect_dump,
355
                 "All the computation can be taken out of the loop.");
356
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
357
        fprintf (vect_dump,
358
                 "not vectorized: redundant loop. no profit to vectorize.");
359
      return false;
360
    }
361
 
362
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
363
      && vect_print_dump_info (REPORT_DETAILS))
364
    fprintf (vect_dump,
365
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
366
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
367
 
368
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
369
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
370
    {
371
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
372
        fprintf (vect_dump, "not vectorized: iteration count too small.");
373
      return false;
374
    }
375
 
376
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
377
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
378
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
379
    {
380
      if (vect_print_dump_info (REPORT_DETAILS))
381
        fprintf (vect_dump, "epilog loop required.");
382
      if (!vect_can_advance_ivs_p (loop_vinfo))
383
        {
384
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
385
            fprintf (vect_dump,
386
                     "not vectorized: can't create epilog loop 1.");
387
          return false;
388
        }
389
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
390
        {
391
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
392
            fprintf (vect_dump,
393
                     "not vectorized: can't create epilog loop 2.");
394
          return false;
395
        }
396
    }
397
 
398
  return true;
399
}
400
 
401
 
402
/* Function exist_non_indexing_operands_for_use_p
403
 
404
   USE is one of the uses attached to STMT. Check if USE is
405
   used in STMT for anything other than indexing an array.  */
406
 
407
static bool
408
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
409
{
410
  tree operand;
411
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
412
 
413
  /* USE corresponds to some operand in STMT. If there is no data
414
     reference in STMT, then any operand that corresponds to USE
415
     is not indexing an array.  */
416
  if (!STMT_VINFO_DATA_REF (stmt_info))
417
    return true;
418
 
419
  /* STMT has a data_ref. FORNOW this means that its of one of
420
     the following forms:
421
     -1- ARRAY_REF = var
422
     -2- var = ARRAY_REF
423
     (This should have been verified in analyze_data_refs).
424
 
425
     'var' in the second case corresponds to a def, not a use,
426
     so USE cannot correspond to any operands that are not used
427
     for array indexing.
428
 
429
     Therefore, all we need to check is if STMT falls into the
430
     first case, and whether var corresponds to USE.  */
431
 
432
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
433
    return false;
434
 
435
  operand = TREE_OPERAND (stmt, 1);
436
 
437
  if (TREE_CODE (operand) != SSA_NAME)
438
    return false;
439
 
440
  if (operand == use)
441
    return true;
442
 
443
  return false;
444
}
445
 
446
 
447
/* Function vect_analyze_scalar_cycles.
448
 
449
   Examine the cross iteration def-use cycles of scalar variables, by
450
   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
451
   following: invariant, induction, reduction, unknown.
452
 
453
   Some forms of scalar cycles are not yet supported.
454
 
455
   Example1: reduction: (unsupported yet)
456
 
457
              loop1:
458
              for (i=0; i<N; i++)
459
                 sum += a[i];
460
 
461
   Example2: induction: (unsupported yet)
462
 
463
              loop2:
464
              for (i=0; i<N; i++)
465
                 a[i] = i;
466
 
467
   Note: the following loop *is* vectorizable:
468
 
469
              loop3:
470
              for (i=0; i<N; i++)
471
                 a[i] = b[i];
472
 
473
         even though it has a def-use cycle caused by the induction variable i:
474
 
475
              loop: i_2 = PHI (i_0, i_1)
476
                    a[i_2] = ...;
477
                    i_1 = i_2 + 1;
478
                    GOTO loop;
479
 
480
         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
481
         it does not need to be vectorized because it is only used for array
482
         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
483
         loop2 on the other hand is relevant (it is being written to memory).
484
*/
485
 
486
static void
487
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
488
{
489
  tree phi;
490
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
491
  basic_block bb = loop->header;
492
  tree dummy;
493
 
494
  if (vect_print_dump_info (REPORT_DETAILS))
495
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
496
 
497
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
498
    {
499
      tree access_fn = NULL;
500
      tree def = PHI_RESULT (phi);
501
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
502
      tree reduc_stmt;
503
 
504
      if (vect_print_dump_info (REPORT_DETAILS))
505
        {
506
          fprintf (vect_dump, "Analyze phi: ");
507
          print_generic_expr (vect_dump, phi, TDF_SLIM);
508
        }
509
 
510
      /* Skip virtual phi's. The data dependences that are associated with
511
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
512
 
513
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
514
        {
515
          if (vect_print_dump_info (REPORT_DETAILS))
516
            fprintf (vect_dump, "virtual phi. skip.");
517
          continue;
518
        }
519
 
520
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
521
 
522
      /* Analyze the evolution function.  */
523
 
524
      access_fn = analyze_scalar_evolution (loop, def);
525
 
526
      if (!access_fn)
527
        continue;
528
 
529
      if (vect_print_dump_info (REPORT_DETAILS))
530
        {
531
           fprintf (vect_dump, "Access function of PHI: ");
532
           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
533
        }
534
 
535
      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
536
        {
537
          if (vect_print_dump_info (REPORT_DETAILS))
538
            fprintf (vect_dump, "Detected induction.");
539
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
540
          continue;
541
        }
542
 
543
      /* TODO: handle invariant phis  */
544
 
545
      reduc_stmt = vect_is_simple_reduction (loop, phi);
546
      if (reduc_stmt)
547
        {
548
          if (vect_print_dump_info (REPORT_DETAILS))
549
            fprintf (vect_dump, "Detected reduction.");
550
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
551
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
552
                                                        vect_reduction_def;
553
        }
554
      else
555
        if (vect_print_dump_info (REPORT_DETAILS))
556
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
557
 
558
    }
559
 
560
  return;
561
}
562
 
563
 
564
/* Function vect_analyze_data_ref_dependence.
565
 
566
   Return TRUE if there (might) exist a dependence between a memory-reference
567
   DRA and a memory-reference DRB.  */
568
 
569
static bool
570
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
571
                                  loop_vec_info loop_vinfo)
572
{
573
  unsigned int i;
574
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
575
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
576
  unsigned int loop_depth = 0;
577
  struct loop *loop_nest = loop;
578
  struct data_reference *dra = DDR_A (ddr);
579
  struct data_reference *drb = DDR_B (ddr);
580
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
581
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
582
 
583
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
584
    return false;
585
 
586
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
587
    {
588
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
589
        {
590
          fprintf (vect_dump,
591
                   "not vectorized: can't determine dependence between ");
592
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
593
          fprintf (vect_dump, " and ");
594
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
595
        }
596
      return true;
597
    }
598
 
599
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
600
    {
601
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
602
        {
603
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
604
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605
          fprintf (vect_dump, " and ");
606
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
607
        }
608
      return true;
609
    }
610
 
611
  /* Find loop depth.  */
612
  while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
613
    {
614
      loop_nest = loop_nest->outer;
615
      loop_depth++;
616
    }
617
 
618
  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
619
    {
620
      int dist = DDR_DIST_VECT (ddr, i)[loop_depth];
621
 
622
      if (vect_print_dump_info (REPORT_DR_DETAILS))
623
        fprintf (vect_dump, "dependence distance  = %d.", dist);
624
 
625
      /* Same loop iteration.  */
626
      if (dist % vectorization_factor == 0)
627
        {
628
          /* Two references with distance zero have the same alignment.  */
629
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
630
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
631
          if (vect_print_dump_info (REPORT_ALIGNMENT))
632
            fprintf (vect_dump, "accesses have the same alignment.");
633
          if (vect_print_dump_info (REPORT_DR_DETAILS))
634
            {
635
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
636
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
637
              fprintf (vect_dump, " and ");
638
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
639
            }
640
          continue;
641
        }
642
 
643
      if (abs (dist) >= vectorization_factor)
644
        {
645
          /* Dependence distance does not create dependence, as far as vectorization
646
             is concerned, in this case.  */
647
          if (vect_print_dump_info (REPORT_DR_DETAILS))
648
            fprintf (vect_dump, "dependence distance >= VF.");
649
          continue;
650
        }
651
 
652
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
653
        {
654
          fprintf (vect_dump,
655
                   "not vectorized: possible dependence between data-refs ");
656
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
657
          fprintf (vect_dump, " and ");
658
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
659
        }
660
 
661
      return true;
662
    }
663
 
664
  return false;
665
}
666
 
667
 
668
/* Function vect_analyze_data_ref_dependences.
669
 
670
   Examine all the data references in the loop, and make sure there do not
671
   exist any data dependences between them.  */
672
 
673
static bool
674
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
675
{
676
  unsigned int i;
677
  varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);
678
 
679
  if (vect_print_dump_info (REPORT_DETAILS))
680
    fprintf (vect_dump, "=== vect_analyze_dependences ===");
681
 
682
  for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
683
    {
684
      struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);
685
 
686
      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
687
        return false;
688
    }
689
 
690
  return true;
691
}
692
 
693
 
694
/* Function vect_compute_data_ref_alignment
695
 
696
   Compute the misalignment of the data reference DR.
697
 
698
   Output:
699
   1. If during the misalignment computation it is found that the data reference
700
      cannot be vectorized then false is returned.
701
   2. DR_MISALIGNMENT (DR) is defined.
702
 
703
   FOR NOW: No analysis is actually performed. Misalignment is calculated
704
   only for trivial cases. TODO.  */
705
 
706
static bool
707
vect_compute_data_ref_alignment (struct data_reference *dr)
708
{
709
  tree stmt = DR_STMT (dr);
710
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
711
  tree ref = DR_REF (dr);
712
  tree vectype;
713
  tree base, base_addr;
714
  bool base_aligned;
715
  tree misalign;
716
  tree aligned_to, alignment;
717
 
718
  if (vect_print_dump_info (REPORT_DETAILS))
719
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");
720
 
721
  /* Initialize misalignment to unknown.  */
722
  DR_MISALIGNMENT (dr) = -1;
723
 
724
  misalign = DR_OFFSET_MISALIGNMENT (dr);
725
  aligned_to = DR_ALIGNED_TO (dr);
726
  base_addr = DR_BASE_ADDRESS (dr);
727
  base = build_fold_indirect_ref (base_addr);
728
  vectype = STMT_VINFO_VECTYPE (stmt_info);
729
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
730
 
731
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
732
      || !misalign)
733
    {
734
      if (vect_print_dump_info (REPORT_DETAILS))
735
        {
736
          fprintf (vect_dump, "Unknown alignment for access: ");
737
          print_generic_expr (vect_dump, base, TDF_SLIM);
738
        }
739
      return true;
740
    }
741
 
742
  if ((DECL_P (base)
743
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
744
                                alignment) >= 0)
745
      || (TREE_CODE (base_addr) == SSA_NAME
746
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
747
                                                      TREE_TYPE (base_addr)))),
748
                                   alignment) >= 0))
749
    base_aligned = true;
750
  else
751
    base_aligned = false;
752
 
753
  if (!base_aligned)
754
    {
755
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
756
        {
757
          if (vect_print_dump_info (REPORT_DETAILS))
758
            {
759
              fprintf (vect_dump, "can't force alignment of ref: ");
760
              print_generic_expr (vect_dump, ref, TDF_SLIM);
761
            }
762
          return true;
763
        }
764
 
765
      /* Force the alignment of the decl.
766
         NOTE: This is the only change to the code we make during
767
         the analysis phase, before deciding to vectorize the loop.  */
768
      if (vect_print_dump_info (REPORT_DETAILS))
769
        fprintf (vect_dump, "force alignment");
770
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
771
      DECL_USER_ALIGN (base) = 1;
772
    }
773
 
774
  /* At this point we assume that the base is aligned.  */
775
  gcc_assert (base_aligned
776
              || (TREE_CODE (base) == VAR_DECL
777
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
778
 
779
  /* Modulo alignment.  */
780
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
781
 
782
  if (!host_integerp (misalign, 1))
783
    {
784
      /* Negative or overflowed misalignment value.  */
785
      if (vect_print_dump_info (REPORT_DETAILS))
786
        fprintf (vect_dump, "unexpected misalign value");
787
      return false;
788
    }
789
 
790
  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
791
 
792
  if (vect_print_dump_info (REPORT_DETAILS))
793
    {
794
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
795
      print_generic_expr (vect_dump, ref, TDF_SLIM);
796
    }
797
 
798
  return true;
799
}
800
 
801
 
802
/* Function vect_compute_data_refs_alignment
803
 
804
   Compute the misalignment of data references in the loop.
805
   Return FALSE if a data reference is found that cannot be vectorized.  */
806
 
807
static bool
808
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
809
{
810
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
811
  unsigned int i;
812
 
813
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
814
    {
815
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
816
      if (!vect_compute_data_ref_alignment (dr))
817
        return false;
818
    }
819
 
820
  return true;
821
}
822
 
823
 
824
/* Function vect_update_misalignment_for_peel
825
 
826
   DR - the data reference whose misalignment is to be adjusted.
827
   DR_PEEL - the data reference whose misalignment is being made
828
             zero in the vector loop by the peel.
829
   NPEEL - the number of iterations in the peel loop if the misalignment
830
           of DR_PEEL is known at compile time.  */
831
 
832
static void
833
vect_update_misalignment_for_peel (struct data_reference *dr,
834
                                   struct data_reference *dr_peel, int npeel)
835
{
836
  unsigned int i;
837
  int drsize;
838
  VEC(dr_p,heap) *same_align_drs;
839
  struct data_reference *current_dr;
840
 
841
  if (known_alignment_for_access_p (dr)
842
      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
843
    {
844
      DR_MISALIGNMENT (dr) = 0;
845
      return;
846
    }
847
 
848
  /* It can be assumed that the data refs with the same alignment as dr_peel
849
     are aligned in the vector loop.  */
850
  same_align_drs
851
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
852
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
853
    {
854
      if (current_dr != dr)
855
        continue;
856
      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
857
      DR_MISALIGNMENT (dr) = 0;
858
      return;
859
    }
860
 
861
  if (known_alignment_for_access_p (dr)
862
      && known_alignment_for_access_p (dr_peel))
863
    {
864
      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
865
      DR_MISALIGNMENT (dr) += npeel * drsize;
866
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
867
      return;
868
    }
869
 
870
  DR_MISALIGNMENT (dr) = -1;
871
}
872
 
873
 
874
/* Function vect_verify_datarefs_alignment
875
 
876
   Return TRUE if all data references in the loop can be
877
   handled with respect to alignment.  */
878
 
879
static bool
880
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
881
{
882
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
883
  enum dr_alignment_support supportable_dr_alignment;
884
  unsigned int i;
885
 
886
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
887
    {
888
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
889
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
890
      if (!supportable_dr_alignment)
891
        {
892
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
893
            {
894
              if (DR_IS_READ (dr))
895
                fprintf (vect_dump,
896
                         "not vectorized: unsupported unaligned load.");
897
              else
898
                fprintf (vect_dump,
899
                         "not vectorized: unsupported unaligned store.");
900
            }
901
          return false;
902
        }
903
      if (supportable_dr_alignment != dr_aligned
904
          && vect_print_dump_info (REPORT_ALIGNMENT))
905
        fprintf (vect_dump, "Vectorizing an unaligned access.");
906
    }
907
  return true;
908
}
909
 
910
 
911
/* Function vect_enhance_data_refs_alignment
912
 
913
   This pass will use loop versioning and loop peeling in order to enhance
914
   the alignment of data references in the loop.
915
 
916
   FOR NOW: we assume that whatever versioning/peeling takes place, only the
917
   original loop is to be vectorized; Any other loops that are created by
918
   the transformations performed in this pass - are not supposed to be
919
   vectorized. This restriction will be relaxed.
920
 
921
   This pass will require a cost model to guide it whether to apply peeling
922
   or versioning or a combination of the two. For example, the scheme that
923
   intel uses when given a loop with several memory accesses, is as follows:
924
   choose one memory access ('p') which alignment you want to force by doing
925
   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
926
   other accesses are not necessarily aligned, or (2) use loop versioning to
927
   generate one loop in which all accesses are aligned, and another loop in
928
   which only 'p' is necessarily aligned.
929
 
930
   ("Automatic Intra-Register Vectorization for the Intel Architecture",
931
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
932
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
933
 
934
   Devising a cost model is the most critical aspect of this work. It will
935
   guide us on which access to peel for, whether to use loop versioning, how
936
   many versions to create, etc. The cost model will probably consist of
937
   generic considerations as well as target specific considerations (on
938
   powerpc for example, misaligned stores are more painful than misaligned
939
   loads).
940
 
941
   Here are the general steps involved in alignment enhancements:
942
 
943
     -- original loop, before alignment analysis:
944
        for (i=0; i<N; i++){
945
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
946
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
947
        }
948
 
949
     -- After vect_compute_data_refs_alignment:
950
        for (i=0; i<N; i++){
951
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
952
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
953
        }
954
 
955
     -- Possibility 1: we do loop versioning:
956
     if (p is aligned) {
957
        for (i=0; i<N; i++){    # loop 1A
958
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
959
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
960
        }
961
     }
962
     else {
963
        for (i=0; i<N; i++){    # loop 1B
964
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
965
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
966
        }
967
     }
968
 
969
     -- Possibility 2: we do loop peeling:
970
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
971
        x = q[i];
972
        p[i] = y;
973
     }
974
     for (i = 3; i < N; i++){   # loop 2A
975
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
976
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
977
     }
978
 
979
     -- Possibility 3: combination of loop peeling and versioning:
980
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
981
        x = q[i];
982
        p[i] = y;
983
     }
984
     if (p is aligned) {
985
        for (i = 3; i<N; i++){  # loop 3A
986
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
987
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
988
        }
989
     }
990
     else {
991
        for (i = 3; i<N; i++){  # loop 3B
992
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
993
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
994
        }
995
     }
996
 
997
     These loops are later passed to loop_transform to be vectorized. The
998
     vectorizer will use the alignment information to guide the transformation
999
     (whether to generate regular loads/stores, or with special handling for
1000
     misalignment).  */
1001
 
1002
static bool
1003
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1004
{
1005
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1006
  enum dr_alignment_support supportable_dr_alignment;
1007
  struct data_reference *dr0 = NULL;
1008
  struct data_reference *dr;
1009
  unsigned int i;
1010
  bool do_peeling = false;
1011
  bool do_versioning = false;
1012
  bool stat;
1013
 
1014
  /* While cost model enhancements are expected in the future, the high level
1015
     view of the code at this time is as follows:
1016
 
1017
     A) If there is a misaligned write then see if peeling to align this write
1018
        can make all data references satisfy vect_supportable_dr_alignment.
1019
        If so, update data structures as needed and return true.  Note that
1020
        at this time vect_supportable_dr_alignment is known to return false
1021
        for a a misaligned write.
1022
 
1023
     B) If peeling wasn't possible and there is a data reference with an
1024
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1025
        then see if loop versioning checks can be used to make all data
1026
        references satisfy vect_supportable_dr_alignment.  If so, update
1027
        data structures as needed and return true.
1028
 
1029
     C) If neither peeling nor versioning were successful then return false if
1030
        any data reference does not satisfy vect_supportable_dr_alignment.
1031
 
1032
     D) Return true (all data references satisfy vect_supportable_dr_alignment).
1033
 
1034
     Note, Possibility 3 above (which is peeling and versioning together) is not
1035
     being done at this time.  */
1036
 
1037
  /* (1) Peeling to force alignment.  */
1038
 
1039
  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1040
     Considerations:
1041
     + How many accesses will become aligned due to the peeling
1042
     - How many accesses will become unaligned due to the peeling,
1043
       and the cost of misaligned accesses.
1044
     - The cost of peeling (the extra runtime checks, the increase
1045
       in code size).
1046
 
1047
     The scheme we use FORNOW: peel to force the alignment of the first
1048
     misaligned store in the loop.
1049
     Rationale: misaligned stores are not yet supported.
1050
 
1051
     TODO: Use a cost model.  */
1052
 
1053
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1054
    {
1055
      dr = VARRAY_GENERIC_PTR (datarefs, i);
1056
      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1057
        {
1058
          dr0 = dr;
1059
          do_peeling = true;
1060
          break;
1061
        }
1062
    }
1063
 
1064
  /* Often peeling for alignment will require peeling for loop-bound, which in
1065
     turn requires that we know how to adjust the loop ivs after the loop.  */
1066
  if (!vect_can_advance_ivs_p (loop_vinfo))
1067
    do_peeling = false;
1068
 
1069
  if (do_peeling)
1070
    {
1071
      int mis;
1072
      int npeel = 0;
1073
 
1074
      if (known_alignment_for_access_p (dr0))
1075
        {
1076
          /* Since it's known at compile time, compute the number of iterations
1077
             in the peeled loop (the peeling factor) for use in updating
1078
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
1079
             factor minus the misalignment as an element count.  */
1080
          mis = DR_MISALIGNMENT (dr0);
1081
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1082
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1083
        }
1084
 
1085
      /* Ensure that all data refs can be vectorized after the peel.  */
1086
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1087
        {
1088
          int save_misalignment;
1089
 
1090
          dr = VARRAY_GENERIC_PTR (datarefs, i);
1091
          if (dr == dr0)
1092
            continue;
1093
          save_misalignment = DR_MISALIGNMENT (dr);
1094
          vect_update_misalignment_for_peel (dr, dr0, npeel);
1095
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1096
          DR_MISALIGNMENT (dr) = save_misalignment;
1097
 
1098
          if (!supportable_dr_alignment)
1099
            {
1100
              do_peeling = false;
1101
              break;
1102
            }
1103
        }
1104
 
1105
      if (do_peeling)
1106
        {
1107
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1108
             If the misalignment of DR_i is identical to that of dr0 then set
1109
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1110
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1111
             by the peeling factor times the element size of DR_i (MOD the
1112
             vectorization factor times the size).  Otherwise, the
1113
             misalignment of DR_i must be set to unknown.  */
1114
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1115
            {
1116
              dr = VARRAY_GENERIC_PTR (datarefs, i);
1117
              if (dr == dr0)
1118
                continue;
1119
              vect_update_misalignment_for_peel (dr, dr0, npeel);
1120
            }
1121
 
1122
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1123
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1124
          DR_MISALIGNMENT (dr0) = 0;
1125
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1126
            fprintf (vect_dump, "Alignment of access forced using peeling.");
1127
 
1128
          if (vect_print_dump_info (REPORT_DETAILS))
1129
            fprintf (vect_dump, "Peeling for alignment will be applied.");
1130
 
1131
          stat = vect_verify_datarefs_alignment (loop_vinfo);
1132
          gcc_assert (stat);
1133
          return stat;
1134
        }
1135
    }
1136
 
1137
 
1138
  /* (2) Versioning to force alignment.  */
1139
 
1140
  /* Try versioning if:
1141
     1) flag_tree_vect_loop_version is TRUE
1142
     2) optimize_size is FALSE
1143
     3) there is at least one unsupported misaligned data ref with an unknown
1144
        misalignment, and
1145
     4) all misaligned data refs with a known misalignment are supported, and
1146
     5) the number of runtime alignment checks is within reason.  */
1147
 
1148
  do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1149
 
1150
  if (do_versioning)
1151
    {
1152
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1153
        {
1154
          dr = VARRAY_GENERIC_PTR (datarefs, i);
1155
 
1156
          if (aligned_access_p (dr))
1157
            continue;
1158
 
1159
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1160
 
1161
          if (!supportable_dr_alignment)
1162
            {
1163
              tree stmt;
1164
              int mask;
1165
              tree vectype;
1166
 
1167
              if (known_alignment_for_access_p (dr)
1168
                  || VEC_length (tree,
1169
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1170
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1171
                {
1172
                  do_versioning = false;
1173
                  break;
1174
                }
1175
 
1176
              stmt = DR_STMT (dr);
1177
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1178
              gcc_assert (vectype);
1179
 
1180
              /* The rightmost bits of an aligned address must be zeros.
1181
                 Construct the mask needed for this test.  For example,
1182
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1183
                 mask must be 15 = 0xf. */
1184
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1185
 
1186
              /* FORNOW: use the same mask to test all potentially unaligned
1187
                 references in the loop.  The vectorizer currently supports
1188
                 a single vector size, see the reference to
1189
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1190
                 vectorization factor is computed.  */
1191
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1192
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1193
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1194
              VEC_safe_push (tree, heap,
1195
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1196
                             DR_STMT (dr));
1197
            }
1198
        }
1199
 
1200
      /* Versioning requires at least one misaligned data reference.  */
1201
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1202
        do_versioning = false;
1203
      else if (!do_versioning)
1204
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1205
    }
1206
 
1207
  if (do_versioning)
1208
    {
1209
      VEC(tree,heap) *may_misalign_stmts
1210
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1211
      tree stmt;
1212
 
1213
      /* It can now be assumed that the data references in the statements
1214
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1215
         of the loop being vectorized.  */
1216
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1217
        {
1218
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1219
          dr = STMT_VINFO_DATA_REF (stmt_info);
1220
          DR_MISALIGNMENT (dr) = 0;
1221
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1222
            fprintf (vect_dump, "Alignment of access forced using versioning.");
1223
        }
1224
 
1225
      if (vect_print_dump_info (REPORT_DETAILS))
1226
        fprintf (vect_dump, "Versioning for alignment will be applied.");
1227
 
1228
      /* Peeling and versioning can't be done together at this time.  */
1229
      gcc_assert (! (do_peeling && do_versioning));
1230
 
1231
      stat = vect_verify_datarefs_alignment (loop_vinfo);
1232
      gcc_assert (stat);
1233
      return stat;
1234
    }
1235
 
1236
  /* This point is reached if neither peeling nor versioning is being done.  */
1237
  gcc_assert (! (do_peeling || do_versioning));
1238
 
1239
  stat = vect_verify_datarefs_alignment (loop_vinfo);
1240
  return stat;
1241
}
1242
 
1243
 
1244
/* Function vect_analyze_data_refs_alignment
1245
 
1246
   Analyze the alignment of the data-references in the loop.
1247
   Return FALSE if a data reference is found that cannot be vectorized.  */
1248
 
1249
static bool
1250
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1251
{
1252
  if (vect_print_dump_info (REPORT_DETAILS))
1253
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1254
 
1255
  if (!vect_compute_data_refs_alignment (loop_vinfo))
1256
    {
1257
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1258
        fprintf (vect_dump,
1259
                 "not vectorized: can't calculate alignment for data ref.");
1260
      return false;
1261
    }
1262
 
1263
  return true;
1264
}
1265
 
1266
 
1267
/* Function vect_analyze_data_ref_access.
1268
 
1269
   Analyze the access pattern of the data-reference DR. For now, a data access
1270
   has to be consecutive to be considered vectorizable.  */
1271
 
1272
static bool
1273
vect_analyze_data_ref_access (struct data_reference *dr)
1274
{
1275
  tree step = DR_STEP (dr);
1276
  tree scalar_type = TREE_TYPE (DR_REF (dr));
1277
 
1278
  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1279
    {
1280
      if (vect_print_dump_info (REPORT_DETAILS))
1281
        fprintf (vect_dump, "not consecutive access");
1282
      return false;
1283
    }
1284
  return true;
1285
}
1286
 
1287
 
1288
/* Function vect_analyze_data_ref_accesses.
1289
 
1290
   Analyze the access pattern of all the data references in the loop.
1291
 
1292
   FORNOW: the only access pattern that is considered vectorizable is a
1293
           simple step 1 (consecutive) access.
1294
 
1295
   FORNOW: handle only arrays and pointer accesses.  */
1296
 
1297
static bool
1298
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1299
{
1300
  unsigned int i;
1301
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1302
 
1303
  if (vect_print_dump_info (REPORT_DETAILS))
1304
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1305
 
1306
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1307
    {
1308
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1309
      if (!vect_analyze_data_ref_access (dr))
1310
        {
1311
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1312
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
1313
          return false;
1314
        }
1315
    }
1316
 
1317
  return true;
1318
}
1319
 
1320
 
1321
/* Function vect_analyze_data_refs.
1322
 
1323
  Find all the data references in the loop.
1324
 
1325
   The general structure of the analysis of data refs in the vectorizer is as
1326
   follows:
1327
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1328
      find and analyze all data-refs in the loop and their dependences.
1329
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
1330
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1331
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1332
 
1333
*/
1334
 
1335
static bool
1336
vect_analyze_data_refs (loop_vec_info loop_vinfo)
1337
{
1338
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1339
  unsigned int i;
1340
  varray_type datarefs;
1341
  tree scalar_type;
1342
 
1343
  if (vect_print_dump_info (REPORT_DETAILS))
1344
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1345
 
1346
  compute_data_dependences_for_loop (loop, false,
1347
                                     &(LOOP_VINFO_DATAREFS (loop_vinfo)),
1348
                                     &(LOOP_VINFO_DDRS (loop_vinfo)));
1349
 
1350
  /* Go through the data-refs, check that the analysis succeeded. Update pointer
1351
     from stmt_vec_info struct to DR and vectype.  */
1352
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1353
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1354
    {
1355
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1356
      tree stmt;
1357
      stmt_vec_info stmt_info;
1358
 
1359
      if (!dr || !DR_REF (dr))
1360
        {
1361
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1362
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1363
          return false;
1364
        }
1365
 
1366
      /* Update DR field in stmt_vec_info struct.  */
1367
      stmt = DR_STMT (dr);
1368
      stmt_info = vinfo_for_stmt (stmt);
1369
 
1370
      if (STMT_VINFO_DATA_REF (stmt_info))
1371
        {
1372
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1373
            {
1374
              fprintf (vect_dump,
1375
                       "not vectorized: more than one data ref in stmt: ");
1376
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1377
            }
1378
          return false;
1379
        }
1380
      STMT_VINFO_DATA_REF (stmt_info) = dr;
1381
 
1382
      /* Check that analysis of the data-ref succeeded.  */
1383
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1384
          || !DR_STEP (dr))
1385
        {
1386
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1387
            {
1388
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1389
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1390
            }
1391
          return false;
1392
        }
1393
      if (!DR_MEMTAG (dr))
1394
        {
1395
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1396
            {
1397
              fprintf (vect_dump, "not vectorized: no memory tag for ");
1398
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1399
            }
1400
          return false;
1401
        }
1402
 
1403
      /* Set vectype for STMT.  */
1404
      scalar_type = TREE_TYPE (DR_REF (dr));
1405
      STMT_VINFO_VECTYPE (stmt_info) =
1406
                get_vectype_for_scalar_type (scalar_type);
1407
      if (!STMT_VINFO_VECTYPE (stmt_info))
1408
        {
1409
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1410
            {
1411
              fprintf (vect_dump,
1412
                       "not vectorized: no vectype for stmt: ");
1413
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1414
              fprintf (vect_dump, " scalar_type: ");
1415
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1416
            }
1417
          return false;
1418
        }
1419
    }
1420
 
1421
  return true;
1422
}
1423
 
1424
 
1425
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
1426
 
1427
/* Function vect_mark_relevant.
1428
 
1429
   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
1430
 
1431
static void
1432
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1433
                    bool relevant_p, bool live_p)
1434
{
1435
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1436
  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1437
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1438
 
1439
  if (vect_print_dump_info (REPORT_DETAILS))
1440
    fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1441
 
1442
  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1443
  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1444
 
1445
  if (TREE_CODE (stmt) == PHI_NODE)
1446
    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1447
       or live will fail vectorization later on.  */
1448
    return;
1449
 
1450
  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1451
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1452
    {
1453
      if (vect_print_dump_info (REPORT_DETAILS))
1454
        fprintf (vect_dump, "already marked relevant/live.");
1455
      return;
1456
    }
1457
 
1458
  VEC_safe_push (tree, heap, *worklist, stmt);
1459
}
1460
 
1461
 
1462
/* Function vect_stmt_relevant_p.
1463
 
1464
   Return true if STMT in loop that is represented by LOOP_VINFO is
1465
   "relevant for vectorization".
1466
 
1467
   A stmt is considered "relevant for vectorization" if:
1468
   - it has uses outside the loop.
1469
   - it has vdefs (it alters memory).
1470
   - control stmts in the loop (except for the exit condition).
1471
 
1472
   CHECKME: what other side effects would the vectorizer allow?  */
1473
 
1474
static bool
1475
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1476
                      bool *relevant_p, bool *live_p)
1477
{
1478
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1479
  ssa_op_iter op_iter;
1480
  imm_use_iterator imm_iter;
1481
  use_operand_p use_p;
1482
  def_operand_p def_p;
1483
 
1484
  *relevant_p = false;
1485
  *live_p = false;
1486
 
1487
  /* cond stmt other than loop exit cond.  */
1488
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1489
    *relevant_p = true;
1490
 
1491
  /* changing memory.  */
1492
  if (TREE_CODE (stmt) != PHI_NODE)
1493
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1494
      {
1495
        if (vect_print_dump_info (REPORT_DETAILS))
1496
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1497
        *relevant_p = true;
1498
      }
1499
 
1500
  /* uses outside the loop.  */
1501
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1502
    {
1503
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1504
        {
1505
          basic_block bb = bb_for_stmt (USE_STMT (use_p));
1506
          if (!flow_bb_inside_loop_p (loop, bb))
1507
            {
1508
              if (vect_print_dump_info (REPORT_DETAILS))
1509
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1510
 
1511
              /* We expect all such uses to be in the loop exit phis
1512
                 (because of loop closed form)   */
1513
              gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1514
              gcc_assert (bb == loop->single_exit->dest);
1515
 
1516
              *live_p = true;
1517
            }
1518
        }
1519
    }
1520
 
1521
  return (*live_p || *relevant_p);
1522
}
1523
 
1524
 
1525
/* Function vect_mark_stmts_to_be_vectorized.
1526
 
1527
   Not all stmts in the loop need to be vectorized. For example:
1528
 
1529
     for i...
1530
       for j...
1531
   1.    T0 = i + j
1532
   2.    T1 = a[T0]
1533
 
1534
   3.    j = j + 1
1535
 
1536
   Stmt 1 and 3 do not need to be vectorized, because loop control and
1537
   addressing of vectorized data-refs are handled differently.
1538
 
1539
   This pass detects such stmts.  */
1540
 
1541
static bool
1542
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1543
{
1544
  VEC(tree,heap) *worklist;
1545
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1546
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1547
  unsigned int nbbs = loop->num_nodes;
1548
  block_stmt_iterator si;
1549
  tree stmt, use;
1550
  stmt_ann_t ann;
1551
  ssa_op_iter iter;
1552
  unsigned int i;
1553
  stmt_vec_info stmt_vinfo;
1554
  basic_block bb;
1555
  tree phi;
1556
  bool relevant_p, live_p;
1557
  tree def, def_stmt;
1558
  enum vect_def_type dt;
1559
 
1560
  if (vect_print_dump_info (REPORT_DETAILS))
1561
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1562
 
1563
  worklist = VEC_alloc (tree, heap, 64);
1564
 
1565
  /* 1. Init worklist.  */
1566
 
1567
  bb = loop->header;
1568
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1569
    {
1570
      if (vect_print_dump_info (REPORT_DETAILS))
1571
        {
1572
          fprintf (vect_dump, "init: phi relevant? ");
1573
          print_generic_expr (vect_dump, phi, TDF_SLIM);
1574
        }
1575
 
1576
      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1577
        vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1578
    }
1579
 
1580
  for (i = 0; i < nbbs; i++)
1581
    {
1582
      bb = bbs[i];
1583
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1584
        {
1585
          stmt = bsi_stmt (si);
1586
 
1587
          if (vect_print_dump_info (REPORT_DETAILS))
1588
            {
1589
              fprintf (vect_dump, "init: stmt relevant? ");
1590
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1591
            }
1592
 
1593
          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1594
            vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1595
        }
1596
    }
1597
 
1598
 
1599
  /* 2. Process_worklist */
1600
 
1601
  while (VEC_length (tree, worklist) > 0)
1602
    {
1603
      stmt = VEC_pop (tree, worklist);
1604
 
1605
      if (vect_print_dump_info (REPORT_DETAILS))
1606
        {
1607
          fprintf (vect_dump, "worklist: examine stmt: ");
1608
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
1609
        }
1610
 
1611
      /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1612
         in the loop, mark the stmt that defines it (DEF_STMT) as
1613
         relevant/irrelevant and live/dead according to the liveness and
1614
         relevance properties of STMT.
1615
       */
1616
 
1617
      gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1618
 
1619
      ann = stmt_ann (stmt);
1620
      stmt_vinfo = vinfo_for_stmt (stmt);
1621
 
1622
      relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1623
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1624
 
1625
      /* Generally, the liveness and relevance properties of STMT are
1626
         propagated to the DEF_STMTs of its USEs:
1627
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1628
             STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1629
 
1630
         Exceptions:
1631
 
1632
         (case 1)
1633
           If USE is used only for address computations (e.g. array indexing),
1634
           which does not need to be directly vectorized, then the
1635
           liveness/relevance of the respective DEF_STMT is left unchanged.
1636
 
1637
         (case 2)
1638
           If STMT has been identified as defining a reduction variable, then
1639
           we have two cases:
1640
           (case 2.1)
1641
             The last use of STMT is the reduction-variable, which is defined
1642
             by a loop-header-phi. We don't want to mark the phi as live or
1643
             relevant (because it does not need to be vectorized, it is handled
1644
             as part of the vectorization of the reduction), so in this case we
1645
             skip the call to vect_mark_relevant.
1646
           (case 2.2)
1647
             The rest of the uses of STMT are defined in the loop body. For
1648
             the def_stmt of these uses we want to set liveness/relevance
1649
             as follows:
1650
               STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1651
               STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1652
             because even though STMT is classified as live (since it defines a
1653
             value that is used across loop iterations) and irrelevant (since it
1654
             is not used inside the loop), it will be vectorized, and therefore
1655
             the corresponding DEF_STMTs need to marked as relevant.
1656
       */
1657
 
1658
      /* case 2.2:  */
1659
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1660
        {
1661
          gcc_assert (!relevant_p && live_p);
1662
          relevant_p = true;
1663
          live_p = false;
1664
        }
1665
 
1666
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1667
        {
1668
          /* case 1: we are only interested in uses that need to be vectorized.
1669
             Uses that are used for address computation are not considered
1670
             relevant.
1671
           */
1672
          if (!exist_non_indexing_operands_for_use_p (use, stmt))
1673
            continue;
1674
 
1675
          if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1676
            {
1677
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1678
                fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1679
              VEC_free (tree, heap, worklist);
1680
              return false;
1681
            }
1682
 
1683
          if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1684
            continue;
1685
 
1686
          if (vect_print_dump_info (REPORT_DETAILS))
1687
            {
1688
              fprintf (vect_dump, "worklist: examine use %d: ", i);
1689
              print_generic_expr (vect_dump, use, TDF_SLIM);
1690
            }
1691
 
1692
          bb = bb_for_stmt (def_stmt);
1693
          if (!flow_bb_inside_loop_p (loop, bb))
1694
            continue;
1695
 
1696
          /* case 2.1: the reduction-use does not mark the defining-phi
1697
             as relevant.  */
1698
          if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1699
              && TREE_CODE (def_stmt) == PHI_NODE)
1700
            continue;
1701
 
1702
          vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1703
        }
1704
    }                           /* while worklist */
1705
 
1706
  VEC_free (tree, heap, worklist);
1707
  return true;
1708
}
1709
 
1710
 
1711
/* Function vect_can_advance_ivs_p
1712
 
1713
   In case the number of iterations that LOOP iterates is unknown at compile
1714
   time, an epilog loop will be generated, and the loop induction variables
1715
   (IVs) will be "advanced" to the value they are supposed to take just before
1716
   the epilog loop.  Here we check that the access function of the loop IVs
1717
   and the expression that represents the loop bound are simple enough.
1718
   These restrictions will be relaxed in the future.  */
1719
 
1720
static bool
1721
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1722
{
1723
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1724
  basic_block bb = loop->header;
1725
  tree phi;
1726
 
1727
  /* Analyze phi functions of the loop header.  */
1728
 
1729
  if (vect_print_dump_info (REPORT_DETAILS))
1730
    fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1731
 
1732
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1733
    {
1734
      tree access_fn = NULL;
1735
      tree evolution_part;
1736
 
1737
      if (vect_print_dump_info (REPORT_DETAILS))
1738
        {
1739
          fprintf (vect_dump, "Analyze phi: ");
1740
          print_generic_expr (vect_dump, phi, TDF_SLIM);
1741
        }
1742
 
1743
      /* Skip virtual phi's. The data dependences that are associated with
1744
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1745
 
1746
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1747
        {
1748
          if (vect_print_dump_info (REPORT_DETAILS))
1749
            fprintf (vect_dump, "virtual phi. skip.");
1750
          continue;
1751
        }
1752
 
1753
      /* Skip reduction phis.  */
1754
 
1755
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1756
        {
1757
          if (vect_print_dump_info (REPORT_DETAILS))
1758
            fprintf (vect_dump, "reduc phi. skip.");
1759
          continue;
1760
        }
1761
 
1762
      /* Analyze the evolution function.  */
1763
 
1764
      access_fn = instantiate_parameters
1765
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1766
 
1767
      if (!access_fn)
1768
        {
1769
          if (vect_print_dump_info (REPORT_DETAILS))
1770
            fprintf (vect_dump, "No Access function.");
1771
          return false;
1772
        }
1773
 
1774
      if (vect_print_dump_info (REPORT_DETAILS))
1775
        {
1776
          fprintf (vect_dump, "Access function of PHI: ");
1777
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1778
        }
1779
 
1780
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1781
 
1782
      if (evolution_part == NULL_TREE)
1783
        {
1784
          if (vect_print_dump_info (REPORT_DETAILS))
1785
            fprintf (vect_dump, "No evolution.");
1786
          return false;
1787
        }
1788
 
1789
      /* FORNOW: We do not transform initial conditions of IVs
1790
         which evolution functions are a polynomial of degree >= 2.  */
1791
 
1792
      if (tree_is_chrec (evolution_part))
1793
        return false;
1794
    }
1795
 
1796
  return true;
1797
}
1798
 
1799
 
1800
/* Function vect_get_loop_niters.
1801
 
1802
   Determine how many iterations the loop is executed.
1803
   If an expression that represents the number of iterations
1804
   can be constructed, place it in NUMBER_OF_ITERATIONS.
1805
   Return the loop exit condition.  */
1806
 
1807
static tree
1808
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1809
{
1810
  tree niters;
1811
 
1812
  if (vect_print_dump_info (REPORT_DETAILS))
1813
    fprintf (vect_dump, "=== get_loop_niters ===");
1814
 
1815
  niters = number_of_iterations_in_loop (loop);
1816
 
1817
  if (niters != NULL_TREE
1818
      && niters != chrec_dont_know)
1819
    {
1820
      *number_of_iterations = niters;
1821
 
1822
      if (vect_print_dump_info (REPORT_DETAILS))
1823
        {
1824
          fprintf (vect_dump, "==> get_loop_niters:" );
1825
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1826
        }
1827
    }
1828
 
1829
  return get_loop_exit_condition (loop);
1830
}
1831
 
1832
 
1833
/* Function vect_analyze_loop_form.
1834
 
1835
   Verify the following restrictions (some may be relaxed in the future):
1836
   - it's an inner-most loop
1837
   - number of BBs = 2 (which are the loop header and the latch)
1838
   - the loop has a pre-header
1839
   - the loop has a single entry and exit
1840
   - the loop exit condition is simple enough, and the number of iterations
1841
     can be analyzed (a countable loop).  */
1842
 
1843
static loop_vec_info
1844
vect_analyze_loop_form (struct loop *loop)
1845
{
1846
  loop_vec_info loop_vinfo;
1847
  tree loop_cond;
1848
  tree number_of_iterations = NULL;
1849
 
1850
  if (vect_print_dump_info (REPORT_DETAILS))
1851
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1852
 
1853
  if (loop->inner)
1854
    {
1855
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1856
        fprintf (vect_dump, "not vectorized: nested loop.");
1857
      return NULL;
1858
    }
1859
 
1860
  if (!loop->single_exit
1861
      || loop->num_nodes != 2
1862
      || EDGE_COUNT (loop->header->preds) != 2)
1863
    {
1864
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1865
        {
1866
          if (!loop->single_exit)
1867
            fprintf (vect_dump, "not vectorized: multiple exits.");
1868
          else if (loop->num_nodes != 2)
1869
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1870
          else if (EDGE_COUNT (loop->header->preds) != 2)
1871
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
1872
        }
1873
 
1874
      return NULL;
1875
    }
1876
 
1877
  /* We assume that the loop exit condition is at the end of the loop. i.e,
1878
     that the loop is represented as a do-while (with a proper if-guard
1879
     before the loop if needed), where the loop header contains all the
1880
     executable statements, and the latch is empty.  */
1881
  if (!empty_block_p (loop->latch))
1882
    {
1883
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1884
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
1885
      return NULL;
1886
    }
1887
 
1888
  /* Make sure there exists a single-predecessor exit bb:  */
1889
  if (!single_pred_p (loop->single_exit->dest))
1890
    {
1891
      edge e = loop->single_exit;
1892
      if (!(e->flags & EDGE_ABNORMAL))
1893
        {
1894
          split_loop_exit_edge (e);
1895
          if (vect_print_dump_info (REPORT_DETAILS))
1896
            fprintf (vect_dump, "split exit edge.");
1897
        }
1898
      else
1899
        {
1900
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1901
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1902
          return NULL;
1903
        }
1904
    }
1905
 
1906
  if (empty_block_p (loop->header))
1907
    {
1908
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1909
        fprintf (vect_dump, "not vectorized: empty loop.");
1910
      return NULL;
1911
    }
1912
 
1913
  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1914
  if (!loop_cond)
1915
    {
1916
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1917
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
1918
      return NULL;
1919
    }
1920
 
1921
  if (!number_of_iterations)
1922
    {
1923
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1924
        fprintf (vect_dump,
1925
                 "not vectorized: number of iterations cannot be computed.");
1926
      return NULL;
1927
    }
1928
 
1929
  if (chrec_contains_undetermined (number_of_iterations))
1930
    {
1931
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1932
        fprintf (vect_dump, "Infinite number of iterations.");
1933
      return false;
1934
    }
1935
 
1936
  loop_vinfo = new_loop_vec_info (loop);
1937
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1938
 
1939
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1940
    {
1941
      if (vect_print_dump_info (REPORT_DETAILS))
1942
        {
1943
          fprintf (vect_dump, "Symbolic number of iterations is ");
1944
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1945
        }
1946
    }
1947
  else
1948
  if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1949
    {
1950
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1951
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1952
      return NULL;
1953
    }
1954
 
1955
  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
1956
 
1957
  return loop_vinfo;
1958
}
1959
 
1960
 
1961
/* Function vect_analyze_loop.
1962
 
1963
   Apply a set of analyses on LOOP, and create a loop_vec_info struct
1964
   for it. The different analyses will record information in the
1965
   loop_vec_info struct.  */
1966
loop_vec_info
1967
vect_analyze_loop (struct loop *loop)
1968
{
1969
  bool ok;
1970
  loop_vec_info loop_vinfo;
1971
 
1972
  if (vect_print_dump_info (REPORT_DETAILS))
1973
    fprintf (vect_dump, "===== analyze_loop_nest =====");
1974
 
1975
  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1976
 
1977
  loop_vinfo = vect_analyze_loop_form (loop);
1978
  if (!loop_vinfo)
1979
    {
1980
      if (vect_print_dump_info (REPORT_DETAILS))
1981
        fprintf (vect_dump, "bad loop form.");
1982
      return NULL;
1983
    }
1984
 
1985
  /* Find all data references in the loop (which correspond to vdefs/vuses)
1986
     and analyze their evolution in the loop.
1987
 
1988
     FORNOW: Handle only simple, array references, which
1989
     alignment can be forced, and aligned pointer-references.  */
1990
 
1991
  ok = vect_analyze_data_refs (loop_vinfo);
1992
  if (!ok)
1993
    {
1994
      if (vect_print_dump_info (REPORT_DETAILS))
1995
        fprintf (vect_dump, "bad data references.");
1996
      destroy_loop_vec_info (loop_vinfo);
1997
      return NULL;
1998
    }
1999
 
2000
  /* Classify all cross-iteration scalar data-flow cycles.
2001
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
2002
 
2003
  vect_analyze_scalar_cycles (loop_vinfo);
2004
 
2005
  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
2006
 
2007
  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2008
  if (!ok)
2009
    {
2010
      if (vect_print_dump_info (REPORT_DETAILS))
2011
        fprintf (vect_dump, "unexpected pattern.");
2012
      destroy_loop_vec_info (loop_vinfo);
2013
      return NULL;
2014
    }
2015
 
2016
  /* Analyze the alignment of the data-refs in the loop.
2017
     Fail if a data reference is found that cannot be vectorized.  */
2018
 
2019
  ok = vect_analyze_data_refs_alignment (loop_vinfo);
2020
  if (!ok)
2021
    {
2022
      if (vect_print_dump_info (REPORT_DETAILS))
2023
        fprintf (vect_dump, "bad data alignment.");
2024
      destroy_loop_vec_info (loop_vinfo);
2025
      return NULL;
2026
    }
2027
 
2028
  ok = vect_determine_vectorization_factor (loop_vinfo);
2029
  if (!ok)
2030
    {
2031
      if (vect_print_dump_info (REPORT_DETAILS))
2032
        fprintf (vect_dump, "can't determine vectorization factor.");
2033
      destroy_loop_vec_info (loop_vinfo);
2034
      return NULL;
2035
    }
2036
 
2037
  /* Analyze data dependences between the data-refs in the loop.
2038
     FORNOW: fail at the first data dependence that we encounter.  */
2039
 
2040
  ok = vect_analyze_data_ref_dependences (loop_vinfo);
2041
  if (!ok)
2042
    {
2043
      if (vect_print_dump_info (REPORT_DETAILS))
2044
        fprintf (vect_dump, "bad data dependence.");
2045
      destroy_loop_vec_info (loop_vinfo);
2046
      return NULL;
2047
    }
2048
 
2049
  /* Analyze the access patterns of the data-refs in the loop (consecutive,
2050
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */
2051
 
2052
  ok = vect_analyze_data_ref_accesses (loop_vinfo);
2053
  if (!ok)
2054
    {
2055
      if (vect_print_dump_info (REPORT_DETAILS))
2056
        fprintf (vect_dump, "bad data access.");
2057
      destroy_loop_vec_info (loop_vinfo);
2058
      return NULL;
2059
    }
2060
 
2061
  /* This pass will decide on using loop versioning and/or loop peeling in
2062
     order to enhance the alignment of data references in the loop.  */
2063
 
2064
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
2065
  if (!ok)
2066
    {
2067
      if (vect_print_dump_info (REPORT_DETAILS))
2068
        fprintf (vect_dump, "bad data alignment.");
2069
      destroy_loop_vec_info (loop_vinfo);
2070
      return NULL;
2071
    }
2072
 
2073
  /* Scan all the operations in the loop and make sure they are
2074
     vectorizable.  */
2075
 
2076
  ok = vect_analyze_operations (loop_vinfo);
2077
  if (!ok)
2078
    {
2079
      if (vect_print_dump_info (REPORT_DETAILS))
2080
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
2081
      destroy_loop_vec_info (loop_vinfo);
2082
      return NULL;
2083
    }
2084
 
2085
  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2086
 
2087
  return loop_vinfo;
2088
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.