OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [tree-vect-analyze.c] - Blame information for rev 816

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Analysis Utilities for Loop Vectorization.
2
   Copyright (C) 2003,2004,2005,2006, 2007 Free Software Foundation, Inc.
3
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tm.h"
25
#include "ggc.h"
26
#include "tree.h"
27
#include "target.h"
28
#include "basic-block.h"
29
#include "diagnostic.h"
30
#include "tree-flow.h"
31
#include "tree-dump.h"
32
#include "timevar.h"
33
#include "cfgloop.h"
34
#include "expr.h"
35
#include "optabs.h"
36
#include "params.h"
37
#include "tree-chrec.h"
38
#include "tree-data-ref.h"
39
#include "tree-scalar-evolution.h"
40
#include "tree-vectorizer.h"
41
 
42
/* Main analysis functions.  */
43
static loop_vec_info vect_analyze_loop_form (struct loop *);
44
static bool vect_analyze_data_refs (loop_vec_info);
45
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46
static void vect_analyze_scalar_cycles (loop_vec_info);
47
static bool vect_analyze_data_ref_accesses (loop_vec_info);
48
static bool vect_analyze_data_ref_dependences (loop_vec_info);
49
static bool vect_analyze_data_refs_alignment (loop_vec_info);
50
static bool vect_compute_data_refs_alignment (loop_vec_info);
51
static bool vect_enhance_data_refs_alignment (loop_vec_info);
52
static bool vect_analyze_operations (loop_vec_info);
53
static bool vect_determine_vectorization_factor (loop_vec_info);
54
 
55
/* Utility functions for the analyses.  */
56
static bool exist_non_indexing_operands_for_use_p (tree, tree);
57
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
58
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
59
static tree vect_get_loop_niters (struct loop *, tree *);
60
static bool vect_analyze_data_ref_dependence
61
  (struct data_dependence_relation *, loop_vec_info);
62
static bool vect_compute_data_ref_alignment (struct data_reference *);
63
static bool vect_analyze_data_ref_access (struct data_reference *);
64
static bool vect_can_advance_ivs_p (loop_vec_info);
65
static void vect_update_misalignment_for_peel
66
  (struct data_reference *, struct data_reference *, int npeel);
67
 
68
 
69
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF). VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop. For example, when vectorizing a loop that operates on 4byte elements,
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size. Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }

   Returns false (loop rejected) on: stmts that already have vector type,
   scalar types with no supported vector type, mixed element counts, or a
   resulting VF <= 1.  On success, records the VF in LOOP_VINFO_VECT_FACTOR
   and caches each stmt's vectype in its stmt_vec_info.  */

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  /* Walk every stmt in every basic block of the loop.  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          /* Stmts that already operate on a vector type cannot be
             vectorized (again); reject the whole loop.  */
          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Use the cached vectype if a previous pass already computed it;
             otherwise derive the scalar type and look the vectype up.  */
          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              vectype = STMT_VINFO_VECTYPE (stmt_info);
              scalar_type = TREE_TYPE (vectype);
            }
          else
            {
              /* Prefer the type of the accessed memory location, then the
                 type of the lhs of an assignment, then the stmt itself.  */
              if (STMT_VINFO_DATA_REF (stmt_info))
                scalar_type =
                        TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
              else if (TREE_CODE (stmt) == MODIFY_EXPR)
                scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
              else
                scalar_type = TREE_TYPE (stmt);

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type:  ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              /* Cache the vectype for use by later analysis/transform.  */
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

          /* All element types must exactly fill a SIMD word at this VF.  */
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                        * vectorization_factor == UNITS_PER_SIMD_WORD);
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* VF <= 1 means no parallelism was found; nothing to vectorize.  */
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
221
 
222
 
223
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.

   Checks, in order: loop-header PHIs (scalar cycles), every relevant or
   live stmt in each basic block, and finally whole-loop conditions
   (profitability, iteration count, epilog-loop feasibility).
   Returns false as soon as any check fails.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  /* The VF must already have been computed
     (by vect_determine_vectorization_factor).  */
  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* First examine the PHI nodes of this block.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* A PHI whose value is used after the loop is not supported.  */
          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              /* Most likely a reduction-like computation that is used
                 in the loop.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
              return false;
            }
        }

      /* Now examine the regular stmts of this block.  */
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
             control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              /* A relevant stmt must be accepted by one of the supported
                 stmt kinds (operation/assignment/load/store/condition).  */
              ok = (vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          /* A live stmt (value used after the loop) must be either a
             vectorizable reduction or a supported live operation.  */
          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_reduction (stmt, NULL, NULL);

              if (ok)
                need_to_vectorize = true;
              else
                ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  /* Fewer iterations than the VF means not even one vector iteration.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  /* An epilog loop is needed when the iteration count is unknown, does not
     divide evenly by the VF, or when peeling for alignment is required;
     verify that such an epilog can actually be created.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
411
 
412
 
413
/* Function exist_non_indexing_operands_for_use_p
414
 
415
   USE is one of the uses attached to STMT. Check if USE is
416
   used in STMT for anything other than indexing an array.  */
417
 
418
static bool
419
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
420
{
421
  tree operand;
422
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
423
 
424
  /* USE corresponds to some operand in STMT. If there is no data
425
     reference in STMT, then any operand that corresponds to USE
426
     is not indexing an array.  */
427
  if (!STMT_VINFO_DATA_REF (stmt_info))
428
    return true;
429
 
430
  /* STMT has a data_ref. FORNOW this means that its of one of
431
     the following forms:
432
     -1- ARRAY_REF = var
433
     -2- var = ARRAY_REF
434
     (This should have been verified in analyze_data_refs).
435
 
436
     'var' in the second case corresponds to a def, not a use,
437
     so USE cannot correspond to any operands that are not used
438
     for array indexing.
439
 
440
     Therefore, all we need to check is if STMT falls into the
441
     first case, and whether var corresponds to USE.  */
442
 
443
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
444
    return false;
445
 
446
  operand = TREE_OPERAND (stmt, 1);
447
 
448
  if (TREE_CODE (operand) != SSA_NAME)
449
    return false;
450
 
451
  if (operand == use)
452
    return true;
453
 
454
  return false;
455
}
456
 
457
 
458
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction: (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction: (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

         even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
         it does not need to be vectorized because it is only used for array
         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
         loop2 on the other hand is relevant (it is being written to memory).

   Side effect: records the classification in STMT_VINFO_DEF_TYPE of each
   PHI (and, for reductions, of the reduction stmt as well).  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* Only loop-header PHIs represent cross-iteration scalar cycles.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Default until classified below.  */
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      /* No evolution function: cannot classify; cycle stays "unknown".  */
      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
           fprintf (vect_dump, "Access function of PHI: ");
           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      /* A simple IV evolution means this cycle is an induction.  */
      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis  */

      /* Not an induction: check for a reduction pattern.  Mark both the
         PHI and the reduction stmt it feeds.  */
      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");

    }

  return;
}
573
 
574
 
575
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB (i.e. TRUE means the dependence blocks
   vectorization; FALSE means it is harmless or proven absent).  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Independence was proven: no dependence at all.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  /* The dependence analyzer could not decide: be conservative.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Without distance vectors we cannot reason about the dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Check the dependence distance in this loop's dimension of each
     distance vector.  */
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance  = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* Dependence distance does not create dependence, as far as vectorization
             is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      /* 0 < |dist| < VF: the dependence crosses vector-iteration
         boundaries and forbids vectorization.  */
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: possible dependence between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
671
 
672
 
673
/* Function vect_analyze_data_ref_dependences.
674
 
675
   Examine all the data references in the loop, and make sure there do not
676
   exist any data dependences between them.  */
677
 
678
static bool
679
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
680
{
681
  unsigned int i;
682
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
683
  struct data_dependence_relation *ddr;
684
 
685
  if (vect_print_dump_info (REPORT_DETAILS))
686
    fprintf (vect_dump, "=== vect_analyze_dependences ===");
687
 
688
  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
689
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
690
      return false;
691
 
692
  return true;
693
}
694
 
695
 
696
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.

   Side effect: may force the alignment of the underlying decl
   (DECL_ALIGN/DECL_USER_ALIGN) when the base is not sufficiently
   aligned but its alignment can be increased.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Required alignment, in bytes, of the vector type.  */
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* If the known alignment guarantee is weaker than what the vectype
     needs, or the misalignment is not known at all, leave
     DR_MISALIGNMENT as "unknown" (-1) but do not fail.  */
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  /* The base is considered aligned if it is a decl with sufficient
     declared alignment, or a pointer whose pointed-to type has
     sufficient alignment.  */
  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
805
 
806
 
807
/* Function vect_compute_data_refs_alignment
808
 
809
   Compute the misalignment of data references in the loop.
810
   Return FALSE if a data reference is found that cannot be vectorized.  */
811
 
812
static bool
813
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
814
{
815
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
816
  struct data_reference *dr;
817
  unsigned int i;
818
 
819
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
820
    if (!vect_compute_data_ref_alignment (dr))
821
      return false;
822
 
823
  return true;
824
}
825
 
826
 
827
/* Function vect_update_misalignment_for_peel
828
 
829
   DR - the data reference whose misalignment is to be adjusted.
830
   DR_PEEL - the data reference whose misalignment is being made
831
             zero in the vector loop by the peel.
832
   NPEEL - the number of iterations in the peel loop if the misalignment
833
           of DR_PEEL is known at compile time.  */
834
 
835
static void
836
vect_update_misalignment_for_peel (struct data_reference *dr,
837
                                   struct data_reference *dr_peel, int npeel)
838
{
839
  unsigned int i;
840
  int drsize;
841
  VEC(dr_p,heap) *same_align_drs;
842
  struct data_reference *current_dr;
843
 
844
  if (known_alignment_for_access_p (dr)
845
      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
846
    {
847
      DR_MISALIGNMENT (dr) = 0;
848
      return;
849
    }
850
 
851
  /* It can be assumed that the data refs with the same alignment as dr_peel
852
     are aligned in the vector loop.  */
853
  same_align_drs
854
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
855
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
856
    {
857
      if (current_dr != dr)
858
        continue;
859
      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
860
      DR_MISALIGNMENT (dr) = 0;
861
      return;
862
    }
863
 
864
  if (known_alignment_for_access_p (dr)
865
      && known_alignment_for_access_p (dr_peel))
866
    {
867
      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
868
      DR_MISALIGNMENT (dr) += npeel * drsize;
869
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
870
      return;
871
    }
872
 
873
  DR_MISALIGNMENT (dr) = -1;
874
}
875
 
876
 
877
/* Function vect_verify_datarefs_alignment
878
 
879
   Return TRUE if all data references in the loop can be
880
   handled with respect to alignment.  */
881
 
882
static bool
883
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
884
{
885
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
886
  struct data_reference *dr;
887
  enum dr_alignment_support supportable_dr_alignment;
888
  unsigned int i;
889
 
890
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
891
    {
892
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
893
      if (!supportable_dr_alignment)
894
        {
895
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
896
            {
897
              if (DR_IS_READ (dr))
898
                fprintf (vect_dump,
899
                         "not vectorized: unsupported unaligned load.");
900
              else
901
                fprintf (vect_dump,
902
                         "not vectorized: unsupported unaligned store.");
903
            }
904
          return false;
905
        }
906
      if (supportable_dr_alignment != dr_aligned
907
          && vect_print_dump_info (REPORT_ALIGNMENT))
908
        fprintf (vect_dump, "Vectorizing an unaligned access.");
909
    }
910
  return true;
911
}
912
 
913
 
914
/* Function vector_alignment_reachable_p
915
 
916
   Return true if vector alignment for DR is reachable by peeling
917
   a few loop iterations.  Return false otherwise.  */
918
 
919
static bool
920
vector_alignment_reachable_p (struct data_reference *dr)
921
{
922
  tree stmt = DR_STMT (dr);
923
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
924
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
925
 
926
  /* If misalignment is known at the compile time then allow peeling
927
     only if natural alignment is reachable through peeling.  */
928
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
929
    {
930
      HOST_WIDE_INT elmsize =
931
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
932
      if (vect_print_dump_info (REPORT_DETAILS))
933
        {
934
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
935
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
936
        }
937
      if (DR_MISALIGNMENT (dr) % elmsize)
938
        {
939
          if (vect_print_dump_info (REPORT_DETAILS))
940
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
941
          return false;
942
        }
943
    }
944
 
945
  if (!known_alignment_for_access_p (dr))
946
    {
947
      tree type = (TREE_TYPE (DR_REF (dr)));
948
      tree ba = DR_BASE_OBJECT (dr);
949
      bool is_packed = false;
950
 
951
      if (ba)
952
        is_packed = contains_packed_reference (ba);
953
 
954
      if (vect_print_dump_info (REPORT_DETAILS))
955
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
956
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
957
        return true;
958
      else
959
        return false;
960
    }
961
 
962
  return true;
963
}
964
 
965
/* Function vect_enhance_data_refs_alignment
966
 
967
   This pass will use loop versioning and loop peeling in order to enhance
968
   the alignment of data references in the loop.
969
 
970
   FOR NOW: we assume that whatever versioning/peeling takes place, only the
971
   original loop is to be vectorized; Any other loops that are created by
972
   the transformations performed in this pass - are not supposed to be
973
   vectorized. This restriction will be relaxed.
974
 
975
   This pass will require a cost model to guide it whether to apply peeling
976
   or versioning or a combination of the two. For example, the scheme that
977
   intel uses when given a loop with several memory accesses, is as follows:
978
   choose one memory access ('p') which alignment you want to force by doing
979
   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
980
   other accesses are not necessarily aligned, or (2) use loop versioning to
981
   generate one loop in which all accesses are aligned, and another loop in
982
   which only 'p' is necessarily aligned.
983
 
984
   ("Automatic Intra-Register Vectorization for the Intel Architecture",
985
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
986
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
987
 
988
   Devising a cost model is the most critical aspect of this work. It will
989
   guide us on which access to peel for, whether to use loop versioning, how
990
   many versions to create, etc. The cost model will probably consist of
991
   generic considerations as well as target specific considerations (on
992
   powerpc for example, misaligned stores are more painful than misaligned
993
   loads).
994
 
995
   Here are the general steps involved in alignment enhancements:
996
 
997
     -- original loop, before alignment analysis:
998
        for (i=0; i<N; i++){
999
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1000
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1001
        }
1002
 
1003
     -- After vect_compute_data_refs_alignment:
1004
        for (i=0; i<N; i++){
1005
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1006
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1007
        }
1008
 
1009
     -- Possibility 1: we do loop versioning:
1010
     if (p is aligned) {
1011
        for (i=0; i<N; i++){    # loop 1A
1012
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1013
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1014
        }
1015
     }
1016
     else {
1017
        for (i=0; i<N; i++){    # loop 1B
1018
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1019
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1020
        }
1021
     }
1022
 
1023
     -- Possibility 2: we do loop peeling:
1024
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1025
        x = q[i];
1026
        p[i] = y;
1027
     }
1028
     for (i = 3; i < N; i++){   # loop 2A
1029
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
1030
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1031
     }
1032
 
1033
     -- Possibility 3: combination of loop peeling and versioning:
1034
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1035
        x = q[i];
1036
        p[i] = y;
1037
     }
1038
     if (p is aligned) {
1039
        for (i = 3; i<N; i++){  # loop 3A
1040
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1041
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1042
        }
1043
     }
1044
     else {
1045
        for (i = 3; i<N; i++){  # loop 3B
1046
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1047
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1048
        }
1049
     }
1050
 
1051
     These loops are later passed to loop_transform to be vectorized. The
1052
     vectorizer will use the alignment information to guide the transformation
1053
     (whether to generate regular loads/stores, or with special handling for
1054
     misalignment).  */
1055
 
1056
static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  /* FORNOW: only the first unaligned store is considered as the peel
     candidate; the loop breaks after examining it.  */
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!DR_IS_READ (dr) && !aligned_access_p (dr))
      {
        do_peeling = vector_alignment_reachable_p (dr);
        if (do_peeling)
          dr0 = dr;
        if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vector alignment may not be reachable");
        break;
      }

  /* Often peeling for alignment will require peeling for loop-bound, which in
     turn requires that we know how to adjust the loop ivs after the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      /* Ensure that all data refs can be vectorized after the peel.
         The misalignment update is applied tentatively and then restored,
         since at this point we are only probing feasibility.  */
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          DR_MISALIGNMENT (dr) = save_misalignment;

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          DR_MISALIGNMENT (dr0) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          /* Sanity check: after the updates all refs must be supportable.  */
          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }


  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          if (aligned_access_p (dr))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree stmt;
              int mask;
              tree vectype;

              /* Versioning only helps for unknown misalignments, and only
                 up to the runtime-check budget (condition 5 above).  */
              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf. */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
1290
 
1291
 
1292
/* Function vect_analyze_data_refs_alignment
1293
 
1294
   Analyze the alignment of the data-references in the loop.
1295
   Return FALSE if a data reference is found that cannot be vectorized.  */
1296
 
1297
static bool
1298
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1299
{
1300
  if (vect_print_dump_info (REPORT_DETAILS))
1301
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1302
 
1303
  if (!vect_compute_data_refs_alignment (loop_vinfo))
1304
    {
1305
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1306
        fprintf (vect_dump,
1307
                 "not vectorized: can't calculate alignment for data ref.");
1308
      return false;
1309
    }
1310
 
1311
  return true;
1312
}
1313
 
1314
 
1315
/* Function vect_analyze_data_ref_access.
1316
 
1317
   Analyze the access pattern of the data-reference DR. For now, a data access
1318
   has to be consecutive to be considered vectorizable.  */
1319
 
1320
static bool
1321
vect_analyze_data_ref_access (struct data_reference *dr)
1322
{
1323
  tree step = DR_STEP (dr);
1324
  tree scalar_type = TREE_TYPE (DR_REF (dr));
1325
 
1326
  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1327
    {
1328
      if (vect_print_dump_info (REPORT_DETAILS))
1329
        fprintf (vect_dump, "not consecutive access");
1330
      return false;
1331
    }
1332
  return true;
1333
}
1334
 
1335
 
1336
/* Function vect_analyze_data_ref_accesses.
1337
 
1338
   Analyze the access pattern of all the data references in the loop.
1339
 
1340
   FORNOW: the only access pattern that is considered vectorizable is a
1341
           simple step 1 (consecutive) access.
1342
 
1343
   FORNOW: handle only arrays and pointer accesses.  */
1344
 
1345
static bool
1346
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1347
{
1348
  unsigned int i;
1349
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1350
  struct data_reference *dr;
1351
 
1352
  if (vect_print_dump_info (REPORT_DETAILS))
1353
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1354
 
1355
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1356
    if (!vect_analyze_data_ref_access (dr))
1357
      {
1358
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1359
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
1360
        return false;
1361
      }
1362
 
1363
  return true;
1364
}
1365
 
1366
 
1367
/* Function vect_analyze_data_refs.
1368
 
1369
  Find all the data references in the loop.
1370
 
1371
   The general structure of the analysis of data refs in the vectorizer is as
1372
   follows:
1373
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1374
      find and analyze all data-refs in the loop and their dependences.
1375
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
1376
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1377
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1378
 
1379
*/
1380
 
1381
static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  /* Collect all data references in the loop; the dependence relations are
     stored in LOOP_VINFO_DDRS for later dependence testing.  */
  compute_data_dependences_for_loop (loop, false,
                                     &LOOP_VINFO_DATAREFS (loop_vinfo),
                                     &LOOP_VINFO_DDRS (loop_vinfo));

  /* Go through the data-refs, check that the analysis succeeded. Update pointer
     from stmt_vec_info struct to DR and vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt;
      stmt_vec_info stmt_info;

      /* A NULL DR or missing DR_REF means the data-ref analyzer gave up on
         this reference entirely.  */
      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* FORNOW: a statement may carry at most one data reference.  */
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      /* A memory tag is needed for alias analysis of the vectorized refs.  */
      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
                get_vectype_for_scalar_type (scalar_type);
      /* No vector type exists for this scalar type on the target.  */
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}
1470
 
1471
 
1472
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
1473
 
1474
/* Function vect_mark_relevant.
1475
 
1476
   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
1477
 
1478
static void
1479
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1480
                    bool relevant_p, bool live_p)
1481
{
1482
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1483
  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1484
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1485
 
1486
  if (vect_print_dump_info (REPORT_DETAILS))
1487
    fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1488
 
1489
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1490
    {
1491
      tree pattern_stmt;
1492
 
1493
      /* This is the last stmt in a sequence that was detected as a
1494
         pattern that can potentially be vectorized.  Don't mark the stmt
1495
         as relevant/live because it's not going to vectorized.
1496
         Instead mark the pattern-stmt that replaces it.  */
1497
      if (vect_print_dump_info (REPORT_DETAILS))
1498
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1499
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1500
      stmt_info = vinfo_for_stmt (pattern_stmt);
1501
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1502
      save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1503
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1504
      stmt = pattern_stmt;
1505
    }
1506
 
1507
  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1508
  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1509
 
1510
  if (TREE_CODE (stmt) == PHI_NODE)
1511
    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1512
       or live will fail vectorization later on.  */
1513
    return;
1514
 
1515
  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1516
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1517
    {
1518
      if (vect_print_dump_info (REPORT_DETAILS))
1519
        fprintf (vect_dump, "already marked relevant/live.");
1520
      return;
1521
    }
1522
 
1523
  VEC_safe_push (tree, heap, *worklist, stmt);
1524
}
1525
 
1526
 
1527
/* Function vect_stmt_relevant_p.
1528
 
1529
   Return true if STMT in loop that is represented by LOOP_VINFO is
1530
   "relevant for vectorization".
1531
 
1532
   A stmt is considered "relevant for vectorization" if:
1533
   - it has uses outside the loop.
1534
   - it has vdefs (it alters memory).
1535
   - control stmts in the loop (except for the exit condition).
1536
 
1537
   CHECKME: what other side effects would the vectorizer allow?  */
1538
 
1539
static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
                      bool *relevant_p, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant_p = false;
  *live_p = false;

  /* cond stmt other than loop exit cond.  Control statements inside the
     loop body (other than the exit test) must be vectorized.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant_p = true;

  /* changing memory.  A stmt with virtual definitions alters memory and is
     therefore relevant.  (PHI nodes carry no virtual operand list here.)  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant_p = true;
      }

  /* uses outside the loop.  A def used after the loop makes the stmt
     "live": its scalar result must be preserved.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = bb_for_stmt (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
              gcc_assert (bb == loop->single_exit->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant_p);
}
1588
 
1589
 
1590
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.

   Returns false (and reports the reason when dumping is enabled) if an
   unsupported use is encountered; returns true otherwise.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt, use;
  stmt_ann_t ann;
  ssa_op_iter iter;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool relevant_p, live_p;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  /* Worklist of stmts whose relevance/liveness still has to be propagated
     to the stmts that feed them.  */
  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */

  /* Seed the worklist with the relevant/live phis of the loop header...  */
  bb = loop->header;
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "init: phi relevant? ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
        vect_mark_relevant (&worklist, phi, relevant_p, live_p);
    }

  /* ... and with the relevant/live stmts of every block in the loop.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
        }
    }


  /* 2. Process_worklist */

  while (VEC_length (tree, worklist) > 0)
    {
      stmt = VEC_pop (tree, worklist);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT. For each ssa-name USE that is defined
         in the loop, mark the stmt that defines it (DEF_STMT) as
         relevant/irrelevant and live/dead according to the liveness and
         relevance properties of STMT.
       */

      /* Phis are never pushed on the worklist (case 2.1 below skips them),
         so STMT is always a real stmt here.  */
      gcc_assert (TREE_CODE (stmt) != PHI_NODE);

      /* NOTE(review): ANN is assigned but not otherwise used in this
         function.  */
      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);

      relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated to the DEF_STMTs of its USEs:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
             STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p

         Exceptions:

         (case 1)
           If USE is used only for address computations (e.g. array indexing),
           which does not need to be directly vectorized, then the
           liveness/relevance of the respective DEF_STMT is left unchanged.

         (case 2)
           If STMT has been identified as defining a reduction variable, then
           we have two cases:
           (case 2.1)
             The last use of STMT is the reduction-variable, which is defined
             by a loop-header-phi. We don't want to mark the phi as live or
             relevant (because it does not need to be vectorized, it is handled
             as part of the vectorization of the reduction), so in this case we
             skip the call to vect_mark_relevant.
           (case 2.2)
             The rest of the uses of STMT are defined in the loop body. For
             the def_stmt of these uses we want to set liveness/relevance
             as follows:
               STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
               STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
             because even though STMT is classified as live (since it defines a
             value that is used across loop iterations) and irrelevant (since it
             is not used inside the loop), it will be vectorized, and therefore
             the corresponding DEF_STMTs need to marked as relevant.
       */

      /* case 2.2:  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          gcc_assert (!relevant_p && live_p);
          relevant_p = true;
          live_p = false;
        }

      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        {
          /* case 1: we are only interested in uses that need to be vectorized.
             Uses that are used for address computation are not considered
             relevant.
           */
          if (!exist_non_indexing_operands_for_use_p (use, stmt))
            continue;

          if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
              VEC_free (tree, heap, worklist);
              return false;
            }

          if (!def_stmt || IS_EMPTY_STMT (def_stmt))
            continue;

          /* NOTE(review): I is not updated by this USE iteration - it still
             holds NBBS from the init loop above, so the "%d" printed here is
             stale and does not identify the use.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "worklist: examine use %d: ", i);
              print_generic_expr (vect_dump, use, TDF_SLIM);
            }

          /* Defs from outside the loop do not participate in the
             vectorization of this loop.  */
          bb = bb_for_stmt (def_stmt);
          if (!flow_bb_inside_loop_p (loop, bb))
            continue;

          /* case 2.1: the reduction-use does not mark the defining-phi
             as relevant.  */
          if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
              && TREE_CODE (def_stmt) == PHI_NODE)
            continue;

          vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
        }
    }                           /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}
1774
 
1775
 
1776
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.

   Returns true iff every (non-virtual, non-reduction) loop-header phi has
   an analyzable access function with a non-chrec evolution part.  */

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.
         A chrec evolution part means the step itself evolves in an
         enclosing loop, i.e. the access function has degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
1863
 
1864
 
1865
/* Function vect_get_loop_niters.
1866
 
1867
   Determine how many iterations the loop is executed.
1868
   If an expression that represents the number of iterations
1869
   can be constructed, place it in NUMBER_OF_ITERATIONS.
1870
   Return the loop exit condition.  */
1871
 
1872
static tree
1873
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1874
{
1875
  tree niters;
1876
 
1877
  if (vect_print_dump_info (REPORT_DETAILS))
1878
    fprintf (vect_dump, "=== get_loop_niters ===");
1879
 
1880
  niters = number_of_iterations_in_loop (loop);
1881
 
1882
  if (niters != NULL_TREE
1883
      && niters != chrec_dont_know)
1884
    {
1885
      *number_of_iterations = niters;
1886
 
1887
      if (vect_print_dump_info (REPORT_DETAILS))
1888
        {
1889
          fprintf (vect_dump, "==> get_loop_niters:" );
1890
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1891
        }
1892
    }
1893
 
1894
  return get_loop_exit_condition (loop);
1895
}
1896
 
1897
 
1898
/* Function vect_analyze_loop_form.
1899
 
1900
   Verify the following restrictions (some may be relaxed in the future):
1901
   - it's an inner-most loop
1902
   - number of BBs = 2 (which are the loop header and the latch)
1903
   - the loop has a pre-header
1904
   - the loop has a single entry and exit
1905
   - the loop exit condition is simple enough, and the number of iterations
1906
     can be analyzed (a countable loop).  */
1907
 
1908
static loop_vec_info
1909
vect_analyze_loop_form (struct loop *loop)
1910
{
1911
  loop_vec_info loop_vinfo;
1912
  tree loop_cond;
1913
  tree number_of_iterations = NULL;
1914
 
1915
  if (vect_print_dump_info (REPORT_DETAILS))
1916
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1917
 
1918
  if (loop->inner)
1919
    {
1920
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1921
        fprintf (vect_dump, "not vectorized: nested loop.");
1922
      return NULL;
1923
    }
1924
 
1925
  if (!loop->single_exit
1926
      || loop->num_nodes != 2
1927
      || EDGE_COUNT (loop->header->preds) != 2)
1928
    {
1929
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1930
        {
1931
          if (!loop->single_exit)
1932
            fprintf (vect_dump, "not vectorized: multiple exits.");
1933
          else if (loop->num_nodes != 2)
1934
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1935
          else if (EDGE_COUNT (loop->header->preds) != 2)
1936
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
1937
        }
1938
 
1939
      return NULL;
1940
    }
1941
 
1942
  /* We assume that the loop exit condition is at the end of the loop. i.e,
1943
     that the loop is represented as a do-while (with a proper if-guard
1944
     before the loop if needed), where the loop header contains all the
1945
     executable statements, and the latch is empty.  */
1946
  if (!empty_block_p (loop->latch)
1947
        || phi_nodes (loop->latch))
1948
    {
1949
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1950
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
1951
      return NULL;
1952
    }
1953
 
1954
  /* Make sure there exists a single-predecessor exit bb:  */
1955
  if (!single_pred_p (loop->single_exit->dest))
1956
    {
1957
      edge e = loop->single_exit;
1958
      if (!(e->flags & EDGE_ABNORMAL))
1959
        {
1960
          split_loop_exit_edge (e);
1961
          if (vect_print_dump_info (REPORT_DETAILS))
1962
            fprintf (vect_dump, "split exit edge.");
1963
        }
1964
      else
1965
        {
1966
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1967
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1968
          return NULL;
1969
        }
1970
    }
1971
 
1972
  if (empty_block_p (loop->header))
1973
    {
1974
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1975
        fprintf (vect_dump, "not vectorized: empty loop.");
1976
      return NULL;
1977
    }
1978
 
1979
  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1980
  if (!loop_cond)
1981
    {
1982
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1983
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
1984
      return NULL;
1985
    }
1986
 
1987
  if (!number_of_iterations)
1988
    {
1989
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1990
        fprintf (vect_dump,
1991
                 "not vectorized: number of iterations cannot be computed.");
1992
      return NULL;
1993
    }
1994
 
1995
  if (chrec_contains_undetermined (number_of_iterations))
1996
    {
1997
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1998
        fprintf (vect_dump, "Infinite number of iterations.");
1999
      return false;
2000
    }
2001
 
2002
  loop_vinfo = new_loop_vec_info (loop);
2003
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2004
 
2005
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2006
    {
2007
      if (vect_print_dump_info (REPORT_DETAILS))
2008
        {
2009
          fprintf (vect_dump, "Symbolic number of iterations is ");
2010
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2011
        }
2012
    }
2013
  else
2014
  if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2015
    {
2016
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2017
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2018
      return NULL;
2019
    }
2020
 
2021
  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2022
 
2023
  return loop_vinfo;
2024
}
2025
 
2026
 
2027
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it. The different analyses will record information in the
   loop_vec_info struct.

   Returns the loop_vec_info with LOOP_VINFO_VECTORIZABLE_P set on
   success; on any analysis failure the partially-built loop_vec_info is
   destroyed and NULL is returned.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple, array references, which
     alignment can be forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  /* Pattern recognition cannot fail; it only annotates stmts.  */
  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* All analyses passed: mark the loop as vectorizable.  */
  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.