/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "params.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "target.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it had been manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N];  short b[N]; short c[N];   int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

        The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
        Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
        The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

        During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
        The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination to remove it.

        For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

        Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
        Currently the only target specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

        Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
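
/* As a concrete illustration of the optab check described above (a sketch,
   not code from this pass): to ask whether the target can add two vectors
   of eight 16-bit elements, query the addition optab at V8HImode and
   compare the result against CODE_FOR_nothing:

     enum insn_code icode = optab_handler (add_optab, V8HImode);
     if (icode == CODE_FOR_nothing)
       return false;

   where returning false means there is no target support and the stmt is
   not vectorized.  The same query, with a different optab and machine_mode,
   applies to any operation the vectorizer wants to emit.  */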

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) of 16 bytes, the VF is set to
   4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
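
/* Worked example (numbers from the comment above, for illustration only):
   with 16-byte vector registers, a loop whose only data type is a 4-byte
   'int' gets vectype V4SI, TYPE_VECTOR_SUBPARTS (V4SI) == 4, and hence
   VF = 4: the loop is strip-mined by 4.  A loop mixing 4-byte ints and
   2-byte shorts involves two element sizes, and, as stated above, the
   analysis currently fails for it.  */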

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;
  gimple stmt, pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
  bool analyze_pattern_stmt = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type:  ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "==> examining pattern statement: ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                }
              else
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "skip.");
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump,
                                   "==> examining pattern def stmt: ");
                          print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
                                             TDF_SLIM);
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_start (NULL);
                      analyze_pattern_stmt = false;
                    }
                }
              else
                analyze_pattern_stmt = false;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type:  ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "get vectype for scalar type:  ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vf_vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: different sized vector "
                           "types in statement, ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                  fprintf (vect_dump, " and ");
                  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution with constant step.  */
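
/* For instance (an illustrative sketch using the chrec notation of the
   scalar evolution analyzer): an induction variable that starts at 0 and
   is incremented by 4 in every iteration of loop 1 has the access
   function {0, +, 4}_1.  Its evolution part in loop 1 is the INTEGER_CST
   step 4 and its initial condition is 0, so it is accepted below; a step
   that is itself a chrec (a polynomial of degree >= 2) is rejected.  */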

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "step: ");
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
      fprintf (vect_dump, ",  init: ");
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "step unknown.");
      return false;
    }

  return true;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dumy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;
  bool double_reduc;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        STRIP_NOPS (access_fn);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Detected double reduction.");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected vectorizable nested cycle.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected reduction.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  VEC_safe_push (gimple, heap,
                                 LOOP_VINFO_REDUCTIONS (loop_vinfo),
                                 reduc_stmt);
                }
            }
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
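
/* Illustration (not from the original source): for a counted loop such as
   "for (i = 0; i < n; i++)" the analysis can construct an iteration-count
   expression in terms of 'n' and store it in NUMBER_OF_ITERATIONS, whereas
   for a loop that walks a list until NULL no such expression exists:
   *NUMBER_OF_ITERATIONS is then left untouched and only the exit condition
   is returned.  */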

static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the same
     as reversed postorder traversal, so we are safe.  */

   free (bbs);
   bbs = XCNEWVEC (basic_block, loop->num_nodes);
   nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                              bbs, loop->num_nodes, loop);
   gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (gimple, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_PEELING_HTAB (res) = NULL;
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  if (!clean_stmts)
    {
      free (LOOP_VINFO_BBS (loop_vinfo));
      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
      VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
      VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
      VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));

      free (loop_vinfo);
      loop->aux = NULL;
      return;
    }

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);
          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
    vect_free_slp_instance (instance);

  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));

  if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
    htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
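
/* For illustration (a sketch, not from the original source): a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   has a pre-header, a single entry and exit, and a countable trip count,
   so it satisfies these restrictions, whereas "while (p) p = p->next;"
   is not countable - its iteration count cannot be expressed as an
   expression available before the loop.  */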

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                        LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}


/* Get cost by calling cost target builtin.  */

static inline int
vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
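
/* Typical use (an illustrative sketch; the enum values are those of
   vect_cost_for_stmt): ask the target for the relative cost of one kind
   of statement, e.g.

     int scalar_cost = vect_get_cost (scalar_stmt);
     int vector_cost = vect_get_cost (vector_stmt);

   and compare such costs when deciding whether vectorization pays off.  */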
1210
 
1211
 
1212
/* Function vect_analyze_loop_operations.
1213
 
1214
   Scan the loop stmts and make sure they are all vectorizable.  */
1215
 
1216
static bool
1217
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1218
{
1219
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1220
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1221
  int nbbs = loop->num_nodes;
1222
  gimple_stmt_iterator si;
1223
  unsigned int vectorization_factor = 0;
1224
  int i;
1225
  gimple phi;
1226
  stmt_vec_info stmt_info;
1227
  bool need_to_vectorize = false;
1228
  int min_profitable_iters;
1229
  int min_scalar_loop_bound;
1230
  unsigned int th;
1231
  bool only_slp_in_loop = true, ok;
1232
 
1233
  if (vect_print_dump_info (REPORT_DETAILS))
1234
    fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1235
 
1236
  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1237
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1238
  if (slp)
1239
    {
1240
      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1241
         vectorization factor of the loop is the unrolling factor required by
1242
         the SLP instances.  If that unrolling factor is 1, we say, that we
1243
         perform pure SLP on loop - cross iteration parallelism is not
1244
         exploited.  */
1245
      for (i = 0; i < nbbs; i++)
1246
        {
1247
          basic_block bb = bbs[i];
1248
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1249
            {
1250
              gimple stmt = gsi_stmt (si);
1251
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1252
              gcc_assert (stmt_info);
1253
              if ((STMT_VINFO_RELEVANT_P (stmt_info)
1254
                   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1255
                  && !PURE_SLP_STMT (stmt_info))
1256
                /* STMT needs both SLP and loop-based vectorization.  */
1257
                only_slp_in_loop = false;
1258
            }
1259
        }
1260
 
1261
      if (only_slp_in_loop)
1262
        vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1263
      else
1264
        vectorization_factor = least_common_multiple (vectorization_factor,
1265
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1266
 
1267
      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1268
      if (vect_print_dump_info (REPORT_DETAILS))
1269
        fprintf (vect_dump, "Updating vectorization factor to %d ",
1270
                            vectorization_factor);
1271
    }
1272
 
1273
  for (i = 0; i < nbbs; i++)
1274
    {
1275
      basic_block bb = bbs[i];
1276
 
1277
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1278
        {
1279
          phi = gsi_stmt (si);
1280
          ok = true;
1281
 
1282
          stmt_info = vinfo_for_stmt (phi);
1283
          if (vect_print_dump_info (REPORT_DETAILS))
1284
            {
1285
              fprintf (vect_dump, "examining phi: ");
1286
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1287
            }
1288
 
1289
          /* Inner-loop loop-closed exit phi in outer-loop vectorization
1290
             (i.e., a phi in the tail of the outer-loop).  */
1291
          if (! is_loop_header_bb_p (bb))
1292
            {
1293
              /* FORNOW: we currently don't support the case that these phis
1294
                 are not used in the outerloop (unless it is double reduction,
1295
                 i.e., this phi is vect_reduction_def), cause this case
1296
                 requires to actually do something here.  */
1297
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1298
                   || STMT_VINFO_LIVE_P (stmt_info))
1299
                  && STMT_VINFO_DEF_TYPE (stmt_info)
1300
                     != vect_double_reduction_def)
1301
                {
1302
                  if (vect_print_dump_info (REPORT_DETAILS))
1303
                    fprintf (vect_dump,
1304
                             "Unsupported loop-closed phi in outer-loop.");
1305
                  return false;
1306
                }
1307
 
1308
              /* If PHI is used in the outer loop, we check that its operand
1309
                 is defined in the inner loop.  */
1310
              if (STMT_VINFO_RELEVANT_P (stmt_info))
1311
                {
1312
                  tree phi_op;
1313
                  gimple op_def_stmt;
1314
 
1315
                  if (gimple_phi_num_args (phi) != 1)
1316
                    return false;
1317
 
1318
                  phi_op = PHI_ARG_DEF (phi, 0);
1319
                  if (TREE_CODE (phi_op) != SSA_NAME)
1320
                    return false;
1321
 
1322
                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1323
                  if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
1324
                    return false;
1325
 
1326
                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1327
                        != vect_used_in_outer
1328
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1329
                           != vect_used_in_outer_by_reduction)
1330
                    return false;
1331
                }
1332
 
1333
              continue;
1334
            }
1335
 
1336
          gcc_assert (stmt_info);
1337
 
1338
          if (STMT_VINFO_LIVE_P (stmt_info))
1339
            {
1340
              /* FORNOW: not yet supported.  */
1341
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1342
                fprintf (vect_dump, "not vectorized: value used after loop.");
1343
              return false;
1344
            }
1345
 
1346
          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1347
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1348
            {
1349
              /* A scalar-dependence cycle that we don't support.  */
1350
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1351
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1352
              return false;
1353
            }
1354
 
1355
          if (STMT_VINFO_RELEVANT_P (stmt_info))
1356
            {
1357
              need_to_vectorize = true;
1358
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1359
                ok = vectorizable_induction (phi, NULL, NULL);
1360
            }
1361
 
1362
          if (!ok)
1363
            {
1364
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1365
                {
1366
                  fprintf (vect_dump,
1367
                           "not vectorized: relevant phi not supported: ");
1368
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1369
                }
1370
              return false;
1371
            }
1372
        }
1373
 
1374
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1375
        {
1376
          gimple stmt = gsi_stmt (si);
1377
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1378
            return false;
1379
        }
1380
    } /* bbs */
1381
 
1382
  /* All operations in the loop are either irrelevant (deal with loop
1383
     control, or dead), or only used outside the loop and can be moved
1384
     out of the loop (e.g. invariants, inductions).  The loop can be
1385
     optimized away by scalar optimizations.  We're better off not
1386
     touching this loop.  */
1387
  if (!need_to_vectorize)
1388
    {
1389
      if (vect_print_dump_info (REPORT_DETAILS))
1390
        fprintf (vect_dump,
1391
                 "All the computation can be taken out of the loop.");
1392
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1393
        fprintf (vect_dump,
1394
                 "not vectorized: redundant loop. no profit to vectorize.");
1395
      return false;
1396
    }
1397
 
1398
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1399
      && vect_print_dump_info (REPORT_DETAILS))
1400
    fprintf (vect_dump,
1401
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1402
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1403
 
1404
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1405
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1406
    {
1407
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1408
        fprintf (vect_dump, "not vectorized: iteration count too small.");
1409
      if (vect_print_dump_info (REPORT_DETAILS))
1410
        fprintf (vect_dump,"not vectorized: iteration count smaller than "
1411
                 "vectorization factor.");
1412
      return false;
1413
    }
1414
 
1415
  /* Analyze cost.  Decide if worth while to vectorize.  */
1416
 
1417
  /* Once VF is set, SLP costs should be updated since the number of created
1418
     vector stmts depends on VF.  */
1419
  vect_update_slp_costs_according_to_vf (loop_vinfo);
1420
 
1421
  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1422
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1423
 
1424
  if (min_profitable_iters < 0)
1425
    {
1426
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1427
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1428
      if (vect_print_dump_info (REPORT_DETAILS))
1429
        fprintf (vect_dump, "not vectorized: vector version will never be "
1430
                 "profitable.");
1431
      return false;
1432
    }
1433
 
1434
  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1435
                            * vectorization_factor) - 1);
1436
 
1437
  /* Use the cost model only if it is more conservative than the
1438
     user-specified threshold.  */
1439
 
1440
  th = (unsigned) min_scalar_loop_bound;
1441
  if (min_profitable_iters
1442
      && (!min_scalar_loop_bound
1443
          || min_profitable_iters > min_scalar_loop_bound))
1444
    th = (unsigned) min_profitable_iters;
1445
 
1446
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1447
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1448
    {
1449
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1450
        fprintf (vect_dump, "not vectorized: vectorization not "
1451
                 "profitable.");
1452
      if (vect_print_dump_info (REPORT_DETAILS))
1453
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
1454
                 "user specified loop bound parameter or minimum "
1455
                 "profitable iterations (whichever is more conservative).");
1456
      return false;
1457
    }
1458
 
1459
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1460
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1461
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1462
    {
1463
      if (vect_print_dump_info (REPORT_DETAILS))
1464
        fprintf (vect_dump, "epilog loop required.");
1465
      if (!vect_can_advance_ivs_p (loop_vinfo))
1466
        {
1467
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1468
            fprintf (vect_dump,
1469
                     "not vectorized: can't create epilog loop 1.");
1470
          return false;
1471
        }
1472
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1473
        {
1474
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1475
            fprintf (vect_dump,
1476
                     "not vectorized: can't create epilog loop 2.");
1477
          return false;
1478
        }
1479
    }
1480
 
1481
  return true;
1482
}
1483
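
/* Illustrative example, assuming vectorization_factor == 4: for

       for (i = 0; i < 10; i++)
         a[i] = b[i] + c[i];

   10 % 4 != 0, so the epilogue test above fires and two vector
   iterations are followed by a two-iteration scalar epilogue loop.  */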
 
1484
 
1485
/* Function vect_analyze_loop_2.
1486
 
1487
   Apply a set of analyses on LOOP, and create a loop_vec_info struct
1488
   for it.  The different analyses will record information in the
1489
   loop_vec_info struct.  */
1490
static bool
1491
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1492
{
1493
  bool ok, slp = false;
1494
  int max_vf = MAX_VECTORIZATION_FACTOR;
1495
  int min_vf = 2;
1496
 
1497
  /* Find all data references in the loop (which correspond to vdefs/vuses)
1498
     and analyze their evolution in the loop.  Also adjust the minimal
1499
     vectorization factor according to the loads and stores.
1500
 
1501
     FORNOW: Handle only simple array references, whose
1502
     alignment can be forced, and aligned pointer-references.  */
1503
 
1504
  ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1505
  if (!ok)
1506
    {
1507
      if (vect_print_dump_info (REPORT_DETAILS))
1508
        fprintf (vect_dump, "bad data references.");
1509
      return false;
1510
    }
1511
 
1512
  /* Classify all cross-iteration scalar data-flow cycles.
1513
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1514
 
1515
  vect_analyze_scalar_cycles (loop_vinfo);
1516
 
1517
  vect_pattern_recog (loop_vinfo);
1518
 
1519
  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1520
 
1521
  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1522
  if (!ok)
1523
    {
1524
      if (vect_print_dump_info (REPORT_DETAILS))
1525
        fprintf (vect_dump, "unexpected pattern.");
1526
      return false;
1527
    }
1528
 
1529
  /* Analyze data dependences between the data-refs in the loop
1530
     and adjust the maximum vectorization factor according to
1531
     the dependences.
1532
     FORNOW: fail at the first data dependence that we encounter.  */
1533
 
1534
  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1535
  if (!ok
1536
      || max_vf < min_vf)
1537
    {
1538
      if (vect_print_dump_info (REPORT_DETAILS))
1539
        fprintf (vect_dump, "bad data dependence.");
1540
      return false;
1541
    }
1542
 
1543
  ok = vect_determine_vectorization_factor (loop_vinfo);
1544
  if (!ok)
1545
    {
1546
      if (vect_print_dump_info (REPORT_DETAILS))
1547
        fprintf (vect_dump, "can't determine vectorization factor.");
1548
      return false;
1549
    }
1550
  if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1551
    {
1552
      if (vect_print_dump_info (REPORT_DETAILS))
1553
        fprintf (vect_dump, "bad data dependence.");
1554
      return false;
1555
    }
1556
 
1557
  /* Analyze the alignment of the data-refs in the loop.
1558
     Fail if a data reference is found that cannot be vectorized.  */
1559
 
1560
  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1561
  if (!ok)
1562
    {
1563
      if (vect_print_dump_info (REPORT_DETAILS))
1564
        fprintf (vect_dump, "bad data alignment.");
1565
      return false;
1566
    }
1567
 
1568
  /* Analyze the access patterns of the data-refs in the loop (consecutive,
1569
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1570
 
1571
  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1572
  if (!ok)
1573
    {
1574
      if (vect_print_dump_info (REPORT_DETAILS))
1575
        fprintf (vect_dump, "bad data access.");
1576
      return false;
1577
    }
1578
 
1579
  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1580
     It is important to call pruning after vect_analyze_data_ref_accesses,
1581
     since we use grouping information gathered by interleaving analysis.  */
1582
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1583
  if (!ok)
1584
    {
1585
      if (vect_print_dump_info (REPORT_DETAILS))
1586
        fprintf (vect_dump, "too long list of versioning for alias "
1587
                            "run-time tests.");
1588
      return false;
1589
    }
1590
 
1591
  /* This pass will decide on using loop versioning and/or loop peeling in
1592
     order to enhance the alignment of data references in the loop.  */
1593
 
1594
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
1595
  if (!ok)
1596
    {
1597
      if (vect_print_dump_info (REPORT_DETAILS))
1598
        fprintf (vect_dump, "bad data alignment.");
1599
      return false;
1600
    }
1601
 
1602
  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1603
  ok = vect_analyze_slp (loop_vinfo, NULL);
1604
  if (ok)
1605
    {
1606
      /* Decide which possible SLP instances to SLP.  */
1607
      slp = vect_make_slp_decision (loop_vinfo);
1608
 
1609
      /* Find stmts that need to be both vectorized and SLPed.  */
1610
      vect_detect_hybrid_slp (loop_vinfo);
1611
    }
1612
  else
1613
    return false;
1614
 
1615
  /* Scan all the operations in the loop and make sure they are
1616
     vectorizable.  */
1617
 
1618
  ok = vect_analyze_loop_operations (loop_vinfo, slp);
1619
  if (!ok)
1620
    {
1621
      if (vect_print_dump_info (REPORT_DETAILS))
1622
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
1623
      return false;
1624
    }
1625
 
1626
  return true;
1627
}
1628
 
1629
/* Function vect_analyze_loop.
1630
 
1631
   Apply a set of analyses on LOOP, and create a loop_vec_info struct
1632
   for it.  The different analyses will record information in the
1633
   loop_vec_info struct.  */
1634
loop_vec_info
1635
vect_analyze_loop (struct loop *loop)
1636
{
1637
  loop_vec_info loop_vinfo;
1638
  unsigned int vector_sizes;
1639
 
1640
  /* Autodetect first vector size we try.  */
1641
  current_vector_size = 0;
1642
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1643
 
1644
  if (vect_print_dump_info (REPORT_DETAILS))
1645
    fprintf (vect_dump, "===== analyze_loop_nest =====");
1646
 
1647
  if (loop_outer (loop)
1648
      && loop_vec_info_for_loop (loop_outer (loop))
1649
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1650
    {
1651
      if (vect_print_dump_info (REPORT_DETAILS))
1652
        fprintf (vect_dump, "outer-loop already vectorized.");
1653
      return NULL;
1654
    }
1655
 
1656
  while (1)
1657
    {
1658
      /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1659
      loop_vinfo = vect_analyze_loop_form (loop);
1660
      if (!loop_vinfo)
1661
        {
1662
          if (vect_print_dump_info (REPORT_DETAILS))
1663
            fprintf (vect_dump, "bad loop form.");
1664
          return NULL;
1665
        }
1666
 
1667
      if (vect_analyze_loop_2 (loop_vinfo))
1668
        {
1669
          LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1670
 
1671
          return loop_vinfo;
1672
        }
1673
 
1674
      destroy_loop_vec_info (loop_vinfo, true);
1675
 
1676
      vector_sizes &= ~current_vector_size;
1677
      if (vector_sizes == 0
1678
          || current_vector_size == 0)
1679
        return NULL;
1680
 
1681
      /* Try the next biggest vector size.  */
1682
      current_vector_size = 1 << floor_log2 (vector_sizes);
1683
      if (vect_print_dump_info (REPORT_DETAILS))
1684
        fprintf (vect_dump, "***** Re-trying analysis with "
1685
                 "vector size %d\n", current_vector_size);
1686
    }
1687
}
1688
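
/* Illustrative example (hypothetical target): if
   autovectorize_vector_sizes () returns the mask 16 | 32 and the first
   (autodetected) attempt fails with current_vector_size == 32, the mask
   becomes 16 and 1 << floor_log2 (16) retries the analysis with
   16-byte vectors.  */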
 
1689
 
1690
/* Function reduction_code_for_scalar_code
1691
 
1692
   Input:
1693
   CODE - tree_code of a reduction operation.
1694
 
1695
   Output:
1696
   REDUC_CODE - the corresponding tree-code to be used to reduce the
1697
      vector of partial results into a single scalar result (which
1698
      will also reside in a vector) or ERROR_MARK if the operation is
1699
      a supported reduction operation, but does not have such a tree-code.
1700
 
1701
   Return FALSE if CODE currently cannot be vectorized as a reduction.  */
1702
 
1703
static bool
1704
reduction_code_for_scalar_code (enum tree_code code,
1705
                                enum tree_code *reduc_code)
1706
{
1707
  switch (code)
1708
    {
1709
      case MAX_EXPR:
1710
        *reduc_code = REDUC_MAX_EXPR;
1711
        return true;
1712
 
1713
      case MIN_EXPR:
1714
        *reduc_code = REDUC_MIN_EXPR;
1715
        return true;
1716
 
1717
      case PLUS_EXPR:
1718
        *reduc_code = REDUC_PLUS_EXPR;
1719
        return true;
1720
 
1721
      case MULT_EXPR:
1722
      case MINUS_EXPR:
1723
      case BIT_IOR_EXPR:
1724
      case BIT_XOR_EXPR:
1725
      case BIT_AND_EXPR:
1726
        *reduc_code = ERROR_MARK;
1727
        return true;
1728
 
1729
      default:
1730
       return false;
1731
    }
1732
}
1733
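
/* Illustrative example (hypothetical): for the scalar sum

       sum = 0;
       for (i = 0; i < n; i++)
         sum += a[i];                     (PLUS_EXPR)

   the vector of partial sums is collapsed by REDUC_PLUS_EXPR, whereas a
   MULT_EXPR reduction gets ERROR_MARK and the epilogue falls back to
   vector shifts or element extracts.  */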
 
1734
 
1735
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1736
   STMT is printed with a message MSG. */
1737
 
1738
static void
1739
report_vect_op (gimple stmt, const char *msg)
1740
{
1741
  fprintf (vect_dump, "%s", msg);
1742
  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1743
}
1744
 
1745
 
1746
/* Detect SLP reduction of the form:
1747
 
1748
   #a1 = phi <a5, a0>
1749
   a2 = operation (a1)
1750
   a3 = operation (a2)
1751
   a4 = operation (a3)
1752
   a5 = operation (a4)
1753
 
1754
   #a = phi <a5>
1755
 
1756
   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1757
   FIRST_STMT is the first reduction stmt in the chain
1758
   (a2 = operation (a1)).
1759
 
1760
   Return TRUE if a reduction chain was detected.  */
1761
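
/* A source-level loop that gimplifies into such a chain (illustrative
   sketch; array and variable names are made up):

       for (i = 0; i < n; i++)
         sum = sum + a[4*i] + a[4*i+1] + a[4*i+2] + a[4*i+3];

   Each addition uses the result of the previous one, so four PLUS_EXPR
   stmts link the reduction phi back to itself.  */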
 
1762
static bool
1763
vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1764
{
1765
  struct loop *loop = (gimple_bb (phi))->loop_father;
1766
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1767
  enum tree_code code;
1768
  gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1769
  stmt_vec_info use_stmt_info, current_stmt_info;
1770
  tree lhs;
1771
  imm_use_iterator imm_iter;
1772
  use_operand_p use_p;
1773
  int nloop_uses, size = 0, n_out_of_loop_uses;
1774
  bool found = false;
1775
 
1776
  if (loop != vect_loop)
1777
    return false;
1778
 
1779
  lhs = PHI_RESULT (phi);
1780
  code = gimple_assign_rhs_code (first_stmt);
1781
  while (1)
1782
    {
1783
      nloop_uses = 0;
1784
      n_out_of_loop_uses = 0;
1785
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1786
        {
1787
          gimple use_stmt = USE_STMT (use_p);
1788
          if (is_gimple_debug (use_stmt))
1789
            continue;
1790
 
1793
          /* Check if we got back to the reduction phi.  */
1794
          if (use_stmt == phi)
1795
            {
1796
              loop_use_stmt = use_stmt;
1797
              found = true;
1798
              break;
1799
            }
1800
 
1801
          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1802
            {
1803
              if (vinfo_for_stmt (use_stmt)
1804
                  && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1805
                {
1806
                  loop_use_stmt = use_stmt;
1807
                  nloop_uses++;
1808
                }
1809
            }
1810
          else
1811
            n_out_of_loop_uses++;
1812

1813
          /* There can be either a single use in the loop or two uses in
1814
             phi nodes.  */
1815
          if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1816
            return false;
1817
        }
1818
 
1819
      if (found)
1820
        break;
1821
 
1822
      /* We reached a statement with no loop uses.  */
1823
      if (nloop_uses == 0)
1824
        return false;
1825
 
1826
      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1827
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1828
        return false;
1829
 
1830
      if (!is_gimple_assign (loop_use_stmt)
1831
          || code != gimple_assign_rhs_code (loop_use_stmt)
1832
          || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1833
        return false;
1834
 
1835
      /* Insert USE_STMT into reduction chain.  */
1836
      use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1837
      if (current_stmt)
1838
        {
1839
          current_stmt_info = vinfo_for_stmt (current_stmt);
1840
          GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1841
          GROUP_FIRST_ELEMENT (use_stmt_info)
1842
            = GROUP_FIRST_ELEMENT (current_stmt_info);
1843
        }
1844
      else
1845
        GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1846
 
1847
      lhs = gimple_assign_lhs (loop_use_stmt);
1848
      current_stmt = loop_use_stmt;
1849
      size++;
1850
    }
1851
 
1852
  if (!found || loop_use_stmt != phi || size < 2)
1853
    return false;
1854
 
1855
  /* Swap the operands, if needed, to make the reduction operand be the second
1856
     operand.  */
1857
  lhs = PHI_RESULT (phi);
1858
  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1859
  while (next_stmt)
1860
    {
1861
      if (gimple_assign_rhs2 (next_stmt) == lhs)
1862
        {
1863
          tree op = gimple_assign_rhs1 (next_stmt);
1864
          gimple def_stmt = NULL;
1865
 
1866
          if (TREE_CODE (op) == SSA_NAME)
1867
            def_stmt = SSA_NAME_DEF_STMT (op);
1868
 
1869
          /* Check that the other def is either defined in the loop
1870
             ("vect_internal_def"), or it's an induction (defined by a
1871
             loop-header phi-node).  */
1872
          if (def_stmt
1873
              && gimple_bb (def_stmt)
1874
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1875
              && (is_gimple_assign (def_stmt)
1876
                  || is_gimple_call (def_stmt)
1877
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1878
                           == vect_induction_def
1879
                  || (gimple_code (def_stmt) == GIMPLE_PHI
1880
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1881
                                  == vect_internal_def
1882
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1883
            {
1884
              lhs = gimple_assign_lhs (next_stmt);
1885
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1886
              continue;
1887
            }
1888
 
1889
          return false;
1890
        }
1891
      else
1892
        {
1893
          tree op = gimple_assign_rhs2 (next_stmt);
1894
          gimple def_stmt = NULL;
1895
 
1896
          if (TREE_CODE (op) == SSA_NAME)
1897
            def_stmt = SSA_NAME_DEF_STMT (op);
1898
 
1899
          /* Check that the other def is either defined in the loop
1900
            ("vect_internal_def"), or it's an induction (defined by a
1901
            loop-header phi-node).  */
1902
          if (def_stmt
1903
              && gimple_bb (def_stmt)
1904
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1905
              && (is_gimple_assign (def_stmt)
1906
                  || is_gimple_call (def_stmt)
1907
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1908
                              == vect_induction_def
1909
                  || (gimple_code (def_stmt) == GIMPLE_PHI
1910
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1911
                                  == vect_internal_def
1912
                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1913
            {
1914
              if (vect_print_dump_info (REPORT_DETAILS))
1915
                {
1916
                  fprintf (vect_dump, "swapping oprnds: ");
1917
                  print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
1918
                }
1919
 
1920
              swap_tree_operands (next_stmt,
1921
                                  gimple_assign_rhs1_ptr (next_stmt),
1922
                                  gimple_assign_rhs2_ptr (next_stmt));
1923
              mark_symbols_for_renaming (next_stmt);
1924
            }
1925
          else
1926
            return false;
1927
        }
1928
 
1929
      lhs = gimple_assign_lhs (next_stmt);
1930
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1931
    }
1932
 
1933
  /* Save the chain for further analysis in SLP detection.  */
1934
  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1935
  VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
1936
  GROUP_SIZE (vinfo_for_stmt (first)) = size;
1937
 
1938
  return true;
1939
}
1940
 
1941
 
1942
/* Function vect_is_simple_reduction_1
1943
 
1944
   (1) Detect a cross-iteration def-use cycle that represents a simple
1945
   reduction computation.  We look for the following pattern:
1946
 
1947
   loop_header:
1948
     a1 = phi < a0, a2 >
1949
     a3 = ...
1950
     a2 = operation (a3, a1)
1951
 
1952
   such that:
1953
   1. operation is commutative and associative and it is safe to
1954
      change the order of the computation (if CHECK_REDUCTION is true)
1955
   2. no uses for a2 in the loop (a2 is used out of the loop)
1956
   3. no uses of a1 in the loop besides the reduction operation
1957
   4. no uses of a1 outside the loop.
1958
 
1959
   Conditions 1,4 are tested here.
1960
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1961
 
1962
   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1963
   nested cycles, if CHECK_REDUCTION is false.
1964
 
1965
   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1966
   reductions:
1967
 
1968
     a1 = phi < a0, a2 >
1969
     inner loop (def of a3)
1970
     a2 = phi < a3 >
1971
 
1972
   If MODIFY is true it also tries to rework the code in-place to enable
1973
   detection of more reduction patterns.  For the time being we rewrite
1974
   "res -= RHS" into "res += -RHS" when it seems worthwhile.
1975
*/
1976
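
/* A source-level instance of pattern (1), for illustration (names are
   made up):

       s = s0;
       for (i = 0; i < n; i++)
         s = s + a[i];

   Here a1 is the loop-header phi for s, a3 the load of a[i], and a2
   the sum that feeds both the next iteration and the use after the
   loop.  */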
 
1977
static gimple
1978
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1979
                            bool check_reduction, bool *double_reduc,
1980
                            bool modify)
1981
{
1982
  struct loop *loop = (gimple_bb (phi))->loop_father;
1983
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1984
  edge latch_e = loop_latch_edge (loop);
1985
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1986
  gimple def_stmt, def1 = NULL, def2 = NULL;
1987
  enum tree_code orig_code, code;
1988
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1989
  tree type;
1990
  int nloop_uses;
1991
  tree name;
1992
  imm_use_iterator imm_iter;
1993
  use_operand_p use_p;
1994
  bool phi_def;
1995
 
1996
  *double_reduc = false;
1997
 
1998
  /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
1999
     otherwise, we assume outer loop vectorization.  */
2000
  gcc_assert ((check_reduction && loop == vect_loop)
2001
              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2002
 
2003
  name = PHI_RESULT (phi);
2004
  nloop_uses = 0;
2005
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2006
    {
2007
      gimple use_stmt = USE_STMT (use_p);
2008
      if (is_gimple_debug (use_stmt))
2009
        continue;
2010
 
2011
      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2012
        {
2013
          if (vect_print_dump_info (REPORT_DETAILS))
2014
            fprintf (vect_dump, "intermediate value used outside loop.");
2015
 
2016
          return NULL;
2017
        }
2018
 
2019
      if (vinfo_for_stmt (use_stmt)
2020
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2021
        nloop_uses++;
2022
      if (nloop_uses > 1)
2023
        {
2024
          if (vect_print_dump_info (REPORT_DETAILS))
2025
            fprintf (vect_dump, "reduction used in loop.");
2026
          return NULL;
2027
        }
2028
    }
2029
 
2030
  if (TREE_CODE (loop_arg) != SSA_NAME)
2031
    {
2032
      if (vect_print_dump_info (REPORT_DETAILS))
2033
        {
2034
          fprintf (vect_dump, "reduction: not ssa_name: ");
2035
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2036
        }
2037
      return NULL;
2038
    }
2039
 
2040
  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2041
  if (!def_stmt)
2042
    {
2043
      if (vect_print_dump_info (REPORT_DETAILS))
2044
        fprintf (vect_dump, "reduction: no def_stmt.");
2045
      return NULL;
2046
    }
2047
 
2048
  if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2049
    {
2050
      if (vect_print_dump_info (REPORT_DETAILS))
2051
        print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2052
      return NULL;
2053
    }
2054
 
2055
  if (is_gimple_assign (def_stmt))
2056
    {
2057
      name = gimple_assign_lhs (def_stmt);
2058
      phi_def = false;
2059
    }
2060
  else
2061
    {
2062
      name = PHI_RESULT (def_stmt);
2063
      phi_def = true;
2064
    }
2065
 
2066
  nloop_uses = 0;
2067
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2068
    {
2069
      gimple use_stmt = USE_STMT (use_p);
2070
      if (is_gimple_debug (use_stmt))
2071
        continue;
2072
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2073
          && vinfo_for_stmt (use_stmt)
2074
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2075
        nloop_uses++;
2076
      if (nloop_uses > 1)
2077
        {
2078
          if (vect_print_dump_info (REPORT_DETAILS))
2079
            fprintf (vect_dump, "reduction used in loop.");
2080
          return NULL;
2081
        }
2082
    }
2083
 
2084
  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2085
     defined in the inner loop.  */
2086
  if (phi_def)
2087
    {
2088
      op1 = PHI_ARG_DEF (def_stmt, 0);
2089
 
2090
      if (gimple_phi_num_args (def_stmt) != 1
2091
          || TREE_CODE (op1) != SSA_NAME)
2092
        {
2093
          if (vect_print_dump_info (REPORT_DETAILS))
2094
            fprintf (vect_dump, "unsupported phi node definition.");
2095
 
2096
          return NULL;
2097
        }
2098
 
2099
      def1 = SSA_NAME_DEF_STMT (op1);
2100
      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2101
          && loop->inner
2102
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2103
          && is_gimple_assign (def1))
2104
        {
2105
          if (vect_print_dump_info (REPORT_DETAILS))
2106
            report_vect_op (def_stmt, "detected double reduction: ");
2107
 
2108
          *double_reduc = true;
2109
          return def_stmt;
2110
        }
2111
 
2112
      return NULL;
2113
    }
2114
 
2115
  code = orig_code = gimple_assign_rhs_code (def_stmt);
2116
 
2117
  /* We can handle "res -= x[i]", which is non-associative by
2118
     simply rewriting this into "res += -x[i]".  Avoid changing
2119
     gimple instruction for the first simple tests and only do this
2120
     if we're allowed to change code at all.  */
2121
  if (code == MINUS_EXPR
2122
      && modify
2123
      && (op1 = gimple_assign_rhs1 (def_stmt))
2124
      && TREE_CODE (op1) == SSA_NAME
2125
      && SSA_NAME_DEF_STMT (op1) == phi)
2126
    code = PLUS_EXPR;
2127
 
2128
  if (check_reduction
2129
      && (!commutative_tree_code (code) || !associative_tree_code (code)))
2130
    {
2131
      if (vect_print_dump_info (REPORT_DETAILS))
2132
        report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2133
      return NULL;
2134
    }
2135
 
2136
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2137
    {
2138
      if (code != COND_EXPR)
2139
        {
2140
          if (vect_print_dump_info (REPORT_DETAILS))
2141
            report_vect_op (def_stmt, "reduction: not binary operation: ");
2142
 
2143
          return NULL;
2144
        }
2145
 
2146
      op3 = gimple_assign_rhs1 (def_stmt);
2147
      if (COMPARISON_CLASS_P (op3))
2148
        {
2149
          op4 = TREE_OPERAND (op3, 1);
2150
          op3 = TREE_OPERAND (op3, 0);
2151
        }
2152
 
2153
      op1 = gimple_assign_rhs2 (def_stmt);
2154
      op2 = gimple_assign_rhs3 (def_stmt);
2155
 
2156
      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2157
        {
2158
          if (vect_print_dump_info (REPORT_DETAILS))
2159
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2160
 
2161
          return NULL;
2162
        }
2163
    }
2164
  else
2165
    {
2166
      op1 = gimple_assign_rhs1 (def_stmt);
2167
      op2 = gimple_assign_rhs2 (def_stmt);
2168
 
2169
      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2170
        {
2171
          if (vect_print_dump_info (REPORT_DETAILS))
2172
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2173
 
2174
          return NULL;
2175
        }
2176
    }
2177
 
2178
  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2179
  if ((TREE_CODE (op1) == SSA_NAME
2180
       && !types_compatible_p (type,TREE_TYPE (op1)))
2181
      || (TREE_CODE (op2) == SSA_NAME
2182
          && !types_compatible_p (type, TREE_TYPE (op2)))
2183
      || (op3 && TREE_CODE (op3) == SSA_NAME
2184
          && !types_compatible_p (type, TREE_TYPE (op3)))
2185
      || (op4 && TREE_CODE (op4) == SSA_NAME
2186
          && !types_compatible_p (type, TREE_TYPE (op4))))
2187
    {
2188
      if (vect_print_dump_info (REPORT_DETAILS))
2189
        {
2190
          fprintf (vect_dump, "reduction: multiple types: operation type: ");
2191
          print_generic_expr (vect_dump, type, TDF_SLIM);
2192
          fprintf (vect_dump, ", operands types: ");
2193
          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2194
          fprintf (vect_dump, ",");
2195
          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2196
          if (op3)
2197
            {
2198
              fprintf (vect_dump, ",");
2199
              print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2200
            }
2201
 
2202
          if (op4)
2203
            {
2204
              fprintf (vect_dump, ",");
2205
              print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2206
            }
2207
        }
2208
 
2209
      return NULL;
2210
    }
2211
 
2212
  /* Check that it's ok to change the order of the computation.
2213
     Generally, when vectorizing a reduction we change the order of the
2214
     computation.  This may change the behavior of the program in some
2215
     cases, so we need to check that this is ok.  One exception is when
2216
     vectorizing an outer-loop: the inner-loop is executed sequentially,
2217
     and therefore vectorizing reductions in the inner-loop during
2218
     outer-loop vectorization is safe.  */
2219
 
2220
  /* CHECKME: check for !flag_finite_math_only too?  */
2221
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2222
      && check_reduction)
2223
    {
2224
      /* Changing the order of operations changes the semantics.  */
2225
      if (vect_print_dump_info (REPORT_DETAILS))
2226
        report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2227
      return NULL;
2228
    }
2229
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2230
           && check_reduction)
2231
    {
2232
      /* Changing the order of operations changes the semantics.  */
2233
      if (vect_print_dump_info (REPORT_DETAILS))
2234
        report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2235
      return NULL;
2236
    }
2237
  else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2238
    {
2239
      /* Changing the order of operations changes the semantics.  */
2240
      if (vect_print_dump_info (REPORT_DETAILS))
2241
        report_vect_op (def_stmt,
2242
                        "reduction: unsafe fixed-point math optimization: ");
2243
      return NULL;
2244
    }
2245
 
2246
  /* If we detected "res -= x[i]" earlier, rewrite it into
2247
     "res += -x[i]" now.  If this turns out to be useless reassoc
2248
     will clean it up again.  */
2249
  if (orig_code == MINUS_EXPR)
2250
    {
2251
      tree rhs = gimple_assign_rhs2 (def_stmt);
2252
      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
2253
      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2254
                                                         rhs, NULL);
2255
      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2256
      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2257
                                                          loop_info, NULL));
2258
      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2259
      gimple_assign_set_rhs2 (def_stmt, negrhs);
2260
      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2261
      update_stmt (def_stmt);
2262
    }
2263
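
/* For illustration (hypothetical SSA names), the rewrite turns

       res_3 = res_1 - _2;

   into

       neg_4 = -_2;
       res_3 = res_1 + neg_4;

   so the PLUS_EXPR form matches the reduction checks below.  */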
 
2264
  /* Reduction is safe. We're dealing with one of the following:
2265
     1) integer arithmetic and no trapv
2266
     2) floating point arithmetic, and special flags permit this optimization
2267
     3) nested cycle (i.e., outer loop vectorization).  */
2268
  if (TREE_CODE (op1) == SSA_NAME)
2269
    def1 = SSA_NAME_DEF_STMT (op1);
2270
 
2271
  if (TREE_CODE (op2) == SSA_NAME)
2272
    def2 = SSA_NAME_DEF_STMT (op2);
2273
 
2274
  if (code != COND_EXPR
2275
      && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2276
    {
2277
      if (vect_print_dump_info (REPORT_DETAILS))
2278
        report_vect_op (def_stmt, "reduction: no defs for operands: ");
2279
      return NULL;
2280
    }
2281
 
2282
  /* Check that one def is the reduction def, defined by PHI,
2283
     the other def is either defined in the loop ("vect_internal_def"),
2284
     or it's an induction (defined by a loop-header phi-node).  */
2285
 
2286
  if (def2 && def2 == phi
2287
      && (code == COND_EXPR
2288
          || !def1 || gimple_nop_p (def1)
2289
          || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2290
              && (is_gimple_assign (def1)
2291
                  || is_gimple_call (def1)
2292
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2293
                      == vect_induction_def
2294
                  || (gimple_code (def1) == GIMPLE_PHI
2295
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2296
                          == vect_internal_def
2297
                      && !is_loop_header_bb_p (gimple_bb (def1)))))))
2298
    {
2299
      if (vect_print_dump_info (REPORT_DETAILS))
2300
        report_vect_op (def_stmt, "detected reduction: ");
2301
      return def_stmt;
2302
    }
2303
 
2304
  if (def1 && def1 == phi
2305
      && (code == COND_EXPR
2306
          || !def2 || gimple_nop_p (def2)
2307
          || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2308
              && (is_gimple_assign (def2)
2309
                  || is_gimple_call (def2)
2310
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2311
                      == vect_induction_def
2312
                  || (gimple_code (def2) == GIMPLE_PHI
2313
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2314
                          == vect_internal_def
2315
                      && !is_loop_header_bb_p (gimple_bb (def2)))))))
2316
    {
2317
      if (check_reduction)
2318
        {
2319
          /* Swap operands (just for simplicity - so that the rest of the code
2320
             can assume that the reduction variable is always the last (second)
2321
             argument).  */
2322
          if (vect_print_dump_info (REPORT_DETAILS))
2323
            report_vect_op (def_stmt,
2324
                            "detected reduction: need to swap operands: ");
2325
 
2326
          swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2327
                              gimple_assign_rhs2_ptr (def_stmt));
2328
        }
2329
      else
2330
        {
2331
          if (vect_print_dump_info (REPORT_DETAILS))
2332
            report_vect_op (def_stmt, "detected reduction: ");
2333
        }
2334
 
2335
      return def_stmt;
2336
    }
2337
 
2338
  /* Try to find SLP reduction chain.  */
2339
  if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2340
    {
2341
      if (vect_print_dump_info (REPORT_DETAILS))
2342
        report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2343
 
2344
      return def_stmt;
2345
    }
2346
 
2347
  if (vect_print_dump_info (REPORT_DETAILS))
2348
    report_vect_op (def_stmt, "reduction: unknown pattern: ");
2349
 
2350
  return NULL;
2351
}
2352
 
2353
/* Wrapper around vect_is_simple_reduction_1, that won't modify code
2354
   in-place.  Arguments as there.  */
2355
 
2356
static gimple
2357
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2358
                          bool check_reduction, bool *double_reduc)
2359
{
2360
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2361
                                     double_reduc, false);
2362
}
2363
 
2364
/* Wrapper around vect_is_simple_reduction_1, which will modify code
2365
   in-place if it enables detection of more reductions.  Arguments
2366
   as there.  */
2367
 
2368
gimple
2369
vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2370
                             bool check_reduction, bool *double_reduc)
2371
{
2372
  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2373
                                     double_reduc, true);
2374
}
2375
 
2376
/* Calculate the cost of one scalar iteration of the loop.  */
2377
int
2378
vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
2379
{
2380
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2381
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2382
  int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2383
  int innerloop_iters, i, stmt_cost;
2384
 
2385
  /* Count statements in scalar loop.  Using this as scalar cost for a single
2386
     iteration for now.
2387
 
2388
     TODO: Add outer loop support.
2389
 
2390
     TODO: Consider assigning different costs to different scalar
2391
     statements.  */
2392
 
2393
  /* FORNOW.  */
2394
  innerloop_iters = 1;
2395
  if (loop->inner)
2396
    innerloop_iters = 50; /* FIXME */
2397
 
2398
  for (i = 0; i < nbbs; i++)
2399
    {
2400
      gimple_stmt_iterator si;
2401
      basic_block bb = bbs[i];
2402
 
2403
      if (bb->loop_father == loop->inner)
2404
        factor = innerloop_iters;
2405
      else
2406
        factor = 1;
2407
 
2408
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2409
        {
2410
          gimple stmt = gsi_stmt (si);
2411
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412
 
2413
          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2414
            continue;
2415
 
2416
          /* Skip stmts that are not vectorized inside the loop.  */
2417
          if (stmt_info
2418
              && !STMT_VINFO_RELEVANT_P (stmt_info)
2419
              && (!STMT_VINFO_LIVE_P (stmt_info)
2420
                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2421
              && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2422
            continue;
2423
 
2424
          if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2425
            {
2426
              if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2427
                stmt_cost = vect_get_cost (scalar_load);
2428
              else
2429
                stmt_cost = vect_get_cost (scalar_store);
2430
            }
2431
          else
2432
            stmt_cost = vect_get_cost (scalar_stmt);
2433
 
2434
          scalar_single_iter_cost += stmt_cost * factor;
2435
        }
2436
    }
2437
  return scalar_single_iter_cost;
2438
}
2439
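
/* Illustrative example (hypothetical): for the single-statement body

       a[i] = b[i] + c[i];

   one scalar iteration counts two scalar_load costs, one scalar_stmt
   for the addition and one scalar_store, each multiplied by FACTOR
   (FORNOW 50 for blocks that belong to an inner loop).  */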
 
2440
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2441
int
2442
vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2443
                             int *peel_iters_epilogue,
2444
                             int scalar_single_iter_cost)
2445
{
2446
  int peel_guard_costs = 0;
2447
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2448
 
2449
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2450
    {
2451
      *peel_iters_epilogue = vf/2;
2452
      if (vect_print_dump_info (REPORT_COST))
2453
        fprintf (vect_dump, "cost model: "
2454
                            "epilogue peel iters set to vf/2 because "
2455
                            "loop iterations are unknown .");
2456
 
2457
      /* If peeled iterations are known but the number of scalar loop
2458
         iterations is unknown, count a taken branch per peeled loop.  */
2459
      peel_guard_costs =  2 * vect_get_cost (cond_branch_taken);
2460
    }
2461
  else
2462
    {
2463
      int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2464
      peel_iters_prologue = niters < peel_iters_prologue ?
2465
                            niters : peel_iters_prologue;
2466
      *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2467
      /* If we need to peel for gaps, but no peeling is required, we have to
2468
         peel VF iterations.  */
2469
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2470
        *peel_iters_epilogue = vf;
2471
    }
2472
 
2473
  return (peel_iters_prologue * scalar_single_iter_cost)
2474
         + (*peel_iters_epilogue * scalar_single_iter_cost)
2475
         + peel_guard_costs;
2476
}
2477
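
/* Worked example (hypothetical numbers): with vf == 4, a known
   NITERS == 21, PEEL_ITERS_PROLOGUE == 3 and no peeling for gaps, the
   epilogue gets (21 - 3) % 4 == 2 iterations, so the peeled work costs
   (3 + 2) * SCALAR_SINGLE_ITER_COST with no extra guard branches.  */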
 
2478
/* Function vect_estimate_min_profitable_iters
2479
 
2480
   Return the number of iterations required for the vector version of the
2481
   loop to be profitable relative to the cost of the scalar version of the
2482
   loop.
2483
 
2484
   TODO: Take profile info into account before making vectorization
2485
   decisions, if available.  */
2486
 
2487
int
2488
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2489
{
2490
  int i;
2491
  int min_profitable_iters;
2492
  int peel_iters_prologue;
2493
  int peel_iters_epilogue;
2494
  int vec_inside_cost = 0;
2495
  int vec_outside_cost = 0;
2496
  int scalar_single_iter_cost = 0;
2497
  int scalar_outside_cost = 0;
2498
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2499
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2500
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2501
  int nbbs = loop->num_nodes;
2502
  int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2503
  int peel_guard_costs = 0;
2504
  int innerloop_iters = 0, factor;
2505
  VEC (slp_instance, heap) *slp_instances;
2506
  slp_instance instance;
2507
 
2508
  /* Cost model disabled.  */
2509
  if (!flag_vect_cost_model)
2510
    {
2511
      if (vect_print_dump_info (REPORT_COST))
2512
        fprintf (vect_dump, "cost model disabled.");
2513
      return 0;
2514
    }
2515
 
2516
  /* Requires loop versioning tests to handle misalignment.  */
2517
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2518
    {
2519
      /*  FIXME: Make cost depend on complexity of individual check.  */
2520
      vec_outside_cost +=
2521
        VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2522
      if (vect_print_dump_info (REPORT_COST))
2523
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2524
                 "versioning to treat misalignment.\n");
2525
    }
2526
 
2527
  /* Requires loop versioning with alias checks.  */
2528
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2529
    {
2530
      /*  FIXME: Make cost depend on complexity of individual check.  */
2531
      vec_outside_cost +=
2532
        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2533
      if (vect_print_dump_info (REPORT_COST))
2534
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2535
                 "versioning aliasing.\n");
2536
    }
2537
 
2538
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2539
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2540
    vec_outside_cost += vect_get_cost (cond_branch_taken);
2541
 
2542
  /* Accumulate the inside-of-loop and outside-of-loop costs of the
2543
     vector statements.
2544

2545
     TODO: Add outer loop support.
2546

2547
     TODO: Consider assigning different costs to different vector
2548
     statements.  */
2549
 
2550
  /* FORNOW.  */
2551
  if (loop->inner)
2552
    innerloop_iters = 50; /* FIXME */
2553
 
2554
  for (i = 0; i < nbbs; i++)
2555
    {
2556
      gimple_stmt_iterator si;
2557
      basic_block bb = bbs[i];
2558
 
2559
      if (bb->loop_father == loop->inner)
2560
        factor = innerloop_iters;
2561
      else
2562
        factor = 1;
2563
 
2564
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2565
        {
2566
          gimple stmt = gsi_stmt (si);
2567
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2568
 
2569
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2570
            {
2571
              stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2572
              stmt_info = vinfo_for_stmt (stmt);
2573
            }
2574
 
2575
          /* Skip stmts that are not vectorized inside the loop.  */
2576
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
2577
              && (!STMT_VINFO_LIVE_P (stmt_info)
2578
                 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
2579
            continue;
2580
 
2581
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2582
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2583
             some of the "outside" costs are generated inside the outer-loop.  */
2584
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2585
          if (is_pattern_stmt_p (stmt_info)
2586
              && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
2587
            {
2588
              gimple_stmt_iterator gsi;
2589
 
2590
              for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2591
                   !gsi_end_p (gsi); gsi_next (&gsi))
2592
                {
2593
                  gimple pattern_def_stmt = gsi_stmt (gsi);
2594
                  stmt_vec_info pattern_def_stmt_info
2595
                    = vinfo_for_stmt (pattern_def_stmt);
2596
                  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
2597
                      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
2598
                    {
2599
                      vec_inside_cost
2600
                        += STMT_VINFO_INSIDE_OF_LOOP_COST
2601
                           (pattern_def_stmt_info) * factor;
2602
                      vec_outside_cost
2603
                        += STMT_VINFO_OUTSIDE_OF_LOOP_COST
2604
                           (pattern_def_stmt_info);
2605
                    }
2606
                }
2607
            }
2608
        }
2609
    }
2610
 
2611
  scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
2612
 
2613
  /* Add additional cost for the peeled instructions in prologue and epilogue
2614
     loop.
2615
 
2616
     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2617
     at compile-time - we assume it's vf/2 (the worst would be vf-1).
2618
 
2619
     TODO: Build an expression that represents peel_iters for prologue and
2620
     epilogue to be used in a run-time test.  */
2621
 
2622
  if (npeel < 0)
2623
    {
2624
      peel_iters_prologue = vf/2;
2625
      if (vect_print_dump_info (REPORT_COST))
2626
        fprintf (vect_dump, "cost model: "
2627
                 "prologue peel iters set to vf/2.");
2628
 
2629
      /* If peeling for alignment is unknown, the loop bound of the main
2630
         loop becomes unknown.  */
2631
      peel_iters_epilogue = vf/2;
2632
      if (vect_print_dump_info (REPORT_COST))
2633
        fprintf (vect_dump, "cost model: "
2634
                 "epilogue peel iters set to vf/2 because "
2635
                 "peeling for alignment is unknown .");
2636
 
2637
      /* If peeled iterations are unknown, count a taken branch and a not taken
2638
         branch per peeled loop. Even if scalar loop iterations are known,
2639
         vector iterations are not known since peeled prologue iterations are
2640
         not known. Hence guards remain the same.  */
2641
      peel_guard_costs +=  2 * (vect_get_cost (cond_branch_taken)
2642
                                + vect_get_cost (cond_branch_not_taken));
2643
      vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2644
                           + (peel_iters_epilogue * scalar_single_iter_cost)
2645
                           + peel_guard_costs;
2646
    }
2647
  else
2648
    {
2649
      peel_iters_prologue = npeel;
2650
      vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2651
                                    peel_iters_prologue, &peel_iters_epilogue,
2652
                                    scalar_single_iter_cost);
2653
    }
2654
 
2655
  /* FORNOW: The scalar outside cost is incremented in one of the
2656
     following ways:
2657
 
2658
     1. The vectorizer checks for alignment and aliasing and generates
2659
     a condition that allows dynamic vectorization.  A cost model
2660
     check is ANDED with the versioning condition.  Hence scalar code
2661
     path now has the added cost of the versioning check.
2662
 
2663
       if (cost > th & versioning_check)
2664
         jmp to vector code
2665
 
2666
     Hence the run-time scalar cost is incremented by a not-taken branch.
2667
 
2668
     2. The vectorizer then checks if a prologue is required.  If the
2669
     cost model check was not done before during versioning, it has to
2670
     be done before the prologue check.
2671
 
2672
       if (cost <= th)
2673
         prologue = scalar_iters
2674
       if (prologue == 0)
2675
         jmp to vector code
2676
       else
2677
         execute prologue
2678
       if (prologue == num_iters)
2679
         go to exit
2680
 
2681
     Hence the run-time scalar cost is incremented by a taken branch,
2682
     plus a not-taken branch, plus a taken branch cost.
2683
 
2684
     3. The vectorizer then checks if an epilogue is required.  If the
2685
     cost model check was not done before during prologue check, it
2686
     has to be done with the epilogue check.
2687
 
2688
       if (prologue == 0)
2689
         jmp to vector code
2690
       else
2691
         execute prologue
2692
       if (prologue == num_iters)
2693
         go to exit
2694
       vector code:
2695
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2696
           jmp to epilogue
2697
 
2698
     Hence the run-time scalar cost should be incremented by 2 taken
2699
     branches.
2700
 
2701
     TODO: The back end may reorder the BBs differently and reverse
2702
     conditions/branch directions.  Change the estimates below to
2703
     something more reasonable.  */
2704
 
2705
  /* If the number of iterations is known and we do not do versioning, we can
2706
     decide whether to vectorize at compile time.  Hence the scalar version
2707
      does not carry cost model guard costs.  */
2708
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2709
      || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2710
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2711
    {
2712
      /* Cost model check occurs at versioning.  */
2713
      if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2714
          || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2715
        scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2716
      else
2717
        {
2718
          /* Cost model check occurs at prologue generation.  */
2719
          if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2720
            scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2721
                                   + vect_get_cost (cond_branch_not_taken);
2722
          /* Cost model check occurs at epilogue generation.  */
2723
          else
2724
            scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2725
        }
2726
    }
2727
 
2728
  /* Add SLP costs.  */
2729
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2730
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2731
    {
2732
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2733
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2734
    }
2735
 
2736
  /* Calculate number of iterations required to make the vector version
2737
     profitable, relative to the loop bodies only.  The following condition
2738
     must hold true:
2739
     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2740
     where
2741
     SIC = scalar iteration cost, VIC = vector iteration cost,
2742
     VOC = vector outside cost, VF = vectorization factor,
2743
     PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2744
     SOC = scalar outside cost for run time cost model check.  */
2745
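
/* Worked example (hypothetical costs): with SIC == 2, VIC == 4,
   VF == 4, VOC == 10, SOC == 0 and no peeling, the quotient computed
   below is (10 * 4) / (2 * 4 - 4) == 10; the follow-up check sees
   2 * 4 * 10 <= 4 * 10 + 10 * 4 and bumps the result to 11, which the
   later clamp to VF leaves alone and the final decrement turns into a
   threshold of 10.  */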
 
2746
  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2747
    {
2748
      if (vec_outside_cost <= 0)
2749
        min_profitable_iters = 1;
2750
      else
2751
        {
2752
          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2753
                                  - vec_inside_cost * peel_iters_prologue
2754
                                  - vec_inside_cost * peel_iters_epilogue)
2755
                                 / ((scalar_single_iter_cost * vf)
2756
                                    - vec_inside_cost);
2757
 
2758
          if ((scalar_single_iter_cost * vf * min_profitable_iters)
2759
              <= ((vec_inside_cost * min_profitable_iters)
2760
                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
2761
            min_profitable_iters++;
2762
        }
2763
    }
2764
  /* vector version will never be profitable.  */
2765
  else
2766
    {
2767
      if (vect_print_dump_info (REPORT_COST))
2768
        fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2769
                 "divided by the scalar iteration cost = %d "
2770
                 "is greater or equal to the vectorization factor = %d.",
2771
                 vec_inside_cost, scalar_single_iter_cost, vf);
2772
      return -1;
2773
    }
2774
 
2775
  if (vect_print_dump_info (REPORT_COST))
2776
    {
2777
      fprintf (vect_dump, "Cost model analysis: \n");
2778
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
2779
               vec_inside_cost);
2780
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
2781
               vec_outside_cost);
2782
      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
2783
               scalar_single_iter_cost);
2784
      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
2785
      fprintf (vect_dump, "  prologue iterations: %d\n",
2786
               peel_iters_prologue);
2787
      fprintf (vect_dump, "  epilogue iterations: %d\n",
2788
               peel_iters_epilogue);
2789
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
2790
               min_profitable_iters);
2791
    }
2792
 
2793
  min_profitable_iters =
2794
        min_profitable_iters < vf ? vf : min_profitable_iters;
2795
 
2796
  /* Because the condition we create is:
2797
     if (niters <= min_profitable_iters)
2798
       then skip the vectorized loop.  */
2799
  min_profitable_iters--;
2800
 
2801
  if (vect_print_dump_info (REPORT_COST))
2802
    fprintf (vect_dump, "  Profitability threshold = %d\n",
2803
             min_profitable_iters);
2804
 
2805
  return min_profitable_iters;
2806
}
2807
 
2808
 
2809
/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2810
   functions. Design better to avoid maintenance issues.  */
2811
 
2812
/* Function vect_model_reduction_cost.
2813
 
2814
   Models cost for a reduction operation, including the vector ops
2815
   generated within the strip-mine loop, the initial definition before
2816
   the loop, and the epilogue code that must be generated.  */
2817
 
2818
static bool
2819
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2820
                           int ncopies)
2821
{
2822
  int outer_cost = 0;
2823
  enum tree_code code;
2824
  optab optab;
2825
  tree vectype;
2826
  gimple stmt, orig_stmt;
2827
  tree reduction_op;
2828
  enum machine_mode mode;
2829
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2830
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2831
 
2832
 
2833
  /* Cost of reduction op inside loop.  */
2834
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2835
    += ncopies * vect_get_cost (vector_stmt);
2836
 
2837
  stmt = STMT_VINFO_STMT (stmt_info);
2838
 
2839
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2840
    {
2841
    case GIMPLE_SINGLE_RHS:
2842
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2843
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2844
      break;
2845
    case GIMPLE_UNARY_RHS:
2846
      reduction_op = gimple_assign_rhs1 (stmt);
2847
      break;
2848
    case GIMPLE_BINARY_RHS:
2849
      reduction_op = gimple_assign_rhs2 (stmt);
2850
      break;
2851
    case GIMPLE_TERNARY_RHS:
2852
      reduction_op = gimple_assign_rhs3 (stmt);
2853
      break;
2854
    default:
2855
      gcc_unreachable ();
2856
    }
2857
 
2858
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2859
  if (!vectype)
2860
    {
2861
      if (vect_print_dump_info (REPORT_COST))
2862
        {
2863
          fprintf (vect_dump, "unsupported data-type ");
2864
          print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2865
        }
2866
      return false;
2867
    }
2868
 
2869
  mode = TYPE_MODE (vectype);
2870
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2871
 
2872
  if (!orig_stmt)
2873
    orig_stmt = STMT_VINFO_STMT (stmt_info);
2874
 
2875
  code = gimple_assign_rhs_code (orig_stmt);
2876
 
2877
  /* Add in cost for initial definition.  */
2878
  outer_cost += vect_get_cost (scalar_to_vec);
2879
 
2880
  /* Determine cost of epilogue code.
2881
 
2882
     We have a reduction operator that will reduce the vector in one statement.
2883
     Also requires scalar extract.  */
2884
 
2885
  if (!nested_in_vect_loop_p (loop, orig_stmt))
2886
    {
2887
      if (reduc_code != ERROR_MARK)
2888
        outer_cost += vect_get_cost (vector_stmt)
2889
                      + vect_get_cost (vec_to_scalar);
2890
      else
2891
        {
2892
          int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2893
          tree bitsize =
2894
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2895
          int element_bitsize = tree_low_cst (bitsize, 1);
2896
          int nelements = vec_size_in_bits / element_bitsize;
2897
 
2898
          optab = optab_for_tree_code (code, vectype, optab_default);
2899
 
2900
          /* We have a whole vector shift available.  */
2901
          if (VECTOR_MODE_P (mode)
2902
              && optab_handler (optab, mode) != CODE_FOR_nothing
2903
              && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2904
            /* Final reduction via vector shifts and the reduction operator. Also
2905
               requires scalar extract.  */
2906
            outer_cost += ((exact_log2(nelements) * 2)
2907
              * vect_get_cost (vector_stmt)
2908
              + vect_get_cost (vec_to_scalar));
2909
          else
2910
            /* Use extracts and reduction op for final reduction.  For N elements,
2911
               we have N extracts and N-1 reduction ops.  */
2912
            outer_cost += ((nelements + nelements - 1)
2913
              * vect_get_cost (vector_stmt));
2914
        }
2915
    }
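  /* Worked instance (vector shape assumed for illustration): for a
     128-bit vector of 8 halfword elements, nelements = 8; the
     whole-vector-shift scheme above costs exact_log2 (8) * 2 = 6
     vector stmts plus one vec_to_scalar, while the extract scheme
     costs 8 + 7 = 15 vector stmts.  */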

  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));

  return true;
}


/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
    = ncopies * vect_get_cost (vector_stmt);
  /* prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
    = 2 * vect_get_cost (scalar_to_vec);
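  /* E.g. (illustrative): with ncopies = 2 this charges two vector stmts
     (the iv updates) inside the loop, plus two scalar_to_vec broadcasts
     in the prologue - one for vec_init and one for vec_step.  */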

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function get_initial_def_for_induction

   Input:
   IV_PHI - the loop-header phi of the induction variable; its initial
   value and evolution (step) are obtained from the scalar evolution
   of its result.

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with initial value 'X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
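
/* A concrete instance (illustrative): for X = 0, S = 3 and a vector of
   4 units this returns vec_init = [0, 3, 6, 9]; the loop body then
   advances the vector iv by vec_step = [VF*S, ...] = [12, 12, 12, 12]
   on every iteration (see below).  */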

static tree
get_initial_def_for_induction (gimple iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type;
  tree vectype;
  int nunits;
  edge pe = loop_preheader_edge (loop);
  struct loop *iv_loop;
  basic_block new_bb;
  tree vec, vec_init, vec_step, t;
  tree access_fn;
  tree new_var;
  tree new_name;
  gimple init_stmt, induction_phi, new_stmt;
  tree induc_def, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  bool ok;
  int ncopies;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  bool nested_in_vect_loop = false;
  gimple_seq stmts = NULL;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  gimple exit_phi;
  edge latch_e;
  tree loop_arg;
  gimple_stmt_iterator si;
  basic_block bb = gimple_bb (iv_phi);
  tree stepvectype;
  tree resvectype;

  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
  if (nested_in_vect_loop_p (loop, iv_phi))
    {
      nested_in_vect_loop = true;
      iv_loop = loop->inner;
    }
  else
    iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);

  latch_e = loop_latch_edge (iv_loop);
  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);

  access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
  STRIP_NOPS (access_fn);
  ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
                                    &init_expr, &step_expr);
  gcc_assert (ok);
  pe = loop_preheader_edge (iv_loop);

  scalar_type = TREE_TYPE (init_expr);
  vectype = get_vectype_for_scalar_type (scalar_type);
  resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  ncopies = vf / nunits;

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Find the first insertion point in the BB.  */
  si = gsi_after_labels (bb);

  /* Create the vector that holds the initial_value of the induction.  */
  if (nested_in_vect_loop)
    {
      /* iv_loop is nested in the loop to be vectorized.  init_expr had already
         been created during vectorization of previous stmts.  We obtain it
         from the STMT_VINFO_VEC_STMT of the defining stmt.  */
      tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                           loop_preheader_edge (iv_loop));
      vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
    }
  else
    {
      /* iv_loop is the loop to be vectorized.  Create:
         vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
      new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
      add_referenced_var (new_var);

      new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
      if (stmts)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      t = NULL_TREE;
      t = tree_cons (NULL_TREE, new_name, t);
      for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr  */
          enum tree_code code = POINTER_TYPE_P (scalar_type)
                                ? POINTER_PLUS_EXPR : PLUS_EXPR;
          init_stmt = gimple_build_assign_with_ops (code, new_var,
                                                    new_name, step_expr);
          new_name = make_ssa_name (new_var, init_stmt);
          gimple_assign_set_lhs (init_stmt, new_name);

          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "created new init_stmt: ");
              print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
            }
          t = tree_cons (NULL_TREE, new_name, t);
        }
      /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
      vec = build_constructor_from_list (vectype, nreverse (t));
      vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
    }


  /* Create the vector that holds the step of the induction.  */
  if (nested_in_vect_loop)
    /* iv_loop is nested in the loop to be vectorized.  Generate:
       vec_step = [S, S, S, S]  */
    new_name = step_expr;
  else
    {
      /* iv_loop is the loop to be vectorized.  Generate:
         vec_step = [VF*S, VF*S, VF*S, VF*S]  */
      expr = build_int_cst (TREE_TYPE (step_expr), vf);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
    }

  t = unshare_expr (new_name);
  gcc_assert (CONSTANT_CLASS_P (new_name));
  stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
  gcc_assert (stepvectype);
  vec = build_vector_from_val (stepvectype, t);
  vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);


  /* Create the following def-use cycle:
     loop prolog:
         vec_init = ...
         vec_step = ...
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  add_referenced_var (vec_dest);
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  set_vinfo_for_stmt (induction_phi,
                      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop.  */
  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                           induc_def, vec_step);
  vec_def = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, vec_def);
  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
                                                   NULL));

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
               UNKNOWN_LOCATION);


  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
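
  /* Illustrative instance: with VF = 8 and a 4-element vectype we get
     ncopies = 2; the phi above keeps advancing by [VF*S, ...] per
     iteration, while each extra copy below is offset from the previous
     one by [nunits*S, ...] = [4*S, ...], covering the next 4 elements
     of the strip.  */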

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;
      /* FORNOW. This restriction should be relaxed.  */
      gcc_assert (!nested_in_vect_loop);

      /* Create the vector that holds the step of the induction.  */
      expr = build_int_cst (TREE_TYPE (step_expr), nunits);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      t = unshare_expr (new_name);
      gcc_assert (CONSTANT_CLASS_P (new_name));
      vec = build_vector_from_val (stepvectype, t);
      vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          /* vec_i = vec_prev + vec_step  */
          new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                                   vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);

          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          if (!useless_type_conversion_p (resvectype, vectype))
            {
              new_stmt = gimple_build_assign_with_ops
                  (VIEW_CONVERT_EXPR,
                   vect_get_new_vect_var (resvectype, vect_simple_var,
                                          "vec_iv_"),
                   build1 (VIEW_CONVERT_EXPR, resvectype,
                           gimple_assign_lhs (new_stmt)), NULL_TREE);
              gimple_assign_set_lhs (new_stmt,
                                     make_ssa_name
                                       (gimple_assign_lhs (new_stmt), new_stmt));
              gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
            }
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (nested_in_vect_loop)
    {
      /* Find the loop-closed exit-phi of the induction, and record
         the final vector of induction results:  */
      exit_phi = NULL;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          /* FORNOW. Currently not supporting the case that an inner-loop
             induction is not used in the outer-loop (i.e. only outside
             the outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vector of inductions after inner-loop:");
              print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
            }
        }
    }


  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform induction: created def-use cycle: ");
      print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  if (!useless_type_conversion_p (resvectype, vectype))
    {
      new_stmt = gimple_build_assign_with_ops
         (VIEW_CONVERT_EXPR,
          vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
          build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
      induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
      gimple_assign_set_lhs (new_stmt, induc_def);
      si = gsi_start_bb (bb);
      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
      set_vinfo_for_stmt (new_stmt,
                          new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
        = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
    }

  return induc_def;
}


/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we use Option1 (the 'adjust in epilog' scheme) when
   ADJUSTMENT_DEF is not NULL, because this way the initialization
   vector is simpler (same element in all entries), and Option2
   otherwise.

   A cost model should help decide between these two schemes.  */
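
/* For instance (illustrative), for a product reduction

     s = init_val;
     for (i=0;i<n;i++)
       s = s * a[i];

   with a vector of 4 units, Option1 returns [1,1,1,1] and reports
   ADJUSTMENT_DEF = init_val, so the epilog folds init_val into the
   reduced product using the reduction operation, while Option2 returns
   [init_val,1,1,1] and needs no adjustment.  */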

tree
get_initial_def_for_reduction (gimple stmt, tree init_val,
                               tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree def_for_init;
  tree init_def;
  tree t = NULL_TREE;
  int i;
  bool nested_in_vect_loop = false;
  tree init_value;
  REAL_VALUE_TYPE real_init_val = dconst0;
  int int_init_val = 0;
  gimple def_stmt = NULL;

  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
              || SCALAR_FLOAT_TYPE_P (scalar_type));

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);

  /* In case of double reduction we only create a vector variable to be put
     in the reduction phi node.  The actual statement creation is done in
     vect_create_epilog_for_reduction.  */
  if (adjustment_def && nested_in_vect_loop
      && TREE_CODE (init_val) == SSA_NAME
      && (def_stmt = SSA_NAME_DEF_STMT (init_val))
      && gimple_code (def_stmt) == GIMPLE_PHI
      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      && vinfo_for_stmt (def_stmt)
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
          == vect_double_reduction_def)
    {
      *adjustment_def = NULL;
      return vect_create_destination_var (init_val, vectype);
    }

  if (TREE_CONSTANT (init_val))
    {
      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
      else
        init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
    }
  else
    init_value = init_val;

  switch (code)
    {
      case WIDEN_SUM_EXPR:
      case DOT_PROD_EXPR:
      case PLUS_EXPR:
      case MINUS_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case MULT_EXPR:
      case BIT_AND_EXPR:
        /* ADJUSTMENT_DEF is NULL when called from
           vect_create_epilog_for_reduction to vectorize double reduction.  */
        if (adjustment_def)
          {
            if (nested_in_vect_loop)
              *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
                                                              NULL);
            else
              *adjustment_def = init_val;
          }

        if (code == MULT_EXPR)
          {
            real_init_val = dconst1;
            int_init_val = 1;
          }

        if (code == BIT_AND_EXPR)
          int_init_val = -1;

        if (SCALAR_FLOAT_TYPE_P (scalar_type))
          def_for_init = build_real (scalar_type, real_init_val);
        else
          def_for_init = build_int_cst (scalar_type, int_init_val);

        /* Create a vector of '0' or '1' except the first element.  */
        for (i = nunits - 2; i >= 0; --i)
          t = tree_cons (NULL_TREE, def_for_init, t);

        /* Option1: the first element is '0' or '1' as well.  */
        if (adjustment_def)
          {
            t = tree_cons (NULL_TREE, def_for_init, t);
            init_def = build_vector (vectype, t);
            break;
          }

        /* Option2: the first element is INIT_VAL.  */
        t = tree_cons (NULL_TREE, init_value, t);
        if (TREE_CONSTANT (init_val))
          init_def = build_vector (vectype, t);
        else
          init_def = build_constructor_from_list (vectype, t);

        break;

      case MIN_EXPR:
      case MAX_EXPR:
      case COND_EXPR:
        if (adjustment_def)
          {
            *adjustment_def = NULL_TREE;
            init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
            break;
          }

        init_def = build_vector_from_val (vectype, init_value);
        break;

      default:
        gcc_unreachable ();
    }

  return init_def;
}


/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEFS is the list of vectors of partial results, i.e., the lhs's of
     vector reduction statements.
   STMT is the scalar reduction stmt that is being vectorized.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e. - we need to
     "unroll" the vector stmt by a factor VF/nunits.  For more details see
     documentation in vectorizable_operation.
   REDUC_CODE is the tree-code for the epilog reduction.
   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
     computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
   SLP_NODE is an SLP node containing a group of reduction statements.  The
     first one in this group is STMT.

   This function:
   1. Creates the reduction def-use cycles: sets the arguments for
      REDUCTION_PHIS:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is taken from VECT_DEFS - the vector of partial
      sums.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

     The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

     The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>
*/

static void
vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
                                  int ncopies, enum tree_code reduc_code,
                                  VEC (gimple, heap) *reduction_phis,
                                  int reduc_index, bool double_reduc,
                                  slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple new_phi = NULL, phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
  gimple epilog_stmt = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple exit_phi;
  tree bitsize, bitpos;
  tree adjustment_def = NULL;
  tree vec_initial_def = NULL;
  tree reduction_op, expr, def;
  tree orig_name, scalar_result;
  imm_use_iterator imm_iter, phi_imm_iter;
  use_operand_p use_p, phi_use_p;
  bool extract_scalar_result = false;
  gimple use_stmt, orig_stmt, reduction_phi = NULL;
  bool nested_in_vect_loop = false;
  VEC (gimple, heap) *new_phis = NULL;
  VEC (gimple, heap) *inner_phis = NULL;
  enum vect_def_type dt = vect_unknown_def_type;
  int j, i;
  VEC (tree, heap) *scalar_results = NULL;
  unsigned int group_size = 1, k, ratio;
  VEC (tree, heap) *vec_initial_defs = NULL;
  VEC (gimple, heap) *phis;
  bool slp_reduc = false;
  tree new_phi_result;
  gimple inner_phi = NULL;

  if (slp_node)
    group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_in_vect_loop = true;
      gcc_assert (!slp_node);
    }

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = reduc_index ?
                     gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_TERNARY_RHS:
      reduction_op = gimple_op (stmt, reduc_index + 1);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  /* 1. Create the reduction def-use cycle:
     Set the arguments of REDUCTION_PHIS, i.e., transform

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          ...

     into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          ...

     (in case of SLP, do it for all the phis).  */

  /* Get the loop-entry arguments.  */
  if (slp_node)
    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
                       NULL, slp_node, reduc_index);
  else
    {
      vec_initial_defs = VEC_alloc (tree, heap, 1);
      /* For the case of reduction, vect_get_vec_def_for_operand returns
         the scalar def before the loop, that defines the initial value
         of the reduction variable.  */
      vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                      &adjustment_def);
      VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
    }

  /* Set phi nodes arguments.  */
  FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
    {
      tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
      tree def = VEC_index (tree, vect_defs, i);
      for (j = 0; j < ncopies; j++)
        {
          /* Set the loop-entry arg of the reduction-phi.  */
          add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
                       UNKNOWN_LOCATION);

          /* Set the loop-latch arg for the reduction-phi.  */
          if (j > 0)
            def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);

          add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "transform reduction: created def-use"
                                  " cycle: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
              fprintf (vect_dump, "\n");
              print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
                                 TDF_SLIM);
            }

          phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
        }
    }

  VEC_free (tree, heap, vec_initial_defs);

  /* 2. Create epilog code.
        The reduction epilog code operates across the elements of the vector
        of partial results computed by the vectorized loop.
        The reduction epilog code consists of:

        step 1: compute the scalar result in a vector (v_out2)
        step 2: extract the scalar result (s_out3) from the vector (v_out2)
        step 3: adjust the scalar result (s_out3) if needed.

        Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>         # original EXIT_PHI
          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  */


  /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
         v_out1 = phi <VECT_DEF>
         Store them in NEW_PHIS.  */

  exit_bb = single_exit (loop)->dest;
  prev_phi_info = NULL;
  new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
  FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
    {
      for (j = 0; j < ncopies; j++)
        {
          phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
          if (j == 0)
            VEC_quick_push (gimple, new_phis, phi);
          else
            {
              def = vect_get_vec_def_for_stmt_copy (dt, def);
              STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
            }

          SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
          prev_phi_info = vinfo_for_stmt (phi);
        }
    }

  /* The epilogue is created for the outer-loop, i.e., for the loop being
     vectorized.  Create exit phis for the outer loop.  */
  if (double_reduc)
    {
      loop = outer_loop;
      exit_bb = single_exit (loop)->dest;
      inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
      FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
        {
          gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
                                              exit_bb);
          SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                           PHI_RESULT (phi));
          set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                            loop_vinfo, NULL));
          VEC_quick_push (gimple, inner_phis, phi);
          VEC_replace (gimple, new_phis, i, outer_phi);
          prev_phi_info = vinfo_for_stmt (outer_phi);
          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
            {
              phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
              outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
                                           exit_bb);
              SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                               PHI_RESULT (phi));
              set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                        loop_vinfo, NULL));
              STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
              prev_phi_info = vinfo_for_stmt (outer_phi);
            }
        }
    }

  exit_gsi = gsi_after_labels (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }

  code = gimple_assign_rhs_code (orig_stmt);
  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
     partial results are added and not subtracted.  */
  if (code == MINUS_EXPR)
    code = PLUS_EXPR;

  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  scalar_results = VEC_alloc (tree, heap, group_size);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is double reduction, i.e., the use of reduction is
     outside the outer-loop).  The final vector of partial results will be used
     in the vectorized outer-loop, or reduced to a scalar result at the end of
     the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;

  /* SLP reduction without reduction chain, e.g.,
     # a1 = phi <a2, a0>
     # b1 = phi <b2, b0>
     a2 = operation (a1)
     b2 = operation (b1)  */
  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));

  /* In case of reduction chain, e.g.,
     # a1 = phi <a3, a0>
     a2 = operation (a1)
     a3 = operation (a2),

     we may end up with more than one vector result.  Here we reduce them to
     one vector.  */
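  /* E.g. (illustrative): if the chain produced two partial-result
     vectors v0 and v1, the loop below emits v = v0 op v1 (using the
     reduction code) so that a single vector reaches the scalar
     epilogue; any number of vectors is combined the same way.  */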
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
      tree tmp;
      gimple new_vec_stmt = NULL;

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      for (k = 1; k < VEC_length (gimple, new_phis); k++)
        {
          gimple next_phi = VEC_index (gimple, new_phis, k);
          tree second_vect = PHI_RESULT (next_phi);

          tmp = build2 (code, vectype, first_vect, second_vect);
          new_vec_stmt = gimple_build_assign (vec_dest, tmp);
          first_vect = make_ssa_name (vec_dest, new_vec_stmt);
          gimple_assign_set_lhs (new_vec_stmt, first_vect);
          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
        }

      new_phi_result = first_vect;
      if (new_vec_stmt)
        {
          VEC_truncate (gimple, new_phis, 0);
          VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
        }
    }
  else
    new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  In SLP we simply need to extract all the elements from the
         vector (without reducing them), so we use scalar shifts.  */
  if (reduc_code != ERROR_MARK && !slp_reduc)
    {
      tree tmp;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      tmp = build1 (reduc_code, vectype, new_phi_result);
      epilog_stmt = gimple_build_assign (vec_dest, tmp);
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = ERROR_MARK;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode) == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift && !slp_reduc)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */
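          /* Worked instance (vector shape assumed for illustration):
             for a 128-bit vector of four 32-bit elements the loop
             below runs with bit_offset = 64 and then 32 - two
             shift-and-op rounds - leaving the fully reduced value in
             the lane that step 2.4 extracts.  */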

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = new_phi_result;
          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = gimple_build_assign_with_ops (shift_code,
                                               vec_dest, new_temp, bitpos);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>  // For non SLP cases
               }  */
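          /* Worked instance (illustrative): a vector of four ints is
             reduced with 4 extracts plus 3 scalar ops - N extracts and
             N-1 ops for N elements, the same count charged by
             vect_model_reduction_cost above.  */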

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
            {
              if (gimple_code (new_phi) == GIMPLE_PHI)
                vec_temp = PHI_RESULT (new_phi);
              else
                vec_temp = gimple_assign_lhs (new_phi);
              rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                            bitsize_zero_node);
              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              /* In SLP we don't need to apply the reduction operation, so
                 we just collect s' values in SCALAR_RESULTS.  */
              if (slp_reduc)
                VEC_safe_push (tree, heap, scalar_results, new_temp);

              for (bit_offset = element_bitsize;
                   bit_offset < vec_size_in_bits;
                   bit_offset += element_bitsize)
                {
                  tree bitpos = bitsize_int (bit_offset);
                  tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
                                     bitsize, bitpos);

                  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
                  new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
                  gimple_assign_set_lhs (epilog_stmt, new_name);
                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

                  if (slp_reduc)
                    {
                      /* In SLP we don't need to apply the reduction operation,
                         so we just collect s' values in SCALAR_RESULTS.  */
                      new_temp = new_name;
                      VEC_safe_push (tree, heap, scalar_results, new_name);
                    }
                  else
                    {
                      epilog_stmt = gimple_build_assign_with_ops (code,
                                          new_scalar_dest, new_name, new_temp);
                      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
                      gimple_assign_set_lhs (epilog_stmt, new_temp);
                      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
                    }
                }
            }

          /* The only case where we need to reduce scalar results in SLP is
             unrolling.  If the size of SCALAR_RESULTS is greater than
             GROUP_SIZE, we reduce them combining elements modulo
             GROUP_SIZE.  */
          if (slp_reduc)
            {
              tree res, first_res, new_res;
              gimple new_stmt;

              /* Reduce multiple scalar results in case of SLP unrolling.  */
              for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
                   j++)
                {
                  first_res = VEC_index (tree, scalar_results, j % group_size);
                  new_stmt = gimple_build_assign_with_ops (code,
                                              new_scalar_dest, first_res, res);
                  new_res = make_ssa_name (new_scalar_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_res);
                  gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
                  VEC_replace (tree, scalar_results, j % group_size, new_res);
                }
            }
          else
            /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
            VEC_safe_push (tree, heap, scalar_results, new_temp);

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      VEC_safe_push (tree, heap, scalar_results, new_temp);
    }

vect_finalize_reduction:

  if (double_reduc)
    loop = loop->inner;

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */
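  /* For instance (illustrative): for a sum reduction initialized with
     init_val = 10, the vector loop starts from [0,0,...,0] and the
     code below emits new_temp = loop_exit_def + 10 at the loop exit.  */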

  if (adjustment_def)
    {
      gcc_assert (!slp_reduc);
      if (nested_in_vect_loop)
        {
          new_phi = VEC_index (gimple, new_phis, 0);
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          new_temp = VEC_index (tree, scalar_results, 0);
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      if (nested_in_vect_loop)
        {
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo,
                                                 NULL));
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc)
            VEC_quick_push (tree, scalar_results, new_temp);
          else
            VEC_replace (tree, scalar_results, 0, new_temp);
        }
      else
        VEC_replace (tree, scalar_results, 0, new_temp);

      VEC_replace (gimple, new_phis, 0, epilog_stmt);
    }

  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
          phis with new adjusted scalar results, i.e., replace use <s_out0>
          with use <s_out4>.

     Transform:
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out0>
          use <s_out0>

     into:

        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4> */


  /* In SLP reduction chain we reduce vector results into one vector if
     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
     the last stmt in the reduction chain, since we are looking for the loop
     exit phi node.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      scalar_dest = gimple_assign_lhs (VEC_index (gimple,
                                       SLP_TREE_SCALAR_STMTS (slp_node),
                                       group_size - 1));
      group_size = 1;
    }

  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
     case that GROUP_SIZE is greater than the vectorization factor).
     Therefore, we need to match SCALAR_RESULTS with corresponding statements.
     The first (GROUP_SIZE / number of new vector stmts) scalar results
     correspond to the first vector stmt, etc.
     (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
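  /* E.g. (illustrative): GROUP_SIZE = 4 with two new vector stmts gives
     RATIO = 2, so scalar results 0 and 1 are matched with the first
     vector stmt and results 2 and 3 with the second.  */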
  if (group_size > VEC_length (gimple, new_phis))
    {
      ratio = group_size / VEC_length (gimple, new_phis);
      gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
    }
  else
    ratio = 1;

  for (k = 0; k < group_size; k++)
    {
      if (k % ratio == 0)
        {
          epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
          reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
          if (double_reduc)
            inner_phi = VEC_index (gimple, inner_phis, k / ratio);
        }

      if (slp_reduc)
        {
          gimple current_stmt = VEC_index (gimple,
                                       SLP_TREE_SCALAR_STMTS (slp_node), k);

          orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
          /* SLP statements can't participate in patterns.  */
          gcc_assert (!orig_stmt);
          scalar_dest = gimple_assign_lhs (current_stmt);
        }

      phis = VEC_alloc (gimple, heap, 3);
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses -
         one at the latch block, and one at the loop exit).  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
          VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));

      /* We expect to have found an exit_phi because of loop-closed-ssa
         form.  */
      gcc_assert (!VEC_empty (gimple, phis));

      FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
        {
          if (outer_loop)
            {
              stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
              gimple vect_phi;

              /* FORNOW. Currently not supporting the case that an inner-loop
                 reduction is not used in the outer-loop (but only outside the
                 outer-loop), unless it is double reduction.  */
              gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
                           && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
                          || double_reduc);

              STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
              if (!double_reduc
                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
                      != vect_double_reduction_def)
                continue;

              /* Handle double reduction:

                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
                 stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
                 stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)

                 At that point the regular reduction (stmt2 and stmt3) is
                 already vectorized, as well as the exit phi node, stmt4.
                 Here we vectorize the phi node of double reduction, stmt1, and
                 update all relevant statements.  */
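
              /* At the source level this corresponds to a nest such as
                 (illustrative):

                   s = s0;
                   for (i = 0; i < n; i++)
                     for (j = 0; j < m; j++)
                       s = s + a[i][j];

                 where s is carried around both loops: the inner loop
                 reduces into s4, and the outer-loop phi (stmt1 above)
                 carries the running total.  */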
4188
 
              /* Go through all the uses of s2 to find double reduction phi
                 node, i.e., stmt1 above.  */
              orig_name = PHI_RESULT (exit_phi);
              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
                {
                  stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
                  stmt_vec_info new_phi_vinfo;
                  tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
                  basic_block bb = gimple_bb (use_stmt);
                  gimple use;

                  /* Check that USE_STMT is really a double reduction phi
                     node.  */
                  if (gimple_code (use_stmt) != GIMPLE_PHI
                      || gimple_phi_num_args (use_stmt) != 2
                      || !use_stmt_vinfo
                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
                          != vect_double_reduction_def
                      || bb->loop_father != outer_loop)
                    continue;

                  /* Create vector phi node for double reduction:
                     vs1 = phi <vs0, vs2>
                     vs1 was created previously in this function by a call to
                       vect_get_vec_def_for_operand and is stored in
                       vec_initial_def;
                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                     vs0 is created here.  */

                  /* Create vector phi node.  */
                  vect_phi = create_phi_node (vec_initial_def, bb);
                  new_phi_vinfo = new_stmt_vec_info (vect_phi,
                                    loop_vec_info_for_loop (outer_loop), NULL);
                  set_vinfo_for_stmt (vect_phi, new_phi_vinfo);

                  /* Create vs0 - initial def of the double reduction phi.  */
                  preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
                                             loop_preheader_edge (outer_loop));
                  init_def = get_initial_def_for_reduction (stmt,
                                                          preheader_arg, NULL);
                  vect_phi_init = vect_init_vector (use_stmt, init_def,
                                                    vectype, NULL);

                  /* Update phi node arguments with vs0 and vs2.  */
                  add_phi_arg (vect_phi, vect_phi_init,
                               loop_preheader_edge (outer_loop),
                               UNKNOWN_LOCATION);
                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                               loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "created double reduction phi "
                                          "node: ");
                      print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
                    }

                  vect_phi_res = PHI_RESULT (vect_phi);

                  /* Replace the use, i.e., set the correct vs1 in the regular
                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                     loop is redundant.  */
                  use = reduction_phi;
                  for (j = 0; j < ncopies; j++)
                    {
                      edge pr_edge = loop_preheader_edge (loop);
                      SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
                      use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
                    }
                }
            }
        }

      VEC_free (gimple, heap, phis);
      if (nested_in_vect_loop)
        {
          if (double_reduc)
            loop = outer_loop;
          else
            continue;
        }

      phis = VEC_alloc (gimple, heap, 3);
      /* Find the loop-closed-use at the loop exit of the original scalar
         result.  (The reduction result is expected to have two immediate uses,
         one at the latch block, and one at the loop exit).  For double
         reductions we are looking for exit phis of the outer loop.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
            VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
          else
            {
              if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
                {
                  tree phi_res = PHI_RESULT (USE_STMT (use_p));

                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
                    {
                      if (!flow_bb_inside_loop_p (loop,
                                             gimple_bb (USE_STMT (phi_use_p))))
                        VEC_safe_push (gimple, heap, phis,
                                       USE_STMT (phi_use_p));
                    }
                }
            }
        }

      FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
        {
          /* Replace the uses:  */
          orig_name = PHI_RESULT (exit_phi);
          scalar_result = VEC_index (tree, scalar_results, k);
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, scalar_result);
        }

      VEC_free (gimple, heap, phis);
    }

  VEC_free (tree, heap, scalar_results);
  VEC_free (gimple, heap, new_phis);
}


/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different from the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different from the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
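
/* An illustrative (assumed) example, not taken from the original sources:
   the shorts-into-int accumulation described above corresponds to scalar
   source code such as

        short a[N];  int acc = 0;
        for (i = 0; i < N; i++)
          acc += a[i];              <-- recognized as a widen_sum pattern

   Here the vectorization factor is derived from V8HI (eight shorts are
   consumed per vector iteration), while the vectorized statement itself
   produces a V4SI accumulator; i.e. vectype_in is V8HI and vectype_out
   is V4SI.  */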
 
bool
vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype_in = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  gimple new_phi = NULL;
  tree scalar_type;
  bool is_simple_use;
  gimple orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int ncopies;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  bool single_defuse_cycle = false;
  tree reduc_def = NULL_TREE;
  gimple new_stmt = NULL;
  int j;
  tree ops[3];
  bool nested_cycle = false, found_nested_cycle_def = false;
  gimple reduc_def_stmt = NULL;
  /* The default is that the reduction variable is the last in the
     statement.  */
  int reduc_index = 2;
  bool double_reduc = false, dummy;
  basic_block def_bb;
  struct loop * def_stmt_loop, *outer_loop = NULL;
  tree def_arg;
  gimple def_arg_stmt;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
  VEC (gimple, heap) *phis = NULL;
  int vec_num;
  tree def0, def1, tem, op0, op1 = NULL_TREE;

  /* In case of a reduction chain we switch to the first stmt in the chain,
     but we don't update STMT_INFO, since only the last stmt is marked as
     reduction and has reduction properties.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_cycle = true;
    }

  /* 1. Is vectorizable reduction?  */
  /* Not supportable if the reduction variable is used in the loop, unless
     it's a reduction chain.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
      && !GROUP_FIRST_ELEMENT (stmt_info))
    return false;

  /* Reductions that are not used even in an enclosing outer-loop
     are expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (is_gimple_assign (stmt));

  /* Flatten RHS.  */
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_TERNARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == ternary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      ops[2] = gimple_assign_rhs3 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == COND_EXPR && slp_node)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* Do not try to vectorize bit-precision reductions.  */
  if ((TYPE_PRECISION (scalar_type)
       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  In case of a nested cycle this
     assumption is not true: we use reduc_index to record the index of the
     reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      /* The condition of COND_EXPR is checked in vectorizable_condition().  */
      if (i == 0 && code == COND_EXPR)
        continue;

      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
      if (!vectype_in)
        vectype_in = tem;
      gcc_assert (is_simple_use);

      if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
          && dt != vect_induction_def
          && !(dt == vect_nested_cycle && nested_cycle))
        return false;

      if (dt == vect_nested_cycle)
        {
          found_nested_cycle_def = true;
          reduc_def_stmt = def_stmt;
          reduc_index = i;
        }
    }

  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                        &def_stmt, &def, &dt, &tem);
  if (!vectype_in)
    vectype_in = tem;
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def
              || dt == vect_nested_cycle
              || ((dt == vect_internal_def || dt == vect_external_def
                   || dt == vect_constant_def || dt == vect_induction_def)
                   && nested_cycle && found_nested_cycle_def));
  if (!found_nested_cycle_def)
    reduc_def_stmt = def_stmt;

  gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
                                                       reduc_def_stmt,
                                                       !nested_cycle,
                                                       &dummy));
  else
    {
      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
                                             !nested_cycle, &dummy);
      /* We changed STMT to be the first stmt in the reduction chain, hence we
         check that in this case the first element in the chain is STMT.  */
      gcc_assert (stmt == tmp
                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
    }

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
    return false;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
               / TYPE_VECTOR_SUBPARTS (vectype_in));

  gcc_assert (ncopies >= 1);

  vec_mode = TYPE_MODE (vectype_in);

  if (code == COND_EXPR)
    {
      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unsupported condition in reduction");

          return false;
        }
    }
  else
    {
      /* 4. Supportable by target?  */

      /* 4.1. check support for the operation in the loop  */
      optab = optab_for_tree_code (code, vectype_in, optab_default);
      if (!optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab.");

          return false;
        }

      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "op not supported by target.");

          if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
              || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                  < vect_min_worthwhile_factor (code))
            return false;

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "proceeding using word mode.");
        }

      /* Worthwhile without SIMD support?  */
      if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "not worthwhile without SIMD support.");

          return false;
        }
    }
 
  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different from the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; the original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: optab_handler (plus_optab, vect_int_mode)).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: optab_handler (widen_sum_optab,
             vect_short_mode)).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */
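
/* An illustrative (assumed) example, not taken from the original sources:
   for a plain integer sum reduced in a V4SI vector, the loop leaves the
   partial sums {s0, s1, s2, s3} in one vector register, and the epilog
   combines them into a single scalar - either with a whole-vector
   reduction (REDUC_PLUS_EXPR) when the target provides one, or by
   log2(nunits) shift-and-add steps:

        {s0+s2, s1+s3, _, _}      <-- add the vector to its upper half
        s0+s2 + s1+s3             <-- final scalar in element 0  */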
 
  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = gimple_assign_rhs_code (orig_stmt);
      gcc_assert (vectype_out);
      vec_mode = TYPE_MODE (vectype_out);
    }
  else
    {
      /* Regular reduction: the same vectype and tree-code as used for the
         vector code inside the loop can also be used for the epilog code.  */
      orig_code = code;
    }

  if (nested_cycle)
    {
      def_bb = gimple_bb (reduc_def_stmt);
      def_stmt_loop = def_bb->loop_father;
      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                       loop_preheader_edge (def_stmt_loop));
      if (TREE_CODE (def_arg) == SSA_NAME
          && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
          && gimple_code (def_arg_stmt) == GIMPLE_PHI
          && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
          && vinfo_for_stmt (def_arg_stmt)
          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
              == vect_double_reduction_def)
        double_reduc = true;
    }

  epilog_reduc_code = ERROR_MARK;
  if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    {
      reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
                                         optab_default);
      if (!reduc_optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab for reduction.");

          epilog_reduc_code = ERROR_MARK;
        }

      if (reduc_optab
          && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc op not supported by target.");

          epilog_reduc_code = ERROR_MARK;
        }
    }
  else
    {
      if (!nested_cycle || double_reduc)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no reduc code for scalar code.");

          return false;
        }
    }

  if (double_reduc && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in double reduction");

      return false;
    }

  /* In case of a widening multiplication by a constant, we update the type
     of the constant to be the type of the other operand.  We check that the
     constant fits the type in the pattern recognition pass.  */
  if (code == DOT_PROD_EXPR
      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
    {
      if (TREE_CODE (ops[0]) == INTEGER_CST)
        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
      else if (TREE_CODE (ops[1]) == INTEGER_CST)
        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "invalid types in dot-prod");

          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* FORNOW: Multiple types are not supported for condition.  */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* Create the destination vector.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  /* If the reduction is used in an outer loop we need to generate
     VF intermediate results, like so (e.g. for ncopies=2):
        r0 = phi (init, r0)
        r1 = phi (init, r1)
        r0 = x0 + r0;
        r1 = x1 + r1;
    (i.e. we generate VF results in 2 registers).
    In this case we have a separate def-use cycle for each copy, and therefore
    for each copy we get the vector def for the reduction variable from the
    respective phi node created for this copy.

    Otherwise (the reduction is unused in the loop nest), we can combine
    together intermediate results, like so (e.g. for ncopies=2):
        r = phi (init, r)
        r = x0 + r;
        r = x1 + r;
   (i.e. we generate VF/2 results in a single register).
   In this case for each copy we get the vector def for the reduction variable
   from the vectorized reduction operation generated in the previous iteration.
  */
 
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
    {
      single_defuse_cycle = true;
      epilog_copies = 1;
    }
  else
    epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  if (slp_node)
    {
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
                  == TYPE_VECTOR_SUBPARTS (vectype_in));
    }
  else
    {
      vec_num = 1;
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }

  phis = VEC_alloc (gimple, heap, vec_num);
  vect_defs = VEC_alloc (tree, heap, vec_num);
  if (!slp_node)
    VEC_quick_push (tree, vect_defs, NULL_TREE);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
        {
          for (i = 0; i < vec_num; i++)
            {
              /* Create the reduction-phi that defines the reduction
                 operand.  */
              new_phi = create_phi_node (vec_dest, loop->header);
              set_vinfo_for_stmt (new_phi,
                                  new_stmt_vec_info (new_phi, loop_vinfo,
                                                     NULL));
              if (j == 0 || slp_node)
                VEC_quick_push (gimple, phis, new_phi);
            }
        }

      if (code == COND_EXPR)
        {
          gcc_assert (!slp_node);
          vectorizable_condition (stmt, gsi, vec_stmt,
                                  PHI_RESULT (VEC_index (gimple, phis, 0)),
                                  reduc_index, NULL);
          /* Multiple types are not supported for condition.  */
          break;
        }

      /* Handle uses.  */
      if (j == 0)
        {
          op0 = ops[!reduc_index];
          if (op_type == ternary_op)
            {
              if (reduc_index == 0)
                op1 = ops[2];
              else
                op1 = ops[1];
            }

          if (slp_node)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            {
              loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
                                                            stmt, NULL);
              VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
              if (op_type == ternary_op)
                {
                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                NULL);
                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
                }
            }
        }
      else
        {
          if (!slp_node)
            {
              enum vect_def_type dt;
              gimple dummy_stmt;
              tree dummy;

              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
                                  &dummy_stmt, &dummy, &dt);
              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
                                                              loop_vec_def0);
              VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
              if (op_type == ternary_op)
                {
                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
                                      &dummy, &dt);
                  loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                loop_vec_def1);
                  VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
                }
            }

          if (single_defuse_cycle)
            reduc_def = gimple_assign_lhs (new_stmt);

          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }

      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
        {
          if (slp_node)
            reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
          else
            {
              if (!single_defuse_cycle || j == 0)
                reduc_def = PHI_RESULT (new_phi);
            }

          def1 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL);
          if (op_type == binary_op)
            {
              if (reduc_index == 0)
                expr = build2 (code, vectype_out, reduc_def, def0);
              else
                expr = build2 (code, vectype_out, def0, reduc_def);
            }
          else
            {
              if (reduc_index == 0)
                expr = build3 (code, vectype_out, reduc_def, def0, def1);
              else
                {
                  if (reduc_index == 1)
                    expr = build3 (code, vectype_out, def0, reduc_def, def1);
                  else
                    expr = build3 (code, vectype_out, def0, def1, reduc_def);
                }
            }

          new_stmt = gimple_build_assign (vec_dest, expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (slp_node)
            {
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
              VEC_quick_push (tree, vect_defs, new_temp);
            }
          else
            VEC_replace (tree, vect_defs, 0, new_temp);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
    {
      new_temp = gimple_assign_lhs (*vec_stmt);
      VEC_replace (tree, vect_defs, 0, new_temp);
    }

  vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
                                    epilog_reduc_code, phis, reduc_index,
                                    double_reduc, slp_node);

  VEC_free (gimple, heap, phis);
  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);

  return true;
}
 
/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */
int
vect_min_worthwhile_factor (enum tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      return 4;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_NOT_EXPR:
      return 2;

    default:
      return INT_MAX;
    }
}
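
/* An illustrative (assumed) rationale, not taken from the original sources:
   "generic vectors" means emulating the vector operation in ordinary
   word-mode registers.  Bitwise AND/IOR/XOR/NOT operate on a packed word
   unchanged, so a factor of 2 can already pay off, while PLUS/MINUS/NEGATE
   need extra instructions to keep carries from crossing element boundaries,
   so a larger factor (at least 4) is required before emulation is a win.  */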
 

/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
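
/* An illustrative (assumed) example, not taken from the original sources:
   a typical induction phi comes from scalar code such as

        for (i = 0; i < n; i++)
          a[i] = i;

   For a V4SI vector of I the initial def is {0, 1, 2, 3}, and each vector
   iteration adds the splatted step {4, 4, 4, 4}.  */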
 
bool
vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                        gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  tree vec_def;

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_induction ===");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform induction phi.");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
  return true;
}
 
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

bool
vectorizable_live_operation (gimple stmt,
                             gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                             gimple *vec_stmt ATTRIBUTE_UNUSED)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int op_type;
  tree op;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  enum tree_code code;
  enum gimple_rhs_class rhs_class;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* FORNOW. CHECKME. */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  op_type = TREE_CODE_LENGTH (code);
  rhs_class = get_gimple_rhs_class (code);
  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */

  for (i = 0; i < op_type; i++)
    {
      if (rhs_class == GIMPLE_SINGLE_RHS)
        op = TREE_OPERAND (gimple_op (stmt, 1), i);
      else
        op = gimple_op (stmt, i + 1);
      if (op
          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
                                  &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}
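
/* An illustrative (assumed) example, not taken from the original sources:
   the invariant-operand case accepted above looks like

        for (i = 0; i < n; i++)
          {
            t = x + y;              <-- x, y loop-invariant; t is "live"
            a[i] = i;
          }
        use (t);                    <-- t is used after the loop

   The statement computing T can simply remain scalar; the last value it
   computes is the one consumed after the loop.  */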
 
/* Kill any debug uses outside LOOP of SSA names defined in STMT.  */

static void
vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
{
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  def_operand_p def_p;
  gimple ustmt;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb;

          if (!is_gimple_debug (ustmt))
            continue;

          bb = gimple_bb (ustmt);

          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (gimple_debug_bind_p (ustmt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "killing debug use");

                  gimple_debug_bind_reset_value (ustmt);
                  update_stmt (ustmt);
                }
              else
                gcc_unreachable ();
            }
        }
    }
}
 
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;
  bool slp_scheduled = false;
  unsigned int nunits;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  bool do_peeling_for_loop_bound;
  gimple stmt, pattern_stmt;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
  bool transform_pattern_stmt = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  do_peeling_for_loop_bound
    = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_loop_versioning (loop_vinfo,
                          !do_peeling_for_loop_bound,
                          &cond_expr, &cond_expr_stmt_list);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */
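
/* An illustrative (assumed) sketch, not taken from the original sources:
   after this duplication the generated code is shaped like

        for (i = 0; i < (n / VF) * VF; i += VF)
          ...vectorized body...               <-- original loop, vectorized
        for ( ; i < n; i++)
          ...scalar body...                   <-- epilog copy, stays scalar

   so the epilog loop picks up the n % VF leftover iterations.  */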
 
  if (do_peeling_for_loop_bound)
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
                                    cond_expr, cond_expr_stmt_list);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer
     supports more involved loop forms, the order by which the BBs are
     traversed needs to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      gimple phi;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, phi);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
            }
        }
 
      pattern_stmt = NULL;
      for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
        {
          bool is_store;

          if (transform_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              gsi_next (&si);
              continue;
            }

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (stmt);
                }
              else
                {
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            transform_pattern_stmt = true;

          /* If the pattern statement has def stmts, vectorize them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "==> vectorizing pattern def"
                                              " stmt: ");
                          print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
                                             TDF_SLIM);
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_start (NULL);
                      transform_pattern_stmt = false;
                    }
                }
              else
                transform_pattern_stmt = false;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
                                               STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
              && vect_print_dump_info (REPORT_DETAILS))
            /* For SLP, VF is set according to the unrolling factor, and not
               to the vector size, hence for SLP this print is not valid.  */
            fprintf (vect_dump, "multiple-types.");

          /* SLP.  Schedule all the SLP instances when the first SLP stmt is
             reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "=== scheduling SLP instances ===");

                  vect_schedule_slp (loop_vinfo, NULL);
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
                  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
            }

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          strided_store = false;
          is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
          if (is_store)
            {
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization of
                     the interleaving chain was completed - free all the
                     stores in the chain.  */
                  gsi_next (&si);
                  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
                  continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  free_stmt_vec_info (gsi_stmt (si));
                  gsi_remove (&si, true);
                  continue;
                }
            }

          if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}
