OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-dev/] [fsf-gcc-snapshot-1-mar-12/] [or1k-gcc/] [gcc/] [tree-vect-slp.c] - Blame information for rev 847

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* SLP - Basic Block Vectorization
2
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "target.h"
30
#include "basic-block.h"
31
#include "tree-pretty-print.h"
32
#include "gimple-pretty-print.h"
33
#include "tree-flow.h"
34
#include "tree-dump.h"
35
#include "cfgloop.h"
36
#include "cfglayout.h"
37
#include "expr.h"
38
#include "recog.h"
39
#include "optabs.h"
40
#include "tree-vectorizer.h"
41
#include "langhooks.h"
42
 
43
/* Extract the location of the basic block in the source code.
44
   Return the basic block location if succeed and NULL if not.  */
45
 
46
LOC
47
find_bb_location (basic_block bb)
48
{
49
  gimple stmt = NULL;
50
  gimple_stmt_iterator si;
51
 
52
  if (!bb)
53
    return UNKNOWN_LOC;
54
 
55
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56
    {
57
      stmt = gsi_stmt (si);
58
      if (gimple_location (stmt) != UNKNOWN_LOC)
59
        return gimple_location (stmt);
60
    }
61
 
62
  return UNKNOWN_LOC;
63
}
64
 
65
 
66
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
67
 
68
static void
69
vect_free_slp_tree (slp_tree node)
70
{
71
  int i;
72
  slp_void_p child;
73
 
74
  if (!node)
75
    return;
76
 
77
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78
    vect_free_slp_tree ((slp_tree) child);
79
 
80
  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
81
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
82
 
83
  if (SLP_TREE_VEC_STMTS (node))
84
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
85
 
86
  free (node);
87
}
88
 
89
 
90
/* Free the memory allocated for the SLP instance: the whole SLP tree
   rooted at the instance, plus the load-permutation and loads vectors.  */

void
vect_free_slp_instance (slp_instance instance)
{
  /* Recursively frees every node of the instance's SLP tree.  */
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  /* NOTE(review): the instance struct itself is NOT freed here — confirm
     that all callers release it, otherwise this leaks one _slp_instance
     per call.  */
}
99
 
100
 
101
/* Create an SLP node for SCALAR_STMTS.  */
102
 
103
static slp_tree
104
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
105
{
106
  slp_tree node;
107
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
108
  unsigned int nops;
109
 
110
  if (is_gimple_call (stmt))
111
    nops = gimple_call_num_args (stmt);
112
  else if (is_gimple_assign (stmt))
113
    {
114
      nops = gimple_num_ops (stmt) - 1;
115
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116
        nops++;
117
    }
118
  else
119
    return NULL;
120
 
121
  node = XNEW (struct _slp_tree);
122
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
123
  SLP_TREE_VEC_STMTS (node) = NULL;
124
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
125
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
126
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
127
 
128
  return node;
129
}
130
 
131
 
132
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
133
   operand.  */
134
static VEC (slp_oprnd_info, heap) *
135
vect_create_oprnd_info (int nops, int group_size)
136
{
137
  int i;
138
  slp_oprnd_info oprnd_info;
139
  VEC (slp_oprnd_info, heap) *oprnds_info;
140
 
141
  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
142
  for (i = 0; i < nops; i++)
143
    {
144
      oprnd_info = XNEW (struct _slp_oprnd_info);
145
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
146
      oprnd_info->first_dt = vect_uninitialized_def;
147
      oprnd_info->first_def_type = NULL_TREE;
148
      oprnd_info->first_const_oprnd = NULL_TREE;
149
      oprnd_info->first_pattern = false;
150
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
151
    }
152
 
153
  return oprnds_info;
154
}
155
 
156
 
157
/* Free operands info.  */
158
 
159
static void
160
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
161
{
162
  int i;
163
  slp_oprnd_info oprnd_info;
164
 
165
  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
166
    {
167
      VEC_free (gimple, heap, oprnd_info->def_stmts);
168
      XDELETE (oprnd_info);
169
    }
170
 
171
  VEC_free (slp_oprnd_info, heap, *oprnds_info);
172
}
173
 
174
 
175
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176
   they are of a valid type and that they match the defs of the first stmt of
177
   the SLP group (stored in OPRNDS_INFO).  */
178
 
179
static bool
180
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
181
                             slp_tree slp_node, gimple stmt,
182
                             int ncopies_for_cost, bool first,
183
                             VEC (slp_oprnd_info, heap) **oprnds_info)
184
{
185
  tree oprnd;
186
  unsigned int i, number_of_oprnds;
187
  tree def, def_op0 = NULL_TREE;
188
  gimple def_stmt;
189
  enum vect_def_type dt = vect_uninitialized_def;
190
  enum vect_def_type dt_op0 = vect_uninitialized_def;
191
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
192
  tree lhs = gimple_get_lhs (stmt);
193
  struct loop *loop = NULL;
194
  enum tree_code rhs_code;
195
  bool different_types = false;
196
  bool pattern = false;
197
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
198
  int op_idx = 1;
199
  tree compare_rhs = NULL_TREE;
200
 
201
  if (loop_vinfo)
202
    loop = LOOP_VINFO_LOOP (loop_vinfo);
203
 
204
  if (is_gimple_call (stmt))
205
    {
206
      number_of_oprnds = gimple_call_num_args (stmt);
207
      op_idx = 3;
208
    }
209
  else if (is_gimple_assign (stmt))
210
    {
211
      number_of_oprnds = gimple_num_ops (stmt) - 1;
212
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
213
        number_of_oprnds++;
214
    }
215
  else
216
    return false;
217
 
218
  for (i = 0; i < number_of_oprnds; i++)
219
    {
220
      if (compare_rhs)
221
        {
222
          oprnd = compare_rhs;
223
          compare_rhs = NULL_TREE;
224
        }
225
      else
226
        oprnd = gimple_op (stmt, op_idx++);
227
 
228
      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
229
 
230
      if (COMPARISON_CLASS_P (oprnd))
231
        {
232
          compare_rhs = TREE_OPERAND (oprnd, 1);
233
          oprnd = TREE_OPERAND (oprnd, 0);
234
        }
235
 
236
      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
237
                               &def, &dt)
238
          || (!def_stmt && dt != vect_constant_def))
239
        {
240
          if (vect_print_dump_info (REPORT_SLP))
241
            {
242
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
243
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
244
            }
245
 
246
          return false;
247
        }
248
 
249
      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250
         from the pattern.  Check that all the stmts of the node are in the
251
         pattern.  */
252
      if (loop && def_stmt && gimple_bb (def_stmt)
253
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
254
          && vinfo_for_stmt (def_stmt)
255
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
256
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
257
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
258
        {
259
          pattern = true;
260
          if (!first && !oprnd_info->first_pattern)
261
            {
262
              if (vect_print_dump_info (REPORT_DETAILS))
263
                {
264
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
265
                                " are in a pattern, and others are not ");
266
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
267
                }
268
 
269
              return false;
270
            }
271
 
272
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
273
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
274
 
275
          if (dt == vect_unknown_def_type)
276
            {
277
              if (vect_print_dump_info (REPORT_DETAILS))
278
                fprintf (vect_dump, "Unsupported pattern.");
279
              return false;
280
            }
281
 
282
          switch (gimple_code (def_stmt))
283
            {
284
              case GIMPLE_PHI:
285
                def = gimple_phi_result (def_stmt);
286
                break;
287
 
288
              case GIMPLE_ASSIGN:
289
                def = gimple_assign_lhs (def_stmt);
290
                break;
291
 
292
              default:
293
                if (vect_print_dump_info (REPORT_DETAILS))
294
                  fprintf (vect_dump, "unsupported defining stmt: ");
295
                return false;
296
            }
297
        }
298
 
299
      if (first)
300
        {
301
          oprnd_info->first_dt = dt;
302
          oprnd_info->first_pattern = pattern;
303
          if (def)
304
            {
305
              oprnd_info->first_def_type = TREE_TYPE (def);
306
              oprnd_info->first_const_oprnd = NULL_TREE;
307
            }
308
          else
309
            {
310
              oprnd_info->first_def_type = NULL_TREE;
311
              oprnd_info->first_const_oprnd = oprnd;
312
            }
313
 
314
          if (i == 0)
315
            {
316
              def_op0 = def;
317
              dt_op0 = dt;
318
              /* Analyze costs (for the first stmt of the group only).  */
319
              if (REFERENCE_CLASS_P (lhs))
320
                /* Store.  */
321
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
322
                                        dt, slp_node);
323
              else
324
                {
325
                  enum vect_def_type dts[2];
326
                  dts[0] = dt;
327
                  dts[1] = vect_uninitialized_def;
328
                  /* Not memory operation (we don't call this function for
329
                     loads).  */
330
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
331
                                          slp_node);
332
                }
333
            }
334
        }
335
      else
336
        {
337
          /* Not first stmt of the group, check that the def-stmt/s match
338
             the def-stmt/s of the first stmt.  Allow different definition
339
             types for reduction chains: the first stmt must be a
340
             vect_reduction_def (a phi node), and the rest
341
             vect_internal_def.  */
342
          if (((oprnd_info->first_dt != dt
343
                && !(oprnd_info->first_dt == vect_reduction_def
344
                     && dt == vect_internal_def))
345
               || (oprnd_info->first_def_type != NULL_TREE
346
                   && def
347
                   && !types_compatible_p (oprnd_info->first_def_type,
348
                                           TREE_TYPE (def))))
349
               || (!def
350
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
351
                                           TREE_TYPE (oprnd)))
352
               || different_types)
353
            {
354
              if (number_of_oprnds != 2)
355
                {
356
                  if (vect_print_dump_info (REPORT_SLP))
357
                    fprintf (vect_dump, "Build SLP failed: different types ");
358
 
359
                  return false;
360
                }
361
 
362
              /* Try to swap operands in case of binary operation.  */
363
              if (i == 0)
364
                different_types = true;
365
              else
366
                {
367
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
368
                  if (is_gimple_assign (stmt)
369
                      && (rhs_code = gimple_assign_rhs_code (stmt))
370
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
371
                      && commutative_tree_code (rhs_code)
372
                      && oprnd0_info->first_dt == dt
373
                      && oprnd_info->first_dt == dt_op0
374
                      && def_op0 && def
375
                      && !(oprnd0_info->first_def_type
376
                           && !types_compatible_p (oprnd0_info->first_def_type,
377
                                                   TREE_TYPE (def)))
378
                      && !(oprnd_info->first_def_type
379
                           && !types_compatible_p (oprnd_info->first_def_type,
380
                                                   TREE_TYPE (def_op0))))
381
                    {
382
                      if (vect_print_dump_info (REPORT_SLP))
383
                        {
384
                          fprintf (vect_dump, "Swapping operands of ");
385
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
386
                        }
387
 
388
                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
389
                                          gimple_assign_rhs2_ptr (stmt));
390
                    }
391
                  else
392
                    {
393
                      if (vect_print_dump_info (REPORT_SLP))
394
                        fprintf (vect_dump, "Build SLP failed: different types ");
395
 
396
                      return false;
397
                    }
398
                }
399
            }
400
        }
401
 
402
      /* Check the types of the definitions.  */
403
      switch (dt)
404
        {
405
        case vect_constant_def:
406
        case vect_external_def:
407
        case vect_reduction_def:
408
          break;
409
 
410
        case vect_internal_def:
411
          if (different_types)
412
            {
413
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
414
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
415
              if (i == 0)
416
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
417
              else
418
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
419
            }
420
          else
421
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
422
 
423
          break;
424
 
425
        default:
426
          /* FORNOW: Not supported.  */
427
          if (vect_print_dump_info (REPORT_SLP))
428
            {
429
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
430
              print_generic_expr (vect_dump, def, TDF_SLIM);
431
            }
432
 
433
          return false;
434
        }
435
    }
436
 
437
  return true;
438
}
439
 
440
 
441
/* Recursively build an SLP tree starting from NODE.
442
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
443
   permutation or are of unsupported types of operation.  Otherwise, return
444
   TRUE.  */
445
 
446
static bool
447
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
448
                     slp_tree *node, unsigned int group_size,
449
                     int *inside_cost, int *outside_cost,
450
                     int ncopies_for_cost, unsigned int *max_nunits,
451
                     VEC (int, heap) **load_permutation,
452
                     VEC (slp_tree, heap) **loads,
453
                     unsigned int vectorization_factor, bool *loads_permuted)
454
{
455
  unsigned int i;
456
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
457
  gimple stmt = VEC_index (gimple, stmts, 0);
458
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
459
  enum tree_code first_cond_code = ERROR_MARK;
460
  tree lhs;
461
  bool stop_recursion = false, need_same_oprnds = false;
462
  tree vectype, scalar_type, first_op1 = NULL_TREE;
463
  unsigned int ncopies;
464
  optab optab;
465
  int icode;
466
  enum machine_mode optab_op2_mode;
467
  enum machine_mode vec_mode;
468
  struct data_reference *first_dr;
469
  HOST_WIDE_INT dummy;
470
  bool permutation = false;
471
  unsigned int load_place;
472
  gimple first_load, prev_first_load = NULL;
473
  VEC (slp_oprnd_info, heap) *oprnds_info;
474
  unsigned int nops;
475
  slp_oprnd_info oprnd_info;
476
  tree cond;
477
 
478
  if (is_gimple_call (stmt))
479
    nops = gimple_call_num_args (stmt);
480
  else if (is_gimple_assign (stmt))
481
    {
482
      nops = gimple_num_ops (stmt) - 1;
483
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
484
        nops++;
485
    }
486
  else
487
    return false;
488
 
489
  oprnds_info = vect_create_oprnd_info (nops, group_size);
490
 
491
  /* For every stmt in NODE find its def stmt/s.  */
492
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
493
    {
494
      if (vect_print_dump_info (REPORT_SLP))
495
        {
496
          fprintf (vect_dump, "Build SLP for ");
497
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
498
        }
499
 
500
      /* Fail to vectorize statements marked as unvectorizable.  */
501
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
502
        {
503
          if (vect_print_dump_info (REPORT_SLP))
504
            {
505
              fprintf (vect_dump,
506
                       "Build SLP failed: unvectorizable statement ");
507
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
508
            }
509
 
510
          vect_free_oprnd_info (&oprnds_info);
511
          return false;
512
        }
513
 
514
      lhs = gimple_get_lhs (stmt);
515
      if (lhs == NULL_TREE)
516
        {
517
          if (vect_print_dump_info (REPORT_SLP))
518
            {
519
              fprintf (vect_dump,
520
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
521
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
522
            }
523
 
524
          vect_free_oprnd_info (&oprnds_info);
525
          return false;
526
        }
527
 
528
       if (is_gimple_assign (stmt)
529
           && gimple_assign_rhs_code (stmt) == COND_EXPR
530
           && (cond = gimple_assign_rhs1 (stmt))
531
           && !COMPARISON_CLASS_P (cond))
532
        {
533
          if (vect_print_dump_info (REPORT_SLP))
534
            {
535
              fprintf (vect_dump,
536
                       "Build SLP failed: condition is not comparison ");
537
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
538
            }
539
 
540
          vect_free_oprnd_info (&oprnds_info);
541
          return false;
542
        }
543
 
544
      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
545
      vectype = get_vectype_for_scalar_type (scalar_type);
546
      if (!vectype)
547
        {
548
          if (vect_print_dump_info (REPORT_SLP))
549
            {
550
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
551
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
552
            }
553
 
554
          vect_free_oprnd_info (&oprnds_info);
555
          return false;
556
        }
557
 
558
      /* In case of multiple types we need to detect the smallest type.  */
559
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
560
        {
561
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
562
          if (bb_vinfo)
563
            vectorization_factor = *max_nunits;
564
        }
565
 
566
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
567
 
568
      if (is_gimple_call (stmt))
569
        {
570
          rhs_code = CALL_EXPR;
571
          if (gimple_call_internal_p (stmt)
572
              || gimple_call_tail_p (stmt)
573
              || gimple_call_noreturn_p (stmt)
574
              || !gimple_call_nothrow_p (stmt)
575
              || gimple_call_chain (stmt))
576
            {
577
              if (vect_print_dump_info (REPORT_SLP))
578
                {
579
                  fprintf (vect_dump,
580
                           "Build SLP failed: unsupported call type ");
581
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
582
                }
583
 
584
              vect_free_oprnd_info (&oprnds_info);
585
              return false;
586
            }
587
        }
588
      else
589
        rhs_code = gimple_assign_rhs_code (stmt);
590
 
591
      /* Check the operation.  */
592
      if (i == 0)
593
        {
594
          first_stmt_code = rhs_code;
595
 
596
          /* Shift arguments should be equal in all the packed stmts for a
597
             vector shift with scalar shift operand.  */
598
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
599
              || rhs_code == LROTATE_EXPR
600
              || rhs_code == RROTATE_EXPR)
601
            {
602
              vec_mode = TYPE_MODE (vectype);
603
 
604
              /* First see if we have a vector/vector shift.  */
605
              optab = optab_for_tree_code (rhs_code, vectype,
606
                                           optab_vector);
607
 
608
              if (!optab
609
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
610
                {
611
                  /* No vector/vector shift, try for a vector/scalar shift.  */
612
                  optab = optab_for_tree_code (rhs_code, vectype,
613
                                               optab_scalar);
614
 
615
                  if (!optab)
616
                    {
617
                      if (vect_print_dump_info (REPORT_SLP))
618
                        fprintf (vect_dump, "Build SLP failed: no optab.");
619
                      vect_free_oprnd_info (&oprnds_info);
620
                      return false;
621
                    }
622
                  icode = (int) optab_handler (optab, vec_mode);
623
                  if (icode == CODE_FOR_nothing)
624
                    {
625
                      if (vect_print_dump_info (REPORT_SLP))
626
                        fprintf (vect_dump, "Build SLP failed: "
627
                                            "op not supported by target.");
628
                      vect_free_oprnd_info (&oprnds_info);
629
                      return false;
630
                    }
631
                  optab_op2_mode = insn_data[icode].operand[2].mode;
632
                  if (!VECTOR_MODE_P (optab_op2_mode))
633
                    {
634
                      need_same_oprnds = true;
635
                      first_op1 = gimple_assign_rhs2 (stmt);
636
                    }
637
                }
638
            }
639
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
640
            {
641
              need_same_oprnds = true;
642
              first_op1 = gimple_assign_rhs2 (stmt);
643
            }
644
        }
645
      else
646
        {
647
          if (first_stmt_code != rhs_code
648
              && (first_stmt_code != IMAGPART_EXPR
649
                  || rhs_code != REALPART_EXPR)
650
              && (first_stmt_code != REALPART_EXPR
651
                  || rhs_code != IMAGPART_EXPR)
652
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
653
                   && (first_stmt_code == ARRAY_REF
654
                       || first_stmt_code == INDIRECT_REF
655
                       || first_stmt_code == COMPONENT_REF
656
                       || first_stmt_code == MEM_REF)))
657
            {
658
              if (vect_print_dump_info (REPORT_SLP))
659
                {
660
                  fprintf (vect_dump,
661
                           "Build SLP failed: different operation in stmt ");
662
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
663
                }
664
 
665
              vect_free_oprnd_info (&oprnds_info);
666
              return false;
667
            }
668
 
669
          if (need_same_oprnds
670
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
671
            {
672
              if (vect_print_dump_info (REPORT_SLP))
673
                {
674
                  fprintf (vect_dump,
675
                           "Build SLP failed: different shift arguments in ");
676
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
677
                }
678
 
679
              vect_free_oprnd_info (&oprnds_info);
680
              return false;
681
            }
682
 
683
          if (rhs_code == CALL_EXPR)
684
            {
685
              gimple first_stmt = VEC_index (gimple, stmts, 0);
686
              if (gimple_call_num_args (stmt) != nops
687
                  || !operand_equal_p (gimple_call_fn (first_stmt),
688
                                       gimple_call_fn (stmt), 0)
689
                  || gimple_call_fntype (first_stmt)
690
                     != gimple_call_fntype (stmt))
691
                {
692
                  if (vect_print_dump_info (REPORT_SLP))
693
                    {
694
                      fprintf (vect_dump,
695
                               "Build SLP failed: different calls in ");
696
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
697
                    }
698
 
699
                  vect_free_oprnd_info (&oprnds_info);
700
                  return false;
701
                }
702
            }
703
        }
704
 
705
      /* Strided store or load.  */
706
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
707
        {
708
          if (REFERENCE_CLASS_P (lhs))
709
            {
710
              /* Store.  */
711
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
712
                                                stmt, ncopies_for_cost,
713
                                                (i == 0), &oprnds_info))
714
                {
715
                  vect_free_oprnd_info (&oprnds_info);
716
                  return false;
717
                }
718
            }
719
          else
720
            {
721
              /* Load.  */
722
              /* FORNOW: Check that there is no gap between the loads.  */
723
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
724
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
725
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
726
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
727
                {
728
                  if (vect_print_dump_info (REPORT_SLP))
729
                    {
730
                      fprintf (vect_dump, "Build SLP failed: strided "
731
                                          "loads have gaps ");
732
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
733
                    }
734
 
735
                  vect_free_oprnd_info (&oprnds_info);
736
                  return false;
737
                }
738
 
739
              /* Check that the size of interleaved loads group is not
740
                 greater than the SLP group size.  */
741
              if (loop_vinfo
742
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
743
                {
744
                  if (vect_print_dump_info (REPORT_SLP))
745
                    {
746
                      fprintf (vect_dump, "Build SLP failed: the number of "
747
                                          "interleaved loads is greater than"
748
                                          " the SLP group size ");
749
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
750
                    }
751
 
752
                  vect_free_oprnd_info (&oprnds_info);
753
                  return false;
754
                }
755
 
756
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
757
              if (prev_first_load)
758
                {
759
                  /* Check that there are no loads from different interleaving
760
                     chains in the same node.  The only exception is complex
761
                     numbers.  */
762
                  if (prev_first_load != first_load
763
                      && rhs_code != REALPART_EXPR
764
                      && rhs_code != IMAGPART_EXPR)
765
                    {
766
                      if (vect_print_dump_info (REPORT_SLP))
767
                        {
768
                          fprintf (vect_dump, "Build SLP failed: different "
769
                                           "interleaving chains in one node ");
770
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
771
                        }
772
 
773
                      vect_free_oprnd_info (&oprnds_info);
774
                      return false;
775
                    }
776
                }
777
              else
778
                prev_first_load = first_load;
779
 
780
              if (first_load == stmt)
781
                {
782
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
783
                  if (vect_supportable_dr_alignment (first_dr, false)
784
                      == dr_unaligned_unsupported)
785
                    {
786
                      if (vect_print_dump_info (REPORT_SLP))
787
                        {
788
                          fprintf (vect_dump, "Build SLP failed: unsupported "
789
                                              "unaligned load ");
790
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
791
                        }
792
 
793
                      vect_free_oprnd_info (&oprnds_info);
794
                      return false;
795
                    }
796
 
797
                  /* Analyze costs (for the first stmt in the group).  */
798
                  vect_model_load_cost (vinfo_for_stmt (stmt),
799
                                        ncopies_for_cost, false, *node);
800
                }
801
 
802
              /* Store the place of this load in the interleaving chain.  In
803
                 case that permutation is needed we later decide if a specific
804
                 permutation is supported.  */
805
              load_place = vect_get_place_in_interleaving_chain (stmt,
806
                                                                 first_load);
807
              if (load_place != i)
808
                permutation = true;
809
 
810
              VEC_safe_push (int, heap, *load_permutation, load_place);
811
 
812
              /* We stop the tree when we reach a group of loads.  */
813
              stop_recursion = true;
814
             continue;
815
           }
816
        } /* Strided access.  */
817
      else
818
        {
819
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
820
            {
821
              /* Not strided load.  */
822
              if (vect_print_dump_info (REPORT_SLP))
823
                {
824
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
825
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
826
                }
827
 
828
              /* FORNOW: Not strided loads are not supported.  */
829
              vect_free_oprnd_info (&oprnds_info);
830
              return false;
831
            }
832
 
833
          /* Not memory operation.  */
834
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
835
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
836
              && rhs_code != COND_EXPR
837
              && rhs_code != CALL_EXPR)
838
            {
839
              if (vect_print_dump_info (REPORT_SLP))
840
                {
841
                  fprintf (vect_dump, "Build SLP failed: operation");
842
                  fprintf (vect_dump, " unsupported ");
843
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
844
                }
845
 
846
              vect_free_oprnd_info (&oprnds_info);
847
              return false;
848
            }
849
 
850
          if (rhs_code == COND_EXPR)
851
            {
852
              tree cond_expr = gimple_assign_rhs1 (stmt);
853
 
854
              if (i == 0)
855
                first_cond_code = TREE_CODE (cond_expr);
856
              else if (first_cond_code != TREE_CODE (cond_expr))
857
                {
858
                  if (vect_print_dump_info (REPORT_SLP))
859
                    {
860
                      fprintf (vect_dump, "Build SLP failed: different"
861
                                          " operation");
862
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
863
                    }
864
 
865
                  vect_free_oprnd_info (&oprnds_info);
866
                  return false;
867
                }
868
            }
869
 
870
          /* Find the def-stmts.  */
871
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
872
                                            ncopies_for_cost, (i == 0),
873
                                            &oprnds_info))
874
            {
875
              vect_free_oprnd_info (&oprnds_info);
876
              return false;
877
            }
878
        }
879
    }
880
 
881
  /* Add the costs of the node to the overall instance costs.  */
882
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
883
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
884
 
885
  /* Strided loads were reached - stop the recursion.  */
886
  if (stop_recursion)
887
    {
888
      VEC_safe_push (slp_tree, heap, *loads, *node);
889
      if (permutation)
890
        {
891
 
892
          *loads_permuted = true;
893
          *inside_cost
894
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
895
               * group_size;
896
        }
897
      else
898
        {
899
          /* We don't check here complex numbers chains, so we set
900
             LOADS_PERMUTED for further check in
901
             vect_supported_load_permutation_p.  */
902
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
903
            *loads_permuted = true;
904
        }
905
 
906
      vect_free_oprnd_info (&oprnds_info);
907
      return true;
908
    }
909
 
910
  /* Create SLP_TREE nodes for the definition node/s.  */
911
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
912
    {
913
      slp_tree child;
914
 
915
      if (oprnd_info->first_dt != vect_internal_def)
916
        continue;
917
 
918
      child = vect_create_new_slp_node (oprnd_info->def_stmts);
919
      if (!child
920
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
921
                                inside_cost, outside_cost, ncopies_for_cost,
922
                                max_nunits, load_permutation, loads,
923
                                vectorization_factor, loads_permuted))
924
        {
925
          if (child)
926
            oprnd_info->def_stmts = NULL;
927
          vect_free_slp_tree (child);
928
          vect_free_oprnd_info (&oprnds_info);
929
          return false;
930
        }
931
 
932
      oprnd_info->def_stmts = NULL;
933
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
934
    }
935
 
936
  vect_free_oprnd_info (&oprnds_info);
937
  return true;
938
}
939
 
940
 
941
static void
942
vect_print_slp_tree (slp_tree node)
943
{
944
  int i;
945
  gimple stmt;
946
  slp_void_p child;
947
 
948
  if (!node)
949
    return;
950
 
951
  fprintf (vect_dump, "node ");
952
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
953
    {
954
      fprintf (vect_dump, "\n\tstmt %d ", i);
955
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
956
    }
957
  fprintf (vect_dump, "\n");
958
 
959
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
960
    vect_print_slp_tree ((slp_tree) child);
961
}
962
 
963
 
964
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
965
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
966
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
967
   stmts in NODE are to be marked.  */
968
 
969
static void
970
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
971
{
972
  int i;
973
  gimple stmt;
974
  slp_void_p child;
975
 
976
  if (!node)
977
    return;
978
 
979
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
980
    if (j < 0 || i == j)
981
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
982
 
983
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
984
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
985
}
986
 
987
 
988
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
989
 
990
static void
991
vect_mark_slp_stmts_relevant (slp_tree node)
992
{
993
  int i;
994
  gimple stmt;
995
  stmt_vec_info stmt_info;
996
  slp_void_p child;
997
 
998
  if (!node)
999
    return;
1000
 
1001
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1002
    {
1003
      stmt_info = vinfo_for_stmt (stmt);
1004
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1005
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1006
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1007
    }
1008
 
1009
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1010
    vect_mark_slp_stmts_relevant ((slp_tree) child);
1011
}
1012
 
1013
 
1014
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.
   Return true if the (sorted) permutation can be generated, false
   otherwise.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  /* Walk the permutation in strides of GROUP_SIZE (I) while advancing
     one load node at a time (J); INDEX is the position of node J's first
     load within the interleaving chain.  */
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                   "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          /* Release the scratch array before bailing out.  */
          free (tmp_loads);
          return false;
        }

      /* Place node J at its position in the interleaving chain.  */
      tmp_loads[index] = load;
    }

  /* Rebuild SLP_INSTANCE_LOADS in chain order from the scratch array.  */
  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
     VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Dry-run (ANALYZE_ONLY = true) the permutation generation to verify
     the target can actually perform it.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
1072
 
1073
 
1074
/* Rearrange the statements of NODE according to PERMUTATION.  */
1075
 
1076
static void
1077
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1078
                          VEC (int, heap) *permutation)
1079
{
1080
  gimple stmt;
1081
  VEC (gimple, heap) *tmp_stmts;
1082
  unsigned int index, i;
1083
  slp_void_p child;
1084
 
1085
  if (!node)
1086
    return;
1087
 
1088
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1089
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1090
 
1091
  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1092
  tmp_stmts = VEC_alloc (gimple, heap, group_size);
1093
 
1094
  for (i = 0; i < group_size; i++)
1095
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1096
 
1097
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1098
    {
1099
      index = VEC_index (int, permutation, i);
1100
      VEC_replace (gimple, tmp_stmts, index, stmt);
1101
    }
1102
 
1103
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1104
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1105
}
1106
 
1107
 
1108
/* Check if the required load permutation is supported.
1109
   LOAD_PERMUTATION contains a list of indices of the loads.
1110
   In SLP this permutation is relative to the order of strided stores that are
1111
   the base of the SLP instance.  */
1112
 
1113
static bool
1114
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1115
                                   VEC (int, heap) *load_permutation)
1116
{
1117
  int i = 0, j, prev = -1, next, k, number_of_groups;
1118
  bool supported, bad_permutation = false;
1119
  sbitmap load_index;
1120
  slp_tree node, other_complex_node;
1121
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1122
  unsigned complex_numbers = 0;
1123
  struct data_reference *dr;
1124
  bb_vec_info bb_vinfo;
1125
 
1126
  /* FORNOW: permutations are only supported in SLP.  */
1127
  if (!slp_instn)
1128
    return false;
1129
 
1130
  if (vect_print_dump_info (REPORT_SLP))
1131
    {
1132
      fprintf (vect_dump, "Load permutation ");
1133
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1134
        fprintf (vect_dump, "%d ", next);
1135
    }
1136
 
1137
  /* In case of reduction every load permutation is allowed, since the order
1138
     of the reduction statements is not important (as opposed to the case of
1139
     strided stores).  The only condition we need to check is that all the
1140
     load nodes are of the same size and have the same permutation (and then
1141
     rearrange all the nodes of the SLP instance according to this
1142
     permutation).  */
1143
 
1144
  /* Check that all the load nodes are of the same size.  */
1145
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1146
    {
1147
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1148
          != (unsigned) group_size)
1149
        return false;
1150
 
1151
      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1152
      if (is_gimple_assign (stmt)
1153
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1154
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1155
        complex_numbers++;
1156
    }
1157
 
1158
  /* Complex operands can be swapped as following:
1159
      real_c = real_b + real_a;
1160
      imag_c = imag_a + imag_b;
1161
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1162
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
1163
     chains are mixed, they match the above pattern.  */
1164
  if (complex_numbers)
1165
    {
1166
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1167
        {
1168
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1169
            {
1170
              if (j == 0)
1171
                first = stmt;
1172
              else
1173
                {
1174
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1175
                    {
1176
                      if (complex_numbers != 2)
1177
                        return false;
1178
 
1179
                      if (i == 0)
1180
                        k = 1;
1181
                      else
1182
                        k = 0;
1183
 
1184
                      other_complex_node = VEC_index (slp_tree,
1185
                                            SLP_INSTANCE_LOADS (slp_instn), k);
1186
                      other_node_first = VEC_index (gimple,
1187
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1188
 
1189
                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1190
                          != other_node_first)
1191
                       return false;
1192
                    }
1193
                }
1194
            }
1195
        }
1196
    }
1197
 
1198
  /* We checked that this case ok, so there is no need to proceed with
1199
     permutation tests.  */
1200
  if (complex_numbers == 2)
1201
    {
1202
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1203
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1204
      return true;
1205
    }
1206
 
1207
  node = SLP_INSTANCE_TREE (slp_instn);
1208
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1209
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1210
     instance, not all the loads belong to the same node or interleaving
1211
     group.  Hence, we need to divide them into groups according to
1212
     GROUP_SIZE.  */
1213
  number_of_groups = VEC_length (int, load_permutation) / group_size;
1214
 
1215
  /* Reduction (there are no data-refs in the root).
1216
     In reduction chain the order of the loads is important.  */
1217
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1218
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1219
    {
1220
      int first_group_load_index;
1221
 
1222
      /* Compare all the permutation sequences to the first one.  */
1223
      for (i = 1; i < number_of_groups; i++)
1224
        {
1225
          k = 0;
1226
          for (j = i * group_size; j < i * group_size + group_size; j++)
1227
            {
1228
              next = VEC_index (int, load_permutation, j);
1229
              first_group_load_index = VEC_index (int, load_permutation, k);
1230
 
1231
              if (next != first_group_load_index)
1232
                {
1233
                  bad_permutation = true;
1234
                  break;
1235
                }
1236
 
1237
              k++;
1238
            }
1239
 
1240
          if (bad_permutation)
1241
            break;
1242
        }
1243
 
1244
      if (!bad_permutation)
1245
        {
1246
          /* Check that the loads in the first sequence are different and there
1247
             are no gaps between them.  */
1248
          load_index = sbitmap_alloc (group_size);
1249
          sbitmap_zero (load_index);
1250
          for (k = 0; k < group_size; k++)
1251
            {
1252
              first_group_load_index = VEC_index (int, load_permutation, k);
1253
              if (TEST_BIT (load_index, first_group_load_index))
1254
                {
1255
                  bad_permutation = true;
1256
                  break;
1257
                }
1258
 
1259
              SET_BIT (load_index, first_group_load_index);
1260
            }
1261
 
1262
          if (!bad_permutation)
1263
            for (k = 0; k < group_size; k++)
1264
              if (!TEST_BIT (load_index, k))
1265
                {
1266
                  bad_permutation = true;
1267
                  break;
1268
                }
1269
 
1270
          sbitmap_free (load_index);
1271
        }
1272
 
1273
      if (!bad_permutation)
1274
        {
1275
          /* This permutation is valid for reduction.  Since the order of the
1276
             statements in the nodes is not important unless they are memory
1277
             accesses, we can rearrange the statements in all the nodes
1278
             according to the order of the loads.  */
1279
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1280
                                    load_permutation);
1281
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1282
          return true;
1283
        }
1284
    }
1285
 
1286
  /* In basic block vectorization we allow any subchain of an interleaving
1287
     chain.
1288
     FORNOW: not supported in loop SLP because of realignment compications.  */
1289
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1290
  bad_permutation = false;
1291
  /* Check that for every node in the instance teh loads form a subchain.  */
1292
  if (bb_vinfo)
1293
    {
1294
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1295
        {
1296
          next_load = NULL;
1297
          first_load = NULL;
1298
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1299
            {
1300
              if (!first_load)
1301
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1302
              else if (first_load
1303
                         != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1304
                {
1305
                  bad_permutation = true;
1306
                  break;
1307
                }
1308
 
1309
              if (j != 0 && next_load != load)
1310
                {
1311
                  bad_permutation = true;
1312
                  break;
1313
                }
1314
 
1315
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1316
            }
1317
 
1318
          if (bad_permutation)
1319
            break;
1320
        }
1321
 
1322
      /* Check that the alignment of the first load in every subchain, i.e.,
1323
         the first statement in every load node, is supported.  */
1324
      if (!bad_permutation)
1325
        {
1326
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1327
            {
1328
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1329
              if (first_load
1330
                    != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1331
                {
1332
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1333
                  if (vect_supportable_dr_alignment (dr, false)
1334
                       == dr_unaligned_unsupported)
1335
                    {
1336
                      if (vect_print_dump_info (REPORT_SLP))
1337
                        {
1338
                          fprintf (vect_dump, "unsupported unaligned load ");
1339
                          print_gimple_stmt (vect_dump, first_load, 0,
1340
                                             TDF_SLIM);
1341
                        }
1342
                      bad_permutation = true;
1343
                      break;
1344
                    }
1345
                }
1346
            }
1347
 
1348
          if (!bad_permutation)
1349
            {
1350
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1351
              return true;
1352
            }
1353
        }
1354
    }
1355
 
1356
  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1357
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1358
     well (unless it's reduction).  */
1359
  if (VEC_length (int, load_permutation)
1360
      != (unsigned int) (group_size * group_size))
1361
    return false;
1362
 
1363
  supported = true;
1364
  load_index = sbitmap_alloc (group_size);
1365
  sbitmap_zero (load_index);
1366
  for (j = 0; j < group_size; j++)
1367
    {
1368
      for (i = j * group_size, k = 0;
1369
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
1370
           i++, k++)
1371
       {
1372
         if (i != j * group_size && next != prev)
1373
          {
1374
            supported = false;
1375
            break;
1376
          }
1377
 
1378
         prev = next;
1379
       }
1380
 
1381
      if (TEST_BIT (load_index, prev))
1382
        {
1383
          supported = false;
1384
          break;
1385
        }
1386
 
1387
      SET_BIT (load_index, prev);
1388
    }
1389
 
1390
  for (j = 0; j < group_size; j++)
1391
    if (!TEST_BIT (load_index, j))
1392
      return false;
1393
 
1394
  sbitmap_free (load_index);
1395
 
1396
  if (supported && i == group_size * group_size
1397
      && vect_supported_slp_permutation_p (slp_instn))
1398
    return true;
1399
 
1400
  return false;
1401
}
1402
 
1403
 
1404
/* Find the first load in the loop that belongs to INSTANCE.
1405
   When loads are in several SLP nodes, there can be a case in which the first
1406
   load does not appear in the first SLP node to be transformed, causing
1407
   incorrect order of statements.  Since we generate all the loads together,
1408
   they must be inserted before the first load of the SLP instance and not
1409
   before the first load of the first node of the instance.  */
1410
 
1411
static gimple
1412
vect_find_first_load_in_slp_instance (slp_instance instance)
1413
{
1414
  int i, j;
1415
  slp_tree load_node;
1416
  gimple first_load = NULL, load;
1417
 
1418
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1419
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1420
      first_load = get_earlier_stmt (load, first_load);
1421
 
1422
  return first_load;
1423
}
1424
 
1425
 
1426
/* Find the last store in SLP INSTANCE.  */
1427
 
1428
static gimple
1429
vect_find_last_store_in_slp_instance (slp_instance instance)
1430
{
1431
  int i;
1432
  slp_tree node;
1433
  gimple last_store = NULL, store;
1434
 
1435
  node = SLP_INSTANCE_TREE (instance);
1436
  for (i = 0;
1437
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1438
       i++)
1439
    last_store = get_later_stmt (store, last_store);
1440
 
1441
  return last_store;
1442
}
1443
 
1444
 
1445
/* Analyze an SLP instance starting from a group of strided stores.  Call
1446
   vect_build_slp_tree to build a tree of packed stmts if possible.
1447
   Return FALSE if it's impossible to SLP any stmt in the loop.  */
1448
 
1449
static bool
1450
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1451
                           gimple stmt)
1452
{
1453
  slp_instance new_instance;
1454
  slp_tree node;
1455
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1456
  unsigned int unrolling_factor = 1, nunits;
1457
  tree vectype, scalar_type = NULL_TREE;
1458
  gimple next;
1459
  unsigned int vectorization_factor = 0;
1460
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1461
  unsigned int max_nunits = 0;
1462
  VEC (int, heap) *load_permutation;
1463
  VEC (slp_tree, heap) *loads;
1464
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1465
  bool loads_permuted = false;
1466
  VEC (gimple, heap) *scalar_stmts;
1467
 
1468
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1469
    {
1470
      if (dr)
1471
        {
1472
          scalar_type = TREE_TYPE (DR_REF (dr));
1473
          vectype = get_vectype_for_scalar_type (scalar_type);
1474
        }
1475
      else
1476
        {
1477
          gcc_assert (loop_vinfo);
1478
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1479
        }
1480
 
1481
      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1482
    }
1483
  else
1484
    {
1485
      gcc_assert (loop_vinfo);
1486
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1487
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1488
    }
1489
 
1490
  if (!vectype)
1491
    {
1492
      if (vect_print_dump_info (REPORT_SLP))
1493
        {
1494
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1495
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1496
        }
1497
 
1498
      return false;
1499
    }
1500
 
1501
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
1502
  if (loop_vinfo)
1503
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1504
  else
1505
    vectorization_factor = nunits;
1506
 
1507
  /* Calculate the unrolling factor.  */
1508
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1509
  if (unrolling_factor != 1 && !loop_vinfo)
1510
    {
1511
      if (vect_print_dump_info (REPORT_SLP))
1512
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1513
                            " block SLP");
1514
 
1515
      return false;
1516
    }
1517
 
1518
  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
1519
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
1520
  next = stmt;
1521
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1522
    {
1523
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1524
      while (next)
1525
        {
1526
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1527
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1528
            VEC_safe_push (gimple, heap, scalar_stmts,
1529
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1530
          else
1531
            VEC_safe_push (gimple, heap, scalar_stmts, next);
1532
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1533
        }
1534
    }
1535
  else
1536
    {
1537
      /* Collect reduction statements.  */
1538
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1539
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1540
        VEC_safe_push (gimple, heap, scalar_stmts, next);
1541
    }
1542
 
1543
  node = vect_create_new_slp_node (scalar_stmts);
1544
 
1545
  /* Calculate the number of vector stmts to create based on the unrolling
1546
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1547
     GROUP_SIZE / NUNITS otherwise.  */
1548
  ncopies_for_cost = unrolling_factor * group_size / nunits;
1549
 
1550
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
1551
  loads = VEC_alloc (slp_tree, heap, group_size);
1552
 
1553
  /* Build the tree for the SLP instance.  */
1554
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1555
                           &inside_cost, &outside_cost, ncopies_for_cost,
1556
                           &max_nunits, &load_permutation, &loads,
1557
                           vectorization_factor, &loads_permuted))
1558
    {
1559
      /* Calculate the unrolling factor based on the smallest type.  */
1560
      if (max_nunits > nunits)
1561
        unrolling_factor = least_common_multiple (max_nunits, group_size)
1562
                           / group_size;
1563
 
1564
      if (unrolling_factor != 1 && !loop_vinfo)
1565
        {
1566
          if (vect_print_dump_info (REPORT_SLP))
1567
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1568
                               " block SLP");
1569
          return false;
1570
        }
1571
 
1572
      /* Create a new SLP instance.  */
1573
      new_instance = XNEW (struct _slp_instance);
1574
      SLP_INSTANCE_TREE (new_instance) = node;
1575
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1576
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1577
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1578
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1579
      SLP_INSTANCE_LOADS (new_instance) = loads;
1580
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1581
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1582
 
1583
      if (loads_permuted)
1584
        {
1585
          if (!vect_supported_load_permutation_p (new_instance, group_size,
1586
                                                  load_permutation))
1587
            {
1588
              if (vect_print_dump_info (REPORT_SLP))
1589
                {
1590
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
1591
                                      "permutation ");
1592
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1593
                }
1594
 
1595
              vect_free_slp_instance (new_instance);
1596
              return false;
1597
            }
1598
 
1599
          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1600
             = vect_find_first_load_in_slp_instance (new_instance);
1601
        }
1602
      else
1603
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1604
 
1605
      if (loop_vinfo)
1606
        VEC_safe_push (slp_instance, heap,
1607
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1608
                       new_instance);
1609
      else
1610
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1611
                       new_instance);
1612
 
1613
      if (vect_print_dump_info (REPORT_SLP))
1614
        vect_print_slp_tree (node);
1615
 
1616
      return true;
1617
    }
1618
 
1619
  /* Failed to SLP.  */
1620
  /* Free the allocated memory.  */
1621
  vect_free_slp_tree (node);
1622
  VEC_free (int, heap, load_permutation);
1623
  VEC_free (slp_tree, heap, loads);
1624
 
1625
  return false;
1626
}
1627
 
1628
 
1629
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
1630
   trees of packed scalar stmts if SLP is possible.  */
1631
 
1632
bool
1633
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1634
{
1635
  unsigned int i;
1636
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1637
  gimple first_element;
1638
  bool ok = false;
1639
 
1640
  if (vect_print_dump_info (REPORT_SLP))
1641
    fprintf (vect_dump, "=== vect_analyze_slp ===");
1642
 
1643
  if (loop_vinfo)
1644
    {
1645
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1646
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1647
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1648
    }
1649
  else
1650
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1651
 
1652
  /* Find SLP sequences starting from groups of strided stores.  */
1653
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1654
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1655
      ok = true;
1656
 
1657
  if (bb_vinfo && !ok)
1658
    {
1659
      if (vect_print_dump_info (REPORT_SLP))
1660
        fprintf (vect_dump, "Failed to SLP the basic block.");
1661
 
1662
      return false;
1663
    }
1664
 
1665
  if (loop_vinfo
1666
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1667
    {
1668
      /* Find SLP sequences starting from reduction chains.  */
1669
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1670
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1671
          ok = true;
1672
        else
1673
          return false;
1674
 
1675
      /* Don't try to vectorize SLP reductions if reduction chain was
1676
         detected.  */
1677
      return ok;
1678
    }
1679
 
1680
  /* Find SLP sequences starting from groups of reductions.  */
1681
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1682
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1683
                                    VEC_index (gimple, reductions, 0)))
1684
    ok = true;
1685
 
1686
  return true;
1687
}
1688
 
1689
 
1690
/* For each possible SLP instance decide whether to SLP it and calculate overall
1691
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
1692
   least one instance.  */
1693
 
1694
bool
1695
vect_make_slp_decision (loop_vec_info loop_vinfo)
1696
{
1697
  unsigned int i, unrolling_factor = 1;
1698
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1699
  slp_instance instance;
1700
  int decided_to_slp = 0;
1701
 
1702
  if (vect_print_dump_info (REPORT_SLP))
1703
    fprintf (vect_dump, "=== vect_make_slp_decision ===");
1704
 
1705
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1706
    {
1707
      /* FORNOW: SLP if you can.  */
1708
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1709
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1710
 
1711
      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1712
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1713
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1714
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1715
      decided_to_slp++;
1716
    }
1717
 
1718
  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1719
 
1720
  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1721
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1722
             decided_to_slp, unrolling_factor);
1723
 
1724
  return (decided_to_slp > 0);
1725
}
1726
 
1727
 
1728
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1729
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1730
 
1731
static void
1732
vect_detect_hybrid_slp_stmts (slp_tree node)
1733
{
1734
  int i;
1735
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
1736
  gimple stmt = VEC_index (gimple, stmts, 0);
1737
  imm_use_iterator imm_iter;
1738
  gimple use_stmt;
1739
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1740
  slp_void_p child;
1741
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1742
  struct loop *loop = NULL;
1743
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1744
  basic_block bb = NULL;
1745
 
1746
  if (!node)
1747
    return;
1748
 
1749
  if (loop_vinfo)
1750
    loop = LOOP_VINFO_LOOP (loop_vinfo);
1751
  else
1752
    bb = BB_VINFO_BB (bb_vinfo);
1753
 
1754
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1755
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1756
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1757
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1758
        if (gimple_bb (use_stmt)
1759
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1760
                 || bb == gimple_bb (use_stmt))
1761
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1762
            && !STMT_SLP_TYPE (stmt_vinfo)
1763
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
1764
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1765
            && !(gimple_code (use_stmt) == GIMPLE_PHI
1766
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1767
                  == vect_reduction_def))
1768
          vect_mark_slp_stmts (node, hybrid, i);
1769
 
1770
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1771
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
1772
}
1773
 
1774
 
1775
/* Find stmts that must be both vectorized and SLPed.  */
1776
 
1777
void
1778
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1779
{
1780
  unsigned int i;
1781
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1782
  slp_instance instance;
1783
 
1784
  if (vect_print_dump_info (REPORT_SLP))
1785
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1786
 
1787
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1788
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1789
}
1790
 
1791
 
1792
/* Create and initialize a new bb_vec_info struct for BB, as well as
1793
   stmt_vec_info structs for all the stmts in it.  */
1794
 
1795
static bb_vec_info
1796
new_bb_vec_info (basic_block bb)
1797
{
1798
  bb_vec_info res = NULL;
1799
  gimple_stmt_iterator gsi;
1800
 
1801
  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1802
  BB_VINFO_BB (res) = bb;
1803
 
1804
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1805
    {
1806
      gimple stmt = gsi_stmt (gsi);
1807
      gimple_set_uid (stmt, 0);
1808
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1809
    }
1810
 
1811
  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1812
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1813
 
1814
  bb->aux = res;
1815
  return res;
1816
}
1817
 
1818
 
1819
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1820
   stmts in the basic block.  */
1821
 
1822
static void
1823
destroy_bb_vec_info (bb_vec_info bb_vinfo)
1824
{
1825
  basic_block bb;
1826
  gimple_stmt_iterator si;
1827
 
1828
  if (!bb_vinfo)
1829
    return;
1830
 
1831
  bb = BB_VINFO_BB (bb_vinfo);
1832
 
1833
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1834
    {
1835
      gimple stmt = gsi_stmt (si);
1836
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1837
 
1838
      if (stmt_info)
1839
        /* Free stmt_vec_info.  */
1840
        free_stmt_vec_info (stmt);
1841
    }
1842
 
1843
  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1844
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1845
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1846
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1847
  free (bb_vinfo);
1848
  bb->aux = NULL;
1849
}
1850
 
1851
 
1852
/* Analyze statements contained in SLP tree node after recursively analyzing
1853
   the subtree. Return TRUE if the operations are supported.  */
1854
 
1855
static bool
1856
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1857
{
1858
  bool dummy;
1859
  int i;
1860
  gimple stmt;
1861
  slp_void_p child;
1862
 
1863
  if (!node)
1864
    return true;
1865
 
1866
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1867
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1868
      return false;
1869
 
1870
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1871
    {
1872
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1873
      gcc_assert (stmt_info);
1874
      gcc_assert (PURE_SLP_STMT (stmt_info));
1875
 
1876
      if (!vect_analyze_stmt (stmt, &dummy, node))
1877
        return false;
1878
    }
1879
 
1880
  return true;
1881
}
1882
 
1883
 
1884
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1885
   operations are supported. */
1886
 
1887
static bool
1888
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1889
{
1890
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1891
  slp_instance instance;
1892
  int i;
1893
 
1894
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1895
    {
1896
      if (!vect_slp_analyze_node_operations (bb_vinfo,
1897
                                             SLP_INSTANCE_TREE (instance)))
1898
        {
1899
          vect_free_slp_instance (instance);
1900
          VEC_ordered_remove (slp_instance, slp_instances, i);
1901
        }
1902
      else
1903
        i++;
1904
    }
1905
 
1906
  if (!VEC_length (slp_instance, slp_instances))
1907
    return false;
1908
 
1909
  return true;
1910
}
1911
 
1912
/* Check if vectorization of the basic block is profitable.

   Compares the summed vector cost (inside + outside) of all SLP instances
   recorded in BB_VINFO against the scalar cost of the pure-SLP statements
   they replace, as reported by the target's vectorization cost hook.
   Return TRUE when the vector version is strictly cheaper.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  /* The target cost hook is queried without a specific vector type or
     misalignment value here.  */
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  Only statements that will actually be replaced
     (vectorizable pure-SLP stmts) are counted.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      /* Classify the stmt as load, store or plain statement for the
         target cost model.  */
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
1979
 
1980
/* Check if the basic block can be vectorized.

   Runs the full analysis pipeline on BB using the vector size currently
   selected in current_vector_size: data-ref discovery, dependence,
   alignment and access analysis, SLP-tree construction, per-statement
   operation checks, and finally the cost model.  On success return the
   populated bb_vec_info; on any failure report the reason, destroy the
   partially built info and return NULL.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Find the data references and the minimum vectorization factor they
     imply.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Without any data dependences there is nothing SLP can work on.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

   /* Dependences may cap the usable vectorization factor below the
      minimum the data-refs require.  */
   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
       || min_vf > max_vf)
     {
       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: unhandled data dependence "
                  "in basic block.\n");

       destroy_bb_vec_info (bb_vinfo);
       return NULL;
     }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
       fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                           "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
2107
 
2108
 
2109
/* Entry point for basic-block SLP analysis: try to vectorize BB, retrying
   the full analysis with successively smaller vector sizes until one
   succeeds or none remain.  Return the bb_vec_info on success, NULL
   otherwise.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count real instructions only; debug stmts, nops and labels do not
     count towards the size limit.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Give up on blocks too large for the analysis to be worthwhile.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      /* Remove the size just tried from the candidate set; stop when no
         sizes are left or autodetection yielded nothing.  */
      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}
2162
 
2163
 
2164
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2165
   the number of created vector stmts depends on the unrolling factor).
2166
   However, the actual number of vector stmts for every SLP node depends on
2167
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
2168
   should be updated.  In this function we assume that the inside costs
2169
   calculated in vect_model_xxx_cost are linear in ncopies.  */
2170
 
2171
void
2172
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2173
{
2174
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2175
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2176
  slp_instance instance;
2177
 
2178
  if (vect_print_dump_info (REPORT_SLP))
2179
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2180
 
2181
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2182
    /* We assume that costs are linear in ncopies.  */
2183
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2184
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2185
}
2186
 
2187
 
2188
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;

  /* For a reduction, pick the reduction operand and compute the neutral
     element of the reduction operation (used to pad extra copies).  */
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst0);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 0);

             break;

          case MULT_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst1);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 1);

             break;

          case BIT_AND_EXPR:
            /* All-ones is the neutral element of AND.  */
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            /* MIN/MAX have no constant neutral element; use the initial
               value coming into the loop through the PHI instead.  */
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  /* For stores the invariant operand is always RHS1.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* CONSTANT_P selects VECTOR_CST vs. CONSTRUCTOR below.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Iterate the group backwards because the element list T is built
         by consing onto the front.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              /* Extract operand OP_NUM of this stmt's RHS; the access
                 differs by statement shape.  */
              switch (code)
                {
                  case COND_EXPR:
                    if (op_num == 0 || op_num == 1)
                      {
                        tree cond = gimple_assign_rhs1 (stmt);
                        op = TREE_OPERAND (cond, op_num);
                      }
                    else
                      {
                        if (op_num == 2)
                          op = gimple_assign_rhs2 (stmt);
                        else
                          op = gimple_assign_rhs3 (stmt);
                      }
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                }
            }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* Once a vector is full, materialize it and start a new one.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          /* Reductions pad with the neutral element...  */
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          /* ...everything else repeats the vectors already created.  */
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
2412
 
2413
 
2414
/* Get vectorized definitions from SLP_NODE that contains corresponding
2415
   vectorized def-stmts.  */
2416
 
2417
static void
2418
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2419
{
2420
  tree vec_oprnd;
2421
  gimple vec_def_stmt;
2422
  unsigned int i;
2423
 
2424
  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2425
 
2426
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2427
    {
2428
      gcc_assert (vec_def_stmt);
2429
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
2430
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2431
    }
2432
}
2433
 
2434
 
2435
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.

   OPS holds the scalar operands; one VEC(tree) of vector defs per operand
   is pushed onto *VEC_OPRNDS.  REDUC_INDEX is the reduction operand index,
   or -1 if this is not a reduction.  */

void
vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
                   VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
{
  gimple first_stmt, first_def;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  VEC (tree, heap) *vec_defs;
  tree oprnd, def_lhs;
  bool vectorized_defs;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors (), and not advance CHILD_INDEX in order
         to check this child node for the next operand.  */
      vectorized_defs = false;
      if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
        {
          child = (slp_tree) VEC_index (slp_void_p,
                                        SLP_TREE_CHILDREN (slp_node),
                                        child_index);
          first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);

          /* In the end of a pattern sequence we have a use of the original stmt,
             so we need to compare OPRND with the original def.  */
          if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
              && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
              && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
            first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

          if (is_gimple_call (first_def))
            def_lhs = gimple_call_lhs (first_def);
          else
            def_lhs = gimple_assign_lhs (first_def);

          if (operand_equal_p (oprnd, def_lhs, 0))
            {
              /* The number of vector defs is determined by the number of
                 vector statements in the node from which we get those
                 statements.  */
                 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                 vectorized_defs = true;
              child_index++;
            }
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = VEC_alloc (tree, heap, number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects, reduc_index);

      VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}
2534
 
2535
 
2536
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  Pad the vector with NULLs up to its declared size so
     VEC_replace below can store at any slot.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                 first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      /* Advance to the pair of input vectors for the next copy.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized: record the last permute stmt
     created as its vectorized counterpart.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
2591
 
2592
 
2593
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   STMT is used only for dump messages.  M is an offset added to
   FIRST_MASK_ELEMENT.  MASK_NUNITS is the number of elements in one mask
   (and in one vector).  ONLY_ONE_VEC is true when only a single input
   vector is available.  INDEX is the position in MASK being filled; MASK
   holds the elements accumulated so far and may be adjusted in place.
   NUMBER_OF_MASK_FIXES, MASK_FIXED and NEEDS_FIRST_VECTOR carry state
   across consecutive calls while one mask is being built, and are reset
   when the mask is complete.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  /* An element below MASK_NUNITS selects from the first input vector.  */
  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      /* Re-base the elements already stored in MASK as well.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.

   STMT is the scalar load statement the permutation is built for, GSI the
   insertion point for the generated statements, and VF the final
   vectorization factor.  Returns false (with a dump message under
   REPORT_DETAILS) when the target cannot perform the required permute.  */
bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  /* First check that the target supports some permutation of this mode at
     all (NULL mask means "any mask").  */
  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type
    = lang_hooks.types.type_for_size
    (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT  (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      /* Reset per-node state: mask fill position, vector-statement slot and
         the pair of input vectors the next permute will read from.  */
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index =  vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              /* I is the position of this node's load inside the group.  */
              first_mask_element = i + j * group_size;
              if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                          nunits, only_one_vec, index,
                                          mask, &current_mask_element,
                                          &need_next_vector,
                                          &number_of_mask_fixes, &mask_fixed,
                                          &needs_first_vector))
                return false;
              mask[index++] = current_mask_element;

              /* A full mask has been collected - emit one permute.  */
              if (index == nunits)
                {
                  tree mask_vec = NULL;

                  /* Ask the target about this specific mask.  Note: the
                     dump loop below reuses I, which is fine only because
                     we return immediately afterwards.  */
                  if (!can_vec_perm_p (mode, false, mask))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute { ");
                          for (i = 0; i < nunits; ++i)
                            fprintf (vect_dump, "%d ", mask[i]);
                          fprintf (vect_dump, "}\n");
                        }
                      return false;
                    }

                  /* Build the constant mask vector (consing front-to-back,
                     hence the reverse walk).  */
                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!analyze_only)
                    {
                      /* Shift the input-vector window when the previous mask
                         exhausted the first vector.  */
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.

   NODE is the (sub)tree being transformed, INSTANCE its owning SLP
   instance, and VECTORIZATION_FACTOR the loop VF (1 for basic-block SLP).
   Children are transformed first, then the statements of NODE itself.
   Returns true iff the transformed statement was a store.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;
  slp_void_p child;

  if (!node)
    return false;

  /* Postorder: vectorize the children before this node.  */
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance ((slp_tree) child, instance,
                                vectorization_factor);

  /* The first scalar statement is representative for the whole node.  */
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
       last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2926
   For loop vectorization this is done in vectorizable_call, but for SLP
2927
   it needs to be deferred until end of vect_schedule_slp, because multiple
2928
   SLP instances may refer to the same scalar stmt.  */
2929
 
2930
static void
2931
vect_remove_slp_scalar_calls (slp_tree node)
2932
{
2933
  gimple stmt, new_stmt;
2934
  gimple_stmt_iterator gsi;
2935
  int i;
2936
  slp_void_p child;
2937
  tree lhs;
2938
  stmt_vec_info stmt_info;
2939
 
2940
  if (!node)
2941
    return;
2942
 
2943
  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2944
    vect_remove_slp_scalar_calls ((slp_tree) child);
2945
 
2946
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
2947
    {
2948
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2949
        continue;
2950
      stmt_info = vinfo_for_stmt (stmt);
2951
      if (stmt_info == NULL
2952
          || is_pattern_stmt_p (stmt_info)
2953
          || !PURE_SLP_STMT (stmt_info))
2954
        continue;
2955
      lhs = gimple_call_lhs (stmt);
2956
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2957
      set_vinfo_for_stmt (new_stmt, stmt_info);
2958
      set_vinfo_for_stmt (stmt, NULL);
2959
      STMT_VINFO_STMT (stmt_info) = new_stmt;
2960
      gsi = gsi_for_stmt (stmt);
2961
      gsi_replace (&gsi, new_stmt, false);
2962
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2963
    }
2964
}
/* Generate vector code for all SLP instances in the loop/basic block.  */
2967
 
2968
bool
2969
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2970
{
2971
  VEC (slp_instance, heap) *slp_instances;
2972
  slp_instance instance;
2973
  unsigned int i, vf;
2974
  bool is_store = false;
2975
 
2976
  if (loop_vinfo)
2977
    {
2978
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2979
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2980
    }
2981
  else
2982
    {
2983
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2984
      vf = 1;
2985
    }
2986
 
2987
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2988
    {
2989
      /* Schedule the tree of INSTANCE.  */
2990
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2991
                                             instance, vf);
2992
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2993
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2994
        fprintf (vect_dump, "vectorizing stmts using SLP.");
2995
    }
2996
 
2997
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2998
    {
2999
      slp_tree root = SLP_INSTANCE_TREE (instance);
3000
      gimple store;
3001
      unsigned int j;
3002
      gimple_stmt_iterator gsi;
3003
 
3004
      vect_remove_slp_scalar_calls (root);
3005
 
3006
      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3007
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3008
        {
3009
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3010
            break;
3011
 
3012
         if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3013
           store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3014
          /* Free the attached stmt_vec_info and remove the stmt.  */
3015
          gsi = gsi_for_stmt (store);
3016
          gsi_remove (&gsi, true);
3017
          free_stmt_vec_info (store);
3018
        }
3019
    }
3020
 
3021
  return is_store;
3022
}
/* Vectorize the basic block.  */
3026
 
3027
void
3028
vect_slp_transform_bb (basic_block bb)
3029
{
3030
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3031
  gimple_stmt_iterator si;
3032
 
3033
  gcc_assert (bb_vinfo);
3034
 
3035
  if (vect_print_dump_info (REPORT_DETAILS))
3036
    fprintf (vect_dump, "SLPing BB\n");
3037
 
3038
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3039
    {
3040
      gimple stmt = gsi_stmt (si);
3041
      stmt_vec_info stmt_info;
3042
 
3043
      if (vect_print_dump_info (REPORT_DETAILS))
3044
        {
3045
          fprintf (vect_dump, "------>SLPing statement: ");
3046
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3047
        }
3048
 
3049
      stmt_info = vinfo_for_stmt (stmt);
3050
      gcc_assert (stmt_info);
3051
 
3052
      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3053
      if (STMT_SLP_TYPE (stmt_info))
3054
        {
3055
          vect_schedule_slp (NULL, bb_vinfo);
3056
          break;
3057
        }
3058
    }
3059
 
3060
  mark_sym_for_renaming (gimple_vop (cfun));
3061
  /* The memory tags and pointers in vectorized statements need to
3062
     have their SSA forms updated.  FIXME, why can't this be delayed
3063
     until all the loops have been transformed?  */
3064
  update_ssa (TODO_update_ssa);
3065
 
3066
  if (vect_print_dump_info (REPORT_DETAILS))
3067
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3068
 
3069
  destroy_bb_vec_info (bb_vinfo);
3070
}