OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [tree-vect-slp.c] - Blame information for rev 318

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* SLP - Basic Block Vectorization
2
   Copyright (C) 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "target.h"
30
#include "basic-block.h"
31
#include "diagnostic.h"
32
#include "tree-flow.h"
33
#include "tree-dump.h"
34
#include "cfgloop.h"
35
#include "cfglayout.h"
36
#include "expr.h"
37
#include "recog.h"
38
#include "optabs.h"
39
#include "tree-vectorizer.h"
40
 
41
/* Extract the location of the basic block in the source code.
42
   Return the basic block location if succeed and NULL if not.  */
43
 
44
LOC
45
find_bb_location (basic_block bb)
46
{
47
  gimple stmt = NULL;
48
  gimple_stmt_iterator si;
49
 
50
  if (!bb)
51
    return UNKNOWN_LOC;
52
 
53
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54
    {
55
      stmt = gsi_stmt (si);
56
      if (gimple_location (stmt) != UNKNOWN_LOC)
57
        return gimple_location (stmt);
58
    }
59
 
60
  return UNKNOWN_LOC;
61
}
62
 
63
 
64
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
65
 
66
static void
67
vect_free_slp_tree (slp_tree node)
68
{
69
  if (!node)
70
    return;
71
 
72
  if (SLP_TREE_LEFT (node))
73
    vect_free_slp_tree (SLP_TREE_LEFT (node));
74
 
75
  if (SLP_TREE_RIGHT (node))
76
    vect_free_slp_tree (SLP_TREE_RIGHT (node));
77
 
78
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
79
 
80
  if (SLP_TREE_VEC_STMTS (node))
81
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
82
 
83
  free (node);
84
}
85
 
86
 
87
/* Free the memory allocated for the SLP instance.  */
88
 
89
void
90
vect_free_slp_instance (slp_instance instance)
91
{
92
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
95
}
96
 
97
 
98
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99
   they are of a legal type and that they match the defs of the first stmt of
100
   the SLP group (stored in FIRST_STMT_...).  */
101
 
102
static bool
103
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104
                             slp_tree slp_node, gimple stmt,
105
                             VEC (gimple, heap) **def_stmts0,
106
                             VEC (gimple, heap) **def_stmts1,
107
                             enum vect_def_type *first_stmt_dt0,
108
                             enum vect_def_type *first_stmt_dt1,
109
                             tree *first_stmt_def0_type,
110
                             tree *first_stmt_def1_type,
111
                             tree *first_stmt_const_oprnd,
112
                             int ncopies_for_cost,
113
                             bool *pattern0, bool *pattern1)
114
{
115
  tree oprnd;
116
  unsigned int i, number_of_oprnds;
117
  tree def;
118
  gimple def_stmt;
119
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120
  stmt_vec_info stmt_info =
121
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122
  enum gimple_rhs_class rhs_class;
123
  struct loop *loop = NULL;
124
 
125
  if (loop_vinfo)
126
    loop = LOOP_VINFO_LOOP (loop_vinfo);
127
 
128
  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129
  number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
130
 
131
  for (i = 0; i < number_of_oprnds; i++)
132
    {
133
      oprnd = gimple_op (stmt, i + 1);
134
 
135
      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136
                               &dt[i])
137
          || (!def_stmt && dt[i] != vect_constant_def))
138
        {
139
          if (vect_print_dump_info (REPORT_SLP))
140
            {
141
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
142
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
143
            }
144
 
145
          return false;
146
        }
147
 
148
      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149
         from the pattern. Check that all the stmts of the node are in the
150
         pattern.  */
151
      if (loop && def_stmt && gimple_bb (def_stmt)
152
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153
          && vinfo_for_stmt (def_stmt)
154
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
155
        {
156
          if (!*first_stmt_dt0)
157
            *pattern0 = true;
158
          else
159
            {
160
              if (i == 1 && !*first_stmt_dt1)
161
                *pattern1 = true;
162
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
163
                {
164
                  if (vect_print_dump_info (REPORT_DETAILS))
165
                    {
166
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
167
                                     " are in a pattern, and others are not ");
168
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
169
                    }
170
 
171
                  return false;
172
                }
173
            }
174
 
175
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
177
 
178
          if (*dt == vect_unknown_def_type)
179
            {
180
              if (vect_print_dump_info (REPORT_DETAILS))
181
                fprintf (vect_dump, "Unsupported pattern.");
182
              return false;
183
            }
184
 
185
          switch (gimple_code (def_stmt))
186
            {
187
              case GIMPLE_PHI:
188
                def = gimple_phi_result (def_stmt);
189
                break;
190
 
191
              case GIMPLE_ASSIGN:
192
                def = gimple_assign_lhs (def_stmt);
193
                break;
194
 
195
              default:
196
                if (vect_print_dump_info (REPORT_DETAILS))
197
                  fprintf (vect_dump, "unsupported defining stmt: ");
198
                return false;
199
            }
200
        }
201
 
202
      if (!*first_stmt_dt0)
203
        {
204
          /* op0 of the first stmt of the group - store its info.  */
205
          *first_stmt_dt0 = dt[i];
206
          if (def)
207
            *first_stmt_def0_type = TREE_TYPE (def);
208
          else
209
            *first_stmt_const_oprnd = oprnd;
210
 
211
          /* Analyze costs (for the first stmt of the group only).  */
212
          if (rhs_class != GIMPLE_SINGLE_RHS)
213
            /* Not memory operation (we don't call this functions for loads).  */
214
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215
          else
216
            /* Store.  */
217
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
218
        }
219
 
220
      else
221
        {
222
          if (!*first_stmt_dt1 && i == 1)
223
            {
224
              /* op1 of the first stmt of the group - store its info.  */
225
              *first_stmt_dt1 = dt[i];
226
              if (def)
227
                *first_stmt_def1_type = TREE_TYPE (def);
228
              else
229
                {
230
                  /* We assume that the stmt contains only one constant
231
                     operand. We fail otherwise, to be on the safe side.  */
232
                  if (*first_stmt_const_oprnd)
233
                    {
234
                      if (vect_print_dump_info (REPORT_SLP))
235
                        fprintf (vect_dump, "Build SLP failed: two constant "
236
                                 "oprnds in stmt");
237
                      return false;
238
                    }
239
                  *first_stmt_const_oprnd = oprnd;
240
                }
241
            }
242
          else
243
            {
244
              /* Not first stmt of the group, check that the def-stmt/s match
245
                 the def-stmt/s of the first stmt.  */
246
              if ((i == 0
247
                   && (*first_stmt_dt0 != dt[i]
248
                       || (*first_stmt_def0_type && def
249
                           && !types_compatible_p (*first_stmt_def0_type,
250
                                                   TREE_TYPE (def)))))
251
                  || (i == 1
252
                      && (*first_stmt_dt1 != dt[i]
253
                          || (*first_stmt_def1_type && def
254
                              && !types_compatible_p (*first_stmt_def1_type,
255
                                                      TREE_TYPE (def)))))
256
                  || (!def
257
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
258
                                              TREE_TYPE (oprnd))))
259
                {
260
                  if (vect_print_dump_info (REPORT_SLP))
261
                    fprintf (vect_dump, "Build SLP failed: different types ");
262
 
263
                  return false;
264
                }
265
            }
266
        }
267
 
268
      /* Check the types of the definitions.  */
269
      switch (dt[i])
270
        {
271
        case vect_constant_def:
272
        case vect_external_def:
273
          break;
274
 
275
        case vect_internal_def:
276
          if (i == 0)
277
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
278
          else
279
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
280
          break;
281
 
282
        default:
283
          /* FORNOW: Not supported.  */
284
          if (vect_print_dump_info (REPORT_SLP))
285
            {
286
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287
              print_generic_expr (vect_dump, def, TDF_SLIM);
288
            }
289
 
290
          return false;
291
        }
292
    }
293
 
294
  return true;
295
}
296
 
297
 
298
/* Recursively build an SLP tree starting from NODE.
299
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
300
   permutation or are of unsupported types of operation. Otherwise, return
301
   TRUE.  */
302
 
303
static bool
304
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
305
                     slp_tree *node, unsigned int group_size,
306
                     int *inside_cost, int *outside_cost,
307
                     int ncopies_for_cost, unsigned int *max_nunits,
308
                     VEC (int, heap) **load_permutation,
309
                     VEC (slp_tree, heap) **loads,
310
                     unsigned int vectorization_factor)
311
{
312
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
313
  VEC (gimple, heap) *def_stmts1 =  VEC_alloc (gimple, heap, group_size);
314
  unsigned int i;
315
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
316
  gimple stmt = VEC_index (gimple, stmts, 0);
317
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
318
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
319
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
320
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
321
  tree lhs;
322
  bool stop_recursion = false, need_same_oprnds = false;
323
  tree vectype, scalar_type, first_op1 = NULL_TREE;
324
  unsigned int ncopies;
325
  optab optab;
326
  int icode;
327
  enum machine_mode optab_op2_mode;
328
  enum machine_mode vec_mode;
329
  tree first_stmt_const_oprnd = NULL_TREE;
330
  struct data_reference *first_dr;
331
  bool pattern0 = false, pattern1 = false;
332
  HOST_WIDE_INT dummy;
333
  bool permutation = false;
334
  unsigned int load_place;
335
  gimple first_load;
336
 
337
  /* For every stmt in NODE find its def stmt/s.  */
338
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
339
    {
340
      if (vect_print_dump_info (REPORT_SLP))
341
        {
342
          fprintf (vect_dump, "Build SLP for ");
343
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
344
        }
345
 
346
      lhs = gimple_get_lhs (stmt);
347
      if (lhs == NULL_TREE)
348
        {
349
          if (vect_print_dump_info (REPORT_SLP))
350
            {
351
              fprintf (vect_dump,
352
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
353
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
354
            }
355
 
356
          return false;
357
        }
358
 
359
      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
360
      vectype = get_vectype_for_scalar_type (scalar_type);
361
      if (!vectype)
362
        {
363
          if (vect_print_dump_info (REPORT_SLP))
364
            {
365
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
366
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
367
            }
368
          return false;
369
        }
370
 
371
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
372
      if (ncopies != 1)
373
        {
374
          if (vect_print_dump_info (REPORT_SLP))
375
            fprintf (vect_dump, "SLP with multiple types ");
376
 
377
          /* FORNOW: multiple types are unsupported in BB SLP.  */
378
          if (bb_vinfo)
379
            return false;
380
        }
381
 
382
      /* In case of multiple types we need to detect the smallest type.  */
383
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
384
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
385
 
386
      if (is_gimple_call (stmt))
387
        rhs_code = CALL_EXPR;
388
      else
389
        rhs_code = gimple_assign_rhs_code (stmt);
390
 
391
      /* Check the operation.  */
392
      if (i == 0)
393
        {
394
          first_stmt_code = rhs_code;
395
 
396
          /* Shift arguments should be equal in all the packed stmts for a
397
             vector shift with scalar shift operand.  */
398
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
399
              || rhs_code == LROTATE_EXPR
400
              || rhs_code == RROTATE_EXPR)
401
            {
402
              vec_mode = TYPE_MODE (vectype);
403
 
404
              /* First see if we have a vector/vector shift.  */
405
              optab = optab_for_tree_code (rhs_code, vectype,
406
                                           optab_vector);
407
 
408
              if (!optab
409
                  || (optab->handlers[(int) vec_mode].insn_code
410
                      == CODE_FOR_nothing))
411
                {
412
                  /* No vector/vector shift, try for a vector/scalar shift.  */
413
                  optab = optab_for_tree_code (rhs_code, vectype,
414
                                               optab_scalar);
415
 
416
                  if (!optab)
417
                    {
418
                      if (vect_print_dump_info (REPORT_SLP))
419
                        fprintf (vect_dump, "Build SLP failed: no optab.");
420
                      return false;
421
                    }
422
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
423
                  if (icode == CODE_FOR_nothing)
424
                    {
425
                      if (vect_print_dump_info (REPORT_SLP))
426
                        fprintf (vect_dump, "Build SLP failed: "
427
                                            "op not supported by target.");
428
                      return false;
429
                    }
430
                  optab_op2_mode = insn_data[icode].operand[2].mode;
431
                  if (!VECTOR_MODE_P (optab_op2_mode))
432
                    {
433
                      need_same_oprnds = true;
434
                      first_op1 = gimple_assign_rhs2 (stmt);
435
                    }
436
                }
437
            }
438
        }
439
      else
440
        {
441
          if (first_stmt_code != rhs_code
442
              && (first_stmt_code != IMAGPART_EXPR
443
                  || rhs_code != REALPART_EXPR)
444
              && (first_stmt_code != REALPART_EXPR
445
                  || rhs_code != IMAGPART_EXPR))
446
            {
447
              if (vect_print_dump_info (REPORT_SLP))
448
                {
449
                  fprintf (vect_dump,
450
                           "Build SLP failed: different operation in stmt ");
451
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
452
                }
453
 
454
              return false;
455
            }
456
 
457
          if (need_same_oprnds
458
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
459
            {
460
              if (vect_print_dump_info (REPORT_SLP))
461
                {
462
                  fprintf (vect_dump,
463
                           "Build SLP failed: different shift arguments in ");
464
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
465
                }
466
 
467
              return false;
468
            }
469
        }
470
 
471
      /* Strided store or load.  */
472
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
473
        {
474
          if (REFERENCE_CLASS_P (lhs))
475
            {
476
              /* Store.  */
477
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
478
                                                stmt, &def_stmts0, &def_stmts1,
479
                                                &first_stmt_dt0,
480
                                                &first_stmt_dt1,
481
                                                &first_stmt_def0_type,
482
                                                &first_stmt_def1_type,
483
                                                &first_stmt_const_oprnd,
484
                                                ncopies_for_cost,
485
                                                &pattern0, &pattern1))
486
                return false;
487
            }
488
            else
489
              {
490
                /* Load.  */
491
                /* FORNOW: Check that there is no gap between the loads.  */
492
                if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
493
                     && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
494
                    || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
495
                        && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
496
                  {
497
                    if (vect_print_dump_info (REPORT_SLP))
498
                      {
499
                        fprintf (vect_dump, "Build SLP failed: strided "
500
                                            "loads have gaps ");
501
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
502
                      }
503
 
504
                    return false;
505
                  }
506
 
507
                /* Check that the size of interleaved loads group is not
508
                   greater than the SLP group size.  */
509
                if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
510
                    > ncopies * group_size)
511
                  {
512
                    if (vect_print_dump_info (REPORT_SLP))
513
                      {
514
                        fprintf (vect_dump, "Build SLP failed: the number of "
515
                                            "interleaved loads is greater than"
516
                                            " the SLP group size ");
517
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
518
                      }
519
 
520
                    return false;
521
                  }
522
 
523
                first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
524
 
525
              if (first_load == stmt)
526
                {
527
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
528
                  if (vect_supportable_dr_alignment (first_dr)
529
                      == dr_unaligned_unsupported)
530
                    {
531
                      if (vect_print_dump_info (REPORT_SLP))
532
                        {
533
                          fprintf (vect_dump, "Build SLP failed: unsupported "
534
                                              "unaligned load ");
535
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
536
                        }
537
 
538
                      return false;
539
                    }
540
 
541
                  /* Analyze costs (for the first stmt in the group).  */
542
                  vect_model_load_cost (vinfo_for_stmt (stmt),
543
                                        ncopies_for_cost, *node);
544
                }
545
 
546
              /* Store the place of this load in the interleaving chain. In
547
                 case that permutation is needed we later decide if a specific
548
                 permutation is supported.  */
549
              load_place = vect_get_place_in_interleaving_chain (stmt,
550
                                                                 first_load);
551
              if (load_place != i)
552
                permutation = true;
553
 
554
              VEC_safe_push (int, heap, *load_permutation, load_place);
555
 
556
              /* We stop the tree when we reach a group of loads.  */
557
              stop_recursion = true;
558
             continue;
559
           }
560
        } /* Strided access.  */
561
      else
562
        {
563
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
564
            {
565
              /* Not strided load. */
566
              if (vect_print_dump_info (REPORT_SLP))
567
                {
568
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
569
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
570
                }
571
 
572
              /* FORNOW: Not strided loads are not supported.  */
573
              return false;
574
            }
575
 
576
          /* Not memory operation.  */
577
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
578
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
579
            {
580
              if (vect_print_dump_info (REPORT_SLP))
581
                {
582
                  fprintf (vect_dump, "Build SLP failed: operation");
583
                  fprintf (vect_dump, " unsupported ");
584
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
585
                }
586
 
587
              return false;
588
            }
589
 
590
          /* Find the def-stmts.  */
591
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
592
                                            &def_stmts0, &def_stmts1,
593
                                            &first_stmt_dt0, &first_stmt_dt1,
594
                                            &first_stmt_def0_type,
595
                                            &first_stmt_def1_type,
596
                                            &first_stmt_const_oprnd,
597
                                            ncopies_for_cost,
598
                                            &pattern0, &pattern1))
599
            return false;
600
        }
601
    }
602
 
603
  /* Add the costs of the node to the overall instance costs.  */
604
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
605
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
606
 
607
  /* Strided loads were reached - stop the recursion.  */
608
  if (stop_recursion)
609
    {
610
      if (permutation)
611
        {
612
          VEC_safe_push (slp_tree, heap, *loads, *node);
613
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
614
        }
615
 
616
      return true;
617
    }
618
 
619
  /* Create SLP_TREE nodes for the definition node/s.  */
620
  if (first_stmt_dt0 == vect_internal_def)
621
    {
622
      slp_tree left_node = XNEW (struct _slp_tree);
623
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
624
      SLP_TREE_VEC_STMTS (left_node) = NULL;
625
      SLP_TREE_LEFT (left_node) = NULL;
626
      SLP_TREE_RIGHT (left_node) = NULL;
627
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
628
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
629
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
630
                                inside_cost, outside_cost, ncopies_for_cost,
631
                                max_nunits, load_permutation, loads,
632
                                vectorization_factor))
633
        return false;
634
 
635
      SLP_TREE_LEFT (*node) = left_node;
636
    }
637
 
638
  if (first_stmt_dt1 == vect_internal_def)
639
    {
640
      slp_tree right_node = XNEW (struct _slp_tree);
641
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
642
      SLP_TREE_VEC_STMTS (right_node) = NULL;
643
      SLP_TREE_LEFT (right_node) = NULL;
644
      SLP_TREE_RIGHT (right_node) = NULL;
645
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
646
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
647
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
648
                                inside_cost, outside_cost, ncopies_for_cost,
649
                                max_nunits, load_permutation, loads,
650
                                vectorization_factor))
651
        return false;
652
 
653
      SLP_TREE_RIGHT (*node) = right_node;
654
    }
655
 
656
  return true;
657
}
658
 
659
 
660
static void
661
vect_print_slp_tree (slp_tree node)
662
{
663
  int i;
664
  gimple stmt;
665
 
666
  if (!node)
667
    return;
668
 
669
  fprintf (vect_dump, "node ");
670
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
671
    {
672
      fprintf (vect_dump, "\n\tstmt %d ", i);
673
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
674
    }
675
  fprintf (vect_dump, "\n");
676
 
677
  vect_print_slp_tree (SLP_TREE_LEFT (node));
678
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
679
}
680
 
681
 
682
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684
   J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685
   stmts in NODE are to be marked.  */
686
 
687
static void
688
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
689
{
690
  int i;
691
  gimple stmt;
692
 
693
  if (!node)
694
    return;
695
 
696
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
697
    if (j < 0 || i == j)
698
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
699
 
700
  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
702
}
703
 
704
 
705
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
706
 
707
static void
708
vect_mark_slp_stmts_relevant (slp_tree node)
709
{
710
  int i;
711
  gimple stmt;
712
  stmt_vec_info stmt_info;
713
 
714
  if (!node)
715
    return;
716
 
717
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
718
    {
719
      stmt_info = vinfo_for_stmt (stmt);
720
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
723
    }
724
 
725
  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
727
}
728
 
729
 
730
/* Check if the permutation required by the SLP INSTANCE is supported.
731
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */
732
 
733
static bool
734
vect_supported_slp_permutation_p (slp_instance instance)
735
{
736
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
737
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
738
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
739
  VEC (slp_tree, heap) *sorted_loads = NULL;
740
  int index;
741
  slp_tree *tmp_loads = NULL;
742
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
743
  slp_tree load;
744
 
745
  /* FORNOW: The only supported loads permutation is loads from the same
746
     location in all the loads in the node, when the data-refs in
747
     nodes of LOADS constitute an interleaving chain.
748
     Sort the nodes according to the order of accesses in the chain.  */
749
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
750
  for (i = 0, j = 0;
751
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
752
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
753
       i += group_size, j++)
754
    {
755
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
756
      /* Check that the loads are all in the same interleaving chain.  */
757
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
758
        {
759
          if (vect_print_dump_info (REPORT_DETAILS))
760
            {
761
              fprintf (vect_dump, "Build SLP failed: unsupported data "
762
                                   "permutation ");
763
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
764
            }
765
 
766
          free (tmp_loads);
767
          return false;
768
        }
769
 
770
      tmp_loads[index] = load;
771
    }
772
 
773
  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
774
  for (i = 0; i < group_size; i++)
775
     VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
776
 
777
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
778
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
779
  free (tmp_loads);
780
 
781
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
782
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
783
                                     instance, true))
784
    return false;
785
 
786
  return true;
787
}
788
 
789
 
790
/* Check if the required load permutation is supported.
791
   LOAD_PERMUTATION contains a list of indices of the loads.
792
   In SLP this permutation is relative to the order of strided stores that are
793
   the base of the SLP instance.  */
794
 
795
static bool
796
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797
                                   VEC (int, heap) *load_permutation)
798
{
799
  int i = 0, j, prev = -1, next, k;
800
  bool supported;
801
  sbitmap load_index;
802
 
803
  /* FORNOW: permutations are only supported in SLP.  */
804
  if (!slp_instn)
805
    return false;
806
 
807
  if (vect_print_dump_info (REPORT_SLP))
808
    {
809
      fprintf (vect_dump, "Load permutation ");
810
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811
        fprintf (vect_dump, "%d ", next);
812
    }
813
 
814
  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
816
     well.  */
817
  if (VEC_length (int, load_permutation)
818
      != (unsigned int) (group_size * group_size))
819
    return false;
820
 
821
  supported = true;
822
  load_index = sbitmap_alloc (group_size);
823
  sbitmap_zero (load_index);
824
  for (j = 0; j < group_size; j++)
825
    {
826
      for (i = j * group_size, k = 0;
827
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
828
           i++, k++)
829
       {
830
         if (i != j * group_size && next != prev)
831
          {
832
            supported = false;
833
            break;
834
          }
835
 
836
         prev = next;
837
       }
838
 
839
      if (TEST_BIT (load_index, prev))
840
        {
841
          supported = false;
842
          break;
843
        }
844
 
845
      SET_BIT (load_index, prev);
846
    }
847
 
848
  for (j = 0; j < group_size; j++)
849
    if (!TEST_BIT (load_index, j))
850
      return false;
851
 
852
  sbitmap_free (load_index);
853
 
854
  if (supported && i == group_size * group_size
855
      && vect_supported_slp_permutation_p (slp_instn))
856
    return true;
857
 
858
  return false;
859
}
860
 
861
 
862
/* Find the first load in the loop that belongs to INSTANCE.
863
   When loads are in several SLP nodes, there can be a case in which the first
864
   load does not appear in the first SLP node to be transformed, causing
865
   incorrect order of statements. Since we generate all the loads together,
866
   they must be inserted before the first load of the SLP instance and not
867
   before the first load of the first node of the instance.  */
868
static gimple
869
vect_find_first_load_in_slp_instance (slp_instance instance)
870
{
871
  int i, j;
872
  slp_tree load_node;
873
  gimple first_load = NULL, load;
874
 
875
  for (i = 0;
876
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
877
       i++)
878
    for (j = 0;
879
         VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
880
         j++)
881
      first_load = get_earlier_stmt (load, first_load);
882
 
883
  return first_load;
884
}
885
 
886
 
887
/* Analyze an SLP instance starting from a group of strided stores. Call
888
   vect_build_slp_tree to build a tree of packed stmts if possible.
889
   Return FALSE if it's impossible to SLP any stmt in the loop.  */
890
 
891
static bool
892
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
893
                           gimple stmt)
894
{
895
  slp_instance new_instance;
896
  slp_tree node = XNEW (struct _slp_tree);
897
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
898
  unsigned int unrolling_factor = 1, nunits;
899
  tree vectype, scalar_type;
900
  gimple next;
901
  unsigned int vectorization_factor = 0;
902
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
903
  unsigned int max_nunits = 0;
904
  VEC (int, heap) *load_permutation;
905
  VEC (slp_tree, heap) *loads;
906
 
907
  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
908
                                             vinfo_for_stmt (stmt))));
909
  vectype = get_vectype_for_scalar_type (scalar_type);
910
  if (!vectype)
911
    {
912
      if (vect_print_dump_info (REPORT_SLP))
913
        {
914
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
915
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
916
        }
917
      return false;
918
    }
919
 
920
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
921
  if (loop_vinfo)
922
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
923
  else
924
    /* No multitypes in BB SLP.  */
925
    vectorization_factor = nunits;
926
 
927
  /* Calculate the unrolling factor.  */
928
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
929
  if (unrolling_factor != 1 && !loop_vinfo)
930
    {
931
      if (vect_print_dump_info (REPORT_SLP))
932
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
933
                            " block SLP");
934
 
935
      return false;
936
    }
937
 
938
  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
939
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
940
  next = stmt;
941
  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
942
  while (next)
943
    {
944
      VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
945
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
946
    }
947
 
948
  SLP_TREE_VEC_STMTS (node) = NULL;
949
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
950
  SLP_TREE_LEFT (node) = NULL;
951
  SLP_TREE_RIGHT (node) = NULL;
952
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
953
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
954
 
955
  /* Calculate the number of vector stmts to create based on the unrolling
956
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
957
     GROUP_SIZE / NUNITS otherwise.  */
958
  ncopies_for_cost = unrolling_factor * group_size / nunits;
959
 
960
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
961
  loads = VEC_alloc (slp_tree, heap, group_size);
962
 
963
  /* Build the tree for the SLP instance.  */
964
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
965
                           &inside_cost, &outside_cost, ncopies_for_cost,
966
                           &max_nunits, &load_permutation, &loads,
967
                           vectorization_factor))
968
    {
969
      /* Create a new SLP instance.  */
970
      new_instance = XNEW (struct _slp_instance);
971
      SLP_INSTANCE_TREE (new_instance) = node;
972
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
973
      /* Calculate the unrolling factor based on the smallest type in the
974
         loop.  */
975
      if (max_nunits > nunits)
976
        unrolling_factor = least_common_multiple (max_nunits, group_size)
977
                           / group_size;
978
 
979
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
980
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
981
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
982
      SLP_INSTANCE_LOADS (new_instance) = loads;
983
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
984
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
985
      if (VEC_length (slp_tree, loads))
986
        {
987
          if (!vect_supported_load_permutation_p (new_instance, group_size,
988
                                                  load_permutation))
989
            {
990
              if (vect_print_dump_info (REPORT_SLP))
991
                {
992
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
993
                                      "permutation ");
994
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
995
                }
996
 
997
              vect_free_slp_instance (new_instance);
998
              return false;
999
            }
1000
 
1001
          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1002
             = vect_find_first_load_in_slp_instance (new_instance);
1003
        }
1004
      else
1005
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1006
 
1007
      if (loop_vinfo)
1008
        VEC_safe_push (slp_instance, heap,
1009
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1010
                       new_instance);
1011
      else
1012
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1013
                       new_instance);
1014
 
1015
      if (vect_print_dump_info (REPORT_SLP))
1016
        vect_print_slp_tree (node);
1017
 
1018
      return true;
1019
    }
1020
 
1021
  /* Failed to SLP.  */
1022
  /* Free the allocated memory.  */
1023
  vect_free_slp_tree (node);
1024
  VEC_free (int, heap, load_permutation);
1025
  VEC_free (slp_tree, heap, loads);
1026
 
1027
  return false;
1028
}
1029
 
1030
 
1031
/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1032
   trees of packed scalar stmts if SLP is possible.  */
1033
 
1034
bool
1035
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1036
{
1037
  unsigned int i;
1038
  VEC (gimple, heap) *strided_stores;
1039
  gimple store;
1040
  bool ok = false;
1041
 
1042
  if (vect_print_dump_info (REPORT_SLP))
1043
    fprintf (vect_dump, "=== vect_analyze_slp ===");
1044
 
1045
  if (loop_vinfo)
1046
    strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1047
  else
1048
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1049
 
1050
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1051
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1052
      ok = true;
1053
 
1054
  if (bb_vinfo && !ok)
1055
    {
1056
      if (vect_print_dump_info (REPORT_SLP))
1057
        fprintf (vect_dump, "Failed to SLP the basic block.");
1058
 
1059
      return false;
1060
    }
1061
 
1062
  return true;
1063
}
1064
 
1065
 
1066
/* For each possible SLP instance decide whether to SLP it and calculate overall
1067
   unrolling factor needed to SLP the loop.  */
1068
 
1069
void
1070
vect_make_slp_decision (loop_vec_info loop_vinfo)
1071
{
1072
  unsigned int i, unrolling_factor = 1;
1073
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074
  slp_instance instance;
1075
  int decided_to_slp = 0;
1076
 
1077
  if (vect_print_dump_info (REPORT_SLP))
1078
    fprintf (vect_dump, "=== vect_make_slp_decision ===");
1079
 
1080
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1081
    {
1082
      /* FORNOW: SLP if you can.  */
1083
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1084
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1085
 
1086
      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1087
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1088
         loop-based vectorization. Such stmts will be marked as HYBRID.  */
1089
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1090
      decided_to_slp++;
1091
    }
1092
 
1093
  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1094
 
1095
  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1096
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1097
             decided_to_slp, unrolling_factor);
1098
}
1099
 
1100
 
1101
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1102
   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
1103
 
1104
static void
1105
vect_detect_hybrid_slp_stmts (slp_tree node)
1106
{
1107
  int i;
1108
  gimple stmt;
1109
  imm_use_iterator imm_iter;
1110
  gimple use_stmt;
1111
  stmt_vec_info stmt_vinfo;
1112
 
1113
  if (!node)
1114
    return;
1115
 
1116
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1117
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1118
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1119
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1120
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1121
            && !STMT_SLP_TYPE (stmt_vinfo)
1122
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
1123
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
1124
          vect_mark_slp_stmts (node, hybrid, i);
1125
 
1126
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1127
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1128
}
1129
 
1130
 
1131
/* Find stmts that must be both vectorized and SLPed.  */
1132
 
1133
void
1134
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1135
{
1136
  unsigned int i;
1137
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1138
  slp_instance instance;
1139
 
1140
  if (vect_print_dump_info (REPORT_SLP))
1141
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1142
 
1143
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1144
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1145
}
1146
 
1147
 
1148
/* Create and initialize a new bb_vec_info struct for BB, as well as
1149
   stmt_vec_info structs for all the stmts in it.  */
1150
 
1151
static bb_vec_info
1152
new_bb_vec_info (basic_block bb)
1153
{
1154
  bb_vec_info res = NULL;
1155
  gimple_stmt_iterator gsi;
1156
 
1157
  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1158
  BB_VINFO_BB (res) = bb;
1159
 
1160
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1161
    {
1162
      gimple stmt = gsi_stmt (gsi);
1163
      gimple_set_uid (stmt, 0);
1164
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1165
    }
1166
 
1167
  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1168
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1169
 
1170
  bb->aux = res;
1171
  return res;
1172
}
1173
 
1174
 
1175
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1176
   stmts in the basic block.  */
1177
 
1178
static void
1179
destroy_bb_vec_info (bb_vec_info bb_vinfo)
1180
{
1181
  basic_block bb;
1182
  gimple_stmt_iterator si;
1183
 
1184
  if (!bb_vinfo)
1185
    return;
1186
 
1187
  bb = BB_VINFO_BB (bb_vinfo);
1188
 
1189
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1190
    {
1191
      gimple stmt = gsi_stmt (si);
1192
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1193
 
1194
      if (stmt_info)
1195
        /* Free stmt_vec_info.  */
1196
        free_stmt_vec_info (stmt);
1197
    }
1198
 
1199
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1200
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1201
  free (bb_vinfo);
1202
  bb->aux = NULL;
1203
}
1204
 
1205
 
1206
/* Analyze statements contained in SLP tree node after recursively analyzing
1207
   the subtree. Return TRUE if the operations are supported.  */
1208
 
1209
static bool
1210
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1211
{
1212
  bool dummy;
1213
  int i;
1214
  gimple stmt;
1215
 
1216
  if (!node)
1217
    return true;
1218
 
1219
  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1220
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1221
    return false;
1222
 
1223
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1224
    {
1225
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226
      gcc_assert (stmt_info);
1227
      gcc_assert (PURE_SLP_STMT (stmt_info));
1228
 
1229
      if (!vect_analyze_stmt (stmt, &dummy, node))
1230
        return false;
1231
    }
1232
 
1233
  return true;
1234
}
1235
 
1236
 
1237
/* Analyze statements in SLP instances of the basic block. Return TRUE if the
1238
   operations are supported. */
1239
 
1240
static bool
1241
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1242
{
1243
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1244
  slp_instance instance;
1245
  int i;
1246
 
1247
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1248
    {
1249
      if (!vect_slp_analyze_node_operations (bb_vinfo,
1250
                                             SLP_INSTANCE_TREE (instance)))
1251
        {
1252
          vect_free_slp_instance (instance);
1253
          VEC_ordered_remove (slp_instance, slp_instances, i);
1254
        }
1255
      else
1256
        i++;
1257
    }
1258
 
1259
  if (!VEC_length (slp_instance, slp_instances))
1260
    return false;
1261
 
1262
  return true;
1263
}
1264
 
1265
 
1266
/* Cheick if the basic block can be vectorized.  */
1267
 
1268
bb_vec_info
1269
vect_slp_analyze_bb (basic_block bb)
1270
{
1271
  bb_vec_info bb_vinfo;
1272
  VEC (ddr_p, heap) *ddrs;
1273
  VEC (slp_instance, heap) *slp_instances;
1274
  slp_instance instance;
1275
  int i, insns = 0;
1276
  gimple_stmt_iterator gsi;
1277
 
1278
  if (vect_print_dump_info (REPORT_DETAILS))
1279
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1280
 
1281
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1282
    {
1283
      gimple stmt = gsi_stmt (gsi);
1284
      if (!is_gimple_debug (stmt)
1285
          && !gimple_nop_p (stmt)
1286
          && gimple_code (stmt) != GIMPLE_LABEL)
1287
        insns++;
1288
    }
1289
 
1290
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1291
    {
1292
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1293
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
1294
                            "block.\n");
1295
 
1296
      return NULL;
1297
    }
1298
 
1299
  bb_vinfo = new_bb_vec_info (bb);
1300
  if (!bb_vinfo)
1301
    return NULL;
1302
 
1303
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
1304
    {
1305
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1306
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1307
                            "block.\n");
1308
 
1309
      destroy_bb_vec_info (bb_vinfo);
1310
      return NULL;
1311
    }
1312
 
1313
  ddrs = BB_VINFO_DDRS (bb_vinfo);
1314
  if (!VEC_length (ddr_p, ddrs))
1315
    {
1316
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1317
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1318
                            "block.\n");
1319
 
1320
      destroy_bb_vec_info (bb_vinfo);
1321
      return NULL;
1322
    }
1323
 
1324
  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1325
    {
1326
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1327
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1328
                            "block.\n");
1329
 
1330
      destroy_bb_vec_info (bb_vinfo);
1331
      return NULL;
1332
    }
1333
 
1334
   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
1335
    {
1336
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1337
       fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
1338
                           " block.\n");
1339
 
1340
      destroy_bb_vec_info (bb_vinfo);
1341
      return NULL;
1342
    }
1343
 
1344
  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1345
    {
1346
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1347
       fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1348
                           "block.\n");
1349
 
1350
      destroy_bb_vec_info (bb_vinfo);
1351
      return NULL;
1352
    }
1353
 
1354
   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1355
    {
1356
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1357
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1358
                            "block.\n");
1359
 
1360
      destroy_bb_vec_info (bb_vinfo);
1361
      return NULL;
1362
    }
1363
 
1364
  /* Check the SLP opportunities in the basic block, analyze and build SLP
1365
     trees.  */
1366
  if (!vect_analyze_slp (NULL, bb_vinfo))
1367
    {
1368
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1369
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1370
                            "in basic block.\n");
1371
 
1372
      destroy_bb_vec_info (bb_vinfo);
1373
      return NULL;
1374
    }
1375
 
1376
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1377
 
1378
  /* Mark all the statements that we want to vectorize as pure SLP and
1379
     relevant.  */
1380
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1381
    {
1382
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1383
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1384
    }
1385
 
1386
  if (!vect_slp_analyze_operations (bb_vinfo))
1387
    {
1388
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1389
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1390
 
1391
      destroy_bb_vec_info (bb_vinfo);
1392
      return NULL;
1393
    }
1394
 
1395
  if (vect_print_dump_info (REPORT_DETAILS))
1396
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1397
 
1398
  return bb_vinfo;
1399
}
1400
 
1401
 
1402
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1403
   the number of created vector stmts depends on the unrolling factor). However,
1404
   the actual number of vector stmts for every SLP node depends on VF which is
1405
   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1406
   In this function we assume that the inside costs calculated in
1407
   vect_model_xxx_cost are linear in ncopies.  */
1408
 
1409
void
1410
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1411
{
1412
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1413
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1414
  slp_instance instance;
1415
 
1416
  if (vect_print_dump_info (REPORT_SLP))
1417
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1418
 
1419
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1420
    /* We assume that costs are linear in ncopies.  */
1421
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1422
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1423
}
1424
 
1425
 
1426
/* For constant and loop invariant defs of SLP_NODE this function returns
1427
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1428
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1429
   stmts. NUMBER_OF_VECTORS is the number of vector defs to create.  */
1430
 
1431
static void
1432
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1433
                           unsigned int op_num, unsigned int number_of_vectors)
1434
{
1435
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1436
  gimple stmt = VEC_index (gimple, stmts, 0);
1437
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1438
  int nunits;
1439
  tree vec_cst;
1440
  tree t = NULL_TREE;
1441
  int j, number_of_places_left_in_vector;
1442
  tree vector_type;
1443
  tree op, vop;
1444
  int group_size = VEC_length (gimple, stmts);
1445
  unsigned int vec_num, i;
1446
  int number_of_copies = 1;
1447
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1448
  bool constant_p, is_store;
1449
 
1450
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
1451
    {
1452
      is_store = true;
1453
      op = gimple_assign_rhs1 (stmt);
1454
    }
1455
  else
1456
    {
1457
      is_store = false;
1458
      op = gimple_op (stmt, op_num + 1);
1459
    }
1460
 
1461
  if (CONSTANT_CLASS_P (op))
1462
    constant_p = true;
1463
  else
1464
    constant_p = false;
1465
 
1466
  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1467
  gcc_assert (vector_type);
1468
 
1469
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1470
 
1471
  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1472
     created vectors. It is greater than 1 if unrolling is performed.
1473
 
1474
     For example, we have two scalar operands, s1 and s2 (e.g., group of
1475
     strided accesses of size two), while NUNITS is four (i.e., four scalars
1476
     of this type can be packed in a vector). The output vector will contain
1477
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1478
     will be 2).
1479
 
1480
     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1481
     containing the operands.
1482
 
1483
     For example, NUNITS is four as before, and the group size is 8
1484
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1485
     {s5, s6, s7, s8}.  */
1486
 
1487
  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1488
 
1489
  number_of_places_left_in_vector = nunits;
1490
  for (j = 0; j < number_of_copies; j++)
1491
    {
1492
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1493
        {
1494
          if (is_store)
1495
            op = gimple_assign_rhs1 (stmt);
1496
          else
1497
            op = gimple_op (stmt, op_num + 1);
1498
 
1499
          /* Create 'vect_ = {op0,op1,...,opn}'.  */
1500
          t = tree_cons (NULL_TREE, op, t);
1501
 
1502
          number_of_places_left_in_vector--;
1503
 
1504
          if (number_of_places_left_in_vector == 0)
1505
            {
1506
              number_of_places_left_in_vector = nunits;
1507
 
1508
              if (constant_p)
1509
                vec_cst = build_vector (vector_type, t);
1510
              else
1511
                vec_cst = build_constructor_from_list (vector_type, t);
1512
              VEC_quick_push (tree, voprnds,
1513
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
1514
              t = NULL_TREE;
1515
            }
1516
        }
1517
    }
1518
 
1519
  /* Since the vectors are created in the reverse order, we should invert
1520
     them.  */
1521
  vec_num = VEC_length (tree, voprnds);
1522
  for (j = vec_num - 1; j >= 0; j--)
1523
    {
1524
      vop = VEC_index (tree, voprnds, j);
1525
      VEC_quick_push (tree, *vec_oprnds, vop);
1526
    }
1527
 
1528
  VEC_free (tree, heap, voprnds);
1529
 
1530
  /* In case that VF is greater than the unrolling factor needed for the SLP
1531
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
1532
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1533
     to replicate the vectors.  */
1534
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1535
    {
1536
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1537
        VEC_quick_push (tree, *vec_oprnds, vop);
1538
    }
1539
}
1540
 
1541
 
1542
/* Get vectorized definitions from SLP_NODE that contains corresponding
1543
   vectorized def-stmts.  */
1544
 
1545
static void
1546
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1547
{
1548
  tree vec_oprnd;
1549
  gimple vec_def_stmt;
1550
  unsigned int i;
1551
 
1552
  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1553
 
1554
  for (i = 0;
1555
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1556
       i++)
1557
    {
1558
      gcc_assert (vec_def_stmt);
1559
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
1560
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1561
    }
1562
}
1563
 
1564
 
1565
/* Get vectorized definitions for SLP_NODE.
1566
   If the scalar definitions are loop invariants or constants, collect them and
1567
   call vect_get_constant_vectors() to create vector stmts.
1568
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1569
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1570
   vect_get_slp_vect_defs() to retrieve them.
1571
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1572
   the right node. This is used when the second operand must remain scalar.  */
1573
 
1574
void
1575
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1576
                   VEC (tree,heap) **vec_oprnds1)
1577
{
1578
  gimple first_stmt;
1579
  enum tree_code code;
1580
  int number_of_vects;
1581
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1582
 
1583
  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1584
  /* The number of vector defs is determined by the number of vector statements
1585
     in the node from which we get those statements.  */
1586
  if (SLP_TREE_LEFT (slp_node))
1587
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1588
  else
1589
    {
1590
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1591
      /* Number of vector stmts was calculated according to LHS in
1592
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1593
         necessary. See vect_get_smallest_scalar_type() for details.  */
1594
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1595
                                     &rhs_size_unit);
1596
      if (rhs_size_unit != lhs_size_unit)
1597
        {
1598
          number_of_vects *= rhs_size_unit;
1599
          number_of_vects /= lhs_size_unit;
1600
        }
1601
    }
1602
 
1603
  /* Allocate memory for vectorized defs.  */
1604
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1605
 
1606
  /* SLP_NODE corresponds either to a group of stores or to a group of
1607
     unary/binary operations. We don't call this function for loads.  */
1608
  if (SLP_TREE_LEFT (slp_node))
1609
    /* The defs are already vectorized.  */
1610
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1611
  else
1612
    /* Build vectors from scalar defs.  */
1613
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1614
 
1615
  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1616
    /* Since we don't call this function with loads, this is a group of
1617
       stores.  */
1618
    return;
1619
 
1620
  code = gimple_assign_rhs_code (first_stmt);
1621
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1622
    return;
1623
 
1624
  /* The number of vector defs is determined by the number of vector statements
1625
     in the node from which we get those statements.  */
1626
  if (SLP_TREE_RIGHT (slp_node))
1627
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1628
  else
1629
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1630
 
1631
  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1632
 
1633
  if (SLP_TREE_RIGHT (slp_node))
1634
    /* The defs are already vectorized.  */
1635
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1636
  else
1637
    /* Build vectors from scalar defs.  */
1638
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1639
}
1640
 
1641
 
1642
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1643
   building a vector of type MASK_TYPE from it) and two input vectors placed in
1644
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1645
   shifting by STRIDE elements of DR_CHAIN for every copy.
1646
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
1647
   copies).
1648
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1649
   the created stmts must be inserted.  */
1650
 
1651
static inline void
1652
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1653
                           tree mask, int first_vec_indx, int second_vec_indx,
1654
                           gimple_stmt_iterator *gsi, slp_tree node,
1655
                           tree builtin_decl, tree vectype,
1656
                           VEC(tree,heap) *dr_chain,
1657
                           int ncopies, int vect_stmts_counter)
1658
{
1659
  tree perm_dest;
1660
  gimple perm_stmt = NULL;
1661
  stmt_vec_info next_stmt_info;
1662
  int i, stride;
1663
  tree first_vec, second_vec, data_ref;
1664
  VEC (tree, heap) *params = NULL;
1665
 
1666
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1667
 
1668
  /* Initialize the vect stmts of NODE to properly insert the generated
1669
     stmts later.  */
1670
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1671
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1672
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1673
 
1674
  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1675
  for (i = 0; i < ncopies; i++)
1676
    {
1677
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1678
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1679
 
1680
      /* Build argument list for the vectorized call.  */
1681
      VEC_free (tree, heap, params);
1682
      params = VEC_alloc (tree, heap, 3);
1683
      VEC_quick_push (tree, params, first_vec);
1684
      VEC_quick_push (tree, params, second_vec);
1685
      VEC_quick_push (tree, params, mask);
1686
 
1687
      /* Generate the permute statement.  */
1688
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
1689
      data_ref = make_ssa_name (perm_dest, perm_stmt);
1690
      gimple_call_set_lhs (perm_stmt, data_ref);
1691
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1692
 
1693
      /* Store the vector statement in NODE.  */
1694
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1695
                   stride * i + vect_stmts_counter, perm_stmt);
1696
 
1697
      first_vec_indx += stride;
1698
      second_vec_indx += stride;
1699
    }
1700
 
1701
  /* Mark the scalar stmt as vectorized.  */
1702
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1703
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1704
}
1705
 
1706
 
1707
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1708
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
1709
   representation. Check that the mask is valid and return FALSE if not.
1710
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1711
   the next vector, i.e., the current first vector is not needed.  */
1712
 
1713
static bool
1714
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1715
                       int mask_nunits, bool only_one_vec, int index,
1716
                       int *mask, int *current_mask_element,
1717
                       bool *need_next_vector)
1718
{
1719
  int i;
1720
  static int number_of_mask_fixes = 1;
1721
  static bool mask_fixed = false;
1722
  static bool needs_first_vector = false;
1723
 
1724
  /* Convert to target specific representation.  */
1725
  *current_mask_element = first_mask_element + m;
1726
  /* Adjust the value in case it's a mask for second and third vectors.  */
1727
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1728
 
1729
  if (*current_mask_element < mask_nunits)
1730
    needs_first_vector = true;
1731
 
1732
  /* We have only one input vector to permute but the mask accesses values in
1733
     the next vector as well.  */
1734
  if (only_one_vec && *current_mask_element >= mask_nunits)
1735
    {
1736
      if (vect_print_dump_info (REPORT_DETAILS))
1737
        {
1738
          fprintf (vect_dump, "permutation requires at least two vectors ");
1739
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1740
        }
1741
 
1742
      return false;
1743
    }
1744
 
1745
  /* The mask requires the next vector.  */
1746
  if (*current_mask_element >= mask_nunits * 2)
1747
    {
1748
      if (needs_first_vector || mask_fixed)
1749
        {
1750
          /* We either need the first vector too or have already moved to the
1751
             next vector. In both cases, this permutation needs three
1752
             vectors.  */
1753
          if (vect_print_dump_info (REPORT_DETAILS))
1754
            {
1755
              fprintf (vect_dump, "permutation requires at "
1756
                                  "least three vectors ");
1757
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1758
            }
1759
 
1760
          return false;
1761
        }
1762
 
1763
      /* We move to the next vector, dropping the first one and working with
1764
         the second and the third - we need to adjust the values of the mask
1765
         accordingly.  */
1766
      *current_mask_element -= mask_nunits * number_of_mask_fixes;
1767
 
1768
      for (i = 0; i < index; i++)
1769
        mask[i] -= mask_nunits * number_of_mask_fixes;
1770
 
1771
      (number_of_mask_fixes)++;
1772
      mask_fixed = true;
1773
    }
1774
 
1775
  *need_next_vector = mask_fixed;
1776
 
1777
  /* This was the last element of this mask. Start a new one.  */
1778
  if (index == mask_nunits - 1)
1779
    {
1780
      number_of_mask_fixes = 1;
1781
      mask_fixed = false;
1782
      needs_first_vector = false;
1783
    }
1784
 
1785
  return true;
1786
}
1787
 
1788
 
1789
/* Generate vector permute statements from a list of loads in DR_CHAIN.
1790
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1791
   permute statements for SLP_NODE_INSTANCE.  */
1792
bool
1793
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1794
                              gimple_stmt_iterator *gsi, int vf,
1795
                              slp_instance slp_node_instance, bool analyze_only)
1796
{
1797
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1798
  tree mask_element_type = NULL_TREE, mask_type;
1799
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1800
  slp_tree node;
1801
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1802
  gimple next_scalar_stmt;
1803
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1804
  int first_mask_element;
1805
  int index, unroll_factor, *mask, current_mask_element, ncopies;
1806
  bool only_one_vec = false, need_next_vector = false;
1807
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1808
 
1809
  if (!targetm.vectorize.builtin_vec_perm)
1810
    {
1811
      if (vect_print_dump_info (REPORT_DETAILS))
1812
        {
1813
          fprintf (vect_dump, "no builtin for vect permute for ");
1814
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1815
        }
1816
 
1817
       return false;
1818
    }
1819
 
1820
  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1821
                                                     &mask_element_type);
1822
  if (!builtin_decl || !mask_element_type)
1823
    {
1824
      if (vect_print_dump_info (REPORT_DETAILS))
1825
        {
1826
          fprintf (vect_dump, "no builtin for vect permute for ");
1827
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1828
        }
1829
 
1830
       return false;
1831
    }
1832
 
1833
  mask_type = get_vectype_for_scalar_type (mask_element_type);
1834
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1835
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1836
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
1837
  scale = mask_nunits / nunits;
1838
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1839
 
1840
  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1841
     unrolling factor.  */
1842
  orig_vec_stmts_num = group_size *
1843
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1844
  if (orig_vec_stmts_num == 1)
1845
    only_one_vec = true;
1846
 
1847
  /* Number of copies is determined by the final vectorization factor
1848
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
1849
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1850
 
1851
  /* Generate permutation masks for every NODE. Number of masks for each NODE
1852
     is equal to GROUP_SIZE.
1853
     E.g., we have a group of three nodes with three loads from the same
1854
     location in each node, and the vector size is 4. I.e., we have a
1855
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
1856
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1857
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1858
     ...
1859
 
1860
     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1861
     scpecific type, e.g., in bytes for Altivec.
1862
     The last mask is illegal since we assume two operands for permute
1863
     operation, and the mask element values can't be outside that range. Hence,
1864
     the last mask must be converted into {2,5,5,5}.
1865
     For the first two permutations we need the first and the second input
1866
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1867
     we need the second and the third vectors: {b1,c1,a2,b2} and
1868
     {c2,a3,b3,c3}.  */
1869
 
1870
  for (i = 0;
1871
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1872
                    i, node);
1873
       i++)
1874
    {
1875
      scalar_index = 0;
1876
      index = 0;
1877
      vect_stmts_counter = 0;
1878
      vec_index = 0;
1879
      first_vec_index = vec_index++;
1880
      if (only_one_vec)
1881
        second_vec_index = first_vec_index;
1882
      else
1883
        second_vec_index =  vec_index++;
1884
 
1885
      for (j = 0; j < unroll_factor; j++)
1886
        {
1887
          for (k = 0; k < group_size; k++)
1888
            {
1889
              first_mask_element = (i + j * group_size) * scale;
1890
              for (m = 0; m < scale; m++)
1891
                {
1892
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
1893
                                   mask_nunits, only_one_vec, index, mask,
1894
                                   &current_mask_element, &need_next_vector))
1895
                    return false;
1896
 
1897
                  mask[index++] = current_mask_element;
1898
                }
1899
 
1900
              if (index == mask_nunits)
1901
                {
1902
                  tree mask_vec = NULL;
1903
 
1904
                  while (--index >= 0)
1905
                    {
1906
                      tree t = build_int_cst (mask_element_type, mask[index]);
1907
                      mask_vec = tree_cons (NULL, t, mask_vec);
1908
                    }
1909
                  mask_vec = build_vector (mask_type, mask_vec);
1910
                  index = 0;
1911
 
1912
                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1913
                                                              mask_vec))
1914
                    {
1915
                      if (vect_print_dump_info (REPORT_DETAILS))
1916
                        {
1917
                          fprintf (vect_dump, "unsupported vect permute ");
1918
                          print_generic_expr (vect_dump, mask_vec, 0);
1919
                        }
1920
                      free (mask);
1921
                      return false;
1922
                    }
1923
 
1924
                  if (!analyze_only)
1925
                    {
1926
                      if (need_next_vector)
1927
                        {
1928
                          first_vec_index = second_vec_index;
1929
                          second_vec_index = vec_index;
1930
                        }
1931
 
1932
                      next_scalar_stmt = VEC_index (gimple,
1933
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1934
 
1935
                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
1936
                               mask_vec, first_vec_index, second_vec_index,
1937
                               gsi, node, builtin_decl, vectype, dr_chain,
1938
                               ncopies, vect_stmts_counter++);
1939
                    }
1940
                }
1941
            }
1942
        }
1943
    }
1944
 
1945
  free (mask);
1946
  return true;
1947
}
1948
 
1949
 
1950
 
1951
/* Vectorize SLP instance tree in postorder.  */
1952
 
1953
static bool
1954
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1955
                            unsigned int vectorization_factor)
1956
{
1957
  gimple stmt;
1958
  bool strided_store, is_store;
1959
  gimple_stmt_iterator si;
1960
  stmt_vec_info stmt_info;
1961
  unsigned int vec_stmts_size, nunits, group_size;
1962
  tree vectype;
1963
  int i;
1964
  slp_tree loads_node;
1965
 
1966
  if (!node)
1967
    return false;
1968
 
1969
  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1970
                              vectorization_factor);
1971
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1972
                              vectorization_factor);
1973
 
1974
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1975
  stmt_info = vinfo_for_stmt (stmt);
1976
 
1977
  /* VECTYPE is the type of the destination.  */
1978
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1979
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1980
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1981
 
1982
  /* For each SLP instance calculate number of vector stmts to be created
1983
     for the scalar stmts in each node of the SLP tree. Number of vector
1984
     elements in one vector iteration is the number of scalar elements in
1985
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1986
     size.  */
1987
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
1988
 
1989
  /* In case of load permutation we have to allocate vectorized statements for
1990
     all the nodes that participate in that permutation.  */
1991
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1992
    {
1993
      for (i = 0;
1994
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1995
           i++)
1996
        {
1997
          if (!SLP_TREE_VEC_STMTS (loads_node))
1998
            {
1999
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2000
                                                           vec_stmts_size);
2001
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2002
            }
2003
        }
2004
    }
2005
 
2006
  if (!SLP_TREE_VEC_STMTS (node))
2007
    {
2008
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2009
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2010
    }
2011
 
2012
  if (vect_print_dump_info (REPORT_DETAILS))
2013
    {
2014
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2015
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2016
    }
2017
 
2018
  /* Loads should be inserted before the first load.  */
2019
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2020
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2021
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2022
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2023
  else
2024
    si = gsi_for_stmt (stmt);
2025
 
2026
  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2027
  if (is_store)
2028
    {
2029
      if (DR_GROUP_FIRST_DR (stmt_info))
2030
        /* If IS_STORE is TRUE, the vectorization of the
2031
           interleaving chain was completed - free all the stores in
2032
           the chain.  */
2033
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2034
      else
2035
        /* FORNOW: SLP originates only from strided stores.  */
2036
        gcc_unreachable ();
2037
 
2038
      return true;
2039
    }
2040
 
2041
  /* FORNOW: SLP originates only from strided stores.  */
2042
  return false;
2043
}
2044
 
2045
 
2046
bool
2047
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2048
{
2049
  VEC (slp_instance, heap) *slp_instances;
2050
  slp_instance instance;
2051
  unsigned int i, vf;
2052
  bool is_store = false;
2053
 
2054
  if (loop_vinfo)
2055
    {
2056
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2057
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2058
    }
2059
  else
2060
    {
2061
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2062
      vf = 1;
2063
    }
2064
 
2065
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2066
    {
2067
      /* Schedule the tree of INSTANCE.  */
2068
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2069
                                             instance, vf);
2070
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2071
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2072
        fprintf (vect_dump, "vectorizing stmts using SLP.");
2073
    }
2074
 
2075
  return is_store;
2076
}
2077
 
2078
 
2079
/* Vectorize the basic block.  */
2080
 
2081
void
2082
vect_slp_transform_bb (basic_block bb)
2083
{
2084
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2085
  gimple_stmt_iterator si;
2086
 
2087
  gcc_assert (bb_vinfo);
2088
 
2089
  if (vect_print_dump_info (REPORT_DETAILS))
2090
    fprintf (vect_dump, "SLPing BB\n");
2091
 
2092
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2093
    {
2094
      gimple stmt = gsi_stmt (si);
2095
      stmt_vec_info stmt_info;
2096
 
2097
      if (vect_print_dump_info (REPORT_DETAILS))
2098
        {
2099
          fprintf (vect_dump, "------>SLPing statement: ");
2100
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2101
        }
2102
 
2103
      stmt_info = vinfo_for_stmt (stmt);
2104
      gcc_assert (stmt_info);
2105
 
2106
      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
2107
      if (STMT_SLP_TYPE (stmt_info))
2108
        {
2109
          vect_schedule_slp (NULL, bb_vinfo);
2110
          break;
2111
        }
2112
    }
2113
 
2114
  mark_sym_for_renaming (gimple_vop (cfun));
2115
  /* The memory tags and pointers in vectorized statements need to
2116
     have their SSA forms updated.  FIXME, why can't this be delayed
2117
     until all the loops have been transformed?  */
2118
  update_ssa (TODO_update_ssa);
2119
 
2120
  if (vect_print_dump_info (REPORT_DETAILS))
2121
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2122
 
2123
  destroy_bb_vec_info (bb_vinfo);
2124
}
2125
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.