OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-vect-stmts.c] - Blame information for rev 849

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Statement Analysis and Transformation for Vectorization
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "target.h"
30
#include "basic-block.h"
31
#include "tree-pretty-print.h"
32
#include "gimple-pretty-print.h"
33
#include "tree-flow.h"
34
#include "tree-dump.h"
35
#include "cfgloop.h"
36
#include "cfglayout.h"
37
#include "expr.h"
38
#include "recog.h"
39
#include "optabs.h"
40
#include "diagnostic-core.h"
41
#include "tree-vectorizer.h"
42
#include "langhooks.h"
43
 
44
 
45
/* Return a variable of type ELEM_TYPE[NELEMS].  */
46
 
47
static tree
48
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49
{
50
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51
                         "vect_array");
52
}
53
 
54
/* ARRAY is an array of vectors created by create_vector_array.
55
   Return an SSA_NAME for the vector in index N.  The reference
56
   is part of the vectorization of STMT and the vector is associated
57
   with scalar destination SCALAR_DEST.  */
58
 
59
static tree
60
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61
                   tree array, unsigned HOST_WIDE_INT n)
62
{
63
  tree vect_type, vect, vect_name, array_ref;
64
  gimple new_stmt;
65
 
66
  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67
  vect_type = TREE_TYPE (TREE_TYPE (array));
68
  vect = vect_create_destination_var (scalar_dest, vect_type);
69
  array_ref = build4 (ARRAY_REF, vect_type, array,
70
                      build_int_cst (size_type_node, n),
71
                      NULL_TREE, NULL_TREE);
72
 
73
  new_stmt = gimple_build_assign (vect, array_ref);
74
  vect_name = make_ssa_name (vect, new_stmt);
75
  gimple_assign_set_lhs (new_stmt, vect_name);
76
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
77
  mark_symbols_for_renaming (new_stmt);
78
 
79
  return vect_name;
80
}
81
 
82
/* ARRAY is an array of vectors created by create_vector_array.
83
   Emit code to store SSA_NAME VECT in index N of the array.
84
   The store is part of the vectorization of STMT.  */
85
 
86
static void
87
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88
                    tree array, unsigned HOST_WIDE_INT n)
89
{
90
  tree array_ref;
91
  gimple new_stmt;
92
 
93
  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94
                      build_int_cst (size_type_node, n),
95
                      NULL_TREE, NULL_TREE);
96
 
97
  new_stmt = gimple_build_assign (array_ref, vect);
98
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
99
  mark_symbols_for_renaming (new_stmt);
100
}
101
 
102
/* PTR is a pointer to an array of type TYPE.  Return a representation
103
   of *PTR.  The memory reference replaces those in FIRST_DR
104
   (and its group).  */
105
 
106
static tree
107
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108
{
109
  struct ptr_info_def *pi;
110
  tree mem_ref, alias_ptr_type;
111
 
112
  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114
  /* Arrays have the same alignment as their type.  */
115
  pi = get_ptr_info (ptr);
116
  pi->align = TYPE_ALIGN_UNIT (type);
117
  pi->misalign = 0;
118
  return mem_ref;
119
}
120
 
121
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
122
 
123
/* Function vect_mark_relevant.
124
 
125
   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
126
 
127
static void
128
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129
                    enum vect_relevant relevant, bool live_p,
130
                    bool used_in_pattern)
131
{
132
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135
  gimple pattern_stmt;
136
 
137
  if (vect_print_dump_info (REPORT_DETAILS))
138
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
 
140
  /* If this stmt is an original stmt in a pattern, we might need to mark its
141
     related pattern stmt instead of the original stmt.  However, such stmts
142
     may have their own uses that are not in any pattern, in such cases the
143
     stmt itself should be marked.  */
144
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145
    {
146
      bool found = false;
147
      if (!used_in_pattern)
148
        {
149
          imm_use_iterator imm_iter;
150
          use_operand_p use_p;
151
          gimple use_stmt;
152
          tree lhs;
153
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
154
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
155
 
156
          if (is_gimple_assign (stmt))
157
            lhs = gimple_assign_lhs (stmt);
158
          else
159
            lhs = gimple_call_lhs (stmt);
160
 
161
          /* This use is out of pattern use, if LHS has other uses that are
162
             pattern uses, we should mark the stmt itself, and not the pattern
163
             stmt.  */
164
          if (TREE_CODE (lhs) == SSA_NAME)
165
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
166
              {
167
                if (is_gimple_debug (USE_STMT (use_p)))
168
                  continue;
169
                use_stmt = USE_STMT (use_p);
170
 
171
                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
172
                  continue;
173
 
174
                if (vinfo_for_stmt (use_stmt)
175
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
176
                  {
177
                    found = true;
178
                    break;
179
                  }
180
              }
181
        }
182
 
183
      if (!found)
184
        {
185
          /* This is the last stmt in a sequence that was detected as a
186
             pattern that can potentially be vectorized.  Don't mark the stmt
187
             as relevant/live because it's not going to be vectorized.
188
             Instead mark the pattern-stmt that replaces it.  */
189
 
190
          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
191
 
192
          if (vect_print_dump_info (REPORT_DETAILS))
193
            fprintf (vect_dump, "last stmt in pattern. don't mark"
194
                                " relevant/live.");
195
          stmt_info = vinfo_for_stmt (pattern_stmt);
196
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
197
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
198
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
199
          stmt = pattern_stmt;
200
        }
201
    }
202
 
203
  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
204
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
205
    STMT_VINFO_RELEVANT (stmt_info) = relevant;
206
 
207
  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
208
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
209
    {
210
      if (vect_print_dump_info (REPORT_DETAILS))
211
        fprintf (vect_dump, "already marked relevant/live.");
212
      return;
213
    }
214
 
215
  VEC_safe_push (gimple, heap, *worklist, stmt);
216
}
217
 
218
 
219
/* Function vect_stmt_relevant_p.
220
 
221
   Return true if STMT in loop that is represented by LOOP_VINFO is
222
   "relevant for vectorization".
223
 
224
   A stmt is considered "relevant for vectorization" if:
225
   - it has uses outside the loop.
226
   - it has vdefs (it alters memory).
227
   - control stmts in the loop (except for the exit condition).
228
 
229
   CHECKME: what other side effects would the vectorizer allow?  */
230
 
231
static bool
232
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
233
                      enum vect_relevant *relevant, bool *live_p)
234
{
235
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
236
  ssa_op_iter op_iter;
237
  imm_use_iterator imm_iter;
238
  use_operand_p use_p;
239
  def_operand_p def_p;
240
 
241
  *relevant = vect_unused_in_scope;
242
  *live_p = false;
243
 
244
  /* cond stmt other than loop exit cond.  */
245
  if (is_ctrl_stmt (stmt)
246
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
247
         != loop_exit_ctrl_vec_info_type)
248
    *relevant = vect_used_in_scope;
249
 
250
  /* changing memory.  */
251
  if (gimple_code (stmt) != GIMPLE_PHI)
252
    if (gimple_vdef (stmt))
253
      {
254
        if (vect_print_dump_info (REPORT_DETAILS))
255
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
256
        *relevant = vect_used_in_scope;
257
      }
258
 
259
  /* uses outside the loop.  */
260
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
261
    {
262
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
263
        {
264
          basic_block bb = gimple_bb (USE_STMT (use_p));
265
          if (!flow_bb_inside_loop_p (loop, bb))
266
            {
267
              if (vect_print_dump_info (REPORT_DETAILS))
268
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
269
 
270
              if (is_gimple_debug (USE_STMT (use_p)))
271
                continue;
272
 
273
              /* We expect all such uses to be in the loop exit phis
274
                 (because of loop closed form)   */
275
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
276
              gcc_assert (bb == single_exit (loop)->dest);
277
 
278
              *live_p = true;
279
            }
280
        }
281
    }
282
 
283
  return (*live_p || *relevant);
284
}
285
 
286
 
287
/* Function exist_non_indexing_operands_for_use_p
288
 
289
   USE is one of the uses attached to STMT.  Check if USE is
290
   used in STMT for anything other than indexing an array.  */
291
 
292
static bool
293
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
294
{
295
  tree operand;
296
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
297
 
298
  /* USE corresponds to some operand in STMT.  If there is no data
299
     reference in STMT, then any operand that corresponds to USE
300
     is not indexing an array.  */
301
  if (!STMT_VINFO_DATA_REF (stmt_info))
302
    return true;
303
 
304
  /* STMT has a data_ref. FORNOW this means that its of one of
305
     the following forms:
306
     -1- ARRAY_REF = var
307
     -2- var = ARRAY_REF
308
     (This should have been verified in analyze_data_refs).
309
 
310
     'var' in the second case corresponds to a def, not a use,
311
     so USE cannot correspond to any operands that are not used
312
     for array indexing.
313
 
314
     Therefore, all we need to check is if STMT falls into the
315
     first case, and whether var corresponds to USE.  */
316
 
317
  if (!gimple_assign_copy_p (stmt))
318
    return false;
319
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
320
    return false;
321
  operand = gimple_assign_rhs1 (stmt);
322
  if (TREE_CODE (operand) != SSA_NAME)
323
    return false;
324
 
325
  if (operand == use)
326
    return true;
327
 
328
  return false;
329
}
330
 
331
 
332
/*
333
   Function process_use.
334
 
335
   Inputs:
336
   - a USE in STMT in a loop represented by LOOP_VINFO
337
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
338
     that defined USE.  This is done by calling mark_relevant and passing it
339
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
340
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
341
     be performed.
342
 
343
   Outputs:
344
   Generally, LIVE_P and RELEVANT are used to define the liveness and
345
   relevance info of the DEF_STMT of this USE:
346
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
347
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
348
   Exceptions:
349
   - case 1: If USE is used only for address computations (e.g. array indexing),
350
   which does not need to be directly vectorized, then the liveness/relevance
351
   of the respective DEF_STMT is left unchanged.
352
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
353
   skip DEF_STMT cause it had already been processed.
354
   - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will
355
   be modified accordingly.
356
 
357
   Return true if everything is as expected. Return false otherwise.  */
358
 
359
static bool
360
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
361
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
362
             bool force)
363
{
364
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
365
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
366
  stmt_vec_info dstmt_vinfo;
367
  basic_block bb, def_bb;
368
  tree def;
369
  gimple def_stmt;
370
  enum vect_def_type dt;
371
 
372
  /* case 1: we are only interested in uses that need to be vectorized.  Uses
373
     that are used for address computation are not considered relevant.  */
374
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
375
     return true;
376
 
377
  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
378
    {
379
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
380
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
381
      return false;
382
    }
383
 
384
  if (!def_stmt || gimple_nop_p (def_stmt))
385
    return true;
386
 
387
  def_bb = gimple_bb (def_stmt);
388
  if (!flow_bb_inside_loop_p (loop, def_bb))
389
    {
390
      if (vect_print_dump_info (REPORT_DETAILS))
391
        fprintf (vect_dump, "def_stmt is out of loop.");
392
      return true;
393
    }
394
 
395
  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
396
     DEF_STMT must have already been processed, because this should be the
397
     only way that STMT, which is a reduction-phi, was put in the worklist,
398
     as there should be no other uses for DEF_STMT in the loop.  So we just
399
     check that everything is as expected, and we are done.  */
400
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
401
  bb = gimple_bb (stmt);
402
  if (gimple_code (stmt) == GIMPLE_PHI
403
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
404
      && gimple_code (def_stmt) != GIMPLE_PHI
405
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
406
      && bb->loop_father == def_bb->loop_father)
407
    {
408
      if (vect_print_dump_info (REPORT_DETAILS))
409
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
410
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
411
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
412
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
413
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
414
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
415
      return true;
416
    }
417
 
418
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
419
        outer-loop-header-bb:
420
                d = def_stmt
421
        inner-loop:
422
                stmt # use (d)
423
        outer-loop-tail-bb:
424
                ...               */
425
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
426
    {
427
      if (vect_print_dump_info (REPORT_DETAILS))
428
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
429
 
430
      switch (relevant)
431
        {
432
        case vect_unused_in_scope:
433
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
434
                      vect_used_in_scope : vect_unused_in_scope;
435
          break;
436
 
437
        case vect_used_in_outer_by_reduction:
438
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439
          relevant = vect_used_by_reduction;
440
          break;
441
 
442
        case vect_used_in_outer:
443
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
444
          relevant = vect_used_in_scope;
445
          break;
446
 
447
        case vect_used_in_scope:
448
          break;
449
 
450
        default:
451
          gcc_unreachable ();
452
        }
453
    }
454
 
455
  /* case 3b: inner-loop stmt defining an outer-loop stmt:
456
        outer-loop-header-bb:
457
                ...
458
        inner-loop:
459
                d = def_stmt
460
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
461
                stmt # use (d)          */
462
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
463
    {
464
      if (vect_print_dump_info (REPORT_DETAILS))
465
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
466
 
467
      switch (relevant)
468
        {
469
        case vect_unused_in_scope:
470
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
472
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
473
          break;
474
 
475
        case vect_used_by_reduction:
476
          relevant = vect_used_in_outer_by_reduction;
477
          break;
478
 
479
        case vect_used_in_scope:
480
          relevant = vect_used_in_outer;
481
          break;
482
 
483
        default:
484
          gcc_unreachable ();
485
        }
486
    }
487
 
488
  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
489
                      is_pattern_stmt_p (stmt_vinfo));
490
  return true;
491
}
492
 
493
 
494
/* Function vect_mark_stmts_to_be_vectorized.
495
 
496
   Not all stmts in the loop need to be vectorized. For example:
497
 
498
     for i...
499
       for j...
500
   1.    T0 = i + j
501
   2.    T1 = a[T0]
502
 
503
   3.    j = j + 1
504
 
505
   Stmt 1 and 3 do not need to be vectorized, because loop control and
506
   addressing of vectorized data-refs are handled differently.
507
 
508
   This pass detects such stmts.  */
509
 
510
bool
511
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
512
{
513
  VEC(gimple,heap) *worklist;
514
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
515
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
516
  unsigned int nbbs = loop->num_nodes;
517
  gimple_stmt_iterator si;
518
  gimple stmt;
519
  unsigned int i;
520
  stmt_vec_info stmt_vinfo;
521
  basic_block bb;
522
  gimple phi;
523
  bool live_p;
524
  enum vect_relevant relevant, tmp_relevant;
525
  enum vect_def_type def_type;
526
 
527
  if (vect_print_dump_info (REPORT_DETAILS))
528
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
529
 
530
  worklist = VEC_alloc (gimple, heap, 64);
531
 
532
  /* 1. Init worklist.  */
533
  for (i = 0; i < nbbs; i++)
534
    {
535
      bb = bbs[i];
536
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
537
        {
538
          phi = gsi_stmt (si);
539
          if (vect_print_dump_info (REPORT_DETAILS))
540
            {
541
              fprintf (vect_dump, "init: phi relevant? ");
542
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
543
            }
544
 
545
          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
546
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
547
        }
548
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
549
        {
550
          stmt = gsi_stmt (si);
551
          if (vect_print_dump_info (REPORT_DETAILS))
552
            {
553
              fprintf (vect_dump, "init: stmt relevant? ");
554
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
555
            }
556
 
557
          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
558
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
559
        }
560
    }
561
 
562
  /* 2. Process_worklist */
563
  while (VEC_length (gimple, worklist) > 0)
564
    {
565
      use_operand_p use_p;
566
      ssa_op_iter iter;
567
 
568
      stmt = VEC_pop (gimple, worklist);
569
      if (vect_print_dump_info (REPORT_DETAILS))
570
        {
571
          fprintf (vect_dump, "worklist: examine stmt: ");
572
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
573
        }
574
 
575
      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
576
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
577
         liveness and relevance properties of STMT.  */
578
      stmt_vinfo = vinfo_for_stmt (stmt);
579
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
580
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
581
 
582
      /* Generally, the liveness and relevance properties of STMT are
583
         propagated as is to the DEF_STMTs of its USEs:
584
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
585
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
586
 
587
         One exception is when STMT has been identified as defining a reduction
588
         variable; in this case we set the liveness/relevance as follows:
589
           live_p = false
590
           relevant = vect_used_by_reduction
591
         This is because we distinguish between two kinds of relevant stmts -
592
         those that are used by a reduction computation, and those that are
593
         (also) used by a regular computation.  This allows us later on to
594
         identify stmts that are used solely by a reduction, and therefore the
595
         order of the results that they produce does not have to be kept.  */
596
 
597
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
598
      tmp_relevant = relevant;
599
      switch (def_type)
600
        {
601
          case vect_reduction_def:
602
            switch (tmp_relevant)
603
              {
604
                case vect_unused_in_scope:
605
                  relevant = vect_used_by_reduction;
606
                  break;
607
 
608
                case vect_used_by_reduction:
609
                  if (gimple_code (stmt) == GIMPLE_PHI)
610
                    break;
611
                  /* fall through */
612
 
613
                default:
614
                  if (vect_print_dump_info (REPORT_DETAILS))
615
                    fprintf (vect_dump, "unsupported use of reduction.");
616
 
617
                  VEC_free (gimple, heap, worklist);
618
                  return false;
619
              }
620
 
621
            live_p = false;
622
            break;
623
 
624
          case vect_nested_cycle:
625
            if (tmp_relevant != vect_unused_in_scope
626
                && tmp_relevant != vect_used_in_outer_by_reduction
627
                && tmp_relevant != vect_used_in_outer)
628
              {
629
                if (vect_print_dump_info (REPORT_DETAILS))
630
                  fprintf (vect_dump, "unsupported use of nested cycle.");
631
 
632
                VEC_free (gimple, heap, worklist);
633
                return false;
634
              }
635
 
636
            live_p = false;
637
            break;
638
 
639
          case vect_double_reduction_def:
640
            if (tmp_relevant != vect_unused_in_scope
641
                && tmp_relevant != vect_used_by_reduction)
642
              {
643
                if (vect_print_dump_info (REPORT_DETAILS))
644
                  fprintf (vect_dump, "unsupported use of double reduction.");
645
 
646
                VEC_free (gimple, heap, worklist);
647
                return false;
648
              }
649
 
650
            live_p = false;
651
            break;
652
 
653
          default:
654
            break;
655
        }
656
 
657
      if (is_pattern_stmt_p (stmt_vinfo))
658
        {
659
          /* Pattern statements are not inserted into the code, so
660
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
661
             have to scan the RHS or function arguments instead.  */
662
          if (is_gimple_assign (stmt))
663
            {
664
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
665
              tree op = gimple_assign_rhs1 (stmt);
666
 
667
              i = 1;
668
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
669
                {
670
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
671
                                    live_p, relevant, &worklist, false)
672
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
673
                                       live_p, relevant, &worklist, false))
674
                    {
675
                      VEC_free (gimple, heap, worklist);
676
                      return false;
677
                    }
678
                  i = 2;
679
                }
680
              for (; i < gimple_num_ops (stmt); i++)
681
                {
682
                  op = gimple_op (stmt, i);
683
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
684
                                    &worklist, false))
685
                    {
686
                      VEC_free (gimple, heap, worklist);
687
                      return false;
688
                    }
689
                 }
690
            }
691
          else if (is_gimple_call (stmt))
692
            {
693
              for (i = 0; i < gimple_call_num_args (stmt); i++)
694
                {
695
                  tree arg = gimple_call_arg (stmt, i);
696
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
697
                                    &worklist, false))
698
                    {
699
                      VEC_free (gimple, heap, worklist);
700
                      return false;
701
                    }
702
                }
703
            }
704
        }
705
      else
706
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
707
          {
708
            tree op = USE_FROM_PTR (use_p);
709
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
710
                              &worklist, false))
711
              {
712
                VEC_free (gimple, heap, worklist);
713
                return false;
714
              }
715
          }
716
 
717
      if (STMT_VINFO_GATHER_P (stmt_vinfo))
718
        {
719
          tree off;
720
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
721
          gcc_assert (decl);
722
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
723
                            &worklist, true))
724
            {
725
              VEC_free (gimple, heap, worklist);
726
              return false;
727
            }
728
        }
729
    } /* while worklist */
730
 
731
  VEC_free (gimple, heap, worklist);
732
  return true;
733
}
734
 
735
 
736
/* Get cost by calling cost target builtin.  */
737
 
738
static inline
739
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
740
{
741
  tree dummy_type = NULL;
742
  int dummy = 0;
743
 
744
  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
745
                                                       dummy_type, dummy);
746
}
747
 
748
 
749
/* Get cost for STMT.  */
750
 
751
int
752
cost_for_stmt (gimple stmt)
753
{
754
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
755
 
756
  switch (STMT_VINFO_TYPE (stmt_info))
757
  {
758
  case load_vec_info_type:
759
    return vect_get_stmt_cost (scalar_load);
760
  case store_vec_info_type:
761
    return vect_get_stmt_cost (scalar_store);
762
  case op_vec_info_type:
763
  case condition_vec_info_type:
764
  case assignment_vec_info_type:
765
  case reduc_vec_info_type:
766
  case induc_vec_info_type:
767
  case type_promotion_vec_info_type:
768
  case type_demotion_vec_info_type:
769
  case type_conversion_vec_info_type:
770
  case call_vec_info_type:
771
    return vect_get_stmt_cost (scalar_stmt);
772
  case undef_vec_info_type:
773
  default:
774
    gcc_unreachable ();
775
  }
776
}
777
 
778
/* Function vect_model_simple_cost.
779
 
780
   Models cost for simple operations, i.e. those that only emit ncopies of a
781
   single op.  Right now, this does not account for multiple insns that could
782
   be generated for the single vector op.  We will handle that shortly.  */
783
 
784
void
785
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
786
                        enum vect_def_type *dt, slp_tree slp_node)
787
{
788
  int i;
789
  int inside_cost = 0, outside_cost = 0;
790
 
791
  /* The SLP costs were already calculated during SLP tree build.  */
792
  if (PURE_SLP_STMT (stmt_info))
793
    return;
794
 
795
  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
796
 
797
  /* FORNOW: Assuming maximum 2 args per stmts.  */
798
  for (i = 0; i < 2; i++)
799
    {
800
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
801
        outside_cost += vect_get_stmt_cost (vector_stmt);
802
    }
803
 
804
  if (vect_print_dump_info (REPORT_COST))
805
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
806
             "outside_cost = %d .", inside_cost, outside_cost);
807
 
808
  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
809
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
810
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
811
}
812
 
813
 
814
/* Model cost for type demotion and promotion operations.  PWR is normally
815
   zero for single-step promotions and demotions.  It will be one if
816
   two-step promotion/demotion is required, and so on.  Each additional
817
   step doubles the number of instructions required.  */
818
 
819
static void
820
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
821
                                    enum vect_def_type *dt, int pwr)
822
{
823
  int i, tmp;
824
  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
825
 
826
  /* The SLP costs were already calculated during SLP tree build.  */
827
  if (PURE_SLP_STMT (stmt_info))
828
    return;
829
 
830
  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
831
  for (i = 0; i < pwr + 1; i++)
832
    {
833
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
834
        (i + 1) : i;
835
      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
836
    }
837
 
838
  /* FORNOW: Assuming maximum 2 args per stmts.  */
839
  for (i = 0; i < 2; i++)
840
    {
841
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842
        outside_cost += vect_get_stmt_cost (vector_stmt);
843
    }
844
 
845
  if (vect_print_dump_info (REPORT_COST))
846
    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
847
             "outside_cost = %d .", inside_cost, outside_cost);
848
 
849
  /* Set the costs in STMT_INFO.  */
850
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
851
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
852
}
853
 
854
/* Function vect_cost_strided_group_size
855
 
856
   For strided load or store, return the group_size only if it is the first
857
   load or store of a group, else return 1.  This ensures that group size is
858
   only returned once per group.  */
859
 
860
static int
861
vect_cost_strided_group_size (stmt_vec_info stmt_info)
862
{
863
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
864
 
865
  if (first_stmt == STMT_VINFO_STMT (stmt_info))
866
    return GROUP_SIZE (stmt_info);
867
 
868
  return 1;
869
}
870
 
871
 
872
/* Function vect_model_store_cost
873
 
874
   Models cost for stores.  In the case of strided accesses, one access
875
   has the overhead of the strided access attributed to it.  */
876
 
877
void
878
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
879
                       bool store_lanes_p, enum vect_def_type dt,
880
                       slp_tree slp_node)
881
{
882
  int group_size;
883
  unsigned int inside_cost = 0, outside_cost = 0;
884
  struct data_reference *first_dr;
885
  gimple first_stmt;
886
 
887
  /* The SLP costs were already calculated during SLP tree build.  */
888
  if (PURE_SLP_STMT (stmt_info))
889
    return;
890
 
891
  if (dt == vect_constant_def || dt == vect_external_def)
892
    outside_cost = vect_get_stmt_cost (scalar_to_vec);
893
 
894
  /* Strided access?  */
895
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
896
    {
897
      if (slp_node)
898
        {
899
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
900
          group_size = 1;
901
        }
902
      else
903
        {
904
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
905
          group_size = vect_cost_strided_group_size (stmt_info);
906
        }
907
 
908
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
909
    }
910
  /* Not a strided access.  */
911
  else
912
    {
913
      group_size = 1;
914
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
915
    }
916
 
917
  /* We assume that the cost of a single store-lanes instruction is
918
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
919
     access is instead being provided by a permute-and-store operation,
920
     include the cost of the permutes.  */
921
  if (!store_lanes_p && group_size > 1)
922
    {
923
      /* Uses a high and low interleave operation for each needed permute.  */
924
      inside_cost = ncopies * exact_log2(group_size) * group_size
925
        * vect_get_stmt_cost (vec_perm);
926
 
927
      if (vect_print_dump_info (REPORT_COST))
928
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
929
                 group_size);
930
    }
931
 
932
  /* Costs of the stores.  */
933
  vect_get_store_cost (first_dr, ncopies, &inside_cost);
934
 
935
  if (vect_print_dump_info (REPORT_COST))
936
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
937
             "outside_cost = %d .", inside_cost, outside_cost);
938
 
939
  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
940
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
941
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
942
}
943
 
944
 
945
/* Calculate cost of DR's memory access.  */
946
void
947
vect_get_store_cost (struct data_reference *dr, int ncopies,
948
                     unsigned int *inside_cost)
949
{
950
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
951
 
952
  switch (alignment_support_scheme)
953
    {
954
    case dr_aligned:
955
      {
956
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
957
 
958
        if (vect_print_dump_info (REPORT_COST))
959
          fprintf (vect_dump, "vect_model_store_cost: aligned.");
960
 
961
        break;
962
      }
963
 
964
    case dr_unaligned_supported:
965
      {
966
        gimple stmt = DR_STMT (dr);
967
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969
 
970
        /* Here, we assign an additional cost for the unaligned store.  */
971
        *inside_cost += ncopies
972
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
973
                                 vectype, DR_MISALIGNMENT (dr));
974
 
975
        if (vect_print_dump_info (REPORT_COST))
976
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
977
                   "hardware.");
978
 
979
        break;
980
      }
981
 
982
    default:
983
      gcc_unreachable ();
984
    }
985
}
986
 
987
 
988
/* Function vect_model_load_cost
989
 
990
   Models cost for loads.  In the case of strided accesses, the last access
991
   has the overhead of the strided access attributed to it.  Since unaligned
992
   accesses are supported for loads, we also account for the costs of the
993
   access scheme chosen.  */
994
 
995
void
996
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
997
                      slp_tree slp_node)
998
{
999
  int group_size;
1000
  gimple first_stmt;
1001
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1002
  unsigned int inside_cost = 0, outside_cost = 0;
1003
 
1004
  /* The SLP costs were already calculated during SLP tree build.  */
1005
  if (PURE_SLP_STMT (stmt_info))
1006
    return;
1007
 
1008
  /* Strided accesses?  */
1009
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1010
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
1011
    {
1012
      group_size = vect_cost_strided_group_size (stmt_info);
1013
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1014
    }
1015
  /* Not a strided access.  */
1016
  else
1017
    {
1018
      group_size = 1;
1019
      first_dr = dr;
1020
    }
1021
 
1022
  /* We assume that the cost of a single load-lanes instruction is
1023
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
1024
     access is instead being provided by a load-and-permute operation,
1025
     include the cost of the permutes.  */
1026
  if (!load_lanes_p && group_size > 1)
1027
    {
1028
      /* Uses an even and odd extract operations for each needed permute.  */
1029
      inside_cost = ncopies * exact_log2(group_size) * group_size
1030
        * vect_get_stmt_cost (vec_perm);
1031
 
1032
      if (vect_print_dump_info (REPORT_COST))
1033
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1034
                 group_size);
1035
    }
1036
 
1037
  /* The loads themselves.  */
1038
  vect_get_load_cost (first_dr, ncopies,
1039
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
1040
          || slp_node),
1041
         &inside_cost, &outside_cost);
1042
 
1043
  if (vect_print_dump_info (REPORT_COST))
1044
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1045
             "outside_cost = %d .", inside_cost, outside_cost);
1046
 
1047
  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
1048
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1049
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1050
}
1051
 
1052
 
1053
/* Calculate cost of DR's memory access.  */
1054
void
1055
vect_get_load_cost (struct data_reference *dr, int ncopies,
1056
                    bool add_realign_cost, unsigned int *inside_cost,
1057
                    unsigned int *outside_cost)
1058
{
1059
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1060
 
1061
  switch (alignment_support_scheme)
1062
    {
1063
    case dr_aligned:
1064
      {
1065
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1066
 
1067
        if (vect_print_dump_info (REPORT_COST))
1068
          fprintf (vect_dump, "vect_model_load_cost: aligned.");
1069
 
1070
        break;
1071
      }
1072
    case dr_unaligned_supported:
1073
      {
1074
        gimple stmt = DR_STMT (dr);
1075
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1076
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1077
 
1078
        /* Here, we assign an additional cost for the unaligned load.  */
1079
        *inside_cost += ncopies
1080
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1081
                                           vectype, DR_MISALIGNMENT (dr));
1082
        if (vect_print_dump_info (REPORT_COST))
1083
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1084
                   "hardware.");
1085
 
1086
        break;
1087
      }
1088
    case dr_explicit_realign:
1089
      {
1090
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1091
                                   + vect_get_stmt_cost (vec_perm));
1092
 
1093
        /* FIXME: If the misalignment remains fixed across the iterations of
1094
           the containing loop, the following cost should be added to the
1095
           outside costs.  */
1096
        if (targetm.vectorize.builtin_mask_for_load)
1097
          *inside_cost += vect_get_stmt_cost (vector_stmt);
1098
 
1099
        if (vect_print_dump_info (REPORT_COST))
1100
          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1101
 
1102
        break;
1103
      }
1104
    case dr_explicit_realign_optimized:
1105
      {
1106
        if (vect_print_dump_info (REPORT_COST))
1107
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1108
                   "pipelined.");
1109
 
1110
        /* Unaligned software pipeline has a load of an address, an initial
1111
           load, and possibly a mask operation to "prime" the loop.  However,
1112
           if this is an access in a group of loads, which provide strided
1113
           access, then the above cost should only be considered for one
1114
           access in the group.  Inside the loop, there is a load op
1115
           and a realignment op.  */
1116
 
1117
        if (add_realign_cost)
1118
          {
1119
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1120
            if (targetm.vectorize.builtin_mask_for_load)
1121
              *outside_cost += vect_get_stmt_cost (vector_stmt);
1122
          }
1123
 
1124
        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1125
                                   + vect_get_stmt_cost (vec_perm));
1126
 
1127
        if (vect_print_dump_info (REPORT_COST))
1128
          fprintf (vect_dump,
1129
                   "vect_model_load_cost: explicit realign optimized");
1130
 
1131
        break;
1132
      }
1133
 
1134
    default:
1135
      gcc_unreachable ();
1136
    }
1137
}
1138
 
1139
 
1140
/* Function vect_init_vector.
1141
 
1142
   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1143
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
1144
   is not NULL.  Otherwise, place the initialization at the loop preheader.
1145
   Return the DEF of INIT_STMT.
1146
   It will be used in the vectorization of STMT.  */
1147
 
1148
tree
1149
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1150
                  gimple_stmt_iterator *gsi)
1151
{
1152
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1153
  tree new_var;
1154
  gimple init_stmt;
1155
  tree vec_oprnd;
1156
  edge pe;
1157
  tree new_temp;
1158
  basic_block new_bb;
1159
 
1160
  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1161
  add_referenced_var (new_var);
1162
  init_stmt = gimple_build_assign  (new_var, vector_var);
1163
  new_temp = make_ssa_name (new_var, init_stmt);
1164
  gimple_assign_set_lhs (init_stmt, new_temp);
1165
 
1166
  if (gsi)
1167
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
1168
  else
1169
    {
1170
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1171
 
1172
      if (loop_vinfo)
1173
        {
1174
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1175
 
1176
          if (nested_in_vect_loop_p (loop, stmt))
1177
            loop = loop->inner;
1178
 
1179
          pe = loop_preheader_edge (loop);
1180
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1181
          gcc_assert (!new_bb);
1182
        }
1183
      else
1184
       {
1185
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1186
          basic_block bb;
1187
          gimple_stmt_iterator gsi_bb_start;
1188
 
1189
          gcc_assert (bb_vinfo);
1190
          bb = BB_VINFO_BB (bb_vinfo);
1191
          gsi_bb_start = gsi_after_labels (bb);
1192
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1193
       }
1194
    }
1195
 
1196
  if (vect_print_dump_info (REPORT_DETAILS))
1197
    {
1198
      fprintf (vect_dump, "created new init_stmt: ");
1199
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1200
    }
1201
 
1202
  vec_oprnd = gimple_assign_lhs (init_stmt);
1203
  return vec_oprnd;
1204
}
1205
 
1206
 
1207
/* Function vect_get_vec_def_for_operand.
1208
 
1209
   OP is an operand in STMT.  This function returns a (vector) def that will be
1210
   used in the vectorized stmt for STMT.
1211
 
1212
   In the case that OP is an SSA_NAME which is defined in the loop, then
1213
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1214
 
1215
   In case OP is an invariant or constant, a new stmt that creates a vector def
1216
   needs to be introduced.  */
1217
 
1218
tree
1219
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1220
{
1221
  tree vec_oprnd;
1222
  gimple vec_stmt;
1223
  gimple def_stmt;
1224
  stmt_vec_info def_stmt_info = NULL;
1225
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226
  unsigned int nunits;
1227
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228
  tree vec_inv;
1229
  tree vec_cst;
1230
  tree t = NULL_TREE;
1231
  tree def;
1232
  int i;
1233
  enum vect_def_type dt;
1234
  bool is_simple_use;
1235
  tree vector_type;
1236
 
1237
  if (vect_print_dump_info (REPORT_DETAILS))
1238
    {
1239
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1240
      print_generic_expr (vect_dump, op, TDF_SLIM);
1241
    }
1242
 
1243
  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1244
                                      &def_stmt, &def, &dt);
1245
  gcc_assert (is_simple_use);
1246
  if (vect_print_dump_info (REPORT_DETAILS))
1247
    {
1248
      if (def)
1249
        {
1250
          fprintf (vect_dump, "def =  ");
1251
          print_generic_expr (vect_dump, def, TDF_SLIM);
1252
        }
1253
      if (def_stmt)
1254
        {
1255
          fprintf (vect_dump, "  def_stmt =  ");
1256
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1257
        }
1258
    }
1259
 
1260
  switch (dt)
1261
    {
1262
    /* Case 1: operand is a constant.  */
1263
    case vect_constant_def:
1264
      {
1265
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1266
        gcc_assert (vector_type);
1267
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1268
 
1269
        if (scalar_def)
1270
          *scalar_def = op;
1271
 
1272
        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1273
        if (vect_print_dump_info (REPORT_DETAILS))
1274
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1275
 
1276
        vec_cst = build_vector_from_val (vector_type,
1277
                                         fold_convert (TREE_TYPE (vector_type),
1278
                                                       op));
1279
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1280
      }
1281
 
1282
    /* Case 2: operand is defined outside the loop - loop invariant.  */
1283
    case vect_external_def:
1284
      {
1285
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1286
        gcc_assert (vector_type);
1287
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1288
 
1289
        if (scalar_def)
1290
          *scalar_def = def;
1291
 
1292
        /* Create 'vec_inv = {inv,inv,..,inv}'  */
1293
        if (vect_print_dump_info (REPORT_DETAILS))
1294
          fprintf (vect_dump, "Create vector_inv.");
1295
 
1296
        for (i = nunits - 1; i >= 0; --i)
1297
          {
1298
            t = tree_cons (NULL_TREE, def, t);
1299
          }
1300
 
1301
        /* FIXME: use build_constructor directly.  */
1302
        vec_inv = build_constructor_from_list (vector_type, t);
1303
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1304
      }
1305
 
1306
    /* Case 3: operand is defined inside the loop.  */
1307
    case vect_internal_def:
1308
      {
1309
        if (scalar_def)
1310
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1311
 
1312
        /* Get the def from the vectorized stmt.  */
1313
        def_stmt_info = vinfo_for_stmt (def_stmt);
1314
 
1315
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1316
        /* Get vectorized pattern statement.  */
1317
        if (!vec_stmt
1318
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1319
            && !STMT_VINFO_RELEVANT (def_stmt_info))
1320
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1321
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
1322
        gcc_assert (vec_stmt);
1323
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
1324
          vec_oprnd = PHI_RESULT (vec_stmt);
1325
        else if (is_gimple_call (vec_stmt))
1326
          vec_oprnd = gimple_call_lhs (vec_stmt);
1327
        else
1328
          vec_oprnd = gimple_assign_lhs (vec_stmt);
1329
        return vec_oprnd;
1330
      }
1331
 
1332
    /* Case 4: operand is defined by a loop header phi - reduction  */
1333
    case vect_reduction_def:
1334
    case vect_double_reduction_def:
1335
    case vect_nested_cycle:
1336
      {
1337
        struct loop *loop;
1338
 
1339
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1340
        loop = (gimple_bb (def_stmt))->loop_father;
1341
 
1342
        /* Get the def before the loop  */
1343
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1344
        return get_initial_def_for_reduction (stmt, op, scalar_def);
1345
     }
1346
 
1347
    /* Case 5: operand is defined by loop-header phi - induction.  */
1348
    case vect_induction_def:
1349
      {
1350
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1351
 
1352
        /* Get the def from the vectorized stmt.  */
1353
        def_stmt_info = vinfo_for_stmt (def_stmt);
1354
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1355
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
1356
          vec_oprnd = PHI_RESULT (vec_stmt);
1357
        else
1358
          vec_oprnd = gimple_get_lhs (vec_stmt);
1359
        return vec_oprnd;
1360
      }
1361
 
1362
    default:
1363
      gcc_unreachable ();
1364
    }
1365
}
1366
 
1367
 
1368
/* Function vect_get_vec_def_for_stmt_copy
1369
 
1370
   Return a vector-def for an operand.  This function is used when the
1371
   vectorized stmt to be created (by the caller to this function) is a "copy"
1372
   created in case the vectorized result cannot fit in one vector, and several
1373
   copies of the vector-stmt are required.  In this case the vector-def is
1374
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1375
   of the stmt that defines VEC_OPRND.
1376
   DT is the type of the vector def VEC_OPRND.
1377
 
1378
   Context:
1379
        In case the vectorization factor (VF) is bigger than the number
1380
   of elements that can fit in a vectype (nunits), we have to generate
1381
   more than one vector stmt to vectorize the scalar stmt.  This situation
1382
   arises when there are multiple data-types operated upon in the loop; the
1383
   smallest data-type determines the VF, and as a result, when vectorizing
1384
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1385
   vector stmt (each computing a vector of 'nunits' results, and together
1386
   computing 'VF' results in each iteration).  This function is called when
1387
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1388
   which VF=16 and nunits=4, so the number of copies required is 4):
1389
 
1390
   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1391
 
1392
   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1393
                        VS1.1:  vx.1 = memref1      VS1.2
1394
                        VS1.2:  vx.2 = memref2      VS1.3
1395
                        VS1.3:  vx.3 = memref3
1396
 
1397
   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1398
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1399
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1400
                        VSnew.3:  vz3 = vx.3 + ...
1401
 
1402
   The vectorization of S1 is explained in vectorizable_load.
1403
   The vectorization of S2:
1404
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
1405
   the function 'vect_get_vec_def_for_operand' is called to
1406
   get the relevant vector-def for each operand of S2.  For operand x it
1407
   returns  the vector-def 'vx.0'.
1408
 
1409
        To create the remaining copies of the vector-stmt (VSnew.j), this
1410
   function is called to get the relevant vector-def for each operand.  It is
1411
   obtained from the respective VS1.j stmt, which is recorded in the
1412
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1413
 
1414
        For example, to obtain the vector-def 'vx.1' in order to create the
1415
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1416
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1417
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1418
   and return its def ('vx.1').
1419
   Overall, to create the above sequence this function will be called 3 times:
1420
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1421
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1422
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1423
 
1424
tree
1425
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1426
{
1427
  gimple vec_stmt_for_operand;
1428
  stmt_vec_info def_stmt_info;
1429
 
1430
  /* Do nothing; can reuse same def.  */
1431
  if (dt == vect_external_def || dt == vect_constant_def )
1432
    return vec_oprnd;
1433
 
1434
  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1435
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1436
  gcc_assert (def_stmt_info);
1437
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1438
  gcc_assert (vec_stmt_for_operand);
1439
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1440
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1441
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1442
  else
1443
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1444
  return vec_oprnd;
1445
}
1446
 
1447
 
1448
/* Get vectorized definitions for the operands to create a copy of an original
1449
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1450
 
1451
static void
1452
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1453
                                 VEC(tree,heap) **vec_oprnds0,
1454
                                 VEC(tree,heap) **vec_oprnds1)
1455
{
1456
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1457
 
1458
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1459
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1460
 
1461
  if (vec_oprnds1 && *vec_oprnds1)
1462
    {
1463
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1464
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1465
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1466
    }
1467
}
1468
 
1469
 
1470
/* Get vectorized definitions for OP0 and OP1.
1471
   REDUC_INDEX is the index of reduction operand in case of reduction,
1472
   and -1 otherwise.  */
1473
 
1474
void
1475
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1476
                   VEC (tree, heap) **vec_oprnds0,
1477
                   VEC (tree, heap) **vec_oprnds1,
1478
                   slp_tree slp_node, int reduc_index)
1479
{
1480
  if (slp_node)
1481
    {
1482
      int nops = (op1 == NULL_TREE) ? 1 : 2;
1483
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1484
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1485
 
1486
      VEC_quick_push (tree, ops, op0);
1487
      if (op1)
1488
        VEC_quick_push (tree, ops, op1);
1489
 
1490
      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1491
 
1492
      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1493
      if (op1)
1494
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1495
 
1496
      VEC_free (tree, heap, ops);
1497
      VEC_free (slp_void_p, heap, vec_defs);
1498
    }
1499
  else
1500
    {
1501
      tree vec_oprnd;
1502
 
1503
      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1504
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1505
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1506
 
1507
      if (op1)
1508
        {
1509
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1510
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1511
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1512
        }
1513
    }
1514
}
1515
 
1516
 
1517
/* Function vect_finish_stmt_generation.
1518
 
1519
   Insert a new stmt.  */
1520
 
1521
void
1522
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1523
                             gimple_stmt_iterator *gsi)
1524
{
1525
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1526
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1527
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1528
 
1529
  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1530
 
1531
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1532
 
1533
  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1534
                                                   bb_vinfo));
1535
 
1536
  if (vect_print_dump_info (REPORT_DETAILS))
1537
    {
1538
      fprintf (vect_dump, "add new stmt: ");
1539
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1540
    }
1541
 
1542
  gimple_set_location (vec_stmt, gimple_location (stmt));
1543
}
1544
 
1545
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
1546
   a function declaration if the target has a vectorized version
1547
   of the function, or NULL_TREE if the function cannot be vectorized.  */
1548
 
1549
tree
1550
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1551
{
1552
  tree fndecl = gimple_call_fndecl (call);
1553
 
1554
  /* We only handle functions that do not read or clobber memory -- i.e.
1555
     const or novops ones.  */
1556
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1557
    return NULL_TREE;
1558
 
1559
  if (!fndecl
1560
      || TREE_CODE (fndecl) != FUNCTION_DECL
1561
      || !DECL_BUILT_IN (fndecl))
1562
    return NULL_TREE;
1563
 
1564
  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1565
                                                        vectype_in);
1566
}
1567
 
1568
/* Function vectorizable_call.
1569
 
1570
   Check if STMT performs a function call that can be vectorized.
1571
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1572
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1573
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1574
 
1575
static bool
1576
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1577
                   slp_tree slp_node)
1578
{
1579
  tree vec_dest;
1580
  tree scalar_dest;
1581
  tree op, type;
1582
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1583
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1584
  tree vectype_out, vectype_in;
1585
  int nunits_in;
1586
  int nunits_out;
1587
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1588
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1589
  tree fndecl, new_temp, def, rhs_type;
1590
  gimple def_stmt;
1591
  enum vect_def_type dt[3]
1592
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1593
  gimple new_stmt = NULL;
1594
  int ncopies, j;
1595
  VEC(tree, heap) *vargs = NULL;
1596
  enum { NARROW, NONE, WIDEN } modifier;
1597
  size_t i, nargs;
1598
  tree lhs;
1599
 
1600
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1601
    return false;
1602
 
1603
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1604
    return false;
1605
 
1606
  /* Is STMT a vectorizable call?   */
1607
  if (!is_gimple_call (stmt))
1608
    return false;
1609
 
1610
  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1611
    return false;
1612
 
1613
  if (stmt_can_throw_internal (stmt))
1614
    return false;
1615
 
1616
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1617
 
1618
  /* Process function arguments.  */
1619
  rhs_type = NULL_TREE;
1620
  vectype_in = NULL_TREE;
1621
  nargs = gimple_call_num_args (stmt);
1622
 
1623
  /* Bail out if the function has more than three arguments, we do not have
1624
     interesting builtin functions to vectorize with more than two arguments
1625
     except for fma.  No arguments is also not good.  */
1626
  if (nargs == 0 || nargs > 3)
1627
    return false;
1628
 
1629
  for (i = 0; i < nargs; i++)
1630
    {
1631
      tree opvectype;
1632
 
1633
      op = gimple_call_arg (stmt, i);
1634
 
1635
      /* We can only handle calls with arguments of the same type.  */
1636
      if (rhs_type
1637
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1638
        {
1639
          if (vect_print_dump_info (REPORT_DETAILS))
1640
            fprintf (vect_dump, "argument types differ.");
1641
          return false;
1642
        }
1643
      if (!rhs_type)
1644
        rhs_type = TREE_TYPE (op);
1645
 
1646
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1647
                                 &def_stmt, &def, &dt[i], &opvectype))
1648
        {
1649
          if (vect_print_dump_info (REPORT_DETAILS))
1650
            fprintf (vect_dump, "use not simple.");
1651
          return false;
1652
        }
1653
 
1654
      if (!vectype_in)
1655
        vectype_in = opvectype;
1656
      else if (opvectype
1657
               && opvectype != vectype_in)
1658
        {
1659
          if (vect_print_dump_info (REPORT_DETAILS))
1660
            fprintf (vect_dump, "argument vector types differ.");
1661
          return false;
1662
        }
1663
    }
1664
  /* If all arguments are external or constant defs use a vector type with
1665
     the same size as the output vector type.  */
1666
  if (!vectype_in)
1667
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1668
  if (vec_stmt)
1669
    gcc_assert (vectype_in);
1670
  if (!vectype_in)
1671
    {
1672
      if (vect_print_dump_info (REPORT_DETAILS))
1673
        {
1674
          fprintf (vect_dump, "no vectype for scalar type ");
1675
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1676
        }
1677
 
1678
      return false;
1679
    }
1680
 
1681
  /* FORNOW */
1682
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1683
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1684
  if (nunits_in == nunits_out / 2)
1685
    modifier = NARROW;
1686
  else if (nunits_out == nunits_in)
1687
    modifier = NONE;
1688
  else if (nunits_out == nunits_in / 2)
1689
    modifier = WIDEN;
1690
  else
1691
    return false;
1692
 
1693
  /* For now, we only vectorize functions if a target specific builtin
1694
     is available.  TODO -- in some cases, it might be profitable to
1695
     insert the calls for pieces of the vector, in order to be able
1696
     to vectorize other operations in the loop.  */
1697
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1698
  if (fndecl == NULL_TREE)
1699
    {
1700
      if (vect_print_dump_info (REPORT_DETAILS))
1701
        fprintf (vect_dump, "function is not vectorizable.");
1702
 
1703
      return false;
1704
    }
1705
 
1706
  gcc_assert (!gimple_vuse (stmt));
1707
 
1708
  if (slp_node || PURE_SLP_STMT (stmt_info))
1709
    ncopies = 1;
1710
  else if (modifier == NARROW)
1711
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1712
  else
1713
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1714
 
1715
  /* Sanity check: make sure that at least one copy of the vectorized stmt
1716
     needs to be generated.  */
1717
  gcc_assert (ncopies >= 1);
1718
 
1719
  if (!vec_stmt) /* transformation not required.  */
1720
    {
1721
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1722
      if (vect_print_dump_info (REPORT_DETAILS))
1723
        fprintf (vect_dump, "=== vectorizable_call ===");
1724
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1725
      return true;
1726
    }
1727
 
1728
  /** Transform.  **/
1729
 
1730
  if (vect_print_dump_info (REPORT_DETAILS))
1731
    fprintf (vect_dump, "transform call.");
1732
 
1733
  /* Handle def.  */
1734
  scalar_dest = gimple_call_lhs (stmt);
1735
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1736
 
1737
  prev_stmt_info = NULL;
1738
  switch (modifier)
1739
    {
1740
    case NONE:
1741
      for (j = 0; j < ncopies; ++j)
1742
        {
1743
          /* Build argument list for the vectorized call.  */
1744
          if (j == 0)
1745
            vargs = VEC_alloc (tree, heap, nargs);
1746
          else
1747
            VEC_truncate (tree, vargs, 0);
1748
 
1749
          if (slp_node)
1750
            {
1751
              VEC (slp_void_p, heap) *vec_defs
1752
                = VEC_alloc (slp_void_p, heap, nargs);
1753
              VEC (tree, heap) *vec_oprnds0;
1754
 
1755
              for (i = 0; i < nargs; i++)
1756
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1757
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1758
              vec_oprnds0
1759
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1760
 
1761
              /* Arguments are ready.  Create the new vector stmt.  */
1762
              FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1763
                {
1764
                  size_t k;
1765
                  for (k = 0; k < nargs; k++)
1766
                    {
1767
                      VEC (tree, heap) *vec_oprndsk
1768
                        = (VEC (tree, heap) *)
1769
                          VEC_index (slp_void_p, vec_defs, k);
1770
                      VEC_replace (tree, vargs, k,
1771
                                   VEC_index (tree, vec_oprndsk, i));
1772
                    }
1773
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
1774
                  new_temp = make_ssa_name (vec_dest, new_stmt);
1775
                  gimple_call_set_lhs (new_stmt, new_temp);
1776
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1777
                  mark_symbols_for_renaming (new_stmt);
1778
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1779
                                  new_stmt);
1780
                }
1781
 
1782
              for (i = 0; i < nargs; i++)
1783
                {
1784
                  VEC (tree, heap) *vec_oprndsi
1785
                    = (VEC (tree, heap) *)
1786
                      VEC_index (slp_void_p, vec_defs, i);
1787
                  VEC_free (tree, heap, vec_oprndsi);
1788
                }
1789
              VEC_free (slp_void_p, heap, vec_defs);
1790
              continue;
1791
            }
1792
 
1793
          for (i = 0; i < nargs; i++)
1794
            {
1795
              op = gimple_call_arg (stmt, i);
1796
              if (j == 0)
1797
                vec_oprnd0
1798
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
1799
              else
1800
                {
1801
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
1802
                  vec_oprnd0
1803
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1804
                }
1805
 
1806
              VEC_quick_push (tree, vargs, vec_oprnd0);
1807
            }
1808
 
1809
          new_stmt = gimple_build_call_vec (fndecl, vargs);
1810
          new_temp = make_ssa_name (vec_dest, new_stmt);
1811
          gimple_call_set_lhs (new_stmt, new_temp);
1812
 
1813
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
1814
          mark_symbols_for_renaming (new_stmt);
1815
 
1816
          if (j == 0)
1817
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1818
          else
1819
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1820
 
1821
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1822
        }
1823
 
1824
      break;
1825
 
1826
    case NARROW:
1827
      for (j = 0; j < ncopies; ++j)
1828
        {
1829
          /* Build argument list for the vectorized call.  */
1830
          if (j == 0)
1831
            vargs = VEC_alloc (tree, heap, nargs * 2);
1832
          else
1833
            VEC_truncate (tree, vargs, 0);
1834
 
1835
          if (slp_node)
1836
            {
1837
              VEC (slp_void_p, heap) *vec_defs
1838
                = VEC_alloc (slp_void_p, heap, nargs);
1839
              VEC (tree, heap) *vec_oprnds0;
1840
 
1841
              for (i = 0; i < nargs; i++)
1842
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1843
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1844
              vec_oprnds0
1845
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1846
 
1847
              /* Arguments are ready.  Create the new vector stmt.  */
1848
              for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1849
                   i += 2)
1850
                {
1851
                  size_t k;
1852
                  VEC_truncate (tree, vargs, 0);
1853
                  for (k = 0; k < nargs; k++)
1854
                    {
1855
                      VEC (tree, heap) *vec_oprndsk
1856
                        = (VEC (tree, heap) *)
1857
                          VEC_index (slp_void_p, vec_defs, k);
1858
                      VEC_quick_push (tree, vargs,
1859
                                      VEC_index (tree, vec_oprndsk, i));
1860
                      VEC_quick_push (tree, vargs,
1861
                                      VEC_index (tree, vec_oprndsk, i + 1));
1862
                    }
1863
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
1864
                  new_temp = make_ssa_name (vec_dest, new_stmt);
1865
                  gimple_call_set_lhs (new_stmt, new_temp);
1866
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1867
                  mark_symbols_for_renaming (new_stmt);
1868
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1869
                                  new_stmt);
1870
                }
1871
 
1872
              for (i = 0; i < nargs; i++)
1873
                {
1874
                  VEC (tree, heap) *vec_oprndsi
1875
                    = (VEC (tree, heap) *)
1876
                      VEC_index (slp_void_p, vec_defs, i);
1877
                  VEC_free (tree, heap, vec_oprndsi);
1878
                }
1879
              VEC_free (slp_void_p, heap, vec_defs);
1880
              continue;
1881
            }
1882
 
1883
          for (i = 0; i < nargs; i++)
1884
            {
1885
              op = gimple_call_arg (stmt, i);
1886
              if (j == 0)
1887
                {
1888
                  vec_oprnd0
1889
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
1890
                  vec_oprnd1
1891
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1892
                }
1893
              else
1894
                {
1895
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1896
                  vec_oprnd0
1897
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1898
                  vec_oprnd1
1899
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1900
                }
1901
 
1902
              VEC_quick_push (tree, vargs, vec_oprnd0);
1903
              VEC_quick_push (tree, vargs, vec_oprnd1);
1904
            }
1905
 
1906
          new_stmt = gimple_build_call_vec (fndecl, vargs);
1907
          new_temp = make_ssa_name (vec_dest, new_stmt);
1908
          gimple_call_set_lhs (new_stmt, new_temp);
1909
 
1910
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
1911
          mark_symbols_for_renaming (new_stmt);
1912
 
1913
          if (j == 0)
1914
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1915
          else
1916
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1917
 
1918
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1919
        }
1920
 
1921
      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1922
 
1923
      break;
1924
 
1925
    case WIDEN:
1926
      /* No current target implements this case.  */
1927
      return false;
1928
    }
1929
 
1930
  VEC_free (tree, heap, vargs);
1931
 
1932
  /* Update the exception handling table with the vector stmt if necessary.  */
1933
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1934
    gimple_purge_dead_eh_edges (gimple_bb (stmt));
1935
 
1936
  /* The call in STMT might prevent it from being removed in dce.
1937
     We however cannot remove it here, due to the way the ssa name
1938
     it defines is mapped to the new definition.  So just replace
1939
     rhs of the statement with something harmless.  */
1940
 
1941
  if (slp_node)
1942
    return true;
1943
 
1944
  type = TREE_TYPE (scalar_dest);
1945
  if (is_pattern_stmt_p (stmt_info))
1946
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1947
  else
1948
    lhs = gimple_call_lhs (stmt);
1949
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1950
  set_vinfo_for_stmt (new_stmt, stmt_info);
1951
  set_vinfo_for_stmt (stmt, NULL);
1952
  STMT_VINFO_STMT (stmt_info) = new_stmt;
1953
  gsi_replace (gsi, new_stmt, false);
1954
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1955
 
1956
  return true;
1957
}
1958
 
1959
 
1960
/* Function vect_gen_widened_results_half
1961
 
1962
   Create a vector stmt whose code, type, number of arguments, and result
1963
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1964
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
1965
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
1966
   needs to be created (DECL is a function-decl of a target-builtin).
1967
   STMT is the original scalar stmt that we are vectorizing.  */
1968
 
1969
static gimple
1970
vect_gen_widened_results_half (enum tree_code code,
1971
                               tree decl,
1972
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
1973
                               tree vec_dest, gimple_stmt_iterator *gsi,
1974
                               gimple stmt)
1975
{
1976
  gimple new_stmt;
1977
  tree new_temp;
1978
 
1979
  /* Generate half of the widened result:  */
1980
  if (code == CALL_EXPR)
1981
    {
1982
      /* Target specific support  */
1983
      if (op_type == binary_op)
1984
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1985
      else
1986
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1987
      new_temp = make_ssa_name (vec_dest, new_stmt);
1988
      gimple_call_set_lhs (new_stmt, new_temp);
1989
    }
1990
  else
1991
    {
1992
      /* Generic support */
1993
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
1994
      if (op_type != binary_op)
1995
        vec_oprnd1 = NULL;
1996
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1997
                                               vec_oprnd1);
1998
      new_temp = make_ssa_name (vec_dest, new_stmt);
1999
      gimple_assign_set_lhs (new_stmt, new_temp);
2000
    }
2001
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002
 
2003
  return new_stmt;
2004
}
2005
 
2006
 
2007
/* Get vectorized definitions for loop-based vectorization.  For the first
2008
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
2009
   scalar operand), and for the rest we get a copy with
2010
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
2011
   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2012
   The vectors are collected into VEC_OPRNDS.  */
2013
 
2014
static void
2015
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2016
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2017
{
2018
  tree vec_oprnd;
2019
 
2020
  /* Get first vector operand.  */
2021
  /* All the vector operands except the very first one (that is scalar oprnd)
2022
     are stmt copies.  */
2023
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2024
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2025
  else
2026
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2027
 
2028
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2029
 
2030
  /* Get second vector operand.  */
2031
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2032
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2033
 
2034
  *oprnd = vec_oprnd;
2035
 
2036
  /* For conversion in multiple steps, continue to get operands
2037
     recursively.  */
2038
  if (multi_step_cvt)
2039
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2040
}
2041
 
2042
 
2043
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2044
   For multi-step conversions store the resulting vectors and call the function
2045
   recursively.  */
2046
 
2047
static void
2048
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2049
                                       int multi_step_cvt, gimple stmt,
2050
                                       VEC (tree, heap) *vec_dsts,
2051
                                       gimple_stmt_iterator *gsi,
2052
                                       slp_tree slp_node, enum tree_code code,
2053
                                       stmt_vec_info *prev_stmt_info)
2054
{
2055
  unsigned int i;
2056
  tree vop0, vop1, new_tmp, vec_dest;
2057
  gimple new_stmt;
2058
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2059
 
2060
  vec_dest = VEC_pop (tree, vec_dsts);
2061
 
2062
  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2063
    {
2064
      /* Create demotion operation.  */
2065
      vop0 = VEC_index (tree, *vec_oprnds, i);
2066
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2067
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2068
      new_tmp = make_ssa_name (vec_dest, new_stmt);
2069
      gimple_assign_set_lhs (new_stmt, new_tmp);
2070
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2071
 
2072
      if (multi_step_cvt)
2073
        /* Store the resulting vector for next recursive call.  */
2074
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2075
      else
2076
        {
2077
          /* This is the last step of the conversion sequence. Store the
2078
             vectors in SLP_NODE or in vector info of the scalar statement
2079
             (or in STMT_VINFO_RELATED_STMT chain).  */
2080
          if (slp_node)
2081
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2082
          else
2083
            {
2084
              if (!*prev_stmt_info)
2085
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2086
              else
2087
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2088
 
2089
              *prev_stmt_info = vinfo_for_stmt (new_stmt);
2090
            }
2091
        }
2092
    }
2093
 
2094
  /* For multi-step demotion operations we first generate demotion operations
2095
     from the source type to the intermediate types, and then combine the
2096
     results (stored in VEC_OPRNDS) in demotion operation to the destination
2097
     type.  */
2098
  if (multi_step_cvt)
2099
    {
2100
      /* At each level of recursion we have half of the operands we had at the
2101
         previous level.  */
2102
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2103
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2104
                                             stmt, vec_dsts, gsi, slp_node,
2105
                                             VEC_PACK_TRUNC_EXPR,
2106
                                             prev_stmt_info);
2107
    }
2108
 
2109
  VEC_quick_push (tree, vec_dsts, vec_dest);
2110
}
2111
 
2112
 
2113
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2114
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
2115
   the resulting vectors and call the function recursively.  */
2116
 
2117
static void
2118
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2119
                                        VEC (tree, heap) **vec_oprnds1,
2120
                                        gimple stmt, tree vec_dest,
2121
                                        gimple_stmt_iterator *gsi,
2122
                                        enum tree_code code1,
2123
                                        enum tree_code code2, tree decl1,
2124
                                        tree decl2, int op_type)
2125
{
2126
  int i;
2127
  tree vop0, vop1, new_tmp1, new_tmp2;
2128
  gimple new_stmt1, new_stmt2;
2129
  VEC (tree, heap) *vec_tmp = NULL;
2130
 
2131
  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2132
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2133
    {
2134
      if (op_type == binary_op)
2135
        vop1 = VEC_index (tree, *vec_oprnds1, i);
2136
      else
2137
        vop1 = NULL_TREE;
2138
 
2139
      /* Generate the two halves of promotion operation.  */
2140
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2141
                                                 op_type, vec_dest, gsi, stmt);
2142
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2143
                                                 op_type, vec_dest, gsi, stmt);
2144
      if (is_gimple_call (new_stmt1))
2145
        {
2146
          new_tmp1 = gimple_call_lhs (new_stmt1);
2147
          new_tmp2 = gimple_call_lhs (new_stmt2);
2148
        }
2149
      else
2150
        {
2151
          new_tmp1 = gimple_assign_lhs (new_stmt1);
2152
          new_tmp2 = gimple_assign_lhs (new_stmt2);
2153
        }
2154
 
2155
      /* Store the results for the next step.  */
2156
      VEC_quick_push (tree, vec_tmp, new_tmp1);
2157
      VEC_quick_push (tree, vec_tmp, new_tmp2);
2158
    }
2159
 
2160
  VEC_free (tree, heap, *vec_oprnds0);
2161
  *vec_oprnds0 = vec_tmp;
2162
}
2163
 
2164
 
2165
/* Check if STMT performs a conversion operation, that can be vectorized.
2166
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2167
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2168
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2169
 
2170
static bool
2171
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2172
                         gimple *vec_stmt, slp_tree slp_node)
2173
{
2174
  tree vec_dest;
2175
  tree scalar_dest;
2176
  tree op0, op1 = NULL_TREE;
2177
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2178
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2179
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2180
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2181
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2182
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2183
  tree new_temp;
2184
  tree def;
2185
  gimple def_stmt;
2186
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2187
  gimple new_stmt = NULL;
2188
  stmt_vec_info prev_stmt_info;
2189
  int nunits_in;
2190
  int nunits_out;
2191
  tree vectype_out, vectype_in;
2192
  int ncopies, i, j;
2193
  tree lhs_type, rhs_type;
2194
  enum { NARROW, NONE, WIDEN } modifier;
2195
  VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2196
  tree vop0;
2197
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198
  int multi_step_cvt = 0;
2199
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2200
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2201
  int op_type;
2202
  enum machine_mode rhs_mode;
2203
  unsigned short fltsz;
2204
 
2205
  /* Is STMT a vectorizable conversion?   */
2206
 
2207
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2208
    return false;
2209
 
2210
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2211
    return false;
2212
 
2213
  if (!is_gimple_assign (stmt))
2214
    return false;
2215
 
2216
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2217
    return false;
2218
 
2219
  code = gimple_assign_rhs_code (stmt);
2220
  if (!CONVERT_EXPR_CODE_P (code)
2221
      && code != FIX_TRUNC_EXPR
2222
      && code != FLOAT_EXPR
2223
      && code != WIDEN_MULT_EXPR
2224
      && code != WIDEN_LSHIFT_EXPR)
2225
    return false;
2226
 
2227
  op_type = TREE_CODE_LENGTH (code);
2228
 
2229
  /* Check types of lhs and rhs.  */
2230
  scalar_dest = gimple_assign_lhs (stmt);
2231
  lhs_type = TREE_TYPE (scalar_dest);
2232
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2233
 
2234
  op0 = gimple_assign_rhs1 (stmt);
2235
  rhs_type = TREE_TYPE (op0);
2236
 
2237
  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2238
      && !((INTEGRAL_TYPE_P (lhs_type)
2239
            && INTEGRAL_TYPE_P (rhs_type))
2240
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
2241
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
2242
    return false;
2243
 
2244
  if ((INTEGRAL_TYPE_P (lhs_type)
2245
       && (TYPE_PRECISION (lhs_type)
2246
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2247
      || (INTEGRAL_TYPE_P (rhs_type)
2248
          && (TYPE_PRECISION (rhs_type)
2249
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2250
    {
2251
      if (vect_print_dump_info (REPORT_DETAILS))
2252
        fprintf (vect_dump,
2253
                 "type conversion to/from bit-precision unsupported.");
2254
      return false;
2255
    }
2256
 
2257
  /* Check the operands of the operation.  */
2258
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2259
                             &def_stmt, &def, &dt[0], &vectype_in))
2260
    {
2261
      if (vect_print_dump_info (REPORT_DETAILS))
2262
        fprintf (vect_dump, "use not simple.");
2263
      return false;
2264
    }
2265
  if (op_type == binary_op)
2266
    {
2267
      bool ok;
2268
 
2269
      op1 = gimple_assign_rhs2 (stmt);
2270
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2271
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2272
         OP1.  */
2273
      if (CONSTANT_CLASS_P (op0))
2274
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, NULL,
2275
                                   &def_stmt, &def, &dt[1], &vectype_in);
2276
      else
2277
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &def_stmt,
2278
                                 &def, &dt[1]);
2279
 
2280
      if (!ok)
2281
        {
2282
          if (vect_print_dump_info (REPORT_DETAILS))
2283
            fprintf (vect_dump, "use not simple.");
2284
          return false;
2285
        }
2286
    }
2287
 
2288
  /* If op0 is an external or constant defs use a vector type of
2289
     the same size as the output vector type.  */
2290
  if (!vectype_in)
2291
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2292
  if (vec_stmt)
2293
    gcc_assert (vectype_in);
2294
  if (!vectype_in)
2295
    {
2296
      if (vect_print_dump_info (REPORT_DETAILS))
2297
        {
2298
          fprintf (vect_dump, "no vectype for scalar type ");
2299
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2300
        }
2301
 
2302
      return false;
2303
    }
2304
 
2305
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2306
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2307
  if (nunits_in < nunits_out)
2308
    modifier = NARROW;
2309
  else if (nunits_out == nunits_in)
2310
    modifier = NONE;
2311
  else
2312
    modifier = WIDEN;
2313
 
2314
  /* Multiple types in SLP are handled by creating the appropriate number of
2315
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2316
     case of SLP.  */
2317
  if (slp_node || PURE_SLP_STMT (stmt_info))
2318
    ncopies = 1;
2319
  else if (modifier == NARROW)
2320
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2321
  else
2322
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2323
 
2324
  /* Sanity check: make sure that at least one copy of the vectorized stmt
2325
     needs to be generated.  */
2326
  gcc_assert (ncopies >= 1);
2327
 
2328
  /* Supportable by target?  */
2329
  switch (modifier)
2330
    {
2331
    case NONE:
2332
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2333
        return false;
2334
      if (supportable_convert_operation (code, vectype_out, vectype_in,
2335
                                         &decl1, &code1))
2336
        break;
2337
      /* FALLTHRU */
2338
    unsupported:
2339
      if (vect_print_dump_info (REPORT_DETAILS))
2340
        fprintf (vect_dump, "conversion not supported by target.");
2341
      return false;
2342
 
2343
    case WIDEN:
2344
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2345
                                          &decl1, &decl2, &code1, &code2,
2346
                                          &multi_step_cvt, &interm_types))
2347
        {
2348
          /* Binary widening operation can only be supported directly by the
2349
             architecture.  */
2350
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
2351
          break;
2352
        }
2353
 
2354
      if (code != FLOAT_EXPR
2355
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2356
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2357
        goto unsupported;
2358
 
2359
      rhs_mode = TYPE_MODE (rhs_type);
2360
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2361
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2362
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2363
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2364
        {
2365
          cvt_type
2366
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368
          if (cvt_type == NULL_TREE)
2369
            goto unsupported;
2370
 
2371
          if (GET_MODE_SIZE (rhs_mode) == fltsz)
2372
            {
2373
              if (!supportable_convert_operation (code, vectype_out,
2374
                                                  cvt_type, &decl1, &codecvt1))
2375
                goto unsupported;
2376
            }
2377
          else if (!supportable_widening_operation (code, stmt, vectype_out,
2378
                                                    cvt_type, &decl1, &decl2,
2379
                                                    &codecvt1, &codecvt2,
2380
                                                    &multi_step_cvt,
2381
                                                    &interm_types))
2382
            continue;
2383
          else
2384
            gcc_assert (multi_step_cvt == 0);
2385
 
2386
          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2387
                                              vectype_in, NULL, NULL, &code1,
2388
                                              &code2, &multi_step_cvt,
2389
                                              &interm_types))
2390
            break;
2391
        }
2392
 
2393
      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2394
        goto unsupported;
2395
 
2396
      if (GET_MODE_SIZE (rhs_mode) == fltsz)
2397
        codecvt2 = ERROR_MARK;
2398
      else
2399
        {
2400
          multi_step_cvt++;
2401
          VEC_safe_push (tree, heap, interm_types, cvt_type);
2402
          cvt_type = NULL_TREE;
2403
        }
2404
      break;
2405
 
2406
    case NARROW:
2407
      gcc_assert (op_type == unary_op);
2408
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2409
                                           &code1, &multi_step_cvt,
2410
                                           &interm_types))
2411
        break;
2412
 
2413
      if (code != FIX_TRUNC_EXPR
2414
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2415
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2416
        goto unsupported;
2417
 
2418
      rhs_mode = TYPE_MODE (rhs_type);
2419
      cvt_type
2420
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2421
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2422
      if (cvt_type == NULL_TREE)
2423
        goto unsupported;
2424
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
2425
                                          &decl1, &codecvt1))
2426
        goto unsupported;
2427
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2428
                                           &code1, &multi_step_cvt,
2429
                                           &interm_types))
2430
        break;
2431
      goto unsupported;
2432
 
2433
    default:
2434
      gcc_unreachable ();
2435
    }
2436
 
2437
  if (!vec_stmt)                /* transformation not required.  */
2438
    {
2439
      if (vect_print_dump_info (REPORT_DETAILS))
2440
        fprintf (vect_dump, "=== vectorizable_conversion ===");
2441
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2442
        {
2443
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2444
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2445
        }
2446
      else if (modifier == NARROW)
2447
        {
2448
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2449
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2450
        }
2451
      else
2452
        {
2453
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2454
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2455
        }
2456
      VEC_free (tree, heap, interm_types);
2457
      return true;
2458
    }
2459
 
2460
  /** Transform.  **/
2461
  if (vect_print_dump_info (REPORT_DETAILS))
2462
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2463
 
2464
  if (op_type == binary_op)
2465
    {
2466
      if (CONSTANT_CLASS_P (op0))
2467
        op0 = fold_convert (TREE_TYPE (op1), op0);
2468
      else if (CONSTANT_CLASS_P (op1))
2469
        op1 = fold_convert (TREE_TYPE (op0), op1);
2470
    }
2471
 
2472
  /* In case of multi-step conversion, we first generate conversion operations
2473
     to the intermediate types, and then from that types to the final one.
2474
     We create vector destinations for the intermediate type (TYPES) received
2475
     from supportable_*_operation, and store them in the correct order
2476
     for future use in vect_create_vectorized_*_stmts ().  */
2477
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2478
  vec_dest = vect_create_destination_var (scalar_dest,
2479
                                          (cvt_type && modifier == WIDEN)
2480
                                          ? cvt_type : vectype_out);
2481
  VEC_quick_push (tree, vec_dsts, vec_dest);
2482
 
2483
  if (multi_step_cvt)
2484
    {
2485
      for (i = VEC_length (tree, interm_types) - 1;
2486
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2487
        {
2488
          vec_dest = vect_create_destination_var (scalar_dest,
2489
                                                  intermediate_type);
2490
          VEC_quick_push (tree, vec_dsts, vec_dest);
2491
        }
2492
    }
2493
 
2494
  if (cvt_type)
2495
    vec_dest = vect_create_destination_var (scalar_dest,
2496
                                            modifier == WIDEN
2497
                                            ? vectype_out : cvt_type);
2498
 
2499
  if (!slp_node)
2500
    {
2501
      if (modifier == NONE)
2502
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
2503
      else if (modifier == WIDEN)
2504
        {
2505
          vec_oprnds0 = VEC_alloc (tree, heap,
2506
                                   (multi_step_cvt
2507
                                    ? vect_pow2 (multi_step_cvt) : 1));
2508
          if (op_type == binary_op)
2509
            vec_oprnds1 = VEC_alloc (tree, heap, 1);
2510
        }
2511
      else
2512
        vec_oprnds0 = VEC_alloc (tree, heap,
2513
                                 2 * (multi_step_cvt
2514
                                      ? vect_pow2 (multi_step_cvt) : 1));
2515
    }
2516
  else if (code == WIDEN_LSHIFT_EXPR)
2517
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2518
 
2519
  last_oprnd = op0;
2520
  prev_stmt_info = NULL;
2521
  switch (modifier)
2522
    {
2523
    case NONE:
2524
      for (j = 0; j < ncopies; j++)
2525
        {
2526
          if (j == 0)
2527
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2528
                               -1);
2529
          else
2530
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2531
 
2532
          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2533
            {
2534
              /* Arguments are ready, create the new vector stmt.  */
2535
              if (code1 == CALL_EXPR)
2536
                {
2537
                  new_stmt = gimple_build_call (decl1, 1, vop0);
2538
                  new_temp = make_ssa_name (vec_dest, new_stmt);
2539
                  gimple_call_set_lhs (new_stmt, new_temp);
2540
                }
2541
              else
2542
                {
2543
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2544
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2545
                                                           vop0, NULL);
2546
                  new_temp = make_ssa_name (vec_dest, new_stmt);
2547
                  gimple_assign_set_lhs (new_stmt, new_temp);
2548
                }
2549
 
2550
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
2551
              if (slp_node)
2552
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2553
                                new_stmt);
2554
            }
2555
 
2556
          if (j == 0)
2557
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558
          else
2559
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2560
          prev_stmt_info = vinfo_for_stmt (new_stmt);
2561
        }
2562
      break;
2563
 
2564
    case WIDEN:
2565
      /* In case the vectorization factor (VF) is bigger than the number
2566
         of elements that we can fit in a vectype (nunits), we have to
2567
         generate more than one vector stmt - i.e - we need to "unroll"
2568
         the vector stmt by a factor VF/nunits.  */
2569
      for (j = 0; j < ncopies; j++)
2570
        {
2571
          /* Handle uses.  */
2572
          if (j == 0)
2573
            {
2574
              if (slp_node)
2575
                {
2576
                  if (code == WIDEN_LSHIFT_EXPR)
2577
                    {
2578
                      unsigned int k;
2579
 
2580
                      vec_oprnd1 = op1;
2581
                      /* Store vec_oprnd1 for every vector stmt to be created
2582
                         for SLP_NODE.  We check during the analysis that all
2583
                         the shift arguments are the same.  */
2584
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2585
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2586
 
2587
                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2588
                                         slp_node, -1);
2589
                    }
2590
                  else
2591
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2592
                                       &vec_oprnds1, slp_node, -1);
2593
                }
2594
              else
2595
                {
2596
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2597
                  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2598
                  if (op_type == binary_op)
2599
                    {
2600
                      if (code == WIDEN_LSHIFT_EXPR)
2601
                        vec_oprnd1 = op1;
2602
                      else
2603
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2604
                                                                   NULL);
2605
                      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2606
                    }
2607
                }
2608
            }
2609
          else
2610
            {
2611
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2612
              VEC_truncate (tree, vec_oprnds0, 0);
2613
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2614
              if (op_type == binary_op)
2615
                {
2616
                  if (code == WIDEN_LSHIFT_EXPR)
2617
                    vec_oprnd1 = op1;
2618
                  else
2619
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2620
                                                                 vec_oprnd1);
2621
                  VEC_truncate (tree, vec_oprnds1, 0);
2622
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2623
                }
2624
            }
2625
 
2626
          /* Arguments are ready.  Create the new vector stmts.  */
2627
          for (i = multi_step_cvt; i >= 0; i--)
2628
            {
2629
              tree this_dest = VEC_index (tree, vec_dsts, i);
2630
              enum tree_code c1 = code1, c2 = code2;
2631
              if (i == 0 && codecvt2 != ERROR_MARK)
2632
                {
2633
                  c1 = codecvt1;
2634
                  c2 = codecvt2;
2635
                }
2636
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2637
                                                      &vec_oprnds1,
2638
                                                      stmt, this_dest, gsi,
2639
                                                      c1, c2, decl1, decl2,
2640
                                                      op_type);
2641
            }
2642
 
2643
          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2644
            {
2645
              if (cvt_type)
2646
                {
2647
                  if (codecvt1 == CALL_EXPR)
2648
                    {
2649
                      new_stmt = gimple_build_call (decl1, 1, vop0);
2650
                      new_temp = make_ssa_name (vec_dest, new_stmt);
2651
                      gimple_call_set_lhs (new_stmt, new_temp);
2652
                    }
2653
                  else
2654
                    {
2655
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656
                      new_temp = make_ssa_name (vec_dest, NULL);
2657
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
2658
                                                               new_temp,
2659
                                                               vop0, NULL);
2660
                    }
2661
 
2662
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2663
                }
2664
              else
2665
                new_stmt = SSA_NAME_DEF_STMT (vop0);
2666
 
2667
              if (slp_node)
2668
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2669
                                new_stmt);
2670
              else
2671
                {
2672
                  if (!prev_stmt_info)
2673
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2674
                  else
2675
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2676
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
2677
                }
2678
            }
2679
        }
2680
 
2681
      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2682
      break;
2683
 
2684
    case NARROW:
2685
      /* In case the vectorization factor (VF) is bigger than the number
2686
         of elements that we can fit in a vectype (nunits), we have to
2687
         generate more than one vector stmt - i.e - we need to "unroll"
2688
         the vector stmt by a factor VF/nunits.  */
2689
      for (j = 0; j < ncopies; j++)
2690
        {
2691
          /* Handle uses.  */
2692
          if (slp_node)
2693
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2694
                               slp_node, -1);
2695
          else
2696
            {
2697
              VEC_truncate (tree, vec_oprnds0, 0);
2698
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2699
                                        vect_pow2 (multi_step_cvt) - 1);
2700
            }
2701
 
2702
          /* Arguments are ready.  Create the new vector stmts.  */
2703
          if (cvt_type)
2704
            FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2705
              {
2706
                if (codecvt1 == CALL_EXPR)
2707
                  {
2708
                    new_stmt = gimple_build_call (decl1, 1, vop0);
2709
                    new_temp = make_ssa_name (vec_dest, new_stmt);
2710
                    gimple_call_set_lhs (new_stmt, new_temp);
2711
                  }
2712
                else
2713
                  {
2714
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2715
                    new_temp = make_ssa_name (vec_dest, NULL);
2716
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2717
                                                             vop0, NULL);
2718
                  }
2719
 
2720
                vect_finish_stmt_generation (stmt, new_stmt, gsi);
2721
                VEC_replace (tree, vec_oprnds0, i, new_temp);
2722
              }
2723
 
2724
          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2725
                                                 stmt, vec_dsts, gsi,
2726
                                                 slp_node, code1,
2727
                                                 &prev_stmt_info);
2728
        }
2729
 
2730
      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2731
      break;
2732
    }
2733
 
2734
  VEC_free (tree, heap, vec_oprnds0);
2735
  VEC_free (tree, heap, vec_oprnds1);
2736
  VEC_free (tree, heap, vec_dsts);
2737
  VEC_free (tree, heap, interm_types);
2738
 
2739
  return true;
2740
}
2741
 
2742
 
2743
/* Function vectorizable_assignment.
2744
 
2745
   Check if STMT performs an assignment (copy) that can be vectorized.
2746
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2747
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2748
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2749
 
2750
static bool
2751
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2752
                         gimple *vec_stmt, slp_tree slp_node)
2753
{
2754
  tree vec_dest;
2755
  tree scalar_dest;
2756
  tree op;
2757
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2758
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2759
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2760
  tree new_temp;
2761
  tree def;
2762
  gimple def_stmt;
2763
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2764
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2765
  int ncopies;
2766
  int i, j;
2767
  VEC(tree,heap) *vec_oprnds = NULL;
2768
  tree vop;
2769
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2770
  gimple new_stmt = NULL;
2771
  stmt_vec_info prev_stmt_info = NULL;
2772
  enum tree_code code;
2773
  tree vectype_in;
2774
 
2775
  /* Multiple types in SLP are handled by creating the appropriate number of
2776
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2777
     case of SLP.  */
2778
  if (slp_node || PURE_SLP_STMT (stmt_info))
2779
    ncopies = 1;
2780
  else
2781
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2782
 
2783
  gcc_assert (ncopies >= 1);
2784
 
2785
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2786
    return false;
2787
 
2788
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2789
    return false;
2790
 
2791
  /* Is vectorizable assignment?  */
2792
  if (!is_gimple_assign (stmt))
2793
    return false;
2794
 
2795
  scalar_dest = gimple_assign_lhs (stmt);
2796
  if (TREE_CODE (scalar_dest) != SSA_NAME)
2797
    return false;
2798
 
2799
  code = gimple_assign_rhs_code (stmt);
2800
  if (gimple_assign_single_p (stmt)
2801
      || code == PAREN_EXPR
2802
      || CONVERT_EXPR_CODE_P (code))
2803
    op = gimple_assign_rhs1 (stmt);
2804
  else
2805
    return false;
2806
 
2807
  if (code == VIEW_CONVERT_EXPR)
2808
    op = TREE_OPERAND (op, 0);
2809
 
2810
  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2811
                             &def_stmt, &def, &dt[0], &vectype_in))
2812
    {
2813
      if (vect_print_dump_info (REPORT_DETAILS))
2814
        fprintf (vect_dump, "use not simple.");
2815
      return false;
2816
    }
2817
 
2818
  /* We can handle NOP_EXPR conversions that do not change the number
2819
     of elements or the vector size.  */
2820
  if ((CONVERT_EXPR_CODE_P (code)
2821
       || code == VIEW_CONVERT_EXPR)
2822
      && (!vectype_in
2823
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2824
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
2825
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2826
    return false;
2827
 
2828
  /* We do not handle bit-precision changes.  */
2829
  if ((CONVERT_EXPR_CODE_P (code)
2830
       || code == VIEW_CONVERT_EXPR)
2831
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2832
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2833
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2834
          || ((TYPE_PRECISION (TREE_TYPE (op))
2835
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2836
      /* But a conversion that does not change the bit-pattern is ok.  */
2837
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2838
            > TYPE_PRECISION (TREE_TYPE (op)))
2839
           && TYPE_UNSIGNED (TREE_TYPE (op))))
2840
    {
2841
      if (vect_print_dump_info (REPORT_DETAILS))
2842
        fprintf (vect_dump, "type conversion to/from bit-precision "
2843
                 "unsupported.");
2844
      return false;
2845
    }
2846
 
2847
  if (!vec_stmt) /* transformation not required.  */
2848
    {
2849
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2850
      if (vect_print_dump_info (REPORT_DETAILS))
2851
        fprintf (vect_dump, "=== vectorizable_assignment ===");
2852
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2853
      return true;
2854
    }
2855
 
2856
  /** Transform.  **/
2857
  if (vect_print_dump_info (REPORT_DETAILS))
2858
    fprintf (vect_dump, "transform assignment.");
2859
 
2860
  /* Handle def.  */
2861
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2862
 
2863
  /* Handle use.  */
2864
  for (j = 0; j < ncopies; j++)
2865
    {
2866
      /* Handle uses.  */
2867
      if (j == 0)
2868
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2869
      else
2870
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2871
 
2872
      /* Arguments are ready. create the new vector stmt.  */
2873
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2874
       {
2875
         if (CONVERT_EXPR_CODE_P (code)
2876
             || code == VIEW_CONVERT_EXPR)
2877
           vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2878
         new_stmt = gimple_build_assign (vec_dest, vop);
2879
         new_temp = make_ssa_name (vec_dest, new_stmt);
2880
         gimple_assign_set_lhs (new_stmt, new_temp);
2881
         vect_finish_stmt_generation (stmt, new_stmt, gsi);
2882
         if (slp_node)
2883
           VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2884
       }
2885
 
2886
      if (slp_node)
2887
        continue;
2888
 
2889
      if (j == 0)
2890
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2891
      else
2892
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2893
 
2894
      prev_stmt_info = vinfo_for_stmt (new_stmt);
2895
    }
2896
 
2897
  VEC_free (tree, heap, vec_oprnds);
2898
  return true;
2899
}
2900
 
2901
 
2902
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2903
   either as shift by a scalar or by a vector.  */
2904
 
2905
bool
2906
vect_supportable_shift (enum tree_code code, tree scalar_type)
2907
{
2908
 
2909
  enum machine_mode vec_mode;
2910
  optab optab;
2911
  int icode;
2912
  tree vectype;
2913
 
2914
  vectype = get_vectype_for_scalar_type (scalar_type);
2915
  if (!vectype)
2916
    return false;
2917
 
2918
  optab = optab_for_tree_code (code, vectype, optab_scalar);
2919
  if (!optab
2920
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2921
    {
2922
      optab = optab_for_tree_code (code, vectype, optab_vector);
2923
      if (!optab
2924
          || (optab_handler (optab, TYPE_MODE (vectype))
2925
                      == CODE_FOR_nothing))
2926
        return false;
2927
    }
2928
 
2929
  vec_mode = TYPE_MODE (vectype);
2930
  icode = (int) optab_handler (optab, vec_mode);
2931
  if (icode == CODE_FOR_nothing)
2932
    return false;
2933
 
2934
  return true;
2935
}
2936
 
2937
 
2938
/* Function vectorizable_shift.
2939
 
2940
   Check if STMT performs a shift operation that can be vectorized.
2941
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2942
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2943
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2944
 
2945
static bool
2946
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2947
                    gimple *vec_stmt, slp_tree slp_node)
2948
{
2949
  tree vec_dest;
2950
  tree scalar_dest;
2951
  tree op0, op1 = NULL;
2952
  tree vec_oprnd1 = NULL_TREE;
2953
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954
  tree vectype;
2955
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2956
  enum tree_code code;
2957
  enum machine_mode vec_mode;
2958
  tree new_temp;
2959
  optab optab;
2960
  int icode;
2961
  enum machine_mode optab_op2_mode;
2962
  tree def;
2963
  gimple def_stmt;
2964
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2965
  gimple new_stmt = NULL;
2966
  stmt_vec_info prev_stmt_info;
2967
  int nunits_in;
2968
  int nunits_out;
2969
  tree vectype_out;
2970
  tree op1_vectype;
2971
  int ncopies;
2972
  int j, i;
2973
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2974
  tree vop0, vop1;
2975
  unsigned int k;
2976
  bool scalar_shift_arg = true;
2977
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2978
  int vf;
2979
 
2980
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2981
    return false;
2982
 
2983
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2984
    return false;
2985
 
2986
  /* Is STMT a vectorizable binary/unary operation?   */
2987
  if (!is_gimple_assign (stmt))
2988
    return false;
2989
 
2990
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2991
    return false;
2992
 
2993
  code = gimple_assign_rhs_code (stmt);
2994
 
2995
  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2996
      || code == RROTATE_EXPR))
2997
    return false;
2998
 
2999
  scalar_dest = gimple_assign_lhs (stmt);
3000
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3001
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3002
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3003
    {
3004
      if (vect_print_dump_info (REPORT_DETAILS))
3005
        fprintf (vect_dump, "bit-precision shifts not supported.");
3006
      return false;
3007
    }
3008
 
3009
  op0 = gimple_assign_rhs1 (stmt);
3010
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3011
                             &def_stmt, &def, &dt[0], &vectype))
3012
    {
3013
      if (vect_print_dump_info (REPORT_DETAILS))
3014
        fprintf (vect_dump, "use not simple.");
3015
      return false;
3016
    }
3017
  /* If op0 is an external or constant def use a vector type with
3018
     the same size as the output vector type.  */
3019
  if (!vectype)
3020
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3021
  if (vec_stmt)
3022
    gcc_assert (vectype);
3023
  if (!vectype)
3024
    {
3025
      if (vect_print_dump_info (REPORT_DETAILS))
3026
        {
3027
          fprintf (vect_dump, "no vectype for scalar type ");
3028
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3029
        }
3030
 
3031
      return false;
3032
    }
3033
 
3034
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3035
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3036
  if (nunits_out != nunits_in)
3037
    return false;
3038
 
3039
  op1 = gimple_assign_rhs2 (stmt);
3040
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3041
                             &def, &dt[1], &op1_vectype))
3042
    {
3043
      if (vect_print_dump_info (REPORT_DETAILS))
3044
        fprintf (vect_dump, "use not simple.");
3045
      return false;
3046
    }
3047
 
3048
  if (loop_vinfo)
3049
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3050
  else
3051
    vf = 1;
3052
 
3053
  /* Multiple types in SLP are handled by creating the appropriate number of
3054
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3055
     case of SLP.  */
3056
  if (slp_node || PURE_SLP_STMT (stmt_info))
3057
    ncopies = 1;
3058
  else
3059
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3060
 
3061
  gcc_assert (ncopies >= 1);
3062
 
3063
  /* Determine whether the shift amount is a vector, or scalar.  If the
3064
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
3065
 
3066
  if (dt[1] == vect_internal_def && !slp_node)
3067
    scalar_shift_arg = false;
3068
  else if (dt[1] == vect_constant_def
3069
           || dt[1] == vect_external_def
3070
           || dt[1] == vect_internal_def)
3071
    {
3072
      /* In SLP, need to check whether the shift count is the same,
3073
         in loops if it is a constant or invariant, it is always
3074
         a scalar shift.  */
3075
      if (slp_node)
3076
        {
3077
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3078
          gimple slpstmt;
3079
 
3080
          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3081
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3082
              scalar_shift_arg = false;
3083
        }
3084
    }
3085
  else
3086
    {
3087
      if (vect_print_dump_info (REPORT_DETAILS))
3088
        fprintf (vect_dump, "operand mode requires invariant argument.");
3089
      return false;
3090
    }
3091
 
3092
  /* Vector shifted by vector.  */
3093
  if (!scalar_shift_arg)
3094
    {
3095
      optab = optab_for_tree_code (code, vectype, optab_vector);
3096
      if (vect_print_dump_info (REPORT_DETAILS))
3097
        fprintf (vect_dump, "vector/vector shift/rotate found.");
3098
      if (!op1_vectype)
3099
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3100
      if (op1_vectype == NULL_TREE
3101
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3102
        {
3103
          if (vect_print_dump_info (REPORT_DETAILS))
3104
            fprintf (vect_dump, "unusable type for last operand in"
3105
                                " vector/vector shift/rotate.");
3106
          return false;
3107
        }
3108
    }
3109
  /* See if the machine has a vector shifted by scalar insn and if not
3110
     then see if it has a vector shifted by vector insn.  */
3111
  else
3112
    {
3113
      optab = optab_for_tree_code (code, vectype, optab_scalar);
3114
      if (optab
3115
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3116
        {
3117
          if (vect_print_dump_info (REPORT_DETAILS))
3118
            fprintf (vect_dump, "vector/scalar shift/rotate found.");
3119
        }
3120
      else
3121
        {
3122
          optab = optab_for_tree_code (code, vectype, optab_vector);
3123
          if (optab
3124
               && (optab_handler (optab, TYPE_MODE (vectype))
3125
                      != CODE_FOR_nothing))
3126
            {
3127
              scalar_shift_arg = false;
3128
 
3129
              if (vect_print_dump_info (REPORT_DETAILS))
3130
                fprintf (vect_dump, "vector/vector shift/rotate found.");
3131
 
3132
              /* Unlike the other binary operators, shifts/rotates have
3133
                 the rhs being int, instead of the same type as the lhs,
3134
                 so make sure the scalar is the right type if we are
3135
                 dealing with vectors of long long/long/short/char.  */
3136
              if (dt[1] == vect_constant_def)
3137
                op1 = fold_convert (TREE_TYPE (vectype), op1);
3138
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3139
                                                   TREE_TYPE (op1)))
3140
                {
3141
                  if (slp_node
3142
                      && TYPE_MODE (TREE_TYPE (vectype))
3143
                         != TYPE_MODE (TREE_TYPE (op1)))
3144
                    {
3145
                      if (vect_print_dump_info (REPORT_DETAILS))
3146
                      fprintf (vect_dump, "unusable type for last operand in"
3147
                                          " vector/vector shift/rotate.");
3148
                        return false;
3149
                    }
3150
                  if (vec_stmt && !slp_node)
3151
                    {
3152
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
3153
                      op1 = vect_init_vector (stmt, op1,
3154
                                              TREE_TYPE (vectype), NULL);
3155
                    }
3156
                }
3157
            }
3158
        }
3159
    }
3160
 
3161
  /* Supportable by target?  */
3162
  if (!optab)
3163
    {
3164
      if (vect_print_dump_info (REPORT_DETAILS))
3165
        fprintf (vect_dump, "no optab.");
3166
      return false;
3167
    }
3168
  vec_mode = TYPE_MODE (vectype);
3169
  icode = (int) optab_handler (optab, vec_mode);
3170
  if (icode == CODE_FOR_nothing)
3171
    {
3172
      if (vect_print_dump_info (REPORT_DETAILS))
3173
        fprintf (vect_dump, "op not supported by target.");
3174
      /* Check only during analysis.  */
3175
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3176
          || (vf < vect_min_worthwhile_factor (code)
3177
              && !vec_stmt))
3178
        return false;
3179
      if (vect_print_dump_info (REPORT_DETAILS))
3180
        fprintf (vect_dump, "proceeding using word mode.");
3181
    }
3182
 
3183
  /* Worthwhile without SIMD support?  Check only during analysis.  */
3184
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3185
      && vf < vect_min_worthwhile_factor (code)
3186
      && !vec_stmt)
3187
    {
3188
      if (vect_print_dump_info (REPORT_DETAILS))
3189
        fprintf (vect_dump, "not worthwhile without SIMD support.");
3190
      return false;
3191
    }
3192
 
3193
  if (!vec_stmt) /* transformation not required.  */
3194
    {
3195
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3196
      if (vect_print_dump_info (REPORT_DETAILS))
3197
        fprintf (vect_dump, "=== vectorizable_shift ===");
3198
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3199
      return true;
3200
    }
3201
 
3202
  /** Transform.  **/
3203
 
3204
  if (vect_print_dump_info (REPORT_DETAILS))
3205
    fprintf (vect_dump, "transform binary/unary operation.");
3206
 
3207
  /* Handle def.  */
3208
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3209
 
3210
  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
3211
     created in the previous stages of the recursion, so no allocation is
3212
     needed, except for the case of shift with scalar shift argument.  In that
3213
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3214
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3215
     In case of loop-based vectorization we allocate VECs of size 1.  We
3216
     allocate VEC_OPRNDS1 only in case of binary operation.  */
3217
  if (!slp_node)
3218
    {
3219
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
3220
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
3221
    }
3222
  else if (scalar_shift_arg)
3223
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3224
 
3225
  prev_stmt_info = NULL;
3226
  for (j = 0; j < ncopies; j++)
3227
    {
3228
      /* Handle uses.  */
3229
      if (j == 0)
3230
        {
3231
          if (scalar_shift_arg)
3232
            {
3233
              /* Vector shl and shr insn patterns can be defined with scalar
3234
                 operand 2 (shift operand).  In this case, use constant or loop
3235
                 invariant op1 directly, without extending it to vector mode
3236
                 first.  */
3237
              optab_op2_mode = insn_data[icode].operand[2].mode;
3238
              if (!VECTOR_MODE_P (optab_op2_mode))
3239
                {
3240
                  if (vect_print_dump_info (REPORT_DETAILS))
3241
                    fprintf (vect_dump, "operand 1 using scalar mode.");
3242
                  vec_oprnd1 = op1;
3243
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3244
                  if (slp_node)
3245
                    {
3246
                      /* Store vec_oprnd1 for every vector stmt to be created
3247
                         for SLP_NODE.  We check during the analysis that all
3248
                         the shift arguments are the same.
3249
                         TODO: Allow different constants for different vector
3250
                         stmts generated for an SLP instance.  */
3251
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3252
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3253
                    }
3254
                }
3255
            }
3256
 
3257
          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3258
             (a special case for certain kind of vector shifts); otherwise,
3259
             operand 1 should be of a vector type (the usual case).  */
3260
          if (vec_oprnd1)
3261
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3262
                               slp_node, -1);
3263
          else
3264
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3265
                               slp_node, -1);
3266
        }
3267
      else
3268
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3269
 
3270
      /* Arguments are ready.  Create the new vector stmt.  */
3271
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3272
        {
3273
          vop1 = VEC_index (tree, vec_oprnds1, i);
3274
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3275
          new_temp = make_ssa_name (vec_dest, new_stmt);
3276
          gimple_assign_set_lhs (new_stmt, new_temp);
3277
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
3278
          if (slp_node)
3279
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3280
        }
3281
 
3282
      if (slp_node)
3283
        continue;
3284
 
3285
      if (j == 0)
3286
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287
      else
3288
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289
      prev_stmt_info = vinfo_for_stmt (new_stmt);
3290
    }
3291
 
3292
  VEC_free (tree, heap, vec_oprnds0);
3293
  VEC_free (tree, heap, vec_oprnds1);
3294
 
3295
  return true;
3296
}
3297
 
3298
 
3299
/* Function vectorizable_operation.
3300
 
3301
   Check if STMT performs a binary, unary or ternary operation that can
3302
   be vectorized.
3303
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3304
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3305
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3306
 
3307
static bool
3308
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3309
                        gimple *vec_stmt, slp_tree slp_node)
3310
{
3311
  tree vec_dest;
3312
  tree scalar_dest;
3313
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3314
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3315
  tree vectype;
3316
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3317
  enum tree_code code;
3318
  enum machine_mode vec_mode;
3319
  tree new_temp;
3320
  int op_type;
3321
  optab optab;
3322
  int icode;
3323
  tree def;
3324
  gimple def_stmt;
3325
  enum vect_def_type dt[3]
3326
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3327
  gimple new_stmt = NULL;
3328
  stmt_vec_info prev_stmt_info;
3329
  int nunits_in;
3330
  int nunits_out;
3331
  tree vectype_out;
3332
  int ncopies;
3333
  int j, i;
3334
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3335
  tree vop0, vop1, vop2;
3336
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3337
  int vf;
3338
 
3339
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3340
    return false;
3341
 
3342
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3343
    return false;
3344
 
3345
  /* Is STMT a vectorizable binary/unary operation?   */
3346
  if (!is_gimple_assign (stmt))
3347
    return false;
3348
 
3349
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3350
    return false;
3351
 
3352
  code = gimple_assign_rhs_code (stmt);
3353
 
3354
  /* For pointer addition, we should use the normal plus for
3355
     the vector addition.  */
3356
  if (code == POINTER_PLUS_EXPR)
3357
    code = PLUS_EXPR;
3358
 
3359
  /* Support only unary or binary operations.  */
3360
  op_type = TREE_CODE_LENGTH (code);
3361
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3362
    {
3363
      if (vect_print_dump_info (REPORT_DETAILS))
3364
        fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3365
                 op_type);
3366
      return false;
3367
    }
3368
 
3369
  scalar_dest = gimple_assign_lhs (stmt);
3370
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3371
 
3372
  /* Most operations cannot handle bit-precision types without extra
3373
     truncations.  */
3374
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3375
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3376
      /* Exception are bitwise binary operations.  */
3377
      && code != BIT_IOR_EXPR
3378
      && code != BIT_XOR_EXPR
3379
      && code != BIT_AND_EXPR)
3380
    {
3381
      if (vect_print_dump_info (REPORT_DETAILS))
3382
        fprintf (vect_dump, "bit-precision arithmetic not supported.");
3383
      return false;
3384
    }
3385
 
3386
  op0 = gimple_assign_rhs1 (stmt);
3387
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3388
                             &def_stmt, &def, &dt[0], &vectype))
3389
    {
3390
      if (vect_print_dump_info (REPORT_DETAILS))
3391
        fprintf (vect_dump, "use not simple.");
3392
      return false;
3393
    }
3394
  /* If op0 is an external or constant def use a vector type with
3395
     the same size as the output vector type.  */
3396
  if (!vectype)
3397
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3398
  if (vec_stmt)
3399
    gcc_assert (vectype);
3400
  if (!vectype)
3401
    {
3402
      if (vect_print_dump_info (REPORT_DETAILS))
3403
        {
3404
          fprintf (vect_dump, "no vectype for scalar type ");
3405
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3406
        }
3407
 
3408
      return false;
3409
    }
3410
 
3411
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3412
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3413
  if (nunits_out != nunits_in)
3414
    return false;
3415
 
3416
  if (op_type == binary_op || op_type == ternary_op)
3417
    {
3418
      op1 = gimple_assign_rhs2 (stmt);
3419
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3420
                               &def, &dt[1]))
3421
        {
3422
          if (vect_print_dump_info (REPORT_DETAILS))
3423
            fprintf (vect_dump, "use not simple.");
3424
          return false;
3425
        }
3426
    }
3427
  if (op_type == ternary_op)
3428
    {
3429
      op2 = gimple_assign_rhs3 (stmt);
3430
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3431
                               &def, &dt[2]))
3432
        {
3433
          if (vect_print_dump_info (REPORT_DETAILS))
3434
            fprintf (vect_dump, "use not simple.");
3435
          return false;
3436
        }
3437
    }
3438
 
3439
  if (loop_vinfo)
3440
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3441
  else
3442
    vf = 1;
3443
 
3444
  /* Multiple types in SLP are handled by creating the appropriate number of
3445
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3446
     case of SLP.  */
3447
  if (slp_node || PURE_SLP_STMT (stmt_info))
3448
    ncopies = 1;
3449
  else
3450
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3451
 
3452
  gcc_assert (ncopies >= 1);
3453
 
3454
  /* Shifts are handled in vectorizable_shift ().  */
3455
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3456
      || code == RROTATE_EXPR)
3457
   return false;
3458
 
3459
  optab = optab_for_tree_code (code, vectype, optab_default);
3460
 
3461
  /* Supportable by target?  */
3462
  if (!optab)
3463
    {
3464
      if (vect_print_dump_info (REPORT_DETAILS))
3465
        fprintf (vect_dump, "no optab.");
3466
      return false;
3467
    }
3468
  vec_mode = TYPE_MODE (vectype);
3469
  icode = (int) optab_handler (optab, vec_mode);
3470
  if (icode == CODE_FOR_nothing)
3471
    {
3472
      if (vect_print_dump_info (REPORT_DETAILS))
3473
        fprintf (vect_dump, "op not supported by target.");
3474
      /* Check only during analysis.  */
3475
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3476
          || (vf < vect_min_worthwhile_factor (code)
3477
              && !vec_stmt))
3478
        return false;
3479
      if (vect_print_dump_info (REPORT_DETAILS))
3480
        fprintf (vect_dump, "proceeding using word mode.");
3481
    }
3482
 
3483
  /* Worthwhile without SIMD support?  Check only during analysis.  */
3484
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3485
      && vf < vect_min_worthwhile_factor (code)
3486
      && !vec_stmt)
3487
    {
3488
      if (vect_print_dump_info (REPORT_DETAILS))
3489
        fprintf (vect_dump, "not worthwhile without SIMD support.");
3490
      return false;
3491
    }
3492
 
3493
  if (!vec_stmt) /* transformation not required.  */
3494
    {
3495
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3496
      if (vect_print_dump_info (REPORT_DETAILS))
3497
        fprintf (vect_dump, "=== vectorizable_operation ===");
3498
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3499
      return true;
3500
    }
3501
 
3502
  /** Transform.  **/
3503
 
3504
  if (vect_print_dump_info (REPORT_DETAILS))
3505
    fprintf (vect_dump, "transform binary/unary operation.");
3506
 
3507
  /* Handle def.  */
3508
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3509
 
3510
  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
3511
     created in the previous stages of the recursion, so no allocation is
3512
     needed, except for the case of shift with scalar shift argument.  In that
3513
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3514
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3515
     In case of loop-based vectorization we allocate VECs of size 1.  We
3516
     allocate VEC_OPRNDS1 only in case of binary operation.  */
3517
  if (!slp_node)
3518
    {
3519
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
3520
      if (op_type == binary_op || op_type == ternary_op)
3521
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
3522
      if (op_type == ternary_op)
3523
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
3524
    }
3525
 
3526
  /* In case the vectorization factor (VF) is bigger than the number
3527
     of elements that we can fit in a vectype (nunits), we have to generate
3528
     more than one vector stmt - i.e - we need to "unroll" the
3529
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
3530
     from one copy of the vector stmt to the next, in the field
3531
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
3532
     stages to find the correct vector defs to be used when vectorizing
3533
     stmts that use the defs of the current stmt.  The example below
3534
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3535
     we need to create 4 vectorized stmts):
3536
 
3537
     before vectorization:
3538
                                RELATED_STMT    VEC_STMT
3539
        S1:     x = memref      -               -
3540
        S2:     z = x + 1       -               -
3541
 
3542
     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3543
             there):
3544
                                RELATED_STMT    VEC_STMT
3545
        VS1_0:  vx0 = memref0   VS1_1           -
3546
        VS1_1:  vx1 = memref1   VS1_2           -
3547
        VS1_2:  vx2 = memref2   VS1_3           -
3548
        VS1_3:  vx3 = memref3   -               -
3549
        S1:     x = load        -               VS1_0
3550
        S2:     z = x + 1       -               -
3551
 
3552
     step2: vectorize stmt S2 (done here):
3553
        To vectorize stmt S2 we first need to find the relevant vector
3554
        def for the first operand 'x'.  This is, as usual, obtained from
3555
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3556
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
3557
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
3558
        the vector stmt VS2_0, and as usual, record it in the
3559
        STMT_VINFO_VEC_STMT of stmt S2.
3560
        When creating the second copy (VS2_1), we obtain the relevant vector
3561
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3562
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
3563
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
3564
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3565
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
3566
        chain of stmts and pointers:
3567
                                RELATED_STMT    VEC_STMT
3568
        VS1_0:  vx0 = memref0   VS1_1           -
3569
        VS1_1:  vx1 = memref1   VS1_2           -
3570
        VS1_2:  vx2 = memref2   VS1_3           -
3571
        VS1_3:  vx3 = memref3   -               -
3572
        S1:     x = load        -               VS1_0
3573
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
3574
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
3575
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
3576
        VS2_3:  vz3 = vx3 + v1  -               -
3577
        S2:     z = x + 1       -               VS2_0  */
3578
 
3579
  prev_stmt_info = NULL;
3580
  for (j = 0; j < ncopies; j++)
3581
    {
3582
      /* Handle uses.  */
3583
      if (j == 0)
3584
        {
3585
          if (op_type == binary_op || op_type == ternary_op)
3586
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3587
                               slp_node, -1);
3588
          else
3589
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3590
                               slp_node, -1);
3591
          if (op_type == ternary_op)
3592
            {
3593
              vec_oprnds2 = VEC_alloc (tree, heap, 1);
3594
              VEC_quick_push (tree, vec_oprnds2,
3595
                              vect_get_vec_def_for_operand (op2, stmt, NULL));
3596
            }
3597
        }
3598
      else
3599
        {
3600
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3601
          if (op_type == ternary_op)
3602
            {
3603
              tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3604
              VEC_quick_push (tree, vec_oprnds2,
3605
                              vect_get_vec_def_for_stmt_copy (dt[2],
3606
                                                              vec_oprnd));
3607
            }
3608
        }
3609
 
3610
      /* Arguments are ready.  Create the new vector stmt.  */
3611
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3612
        {
3613
          vop1 = ((op_type == binary_op || op_type == ternary_op)
3614
                  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3615
          vop2 = ((op_type == ternary_op)
3616
                  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3617
          new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3618
                                                    vop0, vop1, vop2);
3619
          new_temp = make_ssa_name (vec_dest, new_stmt);
3620
          gimple_assign_set_lhs (new_stmt, new_temp);
3621
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
3622
          if (slp_node)
3623
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3624
        }
3625
 
3626
      if (slp_node)
3627
        continue;
3628
 
3629
      if (j == 0)
3630
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3631
      else
3632
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3633
      prev_stmt_info = vinfo_for_stmt (new_stmt);
3634
    }
3635
 
3636
  VEC_free (tree, heap, vec_oprnds0);
3637
  if (vec_oprnds1)
3638
    VEC_free (tree, heap, vec_oprnds1);
3639
  if (vec_oprnds2)
3640
    VEC_free (tree, heap, vec_oprnds2);
3641
 
3642
  return true;
3643
}
3644
 
3645
 
3646
/* Function vectorizable_store.
3647
 
3648
   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3649
   can be vectorized.
3650
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3651
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3652
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3653
 
3654
static bool
3655
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3656
                    slp_tree slp_node)
3657
{
3658
  tree scalar_dest;
3659
  tree data_ref;
3660
  tree op;
3661
  tree vec_oprnd = NULL_TREE;
3662
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3663
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3664
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3665
  tree elem_type;
3666
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3667
  struct loop *loop = NULL;
3668
  enum machine_mode vec_mode;
3669
  tree dummy;
3670
  enum dr_alignment_support alignment_support_scheme;
3671
  tree def;
3672
  gimple def_stmt;
3673
  enum vect_def_type dt;
3674
  stmt_vec_info prev_stmt_info = NULL;
3675
  tree dataref_ptr = NULL_TREE;
3676
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3677
  int ncopies;
3678
  int j;
3679
  gimple next_stmt, first_stmt = NULL;
3680
  bool strided_store = false;
3681
  bool store_lanes_p = false;
3682
  unsigned int group_size, i;
3683
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3684
  bool inv_p;
3685
  VEC(tree,heap) *vec_oprnds = NULL;
3686
  bool slp = (slp_node != NULL);
3687
  unsigned int vec_num;
3688
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3689
  tree aggr_type;
3690
 
3691
  if (loop_vinfo)
3692
    loop = LOOP_VINFO_LOOP (loop_vinfo);
3693
 
3694
  /* Multiple types in SLP are handled by creating the appropriate number of
3695
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3696
     case of SLP.  */
3697
  if (slp || PURE_SLP_STMT (stmt_info))
3698
    ncopies = 1;
3699
  else
3700
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3701
 
3702
  gcc_assert (ncopies >= 1);
3703
 
3704
  /* FORNOW. This restriction should be relaxed.  */
3705
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3706
    {
3707
      if (vect_print_dump_info (REPORT_DETAILS))
3708
        fprintf (vect_dump, "multiple types in nested loop.");
3709
      return false;
3710
    }
3711
 
3712
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3713
    return false;
3714
 
3715
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3716
    return false;
3717
 
3718
  /* Is vectorizable store? */
3719
 
3720
  if (!is_gimple_assign (stmt))
3721
    return false;
3722
 
3723
  scalar_dest = gimple_assign_lhs (stmt);
3724
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3725
      && is_pattern_stmt_p (stmt_info))
3726
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
3727
  if (TREE_CODE (scalar_dest) != ARRAY_REF
3728
      && TREE_CODE (scalar_dest) != INDIRECT_REF
3729
      && TREE_CODE (scalar_dest) != COMPONENT_REF
3730
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3731
      && TREE_CODE (scalar_dest) != REALPART_EXPR
3732
      && TREE_CODE (scalar_dest) != MEM_REF)
3733
    return false;
3734
 
3735
  gcc_assert (gimple_assign_single_p (stmt));
3736
  op = gimple_assign_rhs1 (stmt);
3737
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3738
                           &def, &dt))
3739
    {
3740
      if (vect_print_dump_info (REPORT_DETAILS))
3741
        fprintf (vect_dump, "use not simple.");
3742
      return false;
3743
    }
3744
 
3745
  elem_type = TREE_TYPE (vectype);
3746
  vec_mode = TYPE_MODE (vectype);
3747
 
3748
  /* FORNOW. In some cases can vectorize even if data-type not supported
3749
     (e.g. - array initialization with 0).  */
3750
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3751
    return false;
3752
 
3753
  if (!STMT_VINFO_DATA_REF (stmt_info))
3754
    return false;
3755
 
3756
  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3757
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3758
                            size_zero_node) < 0)
3759
    {
3760
      if (vect_print_dump_info (REPORT_DETAILS))
3761
        fprintf (vect_dump, "negative step for store.");
3762
      return false;
3763
    }
3764
 
3765
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3766
    {
3767
      strided_store = true;
3768
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3769
      if (!slp && !PURE_SLP_STMT (stmt_info))
3770
        {
3771
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3772
          if (vect_store_lanes_supported (vectype, group_size))
3773
            store_lanes_p = true;
3774
          else if (!vect_strided_store_supported (vectype, group_size))
3775
            return false;
3776
        }
3777
 
3778
      if (first_stmt == stmt)
3779
        {
3780
          /* STMT is the leader of the group. Check the operands of all the
3781
             stmts of the group.  */
3782
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3783
          while (next_stmt)
3784
            {
3785
              gcc_assert (gimple_assign_single_p (next_stmt));
3786
              op = gimple_assign_rhs1 (next_stmt);
3787
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3788
                                       &def_stmt, &def, &dt))
3789
                {
3790
                  if (vect_print_dump_info (REPORT_DETAILS))
3791
                    fprintf (vect_dump, "use not simple.");
3792
                  return false;
3793
                }
3794
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3795
            }
3796
        }
3797
    }
3798
 
3799
  if (!vec_stmt) /* transformation not required.  */
3800
    {
3801
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3802
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3803
      return true;
3804
    }
3805
 
3806
  /** Transform.  **/
3807
 
3808
  if (strided_store)
3809
    {
3810
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3811
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3812
 
3813
      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3814
 
3815
      /* FORNOW */
3816
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3817
 
3818
      /* We vectorize all the stmts of the interleaving group when we
3819
         reach the last stmt in the group.  */
3820
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3821
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3822
          && !slp)
3823
        {
3824
          *vec_stmt = NULL;
3825
          return true;
3826
        }
3827
 
3828
      if (slp)
3829
        {
3830
          strided_store = false;
3831
          /* VEC_NUM is the number of vect stmts to be created for this
3832
             group.  */
3833
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3834
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3835
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3836
          op = gimple_assign_rhs1 (first_stmt);
3837
        }
3838
      else
3839
        /* VEC_NUM is the number of vect stmts to be created for this
3840
           group.  */
3841
        vec_num = group_size;
3842
    }
3843
  else
3844
    {
3845
      first_stmt = stmt;
3846
      first_dr = dr;
3847
      group_size = vec_num = 1;
3848
    }
3849
 
3850
  if (vect_print_dump_info (REPORT_DETAILS))
3851
    fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3852
 
3853
  dr_chain = VEC_alloc (tree, heap, group_size);
3854
  oprnds = VEC_alloc (tree, heap, group_size);
3855
 
3856
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3857
  gcc_assert (alignment_support_scheme);
3858
  /* Targets with store-lane instructions must not require explicit
3859
     realignment.  */
3860
  gcc_assert (!store_lanes_p
3861
              || alignment_support_scheme == dr_aligned
3862
              || alignment_support_scheme == dr_unaligned_supported);
3863
 
3864
  if (store_lanes_p)
3865
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3866
  else
3867
    aggr_type = vectype;
3868
 
3869
  /* In case the vectorization factor (VF) is bigger than the number
3870
     of elements that we can fit in a vectype (nunits), we have to generate
3871
     more than one vector stmt - i.e - we need to "unroll" the
3872
     vector stmt by a factor VF/nunits.  For more details see documentation in
3873
     vect_get_vec_def_for_copy_stmt.  */
3874
 
3875
  /* In case of interleaving (non-unit strided access):
3876
 
3877
        S1:  &base + 2 = x2
3878
        S2:  &base = x0
3879
        S3:  &base + 1 = x1
3880
        S4:  &base + 3 = x3
3881
 
3882
     We create vectorized stores starting from base address (the access of the
3883
     first stmt in the chain (S2 in the above example), when the last store stmt
3884
     of the chain (S4) is reached:
3885
 
3886
        VS1: &base = vx2
3887
        VS2: &base + vec_size*1 = vx0
3888
        VS3: &base + vec_size*2 = vx1
3889
        VS4: &base + vec_size*3 = vx3
3890
 
3891
     Then permutation statements are generated:
3892
 
3893
        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3894
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3895
        ...
3896
 
3897
     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3898
     (the order of the data-refs in the output of vect_permute_store_chain
3899
     corresponds to the order of scalar stmts in the interleaving chain - see
3900
     the documentation of vect_permute_store_chain()).
3901
 
3902
     In case of both multiple types and interleaving, above vector stores and
3903
     permutation stmts are created for every copy.  The result vector stmts are
3904
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3905
     STMT_VINFO_RELATED_STMT for the next copies.
3906
  */
3907
 
3908
  prev_stmt_info = NULL;
3909
  for (j = 0; j < ncopies; j++)
3910
    {
3911
      gimple new_stmt;
3912
      gimple ptr_incr;
3913
 
3914
      if (j == 0)
3915
        {
3916
          if (slp)
3917
            {
3918
              /* Get vectorized arguments for SLP_NODE.  */
3919
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3920
                                 NULL, slp_node, -1);
3921
 
3922
              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3923
            }
3924
          else
3925
            {
3926
              /* For interleaved stores we collect vectorized defs for all the
3927
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3928
                 used as an input to vect_permute_store_chain(), and OPRNDS as
3929
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3930
 
3931
                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3932
                 OPRNDS are of size 1.  */
3933
              next_stmt = first_stmt;
3934
              for (i = 0; i < group_size; i++)
3935
                {
3936
                  /* Since gaps are not supported for interleaved stores,
3937
                     GROUP_SIZE is the exact number of stmts in the chain.
3938
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
3939
                     there is no interleaving, GROUP_SIZE is 1, and only one
3940
                     iteration of the loop will be executed.  */
3941
                  gcc_assert (next_stmt
3942
                              && gimple_assign_single_p (next_stmt));
3943
                  op = gimple_assign_rhs1 (next_stmt);
3944
 
3945
                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3946
                                                            NULL);
3947
                  VEC_quick_push(tree, dr_chain, vec_oprnd);
3948
                  VEC_quick_push(tree, oprnds, vec_oprnd);
3949
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3950
                }
3951
            }
3952
 
3953
          /* We should have caught mismatched types earlier.  */
3954
          gcc_assert (useless_type_conversion_p (vectype,
3955
                                                 TREE_TYPE (vec_oprnd)));
3956
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3957
                                                  NULL_TREE, &dummy, gsi,
3958
                                                  &ptr_incr, false, &inv_p);
3959
          gcc_assert (bb_vinfo || !inv_p);
3960
        }
3961
      else
3962
        {
3963
          /* For interleaved stores we created vectorized defs for all the
3964
             defs stored in OPRNDS in the previous iteration (previous copy).
3965
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
3966
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3967
             next copy.
3968
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3969
             OPRNDS are of size 1.  */
3970
          for (i = 0; i < group_size; i++)
3971
            {
3972
              op = VEC_index (tree, oprnds, i);
3973
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
3974
                                  &def, &dt);
3975
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3976
              VEC_replace(tree, dr_chain, i, vec_oprnd);
3977
              VEC_replace(tree, oprnds, i, vec_oprnd);
3978
            }
3979
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3980
                                         TYPE_SIZE_UNIT (aggr_type));
3981
        }
3982
 
3983
      if (store_lanes_p)
3984
        {
3985
          tree vec_array;
3986
 
3987
          /* Combine all the vectors into an array.  */
3988
          vec_array = create_vector_array (vectype, vec_num);
3989
          for (i = 0; i < vec_num; i++)
3990
            {
3991
              vec_oprnd = VEC_index (tree, dr_chain, i);
3992
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3993
            }
3994
 
3995
          /* Emit:
3996
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
3997
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3998
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3999
          gimple_call_set_lhs (new_stmt, data_ref);
4000
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
4001
          mark_symbols_for_renaming (new_stmt);
4002
        }
4003
      else
4004
        {
4005
          new_stmt = NULL;
4006
          if (strided_store)
4007
            {
4008
              result_chain = VEC_alloc (tree, heap, group_size);
4009
              /* Permute.  */
4010
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4011
                                        &result_chain);
4012
            }
4013
 
4014
          next_stmt = first_stmt;
4015
          for (i = 0; i < vec_num; i++)
4016
            {
4017
              struct ptr_info_def *pi;
4018
 
4019
              if (i > 0)
4020
                /* Bump the vector pointer.  */
4021
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4022
                                               stmt, NULL_TREE);
4023
 
4024
              if (slp)
4025
                vec_oprnd = VEC_index (tree, vec_oprnds, i);
4026
              else if (strided_store)
4027
                /* For strided stores vectorized defs are interleaved in
4028
                   vect_permute_store_chain().  */
4029
                vec_oprnd = VEC_index (tree, result_chain, i);
4030
 
4031
              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4032
                                 build_int_cst (reference_alias_ptr_type
4033
                                                (DR_REF (first_dr)), 0));
4034
              pi = get_ptr_info (dataref_ptr);
4035
              pi->align = TYPE_ALIGN_UNIT (vectype);
4036
              if (aligned_access_p (first_dr))
4037
                pi->misalign = 0;
4038
              else if (DR_MISALIGNMENT (first_dr) == -1)
4039
                {
4040
                  TREE_TYPE (data_ref)
4041
                    = build_aligned_type (TREE_TYPE (data_ref),
4042
                                          TYPE_ALIGN (elem_type));
4043
                  pi->align = TYPE_ALIGN_UNIT (elem_type);
4044
                  pi->misalign = 0;
4045
                }
4046
              else
4047
                {
4048
                  TREE_TYPE (data_ref)
4049
                    = build_aligned_type (TREE_TYPE (data_ref),
4050
                                          TYPE_ALIGN (elem_type));
4051
                  pi->misalign = DR_MISALIGNMENT (first_dr);
4052
                }
4053
 
4054
              /* Arguments are ready.  Create the new vector stmt.  */
4055
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4056
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
4057
              mark_symbols_for_renaming (new_stmt);
4058
 
4059
              if (slp)
4060
                continue;
4061
 
4062
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4063
              if (!next_stmt)
4064
                break;
4065
            }
4066
        }
4067
      if (!slp)
4068
        {
4069
          if (j == 0)
4070
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4071
          else
4072
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4073
          prev_stmt_info = vinfo_for_stmt (new_stmt);
4074
        }
4075
    }
4076
 
4077
  VEC_free (tree, heap, dr_chain);
4078
  VEC_free (tree, heap, oprnds);
4079
  if (result_chain)
4080
    VEC_free (tree, heap, result_chain);
4081
  if (vec_oprnds)
4082
    VEC_free (tree, heap, vec_oprnds);
4083
 
4084
  return true;
4085
}
4086
 
4087
/* Given a vector type VECTYPE and permutation SEL returns
4088
   the VECTOR_CST mask that implements the permutation of the
4089
   vector elements.  If that is impossible to do, returns NULL.  */
4090
 
4091
tree
4092
vect_gen_perm_mask (tree vectype, unsigned char *sel)
4093
{
4094
  tree mask_elt_type, mask_type, mask_vec;
4095
  int i, nunits;
4096
 
4097
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
4098
 
4099
  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4100
    return NULL;
4101
 
4102
  mask_elt_type
4103
    = lang_hooks.types.type_for_size
4104
    (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4105
  mask_type = get_vectype_for_scalar_type (mask_elt_type);
4106
 
4107
  mask_vec = NULL;
4108
  for (i = nunits - 1; i >= 0; i--)
4109
    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4110
                          mask_vec);
4111
  mask_vec = build_vector (mask_type, mask_vec);
4112
 
4113
  return mask_vec;
4114
}
4115
 
4116
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4117
   reversal of the vector elements.  If that is impossible to do,
4118
   returns NULL.  */
4119
 
4120
static tree
4121
perm_mask_for_reverse (tree vectype)
4122
{
4123
  int i, nunits;
4124
  unsigned char *sel;
4125
 
4126
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
4127
  sel = XALLOCAVEC (unsigned char, nunits);
4128
 
4129
  for (i = 0; i < nunits; ++i)
4130
    sel[i] = nunits - 1 - i;
4131
 
4132
  return vect_gen_perm_mask (vectype, sel);
4133
}
4134
 
4135
/* Given a vector variable X and Y, that was generated for the scalar
4136
   STMT, generate instructions to permute the vector elements of X and Y
4137
   using permutation mask MASK_VEC, insert them at *GSI and return the
4138
   permuted vector variable.  */
4139
 
4140
static tree
4141
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4142
                      gimple_stmt_iterator *gsi)
4143
{
4144
  tree vectype = TREE_TYPE (x);
4145
  tree perm_dest, data_ref;
4146
  gimple perm_stmt;
4147
 
4148
  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4149
  data_ref = make_ssa_name (perm_dest, NULL);
4150
 
4151
  /* Generate the permute statement.  */
4152
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4153
                                             x, y, mask_vec);
4154
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4155
 
4156
  return data_ref;
4157
}
4158
 
4159
/* vectorizable_load.
4160
 
4161
   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4162
   can be vectorized.
4163
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4164
   stmt to replace it, put it in VEC_STMT, and insert it at *GSI.
4165
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4166
 
4167
static bool
4168
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4169
                   slp_tree slp_node, slp_instance slp_node_instance)
4170
{
4171
  tree scalar_dest;
4172
  tree vec_dest = NULL;
4173
  tree data_ref = NULL;
4174
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4175
  stmt_vec_info prev_stmt_info;
4176
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4177
  struct loop *loop = NULL;
4178
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4179
  bool nested_in_vect_loop = false;
4180
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4181
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4182
  tree elem_type;
4183
  tree new_temp;
4184
  enum machine_mode mode;
4185
  gimple new_stmt = NULL;
4186
  tree dummy;
4187
  enum dr_alignment_support alignment_support_scheme;
4188
  tree dataref_ptr = NULL_TREE;
4189
  gimple ptr_incr;
4190
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4191
  int ncopies;
4192
  int i, j, group_size;
4193
  tree msq = NULL_TREE, lsq;
4194
  tree offset = NULL_TREE;
4195
  tree realignment_token = NULL_TREE;
4196
  gimple phi = NULL;
4197
  VEC(tree,heap) *dr_chain = NULL;
4198
  bool strided_load = false;
4199
  bool load_lanes_p = false;
4200
  gimple first_stmt;
4201
  bool inv_p;
4202
  bool negative;
4203
  bool compute_in_loop = false;
4204
  struct loop *at_loop;
4205
  int vec_num;
4206
  bool slp = (slp_node != NULL);
4207
  bool slp_perm = false;
4208
  enum tree_code code;
4209
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4210
  int vf;
4211
  tree aggr_type;
4212
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4213
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4214
  int gather_scale = 1;
4215
  enum vect_def_type gather_dt = vect_unknown_def_type;
4216
 
4217
  if (loop_vinfo)
4218
    {
4219
      loop = LOOP_VINFO_LOOP (loop_vinfo);
4220
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4221
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4222
    }
4223
  else
4224
    vf = 1;
4225
 
4226
  /* Multiple types in SLP are handled by creating the appropriate number of
4227
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4228
     case of SLP.  */
4229
  if (slp || PURE_SLP_STMT (stmt_info))
4230
    ncopies = 1;
4231
  else
4232
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4233
 
4234
  gcc_assert (ncopies >= 1);
4235
 
4236
  /* FORNOW. This restriction should be relaxed.  */
4237
  if (nested_in_vect_loop && ncopies > 1)
4238
    {
4239
      if (vect_print_dump_info (REPORT_DETAILS))
4240
        fprintf (vect_dump, "multiple types in nested loop.");
4241
      return false;
4242
    }
4243
 
4244
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4245
    return false;
4246
 
4247
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4248
    return false;
4249
 
4250
  /* Is vectorizable load? */
4251
  if (!is_gimple_assign (stmt))
4252
    return false;
4253
 
4254
  scalar_dest = gimple_assign_lhs (stmt);
4255
  if (TREE_CODE (scalar_dest) != SSA_NAME)
4256
    return false;
4257
 
4258
  code = gimple_assign_rhs_code (stmt);
4259
  if (code != ARRAY_REF
4260
      && code != INDIRECT_REF
4261
      && code != COMPONENT_REF
4262
      && code != IMAGPART_EXPR
4263
      && code != REALPART_EXPR
4264
      && code != MEM_REF
4265
      && TREE_CODE_CLASS (code) != tcc_declaration)
4266
    return false;
4267
 
4268
  if (!STMT_VINFO_DATA_REF (stmt_info))
4269
    return false;
4270
 
4271
  negative = tree_int_cst_compare (nested_in_vect_loop
4272
                                   ? STMT_VINFO_DR_STEP (stmt_info)
4273
                                   : DR_STEP (dr),
4274
                                   size_zero_node) < 0;
4275
  if (negative && ncopies > 1)
4276
    {
4277
      if (vect_print_dump_info (REPORT_DETAILS))
4278
        fprintf (vect_dump, "multiple types with negative step.");
4279
      return false;
4280
    }
4281
 
4282
  elem_type = TREE_TYPE (vectype);
4283
  mode = TYPE_MODE (vectype);
4284
 
4285
  /* FORNOW. In some cases can vectorize even if data-type not supported
4286
    (e.g. - data copies).  */
4287
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4288
    {
4289
      if (vect_print_dump_info (REPORT_DETAILS))
4290
        fprintf (vect_dump, "Aligned load, but unsupported type.");
4291
      return false;
4292
    }
4293
 
4294
  /* Check if the load is a part of an interleaving chain.  */
4295
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4296
    {
4297
      strided_load = true;
4298
      /* FORNOW */
4299
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4300
 
4301
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4302
      if (!slp && !PURE_SLP_STMT (stmt_info))
4303
        {
4304
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4305
          if (vect_load_lanes_supported (vectype, group_size))
4306
            load_lanes_p = true;
4307
          else if (!vect_strided_load_supported (vectype, group_size))
4308
            return false;
4309
        }
4310
    }
4311
 
4312
  if (negative)
4313
    {
4314
      gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4315
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4316
      if (alignment_support_scheme != dr_aligned
4317
          && alignment_support_scheme != dr_unaligned_supported)
4318
        {
4319
          if (vect_print_dump_info (REPORT_DETAILS))
4320
            fprintf (vect_dump, "negative step but alignment required.");
4321
          return false;
4322
        }
4323
      if (!perm_mask_for_reverse (vectype))
4324
        {
4325
          if (vect_print_dump_info (REPORT_DETAILS))
4326
            fprintf (vect_dump, "negative step and reversing not supported.");
4327
          return false;
4328
        }
4329
    }
4330
 
4331
  if (STMT_VINFO_GATHER_P (stmt_info))
4332
    {
4333
      gimple def_stmt;
4334
      tree def;
4335
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4336
                                       &gather_off, &gather_scale);
4337
      gcc_assert (gather_decl);
4338
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4339
                                 &def_stmt, &def, &gather_dt,
4340
                                 &gather_off_vectype))
4341
        {
4342
          if (vect_print_dump_info (REPORT_DETAILS))
4343
            fprintf (vect_dump, "gather index use not simple.");
4344
          return false;
4345
        }
4346
    }
4347
 
4348
  if (!vec_stmt) /* transformation not required.  */
4349
    {
4350
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4351
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4352
      return true;
4353
    }
4354
 
4355
  if (vect_print_dump_info (REPORT_DETAILS))
4356
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4357
 
4358
  /** Transform.  **/
4359
 
4360
  if (STMT_VINFO_GATHER_P (stmt_info))
4361
    {
4362
      tree vec_oprnd0 = NULL_TREE, op;
4363
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4364
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4365
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4366
      edge pe = loop_preheader_edge (loop);
4367
      gimple_seq seq;
4368
      basic_block new_bb;
4369
      enum { NARROW, NONE, WIDEN } modifier;
4370
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4371
 
4372
      if (nunits == gather_off_nunits)
4373
        modifier = NONE;
4374
      else if (nunits == gather_off_nunits / 2)
4375
        {
4376
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4377
          modifier = WIDEN;
4378
 
4379
          for (i = 0; i < gather_off_nunits; ++i)
4380
            sel[i] = i | nunits;
4381
 
4382
          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4383
          gcc_assert (perm_mask != NULL_TREE);
4384
        }
4385
      else if (nunits == gather_off_nunits * 2)
4386
        {
4387
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4388
          modifier = NARROW;
4389
 
4390
          for (i = 0; i < nunits; ++i)
4391
            sel[i] = i < gather_off_nunits
4392
                     ? i : i + nunits - gather_off_nunits;
4393
 
4394
          perm_mask = vect_gen_perm_mask (vectype, sel);
4395
          gcc_assert (perm_mask != NULL_TREE);
4396
          ncopies *= 2;
4397
        }
4398
      else
4399
        gcc_unreachable ();
4400
 
4401
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4402
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4403
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4404
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4405
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4406
      scaletype = TREE_VALUE (arglist);
4407
      gcc_checking_assert (types_compatible_p (srctype, rettype)
4408
                           && types_compatible_p (srctype, masktype));
4409
 
4410
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
4411
 
4412
      ptr = fold_convert (ptrtype, gather_base);
4413
      if (!is_gimple_min_invariant (ptr))
4414
        {
4415
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4416
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4417
          gcc_assert (!new_bb);
4418
        }
4419
 
4420
      /* Currently we support only unconditional gather loads,
4421
         so mask should be all ones.  */
4422
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4423
        mask = build_int_cst (TREE_TYPE (masktype), -1);
4424
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4425
        {
4426
          REAL_VALUE_TYPE r;
4427
          long tmp[6];
4428
          for (j = 0; j < 6; ++j)
4429
            tmp[j] = -1;
4430
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4431
          mask = build_real (TREE_TYPE (masktype), r);
4432
        }
4433
      else
4434
        gcc_unreachable ();
4435
      mask = build_vector_from_val (masktype, mask);
4436
      mask = vect_init_vector (stmt, mask, masktype, NULL);
4437
 
4438
      scale = build_int_cst (scaletype, gather_scale);
4439
 
4440
      prev_stmt_info = NULL;
4441
      for (j = 0; j < ncopies; ++j)
4442
        {
4443
          if (modifier == WIDEN && (j & 1))
4444
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4445
                                       perm_mask, stmt, gsi);
4446
          else if (j == 0)
4447
            op = vec_oprnd0
4448
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4449
          else
4450
            op = vec_oprnd0
4451
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4452
 
4453
          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4454
            {
4455
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4456
                          == TYPE_VECTOR_SUBPARTS (idxtype));
4457
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4458
              add_referenced_var (var);
4459
              var = make_ssa_name (var, NULL);
4460
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4461
              new_stmt
4462
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4463
                                                op, NULL_TREE);
4464
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
4465
              op = var;
4466
            }
4467
 
4468
          new_stmt
4469
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4470
 
4471
          if (!useless_type_conversion_p (vectype, rettype))
4472
            {
4473
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4474
                          == TYPE_VECTOR_SUBPARTS (rettype));
4475
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4476
              add_referenced_var (var);
4477
              op = make_ssa_name (var, new_stmt);
4478
              gimple_call_set_lhs (new_stmt, op);
4479
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
4480
              var = make_ssa_name (vec_dest, NULL);
4481
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4482
              new_stmt
4483
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4484
                                                NULL_TREE);
4485
            }
4486
          else
4487
            {
4488
              var = make_ssa_name (vec_dest, new_stmt);
4489
              gimple_call_set_lhs (new_stmt, var);
4490
            }
4491
 
4492
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
4493
 
4494
          if (modifier == NARROW)
4495
            {
4496
              if ((j & 1) == 0)
4497
                {
4498
                  prev_res = var;
4499
                  continue;
4500
                }
4501
              var = permute_vec_elements (prev_res, var,
4502
                                          perm_mask, stmt, gsi);
4503
              new_stmt = SSA_NAME_DEF_STMT (var);
4504
            }
4505
 
4506
          if (prev_stmt_info == NULL)
4507
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4508
          else
4509
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4510
          prev_stmt_info = vinfo_for_stmt (new_stmt);
4511
        }
4512
      return true;
4513
    }
4514
 
4515
  if (strided_load)
4516
    {
4517
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4518
      if (slp
4519
          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4520
          && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4521
        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4522
 
4523
      /* Check if the chain of loads is already vectorized.  */
4524
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4525
        {
4526
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4527
          return true;
4528
        }
4529
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4530
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4531
 
4532
      /* VEC_NUM is the number of vect stmts to be created for this group.  */
4533
      if (slp)
4534
        {
4535
          strided_load = false;
4536
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4537
          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4538
            slp_perm = true;
4539
        }
4540
      else
4541
        vec_num = group_size;
4542
    }
4543
  else
4544
    {
4545
      first_stmt = stmt;
4546
      first_dr = dr;
4547
      group_size = vec_num = 1;
4548
    }
4549
 
4550
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4551
  gcc_assert (alignment_support_scheme);
4552
  /* Targets with load-lane instructions must not require explicit
4553
     realignment.  */
4554
  gcc_assert (!load_lanes_p
4555
              || alignment_support_scheme == dr_aligned
4556
              || alignment_support_scheme == dr_unaligned_supported);
4557
 
4558
  /* In case the vectorization factor (VF) is bigger than the number
4559
     of elements that we can fit in a vectype (nunits), we have to generate
4560
     more than one vector stmt - i.e - we need to "unroll" the
4561
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
4562
     from one copy of the vector stmt to the next, in the field
4563
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4564
     stages to find the correct vector defs to be used when vectorizing
4565
     stmts that use the defs of the current stmt.  The example below
4566
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4567
     need to create 4 vectorized stmts):
4568
 
4569
     before vectorization:
4570
                                RELATED_STMT    VEC_STMT
4571
        S1:     x = memref      -               -
4572
        S2:     z = x + 1       -               -
4573
 
4574
     step 1: vectorize stmt S1:
4575
        We first create the vector stmt VS1_0, and, as usual, record a
4576
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4577
        Next, we create the vector stmt VS1_1, and record a pointer to
4578
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4579
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
4580
        stmts and pointers:
4581
                                RELATED_STMT    VEC_STMT
4582
        VS1_0:  vx0 = memref0   VS1_1           -
4583
        VS1_1:  vx1 = memref1   VS1_2           -
4584
        VS1_2:  vx2 = memref2   VS1_3           -
4585
        VS1_3:  vx3 = memref3   -               -
4586
        S1:     x = load        -               VS1_0
4587
        S2:     z = x + 1       -               -
4588
 
4589
     See the documentation in vect_get_vec_def_for_stmt_copy for how the
4590
     information we recorded in RELATED_STMT field is used to vectorize
4591
     stmt S2.  */
4592
 
4593
  /* In case of interleaving (non-unit strided access):
4594
 
4595
     S1:  x2 = &base + 2
4596
     S2:  x0 = &base
4597
     S3:  x1 = &base + 1
4598
     S4:  x3 = &base + 3
4599
 
4600
     Vectorized loads are created in the order of memory accesses
4601
     starting from the access of the first stmt of the chain:
4602
 
4603
     VS1: vx0 = &base
4604
     VS2: vx1 = &base + vec_size*1
4605
     VS3: vx3 = &base + vec_size*2
4606
     VS4: vx4 = &base + vec_size*3
4607
 
4608
     Then permutation statements are generated:
4609
 
4610
     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4611
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4612
       ...
4613
 
4614
     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4615
     (the order of the data-refs in the output of vect_permute_load_chain
4616
     corresponds to the order of scalar stmts in the interleaving chain - see
4617
     the documentation of vect_permute_load_chain()).
4618
     The generation of permutation stmts and recording them in
4619
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4620
 
4621
     In case of both multiple types and interleaving, the vector loads and
4622
     permutation stmts above are created for every copy.  The result vector
4623
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4624
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
4625
 
4626
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
4627
     on a target that supports unaligned accesses (dr_unaligned_supported)
4628
     we generate the following code:
4629
         p = initial_addr;
4630
         indx = 0;
4631
         loop {
4632
           p = p + indx * vectype_size;
4633
           vec_dest = *(p);
4634
           indx = indx + 1;
4635
         }
4636
 
4637
     Otherwise, the data reference is potentially unaligned on a target that
4638
     does not support unaligned accesses (dr_explicit_realign_optimized) -
4639
     then generate the following code, in which the data in each iteration is
4640
     obtained by two vector loads, one from the previous iteration, and one
4641
     from the current iteration:
4642
         p1 = initial_addr;
4643
         msq_init = *(floor(p1))
4644
         p2 = initial_addr + VS - 1;
4645
         realignment_token = call target_builtin;
4646
         indx = 0;
4647
         loop {
4648
           p2 = p2 + indx * vectype_size
4649
           lsq = *(floor(p2))
4650
           vec_dest = realign_load (msq, lsq, realignment_token)
4651
           indx = indx + 1;
4652
           msq = lsq;
4653
         }   */
4654
 
4655
  /* If the misalignment remains the same throughout the execution of the
4656
     loop, we can create the init_addr and permutation mask at the loop
4657
     preheader.  Otherwise, it needs to be created inside the loop.
4658
     This can only occur when vectorizing memory accesses in the inner-loop
4659
     nested within an outer-loop that is being vectorized.  */
4660
 
4661
  if (nested_in_vect_loop
4662
      && (TREE_INT_CST_LOW (DR_STEP (dr))
4663
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4664
    {
4665
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4666
      compute_in_loop = true;
4667
    }
4668
 
4669
  if ((alignment_support_scheme == dr_explicit_realign_optimized
4670
       || alignment_support_scheme == dr_explicit_realign)
4671
      && !compute_in_loop)
4672
    {
4673
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4674
                                    alignment_support_scheme, NULL_TREE,
4675
                                    &at_loop);
4676
      if (alignment_support_scheme == dr_explicit_realign_optimized)
4677
        {
4678
          phi = SSA_NAME_DEF_STMT (msq);
4679
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4680
        }
4681
    }
4682
  else
4683
    at_loop = loop;
4684
 
4685
  if (negative)
4686
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4687
 
4688
  if (load_lanes_p)
4689
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4690
  else
4691
    aggr_type = vectype;
4692
 
4693
  prev_stmt_info = NULL;
4694
  for (j = 0; j < ncopies; j++)
4695
    {
4696
      /* 1. Create the vector or array pointer update chain.  */
4697
      if (j == 0)
4698
        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4699
                                                offset, &dummy, gsi,
4700
                                                &ptr_incr, false, &inv_p);
4701
      else
4702
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4703
                                       TYPE_SIZE_UNIT (aggr_type));
4704
 
4705
      if (strided_load || slp_perm)
4706
        dr_chain = VEC_alloc (tree, heap, vec_num);
4707
 
4708
      if (load_lanes_p)
4709
        {
4710
          tree vec_array;
4711
 
4712
          vec_array = create_vector_array (vectype, vec_num);
4713
 
4714
          /* Emit:
4715
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
4716
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4717
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4718
          gimple_call_set_lhs (new_stmt, vec_array);
4719
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
4720
          mark_symbols_for_renaming (new_stmt);
4721
 
4722
          /* Extract each vector into an SSA_NAME.  */
4723
          for (i = 0; i < vec_num; i++)
4724
            {
4725
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
4726
                                            vec_array, i);
4727
              VEC_quick_push (tree, dr_chain, new_temp);
4728
            }
4729
 
4730
          /* Record the mapping between SSA_NAMEs and statements.  */
4731
          vect_record_strided_load_vectors (stmt, dr_chain);
4732
        }
4733
      else
4734
        {
4735
          for (i = 0; i < vec_num; i++)
4736
            {
4737
              if (i > 0)
4738
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4739
                                               stmt, NULL_TREE);
4740
 
4741
              /* 2. Create the vector-load in the loop.  */
4742
              switch (alignment_support_scheme)
4743
                {
4744
                case dr_aligned:
4745
                case dr_unaligned_supported:
4746
                  {
4747
                    struct ptr_info_def *pi;
4748
                    data_ref
4749
                      = build2 (MEM_REF, vectype, dataref_ptr,
4750
                                build_int_cst (reference_alias_ptr_type
4751
                                               (DR_REF (first_dr)), 0));
4752
                    pi = get_ptr_info (dataref_ptr);
4753
                    pi->align = TYPE_ALIGN_UNIT (vectype);
4754
                    if (alignment_support_scheme == dr_aligned)
4755
                      {
4756
                        gcc_assert (aligned_access_p (first_dr));
4757
                        pi->misalign = 0;
4758
                      }
4759
                    else if (DR_MISALIGNMENT (first_dr) == -1)
4760
                      {
4761
                        TREE_TYPE (data_ref)
4762
                          = build_aligned_type (TREE_TYPE (data_ref),
4763
                                                TYPE_ALIGN (elem_type));
4764
                        pi->align = TYPE_ALIGN_UNIT (elem_type);
4765
                        pi->misalign = 0;
4766
                      }
4767
                    else
4768
                      {
4769
                        TREE_TYPE (data_ref)
4770
                          = build_aligned_type (TREE_TYPE (data_ref),
4771
                                                TYPE_ALIGN (elem_type));
4772
                        pi->misalign = DR_MISALIGNMENT (first_dr);
4773
                      }
4774
                    break;
4775
                  }
4776
                case dr_explicit_realign:
4777
                  {
4778
                    tree ptr, bump;
4779
                    tree vs_minus_1;
4780
 
4781
                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4782
 
4783
                    if (compute_in_loop)
4784
                      msq = vect_setup_realignment (first_stmt, gsi,
4785
                                                    &realignment_token,
4786
                                                    dr_explicit_realign,
4787
                                                    dataref_ptr, NULL);
4788
 
4789
                    new_stmt = gimple_build_assign_with_ops
4790
                                 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4791
                                  build_int_cst
4792
                                  (TREE_TYPE (dataref_ptr),
4793
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4794
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4795
                    gimple_assign_set_lhs (new_stmt, ptr);
4796
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4797
                    data_ref
4798
                      = build2 (MEM_REF, vectype, ptr,
4799
                                build_int_cst (reference_alias_ptr_type
4800
                                                 (DR_REF (first_dr)), 0));
4801
                    vec_dest = vect_create_destination_var (scalar_dest,
4802
                                                            vectype);
4803
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
4804
                    new_temp = make_ssa_name (vec_dest, new_stmt);
4805
                    gimple_assign_set_lhs (new_stmt, new_temp);
4806
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4807
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4808
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4809
                    msq = new_temp;
4810
 
4811
                    bump = size_binop (MULT_EXPR, vs_minus_1,
4812
                                       TYPE_SIZE_UNIT (elem_type));
4813
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4814
                    new_stmt = gimple_build_assign_with_ops
4815
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
4816
                                  build_int_cst
4817
                                  (TREE_TYPE (ptr),
4818
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4819
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4820
                    gimple_assign_set_lhs (new_stmt, ptr);
4821
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4822
                    data_ref
4823
                      = build2 (MEM_REF, vectype, ptr,
4824
                                build_int_cst (reference_alias_ptr_type
4825
                                                 (DR_REF (first_dr)), 0));
4826
                    break;
4827
                  }
4828
                case dr_explicit_realign_optimized:
4829
                  new_stmt = gimple_build_assign_with_ops
4830
                               (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4831
                                build_int_cst
4832
                                  (TREE_TYPE (dataref_ptr),
4833
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4834
                  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4835
                                            new_stmt);
4836
                  gimple_assign_set_lhs (new_stmt, new_temp);
4837
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4838
                  data_ref
4839
                    = build2 (MEM_REF, vectype, new_temp,
4840
                              build_int_cst (reference_alias_ptr_type
4841
                                               (DR_REF (first_dr)), 0));
4842
                  break;
4843
                default:
4844
                  gcc_unreachable ();
4845
                }
4846
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
4847
              new_stmt = gimple_build_assign (vec_dest, data_ref);
4848
              new_temp = make_ssa_name (vec_dest, new_stmt);
4849
              gimple_assign_set_lhs (new_stmt, new_temp);
4850
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
4851
              mark_symbols_for_renaming (new_stmt);
4852
 
4853
              /* 3. Handle explicit realignment if necessary/supported.
4854
                 Create in loop:
4855
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
4856
              if (alignment_support_scheme == dr_explicit_realign_optimized
4857
                  || alignment_support_scheme == dr_explicit_realign)
4858
                {
4859
                  lsq = gimple_assign_lhs (new_stmt);
4860
                  if (!realignment_token)
4861
                    realignment_token = dataref_ptr;
4862
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4863
                  new_stmt
4864
                    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4865
                                                     vec_dest, msq, lsq,
4866
                                                     realignment_token);
4867
                  new_temp = make_ssa_name (vec_dest, new_stmt);
4868
                  gimple_assign_set_lhs (new_stmt, new_temp);
4869
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4870
 
4871
                  if (alignment_support_scheme == dr_explicit_realign_optimized)
4872
                    {
4873
                      gcc_assert (phi);
4874
                      if (i == vec_num - 1 && j == ncopies - 1)
4875
                        add_phi_arg (phi, lsq,
4876
                                     loop_latch_edge (containing_loop),
4877
                                     UNKNOWN_LOCATION);
4878
                      msq = lsq;
4879
                    }
4880
                }
4881
 
4882
              /* 4. Handle invariant-load.  */
4883
              if (inv_p && !bb_vinfo)
4884
                {
4885
                  tree tem, vec_inv;
4886
                  gimple_stmt_iterator gsi2 = *gsi;
4887
                  gcc_assert (!strided_load);
4888
                  gsi_next (&gsi2);
4889
                  tem = scalar_dest;
4890
                  if (!useless_type_conversion_p (TREE_TYPE (vectype),
4891
                                                  TREE_TYPE (tem)))
4892
                    {
4893
                      tem = fold_convert (TREE_TYPE (vectype), tem);
4894
                      tem = force_gimple_operand_gsi (&gsi2, tem, true,
4895
                                                      NULL_TREE, true,
4896
                                                      GSI_SAME_STMT);
4897
                    }
4898
                  vec_inv = build_vector_from_val (vectype, tem);
4899
                  new_temp = vect_init_vector (stmt, vec_inv,
4900
                                               vectype, &gsi2);
4901
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4902
                }
4903
 
4904
              if (negative)
4905
                {
4906
                  tree perm_mask = perm_mask_for_reverse (vectype);
4907
                  new_temp = permute_vec_elements (new_temp, new_temp,
4908
                                                   perm_mask, stmt, gsi);
4909
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4910
                }
4911
 
4912
              /* Collect vector loads and later create their permutation in
4913
                 vect_transform_strided_load ().  */
4914
              if (strided_load || slp_perm)
4915
                VEC_quick_push (tree, dr_chain, new_temp);
4916
 
4917
              /* Store vector loads in the corresponding SLP_NODE.  */
4918
              if (slp && !slp_perm)
4919
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4920
                                new_stmt);
4921
            }
4922
        }
4923
 
4924
      if (slp && !slp_perm)
4925
        continue;
4926
 
4927
      if (slp_perm)
4928
        {
4929
          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4930
                                             slp_node_instance, false))
4931
            {
4932
              VEC_free (tree, heap, dr_chain);
4933
              return false;
4934
            }
4935
        }
4936
      else
4937
        {
4938
          if (strided_load)
4939
            {
4940
              if (!load_lanes_p)
4941
                vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4942
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4943
            }
4944
          else
4945
            {
4946
              if (j == 0)
4947
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4948
              else
4949
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4950
              prev_stmt_info = vinfo_for_stmt (new_stmt);
4951
            }
4952
        }
4953
      if (dr_chain)
4954
        VEC_free (tree, heap, dr_chain);
4955
    }
4956
 
4957
  return true;
4958
}
4959
 
4960
/* Function vect_is_simple_cond.
4961
 
4962
   Input:
4963
   LOOP - the loop that is being vectorized.
4964
   COND - Condition that is checked for simple use.
4965
 
4966
   Output:
4967
   *COMP_VECTYPE - the vector type for the comparison.
4968
 
4969
   Returns whether a COND can be vectorized.  Checks whether
4970
   condition operands are supportable using vec_is_simple_use.  */
4971
 
4972
static bool
4973
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
4974
                     bb_vec_info bb_vinfo, tree *comp_vectype)
4975
{
4976
  tree lhs, rhs;
4977
  tree def;
4978
  enum vect_def_type dt;
4979
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4980
 
4981
  if (!COMPARISON_CLASS_P (cond))
4982
    return false;
4983
 
4984
  lhs = TREE_OPERAND (cond, 0);
4985
  rhs = TREE_OPERAND (cond, 1);
4986
 
4987
  if (TREE_CODE (lhs) == SSA_NAME)
4988
    {
4989
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4990
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
4991
                                 &lhs_def_stmt, &def, &dt, &vectype1))
4992
        return false;
4993
    }
4994
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4995
           && TREE_CODE (lhs) != FIXED_CST)
4996
    return false;
4997
 
4998
  if (TREE_CODE (rhs) == SSA_NAME)
4999
    {
5000
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5001
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5002
                                 &rhs_def_stmt, &def, &dt, &vectype2))
5003
        return false;
5004
    }
5005
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5006
           && TREE_CODE (rhs) != FIXED_CST)
5007
    return false;
5008
 
5009
  *comp_vectype = vectype1 ? vectype1 : vectype2;
5010
  return true;
5011
}
5012
 
5013
/* vectorizable_condition.
5014
 
5015
   Check if STMT is conditional modify expression that can be vectorized.
5016
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5017
   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
5018
   at GSI.
5019
 
5020
   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5021
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5022
   else caluse if it is 2).
5023
 
5024
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5025
 
5026
bool
5027
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5028
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
5029
                        slp_tree slp_node)
5030
{
5031
  tree scalar_dest = NULL_TREE;
5032
  tree vec_dest = NULL_TREE;
5033
  tree cond_expr, then_clause, else_clause;
5034
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5035
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5036
  tree comp_vectype = NULL_TREE;
5037
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5038
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5039
  tree vec_compare, vec_cond_expr;
5040
  tree new_temp;
5041
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5042
  tree def;
5043
  enum vect_def_type dt, dts[4];
5044
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5045
  int ncopies;
5046
  enum tree_code code;
5047
  stmt_vec_info prev_stmt_info = NULL;
5048
  int i, j;
5049
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5050
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5051
  VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5052
 
5053
  if (slp_node || PURE_SLP_STMT (stmt_info))
5054
    ncopies = 1;
5055
  else
5056
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5057
 
5058
  gcc_assert (ncopies >= 1);
5059
  if (reduc_index && ncopies > 1)
5060
    return false; /* FORNOW */
5061
 
5062
  if (reduc_index && STMT_SLP_TYPE (stmt_info))
5063
    return false;
5064
 
5065
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5066
    return false;
5067
 
5068
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5069
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5070
           && reduc_def))
5071
    return false;
5072
 
5073
  /* FORNOW: not yet supported.  */
5074
  if (STMT_VINFO_LIVE_P (stmt_info))
5075
    {
5076
      if (vect_print_dump_info (REPORT_DETAILS))
5077
        fprintf (vect_dump, "value used after loop.");
5078
      return false;
5079
    }
5080
 
5081
  /* Is vectorizable conditional operation?  */
5082
  if (!is_gimple_assign (stmt))
5083
    return false;
5084
 
5085
  code = gimple_assign_rhs_code (stmt);
5086
 
5087
  if (code != COND_EXPR)
5088
    return false;
5089
 
5090
  cond_expr = gimple_assign_rhs1 (stmt);
5091
  then_clause = gimple_assign_rhs2 (stmt);
5092
  else_clause = gimple_assign_rhs3 (stmt);
5093
 
5094
  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5095
                            &comp_vectype)
5096
      || !comp_vectype)
5097
    return false;
5098
 
5099
  if (TREE_CODE (then_clause) == SSA_NAME)
5100
    {
5101
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5102
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5103
                               &then_def_stmt, &def, &dt))
5104
        return false;
5105
    }
5106
  else if (TREE_CODE (then_clause) != INTEGER_CST
5107
           && TREE_CODE (then_clause) != REAL_CST
5108
           && TREE_CODE (then_clause) != FIXED_CST)
5109
    return false;
5110
 
5111
  if (TREE_CODE (else_clause) == SSA_NAME)
5112
    {
5113
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5114
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5115
                               &else_def_stmt, &def, &dt))
5116
        return false;
5117
    }
5118
  else if (TREE_CODE (else_clause) != INTEGER_CST
5119
           && TREE_CODE (else_clause) != REAL_CST
5120
           && TREE_CODE (else_clause) != FIXED_CST)
5121
    return false;
5122
 
5123
  if (!vec_stmt)
5124
    {
5125
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5126
      return expand_vec_cond_expr_p (vectype, comp_vectype);
5127
    }
5128
 
5129
  /* Transform.  */
5130
 
5131
  if (!slp_node)
5132
    {
5133
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
5134
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
5135
      vec_oprnds2 = VEC_alloc (tree, heap, 1);
5136
      vec_oprnds3 = VEC_alloc (tree, heap, 1);
5137
    }
5138
 
5139
  /* Handle def.  */
5140
  scalar_dest = gimple_assign_lhs (stmt);
5141
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
5142
 
5143
  /* Handle cond expr.  */
5144
  for (j = 0; j < ncopies; j++)
5145
    {
5146
      gimple new_stmt = NULL;
5147
      if (j == 0)
5148
        {
5149
          if (slp_node)
5150
            {
5151
              VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5152
              VEC (slp_void_p, heap) *vec_defs;
5153
 
5154
              vec_defs = VEC_alloc (slp_void_p, heap, 4);
5155
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5156
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5157
              VEC_safe_push (tree, heap, ops, then_clause);
5158
              VEC_safe_push (tree, heap, ops, else_clause);
5159
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5160
              vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5161
              vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5162
              vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5163
              vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5164
 
5165
              VEC_free (tree, heap, ops);
5166
              VEC_free (slp_void_p, heap, vec_defs);
5167
            }
5168
          else
5169
            {
5170
              gimple gtemp;
5171
              vec_cond_lhs =
5172
              vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5173
                                            stmt, NULL);
5174
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5175
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5176
 
5177
              vec_cond_rhs =
5178
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5179
                                                stmt, NULL);
5180
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5181
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5182
              if (reduc_index == 1)
5183
                vec_then_clause = reduc_def;
5184
              else
5185
                {
5186
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5187
                                                              stmt, NULL);
5188
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
5189
                                          NULL, &gtemp, &def, &dts[2]);
5190
                }
5191
              if (reduc_index == 2)
5192
                vec_else_clause = reduc_def;
5193
              else
5194
                {
5195
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5196
                                                              stmt, NULL);
5197
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
5198
                                  NULL, &gtemp, &def, &dts[3]);
5199
                }
5200
            }
5201
        }
5202
      else
5203
        {
5204
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5205
                                                VEC_pop (tree, vec_oprnds0));
5206
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5207
                                                VEC_pop (tree, vec_oprnds1));
5208
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5209
                                                VEC_pop (tree, vec_oprnds2));
5210
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5211
                                                VEC_pop (tree, vec_oprnds3));
5212
        }
5213
 
5214
      if (!slp_node)
5215
        {
5216
          VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5217
          VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5218
          VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5219
          VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5220
        }
5221
 
5222
      /* Arguments are ready.  Create the new vector stmt.  */
5223
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5224
        {
5225
          vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5226
          vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5227
          vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5228
 
5229
          vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5230
                               vec_cond_lhs, vec_cond_rhs);
5231
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5232
                         vec_compare, vec_then_clause, vec_else_clause);
5233
 
5234
          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5235
          new_temp = make_ssa_name (vec_dest, new_stmt);
5236
          gimple_assign_set_lhs (new_stmt, new_temp);
5237
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5238
          if (slp_node)
5239
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5240
        }
5241
 
5242
        if (slp_node)
5243
          continue;
5244
 
5245
        if (j == 0)
5246
          STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5247
        else
5248
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5249
 
5250
        prev_stmt_info = vinfo_for_stmt (new_stmt);
5251
    }
5252
 
5253
  VEC_free (tree, heap, vec_oprnds0);
5254
  VEC_free (tree, heap, vec_oprnds1);
5255
  VEC_free (tree, heap, vec_oprnds2);
5256
  VEC_free (tree, heap, vec_oprnds3);
5257
 
5258
  return true;
5259
}
5260
 
5261
 
5262
/* Make sure the statement is vectorizable.  */
5263
 
5264
bool
5265
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5266
{
5267
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5268
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5269
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5270
  bool ok;
5271
  tree scalar_type, vectype;
5272
  gimple pattern_stmt;
5273
  gimple_seq pattern_def_seq;
5274
 
5275
  if (vect_print_dump_info (REPORT_DETAILS))
5276
    {
5277
      fprintf (vect_dump, "==> examining statement: ");
5278
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5279
    }
5280
 
5281
  if (gimple_has_volatile_ops (stmt))
5282
    {
5283
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5284
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5285
 
5286
      return false;
5287
    }
5288
 
5289
  /* Skip stmts that do not need to be vectorized. In loops this is expected
5290
     to include:
5291
     - the COND_EXPR which is the loop exit condition
5292
     - any LABEL_EXPRs in the loop
5293
     - computations that are used only for array indexing or loop control.
5294
     In basic blocks we only analyze statements that are a part of some SLP
5295
     instance, therefore, all the statements are relevant.
5296
 
5297
     Pattern statement needs to be analyzed instead of the original statement
5298
     if the original statement is not relevant.  Otherwise, we analyze both
5299
     statements.  */
5300
 
5301
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5302
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5303
      && !STMT_VINFO_LIVE_P (stmt_info))
5304
    {
5305
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5306
          && pattern_stmt
5307
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5308
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5309
        {
5310
          /* Analyze PATTERN_STMT instead of the original stmt.  */
5311
          stmt = pattern_stmt;
5312
          stmt_info = vinfo_for_stmt (pattern_stmt);
5313
          if (vect_print_dump_info (REPORT_DETAILS))
5314
            {
5315
              fprintf (vect_dump, "==> examining pattern statement: ");
5316
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5317
            }
5318
        }
5319
      else
5320
        {
5321
          if (vect_print_dump_info (REPORT_DETAILS))
5322
            fprintf (vect_dump, "irrelevant.");
5323
 
5324
          return true;
5325
        }
5326
    }
5327
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5328
           && pattern_stmt
5329
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5330
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5331
    {
5332
      /* Analyze PATTERN_STMT too.  */
5333
      if (vect_print_dump_info (REPORT_DETAILS))
5334
        {
5335
          fprintf (vect_dump, "==> examining pattern statement: ");
5336
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5337
        }
5338
 
5339
      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5340
        return false;
5341
   }
5342
 
5343
  if (is_pattern_stmt_p (stmt_info)
5344
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5345
    {
5346
      gimple_stmt_iterator si;
5347
 
5348
      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5349
        {
5350
          gimple pattern_def_stmt = gsi_stmt (si);
5351
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5352
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5353
            {
5354
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
5355
              if (vect_print_dump_info (REPORT_DETAILS))
5356
                {
5357
                  fprintf (vect_dump, "==> examining pattern def statement: ");
5358
                  print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5359
                }
5360
 
5361
              if (!vect_analyze_stmt (pattern_def_stmt,
5362
                                      need_to_vectorize, node))
5363
                return false;
5364
            }
5365
        }
5366
    }
5367
 
5368
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
5369
    {
5370
      case vect_internal_def:
5371
        break;
5372
 
5373
      case vect_reduction_def:
5374
      case vect_nested_cycle:
5375
         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5376
                     || relevance == vect_used_in_outer_by_reduction
5377
                     || relevance == vect_unused_in_scope));
5378
         break;
5379
 
5380
      case vect_induction_def:
5381
      case vect_constant_def:
5382
      case vect_external_def:
5383
      case vect_unknown_def_type:
5384
      default:
5385
        gcc_unreachable ();
5386
    }
5387
 
5388
  if (bb_vinfo)
5389
    {
5390
      gcc_assert (PURE_SLP_STMT (stmt_info));
5391
 
5392
      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5393
      if (vect_print_dump_info (REPORT_DETAILS))
5394
        {
5395
          fprintf (vect_dump, "get vectype for scalar type:  ");
5396
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5397
        }
5398
 
5399
      vectype = get_vectype_for_scalar_type (scalar_type);
5400
      if (!vectype)
5401
        {
5402
          if (vect_print_dump_info (REPORT_DETAILS))
5403
            {
5404
               fprintf (vect_dump, "not SLPed: unsupported data-type ");
5405
               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5406
            }
5407
          return false;
5408
        }
5409
 
5410
      if (vect_print_dump_info (REPORT_DETAILS))
5411
        {
5412
          fprintf (vect_dump, "vectype:  ");
5413
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
5414
        }
5415
 
5416
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
5417
   }
5418
 
5419
  if (STMT_VINFO_RELEVANT_P (stmt_info))
5420
    {
5421
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5422
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5423
      *need_to_vectorize = true;
5424
    }
5425
 
5426
   ok = true;
5427
   if (!bb_vinfo
5428
       && (STMT_VINFO_RELEVANT_P (stmt_info)
5429
           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5430
      ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5431
            || vectorizable_shift (stmt, NULL, NULL, NULL)
5432
            || vectorizable_operation (stmt, NULL, NULL, NULL)
5433
            || vectorizable_assignment (stmt, NULL, NULL, NULL)
5434
            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5435
            || vectorizable_call (stmt, NULL, NULL, NULL)
5436
            || vectorizable_store (stmt, NULL, NULL, NULL)
5437
            || vectorizable_reduction (stmt, NULL, NULL, NULL)
5438
            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5439
    else
5440
      {
5441
        if (bb_vinfo)
5442
          ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5443
                || vectorizable_shift (stmt, NULL, NULL, node)
5444
                || vectorizable_operation (stmt, NULL, NULL, node)
5445
                || vectorizable_assignment (stmt, NULL, NULL, node)
5446
                || vectorizable_load (stmt, NULL, NULL, node, NULL)
5447
                || vectorizable_call (stmt, NULL, NULL, node)
5448
                || vectorizable_store (stmt, NULL, NULL, node)
5449
                || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5450
      }
5451
 
5452
  if (!ok)
5453
    {
5454
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5455
        {
5456
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
5457
          fprintf (vect_dump, "supported: ");
5458
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5459
        }
5460
 
5461
      return false;
5462
    }
5463
 
5464
  if (bb_vinfo)
5465
    return true;
5466
 
5467
  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5468
      need extra handling, except for vectorizable reductions.  */
5469
  if (STMT_VINFO_LIVE_P (stmt_info)
5470
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5471
    ok = vectorizable_live_operation (stmt, NULL, NULL);
5472
 
5473
  if (!ok)
5474
    {
5475
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5476
        {
5477
          fprintf (vect_dump, "not vectorized: live stmt not ");
5478
          fprintf (vect_dump, "supported: ");
5479
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5480
        }
5481
 
5482
       return false;
5483
    }
5484
 
5485
  return true;
5486
}
5487
 
5488
 
5489
/* Function vect_transform_stmt.
5490
 
5491
   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
5492
 
5493
bool
5494
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5495
                     bool *strided_store, slp_tree slp_node,
5496
                     slp_instance slp_node_instance)
5497
{
5498
  bool is_store = false;
5499
  gimple vec_stmt = NULL;
5500
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5501
  bool done;
5502
 
5503
  switch (STMT_VINFO_TYPE (stmt_info))
5504
    {
5505
    case type_demotion_vec_info_type:
5506
    case type_promotion_vec_info_type:
5507
    case type_conversion_vec_info_type:
5508
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5509
      gcc_assert (done);
5510
      break;
5511
 
5512
    case induc_vec_info_type:
5513
      gcc_assert (!slp_node);
5514
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
5515
      gcc_assert (done);
5516
      break;
5517
 
5518
    case shift_vec_info_type:
5519
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5520
      gcc_assert (done);
5521
      break;
5522
 
5523
    case op_vec_info_type:
5524
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5525
      gcc_assert (done);
5526
      break;
5527
 
5528
    case assignment_vec_info_type:
5529
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5530
      gcc_assert (done);
5531
      break;
5532
 
5533
    case load_vec_info_type:
5534
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5535
                                slp_node_instance);
5536
      gcc_assert (done);
5537
      break;
5538
 
5539
    case store_vec_info_type:
5540
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5541
      gcc_assert (done);
5542
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5543
        {
5544
          /* In case of interleaving, the whole chain is vectorized when the
5545
             last store in the chain is reached.  Store stmts before the last
5546
             one are skipped, and there vec_stmt_info shouldn't be freed
5547
             meanwhile.  */
5548
          *strided_store = true;
5549
          if (STMT_VINFO_VEC_STMT (stmt_info))
5550
            is_store = true;
5551
          }
5552
      else
5553
        is_store = true;
5554
      break;
5555
 
5556
    case condition_vec_info_type:
5557
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5558
      gcc_assert (done);
5559
      break;
5560
 
5561
    case call_vec_info_type:
5562
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5563
      stmt = gsi_stmt (*gsi);
5564
      break;
5565
 
5566
    case reduc_vec_info_type:
5567
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5568
      gcc_assert (done);
5569
      break;
5570
 
5571
    default:
5572
      if (!STMT_VINFO_LIVE_P (stmt_info))
5573
        {
5574
          if (vect_print_dump_info (REPORT_DETAILS))
5575
            fprintf (vect_dump, "stmt not supported.");
5576
          gcc_unreachable ();
5577
        }
5578
    }
5579
 
5580
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5581
     is being vectorized, but outside the immediately enclosing loop.  */
5582
  if (vec_stmt
5583
      && STMT_VINFO_LOOP_VINFO (stmt_info)
5584
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5585
                                STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5586
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5587
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5588
          || STMT_VINFO_RELEVANT (stmt_info) ==
5589
                                           vect_used_in_outer_by_reduction))
5590
    {
5591
      struct loop *innerloop = LOOP_VINFO_LOOP (
5592
                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5593
      imm_use_iterator imm_iter;
5594
      use_operand_p use_p;
5595
      tree scalar_dest;
5596
      gimple exit_phi;
5597
 
5598
      if (vect_print_dump_info (REPORT_DETAILS))
5599
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5600
 
5601
      /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
5602
        (to be used when vectorizing outer-loop stmts that use the DEF of
5603
        STMT).  */
5604
      if (gimple_code (stmt) == GIMPLE_PHI)
5605
        scalar_dest = PHI_RESULT (stmt);
5606
      else
5607
        scalar_dest = gimple_assign_lhs (stmt);
5608
 
5609
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5610
       {
5611
         if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5612
           {
5613
             exit_phi = USE_STMT (use_p);
5614
             STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5615
           }
5616
       }
5617
    }
5618
 
5619
  /* Handle stmts whose DEF is used outside the loop-nest that is
5620
     being vectorized.  */
5621
  if (STMT_VINFO_LIVE_P (stmt_info)
5622
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5623
    {
5624
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5625
      gcc_assert (done);
5626
    }
5627
 
5628
  if (vec_stmt)
5629
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5630
 
5631
  return is_store;
5632
}
5633
 
5634
 
5635
/* Remove a group of stores (for SLP or interleaving), free their
5636
   stmt_vec_info.  */
5637
 
5638
void
5639
vect_remove_stores (gimple first_stmt)
5640
{
5641
  gimple next = first_stmt;
5642
  gimple tmp;
5643
  gimple_stmt_iterator next_si;
5644
 
5645
  while (next)
5646
    {
5647
      stmt_vec_info stmt_info = vinfo_for_stmt (next);
5648
 
5649
      tmp = GROUP_NEXT_ELEMENT (stmt_info);
5650
      if (is_pattern_stmt_p (stmt_info))
5651
        next = STMT_VINFO_RELATED_STMT (stmt_info);
5652
      /* Free the attached stmt_vec_info and remove the stmt.  */
5653
      next_si = gsi_for_stmt (next);
5654
      gsi_remove (&next_si, true);
5655
      free_stmt_vec_info (next);
5656
      next = tmp;
5657
    }
5658
}
5659
 
5660
 
5661
/* Function new_stmt_vec_info.
5662
 
5663
   Create and initialize a new stmt_vec_info struct for STMT.  */
5664
 
5665
stmt_vec_info
5666
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5667
                   bb_vec_info bb_vinfo)
5668
{
5669
  stmt_vec_info res;
5670
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5671
 
5672
  STMT_VINFO_TYPE (res) = undef_vec_info_type;
5673
  STMT_VINFO_STMT (res) = stmt;
5674
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5675
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5676
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5677
  STMT_VINFO_LIVE_P (res) = false;
5678
  STMT_VINFO_VECTYPE (res) = NULL;
5679
  STMT_VINFO_VEC_STMT (res) = NULL;
5680
  STMT_VINFO_VECTORIZABLE (res) = true;
5681
  STMT_VINFO_IN_PATTERN_P (res) = false;
5682
  STMT_VINFO_RELATED_STMT (res) = NULL;
5683
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5684
  STMT_VINFO_DATA_REF (res) = NULL;
5685
 
5686
  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5687
  STMT_VINFO_DR_OFFSET (res) = NULL;
5688
  STMT_VINFO_DR_INIT (res) = NULL;
5689
  STMT_VINFO_DR_STEP (res) = NULL;
5690
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5691
 
5692
  if (gimple_code (stmt) == GIMPLE_PHI
5693
      && is_loop_header_bb_p (gimple_bb (stmt)))
5694
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5695
  else
5696
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5697
 
5698
  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5699
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5700
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5701
  STMT_SLP_TYPE (res) = loop_vect;
5702
  GROUP_FIRST_ELEMENT (res) = NULL;
5703
  GROUP_NEXT_ELEMENT (res) = NULL;
5704
  GROUP_SIZE (res) = 0;
5705
  GROUP_STORE_COUNT (res) = 0;
5706
  GROUP_GAP (res) = 0;
5707
  GROUP_SAME_DR_STMT (res) = NULL;
5708
  GROUP_READ_WRITE_DEPENDENCE (res) = false;
5709
 
5710
  return res;
5711
}
5712
 
5713
 
5714
/* Create a hash table for stmt_vec_info. */
5715
 
5716
void
5717
init_stmt_vec_info_vec (void)
5718
{
5719
  gcc_assert (!stmt_vec_info_vec);
5720
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5721
}
5722
 
5723
 
5724
/* Free hash table for stmt_vec_info. */
5725
 
5726
void
5727
free_stmt_vec_info_vec (void)
5728
{
5729
  gcc_assert (stmt_vec_info_vec);
5730
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5731
}
5732
 
5733
 
5734
/* Free stmt vectorization related info.  */
5735
 
5736
void
5737
free_stmt_vec_info (gimple stmt)
5738
{
5739
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5740
 
5741
  if (!stmt_info)
5742
    return;
5743
 
5744
  /* Check if this statement has a related "pattern stmt"
5745
     (introduced by the vectorizer during the pattern recognition
5746
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5747
     too.  */
5748
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5749
    {
5750
      stmt_vec_info patt_info
5751
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5752
      if (patt_info)
5753
        {
5754
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5755
          if (seq)
5756
            {
5757
              gimple_stmt_iterator si;
5758
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5759
                free_stmt_vec_info (gsi_stmt (si));
5760
            }
5761
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5762
        }
5763
    }
5764
 
5765
  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5766
  set_vinfo_for_stmt (stmt, NULL);
5767
  free (stmt_info);
5768
}
5769
 
5770
 
5771
/* Function get_vectype_for_scalar_type_and_size.
5772
 
5773
   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
5774
   by the target.  */
5775
 
5776
static tree
5777
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5778
{
5779
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5780
  enum machine_mode simd_mode;
5781
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5782
  int nunits;
5783
  tree vectype;
5784
 
5785
  if (nbytes == 0)
5786
    return NULL_TREE;
5787
 
5788
  if (GET_MODE_CLASS (inner_mode) != MODE_INT
5789
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5790
    return NULL_TREE;
5791
 
5792
  /* We can't build a vector type of elements with alignment bigger than
5793
     their size.  */
5794
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5795
    return NULL_TREE;
5796
 
5797
  /* For vector types of elements whose mode precision doesn't
5798
     match their types precision we use a element type of mode
5799
     precision.  The vectorization routines will have to make sure
5800
     they support the proper result truncation/extension.
5801
     We also make sure to build vector types with INTEGER_TYPE
5802
     component type only.  */
5803
  if (INTEGRAL_TYPE_P (scalar_type)
5804
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5805
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
5806
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5807
                                                  TYPE_UNSIGNED (scalar_type));
5808
 
5809
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5810
     When the component mode passes the above test simply use a type
5811
     corresponding to that mode.  The theory is that any use that
5812
     would cause problems with this will disable vectorization anyway.  */
5813
  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5814
      && !INTEGRAL_TYPE_P (scalar_type)
5815
      && !POINTER_TYPE_P (scalar_type))
5816
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5817
 
5818
  /* If no size was supplied use the mode the target prefers.   Otherwise
5819
     lookup a vector mode of the specified size.  */
5820
  if (size == 0)
5821
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5822
  else
5823
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
5824
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5825
  if (nunits <= 1)
5826
    return NULL_TREE;
5827
 
5828
  vectype = build_vector_type (scalar_type, nunits);
5829
  if (vect_print_dump_info (REPORT_DETAILS))
5830
    {
5831
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5832
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5833
    }
5834
 
5835
  if (!vectype)
5836
    return NULL_TREE;
5837
 
5838
  if (vect_print_dump_info (REPORT_DETAILS))
5839
    {
5840
      fprintf (vect_dump, "vectype: ");
5841
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
5842
    }
5843
 
5844
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5845
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5846
    {
5847
      if (vect_print_dump_info (REPORT_DETAILS))
5848
        fprintf (vect_dump, "mode not supported by target.");
5849
      return NULL_TREE;
5850
    }
5851
 
5852
  return vectype;
5853
}
5854
 
5855
/* Vector size passed by get_vectype_for_scalar_type to
   get_vectype_for_scalar_type_and_size.  While it is zero, the first
   successful lookup there stores the mode size of the vector type it
   found.  */
unsigned int current_vector_size;
5856
 
5857
/* Function get_vectype_for_scalar_type.
5858
 
5859
   Returns the vector type corresponding to SCALAR_TYPE as supported
5860
   by the target.  */
5861
 
5862
tree
5863
get_vectype_for_scalar_type (tree scalar_type)
5864
{
5865
  tree vectype;
5866
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5867
                                                  current_vector_size);
5868
  if (vectype
5869
      && current_vector_size == 0)
5870
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5871
  return vectype;
5872
}
5873
 
5874
/* Function get_same_sized_vectype
5875
 
5876
   Returns a vector type corresponding to SCALAR_TYPE of size
5877
   VECTOR_TYPE if supported by the target.  */
5878
 
5879
tree
5880
get_same_sized_vectype (tree scalar_type, tree vector_type)
5881
{
5882
  return get_vectype_for_scalar_type_and_size
5883
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5884
}
5885
 
5886
/* Function vect_is_simple_use.
5887
 
5888
   Input:
5889
   LOOP_VINFO - the vect info of the loop that is being vectorized.
5890
   BB_VINFO - the vect info of the basic block that is being vectorized.
5891
   OPERAND - operand of STMT in the loop or bb.
5892
   DEF - the defining stmt in case OPERAND is an SSA_NAME.
5893
 
5894
   Returns whether a stmt with OPERAND can be vectorized.
5895
   For loops, supportable operands are constants, loop invariants, and operands
5896
   that are defined by the current iteration of the loop.  Unsupportable
5897
   operands are those that are defined by a previous iteration of the loop (as
5898
   is the case in reduction/induction computations).
5899
   For basic blocks, supportable operands are constants and bb invariants.
5900
   For now, operands defined outside the basic block are not supported.  */
5901
 
5902
bool
5903
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
5904
                    bb_vec_info bb_vinfo, gimple *def_stmt,
5905
                    tree *def, enum vect_def_type *dt)
5906
{
5907
  basic_block bb;
5908
  stmt_vec_info stmt_vinfo;
5909
  struct loop *loop = NULL;
5910
 
5911
  if (loop_vinfo)
5912
    loop = LOOP_VINFO_LOOP (loop_vinfo);
5913
 
5914
  *def_stmt = NULL;
5915
  *def = NULL_TREE;
5916
 
5917
  if (vect_print_dump_info (REPORT_DETAILS))
5918
    {
5919
      fprintf (vect_dump, "vect_is_simple_use: operand ");
5920
      print_generic_expr (vect_dump, operand, TDF_SLIM);
5921
    }
5922
 
5923
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5924
    {
5925
      *dt = vect_constant_def;
5926
      return true;
5927
    }
5928
 
5929
  if (is_gimple_min_invariant (operand))
5930
    {
5931
      *def = operand;
5932
      *dt = vect_external_def;
5933
      return true;
5934
    }
5935
 
5936
  if (TREE_CODE (operand) == PAREN_EXPR)
5937
    {
5938
      if (vect_print_dump_info (REPORT_DETAILS))
5939
        fprintf (vect_dump, "non-associatable copy.");
5940
      operand = TREE_OPERAND (operand, 0);
5941
    }
5942
 
5943
  if (TREE_CODE (operand) != SSA_NAME)
5944
    {
5945
      if (vect_print_dump_info (REPORT_DETAILS))
5946
        fprintf (vect_dump, "not ssa-name.");
5947
      return false;
5948
    }
5949
 
5950
  *def_stmt = SSA_NAME_DEF_STMT (operand);
5951
  if (*def_stmt == NULL)
5952
    {
5953
      if (vect_print_dump_info (REPORT_DETAILS))
5954
        fprintf (vect_dump, "no def_stmt.");
5955
      return false;
5956
    }
5957
 
5958
  if (vect_print_dump_info (REPORT_DETAILS))
5959
    {
5960
      fprintf (vect_dump, "def_stmt: ");
5961
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5962
    }
5963
 
5964
  /* Empty stmt is expected only in case of a function argument.
5965
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
5966
  if (gimple_nop_p (*def_stmt))
5967
    {
5968
      *def = operand;
5969
      *dt = vect_external_def;
5970
      return true;
5971
    }
5972
 
5973
  bb = gimple_bb (*def_stmt);
5974
 
5975
  if ((loop && !flow_bb_inside_loop_p (loop, bb))
5976
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5977
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5978
    *dt = vect_external_def;
5979
  else
5980
    {
5981
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
5982
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5983
    }
5984
 
5985
  if (*dt == vect_unknown_def_type
5986
      || (stmt
5987
          && *dt == vect_double_reduction_def
5988
          && gimple_code (stmt) != GIMPLE_PHI))
5989
    {
5990
      if (vect_print_dump_info (REPORT_DETAILS))
5991
        fprintf (vect_dump, "Unsupported pattern.");
5992
      return false;
5993
    }
5994
 
5995
  if (vect_print_dump_info (REPORT_DETAILS))
5996
    fprintf (vect_dump, "type of def: %d.",*dt);
5997
 
5998
  switch (gimple_code (*def_stmt))
5999
    {
6000
    case GIMPLE_PHI:
6001
      *def = gimple_phi_result (*def_stmt);
6002
      break;
6003
 
6004
    case GIMPLE_ASSIGN:
6005
      *def = gimple_assign_lhs (*def_stmt);
6006
      break;
6007
 
6008
    case GIMPLE_CALL:
6009
      *def = gimple_call_lhs (*def_stmt);
6010
      if (*def != NULL)
6011
        break;
6012
      /* FALLTHRU */
6013
    default:
6014
      if (vect_print_dump_info (REPORT_DETAILS))
6015
        fprintf (vect_dump, "unsupported defining stmt: ");
6016
      return false;
6017
    }
6018
 
6019
  return true;
6020
}
6021
 
6022
/* Function vect_is_simple_use_1.
6023
 
6024
   Same as vect_is_simple_use_1 but also determines the vector operand
6025
   type of OPERAND and stores it to *VECTYPE.  If the definition of
6026
   OPERAND is vect_uninitialized_def, vect_constant_def or
6027
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6028
   is responsible to compute the best suited vector type for the
6029
   scalar operand.  */
6030
 
6031
bool
6032
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6033
                      bb_vec_info bb_vinfo, gimple *def_stmt,
6034
                      tree *def, enum vect_def_type *dt, tree *vectype)
6035
{
6036
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6037
                           def, dt))
6038
    return false;
6039
 
6040
  /* Now get a vector type if the def is internal, otherwise supply
6041
     NULL_TREE and leave it up to the caller to figure out a proper
6042
     type for the use stmt.  */
6043
  if (*dt == vect_internal_def
6044
      || *dt == vect_induction_def
6045
      || *dt == vect_reduction_def
6046
      || *dt == vect_double_reduction_def
6047
      || *dt == vect_nested_cycle)
6048
    {
6049
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6050
 
6051
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6052
          && !STMT_VINFO_RELEVANT (stmt_info)
6053
          && !STMT_VINFO_LIVE_P (stmt_info))
6054
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6055
 
6056
      *vectype = STMT_VINFO_VECTYPE (stmt_info);
6057
      gcc_assert (*vectype != NULL_TREE);
6058
    }
6059
  else if (*dt == vect_uninitialized_def
6060
           || *dt == vect_constant_def
6061
           || *dt == vect_external_def)
6062
    *vectype = NULL_TREE;
6063
  else
6064
    gcc_unreachable ();
6065
 
6066
  return true;
6067
}
6068
 
6069
 
6070
/* Function supportable_widening_operation
6071
 
6072
   Check whether an operation represented by the code CODE is a
6073
   widening operation that is supported by the target platform in
6074
   vector form (i.e., when operating on arguments of type VECTYPE_IN
6075
   producing a result of type VECTYPE_OUT).
6076
 
6077
   Widening operations we currently support are NOP (CONVERT), FLOAT
6078
   and WIDEN_MULT.  This function checks if these operations are supported
6079
   by the target platform either directly (via vector tree-codes), or via
6080
   target builtins.
6081
 
6082
   Output:
6083
   - CODE1 and CODE2 are codes of vector operations to be used when
6084
   vectorizing the operation, if available.
6085
   - DECL1 and DECL2 are decls of target builtin functions to be used
6086
   when vectorizing the operation, if available.  In this case,
6087
   CODE1 and CODE2 are CALL_EXPR.
6088
   - MULTI_STEP_CVT determines the number of required intermediate steps in
6089
   case of multi-step conversion (like char->short->int - in that case
6090
   MULTI_STEP_CVT will be 1).
6091
   - INTERM_TYPES contains the intermediate type required to perform the
6092
   widening operation (short in the above example).  */
6093
 
6094
bool
6095
supportable_widening_operation (enum tree_code code, gimple stmt,
6096
                                tree vectype_out, tree vectype_in,
6097
                                tree *decl1, tree *decl2,
6098
                                enum tree_code *code1, enum tree_code *code2,
6099
                                int *multi_step_cvt,
6100
                                VEC (tree, heap) **interm_types)
6101
{
6102
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6103
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6104
  struct loop *vect_loop = NULL;
6105
  bool ordered_p;
6106
  enum machine_mode vec_mode;
6107
  enum insn_code icode1, icode2;
6108
  optab optab1, optab2;
6109
  tree vectype = vectype_in;
6110
  tree wide_vectype = vectype_out;
6111
  enum tree_code c1, c2;
6112
  int i;
6113
  tree prev_type, intermediate_type;
6114
  enum machine_mode intermediate_mode, prev_mode;
6115
  optab optab3, optab4;
6116
 
6117
  *multi_step_cvt = 0;
6118
  if (loop_info)
6119
    vect_loop = LOOP_VINFO_LOOP (loop_info);
6120
 
6121
  /* The result of a vectorized widening operation usually requires two vectors
6122
     (because the widened results do not fit into one vector). The generated
6123
     vector results would normally be expected to be generated in the same
6124
     order as in the original scalar computation, i.e. if 8 results are
6125
     generated in each vector iteration, they are to be organized as follows:
6126
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6127
 
6128
     However, in the special case that the result of the widening operation is
6129
     used in a reduction computation only, the order doesn't matter (because
6130
     when vectorizing a reduction we change the order of the computation).
6131
     Some targets can take advantage of this and generate more efficient code.
6132
     For example, targets like Altivec, that support widen_mult using a sequence
6133
     of {mult_even,mult_odd} generate the following vectors:
6134
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6135
 
6136
     When vectorizing outer-loops, we execute the inner-loop sequentially
6137
     (each vectorized inner-loop iteration contributes to VF outer-loop
6138
     iterations in parallel).  We therefore don't allow to change the order
6139
     of the computation in the inner-loop during outer-loop vectorization.  */
6140
 
6141
   if (vect_loop
6142
       && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6143
       && !nested_in_vect_loop_p (vect_loop, stmt))
6144
     ordered_p = false;
6145
   else
6146
     ordered_p = true;
6147
 
6148
  if (!ordered_p
6149
      && code == WIDEN_MULT_EXPR
6150
      && targetm.vectorize.builtin_mul_widen_even
6151
      && targetm.vectorize.builtin_mul_widen_even (vectype)
6152
      && targetm.vectorize.builtin_mul_widen_odd
6153
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
6154
    {
6155
      if (vect_print_dump_info (REPORT_DETAILS))
6156
        fprintf (vect_dump, "Unordered widening operation detected.");
6157
 
6158
      *code1 = *code2 = CALL_EXPR;
6159
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6160
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6161
      return true;
6162
    }
6163
 
6164
  switch (code)
6165
    {
6166
    case WIDEN_MULT_EXPR:
6167
      c1 = VEC_WIDEN_MULT_LO_EXPR;
6168
      c2 = VEC_WIDEN_MULT_HI_EXPR;
6169
      break;
6170
 
6171
    case WIDEN_LSHIFT_EXPR:
6172
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6173
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6174
      break;
6175
 
6176
    CASE_CONVERT:
6177
      c1 = VEC_UNPACK_LO_EXPR;
6178
      c2 = VEC_UNPACK_HI_EXPR;
6179
      break;
6180
 
6181
    case FLOAT_EXPR:
6182
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6183
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6184
      break;
6185
 
6186
    case FIX_TRUNC_EXPR:
6187
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6188
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6189
         computing the operation.  */
6190
      return false;
6191
 
6192
    default:
6193
      gcc_unreachable ();
6194
    }
6195
 
6196
  if (BYTES_BIG_ENDIAN)
6197
    {
6198
      enum tree_code ctmp = c1;
6199
      c1 = c2;
6200
      c2 = ctmp;
6201
    }
6202
 
6203
  if (code == FIX_TRUNC_EXPR)
6204
    {
6205
      /* The signedness is determined from output operand.  */
6206
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6207
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6208
    }
6209
  else
6210
    {
6211
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
6212
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
6213
    }
6214
 
6215
  if (!optab1 || !optab2)
6216
    return false;
6217
 
6218
  vec_mode = TYPE_MODE (vectype);
6219
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6220
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6221
    return false;
6222
 
6223
  *code1 = c1;
6224
  *code2 = c2;
6225
 
6226
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6227
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6228
    return true;
6229
 
6230
  /* Check if it's a multi-step conversion that can be done using intermediate
6231
     types.  */
6232
 
6233
  prev_type = vectype;
6234
  prev_mode = vec_mode;
6235
 
6236
  if (!CONVERT_EXPR_CODE_P (code))
6237
    return false;
6238
 
6239
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6240
     intermediate steps in promotion sequence.  We try
6241
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6242
     not.  */
6243
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6244
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6245
    {
6246
      intermediate_mode = insn_data[icode1].operand[0].mode;
6247
      intermediate_type
6248
        = lang_hooks.types.type_for_mode (intermediate_mode,
6249
                                          TYPE_UNSIGNED (prev_type));
6250
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6251
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6252
 
6253
      if (!optab3 || !optab4
6254
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6255
          || insn_data[icode1].operand[0].mode != intermediate_mode
6256
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6257
          || insn_data[icode2].operand[0].mode != intermediate_mode
6258
          || ((icode1 = optab_handler (optab3, intermediate_mode))
6259
              == CODE_FOR_nothing)
6260
          || ((icode2 = optab_handler (optab4, intermediate_mode))
6261
              == CODE_FOR_nothing))
6262
        break;
6263
 
6264
      VEC_quick_push (tree, *interm_types, intermediate_type);
6265
      (*multi_step_cvt)++;
6266
 
6267
      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6268
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6269
        return true;
6270
 
6271
      prev_type = intermediate_type;
6272
      prev_mode = intermediate_mode;
6273
    }
6274
 
6275
  VEC_free (tree, heap, *interm_types);
6276
  return false;
6277
}
6278
 
6279
 
6280
/* Function supportable_narrowing_operation
6281
 
6282
   Check whether an operation represented by the code CODE is a
6283
   narrowing operation that is supported by the target platform in
6284
   vector form (i.e., when operating on arguments of type VECTYPE_IN
6285
   and producing a result of type VECTYPE_OUT).
6286
 
6287
   Narrowing operations we currently support are NOP (CONVERT) and
6288
   FIX_TRUNC.  This function checks if these operations are supported by
6289
   the target platform directly via vector tree-codes.
6290
 
6291
   Output:
6292
   - CODE1 is the code of a vector operation to be used when
6293
   vectorizing the operation, if available.
6294
   - MULTI_STEP_CVT determines the number of required intermediate steps in
6295
   case of multi-step conversion (like int->short->char - in that case
6296
   MULTI_STEP_CVT will be 1).
6297
   - INTERM_TYPES contains the intermediate type required to perform the
6298
   narrowing operation (short in the above example).   */
6299
 
6300
bool
6301
supportable_narrowing_operation (enum tree_code code,
6302
                                 tree vectype_out, tree vectype_in,
6303
                                 enum tree_code *code1, int *multi_step_cvt,
6304
                                 VEC (tree, heap) **interm_types)
6305
{
6306
  enum machine_mode vec_mode;
6307
  enum insn_code icode1;
6308
  optab optab1, interm_optab;
6309
  tree vectype = vectype_in;
6310
  tree narrow_vectype = vectype_out;
6311
  enum tree_code c1;
6312
  tree intermediate_type;
6313
  enum machine_mode intermediate_mode, prev_mode;
6314
  int i;
6315
  bool uns;
6316
 
6317
  *multi_step_cvt = 0;
6318
  switch (code)
6319
    {
6320
    CASE_CONVERT:
6321
      c1 = VEC_PACK_TRUNC_EXPR;
6322
      break;
6323
 
6324
    case FIX_TRUNC_EXPR:
6325
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
6326
      break;
6327
 
6328
    case FLOAT_EXPR:
6329
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6330
         tree code and optabs used for computing the operation.  */
6331
      return false;
6332
 
6333
    default:
6334
      gcc_unreachable ();
6335
    }
6336
 
6337
  if (code == FIX_TRUNC_EXPR)
6338
    /* The signedness is determined from output operand.  */
6339
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6340
  else
6341
    optab1 = optab_for_tree_code (c1, vectype, optab_default);
6342
 
6343
  if (!optab1)
6344
    return false;
6345
 
6346
  vec_mode = TYPE_MODE (vectype);
6347
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6348
    return false;
6349
 
6350
  *code1 = c1;
6351
 
6352
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6353
    return true;
6354
 
6355
  /* Check if it's a multi-step conversion that can be done using intermediate
6356
     types.  */
6357
  prev_mode = vec_mode;
6358
  if (code == FIX_TRUNC_EXPR)
6359
    uns = TYPE_UNSIGNED (vectype_out);
6360
  else
6361
    uns = TYPE_UNSIGNED (vectype);
6362
 
6363
  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6364
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6365
     costly than signed.  */
6366
  if (code == FIX_TRUNC_EXPR && uns)
6367
    {
6368
      enum insn_code icode2;
6369
 
6370
      intermediate_type
6371
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6372
      interm_optab
6373
        = optab_for_tree_code (c1, intermediate_type, optab_default);
6374
      if (interm_optab != NULL
6375
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6376
          && insn_data[icode1].operand[0].mode
6377
             == insn_data[icode2].operand[0].mode)
6378
        {
6379
          uns = false;
6380
          optab1 = interm_optab;
6381
          icode1 = icode2;
6382
        }
6383
    }
6384
 
6385
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6386
     intermediate steps in promotion sequence.  We try
6387
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6388
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6389
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6390
    {
6391
      intermediate_mode = insn_data[icode1].operand[0].mode;
6392
      intermediate_type
6393
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6394
      interm_optab
6395
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6396
                               optab_default);
6397
      if (!interm_optab
6398
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6399
          || insn_data[icode1].operand[0].mode != intermediate_mode
6400
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6401
              == CODE_FOR_nothing))
6402
        break;
6403
 
6404
      VEC_quick_push (tree, *interm_types, intermediate_type);
6405
      (*multi_step_cvt)++;
6406
 
6407
      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6408
        return true;
6409
 
6410
      prev_mode = intermediate_mode;
6411
      optab1 = interm_optab;
6412
    }
6413
 
6414
  VEC_free (tree, heap, *interm_types);
6415
  return false;
6416
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.